validation.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146
  1. '''
  2. @author: olivier.massot, sept. 2018
  3. '''
  4. from datetime import datetime
  5. import zipfile
  6. from cerberus.validator import Validator
  7. from path import Path, TempDir
  8. from core.validation_errors import MissingFile, FormatError
  9. class BaseModel():
  10. index = {}
  11. pk = ""
  12. schema = {}
  13. def __init__(self, **kwargs):
  14. self.__dict__.update(kwargs)
  15. @classmethod
  16. def indexer(cls, instance):
  17. if instance.getitem(cls.pk) in cls.index:
  18. raise ValueError("Duplicate PK")
  19. cls.index[instance.getitem(cls.pk)]
  20. class BaseGeoModel(BaseModel):
  21. def __init__(self, geom, **kwargs):
  22. super(BaseGeoModel, self).__init__(**kwargs)
  23. self.geom = geom
  24. class ValidationReport():
  25. def __init__(self, title = ""):
  26. self.title = title
  27. self.errors = {}
  28. @property
  29. def valid(self):
  30. return len(self.error) == 0
  31. def is_french_date(field, value, error):
  32. try:
  33. datetime.strptime(value, '%d/%m/%Y')
  34. except:
  35. error(field, 'Doit être une date au format jj/mm/aaaa')
  36. class BaseValidator():
  37. FILES = {}
  38. @classmethod
  39. def submit(cls, subject):
  40. """ prends un dossier ou une archive en entrée et vérifie son contenu """
  41. subject = Path(subject)
  42. if subject.isfile():
  43. with TempDir() as dirname:
  44. zip_ref = zipfile.ZipFile(subject, 'r')
  45. zip_ref.extractall(dirname)
  46. zip_ref.close()
  47. if Path(dirname / subject.stem).isdir(): # cas où l'archive contient un dossier qui lui-même contient les fichiers
  48. dirname /= subject.stem
  49. return cls._submit_folder(dirname)
  50. elif subject.isdir():
  51. return cls._submit_folder(subject)
  52. else:
  53. raise FileNotFoundError(f"Impossible de trouver le fichier ou répertoire: {subject}")
  54. @classmethod
  55. def _submit_folder(cls, folder):
  56. dataset = {}
  57. report = ValidationReport("Contrôle des données de {} au format {}".format(folder.name, cls.name))
  58. # Charge les données en mémoire
  59. for filename, model in cls.files.items():
  60. path_ = Path(folder) / filename
  61. if not path_.isfile():
  62. report.errors[MissingFile] = MissingFile("Le fichier '{}' est manquant".format(filename))
  63. continue
  64. dataset[model] = cls._load_file(model, path_)
  65. # Controle la structure des données (champs, formats et types)
  66. cls._structure_validation(dataset)
  67. # Contrôle la géométrie (optionnel)
  68. cls._geometry_validation(dataset)
  69. # Validation technique
  70. cls._technical_validation(dataset)
  71. return report
  72. @classmethod
  73. def _load_file(cls, model, filename):
  74. """ Charge les données du fichier et les associe à un modèle.
  75. Attention: pas de contrôle de format o de validité à ce niveau! """
  76. raise NotImplementedError()
  77. @classmethod
  78. def _structure_validation(cls, dataset):
  79. errors = {}
  80. errors[FormatError] = []
  81. for model in dataset:
  82. v = Validator(model.schema)
  83. for item in dataset[model]:
  84. v.validate(item.__dict__)
  85. for fieldname, verrors in v.errors.items():
  86. for err in verrors:
  87. errors[FormatError].append(FormatError("{}: {}".format(fieldname, err)))
  88. return errors
  89. @classmethod
  90. def _geometry_validation(cls, dataset):
  91. pass
  92. @classmethod
  93. def _organize_dataset(cls, dataset):
  94. raise NotImplementedError()
  95. @classmethod
  96. def _technical_validation(cls, dataset):
  97. raise NotImplementedError()