validation.py 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188
  1. '''
  2. @author: olivier.massot, sept. 2018
  3. '''
  4. import time
  5. import zipfile
  6. from cerberus.validator import Validator
  7. from path import Path, TempDir
  8. from core import gis
  9. from core.cerberus_extend import CerberusErrorHandler, GeoValidator
  10. from core.validation_errors import MissingFile, \
  11. UnreadableFile, FormatError, WrongSrid
  12. from schemas.common import SRID
  13. class BaseModel():
  14. filename = ""
  15. pk = ""
  16. schema = {}
  17. def __init__(self, **kwargs):
  18. self.__dict__.update(kwargs)
  19. @classmethod
  20. def index_item(cls, instance):
  21. if getattr(instance, cls.pk) in cls.index:
  22. raise ValueError("Duplicate PK")
  23. cls.index[getattr(instance, cls.pk)] = instance
  24. class BaseGeoModel(BaseModel):
  25. def __init__(self, geom, **kwargs):
  26. super(BaseGeoModel, self).__init__(**kwargs)
  27. self.geom = geom
  28. class ValidatorInterruption(BaseException):
  29. pass
  30. class Checkpoint():
  31. def __init__(self, name, valid=True):
  32. self.name = name
  33. self.valid = valid
  34. class BaseValidator():
  35. schema_name = ""
  36. models = {}
  37. dataset = {}
  38. def __init__(self):
  39. self.valid = True
  40. self.checkpoints = []
  41. self.errors = []
  42. self.dt = 0
  43. def checkpoint(self, name):
  44. valid = (len(self.errors) == 0)
  45. self.checkpoints.append(Checkpoint(name, valid))
  46. if not valid:
  47. self.valid = False
  48. raise ValidatorInterruption()
  49. def log_error(self, validation_error):
  50. self.errors.append(validation_error)
  51. @classmethod
  52. def submit(cls, subject):
  53. """ prends un dossier ou une archive en entrée et vérifie son contenu """
  54. subject = Path(subject)
  55. if subject.isfile():
  56. with TempDir() as dirname:
  57. zip_ref = zipfile.ZipFile(subject, 'r')
  58. zip_ref.extractall(dirname)
  59. zip_ref.close()
  60. if Path(dirname / subject.stem).isdir(): # cas où l'archive contient un dossier qui lui-même contient les fichiers
  61. dirname /= subject.stem
  62. return cls._submit_folder(dirname)
  63. elif subject.isdir():
  64. return cls._submit_folder(subject)
  65. else:
  66. raise FileNotFoundError(f"Impossible de trouver le fichier ou répertoire: {subject}")
  67. @classmethod
  68. def _submit_folder(cls, folder):
  69. validator = cls()
  70. t0 = time.time()
  71. try:
  72. validator.validate(folder)
  73. except ValidatorInterruption:
  74. pass
  75. validator.dt = time.time() - t0
  76. report = validator.build_report("{} - Validation de {}".format(validator.schema_name, folder.name))
  77. return report
  78. def validate(self, folder):
  79. # Chargement des données en mémoire
  80. self._load_files(folder)
  81. self.checkpoint("Chargement des données")
  82. # Controle la structure des données (champs, formats et types)
  83. self._structure_validation()
  84. self.checkpoint("Contrôle de la structure des données")
  85. # Validation technique
  86. self._technical_validation()
  87. self.checkpoint("Validation Métier")
  88. def _load_files(self, folder):
  89. """ Charge les données du fichier et les associe à un modèle.
  90. Attention: pas de contrôle de format ou de validité à ce niveau! """
  91. raise NotImplementedError()
  92. def _structure_validation(self):
  93. for model in self.models:
  94. v = GeoValidator(model.schema, error_handler=CerberusErrorHandler)
  95. for item in self.dataset[model]:
  96. v.validate(item.__dict__)
  97. for field, verrors in v.errors.items():
  98. for err in verrors:
  99. self.log_error(FormatError(err, filename=model.filename, field=field))
  100. @classmethod
  101. def _technical_validation(cls):
  102. raise NotImplementedError()
  103. def build_report(self, title):
  104. report = {}
  105. report["title"] = title
  106. report["exec_time"] = "{} s.".format(self.dt)
  107. report["checkpoints"] = [{"name": chk.name, "valid": chk.valid} for chk in self.checkpoints]
  108. report["errors"] = {}
  109. for err in self.errors:
  110. if not err.name in report["errors"]:
  111. report["errors"][err.name] = {"help": err.help, "list": []}
  112. err_report = {"filename": err.filename or "-",
  113. "field": err.field or "-",
  114. "message": err.message}
  115. if err_report not in report["errors"][err.name]["list"]:
  116. report["errors"][err.name]["list"].append(err_report)
  117. return report
  118. class NetgeoValidator(BaseValidator):
  119. def _load_files(self, folder):
  120. for model in self.models:
  121. filename = model.filename
  122. path_ = Path(folder) / filename
  123. if not path_.isfile():
  124. self.log_error(MissingFile("Fichier manquant: '{}'".format(filename)))
  125. continue
  126. self.dataset[model] = []
  127. try:
  128. with gis.ShapeFile(path_, srid=SRID) as sf:
  129. fields = sf.fields()
  130. for record in sf.records():
  131. data = dict(zip(fields, record.record))
  132. item = model(record.shape, **data)
  133. self.dataset[model].append(item)
  134. except gis.ShapeError as e:
  135. self.log_error(UnreadableFile(str(e)))
  136. except gis.SridError:
  137. self.log_error(WrongSrid(str(e)))