validation.py 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231
  1. '''
  2. @author: olivier.massot, sept. 2018
  3. '''
  4. import time
  5. import zipfile
  6. from path import Path, TempDir
  7. from core import gis
  8. from core.cerberus_extend import CerberusErrorHandler, GeoValidator
  9. from schemas.common import SRID
  10. class BaseModel():
  11. filename = ""
  12. pk = ""
  13. schema = {}
  14. def __init__(self, **kwargs):
  15. self.__dict__.update(kwargs)
  16. class BaseGeoModel(BaseModel):
  17. def __init__(self, geom, **kwargs):
  18. super(BaseGeoModel, self).__init__(**kwargs)
  19. self.geom = geom
  20. class ValidatorInterruption(BaseException):
  21. pass
  22. class Checkpoint():
  23. def __init__(self, name, valid=True):
  24. self.name = name
  25. self.valid = valid
  26. VALIDATION_ERROR_LEVELS = {10: "MINEURE", 20: "AVERTISSEMENT", 30: "ERREUR", 40: "CRITIQUE"}
  27. MINOR = 10
  28. WARNING = 20
  29. ERROR = 30
  30. CRITICAL = 40
  31. class BaseValidationError():
  32. name = "Erreur"
  33. level = ERROR
  34. help = ""
  35. def __init__(self, message, filename="", field=""):
  36. self.message = message
  37. self.filename = filename
  38. self.field = field
  39. def __repr__(self):
  40. return " - ".join(filter(None, [self.name, self.filename, self.field, self.message]))
  41. # Erreurs dans le chargement des fichiers
  42. class MissingFile(BaseValidationError):
  43. level = CRITICAL
  44. name = "Fichier Manquant"
  45. class UnreadableFile(BaseValidationError):
  46. level = CRITICAL
  47. name = "Fichier Illisible"
  48. class WrongSrid(BaseValidationError):
  49. level = CRITICAL
  50. name = "Mauvais SRID"
  51. ### Erreurs dans la structure des données
  52. class DataError(BaseValidationError):
  53. name = "Erreur de format"
  54. level = ERROR
  55. # Erreurs dans le contenu, erreurs métiers
  56. class TechnicalValidationError(BaseValidationError):
  57. level = ERROR
  58. class DuplicatedPk(TechnicalValidationError):
  59. name = "Doublons dans le champs"
  60. class RelationError(TechnicalValidationError):
  61. name = "Un objet lié n'existe pas"
  62. class BaseValidator():
  63. schema_name = ""
  64. models = {}
  65. dataset = {}
  66. def __init__(self):
  67. self.valid = True
  68. self.checkpoints = []
  69. self.errors = []
  70. self.dt = 0
  71. def checkpoint(self, name):
  72. valid = (len(self.errors) == 0)
  73. self.checkpoints.append(Checkpoint(name, valid))
  74. if not valid:
  75. self.valid = False
  76. raise ValidatorInterruption()
  77. def log_error(self, validation_error):
  78. self.errors.append(validation_error)
  79. @classmethod
  80. def submit(cls, subject):
  81. """ prends un dossier ou une archive en entrée et vérifie son contenu """
  82. subject = Path(subject)
  83. if subject.isfile():
  84. with TempDir() as dirname:
  85. zip_ref = zipfile.ZipFile(subject, 'r')
  86. zip_ref.extractall(dirname)
  87. zip_ref.close()
  88. if Path(dirname / subject.stem).isdir(): # cas où l'archive contient un dossier qui lui-même contient les fichiers
  89. dirname /= subject.stem
  90. return cls._submit_folder(dirname)
  91. elif subject.isdir():
  92. return cls._submit_folder(subject)
  93. else:
  94. raise FileNotFoundError(f"Impossible de trouver le fichier ou répertoire: {subject}")
  95. @classmethod
  96. def _submit_folder(cls, folder):
  97. validator = cls()
  98. t0 = time.time()
  99. try:
  100. validator.validate(folder)
  101. except ValidatorInterruption:
  102. pass
  103. validator.dt = time.time() - t0
  104. report = validator.build_report(validator.schema_name, folder.name)
  105. return report
  106. def validate(self, folder):
  107. # Chargement des données en mémoire
  108. self._load_files(folder)
  109. self.checkpoint("Chargement des données")
  110. # Controle la structure des données (champs, formats et types)
  111. self._structure_validation()
  112. self.checkpoint("Contrôle de la structure des données")
  113. # Validation technique
  114. self._technical_validation()
  115. self.checkpoint("Validation Métier")
  116. def _load_files(self, folder):
  117. """ Charge les données du fichier et les associe à un modèle.
  118. Attention: pas de contrôle de format ou de validité à ce niveau! """
  119. raise NotImplementedError()
  120. def _structure_validation(self):
  121. for model in self.models:
  122. v = GeoValidator(model.schema, error_handler=CerberusErrorHandler)
  123. for item in self.dataset[model]:
  124. v.validate(item.__dict__)
  125. for field, verrors in v.errors.items():
  126. for err in verrors:
  127. self.log_error(DataError(err, filename=model.filename, field=field))
  128. @classmethod
  129. def _technical_validation(cls):
  130. raise NotImplementedError()
  131. def build_report(self, schema, filename):
  132. report = {}
  133. report["schema"] = schema
  134. report["filename"] = filename
  135. report["exec_time"] = "{} s.".format(self.dt)
  136. report["checkpoints"] = [{"name": chk.name, "valid": chk.valid} for chk in self.checkpoints]
  137. report["errors"] = {}
  138. for err in self.errors:
  139. if not err.name in report["errors"]:
  140. report["errors"][err.name] = {"help": err.help, "list": []}
  141. err_report = {"filename": err.filename or "-",
  142. "field": err.field or "-",
  143. "message": err.message}
  144. if err_report not in report["errors"][err.name]["list"]:
  145. report["errors"][err.name]["list"].append(err_report)
  146. return report
  147. class NetgeoValidator(BaseValidator):
  148. def _load_files(self, folder):
  149. for model in self.models:
  150. filename = model.filename
  151. path_ = Path(folder) / filename
  152. if not path_.isfile():
  153. self.log_error(MissingFile("Fichier manquant: '{}'".format(filename)))
  154. continue
  155. self.dataset[model] = []
  156. try:
  157. with gis.ShapeFile(path_, srid=SRID) as sf:
  158. fields = sf.fields()
  159. for record in sf.records():
  160. data = dict(zip(fields, record.record))
  161. item = model(record.shape, **data)
  162. self.dataset[model].append(item)
  163. except gis.ShapeError as e:
  164. self.log_error(UnreadableFile(str(e)))
  165. except gis.SridError:
  166. self.log_error(WrongSrid(str(e)))