validation.py 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249
  1. '''
  2. @author: olivier.massot, sept. 2018
  3. '''
  4. import time
  5. import zipfile
  6. from path import Path, TempDir
  7. from core import gis
  8. from core.cerberus_extend import CerberusErrorHandler, GeoValidator, \
  9. _translate_messages
  10. from schemas.common import SRID
  class BaseModel:
      """Generic record: every keyword argument becomes an instance attribute.

      Subclasses are expected to override the class-level descriptors below.
      """
      # Name of the file the records of this model are read from.
      filename = ""
      # Name of the field identifying a record (presumably its primary key).
      pk = ""
      # Validation schema, handed to the validator by BaseValidator.
      schema = {}

      def __init__(self, **kwargs):
          # Write straight into the instance namespace, one attribute per key.
          vars(self).update(kwargs)
  class BaseGeoModel(BaseModel):
      """Record that also carries a geometry in its ``geom`` attribute."""

      def __init__(self, geom, **kwargs):
          # Plain attributes first, then the geometry.
          super().__init__(**kwargs)
          self.geom = geom
  class ValidatorInterruption(BaseException):
      """Signal raised to abort a validation run.

      Derives from ``BaseException`` (not ``Exception``), so handlers written
      as ``except Exception`` do not intercept it.
      """
  class Checkpoint:
      """Named validation step together with its pass/fail status."""

      def __init__(self, name, valid=True):
          # Tuple assignment keeps the two fields visibly paired.
          self.name, self.valid = name, valid
  # Severity levels attached to validation errors, from least to most severe.
  MINOR = 10
  WARNING = 20
  ERROR = 30
  CRITICAL = 40

  # Label associated with each severity level.
  VALIDATION_ERROR_LEVELS = {
      MINOR: "MINEURE",
      WARNING: "AVERTISSEMENT",
      ERROR: "ERREUR",
      CRITICAL: "CRITIQUE",
  }
  class BaseValidationError:
      """Base class of every error collected during a validation run.

      Class attributes describe the error family (``name``, ``level``,
      ``help``); instances carry the occurrence (message, file, field).
      """
      name = "Erreur"
      level = ERROR
      help = ""

      def __init__(self, message, filename="", field=""):
          self.message = message
          self.filename = filename
          self.field = field

      def __repr__(self):
          # Empty parts are left out so no dangling " - " separator appears.
          parts = (self.name, self.filename, self.field, self.message)
          return " - ".join(part for part in parts if part)
  42. # Erreurs dans le chargement des fichiers
  class MissingFile(BaseValidationError):
      """Critical error: a file expected for a model was not found."""
      name = "Fichier Manquant"
      level = CRITICAL
  class UnreadableFile(BaseValidationError):
      """Critical error: a file exists but could not be read."""
      name = "Fichier Illisible"
      level = CRITICAL
  class WrongSrid(BaseValidationError):
      """Critical error: a file does not use the expected SRID."""
      name = "Mauvais SRID"
      level = CRITICAL
  52. ### Erreurs dans la structure des données
  class DataError(BaseValidationError):
      """Format error found while checking records against a model schema."""
      level = ERROR  # the original keeps a commented-out CRITICAL alternative
      name = "Erreur de format"
  57. # Erreurs dans le contenu, erreurs métiers
  class TechnicalValidationError(BaseValidationError):
      """Base class for content / business-rule errors (ERROR level by default)."""
      level = ERROR
  class DuplicatedPk(TechnicalValidationError):
      """Duplicate values in a field (presumably the model's ``pk`` field)."""
      name = "Doublons dans le champs"
  class RelationError(TechnicalValidationError):
      """Critical error: an object referenced by another one does not exist."""
      name = "Un objet lié n'existe pas"
      level = CRITICAL
  class DuplicatedGeom(TechnicalValidationError):
      """Graphical duplicate (presumably two items sharing one geometry)."""
      name = "Doublon graphique"
  class MissingItem(TechnicalValidationError):
      """An expected element is missing from the data."""
      name = "Elément manquant"
  class DimensionError(TechnicalValidationError):
      """Error on the dimension of an element.

      The label used to be a copy of ``MissingItem``'s ("Elément manquant").
      Reports group errors by ``name``, so both kinds of error ended up under
      the same heading; this class now has its own label.
      """
      # NOTE(review): wording chosen during review - confirm the final label.
      name = "Erreur de dimension"
  class BaseValidator():
      """Base class driving the validation of a delivered folder or archive.

      Subclasses declare ``schema_name`` and ``models`` and implement
      ``_load_files`` and ``_technical_validation``. Errors are accumulated in
      ``self.errors``; ``checkpoint`` records the progress of the run and
      aborts it (``ValidatorInterruption``) once a CRITICAL error was logged.
      """
      schema_name = ""
      models = {}
      # Class-level default only: each instance works on its own copy, see
      # __init__.
      dataset = {}

      def __init__(self):
          self.valid = True
          self.checkpoints = []
          self.errors = []
          self.dt = 0
          # Per-instance copy: loading data into the shared class-level dict
          # would leak the items of one run into every later validator.
          self.dataset = dict(self.dataset)

      def checkpoint(self, title):
          """Record a checkpoint named ``title`` and abort on critical errors.

          The checkpoint is valid only if no error has been logged so far.

          Raises:
              ValidatorInterruption: if a CRITICAL error has been logged.
          """
          self.checkpoints.append(Checkpoint(title, not self.errors))
          if self.errors:
              self.valid = False
          if self.critical_happened():
              raise ValidatorInterruption()

      def critical_happened(self):
          """Return True if at least one logged error has the CRITICAL level."""
          return any(err.level == CRITICAL for err in self.errors)

      def log_error(self, validation_error):
          """Add ``validation_error`` to the errors of this run."""
          self.errors.append(validation_error)

      @classmethod
      def submit(cls, subject):
          """Take a folder or an archive as input and check its content.

          An archive is extracted into a temporary directory first; if it
          contains a folder named after the archive, that folder is validated.

          Returns:
              The report built by ``build_report``.

          Raises:
              FileNotFoundError: if ``subject`` is neither a file nor a folder.
          """
          subject = Path(subject)
          if subject.isfile():
              with TempDir() as dirname:
                  # Context manager: the archive is closed even if the
                  # extraction fails.
                  with zipfile.ZipFile(subject, 'r') as zip_ref:
                      zip_ref.extractall(dirname)
                  folder = dirname
                  # Case where the archive contains a folder which itself
                  # contains the files.
                  nested = Path(dirname / subject.stem)
                  if nested.isdir():
                      folder = nested
                  # Validate inside the ``with`` block, while the temporary
                  # directory still exists.
                  return cls._submit_folder(folder)
          if subject.isdir():
              return cls._submit_folder(subject)
          raise FileNotFoundError(f"Impossible de trouver le fichier ou répertoire: {subject}")

      @classmethod
      def _submit_folder(cls, folder):
          """Validate ``folder`` with a fresh validator and return its report."""
          validator = cls()
          t0 = time.time()
          try:
              validator.validate(folder)
          except ValidatorInterruption:
              # A critical error stopped the run early; the report is built
              # from what was collected up to that point.
              pass
          validator.dt = time.time() - t0
          return validator.build_report(validator.schema_name, folder.name)

      def validate(self, folder):
          """Run the three validation stages, with a checkpoint after each.

          Raises:
              ValidatorInterruption: from ``checkpoint``, as soon as a CRITICAL
                  error has been logged.
          """
          # Load the data in memory
          self._load_files(folder)
          self.checkpoint("Chargement des données")

          # Check the structure of the data (fields, formats and types)
          self._structure_validation()
          self.checkpoint("Contrôle de la structure des données")

          # Technical validation
          try:
              self._technical_validation()
          except (ValidatorInterruption, Exception):
              # Interrupted by a critical error or an unexpected failure.
              # Unlike a bare ``except:``, KeyboardInterrupt and SystemExit
              # still propagate.
              # NOTE(review): the exception itself is not reported anywhere.
              self.checkpoint("Validation Métier [interrompu]")
          else:
              # Outside the ``try`` block: an interruption raised by this
              # checkpoint must not record a second, "[interrompu]" one.
              self.checkpoint("Validation Métier")

      def _load_files(self, folder):
          """Load the data of the files and bind them to a model.

          Warning: no format or validity check at this level!
          """
          raise NotImplementedError()

      def _structure_validation(self):
          """Check every loaded item against the schema of its model.

          Each error reported by the validator is logged as a ``DataError``.
          """
          for model in self.models:
              v = GeoValidator(model.schema,
                               purge_unknown=True,
                               error_handler=CerberusErrorHandler,
                               require_all=True)
              # A model with no loaded data is skipped instead of raising
              # KeyError.
              for item in self.dataset.get(model, ()):
                  v.validate(item.__dict__)
                  for field, verrors in v.errors.items():
                      for err in verrors:
                          self.log_error(DataError(_translate_messages(err),
                                                   filename=model.filename,
                                                   field=field))

      # NOTE(review): declared as a classmethod although ``validate`` calls it
      # on the instance; kept as is for compatibility.
      @classmethod
      def _technical_validation(cls):
          """Business validation of the loaded data; to be implemented."""
          raise NotImplementedError()

      def build_report(self, schema, filename):
          """Build the report of the run as a plain dict.

          Errors are grouped by error name; identical entries (same file,
          field and message) are listed once.
          """
          report = {}
          report["schema"] = schema
          report["filename"] = filename
          report["exec_time"] = "{:.3g} s.".format(self.dt)
          report["checkpoints"] = [{"name": chk.name, "valid": chk.valid} for chk in self.checkpoints]
          report["errors"] = {}

          for err in self.errors:
              # The first error of a given name creates its group.
              group = report["errors"].setdefault(err.name, {"help": err.help, "list": []})
              err_report = {"filename": err.filename or "-",
                            "field": err.field or "-",
                            "message": err.message}
              if err_report not in group["list"]:
                  group["list"].append(err_report)
          return report
  161. class NetgeoValidator(BaseValidator):
  def _load_files(self, folder):
      """Load the shapefile of every model found in ``folder``.

      For each model, the file ``model.filename`` is read and every record is
      turned into a ``model`` instance (shape plus field values) stored in
      ``self.dataset[model]``. Problems are logged, not raised: ``MissingFile``
      when the file is absent, ``UnreadableFile`` on ``gis.ShapeError`` and
      ``WrongSrid`` on ``gis.SridError``.
      """
      for model in self.models:
          filename = model.filename
          path_ = Path(folder) / filename

          if not path_.isfile():
              self.log_error(MissingFile("Fichier manquant: '{}'".format(filename)))
              continue

          self.dataset[model] = []
          try:
              with gis.ShapeFile(path_, srid=SRID) as sf:
                  fields = sf.fields()
                  for record in sf.records():
                      # Pair the field names with the values of the record.
                      data = dict(zip(fields, record.record))
                      item = model(record.shape, **data)
                      self.dataset[model].append(item)
          except gis.ShapeError as e:
              self.log_error(UnreadableFile(str(e)))
          except gis.SridError as e:
              # The exception has to be bound here: ``e`` from the clause above
              # does not exist in this handler, so using it raised a NameError
              # instead of logging the error.
              self.log_error(WrongSrid(str(e)))