validation.py 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286
  1. '''
  2. @author: olivier.massot, sept. 2018
  3. '''
  4. import time
  5. import zipfile
  6. from path import Path, TempDir
  7. from core import gis_
  8. from core.cerberus_extend import CerberusErrorHandler, GeoValidator, \
  9. _translate_messages, ExtendedValidator
  10. from schemas.common import SRID
  11. class BaseModel():
  12. filename = ""
  13. pk = ""
  14. schema = {}
  15. def __init__(self, **kwargs):
  16. self.__dict__.update(kwargs)
  17. class BaseGeoModel(gis_.Feature):
  18. filename = ""
  19. pk = ""
  20. geom_type = 0
  21. bounding_box = (0,0,1,1)
  22. schema = {}
  23. def __init__(self, feature):
  24. self.__dict__.update(feature.__dict__)
  25. class ValidatorInterruption(BaseException):
  26. pass
  27. class Checkpoint():
  28. def __init__(self, name, valid=True):
  29. self.name = name
  30. self.valid = valid
  31. VALIDATION_ERROR_LEVELS = {10: "MINEURE", 20: "AVERTISSEMENT", 30: "ERREUR", 40: "CRITIQUE"}
  32. MINOR = 10
  33. WARNING = 20
  34. ERROR = 30
  35. CRITICAL = 40
  36. class BaseValidationError():
  37. name = "Erreur"
  38. level = ERROR
  39. help = ""
  40. def __init__(self, message, filename="", field=""):
  41. self.message = message
  42. self.filename = filename
  43. self.field = field
  44. def __repr__(self):
  45. return " - ".join(filter(None, [self.name, self.filename, self.field, self.message]))
# Errors raised while loading the files
  47. class MissingFile(BaseValidationError):
  48. level = CRITICAL
  49. name = "Fichier Manquant"
  50. class UnreadableFile(BaseValidationError):
  51. level = CRITICAL
  52. name = "Fichier Illisible"
  53. class WrongSrid(BaseValidationError):
  54. level = CRITICAL
  55. name = "Mauvais SRID"
# Errors in the structure of the data
  57. class DataError(BaseValidationError):
  58. name = "Erreur de format"
  59. level = ERROR
  60. # level = CRITICAL
  61. class PositionError(BaseValidationError):
  62. name = "Position hors de la zone autorisée"
  63. level = CRITICAL
  64. class GeomTypeError(BaseValidationError):
  65. name = "Type de géométrie invalide"
  66. level = CRITICAL
  67. class InvalidGeometry(BaseValidationError):
  68. name = "Géométrie invalide"
# Errors in the content, business-rule errors
  70. class TechnicalValidationError(BaseValidationError):
  71. level = ERROR
  72. class DuplicatedPk(TechnicalValidationError):
  73. name = "Doublons dans le champs"
  74. class RelationError(TechnicalValidationError):
  75. level = CRITICAL
  76. name = "Un objet lié n'existe pas"
  77. class DuplicatedGeom(TechnicalValidationError):
  78. name = "Doublon graphique"
  79. class MissingItem(TechnicalValidationError):
  80. name = "Elément manquant"
  81. class DimensionError(TechnicalValidationError):
  82. name = "Elément manquant"
  83. class BaseValidator():
  84. schema_name = ""
  85. models = {}
  86. dataset = {}
  87. def __init__(self):
  88. self.valid = True
  89. self.checkpoints = []
  90. self.errors = []
  91. self.dt = 0
  92. def checkpoint(self, title):
  93. self.checkpoints.append(Checkpoint(title, (not self.errors)))
  94. if self.errors:
  95. self.valid = False
  96. if self.critical_happened():
  97. raise ValidatorInterruption()
  98. def critical_happened(self):
  99. return any([err.level == CRITICAL for err in self.errors])
  100. def log_error(self, validation_error):
  101. self.errors.append(validation_error)
  102. @classmethod
  103. def submit(cls, subject):
  104. """ prends un dossier ou une archive en entrée et vérifie son contenu """
  105. subject = Path(subject)
  106. if subject.isfile():
  107. with TempDir() as dirname:
  108. zip_ref = zipfile.ZipFile(subject, 'r')
  109. zip_ref.extractall(dirname)
  110. zip_ref.close()
  111. if Path(dirname / subject.stem).isdir(): # cas où l'archive contient un dossier qui lui-même contient les fichiers
  112. dirname /= subject.stem
  113. return cls._submit_folder(dirname)
  114. elif subject.isdir():
  115. return cls._submit_folder(subject)
  116. else:
  117. raise FileNotFoundError(f"Impossible de trouver le fichier ou répertoire: {subject}")
  118. @classmethod
  119. def _submit_folder(cls, folder):
  120. validator = cls()
  121. t0 = time.time()
  122. try:
  123. validator.validate(folder)
  124. except ValidatorInterruption:
  125. pass
  126. validator.dt = time.time() - t0
  127. report = validator.build_report(validator.schema_name, folder.name)
  128. return report
  129. def validate(self, folder):
  130. # Chargement des données en mémoire
  131. self._load_files(folder)
  132. self.checkpoint("Chargement des données")
  133. # Controle la structure des données (champs, formats et types)
  134. self._structure_validation()
  135. self.checkpoint("Contrôle de la structure des données")
  136. # Validation technique
  137. try:
  138. self._technical_validation()
  139. self.checkpoint("Validation Métier")
  140. except:
  141. self.checkpoint("Validation Métier [interrompu]")
  142. def _load_files(self, folder):
  143. """ Charge les données du fichier et les associe à un modèle.
  144. Attention: pas de contrôle de format ou de validité à ce niveau! """
  145. raise NotImplementedError()
  146. def _structure_validation(self):
  147. for model in self.models:
  148. v = ExtendedValidator(model.schema, purge_unknown=True, error_handler=CerberusErrorHandler, require_all=True)
  149. for item in self.dataset[model]:
  150. v.validate(item.__dict__)
  151. for field, verrors in v.errors.items():
  152. for err in verrors:
  153. self.log_error(DataError(_translate_messages(err), filename=model.filename, field=field))
  154. @classmethod
  155. def _technical_validation(cls):
  156. raise NotImplementedError()
  157. def build_report(self, schema, filename):
  158. report = {}
  159. report["schema"] = schema
  160. report["filename"] = filename
  161. report["exec_time"] = "{:.3g} s.".format(self.dt)
  162. report["checkpoints"] = [{"name": chk.name, "valid": chk.valid} for chk in self.checkpoints]
  163. report["errors"] = {}
  164. for err in self.errors:
  165. if not err.name in report["errors"]:
  166. report["errors"][err.name] = {"help": err.help, "list": []}
  167. err_report = {"filename": err.filename or "-",
  168. "field": err.field or "-",
  169. "message": err.message}
  170. if err_report not in report["errors"][err.name]["list"]:
  171. report["errors"][err.name]["list"].append(err_report)
  172. return report
  173. class NetgeoValidator(BaseValidator):
  174. def _load_files(self, folder):
  175. for model in self.models:
  176. filename = model.filename
  177. path_ = Path(folder) / filename
  178. if not path_.isfile():
  179. self.log_error(MissingFile("Fichier manquant: '{}'".format(filename)))
  180. continue
  181. self.dataset[model] = []
  182. try:
  183. ds = gis_.Datasource(path_)
  184. layer = ds.layer
  185. if layer.srid != SRID:
  186. self.log_error(WrongSrid("Mauvaise projection: {} (attendu: {})".format(layer.srid, SRID)))
  187. for feature in layer:
  188. item = model(feature)
  189. self.dataset[model].append(item)
  190. except IOError:
  191. self.log_error(UnreadableFile("Fichier illisible: {}".format(path_.name)))
  192. def _structure_validation(self):
  193. for model in self.models:
  194. v = ExtendedValidator(model.schema, purge_unknown=True, error_handler=CerberusErrorHandler, require_all=True)
  195. xmin, ymin, xmax, ymax = model.bounding_box
  196. for item in self.dataset[model]:
  197. # geom type
  198. if item.geom_type != model.geom_type:
  199. self.log_error(GeomTypeError("Type de géométrie invalide: {} (attendu: {})".format(item.geom_name, gis_.GEOM_NAMES[model.geom_type]), filename=model.filename, field="geom"))
  200. # bounding box
  201. x1, y1, x2, y2 = item.bounding_box
  202. if any(x < xmin or x > xmax for x in (x1, x2)) or \
  203. any(y < ymin or y > ymax for y in (y1, y2)):
  204. self.log_error(PositionError("Situé hors de l'emprise autorisée", filename=model.filename, field="geom"))
  205. v.validate(item.__dict__)
  206. for field, verrors in v.errors.items():
  207. for err in verrors:
  208. self.log_error(DataError(_translate_messages(err), filename=model.filename, field=field))