validation.py 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316
  1. '''
  2. @author: olivier.massot, sept. 2018
  3. '''
  4. import time
  5. import zipfile
  6. from path import Path, TempDir
  7. from core import gis_
  8. from core.cerberus_extend import CerberusErrorHandler, \
  9. _translate_messages, ExtendedValidator
  10. from schemas.common import SRID
  class ValidatorInterruption(BaseException):
      """Signal used to abort a validation run early.

      Raised by ``BaseValidator.checkpoint`` when a critical error has been
      logged, and swallowed by ``BaseValidator._submit_folder``.  It derives
      from ``BaseException``, so ``except Exception`` handlers do not catch it.
      """
  class Checkpoint:
      """Outcome of one named validation stage.

      ``name`` is the stage title and ``valid`` tells whether the stage
      completed without any logged error (defaults to ``True``).
      """

      def __init__(self, name, valid=True):
          self.name, self.valid = name, valid
  17. ########### MODELES ################
  class BaseModel:
      """Base class for plain (non-geographic) data models.

      Class attributes are meant to be overridden by subclasses; keyword
      arguments given to the constructor become instance attributes as-is.
      """

      # Validation schema handed to the validator for each item of this model.
      schema = {}
      # Presumably the name of the primary-key field -- not read in this module.
      pk = ""
      # Name of the source file holding this model's records.
      filename = ""

      def __init__(self, **kwargs):
          # Write straight into the instance dict, without going through
          # attribute-setting hooks.
          vars(self).update(kwargs)
  class BaseGeoModel(gis_.Feature):
      """Base class for geographic data models built from a GIS feature.

      The constructor copies every instance attribute of the given feature
      onto the new object.  Attributes copied this way shadow the class-level
      defaults below when looked up on the instance.
      """

      # Validation schema handed to the validator for each item of this model.
      schema = {}
      # Allowed extent as (xmin, ymin, xmax, ymax).
      bounding_box = (0,0,1,1)
      # Expected geometry type code; compared with each item's ``geom_type``.
      geom_type = 0
      # Presumably the name of the primary-key field -- not read in this module.
      pk = ""
      # Name of the source file holding this model's features.
      filename = ""

      def __init__(self, feature):
          feature_state = feature.__dict__
          self.__dict__.update(feature_state)
  32. ########### ERREURS DE VALIDATION ################
  # Severity levels, in ascending numeric order.
  MINOR, WARNING, ERROR, CRITICAL = 10, 20, 30, 40

  # Display label associated with each severity level.
  VALIDATION_ERROR_LEVELS = {
      MINOR: "MINEURE",
      WARNING: "AVERTISSEMENT",
      ERROR: "ERREUR",
      CRITICAL: "CRITIQUE",
  }
  class BaseValidationError:
      """Base record describing one problem found during validation.

      Class attributes describe the error category (overridden by subclasses);
      instance attributes describe the particular occurrence.
      """

      # Help text copied into the report for this category.
      help = ""
      # Severity; CRITICAL errors make the validator stop at its next checkpoint.
      level = ERROR
      # Category label, used as the grouping key in the report.
      name = "Erreur"
      # Copied into the report as "order_" -- presumably a display order.
      order_ = 0

      def __init__(self, message, filename="", field=""):
          self.message = message
          self.filename = filename
          self.field = field

      def __repr__(self):
          # Join the non-empty parts only, so missing filename/field leave no gap.
          parts = (self.name, self.filename, self.field, self.message)
          return " - ".join(part for part in parts if part)
  49. # Erreurs dans le chargement des fichiers
  class InputError(BaseValidationError):
      """Error raised while loading the input files.

      CRITICAL level: the validator aborts at the checkpoint following it.
      """

      name = "Erreur de chargement"
      level = CRITICAL
      order_ = 0
  class MissingFile(InputError):
      """Loading error: an expected file is not present in the folder."""

      name = "Fichier Manquant"
      order_ = 1
  class UnreadableFile(InputError):
      """Loading error: a file exists but could not be read."""

      name = "Fichier Illisible"
      order_ = 2
  class WrongSrid(InputError):
      """Loading error: a layer's SRID differs from the expected one."""

      name = "Mauvais SRID"
      order_ = 3
  63. ### Erreurs dans la structure des données
  class StructureError(BaseValidationError):
      """Error in the structure of the data (fields, formats, types)."""

      name = "Erreur de structure"
      level = ERROR
      order_ = 10
  class GeomTypeError(StructureError):
      """Structure error: an item's geometry type is not the model's.

      CRITICAL level: the validator aborts at the checkpoint following it.
      """

      name = "Type de géométrie invalide"
      level = CRITICAL
      order_ = 12
  class BoundingBoxError(StructureError):
      """Structure error: coordinates fall outside the allowed extent."""

      name = "Coordonnées hors de la zone autorisée"
      order_ = 11
  class InvalidGeometry(StructureError):
      """Structure error labelled "Géométrie invalide" (invalid geometry)."""

      name = "Géométrie invalide"
      order_ = 13
  class DataError(StructureError):
      """Structure error: a field value failed schema validation."""

      name = "Erreur de format"
      order_ = 14
  81. # Erreurs dans le contenu, erreurs métiers
  class TechnicalValidationError(BaseValidationError):
      """Error in the content of the data (business rules)."""

      name = "Erreur technique"
      level = ERROR
      order_ = 20
  class UniqueError(TechnicalValidationError):
      """Content error labelled "Doublons dans le champs" (duplicate values)."""

      name = "Doublons dans le champs"
      order_ = 21
  class RelationError(TechnicalValidationError):
      """Content error: a referenced (linked) object does not exist.

      CRITICAL level: the validator aborts at the checkpoint following it.
      """

      name = "Un objet lié n'existe pas"
      level = CRITICAL
      order_ = 22
  class DuplicatedGeom(TechnicalValidationError):
      """Content error labelled "Doublon graphique" (duplicated geometry)."""

      name = "Doublon graphique"
      order_ = 23
  class MissingItem(TechnicalValidationError):
      """Content error labelled "Elément manquant" (missing item)."""

      name = "Elément manquant"
      order_ = 24
  class DimensionError(TechnicalValidationError):
      """Content error labelled "Elément de dimension" (dimension issue)."""

      name = "Elément de dimension"
      order_ = 25
  class PositionError(TechnicalValidationError):
      """Content error labelled "Erreur de positionnement" (positioning error)."""

      name = "Erreur de positionnement"
      order_ = 26
  105. ########### VALIDATION ################
  class BaseValidator:
      """Base class for dataset validators.

      A run is started with ``submit`` (folder or archive) and goes through
      three stages -- loading, structure validation, technical validation --
      each closed by a checkpoint.  Errors are collected with ``log_error``
      and the outcome is returned as a report dict (see ``build_report``).

      Subclasses set ``schema_name`` and ``models`` and implement
      ``_load_files`` and ``_technical_validation``.
      """

      # Name of the validation schema, copied into the report.
      schema_name = ""
      # Model classes to validate; iterated directly, and used as keys of
      # ``dataset``.  Each model is expected to expose ``schema`` and ``filename``.
      models = {}
      # Class-level default only: every instance works on its own copy
      # (see __init__), so loaded data is never shared between runs.
      dataset = {}

      def __init__(self):
          self.valid = True
          self.checkpoints = []
          self.errors = []
          self._current_checkpoint_valid = True
          self.dt = 0
          # Copy the class-level mapping: writing ``self.dataset[model] = ...``
          # on the shared dict would leak one run's data into every other
          # instance of the validator.
          self.dataset = dict(self.dataset)

      def checkpoint(self, title):
          """Close the current stage and record whether it passed.

          Marks the whole validation as invalid as soon as any error has been
          logged.

          Raises:
              ValidatorInterruption: if a CRITICAL error has been logged.
          """
          self.checkpoints.append(Checkpoint(title, self._current_checkpoint_valid))
          self._current_checkpoint_valid = True
          if self.errors:
              self.valid = False
          if self.critical_happened():
              raise ValidatorInterruption()

      def critical_happened(self):
          """Return True if at least one logged error has the CRITICAL level."""
          return any(err.level == CRITICAL for err in self.errors)

      def log_error(self, validation_error):
          """Record *validation_error* and flag the current stage as failed."""
          self._current_checkpoint_valid = False
          self.errors.append(validation_error)

      @classmethod
      def submit(cls, subject):
          """Validate the content of a folder or of a zip archive.

          Args:
              subject: path of a directory, or of an archive that is extracted
                  to a temporary directory before validation.

          Returns:
              The report dict produced by ``build_report``.

          Raises:
              FileNotFoundError: if *subject* is neither a file nor a directory.
          """
          subject = Path(subject)
          if subject.isfile():
              with TempDir() as dirname:
                  # Context manager: the archive handle is released even when
                  # extraction fails.
                  with zipfile.ZipFile(subject, 'r') as archive:
                      archive.extractall(dirname)
                  # Case where the archive holds a folder (named after the
                  # archive) which itself holds the files.
                  nested = Path(dirname / subject.stem)
                  folder = nested if nested.isdir() else dirname
                  # NOTE(review): without such a nested folder, the report's
                  # "filename" is the temporary directory's name -- confirm
                  # whether the archive name is wanted there instead.
                  # Must run inside the ``with``: the files vanish on exit.
                  return cls._submit_folder(folder)
          elif subject.isdir():
              return cls._submit_folder(subject)
          else:
              raise FileNotFoundError(f"Impossible de trouver le fichier ou répertoire: {subject}")

      @classmethod
      def _submit_folder(cls, folder):
          """Run a fresh validator on *folder* and return its report."""
          validator = cls()
          t0 = time.time()
          try:
              validator.validate(folder)
          except ValidatorInterruption:
              # Expected: a critical error stopped the run; the report is
              # built from what was collected so far.
              pass
          validator.dt = time.time() - t0
          return validator.build_report(validator.schema_name, folder.name)

      def validate(self, folder):
          """Run the three validation stages, closing each with a checkpoint."""
          # Load the data in memory
          self._load_files(folder)
          self.checkpoint("Chargement des données")

          # Check the structure of the data (fields, formats and types)
          self._structure_validation()
          self.checkpoint("Contrôle de la structure des données")

          # Technical validation
          self._technical_validation()
          self.checkpoint("Validation Métier")

      def _load_files(self, folder):
          """Load the files of *folder* and bind the data to the models.

          Warning: no format or validity control at this stage.
          Must be implemented by subclasses.
          """
          raise NotImplementedError()

      def _structure_validation(self):
          """Validate every loaded item against its model's schema.

          Each schema error is logged as a ``DataError``.
          """
          for model in self.models:
              v = ExtendedValidator(model.schema, purge_unknown=True, error_handler=CerberusErrorHandler, require_all=True)
              # A model with nothing loaded is skipped instead of raising KeyError.
              for item in self.dataset.get(model, ()):
                  v.validate(item.__dict__)
                  for field, verrors in v.errors.items():
                      for err in verrors:
                          self.log_error(DataError(_translate_messages(err), filename=model.filename, field=field))

      def _technical_validation(self):
          """Run the business-rule checks.  Must be implemented by subclasses.

          An instance method like the other hooks: it is called on ``self``
          and implementations need the instance to log errors.
          """
          raise NotImplementedError()

      def build_report(self, schema, filename):
          """Build the report dict for this run.

          Errors are grouped by category name; within a category, identical
          (filename, field, message) entries are listed only once.

          Returns:
              A dict with the keys "schema", "filename", "exec_time",
              "checkpoints" and "errors".
          """
          grouped = {}
          for err in self.errors:
              if err.name not in grouped:
                  grouped[err.name] = {"help": err.help, "order_": err.order_, "list": []}
              entry = {"filename": err.filename or "-",
                       "field": err.field or "-",
                       "message": err.message}
              entries = grouped[err.name]["list"]
              if entry not in entries:
                  entries.append(entry)

          return {
              "schema": schema,
              "filename": filename,
              "exec_time": "{:.3g} s.".format(self.dt),
              "checkpoints": [{"name": chk.name, "valid": chk.valid} for chk in self.checkpoints],
              "errors": grouped,
          }
  199. class NetgeoValidator(BaseValidator):
  def _load_files(self, folder):
      """Load each model's file from *folder* into ``self.dataset``.

      For every model, the file named ``model.filename`` is opened as a GIS
      datasource and each feature of its layer is wrapped in the model class.
      Problems are logged, not raised: ``MissingFile`` when the file is
      absent, ``WrongSrid`` when the layer's SRID is not the expected one
      (features are still loaded), ``UnreadableFile`` on ``IOError``.
      Every error carries the file name so the report can tell which file
      is concerned.
      """
      for model in self.models:
          filename = model.filename
          path_ = Path(folder) / filename

          # Register the model before any check: later stages then always
          # find an entry (possibly empty), and no data from a previous
          # load can linger under this key.
          self.dataset[model] = []

          if not path_.isfile():
              self.log_error(MissingFile("Fichier manquant: '{}'".format(filename), filename=filename))
              continue

          try:
              ds = gis_.Datasource(path_)
              layer = ds.layer

              if layer.srid != SRID:
                  # The message does not name the file: without ``filename``
                  # the same SRID problem in several files would collapse
                  # into a single anonymous report entry.
                  self.log_error(WrongSrid("Mauvaise projection: {} (attendu: {})".format(layer.srid, SRID), filename=filename))

              for feature in layer:
                  self.dataset[model].append(model(feature))

          except IOError:
              self.log_error(UnreadableFile("Fichier illisible: {}".format(path_.name), filename=filename))
  def _structure_validation(self):
      """Check geometry type, extent and schema of every loaded item.

      For each item of each model, logs a ``GeomTypeError`` when its geometry
      type differs from the model's, a ``BoundingBoxError`` when its bounding
      box goes beyond the model's allowed extent, and one ``DataError`` per
      schema violation.
      """
      for model in self.models:
          checker = ExtendedValidator(model.schema, purge_unknown=True, error_handler=CerberusErrorHandler, require_all=True)
          xmin, ymin, xmax, ymax = model.bounding_box

          for item in self.dataset[model]:

              # geometry type
              if item.geom_type != model.geom_type:
                  found = item.geom_name
                  expected = gis_.GEOM_NAMES[model.geom_type]
                  message = "Type de géométrie invalide: {} (attendu: {})".format(found, expected)
                  self.log_error(GeomTypeError(message, filename=model.filename, field="geom"))

              # bounding box: either corner outside the extent on x or on y
              x1, y1, x2, y2 = item.bounding_box
              x_outside = any(x < xmin or x > xmax for x in (x1, x2))
              if x_outside or any(y < ymin or y > ymax for y in (y1, y2)):
                  self.log_error(BoundingBoxError("Situé hors de l'emprise autorisée", filename=model.filename, field="geom"))

              # schema validation of the item's attributes
              checker.validate(item.__dict__)
              for field, field_errors in checker.errors.items():
                  for err in field_errors:
                      self.log_error(DataError(_translate_messages(err), filename=model.filename, field=field))