# validation.py
  1. '''
  2. @author: olivier.massot, sept. 2018
  3. '''
  4. import time
  5. import zipfile
  6. from path import Path, TempDir
  7. from core import gis_
  8. from core.cerberus_extend import CerberusErrorHandler, \
  9. _translate_messages, ExtendedValidator
  10. from schemas.common import SRID
  11. class ValidatorInterruption(BaseException):
  12. pass
  13. class Checkpoint():
  14. def __init__(self, name, valid=True):
  15. self.name = name
  16. self.valid = valid
  17. ########### MODELES ################
  18. class BaseModel():
  19. filename = ""
  20. pk = ""
  21. schema = {}
  22. def __init__(self, **kwargs):
  23. self.__dict__.update(kwargs)
  24. class BaseGeoModel(gis_.Feature):
  25. filename = ""
  26. pk = ""
  27. geom_type = 0
  28. bounding_box = (0,0,1,1)
  29. schema = {}
  30. def __init__(self, feature):
  31. self.__dict__.update(feature.__dict__)
  32. ########### ERREURS DE VALIDATION ################
  33. VALIDATION_ERROR_LEVELS = {10: "MINEURE", 20: "AVERTISSEMENT", 30: "ERREUR", 40: "CRITIQUE"}
  34. MINOR = 10
  35. WARNING = 20
  36. ERROR = 30
  37. CRITICAL = 40
  38. class BaseValidationError():
  39. order_ = 0
  40. name = "Erreur"
  41. level = ERROR
  42. help = ""
  43. def __init__(self, message, filename="", field=""):
  44. self.message = message
  45. self.filename = filename
  46. self.field = field
  47. def __repr__(self):
  48. return " - ".join(filter(None, [self.name, self.filename, self.field, self.message]))
  49. # Erreurs dans le chargement des fichiers
  50. class InputError(BaseValidationError):
  51. order_ = 0
  52. level = CRITICAL
  53. name = "Erreur de chargement"
  54. class MissingFile(InputError):
  55. order_ = 1
  56. name = "Fichier Manquant"
  57. class UnreadableFile(InputError):
  58. order_ = 2
  59. name = "Fichier Illisible"
  60. class WrongSrid(InputError):
  61. order_ = 3
  62. name = "Mauvais SRID"
  63. ### Erreurs dans la structure des données
  64. class StructureError(BaseValidationError):
  65. order_ = 10
  66. name = "Erreur de structure"
  67. level = ERROR
  68. class GeomTypeError(StructureError):
  69. order_ = 12
  70. name = "Type de géométrie invalide"
  71. level = CRITICAL
  72. class PositionError(StructureError):
  73. order_ = 11
  74. name = "Position hors de la zone autorisée"
  75. class InvalidGeometry(StructureError):
  76. order_ = 13
  77. name = "Géométrie invalide"
  78. class DataError(StructureError):
  79. order_ = 14
  80. name = "Erreur de format"
  81. # Erreurs dans le contenu, erreurs métiers
  82. class TechnicalValidationError(BaseValidationError):
  83. order_ = 20
  84. level = ERROR
  85. name = "Erreur technique"
  86. class UniqueError(TechnicalValidationError):
  87. order_ = 21
  88. name = "Doublons dans le champs"
  89. class RelationError(TechnicalValidationError):
  90. order_ = 22
  91. level = CRITICAL
  92. name = "Un objet lié n'existe pas"
  93. class DuplicatedGeom(TechnicalValidationError):
  94. order_ = 23
  95. name = "Doublon graphique"
  96. class MissingItem(TechnicalValidationError):
  97. order_ = 24
  98. name = "Elément manquant"
  99. class DimensionError(TechnicalValidationError):
  100. order_ = 25
  101. name = "Elément de dimension"
  102. ########### VALIDATION ################
  103. class BaseValidator():
  104. schema_name = ""
  105. models = {}
  106. dataset = {}
  107. def __init__(self):
  108. self.valid = True
  109. self.checkpoints = []
  110. self.errors = []
  111. self.dt = 0
  112. def checkpoint(self, title):
  113. self.checkpoints.append(Checkpoint(title, (not self.errors)))
  114. if self.errors:
  115. self.valid = False
  116. if self.critical_happened():
  117. raise ValidatorInterruption()
  118. def critical_happened(self):
  119. return any([err.level == CRITICAL for err in self.errors])
  120. def log_error(self, validation_error):
  121. self.errors.append(validation_error)
  122. @classmethod
  123. def submit(cls, subject):
  124. """ prends un dossier ou une archive en entrée et vérifie son contenu """
  125. subject = Path(subject)
  126. if subject.isfile():
  127. with TempDir() as dirname:
  128. zip_ref = zipfile.ZipFile(subject, 'r')
  129. zip_ref.extractall(dirname)
  130. zip_ref.close()
  131. if Path(dirname / subject.stem).isdir(): # cas où l'archive contient un dossier qui lui-même contient les fichiers
  132. dirname /= subject.stem
  133. return cls._submit_folder(dirname)
  134. elif subject.isdir():
  135. return cls._submit_folder(subject)
  136. else:
  137. raise FileNotFoundError(f"Impossible de trouver le fichier ou répertoire: {subject}")
  138. @classmethod
  139. def _submit_folder(cls, folder):
  140. validator = cls()
  141. t0 = time.time()
  142. try:
  143. validator.validate(folder)
  144. except ValidatorInterruption:
  145. pass
  146. validator.dt = time.time() - t0
  147. report = validator.build_report(validator.schema_name, folder.name)
  148. return report
  149. def validate(self, folder):
  150. # Chargement des données en mémoire
  151. self._load_files(folder)
  152. self.checkpoint("Chargement des données")
  153. # Controle la structure des données (champs, formats et types)
  154. self._structure_validation()
  155. self.checkpoint("Contrôle de la structure des données")
  156. # Validation technique
  157. # try:
  158. self._technical_validation()
  159. self.checkpoint("Validation Métier")
  160. # except:
  161. # self.checkpoint("Validation Métier [interrompu]")
  162. def _load_files(self, folder):
  163. """ Charge les données du fichier et les associe à un modèle.
  164. Attention: pas de contrôle de format ou de validité à ce niveau! """
  165. raise NotImplementedError()
  166. def _structure_validation(self):
  167. for model in self.models:
  168. v = ExtendedValidator(model.schema, purge_unknown=True, error_handler=CerberusErrorHandler, require_all=True)
  169. for item in self.dataset[model]:
  170. v.validate(item.__dict__)
  171. for field, verrors in v.errors.items():
  172. for err in verrors:
  173. self.log_error(DataError(_translate_messages(err), filename=model.filename, field=field))
  174. @classmethod
  175. def _technical_validation(cls):
  176. raise NotImplementedError()
  177. def build_report(self, schema, filename):
  178. report = {}
  179. report["schema"] = schema
  180. report["filename"] = filename
  181. report["exec_time"] = "{:.3g} s.".format(self.dt)
  182. report["checkpoints"] = [{"name": chk.name, "valid": chk.valid} for chk in self.checkpoints]
  183. report["errors"] = {}
  184. for err in self.errors:
  185. if not err.name in report["errors"]:
  186. report["errors"][err.name] = {"help": err.help, "order_": err.order_, "list": []}
  187. err_report = {"filename": err.filename or "-",
  188. "field": err.field or "-",
  189. "message": err.message}
  190. if err_report not in report["errors"][err.name]["list"]:
  191. report["errors"][err.name]["list"].append(err_report)
  192. return report
  193. class NetgeoValidator(BaseValidator):
  194. def _load_files(self, folder):
  195. for model in self.models:
  196. filename = model.filename
  197. path_ = Path(folder) / filename
  198. if not path_.isfile():
  199. self.log_error(MissingFile("Fichier manquant: '{}'".format(filename)))
  200. continue
  201. self.dataset[model] = []
  202. try:
  203. ds = gis_.Datasource(path_)
  204. layer = ds.layer
  205. if layer.srid != SRID:
  206. self.log_error(WrongSrid("Mauvaise projection: {} (attendu: {})".format(layer.srid, SRID)))
  207. for feature in layer:
  208. item = model(feature)
  209. self.dataset[model].append(item)
  210. except IOError:
  211. self.log_error(UnreadableFile("Fichier illisible: {}".format(path_.name)))
  212. def _structure_validation(self):
  213. for model in self.models:
  214. v = ExtendedValidator(model.schema, purge_unknown=True, error_handler=CerberusErrorHandler, require_all=True)
  215. xmin, ymin, xmax, ymax = model.bounding_box
  216. for item in self.dataset[model]:
  217. # geom type
  218. if item.geom_type != model.geom_type:
  219. self.log_error(GeomTypeError("Type de géométrie invalide: {} (attendu: {})".format(item.geom_name, gis_.GEOM_NAMES[model.geom_type]), filename=model.filename, field="geom"))
  220. # bounding box
  221. x1, y1, x2, y2 = item.bounding_box
  222. if any(x < xmin or x > xmax for x in (x1, x2)) or \
  223. any(y < ymin or y > ymax for y in (y1, y2)):
  224. self.log_error(PositionError("Situé hors de l'emprise autorisée", filename=model.filename, field="geom"))
  225. v.validate(item.__dict__)
  226. for field, verrors in v.errors.items():
  227. for err in verrors:
  228. self.log_error(DataError(_translate_messages(err), filename=model.filename, field=field))