main.py 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167
  1. '''
  2. Python 3.7+
  3. @author: olivier.massot, sept 2018
  4. '''
  5. from datetime import datetime
  6. import logging
  7. import zipfile
  8. from path import Path, TempDir
  9. import shapefile
  10. import yaml
  11. from core import logconf
  12. from core.constants import MAIN
# Module-level logger; the same "datachecker" name is handed to logconf.start below.
logger = logging.getLogger("datachecker")
# NOTE(review): presumably sets up the "datachecker" logger at INFO level —
# confirm against core.logconf, which is not visible from this file.
logconf.start("datachecker", logging.INFO)
# TODO: Check the projection (requires GDAL/OGR)
# TODO: Special control functions
# TODO: HTML rendering
  18. def check(subject, checker):
  19. """ prends un dossier ou une archive en entier et vérifie son contenu selon les règles données par le fichier de config """
  20. subject, checker = Path(subject), Path(checker)
  21. if subject.isfile():
  22. with TempDir() as dirname:
  23. zip_ref = zipfile.ZipFile(subject, 'r')
  24. zip_ref.extractall(dirname)
  25. zip_ref.close()
  26. print()
  27. if Path(dirname / subject.stem).isdir(): # cas où l'archive contient un dossier qui lui-même contient les fichiers
  28. dirname /= subject.stem
  29. check_folder(dirname, checker)
  30. elif subject.isdir():
  31. check_folder(subject, checker)
  32. else:
  33. raise IOError(f"Impossible de trouver le fichier ou répertoire: {subject}")
  34. def check_folder(folder, checker):
  35. logging.info("***** Traitement de '%s' *****", folder.name)
  36. logging.info("> Controlleur: '%s'", checker.name)
  37. with open(checker, "r") as cf:
  38. config = yaml.load(cf)
  39. for filename, model in config["files"].items():
  40. path_ = folder / filename
  41. logging.info("* Traitement de %s", path_.name)
  42. if not path_.isfile():
  43. logger.error("Fichier introuvable")
  44. continue
  45. try:
  46. sf = shapefile.Reader(path_)
  47. except shapefile.ShapefileException:
  48. logger.error("Fichier SHAPE illisible")
  49. continue
  50. if "srid" in config:
  51. pass
  52. xmin, xmax, ymin, ymax = (int(config.get("xmin", 0)),
  53. int(config.get("xmax", float("inf"))),
  54. int(config.get("ymin", 0)),
  55. int(config.get("ymax", float("inf")))
  56. )
  57. if "shape_type" in model:
  58. shape_names = {1:"Point", 3:"Polyligne", 5:"Polygone"}
  59. if sf.shapeType != model["shape_type"]:
  60. logger.error("Le fichier shapefile n'est pas de type %s", shape_names[model["shape_type"]])
  61. del sf
  62. continue
  63. records = sf.shapeRecords()
  64. if not records:
  65. if not model["can_be_empty"]:
  66. logger.error("Le fichier shapefile ne contient aucune donnees")
  67. del sf, records
  68. continue
  69. else:
  70. logger.warning("Le fichier shapefile ne contient aucune donnees")
  71. if not "fields" in model:
  72. continue
  73. fields = [f[0] for f in sf.fields if f[0] != 'DeletionFlag']
  74. # controle d'éventuels champs inconnus
  75. for f in fields:
  76. if f not in model["fields"]:
  77. logger.warning("Champs inconnu: %s", f)
  78. # parcours et controle des enregistrements
  79. for i, record in enumerate(records):
  80. logging.info("\n> Enregistrement n°%s\n", i)
  81. record_data = {field: record.record[i] for i, field in enumerate(fields)}
  82. x1, y1, x2, y2 = sf.shapes()[i].bbox
  83. if not xmin <= x1 <= xmax or not xmin <= x2 <= xmax or \
  84. not ymin <= y1 <= ymax or not ymin <= y2 <= ymax:
  85. logger.error("L'élément est situé hors de la zone autorisée")
  86. for fieldname, fieldmodel in model["fields"].items():
  87. try:
  88. val = record_data[fieldname]
  89. except KeyError:
  90. if fieldmodel.get("required", True):
  91. logger.error("%s - Champs manquant", fieldname)
  92. continue
  93. type_ = fieldmodel.get("type", "str")
  94. if type_ == "int":
  95. try:
  96. _ = int(val)
  97. except (TypeError, ValueError):
  98. logger.error("%s - Valeur invalide, un entier est attendu ('%s')", fieldname, val)
  99. continue
  100. elif type_ == "float":
  101. try:
  102. _ = float(val)
  103. except (TypeError, ValueError):
  104. logger.error("%s - Valeur invalide, un flottant est attendu ('%s')", fieldname, val)
  105. continue
  106. elif type_ == "datetime":
  107. try:
  108. _ = datetime.strptime(val, fieldmodel.get("date_format", "%d/%m/%Y"))
  109. except ValueError:
  110. logger.error("%s - Valeur invalide, une date est attendu ('%s')", fieldname, val)
  111. continue
  112. else:
  113. if not fieldmodel.get("allow_empty", False) and not val:
  114. logger.error("%s - Champs vide", fieldname)
  115. continue
  116. if type_ == "str" and "max_len" in fieldmodel:
  117. if len(str(val)) > fieldmodel["max_len"]:
  118. logger.error("%s - Trop long, la longueur max. est de %s ('%s')", fieldname, fieldmodel["max_len"], val)
  119. try:
  120. if not val in fieldmodel["in_list"]:
  121. logger.error("%s - Valeur invalide, pas dans la liste ('%s')", fieldname, val)
  122. continue
  123. except KeyError:
  124. pass
  125. del sf, records
  126. if __name__ == "__main__":
  127. subject = MAIN / "work" / "SCOPELEC_CAP_097AP0_REC_180829_OK.zip"
  128. checker = MAIN / "resources" / "netgeo_v2-2_doe.yaml"
  129. check(subject, checker)
  130. logger.info("-- Fin --")