main.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
  1. '''
  2. Python 3.7+
  3. @author: olivier.massot, sept 2018
  4. '''
from datetime import date, datetime
import logging
import zipfile

from path import Path, TempDir
import shapefile
import yaml

from core import logconf
from core.constants import MAIN
  13. logger = logging.getLogger("datachecker")
  14. logconf.start("datachecker", logging.INFO)
  15. # TODO: Vérifier la projection (besoin de GDAL/OGR)
  16. # TODO: max length
  17. # TODO: fonctions de controle spéciales
  18. # TODO: Rendu HTML
  19. def check(subject, checker):
  20. """ prends un dossier ou une archive en entier et vérifie son contenu selon les règles données par le fichier de config """
  21. subject, checker = Path(subject), Path(checker)
  22. if subject.isfile():
  23. with TempDir() as dirname:
  24. zip_ref = zipfile.ZipFile(subject, 'r')
  25. zip_ref.extractall(dirname)
  26. zip_ref.close()
  27. print()
  28. if Path(dirname / subject.stem).isdir(): # cas où l'archive contient un dossier qui lui-même contient les fichiers
  29. dirname /= subject.stem
  30. check_folder(dirname, checker)
  31. elif subject.isdir():
  32. check_folder(subject, checker)
  33. else:
  34. raise IOError(f"Impossible de trouver le fichier ou répertoire: {subject}")
  35. def check_folder(folder, checker):
  36. logging.info("***** Traitement de '%s' *****", folder.name)
  37. logging.info("> Controlleur: '%s'", checker.name)
  38. with open(checker, "r") as cf:
  39. config = yaml.load(cf)
  40. for filename, model in config["files"].items():
  41. path_ = folder / filename
  42. logging.info("* Traitement de %s", path_.name)
  43. if not path_.isfile():
  44. logger.error("Fichier introuvable")
  45. continue
  46. try:
  47. sf = shapefile.Reader(path_)
  48. except shapefile.ShapefileException:
  49. logger.error("Fichier SHAPE illisible")
  50. continue
  51. if "srid" in config:
  52. pass
  53. xmin, xmax, ymin, ymax = (int(config.get("xmin", 0)),
  54. int(config.get("xmax", float("inf"))),
  55. int(config.get("ymin", 0)),
  56. int(config.get("ymax", float("inf")))
  57. )
  58. if "shape_type" in model:
  59. shape_names = {1:"Point", 3:"Polyligne", 5:"Polygone"}
  60. if sf.shapeType != model["shape_type"]:
  61. logger.error("Le fichier shapefile n'est pas de type %s", shape_names[model["shape_type"]])
  62. del sf
  63. continue
  64. records = sf.shapeRecords()
  65. if not records:
  66. if not model["can_be_empty"]:
  67. logger.error("Le fichier shapefile ne contient aucune donnees")
  68. del sf, records
  69. continue
  70. else:
  71. logger.warning("Le fichier shapefile ne contient aucune donnees")
  72. if not "fields" in model:
  73. continue
  74. fields = [f[0] for f in sf.fields if f[0] != 'DeletionFlag']
  75. # controle d'éventuels champs inconnus
  76. for f in fields:
  77. if f not in model["fields"]:
  78. logger.warning("Champs inconnu: %s", f)
  79. # parcours et controle des enregistrements
  80. for i, record in enumerate(records):
  81. logging.info("\n> Enregistrement n°%s\n", i)
  82. record_data = {field: record.record[i] for i, field in enumerate(fields)}
  83. x1, y1, x2, y2 = sf.shapes()[i].bbox
  84. if not xmin <= x1 <= xmax or not xmin <= x2 <= xmax or \
  85. not ymin <= y1 <= ymax or not ymin <= y2 <= ymax:
  86. logger.error("L'élément est situé hors de la zone autorisée")
  87. for fieldname, fieldmodel in model["fields"].items():
  88. try:
  89. val = record_data[fieldname]
  90. except KeyError:
  91. if fieldmodel.get("required", True):
  92. logger.error("%s - Champs manquant", fieldname)
  93. continue
  94. type_ = fieldmodel.get("type", "str")
  95. if type_ == "float":
  96. try:
  97. _ = float(val)
  98. except (TypeError, ValueError):
  99. logger.error("%s - Valeur invalide, un flottant est attendu ('%s')", fieldname, val)
  100. continue
  101. elif type_ == "datetime":
  102. try:
  103. _ = datetime.strptime(val, fieldmodel.get("date_format", "%d/%m/%Y"))
  104. except ValueError:
  105. logger.error("%s - Valeur invalide, une date est attendu ('%s')", fieldname, val)
  106. continue
  107. else:
  108. if not fieldmodel.get("allow_empty", False) and not val:
  109. logger.error("%s - Champs vide", fieldname)
  110. continue
  111. if type_ == "str" and "max_len" in fieldmodel:
  112. if len(str(val)) > fieldmodel["max_len"]:
  113. logger.error("%s - Trop long, la longueur max. est de %s ('%s')", fieldname, fieldmodel["max_len"], val)
  114. try:
  115. if not val in fieldmodel["in_list"]:
  116. logger.error("%s - Valeur invalide, pas dans la liste ('%s')", fieldname, val)
  117. continue
  118. except KeyError:
  119. pass
  120. del sf, records
  121. if __name__ == "__main__":
  122. subject = MAIN / "work" / "SCOPELEC_CAP_097AP0_REC_180829_OK.zip"
  123. checker = MAIN / "resources" / "netgeo_v2-2_doe.yaml"
  124. check(subject, checker)
  125. logger.info("-- Fin --")