main.py 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  1. '''
  2. Python 3.7+
  3. @author: olivier.massot, sept 2018
  4. '''
import logging
import zipfile
from datetime import date, datetime

import shapefile
import yaml
from path import Path, TempDir

from core import logconf
from core.constants import MAIN
# Module-level logger named "datachecker"; the checks below report their
# errors and warnings through it.
logger = logging.getLogger("datachecker")
# Hand the same logger name and the INFO level to the project's logconf helper.
# NOTE(review): presumably this installs handlers/level for "datachecker" --
# confirm in core.logconf.
logconf.start("datachecker", logging.INFO)

# TODO: check the projection (requires GDAL/OGR)
  16. def check(subject, checker):
  17. """ prends un dossier ou une archive en entier et vérifie son contenu selon les règles données par le fichier de config """
  18. subject, checker = Path(subject), Path(checker)
  19. if subject.isfile():
  20. with TempDir() as dirname:
  21. zip_ref = zipfile.ZipFile(subject, 'r')
  22. zip_ref.extractall(dirname)
  23. zip_ref.close()
  24. print()
  25. if Path(dirname / subject.stem).isdir(): # cas où l'archive contient un dossier qui lui-même contient les fichiers
  26. dirname /= subject.stem
  27. check_folder(dirname, checker)
  28. elif subject.isdir():
  29. check_folder(subject, checker)
  30. else:
  31. raise IOError(f"Impossible de trouver le fichier ou répertoire: {subject}")
  32. def check_folder(folder, checker):
  33. logging.info("***** Traitement de '%s' *****", folder.name)
  34. logging.info("> Controlleur: '%s'", checker.name)
  35. with open(checker, "r") as cf:
  36. config = yaml.load(cf)
  37. for filename, model in config["files"].items():
  38. path_ = folder / filename
  39. logging.info("* Traitement de %s", path_.name)
  40. if not path_.isfile():
  41. logger.error("Fichier introuvable")
  42. continue
  43. try:
  44. sf = shapefile.Reader(path_)
  45. except shapefile.ShapefileException:
  46. logger.error("Fichier SHAPE illisible")
  47. continue
  48. if "srid" in config:
  49. pass
  50. xmin, xmax, ymin, ymax = (int(config.get("xmin", 0)),
  51. int(config.get("xmax", float("inf"))),
  52. int(config.get("ymin", 0)),
  53. int(config.get("ymax", float("inf")))
  54. )
  55. if "shape_type" in model:
  56. shape_names = {1:"Point", 3:"Polyligne", 5:"Polygone"}
  57. if sf.shapeType != model["shape_type"]:
  58. logger.error("Le fichier shapefile n'est pas de type %s", shape_names[model["shape_type"]])
  59. del sf
  60. continue
  61. records = sf.shapeRecords()
  62. if not records:
  63. if not model["can_be_empty"]:
  64. logger.error("Le fichier shapefile ne contient aucune donnees")
  65. del sf, records
  66. continue
  67. else:
  68. logger.warning("Le fichier shapefile ne contient aucune donnees")
  69. if not "fields" in model:
  70. continue
  71. fields = [f[0] for f in sf.fields if f[0] != 'DeletionFlag']
  72. # controle d'éventuels champs inconnus
  73. for f in fields:
  74. if f not in model["fields"]:
  75. logger.warning("Champs inconnu: %s", f)
  76. # parcours et controle des enregistrements
  77. for i, record in enumerate(records):
  78. logging.info("\n> Enregistrement n°%s\n", i)
  79. record_data = {field: record.record[i] for i, field in enumerate(fields)}
  80. x1, y1, x2, y2 = sf.shapes()[i].bbox
  81. if not xmin <= x1 <= xmax or not xmin <= x2 <= xmax or \
  82. not ymin <= y1 <= ymax or not ymin <= y2 <= ymax:
  83. logger.error("L'élément est situé hors de la zone autorisée")
  84. for fieldname, fieldmodel in model["fields"].items():
  85. try:
  86. val = record_data[fieldname]
  87. except KeyError:
  88. if fieldmodel.get("required", True):
  89. logger.error("%s - Champs manquant", fieldname)
  90. continue
  91. type_ = fieldmodel.get("type", "str")
  92. if type_ == "float":
  93. try:
  94. _ = float(val)
  95. except (TypeError, ValueError):
  96. logger.error("%s - Valeur invalide, un flottant est attendu ('%s')", fieldname, val)
  97. continue
  98. elif type_ == "datetime":
  99. try:
  100. _ = datetime.strptime(val, fieldmodel.get("date_format", "%d/%m/%Y"))
  101. except ValueError:
  102. logger.error("%s - Valeur invalide, une date est attendu ('%s')", fieldname, val)
  103. continue
  104. else:
  105. if not fieldmodel.get("allow_empty", False) and not val:
  106. logger.error("%s - Champs vide", fieldname)
  107. continue
  108. try:
  109. if not val in fieldmodel["in_list"]:
  110. logger.error("%s - Valeur invalide, pas dans la liste ('%s')", fieldname, val)
  111. continue
  112. except KeyError:
  113. pass
  114. del sf, records
  115. if __name__ == "__main__":
  116. subject = MAIN / "work" / "SCOPELEC_CAP_097AP0_REC_180829_OK.zip"
  117. checker = MAIN / "resources" / "netgeo_v2-2_doe.yaml"
  118. check(subject, checker)
  119. logger.info("-- Fin --")