Selaa lähdekoodia

go for spacy, ajoute le logging

olivier.massot 7 vuotta sitten
vanhempi
commit
a4f925ce8e
10 muutettua tiedostoa jossa 150 lisäystä ja 22 poistoa
  1. 7 0
      .gitignore
  2. 1 1
      .pydevproject
  3. 1 0
      core/__init__.py
  4. 47 0
      core/logging.yaml
  5. 55 0
      core/logging_.py
  6. 2 0
      core/nlp.py
  7. 12 13
      nestor.py
  8. 5 7
      requirements.txt
  9. 4 1
      resources/notes.txt
  10. 16 0
      train.py

+ 7 - 0
.gitignore

@@ -0,0 +1,7 @@
+*.pyc
+.project
+.pydevproject
+.settings/
+temp/
+test/
+output/

+ 1 - 1
.pydevproject

@@ -4,5 +4,5 @@
 <path>/${PROJECT_DIR_NAME}</path>
 </pydev_pathproperty>
 <pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 3.0</pydev_property>
-<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">python36</pydev_property>
+<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">python</pydev_property>
 </pydev_project>

+ 1 - 0
core/__init__.py

@@ -0,0 +1 @@
+$

+ 47 - 0
core/logging.yaml

@@ -0,0 +1,47 @@
+version: 1
+disable_existing_loggers: no
+formatters:
+    simple:
+        format: "%(asctime)s - %(levelname)s - %(message)s"
+    complete:
+        format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+    short:
+        format: "%(levelname)s - %(message)s"
+    message_only:
+        format: "%(message)s"
+        
+handlers:
+    console:
+        class: logging.StreamHandler
+        level: INFO
+        formatter: message_only
+        stream: ext://sys.stdout
+    file:
+        class: logging.handlers.RotatingFileHandler
+        level: DEBUG
+        formatter: complete
+        filename: debug.log
+        maxBytes: 100000
+        backupCount: 1
+        encoding: utf8
+ #   mail:
+ #       class: core.logging_.BufferingSMTPHandler
+ #       level: ERROR
+ #       formatter: complete
+ #       mailhost: smtp.bas-rhin.fr
+ #       fromaddr: log.hello@bas-rhin.fr
+ #       toaddrs: [user.name@bas-rhin.fr]
+ #       toaddrs: []
+ #       subject: log
+ #       capacity: 100000000
+        
+loggers:
+    hello:
+        level: DEBUG
+        handlers: [console, file]
+        propagate: no
+       
+root:
+    level: INFO
+    handlers: [console]
+    propagate: yes

+ 55 - 0
core/logging_.py

@@ -0,0 +1,55 @@
+'''
+Created on 6 juil. 2017
+
+@author: olivier.massot
+'''
+from email.mime.text import MIMEText
+import logging.config
+import smtplib
+
+from path import Path
+import yaml
+
+
+LOG_DIR = Path(__file__).parent / "logs"
+LOG_DIR.makedirs_p()
+
+def getLogger(name):
+    # charge la configuration du logging depuis le fichier 'logging.yaml'
+    configfile = Path(__file__).parent
+    with open(configfile / 'logging.yaml', 'rt') as f:
+        conf = yaml.load(f)
+    logging.config.dictConfig(conf)
+    return logging.getLogger(name)
+
+
+
+# ******************************
+
+class BufferingSMTPHandler(logging.handlers.BufferingHandler):
+    def __init__(self, mailhost, fromaddr, toaddrs, subject, capacity):
+        logging.handlers.BufferingHandler.__init__(self, capacity)
+        self.mailhost = mailhost
+        self.mailport = None
+        self.fromaddr = fromaddr
+        self.toaddrs = toaddrs
+        self.subject = subject
+
+    def flush(self):
+        try:
+            if len(self.buffer) > 0:
+                port = self.mailport if self.mailport else smtplib.SMTP_PORT
+
+                msg = "\n".join([self.format(record) for record in self.buffer])
+                msg = MIMEText(msg.encode('utf-8'), _charset='utf-8')
+                msg['Subject'] = self.subject
+                msg['From'] = self.fromaddr
+                msg['To'] = ",".join(self.toaddrs)
+
+                smtp = smtplib.SMTP(self.mailhost, port)
+                smtp.sendmail(self.fromaddr, self.toaddrs, msg.as_string())
+                smtp.quit()
+                self.buffer = []
+        except Exception as e:
+            print(e)
+            raise

+ 2 - 0
core/nlp.py

@@ -0,0 +1,2 @@
+import fr_core_news_sm
+nlp = fr_core_news_sm.load()

+ 12 - 13
nestor.py

@@ -1,19 +1,18 @@
-from textblob import TextBlob
+from core import logging_
+from core.nlp import nlp
 
-txt = "Tu es Nestor. Me comprends-tu?"
-blob = TextBlob(txt)
+logger = logging_.getLogger("nestor")
 
-if blob.detect_language() == "fr":
-    blob = blob.translate(from_lang="fr", to='en')
 
-print(blob.tags)
 
+instr = ""
+while instr != "exit":
+    instr = input(">")
+    doc = nlp(instr)
 
+    for token in doc:
+        print(token.text, token.lemma_, token.pos_, token.tag_, token.dep_,
+              token.shape_, token.is_alpha, token.is_stop)
 
-# from textblob import Blobber
-# from textblob_fr import PatternTagger, PatternAnalyzer
-# tb = Blobber(pos_tagger=PatternTagger(), analyzer=PatternAnalyzer())
-#
-# blob = tb(txt)
-#
-# print(blob.tags)
+    for ent in doc.ents:
+        print(ent.text, ent.start_char, ent.end_char, ent.label_)

+ 5 - 7
requirements.txt

@@ -1,9 +1,7 @@
-#spacy
-#python -m spacy download en
-#python -m spacy download fr
-#Microsoft Visual C++ 14.0
+spacy
 
+#A installer manuellement:
+# python -m spacy download en
+# python -m spacy download fr
+# Microsoft Visual C++ 14.0
 
-textblob
-# python -m textblob.download_corpora
-textblob-fr

+ 4 - 1
resources/notes.txt

@@ -1,6 +1,9 @@
 
 FONCTIONNALITES DE BASE
 
+* un bot en python:
+https://github.com/alfredfrancis/ai-chatbot-framework/
+
 * Bot
 https://worldwritable.com/natural-language-processing-for-programmers-90c4e04dc6de#.ghs8io8vs
 https://apps.worldwritable.com/tutorials/chatbot/
@@ -31,6 +34,6 @@ DIVERS
 Nestor: taches de base
 Hadock: Gestion projet, fichiers
 Tintin: recherche
-Tryphon: Organiseur de pensée
+Tryphon: Organiseur de pensée
 Bianca: musique, son
 Dupond & Dupont

+ 16 - 0
train.py

@@ -0,0 +1,16 @@
+import random
+
+import spacy
+
+
+# Toy training set: (text, annotations) pairs whose 'entities' items are
+# (start, end, label) spans, e.g. (0, 4) covers "Uber" and (0, 6) covers "Google".
+TRAIN_DATA = [
+     ("Uber blew through $1 million a week", {'entities': [(0, 4, 'ORG')]}),
+     ("Google rebrands its business apps", {'entities': [(0, 6, "ORG")]})]

+# NOTE(review): the samples are English but the blank pipeline is 'fr', and no
+# pipeline component is added before training — confirm this is intended.
+nlp = spacy.blank('fr')
+optimizer = nlp.begin_training()
+# 20 passes over the data, reshuffled in place each pass, one example per update.
+for i in range(20):
+    random.shuffle(TRAIN_DATA)
+    for text, annotations in TRAIN_DATA:
+        nlp.update([text], [annotations], sgd=optimizer)
+# NOTE(review): '/model' is an absolute path at the filesystem root — confirm.
+nlp.to_disk('/model')