|
|
@@ -0,0 +1,80 @@
|
|
|
+"""Example of training spaCy's named entity recognizer, starting off with an
|
|
|
+existing model or a blank model.
|
|
|
+For more details, see the documentation:
|
|
|
+* Training: https://spacy.io/usage/training
|
|
|
+* NER: https://spacy.io/usage/linguistic-features#named-entities
|
|
|
+Compatible with: spaCy v2.0.0+
|
|
|
+"""
|
|
|
+import random
|
|
|
+
|
|
|
+import fr_core_news_md
|
|
|
+from path import Path
|
|
|
+import spacy
|
|
|
+
|
|
|
+
|
|
|
+# training data
|
|
|
# Training data: (text, {'entities': [(start_char, end_char, label), ...]}).
# Offsets are character indices into the text; labels follow spaCy's scheme
# (PERSON, LOC, ...).
# NOTE: the original file listed the Strasbourg/Avignon sentence twice — an
# accidental copy-paste duplicate (the "Expected output" comments at the
# bottom of the file show each sentence only once), removed here.
TRAIN_DATA = [
    ('Qui est Georges Brassens?', {'entities': [(8, 24, 'PERSON')]}),
    ("J'aime Strasbourg et Avignon.", {'entities': [(7, 17, 'LOC'), (21, 28, 'LOC')]}),
]

# Use the stdlib pathlib instead of the third-party `path` package; this
# import shadows the earlier `from path import Path` for the rest of the file.
from pathlib import Path

# Directory the model is loaded from and saved back to, next to this script.
MODEL_DIR = Path(__file__).parent / "data"
|
|
|
+
|
|
|
+
|
|
|
def main(n_iter=100):
    """Load the model, set up the pipeline and train the entity recognizer.

    n_iter -- number of passes over TRAIN_DATA (default 100).

    Loads the spaCy model stored in MODEL_DIR, trains only the 'ner'
    component on TRAIN_DATA, prints the entities it then predicts for the
    training sentences, and saves the updated model back to MODEL_DIR.
    """
    nlp = spacy.load(MODEL_DIR)  # load existing spaCy model from disk
    print("Model loaded")

    # Create the built-in pipeline components and add them to the pipeline;
    # nlp.create_pipe works for built-ins that are registered with spaCy.
    if 'ner' not in nlp.pipe_names:
        ner = nlp.create_pipe('ner')
        nlp.add_pipe(ner, last=True)
    else:
        # The component already exists — get it so we can add labels.
        ner = nlp.get_pipe('ner')

    # Register every entity label present in the training data.
    # Default to [] so an annotation dict without 'entities' cannot crash
    # the loop with a TypeError on None.
    for _, annotations in TRAIN_DATA:
        for ent in annotations.get('entities', []):
            ner.add_label(ent[2])

    # Get names of other pipes to disable them during training.
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']

    # Shuffle a copy, not TRAIN_DATA itself: shuffling in place would mutate
    # the module-level constant as a side effect of calling main().
    train_data = list(TRAIN_DATA)

    with nlp.disable_pipes(*other_pipes):  # only train NER
        optimizer = nlp.begin_training()
        for _ in range(n_iter):
            random.shuffle(train_data)
            losses = {}
            for text, annotations in train_data:
                nlp.update(
                    [text],          # batch of texts
                    [annotations],   # batch of annotations
                    drop=0.5,        # dropout - make it harder to memorise data
                    sgd=optimizer,   # callable to update weights
                    losses=losses)
            print(losses)

    # Test the trained model on the training sentences.
    for text, _ in TRAIN_DATA:
        doc = nlp(text)
        print('Entities', [(ent.text, ent.label_) for ent in doc.ents])
        print('Tokens', [(t.text, t.ent_type_, t.ent_iob) for t in doc])

    # Save the updated model back to the output directory.
    if not MODEL_DIR.exists():
        MODEL_DIR.mkdir()
    nlp.to_disk(MODEL_DIR)
    print("Saved model to", MODEL_DIR)
|
|
|
+
|
|
|
+
|
|
|
# Script entry point: train for the default 100 iterations.
if __name__ == '__main__':
    main()

    # Expected output (after training on TRAIN_DATA):
    # Entities [('Georges Brassens', 'PERSON')]
    # Tokens [('Qui', '', 2), ('est', '', 2), ('Georges', 'PERSON', 3), ('Brassens', 'PERSON', 1), ('?', '', 2)]
    # Entities [('Strasbourg', 'LOC'), ('Avignon', 'LOC')]
    # Tokens [("J'", '', 2), ('aime', '', 2), ('Strasbourg', 'LOC', 3), ('et', '', 2), ('Avignon', 'LOC', 3), ('.', '', 2)]
|