training_data_example.json 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641
  1. [
  2. {
  3. "id": "wsj_0200",
  4. "paragraphs": [
  5. {
  6. "raw": "In an Oct. 19 review of \"The Misanthrope\" at Chicago's Goodman Theatre (\"Revitalized Classics Take the Stage in Windy City,\" Leisure & Arts), the role of Celimene, played by Kim Cattrall, was mistakenly attributed to Christina Haag. Ms. Haag plays Elianti.",
  7. "sentences": [
  8. {
  9. "tokens": [
  10. {
  11. "head": 44,
  12. "dep": "prep",
  13. "tag": "IN",
  14. "orth": "In",
  15. "ner": "O",
  16. "id": 0
  17. },
  18. {
  19. "head": 3,
  20. "dep": "det",
  21. "tag": "DT",
  22. "orth": "an",
  23. "ner": "O",
  24. "id": 1
  25. },
  26. {
  27. "head": 2,
  28. "dep": "nmod",
  29. "tag": "NNP",
  30. "orth": "Oct.",
  31. "ner": "B-DATE",
  32. "id": 2
  33. },
  34. {
  35. "head": -1,
  36. "dep": "nummod",
  37. "tag": "CD",
  38. "orth": "19",
  39. "ner": "L-DATE",
  40. "id": 3
  41. },
  42. {
  43. "head": -4,
  44. "dep": "pobj",
  45. "tag": "NN",
  46. "orth": "review",
  47. "ner": "O",
  48. "id": 4
  49. },
  50. {
  51. "head": -1,
  52. "dep": "prep",
  53. "tag": "IN",
  54. "orth": "of",
  55. "ner": "O",
  56. "id": 5
  57. },
  58. {
  59. "head": 2,
  60. "dep": "punct",
  61. "tag": "``",
  62. "orth": "``",
  63. "ner": "O",
  64. "id": 6
  65. },
  66. {
  67. "head": 1,
  68. "dep": "det",
  69. "tag": "DT",
  70. "orth": "The",
  71. "ner": "B-WORK_OF_ART",
  72. "id": 7
  73. },
  74. {
  75. "head": -3,
  76. "dep": "pobj",
  77. "tag": "NN",
  78. "orth": "Misanthrope",
  79. "ner": "L-WORK_OF_ART",
  80. "id": 8
  81. },
  82. {
  83. "head": -1,
  84. "dep": "punct",
  85. "tag": "''",
  86. "orth": "''",
  87. "ner": "O",
  88. "id": 9
  89. },
  90. {
  91. "head": -2,
  92. "dep": "prep",
  93. "tag": "IN",
  94. "orth": "at",
  95. "ner": "O",
  96. "id": 10
  97. },
  98. {
  99. "head": 3,
  100. "dep": "poss",
  101. "tag": "NNP",
  102. "orth": "Chicago",
  103. "ner": "U-GPE",
  104. "id": 11
  105. },
  106. {
  107. "head": -1,
  108. "dep": "case",
  109. "tag": "POS",
  110. "orth": "'s",
  111. "ner": "O",
  112. "id": 12
  113. },
  114. {
  115. "head": 1,
  116. "dep": "compound",
  117. "tag": "NNP",
  118. "orth": "Goodman",
  119. "ner": "B-FAC",
  120. "id": 13
  121. },
  122. {
  123. "head": -4,
  124. "dep": "pobj",
  125. "tag": "NNP",
  126. "orth": "Theatre",
  127. "ner": "L-FAC",
  128. "id": 14
  129. },
  130. {
  131. "head": 4,
  132. "dep": "punct",
  133. "tag": "-LRB-",
  134. "orth": "(",
  135. "ner": "O",
  136. "id": 15
  137. },
  138. {
  139. "head": 3,
  140. "dep": "punct",
  141. "tag": "``",
  142. "orth": "``",
  143. "ner": "O",
  144. "id": 16
  145. },
  146. {
  147. "head": 1,
  148. "dep": "amod",
  149. "tag": "VBN",
  150. "orth": "Revitalized",
  151. "ner": "B-WORK_OF_ART",
  152. "id": 17
  153. },
  154. {
  155. "head": 1,
  156. "dep": "nsubj",
  157. "tag": "NNS",
  158. "orth": "Classics",
  159. "ner": "I-WORK_OF_ART",
  160. "id": 18
  161. },
  162. {
  163. "head": -15,
  164. "dep": "appos",
  165. "tag": "VBP",
  166. "orth": "Take",
  167. "ner": "I-WORK_OF_ART",
  168. "id": 19
  169. },
  170. {
  171. "head": 1,
  172. "dep": "det",
  173. "tag": "DT",
  174. "orth": "the",
  175. "ner": "I-WORK_OF_ART",
  176. "id": 20
  177. },
  178. {
  179. "head": -2,
  180. "dep": "dobj",
  181. "tag": "NN",
  182. "orth": "Stage",
  183. "ner": "I-WORK_OF_ART",
  184. "id": 21
  185. },
  186. {
  187. "head": -3,
  188. "dep": "prep",
  189. "tag": "IN",
  190. "orth": "in",
  191. "ner": "I-WORK_OF_ART",
  192. "id": 22
  193. },
  194. {
  195. "head": 1,
  196. "dep": "compound",
  197. "tag": "NNP",
  198. "orth": "Windy",
  199. "ner": "I-WORK_OF_ART",
  200. "id": 23
  201. },
  202. {
  203. "head": -2,
  204. "dep": "pobj",
  205. "tag": "NNP",
  206. "orth": "City",
  207. "ner": "L-WORK_OF_ART",
  208. "id": 24
  209. },
  210. {
  211. "head": -6,
  212. "dep": "punct",
  213. "tag": ",",
  214. "orth": ",",
  215. "ner": "O",
  216. "id": 25
  217. },
  218. {
  219. "head": -7,
  220. "dep": "punct",
  221. "tag": "''",
  222. "orth": "''",
  223. "ner": "O",
  224. "id": 26
  225. },
  226. {
  227. "head": -8,
  228. "dep": "npadvmod",
  229. "tag": "NN",
  230. "orth": "Leisure",
  231. "ner": "B-ORG",
  232. "id": 27
  233. },
  234. {
  235. "head": -1,
  236. "dep": "cc",
  237. "tag": "CC",
  238. "orth": "&",
  239. "ner": "I-ORG",
  240. "id": 28
  241. },
  242. {
  243. "head": -2,
  244. "dep": "conj",
  245. "tag": "NNS",
  246. "orth": "Arts",
  247. "ner": "L-ORG",
  248. "id": 29
  249. },
  250. {
  251. "head": -11,
  252. "dep": "punct",
  253. "tag": "-RRB-",
  254. "orth": ")",
  255. "ner": "O",
  256. "id": 30
  257. },
  258. {
  259. "head": 13,
  260. "dep": "punct",
  261. "tag": ",",
  262. "orth": ",",
  263. "ner": "O",
  264. "id": 31
  265. },
  266. {
  267. "head": 1,
  268. "dep": "det",
  269. "tag": "DT",
  270. "orth": "the",
  271. "ner": "O",
  272. "id": 32
  273. },
  274. {
  275. "head": 11,
  276. "dep": "nsubjpass",
  277. "tag": "NN",
  278. "orth": "role",
  279. "ner": "O",
  280. "id": 33
  281. },
  282. {
  283. "head": -1,
  284. "dep": "prep",
  285. "tag": "IN",
  286. "orth": "of",
  287. "ner": "O",
  288. "id": 34
  289. },
  290. {
  291. "head": -1,
  292. "dep": "pobj",
  293. "tag": "NNP",
  294. "orth": "Celimene",
  295. "ner": "U-PERSON",
  296. "id": 35
  297. },
  298. {
  299. "head": -3,
  300. "dep": "punct",
  301. "tag": ",",
  302. "orth": ",",
  303. "ner": "O",
  304. "id": 36
  305. },
  306. {
  307. "head": -4,
  308. "dep": "acl",
  309. "tag": "VBN",
  310. "orth": "played",
  311. "ner": "O",
  312. "id": 37
  313. },
  314. {
  315. "head": -1,
  316. "dep": "agent",
  317. "tag": "IN",
  318. "orth": "by",
  319. "ner": "O",
  320. "id": 38
  321. },
  322. {
  323. "head": 1,
  324. "dep": "compound",
  325. "tag": "NNP",
  326. "orth": "Kim",
  327. "ner": "B-PERSON",
  328. "id": 39
  329. },
  330. {
  331. "head": -2,
  332. "dep": "pobj",
  333. "tag": "NNP",
  334. "orth": "Cattrall",
  335. "ner": "L-PERSON",
  336. "id": 40
  337. },
  338. {
  339. "head": -8,
  340. "dep": "punct",
  341. "tag": ",",
  342. "orth": ",",
  343. "ner": "O",
  344. "id": 41
  345. },
  346. {
  347. "head": 2,
  348. "dep": "auxpass",
  349. "tag": "VBD",
  350. "orth": "was",
  351. "ner": "O",
  352. "id": 42
  353. },
  354. {
  355. "head": 1,
  356. "dep": "advmod",
  357. "tag": "RB",
  358. "orth": "mistakenly",
  359. "ner": "O",
  360. "id": 43
  361. },
  362. {
  363. "head": 0,
  364. "dep": "root",
  365. "tag": "VBN",
  366. "orth": "attributed",
  367. "ner": "O",
  368. "id": 44
  369. },
  370. {
  371. "head": -1,
  372. "dep": "prep",
  373. "tag": "IN",
  374. "orth": "to",
  375. "ner": "O",
  376. "id": 45
  377. },
  378. {
  379. "head": 1,
  380. "dep": "compound",
  381. "tag": "NNP",
  382. "orth": "Christina",
  383. "ner": "B-PERSON",
  384. "id": 46
  385. },
  386. {
  387. "head": -2,
  388. "dep": "pobj",
  389. "tag": "NNP",
  390. "orth": "Haag",
  391. "ner": "L-PERSON",
  392. "id": 47
  393. },
  394. {
  395. "head": -4,
  396. "dep": "punct",
  397. "tag": ".",
  398. "orth": ".",
  399. "ner": "O",
  400. "id": 48
  401. }
  402. ],
  403. "brackets": [
  404. {
  405. "first": 2,
  406. "last": 3,
  407. "label": "NML"
  408. },
  409. {
  410. "first": 1,
  411. "last": 4,
  412. "label": "NP"
  413. },
  414. {
  415. "first": 7,
  416. "last": 8,
  417. "label": "NP-TTL"
  418. },
  419. {
  420. "first": 11,
  421. "last": 12,
  422. "label": "NP"
  423. },
  424. {
  425. "first": 11,
  426. "last": 14,
  427. "label": "NP"
  428. },
  429. {
  430. "first": 10,
  431. "last": 14,
  432. "label": "PP-LOC"
  433. },
  434. {
  435. "first": 6,
  436. "last": 14,
  437. "label": "NP"
  438. },
  439. {
  440. "first": 5,
  441. "last": 14,
  442. "label": "PP"
  443. },
  444. {
  445. "first": 1,
  446. "last": 14,
  447. "label": "NP"
  448. },
  449. {
  450. "first": 17,
  451. "last": 18,
  452. "label": "NP-SBJ"
  453. },
  454. {
  455. "first": 20,
  456. "last": 21,
  457. "label": "NP"
  458. },
  459. {
  460. "first": 23,
  461. "last": 24,
  462. "label": "NP"
  463. },
  464. {
  465. "first": 22,
  466. "last": 24,
  467. "label": "PP-LOC"
  468. },
  469. {
  470. "first": 19,
  471. "last": 24,
  472. "label": "VP"
  473. },
  474. {
  475. "first": 17,
  476. "last": 24,
  477. "label": "S-HLN"
  478. },
  479. {
  480. "first": 27,
  481. "last": 29,
  482. "label": "NP-TMP"
  483. },
  484. {
  485. "first": 15,
  486. "last": 30,
  487. "label": "NP"
  488. },
  489. {
  490. "first": 1,
  491. "last": 30,
  492. "label": "NP"
  493. },
  494. {
  495. "first": 0,
  496. "last": 30,
  497. "label": "PP-LOC"
  498. },
  499. {
  500. "first": 32,
  501. "last": 33,
  502. "label": "NP"
  503. },
  504. {
  505. "first": 35,
  506. "last": 35,
  507. "label": "NP"
  508. },
  509. {
  510. "first": 34,
  511. "last": 35,
  512. "label": "PP"
  513. },
  514. {
  515. "first": 32,
  516. "last": 35,
  517. "label": "NP"
  518. },
  519. {
  520. "first": 39,
  521. "last": 40,
  522. "label": "NP-LGS"
  523. },
  524. {
  525. "first": 38,
  526. "last": 40,
  527. "label": "PP"
  528. },
  529. {
  530. "first": 37,
  531. "last": 40,
  532. "label": "VP"
  533. },
  534. {
  535. "first": 32,
  536. "last": 41,
  537. "label": "NP-SBJ-2"
  538. },
  539. {
  540. "first": 43,
  541. "last": 43,
  542. "label": "ADVP-MNR"
  543. },
  544. {
  545. "first": 46,
  546. "last": 47,
  547. "label": "NP"
  548. },
  549. {
  550. "first": 45,
  551. "last": 47,
  552. "label": "PP-CLR"
  553. },
  554. {
  555. "first": 44,
  556. "last": 47,
  557. "label": "VP"
  558. },
  559. {
  560. "first": 42,
  561. "last": 47,
  562. "label": "VP"
  563. },
  564. {
  565. "first": 0,
  566. "last": 48,
  567. "label": "S"
  568. }
  569. ]
  570. },
  571. {
  572. "tokens": [
  573. {
  574. "head": 1,
  575. "dep": "compound",
  576. "tag": "NNP",
  577. "orth": "Ms.",
  578. "ner": "O",
  579. "id": 0
  580. },
  581. {
  582. "head": 1,
  583. "dep": "nsubj",
  584. "tag": "NNP",
  585. "orth": "Haag",
  586. "ner": "U-PERSON",
  587. "id": 1
  588. },
  589. {
  590. "head": 0,
  591. "dep": "root",
  592. "tag": "VBZ",
  593. "orth": "plays",
  594. "ner": "O",
  595. "id": 2
  596. },
  597. {
  598. "head": -1,
  599. "dep": "dobj",
  600. "tag": "NNP",
  601. "orth": "Elianti",
  602. "ner": "U-PERSON",
  603. "id": 3
  604. },
  605. {
  606. "head": -2,
  607. "dep": "punct",
  608. "tag": ".",
  609. "orth": ".",
  610. "ner": "O",
  611. "id": 4
  612. }
  613. ],
  614. "brackets": [
  615. {
  616. "first": 0,
  617. "last": 1,
  618. "label": "NP-SBJ"
  619. },
  620. {
  621. "first": 3,
  622. "last": 3,
  623. "label": "NP"
  624. },
  625. {
  626. "first": 2,
  627. "last": 3,
  628. "label": "VP"
  629. },
  630. {
  631. "first": 0,
  632. "last": 4,
  633. "label": "S"
  634. }
  635. ]
  636. }
  637. ]
  638. }
  639. ]
  640. }
  641. ]