# ontonotes_preprocess.py (3.2 KB)
  1. #coding:utf8
  2. import pickle
  3. from on.corpora.tree import tree
  4. parse_s = '''(TOP (IP (NP-SBJ (NP-APP (NP-PN (NR 菲律宾))
  5. (NP (NN 总统)))
  6. (NP-PN (NR 埃斯特拉达)))
  7. (VP (NP-TMP (NT 2号))
  8. (PP-MNR (P 透过)
  9. (NP (NP-PN (NR 马尼拉))
  10. (NP (NN 当地)
  11. (NN 电台))))
  12. (VP (VSB (VV 宣布)
  13. (VV 说))
  14. (PU ,)
  15. (IP-OBJ (PP-LOC (P 在)
  16. (LCP (NP (CP (WHNP-2 (-NONE- *OP*))
  17. (CP (IP (NP-SBJ (-NONE- *T*-2))
  18. (VP (ADVP (AD 仍))
  19. (VP (VV 遭到)
  20. (IP (NP-SBJ (CP (WHNP-1 (-NONE- *OP*))
  21. (CP (IP (NP-SBJ (-NONE- *T*-1))
  22. (VP (VA 激进)))
  23. (DEC 的)))
  24. (NP (NP (NP-APP (NN 回教)
  25. (NN 阿卜))
  26. (NP-PN (NR 沙耶夫)))
  27. (NP (NN 组织))))
  28. (VP (VV 羁押)
  29. (PP (P 在)
  30. (NP (NP (NP-PN (NR 非国))
  31. (NP (NN 南部)))
  32. (NP-PN (NR 和落岛)))))))))
  33. (DEC 的)))
  34. (NP (QP (CD 16)
  35. (CLP (M 名)))
  36. (NP (NN 人质))))
  37. (LC 当中)))
  38. (PU ,)
  39. (NP-SBJ (NN 军方))
  40. (VP (ADVP (AD 已经))
  41. (VP (VRD (VV 营救)
  42. (VV 出))
  43. (AS 了)
  44. (NP-OBJ (QP (CD 11)
  45. (CLP (M 名)))
  46. (NP (NP-PN (NR 菲律宾))
  47. (NP (NN 人质)))))))))
  48. (PU 。)))
  49. '''
  50. a = tree.from_string(parse_s)
  51. print(a.children)
  52. print("===")
  53. print(a.children[0].children)