-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathner.py
executable file
·43 lines (30 loc) · 975 Bytes
/
ner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 2 17:10:47 2017
@author: thanhan
"""
def extract_ne(sents,
               model_path='ner/english.all.3class.distsim.crf.ser.gz',
               jar_path='ner/stanford-ner.jar'):
    """Tag sentences with Stanford NER and collect the named-entity strings.

    Each sentence is tokenized with ``nltk.word_tokenize`` and the token
    lists are tagged in one ``StanfordNERTagger.tag_sents`` call.  Within a
    sentence, consecutive tokens carrying the same non-"O" tag are joined
    with single spaces into one entity string; an entity never spans two
    sentences.

    Args:
        sents: iterable of raw sentence strings.
        model_path: path of the Stanford NER classifier model handed to
            ``StanfordNERTagger``.
        jar_path: path of the Stanford NER jar handed to
            ``StanfordNERTagger``.

    Returns:
        A tuple ``(ne, res)``.  ``ne`` is the tagger output, one list of
        ``(word, tag)`` pairs per input sentence, returned exactly as the
        tagger produced it.  ``res`` is a flat list of the entity strings in
        order of appearance across all sentences.
    """
    sents = list(sents)
    if not sents:
        # Nothing to tag: skip building the tagger and calling it at all.
        return ([], [])

    # Imported lazily so the module can be loaded without NLTK installed.
    from nltk.tag import StanfordNERTagger
    import nltk

    st = StanfordNERTagger(model_path, jar_path)
    sents_tk = [nltk.word_tokenize(sent) for sent in sents]
    ne = st.tag_sents(sents_tk)

    res = []
    for sent in ne:
        last_tag = "O"
        en = ""
        for (word, tag) in sent:
            if tag != "O" and tag == last_tag:
                # Same entity type as the previous token: extend the entity.
                en += " " + word
            else:
                # Either a non-entity token or a change of entity type:
                # close the pending entity (if any) and maybe open a new one.
                if en != "":
                    res.append(en)
                en = "" if tag == "O" else word
            last_tag = tag
        # Flush the entity that runs up to the end of the sentence.  This is
        # done here rather than by appending a sentinel token to ``sent`` so
        # that the tagged sentences returned in ``ne`` are left unmodified.
        if en != "":
            res.append(en)
    return (ne, res)