[8-1] NLTK를 이용한 개체명 인식
import nltk
# Fetch the NLTK data packages required by word_tokenize, pos_tag and ne_chunk.
# Legacy resource names, used by NLTK releases before 3.8.2 / 3.9:
nltk.download('punkt')
nltk.download('words')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
# Newer NLTK releases replaced the pickled models with differently named
# packages; without these, the tokenizer, tagger and chunker below raise
# LookupError. Downloading both sets keeps the script working on either
# generation of the library.
nltk.download('punkt_tab')
nltk.download('averaged_perceptron_tagger_eng')
nltk.download('maxent_ne_chunker_tab')
# Example sentence used throughout the named-entity recognition walkthrough.
sentence = "Prime Minister Boris Johnson had previously said the UK would leave by 31 October."
# Split the sentence into a list of word and punctuation tokens.
tokens = nltk.word_tokenize(sentence)
print(tokens)
['Prime', 'Minister', 'Boris', 'Johnson', 'had', 'previously', 'said', 'the', 'UK', 'would', 'leave', 'by', '31', 'October', '.']
# Attach a part-of-speech tag to every token, giving a list of
# (token, tag) pairs; the chunker below takes this tagged list as input.
tagged = nltk.pos_tag(tokens)
print(tagged)
[('Prime', 'NNP'), ('Minister', 'NNP'), ('Boris', 'NNP'), ('Johnson', 'NNP'), ('had', 'VBD'), ('previously', 'RB'), ('said', 'VBD'), ('the', 'DT'), ('UK', 'NNP'), ('would', 'MD'), ('leave', 'VB'), ('by', 'IN'), ('31', 'CD'), ('October', 'NNP'), ('.', '.')]
# Run the named-entity chunker over the POS-tagged tokens. The result prints
# as a bracketed tree rooted at S, with recognized entities grouped under a
# label such as PERSON or ORGANIZATION.
entities = nltk.chunk.ne_chunk(tagged)
print(entities)
(S
Prime/NNP
Minister/NNP
(PERSON Boris/NNP Johnson/NNP)
had/VBD
previously/RB
said/VBD
the/DT
(ORGANIZATION UK/NNP)
would/MD
leave/VB
by/IN
31/CD
October/NNP
./.)