jupyter notebook에서 import nltk 후 nltk.download()를 수행하여 필요한 패키지(punkt, stopwords 등)를 다운로드받는다. (import만으로는 패키지가 다운로드되지 않는다.)
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import RegexpTokenizer

# Tokenize the raw text with NLTK's default word tokenizer.
# BUG FIX: the function is nltk.word_tokenize, not nltk.word_tokenizer
# (the original raised AttributeError).
# NOTE(review): assumes `data1` is a str defined earlier in the notebook — confirm.
data2 = nltk.word_tokenize(data1)
# Alternative tokenization: WordPunctTokenizer splits on punctuation as well
# (e.g. "don't" -> ["don", "'", "t"]).
# BUG FIX: module typo `nlkt` -> `nltk` (the original raised ImportError).
from nltk.tokenize import WordPunctTokenizer
tokenizer = WordPunctTokenizer()
new_data2 = tokenizer.tokenize(data1)
# Regex-based tokenization: keep runs of word characters and apostrophes,
# so contractions like "don't" stay as one token.
from nltk.tokenize.regexp import RegexpTokenizer
# FIX: use a raw string for the regex — "\w" in a plain string is an invalid
# escape sequence (SyntaxWarning on modern Python).
tokenizer = RegexpTokenizer(r"[\w']+")
new_data3 = tokenizer.tokenize(data1)
data3 = nlkt.pos_tag(data2)
nltk의 기본 stopwords 사전 활용. (stopwords.words()에 언어를 지정하지 않으면 모든 언어의 불용어가 합쳐져 반환되므로, 보통 stopwords.words('english')처럼 언어를 지정한다.)
data4 = [ eash_word for each_word in new_data3 if each_word not in stopwords.words() ]
from collections import Counter

# Build a token -> frequency table for the stopword-filtered tokens.
data5 = Counter()
data5.update(data4)
# Top 100 (token, count) pairs, most frequent first.
data6 = data5.most_common(100)
# Same ranking as a plain dict for convenient lookup.
data7 = {word: count for word, count in data6}
# Wrap the filtered tokens in an nltk.Text for quick frequency exploration.
g_data4 = nltk.Text(data4, name="Graph")
# BUG FIX: `//` is floor division in Python, not a comment — the original line
# was a SyntaxError. Plot the top 10 keywords by frequency.
g_data4.plot(10)  # top 10 keywords