transcription
Spooky NLP and Topic Modelling tutorial.ipynb
# from scipy.misc import imread
import imageio
`punkt_tab`이라는 리소스를 찾는 과정에서 발생하는 오류로, 이 리소스가 `punkt` 토크나이저의 일부가 아니기 때문에 생기는 문제.

LookupError:
**********************************************************************
Resource punkt_tab not found.
Please use the NLTK Downloader to obtain the resource:
>>> import nltk
>>> nltk.download('punkt_tab')
For more information see: https://www.nltk.org/data.html
Attempted to load tokenizers/punkt_tab/english/
Searched in:
- '/root/nltk_data'
- '/usr/nltk_data'
- '/usr/share/nltk_data'
- '/usr/lib/nltk_data'
- '/usr/share/nltk_data'
- '/usr/local/share/nltk_data'
- '/usr/lib/nltk_data'
- '/usr/local/lib/nltk_data'
**********************************************************************
# Fetch the 'punkt_tab' tokenizer resource that the LookupError log reports as missing.
nltk.download('punkt_tab')
# min_df must be either a float between 0 and 1 or an integer of at least 1.
# The original code set it to 0, which raised an error, so it is set to 1 here.
sentence = [
    "I love to eat Burgers",
    "I love to eat Fries",
]
vectorizer = CountVectorizer(min_df=1)  # changed to 1
# Learn the vocabulary from the two sentences and build their count matrix.
sentence_transform = vectorizer.fit_transform(sentence)
print("The features are:\n {}".format(vectorizer.get_feature_names_out()))
print("\nThe vectorized array looks like:\n {}".format(sentence_transform.toarray()))