[7-1] Lesk 알고리즘을 이용한 단어 중의성 해소
!pip3 install nltk==3.3
import nltk
nltk.download('wordnet')
nltk.download('punkt')
nltk.download('stopwords')
from nltk.corpus import wordnet
from nltk import word_tokenize
from nltk.corpus import stopwords
import sys
def disambiguate(word, sentence, stopwords):
    """Choose the WordNet sense of ``word`` that best fits ``sentence``.

    This is a simplified Lesk procedure: every synset of ``word`` is scored
    by the number of non-stopword tokens its signature (definition tokens as
    produced by ``tokenized_gloss``) shares with the tokens of ``sentence``.

    Args:
        word: The ambiguous word to look up in WordNet.
        sentence: The sentence providing the context for ``word``.
        stopwords: Collection of tokens to ignore when counting overlap.

    Returns:
        The synset with the largest overlap. Ties keep the synset that
        WordNet lists first, and when no sense overlaps at all the first
        synset is returned. ``None`` is returned when WordNet has no synset
        for ``word``.
    """
    word_senses = wordnet.synsets(word)
    if not word_senses:
        # Unknown word: indexing word_senses[0] would raise IndexError.
        return None

    context = set(word_tokenize(sentence))
    best_sense = word_senses[0]
    max_overlap = 0
    for sense in word_senses:
        signature = tokenized_gloss(sense)
        overlap = compute_overlap(signature, context, stopwords)
        # Strict comparison keeps the earliest sense on ties.
        if overlap > max_overlap:
            max_overlap = overlap
            best_sense = sense
    return best_sense
def tokenized_gloss(sense):
    """Build the Lesk signature of a WordNet sense as a set of tokens.

    The signature combines the tokens of the sense's definition with the
    tokens of every usage example attached to the sense.

    Args:
        sense: A synset object exposing ``definition()`` and ``examples()``.

    Returns:
        A set with the unique tokens of the definition and all examples.
    """
    tokens = set(word_tokenize(sense.definition()))
    for example in sense.examples():
        # set.union() returns a new set and leaves ``tokens`` unchanged, so
        # the example tokens used to be silently dropped; update() merges
        # them into ``tokens`` in place.
        tokens.update(word_tokenize(example))
    return tokens
def compute_overlap(signature, context, stopwords):
    """Count the non-stopword tokens shared by a signature and a context.

    Args:
        signature: Set of tokens describing a sense.
        context: Tokens of the sentence being disambiguated.
        stopwords: Tokens that must not contribute to the score.

    Returns:
        The number of distinct tokens present in both ``signature`` and
        ``context`` that are not in ``stopwords``.
    """
    # (signature - stopwords) & context == (signature & context) - stopwords
    shared_tokens = signature.intersection(context)
    content_tokens = shared_tokens.difference(stopwords)
    return len(content_tokens)
# Demo: score one hand-picked sense of the word against a sentence, then let
# disambiguate() choose the best sense by itself.

# NOTE: this rebinds the imported ``stopwords`` corpus reader to a plain set
# of English stopwords; the reader is no longer reachable under that name.
stopwords = set(stopwords.words('english'))

word = 'eat'
sentence = "They eat a meal"
context = set(word_tokenize(sentence))

print("Word :", word)

# Index 1 selects the second synset that WordNet lists for the word.
syn = wordnet.synsets(word)[1]
print("Sense :", syn.name())
print("Definition :", syn.definition())
print("Sentence :", sentence)

# Signature of the chosen sense and its overlap with the sentence tokens.
signature = tokenized_gloss(syn)
print(signature)
print(compute_overlap(signature, context, stopwords))

# Full disambiguation over every sense of the word.
print("Best sense: ", disambiguate(word, sentence, stopwords))
Word : eat
Sense : eat.v.02
Definition : eat a meal; take a meal
Sentence : They eat a meal
{';', 'a', 'meal', 'eat', 'take'}
2
Best sense: Synset('eat.v.02')