길이보정
def len_penalty(scores, sentences, optimum_len):
    """Return ``scores`` reduced by a penalty for deviating from ``optimum_len``.

    The penalty for each sentence is
    ``abs(len(sentence) - optimum_len) / optimum_len``, i.e. the distance
    between its length and the optimum, relative to the optimum.

    Args:
        scores: Scores to adjust (anything ``np.array`` accepts); combined
            element-wise with the per-sentence penalties.
        sentences: Sized items; ``len()`` of each one is used as its length,
            so strings are measured in characters and token lists in tokens.
        optimum_len: Target length. Must be strictly positive.

    Returns:
        A NumPy array holding ``scores - penalty``.

    Raises:
        ValueError: If ``optimum_len`` is zero or negative.
    """
    # A zero optimum would divide by zero (silently producing inf/nan) and a
    # negative one would turn the penalty into a bonus, so reject both.
    # np.any/np.asarray keep array-valued optima working as before.
    if np.any(np.asarray(optimum_len) <= 0):
        raise ValueError(f"optimum_len must be positive, got {optimum_len!r}")

    lengths = np.array([len(sentence) for sentence in sentences])
    penalty = np.abs(lengths - optimum_len) / optimum_len
    return np.array(scores) - penalty
Top-K 샘플링
import random
import numpy as np
def softmax(x):
    """Return the softmax of ``x``, normalised over all of its elements.

    Args:
        x: Array-like of real numbers (list, tuple or NumPy array).

    Returns:
        A NumPy array with the same shape as ``x`` whose entries are
        ``exp(x_i) / sum(exp(x))``. Empty input yields an empty array.
    """
    values = np.asarray(x)
    if values.size == 0:
        # np.max raises on empty input; keep returning an empty array.
        return np.exp(values)

    # Softmax is invariant to adding a constant to every element, so shift by
    # the maximum: the largest exponent becomes 0, which prevents np.exp from
    # overflowing to inf (and the result from collapsing to nan).
    exponentials = np.exp(values - np.max(values))
    return exponentials / np.sum(exponentials)
def top_k_sampling(score_list: List[int], weight: int = 1):
    """Randomly pick one index of ``score_list`` using softmax probabilities.

    Each score is multiplied by ``weight`` and the results are passed through
    ``softmax``; the resulting values are used as sampling weights, so
    higher-scoring positions are more likely to be chosen. Note that no
    truncation to a top-k subset happens here: every index of ``score_list``
    is a candidate.

    Args:
        score_list: Scores, one per candidate.
        weight: Multiplier applied to every score before the softmax.

    Returns:
        The index of the sampled candidate.
    """
    scaled_scores = [score * weight for score in score_list]
    probabilities = softmax(scaled_scores)
    candidate_indices = range(len(scaled_scores))
    # random.choices returns a list of k=1 samples; unpack the single draw.
    (chosen,) = random.choices(candidate_indices, weights=probabilities)
    return chosen
자카드 유사도 계산(분모 변형)
def jaccard_similarity(sentence1: List[str], sentence2: List[str]):
    """Return a Jaccard-style overlap ratio between two token sequences.

    Both inputs are reduced to sets of unique items. Unlike the classic
    Jaccard index, the denominator is not the size of the union but the size
    of the larger of the two sets.

    Args:
        sentence1: Items (e.g. tokens) of the first sentence.
        sentence2: Items (e.g. tokens) of the second sentence.

    Returns:
        ``len(intersection) / max(len(set1), len(set2))`` as a float, or
        ``0.0`` when both inputs are empty.
    """
    unique_first, unique_second = set(sentence1), set(sentence2)
    larger_size = max(len(unique_first), len(unique_second))
    if larger_size == 0:
        # Both sets are empty; avoid dividing by zero.
        return 0.0
    return len(unique_first & unique_second) / larger_size