DP를 이용한 오타교정 알고리즘. 삽입, 변경, 삭제를 통해 두 문자열이 같아지는 최소한의 개수를 구한다.
def levenshtein(s1, s2, debug=False):
if len(s1) < len(s2):
return levenshtein(s2, s1, debug)
if len(s2) == 0:
return len(s1)
previous_row = range(len(s2) + 1)
for i, c1 in enumerate(s1):
current_row = [i + 1]
for j, c2 in enumerate(s2):
insertions = previous_row[j + 1] + 1
deletions = current_row[j] + 1
substitutions = previous_row[j] + (c1 != c2)
current_row.append(min(insertions, deletions, substitutions))
if debug:
print(current_row[1:])
previous_row = current_row
return previous_row[-1]
s1 = '나이키'
s2 = '너이키'
print(levenshtein(s1, s2, debug=True))