import re
from collections import Counter
|
def words(text):
    """Return the list of lowercase word tokens found in `text`.

    The text is lowercased first; a token is a maximal run of regex "word"
    characters (letters, digits and underscore). Tokens are returned in
    order of appearance, duplicates included.
    """
    return re.findall(r'\w+', text.lower())
|
# Word-frequency table, built once at import time from the corpus file
# 'big.txt' (path is relative to the current working directory).
# The `with` block closes the file handle as soon as it has been read.
with open('big.txt') as corpus:
    WORDS = Counter(words(corpus.read()))
|
def P(word, N=sum(WORDS.values())):
    """Probability of `word`: its count in WORDS divided by `N`.

    `N` defaults to the total number of word occurrences in WORDS. That
    default is evaluated once, when this function is defined, not on every
    call. WORDS is a Counter, so a word it does not contain has count 0.
    """
    return WORDS[word] / N
|
def correction(word):
    """Most probable spelling correction for `word`.

    Returns the element of `candidates(word)` with the highest `P` value.
    """
    return max(candidates(word), key=P)
|
def candidates(word):
    """Generate possible spelling corrections for `word`.

    Returns the first non-empty option, in priority order:
      1. `word` itself, if it is a known word;
      2. known words one edit away;
      3. known words two edits away;
      4. `[word]` unchanged, when nothing known was found.
    The first three options are sets; the fallback is a one-element list.
    """
    return (known([word])
            or known(edits1(word))
            or known(edits2(word))
            or [word])
|
def known(words):
    """The subset of `words` that appear in the dictionary of WORDS.

    `words` may be any iterable of strings; the result is a set.
    """
    return {w for w in words if w in WORDS}
|
def edits1(word):
    """All edits that are one edit away from `word`.

    An edit is one of: deleting a character, swapping two adjacent
    characters, replacing a character with a lowercase ASCII letter, or
    inserting a lowercase ASCII letter at any position. The result is a set,
    so duplicates are collapsed; it can include `word` itself (replacing a
    letter with the same letter).
    """
    letters = 'abcdefghijklmnopqrstuvwxyz'
    # Every way to cut the word in two, including an empty head or tail.
    splits = [(word[:i], word[i:]) for i in range(len(word) + 1)]
    deletes = [head + tail[1:] for head, tail in splits if tail]
    transposes = [head + tail[1] + tail[0] + tail[2:]
                  for head, tail in splits if len(tail) > 1]
    replaces = [head + c + tail[1:]
                for head, tail in splits if tail
                for c in letters]
    inserts = [head + c + tail for head, tail in splits for c in letters]
    return set(deletes + transposes + replaces + inserts)
|
def edits2(word):
    """All edits that are two edits away from `word`.

    Applies `edits1` to every result of `edits1(word)`. Returns a lazy
    generator, not a set, so the same string may be produced more than once.
    """
    return (e2 for e1 in edits1(word) for e2 in edits1(e1))
# Blog page footer (not part of the program): "No comments." / "Post a comment"