Skip to content

Commit c1296d7

Browse files
committed
more comprehensible variable names
1 parent ef406ce commit c1296d7

File tree

1 file changed

+14
-14
lines changed

1 file changed

+14
-14
lines changed

pythainlp/summarize/__init__.py

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -16,31 +16,31 @@ def __init__(self, min_cut=0.1, max_cut=0.9):
1616
self.__max_cut = max_cut
1717
self.__stopwords = set(stopwords.words("thai") + list(punctuation))
1818

19-
def __compute_frequencies(self, word_sent):
20-
freq = defaultdict(int)
21-
for s in word_sent:
22-
for word in s:
19+
def __compute_frequencies(self, word_tokenized_sents):
20+
word_freqs = defaultdict(int)
21+
for sent in word_tokenized_sents:
22+
for word in sent:
2323
if word not in self.__stopwords:
24-
freq[word] += 1
24+
word_freqs[word] += 1
2525

26-
m = float(max(freq.values()))
27-
for w in list(freq):
28-
freq[w] = freq[w] / m
29-
if freq[w] >= self.__max_cut or freq[w] <= self.__min_cut:
30-
del freq[w]
26+
max_freq = float(max(word_freqs.values()))
27+
for w in list(word_freqs):
28+
word_freqs[w] = word_freqs[w] / max_freq
29+
if word_freqs[w] >= self.__max_cut or word_freqs[w] <= self.__min_cut:
30+
del word_freqs[w]
3131

32-
return freq
32+
return word_freqs
3333

3434
def __rank(self, ranking, n):
3535
return nlargest(n, ranking, key=ranking.get)
3636

3737
def summarize(self, text, n, tokenizer):
3838
sents = sent_tokenize(text)
39-
word_sent = [word_tokenize(s, tokenizer) for s in sents]
40-
self.__freq = self.__compute_frequencies(word_sent)
39+
word_tokenized_sents = [word_tokenize(sent, tokenizer) for sent in sents]
40+
self.__freq = self.__compute_frequencies(word_tokenized_sents)
4141
ranking = defaultdict(int)
4242

43-
for i, sent in enumerate(word_sent):
43+
for i, sent in enumerate(word_tokenized_sents):
4444
for w in sent:
4545
if w in self.__freq:
4646
ranking[i] += self.__freq[w]

0 commit comments

Comments
 (0)