@@ -16,31 +16,31 @@ def __init__(self, min_cut=0.1, max_cut=0.9):
16
16
self .__max_cut = max_cut
17
17
self .__stopwords = set (stopwords .words ("thai" ) + list (punctuation ))
18
18
19
def __compute_frequencies(self, word_tokenized_sents):
    """Compute normalized word frequencies for a list of tokenized sentences.

    :param word_tokenized_sents: list of sentences, each a list of word tokens
    :return: dict mapping word -> frequency normalized into [0, 1] by the
        most frequent word, with stopwords excluded and words whose
        normalized frequency is >= max_cut or <= min_cut removed
    """
    word_freqs = defaultdict(int)
    for sent in word_tokenized_sents:
        for word in sent:
            if word not in self.__stopwords:
                word_freqs[word] += 1

    # Guard against empty input (no sentences, or every token was a
    # stopword): max() over an empty sequence raises ValueError.
    if not word_freqs:
        return word_freqs

    max_freq = float(max(word_freqs.values()))
    # Normalize and prune in one pass; iterate over a snapshot of the keys
    # because entries may be deleted while looping.
    for w in list(word_freqs):
        word_freqs[w] = word_freqs[w] / max_freq
        if word_freqs[w] >= self.__max_cut or word_freqs[w] <= self.__min_cut:
            del word_freqs[w]

    return word_freqs
33
33
34
34
def __rank(self, ranking, n):
    """Return the indices of the n highest-scoring sentences.

    :param ranking: dict mapping sentence index -> score
    :param n: number of top sentences to select
    :return: list of the n sentence indices with the largest scores,
        in descending score order
    """
    return nlargest(n, ranking, key=lambda sent_idx: ranking[sent_idx])
36
36
37
37
def summarize (self , text , n , tokenizer ):
38
38
sents = sent_tokenize (text )
39
- word_sent = [word_tokenize (s , tokenizer ) for s in sents ]
40
- self .__freq = self .__compute_frequencies (word_sent )
39
+ word_tokenized_sents = [word_tokenize (sent , tokenizer ) for sent in sents ]
40
+ self .__freq = self .__compute_frequencies (word_tokenized_sents )
41
41
ranking = defaultdict (int )
42
42
43
- for i , sent in enumerate (word_sent ):
43
+ for i , sent in enumerate (word_tokenized_sents ):
44
44
for w in sent :
45
45
if w in self .__freq :
46
46
ranking [i ] += self .__freq [w ]
0 commit comments