@@ -23,6 +23,36 @@ namespace :backfill do
23
23
end
24
24
end
25
25
26
# Backfills `word_count_cache` on Attribute rows for the 500 values that
# occur most often among rows whose cache is still nil. Each distinct value
# is analyzed once, then the result is written to every uncached row that
# holds that value. Prints one progress line per value.
desc "Backfill cached word counts on the most-used attribute values"
task most_used_attribute_word_counts: :environment do
  # Analyzer settings; identical for every value that gets counted.
  counter_options = {
    ellipsis:          'no_special_treatment',
    hyperlink:         'count_as_one',
    contraction:       'count_as_one',
    hyphenated_word:   'count_as_one',
    date:              'no_special_treatment',
    number:            'count',
    numbered_list:     'ignore',
    xhtml:             'remove',
    forward_slash:     'count_as_multiple_except_dates',
    backslash:         'count_as_one',
    dotted_line:       'ignore',
    dashed_line:       'ignore',
    underscore:        'ignore',
    stray_punctuation: 'ignore'
  }

  # Built once and reused for every value instead of once per iteration.
  # NOTE(review): assumes a Counter only holds its options between `count`
  # calls — confirm against the word_count_analyzer gem.
  counter = WordCountAnalyzer::Counter.new(**counter_options)

  # Grouped count: { value => number of uncached rows with that value },
  # limited to the 500 largest groups.
  # NOTE(review): 'count_id' presumably refers to the column alias
  # ActiveRecord generates for `count(:id)` — confirm.
  value_frequencies = Attribute.where(word_count_cache: nil)
                               .group(:value)
                               .order('count_id DESC')
                               .limit(500)
                               .count(:id)

  # Bucket values by their word count so that each distinct count is
  # written with a single `update_all` call below.
  values_by_word_count = Hash.new { |buckets, word_count| buckets[word_count] = [] }

  value_frequencies.each do |value, count|
    word_count = counter.count(value)
    values_by_word_count[word_count] << value
    puts "#{value} x #{count} : #{word_count} words"
  end

  values_by_word_count.each do |word_count, values|
    # Re-check `word_count_cache: nil` so rows that already have a cached
    # count are not overwritten.
    Attribute.where(word_count_cache: nil, value: values)
             .update_all(word_count_cache: word_count)
  end
end
55
+
26
56
desc "Backfill cached word counts on all documents"
27
57
task document_word_count_caches : :environment do
28
58
Document . where ( cached_word_count : nil ) . where . not ( body : [ nil , "" ] ) . find_each ( batch_size : 500 ) do |document |
0 commit comments