File Coverage

File:lib/CheckSpelling/SpellingCollator.pm
Coverage:83.4%

line stmt bran cond sub time code
1#! -*-perl-*-
2
3package CheckSpelling::SpellingCollator;
4
5our $VERSION='0.1.0';
6
1
1
1
109810
2
27
use warnings;
7
1
1
1
2
0
28
use File::Path qw(remove_tree);
8
1
1
1
191
1
2177
use CheckSpelling::Util;
9
10my %letter_map;
11my %ignored_event_map;
12my $disable_word_collating;
13
# Pull a numeric field out of a stats record.
#
#   $record - text expected to contain "<field>: <digits>" somewhere
#   $field  - field name; interpolated into the pattern as-is
#
# Returns the run of digits following "<field>:" (optional whitespace is
# allowed after the colon), or 0 when the field is not present.
sub get_field {
  my ($record, $field) = @_;
  if ($record =~ /\b$field:\s*(\d+)/) {
    return $1;
  }
  return 0;
}
19
# Pull a bracketed, comma-separated list out of a stats record.
#
#   $record - text expected to contain "<field>: [a, b, c]"
#   $field  - field name; interpolated into the pattern as-is
#
# Returns the list elements (split on commas, surrounding whitespace
# dropped), or an empty list when the field is absent or its brackets
# are empty.
sub get_array {
  my ($record, $field) = @_;
  if ($record =~ /\b$field: \[([^\]]+)\]/) {
    my $list = $1;
    return split(/\s*,\s*/, $list);
  }
  return ();
}
26
# Choose between an existing value and a candidate.
#
# Keeps $next only when it is true (non-zero, defined) and strictly
# smaller than $value; in every other case the result is $value.
sub maybe {
  my ($next, $value) = @_;
  return $next if $next && $next < $value;
  return $value;
}
32
33my %expected = ();
# Check whether $item is covered by the package-level %expected map and,
# if so, record how it was matched.
#
#   $item  - word to look up
#   $value - rank supplied by the caller
#
# Lookup order:
#   1. $item exactly as given: the stored rank becomes the smaller of
#      the stored rank and $value.
#   2. If $item starts with A-Z: its Title-case form, then its all
#      lower-case form. A hit stores $value + .1 (Title-case) or
#      $value + .2 (lower-case) unless a non-zero smaller rank is
#      already stored for that entry.
#
# Returns $value when an entry was found (and updated), 0 otherwise.
sub expect_item {
  my ($item, $value) = @_;
  our %expected;
  my $rank;

  if (defined $expected{$item}) {
    my $recorded = $expected{$item};
    $rank = $value < $recorded ? $value : $recorded;
  } elsif ($item =~ /^([A-Z])(.*)/) {
    my $title_case = $1 . lc $2;
    my $lower_case = lc $title_case;
    for my $attempt ([$title_case, $value + .1], [$lower_case, $value + .2]) {
      my ($candidate, $penalized) = @{$attempt};
      # remember which spelling is being tried so the store below
      # targets the entry that actually matched
      $item = $candidate;
      next unless defined $expected{$candidate};
      my $recorded = $expected{$candidate};
      $rank = ($recorded && $recorded < $penalized) ? $recorded : $penalized;
      last;
    }
  }

  return 0 unless defined $rank;
  $expected{$item} = $rank;
  return $value;
}
58
# Decide whether $word is already covered by the expected list.
#
# Tries, in order, via expect_item:
#   1 - the word exactly as given
#   2 - the lower-cased word
#   3 - a crude stem of the lower-cased word:
#         "...ies" -> "...y", otherwise a trailing "s" is dropped;
#         "<consonant>ed" loses the "ed";
#         a trailing 'd or 's is dropped.
#
# Returns the number of the step that matched, or 0 when nothing
# matched (including when no stemming rule applies).
sub skip_item {
  my ($word) = @_;
  return 1 if expect_item($word, 1);

  my $lowered = lc $word;
  return 2 if expect_item($lowered, 2);

  my $stem = $lowered;
  if ($stem =~ /.s$/) {
    # only strip the bare "s" when the "ies" rewrite did not apply
    $stem =~ s/s$// unless $stem =~ s/ies$/y/;
  } elsif ($stem =~ /^(.+[^aeiou])ed$/) {
    $stem = $1;
  } elsif ($stem =~ /^(.+)'[ds]$/) {
    $stem = $1;
  } else {
    return 0;
  }

  return expect_item($stem, 3) ? 3 : 0;
}
80
# Report whether a warning should be suppressed.
#
# The warning's event code is the trailing "(code)" group (letters,
# digits, underscore and hyphen). Returns 1 when that code has a true
# entry in the package-level %ignored_event_map, 0 otherwise -
# including when the warning carries no trailing code.
sub should_skip_warning {
  my ($warning) = @_;
  our %ignored_event_map;
  return 0 unless $warning =~ /\(([-\w]+)\)$/;
  return $ignored_event_map{$1} ? 1 : 0;
}
90
# Decide whether the caller should suppress a warning about $item, and
# route repeat occurrences elsewhere.
#
#   $item               - the unrecognized token
#   $file               - file name prefix for the warning
#   $warning            - warning text (appended to $file)
#   $unknown_word_limit - optional cap on repeat reports per item
#
# Shared state: the package hashes %seen (occurrence counter per item)
# and %last_seen (most recent over-limit warning per item), and the
# MORE_WARNINGS filehandle.
#
# Returns 1 when the caller should not print the warning itself (the
# event is ignored, the item is expected, or the item was seen before),
# 0 for the first sighting of an item.
sub log_skip_item {
  my ($item, $file, $warning, $unknown_word_limit) = @_;
  our (%seen, %last_seen);

  return 1 if should_skip_warning($warning);
  return 1 if skip_item($item);

  my $seen_count = $seen{$item};
  unless (defined $seen_count) {
    # first sighting: start counting and let the caller report it
    $seen{$item} = 1;
    return 0;
  }

  my $report = "$file$warning";
  # the counter only advances when a limit is configured
  if (!defined $unknown_word_limit || ($seen_count++ < $unknown_word_limit)) {
    print MORE_WARNINGS "$report\n";
  } else {
    $last_seen{$item} = $report;
  }
  $seen{$item} = $seen_count;
  return 1;
}
108
# Reduce a key to a crude singular / base form.
#
# Returns $key untouched when the package flag $disable_word_collating
# is set. Otherwise:
#   - "...ies" becomes "...y";
#   - any other key of two or more characters ending in "s" loses it;
#   - a key ending in "<any char><non-vowel>ed" loses the "ed".
sub stem_word {
  my ($key) = @_;
  our $disable_word_collating;
  return $key if $disable_word_collating;

  if ($key =~ /.s$/) {
    # only strip the bare "s" when the "ies" rewrite did not apply
    $key =~ s/s$// unless $key =~ s/ies$/y/;
  } elsif ($key =~ /.[^aeiou]ed$/) {
    $key =~ s/ed$//;
  }
  return $key;
}
125
# Compute the grouping key and bucket character for a token.
#
# Returns the two-element list ($key, $char):
#   - When the package flag $disable_word_collating is set, $key is the
#     token unchanged and $char is its lower-cased first character.
#   - Otherwise $key is the lower-cased token with runs of apostrophes
#     collapsed, a trailing 's / 'd removed, a leading apostrophe group
#     (optionally preceded by one character other than "i") removed,
#     and one trailing apostrophe removed; $char is the first character
#     of that key.
#
# $char is a lexical here; it used to be an undeclared package global
# that every call overwrote.
sub collate_key {
  my ($key) = @_;
  our $disable_word_collating;
  my $char;
  if ($disable_word_collating) {
    $char = lc substr $key, 0, 1;
  } else {
    $key = lc $key;
    $key =~ s/''+/'/g;
    $key =~ s/'[sd]$//;
    $key =~ s/^[^Ii]?'+(.*)/$1/;
    $key =~ s/(.*?)'$/$1/;
    $char = substr $key, 0, 1;
  }
  return ($key, $char);
}
141
# (Re)load the package-level %expected map from a word list file.
#
#   $expect - path of a UTF-8 file with one word per line
#
# %expected is always emptied first. Every line, with its first line
# break sequence removed, becomes a key with value 0. A file that cannot
# be opened leaves %expected empty; no error is raised.
#
# Uses a lexical filehandle and loop variable, so neither a global
# EXPECT handle nor the package variable $word is touched.
sub load_expect {
  my ($expect) = @_;
  our %expected;
  %expected = ();
  open(my $expect_fh, '<:utf8', $expect) or return;
  while (my $word = <$expect_fh>) {
    $word =~ s/\R//;
    $expected{$word} = 0;
  }
  close $expect_fh;
  return;
}
154
# Drop entries from the package-level %expected map that the collected
# unknown words (package-level %letter_map) suggest are no longer the
# right spelling.
#
# For each expected word, its collation key/bucket is looked up in
# %letter_map. The word is kept when:
#   - nothing was collected under that key, or
#   - the word itself is among the collected variants, or
#   - more than two variants were collected, or
#   - the word equals its key and more than one variant was collected.
# Otherwise it is deleted from %expected.
sub harmonize_expect {
  our %letter_map;
  our %expected;

  for my $word (keys %expected) {
    my ($key, $char) = collate_key($word);
    my $variants = $letter_map{$char}{$key};
    next unless defined $variants;
    next if defined $variants->{$word};

    my $variant_count = scalar keys %{$variants};
    next if $variant_count > 2;
    next if $word eq $key && $variant_count > 1;

    delete $expected{$word};
  }
}
174
# Fold plural / past-tense keys of the package-level %letter_map into
# their stem's entry.
#
# Does nothing when the package flag $disable_word_collating is set.
# Otherwise, for every bucket character and every key in that bucket
# whose stem (per stem_word) differs from the key and already exists in
# the same bucket, the key's words are merged into the stem's word set
# and the key is removed.
sub group_related_words {
  our %letter_map;
  our $disable_word_collating;
  return if $disable_word_collating;

  # group related words
  for my $char (sort CheckSpelling::Util::number_biased keys %letter_map) {
    # the key list is snapshotted by sort, so deleting below is safe
    for my $plural_key (sort keys(%{$letter_map{$char}})) {
      my $key = stem_word($plural_key);
      next if $key eq $plural_key;
      next unless defined $letter_map{$char}{$key};

      my %merged = (
        %{$letter_map{$char}{$key}},
        map { $_ => 1 } keys(%{$letter_map{$char}{$plural_key}}),
      );
      $letter_map{$char}{$key} = \%merged;
      delete $letter_map{$char}{$plural_key};
    }
  }
}
195
# Tally a warning by its event code in the package-level %counters.
#
# The event code is the trailing "(code)" group of the warning. Nothing
# is counted when the warning has no such group or when the code has a
# defined entry in the package-level %ignored_event_map.
#
# Leaves via `return` rather than `next`: this sub contains no loop, so
# `next` would act on whatever loop the caller happens to be in (with an
# "Exiting subroutine via next" warning) or die when there is none.
sub count_warning {
  my ($warning) = @_;
  our %counters;
  our %ignored_event_map;
  if ($warning =~ /\(([-\w]+)\)$/) {
    my ($code) = ($1);
    return if defined $ignored_event_map{$code};
    ++$counters{$code};
  }
  return;
}
206
# Append one row to the TIMING_REPORT filehandle.
#
#   $name       - value for the first column; double quotes are
#                 backslash-escaped before it is written
#   $start_time - value for the second column, written as given
#   $directory,
#   $marker     - the modification time of "$directory/$marker" is
#                 written as the third column
sub report_timing {
  my ($name, $start_time, $directory, $marker) = @_;
  my @marker_stat = stat "$directory/$marker";
  my $end_time = $marker_stat[9];
  (my $escaped = $name) =~ s/"/\\"/g;
  print TIMING_REPORT qq("$escaped", $start_time, $end_time\n);
}
213
# Read a pattern file whose location is given by an environment variable.
#
#   $path - NAME of the environment variable holding the file path
#
# File format, as parsed here:
#   - lines starting with "# " are context and are accumulated for the
#     next pattern;
#   - any other line starting with "#" discards accumulated context;
#   - an empty line discards accumulated context;
#   - every other line is a pattern.
#
# Returns one string per pattern: the accumulated context lines (each
# still ending in a newline) followed by the pattern without its
# newline. Returns nothing when the variable is unset or the file
# cannot be opened.
#
# Reads through a lexical filehandle into a lexical variable, so the
# caller's $_ and any global ITEMS handle are left alone.
sub get_pattern_with_context {
  my ($path) = @_;
  return unless defined $ENV{$path};
  # keep only what /(.*)/ captures (the value up to the first newline)
  my ($file) = $ENV{$path} =~ /(.*)/;
  return unless open my $items_fh, '<:utf8', $file;

  my @items;
  my $context = '';
  while (my $pattern = <$items_fh>) {
    if ($pattern =~ /^#/) {
      if ($pattern =~ /^# /) {
        $context .= $pattern;
      } else {
        $context = '';
      }
      next;
    }
    chomp $pattern;
    unless ($pattern =~ /./) {
      $context = '';
      next;
    }
    push @items, $context.$pattern;
    $context = '';
  }
  close $items_fh;
  return @items;
}
243
# Write a summary of pattern hit totals to a file.
#
#   $formatter   - code ref called as
#                  $formatter->($hits, $file_count_text, $description, $rule);
#                  whatever it returns is printed
#   $path        - output file (opened for writing as UTF-8)
#   $items       - array ref of "context lines + pattern" strings
#   $totals      - array ref of hit counts, parallel to $items
#   $file_counts - optional array ref of per-pattern file counts
#
# Entries are emitted in descending order of hits (ties broken by
# descending file count when $file_counts is given) and output stops at
# the first entry whose total is not positive. Nothing is written - the
# file is not even opened - when $totals is empty.
sub summarize_totals {
  my ($formatter, $path, $items, $totals, $file_counts) = @_;
  return unless @{$totals};
  return unless open my $fh, '>:utf8', $path;

  my @order = 0 .. $#{$totals};
  if ($file_counts) {
    @order = sort {
      $totals->[$b] <=> $totals->[$a] ||
      $file_counts->[$b] <=> $file_counts->[$a]
    } @order;
  } else {
    @order = sort { $totals->[$b] <=> $totals->[$a] } @order;
  }

  for my $index (@order) {
    my $hits = $totals->[$index];
    last unless $hits > 0;

    # split "context...\nrule" into its two parts; no newline means
    # the whole entry is the rule
    my ($description, $rule) = ('', $items->[$index]);
    if ($items->[$index] =~ /^(.*\n)([^\n]+)$/s) {
      ($description, $rule) = ($1, $2);
    }

    my $file_count_text = $file_counts ? " file-count: $file_counts->[$index]" : "";
    print {$fh} $formatter->($hits, $file_count_text, $description, $rule);
  }
  close $fh;
}
278
# Collate the per-file results of a spell-checking run.
#
# Input: directory names, one per line, read from <> (files named on
# the command line, or STDIN). The directory is expected to contain a
# `name` file and may contain `skipped`, `stats`, `unknown` and
# `warnings` files.
#
# Configuration is read through CheckSpelling::Util::get_file_from_env /
# get_val_from_env using the keys visible below (early_warnings,
# warning_output, more_warnings, counter_summary, ignored_events,
# should_exclude_file, unknown_word_limit, unknown_file_word_limit,
# INPUT_CANDIDATE_EXAMPLE_LIMIT, INPUT_DISABLE_CHECKS,
# INPUT_ONLY_CHECK_CHANGED_FILES, check_file_names, timing_report,
# candidate_summary, forbidden_summary) plus $ENV{expect},
# $ENV{candidates_path} and $ENV{forbidden_path}.
#
# Output:
#   - warnings to WARNING_OUTPUT, repeat sightings to MORE_WARNINGS;
#   - a JSON-like object of per-event counts to COUNTER_SUMMARY;
#   - names of skipped / noisy files to SHOULD_EXCLUDE;
#   - candidate / forbidden pattern summaries via summarize_totals;
#   - the collated unknown words, one group per line, to STDOUT.
#
# The bareword handles are kept on purpose: log_skip_item prints to
# MORE_WARNINGS and report_timing prints to TIMING_REPORT.
#
# %seen and %last_seen are the package hashes that log_skip_item fills
# in; they are reset here at the points where they start being used.
# (As lexicals they were invisible to log_skip_item, so the
# "limited-references" pass never had anything to report.)
sub main {
  my @directories;
  my @cleanup_directories;
  my @check_file_paths;

  my $early_warnings = CheckSpelling::Util::get_file_from_env('early_warnings', '/dev/null');
  my $warning_output = CheckSpelling::Util::get_file_from_env('warning_output', '/dev/stderr');
  my $more_warnings = CheckSpelling::Util::get_file_from_env('more_warnings', '/dev/stderr');
  my $counter_summary = CheckSpelling::Util::get_file_from_env('counter_summary', '/dev/stderr');
  my $ignored_events = CheckSpelling::Util::get_file_from_env('ignored_events', '');
  if ($ignored_events) {
    our %ignored_event_map;
    for my $event (split /,/, $ignored_events) {
      $ignored_event_map{$event} = 1;
    }
  }
  my $should_exclude_file = CheckSpelling::Util::get_file_from_env('should_exclude_file', '/dev/null');
  my $unknown_word_limit = CheckSpelling::Util::get_val_from_env('unknown_word_limit', undef);
  my $unknown_file_word_limit = CheckSpelling::Util::get_val_from_env('unknown_file_word_limit', undef);
  my $candidate_example_limit = CheckSpelling::Util::get_file_from_env('INPUT_CANDIDATE_EXAMPLE_LIMIT', '3');
  my $disable_flags = CheckSpelling::Util::get_file_from_env('INPUT_DISABLE_CHECKS', '');
  my $only_check_changed_files = CheckSpelling::Util::get_file_from_env('INPUT_ONLY_CHECK_CHANGED_FILES', '');
  my $disable_noisy_file = $disable_flags =~ /(?:^|,|\s)noisy-file(?:,|\s|$)/;
  our $disable_word_collating = $only_check_changed_files || $disable_flags =~ /(?:^|,|\s)word-collating(?:,|\s|$)/;
  my $file_list = CheckSpelling::Util::get_file_from_env('check_file_names', '');
  my $timing_report = CheckSpelling::Util::get_file_from_env('timing_report', '');
  my $start_time;

  open WARNING_OUTPUT, '>:utf8', $warning_output;
  open MORE_WARNINGS, '>:utf8', $more_warnings;
  open COUNTER_SUMMARY, '>:utf8', $counter_summary;
  open SHOULD_EXCLUDE, '>:utf8', $should_exclude_file;
  if ($timing_report) {
    open TIMING_REPORT, '>:utf8', $timing_report;
    print TIMING_REPORT "file, start, finish\n";
  }

  my @candidates = get_pattern_with_context('candidates_path');
  my @candidate_totals = (0) x scalar @candidates;
  my @candidate_file_counts = (0) x scalar @candidates;

  my @forbidden = get_pattern_with_context('forbidden_path');
  my @forbidden_totals = (0) x scalar @forbidden;

  my @delayed_warnings;
  our %letter_map = ();

  my %file_map = ();

  # Pass 1: map each reported file name to its result directory.
  for my $directory (<>) {
    chomp $directory;
    next unless $directory =~ /^(.*)$/;
    $directory = $1;
    unless (-e $directory) {
      print STDERR "Could not find: $directory\n";
      next;
    }
    unless (-d $directory) {
      print STDERR "Not a directory: $directory\n";
      next;
    }

    # if there's no filename, we can't report
    next unless open(NAME, '<:utf8', "$directory/name");
    my $file=<NAME>;
    close NAME;

    $file_map{$file} = $directory;
  }

  # Pass 2: per file, gather stats, pattern hits and unknown tokens.
  for my $file (sort keys %file_map) {
    my $directory = $file_map{$file};
    if ($timing_report) {
      $start_time = (stat "$directory/name")[9];
    }

    if (-e "$directory/skipped") {
      open SKIPPED, '<:utf8', "$directory/skipped";
      my $reason=<SKIPPED>;
      close SKIPPED;
      chomp $reason;
      push @delayed_warnings, "$file:1:1 ... 1, Warning - Skipping `$file` because $reason\n";
      print SHOULD_EXCLUDE "$file\n";
      push @cleanup_directories, $directory;
      report_timing($file, $start_time, $directory, 'skipped') if ($timing_report);
      next;
    }

    # stats isn't written if there was nothing interesting in the file
    unless (-s "$directory/stats") {
      push @directories, $directory;
      report_timing($file, $start_time, $directory, 'warnings') if ($timing_report);
      next;
    }

    if ($file eq $file_list) {
      open FILE_LIST, '<:utf8', $file_list;
      # warning line numbers are 1-based, so occupy index 0
      push @check_file_paths, '0 placeholder';
      for my $check_file_path (<FILE_LIST>) {
        chomp $check_file_path;
        push @check_file_paths, $check_file_path;
      }
      close FILE_LIST;
    }

    my ($words, $unrecognized, $unknown, $unique);

    {
      open STATS, '<:utf8', "$directory/stats";
      my $stats=<STATS>;
      close STATS;
      $words=get_field($stats, 'words');
      $unrecognized=get_field($stats, 'unrecognized');
      $unknown=get_field($stats, 'unknown');
      $unique=get_field($stats, 'unique');
      my @candidate_list;
      if (@candidate_totals) {
        @candidate_list=get_array($stats, 'candidates');
        my @lines=get_array($stats, 'candidate_lines');
        for my $i (0 .. $#candidate_list) {
          my $hits = $candidate_list[$i];
          next unless $hits;
          $candidate_totals[$i] += $hits;
          if ($candidate_file_counts[$i]++ < $candidate_example_limit) {
            # the pattern is the last line of "context + pattern"
            my $pattern = (split /\n/,$candidates[$i])[-1];
            my $position = $lines[$i];
            $position =~ s/:(\d+)$/ ... $1/;
            my $wrapped = CheckSpelling::Util::wrap_in_backticks($pattern);
            push @delayed_warnings, "$file:$position, Notice - Line matches candidate pattern $wrapped (candidate-pattern)\n";
          }
        }
      }
      if (@forbidden_totals) {
        my @forbidden_list=get_array($stats, 'forbidden');
        for my $i (0 .. $#forbidden_list) {
          my $hits = $forbidden_list[$i];
          $forbidden_totals[$i] += $hits if $hits;
        }
      }
      #print STDERR "$file (unrecognized: $unrecognized; unique: $unique; unknown: $unknown, words: $words, candidates: [".join(", ", @candidate_list)."])\n";
    }

    report_timing($file, $start_time, $directory, 'unknown') if ($timing_report);
    # These heuristics are very new and need tuning/feedback
    if (
        ($unknown > $unique)
        # || ($unrecognized > $words / 2)
    ) {
      unless ($disable_noisy_file) {
        if ($file ne $file_list) {
          push @delayed_warnings, "$file:1:1 ... 1, Warning - Skipping `$file` because it seems to have more noise ($unknown) than unique words ($unique) (total: $unrecognized / $words). (noisy-file)\n";
          print SHOULD_EXCLUDE "$file\n";
        } else {
          push @delayed_warnings, "$file:1:1 ... 1, Warning - Skipping file list because there seems to be more noise ($unknown) than unique words ($unique) (total: $unrecognized / $words). (noisy-file-list)\n";
        }
        push @directories, $directory;
        next;
      }
    }
    unless (-s "$directory/unknown") {
      push @directories, $directory;
      next;
    }
    open UNKNOWN, '<:utf8', "$directory/unknown";
    for my $token (<UNKNOWN>) {
      $token =~ s/\R//;
      next unless $token =~ /./;
      my ($key, $char) = collate_key($token);
      # bucket char -> collation key -> set of spellings seen
      $letter_map{$char}{$key}{$token} = 1;
    }
    close UNKNOWN;
    push @directories, $directory;
  }
  close SHOULD_EXCLUDE;
  close TIMING_REPORT if $timing_report;

  summarize_totals(
    sub {
      my ($hits, $files, $context, $pattern) = @_;
      return "# hit-count: $hits$files\n$context$pattern\n\n",
    },
    CheckSpelling::Util::get_file_from_env('candidate_summary', '/dev/stderr'),
    \@candidates,
    \@candidate_totals,
    \@candidate_file_counts,
  );

  summarize_totals(
    sub {
      my (undef, undef, $context, $pattern) = @_;
      $context =~ s/^# //gm;
      chomp $context;
      my $details;
      # first context line is the heading, the rest are details
      if ($context =~ /^(.*?)$(.*)/ms) {
        ($context, $details) = ($1, $2);
        $details = "\n$details" if $details;
      }
      $context = 'Pattern' unless $context;
      return "#### $context$details\n```\n$pattern\n```\n\n";
    },
    CheckSpelling::Util::get_file_from_env('forbidden_summary', '/dev/stderr'),
    \@forbidden,
    \@forbidden_totals,
  );

  group_related_words();

  if (defined $ENV{'expect'}) {
    $ENV{'expect'} =~ /(.*)/;
    load_expect($1);
    harmonize_expect();
  }

  # package hashes shared with log_skip_item; start each run clean
  our %seen;
  %seen = ();
  our %counters;
  %counters = ();

  if (-s $early_warnings) {
    open WARNINGS, '<:utf8', $early_warnings;
    for my $warning (<WARNINGS>) {
      chomp $warning;
      count_warning($warning);
      next if should_skip_warning($warning);
      print WARNING_OUTPUT "$warning\n";
    }
    close WARNINGS;
  }

  our %last_seen;
  %last_seen = ();
  my %unknown_file_word_count;

  # Pass 3: rewrite and emit the per-file warnings.
  for my $directory (@directories) {
    next unless (-s "$directory/warnings");
    next unless open(NAME, '<:utf8', "$directory/name");
    my $file=<NAME>;
    close NAME;
    my $is_file_list = $file eq $file_list;
    open WARNINGS, '<:utf8', "$directory/warnings";
    if (!$is_file_list) {
      for my $warning (<WARNINGS>) {
        chomp $warning;
        if ($warning =~ m/:(\d+):(\d+ \.\.\. \d+): `(.*)`/) {
          my ($line, $range, $item) = ($1, $2, $3);
          my $wrapped = CheckSpelling::Util::wrap_in_backticks($item);
          $warning =~ s/:\d+:\d+ \.\.\. \d+: `.*`/:$line:$range, Warning - $wrapped is not a recognized word\. \(unrecognized-spelling\)/;
          next if log_skip_item($item, $file, $warning, $unknown_word_limit);
        } else {
          if ($warning =~ /\`(.*?)\` in line\. \(token-is-substring\)/) {
            next if skip_item($1);
          }
          count_warning($warning);
        }
        next if should_skip_warning($warning);
        print WARNING_OUTPUT "$file$warning\n";
      }
    } else {
      for my $warning (<WARNINGS>) {
        chomp $warning;
        # the line number in the file list selects the path being reported
        next unless $warning =~ s/^:(\d+)/:1/;
        $file = $check_file_paths[$1];
        if ($warning =~ s/:(\d+ \.\.\. \d+): `(.*)`/:$1, Warning - `$2` is not a recognized word\. \(check-file-path\)/) {
          my $item = $2;
          next if skip_item($item);
          if (defined $unknown_file_word_limit) {
            next if ++$unknown_file_word_count{$item} > $unknown_file_word_limit;
          }
        }
        next if should_skip_warning($warning);
        print WARNING_OUTPUT "$file$warning\n";
        count_warning($warning);
      }
    }
    close WARNINGS;
  }
  close MORE_WARNINGS;

  for my $warning (@delayed_warnings) {
    next if should_skip_warning($warning);
    count_warning($warning);
    print WARNING_OUTPUT $warning;
  }
  if (defined $unknown_word_limit) {
    # one closing note per word whose reports were capped
    for my $warned_word (sort keys %last_seen) {
      my $warning_count = $seen{$warned_word};
      next unless $warning_count >= $unknown_word_limit;
      my $warning = $last_seen{$warned_word};
      $warning =~ s/\Q. (unrecognized-spelling)\E/ -- found $warning_count times. (limited-references)\n/;
      next if should_skip_warning($warning);
      print WARNING_OUTPUT $warning;
      count_warning($warning);
    }
  }
  close WARNING_OUTPUT;

  if (%counters) {
    my $continue='';
    print COUNTER_SUMMARY "{\n";
    for my $code (sort keys %counters) {
      print COUNTER_SUMMARY qq<$continue"$code": $counters{$code}\n>;
      $continue=',';
    }
    print COUNTER_SUMMARY "}\n";
  }
  close COUNTER_SUMMARY;

  # display the current unknown
  for my $char (sort keys %letter_map) {
    for my $key (sort CheckSpelling::Util::case_biased keys(%{$letter_map{$char}})) {
      my %word_map = %{$letter_map{$char}{$key}};
      my @words = keys(%word_map);
      if (scalar(@words) > 1) {
        print $key." (".(join ", ", sort { length($a) <=> length($b) || $a cmp $b } @words).")";
      } else {
        print $words[0];
      }
      print "\n";
    }
  }
}
608
6091;