File Coverage

File:lib/CheckSpelling/SpellingCollator.pm
Coverage:83.4%

line stmt bran cond sub time code
#! -*-perl-*-

package CheckSpelling::SpellingCollator;

our $VERSION='0.1.0';
use warnings;
use File::Path qw(remove_tree);
use CheckSpelling::Util;

# Shared module state. Every sub in this file reaches these through an
# `our` declaration (i.e. the package variables), so they are declared with
# `our` here as well instead of as file-scoped lexicals that nothing reads.

# first character => collated key => { original spelling => 1 }
our %letter_map;
# warning code => true when warnings carrying that code must be suppressed
our %ignored_event_map;
# true when stemming/collation of related spellings is switched off
our $disable_word_collating;
13
# Extract the unsigned integer that follows "$field:" in a stats record.
#
#   $record - text of the stats line (may be undef when the file was empty)
#   $field  - literal field name to look for
#
# Returns the digits found, or 0 when the record is undefined or the field
# is not present.
sub get_field {
  my ($record, $field) = @_;
  return 0 unless defined $record;
  # \Q..\E: the field name is a literal, not a pattern.
  return 0 unless $record =~ /\b\Q$field\E:\s*(\d+)/;
  return $1;
}
19
# Extract the comma separated list stored as "$field: [a, b, c]" in a stats
# record.
#
#   $record - text of the stats line (may be undef when the file was empty)
#   $field  - literal field name to look for
#
# Returns the list elements (whitespace around commas removed), or an empty
# list when the record is undefined, the field is absent, or the brackets
# are empty.
sub get_array {
  my ($record, $field) = @_;
  return () unless defined $record;
  # \Q..\E: the field name is a literal, not a pattern.
  return () unless $record =~ /\b\Q$field\E: \[([^\]]+)\]/;
  my $values = $1;
  return split /\s*,\s*/, $values;
}
26
# Pick the rank to keep for an expected word.
#
#   $next  - rank currently recorded (0 or undef means "nothing recorded")
#   $value - rank of the current match
#
# Returns $next when it is a true value strictly below $value; otherwise
# returns $value.
sub maybe {
  my ($next, $value) = @_;
  return ($next && $next < $value) ? $next : $value;
}
32
# Rank table for expected words: word => best (lowest) rank at which it was
# matched so far. load_expect seeds every word with 0. Declared with `our`
# because every sub that touches it does so through `our %expected`.
our %expected = ();

# Check whether $item is covered by the expected list and record the match.
#
#   $item  - word to look up
#   $value - rank of this lookup (callers in this file pass 1, 2 or 3)
#
# Lookup order:
#   1. exact spelling;
#   2. for words starting with an ASCII capital: the Title-case form
#      (first letter kept, rest lowercased), recorded at $value + .1;
#   3. failing that, the all-lowercase form, recorded at $value + .2.
#
# Returns $value when one of the forms is expected (after updating that
# form's entry in %expected), 0 otherwise.
sub expect_item {
  my ($item, $value) = @_;
  our %expected;
  my $next;
  if (defined $expected{$item}) {
    # Exact hit: only ever lower the recorded rank.
    $next = $expected{$item};
    $next = $value if $value < $next;
  } elsif ($item =~ /^([A-Z])(.*)/) {
    $item = $1 . lc $2;
    if (defined $expected{$item}) {
      $next = maybe($expected{$item}, $value + .1);
    } else {
      $item = lc $item;
      if (defined $expected{$item}) {
        $next = maybe($expected{$item}, $value + .2);
      }
    }
  }
  return 0 unless defined $next;
  # $item now names the form that actually matched.
  $expected{$item} = $next;
  return $value;
}
58
# Decide whether $word is covered by the expected list, directly or through
# a simple English suffix reduction.
#
# Returns:
#   1 - the word as written is expected (see expect_item for case handling)
#   2 - its lowercase form is expected
#   3 - its reduced form is expected, where the reduction is one of
#         ...ies -> ...y, trailing s removed, consonant+ed removed,
#         trailing 'd removed
#   0 - none of the above
sub skip_item {
  my ($word) = @_;
  return 1 if expect_item($word, 1);

  my $key = lc $word;
  return 2 if expect_item($key, 2);

  if ($key =~ /ies$/) {
    $key =~ s/ies$/y/;
  } elsif ($key =~ /.s$/) {
    $key =~ s/s$//;
  } elsif ($key =~ /^(.+[^aeiou])ed$/) {
    $key = $1;
  } elsif ($key =~ /^(.+)'[ds]$/) {
    # NOTE(review): anything ending in 's is already consumed by the
    # plural branch above, so only 'd can reach this point -- confirm
    # whether possessives were meant to be reduced here.
    $key = $1;
  } else {
    return 0;
  }

  return expect_item($key, 3) ? 3 : 0;
}
80
# Tell whether a warning must be suppressed.
#
# The warning's code is the trailing "(code)" group (letters, digits,
# underscore and hyphen). Returns 1 when that code has a true entry in
# %ignored_event_map, 0 otherwise (including when there is no code).
sub should_skip_warning {
  my ($warning) = @_;
  our %ignored_event_map;
  my ($code) = $warning =~ /\(([-\w]+)\)$/;
  return (defined $code && $ignored_event_map{$code}) ? 1 : 0;
}
90
# Decide whether an unrecognized-word warning should be withheld from the
# primary warning output, keeping per-word bookkeeping as a side effect.
#
#   $item               - the unrecognized word
#   $file               - file name prefix for the warning
#   $warning            - warning text (appended to $file when printed)
#   $unknown_word_limit - optional cap on repeat reports per word
#
# Returns 1 (caller should skip the warning) when the warning's code is
# ignored, when the word is expected, or when the word was already seen.
# Repeat sightings are printed to MORE_WARNINGS while the cap allows it;
# once the cap is reached the latest occurrence is remembered in
# %last_seen instead. Returns 0 the first time a word is seen.
sub log_skip_item {
  my ($item, $file, $warning, $unknown_word_limit) = @_;
  # Package-level bookkeeping: sightings per word and the last suppressed
  # occurrence per word. Declared explicitly like the other shared state.
  our (%seen, %last_seen);

  return 1 if should_skip_warning($warning);
  return 1 if skip_item($item);

  my $seen_count = $seen{$item};
  unless (defined $seen_count) {
    $seen{$item} = 1;
    return 0;
  }

  # The post-increment only runs when a limit is defined, so without a
  # limit the stored count stays where it was.
  if (!defined $unknown_word_limit || ($seen_count++ < $unknown_word_limit)) {
    print MORE_WARNINGS "$file$warning\n";
  } else {
    $last_seen{$item} = "$file$warning";
  }
  $seen{$item} = $seen_count;
  return 1;
}
108
# Reduce a collated key to a crude stem so that related spellings can be
# grouped: "...ies" becomes "...y", another trailing "s" (preceded by at
# least one character) is dropped, and "ed" after a non-vowel is dropped.
# Returns the key unchanged when word collating is disabled or when no
# rule applies.
sub stem_word {
  my ($key) = @_;
  our $disable_word_collating;

  unless ($disable_word_collating) {
    if ($key =~ /ies$/) {
      $key =~ s/ies$/y/;
    } elsif ($key =~ /.s$/) {
      $key =~ s/s$//;
    } elsif ($key =~ /.[^aeiou]ed$/) {
      $key =~ s/ed$//;
    }
  }

  return $key;
}
125
# Compute the grouping key and bucket character for a token.
#
# When word collating is disabled the token itself is the key and the
# bucket is its lowercased first character. Otherwise the key is the
# lowercased token with runs of apostrophes collapsed, a trailing 's / 'd
# removed, a leading apostrophe group (optionally preceded by one character
# other than "i") removed, and a trailing apostrophe removed; the bucket is
# the first character of that key.
#
# Returns the two-element list ($key, $char).
sub collate_key {
  my ($key) = @_;
  our $disable_word_collating;
  # Lexical on purpose: an undeclared $char would clobber a package global.
  my $char;
  if ($disable_word_collating) {
    $char = lc substr $key, 0, 1;
  } else {
    $key = lc $key;
    $key =~ s/''+/'/g;
    $key =~ s/'[sd]$//;
    $key =~ s/^[^Ii]?'+(.*)/$1/;
    $key =~ s/(.*?)'$/$1/;
    $char = substr $key, 0, 1;
  }
  return ($key, $char);
}
141
# Replace the contents of %expected with the words listed in a file.
#
#   $expect - path of a UTF-8 text file with one expected word per line
#
# %expected is always emptied first. Every line (minus its first line
# break, whatever its flavour) becomes a key with value 0. A file that
# cannot be opened simply leaves %expected empty.
sub load_expect {
  my ($expect) = @_;
  our %expected;
  %expected = ();
  open(my $expect_fh, '<:utf8', $expect) or return;
  while (my $word = <$expect_fh>) {
    $word =~ s/\R//;
    $expected{$word} = 0;
  }
  close $expect_fh;
}
154
# Prune %expected against the unknown words collected in %letter_map.
#
# For every expected word, the unknown spellings sharing its collated key
# are looked up. The word is removed from %expected unless one of these
# holds:
#   - no unknown spelling shares its key;
#   - the word itself is among those unknown spellings;
#   - more than two spellings share the key;
#   - the word is identical to its key and more than one spelling shares it.
#
# %letter_map is only read: buckets are tested with `exists` first so the
# lookup does not autovivify empty entries.
sub harmonize_expect {
  our %letter_map;
  our %expected;

  # keys() is evaluated once up front, so deleting inside the loop is safe.
  for my $word (keys %expected) {
    my ($key, $char) = collate_key($word);
    next unless exists $letter_map{$char} && defined $letter_map{$char}{$key};
    my $word_map = $letter_map{$char}{$key};
    next if defined $word_map->{$word};
    my $words = scalar keys %{$word_map};
    next if $words > 2;
    next if $word eq $key && $words > 1;
    delete $expected{$word};
  }
}
174
# Fold inflected keys of %letter_map into the key of their stem.
#
# Within each first-character bucket, every key whose stem (see stem_word)
# differs from the key itself and is also present in the bucket is merged
# into the stem's entry and then removed. Does nothing when word collating
# is disabled.
sub group_related_words {
  our %letter_map;
  our $disable_word_collating;
  return if $disable_word_collating;

  # group related words
  for my $char (sort CheckSpelling::Util::number_biased keys %letter_map) {
    # Keys are snapshotted (and sorted) before the bucket is modified.
    for my $plural_key (sort keys(%{$letter_map{$char}})) {
      my $key = stem_word($plural_key);
      next if $key eq $plural_key;
      next unless defined $letter_map{$char}{$key};
      my %word_map = %{$letter_map{$char}{$key}};
      for my $word (keys(%{$letter_map{$char}{$plural_key}})) {
        $word_map{$word} = 1;
      }
      $letter_map{$char}{$key} = \%word_map;
      delete $letter_map{$char}{$plural_key};
    }
  }
}
195
# Tally a warning in %counters under its trailing "(code)".
#
# Warnings without a code, and warnings whose code has an entry in
# %ignored_event_map, are not counted.
sub count_warning {
  my ($warning) = @_;
  our %counters;
  our %ignored_event_map;
  if ($warning =~ /\(([-\w]+)\)$/) {
    my ($code) = ($1);
    # `return`, not `next`: there is no loop in this sub, and `next` would
    # either hijack the caller's loop or die when no loop is active.
    return if defined $ignored_event_map{$code};
    ++$counters{$code};
  }
}
206
# Append one row to the timing report.
#
#   $name       - file name for the row (double quotes are backslash-escaped)
#   $start_time - start timestamp supplied by the caller
#   $directory  - result directory of the file
#   $marker     - name of the file inside $directory whose modification
#                 time (stat field 9) is used as the finish time
#
# Writes `"name", start, finish` to the TIMING_REPORT handle, which the
# caller must have opened.
sub report_timing {
  my ($name, $start_time, $directory, $marker) = @_;
  my @marker_stat = stat "$directory/$marker";
  my $end_time = $marker_stat[9];
  (my $quoted_name = $name) =~ s/"/\\"/g;
  print TIMING_REPORT qq{"$quoted_name", $start_time, $end_time\n};
}
213
# Load a pattern file, attaching to each pattern the comment block that
# immediately precedes it.
#
#   $path - NAME of the environment variable that holds the file path
#
# File format, line by line:
#   "# text"  - appended (line break included) to the pending context
#   "#text"   - any other comment line discards the pending context
#   blank     - discards the pending context
#   otherwise - a pattern; emitted as pending context . pattern, after
#               which the context starts empty again
#
# Returns the list of context+pattern strings; an empty list when the
# variable is unset or the file cannot be opened. The caller's $_ is left
# untouched.
sub get_pattern_with_context {
  my ($path) = @_;
  return unless defined $ENV{$path};
  # Pass the value through a capture before using it as a path.
  $ENV{$path} =~ /(.*)/;
  return unless open my $items_fh, '<:utf8', $1;

  my @items;
  my $context = '';
  while (my $pattern = <$items_fh>) {
    if ($pattern =~ /^#/) {
      $context = ($pattern =~ /^# /) ? $context.$pattern : '';
      next;
    }
    chomp $pattern;
    push @items, $context.$pattern if $pattern =~ /./;
    $context = '';
  }
  close $items_fh;
  return @items;
}
243
# Write a summary of the patterns that were hit, most hits first.
#
#   $formatter   - code ref called as
#                  $formatter->($hits, $file_note, $description, $rule);
#                  its result is printed to the summary
#   $path        - output file (created/truncated, UTF-8)
#   $items       - array ref of "context + pattern" strings
#   $totals      - array ref of hit counts, parallel to $items
#   $file_counts - optional array ref of per-pattern file counts; used as
#                  the tie breaker and rendered as " file-count: N"
#
# Nothing is written (and the file is not touched) when $totals is empty
# or the file cannot be opened. Output stops at the first pattern without
# hits.
sub summarize_totals {
  my ($formatter, $path, $items, $totals, $file_counts) = @_;
  return unless @{$totals};
  return unless open my $fh, '>:utf8', $path;

  my @indices = sort {
    $totals->[$b] <=> $totals->[$a]
      || ($file_counts ? $file_counts->[$b] <=> $file_counts->[$a] : 0)
  } 0 .. $#{$totals};

  for my $i (@indices) {
    last unless $totals->[$i] > 0;

    # Everything up to the last line break is the description; the final
    # line is the rule itself.
    my ($description, $rule) = ('', $items->[$i]);
    if ($items->[$i] =~ /^(.*\n)([^\n]+)$/s) {
      ($description, $rule) = ($1, $2);
    }

    my $file_note = $file_counts ? " file-count: $file_counts->[$i]" : "";
    print $fh $formatter->($totals->[$i], $file_note, $description, $rule);
  }

  close $fh;
}
278
# Collate per-file spelling results and emit the reports.
#
# Input: result directory paths, one per line, read from <>. Inside each
# directory the files `name`, `skipped`, `stats`, `unknown` and `warnings`
# are consulted.
#
# Settings are fetched through CheckSpelling::Util::get_file_from_env /
# get_val_from_env (defined elsewhere) under the names used below; the
# `expect`, `candidates_path` and `forbidden_path` environment variables
# are read directly.
#
# Output:
#   warning_output      - primary warnings
#   more_warnings       - repeat sightings of already reported words
#   counter_summary     - JSON-like object of warning code => count
#   should_exclude_file - files that were skipped or judged too noisy
#   timing_report       - optional per-file timing rows
#   candidate_summary / forbidden_summary - pattern hit summaries
#   STDOUT              - the collated unknown words, one group per line
sub main {
  my @directories;
  my @cleanup_directories;
  my @check_file_paths;

  my $early_warnings = CheckSpelling::Util::get_file_from_env('early_warnings', '/dev/null');
  my $warning_output = CheckSpelling::Util::get_file_from_env('warning_output', '/dev/stderr');
  my $more_warnings = CheckSpelling::Util::get_file_from_env('more_warnings', '/dev/stderr');
  my $counter_summary = CheckSpelling::Util::get_file_from_env('counter_summary', '/dev/stderr');
  my $ignored_events = CheckSpelling::Util::get_file_from_env('ignored_events', '');
  if ($ignored_events) {
    our %ignored_event_map;
    for my $event (split /,/, $ignored_events) {
      $ignored_event_map{$event} = 1;
    }
  }
  my $should_exclude_file = CheckSpelling::Util::get_file_from_env('should_exclude_file', '/dev/null');
  my $unknown_word_limit = CheckSpelling::Util::get_val_from_env('unknown_word_limit', undef);
  my $unknown_file_word_limit = CheckSpelling::Util::get_val_from_env('unknown_file_word_limit', undef);
  my $candidate_example_limit = CheckSpelling::Util::get_file_from_env('INPUT_CANDIDATE_EXAMPLE_LIMIT', '3');
  my $disable_flags = CheckSpelling::Util::get_file_from_env('INPUT_DISABLE_CHECKS', '');
  my $only_check_changed_files = CheckSpelling::Util::get_file_from_env('INPUT_ONLY_CHECK_CHANGED_FILES', '');
  my $disable_noisy_file = $disable_flags =~ /(?:^|,|\s)noisy-file(?:,|\s|$)/;
  our $disable_word_collating = $only_check_changed_files || $disable_flags =~ /(?:^|,|\s)word-collating(?:,|\s|$)/;
  my $file_list = CheckSpelling::Util::get_file_from_env('check_file_names', '');
  my $timing_report = CheckSpelling::Util::get_file_from_env('timing_report', '');
  my $start_time;

  # MORE_WARNINGS and TIMING_REPORT are also written by log_skip_item and
  # report_timing, so they stay package-level handles.
  open WARNING_OUTPUT, '>:utf8', $warning_output;
  open MORE_WARNINGS, '>:utf8', $more_warnings;
  open COUNTER_SUMMARY, '>:utf8', $counter_summary;
  open SHOULD_EXCLUDE, '>:utf8', $should_exclude_file;
  if ($timing_report) {
    open TIMING_REPORT, '>:utf8', $timing_report;
    print TIMING_REPORT "file, start, finish\n";
  }

  my @candidates = get_pattern_with_context('candidates_path');
  my @candidate_totals = (0) x scalar @candidates;
  my @candidate_file_counts = (0) x scalar @candidates;

  my @forbidden = get_pattern_with_context('forbidden_path');
  my @forbidden_totals = (0) x scalar @forbidden;

  my @delayed_warnings;
  our %letter_map = ();

  # file name => result directory; iterating it sorted makes the
  # processing order independent of the input order.
  my %file_map = ();

  while (my $directory = <>) {
    chomp $directory;
    next unless $directory =~ /^(.*)$/;
    $directory = $1;
    unless (-e $directory) {
      print STDERR "Could not find: $directory\n";
      next;
    }
    unless (-d $directory) {
      print STDERR "Not a directory: $directory\n";
      next;
    }

    # if there's no filename, we can't report
    next unless open(NAME, '<:utf8', "$directory/name");
    my $file=<NAME>;
    close NAME;

    $file_map{$file} = $directory;
  }

  for my $file (sort keys %file_map) {
    my $directory = $file_map{$file};
    if ($timing_report) {
      $start_time = (stat "$directory/name")[9];
    }

    if (-e "$directory/skipped") {
      open SKIPPED, '<:utf8', "$directory/skipped";
      my $reason=<SKIPPED>;
      close SKIPPED;
      chomp $reason;
      push @delayed_warnings, "$file:1:1 ... 1, Warning - Skipping `$file` because $reason\n";
      print SHOULD_EXCLUDE "$file\n";
      push @cleanup_directories, $directory;
      report_timing($file, $start_time, $directory, 'skipped') if ($timing_report);
      next;
    }

    # stats isn't written if there was nothing interesting in the file
    unless (-s "$directory/stats") {
      push @directories, $directory;
      report_timing($file, $start_time, $directory, 'warnings') if ($timing_report);
      next;
    }

    if ($file eq $file_list) {
      # Warnings for the file list refer to paths by line number; index 0
      # is padded so that line N maps to $check_file_paths[N].
      open FILE_LIST, '<:utf8', $file_list;
      push @check_file_paths, '0 placeholder';
      while (my $check_file_path = <FILE_LIST>) {
        chomp $check_file_path;
        push @check_file_paths, $check_file_path;
      }
      close FILE_LIST;
    }

    my ($words, $unrecognized, $unknown, $unique);

    {
      open STATS, '<:utf8', "$directory/stats";
      my $stats=<STATS>;
      close STATS;
      $words=get_field($stats, 'words');
      $unrecognized=get_field($stats, 'unrecognized');
      $unknown=get_field($stats, 'unknown');
      $unique=get_field($stats, 'unique');
      my @candidate_list;
      if (@candidate_totals) {
        @candidate_list=get_array($stats, 'candidates');
        my @lines=get_array($stats, 'candidate_lines');
        for my $i (0 .. $#candidate_list) {
          my $hits = $candidate_list[$i];
          next unless $hits;
          $candidate_totals[$i] += $hits;
          # Only the first few files per pattern get an example notice.
          if ($candidate_file_counts[$i]++ < $candidate_example_limit) {
            my $pattern = (split /\n/,$candidates[$i])[-1];
            my $position = $lines[$i];
            $position =~ s/:(\d+)$/ ... $1/;
            my $wrapped = CheckSpelling::Util::wrap_in_backticks($pattern);
            push @delayed_warnings, "$file:$position, Notice - Line matches candidate pattern $wrapped (candidate-pattern)\n";
          }
        }
      }
      if (@forbidden_totals) {
        my @forbidden_list=get_array($stats, 'forbidden');
        for my $i (0 .. $#forbidden_list) {
          my $hits = $forbidden_list[$i];
          $forbidden_totals[$i] += $hits if $hits;
        }
      }
      #print STDERR "$file (unrecognized: $unrecognized; unique: $unique; unknown: $unknown, words: $words, candidates: [".join(", ", @candidate_list)."])\n";
    }

    report_timing($file, $start_time, $directory, 'unknown') if ($timing_report);
    # These heuristics are very new and need tuning/feedback
    if (
        ($unknown > $unique)
        # || ($unrecognized > $words / 2)
    ) {
      unless ($disable_noisy_file) {
        if ($file ne $file_list) {
          push @delayed_warnings, "$file:1:1 ... 1, Warning - Skipping `$file` because it seems to have more noise ($unknown) than unique words ($unique) (total: $unrecognized / $words). (noisy-file)\n";
          print SHOULD_EXCLUDE "$file\n";
        } else {
          push @delayed_warnings, "$file:1:1 ... 1, Warning - Skipping file list because there seems to be more noise ($unknown) than unique words ($unique) (total: $unrecognized / $words). (noisy-file-list)\n";
        }
        push @directories, $directory;
        next;
      }
    }
    unless (-s "$directory/unknown") {
      push @directories, $directory;
      next;
    }

    # Register every unknown token under its bucket character and
    # collated key.
    open UNKNOWN, '<:utf8', "$directory/unknown";
    while (my $token = <UNKNOWN>) {
      $token =~ s/\R//;
      next unless $token =~ /./;
      my ($key, $char) = collate_key($token);
      $letter_map{$char}{$key}{$token} = 1;
    }
    close UNKNOWN;
    push @directories, $directory;
  }
  close SHOULD_EXCLUDE;
  close TIMING_REPORT if $timing_report;

  summarize_totals(
    sub {
      my ($hits, $files, $context, $pattern) = @_;
      return "# hit-count: $hits$files\n$context$pattern\n\n";
    },
    CheckSpelling::Util::get_file_from_env('candidate_summary', '/dev/stderr'),
    \@candidates,
    \@candidate_totals,
    \@candidate_file_counts,
  );

  summarize_totals(
    sub {
      my (undef, undef, $context, $pattern) = @_;
      $context =~ s/^# //gm;
      chomp $context;
      my $details;
      # First context line becomes the heading, the remainder the details.
      if ($context =~ /^(.*?)$(.*)/ms) {
        ($context, $details) = ($1, $2);
        $details = "\n$details" if $details;
      }
      $context = 'Pattern' unless $context;
      return "#### $context$details\n```\n$pattern\n```\n\n";
    },
    CheckSpelling::Util::get_file_from_env('forbidden_summary', '/dev/stderr'),
    \@forbidden,
    \@forbidden_totals,
  );

  group_related_words();

  if (defined $ENV{'expect'}) {
    $ENV{'expect'} =~ /(.*)/;
    load_expect($1);
    harmonize_expect();
  }

  # %seen and %last_seen are filled in by log_skip_item, which addresses
  # them as package variables. They must therefore be the package variables
  # here too (a `my` copy would stay empty and the limited-references
  # report below would never fire). Reset them for this run.
  our %seen;
  %seen = ();
  our %last_seen;
  %last_seen = ();
  our %counters;
  %counters = ();

  if (-s $early_warnings) {
    open WARNINGS, '<:utf8', $early_warnings;
    while (my $warning = <WARNINGS>) {
      chomp $warning;
      count_warning($warning);
      next if should_skip_warning($warning);
      print WARNING_OUTPUT "$warning\n";
    }
    close WARNINGS;
  }

  my %unknown_file_word_count;
  for my $directory (@directories) {
    next unless (-s "$directory/warnings");
    next unless open(NAME, '<:utf8', "$directory/name");
    my $file=<NAME>;
    close NAME;
    my $is_file_list = $file eq $file_list;
    open WARNINGS, '<:utf8', "$directory/warnings";
    if (!$is_file_list) {
      while (my $warning = <WARNINGS>) {
        chomp $warning;
        if ($warning =~ m/:(\d+):(\d+ \.\.\. \d+): `(.*)`/) {
          my ($line, $range, $item) = ($1, $2, $3);
          my $wrapped = CheckSpelling::Util::wrap_in_backticks($item);
          $warning =~ s/:\d+:\d+ \.\.\. \d+: `.*`/:$line:$range, Warning - $wrapped is not a recognized word\. \(unrecognized-spelling\)/;
          next if log_skip_item($item, $file, $warning, $unknown_word_limit);
        } else {
          if ($warning =~ /\`(.*?)\` in line\. \(token-is-substring\)/) {
            next if skip_item($1);
          }
          count_warning($warning);
        }
        next if should_skip_warning($warning);
        print WARNING_OUTPUT "$file$warning\n";
      }
    } else {
      while (my $warning = <WARNINGS>) {
        chomp $warning;
        # The leading line number selects the path being checked; the
        # warning itself is re-anchored to line 1 of that path.
        next unless $warning =~ s/^:(\d+)/:1/;
        $file = $check_file_paths[$1];
        if ($warning =~ s/:(\d+ \.\.\. \d+): `(.*)`/:$1, Warning - `$2` is not a recognized word\. \(check-file-path\)/) {
          my $item = $2;
          next if skip_item($item);
          if (defined $unknown_file_word_limit) {
            next if ++$unknown_file_word_count{$item} > $unknown_file_word_limit;
          }
        }
        next if should_skip_warning($warning);
        print WARNING_OUTPUT "$file$warning\n";
        count_warning($warning);
      }
    }
    close WARNINGS;
  }
  close MORE_WARNINGS;

  for my $warning (@delayed_warnings) {
    next if should_skip_warning($warning);
    count_warning($warning);
    print WARNING_OUTPUT $warning;
  }
  if (defined $unknown_word_limit) {
    # One closing warning per word whose repeat reports were capped.
    for my $warned_word (sort keys %last_seen) {
      my $warning_count = $seen{$warned_word};
      next unless $warning_count >= $unknown_word_limit;
      my $warning = $last_seen{$warned_word};
      $warning =~ s/\Q. (unrecognized-spelling)\E/ -- found $warning_count times. (limited-references)\n/;
      next if should_skip_warning($warning);
      print WARNING_OUTPUT $warning;
      count_warning($warning);
    }
  }
  close WARNING_OUTPUT;

  if (%counters) {
    my $continue='';
    print COUNTER_SUMMARY "{\n";
    for my $code (sort keys %counters) {
      print COUNTER_SUMMARY qq<$continue"$code": $counters{$code}\n>;
      $continue=',';
    }
    print COUNTER_SUMMARY "}\n";
  }
  close COUNTER_SUMMARY;

  # display the current unknown
  for my $char (sort keys %letter_map) {
    for my $key (sort CheckSpelling::Util::case_biased keys(%{$letter_map{$char}})) {
      my @words = keys %{$letter_map{$char}{$key}};
      if (scalar(@words) > 1) {
        print $key." (".(join ", ", sort { length($a) <=> length($b) || $a cmp $b } @words).")";
      } else {
        print $words[0];
      }
      print "\n";
    }
  }
}
608
6091;