File Coverage

File:lib/CheckSpelling/SpellingCollator.pm
Coverage:89.6%

line stmt bran cond sub time code
1#! -*-perl-*-
2
3package CheckSpelling::SpellingCollator;
4
5our $VERSION='0.1.0';
6
1
1
109100
3
use 5.022;
7
1
1
1
2
0
53
use feature 'unicode_strings';
8
1
1
1
1
1
19
use warnings;
9
1
1
1
2
0
25
use File::Path qw(remove_tree);
10
1
1
1
240
1
2850
use CheckSpelling::Util;
11
# Module-wide state.  Every sub in this file reaches these through
# `our`, i.e. as package variables, so they are declared with `our` here too;
# a file-scoped `my` would create separate lexicals that nothing ever reads.
our %letter_map;               # bucket letter => collate key => { word => 1 }
our %ignored_event_map;        # event code => 1 for events that are not reported
our $disable_word_collating;   # true when related word forms must not be merged

our %last_seen;                # word => last warning held back by the word limit
17
# Extract the numeric value of a "name: N" pair from a stats record.
#
#   $record - text holding "name: value" pairs; may be undef (e.g. when the
#             stats line could not be read)
#   $field  - field name, matched literally
#
# Returns the digits following "$field:", or 0 when the record is undefined
# or the field is not present.
sub get_field {
  my ($record, $field) = @_;
  return 0 unless defined $record;
  # \Q...\E keeps any regex metacharacters in the field name literal.
  return 0 unless $record =~ /\b\Q$field\E:\s*(\d+)/;
  return $1;
}
23
# Extract a bracketed, comma-separated list ("name: [a, b, c]") from a stats
# record.
#
#   $record - text holding "name: [..]" entries; may be undef
#   $field  - field name, matched literally
#
# Returns the list elements with the whitespace around commas removed, or an
# empty list when the record is undefined, the field is absent, or the
# brackets are empty.
sub get_array {
  my ($record, $field) = @_;
  return () unless defined $record;
  # \Q...\E keeps any regex metacharacters in the field name literal.
  return () unless $record =~ /\b\Q$field\E: \[([^\]]+)\]/;
  my $values = $1;
  return split /\s*,\s*/, $values;
}
30
# Choose between a previously recorded value and a new one.
#
# Returns $next when it is true and numerically below $value; in every other
# case (false/undefined $next, or $next >= $value) returns $value.
sub maybe {
  my ($next, $value) = @_;
  return ($next && $next < $value) ? $next : $value;
}
36
# Expected words: word => match score.  load_expect() stores 0 for every
# word read from the expect file.  All subs access this hash as a package
# variable via `our`, so it is declared with `our` here as well (a
# file-scoped `my` would be a separate, unused lexical).
our %expected = ();

# Check whether $item is covered by the expected-word list.
#
#   $item  - candidate word
#   $value - score for this kind of match (callers in this file pass 1, 2
#            or 3)
#
# Lookup order:
#   1. $item exactly as given;
#   2. if $item starts with an ASCII capital: first letter kept, rest
#      lowercased (score $value + .1 is offered to maybe());
#   3. otherwise fully lowercased (score $value + .2 is offered to maybe()).
#
# Returns 0 when nothing matched.  On a match the score stored for the
# matched entry is updated and $value is returned.
sub expect_item {
  my ($item, $value) = @_;
  our %expected;
  my $next;
  if (defined $expected{$item}) {
    # Exact hit: keep the lower of the stored score and this one.
    $next = $expected{$item};
    $next = $value if $value < $next;
  } elsif ($item =~ /^([A-Z])(.*)/) {
    # Try the "Titlecase" form before falling back to all-lowercase.
    $item = $1 . lc $2;
    if (defined $expected{$item}) {
      $next = maybe($expected{$item}, $value + .1);
    } else {
      $item = lc $item;
      if (defined $expected{$item}) {
        $next = maybe($expected{$item}, $value + .2);
      }
    }
  }
  return 0 unless defined $next;
  # $item now names the entry that matched (possibly a case-folded form).
  $expected{$item} = $next;
  return $value;
}
62
# Decide whether a word is covered by the expected list, possibly after
# lowercasing it and stripping a common suffix.
#
# Returns:
#   1 - expect_item() accepted the word as given
#   2 - expect_item() accepted the lowercased word
#   3 - expect_item() accepted the lowercased word with a suffix removed
#       ("ies" -> "y", trailing "s", "ed" after a non-vowel, or "'d"/"'s")
#   0 - none of the above
sub skip_item {
  my ($word) = @_;
  return 1 if expect_item($word, 1);

  my $lower = lc $word;
  return 2 if expect_item($lower, 2);

  my $stem;
  if ($lower =~ /.s$/) {
    $stem = $lower;
    # "ies" becomes "y"; any other trailing "s" is simply dropped.
    $stem =~ s/s$// unless $stem =~ s/ies$/y/;
  } elsif ($lower =~ /^(.+[^aeiou])ed$/) {
    $stem = $1;
  } elsif ($lower =~ /^(.+)'[ds]$/) {
    $stem = $1;
  }
  return 0 unless defined $stem;

  return expect_item($stem, 3) ? 3 : 0;
}
84
# Tell whether a warning belongs to an ignored event.
#
# The event code is the parenthesised "(code)" token at the very end of the
# warning text.  Returns 1 when that code has a true entry in
# %ignored_event_map, 0 otherwise (including when there is no such token).
sub should_skip_warning {
  my ($warning) = @_;
  our %ignored_event_map;
  my ($code) = $warning =~ /\(([-\w]+)\)$/;
  return 0 unless defined $code;
  return $ignored_event_map{$code} ? 1 : 0;
}
94
# Decide whether the caller should stop processing a word warning, recording
# repeat sightings along the way.
#
#   $item               - the unrecognized word
#   $file               - file name prefix for the warning text
#   $warning            - warning text (location and message)
#   $unknown_word_limit - optional limit on reported repeats; undef = no limit
#
# Returns 1 when the warning's event is ignored, when skip_item() accepts the
# word, or when the word has been seen before.  A repeat is written to
# MORE_WARNINGS while under the limit; once the limit is reached the warning
# is remembered in %last_seen instead.  Returns 0 only for the first
# sighting of a word, which is recorded in %seen.
sub log_skip_item {
  my ($item, $file, $warning, $unknown_word_limit) = @_;
  our (%seen, %last_seen);

  return 1 if should_skip_warning($warning) || skip_item($item);

  my $count = $seen{$item};
  unless (defined $count) {
    $seen{$item} = 1;
    return 0;
  }

  # The sighting counter only advances when a limit is configured.
  my $under_limit = 1;
  if (defined $unknown_word_limit) {
    $under_limit = $count < $unknown_word_limit;
    $count++;
  }

  if ($under_limit) {
    print MORE_WARNINGS "$file$warning\n";
  } else {
    $last_seen{$item} = "$file$warning";
  }
  $seen{$item} = $count;
  return 1;
}
114
# Reduce a key to a simple stem so related word forms can be grouped.
#
# With $disable_word_collating set the key is returned untouched.  Otherwise
# a key ending in "s" (with at least one character before it) has "ies"
# turned into "y" or the final "s" dropped; failing that, a key ending in
# "ed" preceded by a non-vowel and one more character loses the "ed".
sub stem_word {
  my ($key) = @_;
  our $disable_word_collating;
  return $key if $disable_word_collating;

  if ($key =~ /.s$/) {
    return $key =~ /ies$/ ? $key =~ s/ies$/y/r : $key =~ s/s$//r;
  }
  return $key =~ s/ed$//r if $key =~ /.[^aeiou]ed$/;
  return $key;
}
131
# Compute the grouping key and bucket letter for a word.
#
# Returns the two-element list ($key, $char).
#
# With $disable_word_collating set, the word itself is the key and $char is
# its lowercased first character.  Otherwise the key is the lowercased word
# with runs of apostrophes collapsed, a trailing "'s"/"'d" removed, and
# leading/trailing apostrophes stripped; $char is the key's first character.
sub collate_key {
  my ($key) = @_;
  our $disable_word_collating;

  return ($key, lc(substr($key, 0, 1))) if $disable_word_collating;

  my $folded = lc $key;
  for ($folded) {
    s/''+/'/g;
    s/'[sd]$//;
    s/^[^Ii]?'+(.*)/$1/;
    s/(.*?)'$/$1/;
  }
  return ($folded, substr($folded, 0, 1));
}
148
# (Re)load the expected-word list.
#
#   $expect - path of a UTF-8 text file with one word per line
#
# %expected is emptied first, then every line (with its line ending removed)
# becomes a key with value 0.  If the file cannot be opened the hash is
# simply left empty.  Returns nothing.
sub load_expect {
  my ($expect) = @_;
  our %expected;
  %expected = ();

  # Lexical handle: closed automatically and not shared through a global
  # bareword name.
  open(my $fh, '<:utf8', $expect) or return;
  while (my $word = <$fh>) {
    $word =~ s/\R//;
    $expected{$word} = 0;
  }
  close $fh;
  return;
}
161
# Drop entries from %expected that are superseded by the unknown words
# collected in %letter_map.
#
# For every expected word, the group of unknown words sharing its collate
# key is looked up.  The expected entry is kept when:
#   - there is no such group;
#   - the exact word is itself a member of the group;
#   - the group has more than two members;
#   - the word equals its own collate key and the group has more than one
#     member.
# In every other case the word is deleted from %expected.
sub harmonize_expect {
  our %letter_map;
  our %expected;

  for my $word (keys %expected) {
    my ($key, $char) = collate_key($word);
    # Test the letter bucket first: a bare $letter_map{$char}{$key} lookup
    # would autovivify an empty bucket for every expected word.
    next unless exists $letter_map{$char} && defined $letter_map{$char}{$key};

    my $word_map = $letter_map{$char}{$key};
    next if defined $word_map->{$word};

    my $variants = scalar keys %{$word_map};
    next if $variants > 2;
    next if $word eq $key && $variants > 1;

    delete $expected{$word};
  }
  return;
}
181
# Merge word groups whose keys differ only by a suffix that stem_word()
# strips.
#
# Does nothing when $disable_word_collating is set.  Otherwise, within each
# letter bucket of %letter_map, a key whose stem is a different key that
# also exists in the bucket has its words folded into the stem's group and
# is then removed from the bucket.
sub group_related_words {
  our ($disable_word_collating, %letter_map);
  return if $disable_word_collating;

  for my $char (sort CheckSpelling::Util::number_biased keys %letter_map) {
    my $bucket = ($letter_map{$char} //= {});
    # The key list is taken once up front, so removing keys below is safe.
    for my $plural_key (sort keys %{$bucket}) {
      my $stem = stem_word($plural_key);
      next if $stem eq $plural_key;
      next unless defined $bucket->{$stem};

      $bucket->{$stem} = {
        %{ $bucket->{$stem} },
        map { $_ => 1 } keys %{ $bucket->{$plural_key} },
      };
      delete $bucket->{$plural_key};
    }
  }
  return;
}
202
# Tally a warning under its event code.
#
# The event code is the parenthesised "(code)" token at the very end of the
# warning text.  Warnings without such a token, and warnings whose code has
# an entry in %ignored_event_map, are not counted.  Otherwise
# $counters{code} is incremented.  Returns nothing.
sub count_warning {
  my ($warning) = @_;
  our %counters;
  our %ignored_event_map;

  return unless $warning =~ /\(([-\w]+)\)$/;
  my $code = $1;
  # `return`, not `next`: there is no loop in this sub, so `next` would exit
  # the sub and advance whatever loop the caller happens to be running.
  return if defined $ignored_event_map{$code};
  ++$counters{$code};
  return;
}
213
# Append one row to the timing report.
#
#   $name       - label for the row; double quotes in it are backslash-escaped
#   $start_time - start timestamp supplied by the caller
#   $directory  - directory containing the marker file
#   $marker     - file name whose modification time (stat field 9) is used
#                 as the finish time
#
# Writes `"name", start, finish` to the TIMING_REPORT handle.
sub report_timing {
  my ($name, $start_time, $directory, $marker) = @_;
  my $finished = (stat "$directory/$marker")[9];
  (my $quoted = $name) =~ s/"/\\"/g;
  print TIMING_REPORT qq{"$quoted", $start_time, $finished\n};
}
220
# Read a pattern file and return its patterns, each prefixed by the comment
# lines that directly precede it.
#
#   $path - NAME of the environment variable holding the pattern file path
#
# File format as handled here:
#   - lines starting with "# " accumulate as context for the next pattern;
#   - any other line starting with "#" discards the accumulated context;
#   - an empty line discards the accumulated context;
#   - every remaining line is a pattern, returned as
#     "<context lines, newline-terminated><pattern>".
#
# Returns an empty list when the variable is unset or the file cannot be
# opened.
sub get_pattern_with_context {
  my ($path) = @_;
  return unless defined $ENV{$path};

  # Take the value through a capture, as the original code did (this is the
  # usual untainting idiom); only the first line of the value is used.
  my ($file) = $ENV{$path} =~ /(.*)/;
  open(my $fh, '<:utf8', $file) or return;

  my @items;
  my $context = '';
  # Read into a lexical so the caller's $_ is left alone.
  while (my $pattern = <$fh>) {
    if ($pattern =~ /^#/) {
      if ($pattern =~ /^# /) {
        $context .= $pattern;
      } else {
        $context = '';
      }
      next;
    }
    chomp $pattern;
    unless ($pattern =~ /./) {
      $context = '';
      next;
    }
    push @items, $context.$pattern;
    $context = '';
  }
  close $fh;
  return @items;
}
250
# Write a summary of pattern hit counts to a file.
#
#   $formatter   - code ref called as
#                  $formatter->($hits, $file_note, $description, $rule);
#                  its return value is printed to the file
#   $path        - output file, (re)created as UTF-8
#   $items       - array ref of "context\npattern" strings
#   $totals      - array ref of hit counts, parallel to $items
#   $file_counts - optional array ref of per-pattern file counts
#
# Nothing is written (and the file is not touched) when $totals is empty or
# the file cannot be opened.  Entries are emitted by descending hit count,
# ties broken by descending file count when $file_counts is given; output
# stops at the first entry whose total is not positive.
sub summarize_totals {
  my ($formatter, $path, $items, $totals, $file_counts) = @_;
  return unless @{$totals};
  return unless open my $fh, '>:utf8', $path;

  my @order = sort {
    $totals->[$b] <=> $totals->[$a]
      || ($file_counts ? $file_counts->[$b] <=> $file_counts->[$a] : 0)
  } 0 .. $#{$totals};

  for my $i (@order) {
    last unless $totals->[$i] > 0;

    # Everything up to the final newline is description; the last line is
    # the rule itself.
    my $entry = $items->[$i];
    my ($description, $rule) = ('', $entry);
    if ($entry =~ /^(.*\n)([^\n]+)$/s) {
      ($description, $rule) = ($1, $2);
    }

    my $file_note = $file_counts ? " file-count: $file_counts->[$i]" : "";
    print {$fh} $formatter->($totals->[$i], $file_note, $description, $rule);
  }
  close $fh;
}
285
# Classify a checked file name.
#
#   $file    - name of the checked item
#   $special - hash ref with the keys 'file_list', 'pr_title_file',
#              'pr_description_file' and 'commit_messages'
#
# Returns 'file-list', 'pr-title' or 'pr-description' when $file equals the
# corresponding entry, 'commit-message' when $file starts with the
# 'commit_messages' value, and 'file' otherwise.
sub get_special {
  my ($file, $special) = @_;
  return 'file-list' if $file eq $special->{'file_list'};
  return 'pr-title' if $file eq $special->{'pr_title_file'};
  return 'pr-description' if $file eq $special->{'pr_description_file'};

  # Prefix test.  The previous `!rindex($file, $prefix)` only held when the
  # LAST occurrence of the prefix was at offset 0, so a path that repeated
  # the prefix further along was misclassified as a plain file.  An empty
  # prefix means "not configured" and never matches.
  my $prefix = $special->{'commit_messages'};
  return 'commit-message' if length($prefix) && index($file, $prefix) == 0;

  return 'file';
}
294
# Collate the per-file results of a spelling run.
#
# Input: a list of result directories, one per line, read from <>.  Each
# directory is expected to contain a `name` file (the checked file's name)
# and optionally `skipped`, `stats`, `unknown` and `warnings` files.
#
# Settings come from CheckSpelling::Util::get_file_from_env /
# get_val_from_env (see the calls below for the setting names) and from
# $ENV{expect} (path of the expected-word list).
#
# Output:
#   - warnings        -> WARNING_OUTPUT, repeats -> MORE_WARNINGS
#   - event counters  -> COUNTER_SUMMARY (JSON-style object)
#   - files to ignore -> SHOULD_EXCLUDE
#   - candidate / forbidden pattern summaries via summarize_totals()
#   - optional timing rows via report_timing()
#   - the collated list of unknown words -> STDOUT
#
# The output files are opened without checking for failure; an unwritable
# target silently loses that output.
sub main {
  my @directories;
  # Collected for skipped/noisy files; nothing in this sub consumes it.
  my @cleanup_directories;
  my @check_file_paths;

  my $early_warnings = CheckSpelling::Util::get_file_from_env('early_warnings', '/dev/null');
  my $warning_output = CheckSpelling::Util::get_file_from_env('warning_output', '/dev/stderr');
  my $more_warnings = CheckSpelling::Util::get_file_from_env('more_warnings', '/dev/stderr');
  my $counter_summary = CheckSpelling::Util::get_file_from_env('counter_summary', '/dev/stderr');
  my $ignored_events = CheckSpelling::Util::get_file_from_env('ignored_events', '');
  if ($ignored_events) {
    our %ignored_event_map;
    for my $event (split /,/, $ignored_events) {
      $ignored_event_map{$event} = 1;
    }
  }
  my $should_exclude_file = CheckSpelling::Util::get_file_from_env('should_exclude_file', '/dev/null');
  my $unknown_word_limit = CheckSpelling::Util::get_val_from_env('unknown_word_limit', undef);
  my $unknown_file_word_limit = CheckSpelling::Util::get_val_from_env('unknown_file_word_limit', undef);
  my $candidate_example_limit = CheckSpelling::Util::get_file_from_env('INPUT_CANDIDATE_EXAMPLE_LIMIT', '3');
  my $disable_flags = CheckSpelling::Util::get_file_from_env('INPUT_DISABLE_CHECKS', '');
  my $only_check_changed_files = CheckSpelling::Util::get_file_from_env('INPUT_ONLY_CHECK_CHANGED_FILES', '');
  my $disable_noisy_file = $disable_flags =~ /(?:^|,|\s)noisy-file(?:,|\s|$)/;
  our $disable_word_collating = $only_check_changed_files || $disable_flags =~ /(?:^|,|\s)word-collating(?:,|\s|$)/;
  my $file_list = CheckSpelling::Util::get_file_from_env('check_file_names', '');
  my $pr_title_file = CheckSpelling::Util::get_file_from_env('pr_title_file', '');
  my $pr_description_file = CheckSpelling::Util::get_file_from_env('pr_description_file', '');
  my $commit_messages = CheckSpelling::Util::get_file_from_env('commit_messages', '');
  my $timing_report = CheckSpelling::Util::get_file_from_env('timing_report', '');
  my $special = {
    'file_list' => $file_list,
    'pr_title_file' => $pr_title_file,
    'pr_description_file' => $pr_description_file,
    'commit_messages' => $commit_messages,
  };
  my $start_time;

  # These handles stay global barewords: log_skip_item() prints to
  # MORE_WARNINGS and report_timing() prints to TIMING_REPORT.
  open WARNING_OUTPUT, '>:utf8', $warning_output;
  open MORE_WARNINGS, '>:utf8', $more_warnings;
  open COUNTER_SUMMARY, '>:utf8', $counter_summary;
  open SHOULD_EXCLUDE, '>:utf8', $should_exclude_file;
  if ($timing_report) {
    open TIMING_REPORT, '>:utf8', $timing_report;
    print TIMING_REPORT "file, start, finish\n";
  }

  my @candidates = get_pattern_with_context('candidates_path');
  my @candidate_totals = (0) x scalar @candidates;
  my @candidate_file_counts = (0) x scalar @candidates;

  my @forbidden = get_pattern_with_context('forbidden_path');
  my @forbidden_totals = (0) x scalar @forbidden;

  my @delayed_warnings;
  our %letter_map = ();

  # --- Pass 1: map each checked file name to its result directory ---------
  my %file_map = ();

  while (my $directory = <>) {
    chomp $directory;
    next unless $directory =~ /^(.*)$/;
    $directory = $1;
    unless (-e $directory) {
      print STDERR "Could not find: $directory\n";
      next;
    }
    unless (-d $directory) {
      print STDERR "Not a directory: $directory\n";
      next;
    }

    # if there's no filename, we can't report
    next unless open(my $name_fh, '<:utf8', "$directory/name");
    my $file = <$name_fh>;
    close $name_fh;

    $file_map{$file} = $directory;
  }

  # --- Pass 2: per-file stats, noise detection, unknown word collection ---
  for my $file (sort keys %file_map) {
    my $directory = $file_map{$file};
    if ($timing_report) {
      $start_time = (stat "$directory/name")[9];
    }

    if (-e "$directory/skipped") {
      open(my $skipped_fh, '<:utf8', "$directory/skipped");
      my $reason = <$skipped_fh>;
      close $skipped_fh;
      chomp $reason;
      push @delayed_warnings, "$file:1:1 ... 1, Warning - Skipping `$file` because $reason\n";
      print SHOULD_EXCLUDE "$file\n";
      push @cleanup_directories, $directory;
      report_timing($file, $start_time, $directory, 'skipped') if ($timing_report);
      next;
    }

    # stats isn't written if there was nothing interesting in the file
    unless (-s "$directory/stats") {
      report_timing($file, $start_time, $directory, 'warnings') if ($timing_report);
      push @directories, $directory;
      next;
    }

    if ($file eq $file_list) {
      open(my $list_fh, '<:utf8', $file_list);
      # Index 0 is a placeholder so that list entries start at index 1.
      push @check_file_paths, '0 placeholder';
      while (my $check_file_path = <$list_fh>) {
        chomp $check_file_path;
        push @check_file_paths, $check_file_path;
      }
      close $list_fh;
    }

    my ($words, $unrecognized, $unknown, $unique);
    {
      open(my $stats_fh, '<:utf8', "$directory/stats");
      my $stats = <$stats_fh>;
      close $stats_fh;
      $words = get_field($stats, 'words');
      $unrecognized = get_field($stats, 'unrecognized');
      $unknown = get_field($stats, 'unknown');
      $unique = get_field($stats, 'unique');

      if (@candidate_totals) {
        my @candidate_list = get_array($stats, 'candidates');
        my @lines = get_array($stats, 'candidate_lines');
        for my $i (0 .. $#candidate_list) {
          my $hits = $candidate_list[$i];
          next unless $hits;
          $candidate_totals[$i] += $hits;
          # Only the first few files per pattern produce an example notice.
          next unless $candidate_file_counts[$i]++ < $candidate_example_limit;

          my $pattern = (split /\n/,$candidates[$i])[-1];
          my $position = $lines[$i];
          $position =~ s/:(\d+)$/ ... $1/;
          my $wrapped = CheckSpelling::Util::wrap_in_backticks($pattern);
          my $candidate_label = '';
          if ($candidates[$i] =~ /^#\s+(\S.+)/) {
            $candidate_label = " ($1)";
          }
          push @delayed_warnings, "$file:$position, Notice - Line matches candidate pattern$candidate_label $wrapped (candidate-pattern)\n";
        }
      }

      if (@forbidden_totals) {
        my @forbidden_list = get_array($stats, 'forbidden');
        for my $i (0 .. $#forbidden_list) {
          my $hits = $forbidden_list[$i];
          $forbidden_totals[$i] += $hits if $hits;
        }
      }
    }

    report_timing($file, $start_time, $directory, 'unknown') if ($timing_report);
    my $kind = get_special($file, $special);

    # These heuristics are very new and need tuning/feedback
    if ($unknown > $unique && !$disable_noisy_file) {
      if ($kind eq 'file') {
        print SHOULD_EXCLUDE "$file\n";
      }
      my $warning = "noisy-$kind";
      # NOTE(review): this string carries no trailing "(code)" token, which
      # is what count_warning() keys on; the event is tallied when the
      # delayed warning below is emitted.
      count_warning($warning);
      push @delayed_warnings, "$file:1:1 ... 1, Warning - Skipping `$file` because it seems to have more noise ($unknown) than unique words ($unique) (total: $unrecognized / $words). ($warning)\n";
      push @cleanup_directories, $directory;
      next;
    }

    push @directories, $directory;
    next unless $kind =~ /^file/ && -s "$directory/unknown";

    open(my $unknown_fh, '<:utf8', "$directory/unknown");
    while (my $token = <$unknown_fh>) {
      $token =~ s/\R//;
      next unless $token =~ /./;
      my ($key, $char) = collate_key($token);
      $letter_map{$char}{$key}{$token} = 1;
    }
    close $unknown_fh;
  }
  close SHOULD_EXCLUDE;
  close TIMING_REPORT if $timing_report;

  # --- Pattern summaries ---------------------------------------------------
  summarize_totals(
    sub {
      my ($hits, $files, $context, $pattern) = @_;
      return "# hit-count: $hits$files\n$context$pattern\n\n";
    },
    CheckSpelling::Util::get_file_from_env('candidate_summary', '/dev/stderr'),
    \@candidates,
    \@candidate_totals,
    \@candidate_file_counts,
  );

  summarize_totals(
    sub {
      my (undef, undef, $context, $pattern) = @_;
      $context =~ s/^# //gm;
      chomp $context;
      my $details;
      # First context line becomes the heading, the rest the details.
      if ($context =~ /^(.*?)$(.*)/ms) {
        ($context, $details) = ($1, $2);
        $details = "\n$details" if $details;
      }
      $context = 'Pattern' unless $context;
      return "##### $context$details\n```\n$pattern\n```\n\n";
    },
    CheckSpelling::Util::get_file_from_env('forbidden_summary', '/dev/stderr'),
    \@forbidden,
    \@forbidden_totals,
  );

  group_related_words();

  if (defined $ENV{'expect'}) {
    $ENV{'expect'} =~ /(.*)/;
    load_expect($1);
    harmonize_expect();
  }

  # log_skip_item() records sightings in the PACKAGE variables %seen and
  # %last_seen.  Reset those here (a lexical `my %seen` would shadow the
  # package hash, leaving the limited-references report below looking at an
  # always-empty hash).
  our %seen;
  %seen = ();
  our %last_seen;
  %last_seen = ();
  our %counters;
  %counters = ();

  # --- Early warnings ------------------------------------------------------
  if (-s $early_warnings) {
    open(my $early_fh, '<:utf8', $early_warnings);
    while (my $warning = <$early_fh>) {
      chomp $warning;
      count_warning($warning);
      next if should_skip_warning($warning);
      print WARNING_OUTPUT "$warning\n";
    }
    close $early_fh;
  }

  # --- Per-file warnings ---------------------------------------------------
  my %unknown_file_word_count;
  for my $directory (@directories) {
    next unless (-s "$directory/warnings");
    next unless open(my $name_fh, '<:utf8', "$directory/name");
    my $file = <$name_fh>;
    close $name_fh;
    my $kind = get_special($file, $special);

    open(my $warnings_fh, '<:utf8', "$directory/warnings");
    if ($kind ne 'file-list') {
      while (my $warning = <$warnings_fh>) {
        chomp $warning;
        if ($warning =~ m/:(\d+):(\d+ \.\.\. \d+): `(.*)`/) {
          my ($line, $range, $item) = ($1, $2, $3);
          my $wrapped = CheckSpelling::Util::wrap_in_backticks($item);
          my $reason = 'unrecognized-spelling';
          $reason .= "-$kind" unless $kind eq 'file';
          $warning =~ s/:\d+:\d+ \.\.\. \d+: `.*`/:$line:$range, Warning - $wrapped is not a recognized word ($reason)/;
          next if log_skip_item($item, $file, $warning, $unknown_word_limit);
          count_warning($warning) if $kind ne 'file';
        } else {
          if ($warning =~ /\`(.*?)\` in line \(token-is-substring\)/) {
            next if skip_item($1);
          }
          count_warning($warning);
        }
        next if should_skip_warning($warning);
        print WARNING_OUTPUT "$file$warning\n";
      }
    } else {
      # Warnings about the file list itself: the leading line number selects
      # the listed path the warning is reported against.
      while (my $warning = <$warnings_fh>) {
        chomp $warning;
        next unless $warning =~ s/^:(\d+)/:1/;
        $file = $check_file_paths[$1];
        if ($warning =~ m/:(\d+ \.\.\. \d+): `(.*)`/) {
          my ($range, $item) = ($1, $2);
          my $wrapped = CheckSpelling::Util::wrap_in_backticks($item);
          $warning =~ s/:\d+ \.\.\. \d+: `.*`/:$range, Warning - $wrapped is not a recognized word (check-file-path)/;
          next if skip_item($item);
          if (defined $unknown_file_word_limit) {
            next if ++$unknown_file_word_count{$item} > $unknown_file_word_limit;
          }
        }
        next if should_skip_warning($warning);
        print WARNING_OUTPUT "$file$warning\n";
        count_warning($warning);
      }
    }
    close $warnings_fh;
  }
  close MORE_WARNINGS;

  # --- Delayed warnings and limited-references report ----------------------
  for my $warning (@delayed_warnings) {
    next if should_skip_warning($warning);
    count_warning($warning);
    print WARNING_OUTPUT $warning;
  }

  if (defined $unknown_word_limit) {
    for my $warned_word (sort keys %last_seen) {
      my $warning_count = $seen{$warned_word} || 0;
      next unless $warning_count >= $unknown_word_limit;
      my $warning = $last_seen{$warned_word};
      $warning =~ s/\Q (unrecognized-spelling)\E/ -- found $warning_count times (limited-references)\n/;
      next if should_skip_warning($warning);
      print WARNING_OUTPUT $warning;
      count_warning($warning);
    }
  }
  close WARNING_OUTPUT;

  # --- Counter summary -----------------------------------------------------
  if (%counters) {
    my $continue='';
    print COUNTER_SUMMARY "{\n";
    for my $code (sort keys %counters) {
      print COUNTER_SUMMARY qq<$continue"$code": $counters{$code}\n>;
      $continue=',';
    }
    print COUNTER_SUMMARY "}\n";
  }
  close COUNTER_SUMMARY;

  # display the current unknown
  for my $char (sort keys %letter_map) {
    for my $key (sort CheckSpelling::Util::case_biased keys(%{$letter_map{$char}})) {
      my @words = keys %{$letter_map{$char}{$key}};
      if (scalar(@words) > 1) {
        print $key." (".(join ", ", sort { length($a) <=> length($b) || $a cmp $b } @words).")";
      } else {
        print $words[0];
      }
      print "\n";
    }
  }
}
643
6441;