File Coverage

File:lib/CheckSpelling/SpellingCollator.pm
Coverage:89.7%

linestmtbrancondsubtimecode
1#! -*-perl-*-
2
3package CheckSpelling::SpellingCollator;
4
5our $VERSION='0.1.0';
6
1
1
112239
3
use 5.022;
7
1
1
1
2
0
7
use utf8;
8
1
1
1
14
0
77
use feature 'unicode_strings';
9
1
1
1
2
0
17
use warnings;
10
1
1
1
4
0
25
use File::Path qw(remove_tree);
11
1
1
1
234
1
3009
use CheckSpelling::Util;
12
13my %letter_map;
14my %ignored_event_map;
15my $disable_word_collating;
16
17my %last_seen;
18
19sub get_field {
20
40
31
  my ($record, $field) = @_;
21
40
339
  return 0 unless $record =~ (/\b$field:\s*(\d+)/);
22
30
30
  return $1;
23}
24
25sub get_array {
26
4
3
  my ($record, $field) = @_;
27
4
28
  return () unless $record =~ (/\b$field: \[([^\]]+)\]/);
28
4
4
  my $values = $1;
29
4
6
  return split /\s*,\s*/, $values;
30}
31
32sub maybe {
33
7
11
  my ($next, $value) = @_;
34
7
9
  $next = $value unless $next && $next < $value;
35
7
5
  return $next;
36}
37
38my %expected = ();
39sub expect_item {
40
94
61
  my ($item, $value) = @_;
41
94
32
  our %expected;
42
94
27
  my $next;
43
94
115
  if (defined $expected{$item}) {
44
22
16
    $next = $expected{$item};
45
22
12
    $next = $value if $value < $next;
46  } elsif ($item =~ /^([A-Z])(.*)/) {
47
12
10
    $item = $1 . lc $2;
48
12
5
    if (defined $expected{$item}) {
49
2
2
      $next = $expected{$item};
50
2
1
      $next = maybe($next, $value + .1);
51    } else {
52
10
7
      $item = lc $item;
53
10
8
      if (defined $expected{$item}) {
54
5
3
        $next = $expected{$item};
55
5
3
        $next = maybe($next, $value + .2);
56      }
57    }
58  }
59
94
94
  return 0 unless defined $next;
60
29
17
  $expected{$item} = $next;
61
29
61
  return $value;
62}
63
64sub skip_item {
65
50
24
  my ($word) = @_;
66
50
30
  return 1 if expect_item($word, 1);
67
32
20
  my $key = lc $word;
68
32
15
  return 2 if expect_item($key, 2);
69
32
39
  if ($key =~ /.s$/) {
70
2
3
    if ($key =~ /ies$/) {
71
1
1
      $key =~ s/ies$/y/;
72    } else {
73
1
2
      $key =~ s/s$//;
74    }
75  } elsif ($key =~ /^(.+[^aeiou])ed$/) {
76
1
3
    $key = $1;
77  } elsif ($key =~ /^(.+)'[ds]$/) {
78
4
3
    $key = $1;
79  } else {
80
25
25
    return 0;
81  }
82
7
5
  return 3 if expect_item($key, 3);
83
0
0
  return 0;
84}
85
86sub should_skip_warning {
87
72
53
  my ($warning) = @_;
88
72
90
  if ($warning =~ /\(([-\w]+)\)$/) {
89
68
46
    my ($code) = ($1);
90
68
25
    our %ignored_event_map;
91
68
52
    return 1 if $ignored_event_map{$code};
92  }
93
71
54
  return 0;
94}
95
96sub log_skip_item {
97
46
59
  my ($item, $file, $warning, $unknown_word_limit) = @_;
98
46
26
  return 1 if should_skip_warning $warning;
99
46
29
  return 1 if skip_item($item);
100
21
6
  our %seen;
101
21
13
  my $seen_count = $seen{$item};
102
21
24
  if (defined $seen_count) {
103
8
40
    if (!defined $unknown_word_limit || ($seen_count++ < $unknown_word_limit)) {
104
7
25
      print MORE_WARNINGS "$file$warning\n";
105    } else {
106
1
1
      our %last_seen;
107
1
1
      $last_seen{$item} = "$file$warning";
108    }
109
8
9
    $seen{$item} = $seen_count;
110
8
14
    return 1;
111  }
112
13
10
  $seen{$item} = 1;
113
13
12
  return 0;
114}
115
116sub stem_word {
117
22
17
  my ($key) = @_;
118
22
7
  our $disable_word_collating;
119
22
18
  return $key if $disable_word_collating;
120
121
22
18
  if ($key =~ /.s$/) {
122
3
1
    if ($key =~ /ies$/) {
123
1
4
      $key =~ s/ies$/y/;
124    } else {
125
2
3
      $key =~ s/s$//;
126    }
127  } elsif ($key =~ /.[^aeiou]ed$/) {
128
1
2
    $key =~ s/ed$//;
129  }
130
22
19
  return $key;
131}
132
133sub collate_key {
134
81
42
  my ($key) = @_;
135
81
39
  our $disable_word_collating;
136
81
30
  my $char;
137
81
63
  if ($disable_word_collating) {
138
8
9
    $char = lc substr $key, 0, 1;
139  } else {
140
73
45
    $key = lc $key;
141
73
48
    $key =~ s/''+/'/g;
142
73
40
    $key =~ s/'[sd]$//;
143
73
40
    $key =~ s/^[^Ii]?'+(.*)/$1/;
144
73
39
    $key =~ s/(.*?)'$/$1/;
145
73
65
    $char = substr $key, 0, 1;
146  }
147
81
96
  return ($key, $char);
148}
149
150sub load_expect {
151
12
495
  my ($expect) = @_;
152
12
7
  our %expected;
153
12
12
  %expected = ();
154
12
97
  if (open(EXPECT, '<:utf8', $expect)) {
155
12
58
    while (my $word = <EXPECT>) {
156
43
54
      $word =~ s/\R//;
157
43
81
      $expected{$word} = 0;
158    }
159
12
47
    close EXPECT;
160  }
161}
162
163sub harmonize_expect {
164
11
4
  our $disable_word_collating;
165
11
4
  our %letter_map;
166
11
5
  our %expected;
167
168
11
15
  for my $word (keys %expected) {
169
40
27
    my ($key, $char) = collate_key $word;
170
40
26
    my %word_map = ();
171
40
46
    next unless defined $letter_map{$char}{$key};
172
13
13
4
17
    %word_map = %{$letter_map{$char}{$key}};
173
13
18
    next if defined $word_map{$word};
174
3
2
    my $words = scalar keys %word_map;
175
3
2
    next if $words > 2;
176
3
4
    if ($word eq $key) {
177
1
1
      next if ($words > 1);
178    }
179
2
2
    delete $expected{$word};
180  }
181}
182
183sub group_related_words {
184
12
4
  our %letter_map;
185
12
6
  our $disable_word_collating;
186
12
9
  return if $disable_word_collating;
187
188  # group related words
189
11
22
  for my $char (sort CheckSpelling::Util::number_biased keys %letter_map) {
190
19
19
3
22
    for my $plural_key (sort keys(%{$letter_map{$char}})) {
191
22
9
      my $key = stem_word $plural_key;
192
22
21
      next if $key eq $plural_key;
193
4
4
      next unless defined $letter_map{$char}{$key};
194
3
3
2
7
      my %word_map = %{$letter_map{$char}{$key}};
195
3
3
1
3
      for my $word (keys(%{$letter_map{$char}{$plural_key}})) {
196
3
3
        $word_map{$word} = 1;
197      }
198
3
2
      $letter_map{$char}{$key} = \%word_map;
199
3
5
      delete $letter_map{$char}{$plural_key};
200    }
201  }
202}
203
204sub count_warning {
205
16
13
  my ($warning) = @_;
206
16
7
  our %counters;
207
16
10
  our %ignored_event_map;
208
16
25
  if ($warning =~ /\(([-\w]+)\)$/) {
209
10
6
    my ($code) = ($1);
210
10
6
    next if defined $ignored_event_map{$code};
211
10
11
    ++$counters{$code};
212  }
213}
214
215sub report_timing {
216
0
0
  my ($name, $start_time, $directory, $marker) = @_;
217
0
0
  my $end_time = (stat "$directory/$marker")[9];
218
0
0
  $name =~ s/"/\\"/g;
219
0
0
  print TIMING_REPORT "\"$name\", $start_time, $end_time\n";
220}
221
222sub get_pattern_with_context {
223
24
20
  my ($path) = @_;
224
24
22
  return unless defined $ENV{$path};
225
24
22
  $ENV{$path} =~ /(.*)/;
226
24
161
  return unless open ITEMS, '<:utf8', $1;
227
228
24
12
  my @items;
229
24
13
  my $context = '';
230
24
93
  while (<ITEMS>) {
231
5
7
    my $pattern = $_;
232
5
5
    if ($pattern =~ /^#/) {
233
2
2
      if ($pattern =~ /^# /) {
234
2
4
        $context .= $pattern;
235      } else {
236
0
0
        $context = '';
237      }
238
2
3
      next;
239    }
240
3
3
    chomp $pattern;
241
3
4
    unless ($pattern =~ /./) {
242
1
1
      $context = '';
243
1
1
      next;
244    }
245
2
3
    push @items, $context.$pattern;
246
2
5
    $context = '';
247  }
248
24
55
  close ITEMS;
249
24
24
  return @items;
250}
251
252sub summarize_totals {
253
24
18
  my ($formatter, $path, $items, $totals, $file_counts) = @_;
254
24
24
13
24
  return unless @{$totals};
255
2
70
  return unless open my $fh, '>:utf8', $path;
256
2
2
0
2
  my $totals_count = scalar(@{$totals}) - 1;
257
2
2
  my @indices;
258
2
2
  if ($file_counts) {
259    @indices = sort {
260
1
0
2
0
      $totals->[$b] <=> $totals->[$a] ||
261      $file_counts->[$b] <=> $file_counts->[$a]
262    } 0 .. $totals_count;
263  } else {
264    @indices = sort {
265
1
0
2
0
      $totals->[$b] <=> $totals->[$a]
266    } 0 .. $totals_count;
267  }
268
2
3
  for my $i (@indices) {
269
2
2
    last unless $totals->[$i] > 0;
270
2
2
    my $rule_with_context = $items->[$i];
271
2
2
    my ($description, $rule);
272
2
6
    if ($rule_with_context =~ /^(.*\n)([^\n]+)$/s) {
273
2
2
      ($description, $rule) = ($1, $2);
274    } else {
275
0
0
      ($description, $rule) = ('', $rule_with_context);
276    }
277
2
4
    print $fh $formatter->(
278      $totals->[$i],
279      ($file_counts ? " file-count: $file_counts->[$i]" : ""),
280      $description,
281      $rule
282    );
283  }
284
2
62
  close $fh;
285}
286
287sub get_special {
288
19
18
  my ($file, $special) = @_;
289
19
22
  return 'file-list' if $file eq $special->{'file_list'};
290
17
24
  return 'pr-title' if $file eq $special->{'pr_title_file'};
291
15
16
  return 'pr-description' if $file eq $special->{'pr_description_file'};
292
13
24
  return 'commit-message' if !rindex($file, $special->{'commit_messages'});
293
11
14
  return 'file';
294}
295
296sub main {
297
12
22154
  my @directories;
298  my @cleanup_directories;
299
12
0
  my @check_file_paths;
300
301
12
12
  my $early_warnings = CheckSpelling::Util::get_file_from_env('early_warnings', '/dev/null');
302
12
13
  my $warning_output = CheckSpelling::Util::get_file_from_env('warning_output', '/dev/stderr');
303
12
10
  my $more_warnings = CheckSpelling::Util::get_file_from_env('more_warnings', '/dev/stderr');
304
12
9
  my $counter_summary = CheckSpelling::Util::get_file_from_env('counter_summary', '/dev/stderr');
305
12
9
  my $ignored_events = CheckSpelling::Util::get_file_from_env('ignored_events', '');
306
12
10
  if ($ignored_events) {
307
6
4
    our %ignored_event_map;
308
6
8
    for my $event (split /,/, $ignored_events) {
309
6
6
      $ignored_event_map{$event} = 1;
310    }
311  }
312
12
8
  my $should_exclude_file = CheckSpelling::Util::get_file_from_env('should_exclude_file', '/dev/null');
313
12
11
  my $unknown_word_limit = CheckSpelling::Util::get_val_from_env('unknown_word_limit', undef);
314
12
6
  my $unknown_file_word_limit = CheckSpelling::Util::get_val_from_env('unknown_file_word_limit', undef);
315
12
6
  my $candidate_example_limit = CheckSpelling::Util::get_file_from_env('INPUT_CANDIDATE_EXAMPLE_LIMIT', '3');
316
12
6
  my $disable_flags = CheckSpelling::Util::get_file_from_env('INPUT_DISABLE_CHECKS', '');
317
12
7
  my $only_check_changed_files = CheckSpelling::Util::get_file_from_env('INPUT_ONLY_CHECK_CHANGED_FILES', '');
318
12
7
  my $disable_noisy_file = $disable_flags =~ /(?:^|,|\s)noisy-file(?:,|\s|$)/;
319
12
31
  our $disable_word_collating = $only_check_changed_files || $disable_flags =~ /(?:^|,|\s)word-collating(?:,|\s|$)/;
320
12
9
  my $file_list = CheckSpelling::Util::get_file_from_env('check_file_names', '');
321
12
7
  my $pr_title_file = CheckSpelling::Util::get_file_from_env('pr_title_file', '');
322
12
7
  my $pr_description_file = CheckSpelling::Util::get_file_from_env('pr_description_file', '');
323
12
7
  my $commit_messages = CheckSpelling::Util::get_file_from_env('commit_messages', '');
324
12
7
  my $timing_report = CheckSpelling::Util::get_file_from_env('timing_report', '');
325
12
17
  my $special = {
326    'file_list' => $file_list,
327    'pr_title_file' => $pr_title_file,
328    'pr_description_file' => $pr_description_file,
329    'commit_messages' => $commit_messages,
330  };
331
12
7
  my ($start_time, $end_time);
332
333
12
260
  open WARNING_OUTPUT, '>:utf8', $warning_output;
334
12
165
  open MORE_WARNINGS, '>:utf8', $more_warnings;
335
12
146
  open COUNTER_SUMMARY, '>:utf8', $counter_summary;
336
12
93
  open SHOULD_EXCLUDE, '>:utf8', $should_exclude_file;
337
12
11
  if ($timing_report) {
338
0
0
    open TIMING_REPORT, '>:utf8', $timing_report;
339
0
0
    print TIMING_REPORT "file, start, finish\n";
340  }
341
342
12
7
  my @candidates = get_pattern_with_context('candidates_path');
343
12
11
  my @candidate_totals = (0) x scalar @candidates;
344
12
8
  my @candidate_file_counts = (0) x scalar @candidates;
345
346
12
8
  my @forbidden = get_pattern_with_context('forbidden_path');
347
12
6
  my @forbidden_totals = (0) x scalar @forbidden;
348
349
12
6
  my @delayed_warnings;
350
12
24
  our %letter_map = ();
351
352
12
4
  my %file_map = ();
353
354
12
31
  for my $directory (<>) {
355
15
10
    chomp $directory;
356
15
25
    next unless $directory =~ /^(.*)$/;
357
15
11
    $directory = $1;
358
15
47
    unless (-e $directory) {
359
1
2
      print STDERR "Could not find: $directory\n";
360
1
2
      next;
361    }
362
14
34
    unless (-d $directory) {
363
1
17
      print STDERR "Not a directory: $directory\n";
364
1
2
      next;
365    }
366
367    # if there's no filename, we can't report
368
13
99
    next unless open(NAME, '<:utf8', "$directory/name");
369
12
61
    my $file=<NAME>;
370
12
21
    close NAME;
371
372
12
25
    $file_map{$file} = $directory;
373  }
374
375
12
21
  for my $file (sort keys %file_map) {
376
12
11
    my $directory = $file_map{$file};
377
12
9
    if ($timing_report) {
378
0
0
      $start_time = (stat "$directory/name")[9];
379    }
380
381
12
61
    if (-e "$directory/skipped") {
382
1
7
      open SKIPPED, '<:utf8', "$directory/skipped";
383
1
7
      my $reason=<SKIPPED>;
384
1
3
      close SKIPPED;
385
1
1
      chomp $reason;
386
1
3
      push @delayed_warnings, "$file:1:1 ... 1, Warning - Skipping `$file` because $reason\n";
387
1
2
      print SHOULD_EXCLUDE "$file\n";
388
1
1
      push @cleanup_directories, $directory;
389
1
1
      report_timing($file, $start_time, $directory, 'skipped') if ($timing_report);
390
1
1
      next;
391    }
392
393    # stats isn't written if there was nothing interesting in the file
394
11
35
    unless (-s "$directory/stats") {
395
1
1
      report_timing($file, $start_time, $directory, 'warnings') if ($timing_report);
396
1
1
      push @directories, $directory;
397
1
1
      next;
398    }
399
400
10
10
    if ($file eq $file_list) {
401
1
6
      open FILE_LIST, '<:utf8', $file_list;
402
1
1
      push @check_file_paths, '0 placeholder';
403
1
5
      for my $check_file_path (<FILE_LIST>) {
404
4
3
        chomp $check_file_path;
405
4
4
        push @check_file_paths, $check_file_path;
406      }
407
1
2
      close FILE_LIST;
408    }
409
410
10
6
    my ($words, $unrecognized, $unknown, $unique);
411
412    {
413
10
10
4
54
      open STATS, '<:utf8', "$directory/stats";
414
10
37
      my $stats=<STATS>;
415
10
18
      close STATS;
416
10
8
      $words=get_field($stats, 'words');
417
10
8
      $unrecognized=get_field($stats, 'unrecognized');
418
10
9
      $unknown=get_field($stats, 'unknown');
419
10
9
      $unique=get_field($stats, 'unique');
420
10
9
      my @candidate_list;
421
10
7
      if (@candidate_totals) {
422
1
0
        @candidate_list=get_array($stats, 'candidates');
423
1
1
        my @lines=get_array($stats, 'candidate_lines');
424
1
1
        if (@candidate_list) {
425
1
1
          for (my $i=0; $i < scalar @candidate_list; $i++) {
426
1
1
            my $hits = $candidate_list[$i];
427
1
1
            if ($hits) {
428
1
1
              $candidate_totals[$i] += $hits;
429
1
2
              if ($candidate_file_counts[$i]++ < $candidate_example_limit) {
430
1
2
                my $pattern = (split /\n/,$candidates[$i])[-1];
431
1
1
                my $position = $lines[$i];
432
1
5
                $position =~ s/:(\d+)$/ ... $1/;
433
1
1
                my $wrapped = CheckSpelling::Util::truncate_with_ellipsis(CheckSpelling::Util::wrap_in_backticks($pattern), 99);
434
1
1
                my $candidate_label = '';
435
1
2
                if ($candidates[$i] =~ /^#\s+(\S.+)/) {
436
1
2
                  $candidate_label = " ($1)";
437                }
438
1
5
                push @delayed_warnings, "$file:$position, Notice - Line matches candidate pattern$candidate_label $wrapped (candidate-pattern)\n";
439              }
440            }
441          }
442        }
443      }
444
10
11
      if (@forbidden_totals) {
445
1
1
        my @forbidden_list=get_array($stats, 'forbidden');
446
1
1
        my @lines=get_array($stats, 'forbidden_lines');
447
1
1
        if (@forbidden_list) {
448
1
1
          for (my $i=0; $i < scalar @forbidden_list; $i++) {
449
1
1
            my $hits = $forbidden_list[$i];
450
1
1
            if ($hits) {
451
1
2
              $forbidden_totals[$i] += $hits;
452            }
453          }
454        }
455      }
456      #print STDERR "$file (unrecognized: $unrecognized; unique: $unique; unknown: $unknown, words: $words, candidates: [".join(", ", @candidate_list)."])\n";
457    }
458
459
10
6
    report_timing($file, $start_time, $directory, 'unknown') if ($timing_report);
460
10
10
    my $kind = get_special($file, $special);
461    # These heuristics are very new and need tuning/feedback
462
10
11
    if (
463        ($unknown > $unique)
464        # || ($unrecognized > $words / 2)
465    ) {
466
1
1
      unless ($disable_noisy_file) {
467
1
1
        if ($kind eq 'file') {
468
1
2
          print SHOULD_EXCLUDE "$file\n";
469        }
470
1
1
        my $warning = "noisy-$kind";
471
1
1
        count_warning $warning;
472
1
2
        push @delayed_warnings, "$file:1:1 ... 1, Warning - Skipping `$file` because it seems to have more noise ($unknown) than unique words ($unique) (total: $unrecognized / $words). ($warning)\n";
473
1
0
        push @cleanup_directories, $directory;
474
1
6
        next;
475      }
476    }
477
9
8
    push @directories, $directory;
478
9
41
    unless ($kind =~ /^file/ && -s "$directory/unknown") {
479
4
6
      next;
480    }
481
5
33
    open UNKNOWN, '<:utf8', "$directory/unknown";
482
5
37
    for my $token (<UNKNOWN>) {
483
43
46
      $token =~ s/\R//;
484
43
39
      next unless $token =~ /./;
485
41
28
      my ($key, $char) = collate_key $token;
486
41
46
      $letter_map{$char} = () unless defined $letter_map{$char};
487
41
20
      my %word_map = ();
488
41
14
42
17
      %word_map = %{$letter_map{$char}{$key}} if defined $letter_map{$char}{$key};
489
41
64
      $word_map{$token} = 1;
490
41
49
      $letter_map{$char}{$key} = \%word_map;
491    }
492
5
20
    close UNKNOWN;
493  }
494
12
31
  close SHOULD_EXCLUDE;
495
12
8
  close TIMING_REPORT if $timing_report;
496
497  summarize_totals(
498    sub {
499
1
1
      my ($hits, $files, $context, $pattern) = @_;
500
1
6
      return "# hit-count: $hits$files\n$context$pattern\n\n",
501    },
502
12
33
    CheckSpelling::Util::get_file_from_env('candidate_summary', '/dev/stderr'),
503    \@candidates,
504    \@candidate_totals,
505    \@candidate_file_counts,
506  );
507
508  summarize_totals(
509    sub {
510
1
1
      my (undef, undef, $context, $pattern) = @_;
511
1
3
      $context =~ s/^# //gm;
512
1
0
      chomp $context;
513
1
1
      my $details;
514
1
3
      if ($context =~ /^(.*?)$(.*)/ms) {
515
1
1
        ($context, $details) = ($1, $2);
516
1
1
        $details = "\n$details" if $details;
517      }
518
1
1
      $context = 'Pattern' unless $context;
519
1
7
      return "##### $context$details\n```\n$pattern\n```\n\n";
520    },
521
12
38
    CheckSpelling::Util::get_file_from_env('forbidden_summary', '/dev/stderr'),
522    \@forbidden,
523    \@forbidden_totals,
524  );
525
526
12
32
  group_related_words;
527
528
12
13
  if (defined $ENV{'expect'}) {
529
11
10
    $ENV{'expect'} =~ /(.*)/;
530
11
10
    load_expect $1;
531
11
10
    harmonize_expect;
532  }
533
534
12
11
  my %seen = ();
535
12
4
  our %counters;
536
12
8
  %counters = ();
537
538
12
44
  if (-s $early_warnings) {
539
1
6
    open WARNINGS, '<:utf8', $early_warnings;
540
1
6
    for my $warning (<WARNINGS>) {
541
1
0
      chomp $warning;
542
1
2
      count_warning $warning;
543
1
1
      next if should_skip_warning $warning;
544
1
6
      print WARNING_OUTPUT "$warning\n";
545    }
546
1
2
    close WARNINGS;
547  }
548
549
12
9
  our %last_seen;
550
12
3
  my %unknown_file_word_count;
551
12
11
  for my $directory (@directories) {
552
10
32
    next unless (-s "$directory/warnings");
553
9
54
    next unless open(NAME, '<:utf8', "$directory/name");
554
9
31
    my $file=<NAME>;
555
9
17
    close NAME;
556
9
6
    my $kind = get_special($file, $special);
557
9
47
    open WARNINGS, '<:utf8', "$directory/warnings";
558
9
8
    if ($kind ne 'file-list') {
559
8
52
      for my $warning (<WARNINGS>) {
560
50
39
        chomp $warning;
561
50
89
        if ($warning =~ m/:(\d+):(\d+ \.\.\. \d+): `(.*)`/) {
562
46
44
          my ($line, $range, $item) = ($1, $2, $3);
563
46
32
          my $wrapped = CheckSpelling::Util::wrap_in_backticks($item);
564
46
23
          my $reason = 'unrecognized-spelling';
565
46
35
          $reason .= "-$kind" unless $kind eq 'file';
566
46
94
          $warning =~ s/:\d+:\d+ \.\.\. \d+: `.*`/:$line:$range, Warning - $wrapped is not a recognized word ($reason)/;
567
46
48
          next if log_skip_item($item, $file, $warning, $unknown_word_limit);
568
13
10
          count_warning $warning if $kind ne 'file';
569        } else {
570
4
4
          if ($warning =~ /\`(.*?)\` in line \(token-is-substring\)/) {
571
0
0
            next if skip_item($1);
572          }
573
4
6
          count_warning $warning;
574        }
575
17
8
        next if should_skip_warning $warning;
576
17
85
        print WARNING_OUTPUT "$file$warning\n";
577      }
578    } else {
579
1
7
      for my $warning (<WARNINGS>) {
580
6
4
        chomp $warning;
581
6
12
        next unless $warning =~ s/^:(\d+)/:1/;
582
6
4
        $file = $check_file_paths[$1];
583
6
13
        if ($warning =~ m/:(\d+ \.\.\. \d+): `(.*)`/) {
584
4
3
          my ($range, $item) = ($1, $2);
585
4
4
          my $wrapped = CheckSpelling::Util::wrap_in_backticks($item);
586
4
11
          $warning =~ s/:\d+ \.\.\. \d+: `.*`/:$range, Warning - $wrapped is not a recognized word (check-file-path)/;
587
4
4
          next if skip_item($item);
588
4
3
          if (defined $unknown_file_word_limit) {
589
4
6
            next if ++$unknown_file_word_count{$item} > $unknown_file_word_limit;
590          }
591        }
592
5
2
        next if should_skip_warning $warning;
593
4
12
        print WARNING_OUTPUT "$file$warning\n";
594
4
3
        count_warning $warning;
595      }
596    }
597
9
34
    close WARNINGS;
598  }
599
12
215
  close MORE_WARNINGS;
600
601
12
14
  for my $warning (@delayed_warnings) {
602
3
1
    next if should_skip_warning $warning;
603
3
2
    count_warning $warning;
604
3
5
    print WARNING_OUTPUT $warning;
605  }
606
12
9
  if (defined $unknown_word_limit) {
607
1
2
    for my $warned_word (sort keys %last_seen) {
608
1
6
      my $warning_count = $seen{$warned_word} || 0;
609
1
2
      next unless $warning_count >= $unknown_word_limit;
610
0
0
      my $warning = $last_seen{$warned_word};
611
0
0
      $warning =~ s/\Q (unrecognized-spelling)\E/ -- found $warning_count times (limited-references)\n/;
612
0
0
      next if should_skip_warning $warning;
613
0
0
      print WARNING_OUTPUT $warning;
614
0
0
      count_warning $warning;
615    }
616  }
617
12
262
  close WARNING_OUTPUT;
618
619
12
13
  if (%counters) {
620
3
1
    my $continue='';
621
3
5
    print COUNTER_SUMMARY "{\n";
622
3
6
    for my $code (sort keys %counters) {
623
6
9
      print COUNTER_SUMMARY qq<$continue"$code": $counters{$code}\n>;
624
6
3
      $continue=',';
625    }
626
3
3
    print COUNTER_SUMMARY "}\n";
627  }
628
12
79
  close COUNTER_SUMMARY;
629
630  # display the current unknown
631
12
28
  for my $char (sort keys %letter_map) {
632
43
43
27
111
    for my $key (sort CheckSpelling::Util::case_biased keys(%{$letter_map{$char}})) {
633
24
24
11
34
      my %word_map = %{$letter_map{$char}{$key}};
634
24
16
      my @words = keys(%word_map);
635
24
16
      if (scalar(@words) > 1) {
636
13
20
12
81
        print $key." (".(join ", ", sort { length($a) <=> length($b) || $a cmp $b } @words).")";
637      } else {
638
11
34
        print $words[0];
639      }
640
24
98
      print "\n";
641    }
642  }
643}
644
6451;