File Coverage

File:lib/CheckSpelling/SpellingCollator.pm
Coverage:89.7%

line stmt bran cond sub time code
1#! -*-perl-*-
2
3package CheckSpelling::SpellingCollator;
4
5our $VERSION='0.1.0';
6
1
1
108216
3
use 5.022;
7
1
1
1
2
1
2
use utf8;
8
1
1
1
14
1
46
use feature 'unicode_strings';
9
1
1
1
1
3
15
use warnings;
10
1
1
1
2
0
22
use File::Path qw(remove_tree);
11
1
1
1
236
1
2993
use CheckSpelling::Util;
12
13my %letter_map;
14my %ignored_event_map;
15my $disable_word_collating;
16
17my %last_seen;
18
# Extract a numeric field from a stats record.
#
#   $record - text containing "name: digits" pairs
#   $field  - literal name of the field to look up
#
# Returns the run of digits following "$field:" (optional whitespace is
# allowed after the colon), or 0 when the field is not present.
sub get_field {
  my ($record, $field) = @_;
  # \Q...\E keeps the field name literal, so regex metacharacters in the
  # name cannot change what is matched.
  return 0 unless $record =~ /\b\Q$field\E:\s*(\d+)/;
  return $1;
}
24
# Extract a bracketed, comma-separated list from a stats record.
#
#   $record - text containing "name: [a, b, c]" entries
#   $field  - literal name of the list to look up
#
# Returns the list elements (split on commas with surrounding whitespace
# removed), or an empty list when the field is absent or its brackets are
# empty.
sub get_array {
  my ($record, $field) = @_;
  # \Q...\E keeps the field name literal, so regex metacharacters in the
  # name cannot change what is matched.
  return () unless $record =~ /\b\Q$field\E: \[([^\]]+)\]/;
  # Copy the capture before split runs its own pattern.
  my $values = $1;
  return split /\s*,\s*/, $values;
}
31
# Pick between an existing rank and a proposed one.
#
# Returns $next when it is true and strictly less than $value; otherwise
# returns $value.
sub maybe {
  my ($next, $value) = @_;
  return $next if $next && $next < $value;
  return $value;
}
37
38my %expected = ();
# Look up $item in the package-level %expected map and record how it matched.
#
#   $item  - word to look up (typographic apostrophes are normalized to ')
#   $value - rank to record for this kind of match
#
# Lookup order:
#   1. the item exactly as given: stored rank becomes min(stored, $value);
#   2. if the item starts with A-Z: first letter kept, rest lowercased
#      (proposed rank $value + .1);
#   3. otherwise the all-lowercase form (proposed rank $value + .2).
# For 2 and 3 the stored rank is kept only when it is true and smaller than
# the proposed rank.
#
# Returns $value when some form was found (and updates %expected for that
# form), or 0 when nothing matched.
sub expect_item {
  my ($item, $value) = @_;
  our %expected;
  $item =~ s/’/'/g;

  my $rank;
  if (defined $expected{$item}) {
    my $current = $expected{$item};
    $rank = $value < $current ? $value : $current;
  } elsif ($item =~ /^([A-Z])(.*)/) {
    my $titled = $1 . lc($2);
    for my $attempt ([$titled, .1], [lc($titled), .2]) {
      my ($candidate, $penalty) = @{$attempt};
      next unless defined $expected{$candidate};
      my $current = $expected{$candidate};
      my $proposed = $value + $penalty;
      $rank = ($current && $current < $proposed) ? $current : $proposed;
      $item = $candidate;
      last;
    }
  }

  return 0 unless defined $rank;
  $expected{$item} = $rank;
  return $value;
}
64
# Decide whether $word is covered by the expected list.
#
# Tries, in order: the word as given (returns 1), its lowercase form
# (returns 2), and a reduced form of the lowercase word (returns 3).
# The reduced form is produced by the first rule that applies:
#   - ends in "s" after at least one character: "ies" -> "y", else drop "s";
#   - ends in a non-vowel followed by "ed": drop "ed";
#   - ends in 'd or 's: drop the suffix.
# Returns 0 when no rule applies or nothing matched.
sub skip_item {
  my ($word) = @_;
  return 1 if expect_item($word, 1);

  my $stem = lc $word;
  return 2 if expect_item($stem, 2);

  if ($stem =~ /.s$/) {
    # s/ies$/y/ reports whether it changed anything; fall back to dropping
    # the plain trailing "s" only when it did not.
    $stem =~ s/s$// unless $stem =~ s/ies$/y/;
  } elsif ($stem =~ /^(.+[^aeiou])ed$/ || $stem =~ /^(.+)'[ds]$/) {
    $stem = $1;
  } else {
    return 0;
  }

  return expect_item($stem, 3) ? 3 : 0;
}
86
# Report whether a warning's trailing "(event-code)" is in the package-level
# %ignored_event_map.
#
# Returns 1 when the warning ends with a parenthesized code (word characters
# and hyphens) that has a true entry in the map, otherwise 0.
sub should_skip_warning {
  my ($warning) = @_;
  our %ignored_event_map;
  my ($code) = $warning =~ /\(([-\w]+)\)$/;
  return (defined $code && $ignored_event_map{$code}) ? 1 : 0;
}
96
# Decide whether the caller should suppress a per-word warning, logging
# repeat occurrences along the way.
#
#   $item               - the unrecognized word
#   $file               - file name prefix for the warning
#   $warning            - warning text (appended to $file when logged)
#   $unknown_word_limit - optional cap on logged repeats per word
#
# Returns 1 (suppress) when the warning's event code is ignored, when the
# word is covered by the expected list, or when the word was already seen.
# Returns 0 the first time a word is seen; the package-level %seen then
# records a count of 1 for it.
#
# For repeats: with no limit every repeat is printed to MORE_WARNINGS. With
# a limit, the count is incremented and the repeat is printed while the
# previous count was below the limit; after that the latest occurrence is
# remembered in the package-level %last_seen instead.
sub log_skip_item {
  my ($item, $file, $warning, $unknown_word_limit) = @_;
  return 1 if should_skip_warning($warning) || skip_item($item);

  our (%seen, %last_seen);
  unless (defined $seen{$item}) {
    $seen{$item} = 1;
    return 0;
  }

  my $located = "$file$warning";
  if (!defined $unknown_word_limit) {
    print MORE_WARNINGS "$located\n";
  } elsif ($seen{$item}++ < $unknown_word_limit) {
    print MORE_WARNINGS "$located\n";
  } else {
    $last_seen{$item} = $located;
  }
  return 1;
}
116
# Reduce a key to the form it should be grouped under.
#
# Returns the key unchanged when the package-level $disable_word_collating
# is true. Otherwise:
#   - a key ending in "s" after at least one character has "ies" replaced
#     by "y", or else the trailing "s" removed;
#   - a key ending in a non-vowel followed by "ed" (with a character before
#     that) has "ed" removed;
#   - anything else is returned as is.
sub stem_word {
  my ($key) = @_;
  our $disable_word_collating;
  return $key if $disable_word_collating;

  if ($key =~ /.s$/) {
    return $key =~ /ies$/ ? $key =~ s/ies$/y/r : $key =~ s/s$//r;
  }
  return $key =~ s/ed$//r if $key =~ /.[^aeiou]ed$/;
  return $key;
}
133
# Compute the grouping key and bucket character for a word.
#
# Returns the two-element list ($key, $char).
#
# When the package-level $disable_word_collating is true, the word itself is
# the key and $char is its lowercased first character. Otherwise the key is
# the lowercased word with apostrophe noise removed (runs of apostrophes
# collapsed, a trailing 's or 'd dropped, leading apostrophes dropped along
# with an optional single preceding character other than I/i, and one
# trailing apostrophe dropped), and $char is the key's first character.
sub collate_key {
  my ($key) = @_;
  our $disable_word_collating;
  return ($key, lc substr($key, 0, 1)) if $disable_word_collating;

  my $folded = lc $key;
  for ($folded) {
    s/''+/'/g;
    s/'[sd]$//;
    s/^[^Ii]?'+(.*)/$1/;
    s/(.*?)'$/$1/;
  }
  return ($folded, substr($folded, 0, 1));
}
150
# Replace the package-level %expected map with the words listed in a file.
#
#   $expect - path to a UTF-8 text file with one word per line
#
# %expected is always cleared first. Each line has its first line break
# removed and is stored as a key with value 0. A file that cannot be opened
# is tolerated: %expected is simply left empty.
sub load_expect {
  my ($expect) = @_;
  our %expected;
  %expected = ();
  # A lexical handle keeps this sub from reopening or closing a handle
  # named EXPECT that a caller may have open.
  open(my $expect_fh, '<:utf8', $expect) or return;
  while (my $word = <$expect_fh>) {
    $word =~ s/\R//;
    $expected{$word} = 0;
  }
  close $expect_fh;
}
163
# Prune entries from the package-level %expected map based on the word
# groups collected in the package-level %letter_map.
#
# For each expected word, its collate key and bucket character are looked up
# in %letter_map. The word is deleted from %expected when all of these hold:
#   - a group exists for that key;
#   - the group does not already contain the word itself;
#   - the group has at most two members;
#   - if the word is identical to its key, the group has at most one member.
sub harmonize_expect {
  our (%letter_map, %expected);

  for my $word (keys %expected) {
    my ($key, $char) = collate_key($word);
    my $variants = $letter_map{$char}{$key};
    next unless defined $variants;
    next if defined $variants->{$word};

    my $variant_count = keys %{$variants};
    next if $variant_count > 2;
    next if $word eq $key && $variant_count > 1;

    delete $expected{$word};
  }
}
183
# Merge word groups in the package-level %letter_map whose keys differ only
# by the suffixes that stem_word removes.
#
# Does nothing when the package-level $disable_word_collating is true.
# Otherwise, within each character bucket, every key whose stem differs
# from the key itself is folded into the stem's group when that group
# exists: the stem's group is replaced by a new map holding its previous
# entries plus every word of the inflected group (value 1), and the
# inflected key is removed.
sub group_related_words {
  our (%letter_map, $disable_word_collating);
  return if $disable_word_collating;

  # group related words
  for my $char (sort CheckSpelling::Util::number_biased keys %letter_map) {
    my $bucket = $letter_map{$char} //= {};
    for my $inflected (sort keys %{$bucket}) {
      my $stem = stem_word($inflected);
      next if $stem eq $inflected;
      next unless defined $bucket->{$stem};

      $bucket->{$stem} = {
        %{$bucket->{$stem}},
        map { $_ => 1 } keys %{$bucket->{$inflected}}
      };
      delete $bucket->{$inflected};
    }
  }
}
204
# Tally a warning by its trailing "(event-code)".
#
#   $warning - warning text; only a parenthesized code (word characters and
#              hyphens) at the end of the text is considered
#
# Increments the package-level %counters entry for the code, unless the code
# has a defined entry in the package-level %ignored_event_map. Warnings
# without a trailing code are not counted. Returns nothing.
sub count_warning {
  my ($warning) = @_;
  our (%counters, %ignored_event_map);
  return unless $warning =~ /\(([-\w]+)\)$/;
  my $code = $1;
  # This is a sub, not a loop body: leave with return. "next" here would
  # either die or skip to the next iteration of whatever loop the caller
  # happens to be in.
  return if defined $ignored_event_map{$code};
  ++$counters{$code};
  return;
}
215
# Append one row to the TIMING_REPORT handle.
#
#   $name       - label for the row; double quotes are backslash-escaped
#   $start_time - value written as the start column
#   $directory  - directory containing the marker file
#   $marker     - file name whose modification time (stat field 9) is
#                 written as the finish column
sub report_timing {
  my ($name, $start_time, $directory, $marker) = @_;
  my @marker_stat = stat "$directory/$marker";
  my $end_time = $marker_stat[9];
  $name =~ s/"/\\"/g;
  print TIMING_REPORT "\"$name\", $start_time, $end_time\n";
}
222
# Read a pattern file whose path is stored in an environment variable.
#
#   $path - NAME of the environment variable holding the file path
#
# Returns a list with one entry per pattern line. Lines starting with "# "
# that directly precede a pattern are kept as context and prepended to it
# (newlines included). Any other line starting with "#" and any empty line
# discard the accumulated context. Returns an empty list when the variable
# is unset or the file cannot be opened.
sub get_pattern_with_context {
  my ($path) = @_;
  return unless defined $ENV{$path};
  # Pass the value through a capture before handing it to open.
  $ENV{$path} =~ /(.*)/;
  # Lexical handle and loop variable: the caller's $_ and any handle named
  # ITEMS are left untouched.
  return unless open my $items_fh, '<:utf8', $1;

  my @items;
  my $context = '';
  while (my $pattern = <$items_fh>) {
    if ($pattern =~ /^#/) {
      $context = ($pattern =~ /^# /) ? $context . $pattern : '';
      next;
    }
    chomp $pattern;
    push @items, $context.$pattern if $pattern =~ /./;
    # Context belongs to at most one pattern; blank lines drop it too.
    $context = '';
  }
  close $items_fh;
  return @items;
}
252
# Write a summary of the patterns that had hits, most hits first.
#
#   $formatter   - code ref called as
#                  $formatter->($hits, $file_count_text, $description, $rule);
#                  whatever it returns is printed to the output file
#   $path        - output file (opened for writing as UTF-8)
#   $items       - array ref of patterns, optionally preceded by context
#                  lines, index-aligned with $totals
#   $totals      - array ref of hit counts
#   $file_counts - optional array ref of per-pattern file counts; used as a
#                  sort tie-breaker and rendered as " file-count: N"
#
# Nothing is written when $totals is empty or the file cannot be opened.
# Output stops at the first entry whose total is not positive. An item is
# split into description (everything up to and including the last newline)
# and rule (the final line); an item without a newline has an empty
# description.
sub summarize_totals {
  my ($formatter, $path, $items, $totals, $file_counts) = @_;
  return unless @{$totals};
  return unless open my $fh, '>:utf8', $path;

  my @indices = sort {
    ($totals->[$b] <=> $totals->[$a])
      || ($file_counts ? $file_counts->[$b] <=> $file_counts->[$a] : 0)
  } 0 .. $#{$totals};

  for my $index (@indices) {
    last unless $totals->[$index] > 0;

    my ($description, $rule) = $items->[$index] =~ /^(.*\n)([^\n]+)$/s;
    ($description, $rule) = ('', $items->[$index]) unless defined $rule;

    my $files = $file_counts ? " file-count: $file_counts->[$index]" : "";
    print $fh $formatter->($totals->[$index], $files, $description, $rule);
  }
  close $fh;
}
287
# Classify a file name against the special inputs described by $special.
#
#   $file    - file name to classify
#   $special - hash ref with the keys file_list, pr_title_file,
#              pr_description_file and commit_messages
#
# Returns 'file-list', 'pr-title' or 'pr-description' when $file equals the
# corresponding entry, 'commit-message' when $file starts with the
# commit_messages value, and 'file' otherwise.
sub get_special {
  my ($file, $special) = @_;
  return 'file-list' if $file eq $special->{'file_list'};
  return 'pr-title' if $file eq $special->{'pr_title_file'};
  return 'pr-description' if $file eq $special->{'pr_description_file'};

  my $commit_prefix = $special->{'commit_messages'};
  # rindex with position 0 can only match at the very start, so this is a
  # pure prefix test even if the prefix text shows up again later in the
  # path. An empty prefix is treated as "not configured" rather than as a
  # prefix of everything.
  return 'commit-message'
    if length($commit_prefix) && rindex($file, $commit_prefix, 0) == 0;
  return 'file';
}
296
# Collate the per-file results of a spell-check run.
#
# Input: directory names, one per line, read from <> (STDIN or files named
# in @ARGV). Each directory is expected to contain a "name" file (the path
# of the checked file) and may contain "skipped", "stats", "unknown" and
# "warnings" files.
#
# Configuration is read through CheckSpelling::Util::get_file_from_env /
# get_val_from_env, plus $ENV{expect}, $ENV{candidates_path} and
# $ENV{forbidden_path}.
#
# Output:
#   - WARNING_OUTPUT:  first occurrence of each warning, delayed warnings
#                      and "limited-references" summaries
#   - MORE_WARNINGS:   repeat occurrences (written by log_skip_item)
#   - SHOULD_EXCLUDE:  files that were skipped or judged noisy
#   - COUNTER_SUMMARY: JSON-like object of per-event-code counts
#   - candidate/forbidden summaries via summarize_totals
#   - STDOUT:          the collated unknown words, one group per line
sub main {
  my @directories;
  # NOTE(review): directories are pushed here but this sub never reads the
  # list back.
  my @cleanup_directories;
  my @check_file_paths;

  my $early_warnings = CheckSpelling::Util::get_file_from_env('early_warnings', '/dev/null');
  my $warning_output = CheckSpelling::Util::get_file_from_env('warning_output', '/dev/stderr');
  my $more_warnings = CheckSpelling::Util::get_file_from_env('more_warnings', '/dev/stderr');
  my $counter_summary = CheckSpelling::Util::get_file_from_env('counter_summary', '/dev/stderr');
  my $ignored_events = CheckSpelling::Util::get_file_from_env('ignored_events', '');
  if ($ignored_events) {
    our %ignored_event_map;
    for my $event (split /,/, $ignored_events) {
      $ignored_event_map{$event} = 1;
    }
  }
  my $should_exclude_file = CheckSpelling::Util::get_file_from_env('should_exclude_file', '/dev/null');
  my $unknown_word_limit = CheckSpelling::Util::get_val_from_env('unknown_word_limit', undef);
  my $unknown_file_word_limit = CheckSpelling::Util::get_val_from_env('unknown_file_word_limit', undef);
  my $candidate_example_limit = CheckSpelling::Util::get_file_from_env('INPUT_CANDIDATE_EXAMPLE_LIMIT', '3');
  my $disable_flags = CheckSpelling::Util::get_file_from_env('INPUT_DISABLE_CHECKS', '');
  my $only_check_changed_files = CheckSpelling::Util::get_file_from_env('INPUT_ONLY_CHECK_CHANGED_FILES', '');
  my $disable_noisy_file = $disable_flags =~ /(?:^|,|\s)noisy-file(?:,|\s|$)/;
  our $disable_word_collating = $only_check_changed_files || $disable_flags =~ /(?:^|,|\s)word-collating(?:,|\s|$)/;
  my $file_list = CheckSpelling::Util::get_file_from_env('check_file_names', '');
  my $pr_title_file = CheckSpelling::Util::get_file_from_env('pr_title_file', '');
  my $pr_description_file = CheckSpelling::Util::get_file_from_env('pr_description_file', '');
  my $commit_messages = CheckSpelling::Util::get_file_from_env('commit_messages', '');
  my $timing_report = CheckSpelling::Util::get_file_from_env('timing_report', '');
  my $special = {
    'file_list' => $file_list,
    'pr_title_file' => $pr_title_file,
    'pr_description_file' => $pr_description_file,
    'commit_messages' => $commit_messages,
  };
  my ($start_time, $end_time);

  # MORE_WARNINGS and TIMING_REPORT are written by log_skip_item and
  # report_timing, so these stay package-level bareword handles.
  open WARNING_OUTPUT, '>:utf8', $warning_output;
  open MORE_WARNINGS, '>:utf8', $more_warnings;
  open COUNTER_SUMMARY, '>:utf8', $counter_summary;
  open SHOULD_EXCLUDE, '>:utf8', $should_exclude_file;
  if ($timing_report) {
    open TIMING_REPORT, '>:utf8', $timing_report;
    print TIMING_REPORT "file, start, finish\n";
  }

  my @candidates = get_pattern_with_context('candidates_path');
  my @candidate_totals = (0) x scalar @candidates;
  my @candidate_file_counts = (0) x scalar @candidates;

  my @forbidden = get_pattern_with_context('forbidden_path');
  my @forbidden_totals = (0) x scalar @forbidden;

  my @delayed_warnings;
  our %letter_map = ();

  my %file_map = ();

  # Pass 1: map each checked file's name to its result directory.
  for my $directory (<>) {
    chomp $directory;
    next unless $directory =~ /^(.*)$/;
    $directory = $1;
    unless (-e $directory) {
      print STDERR "Could not find: $directory\n";
      next;
    }
    unless (-d $directory) {
      print STDERR "Not a directory: $directory\n";
      next;
    }

    # if there's no filename, we can't report
    next unless open(NAME, '<:utf8', "$directory/name");
    my $file=<NAME>;
    close NAME;

    $file_map{$file} = $directory;
  }

  # Pass 2: per file (sorted by name), read stats, tally pattern hits, drop
  # skipped/noisy files, and collect unknown tokens into %letter_map.
  for my $file (sort keys %file_map) {
    my $directory = $file_map{$file};
    if ($timing_report) {
      $start_time = (stat "$directory/name")[9];
    }

    if (-e "$directory/skipped") {
      open SKIPPED, '<:utf8', "$directory/skipped";
      my $reason=<SKIPPED>;
      close SKIPPED;
      chomp $reason;
      push @delayed_warnings, "$file:1:1 ... 1, Warning - Skipping `$file` because $reason\n";
      print SHOULD_EXCLUDE "$file\n";
      push @cleanup_directories, $directory;
      report_timing($file, $start_time, $directory, 'skipped') if ($timing_report);
      next;
    }

    # stats isn't written if there was nothing interesting in the file
    unless (-s "$directory/stats") {
      report_timing($file, $start_time, $directory, 'warnings') if ($timing_report);
      push @directories, $directory;
      next;
    }

    if ($file eq $file_list) {
      open FILE_LIST, '<:utf8', $file_list;
      # Index 0 is a placeholder so that list positions line up with the
      # 1-based numbers used to index @check_file_paths later.
      push @check_file_paths, '0 placeholder';
      for my $check_file_path (<FILE_LIST>) {
        chomp $check_file_path;
        push @check_file_paths, $check_file_path;
      }
      close FILE_LIST;
    }

    my ($words, $unrecognized, $unknown, $unique);

    {
      open STATS, '<:utf8', "$directory/stats";
      my $stats=<STATS>;
      close STATS;
      $words=get_field($stats, 'words');
      $unrecognized=get_field($stats, 'unrecognized');
      $unknown=get_field($stats, 'unknown');
      $unique=get_field($stats, 'unique');
      my @candidate_list;
      if (@candidate_totals) {
        @candidate_list=get_array($stats, 'candidates');
        my @lines=get_array($stats, 'candidate_lines');
        if (@candidate_list) {
          for (my $i=0; $i < scalar @candidate_list; $i++) {
            my $hits = $candidate_list[$i];
            if ($hits) {
              $candidate_totals[$i] += $hits;
              # Only the first few files per candidate get an example notice.
              if ($candidate_file_counts[$i]++ < $candidate_example_limit) {
                my $pattern = (split /\n/,$candidates[$i])[-1];
                my $position = $lines[$i];
                $position =~ s/:(\d+)$/ ... $1/;
                my $wrapped = CheckSpelling::Util::truncate_with_ellipsis(CheckSpelling::Util::wrap_in_backticks($pattern), 99);
                my $candidate_label = '';
                if ($candidates[$i] =~ /^#\s+(\S.+)/) {
                  $candidate_label = " ($1)";
                }
                push @delayed_warnings, "$file:$position, Notice - Line matches candidate pattern$candidate_label $wrapped (candidate-pattern)\n";
              }
            }
          }
        }
      }
      if (@forbidden_totals) {
        my @forbidden_list=get_array($stats, 'forbidden');
        my @lines=get_array($stats, 'forbidden_lines');
        if (@forbidden_list) {
          for (my $i=0; $i < scalar @forbidden_list; $i++) {
            my $hits = $forbidden_list[$i];
            if ($hits) {
              $forbidden_totals[$i] += $hits;
            }
          }
        }
      }
      #print STDERR "$file (unrecognized: $unrecognized; unique: $unique; unknown: $unknown, words: $words, candidates: [".join(", ", @candidate_list)."])\n";
    }

    report_timing($file, $start_time, $directory, 'unknown') if ($timing_report);
    my $kind = get_special($file, $special);
    # These heuristics are very new and need tuning/feedback
    if (
        ($unknown > $unique)
        # || ($unrecognized > $words / 2)
    ) {
      unless ($disable_noisy_file) {
        if ($kind eq 'file') {
          print SHOULD_EXCLUDE "$file\n";
        }
        my $warning = "noisy-$kind";
        count_warning($warning);
        push @delayed_warnings, "$file:1:1 ... 1, Warning - Skipping `$file` because it seems to have more noise ($unknown) than unique words ($unique) (total: $unrecognized / $words). ($warning)\n";
        push @cleanup_directories, $directory;
        next;
      }
    }
    push @directories, $directory;
    unless ($kind =~ /^file/ && -s "$directory/unknown") {
      next;
    }
    open UNKNOWN, '<:utf8', "$directory/unknown";
    for my $token (<UNKNOWN>) {
      $token =~ s/\R//;
      next unless $token =~ /./;
      my ($key, $char) = collate_key($token);
      $letter_map{$char} = () unless defined $letter_map{$char};
      my %word_map = ();
      %word_map = %{$letter_map{$char}{$key}} if defined $letter_map{$char}{$key};
      $word_map{$token} = 1;
      $letter_map{$char}{$key} = \%word_map;
    }
    close UNKNOWN;
  }
  close SHOULD_EXCLUDE;
  close TIMING_REPORT if $timing_report;

  summarize_totals(
    sub {
      my ($hits, $files, $context, $pattern) = @_;
      return "# hit-count: $hits$files\n$context$pattern\n\n";
    },
    CheckSpelling::Util::get_file_from_env('candidate_summary', '/dev/stderr'),
    \@candidates,
    \@candidate_totals,
    \@candidate_file_counts,
  );

  summarize_totals(
    sub {
      my (undef, undef, $context, $pattern) = @_;
      $context =~ s/^# //gm;
      chomp $context;
      my $details;
      if ($context =~ /^(.*?)$(.*)/ms) {
        ($context, $details) = ($1, $2);
        $details = "\n$details" if $details;
      }
      $context = 'Pattern' unless $context;
      return "##### $context$details\n```\n$pattern\n```\n\n";
    },
    CheckSpelling::Util::get_file_from_env('forbidden_summary', '/dev/stderr'),
    \@forbidden,
    \@forbidden_totals,
  );

  group_related_words();

  if (defined $ENV{'expect'}) {
    $ENV{'expect'} =~ /(.*)/;
    load_expect($1);
    harmonize_expect();
  }

  # log_skip_item counts occurrences in the PACKAGE variable %seen, and the
  # limited-references loop below reads those counts. It must therefore be
  # `our` here: a `my %seen` would be a separate, always-empty hash and the
  # summary would never be emitted. Both per-run maps start empty.
  our %seen = ();
  our %last_seen = ();
  our %counters;
  %counters = ();

  if (-s $early_warnings) {
    open WARNINGS, '<:utf8', $early_warnings;
    for my $warning (<WARNINGS>) {
      chomp $warning;
      count_warning($warning);
      next if should_skip_warning($warning);
      print WARNING_OUTPUT "$warning\n";
    }
    close WARNINGS;
  }

  # Pass 3: rewrite and emit the per-file warnings.
  my %unknown_file_word_count;
  for my $directory (@directories) {
    next unless (-s "$directory/warnings");
    next unless open(NAME, '<:utf8', "$directory/name");
    my $file=<NAME>;
    close NAME;
    my $kind = get_special($file, $special);
    open WARNINGS, '<:utf8', "$directory/warnings";
    if ($kind ne 'file-list') {
      for my $warning (<WARNINGS>) {
        chomp $warning;
        if ($warning =~ m/:(\d+):(\d+ \.\.\. \d+): `(.*)`/) {
          my ($line, $range, $item) = ($1, $2, $3);
          my $wrapped = CheckSpelling::Util::wrap_in_backticks($item);
          my $reason = 'unrecognized-spelling';
          $reason .= "-$kind" unless $kind eq 'file';
          $warning =~ s/:\d+:\d+ \.\.\. \d+: `.*`/:$line:$range, Warning - $wrapped is not a recognized word ($reason)/;
          next if log_skip_item($item, $file, $warning, $unknown_word_limit);
          count_warning($warning) if $kind ne 'file';
        } else {
          if ($warning =~ /\`(.*?)\` in line \(token-is-substring\)/) {
            next if skip_item($1);
          }
          count_warning($warning);
        }
        next if should_skip_warning($warning);
        print WARNING_OUTPUT "$file$warning\n";
      }
    } else {
      # Warnings for the file list: the leading number selects the path
      # from @check_file_paths and is rewritten to line 1.
      for my $warning (<WARNINGS>) {
        chomp $warning;
        next unless $warning =~ s/^:(\d+)/:1/;
        $file = $check_file_paths[$1];
        if ($warning =~ m/:(\d+ \.\.\. \d+): `(.*)`/) {
          my ($range, $item) = ($1, $2);
          my $wrapped = CheckSpelling::Util::wrap_in_backticks($item);
          $warning =~ s/:\d+ \.\.\. \d+: `.*`/:$range, Warning - $wrapped is not a recognized word (check-file-path)/;
          next if skip_item($item);
          if (defined $unknown_file_word_limit) {
            next if ++$unknown_file_word_count{$item} > $unknown_file_word_limit;
          }
        }
        next if should_skip_warning($warning);
        print WARNING_OUTPUT "$file$warning\n";
        count_warning($warning);
      }
    }
    close WARNINGS;
  }
  close MORE_WARNINGS;

  for my $warning (@delayed_warnings) {
    next if should_skip_warning($warning);
    count_warning($warning);
    print WARNING_OUTPUT $warning;
  }
  if (defined $unknown_word_limit) {
    # For words that went over the limit, emit one closing warning with the
    # total number of occurrences.
    for my $warned_word (sort keys %last_seen) {
      my $warning_count = $seen{$warned_word} || 0;
      next unless $warning_count >= $unknown_word_limit;
      my $warning = $last_seen{$warned_word};
      $warning =~ s/\Q (unrecognized-spelling)\E/ -- found $warning_count times (limited-references)\n/;
      next if should_skip_warning($warning);
      print WARNING_OUTPUT $warning;
      count_warning($warning);
    }
  }
  close WARNING_OUTPUT;

  if (%counters) {
    my $continue='';
    print COUNTER_SUMMARY "{\n";
    for my $code (sort keys %counters) {
      print COUNTER_SUMMARY qq<$continue"$code": $counters{$code}\n>;
      $continue=',';
    }
    print COUNTER_SUMMARY "}\n";
  }
  close COUNTER_SUMMARY;

  # display the current unknown
  for my $char (sort keys %letter_map) {
    for my $key (sort CheckSpelling::Util::case_biased keys(%{$letter_map{$char}})) {
      my %word_map = %{$letter_map{$char}{$key}};
      my @words = keys(%word_map);
      if (scalar(@words) > 1) {
        print $key." (".(join ", ", sort { length($a) <=> length($b) || $a cmp $b } @words).")";
      } else {
        print $words[0];
      }
      print "\n";
    }
  }
}
645
6461;