File Coverage

File:lib/CheckSpelling/SpellingCollator.pm
Coverage:89.8%

linestmtbrancondsubtimecode
1#! -*-perl-*-
2
3package CheckSpelling::SpellingCollator;
4
5our $VERSION='0.1.0';
6
1
1
107423
2
use 5.022;
7
1
1
1
1
1
5
use utf8;
8
1
1
1
12
1
47
use feature 'unicode_strings';
9
1
1
1
2
0
16
use warnings;
10
1
1
1
2
0
21
use File::Path qw(remove_tree);
11
1
1
1
250
1
3077
use CheckSpelling::Util;
12
13my %letter_map;
14my %ignored_event_map;
15my $disable_word_collating;
16my $shortest_word;
17
18my %last_seen;
19
# Extract a numeric field from a stats record.
#
#   $record - text to search (main passes the first line of a `stats` file)
#   $field  - literal name of the field to look up
#
# Returns the run of digits that follows "<field>:" (optionally separated by
# whitespace), or 0 when the record is undefined or lacks the field.
sub get_field {
  my ($record, $field) = @_;
  return 0 unless defined $record;
  # \Q...\E keeps the field name literal even if it ever contains regex
  # metacharacters.
  return 0 unless $record =~ /\b\Q$field\E:\s*(\d+)/;
  return $1;
}
25
# Extract a bracketed, comma-separated list from a stats record.
#
#   $record - text to search (main passes the first line of a `stats` file)
#   $field  - literal name of the field to look up
#
# Looks for "<field>: [a, b, c]" and returns the elements as a list.
# Returns the empty list when the record is undefined, the field is missing,
# or the brackets are empty.
sub get_array {
  my ($record, $field) = @_;
  return () unless defined $record;
  # \Q...\E keeps the field name literal inside the pattern.
  return () unless $record =~ /\b\Q$field\E: \[([^\]]+)\]/;
  my $values = $1;
  # Drop padding just inside the brackets so the first and last elements are
  # as clean as the ones in the middle.
  $values =~ s/^\s+|\s+$//g;
  return split /\s*,\s*/, $values;
}
32
# Pick the score to record for an expected item.
#
# Returns $next when it is a true value that is numerically smaller than
# $value; otherwise (including when $next is 0 or undefined) returns $value.
sub maybe {
  my ($next, $value) = @_;
  return ($next && $next < $value) ? $next : $value;
}
38
39my %expected = ();
# Check whether $item is covered by the package-level %expected table and
# record how closely it matched.
#
#   $item  - word to look up (typographic apostrophes are folded to ')
#   $value - score describing the kind of lookup the caller is attempting
#
# Lookup order:
#   1. the item exactly as given         -> stores min(existing, $value)
#   2. first letter kept, rest lowercase -> stores $value + .1 (see below)
#   3. fully lowercased                  -> stores $value + .2 (see below)
# Steps 2 and 3 only apply when the item starts with an ASCII capital. For
# them an existing non-zero score is kept when it is already smaller.
#
# Returns $value on a hit and 0 on a miss.
sub expect_item {
  my ($item, $value) = @_;
  our %expected;
  $item =~ s/’/'/g;

  # Exact hit: keep whichever score is lower.
  if (defined(my $known = $expected{$item})) {
    $expected{$item} = ($value < $known) ? $value : $known;
    return $value;
  }

  # Case-folded fallbacks are only attempted for capitalised items.
  return 0 unless $item =~ /^([A-Z])(.*)/;
  my $title_case = $1 . lc($2);

  for my $attempt ([$title_case, .1], [lc($title_case), .2]) {
    my ($candidate, $penalty) = @{$attempt};
    my $known = $expected{$candidate};
    next unless defined $known;
    my $score = $value + $penalty;
    # A zero (never matched) score is always replaced by the new score.
    $expected{$candidate} = ($known && $known < $score) ? $known : $score;
    return $value;
  }

  return 0;
}
65
# Decide whether $word is covered by the expected-words table.
#
# Tries, in order:
#   1 - the word as written
#   2 - the lowercased word
#   3 - a crude stem of the lowercased word: "ies" -> "y", a trailing "s"
#       removed, "ed" removed after a non-vowel, or a trailing 'd / 's
#       removed
# Returns the number of the first attempt that hit (1, 2 or 3), or 0 when
# nothing matched or no stem could be derived.
sub skip_item {
  my ($word) = @_;
  return 1 if expect_item($word, 1);

  my $lowered = lc $word;
  return 2 if expect_item($lowered, 2);

  my $stem;
  if ($lowered =~ /.s$/) {
    $stem = $lowered;
    # Plurals: prefer the "ies" -> "y" rewrite, otherwise drop the "s".
    $stem =~ s/s$// unless $stem =~ s/ies$/y/;
  } elsif ($lowered =~ /^(.+[^aeiou])ed$/) {
    $stem = $1;
  } elsif ($lowered =~ /^(.+)'[ds]$/) {
    # NOTE(review): words ending in 's already match the plural branch
    # above (leaving a trailing apostrophe), so only 'd reaches this
    # branch -- confirm that is intended.
    $stem = $1;
  }

  return 0 unless defined $stem;
  return expect_item($stem, 3) ? 3 : 0;
}
87
# Tell whether a warning belongs to an ignored event.
#
# The event code is the parenthesised token at the very end of the warning,
# e.g. "... (noisy-file)". Returns 1 when that code has a true entry in the
# package-level %ignored_event_map, otherwise 0 (also when the warning
# carries no trailing code).
sub should_skip_warning {
  my ($warning) = @_;
  our %ignored_event_map;
  my ($code) = $warning =~ /\(([-\w]+)\)$/;
  return (defined $code && $ignored_event_map{$code}) ? 1 : 0;
}
97
# Decide whether the caller should skip an unrecognized-word warning and
# keep per-word bookkeeping in the package-level %seen / %last_seen.
#
#   $item               - the unrecognized word
#   $file               - file name prefix for the warning
#   $warning            - warning text (appended to $file when printed)
#   $unknown_word_limit - optional cap on repeat reports per word
#
# Returns 1 when the warning's event is ignored, when the word is expected,
# or when the word was seen before. A repeat is written to MORE_WARNINGS
# while no limit is set or the word is still under the limit; past the limit
# it is only remembered in %last_seen. Returns 0 the first time a word is
# seen, and the caller reports that one itself.
sub log_skip_item {
  my ($item, $file, $warning, $unknown_word_limit) = @_;
  our (%seen, %last_seen);
  return 1 if should_skip_warning($warning) || skip_item($item);

  my $previous = $seen{$item};
  unless (defined $previous) {
    # First sighting.
    $seen{$item} = 1;
    return 0;
  }

  unless (defined $unknown_word_limit) {
    # Without a limit the sighting count is left untouched.
    print MORE_WARNINGS "$file$warning\n";
    return 1;
  }

  # With a limit every repeat bumps the count, reported or not.
  $seen{$item} = $previous + 1;
  if ($previous < $unknown_word_limit) {
    print MORE_WARNINGS "$file$warning\n";
  } else {
    $last_seen{$item} = "$file$warning";
  }
  return 1;
}
117
# Reduce a collation key to a simple stem so that related forms group
# together.
#
# Returns the key unchanged when word collating is disabled. Otherwise:
#   - "...ies" becomes "...y"          (key longer than shortest_word + 1)
#   - other "...s" loses the "s"       (key longer than shortest_word)
#   - "...ed" after a non-vowel loses the "ed"
#                                      (key longer than shortest_word + 1)
# The package-level $shortest_word defaults to 2 and is set here if it was
# still undefined.
sub stem_word {
  my ($key) = @_;
  our ($disable_word_collating, $shortest_word);
  return $key if $disable_word_collating;

  $shortest_word //= 2;
  my $length = length $key;
  my $long_enough_for_two = $length > ($shortest_word + 1);

  if ($key =~ /.s$/) {
    if ($key =~ /ies$/) {
      $key =~ s/ies$/y/ if $long_enough_for_two;
    } elsif ($length > $shortest_word) {
      $key =~ s/s$//;
    }
  } elsif ($long_enough_for_two && $key =~ /.[^aeiou]ed$/) {
    $key =~ s/ed$//;
  }

  return $key;
}
137
# Compute the grouping key and bucket character for a token.
#
# Returns the two-element list ($key, $char).
#   - With word collating disabled: the token itself and its lowercased
#     first character.
#   - Otherwise: the lowercased token with apostrophe noise removed (runs of
#     apostrophes collapsed, trailing 's / 'd dropped, a leading apostrophe
#     -- optionally preceded by one character other than i -- dropped, a
#     trailing apostrophe dropped) and that key's first character.
sub collate_key {
  my ($key) = @_;
  our $disable_word_collating;

  return ($key, lc(substr($key, 0, 1))) if $disable_word_collating;

  my $normalized = lc $key;
  for ($normalized) {
    s/''+/'/g;
    s/'[sd]$//;
    s/^[^Ii]?'+(.*)/$1/;
    s/(.*?)'$/$1/;
  }
  return ($normalized, substr($normalized, 0, 1));
}
154
# (Re)load the package-level %expected table from a file.
#
#   $expect - path of a UTF-8 text file with one expected word per line
#
# %expected is always emptied first. Every line, minus its line break,
# becomes a key with the value 0. An unreadable file is not an error: the
# table is simply left empty. Returns nothing.
sub load_expect {
  my ($expect) = @_;
  our %expected;
  %expected = ();

  # A lexical handle avoids sharing a global bareword handle with the rest
  # of the package.
  open(my $expect_fh, '<:utf8', $expect) or return;
  while (my $word = <$expect_fh>) {
    $word =~ s/\R//;
    $expected{$word} = 0;
  }
  close $expect_fh;
  return;
}
167
# Drop expected words that no longer line up with what was actually found.
#
# For every word in the package-level %expected, look up the group of
# unknown tokens in %letter_map that shares its collation key. The word is
# deleted from %expected when such a group exists, does not contain the word
# itself, has at most two members, and -- if the word equals its own key --
# has exactly one member.
sub harmonize_expect {
  our (%letter_map, %expected);

  WORD:
  for my $word (keys %expected) {
    my ($key, $char) = collate_key($word);
    next WORD unless defined $letter_map{$char}{$key};

    my $variants = $letter_map{$char}{$key};
    next WORD if defined $variants->{$word};

    my $variant_count = scalar keys %{$variants};
    next WORD if $variant_count > 2;
    next WORD if $word eq $key && $variant_count > 1;

    delete $expected{$word};
  }
}
187
# Fold inflected keys in the package-level %letter_map into their stems.
#
# Does nothing when word collating is disabled. Otherwise, for every key
# whose stem (per stem_word) differs from the key and already exists in the
# same letter bucket, the key's words are merged into the stem's word set
# and the inflected key is removed.
sub group_related_words {
  our ($disable_word_collating, %letter_map);
  return if $disable_word_collating;

  # group related words
  for my $char (sort CheckSpelling::Util::number_biased keys %letter_map) {
    for my $plural_key (sort keys(%{$letter_map{$char}})) {
      my $key = stem_word($plural_key);
      next if $key eq $plural_key;
      next unless defined $letter_map{$char}{$key};

      # Build the merged set in a fresh hash, then swap it in.
      my %merged = %{$letter_map{$char}{$key}};
      $merged{$_} = 1 for keys(%{$letter_map{$char}{$plural_key}});

      $letter_map{$char}{$key} = \%merged;
      delete $letter_map{$char}{$plural_key};
    }
  }
}
208
# Tally a warning by its event code in the package-level %counters.
#
# The event code is the parenthesised token at the very end of the warning,
# e.g. "... (unrecognized-spelling)". Warnings without such a code, and
# warnings whose code has an entry in %ignored_event_map, are not counted.
# Returns nothing.
sub count_warning {
  my ($warning) = @_;
  our (%counters, %ignored_event_map);

  return unless $warning =~ /\(([-\w]+)\)$/;
  my $code = $1;

  # This must be `return`, not `next`: there is no loop in this sub, so
  # `next` would abandon the current iteration of whatever loop the caller
  # happens to be running.
  return if defined $ignored_event_map{$code};

  ++$counters{$code};
  return;
}
219
# Append one row to the timing report.
#
#   $name       - label for the row (double quotes are backslash-escaped)
#   $start_time - start timestamp supplied by the caller
#   $directory  - directory holding the marker file
#   $marker     - file whose modification time is used as the finish time
#
# Writes `"name", start, finish` to the TIMING_REPORT handle, which the
# caller must already have opened.
sub report_timing {
  my ($name, $start_time, $directory, $marker) = @_;
  # Element 9 of stat() is the modification time.
  my @marker_stat = stat "$directory/$marker";
  my $end_time = $marker_stat[9];
  $name =~ s/"/\\"/g;
  print TIMING_REPORT "\"$name\", $start_time, $end_time\n";
}
226
# Read a pattern file, attaching to each pattern the comment lines that
# immediately precede it.
#
#   $path - name of the environment variable holding the file's path
#
# File format, as handled here:
#   - lines starting with "# " accumulate as context for the next pattern
#   - any other line starting with "#" discards the accumulated context
#   - an empty line discards the accumulated context
#   - every other line is a pattern; it is returned as the accumulated
#     context (newlines kept) followed by the pattern without its newline
#
# Returns the list of patterns with context, or nothing when the variable
# is unset or the file cannot be opened.
sub get_pattern_with_context {
  my ($path) = @_;
  return unless defined $ENV{$path};
  # Presumably captured to untaint the value; note that . stops at the
  # first newline.
  my ($file) = $ENV{$path} =~ /(.*)/;
  # Lexical handle and a named line variable: reading into the global $_
  # would clobber the caller's $_.
  open(my $items_fh, '<:utf8', $file) or return;

  my @items;
  my $context = '';
  while (my $pattern = <$items_fh>) {
    if ($pattern =~ /^#/) {
      if ($pattern =~ /^# /) {
        $context .= $pattern;
      } else {
        $context = '';
      }
      next;
    }
    chomp $pattern;
    push @items, $context.$pattern if $pattern =~ /./;
    # Context never carries past a pattern or a blank line.
    $context = '';
  }
  close $items_fh;
  return @items;
}
256
# Write a summary of pattern hits, most frequently hit first.
#
#   $formatter   - code ref called as
#                  $formatter->($hits, $file_count_text, $context, $rule);
#                  whatever it returns is printed
#   $path        - output file (overwritten)
#   $items       - array ref of patterns, each optionally preceded by
#                  newline-terminated context lines
#   $totals      - array ref of hit counts, parallel to $items
#   $file_counts - optional array ref of per-pattern file counts; used as a
#                  tie-breaker and passed on as " file-count: N"
#
# Nothing is written (and the file is not touched) when $totals is empty or
# the file cannot be opened. Entries with no hits are omitted.
sub summarize_totals {
  my ($formatter, $path, $items, $totals, $file_counts) = @_;
  return unless @{$totals};
  return unless open my $fh, '>:utf8', $path;

  # Descending by hits, then (when available) by number of files.
  my @indices = sort {
    $totals->[$b] <=> $totals->[$a]
      || ($file_counts ? $file_counts->[$b] <=> $file_counts->[$a] : 0)
  } 0 .. $#{$totals};

  for my $i (@indices) {
    # Sorted descending, so the first non-positive total ends the report.
    last unless $totals->[$i] > 0;

    my $rule_with_context = $items->[$i];
    # Split the leading context lines from the final (rule) line.
    my ($description, $rule) = $rule_with_context =~ /^(.*\n)([^\n]+)$/s;
    ($description, $rule) = ('', $rule_with_context) unless defined $rule;

    my $file_count_text = $file_counts ? " file-count: $file_counts->[$i]" : "";
    print $fh $formatter->($totals->[$i], $file_count_text, $description, $rule);
  }
  close $fh;
}
291
# Classify a checked file by comparing its name with the special inputs.
#
#   $file    - name of the file that was checked
#   $special - hash ref with the keys file_list, pr_title_file,
#              pr_description_file and commit_messages
#
# Returns 'file-list', 'pr-title' or 'pr-description' on an exact match with
# the corresponding entry, 'commit-message' when $file starts with the
# (non-empty) commit_messages prefix, and 'file' otherwise.
sub get_special {
  my ($file, $special) = @_;
  return 'file-list' if $file eq $special->{'file_list'};
  return 'pr-title' if $file eq $special->{'pr_title_file'};
  return 'pr-description' if $file eq $special->{'pr_description_file'};

  my $commit_messages = $special->{'commit_messages'};
  # rindex with a start position of 0 can only match at offset 0, which makes
  # it a "starts with" test. Without the position it reports the *last*
  # occurrence, so a path that repeats its prefix would not be recognised.
  return 'commit-message'
    if defined $commit_messages
      && length($commit_messages)
      && rindex($file, $commit_messages, 0) == 0;

  return 'file';
}
300
# Collate per-file spell-check results.
#
# Input: directory names, one per line, read from <> (ARGV / STDIN). Each
# directory is expected to hold a `name` file (the checked file's name) and
# may hold `skipped`, `stats`, `unknown` and `warnings` files.
#
# Configuration is taken from the environment through CheckSpelling::Util
# (output paths, limits, disabled checks, special file names) and from
# $ENV{expect}, $ENV{candidates_path} and $ENV{forbidden_path}.
#
# Output:
#   - warnings           -> WARNING_OUTPUT / MORE_WARNINGS
#   - per-code counters  -> COUNTER_SUMMARY (JSON-like object)
#   - files to exclude   -> SHOULD_EXCLUDE
#   - candidate / forbidden pattern summaries (via summarize_totals)
#   - the collated list of unknown words -> STDOUT
sub main {
  my @directories;
  # Directories of skipped / noisy files; collected here but not read
  # anywhere in this sub.
  my @cleanup_directories;
  my @check_file_paths;

  my $early_warnings = CheckSpelling::Util::get_file_from_env('early_warnings', '/dev/null');
  my $warning_output = CheckSpelling::Util::get_file_from_env('warning_output', '/dev/stderr');
  my $more_warnings = CheckSpelling::Util::get_file_from_env('more_warnings', '/dev/stderr');
  my $counter_summary = CheckSpelling::Util::get_file_from_env('counter_summary', '/dev/stderr');
  my $ignored_events = CheckSpelling::Util::get_file_from_env('ignored_events', '');
  if ($ignored_events) {
    our %ignored_event_map;
    for my $event (split /,/, $ignored_events) {
      $ignored_event_map{$event} = 1;
    }
  }
  my $should_exclude_file = CheckSpelling::Util::get_file_from_env('should_exclude_file', '/dev/null');
  my $unknown_word_limit = CheckSpelling::Util::get_val_from_env('unknown_word_limit', undef);
  my $unknown_file_word_limit = CheckSpelling::Util::get_val_from_env('unknown_file_word_limit', undef);
  my $candidate_example_limit = CheckSpelling::Util::get_file_from_env('INPUT_CANDIDATE_EXAMPLE_LIMIT', '3');
  my $disable_flags = CheckSpelling::Util::get_file_from_env('INPUT_DISABLE_CHECKS', '');
  my $only_check_changed_files = CheckSpelling::Util::get_file_from_env('INPUT_ONLY_CHECK_CHANGED_FILES', '');
  my $disable_noisy_file = $disable_flags =~ /(?:^|,|\s)noisy-file(?:,|\s|$)/;
  our $disable_word_collating = $only_check_changed_files || $disable_flags =~ /(?:^|,|\s)word-collating(?:,|\s|$)/;
  our $shortest_word = CheckSpelling::Util::get_val_from_env('INPUT_SHORTEST_WORD', undef);
  my $file_list = CheckSpelling::Util::get_file_from_env('check_file_names', '');
  my $pr_title_file = CheckSpelling::Util::get_file_from_env('pr_title_file', '');
  my $pr_description_file = CheckSpelling::Util::get_file_from_env('pr_description_file', '');
  my $commit_messages = CheckSpelling::Util::get_file_from_env('commit_messages', '');
  my $timing_report = CheckSpelling::Util::get_file_from_env('timing_report', '');
  my $special = {
    'file_list' => $file_list,
    'pr_title_file' => $pr_title_file,
    'pr_description_file' => $pr_description_file,
    'commit_messages' => $commit_messages,
  };
  my $start_time;

  # MORE_WARNINGS and TIMING_REPORT are also written by log_skip_item and
  # report_timing, so they have to stay package-level handles.
  open WARNING_OUTPUT, '>:utf8', $warning_output;
  open MORE_WARNINGS, '>:utf8', $more_warnings;
  open COUNTER_SUMMARY, '>:utf8', $counter_summary;
  open SHOULD_EXCLUDE, '>:utf8', $should_exclude_file;
  if ($timing_report) {
    open TIMING_REPORT, '>:utf8', $timing_report;
    print TIMING_REPORT "file, start, finish\n";
  }

  my @candidates = get_pattern_with_context('candidates_path');
  my @candidate_totals = (0) x scalar @candidates;
  my @candidate_file_counts = (0) x scalar @candidates;

  my @forbidden = get_pattern_with_context('forbidden_path');
  my @forbidden_totals = (0) x scalar @forbidden;

  my @delayed_warnings;
  our %letter_map = ();

  # Checked file name => result directory.
  my %file_map = ();

  for my $directory (<>) {
    chomp $directory;
    next unless $directory =~ /^(.*)$/;
    $directory = $1;
    unless (-e $directory) {
      print STDERR "Could not find: $directory\n";
      next;
    }
    unless (-d $directory) {
      print STDERR "Not a directory: $directory\n";
      next;
    }

    # if there's no filename, we can't report
    next unless open(NAME, '<:utf8', "$directory/name");
    my $file=<NAME>;
    close NAME;

    $file_map{$file} = $directory;
  }

  # First pass: per-file statistics, pattern hits and unknown tokens.
  for my $file (sort keys %file_map) {
    my $directory = $file_map{$file};
    if ($timing_report) {
      $start_time = (stat "$directory/name")[9];
    }

    if (-e "$directory/skipped") {
      open SKIPPED, '<:utf8', "$directory/skipped";
      my $reason=<SKIPPED>;
      close SKIPPED;
      chomp $reason;
      push @delayed_warnings, "$file:1:1 ... 1, Warning - Skipping `$file` because $reason\n";
      print SHOULD_EXCLUDE "$file\n";
      push @cleanup_directories, $directory;
      report_timing($file, $start_time, $directory, 'skipped') if ($timing_report);
      next;
    }

    # stats isn't written if there was nothing interesting in the file
    unless (-s "$directory/stats") {
      report_timing($file, $start_time, $directory, 'warnings') if ($timing_report);
      push @directories, $directory;
      next;
    }

    if ($file eq $file_list) {
      open FILE_LIST, '<:utf8', $file_list;
      # Warning line numbers are 1-based, so index 0 is a filler entry.
      push @check_file_paths, '0 placeholder';
      for my $check_file_path (<FILE_LIST>) {
        chomp $check_file_path;
        push @check_file_paths, $check_file_path;
      }
      close FILE_LIST;
    }

    my ($words, $unrecognized, $unknown, $unique);

    {
      open STATS, '<:utf8', "$directory/stats";
      my $stats=<STATS>;
      close STATS;
      $words=get_field($stats, 'words');
      $unrecognized=get_field($stats, 'unrecognized');
      $unknown=get_field($stats, 'unknown');
      $unique=get_field($stats, 'unique');
      my @candidate_list;
      if (@candidate_totals) {
        @candidate_list=get_array($stats, 'candidates');
        my @lines=get_array($stats, 'candidate_lines');
        if (@candidate_list) {
          for (my $i=0; $i < scalar @candidate_list; $i++) {
            my $hits = $candidate_list[$i];
            if ($hits) {
              $candidate_totals[$i] += $hits;
              # Only the first few files per pattern get an example notice.
              if ($candidate_file_counts[$i]++ < $candidate_example_limit) {
                my $pattern = (split /\n/,$candidates[$i])[-1];
                my $position = $lines[$i];
                $position =~ s/:(\d+)$/ ... $1/;
                my $wrapped = CheckSpelling::Util::truncate_with_ellipsis(CheckSpelling::Util::wrap_in_backticks($pattern), 99);
                my $candidate_label = '';
                if ($candidates[$i] =~ /^#\s+(\S.+)/) {
                  $candidate_label = " ($1)";
                }
                push @delayed_warnings, "$file:$position, Notice - Line matches candidate pattern$candidate_label $wrapped (candidate-pattern)\n";
              }
            }
          }
        }
      }
      if (@forbidden_totals) {
        my @forbidden_list=get_array($stats, 'forbidden');
        my @lines=get_array($stats, 'forbidden_lines');
        if (@forbidden_list) {
          for (my $i=0; $i < scalar @forbidden_list; $i++) {
            my $hits = $forbidden_list[$i];
            if ($hits) {
              $forbidden_totals[$i] += $hits;
            }
          }
        }
      }
      #print STDERR "$file (unrecognized: $unrecognized; unique: $unique; unknown: $unknown, words: $words, candidates: [".join(", ", @candidate_list)."])\n";
    }

    report_timing($file, $start_time, $directory, 'unknown') if ($timing_report);
    my $kind = get_special($file, $special);
    # These heuristics are very new and need tuning/feedback
    if (
        ($unknown > $unique)
        # || ($unrecognized > $words / 2)
    ) {
      unless ($disable_noisy_file) {
        if ($kind eq 'file') {
          print SHOULD_EXCLUDE "$file\n";
        }
        my $warning = "noisy-$kind";
        # The delayed warning below ends in "($warning)" and is counted when
        # it is emitted; the bare code has no parentheses, so passing it to
        # count_warning here would never count anything.
        push @delayed_warnings, "$file:1:1 ... 1, Warning - Skipping `$file` because it seems to have more noise ($unknown) than unique words ($unique) (total: $unrecognized / $words). ($warning)\n";
        push @cleanup_directories, $directory;
        next;
      }
    }
    push @directories, $directory;
    unless ($kind =~ /^file/ && -s "$directory/unknown") {
      next;
    }
    open UNKNOWN, '<:utf8', "$directory/unknown";
    for my $token (<UNKNOWN>) {
      $token =~ s/\R//;
      next unless $token =~ /./;
      my ($key, $char) = collate_key($token);
      # letter => collation key => { token => 1 }
      $letter_map{$char}{$key}{$token} = 1;
    }
    close UNKNOWN;
  }
  close SHOULD_EXCLUDE;
  close TIMING_REPORT if $timing_report;

  summarize_totals(
    sub {
      my ($hits, $files, $context, $pattern) = @_;
      return "# hit-count: $hits$files\n$context$pattern\n\n",
    },
    CheckSpelling::Util::get_file_from_env('candidate_summary', '/dev/stderr'),
    \@candidates,
    \@candidate_totals,
    \@candidate_file_counts,
  );

  summarize_totals(
    sub {
      my (undef, undef, $context, $pattern) = @_;
      $context =~ s/^# //gm;
      chomp $context;
      my $details;
      if ($context =~ /^(.*?)$(.*)/ms) {
        ($context, $details) = ($1, $2);
        $details = "\n$details" if $details;
      }
      $context = 'Pattern' unless $context;
      return "##### $context$details\n```\n$pattern\n```\n\n";
    },
    CheckSpelling::Util::get_file_from_env('forbidden_summary', '/dev/stderr'),
    \@forbidden,
    \@forbidden_totals,
  );

  group_related_words();

  if (defined $ENV{'expect'}) {
    $ENV{'expect'} =~ /(.*)/;
    load_expect($1);
    harmonize_expect();
  }

  # These must be the package variables: log_skip_item records its counts in
  # the package-level %seen and %last_seen, and the limited-references
  # summary below reads them back. A lexical %seen here would hide those
  # counts. Both are reset so that every run starts clean.
  our %seen = ();
  our %last_seen = ();
  our %counters;
  %counters = ();

  if (-s $early_warnings) {
    open WARNINGS, '<:utf8', $early_warnings;
    for my $warning (<WARNINGS>) {
      chomp $warning;
      count_warning($warning);
      next if should_skip_warning($warning);
      print WARNING_OUTPUT "$warning\n";
    }
    close WARNINGS;
  }

  # Second pass: turn each file's raw warnings into reportable ones.
  my %unknown_file_word_count;
  for my $directory (@directories) {
    next unless (-s "$directory/warnings");
    next unless open(NAME, '<:utf8', "$directory/name");
    my $file=<NAME>;
    close NAME;
    my $kind = get_special($file, $special);
    open WARNINGS, '<:utf8', "$directory/warnings";
    if ($kind ne 'file-list') {
      for my $warning (<WARNINGS>) {
        chomp $warning;
        if ($warning =~ m/:(\d+):(\d+ \.\.\. \d+): `(.*)`/) {
          my ($line, $range, $item) = ($1, $2, $3);
          my $wrapped = CheckSpelling::Util::wrap_in_backticks($item);
          my $reason = 'unrecognized-spelling';
          $reason .= "-$kind" unless $kind eq 'file';
          $warning =~ s/:\d+:\d+ \.\.\. \d+: `.*`/:$line:$range, Warning - $wrapped is not a recognized word ($reason)/;
          next if log_skip_item($item, $file, $warning, $unknown_word_limit);
          count_warning($warning) if $kind ne 'file';
        } else {
          if ($warning =~ /\`(.*?)\` in line \(token-is-substring\)/) {
            next if skip_item($1);
          }
          count_warning($warning);
        }
        next if should_skip_warning($warning);
        print WARNING_OUTPUT "$file$warning\n";
      }
    } else {
      # For the file list, the warning's line number selects the path the
      # warning is about.
      for my $warning (<WARNINGS>) {
        chomp $warning;
        next unless $warning =~ s/^:(\d+)/:1/;
        $file = $check_file_paths[$1];
        if ($warning =~ m/:(\d+ \.\.\. \d+): `(.*)`/) {
          my ($range, $item) = ($1, $2);
          my $wrapped = CheckSpelling::Util::wrap_in_backticks($item);
          $warning =~ s/:\d+ \.\.\. \d+: `.*`/:$range, Warning - $wrapped is not a recognized word (check-file-path)/;
          next if skip_item($item);
          if (defined $unknown_file_word_limit) {
            next if ++$unknown_file_word_count{$item} > $unknown_file_word_limit;
          }
        }
        next if should_skip_warning($warning);
        print WARNING_OUTPUT "$file$warning\n";
        count_warning($warning);
      }
    }
    close WARNINGS;
  }
  close MORE_WARNINGS;

  for my $warning (@delayed_warnings) {
    next if should_skip_warning($warning);
    count_warning($warning);
    print WARNING_OUTPUT $warning;
  }
  if (defined $unknown_word_limit) {
    # One summary line per word whose repeats ran into the limit.
    for my $warned_word (sort keys %last_seen) {
      my $warning_count = $seen{$warned_word} || 0;
      next unless $warning_count >= $unknown_word_limit;
      my $warning = $last_seen{$warned_word};
      $warning =~ s/\Q (unrecognized-spelling)\E/ -- found $warning_count times (limited-references)\n/;
      next if should_skip_warning($warning);
      print WARNING_OUTPUT $warning;
      count_warning($warning);
    }
  }
  close WARNING_OUTPUT;

  if (%counters) {
    my $continue='';
    print COUNTER_SUMMARY "{\n";
    for my $code (sort keys %counters) {
      print COUNTER_SUMMARY qq<$continue"$code": $counters{$code}\n>;
      $continue=',';
    }
    print COUNTER_SUMMARY "}\n";
  }
  close COUNTER_SUMMARY;

  # display the current unknown
  for my $char (sort keys %letter_map) {
    for my $key (sort CheckSpelling::Util::case_biased keys(%{$letter_map{$char}})) {
      my %word_map = %{$letter_map{$char}{$key}};
      my @words = keys(%word_map);
      if (scalar(@words) > 1) {
        print $key." (".(join ", ", sort { length($a) <=> length($b) || $a cmp $b } @words).")";
      } else {
        print $words[0];
      }
      print "\n";
    }
  }
}
650
6511;