File Coverage

File: lib/CheckSpelling/SpellingCollator.pm
Coverage: 84.7%

line | stmt | bran | cond | sub | time | code
1#! -*-perl-*-
2
3package CheckSpelling::SpellingCollator;
4
5our $VERSION='0.1.0';
6
1
1
1
110152
2
37
use warnings;
7
1
1
1
2
0
22
use File::Path qw(remove_tree);
8
1
1
1
199
1
2432
use CheckSpelling::Util;
9
10my %letter_map;
11my %ignored_event_map;
12my $disable_word_collating;
13
14my %last_seen;
15
16sub get_field {
17
40
28
  my ($record, $field) = @_;
18
40
444
  return 0 unless $record =~ (/\b$field:\s*(\d+)/);
19
28
31
  return $1;
20}
21
22sub get_array {
23
2
2
  my ($record, $field) = @_;
24
2
17
  return () unless $record =~ (/\b$field: \[([^\]]+)\]/);
25
2
3
  my $values = $1;
26
2
3
  return split /\s*,\s*/, $values;
27}
28
29sub maybe {
30
7
4
  my ($next, $value) = @_;
31
7
8
  $next = $value unless $next && $next < $value;
32
7
6
  return $next;
33}
34
35my %expected = ();
36sub expect_item {
37
104
67
  my ($item, $value) = @_;
38
104
40
  our %expected;
39
104
44
  my $next;
40
104
98
  if (defined $expected{$item}) {
41
26
27
    $next = $expected{$item};
42
26
23
    $next = $value if $value < $next;
43  } elsif ($item =~ /^([A-Z])(.*)/) {
44
12
7
    $item = $1 . lc $2;
45
12
11
    if (defined $expected{$item}) {
46
2
1
      $next = $expected{$item};
47
2
2
      $next = maybe($next, $value + .1);
48    } else {
49
10
8
      $item = lc $item;
50
10
7
      if (defined $expected{$item}) {
51
5
2
        $next = $expected{$item};
52
5
4
        $next = maybe($next, $value + .2);
53      }
54    }
55  }
56
104
109
  return 0 unless defined $next;
57
33
26
  $expected{$item} = $next;
58
33
78
  return $value;
59}
60
61sub skip_item {
62
55
35
  my ($word) = @_;
63
55
23
  return 1 if expect_item($word, 1);
64
35
21
  my $key = lc $word;
65
35
18
  return 2 if expect_item($key, 2);
66
35
47
  if ($key =~ /.s$/) {
67
2
3
    if ($key =~ /ies$/) {
68
1
1
      $key =~ s/ies$/y/;
69    } else {
70
1
2
      $key =~ s/s$//;
71    }
72  } elsif ($key =~ /^(.+[^aeiou])ed$/) {
73
1
1
    $key = $1;
74  } elsif ($key =~ /^(.+)'[ds]$/) {
75
6
4
    $key = $1;
76  } else {
77
26
23
    return 0;
78  }
79
9
7
  return 3 if expect_item($key, 3);
80
0
0
  return 0;
81}
82
83sub should_skip_warning {
84
75
44
  my ($warning) = @_;
85
75
100
  if ($warning =~ /\(([-\w]+)\)$/) {
86
71
42
    my ($code) = ($1);
87
71
31
    our %ignored_event_map;
88
71
48
    return 1 if $ignored_event_map{$code};
89  }
90
74
65
  return 0;
91}
92
93sub log_skip_item {
94
51
60
  my ($item, $file, $warning, $unknown_word_limit) = @_;
95
51
42
  return 1 if should_skip_warning $warning;
96
51
24
  return 1 if skip_item($item);
97
22
13
  my $seen_count = $seen{$item};
98
22
13
  if (defined $seen_count) {
99
9
12
    if (!defined $unknown_word_limit || ($seen_count++ < $unknown_word_limit)) {
100
8
34
      print MORE_WARNINGS "$file$warning\n"
101    } else {
102
1
1
      our %last_seen;
103
1
1
      $last_seen{$item} = "$file$warning";
104    }
105
9
11
    $seen{$item} = $seen_count;
106
9
14
    return 1;
107  }
108
13
13
  $seen{$item} = 1;
109
13
13
  return 0;
110}
111
112sub stem_word {
113
22
10
  my ($key) = @_;
114
22
13
  our $disable_word_collating;
115
22
8
  return $key if $disable_word_collating;
116
117
22
28
  if ($key =~ /.s$/) {
118
3
6
    if ($key =~ /ies$/) {
119
1
1
      $key =~ s/ies$/y/;
120    } else {
121
2
4
      $key =~ s/s$//;
122    }
123  } elsif ($key =~ /.[^aeiou]ed$/) {
124
1
3
    $key =~ s/ed$//;
125  }
126
22
21
  return $key;
127}
128
129sub collate_key {
130
86
54
  my ($key) = @_;
131
86
38
  our $disable_word_collating;
132
86
42
  if ($disable_word_collating) {
133
16
15
    $char = lc substr $key, 0, 1;
134  } else {
135
70
48
    $key = lc $key;
136
70
50
    $key =~ s/''+/'/g;
137
70
40
    $key =~ s/'[sd]$//;
138
70
42
    $key =~ s/^[^Ii]?'+(.*)/$1/;
139
70
31
    $key =~ s/(.*?)'$/$1/;
140
70
65
    $char = substr $key, 0, 1;
141  }
142
86
106
  return ($key, $char);
143}
144
145sub load_expect {
146
12
519
  my ($expect) = @_;
147
12
5
  our %expected;
148
12
14
  %expected = ();
149
12
99
  if (open(EXPECT, '<:utf8', $expect)) {
150
12
63
    while ($word = <EXPECT>) {
151
43
63
      $word =~ s/\R//;
152
43
77
      $expected{$word} = 0;
153    }
154
12
32
    close EXPECT;
155  }
156}
157
158sub harmonize_expect {
159
11
6
  our $disable_word_collating;
160
11
6
  our %letter_map;
161
11
3
  our %expected;
162
163
11
19
  for my $word (keys %expected) {
164
40
28
    my ($key, $char) = collate_key $word;
165
40
26
    my %word_map = ();
166
40
44
    next unless defined $letter_map{$char}{$key};
167
15
15
6
17
    %word_map = %{$letter_map{$char}{$key}};
168
15
19
    next if defined $word_map{$word};
169
3
2
    my $words = scalar keys %word_map;
170
3
2
    next if $words > 2;
171
3
2
    if ($word eq $key) {
172
1
1
      next if ($words > 1);
173    }
174
2
2
    delete $expected{$word};
175  }
176}
177
178sub group_related_words {
179
12
5
  our %letter_map;
180
12
6
  our $disable_word_collating;
181
12
9
  return if $disable_word_collating;
182
183  # group related words
184
10
22
  for my $char (sort CheckSpelling::Util::number_biased keys %letter_map) {
185
19
19
13
22
    for my $plural_key (sort keys(%{$letter_map{$char}})) {
186
22
14
      my $key = stem_word $plural_key;
187
22
13
      next if $key eq $plural_key;
188
4
6
      next unless defined $letter_map{$char}{$key};
189
3
3
2
8
      my %word_map = %{$letter_map{$char}{$key}};
190
3
3
27
4
      for $word (keys(%{$letter_map{$char}{$plural_key}})) {
191
3
6
        $word_map{$word} = 1;
192      }
193
3
3
      $letter_map{$char}{$key} = \%word_map;
194
3
5
      delete $letter_map{$char}{$plural_key};
195    }
196  }
197}
198
199sub count_warning {
200
13
11
  my ($warning) = @_;
201
13
4
  our %counters;
202
13
9
  our %ignored_event_map;
203
13
22
  if ($warning =~ /\(([-\w]+)\)$/) {
204
8
5
    my ($code) = ($1);
205
8
8
    next if defined $ignored_event_map{$code};
206
8
10
    ++$counters{$code};
207  }
208}
209
210sub report_timing {
211
0
0
  my ($name, $start_time, $directory, $marker) = @_;
212
0
0
  my $end_time = (stat "$directory/$marker")[9];
213
0
0
  $name =~ s/"/\\"/g;
214
0
0
  print TIMING_REPORT "\"$name\", $start_time, $end_time\n";
215}
216
217sub get_pattern_with_context {
218
24
10
  my ($path) = @_;
219
24
30
  return unless defined $ENV{$path};
220
12
14
  $ENV{$path} =~ /(.*)/;
221
12
68
  return unless open ITEMS, '<:utf8', $1;
222
223
12
7
  my @items;
224
12
4
  my $context = '';
225
12
63
  while (<ITEMS>) {
226
2
2
    my $pattern = $_;
227
2
4
    if ($pattern =~ /^#/) {
228
1
2
      if ($pattern =~ /^# /) {
229
1
2
        $context .= $pattern;
230      } else {
231
0
0
        $context = '';
232      }
233
1
2
      next;
234    }
235
1
2
    chomp $pattern;
236
1
1
    unless ($pattern =~ /./) {
237
0
0
      $context = '';
238
0
0
      next;
239    }
240
1
2
    push @items, $context.$pattern;
241
1
3
    $context = '';
242  }
243
12
30
  close ITEMS;
244
12
14
  return @items;
245}
246
247sub summarize_totals {
248
24
17
  my ($formatter, $path, $items, $totals, $file_counts) = @_;
249
24
24
10
23
  return unless @{$totals};
250
1
21
  return unless open my $fh, '>:utf8', $path;
251
1
1
1
1
  my $totals_count = scalar(@{$totals}) - 1;
252
1
1
  my @indices;
253
1
1
  if ($file_counts) {
254    @indices = sort {
255
0
0
0
0
      $totals->[$b] <=> $totals->[$a] ||
256      $file_counts->[$b] <=> $file_counts->[$a]
257    } 0 .. $totals_count;
258  } else {
259    @indices = sort {
260
1
0
1
0
      $totals->[$b] <=> $totals->[$a]
261    } 0 .. $totals_count;
262  }
263
1
1
  for my $i (@indices) {
264
1
1
    last unless $totals->[$i] > 0;
265
1
1
    my $rule_with_context = $items->[$i];
266
1
1
    my ($description, $rule);
267
1
3
    if ($rule_with_context =~ /^(.*\n)([^\n]+)$/s) {
268
1
2
      ($description, $rule) = ($1, $2);
269    } else {
270
0
0
      ($description, $rule) = ('', $rule_with_context);
271    }
272
1
1
    print $fh $formatter->(
273      $totals->[$i],
274      ($file_counts ? " file-count: $file_counts->[$i]" : ""),
275      $description,
276      $rule
277    );
278  }
279
1
37
  close $fh;
280}
281
282sub get_special {
283
20
15
  my ($file, $special) = @_;
284
20
25
  return 'file-list' if $file eq $special->{'file_list'};
285
18
14
  return 'pr-title' if $file eq $special->{'pr_title_file'};
286
16
18
  return 'pr-description' if $file eq $special->{'pr_description_file'};
287
14
29
  return 'commit-message' if !rindex($file, $special->{'commit_messages'});
288
12
16
  return 'file';
289}
290
291sub main {
292
12
20941
  my @directories;
293  my @cleanup_directories;
294
12
0
  my @check_file_paths;
295
296
12
16
  my $early_warnings = CheckSpelling::Util::get_file_from_env('early_warnings', '/dev/null');
297
12
9
  my $warning_output = CheckSpelling::Util::get_file_from_env('warning_output', '/dev/stderr');
298
12
9
  my $more_warnings = CheckSpelling::Util::get_file_from_env('more_warnings', '/dev/stderr');
299
12
7
  my $counter_summary = CheckSpelling::Util::get_file_from_env('counter_summary', '/dev/stderr');
300
12
9
  my $ignored_events = CheckSpelling::Util::get_file_from_env('ignored_events', '');
301
12
10
  if ($ignored_events) {
302
5
0
    our %ignored_event_map;
303
5
8
    for my $event (split /,/, $ignored_events) {
304
5
5
      $ignored_event_map{$event} = 1;
305    }
306  }
307
12
11
  my $should_exclude_file = CheckSpelling::Util::get_file_from_env('should_exclude_file', '/dev/null');
308
12
13
  my $unknown_word_limit = CheckSpelling::Util::get_val_from_env('unknown_word_limit', undef);
309
12
8
  my $unknown_file_word_limit = CheckSpelling::Util::get_val_from_env('unknown_file_word_limit', undef);
310
12
7
  my $candidate_example_limit = CheckSpelling::Util::get_file_from_env('INPUT_CANDIDATE_EXAMPLE_LIMIT', '3');
311
12
9
  my $disable_flags = CheckSpelling::Util::get_file_from_env('INPUT_DISABLE_CHECKS', '');
312
12
6
  my $only_check_changed_files = CheckSpelling::Util::get_file_from_env('INPUT_ONLY_CHECK_CHANGED_FILES', '');
313
12
9
  my $disable_noisy_file = $disable_flags =~ /(?:^|,|\s)noisy-file(?:,|\s|$)/;
314
12
36
  our $disable_word_collating = $only_check_changed_files || $disable_flags =~ /(?:^|,|\s)word-collating(?:,|\s|$)/;
315
12
6
  my $file_list = CheckSpelling::Util::get_file_from_env('check_file_names', '');
316
12
9
  my $pr_title_file = CheckSpelling::Util::get_file_from_env('pr_title_file', '');
317
12
8
  my $pr_description_file = CheckSpelling::Util::get_file_from_env('pr_description_file', '');
318
12
5
  my $commit_messages = CheckSpelling::Util::get_file_from_env('commit_messages', '');
319
12
8
  my $timing_report = CheckSpelling::Util::get_file_from_env('timing_report', '');
320
12
19
  my $special = {
321    'file_list' => $file_list,
322    'pr_title_file' => $pr_title_file,
323    'pr_description_file' => $pr_description_file,
324    'commit_messages' => $commit_messages,
325  };
326
12
7
  my ($start_time, $end_time);
327
328
12
259
  open WARNING_OUTPUT, '>:utf8', $warning_output;
329
12
182
  open MORE_WARNINGS, '>:utf8', $more_warnings;
330
12
151
  open COUNTER_SUMMARY, '>:utf8', $counter_summary;
331
12
92
  open SHOULD_EXCLUDE, '>:utf8', $should_exclude_file;
332
12
10
  if ($timing_report) {
333
0
0
    open TIMING_REPORT, '>:utf8', $timing_report;
334
0
0
    print TIMING_REPORT "file, start, finish\n";
335  }
336
337
12
10
  my @candidates = get_pattern_with_context('candidates_path');
338
12
8
  my @candidate_totals = (0) x scalar @candidates;
339
12
6
  my @candidate_file_counts = (0) x scalar @candidates;
340
341
12
8
  my @forbidden = get_pattern_with_context('forbidden_path');
342
12
9
  my @forbidden_totals = (0) x scalar @forbidden;
343
344
12
6
  my @delayed_warnings;
345
12
21
  our %letter_map = ();
346
347
12
7
  my %file_map = ();
348
349
12
28
  for my $directory (<>) {
350
15
16
    chomp $directory;
351
15
23
    next unless $directory =~ /^(.*)$/;
352
15
18
    $directory = $1;
353
15
52
    unless (-e $directory) {
354
1
3
      print STDERR "Could not find: $directory\n";
355
1
1
      next;
356    }
357
14
35
    unless (-d $directory) {
358
1
10
      print STDERR "Not a directory: $directory\n";
359
1
1
      next;
360    }
361
362    # if there's no filename, we can't report
363
13
105
    next unless open(NAME, '<:utf8', "$directory/name");
364
12
59
    my $file=<NAME>;
365
12
22
    close NAME;
366
367
12
25
    $file_map{$file} = $directory;
368  }
369
370
12
23
  for my $file (sort keys %file_map) {
371
12
7
    my $directory = $file_map{$file};
372
12
11
    if ($timing_report) {
373
0
0
      $start_time = (stat "$directory/name")[9];
374    }
375
376
12
60
    if (-e "$directory/skipped") {
377
1
7
      open SKIPPED, '<:utf8', "$directory/skipped";
378
1
7
      my $reason=<SKIPPED>;
379
1
3
      close SKIPPED;
380
1
1
      chomp $reason;
381
1
3
      push @delayed_warnings, "$file:1:1 ... 1, Warning - Skipping `$file` because $reason\n";
382
1
9
      print SHOULD_EXCLUDE "$file\n";
383
1
1
      push @cleanup_directories, $directory;
384
1
1
      report_timing($file, $start_time, $directory, 'skipped') if ($timing_report);
385
1
2
      next;
386    }
387
388
11
9
    push @directories, $directory;
389    # stats isn't written if there was nothing interesting in the file
390
11
31
    unless (-s "$directory/stats") {
391
1
3
      report_timing($file, $start_time, $directory, 'warnings') if ($timing_report);
392
1
1
      next;
393    }
394
395
10
9
    if ($file eq $file_list) {
396
1
5
      open FILE_LIST, '<:utf8', $file_list;
397
1
1
      push @check_file_paths, '0 placeholder';
398
1
5
      for my $check_file_path (<FILE_LIST>) {
399
4
5
        chomp $check_file_path;
400
4
2
        push @check_file_paths, $check_file_path;
401      }
402
1
2
      close FILE_LIST;
403    }
404
405
10
8
    my ($words, $unrecognized, $unknown, $unique);
406
407    {
408
10
10
4
53
      open STATS, '<:utf8', "$directory/stats";
409
10
35
      my $stats=<STATS>;
410
10
18
      close STATS;
411
10
9
      $words=get_field($stats, 'words');
412
10
7
      $unrecognized=get_field($stats, 'unrecognized');
413
10
8
      $unknown=get_field($stats, 'unknown');
414
10
9
      $unique=get_field($stats, 'unique');
415
10
27
      my @candidate_list;
416
10
8
      if (@candidate_totals) {
417
0
0
        @candidate_list=get_array($stats, 'candidates');
418
0
0
        my @lines=get_array($stats, 'candidate_lines');
419
0
0
        if (@candidate_list) {
420
0
0
          for (my $i=0; $i < scalar @candidate_list; $i++) {
421
0
0
            my $hits = $candidate_list[$i];
422
0
0
            if ($hits) {
423
0
0
              $candidate_totals[$i] += $hits;
424
0
0
              if ($candidate_file_counts[$i]++ < $candidate_example_limit) {
425
0
0
                my $pattern = (split /\n/,$candidates[$i])[-1];
426
0
0
                my $position = $lines[$i];
427
0
0
                $position =~ s/:(\d+)$/ ... $1/;
428
0
0
                my $wrapped = CheckSpelling::Util::wrap_in_backticks($pattern);
429
0
0
                push @delayed_warnings, "$file:$position, Notice - Line matches candidate pattern $wrapped (candidate-pattern)\n";
430              }
431            }
432          }
433        }
434      }
435
10
10
      if (@forbidden_totals) {
436
1
1
        @forbidden_list=get_array($stats, 'forbidden');
437
1
1
        my @lines=get_array($stats, 'forbidden_lines');
438
1
1
        if (@forbidden_list) {
439
1
2
          for (my $i=0; $i < scalar @forbidden_list; $i++) {
440
1
1
            my $hits = $forbidden_list[$i];
441
1
2
            if ($hits) {
442
1
2
              $forbidden_totals[$i] += $hits;
443            }
444          }
445        }
446      }
447      #print STDERR "$file (unrecognized: $unrecognized; unique: $unique; unknown: $unknown, words: $words, candidates: [".join(", ", @candidate_list)."])\n";
448    }
449
450
10
8
    report_timing($file, $start_time, $directory, 'unknown') if ($timing_report);
451
10
9
    my $kind = get_special($file, $special);
452    # These heuristics are very new and need tuning/feedback
453
10
16
    if (
454        ($unknown > $unique)
455        # || ($unrecognized > $words / 2)
456    ) {
457
0
0
      unless ($disable_noisy_file) {
458
0
0
        if ($kind eq 'file') {
459
0
0
          print SHOULD_EXCLUDE "$file\n";
460        }
461
0
0
        push @delayed_warnings, "$file:1:1 ... 1, Warning - Skipping `$file` because it seems to have more noise ($unknown) than unique words ($unique) (total: $unrecognized / $words). (noisy-$kind)\n";
462
0
0
        next;
463      }
464    }
465
10
81
    unless ($kind =~ /^file/ && -s "$directory/unknown") {
466
4
5
      next;
467    }
468
6
41
    open UNKNOWN, '<:utf8', "$directory/unknown";
469
6
54
    for $token (<UNKNOWN>) {
470
49
51
      $token =~ s/\R//;
471
49
68
      next unless $token =~ /./;
472
46
21
      my ($key, $char) = collate_key $token;
473
46
47
      $letter_map{$char} = () unless defined $letter_map{$char};
474
46
22
      my %word_map = ();
475
46
14
39
18
      %word_map = %{$letter_map{$char}{$key}} if defined $letter_map{$char}{$key};
476
46
36
      $word_map{$token} = 1;
477
46
61
      $letter_map{$char}{$key} = \%word_map;
478    }
479
6
23
    close UNKNOWN;
480  }
481
12
26
  close SHOULD_EXCLUDE;
482
12
8
  close TIMING_REPORT if $timing_report;
483
484  summarize_totals(
485    sub {
486
0
0
      my ($hits, $files, $context, $pattern) = @_;
487
0
0
      return "# hit-count: $hits$files\n$context$pattern\n\n",
488    },
489
12
39
    CheckSpelling::Util::get_file_from_env('candidate_summary', '/dev/stderr'),
490    \@candidates,
491    \@candidate_totals,
492    \@candidate_file_counts,
493  );
494
495  summarize_totals(
496    sub {
497
1
1
      my (undef, undef, $context, $pattern) = @_;
498
1
2
      $context =~ s/^# //gm;
499
1
1
      chomp $context;
500
1
1
      my $details;
501
1
3
      if ($context =~ /^(.*?)$(.*)/ms) {
502
1
1
        ($context, $details) = ($1, $2);
503
1
1
        $details = "\n$details" if $details;
504      }
505
1
1
      $context = 'Pattern' unless $context;
506
1
3
      return "#### $context$details\n```\n$pattern\n```\n\n";
507    },
508
12
35
    CheckSpelling::Util::get_file_from_env('forbidden_summary', '/dev/stderr'),
509    \@forbidden,
510    \@forbidden_totals,
511  );
512
513
12
34
  group_related_words;
514
515
12
12
  if (defined $ENV{'expect'}) {
516
11
11
    $ENV{'expect'} =~ /(.*)/;
517
11
12
    load_expect $1;
518
11
8
    harmonize_expect;
519  }
520
521
12
10
  my %seen = ();
522
12
16
  our %counters;
523
12
10
  %counters = ();
524
525
12
40
  if (-s $early_warnings) {
526
1
6
    open WARNINGS, '<:utf8', $early_warnings;
527
1
9
    for my $warning (<WARNINGS>) {
528
1
1
      chomp $warning;
529
1
1
      count_warning $warning;
530
1
1
      next if should_skip_warning $warning;
531
1
5
      print WARNING_OUTPUT "$warning\n";
532    }
533
1
3
    close WARNINGS;
534  }
535
536
12
5
  our %last_seen;
537
12
6
  my %unknown_file_word_count;
538
12
10
  for my $directory (@directories) {
539
11
32
    next unless (-s "$directory/warnings");
540
10
60
    next unless open(NAME, '<:utf8', "$directory/name");
541
10
40
    my $file=<NAME>;
542
10
20
    close NAME;
543
10
7
    my $kind = get_special($file, $special);
544
10
53
    open WARNINGS, '<:utf8', "$directory/warnings";
545
10
8
    if ($kind ne 'file-list') {
546
9
54
      for $warning (<WARNINGS>) {
547
55
45
        chomp $warning;
548
55
97
        if ($warning =~ m/:(\d+):(\d+ \.\.\. \d+): `(.*)`/) {
549
51
49
          my ($line, $range, $item) = ($1, $2, $3);
550
51
31
          my $wrapped = CheckSpelling::Util::wrap_in_backticks($item);
551
51
30
          my $reason = 'unrecognized-spelling';
552
51
30
          $reason .= "-$kind" unless $kind eq 'file';
553
51
118
          $warning =~ s/:\d+:\d+ \.\.\. \d+: `.*`/:$line:$range, Warning - $wrapped is not a recognized word. ($reason)/;
554
51
25
          next if log_skip_item($item, $file, $warning, $unknown_word_limit);
555        } else {
556
4
5
          if ($warning =~ /\`(.*?)\` in line\. \(token-is-substring\)/) {
557
0
0
            next if skip_item($1);
558          }
559
4
4
          count_warning $warning;
560        }
561
17
10
        next if should_skip_warning $warning;
562
17
64
        print WARNING_OUTPUT "$file$warning\n";
563      }
564    } else {
565
1
7
      for $warning (<WARNINGS>) {
566
6
5
        chomp $warning;
567
6
11
        next unless $warning =~ s/^:(\d+)/:1/;
568
6
4
        $file = $check_file_paths[$1];
569
6
19
        if ($warning =~ m/:(\d+ \.\.\. \d+): `(.*)`/) {
570
4
4
          my ($range, $item) = ($1, $2);
571
4
3
          my $wrapped = CheckSpelling::Util::wrap_in_backticks($item);
572
4
14
          $warning =~ s/:\d+ \.\.\. \d+: `.*`/:$range, Warning - $wrapped is not a recognized word. (check-file-path)/;
573
4
2
          next if skip_item($item);
574
4
4
          if (defined $unknown_file_word_limit) {
575
4
4
            next if ++$unknown_file_word_count{$item} > $unknown_file_word_limit;
576          }
577        }
578
5
1
        next if should_skip_warning $warning;
579
4
10
        print WARNING_OUTPUT "$file$warning\n";
580
4
4
        count_warning $warning;
581      }
582    }
583
10
37
    close WARNINGS;
584  }
585
12
270
  close MORE_WARNINGS;
586
587
12
11
  for my $warning (@delayed_warnings) {
588
1
1
    next if should_skip_warning $warning;
589
1
1
    count_warning $warning;
590
1
2
    print WARNING_OUTPUT $warning;
591  }
592
12
9
  if (defined $unknown_word_limit) {
593
1
2
    for my $warned_word (sort keys %last_seen) {
594
1
2
      my $warning_count = $seen{$warned_word} || 0;
595
1
2
      next unless $warning_count >= $unknown_word_limit;
596
0
0
      my $warning = $last_seen{$warned_word};
597
0
0
      $warning =~ s/\Q. (unrecognized-spelling)\E/ -- found $warning_count times. (limited-references)\n/;
598
0
0
      next if should_skip_warning $warning;
599
0
0
      print WARNING_OUTPUT $warning;
600
0
0
      count_warning $warning;
601    }
602  }
603
12
267
  close WARNING_OUTPUT;
604
605
12
14
  if (%counters) {
606
2
2
    my $continue='';
607
2
3
    print COUNTER_SUMMARY "{\n";
608
2
4
    for my $code (sort keys %counters) {
609
4
6
      print COUNTER_SUMMARY qq<$continue"$code": $counters{$code}\n>;
610
4
3
      $continue=',';
611    }
612
2
3
    print COUNTER_SUMMARY "}\n";
613  }
614
12
62
  close COUNTER_SUMMARY;
615
616  # display the current unknown
617
12
36
  for my $char (sort keys %letter_map) {
618
43
43
23
93
    for $key (sort CheckSpelling::Util::case_biased keys(%{$letter_map{$char}})) {
619
29
29
30
40
      my %word_map = %{$letter_map{$char}{$key}};
620
29
49
      my @words = keys(%word_map);
621
29
15
      if (scalar(@words) > 1) {
622
13
21
13
70
        print $key." (".(join ", ", sort { length($a) <=> length($b) || $a cmp $b } @words).")";
623      } else {
624
16
56
        print $words[0];
625      }
626
29
106
      print "\n";
627    }
628  }
629}
630
6311;