File Coverage

File:lib/CheckSpelling/SpellingCollator.pm
Coverage:84.4%

linestmtbrancondsubtimecode
1#! -*-perl-*-
2
3package CheckSpelling::SpellingCollator;
4
5our $VERSION='0.1.0';
6
1
1
1
107188
2
24
use warnings;
7
1
1
1
1
1
23
use File::Path qw(remove_tree);
8
1
1
1
213
1
2273
use CheckSpelling::Util;
9
10my %letter_map;
11my %ignored_event_map;
12my $disable_word_collating;
13
14my %last_seen;
15
16sub get_field {
17
40
31
  my ($record, $field) = @_;
18
40
445
  return 0 unless $record =~ (/\b$field:\s*(\d+)/);
19
28
34
  return $1;
20}
21
22sub get_array {
23
2
1
  my ($record, $field) = @_;
24
2
14
  return () unless $record =~ (/\b$field: \[([^\]]+)\]/);
25
2
3
  my $values = $1;
26
2
3
  return split /\s*,\s*/, $values;
27}
28
29sub maybe {
30
7
5
  my ($next, $value) = @_;
31
7
7
  $next = $value unless $next && $next < $value;
32
7
5
  return $next;
33}
34
35my %expected = ();
36sub expect_item {
37
104
65
  my ($item, $value) = @_;
38
104
34
  our %expected;
39
104
39
  my $next;
40
104
108
  if (defined $expected{$item}) {
41
26
28
    $next = $expected{$item};
42
26
19
    $next = $value if $value < $next;
43  } elsif ($item =~ /^([A-Z])(.*)/) {
44
12
13
    $item = $1 . lc $2;
45
12
11
    if (defined $expected{$item}) {
46
2
3
      $next = $expected{$item};
47
2
2
      $next = maybe($next, $value + .1);
48    } else {
49
10
4
      $item = lc $item;
50
10
7
      if (defined $expected{$item}) {
51
5
3
        $next = $expected{$item};
52
5
3
        $next = maybe($next, $value + .2);
53      }
54    }
55  }
56
104
94
  return 0 unless defined $next;
57
33
20
  $expected{$item} = $next;
58
33
65
  return $value;
59}
60
61sub skip_item {
62
55
29
  my ($word) = @_;
63
55
33
  return 1 if expect_item($word, 1);
64
35
25
  my $key = lc $word;
65
35
34
  return 2 if expect_item($key, 2);
66
35
44
  if ($key =~ /.s$/) {
67
2
2
    if ($key =~ /ies$/) {
68
1
2
      $key =~ s/ies$/y/;
69    } else {
70
1
2
      $key =~ s/s$//;
71    }
72  } elsif ($key =~ /^(.+[^aeiou])ed$/) {
73
1
1
    $key = $1;
74  } elsif ($key =~ /^(.+)'[ds]$/) {
75
6
5
    $key = $1;
76  } else {
77
26
22
    return 0;
78  }
79
9
5
  return 3 if expect_item($key, 3);
80
0
0
  return 0;
81}
82
83sub should_skip_warning {
84
75
69
  my ($warning) = @_;
85
75
95
  if ($warning =~ /\(([-\w]+)\)$/) {
86
71
45
    my ($code) = ($1);
87
71
31
    our %ignored_event_map;
88
71
52
    return 1 if $ignored_event_map{$code};
89  }
90
74
60
  return 0;
91}
92
93sub log_skip_item {
94
51
59
  my ($item, $file, $warning, $unknown_word_limit) = @_;
95
51
37
  return 1 if should_skip_warning $warning;
96
51
27
  return 1 if skip_item($item);
97
22
8
  our %seen;
98
22
15
  my $seen_count = $seen{$item};
99
22
15
  if (defined $seen_count) {
100
9
12
    if (!defined $unknown_word_limit || ($seen_count++ < $unknown_word_limit)) {
101
8
37
      print MORE_WARNINGS "$file$warning\n";
102    } else {
103
1
0
      our %last_seen;
104
1
2
      $last_seen{$item} = "$file$warning";
105    }
106
9
10
    $seen{$item} = $seen_count;
107
9
14
    return 1;
108  }
109
13
11
  $seen{$item} = 1;
110
13
10
  return 0;
111}
112
113sub stem_word {
114
22
14
  my ($key) = @_;
115
22
29
  our $disable_word_collating;
116
22
12
  return $key if $disable_word_collating;
117
118
22
19
  if ($key =~ /.s$/) {
119
3
4
    if ($key =~ /ies$/) {
120
1
4
      $key =~ s/ies$/y/;
121    } else {
122
2
5
      $key =~ s/s$//;
123    }
124  } elsif ($key =~ /.[^aeiou]ed$/) {
125
1
3
    $key =~ s/ed$//;
126  }
127
22
21
  return $key;
128}
129
130sub collate_key {
131
86
50
  my ($key) = @_;
132
86
37
  our $disable_word_collating;
133
86
35
  my $char;
134
86
44
  if ($disable_word_collating) {
135
16
14
    $char = lc substr $key, 0, 1;
136  } else {
137
70
51
    $key = lc $key;
138
70
45
    $key =~ s/''+/'/g;
139
70
40
    $key =~ s/'[sd]$//;
140
70
37
    $key =~ s/^[^Ii]?'+(.*)/$1/;
141
70
32
    $key =~ s/(.*?)'$/$1/;
142
70
63
    $char = substr $key, 0, 1;
143  }
144
86
95
  return ($key, $char);
145}
146
147sub load_expect {
148
12
458
  my ($expect) = @_;
149
12
6
  our %expected;
150
12
11
  %expected = ();
151
12
99
  if (open(EXPECT, '<:utf8', $expect)) {
152
12
80
    while (my $word = <EXPECT>) {
153
43
54
      $word =~ s/\R//;
154
43
83
      $expected{$word} = 0;
155    }
156
12
37
    close EXPECT;
157  }
158}
159
160sub harmonize_expect {
161
11
6
  our $disable_word_collating;
162
11
5
  our %letter_map;
163
11
3
  our %expected;
164
165
11
17
  for my $word (keys %expected) {
166
40
20
    my ($key, $char) = collate_key $word;
167
40
29
    my %word_map = ();
168
40
43
    next unless defined $letter_map{$char}{$key};
169
15
15
6
17
    %word_map = %{$letter_map{$char}{$key}};
170
15
18
    next if defined $word_map{$word};
171
3
2
    my $words = scalar keys %word_map;
172
3
7
    next if $words > 2;
173
3
2
    if ($word eq $key) {
174
1
2
      next if ($words > 1);
175    }
176
2
2
    delete $expected{$word};
177  }
178}
179
180sub group_related_words {
181
12
5
  our %letter_map;
182
12
5
  our $disable_word_collating;
183
12
41
  return if $disable_word_collating;
184
185  # group related words
186
10
20
  for my $char (sort CheckSpelling::Util::number_biased keys %letter_map) {
187
19
19
7
21
    for my $plural_key (sort keys(%{$letter_map{$char}})) {
188
22
9
      my $key = stem_word $plural_key;
189
22
20
      next if $key eq $plural_key;
190
4
4
      next unless defined $letter_map{$char}{$key};
191
3
3
13
5
      my %word_map = %{$letter_map{$char}{$key}};
192
3
3
2
4
      for $word (keys(%{$letter_map{$char}{$plural_key}})) {
193
3
1
        $word_map{$word} = 1;
194      }
195
3
4
      $letter_map{$char}{$key} = \%word_map;
196
3
4
      delete $letter_map{$char}{$plural_key};
197    }
198  }
199}
200
201sub count_warning {
202
13
9
  my ($warning) = @_;
203
13
5
  our %counters;
204
13
8
  our %ignored_event_map;
205
13
19
  if ($warning =~ /\(([-\w]+)\)$/) {
206
8
7
    my ($code) = ($1);
207
8
5
    next if defined $ignored_event_map{$code};
208
8
11
    ++$counters{$code};
209  }
210}
211
212sub report_timing {
213
0
0
  my ($name, $start_time, $directory, $marker) = @_;
214
0
0
  my $end_time = (stat "$directory/$marker")[9];
215
0
0
  $name =~ s/"/\\"/g;
216
0
0
  print TIMING_REPORT "\"$name\", $start_time, $end_time\n";
217}
218
219sub get_pattern_with_context {
220
24
13
  my ($path) = @_;
221
24
26
  return unless defined $ENV{$path};
222
12
12
  $ENV{$path} =~ /(.*)/;
223
12
71
  return unless open ITEMS, '<:utf8', $1;
224
225
12
5
  my @items;
226
12
8
  my $context = '';
227
12
59
  while (<ITEMS>) {
228
2
2
    my $pattern = $_;
229
2
3
    if ($pattern =~ /^#/) {
230
1
2
      if ($pattern =~ /^# /) {
231
1
2
        $context .= $pattern;
232      } else {
233
0
0
        $context = '';
234      }
235
1
2
      next;
236    }
237
1
1
    chomp $pattern;
238
1
2
    unless ($pattern =~ /./) {
239
0
0
      $context = '';
240
0
0
      next;
241    }
242
1
1
    push @items, $context.$pattern;
243
1
3
    $context = '';
244  }
245
12
31
  close ITEMS;
246
12
12
  return @items;
247}
248
249sub summarize_totals {
250
24
20
  my ($formatter, $path, $items, $totals, $file_counts) = @_;
251
24
24
8
26
  return unless @{$totals};
252
1
20
  return unless open my $fh, '>:utf8', $path;
253
1
1
0
1
  my $totals_count = scalar(@{$totals}) - 1;
254
1
1
  my @indices;
255
1
1
  if ($file_counts) {
256    @indices = sort {
257
0
0
0
0
      $totals->[$b] <=> $totals->[$a] ||
258      $file_counts->[$b] <=> $file_counts->[$a]
259    } 0 .. $totals_count;
260  } else {
261    @indices = sort {
262
1
0
2
0
      $totals->[$b] <=> $totals->[$a]
263    } 0 .. $totals_count;
264  }
265
1
1
  for my $i (@indices) {
266
1
1
    last unless $totals->[$i] > 0;
267
1
0
    my $rule_with_context = $items->[$i];
268
1
1
    my ($description, $rule);
269
1
3
    if ($rule_with_context =~ /^(.*\n)([^\n]+)$/s) {
270
1
1
      ($description, $rule) = ($1, $2);
271    } else {
272
0
0
      ($description, $rule) = ('', $rule_with_context);
273    }
274
1
2
    print $fh $formatter->(
275      $totals->[$i],
276      ($file_counts ? " file-count: $file_counts->[$i]" : ""),
277      $description,
278      $rule
279    );
280  }
281
1
37
  close $fh;
282}
283
284sub get_special {
285
20
14
  my ($file, $special) = @_;
286
20
29
  return 'file-list' if $file eq $special->{'file_list'};
287
18
19
  return 'pr-title' if $file eq $special->{'pr_title_file'};
288
16
15
  return 'pr-description' if $file eq $special->{'pr_description_file'};
289
14
29
  return 'commit-message' if !rindex($file, $special->{'commit_messages'});
290
12
18
  return 'file';
291}
292
293sub main {
294
12
20380
  my @directories;
295  my @cleanup_directories;
296
12
0
  my @check_file_paths;
297
298
12
13
  my $early_warnings = CheckSpelling::Util::get_file_from_env('early_warnings', '/dev/null');
299
12
10
  my $warning_output = CheckSpelling::Util::get_file_from_env('warning_output', '/dev/stderr');
300
12
7
  my $more_warnings = CheckSpelling::Util::get_file_from_env('more_warnings', '/dev/stderr');
301
12
7
  my $counter_summary = CheckSpelling::Util::get_file_from_env('counter_summary', '/dev/stderr');
302
12
7
  my $ignored_events = CheckSpelling::Util::get_file_from_env('ignored_events', '');
303
12
13
  if ($ignored_events) {
304
5
3
    our %ignored_event_map;
305
5
5
    for my $event (split /,/, $ignored_events) {
306
5
6
      $ignored_event_map{$event} = 1;
307    }
308  }
309
12
9
  my $should_exclude_file = CheckSpelling::Util::get_file_from_env('should_exclude_file', '/dev/null');
310
12
8
  my $unknown_word_limit = CheckSpelling::Util::get_val_from_env('unknown_word_limit', undef);
311
12
8
  my $unknown_file_word_limit = CheckSpelling::Util::get_val_from_env('unknown_file_word_limit', undef);
312
12
9
  my $candidate_example_limit = CheckSpelling::Util::get_file_from_env('INPUT_CANDIDATE_EXAMPLE_LIMIT', '3');
313
12
8
  my $disable_flags = CheckSpelling::Util::get_file_from_env('INPUT_DISABLE_CHECKS', '');
314
12
8
  my $only_check_changed_files = CheckSpelling::Util::get_file_from_env('INPUT_ONLY_CHECK_CHANGED_FILES', '');
315
12
8
  my $disable_noisy_file = $disable_flags =~ /(?:^|,|\s)noisy-file(?:,|\s|$)/;
316
12
30
  our $disable_word_collating = $only_check_changed_files || $disable_flags =~ /(?:^|,|\s)word-collating(?:,|\s|$)/;
317
12
8
  my $file_list = CheckSpelling::Util::get_file_from_env('check_file_names', '');
318
12
7
  my $pr_title_file = CheckSpelling::Util::get_file_from_env('pr_title_file', '');
319
12
10
  my $pr_description_file = CheckSpelling::Util::get_file_from_env('pr_description_file', '');
320
12
9
  my $commit_messages = CheckSpelling::Util::get_file_from_env('commit_messages', '');
321
12
7
  my $timing_report = CheckSpelling::Util::get_file_from_env('timing_report', '');
322
12
18
  my $special = {
323    'file_list' => $file_list,
324    'pr_title_file' => $pr_title_file,
325    'pr_description_file' => $pr_description_file,
326    'commit_messages' => $commit_messages,
327  };
328
12
9
  my ($start_time, $end_time);
329
330
12
235
  open WARNING_OUTPUT, '>:utf8', $warning_output;
331
12
174
  open MORE_WARNINGS, '>:utf8', $more_warnings;
332
12
148
  open COUNTER_SUMMARY, '>:utf8', $counter_summary;
333
12
110
  open SHOULD_EXCLUDE, '>:utf8', $should_exclude_file;
334
12
9
  if ($timing_report) {
335
0
0
    open TIMING_REPORT, '>:utf8', $timing_report;
336
0
0
    print TIMING_REPORT "file, start, finish\n";
337  }
338
339
12
9
  my @candidates = get_pattern_with_context('candidates_path');
340
12
8
  my @candidate_totals = (0) x scalar @candidates;
341
12
6
  my @candidate_file_counts = (0) x scalar @candidates;
342
343
12
9
  my @forbidden = get_pattern_with_context('forbidden_path');
344
12
7
  my @forbidden_totals = (0) x scalar @forbidden;
345
346
12
6
  my @delayed_warnings;
347
12
22
  our %letter_map = ();
348
349
12
6
  my %file_map = ();
350
351
12
27
  for my $directory (<>) {
352
15
13
    chomp $directory;
353
15
26
    next unless $directory =~ /^(.*)$/;
354
15
11
    $directory = $1;
355
15
49
    unless (-e $directory) {
356
1
3
      print STDERR "Could not find: $directory\n";
357
1
1
      next;
358    }
359
14
31
    unless (-d $directory) {
360
1
10
      print STDERR "Not a directory: $directory\n";
361
1
1
      next;
362    }
363
364    # if there's no filename, we can't report
365
13
82
    next unless open(NAME, '<:utf8', "$directory/name");
366
12
62
    my $file=<NAME>;
367
12
22
    close NAME;
368
369
12
25
    $file_map{$file} = $directory;
370  }
371
372
12
23
  for my $file (sort keys %file_map) {
373
12
10
    my $directory = $file_map{$file};
374
12
12
    if ($timing_report) {
375
0
0
      $start_time = (stat "$directory/name")[9];
376    }
377
378
12
53
    if (-e "$directory/skipped") {
379
1
7
      open SKIPPED, '<:utf8', "$directory/skipped";
380
1
11
      my $reason=<SKIPPED>;
381
1
3
      close SKIPPED;
382
1
1
      chomp $reason;
383
1
3
      push @delayed_warnings, "$file:1:1 ... 1, Warning - Skipping `$file` because $reason\n";
384
1
2
      print SHOULD_EXCLUDE "$file\n";
385
1
1
      push @cleanup_directories, $directory;
386
1
23
      report_timing($file, $start_time, $directory, 'skipped') if ($timing_report);
387
1
3
      next;
388    }
389
390
11
8
    push @directories, $directory;
391    # stats isn't written if there was nothing interesting in the file
392
11
33
    unless (-s "$directory/stats") {
393
1
1
      report_timing($file, $start_time, $directory, 'warnings') if ($timing_report);
394
1
1
      next;
395    }
396
397
10
9
    if ($file eq $file_list) {
398
1
6
      open FILE_LIST, '<:utf8', $file_list;
399
1
1
      push @check_file_paths, '0 placeholder';
400
1
6
      for my $check_file_path (<FILE_LIST>) {
401
4
2
        chomp $check_file_path;
402
4
4
        push @check_file_paths, $check_file_path;
403      }
404
1
3
      close FILE_LIST;
405    }
406
407
10
6
    my ($words, $unrecognized, $unknown, $unique);
408
409    {
410
10
10
6
51
      open STATS, '<:utf8', "$directory/stats";
411
10
40
      my $stats=<STATS>;
412
10
18
      close STATS;
413
10
8
      $words=get_field($stats, 'words');
414
10
7
      $unrecognized=get_field($stats, 'unrecognized');
415
10
9
      $unknown=get_field($stats, 'unknown');
416
10
5
      $unique=get_field($stats, 'unique');
417
10
4
      my @candidate_list;
418
10
8
      if (@candidate_totals) {
419
0
0
        @candidate_list=get_array($stats, 'candidates');
420
0
0
        my @lines=get_array($stats, 'candidate_lines');
421
0
0
        if (@candidate_list) {
422
0
0
          for (my $i=0; $i < scalar @candidate_list; $i++) {
423
0
0
            my $hits = $candidate_list[$i];
424
0
0
            if ($hits) {
425
0
0
              $candidate_totals[$i] += $hits;
426
0
0
              if ($candidate_file_counts[$i]++ < $candidate_example_limit) {
427
0
0
                my $pattern = (split /\n/,$candidates[$i])[-1];
428
0
0
                my $position = $lines[$i];
429
0
0
                $position =~ s/:(\d+)$/ ... $1/;
430
0
0
                my $wrapped = CheckSpelling::Util::wrap_in_backticks($pattern);
431
0
0
                push @delayed_warnings, "$file:$position, Notice - Line matches candidate pattern $wrapped (candidate-pattern)\n";
432              }
433            }
434          }
435        }
436      }
437
10
10
      if (@forbidden_totals) {
438
1
1
        @forbidden_list=get_array($stats, 'forbidden');
439
1
1
        my @lines=get_array($stats, 'forbidden_lines');
440
1
2
        if (@forbidden_list) {
441
1
1
          for (my $i=0; $i < scalar @forbidden_list; $i++) {
442
1
1
            my $hits = $forbidden_list[$i];
443
1
1
            if ($hits) {
444
1
2
              $forbidden_totals[$i] += $hits;
445            }
446          }
447        }
448      }
449      #print STDERR "$file (unrecognized: $unrecognized; unique: $unique; unknown: $unknown, words: $words, candidates: [".join(", ", @candidate_list)."])\n";
450    }
451
452
10
10
    report_timing($file, $start_time, $directory, 'unknown') if ($timing_report);
453
10
6
    my $kind = get_special($file, $special);
454    # These heuristics are very new and need tuning/feedback
455
10
11
    if (
456        ($unknown > $unique)
457        # || ($unrecognized > $words / 2)
458    ) {
459
0
0
      unless ($disable_noisy_file) {
460
0
0
        if ($kind eq 'file') {
461
0
0
          print SHOULD_EXCLUDE "$file\n";
462        }
463
0
0
        $warning = "noisy-$kind";
464
0
0
        count_warning $warning;
465
0
0
        push @delayed_warnings, "$file:1:1 ... 1, Warning - Skipping `$file` because it seems to have more noise ($unknown) than unique words ($unique) (total: $unrecognized / $words). ($warning)\n";
466
0
0
        next;
467      }
468    }
469
10
43
    unless ($kind =~ /^file/ && -s "$directory/unknown") {
470
4
4
      next;
471    }
472
6
40
    open UNKNOWN, '<:utf8', "$directory/unknown";
473
6
50
    for $token (<UNKNOWN>) {
474
49
47
      $token =~ s/\R//;
475
49
45
      next unless $token =~ /./;
476
46
30
      my ($key, $char) = collate_key $token;
477
46
55
      $letter_map{$char} = () unless defined $letter_map{$char};
478
46
18
      my %word_map = ();
479
46
14
41
19
      %word_map = %{$letter_map{$char}{$key}} if defined $letter_map{$char}{$key};
480
46
40
      $word_map{$token} = 1;
481
46
63
      $letter_map{$char}{$key} = \%word_map;
482    }
483
6
22
    close UNKNOWN;
484  }
485
12
28
  close SHOULD_EXCLUDE;
486
12
9
  close TIMING_REPORT if $timing_report;
487
488  summarize_totals(
489    sub {
490
0
0
      my ($hits, $files, $context, $pattern) = @_;
491
0
0
      return "# hit-count: $hits$files\n$context$pattern\n\n",
492    },
493
12
30
    CheckSpelling::Util::get_file_from_env('candidate_summary', '/dev/stderr'),
494    \@candidates,
495    \@candidate_totals,
496    \@candidate_file_counts,
497  );
498
499  summarize_totals(
500    sub {
501
1
1
      my (undef, undef, $context, $pattern) = @_;
502
1
2
      $context =~ s/^# //gm;
503
1
1
      chomp $context;
504
1
0
      my $details;
505
1
3
      if ($context =~ /^(.*?)$(.*)/ms) {
506
1
1
        ($context, $details) = ($1, $2);
507
1
1
        $details = "\n$details" if $details;
508      }
509
1
1
      $context = 'Pattern' unless $context;
510
1
3
      return "#### $context$details\n```\n$pattern\n```\n\n";
511    },
512
12
34
    CheckSpelling::Util::get_file_from_env('forbidden_summary', '/dev/stderr'),
513    \@forbidden,
514    \@forbidden_totals,
515  );
516
517
12
35
  group_related_words;
518
519
12
12
  if (defined $ENV{'expect'}) {
520
11
10
    $ENV{'expect'} =~ /(.*)/;
521
11
11
    load_expect $1;
522
11
9
    harmonize_expect;
523  }
524
525
12
8
  my %seen = ();
526
12
6
  our %counters;
527
12
7
  %counters = ();
528
529
12
37
  if (-s $early_warnings) {
530
1
7
    open WARNINGS, '<:utf8', $early_warnings;
531
1
6
    for my $warning (<WARNINGS>) {
532
1
1
      chomp $warning;
533
1
1
      count_warning $warning;
534
1
1
      next if should_skip_warning $warning;
535
1
5
      print WARNING_OUTPUT "$warning\n";
536    }
537
1
3
    close WARNINGS;
538  }
539
540
12
6
  our %last_seen;
541
12
6
  my %unknown_file_word_count;
542
12
7
  for my $directory (@directories) {
543
11
33
    next unless (-s "$directory/warnings");
544
10
62
    next unless open(NAME, '<:utf8', "$directory/name");
545
10
31
    my $file=<NAME>;
546
10
18
    close NAME;
547
10
6
    my $kind = get_special($file, $special);
548
10
53
    open WARNINGS, '<:utf8', "$directory/warnings";
549
10
7
    if ($kind ne 'file-list') {
550
9
56
      for $warning (<WARNINGS>) {
551
55
42
        chomp $warning;
552
55
96
        if ($warning =~ m/:(\d+):(\d+ \.\.\. \d+): `(.*)`/) {
553
51
52
          my ($line, $range, $item) = ($1, $2, $3);
554
51
36
          my $wrapped = CheckSpelling::Util::wrap_in_backticks($item);
555
51
29
          my $reason = 'unrecognized-spelling';
556
51
31
          $reason .= "-$kind" unless $kind eq 'file';
557
51
107
          $warning =~ s/:\d+:\d+ \.\.\. \d+: `.*`/:$line:$range, Warning - $wrapped is not a recognized word. ($reason)/;
558
51
37
          next if log_skip_item($item, $file, $warning, $unknown_word_limit);
559
13
21
          count_warning $warning if $kind ne 'file';
560        } else {
561
4
4
          if ($warning =~ /\`(.*?)\` in line\. \(token-is-substring\)/) {
562
0
0
            next if skip_item($1);
563          }
564
4
4
          count_warning $warning;
565        }
566
17
13
        next if should_skip_warning $warning;
567
17
63
        print WARNING_OUTPUT "$file$warning\n";
568      }
569    } else {
570
1
8
      for $warning (<WARNINGS>) {
571
6
4
        chomp $warning;
572
6
12
        next unless $warning =~ s/^:(\d+)/:1/;
573
6
5
        $file = $check_file_paths[$1];
574
6
16
        if ($warning =~ m/:(\d+ \.\.\. \d+): `(.*)`/) {
575
4
3
          my ($range, $item) = ($1, $2);
576
4
5
          my $wrapped = CheckSpelling::Util::wrap_in_backticks($item);
577
4
10
          $warning =~ s/:\d+ \.\.\. \d+: `.*`/:$range, Warning - $wrapped is not a recognized word. (check-file-path)/;
578
4
5
          next if skip_item($item);
579
4
3
          if (defined $unknown_file_word_limit) {
580
4
6
            next if ++$unknown_file_word_count{$item} > $unknown_file_word_limit;
581          }
582        }
583
5
3
        next if should_skip_warning $warning;
584
4
10
        print WARNING_OUTPUT "$file$warning\n";
585
4
4
        count_warning $warning;
586      }
587    }
588
10
40
    close WARNINGS;
589  }
590
12
277
  close MORE_WARNINGS;
591
592
12
13
  for my $warning (@delayed_warnings) {
593
1
1
    next if should_skip_warning $warning;
594
1
1
    count_warning $warning;
595
1
1
    print WARNING_OUTPUT $warning;
596  }
597
12
9
  if (defined $unknown_word_limit) {
598
1
2
    for my $warned_word (sort keys %last_seen) {
599
1
3
      my $warning_count = $seen{$warned_word} || 0;
600
1
1
      next unless $warning_count >= $unknown_word_limit;
601
0
0
      my $warning = $last_seen{$warned_word};
602
0
0
      $warning =~ s/\Q. (unrecognized-spelling)\E/ -- found $warning_count times. (limited-references)\n/;
603
0
0
      next if should_skip_warning $warning;
604
0
0
      print WARNING_OUTPUT $warning;
605
0
0
      count_warning $warning;
606    }
607  }
608
12
265
  close WARNING_OUTPUT;
609
610
12
15
  if (%counters) {
611
2
1
    my $continue='';
612
2
3
    print COUNTER_SUMMARY "{\n";
613
2
4
    for my $code (sort keys %counters) {
614
4
6
      print COUNTER_SUMMARY qq<$continue"$code": $counters{$code}\n>;
615
4
3
      $continue=',';
616    }
617
2
2
    print COUNTER_SUMMARY "}\n";
618  }
619
12
64
  close COUNTER_SUMMARY;
620
621  # display the current unknown
622
12
37
  for my $char (sort keys %letter_map) {
623
43
43
21
114
    for $key (sort CheckSpelling::Util::case_biased keys(%{$letter_map{$char}})) {
624
29
29
11
58
      my %word_map = %{$letter_map{$char}{$key}};
625
29
28
      my @words = keys(%word_map);
626
29
18
      if (scalar(@words) > 1) {
627
13
18
11
75
        print $key." (".(join ", ", sort { length($a) <=> length($b) || $a cmp $b } @words).")";
628      } else {
629
16
70
        print $words[0];
630      }
631
29
116
      print "\n";
632    }
633  }
634}
635
6361;