File Coverage

File:lib/CheckSpelling/SpellingCollator.pm
Coverage:89.7%

linestmtbrancondsubtimecode
1#! -*-perl-*-
2
3package CheckSpelling::SpellingCollator;
4
5our $VERSION='0.1.0';
6
2
2
110381
4
use 5.022;
7
2
2
2
127
129
6
use utf8;
8
2
2
2
30
0
101
use feature 'unicode_strings';
9
2
2
2
4
1
35
use warnings;
10
2
2
2
3
1
49
use File::Path qw(remove_tree);
11
2
2
2
385
2
6362
use CheckSpelling::Util;
12
13my %letter_map;
14my $disable_word_collating;
15my $shortest_word;
16
17my %last_seen;
18
19sub get_field {
20
40
32
  my ($record, $field) = @_;
21
40
392
  return 0 unless $record =~ (/\b$field:\s*(\d+)/);
22
30
31
  return $1;
23}
24
25sub get_array {
26
4
3
  my ($record, $field) = @_;
27
4
30
  return () unless $record =~ (/\b$field: \[([^\]]+)\]/);
28
4
2
  my $values = $1;
29
4
7
  return split /\s*,\s*/, $values;
30}
31
32sub maybe {
33
7
6
  my ($next, $value) = @_;
34
7
12
  $next = $value unless $next && $next < $value;
35
7
7
  return $next;
36}
37
38my %expected = ();
39sub expect_item {
40
94
55
  my ($item, $value) = @_;
41
94
52
  $item =~ s/’/'/g;
42
94
48
  our %expected;
43
94
39
  my $next;
44
94
97
  if (defined $expected{$item}) {
45
22
7
    $next = $expected{$item};
46
22
19
    $next = $value if $value < $next;
47  } elsif ($item =~ /^([A-Z])(.*)/) {
48
12
8
    $item = $1 . lc $2;
49
12
6
    if (defined $expected{$item}) {
50
2
2
      $next = $expected{$item};
51
2
1
      $next = maybe($next, $value + .1);
52    } else {
53
10
7
      $item = lc $item;
54
10
5
      if (defined $expected{$item}) {
55
5
12
        $next = $expected{$item};
56
5
5
        $next = maybe($next, $value + .2);
57      }
58    }
59  }
60
94
82
  return 0 unless defined $next;
61
29
21
  $expected{$item} = $next;
62
29
60
  return $value;
63}
64
65sub skip_item {
66
50
33
  my ($word) = @_;
67
50
22
  return 1 if expect_item($word, 1);
68
32
16
  my $key = lc $word;
69
32
23
  return 2 if expect_item($key, 2);
70
32
49
  if ($key =~ /.s$/) {
71
2
3
    if ($key =~ /ies$/) {
72
1
1
      $key =~ s/ies$/y/;
73    } else {
74
1
2
      $key =~ s/s$//;
75    }
76  } elsif ($key =~ /^(.+[^aeiou])ed$/) {
77
1
1
    $key = $1;
78  } elsif ($key =~ /^(.+)'[ds]$/) {
79
4
2
    $key = $1;
80  } else {
81
25
23
    return 0;
82  }
83
7
5
  return 3 if expect_item($key, 3);
84
0
0
  return 0;
85}
86
87sub should_skip_warning {
88
63
42
  my ($warning) = @_;
89
63
76
  if ($warning =~ /\(([-\w]+)\)$/) {
90
59
36
    my ($code) = ($1);
91
59
33
    return 1 if CheckSpelling::Util::is_ignoring_event($code);
92  }
93
63
54
  return 0;
94}
95
96sub should_skip_warning_while_counting {
97
11
10
  my ($warning) = @_;
98
11
21
  return 0 unless ($warning =~ /\(([-\w]+)\)$/);
99
11
8
  my ($code) = ($1);
100
11
10
  return 1 if CheckSpelling::Util::is_ignoring_event($code);
101
102
9
3
  our %counters;
103
9
9
  ++$counters{$code};
104
9
9
  return 0;
105}
106
107sub log_skip_item {
108
46
53
  my ($item, $file, $warning, $unknown_word_limit) = @_;
109
46
25
  return 1 if should_skip_warning $warning;
110
46
20
  return 1 if skip_item($item);
111
21
9
  our %seen;
112
21
15
  my $seen_count = $seen{$item};
113
21
12
  if (defined $seen_count) {
114
8
8
    if (!defined $unknown_word_limit || ($seen_count++ < $unknown_word_limit)) {
115
7
34
      print MORE_WARNINGS "$file$warning\n";
116    } else {
117
1
1
      our %last_seen;
118
1
1
      $last_seen{$item} = "$file$warning";
119    }
120
8
8
    $seen{$item} = $seen_count;
121
8
17
    return 1;
122  }
123
13
12
  $seen{$item} = 1;
124
13
13
  return 0;
125}
126
127sub stem_word {
128
28
848
  my ($key) = @_;
129
28
8
  our $disable_word_collating;
130
28
22
  return $key if $disable_word_collating;
131
28
6
  our $shortest_word;
132
28
23
  $shortest_word = 2 unless defined $shortest_word;
133
28
12
  my $key_length = length $key;
134
135
28
45
  if ($key =~ /.s$/) {
136
5
37
    if ($key_length > ($shortest_word + 1) && $key =~ /ies$/) {
137
1
2
      $key =~ s/ies$/y/;
138    } elsif ($key_length > $shortest_word && $key !~ /ies$/) {
139
3
6
      $key =~ s/s$//;
140    }
141  } elsif ($key_length > ($shortest_word + 1) && $key =~ /.[^aeiou]ed$/) {
142
2
8
    $key =~ s/ed$//;
143  }
144
28
29
  return $key;
145}
146
147sub collate_key {
148
81
46
  my ($key) = @_;
149
81
34
  our $disable_word_collating;
150
81
31
  my $char;
151
81
51
  if ($disable_word_collating) {
152
8
9
    $char = lc substr $key, 0, 1;
153  } else {
154
73
44
    $key = lc $key;
155
73
57
    $key =~ s/''+/'/g;
156
73
35
    $key =~ s/'[sd]$//;
157
73
42
    $key =~ s/^[^Ii]?'+(.*)/$1/;
158
73
31
    $key =~ s/(.*?)'$/$1/;
159
73
66
    $char = substr $key, 0, 1;
160  }
161
81
94
  return ($key, $char);
162}
163
164sub load_expect {
165
12
509
  my ($expect) = @_;
166
12
6
  our %expected;
167
12
15
  %expected = ();
168
12
98
  if (open(EXPECT, '<:utf8', $expect)) {
169
12
64
    while (my $word = <EXPECT>) {
170
43
59
      $word =~ s/\R//;
171
43
96
      $expected{$word} = 0;
172    }
173
12
32
    close EXPECT;
174  }
175}
176
177sub harmonize_expect {
178
11
6
  our $disable_word_collating;
179
11
5
  our %letter_map;
180
11
6
  our %expected;
181
182
11
40
  for my $word (keys %expected) {
183
40
27
    my ($key, $char) = collate_key $word;
184
40
22
    my %word_map = ();
185
40
49
    next unless defined $letter_map{$char}{$key};
186
13
13
7
15
    %word_map = %{$letter_map{$char}{$key}};
187
13
16
    next if defined $word_map{$word};
188
3
1
    my $words = scalar keys %word_map;
189
3
3
    next if $words > 2;
190
3
3
    if ($word eq $key) {
191
1
1
      next if ($words > 1);
192    }
193
2
2
    delete $expected{$word};
194  }
195}
196
197sub group_related_words {
198
12
6
  our %letter_map;
199
12
5
  our $disable_word_collating;
200
12
9
  return if $disable_word_collating;
201
202  # group related words
203
11
25
  for my $char (sort CheckSpelling::Util::number_biased keys %letter_map) {
204
19
19
5
23
    for my $plural_key (sort keys(%{$letter_map{$char}})) {
205
22
9
      my $key = stem_word $plural_key;
206
22
25
      next if $key eq $plural_key;
207
4
5
      next unless defined $letter_map{$char}{$key};
208
3
3
2
7
      my %word_map = %{$letter_map{$char}{$key}};
209
3
3
14
4
      for my $word (keys(%{$letter_map{$char}{$plural_key}})) {
210
3
3
        $word_map{$word} = 1;
211      }
212
3
3
      $letter_map{$char}{$key} = \%word_map;
213
3
5
      delete $letter_map{$char}{$plural_key};
214    }
215  }
216}
217
218sub count_warning {
219
9
9
  my ($warning) = @_;
220
9
3
  our %counters;
221
9
20
  if ($warning =~ /\(([-\w]+)\)$/) {
222
3
3
    my ($code) = ($1);
223
3
4
    return if CheckSpelling::Util::is_ignoring_event($code);
224
2
2
    ++$counters{$code};
225
2
2
    return $code;
226  }
227}
228
229sub report_timing {
230
0
0
  my ($name, $start_time, $directory, $marker) = @_;
231
0
0
  my $end_time = (stat "$directory/$marker")[9];
232
0
0
  $name =~ s/"/\\"/g;
233
0
0
  print TIMING_REPORT "\"$name\", $start_time, $end_time\n";
234}
235
236sub get_pattern_with_context {
237
24
13
  my ($path) = @_;
238
24
23
  return unless defined $ENV{$path};
239
24
27
  $ENV{$path} =~ /(.*)/;
240
24
156
  return unless open ITEMS, '<:utf8', $1;
241
242
24
11
  my @items;
243
24
18
  my $context = '';
244
24
100
  while (<ITEMS>) {
245
5
6
    my $pattern = $_;
246
5
6
    if ($pattern =~ /^#/) {
247
2
3
      if ($pattern =~ /^# /) {
248
2
3
        $context .= $pattern;
249      } else {
250
0
0
        $context = '';
251      }
252
2
4
      next;
253    }
254
3
4
    chomp $pattern;
255
3
5
    unless ($pattern =~ /./) {
256
1
1
      $context = '';
257
1
1
      next;
258    }
259
2
3
    push @items, $context.$pattern;
260
2
6
    $context = '';
261  }
262
24
82
  close ITEMS;
263
24
28
  return @items;
264}
265
266sub summarize_totals {
267
24
22
  my ($formatter, $path, $items, $totals, $file_counts) = @_;
268
24
24
11
24
  return unless @{$totals};
269
2
58
  return unless open my $fh, '>:utf8', $path;
270
2
2
1
1
  my $totals_count = scalar(@{$totals}) - 1;
271
2
2
  my @indices;
272
2
2
  if ($file_counts) {
273    @indices = sort {
274
1
0
2
0
      $totals->[$b] <=> $totals->[$a] ||
275      $file_counts->[$b] <=> $file_counts->[$a]
276    } 0 .. $totals_count;
277  } else {
278    @indices = sort {
279
1
0
2
0
      $totals->[$b] <=> $totals->[$a]
280    } 0 .. $totals_count;
281  }
282
2
2
  for my $i (@indices) {
283
2
2
    last unless $totals->[$i] > 0;
284
2
2
    my $rule_with_context = $items->[$i];
285
2
2
    my ($description, $rule);
286
2
5
    if ($rule_with_context =~ /^(.*\n)([^\n]+)$/s) {
287
2
4
      ($description, $rule) = ($1, $2);
288    } else {
289
0
0
      ($description, $rule) = ('', $rule_with_context);
290    }
291
2
4
    print $fh $formatter->(
292      $totals->[$i],
293      ($file_counts ? " file-count: $file_counts->[$i]" : ""),
294      $description,
295      $rule
296    );
297  }
298
2
81
  close $fh;
299}
300
301sub get_special {
302
19
18
  my ($file, $special) = @_;
303
19
27
  return 'file-list' if $file eq $special->{'file_list'};
304
17
19
  return 'pr-title' if $file eq $special->{'pr_title_file'};
305
15
15
  return 'pr-description' if $file eq $special->{'pr_description_file'};
306
13
24
  return 'commit-message' if !rindex($file, $special->{'commit_messages'});
307
11
15
  return 'file';
308}
309
310sub main {
311
12
21462
  my @directories;
312  my @cleanup_directories;
313
12
0
  my @check_file_paths;
314
315
12
14
  CheckSpelling::Util::build_ignored_event_map();
316
12
12
  my $early_warnings = CheckSpelling::Util::get_file_from_env('early_warnings', '/dev/null');
317
12
10
  my $warning_output = CheckSpelling::Util::get_file_from_env('warning_output', '/dev/stderr');
318
12
8
  my $more_warnings = CheckSpelling::Util::get_file_from_env('more_warnings', '/dev/stderr');
319
12
11
  my $counter_summary = CheckSpelling::Util::get_file_from_env('counter_summary', '/dev/stderr');
320
12
9
  my $should_exclude_file = CheckSpelling::Util::get_file_from_env('should_exclude_file', '/dev/null');
321
12
12
  my $unknown_word_limit = CheckSpelling::Util::get_val_from_env('unknown_word_limit', undef);
322
12
7
  my $unknown_file_word_limit = CheckSpelling::Util::get_val_from_env('unknown_file_word_limit', undef);
323
12
7
  my $candidate_example_limit = CheckSpelling::Util::get_file_from_env('INPUT_CANDIDATE_EXAMPLE_LIMIT', '3');
324
12
9
  my $disable_flags = CheckSpelling::Util::get_file_from_env('INPUT_DISABLE_CHECKS', '');
325
12
10
  my $only_check_changed_files = CheckSpelling::Util::get_file_from_env('INPUT_ONLY_CHECK_CHANGED_FILES', '');
326
12
8
  my $disable_noisy_file = $disable_flags =~ /(?:^|,|\s)noisy-file(?:,|\s|$)/;
327
12
35
  our $disable_word_collating = $only_check_changed_files || $disable_flags =~ /(?:^|,|\s)word-collating(?:,|\s|$)/;
328
12
11
  our $shortest_word = CheckSpelling::Util::get_val_from_env('INPUT_SHORTEST_WORD', undef);
329
12
8
  my $file_list = CheckSpelling::Util::get_file_from_env('check_file_names', '');
330
12
9
  my $pr_title_file = CheckSpelling::Util::get_file_from_env('pr_title_file', '');
331
12
8
  my $pr_description_file = CheckSpelling::Util::get_file_from_env('pr_description_file', '');
332
12
8
  my $commit_messages = CheckSpelling::Util::get_file_from_env('commit_messages', '');
333
12
7
  my $timing_report = CheckSpelling::Util::get_file_from_env('timing_report', '');
334
12
18
  my $special = {
335    'file_list' => $file_list,
336    'pr_title_file' => $pr_title_file,
337    'pr_description_file' => $pr_description_file,
338    'commit_messages' => $commit_messages,
339  };
340
12
9
  my ($start_time, $end_time);
341
342
12
226
  open WARNING_OUTPUT, '>:utf8', $warning_output;
343
12
163
  open MORE_WARNINGS, '>:utf8', $more_warnings;
344
12
154
  open COUNTER_SUMMARY, '>:utf8', $counter_summary;
345
12
101
  open SHOULD_EXCLUDE, '>:utf8', $should_exclude_file;
346
12
12
  if ($timing_report) {
347
0
0
    open TIMING_REPORT, '>:utf8', $timing_report;
348
0
0
    print TIMING_REPORT "file, start, finish\n";
349  }
350
351
12
11
  my @candidates = get_pattern_with_context('candidates_path');
352
12
8
  my @candidate_totals = (0) x scalar @candidates;
353
12
9
  my @candidate_file_counts = (0) x scalar @candidates;
354
355
12
9
  my @forbidden = get_pattern_with_context('forbidden_path');
356
12
5
  my @forbidden_totals = (0) x scalar @forbidden;
357
358
12
9
  my @delayed_warnings;
359
12
23
  our %letter_map = ();
360
361
12
7
  my %file_map = ();
362
363
12
25
  for my $directory (<>) {
364
15
15
    chomp $directory;
365
15
26
    next unless $directory =~ /^(.*)$/;
366
15
13
    $directory = $1;
367
15
47
    unless (-e $directory) {
368
1
4
      print STDERR "Could not find: $directory\n";
369
1
1
      next;
370    }
371
14
37
    unless (-d $directory) {
372
1
12
      print STDERR "Not a directory: $directory\n";
373
1
2
      next;
374    }
375
376    # if there's no filename, we can't report
377
13
93
    next unless open(NAME, '<:utf8', "$directory/name");
378
12
68
    my $file=<NAME>;
379
12
24
    close NAME;
380
381
12
24
    $file_map{$file} = $directory;
382  }
383
384
12
22
  for my $file (sort keys %file_map) {
385
12
10
    my $directory = $file_map{$file};
386
12
12
    if ($timing_report) {
387
0
0
      $start_time = (stat "$directory/name")[9];
388    }
389
390
12
96
    if (-e "$directory/skipped") {
391
1
7
      open SKIPPED, '<:utf8', "$directory/skipped";
392
1
8
      my $reason=<SKIPPED>;
393
1
3
      close SKIPPED;
394
1
0
      chomp $reason;
395
1
2
      my $wrapped = CheckSpelling::Util::wrap_in_backticks($file);
396
1
4
      push @delayed_warnings, "$file:1:1 ... 1, Warning - Skipping $wrapped because $reason\n";
397
1
7
      print SHOULD_EXCLUDE "$file\n";
398
1
2
      push @cleanup_directories, $directory;
399
1
0
      report_timing($file, $start_time, $directory, 'skipped') if ($timing_report);
400
1
2
      next;
401    }
402
403    # stats isn't written if there was nothing interesting in the file
404
11
40
    unless (-s "$directory/stats") {
405
1
1
      report_timing($file, $start_time, $directory, 'warnings') if ($timing_report);
406
1
1
      push @directories, $directory;
407
1
1
      next;
408    }
409
410
10
10
    if ($file eq $file_list) {
411
1
6
      open FILE_LIST, '<:utf8', $file_list;
412
1
1
      push @check_file_paths, '0 placeholder';
413
1
5
      for my $check_file_path (<FILE_LIST>) {
414
4
4
        chomp $check_file_path;
415
4
3
        push @check_file_paths, $check_file_path;
416      }
417
1
3
      close FILE_LIST;
418    }
419
420
10
5
    my ($words, $unrecognized, $unknown, $unique);
421
422    {
423
10
10
5
59
      open STATS, '<:utf8', "$directory/stats";
424
10
47
      my $stats=<STATS>;
425
10
20
      close STATS;
426
10
8
      $words=get_field($stats, 'words');
427
10
6
      $unrecognized=get_field($stats, 'unrecognized');
428
10
9
      $unknown=get_field($stats, 'unknown');
429
10
9
      $unique=get_field($stats, 'unique');
430
10
6
      my @candidate_list;
431
10
9
      if (@candidate_totals) {
432
1
1
        @candidate_list=get_array($stats, 'candidates');
433
1
1
        my @lines=get_array($stats, 'candidate_lines');
434
1
2
        if (@candidate_list) {
435
1
1
          for (my $i=0; $i < scalar @candidate_list; $i++) {
436
1
1
            my $hits = $candidate_list[$i];
437
1
1
            if ($hits) {
438
1
1
              $candidate_totals[$i] += $hits;
439
1
1
              if ($candidate_file_counts[$i]++ < $candidate_example_limit) {
440
1
3
                my $pattern = (split /\n/,$candidates[$i])[-1];
441
1
1
                my $position = $lines[$i];
442
1
4
                $position =~ s/:(\d+)$/ ... $1/;
443
1
2
                my $wrapped = CheckSpelling::Util::truncate_with_ellipsis(CheckSpelling::Util::wrap_in_backticks($pattern), 99);
444
1
1
                my $candidate_label = '';
445
1
2
                if ($candidates[$i] =~ /^#\s+(\S.+)/) {
446
1
2
                  $candidate_label = " ($1)";
447                }
448
1
5
                push @delayed_warnings, "$file:$position, Notice - Line matches candidate pattern$candidate_label $wrapped (candidate-pattern)\n";
449              }
450            }
451          }
452        }
453      }
454
10
12
      if (@forbidden_totals) {
455
1
1
        my @forbidden_list=get_array($stats, 'forbidden');
456
1
1
        my @lines=get_array($stats, 'forbidden_lines');
457
1
0
        if (@forbidden_list) {
458
1
2
          for (my $i=0; $i < scalar @forbidden_list; $i++) {
459
1
0
            my $hits = $forbidden_list[$i];
460
1
1
            if ($hits) {
461
1
3
              $forbidden_totals[$i] += $hits;
462            }
463          }
464        }
465      }
466      #print STDERR "$file (unrecognized: $unrecognized; unique: $unique; unknown: $unknown, words: $words, candidates: [".join(", ", @candidate_list)."])\n";
467    }
468
469
10
5
    report_timing($file, $start_time, $directory, 'unknown') if ($timing_report);
470
10
10
    my $kind = get_special($file, $special);
471    # These heuristics are very new and need tuning/feedback
472
10
12
    if (
473        ($unknown > $unique)
474        # || ($unrecognized > $words / 2)
475    ) {
476
1
2
      unless ($disable_noisy_file) {
477
1
1
        if ($kind eq 'file') {
478
1
1
          print SHOULD_EXCLUDE "$file\n";
479        }
480
1
1
        my $warning = "noisy-$kind";
481
1
1
        count_warning $warning;
482
1
1
        my $wrapped = CheckSpelling::Util::wrap_in_backticks($file);
483
1
3
        push @delayed_warnings, "$file:1:1 ... 1, Warning - Skipping $wrapped because it seems to have more noise ($unknown) than unique words ($unique) (total: $unrecognized / $words). ($warning)\n";
484
1
0
        push @cleanup_directories, $directory;
485
1
2
        next;
486      }
487    }
488
9
7
    push @directories, $directory;
489
9
39
    unless ($kind =~ /^file/ && -s "$directory/unknown") {
490
4
7
      next;
491    }
492
5
31
    open UNKNOWN, '<:utf8', "$directory/unknown";
493
5
39
    for my $token (<UNKNOWN>) {
494
43
44
      $token =~ s/\R//;
495
43
40
      next unless $token =~ /./;
496
41
23
      my ($key, $char) = collate_key $token;
497
41
49
      $letter_map{$char} = () unless defined $letter_map{$char};
498
41
18
      my %word_map = ();
499
41
14
35
17
      %word_map = %{$letter_map{$char}{$key}} if defined $letter_map{$char}{$key};
500
41
40
      $word_map{$token} = 1;
501
41
50
      $letter_map{$char}{$key} = \%word_map;
502    }
503
5
19
    close UNKNOWN;
504  }
505
12
28
  close SHOULD_EXCLUDE;
506
12
11
  close TIMING_REPORT if $timing_report;
507
508  summarize_totals(
509    sub {
510
1
1
      my ($hits, $files, $context, $pattern) = @_;
511
1
8
      return "# hit-count: $hits$files\n$context$pattern\n\n",
512    },
513
12
36
    CheckSpelling::Util::get_file_from_env('candidate_summary', '/dev/stderr'),
514    \@candidates,
515    \@candidate_totals,
516    \@candidate_file_counts,
517  );
518
519  summarize_totals(
520    sub {
521
1
6
      my (undef, undef, $context, $pattern) = @_;
522
1
3
      $context =~ s/^# //gm;
523
1
1
      chomp $context;
524
1
0
      my $details;
525
1
4
      if ($context =~ /^(.*?)$(.*)/ms) {
526
1
1
        ($context, $details) = ($1, $2);
527
1
1
        $details = "\n$details" if $details;
528      }
529
1
1
      $context = 'Pattern' unless $context;
530
1
5
      return "##### $context$details\n```\n$pattern\n```\n\n";
531    },
532
12
48
    CheckSpelling::Util::get_file_from_env('forbidden_summary', '/dev/stderr'),
533    \@forbidden,
534    \@forbidden_totals,
535  );
536
537
12
42
  group_related_words;
538
539
12
19
  if (defined $ENV{'expect'}) {
540
11
13
    $ENV{'expect'} =~ /(.*)/;
541
11
10
    load_expect $1;
542
11
7
    harmonize_expect;
543  }
544
545
12
9
  my %seen = ();
546
12
4
  our %counters;
547
12
9
  %counters = ();
548
549
12
43
  if (-s $early_warnings) {
550
1
7
    open WARNINGS, '<:utf8', $early_warnings;
551
1
6
    for my $warning (<WARNINGS>) {
552
1
2
      chomp $warning;
553
1
1
      next if should_skip_warning_while_counting $warning;
554
1
5
      print WARNING_OUTPUT "$warning\n";
555    }
556
1
3
    close WARNINGS;
557  }
558
559
12
5
  our %last_seen;
560
12
5
  my %unknown_file_word_count;
561
12
12
  for my $directory (@directories) {
562
10
34
    next unless (-s "$directory/warnings");
563
9
55
    next unless open(NAME, '<:utf8', "$directory/name");
564
9
29
    my $file=<NAME>;
565
9
19
    close NAME;
566
9
6
    my $kind = get_special($file, $special);
567
9
53
    open WARNINGS, '<:utf8', "$directory/warnings";
568
9
8
    if ($kind ne 'file-list') {
569
8
59
      for my $warning (<WARNINGS>) {
570
50
18
        my $code;
571
50
35
        chomp $warning;
572
50
95
        if ($warning =~ m/:(\d+):(\d+ \.\.\. \d+): `(.*)`/) {
573
46
47
          my ($line, $range, $item) = ($1, $2, $3);
574
46
23
          my $wrapped = CheckSpelling::Util::wrap_in_backticks($item);
575
46
25
          my $reason = 'unrecognized-spelling';
576
46
27
          $reason .= "-$kind" unless $kind eq 'file';
577
46
94
          $warning =~ s/:\d+:\d+ \.\.\. \d+: `.*`/:$line:$range, Warning - $wrapped is not a recognized word ($reason)/;
578
46
35
          next if log_skip_item($item, $file, $warning, $unknown_word_limit);
579
13
7
          count_warning $warning if $kind ne 'file';
580
13
11
          $code = $reason;
581        } else {
582
4
5
          if ($warning =~ /\`(.*?)\` in line \(token-is-substring\)/) {
583
0
0
            next if skip_item($1);
584          }
585
4
3
          $code = count_warning $warning;
586        }
587
17
10
        next if should_skip_warning $warning, $code;
588
17
64
        print WARNING_OUTPUT "$file$warning\n";
589      }
590    } else {
591
1
7
      for my $warning (<WARNINGS>) {
592
6
5
        chomp $warning;
593
6
11
        next unless $warning =~ s/^:(\d+)/:1/;
594
6
4
        $file = $check_file_paths[$1];
595
6
14
        if ($warning =~ m/:(\d+ \.\.\. \d+): `(.*)`/) {
596
4
3
          my ($range, $item) = ($1, $2);
597
4
5
          my $wrapped = CheckSpelling::Util::wrap_in_backticks($item);
598
4
12
          $warning =~ s/:\d+ \.\.\. \d+: `.*`/:$range, Warning - $wrapped is not a recognized word (check-file-path)/;
599
4
2
          next if skip_item($item);
600
4
3
          if (defined $unknown_file_word_limit) {
601
4
6
            next if ++$unknown_file_word_count{$item} > $unknown_file_word_limit;
602          }
603        }
604
5
1
        next if should_skip_warning_while_counting $warning;
605
4
18
        print WARNING_OUTPUT "$file$warning\n";
606      }
607    }
608
9
34
    close WARNINGS;
609  }
610
12
260
  close MORE_WARNINGS;
611
612
12
11
  for my $warning (@delayed_warnings) {
613
3
3
    next if should_skip_warning_while_counting $warning;
614
3
4
    print WARNING_OUTPUT $warning;
615  }
616
12
12
  if (defined $unknown_word_limit) {
617
1
2
    for my $warned_word (sort keys %last_seen) {
618
1
3
      my $warning_count = $seen{$warned_word} || 0;
619
1
1
      next unless $warning_count >= $unknown_word_limit;
620
0
0
      my $warning = $last_seen{$warned_word};
621
0
0
      $warning =~ s/\Q (unrecognized-spelling)\E/ -- found $warning_count times (limited-references)\n/;
622
0
0
      next if should_skip_warning_while_counting $warning;
623
0
0
      print WARNING_OUTPUT $warning;
624    }
625  }
626
12
322
  close WARNING_OUTPUT;
627
628
12
13
  if (%counters) {
629
3
3
    my $continue='';
630
3
4
    print COUNTER_SUMMARY "{\n";
631
3
6
    for my $code (sort keys %counters) {
632
6
10
      print COUNTER_SUMMARY qq<$continue"$code": $counters{$code}\n>;
633
6
7
      $continue=',';
634    }
635
3
3
    print COUNTER_SUMMARY "}\n";
636  }
637
12
88
  close COUNTER_SUMMARY;
638
639  # display the current unknown
640
12
29
  for my $char (sort keys %letter_map) {
641
43
43
25
129
    for my $key (sort CheckSpelling::Util::case_biased keys(%{$letter_map{$char}})) {
642
24
24
15
44
      my %word_map = %{$letter_map{$char}{$key}};
643
24
23
      my @words = keys(%word_map);
644
24
15
      if (scalar(@words) > 1) {
645
13
20
11
75
        print $key." (".(join ", ", sort { length($a) <=> length($b) || $a cmp $b } @words).")";
646      } else {
647
11
45
        print $words[0];
648      }
649
24
101
      print "\n";
650    }
651  }
652}
653
654sub collate_expect {
655
2
659
  my ($collated, $notes) = @_;
656
2
2
  my %word_map;
657  my %has_canonical;
658
2
21
  open EXPECT, '<', $collated;
659
2
29
  while (<EXPECT>) {
660
7
6
    chomp;
661
7
17
    next unless /(.*) \((.*)\)/;
662
2
2
    my ($key, $list) = ($1, $2);
663
2
4
    for my $variant (split /, /, $list) {
664
4
7
      if ($variant eq $key) {
665
2
5
        $has_canonical{$key} = 1;
666      } else {
667
2
2
        $word_map{$variant} = $key;
668      }
669    }
670  }
671
2
6
  close EXPECT;
672
2
2
3
4
  my $pattern = '`('.join('|', map { quotemeta($_) } keys %word_map).')`';
673
2
14
  open SOURCES, '<', $notes;
674
2
12
  while (<SOURCES>) {
675
13
31
    if ($_ =~ /$pattern/) {
676
5
3
      my $match = $1;
677
5
6
      my $canonical_match = $word_map{$match};
678
5
4
      my $print = 0;
679
5
7
      my $wrapped = CheckSpelling::Util::wrap_in_backticks($canonical_match);
680
5
5
      if (defined $has_canonical{$canonical_match}) {
681
5
11
        $print = 1 if s/not a recognized word/ignored because another more general variant ($wrapped) is also in expect/;
682
5
8
        $print = 1 if s/unrecognized-spelling/ignored-expect-variant/;
683      } else {
684
0
0
        $print = 1 if s/is not a recognized word/should be replaced by the more general variant ($wrapped)/;
685
0
0
        $print = 1 if s/unrecognized-spelling/update-expect-variant/;
686      }
687
5
7
      next unless $print;
688    } else {
689
8
18
      next unless /\(((?:\w+-)+\w+)\)$/;
690
4
6
      next if $1 eq 'unrecognized-spelling';
691    }
692
6
34
    print;
693  }
694
2
5
  close SOURCES;
695
2
3
  return 0;
696}
697
6981;