File Coverage

File:lib/CheckSpelling/SpellingCollator.pm
Coverage:90.5%

linestmtbrancondsubtimecode
1#! -*-perl-*-
2
3package CheckSpelling::SpellingCollator;
4
5our $VERSION='0.1.0';
6
1
1
111281
1
use 5.022;
7
1
1
1
2
0
3
use utf8;
8
1
1
1
16
0
55
use feature 'unicode_strings';
9
1
1
1
1
1
16
use warnings;
10
1
1
1
3
0
28
use File::Path qw(remove_tree);
11
1
1
1
255
1
3336
use CheckSpelling::Util;
12
13my %letter_map;
14my %ignored_event_map;
15my $disable_word_collating;
16my $shortest_word;
17
18my %last_seen;
19
20sub get_field {
21
40
27
  my ($record, $field) = @_;
22
40
377
  return 0 unless $record =~ (/\b$field:\s*(\d+)/);
23
30
37
  return $1;
24}
25
26sub get_array {
27
4
5
  my ($record, $field) = @_;
28
4
32
  return () unless $record =~ (/\b$field: \[([^\]]+)\]/);
29
4
3
  my $values = $1;
30
4
7
  return split /\s*,\s*/, $values;
31}
32
33sub maybe {
34
7
5
  my ($next, $value) = @_;
35
7
21
  $next = $value unless $next && $next < $value;
36
7
7
  return $next;
37}
38
39my %expected = ();
40sub expect_item {
41
94
81
  my ($item, $value) = @_;
42
94
56
  $item =~ s/’/'/g;
43
94
42
  our %expected;
44
94
29
  my $next;
45
94
100
  if (defined $expected{$item}) {
46
22
17
    $next = $expected{$item};
47
22
11
    $next = $value if $value < $next;
48  } elsif ($item =~ /^([A-Z])(.*)/) {
49
12
11
    $item = $1 . lc $2;
50
12
9
    if (defined $expected{$item}) {
51
2
1
      $next = $expected{$item};
52
2
2
      $next = maybe($next, $value + .1);
53    } else {
54
10
5
      $item = lc $item;
55
10
9
      if (defined $expected{$item}) {
56
5
3
        $next = $expected{$item};
57
5
4
        $next = maybe($next, $value + .2);
58      }
59    }
60  }
61
94
104
  return 0 unless defined $next;
62
29
20
  $expected{$item} = $next;
63
29
57
  return $value;
64}
65
66sub skip_item {
67
50
28
  my ($word) = @_;
68
50
26
  return 1 if expect_item($word, 1);
69
32
22
  my $key = lc $word;
70
32
17
  return 2 if expect_item($key, 2);
71
32
42
  if ($key =~ /.s$/) {
72
2
5
    if ($key =~ /ies$/) {
73
1
1
      $key =~ s/ies$/y/;
74    } else {
75
1
2
      $key =~ s/s$//;
76    }
77  } elsif ($key =~ /^(.+[^aeiou])ed$/) {
78
1
1
    $key = $1;
79  } elsif ($key =~ /^(.+)'[ds]$/) {
80
4
4
    $key = $1;
81  } else {
82
25
25
    return 0;
83  }
84
7
8
  return 3 if expect_item($key, 3);
85
0
0
  return 0;
86}
87
88sub should_skip_warning {
89
63
42
  my ($warning) = @_;
90
63
89
  if ($warning =~ /\(([-\w]+)\)$/) {
91
59
40
    my ($code) = ($1);
92
59
18
    our %ignored_event_map;
93
59
49
    return 1 if $ignored_event_map{$code};
94  }
95
63
50
  return 0;
96}
97
98sub should_skip_warning_while_counting {
99
11
9
  my ($warning) = @_;
100
11
24
  return 0 unless ($warning =~ /\(([-\w]+)\)$/);
101
11
10
  my ($code) = ($1);
102
11
2
  our %ignored_event_map;
103
11
14
  return 1 if $ignored_event_map{$code};
104
105
9
3
  our %counters;
106
9
9
  ++$counters{$code};
107
9
11
  return 0;
108}
109
110sub log_skip_item {
111
46
61
  my ($item, $file, $warning, $unknown_word_limit) = @_;
112
46
28
  return 1 if should_skip_warning $warning;
113
46
27
  return 1 if skip_item($item);
114
21
9
  our %seen;
115
21
16
  my $seen_count = $seen{$item};
116
21
11
  if (defined $seen_count) {
117
8
12
    if (!defined $unknown_word_limit || ($seen_count++ < $unknown_word_limit)) {
118
7
32
      print MORE_WARNINGS "$file$warning\n";
119    } else {
120
1
0
      our %last_seen;
121
1
2
      $last_seen{$item} = "$file$warning";
122    }
123
8
9
    $seen{$item} = $seen_count;
124
8
14
    return 1;
125  }
126
13
10
  $seen{$item} = 1;
127
13
12
  return 0;
128}
129
130sub stem_word {
131
28
955
  my ($key) = @_;
132
28
15
  our $disable_word_collating;
133
28
22
  return $key if $disable_word_collating;
134
28
15
  our $shortest_word;
135
28
20
  $shortest_word = 2 unless defined $shortest_word;
136
28
16
  my $key_length = length $key;
137
138
28
58
  if ($key =~ /.s$/) {
139
5
24
    if ($key_length > ($shortest_word + 1) && $key =~ /ies$/) {
140
1
2
      $key =~ s/ies$/y/;
141    } elsif ($key_length > $shortest_word && $key !~ /ies$/) {
142
3
6
      $key =~ s/s$//;
143    }
144  } elsif ($key_length > ($shortest_word + 1) && $key =~ /.[^aeiou]ed$/) {
145
2
4
    $key =~ s/ed$//;
146  }
147
28
31
  return $key;
148}
149
150sub collate_key {
151
81
49
  my ($key) = @_;
152
81
34
  our $disable_word_collating;
153
81
32
  my $char;
154
81
51
  if ($disable_word_collating) {
155
8
8
    $char = lc substr $key, 0, 1;
156  } else {
157
73
49
    $key = lc $key;
158
73
40
    $key =~ s/''+/'/g;
159
73
52
    $key =~ s/'[sd]$//;
160
73
37
    $key =~ s/^[^Ii]?'+(.*)/$1/;
161
73
30
    $key =~ s/(.*?)'$/$1/;
162
73
63
    $char = substr $key, 0, 1;
163  }
164
81
110
  return ($key, $char);
165}
166
167sub load_expect {
168
12
489
  my ($expect) = @_;
169
12
30
  our %expected;
170
12
13
  %expected = ();
171
12
102
  if (open(EXPECT, '<:utf8', $expect)) {
172
12
60
    while (my $word = <EXPECT>) {
173
43
65
      $word =~ s/\R//;
174
43
104
      $expected{$word} = 0;
175    }
176
12
32
    close EXPECT;
177  }
178}
179
180sub harmonize_expect {
181
11
7
  our $disable_word_collating;
182
11
3
  our %letter_map;
183
11
7
  our %expected;
184
185
11
14
  for my $word (keys %expected) {
186
40
25
    my ($key, $char) = collate_key $word;
187
40
25
    my %word_map = ();
188
40
45
    next unless defined $letter_map{$char}{$key};
189
13
13
6
18
    %word_map = %{$letter_map{$char}{$key}};
190
13
17
    next if defined $word_map{$word};
191
3
2
    my $words = scalar keys %word_map;
192
3
6
    next if $words > 2;
193
3
3
    if ($word eq $key) {
194
1
2
      next if ($words > 1);
195    }
196
2
2
    delete $expected{$word};
197  }
198}
199
200sub group_related_words {
201
12
4
  our %letter_map;
202
12
8
  our $disable_word_collating;
203
12
9
  return if $disable_word_collating;
204
205  # group related words
206
11
38
  for my $char (sort CheckSpelling::Util::number_biased keys %letter_map) {
207
19
19
8
19
    for my $plural_key (sort keys(%{$letter_map{$char}})) {
208
22
18
      my $key = stem_word $plural_key;
209
22
21
      next if $key eq $plural_key;
210
4
4
      next unless defined $letter_map{$char}{$key};
211
3
3
3
3
      my %word_map = %{$letter_map{$char}{$key}};
212
3
3
2
4
      for my $word (keys(%{$letter_map{$char}{$plural_key}})) {
213
3
3
        $word_map{$word} = 1;
214      }
215
3
4
      $letter_map{$char}{$key} = \%word_map;
216
3
3
      delete $letter_map{$char}{$plural_key};
217    }
218  }
219}
220
221sub count_warning {
222
9
7
  my ($warning) = @_;
223
9
6
  our %counters;
224
9
10
  our %ignored_event_map;
225
9
19
  if ($warning =~ /\(([-\w]+)\)$/) {
226
3
3
    my ($code) = ($1);
227
3
4
    return if defined $ignored_event_map{$code};
228
2
2
    ++$counters{$code};
229
2
2
    return $code;
230  }
231}
232
233sub report_timing {
234
0
0
  my ($name, $start_time, $directory, $marker) = @_;
235
0
0
  my $end_time = (stat "$directory/$marker")[9];
236
0
0
  $name =~ s/"/\\"/g;
237
0
0
  print TIMING_REPORT "\"$name\", $start_time, $end_time\n";
238}
239
240sub get_pattern_with_context {
241
24
16
  my ($path) = @_;
242
24
25
  return unless defined $ENV{$path};
243
24
19
  $ENV{$path} =~ /(.*)/;
244
24
138
  return unless open ITEMS, '<:utf8', $1;
245
246
24
16
  my @items;
247
24
14
  my $context = '';
248
24
87
  while (<ITEMS>) {
249
5
5
    my $pattern = $_;
250
5
7
    if ($pattern =~ /^#/) {
251
2
3
      if ($pattern =~ /^# /) {
252
2
3
        $context .= $pattern;
253      } else {
254
0
0
        $context = '';
255      }
256
2
4
      next;
257    }
258
3
3
    chomp $pattern;
259
3
5
    unless ($pattern =~ /./) {
260
1
1
      $context = '';
261
1
2
      next;
262    }
263
2
3
    push @items, $context.$pattern;
264
2
6
    $context = '';
265  }
266
24
47
  close ITEMS;
267
24
29
  return @items;
268}
269
270sub summarize_totals {
271
24
20
  my ($formatter, $path, $items, $totals, $file_counts) = @_;
272
24
24
12
22
  return unless @{$totals};
273
2
59
  return unless open my $fh, '>:utf8', $path;
274
2
2
2
2
  my $totals_count = scalar(@{$totals}) - 1;
275
2
0
  my @indices;
276
2
3
  if ($file_counts) {
277    @indices = sort {
278
1
0
2
0
      $totals->[$b] <=> $totals->[$a] ||
279      $file_counts->[$b] <=> $file_counts->[$a]
280    } 0 .. $totals_count;
281  } else {
282    @indices = sort {
283
1
0
2
0
      $totals->[$b] <=> $totals->[$a]
284    } 0 .. $totals_count;
285  }
286
2
2
  for my $i (@indices) {
287
2
3
    last unless $totals->[$i] > 0;
288
2
2
    my $rule_with_context = $items->[$i];
289
2
2
    my ($description, $rule);
290
2
6
    if ($rule_with_context =~ /^(.*\n)([^\n]+)$/s) {
291
2
2
      ($description, $rule) = ($1, $2);
292    } else {
293
0
0
      ($description, $rule) = ('', $rule_with_context);
294    }
295
2
5
    print $fh $formatter->(
296      $totals->[$i],
297      ($file_counts ? " file-count: $file_counts->[$i]" : ""),
298      $description,
299      $rule
300    );
301  }
302
2
75
  close $fh;
303}
304
305sub get_special {
306
19
18
  my ($file, $special) = @_;
307
19
22
  return 'file-list' if $file eq $special->{'file_list'};
308
17
17
  return 'pr-title' if $file eq $special->{'pr_title_file'};
309
15
15
  return 'pr-description' if $file eq $special->{'pr_description_file'};
310
13
27
  return 'commit-message' if !rindex($file, $special->{'commit_messages'});
311
11
12
  return 'file';
312}
313
314sub main {
315
12
23010
  my @directories;
316  my @cleanup_directories;
317
12
0
  my @check_file_paths;
318
319
12
15
  my $early_warnings = CheckSpelling::Util::get_file_from_env('early_warnings', '/dev/null');
320
12
12
  my $warning_output = CheckSpelling::Util::get_file_from_env('warning_output', '/dev/stderr');
321
12
9
  my $more_warnings = CheckSpelling::Util::get_file_from_env('more_warnings', '/dev/stderr');
322
12
8
  my $counter_summary = CheckSpelling::Util::get_file_from_env('counter_summary', '/dev/stderr');
323
12
7
  my $ignored_events = CheckSpelling::Util::get_file_from_env('ignored_events', '');
324
12
8
  our %ignored_event_map = ();
325
12
12
  if ($ignored_events) {
326
6
8
    for my $event (split /,/, $ignored_events) {
327
6
9
      $ignored_event_map{$event} = 1;
328    }
329  }
330
12
9
  my $should_exclude_file = CheckSpelling::Util::get_file_from_env('should_exclude_file', '/dev/null');
331
12
11
  my $unknown_word_limit = CheckSpelling::Util::get_val_from_env('unknown_word_limit', undef);
332
12
5
  my $unknown_file_word_limit = CheckSpelling::Util::get_val_from_env('unknown_file_word_limit', undef);
333
12
8
  my $candidate_example_limit = CheckSpelling::Util::get_file_from_env('INPUT_CANDIDATE_EXAMPLE_LIMIT', '3');
334
12
9
  my $disable_flags = CheckSpelling::Util::get_file_from_env('INPUT_DISABLE_CHECKS', '');
335
12
8
  my $only_check_changed_files = CheckSpelling::Util::get_file_from_env('INPUT_ONLY_CHECK_CHANGED_FILES', '');
336
12
8
  my $disable_noisy_file = $disable_flags =~ /(?:^|,|\s)noisy-file(?:,|\s|$)/;
337
12
30
  our $disable_word_collating = $only_check_changed_files || $disable_flags =~ /(?:^|,|\s)word-collating(?:,|\s|$)/;
338
12
10
  our $shortest_word = CheckSpelling::Util::get_val_from_env('INPUT_SHORTEST_WORD', undef);
339
12
10
  my $file_list = CheckSpelling::Util::get_file_from_env('check_file_names', '');
340
12
12
  my $pr_title_file = CheckSpelling::Util::get_file_from_env('pr_title_file', '');
341
12
10
  my $pr_description_file = CheckSpelling::Util::get_file_from_env('pr_description_file', '');
342
12
10
  my $commit_messages = CheckSpelling::Util::get_file_from_env('commit_messages', '');
343
12
10
  my $timing_report = CheckSpelling::Util::get_file_from_env('timing_report', '');
344
12
19
  my $special = {
345    'file_list' => $file_list,
346    'pr_title_file' => $pr_title_file,
347    'pr_description_file' => $pr_description_file,
348    'commit_messages' => $commit_messages,
349  };
350
12
12
  my ($start_time, $end_time);
351
352
12
237
  open WARNING_OUTPUT, '>:utf8', $warning_output;
353
12
168
  open MORE_WARNINGS, '>:utf8', $more_warnings;
354
12
148
  open COUNTER_SUMMARY, '>:utf8', $counter_summary;
355
12
92
  open SHOULD_EXCLUDE, '>:utf8', $should_exclude_file;
356
12
11
  if ($timing_report) {
357
0
0
    open TIMING_REPORT, '>:utf8', $timing_report;
358
0
0
    print TIMING_REPORT "file, start, finish\n";
359  }
360
361
12
10
  my @candidates = get_pattern_with_context('candidates_path');
362
12
10
  my @candidate_totals = (0) x scalar @candidates;
363
12
11
  my @candidate_file_counts = (0) x scalar @candidates;
364
365
12
5
  my @forbidden = get_pattern_with_context('forbidden_path');
366
12
5
  my @forbidden_totals = (0) x scalar @forbidden;
367
368
12
8
  my @delayed_warnings;
369
12
22
  our %letter_map = ();
370
371
12
7
  my %file_map = ();
372
373
12
30
  for my $directory (<>) {
374
15
12
    chomp $directory;
375
15
24
    next unless $directory =~ /^(.*)$/;
376
15
11
    $directory = $1;
377
15
46
    unless (-e $directory) {
378
1
3
      print STDERR "Could not find: $directory\n";
379
1
1
      next;
380    }
381
14
33
    unless (-d $directory) {
382
1
13
      print STDERR "Not a directory: $directory\n";
383
1
2
      next;
384    }
385
386    # if there's no filename, we can't report
387
13
109
    next unless open(NAME, '<:utf8', "$directory/name");
388
12
61
    my $file=<NAME>;
389
12
24
    close NAME;
390
391
12
26
    $file_map{$file} = $directory;
392  }
393
394
12
19
  for my $file (sort keys %file_map) {
395
12
11
    my $directory = $file_map{$file};
396
12
9
    if ($timing_report) {
397
0
0
      $start_time = (stat "$directory/name")[9];
398    }
399
400
12
60
    if (-e "$directory/skipped") {
401
1
7
      open SKIPPED, '<:utf8', "$directory/skipped";
402
1
7
      my $reason=<SKIPPED>;
403
1
3
      close SKIPPED;
404
1
1
      chomp $reason;
405
1
4
      push @delayed_warnings, "$file:1:1 ... 1, Warning - Skipping `$file` because $reason\n";
406
1
2
      print SHOULD_EXCLUDE "$file\n";
407
1
1
      push @cleanup_directories, $directory;
408
1
1
      report_timing($file, $start_time, $directory, 'skipped') if ($timing_report);
409
1
1
      next;
410    }
411
412    # stats isn't written if there was nothing interesting in the file
413
11
34
    unless (-s "$directory/stats") {
414
1
1
      report_timing($file, $start_time, $directory, 'warnings') if ($timing_report);
415
1
1
      push @directories, $directory;
416
1
1
      next;
417    }
418
419
10
11
    if ($file eq $file_list) {
420
1
6
      open FILE_LIST, '<:utf8', $file_list;
421
1
0
      push @check_file_paths, '0 placeholder';
422
1
6
      for my $check_file_path (<FILE_LIST>) {
423
4
3
        chomp $check_file_path;
424
4
4
        push @check_file_paths, $check_file_path;
425      }
426
1
2
      close FILE_LIST;
427    }
428
429
10
6
    my ($words, $unrecognized, $unknown, $unique);
430
431    {
432
10
10
5
51
      open STATS, '<:utf8', "$directory/stats";
433
10
43
      my $stats=<STATS>;
434
10
16
      close STATS;
435
10
11
      $words=get_field($stats, 'words');
436
10
8
      $unrecognized=get_field($stats, 'unrecognized');
437
10
8
      $unknown=get_field($stats, 'unknown');
438
10
7
      $unique=get_field($stats, 'unique');
439
10
7
      my @candidate_list;
440
10
5
      if (@candidate_totals) {
441
1
2
        @candidate_list=get_array($stats, 'candidates');
442
1
1
        my @lines=get_array($stats, 'candidate_lines');
443
1
1
        if (@candidate_list) {
444
1
1
          for (my $i=0; $i < scalar @candidate_list; $i++) {
445
1
1
            my $hits = $candidate_list[$i];
446
1
1
            if ($hits) {
447
1
1
              $candidate_totals[$i] += $hits;
448
1
2
              if ($candidate_file_counts[$i]++ < $candidate_example_limit) {
449
1
3
                my $pattern = (split /\n/,$candidates[$i])[-1];
450
1
1
                my $position = $lines[$i];
451
1
5
                $position =~ s/:(\d+)$/ ... $1/;
452
1
1
                my $wrapped = CheckSpelling::Util::truncate_with_ellipsis(CheckSpelling::Util::wrap_in_backticks($pattern), 99);
453
1
1
                my $candidate_label = '';
454
1
3
                if ($candidates[$i] =~ /^#\s+(\S.+)/) {
455
1
2
                  $candidate_label = " ($1)";
456                }
457
1
5
                push @delayed_warnings, "$file:$position, Notice - Line matches candidate pattern$candidate_label $wrapped (candidate-pattern)\n";
458              }
459            }
460          }
461        }
462      }
463
10
11
      if (@forbidden_totals) {
464
1
5
        my @forbidden_list=get_array($stats, 'forbidden');
465
1
1
        my @lines=get_array($stats, 'forbidden_lines');
466
1
1
        if (@forbidden_list) {
467
1
1
          for (my $i=0; $i < scalar @forbidden_list; $i++) {
468
1
1
            my $hits = $forbidden_list[$i];
469
1
1
            if ($hits) {
470
1
2
              $forbidden_totals[$i] += $hits;
471            }
472          }
473        }
474      }
475      #print STDERR "$file (unrecognized: $unrecognized; unique: $unique; unknown: $unknown, words: $words, candidates: [".join(", ", @candidate_list)."])\n";
476    }
477
478
10
11
    report_timing($file, $start_time, $directory, 'unknown') if ($timing_report);
479
10
7
    my $kind = get_special($file, $special);
480    # These heuristics are very new and need tuning/feedback
481
10
12
    if (
482        ($unknown > $unique)
483        # || ($unrecognized > $words / 2)
484    ) {
485
1
1
      unless ($disable_noisy_file) {
486
1
2
        if ($kind eq 'file') {
487
1
2
          print SHOULD_EXCLUDE "$file\n";
488        }
489
1
1
        my $warning = "noisy-$kind";
490
1
1
        count_warning $warning;
491
1
1
        push @delayed_warnings, "$file:1:1 ... 1, Warning - Skipping `$file` because it seems to have more noise ($unknown) than unique words ($unique) (total: $unrecognized / $words). ($warning)\n";
492
1
1
        push @cleanup_directories, $directory;
493
1
2
        next;
494      }
495    }
496
9
7
    push @directories, $directory;
497
9
48
    unless ($kind =~ /^file/ && -s "$directory/unknown") {
498
4
5
      next;
499    }
500
5
35
    open UNKNOWN, '<:utf8', "$directory/unknown";
501
5
40
    for my $token (<UNKNOWN>) {
502
43
45
      $token =~ s/\R//;
503
43
42
      next unless $token =~ /./;
504
41
27
      my ($key, $char) = collate_key $token;
505
41
47
      $letter_map{$char} = () unless defined $letter_map{$char};
506
41
27
      my %word_map = ();
507
41
14
31
17
      %word_map = %{$letter_map{$char}{$key}} if defined $letter_map{$char}{$key};
508
41
41
      $word_map{$token} = 1;
509
41
45
      $letter_map{$char}{$key} = \%word_map;
510    }
511
5
34
    close UNKNOWN;
512  }
513
12
32
  close SHOULD_EXCLUDE;
514
12
9
  close TIMING_REPORT if $timing_report;
515
516  summarize_totals(
517    sub {
518
1
1
      my ($hits, $files, $context, $pattern) = @_;
519
1
7
      return "# hit-count: $hits$files\n$context$pattern\n\n",
520    },
521
12
38
    CheckSpelling::Util::get_file_from_env('candidate_summary', '/dev/stderr'),
522    \@candidates,
523    \@candidate_totals,
524    \@candidate_file_counts,
525  );
526
527  summarize_totals(
528    sub {
529
1
1
      my (undef, undef, $context, $pattern) = @_;
530
1
2
      $context =~ s/^# //gm;
531
1
5
      chomp $context;
532
1
0
      my $details;
533
1
4
      if ($context =~ /^(.*?)$(.*)/ms) {
534
1
1
        ($context, $details) = ($1, $2);
535
1
1
        $details = "\n$details" if $details;
536      }
537
1
1
      $context = 'Pattern' unless $context;
538
1
5
      return "##### $context$details\n```\n$pattern\n```\n\n";
539    },
540
12
41
    CheckSpelling::Util::get_file_from_env('forbidden_summary', '/dev/stderr'),
541    \@forbidden,
542    \@forbidden_totals,
543  );
544
545
12
41
  group_related_words;
546
547
12
13
  if (defined $ENV{'expect'}) {
548
11
9
    $ENV{'expect'} =~ /(.*)/;
549
11
8
    load_expect $1;
550
11
7
    harmonize_expect;
551  }
552
553
12
8
  my %seen = ();
554
12
4
  our %counters;
555
12
15
  %counters = ();
556
557
12
40
  if (-s $early_warnings) {
558
1
6
    open WARNINGS, '<:utf8', $early_warnings;
559
1
8
    for my $warning (<WARNINGS>) {
560
1
1
      chomp $warning;
561
1
1
      next if should_skip_warning_while_counting $warning;
562
1
8
      print WARNING_OUTPUT "$warning\n";
563    }
564
1
2
    close WARNINGS;
565  }
566
567
12
7
  our %last_seen;
568
12
5
  my %unknown_file_word_count;
569
12
11
  for my $directory (@directories) {
570
10
29
    next unless (-s "$directory/warnings");
571
9
58
    next unless open(NAME, '<:utf8', "$directory/name");
572
9
35
    my $file=<NAME>;
573
9
30
    close NAME;
574
9
7
    my $kind = get_special($file, $special);
575
9
45
    open WARNINGS, '<:utf8', "$directory/warnings";
576
9
10
    if ($kind ne 'file-list') {
577
8
52
      for my $warning (<WARNINGS>) {
578
50
20
        my $code;
579
50
36
        chomp $warning;
580
50
91
        if ($warning =~ m/:(\d+):(\d+ \.\.\. \d+): `(.*)`/) {
581
46
44
          my ($line, $range, $item) = ($1, $2, $3);
582
46
35
          my $wrapped = CheckSpelling::Util::wrap_in_backticks($item);
583
46
28
          my $reason = 'unrecognized-spelling';
584
46
26
          $reason .= "-$kind" unless $kind eq 'file';
585
46
106
          $warning =~ s/:\d+:\d+ \.\.\. \d+: `.*`/:$line:$range, Warning - $wrapped is not a recognized word ($reason)/;
586
46
34
          next if log_skip_item($item, $file, $warning, $unknown_word_limit);
587
13
8
          count_warning $warning if $kind ne 'file';
588
13
8
          $code = $reason;
589        } else {
590
4
5
          if ($warning =~ /\`(.*?)\` in line \(token-is-substring\)/) {
591
0
0
            next if skip_item($1);
592          }
593
4
4
          $code = count_warning $warning;
594        }
595
17
12
        next if should_skip_warning $warning, $code;
596
17
65
        print WARNING_OUTPUT "$file$warning\n";
597      }
598    } else {
599
1
8
      for my $warning (<WARNINGS>) {
600
6
4
        chomp $warning;
601
6
13
        next unless $warning =~ s/^:(\d+)/:1/;
602
6
4
        $file = $check_file_paths[$1];
603
6
14
        if ($warning =~ m/:(\d+ \.\.\. \d+): `(.*)`/) {
604
4
4
          my ($range, $item) = ($1, $2);
605
4
3
          my $wrapped = CheckSpelling::Util::wrap_in_backticks($item);
606
4
13
          $warning =~ s/:\d+ \.\.\. \d+: `.*`/:$range, Warning - $wrapped is not a recognized word (check-file-path)/;
607
4
4
          next if skip_item($item);
608
4
7
          if (defined $unknown_file_word_limit) {
609
4
6
            next if ++$unknown_file_word_count{$item} > $unknown_file_word_limit;
610          }
611        }
612
5
5
        next if should_skip_warning_while_counting $warning;
613
4
12
        print WARNING_OUTPUT "$file$warning\n";
614      }
615    }
616
9
37
    close WARNINGS;
617  }
618
12
241
  close MORE_WARNINGS;
619
620
12
10
  for my $warning (@delayed_warnings) {
621
3
4
    next if should_skip_warning_while_counting $warning;
622
3
3
    print WARNING_OUTPUT $warning;
623  }
624
12
10
  if (defined $unknown_word_limit) {
625
1
2
    for my $warned_word (sort keys %last_seen) {
626
1
3
      my $warning_count = $seen{$warned_word} || 0;
627
1
2
      next unless $warning_count >= $unknown_word_limit;
628
0
0
      my $warning = $last_seen{$warned_word};
629
0
0
      $warning =~ s/\Q (unrecognized-spelling)\E/ -- found $warning_count times (limited-references)\n/;
630
0
0
      next if should_skip_warning_while_counting $warning;
631
0
0
      print WARNING_OUTPUT $warning;
632    }
633  }
634
12
314
  close WARNING_OUTPUT;
635
636
12
14
  if (%counters) {
637
3
2
    my $continue='';
638
3
5
    print COUNTER_SUMMARY "{\n";
639
3
7
    for my $code (sort keys %counters) {
640
6
9
      print COUNTER_SUMMARY qq<$continue"$code": $counters{$code}\n>;
641
6
4
      $continue=',';
642    }
643
3
3
    print COUNTER_SUMMARY "}\n";
644  }
645
12
96
  close COUNTER_SUMMARY;
646
647  # display the current unknown
648
12
33
  for my $char (sort keys %letter_map) {
649
43
43
21
106
    for my $key (sort CheckSpelling::Util::case_biased keys(%{$letter_map{$char}})) {
650
24
24
13
35
      my %word_map = %{$letter_map{$char}{$key}};
651
24
22
      my @words = keys(%word_map);
652
24
18
      if (scalar(@words) > 1) {
653
13
20
8
74
        print $key." (".(join ", ", sort { length($a) <=> length($b) || $a cmp $b } @words).")";
654      } else {
655
11
36
        print $words[0];
656      }
657
24
89
      print "\n";
658    }
659  }
660}
661
662sub collate_expect {
663
1
689
  my ($collated, $notes) = @_;
664
1
1
  my @words;
665
1
6
  open EXPECT, '<', $collated;
666
1
5
  while (<EXPECT>) {
667
5
5
    chomp;
668
5
10
    next unless /(.*) \((.*)\)/;
669
1
2
    my ($key, $list) = ($1, $2);
670
1
1
    my @variants = split /, /, $list;
671
1
2
1
3
    @variants = grep { $_ ne $key } @variants;
672
1
1
    push @words, @variants;
673  }
674
1
3
  close EXPECT;
675
1
1
1
2
  my $pattern = '\`(?:'.join('|', map { quotemeta($_) } @words).')`';
676
1
6
  open SOURCES, '<', $notes;
677
1
3
  while (<SOURCES>) {
678
9
18
    if ($_ =~ /$pattern/) {
679
4
4
      my $print = 0;
680
4
4
      $print = 1 if s/not a recognized word/ignored by check-spelling because another more general variant is also in expect/;
681
4
5
      $print = 1 if s/unrecognized-spelling/ignored-expect-variant/;
682
4
4
      next unless $print;
683    } else {
684
5
16
      next unless /\(((?:\w+-)+\w+)\)$/;
685
2
5
      next if $1 eq 'unrecognized-spelling';
686    }
687
4
19
    print;
688  }
689
1
2
  close SOURCES;
690
1
2
  return 0;
691}
692
6931;