File Coverage

File:	lib/CheckSpelling/SpellingCollator.pm
Coverage:	89.7%

line	stmt	bran	cond	sub	time	code
1						#! -*-perl-*-
2
3						package CheckSpelling::SpellingCollator;
4
5						our $VERSION='0.1.0';
6	1 1			1	107924 2	use 5.022;
7	1 1 1			1	5 1 3	use utf8;
8	1 1 1			1	14 1 48	use feature 'unicode_strings';
9	1 1 1			1	2 1 18	use warnings;
10	1 1 1			1	1 1 21	use File::Path qw(remove_tree);
11	1 1 1			1	238 1 2966	use CheckSpelling::Util;
12
13						my %letter_map;
14						my %ignored_event_map;
15						my $disable_word_collating;
16
17						my %last_seen;
18
19						sub get_field {
20	40			40	48	my ($record, $field) = @_;
21	40	100			337	return 0 unless $record =~ (/\b$field:\s*(\d+)/);
22	30				32	return $1;
23						}
24
25						sub get_array {
26	4			4	4	my ($record, $field) = @_;
27	4	50			30	return () unless $record =~ (/\b$field: \[([^\]]+)\]/);
28	4				2	my $values = $1;
29	4				7	return split /\s*,\s*/, $values;
30						}
31
32						sub maybe {
33	7			7	6	my ($next, $value) = @_;
34	7	50	67		6	$next = $value unless $next && $next < $value;
35	7				6	return $next;
36						}
37
38						my %expected = ();
39						sub expect_item {
40	94			94	57	my ($item, $value) = @_;
41	94				33	our %expected;
42	94				39	my $next;
43	94	100 100			132	if (defined $expected{$item}) {
44	22				12	$next = $expected{$item};
45	22	100			20	$next = $value if $value < $next;
46						} elsif ($item =~ /^([A-Z])(.*)/) {
47	12				7	$item = $1 . lc $2;
48	12	100			9	if (defined $expected{$item}) {
49	2				0	$next = $expected{$item};
50	2				3	$next = maybe($next, $value + .1);
51						} else {
52	10				5	$item = lc $item;
53	10	100			7	if (defined $expected{$item}) {
54	5				3	$next = $expected{$item};
55	5				3	$next = maybe($next, $value + .2);
56						}
57						}
58						}
59	94	100			71	return 0 unless defined $next;
60	29				21	$expected{$item} = $next;
61	29				56	return $value;
62						}
63
64						sub skip_item {
65	50			50	29	my ($word) = @_;
66	50	100			24	return 1 if expect_item($word, 1);
67	32				23	my $key = lc $word;
68	32	50			15	return 2 if expect_item($key, 2);
69	32	100 100 100			40	if ($key =~ /.s$/) {
70	2	100			4	if ($key =~ /ies$/) {
71	1				2	$key =~ s/ies$/y/;
72						} else {
73	1				2	$key =~ s/s$//;
74						}
75						} elsif ($key =~ /^(.+[^aeiou])ed$/) {
76	1				3	$key = $1;
77						} elsif ($key =~ /^(.+)'[ds]$/) {
78	4				2	$key = $1;
79						} else {
80	25				26	return 0;
81						}
82	7	50			6	return 3 if expect_item($key, 3);
83	0				0	return 0;
84						}
85
86						sub should_skip_warning {
87	72			72	47	my ($warning) = @_;
88	72	100			94	if ($warning =~ /\(([-\w]+)\)$/) {
89	68				38	my ($code) = ($1);
90	68				26	our %ignored_event_map;
91	68	100			49	return 1 if $ignored_event_map{$code};
92						}
93	71				57	return 0;
94						}
95
96						sub log_skip_item {
97	46			46	53	my ($item, $file, $warning, $unknown_word_limit) = @_;
98	46	50			46	return 1 if should_skip_warning $warning;
99	46	100			24	return 1 if skip_item($item);
100	21				6	our %seen;
101	21				16	my $seen_count = $seen{$item};
102	21	100			15	if (defined $seen_count) {
103	8	100	100		11	if (!defined $unknown_word_limit || ($seen_count++ < $unknown_word_limit)) {
104	7				27	print MORE_WARNINGS "$file$warning\n";
105						} else {
106	1				1	our %last_seen;
107	1				2	$last_seen{$item} = "$file$warning";
108						}
109	8				7	$seen{$item} = $seen_count;
110	8				15	return 1;
111						}
112	13				9	$seen{$item} = 1;
113	13				15	return 0;
114						}
115
116						sub stem_word {
117	22			22	14	my ($key) = @_;
118	22				8	our $disable_word_collating;
119	22	50			16	return $key if $disable_word_collating;
120
121	22	100 100			18	if ($key =~ /.s$/) {
122	3	100			4	if ($key =~ /ies$/) {
123	1				2	$key =~ s/ies$/y/;
124						} else {
125	2				2	$key =~ s/s$//;
126						}
127						} elsif ($key =~ /.[^aeiou]ed$/) {
128	1				2	$key =~ s/ed$//;
129						}
130	22				19	return $key;
131						}
132
133						sub collate_key {
134	81			81	49	my ($key) = @_;
135	81				38	our $disable_word_collating;
136	81				29	my $char;
137	81	100			50	if ($disable_word_collating) {
138	8				8	$char = lc substr $key, 0, 1;
139						} else {
140	73				50	$key = lc $key;
141	73				41	$key =~ s/''+/'/g;
142	73				44	$key =~ s/'[sd]$//;
143	73				62	$key =~ s/^[^Ii]?'+(.*)/$1/;
144	73				32	$key =~ s/(.*?)'$/$1/;
145	73				60	$char = substr $key, 0, 1;
146						}
147	81				99	return ($key, $char);
148						}
149
150						sub load_expect {
151	12			12	465	my ($expect) = @_;
152	12				8	our %expected;
153	12				13	%expected = ();
154	12	50			94	if (open(EXPECT, '<:utf8', $expect)) {
155	12				54	while (my $word = <EXPECT>) {
156	43				60	$word =~ s/\R//;
157	43				75	$expected{$word} = 0;
158						}
159	12				32	close EXPECT;
160						}
161						}
162
163						sub harmonize_expect {
164	11			11	2	our $disable_word_collating;
165	11				5	our %letter_map;
166	11				5	our %expected;
167
168	11				16	for my $word (keys %expected) {
169	40				20	my ($key, $char) = collate_key $word;
170	40				29	my %word_map = ();
171	40	100			44	next unless defined $letter_map{$char}{$key};
172	13 13				4 19	%word_map = %{$letter_map{$char}{$key}};
173	13	100			19	next if defined $word_map{$word};
174	3				1	my $words = scalar keys %word_map;
175	3	50			3	next if $words > 2;
176	3	100			3	if ($word eq $key) {
177	1	50			1	next if ($words > 1);
178						}
179	2				3	delete $expected{$word};
180						}
181						}
182
183						sub group_related_words {
184	12			12	8	our %letter_map;
185	12				5	our $disable_word_collating;
186	12	100			28	return if $disable_word_collating;
187
188						# group related words
189	11				24	for my $char (sort CheckSpelling::Util::number_biased keys %letter_map) {
190	19 19				4 33	for my $plural_key (sort keys(%{$letter_map{$char}})) {
191	22				9	my $key = stem_word $plural_key;
192	22	100			19	next if $key eq $plural_key;
193	4	100			4	next unless defined $letter_map{$char}{$key};
194	3 3				2 4	my %word_map = %{$letter_map{$char}{$key}};
195	3 3				2 3	for my $word (keys(%{$letter_map{$char}{$plural_key}})) {
196	3				4	$word_map{$word} = 1;
197						}
198	3				3	$letter_map{$char}{$key} = \%word_map;
199	3				4	delete $letter_map{$char}{$plural_key};
200						}
201						}
202						}
203
204						sub count_warning {
205	16			16	14	my ($warning) = @_;
206	16				7	our %counters;
207	16				8	our %ignored_event_map;
208	16	100			27	if ($warning =~ /\(([-\w]+)\)$/) {
209	10				6	my ($code) = ($1);
210	10	50			11	next if defined $ignored_event_map{$code};
211	10				11	++$counters{$code};
212						}
213						}
214
215						sub report_timing {
216	0			0	0	my ($name, $start_time, $directory, $marker) = @_;
217	0				0	my $end_time = (stat "$directory/$marker")[9];
218	0				0	$name =~ s/"/\\"/g;
219	0				0	print TIMING_REPORT "\"$name\", $start_time, $end_time\n";
220						}
221
222						sub get_pattern_with_context {
223	24			24	11	my ($path) = @_;
224	24	50			24	return unless defined $ENV{$path};
225	24				26	$ENV{$path} =~ /(.*)/;
226	24	50			135	return unless open ITEMS, '<:utf8', $1;
227
228	24				35	my @items;
229	24				11	my $context = '';
230	24				88	while (<ITEMS>) {
231	5				5	my $pattern = $_;
232	5	100			6	if ($pattern =~ /^#/) {
233	2	50			3	if ($pattern =~ /^# /) {
234	2				2	$context .= $pattern;
235						} else {
236	0				0	$context = '';
237						}
238	2				4	next;
239						}
240	3				3	chomp $pattern;
241	3	100			4	unless ($pattern =~ /./) {
242	1				1	$context = '';
243	1				2	next;
244						}
245	2				3	push @items, $context.$pattern;
246	2				5	$context = '';
247						}
248	24				53	close ITEMS;
249	24				30	return @items;
250						}
251
252						sub summarize_totals {
253	24			24	17	my ($formatter, $path, $items, $totals, $file_counts) = @_;
254	24 24	100			13 21	return unless @{$totals};
255	2	50			58	return unless open my $fh, '>:utf8', $path;
256	2 2				2 1	my $totals_count = scalar(@{$totals}) - 1;
257	2				1	my @indices;
258	2	100			2	if ($file_counts) {
259						@indices = sort {
260	1 0	0			2 0	$totals->[$b] <=> $totals->[$a] ||
261						$file_counts->[$b] <=> $file_counts->[$a]
262						} 0 .. $totals_count;
263						} else {
264						@indices = sort {
265	1 0				1 0	$totals->[$b] <=> $totals->[$a]
266						} 0 .. $totals_count;
267						}
268	2				3	for my $i (@indices) {
269	2	50			1	last unless $totals->[$i] > 0;
270	2				2	my $rule_with_context = $items->[$i];
271	2				2	my ($description, $rule);
272	2	50			6	if ($rule_with_context =~ /^(.*\n)([^\n]+)$/s) {
273	2				2	($description, $rule) = ($1, $2);
274						} else {
275	0				0	($description, $rule) = ('', $rule_with_context);
276						}
277	2	100			4	print $fh $formatter->(
278						$totals->[$i],
279						($file_counts ? " file-count: $file_counts->[$i]" : ""),
280						$description,
281						$rule
282						);
283						}
284	2				61	close $fh;
285						}
286
287						sub get_special {
288	19			19	14	my ($file, $special) = @_;
289	19	100			23	return 'file-list' if $file eq $special->{'file_list'};
290	17	100			19	return 'pr-title' if $file eq $special->{'pr_title_file'};
291	15	100			13	return 'pr-description' if $file eq $special->{'pr_description_file'};
292	13	100			26	return 'commit-message' if !rindex($file, $special->{'commit_messages'});
293	11				14	return 'file';
294						}
295
296						sub main {
297	12			12	21549	my @directories;
298						my @cleanup_directories;
299	12				0	my @check_file_paths;
300
301	12				12	my $early_warnings = CheckSpelling::Util::get_file_from_env('early_warnings', '/dev/null');
302	12				9	my $warning_output = CheckSpelling::Util::get_file_from_env('warning_output', '/dev/stderr');
303	12				8	my $more_warnings = CheckSpelling::Util::get_file_from_env('more_warnings', '/dev/stderr');
304	12				9	my $counter_summary = CheckSpelling::Util::get_file_from_env('counter_summary', '/dev/stderr');
305	12				9	my $ignored_events = CheckSpelling::Util::get_file_from_env('ignored_events', '');
306	12	100			12	if ($ignored_events) {
307	6				4	our %ignored_event_map;
308	6				6	for my $event (split /,/, $ignored_events) {
309	6				6	$ignored_event_map{$event} = 1;
310						}
311						}
312	12				12	my $should_exclude_file = CheckSpelling::Util::get_file_from_env('should_exclude_file', '/dev/null');
313	12				12	my $unknown_word_limit = CheckSpelling::Util::get_val_from_env('unknown_word_limit', undef);
314	12				6	my $unknown_file_word_limit = CheckSpelling::Util::get_val_from_env('unknown_file_word_limit', undef);
315	12				7	my $candidate_example_limit = CheckSpelling::Util::get_file_from_env('INPUT_CANDIDATE_EXAMPLE_LIMIT', '3');
316	12				8	my $disable_flags = CheckSpelling::Util::get_file_from_env('INPUT_DISABLE_CHECKS', '');
317	12				9	my $only_check_changed_files = CheckSpelling::Util::get_file_from_env('INPUT_ONLY_CHECK_CHANGED_FILES', '');
318	12				10	my $disable_noisy_file = $disable_flags =~ /(?:^|,|\s)noisy-file(?:,|\s|$)/;
319	12		67		29	our $disable_word_collating = $only_check_changed_files || $disable_flags =~ /(?:^|,|\s)word-collating(?:,|\s|$)/;
320	12				9	my $file_list = CheckSpelling::Util::get_file_from_env('check_file_names', '');
321	12				10	my $pr_title_file = CheckSpelling::Util::get_file_from_env('pr_title_file', '');
322	12				10	my $pr_description_file = CheckSpelling::Util::get_file_from_env('pr_description_file', '');
323	12				9	my $commit_messages = CheckSpelling::Util::get_file_from_env('commit_messages', '');
324	12				9	my $timing_report = CheckSpelling::Util::get_file_from_env('timing_report', '');
325	12				19	my $special = {
326						'file_list' => $file_list,
327						'pr_title_file' => $pr_title_file,
328						'pr_description_file' => $pr_description_file,
329						'commit_messages' => $commit_messages,
330						};
331	12				7	my ($start_time, $end_time);
332
333	12				241	open WARNING_OUTPUT, '>:utf8', $warning_output;
334	12				163	open MORE_WARNINGS, '>:utf8', $more_warnings;
335	12				147	open COUNTER_SUMMARY, '>:utf8', $counter_summary;
336	12				92	open SHOULD_EXCLUDE, '>:utf8', $should_exclude_file;
337	12	50			11	if ($timing_report) {
338	0				0	open TIMING_REPORT, '>:utf8', $timing_report;
339	0				0	print TIMING_REPORT "file, start, finish\n";
340						}
341
342	12				10	my @candidates = get_pattern_with_context('candidates_path');
343	12				9	my @candidate_totals = (0) x scalar @candidates;
344	12				7	my @candidate_file_counts = (0) x scalar @candidates;
345
346	12				8	my @forbidden = get_pattern_with_context('forbidden_path');
347	12				6	my @forbidden_totals = (0) x scalar @forbidden;
348
349	12				8	my @delayed_warnings;
350	12				19	our %letter_map = ();
351
352	12				11	my %file_map = ();
353
354	12				27	for my $directory (<>) {
355	15				13	chomp $directory;
356	15	50			26	next unless $directory =~ /^(.*)$/;
357	15				7	$directory = $1;
358	15	100			46	unless (-e $directory) {
359	1				3	print STDERR "Could not find: $directory\n";
360	1				1	next;
361						}
362	14	100			37	unless (-d $directory) {
363	1				10	print STDERR "Not a directory: $directory\n";
364	1				2	next;
365						}
366
367						# if there's no filename, we can't report
368	13	100			82	next unless open(NAME, '<:utf8', "$directory/name");
369	12				55	my $file=<NAME>;
370	12				25	close NAME;
371
372	12				46	$file_map{$file} = $directory;
373						}
374
375	12				19	for my $file (sort keys %file_map) {
376	12				11	my $directory = $file_map{$file};
377	12	50			9	if ($timing_report) {
378	0				0	$start_time = (stat "$directory/name")[9];
379						}
380
381	12	100			61	if (-e "$directory/skipped") {
382	1				7	open SKIPPED, '<:utf8', "$directory/skipped";
383	1				6	my $reason=<SKIPPED>;
384	1				3	close SKIPPED;
385	1				1	chomp $reason;
386	1				3	push @delayed_warnings, "$file:1:1 ... 1, Warning - Skipping `$file` because $reason\n";
387	1				4	print SHOULD_EXCLUDE "$file\n";
388	1				1	push @cleanup_directories, $directory;
389	1	50			1	report_timing($file, $start_time, $directory, 'skipped') if ($timing_report);
390	1				1	next;
391						}
392
393						# stats isn't written if there was nothing interesting in the file
394	11	100			33	unless (-s "$directory/stats") {
395	1	50			1	report_timing($file, $start_time, $directory, 'warnings') if ($timing_report);
396	1				0	push @directories, $directory;
397	1				1	next;
398						}
399
400	10	100			7	if ($file eq $file_list) {
401	1				5	open FILE_LIST, '<:utf8', $file_list;
402	1				1	push @check_file_paths, '0 placeholder';
403	1				6	for my $check_file_path (<FILE_LIST>) {
404	4				3	chomp $check_file_path;
405	4				3	push @check_file_paths, $check_file_path;
406						}
407	1				3	close FILE_LIST;
408						}
409
410	10				7	my ($words, $unrecognized, $unknown, $unique);
411
412						{
413	10 10				4 55	open STATS, '<:utf8', "$directory/stats";
414	10				39	my $stats=<STATS>;
415	10				19	close STATS;
416	10				9	$words=get_field($stats, 'words');
417	10				9	$unrecognized=get_field($stats, 'unrecognized');
418	10				7	$unknown=get_field($stats, 'unknown');
419	10				7	$unique=get_field($stats, 'unique');
420	10				3	my @candidate_list;
421	10	100			8	if (@candidate_totals) {
422	1				3	@candidate_list=get_array($stats, 'candidates');
423	1				1	my @lines=get_array($stats, 'candidate_lines');
424	1	50			1	if (@candidate_list) {
425	1				1	for (my $i=0; $i < scalar @candidate_list; $i++) {
426	1				1	my $hits = $candidate_list[$i];
427	1	50			1	if ($hits) {
428	1				1	$candidate_totals[$i] += $hits;
429	1	50			1	if ($candidate_file_counts[$i]++ < $candidate_example_limit) {
430	1				2	my $pattern = (split /\n/,$candidates[$i])[-1];
431	1				2	my $position = $lines[$i];
432	1				4	$position =~ s/:(\d+)$/ ... $1/;
433	1				1	my $wrapped = CheckSpelling::Util::truncate_with_ellipsis(CheckSpelling::Util::wrap_in_backticks($pattern), 99);
434	1				1	my $candidate_label = '';
435	1	50			2	if ($candidates[$i] =~ /^#\s+(\S.+)/) {
436	1				2	$candidate_label = " ($1)";
437						}
438	1				4	push @delayed_warnings, "$file:$position, Notice - Line matches candidate pattern$candidate_label $wrapped (candidate-pattern)\n";
439						}
440						}
441						}
442						}
443						}
444	10	100			11	if (@forbidden_totals) {
445	1				1	my @forbidden_list=get_array($stats, 'forbidden');
446	1				1	my @lines=get_array($stats, 'forbidden_lines');
447	1	50			1	if (@forbidden_list) {
448	1				1	for (my $i=0; $i < scalar @forbidden_list; $i++) {
449	1				1	my $hits = $forbidden_list[$i];
450	1	50			1	if ($hits) {
451	1				2	$forbidden_totals[$i] += $hits;
452						}
453						}
454						}
455						}
456						#print STDERR "$file (unrecognized: $unrecognized; unique: $unique; unknown: $unknown, words: $words, candidates: [".join(", ", @candidate_list)."])\n";
457						}
458
459	10	50			9	report_timing($file, $start_time, $directory, 'unknown') if ($timing_report);
460	10				10	my $kind = get_special($file, $special);
461						# These heuristics are very new and need tuning/feedback
462	10	100			11	if (
463						($unknown > $unique)
464						# || ($unrecognized > $words / 2)
465						) {
466	1	50			1	unless ($disable_noisy_file) {
467	1	50			1	if ($kind eq 'file') {
468	1				2	print SHOULD_EXCLUDE "$file\n";
469						}
470	1				1	my $warning = "noisy-$kind";
471	1				1	count_warning $warning;
472	1				6	push @delayed_warnings, "$file:1:1 ... 1, Warning - Skipping `$file` because it seems to have more noise ($unknown) than unique words ($unique) (total: $unrecognized / $words). ($warning)\n";
473	1				0	push @cleanup_directories, $directory;
474	1				2	next;
475						}
476						}
477	9				6	push @directories, $directory;
478	9	100	100		45	unless ($kind =~ /^file/ && -s "$directory/unknown") {
479	4				6	next;
480						}
481	5				32	open UNKNOWN, '<:utf8', "$directory/unknown";
482	5				41	for my $token (<UNKNOWN>) {
483	43				44	$token =~ s/\R//;
484	43	100			37	next unless $token =~ /./;
485	41				26	my ($key, $char) = collate_key $token;
486	41	100			46	$letter_map{$char} = () unless defined $letter_map{$char};
487	41				21	my %word_map = ();
488	41 14	100			39 20	%word_map = %{$letter_map{$char}{$key}} if defined $letter_map{$char}{$key};
489	41				33	$word_map{$token} = 1;
490	41				49	$letter_map{$char}{$key} = \%word_map;
491						}
492	5				19	close UNKNOWN;
493						}
494	12				28	close SHOULD_EXCLUDE;
495	12	50			8	close TIMING_REPORT if $timing_report;
496
497						summarize_totals(
498						sub {
499	1			1	1	my ($hits, $files, $context, $pattern) = @_;
500	1				6	return "# hit-count: $hits$files\n$context$pattern\n\n",
501						},
502	12				31	CheckSpelling::Util::get_file_from_env('candidate_summary', '/dev/stderr'),
503						\@candidates,
504						\@candidate_totals,
505						\@candidate_file_counts,
506						);
507
508						summarize_totals(
509						sub {
510	1			1	1	my (undef, undef, $context, $pattern) = @_;
511	1				2	$context =~ s/^# //gm;
512	1				1	chomp $context;
513	1				1	my $details;
514	1	50			3	if ($context =~ /^(.*?)$(.*)/ms) {
515	1				1	($context, $details) = ($1, $2);
516	1	50			1	$details = "\n$details" if $details;
517						}
518	1	50			1	$context = 'Pattern' unless $context;
519	1				7	return "##### $context$details\n```\n$pattern\n```\n\n";
520						},
521	12				37	CheckSpelling::Util::get_file_from_env('forbidden_summary', '/dev/stderr'),
522						\@forbidden,
523						\@forbidden_totals,
524						);
525
526	12				29	group_related_words;
527
528	12	100			11	if (defined $ENV{'expect'}) {
529	11				10	$ENV{'expect'} =~ /(.*)/;
530	11				10	load_expect $1;
531	11				9	harmonize_expect;
532						}
533
534	12				10	my %seen = ();
535	12				4	our %counters;
536	12				10	%counters = ();
537
538	12	100			43	if (-s $early_warnings) {
539	1				7	open WARNINGS, '<:utf8', $early_warnings;
540	1				5	for my $warning (<WARNINGS>) {
541	1				1	chomp $warning;
542	1				2	count_warning $warning;
543	1	50			1	next if should_skip_warning $warning;
544	1				22	print WARNING_OUTPUT "$warning\n";
545						}
546	1				3	close WARNINGS;
547						}
548
549	12				5	our %last_seen;
550	12				5	my %unknown_file_word_count;
551	12				8	for my $directory (@directories) {
552	10	100			31	next unless (-s "$directory/warnings");
553	9	50			57	next unless open(NAME, '<:utf8', "$directory/name");
554	9				30	my $file=<NAME>;
555	9				20	close NAME;
556	9				7	my $kind = get_special($file, $special);
557	9				49	open WARNINGS, '<:utf8', "$directory/warnings";
558	9	100			8	if ($kind ne 'file-list') {
559	8				45	for my $warning (<WARNINGS>) {
560	50				30	chomp $warning;
561	50	100			110	if ($warning =~ m/:(\d+):(\d+ \.\.\. \d+): `(.*)`/) {
562	46				41	my ($line, $range, $item) = ($1, $2, $3);
563	46				35	my $wrapped = CheckSpelling::Util::wrap_in_backticks($item);
564	46				24	my $reason = 'unrecognized-spelling';
565	46	100			28	$reason .= "-$kind" unless $kind eq 'file';
566	46				101	$warning =~ s/:\d+:\d+ \.\.\. \d+: `.*`/:$line:$range, Warning - $wrapped is not a recognized word ($reason)/;
567	46	100			31	next if log_skip_item($item, $file, $warning, $unknown_word_limit);
568	13	50			10	count_warning $warning if $kind ne 'file';
569						} else {
570	4	50			4	if ($warning =~ /\`(.*?)\` in line \(token-is-substring\)/) {
571	0	0			0	next if skip_item($1);
572						}
573	4				3	count_warning $warning;
574						}
575	17	50			12	next if should_skip_warning $warning;
576	17				57	print WARNING_OUTPUT "$file$warning\n";
577						}
578						} else {
579	1				7	for my $warning (<WARNINGS>) {
580	6				4	chomp $warning;
581	6	50			14	next unless $warning =~ s/^:(\d+)/:1/;
582	6				7	$file = $check_file_paths[$1];
583	6	100			12	if ($warning =~ m/:(\d+ \.\.\. \d+): `(.*)`/) {
584	4				6	my ($range, $item) = ($1, $2);
585	4				2	my $wrapped = CheckSpelling::Util::wrap_in_backticks($item);
586	4				11	$warning =~ s/:\d+ \.\.\. \d+: `.*`/:$range, Warning - $wrapped is not a recognized word (check-file-path)/;
587	4	50			2	next if skip_item($item);
588	4	50			4	if (defined $unknown_file_word_limit) {
589	4	100			5	next if ++$unknown_file_word_count{$item} > $unknown_file_word_limit;
590						}
591						}
592	5	100			3	next if should_skip_warning $warning;
593	4				16	print WARNING_OUTPUT "$file$warning\n";
594	4				4	count_warning $warning;
595						}
596						}
597	9				35	close WARNINGS;
598						}
599	12				194	close MORE_WARNINGS;
600
601	12				9	for my $warning (@delayed_warnings) {
602	3	50			2	next if should_skip_warning $warning;
603	3				3	count_warning $warning;
604	3				6	print WARNING_OUTPUT $warning;
605						}
606	12	100			11	if (defined $unknown_word_limit) {
607	1				2	for my $warned_word (sort keys %last_seen) {
608	1		50		6	my $warning_count = $seen{$warned_word} || 0;
609	1	50			2	next unless $warning_count >= $unknown_word_limit;
610	0				0	my $warning = $last_seen{$warned_word};
611	0				0	$warning =~ s/\Q (unrecognized-spelling)\E/ -- found $warning_count times (limited-references)\n/;
612	0	0			0	next if should_skip_warning $warning;
613	0				0	print WARNING_OUTPUT $warning;
614	0				0	count_warning $warning;
615						}
616						}
617	12				258	close WARNING_OUTPUT;
618
619	12	100			15	if (%counters) {
620	3				3	my $continue='';
621	3				5	print COUNTER_SUMMARY "{\n";
622	3				8	for my $code (sort keys %counters) {
623	6				11	print COUNTER_SUMMARY qq<$continue"$code": $counters{$code}\n>;
624	6				5	$continue=',';
625						}
626	3				3	print COUNTER_SUMMARY "}\n";
627						}
628	12				84	close COUNTER_SUMMARY;
629
630						# display the current unknown
631	12				29	for my $char (sort keys %letter_map) {
632	43 43				20 102	for my $key (sort CheckSpelling::Util::case_biased keys(%{$letter_map{$char}})) {
633	24 24				15 37	my %word_map = %{$letter_map{$char}{$key}};
634	24				22	my @words = keys(%word_map);
635	24	100			17	if (scalar(@words) > 1) {
636	13 20	50			27 80	print $key." (".(join ", ", sort { length($a) <=> length($b) || $a cmp $b } @words).")";
637						} else {
638	11				33	print $words[0];
639						}
640	24				92	print "\n";
641						}
642						}
643						}
644
645						1;