File Coverage

File:	lib/CheckSpelling/SpellingCollator.pm
Coverage:	89.7%

line	stmt	bran	cond	sub	time	code
1						#! -*-perl-*-
2
3						package CheckSpelling::SpellingCollator;
4
5						our $VERSION='0.1.0';
6	1 1			1	108216 3	use 5.022;
7	1 1 1			1	2 1 2	use utf8;
8	1 1 1			1	14 1 46	use feature 'unicode_strings';
9	1 1 1			1	1 3 15	use warnings;
10	1 1 1			1	2 0 22	use File::Path qw(remove_tree);
11	1 1 1			1	236 1 2993	use CheckSpelling::Util;
12
13						my %letter_map;
14						my %ignored_event_map;
15						my $disable_word_collating;
16
17						my %last_seen;
18
19						sub get_field {
20	40			40	34	my ($record, $field) = @_;
21	40	100			374	return 0 unless $record =~ (/\b$field:\s*(\d+)/);
22	30				33	return $1;
23						}
24
25						sub get_array {
26	4			4	3	my ($record, $field) = @_;
27	4	50			29	return () unless $record =~ (/\b$field: \[([^\]]+)\]/);
28	4				3	my $values = $1;
29	4				5	return split /\s*,\s*/, $values;
30						}
31
32						sub maybe {
33	7			7	7	my ($next, $value) = @_;
34	7	50	67		10	$next = $value unless $next && $next < $value;
35	7				4	return $next;
36						}
37
38						my %expected = ();
39						sub expect_item {
40	94			94	58	my ($item, $value) = @_;
41	94				48	$item =~ s/’/'/g;
42	94				34	our %expected;
43	94				52	my $next;
44	94	100 100			91	if (defined $expected{$item}) {
45	22				15	$next = $expected{$item};
46	22	100			16	$next = $value if $value < $next;
47						} elsif ($item =~ /^([A-Z])(.*)/) {
48	12				9	$item = $1 . lc $2;
49	12	100			10	if (defined $expected{$item}) {
50	2				1	$next = $expected{$item};
51	2				4	$next = maybe($next, $value + .1);
52						} else {
53	10				6	$item = lc $item;
54	10	100			7	if (defined $expected{$item}) {
55	5				5	$next = $expected{$item};
56	5				1	$next = maybe($next, $value + .2);
57						}
58						}
59						}
60	94	100			87	return 0 unless defined $next;
61	29				22	$expected{$item} = $next;
62	29				58	return $value;
63						}
64
65						sub skip_item {
66	50			50	29	my ($word) = @_;
67	50	100			24	return 1 if expect_item($word, 1);
68	32				20	my $key = lc $word;
69	32	50			16	return 2 if expect_item($key, 2);
70	32	100 100 100			49	if ($key =~ /.s$/) {
71	2	100			3	if ($key =~ /ies$/) {
72	1				4	$key =~ s/ies$/y/;
73						} else {
74	1				2	$key =~ s/s$//;
75						}
76						} elsif ($key =~ /^(.+[^aeiou])ed$/) {
77	1				1	$key = $1;
78						} elsif ($key =~ /^(.+)'[ds]$/) {
79	4				3	$key = $1;
80						} else {
81	25				21	return 0;
82						}
83	7	50			5	return 3 if expect_item($key, 3);
84	0				0	return 0;
85						}
86
87						sub should_skip_warning {
88	72			72	40	my ($warning) = @_;
89	72	100			95	if ($warning =~ /\(([-\w]+)\)$/) {
90	68				47	my ($code) = ($1);
91	68				22	our %ignored_event_map;
92	68	100			54	return 1 if $ignored_event_map{$code};
93						}
94	71				59	return 0;
95						}
96
97						sub log_skip_item {
98	46			46	57	my ($item, $file, $warning, $unknown_word_limit) = @_;
99	46	50			32	return 1 if should_skip_warning $warning;
100	46	100			23	return 1 if skip_item($item);
101	21				6	our %seen;
102	21				14	my $seen_count = $seen{$item};
103	21	100			14	if (defined $seen_count) {
104	8	100	100		10	if (!defined $unknown_word_limit || ($seen_count++ < $unknown_word_limit)) {
105	7				31	print MORE_WARNINGS "$file$warning\n";
106						} else {
107	1				1	our %last_seen;
108	1				1	$last_seen{$item} = "$file$warning";
109						}
110	8				7	$seen{$item} = $seen_count;
111	8				15	return 1;
112						}
113	13				11	$seen{$item} = 1;
114	13				9	return 0;
115						}
116
117						sub stem_word {
118	22			22	9	my ($key) = @_;
119	22				14	our $disable_word_collating;
120	22	50			6	return $key if $disable_word_collating;
121
122	22	100 100			25	if ($key =~ /.s$/) {
123	3	100			3	if ($key =~ /ies$/) {
124	1				2	$key =~ s/ies$/y/;
125						} else {
126	2				2	$key =~ s/s$//;
127						}
128						} elsif ($key =~ /.[^aeiou]ed$/) {
129	1				2	$key =~ s/ed$//;
130						}
131	22				17	return $key;
132						}
133
134						sub collate_key {
135	81			81	54	my ($key) = @_;
136	81				37	our $disable_word_collating;
137	81				28	my $char;
138	81	100			48	if ($disable_word_collating) {
139	8				9	$char = lc substr $key, 0, 1;
140						} else {
141	73				50	$key = lc $key;
142	73				48	$key =~ s/''+/'/g;
143	73				38	$key =~ s/'[sd]$//;
144	73				40	$key =~ s/^[^Ii]?'+(.*)/$1/;
145	73				31	$key =~ s/(.*?)'$/$1/;
146	73				66	$char = substr $key, 0, 1;
147						}
148	81				100	return ($key, $char);
149						}
150
151						sub load_expect {
152	12			12	463	my ($expect) = @_;
153	12				6	our %expected;
154	12				16	%expected = ();
155	12	50			106	if (open(EXPECT, '<:utf8', $expect)) {
156	12				57	while (my $word = <EXPECT>) {
157	43				59	$word =~ s/\R//;
158	43				80	$expected{$word} = 0;
159						}
160	12				31	close EXPECT;
161						}
162						}
163
164						sub harmonize_expect {
165	11			11	7	our $disable_word_collating;
166	11				1	our %letter_map;
167	11				8	our %expected;
168
169	11				14	for my $word (keys %expected) {
170	40				26	my ($key, $char) = collate_key $word;
171	40				38	my %word_map = ();
172	40	100			49	next unless defined $letter_map{$char}{$key};
173	13 13				7 15	%word_map = %{$letter_map{$char}{$key}};
174	13	100			19	next if defined $word_map{$word};
175	3				1	my $words = scalar keys %word_map;
176	3	50			3	next if $words > 2;
177	3	100			1	if ($word eq $key) {
178	1	50			2	next if ($words > 1);
179						}
180	2				2	delete $expected{$word};
181						}
182						}
183
184						sub group_related_words {
185	12			12	4	our %letter_map;
186	12				7	our $disable_word_collating;
187	12	100			6	return if $disable_word_collating;
188
189						# group related words
190	11				29	for my $char (sort CheckSpelling::Util::number_biased keys %letter_map) {
191	19 19				9 17	for my $plural_key (sort keys(%{$letter_map{$char}})) {
192	22				17	my $key = stem_word $plural_key;
193	22	100			15	next if $key eq $plural_key;
194	4	100			5	next unless defined $letter_map{$char}{$key};
195	3 3				1 5	my %word_map = %{$letter_map{$char}{$key}};
196	3 3				1 3	for my $word (keys(%{$letter_map{$char}{$plural_key}})) {
197	3				2	$word_map{$word} = 1;
198						}
199	3				3	$letter_map{$char}{$key} = \%word_map;
200	3				3	delete $letter_map{$char}{$plural_key};
201						}
202						}
203						}
204
205						sub count_warning {
206	16			16	13	my ($warning) = @_;
207	16				7	our %counters;
208	16				7	our %ignored_event_map;
209	16	100			25	if ($warning =~ /\(([-\w]+)\)$/) {
210	10				7	my ($code) = ($1);
211	10	50			10	next if defined $ignored_event_map{$code};
212	10				11	++$counters{$code};
213						}
214						}
215
216						sub report_timing {
217	0			0	0	my ($name, $start_time, $directory, $marker) = @_;
218	0				0	my $end_time = (stat "$directory/$marker")[9];
219	0				0	$name =~ s/"/\\"/g;
220	0				0	print TIMING_REPORT "\"$name\", $start_time, $end_time\n";
221						}
222
223						sub get_pattern_with_context {
224	24			24	19	my ($path) = @_;
225	24	50			19	return unless defined $ENV{$path};
226	24				22	$ENV{$path} =~ /(.*)/;
227	24	50			137	return unless open ITEMS, '<:utf8', $1;
228
229	24				13	my @items;
230	24				11	my $context = '';
231	24				89	while (<ITEMS>) {
232	5				6	my $pattern = $_;
233	5	100			5	if ($pattern =~ /^#/) {
234	2	50			3	if ($pattern =~ /^# /) {
235	2				4	$context .= $pattern;
236						} else {
237	0				0	$context = '';
238						}
239	2				3	next;
240						}
241	3				3	chomp $pattern;
242	3	100			4	unless ($pattern =~ /./) {
243	1				1	$context = '';
244	1				2	next;
245						}
246	2				3	push @items, $context.$pattern;
247	2				5	$context = '';
248						}
249	24				50	close ITEMS;
250	24				26	return @items;
251						}
252
253						sub summarize_totals {
254	24			24	21	my ($formatter, $path, $items, $totals, $file_counts) = @_;
255	24 24	100			8 26	return unless @{$totals};
256	2	50			62	return unless open my $fh, '>:utf8', $path;
257	2 2				0 2	my $totals_count = scalar(@{$totals}) - 1;
258	2				2	my @indices;
259	2	100			1	if ($file_counts) {
260						@indices = sort {
261	1 0	0			2 0	$totals->[$b] <=> $totals->[$a] ||
262						$file_counts->[$b] <=> $file_counts->[$a]
263						} 0 .. $totals_count;
264						} else {
265						@indices = sort {
266	1 0				1 0	$totals->[$b] <=> $totals->[$a]
267						} 0 .. $totals_count;
268						}
269	2				3	for my $i (@indices) {
270	2	50			2	last unless $totals->[$i] > 0;
271	2				2	my $rule_with_context = $items->[$i];
272	2				2	my ($description, $rule);
273	2	50			6	if ($rule_with_context =~ /^(.*\n)([^\n]+)$/s) {
274	2				3	($description, $rule) = ($1, $2);
275						} else {
276	0				0	($description, $rule) = ('', $rule_with_context);
277						}
278	2	100			3	print $fh $formatter->(
279						$totals->[$i],
280						($file_counts ? " file-count: $file_counts->[$i]" : ""),
281						$description,
282						$rule
283						);
284						}
285	2				63	close $fh;
286						}
287
288						sub get_special {
289	19			19	17	my ($file, $special) = @_;
290	19	100			24	return 'file-list' if $file eq $special->{'file_list'};
291	17	100			16	return 'pr-title' if $file eq $special->{'pr_title_file'};
292	15	100			15	return 'pr-description' if $file eq $special->{'pr_description_file'};
293	13	100			25	return 'commit-message' if !rindex($file, $special->{'commit_messages'});
294	11				14	return 'file';
295						}
296
297						sub main {
298	12			12	22596	my @directories;
299						my @cleanup_directories;
300	12				0	my @check_file_paths;
301
302	12				13	my $early_warnings = CheckSpelling::Util::get_file_from_env('early_warnings', '/dev/null');
303	12				11	my $warning_output = CheckSpelling::Util::get_file_from_env('warning_output', '/dev/stderr');
304	12				8	my $more_warnings = CheckSpelling::Util::get_file_from_env('more_warnings', '/dev/stderr');
305	12				7	my $counter_summary = CheckSpelling::Util::get_file_from_env('counter_summary', '/dev/stderr');
306	12				6	my $ignored_events = CheckSpelling::Util::get_file_from_env('ignored_events', '');
307	12	100			8	if ($ignored_events) {
308	6				6	our %ignored_event_map;
309	6				6	for my $event (split /,/, $ignored_events) {
310	6				6	$ignored_event_map{$event} = 1;
311						}
312						}
313	12				11	my $should_exclude_file = CheckSpelling::Util::get_file_from_env('should_exclude_file', '/dev/null');
314	12				10	my $unknown_word_limit = CheckSpelling::Util::get_val_from_env('unknown_word_limit', undef);
315	12				10	my $unknown_file_word_limit = CheckSpelling::Util::get_val_from_env('unknown_file_word_limit', undef);
316	12				10	my $candidate_example_limit = CheckSpelling::Util::get_file_from_env('INPUT_CANDIDATE_EXAMPLE_LIMIT', '3');
317	12				10	my $disable_flags = CheckSpelling::Util::get_file_from_env('INPUT_DISABLE_CHECKS', '');
318	12				9	my $only_check_changed_files = CheckSpelling::Util::get_file_from_env('INPUT_ONLY_CHECK_CHANGED_FILES', '');
319	12				10	my $disable_noisy_file = $disable_flags =~ /(?:^|,|\s)noisy-file(?:,|\s|$)/;
320	12		67		30	our $disable_word_collating = $only_check_changed_files || $disable_flags =~ /(?:^|,|\s)word-collating(?:,|\s|$)/;
321	12				8	my $file_list = CheckSpelling::Util::get_file_from_env('check_file_names', '');
322	12				7	my $pr_title_file = CheckSpelling::Util::get_file_from_env('pr_title_file', '');
323	12				6	my $pr_description_file = CheckSpelling::Util::get_file_from_env('pr_description_file', '');
324	12				7	my $commit_messages = CheckSpelling::Util::get_file_from_env('commit_messages', '');
325	12				7	my $timing_report = CheckSpelling::Util::get_file_from_env('timing_report', '');
326	12				20	my $special = {
327						'file_list' => $file_list,
328						'pr_title_file' => $pr_title_file,
329						'pr_description_file' => $pr_description_file,
330						'commit_messages' => $commit_messages,
331						};
332	12				5	my ($start_time, $end_time);
333
334	12				248	open WARNING_OUTPUT, '>:utf8', $warning_output;
335	12				197	open MORE_WARNINGS, '>:utf8', $more_warnings;
336	12				148	open COUNTER_SUMMARY, '>:utf8', $counter_summary;
337	12				96	open SHOULD_EXCLUDE, '>:utf8', $should_exclude_file;
338	12	50			10	if ($timing_report) {
339	0				0	open TIMING_REPORT, '>:utf8', $timing_report;
340	0				0	print TIMING_REPORT "file, start, finish\n";
341						}
342
343	12				9	my @candidates = get_pattern_with_context('candidates_path');
344	12				9	my @candidate_totals = (0) x scalar @candidates;
345	12				7	my @candidate_file_counts = (0) x scalar @candidates;
346
347	12				7	my @forbidden = get_pattern_with_context('forbidden_path');
348	12				9	my @forbidden_totals = (0) x scalar @forbidden;
349
350	12				5	my @delayed_warnings;
351	12				19	our %letter_map = ();
352
353	12				11	my %file_map = ();
354
355	12				26	for my $directory (<>) {
356	15				15	chomp $directory;
357	15	50			23	next unless $directory =~ /^(.*)$/;
358	15				10	$directory = $1;
359	15	100			48	unless (-e $directory) {
360	1				3	print STDERR "Could not find: $directory\n";
361	1				1	next;
362						}
363	14	100			51	unless (-d $directory) {
364	1				12	print STDERR "Not a directory: $directory\n";
365	1				1	next;
366						}
367
368						# if there's no filename, we can't report
369	13	100			84	next unless open(NAME, '<:utf8', "$directory/name");
370	12				85	my $file=<NAME>;
371	12				23	close NAME;
372
373	12				26	$file_map{$file} = $directory;
374						}
375
376	12				21	for my $file (sort keys %file_map) {
377	12				10	my $directory = $file_map{$file};
378	12	50			10	if ($timing_report) {
379	0				0	$start_time = (stat "$directory/name")[9];
380						}
381
382	12	100			59	if (-e "$directory/skipped") {
383	1				7	open SKIPPED, '<:utf8', "$directory/skipped";
384	1				8	my $reason=<SKIPPED>;
385	1				2	close SKIPPED;
386	1				1	chomp $reason;
387	1				3	push @delayed_warnings, "$file:1:1 ... 1, Warning - Skipping `$file` because $reason\n";
388	1				4	print SHOULD_EXCLUDE "$file\n";
389	1				1	push @cleanup_directories, $directory;
390	1	50			1	report_timing($file, $start_time, $directory, 'skipped') if ($timing_report);
391	1				1	next;
392						}
393
394						# stats isn't written if there was nothing interesting in the file
395	11	100			33	unless (-s "$directory/stats") {
396	1	50			1	report_timing($file, $start_time, $directory, 'warnings') if ($timing_report);
397	1				0	push @directories, $directory;
398	1				1	next;
399						}
400
401	10	100			10	if ($file eq $file_list) {
402	1				6	open FILE_LIST, '<:utf8', $file_list;
403	1				1	push @check_file_paths, '0 placeholder';
404	1				6	for my $check_file_path (<FILE_LIST>) {
405	4				3	chomp $check_file_path;
406	4				3	push @check_file_paths, $check_file_path;
407						}
408	1				3	close FILE_LIST;
409						}
410
411	10				6	my ($words, $unrecognized, $unknown, $unique);
412
413						{
414	10 10				4 54	open STATS, '<:utf8', "$directory/stats";
415	10				44	my $stats=<STATS>;
416	10				16	close STATS;
417	10				8	$words=get_field($stats, 'words');
418	10				11	$unrecognized=get_field($stats, 'unrecognized');
419	10				6	$unknown=get_field($stats, 'unknown');
420	10				10	$unique=get_field($stats, 'unique');
421	10				4	my @candidate_list;
422	10	100			7	if (@candidate_totals) {
423	1				4	@candidate_list=get_array($stats, 'candidates');
424	1				1	my @lines=get_array($stats, 'candidate_lines');
425	1	50			1	if (@candidate_list) {
426	1				1	for (my $i=0; $i < scalar @candidate_list; $i++) {
427	1				1	my $hits = $candidate_list[$i];
428	1	50			1	if ($hits) {
429	1				1	$candidate_totals[$i] += $hits;
430	1	50			1	if ($candidate_file_counts[$i]++ < $candidate_example_limit) {
431	1				2	my $pattern = (split /\n/,$candidates[$i])[-1];
432	1				1	my $position = $lines[$i];
433	1				5	$position =~ s/:(\d+)$/ ... $1/;
434	1				1	my $wrapped = CheckSpelling::Util::truncate_with_ellipsis(CheckSpelling::Util::wrap_in_backticks($pattern), 99);
435	1				1	my $candidate_label = '';
436	1	50			3	if ($candidates[$i] =~ /^#\s+(\S.+)/) {
437	1				6	$candidate_label = " ($1)";
438						}
439	1				5	push @delayed_warnings, "$file:$position, Notice - Line matches candidate pattern$candidate_label $wrapped (candidate-pattern)\n";
440						}
441						}
442						}
443						}
444						}
445	10	100			13	if (@forbidden_totals) {
446	1				1	my @forbidden_list=get_array($stats, 'forbidden');
447	1				1	my @lines=get_array($stats, 'forbidden_lines');
448	1	50			2	if (@forbidden_list) {
449	1				1	for (my $i=0; $i < scalar @forbidden_list; $i++) {
450	1				1	my $hits = $forbidden_list[$i];
451	1	50			1	if ($hits) {
452	1				2	$forbidden_totals[$i] += $hits;
453						}
454						}
455						}
456						}
457						#print STDERR "$file (unrecognized: $unrecognized; unique: $unique; unknown: $unknown, words: $words, candidates: [".join(", ", @candidate_list)."])\n";
458						}
459
460	10	50			9	report_timing($file, $start_time, $directory, 'unknown') if ($timing_report);
461	10				7	my $kind = get_special($file, $special);
462						# These heuristics are very new and need tuning/feedback
463	10	100			14	if (
464						($unknown > $unique)
465						# || ($unrecognized > $words / 2)
466						) {
467	1	50			2	unless ($disable_noisy_file) {
468	1	50			2	if ($kind eq 'file') {
469	1				5	print SHOULD_EXCLUDE "$file\n";
470						}
471	1				1	my $warning = "noisy-$kind";
472	1				1	count_warning $warning;
473	1				1	push @delayed_warnings, "$file:1:1 ... 1, Warning - Skipping `$file` because it seems to have more noise ($unknown) than unique words ($unique) (total: $unrecognized / $words). ($warning)\n";
474	1				1	push @cleanup_directories, $directory;
475	1				1	next;
476						}
477						}
478	9				4	push @directories, $directory;
479	9	100	100		42	unless ($kind =~ /^file/ && -s "$directory/unknown") {
480	4				6	next;
481						}
482	5				35	open UNKNOWN, '<:utf8', "$directory/unknown";
483	5				42	for my $token (<UNKNOWN>) {
484	43				39	$token =~ s/\R//;
485	43	100			41	next unless $token =~ /./;
486	41				24	my ($key, $char) = collate_key $token;
487	41	100			46	$letter_map{$char} = () unless defined $letter_map{$char};
488	41				20	my %word_map = ();
489	41 14	100			35 16	%word_map = %{$letter_map{$char}{$key}} if defined $letter_map{$char}{$key};
490	41				40	$word_map{$token} = 1;
491	41				51	$letter_map{$char}{$key} = \%word_map;
492						}
493	5				19	close UNKNOWN;
494						}
495	12				30	close SHOULD_EXCLUDE;
496	12	50			9	close TIMING_REPORT if $timing_report;
497
498						summarize_totals(
499						sub {
500	1			1	1	my ($hits, $files, $context, $pattern) = @_;
501	1				6	return "# hit-count: $hits$files\n$context$pattern\n\n",
502						},
503	12				31	CheckSpelling::Util::get_file_from_env('candidate_summary', '/dev/stderr'),
504						\@candidates,
505						\@candidate_totals,
506						\@candidate_file_counts,
507						);
508
509						summarize_totals(
510						sub {
511	1			1	1	my (undef, undef, $context, $pattern) = @_;
512	1				3	$context =~ s/^# //gm;
513	1				1	chomp $context;
514	1				0	my $details;
515	1	50			3	if ($context =~ /^(.*?)$(.*)/ms) {
516	1				2	($context, $details) = ($1, $2);
517	1	50			0	$details = "\n$details" if $details;
518						}
519	1	50			1	$context = 'Pattern' unless $context;
520	1				8	return "##### $context$details\n```\n$pattern\n```\n\n";
521						},
522	12				36	CheckSpelling::Util::get_file_from_env('forbidden_summary', '/dev/stderr'),
523						\@forbidden,
524						\@forbidden_totals,
525						);
526
527	12				34	group_related_words;
528
529	12	100			11	if (defined $ENV{'expect'}) {
530	11				8	$ENV{'expect'} =~ /(.*)/;
531	11				11	load_expect $1;
532	11				8	harmonize_expect;
533						}
534
535	12				8	my %seen = ();
536	12				7	our %counters;
537	12				6	%counters = ();
538
539	12	100			40	if (-s $early_warnings) {
540	1				6	open WARNINGS, '<:utf8', $early_warnings;
541	1				6	for my $warning (<WARNINGS>) {
542	1				1	chomp $warning;
543	1				1	count_warning $warning;
544	1	50			1	next if should_skip_warning $warning;
545	1				5	print WARNING_OUTPUT "$warning\n";
546						}
547	1				2	close WARNINGS;
548						}
549
550	12				7	our %last_seen;
551	12				8	my %unknown_file_word_count;
552	12				9	for my $directory (@directories) {
553	10	100			30	next unless (-s "$directory/warnings");
554	9	50			57	next unless open(NAME, '<:utf8', "$directory/name");
555	9				29	my $file=<NAME>;
556	9				18	close NAME;
557	9				7	my $kind = get_special($file, $special);
558	9				49	open WARNINGS, '<:utf8', "$directory/warnings";
559	9	100			6	if ($kind ne 'file-list') {
560	8				50	for my $warning (<WARNINGS>) {
561	50				31	chomp $warning;
562	50	100			91	if ($warning =~ m/:(\d+):(\d+ \.\.\. \d+): `(.*)`/) {
563	46				39	my ($line, $range, $item) = ($1, $2, $3);
564	46				36	my $wrapped = CheckSpelling::Util::wrap_in_backticks($item);
565	46				31	my $reason = 'unrecognized-spelling';
566	46	100			23	$reason .= "-$kind" unless $kind eq 'file';
567	46				114	$warning =~ s/:\d+:\d+ \.\.\. \d+: `.*`/:$line:$range, Warning - $wrapped is not a recognized word ($reason)/;
568	46	100			40	next if log_skip_item($item, $file, $warning, $unknown_word_limit);
569	13	50			11	count_warning $warning if $kind ne 'file';
570						} else {
571	4	50			4	if ($warning =~ /\`(.*?)\` in line \(token-is-substring\)$/) {
572	0	0			0	next if skip_item($1);
573						}
574	4				4	count_warning $warning;
575						}
576	17	50			11	next if should_skip_warning $warning;
577	17				62	print WARNING_OUTPUT "$file$warning\n";
578						}
579						} else {
580	1				8	for my $warning (<WARNINGS>) {
581	6				3	chomp $warning;
582	6	50			13	next unless $warning =~ s/^:(\d+)/:1/;
583	6				5	$file = $check_file_paths[$1];
584	6	100			12	if ($warning =~ m/:(\d+ \.\.\. \d+): `(.*)`/) {
585	4				4	my ($range, $item) = ($1, $2);
586	4				3	my $wrapped = CheckSpelling::Util::wrap_in_backticks($item);
587	4				11	$warning =~ s/:\d+ \.\.\. \d+: `.*`/:$range, Warning - $wrapped is not a recognized word (check-file-path)/;
588	4	50			2	next if skip_item($item);
589	4	50			4	if (defined $unknown_file_word_limit) {
590	4	100			5	next if ++$unknown_file_word_count{$item} > $unknown_file_word_limit;
591						}
592						}
593	5	100			6	next if should_skip_warning $warning;
594	4				15	print WARNING_OUTPUT "$file$warning\n";
595	4				4	count_warning $warning;
596						}
597						}
598	9				32	close WARNINGS;
599						}
600	12				199	close MORE_WARNINGS;
601
602	12				12	for my $warning (@delayed_warnings) {
603	3	50			2	next if should_skip_warning $warning;
604	3				2	count_warning $warning;
605	3				5	print WARNING_OUTPUT $warning;
606						}
607	12	100			8	if (defined $unknown_word_limit) {
608	1				2	for my $warned_word (sort keys %last_seen) {
609	1		50		3	my $warning_count = $seen{$warned_word} || 0;
610	1	50			1	next unless $warning_count >= $unknown_word_limit;
611	0				0	my $warning = $last_seen{$warned_word};
612	0				0	$warning =~ s/\Q (unrecognized-spelling)\E/ -- found $warning_count times (limited-references)\n/;
613	0	0			0	next if should_skip_warning $warning;
614	0				0	print WARNING_OUTPUT $warning;
615	0				0	count_warning $warning;
616						}
617						}
618	12				250	close WARNING_OUTPUT;
619
620	12	100			13	if (%counters) {
621	3				1	my $continue='';
622	3				5	print COUNTER_SUMMARY "{\n";
623	3				6	for my $code (sort keys %counters) {
624	6				9	print COUNTER_SUMMARY qq<$continue"$code": $counters{$code}\n>;
625	6				3	$continue=',';
626						}
627	3				3	print COUNTER_SUMMARY "}\n";
628						}
629	12				75	close COUNTER_SUMMARY;
630
631						# display the current unknown
632	12				31	for my $char (sort keys %letter_map) {
633	43 43				24 95	for my $key (sort CheckSpelling::Util::case_biased keys(%{$letter_map{$char}})) {
634	24 24				16 35	my %word_map = %{$letter_map{$char}{$key}};
635	24				18	my @words = keys(%word_map);
636	24	100			21	if (scalar(@words) > 1) {
637	13 20	50			7 72	print $key." (".(join ", ", sort { length($a) <=> length($b) || $a cmp $b } @words).")";
638						} else {
639	11				36	print $words[0];
640						}
641	24				90	print "\n";
642						}
643						}
644						}
645
646						1;