File Coverage

File:	lib/CheckSpelling/SpellingCollator.pm
Coverage:	89.8%

line	stmt	bran	cond	sub	time	code
1						#! --perl--
2
3						package CheckSpelling::SpellingCollator;
4
5						our $VERSION='0.1.0';
6	1 1			1	109062 1	use 5.022;
7	1 1 1			1	1 1 2	use utf8;
8	1 1 1			1	16 1 49	use feature 'unicode_strings';
9	1 1 1			1	1 2 17	use warnings;
10	1 1 1			1	2 0 22	use File::Path qw(remove_tree);
11	1 1 1			1	232 0 3054	use CheckSpelling::Util;
12
13						my %letter_map;
14						my %ignored_event_map;
15						my $disable_word_collating;
16						my $shortest_word;
17
18						my %last_seen;
19
20						sub get_field {
21	40			40	32	my ($record, $field) = @_;
22	40	100			355	return 0 unless $record =~ (/\b$field:\s*(\d+)/);
23	30				37	return $1;
24						}
25
26						sub get_array {
27	4			4	4	my ($record, $field) = @_;
28	4	50			29	return () unless $record =~ (/\b$field: \[([^\]]+)\]/);
29	4				3	my $values = $1;
30	4				6	return split /\s,\s/, $values;
31						}
32
33						sub maybe {
34	7			7	5	my ($next, $value) = @_;
35	7	50	67		14	$next = $value unless $next && $next < $value;
36	7				6	return $next;
37						}
38
39						my %expected = ();
40						sub expect_item {
41	94			94	55	my ($item, $value) = @_;
42	94				71	$item =~ s/â€™/'/g;
43	94				35	our %expected;
44	94				33	my $next;
45	94	100 100			97	if (defined $expected{$item}) {
46	22				12	$next = $expected{$item};
47	22	100			17	$next = $value if $value < $next;
48						} elsif ($item =~ /^([A-Z])(.*)/) {
49	12				10	$item = $1 . lc $2;
50	12	100			11	if (defined $expected{$item}) {
51	2				2	$next = $expected{$item};
52	2				1	$next = maybe($next, $value + .1);
53						} else {
54	10				5	$item = lc $item;
55	10	100			11	if (defined $expected{$item}) {
56	5				2	$next = $expected{$item};
57	5				4	$next = maybe($next, $value + .2);
58						}
59						}
60						}
61	94	100			104	return 0 unless defined $next;
62	29				22	$expected{$item} = $next;
63	29				57	return $value;
64						}
65
66						sub skip_item {
67	50			50	28	my ($word) = @_;
68	50	100			27	return 1 if expect_item($word, 1);
69	32				23	my $key = lc $word;
70	32	50			39	return 2 if expect_item($key, 2);
71	32	100 100 100			35	if ($key =~ /.s$/) {
72	2	100			2	if ($key =~ /ies$/) {
73	1				2	$key =~ s/ies$/y/;
74						} else {
75	1				2	$key =~ s/s$//;
76						}
77						} elsif ($key =~ /^(.+[^aeiou])ed$/) {
78	1				1	$key = $1;
79						} elsif ($key =~ /^(.+)'[ds]$/) {
80	4				3	$key = $1;
81						} else {
82	25				23	return 0;
83						}
84	7	50			5	return 3 if expect_item($key, 3);
85	0				0	return 0;
86						}
87
88						sub should_skip_warning {
89	72			72	44	my ($warning) = @_;
90	72	100			96	if ($warning =~ /$([-\w]+)$$/) {
91	68				42	my ($code) = ($1);
92	68				29	our %ignored_event_map;
93	68	100			52	return 1 if $ignored_event_map{$code};
94						}
95	71				56	return 0;
96						}
97
98						sub log_skip_item {
99	46			46	57	my ($item, $file, $warning, $unknown_word_limit) = @_;
100	46	50			31	return 1 if should_skip_warning $warning;
101	46	100			26	return 1 if skip_item($item);
102	21				8	our %seen;
103	21				14	my $seen_count = $seen{$item};
104	21	100			16	if (defined $seen_count) {
105	8	100	100		11	if (!defined $unknown_word_limit \|\| ($seen_count++ < $unknown_word_limit)) {
106	7				31	print MORE_WARNINGS "$file$warning\n";
107						} else {
108	1				1	our %last_seen;
109	1				2	$last_seen{$item} = "$file$warning";
110						}
111	8				9	$seen{$item} = $seen_count;
112	8				13	return 1;
113						}
114	13				9	$seen{$item} = 1;
115	13				13	return 0;
116						}
117
118						sub stem_word {
119	28			28	858	my ($key) = @_;
120	28				9	our $disable_word_collating;
121	28	50			20	return $key if $disable_word_collating;
122	28				10	our $shortest_word;
123	28	100			18	$shortest_word = 2 unless defined $shortest_word;
124	28				16	my $key_length = length $key;
125
126	28	100 100	100		40	if ($key =~ /.s$/) {
127	5	100 100	67 100		26	if ($key_length > ($shortest_word + 1) && $key =~ /ies$/) {
128	1				2	$key =~ s/ies$/y/;
129						} elsif ($key_length > $shortest_word && $key !~ /ies$/) {
130	3				3	$key =~ s/s$//;
131						}
132						} elsif ($key_length > ($shortest_word + 1) && $key =~ /.[^aeiou]ed$/) {
133	2				4	$key =~ s/ed$//;
134						}
135	28				31	return $key;
136						}
137
138						sub collate_key {
139	81			81	55	my ($key) = @_;
140	81				26	our $disable_word_collating;
141	81				31	my $char;
142	81	100			50	if ($disable_word_collating) {
143	8				9	$char = lc substr $key, 0, 1;
144						} else {
145	73				48	$key = lc $key;
146	73				46	$key =~ s/''+/'/g;
147	73				42	$key =~ s/'[sd]$//;
148	73				34	$key =~ s/^[^Ii]?'+(.*)/$1/;
149	73				37	$key =~ s/(.*?)'$/$1/;
150	73				64	$char = substr $key, 0, 1;
151						}
152	81				101	return ($key, $char);
153						}
154
155						sub load_expect {
156	12			12	504	my ($expect) = @_;
157	12				24	our %expected;
158	12				13	%expected = ();
159	12	50			94	if (open(EXPECT, '<:utf8', $expect)) {
160	12				59	while (my $word = <EXPECT>) {
161	43				57	$word =~ s/\R//;
162	43				75	$expected{$word} = 0;
163						}
164	12				34	close EXPECT;
165						}
166						}
167
168						sub harmonize_expect {
169	11			11	4	our $disable_word_collating;
170	11				5	our %letter_map;
171	11				6	our %expected;
172
173	11				14	for my $word (keys %expected) {
174	40				15	my ($key, $char) = collate_key $word;
175	40				21	my %word_map = ();
176	40	100			46	next unless defined $letter_map{$char}{$key};
177	13 13				7 20	%word_map = %{$letter_map{$char}{$key}};
178	13	100			16	next if defined $word_map{$word};
179	3				3	my $words = scalar keys %word_map;
180	3	50			1	next if $words > 2;
181	3	100			3	if ($word eq $key) {
182	1	50			2	next if ($words > 1);
183						}
184	2				2	delete $expected{$word};
185						}
186						}
187
188						sub group_related_words {
189	12			12	6	our %letter_map;
190	12				6	our $disable_word_collating;
191	12	100			9	return if $disable_word_collating;
192
193						# group related words
194	11				22	for my $char (sort CheckSpelling::Util::number_biased keys %letter_map) {
195	19 19				9 19	for my $plural_key (sort keys(%{$letter_map{$char}})) {
196	22				11	my $key = stem_word $plural_key;
197	22	100			22	next if $key eq $plural_key;
198	4	100			5	next unless defined $letter_map{$char}{$key};
199	3 3				1 5	my %word_map = %{$letter_map{$char}{$key}};
200	3 3				2 3	for my $word (keys(%{$letter_map{$char}{$plural_key}})) {
201	3				2	$word_map{$word} = 1;
202						}
203	3				2	$letter_map{$char}{$key} = \%word_map;
204	3				5	delete $letter_map{$char}{$plural_key};
205						}
206						}
207						}
208
209						sub count_warning {
210	16			16	16	my ($warning) = @_;
211	16				6	our %counters;
212	16				7	our %ignored_event_map;
213	16	100			30	if ($warning =~ /$([-\w]+)$$/) {
214	10				7	my ($code) = ($1);
215	10	50			9	next if defined $ignored_event_map{$code};
216	10				11	++$counters{$code};
217						}
218						}
219
220						sub report_timing {
221	0			0	0	my ($name, $start_time, $directory, $marker) = @_;
222	0				0	my $end_time = (stat "$directory/$marker")[9];
223	0				0	$name =~ s/"/\\"/g;
224	0				0	print TIMING_REPORT "\"$name\", $start_time, $end_time\n";
225						}
226
227						sub get_pattern_with_context {
228	24			24	21	my ($path) = @_;
229	24	50			22	return unless defined $ENV{$path};
230	24				22	$ENV{$path} =~ /(.*)/;
231	24	50			135	return unless open ITEMS, '<:utf8', $1;
232
233	24				14	my @items;
234	24				16	my $context = '';
235	24				84	while (<ITEMS>) {
236	5				5	my $pattern = $_;
237	5	100			6	if ($pattern =~ /^#/) {
238	2	50			3	if ($pattern =~ /^# /) {
239	2				3	$context .= $pattern;
240						} else {
241	0				0	$context = '';
242						}
243	2				3	next;
244						}
245	3				2	chomp $pattern;
246	3	100			3	unless ($pattern =~ /./) {
247	1				1	$context = '';
248	1				2	next;
249						}
250	2				4	push @items, $context.$pattern;
251	2				5	$context = '';
252						}
253	24				49	close ITEMS;
254	24				26	return @items;
255						}
256
257						sub summarize_totals {
258	24			24	20	my ($formatter, $path, $items, $totals, $file_counts) = @_;
259	24 24	100			11 24	return unless @{$totals};
260	2	50			58	return unless open my $fh, '>:utf8', $path;
261	2 2				2 2	my $totals_count = scalar(@{$totals}) - 1;
262	2				1	my @indices;
263	2	100			2	if ($file_counts) {
264						@indices = sort {
265	1 0	0			2 0	$totals->[$b] <=> $totals->[$a] \|\|
266						$file_counts->[$b] <=> $file_counts->[$a]
267						} 0 .. $totals_count;
268						} else {
269						@indices = sort {
270	1 0				1 0	$totals->[$b] <=> $totals->[$a]
271						} 0 .. $totals_count;
272						}
273	2				2	for my $i (@indices) {
274	2	50			2	last unless $totals->[$i] > 0;
275	2				2	my $rule_with_context = $items->[$i];
276	2				2	my ($description, $rule);
277	2	50			6	if ($rule_with_context =~ /^(.*\n)([^\n]+)$/s) {
278	2				3	($description, $rule) = ($1, $2);
279						} else {
280	0				0	($description, $rule) = ('', $rule_with_context);
281						}
282	2	100			4	print $fh $formatter->(
283						$totals->[$i],
284						($file_counts ? " file-count: $file_counts->[$i]" : ""),
285						$description,
286						$rule
287						);
288						}
289	2				62	close $fh;
290						}
291
292						sub get_special {
293	19			19	13	my ($file, $special) = @_;
294	19	100			25	return 'file-list' if $file eq $special->{'file_list'};
295	17	100			18	return 'pr-title' if $file eq $special->{'pr_title_file'};
296	15	100			13	return 'pr-description' if $file eq $special->{'pr_description_file'};
297	13	100			26	return 'commit-message' if !rindex($file, $special->{'commit_messages'});
298	11				14	return 'file';
299						}
300
301						sub main {
302	12			12	22508	my @directories;
303						my @cleanup_directories;
304	12				0	my @check_file_paths;
305
306	12				12	my $early_warnings = CheckSpelling::Util::get_file_from_env('early_warnings', '/dev/null');
307	12				9	my $warning_output = CheckSpelling::Util::get_file_from_env('warning_output', '/dev/stderr');
308	12				9	my $more_warnings = CheckSpelling::Util::get_file_from_env('more_warnings', '/dev/stderr');
309	12				8	my $counter_summary = CheckSpelling::Util::get_file_from_env('counter_summary', '/dev/stderr');
310	12				8	my $ignored_events = CheckSpelling::Util::get_file_from_env('ignored_events', '');
311	12	100			12	if ($ignored_events) {
312	6				2	our %ignored_event_map;
313	6				8	for my $event (split /,/, $ignored_events) {
314	6				7	$ignored_event_map{$event} = 1;
315						}
316						}
317	12				9	my $should_exclude_file = CheckSpelling::Util::get_file_from_env('should_exclude_file', '/dev/null');
318	12				10	my $unknown_word_limit = CheckSpelling::Util::get_val_from_env('unknown_word_limit', undef);
319	12				7	my $unknown_file_word_limit = CheckSpelling::Util::get_val_from_env('unknown_file_word_limit', undef);
320	12				7	my $candidate_example_limit = CheckSpelling::Util::get_file_from_env('INPUT_CANDIDATE_EXAMPLE_LIMIT', '3');
321	12				6	my $disable_flags = CheckSpelling::Util::get_file_from_env('INPUT_DISABLE_CHECKS', '');
322	12				8	my $only_check_changed_files = CheckSpelling::Util::get_file_from_env('INPUT_ONLY_CHECK_CHANGED_FILES', '');
323	12				9	my $disable_noisy_file = $disable_flags =~ /(?:^\|,\|\s)noisy-file(?:,\|\s\|$)/;
324	12		67		28	our $disable_word_collating = $only_check_changed_files \|\| $disable_flags =~ /(?:^\|,\|\s)word-collating(?:,\|\s\|$)/;
325	12				11	our $shortest_word = CheckSpelling::Util::get_val_from_env('INPUT_SHORTEST_WORD', undef);
326	12				11	my $file_list = CheckSpelling::Util::get_file_from_env('check_file_names', '');
327	12				8	my $pr_title_file = CheckSpelling::Util::get_file_from_env('pr_title_file', '');
328	12				9	my $pr_description_file = CheckSpelling::Util::get_file_from_env('pr_description_file', '');
329	12				6	my $commit_messages = CheckSpelling::Util::get_file_from_env('commit_messages', '');
330	12				9	my $timing_report = CheckSpelling::Util::get_file_from_env('timing_report', '');
331	12				21	my $special = {
332						'file_list' => $file_list,
333						'pr_title_file' => $pr_title_file,
334						'pr_description_file' => $pr_description_file,
335						'commit_messages' => $commit_messages,
336						};
337	12				5	my ($start_time, $end_time);
338
339	12				236	open WARNING_OUTPUT, '>:utf8', $warning_output;
340	12				165	open MORE_WARNINGS, '>:utf8', $more_warnings;
341	12				145	open COUNTER_SUMMARY, '>:utf8', $counter_summary;
342	12				103	open SHOULD_EXCLUDE, '>:utf8', $should_exclude_file;
343	12	50			10	if ($timing_report) {
344	0				0	open TIMING_REPORT, '>:utf8', $timing_report;
345	0				0	print TIMING_REPORT "file, start, finish\n";
346						}
347
348	12				10	my @candidates = get_pattern_with_context('candidates_path');
349	12				12	my @candidate_totals = (0) x scalar @candidates;
350	12				7	my @candidate_file_counts = (0) x scalar @candidates;
351
352	12				6	my @forbidden = get_pattern_with_context('forbidden_path');
353	12				10	my @forbidden_totals = (0) x scalar @forbidden;
354
355	12				9	my @delayed_warnings;
356	12				23	our %letter_map = ();
357
358	12				5	my %file_map = ();
359
360	12				32	for my $directory (<>) {
361	15				11	chomp $directory;
362	15	50			24	next unless $directory =~ /^(.*)$/;
363	15				14	$directory = $1;
364	15	100			44	unless (-e $directory) {
365	1				3	print STDERR "Could not find: $directory\n";
366	1				1	next;
367						}
368	14	100			30	unless (-d $directory) {
369	1				11	print STDERR "Not a directory: $directory\n";
370	1				1	next;
371						}
372
373						# if there's no filename, we can't report
374	13	100			98	next unless open(NAME, '<:utf8', "$directory/name");
375	12				60	my $file=<NAME>;
376	12				23	close NAME;
377
378	12				23	$file_map{$file} = $directory;
379						}
380
381	12				21	for my $file (sort keys %file_map) {
382	12				9	my $directory = $file_map{$file};
383	12	50			10	if ($timing_report) {
384	0				0	$start_time = (stat "$directory/name")[9];
385						}
386
387	12	100			59	if (-e "$directory/skipped") {
388	1				6	open SKIPPED, '<:utf8', "$directory/skipped";
389	1				9	my $reason=<SKIPPED>;
390	1				2	close SKIPPED;
391	1				1	chomp $reason;
392	1				3	push @delayed_warnings, "$file:1:1 ... 1, Warning - Skipping `$file` because $reason\n";
393	1				2	print SHOULD_EXCLUDE "$file\n";
394	1				1	push @cleanup_directories, $directory;
395	1	50			1	report_timing($file, $start_time, $directory, 'skipped') if ($timing_report);
396	1				1	next;
397						}
398
399						# stats isn't written if there was nothing interesting in the file
400	11	100			34	unless (-s "$directory/stats") {
401	1	50			1	report_timing($file, $start_time, $directory, 'warnings') if ($timing_report);
402	1				1	push @directories, $directory;
403	1				1	next;
404						}
405
406	10	100			8	if ($file eq $file_list) {
407	1				6	open FILE_LIST, '<:utf8', $file_list;
408	1				0	push @check_file_paths, '0 placeholder';
409	1				6	for my $check_file_path (<FILE_LIST>) {
410	4				4	chomp $check_file_path;
411	4				3	push @check_file_paths, $check_file_path;
412						}
413	1				2	close FILE_LIST;
414						}
415
416	10				8	my ($words, $unrecognized, $unknown, $unique);
417
418						{
419	10 10				6 50	open STATS, '<:utf8', "$directory/stats";
420	10				41	my $stats=<STATS>;
421	10				16	close STATS;
422	10				9	$words=get_field($stats, 'words');
423	10				7	$unrecognized=get_field($stats, 'unrecognized');
424	10				6	$unknown=get_field($stats, 'unknown');
425	10				7	$unique=get_field($stats, 'unique');
426	10				3	my @candidate_list;
427	10	100			9	if (@candidate_totals) {
428	1				1	@candidate_list=get_array($stats, 'candidates');
429	1				1	my @lines=get_array($stats, 'candidate_lines');
430	1	50			1	if (@candidate_list) {
431	1				1	for (my $i=0; $i < scalar @candidate_list; $i++) {
432	1				1	my $hits = $candidate_list[$i];
433	1	50			1	if ($hits) {
434	1				1	$candidate_totals[$i] += $hits;
435	1	50			1	if ($candidate_file_counts[$i]++ < $candidate_example_limit) {
436	1				2	my $pattern = (split /\n/,$candidates[$i])[-1];
437	1				1	my $position = $lines[$i];
438	1				5	$position =~ s/:(\d+)$/ ... $1/;
439	1				1	my $wrapped = CheckSpelling::Util::truncate_with_ellipsis(CheckSpelling::Util::wrap_in_backticks($pattern), 99);
440	1				1	my $candidate_label = '';
441	1	50			2	if ($candidates[$i] =~ /^#\s+(\S.+)/) {
442	1				1	$candidate_label = " ($1)";
443						}
444	1				5	push @delayed_warnings, "$file:$position, Notice - Line matches candidate pattern$candidate_label $wrapped (candidate-pattern)\n";
445						}
446						}
447						}
448						}
449						}
450	10	100			28	if (@forbidden_totals) {
451	1				1	my @forbidden_list=get_array($stats, 'forbidden');
452	1				1	my @lines=get_array($stats, 'forbidden_lines');
453	1	50			1	if (@forbidden_list) {
454	1				1	for (my $i=0; $i < scalar @forbidden_list; $i++) {
455	1				1	my $hits = $forbidden_list[$i];
456	1	50			0	if ($hits) {
457	1				2	$forbidden_totals[$i] += $hits;
458						}
459						}
460						}
461						}
462						#print STDERR "$file (unrecognized: $unrecognized; unique: $unique; unknown: $unknown, words: $words, candidates: [".join(", ", @candidate_list)."])\n";
463						}
464
465	10	50			10	report_timing($file, $start_time, $directory, 'unknown') if ($timing_report);
466	10				9	my $kind = get_special($file, $special);
467						# These heuristics are very new and need tuning/feedback
468	10	100			11	if (
469						($unknown > $unique)
470						# \|\| ($unrecognized > $words / 2)
471						) {
472	1	50			1	unless ($disable_noisy_file) {
473	1	50			1	if ($kind eq 'file') {
474	1				5	print SHOULD_EXCLUDE "$file\n";
475						}
476	1				1	my $warning = "noisy-$kind";
477	1				1	count_warning $warning;
478	1				2	push @delayed_warnings, "$file:1:1 ... 1, Warning - Skipping `$file` because it seems to have more noise ($unknown) than unique words ($unique) (total: $unrecognized / $words). ($warning)\n";
479	1				0	push @cleanup_directories, $directory;
480	1				2	next;
481						}
482						}
483	9				6	push @directories, $directory;
484	9	100	100		38	unless ($kind =~ /^file/ && -s "$directory/unknown") {
485	4				6	next;
486						}
487	5				32	open UNKNOWN, '<:utf8', "$directory/unknown";
488	5				33	for my $token (<UNKNOWN>) {
489	43				48	$token =~ s/\R//;
490	43	100			35	next unless $token =~ /./;
491	41				40	my ($key, $char) = collate_key $token;
492	41	100			48	$letter_map{$char} = () unless defined $letter_map{$char};
493	41				24	my %word_map = ();
494	41 14	100			31 20	%word_map = %{$letter_map{$char}{$key}} if defined $letter_map{$char}{$key};
495	41				34	$word_map{$token} = 1;
496	41				49	$letter_map{$char}{$key} = \%word_map;
497						}
498	5				19	close UNKNOWN;
499						}
500	12				36	close SHOULD_EXCLUDE;
501	12	50			9	close TIMING_REPORT if $timing_report;
502
503						summarize_totals(
504						sub {
505	1			1	1	my ($hits, $files, $context, $pattern) = @_;
506	1				5	return "# hit-count: $hits$files\n$context$pattern\n\n",
507						},
508	12				36	CheckSpelling::Util::get_file_from_env('candidate_summary', '/dev/stderr'),
509						\@candidates,
510						\@candidate_totals,
511						\@candidate_file_counts,
512						);
513
514						summarize_totals(
515						sub {
516	1			1	1	my (undef, undef, $context, $pattern) = @_;
517	1				2	$context =~ s/^# //gm;
518	1				1	chomp $context;
519	1				1	my $details;
520	1	50			3	if ($context =~ /^(.?)$(.)/ms) {
521	1				1	($context, $details) = ($1, $2);
522	1	50			1	$details = "\n$details" if $details;
523						}
524	1	50			6	$context = 'Pattern' unless $context;
525	1				5	return "##### $context$details\n```\n$pattern\n```\n\n";
526						},
527	12				34	CheckSpelling::Util::get_file_from_env('forbidden_summary', '/dev/stderr'),
528						\@forbidden,
529						\@forbidden_totals,
530						);
531
532	12				35	group_related_words;
533
534	12	100			17	if (defined $ENV{'expect'}) {
535	11				10	$ENV{'expect'} =~ /(.*)/;
536	11				9	load_expect $1;
537	11				7	harmonize_expect;
538						}
539
540	12				9	my %seen = ();
541	12				4	our %counters;
542	12				9	%counters = ();
543
544	12	100			45	if (-s $early_warnings) {
545	1				5	open WARNINGS, '<:utf8', $early_warnings;
546	1				8	for my $warning (<WARNINGS>) {
547	1				1	chomp $warning;
548	1				1	count_warning $warning;
549	1	50			3	next if should_skip_warning $warning;
550	1				4	print WARNING_OUTPUT "$warning\n";
551						}
552	1				3	close WARNINGS;
553						}
554
555	12				6	our %last_seen;
556	12				4	my %unknown_file_word_count;
557	12				9	for my $directory (@directories) {
558	10	100			29	next unless (-s "$directory/warnings");
559	9	50			53	next unless open(NAME, '<:utf8', "$directory/name");
560	9				35	my $file=<NAME>;
561	9				16	close NAME;
562	9				8	my $kind = get_special($file, $special);
563	9				46	open WARNINGS, '<:utf8', "$directory/warnings";
564	9	100			9	if ($kind ne 'file-list') {
565	8				46	for my $warning (<WARNINGS>) {
566	50				38	chomp $warning;
567	50	100			90	if ($warning =~ m/:(\d+):(\d+ \.\.\. \d+): `(.*)`/) {
568	46				40	my ($line, $range, $item) = ($1, $2, $3);
569	46				36	my $wrapped = CheckSpelling::Util::wrap_in_backticks($item);
570	46				25	my $reason = 'unrecognized-spelling';
571	46	100			30	$reason .= "-$kind" unless $kind eq 'file';
572	46				100	$warning =~ s/:\d+:\d+ \.\.\. \d+: `.*`/:$line:$range, Warning - $wrapped is not a recognized word ($reason)/;
573	46	100			33	next if log_skip_item($item, $file, $warning, $unknown_word_limit);
574	13	50			11	count_warning $warning if $kind ne 'file';
575						} else {
576	4	50			4	if ($warning =~ /\`(.*?)\` in line $token-is-substring$/) {
577	0	0			0	next if skip_item($1);
578						}
579	4				4	count_warning $warning;
580						}
581	17	50			6	next if should_skip_warning $warning;
582	17				57	print WARNING_OUTPUT "$file$warning\n";
583						}
584						} else {
585	1				7	for my $warning (<WARNINGS>) {
586	6				6	chomp $warning;
587	6	50			10	next unless $warning =~ s/^:(\d+)/:1/;
588	6				6	$file = $check_file_paths[$1];
589	6	100			12	if ($warning =~ m/:(\d+ \.\.\. \d+): `(.*)`/) {
590	4				4	my ($range, $item) = ($1, $2);
591	4				4	my $wrapped = CheckSpelling::Util::wrap_in_backticks($item);
592	4				11	$warning =~ s/:\d+ \.\.\. \d+: `.*`/:$range, Warning - $wrapped is not a recognized word (check-file-path)/;
593	4	50			2	next if skip_item($item);
594	4	50			3	if (defined $unknown_file_word_limit) {
595	4	100			10	next if ++$unknown_file_word_count{$item} > $unknown_file_word_limit;
596						}
597						}
598	5	100			4	next if should_skip_warning $warning;
599	4				11	print WARNING_OUTPUT "$file$warning\n";
600	4				9	count_warning $warning;
601						}
602						}
603	9				34	close WARNINGS;
604						}
605	12				214	close MORE_WARNINGS;
606
607	12				11	for my $warning (@delayed_warnings) {
608	3	50			3	next if should_skip_warning $warning;
609	3				2	count_warning $warning;
610	3				3	print WARNING_OUTPUT $warning;
611						}
612	12	100			11	if (defined $unknown_word_limit) {
613	1				2	for my $warned_word (sort keys %last_seen) {
614	1		50		3	my $warning_count = $seen{$warned_word} \|\| 0;
615	1	50			2	next unless $warning_count >= $unknown_word_limit;
616	0				0	my $warning = $last_seen{$warned_word};
617	0				0	$warning =~ s/\Q (unrecognized-spelling)\E/ -- found $warning_count times (limited-references)\n/;
618	0	0			0	next if should_skip_warning $warning;
619	0				0	print WARNING_OUTPUT $warning;
620	0				0	count_warning $warning;
621						}
622						}
623	12				253	close WARNING_OUTPUT;
624
625	12	100			12	if (%counters) {
626	3				3	my $continue='';
627	3				5	print COUNTER_SUMMARY "{\n";
628	3				5	for my $code (sort keys %counters) {
629	6				9	print COUNTER_SUMMARY qq<$continue"$code": $counters{$code}\n>;
630	6				5	$continue=',';
631						}
632	3				3	print COUNTER_SUMMARY "}\n";
633						}
634	12				93	close COUNTER_SUMMARY;
635
636						# display the current unknown
637	12				25	for my $char (sort keys %letter_map) {
638	43 43				23 95	for my $key (sort CheckSpelling::Util::case_biased keys(%{$letter_map{$char}})) {
639	24 24				15 32	my %word_map = %{$letter_map{$char}{$key}};
640	24				19	my @words = keys(%word_map);
641	24	100			14	if (scalar(@words) > 1) {
642	13 20	50			39 84	print $key." (".(join ", ", sort { length($a) <=> length($b) \|\| $a cmp $b } @words).")";
643						} else {
644	11				36	print $words[0];
645						}
646	24				99	print "\n";
647						}
648						}
649						}
650
651						1;