File Coverage

File:	lib/CheckSpelling/SpellingCollator.pm
Coverage:	83.9%

line	stmt	bran	cond	sub	time	code
1						#! -*-perl-*-
2
3						package CheckSpelling::SpellingCollator;
4
5						our $VERSION='0.1.0';
6	1 1 1			1	105681 2 21	use warnings;
7	1 1 1			1	2 1 22	use File::Path qw(remove_tree);
8	1 1 1			1	194 0 2204	use CheckSpelling::Util;
9
10						my %letter_map;
11						my %ignored_event_map;
12						my $disable_word_collating;
13
14						my %last_seen;
15
16						sub get_field {
17	28			28	18	my ($record, $field) = @_;
18	28	100			313	return 0 unless $record =~ (/\b$field:\s*(\d+)/);
19	16				20	return $1;
20						}
21
22						sub get_array {
23	2			2	2	my ($record, $field) = @_;
24	2	50			15	return () unless $record =~ (/\b$field: \[([^\]]+)\]/);
25	2				2	my $values = $1;
26	2				3	return split /\s*,\s*/, $values;
27						}
28
29						sub maybe {
30	7			7	2	my ($next, $value) = @_;
31	7	50	67		14	$next = $value unless $next && $next < $value;
32	7				4	return $next;
33						}
34
35						my %expected = ();
36						sub expect_item {
37	98			98	57	my ($item, $value) = @_;
38	98				33	our %expected;
39	98				41	my $next;
40	98	100 100			107	if (defined $expected{$item}) {
41	26				14	$next = $expected{$item};
42	26	100			20	$next = $value if $value < $next;
43						} elsif ($item =~ /^([A-Z])(.*)/) {
44	12				10	$item = $1 . lc $2;
45	12	100			10	if (defined $expected{$item}) {
46	2				2	$next = $expected{$item};
47	2				1	$next = maybe($next, $value + .1);
48						} else {
49	10				6	$item = lc $item;
50	10	100			7	if (defined $expected{$item}) {
51	5				4	$next = $expected{$item};
52	5				6	$next = maybe($next, $value + .2);
53						}
54						}
55						}
56	98	100			82	return 0 unless defined $next;
57	33				24	$expected{$item} = $next;
58	33				62	return $value;
59						}
60
61						sub skip_item {
62	52			52	27	my ($word) = @_;
63	52	100			26	return 1 if expect_item($word, 1);
64	32				21	my $key = lc $word;
65	32	50			17	return 2 if expect_item($key, 2);
66	32	100 100 100			49	if ($key =~ /.s$/) {
67	2	100			2	if ($key =~ /ies$/) {
68	1				1	$key =~ s/ies$/y/;
69						} else {
70	1				2	$key =~ s/s$//;
71						}
72						} elsif ($key =~ /^(.+[^aeiou])ed$/) {
73	1				1	$key = $1;
74						} elsif ($key =~ /^(.+)'[ds]$/) {
75	6				3	$key = $1;
76						} else {
77	23				18	return 0;
78						}
79	9	50			8	return 3 if expect_item($key, 3);
80	0				0	return 0;
81						}
82
83						sub should_skip_warning {
84	69			69	48	my ($warning) = @_;
85	69	100			78	if ($warning =~ /\(([-\w]+)\)$/) {
86	68				42	my ($code) = ($1);
87	68				35	our %ignored_event_map;
88	68	100			46	return 1 if $ignored_event_map{$code};
89						}
90	68				57	return 0;
91						}
92
93						sub log_skip_item {
94	48			48	55	my ($item, $file, $warning, $unknown_word_limit) = @_;
95	48	50			28	return 1 if should_skip_warning $warning;
96	48	100			25	return 1 if skip_item($item);
97	19				14	my $seen_count = $seen{$item};
98	19	100			10	if (defined $seen_count) {
99	6	100	100		9	if (!defined $unknown_word_limit || ($seen_count++ < $unknown_word_limit)) {
100	5				30	print MORE_WARNINGS "$file$warning\n"
101						} else {
102	1				0	our %last_seen;
103	1				2	$last_seen{$item} = "$file$warning";
104						}
105	6				7	$seen{$item} = $seen_count;
106	6				9	return 1;
107						}
108	13				7	$seen{$item} = 1;
109	13				15	return 0;
110						}
111
112						sub stem_word {
113	22			22	15	my ($key) = @_;
114	22				6	our $disable_word_collating;
115	22	50			15	return $key if $disable_word_collating;
116
117	22	100 100			16	if ($key =~ /.s$/) {
118	3	100			4	if ($key =~ /ies$/) {
119	1				1	$key =~ s/ies$/y/;
120						} else {
121	2				2	$key =~ s/s$//;
122						}
123						} elsif ($key =~ /.[^aeiou]ed$/) {
124	1				2	$key =~ s/ed$//;
125						}
126	22				23	return $key;
127						}
128
129						sub collate_key {
130	77			77	40	my ($key) = @_;
131	77				37	our $disable_word_collating;
132	77	100			38	if ($disable_word_collating) {
133	16				16	$char = lc substr $key, 0, 1;
134						} else {
135	61				39	$key = lc $key;
136	61				60	$key =~ s/''+/'/g;
137	61				30	$key =~ s/'[sd]$//;
138	61				33	$key =~ s/^[^Ii]?'+(.*)/$1/;
139	61				26	$key =~ s/(.*?)'$/$1/;
140	61				64	$char = substr $key, 0, 1;
141						}
142	77				85	return ($key, $char);
143						}
144
145						sub load_expect {
146	9			9	470	my ($expect) = @_;
147	9				2	our %expected;
148	9				10	%expected = ();
149	9	50			69	if (open(EXPECT, '<:utf8', $expect)) {
150	9				55	while ($word = <EXPECT>) {
151	34				52	$word =~ s/\R//;
152	34				82	$expected{$word} = 0;
153						}
154	9				24	close EXPECT;
155						}
156						}
157
158						sub harmonize_expect {
159	8			8	3	our $disable_word_collating;
160	8				3	our %letter_map;
161	8				5	our %expected;
162
163	8				10	for my $word (keys %expected) {
164	31				23	my ($key, $char) = collate_key $word;
165	31				20	my %word_map = ();
166	31	100			31	next unless defined $letter_map{$char}{$key};
167	15 15				7 18	%word_map = %{$letter_map{$char}{$key}};
168	15	100			16	next if defined $word_map{$word};
169	3				1	my $words = scalar keys %word_map;
170	3	50			2	next if $words > 2;
171	3	100			5	if ($word eq $key) {
172	1	50			1	next if ($words > 1);
173						}
174	2				3	delete $expected{$word};
175						}
176						}
177
178						sub group_related_words {
179	9			9	3	our %letter_map;
180	9				5	our $disable_word_collating;
181	9	100			7	return if $disable_word_collating;
182
183						# group related words
184	7				15	for my $char (sort CheckSpelling::Util::number_biased keys %letter_map) {
185	19 19				5 20	for my $plural_key (sort keys(%{$letter_map{$char}})) {
186	22				10	my $key = stem_word $plural_key;
187	22	100			22	next if $key eq $plural_key;
188	4	100			4	next unless defined $letter_map{$char}{$key};
189	3 3				1 5	my %word_map = %{$letter_map{$char}{$key}};
190	3 3				3 3	for $word (keys(%{$letter_map{$char}{$plural_key}})) {
191	3				3	$word_map{$word} = 1;
192						}
193	3				1	$letter_map{$char}{$key} = \%word_map;
194	3				4	delete $letter_map{$char}{$plural_key};
195						}
196						}
197						}
198
199						sub count_warning {
200	10			10	9	my ($warning) = @_;
201	10				5	our %counters;
202	10				4	our %ignored_event_map;
203	10	100			16	if ($warning =~ /\(([-\w]+)\)$/) {
204	8				6	my ($code) = ($1);
205	8	50			7	next if defined $ignored_event_map{$code};
206	8				9	++$counters{$code};
207						}
208						}
209
210						sub report_timing {
211	0			0	0	my ($name, $start_time, $directory, $marker) = @_;
212	0				0	my $end_time = (stat "$directory/$marker")[9];
213	0				0	$name =~ s/"/\\"/g;
214	0				0	print TIMING_REPORT "\"$name\", $start_time, $end_time\n";
215						}
216
217						sub get_pattern_with_context {
218	18			18	11	my ($path) = @_;
219	18	100			18	return unless defined $ENV{$path};
220	9				9	$ENV{$path} =~ /(.*)/;
221	9	50			55	return unless open ITEMS, '<:utf8', $1;
222
223	9				7	my @items;
224	9				2	my $context = '';
225	9				44	while (<ITEMS>) {
226	2				3	my $pattern = $_;
227	2	100			3	if ($pattern =~ /^#/) {
228	1	50			1	if ($pattern =~ /^# /) {
229	1				2	$context .= $pattern;
230						} else {
231	0				0	$context = '';
232						}
233	1				2	next;
234						}
235	1				1	chomp $pattern;
236	1	50			4	unless ($pattern =~ /./) {
237	0				0	$context = '';
238	0				0	next;
239						}
240	1				2	push @items, $context.$pattern;
241	1				3	$context = '';
242						}
243	9				21	close ITEMS;
244	9				11	return @items;
245						}
246
247						sub summarize_totals {
248	18			18	14	my ($formatter, $path, $items, $totals, $file_counts) = @_;
249	18 18	100			9 14	return unless @{$totals};
250	1	50			20	return unless open my $fh, '>:utf8', $path;
251	1 1				4 1	my $totals_count = scalar(@{$totals}) - 1;
252	1				1	my @indices;
253	1	50			1	if ($file_counts) {
254						@indices = sort {
255	0 0	0			0 0	$totals->[$b] <=> $totals->[$a] ||
256						$file_counts->[$b] <=> $file_counts->[$a]
257						} 0 .. $totals_count;
258						} else {
259						@indices = sort {
260	1 0				1 0	$totals->[$b] <=> $totals->[$a]
261						} 0 .. $totals_count;
262						}
263	1				1	for my $i (@indices) {
264	1	50			1	last unless $totals->[$i] > 0;
265	1				1	my $rule_with_context = $items->[$i];
266	1				1	my ($description, $rule);
267	1	50			2	if ($rule_with_context =~ /^(.*\n)([^\n]+)$/s) {
268	1				2	($description, $rule) = ($1, $2);
269						} else {
270	0				0	($description, $rule) = ('', $rule_with_context);
271						}
272	1	50			1	print $fh $formatter->(
273						$totals->[$i],
274						($file_counts ? " file-count: $file_counts->[$i]" : ""),
275						$description,
276						$rule
277						);
278						}
279	1				36	close $fh;
280						}
281
282						sub main {
283	9			9	15370	my @directories;
284						my @cleanup_directories;
285	9				0	my @check_file_paths;
286
287	9				10	my $early_warnings = CheckSpelling::Util::get_file_from_env('early_warnings', '/dev/null');
288	9				7	my $warning_output = CheckSpelling::Util::get_file_from_env('warning_output', '/dev/stderr');
289	9				4	my $more_warnings = CheckSpelling::Util::get_file_from_env('more_warnings', '/dev/stderr');
290	9				5	my $counter_summary = CheckSpelling::Util::get_file_from_env('counter_summary', '/dev/stderr');
291	9				5	my $ignored_events = CheckSpelling::Util::get_file_from_env('ignored_events', '');
292	9	100			9	if ($ignored_events) {
293	2				2	our %ignored_event_map;
294	2				2	for my $event (split /,/, $ignored_events) {
295	2				2	$ignored_event_map{$event} = 1;
296						}
297						}
298	9				4	my $should_exclude_file = CheckSpelling::Util::get_file_from_env('should_exclude_file', '/dev/null');
299	9				8	my $unknown_word_limit = CheckSpelling::Util::get_val_from_env('unknown_word_limit', undef);
300	9				6	my $unknown_file_word_limit = CheckSpelling::Util::get_val_from_env('unknown_file_word_limit', undef);
301	9				3	my $candidate_example_limit = CheckSpelling::Util::get_file_from_env('INPUT_CANDIDATE_EXAMPLE_LIMIT', '3');
302	9				5	my $disable_flags = CheckSpelling::Util::get_file_from_env('INPUT_DISABLE_CHECKS', '');
303	9				5	my $only_check_changed_files = CheckSpelling::Util::get_file_from_env('INPUT_ONLY_CHECK_CHANGED_FILES', '');
304	9				5	my $disable_noisy_file = $disable_flags =~ /(?:^|,|\s)noisy-file(?:,|\s|$)/;
305	9		67		26	our $disable_word_collating = $only_check_changed_files || $disable_flags =~ /(?:^|,|\s)word-collating(?:,|\s|$)/;
306	9				5	my $file_list = CheckSpelling::Util::get_file_from_env('check_file_names', '');
307	9				5	my $timing_report = CheckSpelling::Util::get_file_from_env('timing_report', '');
308	9				6	my ($start_time, $end_time);
309
310	9				170	open WARNING_OUTPUT, '>:utf8', $warning_output;
311	9				126	open MORE_WARNINGS, '>:utf8', $more_warnings;
312	9				109	open COUNTER_SUMMARY, '>:utf8', $counter_summary;
313	9				64	open SHOULD_EXCLUDE, '>:utf8', $should_exclude_file;
314	9	50			9	if ($timing_report) {
315	0				0	open TIMING_REPORT, '>:utf8', $timing_report;
316	0				0	print TIMING_REPORT "file, start, finish\n";
317						}
318
319	9				9	my @candidates = get_pattern_with_context('candidates_path');
320	9				7	my @candidate_totals = (0) x scalar @candidates;
321	9				3	my @candidate_file_counts = (0) x scalar @candidates;
322
323	9				8	my @forbidden = get_pattern_with_context('forbidden_path');
324	9				7	my @forbidden_totals = (0) x scalar @forbidden;
325
326	9				4	my @delayed_warnings;
327	9				37	our %letter_map = ();
328
329	9				5	my %file_map = ();
330
331	9				20	for my $directory (<>) {
332	12				12	chomp $directory;
333	12	50			17	next unless $directory =~ /^(.*)$/;
334	12				12	$directory = $1;
335	12	100			36	unless (-e $directory) {
336	1				3	print STDERR "Could not find: $directory\n";
337	1				1	next;
338						}
339	11	100			26	unless (-d $directory) {
340	1				13	print STDERR "Not a directory: $directory\n";
341	1				1	next;
342						}
343
344						# if there's no filename, we can't report
345	10	100			85	next unless open(NAME, '<:utf8', "$directory/name");
346	9				45	my $file=<NAME>;
347	9				19	close NAME;
348
349	9				19	$file_map{$file} = $directory;
350						}
351
352	9				15	for my $file (sort keys %file_map) {
353	9				12	my $directory = $file_map{$file};
354	9	50			8	if ($timing_report) {
355	0				0	$start_time = (stat "$directory/name")[9];
356						}
357
358	9	100			42	if (-e "$directory/skipped") {
359	1				7	open SKIPPED, '<:utf8', "$directory/skipped";
360	1				9	my $reason=<SKIPPED>;
361	1				2	close SKIPPED;
362	1				1	chomp $reason;
363	1				9	push @delayed_warnings, "$file:1:1 ... 1, Warning - Skipping `$file` because $reason\n";
364	1				4	print SHOULD_EXCLUDE "$file\n";
365	1				1	push @cleanup_directories, $directory;
366	1	50			1	report_timing($file, $start_time, $directory, 'skipped') if ($timing_report);
367	1				1	next;
368						}
369
370						# stats isn't written if there was nothing interesting in the file
371	8	100			24	unless (-s "$directory/stats") {
372	1				1	push @directories, $directory;
373	1	50			1	report_timing($file, $start_time, $directory, 'warnings') if ($timing_report);
374	1				1	next;
375						}
376
377	7	100			7	if ($file eq $file_list) {
378	1				6	open FILE_LIST, '<:utf8', $file_list;
379	1				0	push @check_file_paths, '0 placeholder';
380	1				6	for my $check_file_path (<FILE_LIST>) {
381	4				3	chomp $check_file_path;
382	4				4	push @check_file_paths, $check_file_path;
383						}
384	1				3	close FILE_LIST;
385						}
386
387	7				6	my ($words, $unrecognized, $unknown, $unique);
388
389						{
390	7 7				1 38	open STATS, '<:utf8', "$directory/stats";
391	7				25	my $stats=<STATS>;
392	7				11	close STATS;
393	7				6	$words=get_field($stats, 'words');
394	7				7	$unrecognized=get_field($stats, 'unrecognized');
395	7				7	$unknown=get_field($stats, 'unknown');
396	7				5	$unique=get_field($stats, 'unique');
397	7				4	my @candidate_list;
398	7	50			6	if (@candidate_totals) {
399	0				0	@candidate_list=get_array($stats, 'candidates');
400	0				0	my @lines=get_array($stats, 'candidate_lines');
401	0	0			0	if (@candidate_list) {
402	0				0	for (my $i=0; $i < scalar @candidate_list; $i++) {
403	0				0	my $hits = $candidate_list[$i];
404	0	0			0	if ($hits) {
405	0				0	$candidate_totals[$i] += $hits;
406	0	0			0	if ($candidate_file_counts[$i]++ < $candidate_example_limit) {
407	0				0	my $pattern = (split /\n/,$candidates[$i])[-1];
408	0				0	my $position = $lines[$i];
409	0				0	$position =~ s/:(\d+)$/ ... $1/;
410	0				0	my $wrapped = CheckSpelling::Util::wrap_in_backticks($pattern);
411	0				0	push @delayed_warnings, "$file:$position, Notice - Line matches candidate pattern $wrapped (candidate-pattern)\n";
412						}
413						}
414						}
415						}
416						}
417	7	100			6	if (@forbidden_totals) {
418	1				1	@forbidden_list=get_array($stats, 'forbidden');
419	1				1	my @lines=get_array($stats, 'forbidden_lines');
420	1	50			1	if (@forbidden_list) {
421	1				1	for (my $i=0; $i < scalar @forbidden_list; $i++) {
422	1				1	my $hits = $forbidden_list[$i];
423	1	50			1	if ($hits) {
424	1				2	$forbidden_totals[$i] += $hits;
425						}
426						}
427						}
428						}
429						#print STDERR "$file (unrecognized: $unrecognized; unique: $unique; unknown: $unknown, words: $words, candidates: [".join(", ", @candidate_list)."])\n";
430						}
431
432	7	50			7	report_timing($file, $start_time, $directory, 'unknown') if ($timing_report);
433						# These heuristics are very new and need tuning/feedback
434	7	50			7	if (
435						($unknown > $unique)
436						# || ($unrecognized > $words / 2)
437						) {
438	0	0			0	unless ($disable_noisy_file) {
439	0	0			0	if ($file eq $file_list) {
440	0				0	push @delayed_warnings, "$file:1:1 ... 1, Warning - Skipping file list because there seems to be more noise ($unknown) than unique words ($unique) (total: $unrecognized / $words). (noisy-file-list)\n";
441						} else {
442	0				0	push @delayed_warnings, "$file:1:1 ... 1, Warning - Skipping `$file` because it seems to have more noise ($unknown) than unique words ($unique) (total: $unrecognized / $words). (noisy-file)\n";
443	0				0	print SHOULD_EXCLUDE "$file\n";
444						}
445	0				0	push @directories, $directory;
446	0				0	next;
447						}
448						}
449	7	100			25	unless (-s "$directory/unknown") {
450	1				1	push @directories, $directory;
451	1				2	next;
452						}
453	6				38	open UNKNOWN, '<:utf8', "$directory/unknown";
454	6				42	for $token (<UNKNOWN>) {
455	49				47	$token =~ s/\R//;
456	49	100			46	next unless $token =~ /./;
457	46				27	my ($key, $char) = collate_key $token;
458	46	100			50	$letter_map{$char} = () unless defined $letter_map{$char};
459	46				27	my %word_map = ();
460	46 14	100			37 18	%word_map = %{$letter_map{$char}{$key}} if defined $letter_map{$char}{$key};
461	46				38	$word_map{$token} = 1;
462	46				57	$letter_map{$char}{$key} = \%word_map;
463						}
464	6				18	close UNKNOWN;
465	6				8	push @directories, $directory;
466						}
467	9				19	close SHOULD_EXCLUDE;
468	9	50			6	close TIMING_REPORT if $timing_report;
469
470						summarize_totals(
471						sub {
472	0			0	0	my ($hits, $files, $context, $pattern) = @_;
473	0				0	return "# hit-count: $hits$files\n$context$pattern\n\n",
474						},
475	9				24	CheckSpelling::Util::get_file_from_env('candidate_summary', '/dev/stderr'),
476						\@candidates,
477						\@candidate_totals,
478						\@candidate_file_counts,
479						);
480
481						summarize_totals(
482						sub {
483	1			1	1	my (undef, undef, $context, $pattern) = @_;
484	1				2	$context =~ s/^# //gm;
485	1				2	chomp $context;
486	1				0	my $details;
487	1	50			3	if ($context =~ /^(.*?)$(.*)/ms) {
488	1				17	($context, $details) = ($1, $2);
489	1	50			1	$details = "\n$details" if $details;
490						}
491	1	50			1	$context = 'Pattern' unless $context;
492	1				10	return "#### $context$details\n```\n$pattern\n```\n\n";
493						},
494	9				27	CheckSpelling::Util::get_file_from_env('forbidden_summary', '/dev/stderr'),
495						\@forbidden,
496						\@forbidden_totals,
497						);
498
499	9				26	group_related_words;
500
501	9	100			10	if (defined $ENV{'expect'}) {
502	8				7	$ENV{'expect'} =~ /(.*)/;
503	8				8	load_expect $1;
504	8				5	harmonize_expect;
505						}
506
507	9				7	my %seen = ();
508	9				4	our %counters;
509	9				5	%counters = ();
510
511	9	100			33	if (-s $early_warnings) {
512	1				6	open WARNINGS, '<:utf8', $early_warnings;
513	1				8	for my $warning (<WARNINGS>) {
514	1				1	chomp $warning;
515	1				1	count_warning $warning;
516	1	50			3	next if should_skip_warning $warning;
517	1				4	print WARNING_OUTPUT "$warning\n";
518						}
519	1				3	close WARNINGS;
520						}
521
522	9				3	our %last_seen;
523	9				5	my %unknown_file_word_count;
524	9				6	for my $directory (@directories) {
525	8	100			24	next unless (-s "$directory/warnings");
526	7	50			46	next unless open(NAME, '<:utf8', "$directory/name");
527	7				37	my $file=<NAME>;
528	7				14	close NAME;
529	7				7	my $is_file_list = $file eq $file_list;
530	7				35	open WARNINGS, '<:utf8', "$directory/warnings";
531	7	100			6	if (!$is_file_list) {
532	6				39	for $warning (<WARNINGS>) {
533	49				34	chomp $warning;
534	49	100			82	if ($warning =~ m/:(\d+):(\d+ \.\.\. \d+): `(.*)`/) {
535	48				64	my ($line, $range, $item) = ($1, $2, $3);
536	48				33	my $wrapped = CheckSpelling::Util::wrap_in_backticks($item);
537	48				95	$warning =~ s/:\d+:\d+ \.\.\. \d+: `.*`/:$line:$range, Warning - $wrapped is not a recognized word\. \(unrecognized-spelling\)/;
538	48	100			33	next if log_skip_item($item, $file, $warning, $unknown_word_limit);
539						} else {
540	1	50			1	if ($warning =~ /\`(.*?)\` in line\. \(token-is-substring\)/) {
541	0	0			0	next if skip_item($1);
542						}
543	1				1	count_warning $warning;
544						}
545	14	50			8	next if should_skip_warning $warning;
546	14				64	print WARNING_OUTPUT "$file$warning\n";
547						}
548						} else {
549	1				6	for $warning (<WARNINGS>) {
550	6				3	chomp $warning;
551	6	50			13	next unless $warning =~ s/^:(\d+)/:1/;
552	6				5	$file = $check_file_paths[$1];
553	6	100			12	if ($warning =~ m/:(\d+ \.\.\. \d+): `(.*)`/) {
554	4				2	my ($range, $item) = ($1, $2);
555	4				4	my $wrapped = CheckSpelling::Util::wrap_in_backticks($item);
556	4				9	$warning =~ s/:\d+ \.\.\. \d+: `.*`/:$range, Warning - $wrapped is not a recognized word. (check-file-path)/;
557	4	50			4	next if skip_item($item);
558	4	50			2	if (defined $unknown_file_word_limit) {
559	4	100			7	next if ++$unknown_file_word_count{$item} > $unknown_file_word_limit;
560						}
561						}
562	5	100			3	next if should_skip_warning $warning;
563	4				11	print WARNING_OUTPUT "$file$warning\n";
564	4				4	count_warning $warning;
565						}
566						}
567	7				28	close WARNINGS;
568						}
569	9				157	close MORE_WARNINGS;
570
571	9				7	for my $warning (@delayed_warnings) {
572	1	50			2	next if should_skip_warning $warning;
573	1				1	count_warning $warning;
574	1				1	print WARNING_OUTPUT $warning;
575						}
576	9	100			5	if (defined $unknown_word_limit) {
577	1				3	for my $warned_word (sort keys %last_seen) {
578	1		50		3	my $warning_count = $seen{$warned_word} || 0;
579	1	50			2	next unless $warning_count >= $unknown_word_limit;
580	0				0	my $warning = $last_seen{$warned_word};
581	0				0	$warning =~ s/\Q. (unrecognized-spelling)\E/ -- found $warning_count times. (limited-references)\n/;
582	0	0			0	next if should_skip_warning $warning;
583	0				0	print WARNING_OUTPUT $warning;
584	0				0	count_warning $warning;
585						}
586						}
587	9				169	close WARNING_OUTPUT;
588
589	9	100			10	if (%counters) {
590	2				2	my $continue='';
591	2				3	print COUNTER_SUMMARY "{\n";
592	2				4	for my $code (sort keys %counters) {
593	4				5	print COUNTER_SUMMARY qq<$continue"$code": $counters{$code}\n>;
594	4				3	$continue=',';
595						}
596	2				2	print COUNTER_SUMMARY "}\n";
597						}
598	9				57	close COUNTER_SUMMARY;
599
600						# display the current unknown
601	9				22	for my $char (sort keys %letter_map) {
602	34 34				14 65	for $key (sort CheckSpelling::Util::case_biased keys(%{$letter_map{$char}})) {
603	29 29				12 37	my %word_map = %{$letter_map{$char}{$key}};
604	29				25	my @words = keys(%word_map);
605	29	100			20	if (scalar(@words) > 1) {
606	13 19	50			9 92	print $key." (".(join ", ", sort { length($a) <=> length($b) || $a cmp $b } @words).")";
607						} else {
608	16				55	print $words[0];
609						}
610	29				95	print "\n";
611						}
612						}
613						}
614
615						1;