File Coverage

File:	lib/CheckSpelling/Homoglyph.pm
Coverage:	79.2%

line	stmt	bran	sub	time	code
1					#! --perl--
2
3					package CheckSpelling::Homoglyph;
4
5					our $VERSION='0.1.0';
6					our $flatten=0;
7
8	1 1 1		1	2 1 2	use utf8;
9	1 1 1		1	15 1 260	use CheckSpelling::Util;
10
11					my %homoglyph_map;
12					my $homoglyphs;
13
14					my %homoglyph_to_glyph;
15
16					sub init {
17	14		14	11	my ($file) = @_;
18	1 1 1 14	50	1	2 1 11 190	return unless open (my $fh, '<:encoding(UTF-8)', $file);
19	14			780	my $ignore_pattern = CheckSpelling::Util::get_file_from_env_utf8('INPUT_IGNORE_PATTERN', q<[^a-zA-Z']>);
20	14			36	my $upper_pattern = CheckSpelling::Util::get_file_from_env_utf8('INPUT_UPPER_PATTERN', '[A-Z]');
21	14			25	my $lower_pattern = CheckSpelling::Util::get_file_from_env_utf8('INPUT_LOWER_PATTERN', '[a-z]');
22	14			22	my $not_lower_pattern = CheckSpelling::Util::get_file_from_env_utf8('INPUT_NOT_LOWER_PATTERN', '[^a-z]');
23	14			23	my $not_upper_or_lower_pattern = CheckSpelling::Util::get_file_from_env_utf8('INPUT_NOT_UPPER_OR_LOWER_PATTERN', '[^A-Za-z]');
24	14			23	my $punctuation_pattern = CheckSpelling::Util::get_file_from_env_utf8('INPUT_PUNCTUATION_PATTERN', q<'>);
25	14			35	local $/ = "\n";
26	14			105	while (<$fh>) {
27	686	50		825	next if /^#/;
28	686			324	s/^\\//;
29	686	50		642	next unless /(.)(.+)/;
30	686			588	my ($expected, $aliases) = ($1, $2);
31	686			748	my @chars = split('', $aliases);
32	686	50		780	next unless $expected =~ /$upper_pattern\|$lower_pattern\|$punctuation_pattern/;
33	686	50		682	next unless $expected =~ /$upper_pattern\|$lower_pattern/;
34	686			333	my @unexpected_chars;
35	686			349	for my $ch (@chars) {
36	16198	50		14473	if ($ch =~ /$ignore_pattern/) {
37	16198			6454	my %ref;
38	16198			6143	my $known_aliases = \%ref;
39	16198	100		10348	if (defined $homoglyph_map{$ch}) {
40	15041			7088	$known_aliases = $homoglyph_map{$ch};
41					}
42	16198			7933	$known_aliases->{$expected} = 1;
43	16198			12311	$homoglyph_map{$ch} = $known_aliases;
44					}
45					}
46					}
47	14			69	close $fh;
48	14			4314	my @glyphs = sort keys %homoglyph_map;
49	14			445	our $homoglyphs = join '', @glyphs;
50	14			8	our %homoglyph_to_glyph;
51	14			9	for my $ch (@glyphs) {
52	16198 16198			5986 9531	my @known_aliases = keys %{$homoglyph_map{$ch}};
53	16198	50		8272	if (scalar @known_aliases == 1) {
54	16198			10608	$homoglyph_to_glyph{$ch} = $known_aliases[0];
55					} else {
56	0				$homoglyph_to_glyph{$ch} = $ch;
57					}
58					}
59					}
60
61					sub dump_aliases {
62	0		0		my $a;
63	0				for my $ch (keys %homoglyph_map) {
64	0				$a = $ch;
65	0				my $b = $homoglyph_map{$a};
66	0				print "$a: ".(join " ", (sort keys %$b))."\n";
67					}
68	0				print "\n"."homoglyphs: $homoglyphs\n";
69					}
70					1;