File Coverage

File:lib/CheckSpelling/Homoglyph.pm
Coverage:79.2%

linestmtbrancondsubtimecode
1#! -*-perl-*-
2
3package CheckSpelling::Homoglyph;
4
5our $VERSION='0.1.0';
6our $flatten=0;
7
8
1
1
1
3
1
2
use utf8;
9
1
1
1
20
1
290
use CheckSpelling::Util;
10
11my %homoglyph_map;
12my $homoglyphs;
13
14my %homoglyph_to_glyph;
15
16sub init {
17
14
16
    my ($file) = @_;
18
1
1
1
14
2
1
9
211
    return unless open (my $fh, '<:encoding(UTF-8)', $file);
19
14
1028
    my $ignore_pattern = CheckSpelling::Util::get_file_from_env_utf8('INPUT_IGNORE_PATTERN', q<[^a-zA-Z']>);
20
14
42
    my $upper_pattern = CheckSpelling::Util::get_file_from_env_utf8('INPUT_UPPER_PATTERN', '[A-Z]');
21
14
34
    my $lower_pattern = CheckSpelling::Util::get_file_from_env_utf8('INPUT_LOWER_PATTERN', '[a-z]');
22
14
33
    my $not_lower_pattern = CheckSpelling::Util::get_file_from_env_utf8('INPUT_NOT_LOWER_PATTERN', '[^a-z]');
23
14
30
    my $not_upper_or_lower_pattern = CheckSpelling::Util::get_file_from_env_utf8('INPUT_NOT_UPPER_OR_LOWER_PATTERN', '[^A-Za-z]');
24
14
29
    my $punctuation_pattern = CheckSpelling::Util::get_file_from_env_utf8('INPUT_PUNCTUATION_PATTERN', q<'>);
25
14
43
    local $/ = "\n";
26
14
125
    while (<$fh>) {
27
686
864
        next if /^#/;
28
686
348
        s/^\\//;
29
686
716
        next unless /(.)(.+)/;
30
686
544
        my ($expected, $aliases) = ($1, $2);
31
686
808
        my @chars = split('', $aliases);
32
686
790
        next unless $expected =~ /$upper_pattern|$lower_pattern|$punctuation_pattern/;
33
686
707
        next unless $expected =~ /$upper_pattern|$lower_pattern/;
34
686
301
        my @unexpected_chars;
35
686
359
        for my $ch (@chars) {
36
16198
14161
            if ($ch =~ /$ignore_pattern/) {
37
16198
6423
                my %ref;
38
16198
6076
                my $known_aliases = \%ref;
39
16198
11208
                if (defined $homoglyph_map{$ch}) {
40
15041
6890
                    $known_aliases = $homoglyph_map{$ch};
41                }
42
16198
9199
                $known_aliases->{$expected} = 1;
43
16198
12542
                $homoglyph_map{$ch} = $known_aliases;
44            }
45        }
46    }
47
14
105
    close $fh;
48
14
4860
    my @glyphs = sort keys %homoglyph_map;
49
14
422
    our $homoglyphs = join '', @glyphs;
50
14
12
    our %homoglyph_to_glyph;
51
14
19
    for my $ch (@glyphs) {
52
16198
16198
6066
10190
        my @known_aliases = keys %{$homoglyph_map{$ch}};
53
16198
8568
        if (scalar @known_aliases == 1) {
54
16198
10882
            $homoglyph_to_glyph{$ch} = $known_aliases[0];
55        } else {
56
0
            $homoglyph_to_glyph{$ch} = $ch;
57        }
58    }
59}
60
61sub dump_aliases {
62
0
    my $a;
63
0
    for my $ch (keys %homoglyph_map) {
64
0
        $a = $ch;
65
0
        my $b = $homoglyph_map{$a};
66
0
        print "$a: ".(join " ", (sort keys %$b))."\n";
67    }
68
0
    print "\n"."homoglyphs: $homoglyphs\n";
69}
701;