File Coverage

File:lib/CheckSpelling/Homoglyph.pm
Coverage:100.0%

linestmtbrancondsubtimecode
1#! -*-perl-*-
2
3package CheckSpelling::Homoglyph;
4
5our $VERSION='0.1.0';
6our $flatten=0;
7
8
2
2
2
99893
2
5
use utf8;
9
2
2
2
143
1
420
use CheckSpelling::Util;
10
11my %homoglyph_map;
12my $homoglyphs;
13
14my %homoglyph_to_glyph;
15
16sub init {
17
16
233
    my ($file) = @_;
18
16
1
1
1
204
2
1
3
    return unless open (my $fh, '<:encoding(UTF-8)', $file);
19
15
950
    my $upper_pattern = CheckSpelling::Util::get_file_from_env_utf8('INPUT_UPPER_PATTERN', '[A-Z]');
20
15
37
    my $lower_pattern = CheckSpelling::Util::get_file_from_env_utf8('INPUT_LOWER_PATTERN', '[a-z]');
21
15
33
    my $punctuation_pattern = CheckSpelling::Util::get_file_from_env_utf8('INPUT_PUNCTUATION_PATTERN', q<'>);
22
15
41
    local $/ = "\n";
23
15
130
    while (<$fh>) {
24
692
894
        next if /^#/;
25
691
371
        s/^\\//;
26
691
1083
        next unless /^($upper_pattern|$lower_pattern|$punctuation_pattern)(.+)$/;
27
690
566
        my ($expected, $aliases) = ($1, $2);
28
690
836
        my @chars = split('', $aliases);
29
690
288
        my @unexpected_chars;
30
690
357
        for my $ch (@chars) {
31
16219
17419
            next if $ch =~ /$upper_pattern|$lower_pattern|$punctuation_pattern/;
32
16213
6315
            my %ref;
33
16213
6653
            my $known_aliases = \%ref;
34
16213
10955
            if (defined $homoglyph_map{$ch}) {
35
15042
7477
                $known_aliases = $homoglyph_map{$ch};
36            }
37
16213
8485
            $known_aliases->{$expected} = 1;
38
16213
13637
            $homoglyph_map{$ch} = $known_aliases;
39        }
40    }
41
15
96
    close $fh;
42
15
4472
    my @glyphs = sort keys %homoglyph_map;
43
15
375
    our $homoglyphs = join '', @glyphs;
44
15
14
    our %homoglyph_to_glyph;
45
15
13
    for my $ch (@glyphs) {
46
16212
16212
5751
9001
        my @known_aliases = keys %{$homoglyph_map{$ch}};
47
16212
7982
        if (scalar @known_aliases == 1) {
48
16211
10043
            $homoglyph_to_glyph{$ch} = $known_aliases[0];
49        } else {
50
1
1
            $homoglyph_to_glyph{$ch} = $ch;
51        }
52    }
53}
54
55sub dump_aliases {
56
1
571
    my $a;
57
1
3
    for my $ch (keys %homoglyph_map) {
58
14
5
        $a = $ch;
59
14
6
        my $b = $homoglyph_map{$a};
60
14
49
        print "$a: ".(join " ", (sort keys %$b))."\n";
61    }
62
1
1
    our $homoglyphs;
63
1
2
    print "\n"."homoglyphs: $homoglyphs\n";
64
1
1
    0;
65}
661;