File Coverage

File:lib/CheckSpelling/Homoglyph.pm
Coverage:100.0%

linestmtbrancondsubtimecode
1#! -*-perl-*-
2
3package CheckSpelling::Homoglyph;
4
5our $VERSION='0.1.0';
6our $flatten=0;
7
8
2
2
2
116862
2
6
use utf8;
9
2
2
2
188
2
463
use CheckSpelling::Util;
10
11my %homoglyph_map;
12my $homoglyphs;
13
14my %homoglyph_to_glyph;
15
16sub init {
17
13
223
    my ($file) = @_;
18
13
1
1
1
219
3
0
4
    return unless open (my $fh, '<:encoding(UTF-8)', $file);
19
12
1015
    my $upper_pattern = CheckSpelling::Util::get_file_from_env_utf8('INPUT_UPPER_PATTERN', '[A-Z]');
20
12
38
    my $lower_pattern = CheckSpelling::Util::get_file_from_env_utf8('INPUT_LOWER_PATTERN', '[a-z]');
21
12
25
    my $punctuation_pattern = CheckSpelling::Util::get_file_from_env_utf8('INPUT_PUNCTUATION_PATTERN', q<'>);
22
12
46
    local $/ = "\n";
23
12
121
    while (<$fh>) {
24
545
704
        next if /^#/;
25
544
306
        s/^\\//;
26
544
1949
        next unless /^($upper_pattern|$lower_pattern|$punctuation_pattern)((?:(?!$upper_pattern|$lower_pattern|$punctuation_pattern).)+)$/;
27
542
420
        my ($expected, $aliases) = ($1, $2);
28
542
641
        my @chars = split('', $aliases);
29
542
222
        my @unexpected_chars;
30
542
262
        for my $ch (@chars) {
31
12742
5493
            my %ref;
32
12742
4883
            my $known_aliases = \%ref;
33
12742
8785
            if (defined $homoglyph_map{$ch}) {
34
11571
5426
                $known_aliases = $homoglyph_map{$ch};
35            }
36
12742
7103
            $known_aliases->{$expected} = 1;
37
12742
9497
            $homoglyph_map{$ch} = $known_aliases;
38        }
39    }
40
12
75
    close $fh;
41
12
3970
    my @glyphs = sort keys %homoglyph_map;
42
12
333
    our $homoglyphs = join '', @glyphs;
43
12
8
    our %homoglyph_to_glyph;
44
12
13
    for my $ch (@glyphs) {
45
12741
12741
4863
8011
        my @known_aliases = keys %{$homoglyph_map{$ch}};
46
12741
6627
        if (scalar @known_aliases == 1) {
47
12740
8513
            $homoglyph_to_glyph{$ch} = $known_aliases[0];
48        } else {
49
1
2
            $homoglyph_to_glyph{$ch} = $ch;
50        }
51    }
52}
53
54sub dump_aliases {
55
1
794
    my $a;
56
1
3
    for my $ch (keys %homoglyph_map) {
57
14
7
        $a = $ch;
58
14
6
        my $b = $homoglyph_map{$a};
59
14
63
        print "$a: ".(join " ", (sort keys %$b))."\n";
60    }
61
1
1
    our $homoglyphs;
62
1
5
    print "\n"."homoglyphs: $homoglyphs\n";
63
1
1
    0;
64}
651;