| File: | lib/CheckSpelling/Homoglyph.pm |
| Coverage: | 100.0% |
| line | stmt | bran | cond | sub | time | code |
|---|---|---|---|---|---|---|
| 1 | #! -*-perl-*- | |||||
| 2 | ||||||
| 3 | package CheckSpelling::Homoglyph; | |||||
| 4 | ||||||
| 5 | our $VERSION='0.1.0'; | |||||
| 6 | our $flatten=0; | |||||
| 7 | ||||||
| 8 | 2 2 2 | 118034 3 5 | use utf8; | |||
| 9 | 2 2 2 | 179 2 547 | use CheckSpelling::Util; | |||
| 10 | ||||||
| 11 | my %homoglyph_map; | |||||
| 12 | my $homoglyphs; | |||||
| 13 | ||||||
| 14 | my %homoglyph_to_glyph; | |||||
| 15 | ||||||
| 16 | sub init { | |||||
| 17 | 13 | 180 | my ($file) = @_; | |||
| 18 | 13 1 1 1 | 181 3 0 3 | return unless open (my $fh, '<:encoding(UTF-8)', $file); | |||
| 19 | 12 | 859 | my $upper_pattern = CheckSpelling::Util::get_file_from_env_utf8('INPUT_UPPER_PATTERN', '[A-Z]'); | |||
| 20 | 12 | 33 | my $lower_pattern = CheckSpelling::Util::get_file_from_env_utf8('INPUT_LOWER_PATTERN', '[a-z]'); | |||
| 21 | 12 | 22 | my $punctuation_pattern = CheckSpelling::Util::get_file_from_env_utf8('INPUT_PUNCTUATION_PATTERN', q<'>); | |||
| 22 | 12 | 37 | local $/ = "\n"; | |||
| 23 | 12 | 102 | while (<$fh>) { | |||
| 24 | 545 | 657 | next if /^#/; | |||
| 25 | 544 | 286 | s/^\\//; | |||
| 26 | 544 | 816 | next unless /^($upper_pattern|$lower_pattern|$punctuation_pattern)(.+)$/; | |||
| 27 | 543 | 450 | my ($expected, $aliases) = ($1, $2); | |||
| 28 | 543 | 630 | my @chars = split('', $aliases); | |||
| 29 | 543 | 213 | my @unexpected_chars; | |||
| 30 | 543 | 294 | for my $ch (@chars) { | |||
| 31 | 12748 | 13548 | next if $ch =~ /$upper_pattern|$lower_pattern|$punctuation_pattern/; | |||
| 32 | 12742 | 5115 | my %ref; | |||
| 33 | 12742 | 4985 | my $known_aliases = \%ref; | |||
| 34 | 12742 | 8236 | if (defined $homoglyph_map{$ch}) { | |||
| 35 | 11571 | 5669 | $known_aliases = $homoglyph_map{$ch}; | |||
| 36 | } | |||||
| 37 | 12742 | 6334 | $known_aliases->{$expected} = 1; | |||
| 38 | 12742 | 10472 | $homoglyph_map{$ch} = $known_aliases; | |||
| 39 | } | |||||
| 40 | } | |||||
| 41 | 12 | 78 | close $fh; | |||
| 42 | 12 | 3556 | my @glyphs = sort keys %homoglyph_map; | |||
| 43 | 12 | 405 | our $homoglyphs = join '', @glyphs; | |||
| 44 | 12 | 10 | our %homoglyph_to_glyph; | |||
| 45 | 12 | 11 | for my $ch (@glyphs) { | |||
| 46 | 12741 12741 | 4767 7688 | my @known_aliases = keys %{$homoglyph_map{$ch}}; | |||
| 47 | 12741 | 6609 | if (scalar @known_aliases == 1) { | |||
| 48 | 12740 | 8760 | $homoglyph_to_glyph{$ch} = $known_aliases[0]; | |||
| 49 | } else { | |||||
| 50 | 1 | 1 | $homoglyph_to_glyph{$ch} = $ch; | |||
| 51 | } | |||||
| 52 | } | |||||
| 53 | } | |||||
| 54 | ||||||
| 55 | sub dump_aliases { | |||||
| 56 | 1 | 649 | my $a; | |||
| 57 | 1 | 2 | for my $ch (keys %homoglyph_map) { | |||
| 58 | 14 | 8 | $a = $ch; | |||
| 59 | 14 | 4 | my $b = $homoglyph_map{$a}; | |||
| 60 | 14 | 61 | print "$a: ".(join " ", (sort keys %$b))."\n"; | |||
| 61 | } | |||||
| 62 | 1 | 1 | our $homoglyphs; | |||
| 63 | 1 | 3 | print "\n"."homoglyphs: $homoglyphs\n"; | |||
| 64 | 1 | 1 | 0; | |||
| 65 | } | |||||
| 66 | 1; | |||||