| File: | lib/CheckSpelling/Homoglyph.pm |
| Coverage: | 100.0% |
| line | stmt | bran | cond | sub | time | code |
|---|---|---|---|---|---|---|
| 1 | #! -*-perl-*- | |||||
| 2 | ||||||
| 3 | package CheckSpelling::Homoglyph; | |||||
| 4 | ||||||
| 5 | our $VERSION='0.1.0'; | |||||
| 6 | our $flatten=0; | |||||
| 7 | ||||||
| 8 | 2 2 2 | 99893 2 5 | use utf8; | |||
| 9 | 2 2 2 | 143 1 420 | use CheckSpelling::Util; | |||
| 10 | ||||||
| 11 | my %homoglyph_map; | |||||
| 12 | my $homoglyphs; | |||||
| 13 | ||||||
| 14 | my %homoglyph_to_glyph; | |||||
| 15 | ||||||
| 16 | sub init { | |||||
| 17 | 16 | 233 | my ($file) = @_; | |||
| 18 | 16 1 1 1 | 204 2 1 3 | return unless open (my $fh, '<:encoding(UTF-8)', $file); | |||
| 19 | 15 | 950 | my $upper_pattern = CheckSpelling::Util::get_file_from_env_utf8('INPUT_UPPER_PATTERN', '[A-Z]'); | |||
| 20 | 15 | 37 | my $lower_pattern = CheckSpelling::Util::get_file_from_env_utf8('INPUT_LOWER_PATTERN', '[a-z]'); | |||
| 21 | 15 | 33 | my $punctuation_pattern = CheckSpelling::Util::get_file_from_env_utf8('INPUT_PUNCTUATION_PATTERN', q<'>); | |||
| 22 | 15 | 41 | local $/ = "\n"; | |||
| 23 | 15 | 130 | while (<$fh>) { | |||
| 24 | 692 | 894 | next if /^#/; | |||
| 25 | 691 | 371 | s/^\\//; | |||
| 26 | 691 | 1083 | next unless /^($upper_pattern|$lower_pattern|$punctuation_pattern)(.+)$/; | |||
| 27 | 690 | 566 | my ($expected, $aliases) = ($1, $2); | |||
| 28 | 690 | 836 | my @chars = split('', $aliases); | |||
| 29 | 690 | 288 | my @unexpected_chars; | |||
| 30 | 690 | 357 | for my $ch (@chars) { | |||
| 31 | 16219 | 17419 | next if $ch =~ /$upper_pattern|$lower_pattern|$punctuation_pattern/; | |||
| 32 | 16213 | 6315 | my %ref; | |||
| 33 | 16213 | 6653 | my $known_aliases = \%ref; | |||
| 34 | 16213 | 10955 | if (defined $homoglyph_map{$ch}) { | |||
| 35 | 15042 | 7477 | $known_aliases = $homoglyph_map{$ch}; | |||
| 36 | } | |||||
| 37 | 16213 | 8485 | $known_aliases->{$expected} = 1; | |||
| 38 | 16213 | 13637 | $homoglyph_map{$ch} = $known_aliases; | |||
| 39 | } | |||||
| 40 | } | |||||
| 41 | 15 | 96 | close $fh; | |||
| 42 | 15 | 4472 | my @glyphs = sort keys %homoglyph_map; | |||
| 43 | 15 | 375 | our $homoglyphs = join '', @glyphs; | |||
| 44 | 15 | 14 | our %homoglyph_to_glyph; | |||
| 45 | 15 | 13 | for my $ch (@glyphs) { | |||
| 46 | 16212 16212 | 5751 9001 | my @known_aliases = keys %{$homoglyph_map{$ch}}; | |||
| 47 | 16212 | 7982 | if (scalar @known_aliases == 1) { | |||
| 48 | 16211 | 10043 | $homoglyph_to_glyph{$ch} = $known_aliases[0]; | |||
| 49 | } else { | |||||
| 50 | 1 | 1 | $homoglyph_to_glyph{$ch} = $ch; | |||
| 51 | } | |||||
| 52 | } | |||||
| 53 | } | |||||
| 54 | ||||||
| 55 | sub dump_aliases { | |||||
| 56 | 1 | 571 | my $a; | |||
| 57 | 1 | 3 | for my $ch (keys %homoglyph_map) { | |||
| 58 | 14 | 5 | $a = $ch; | |||
| 59 | 14 | 6 | my $b = $homoglyph_map{$a}; | |||
| 60 | 14 | 49 | print "$a: ".(join " ", (sort keys %$b))."\n"; | |||
| 61 | } | |||||
| 62 | 1 | 1 | our $homoglyphs; | |||
| 63 | 1 | 2 | print "\n"."homoglyphs: $homoglyphs\n"; | |||
| 64 | 1 | 1 | 0; | |||
| 65 | } | |||||
| 66 | 1; | |||||