| File: | lib/CheckSpelling/Homoglyph.pm |
| Coverage: | 100.0% |
| line | stmt | bran | cond | sub | time | code |
|---|---|---|---|---|---|---|
| 1 | #! -*-perl-*- | |||||
| 2 | ||||||
| 3 | package CheckSpelling::Homoglyph; | |||||
| 4 | ||||||
| 5 | our $VERSION='0.1.0'; | |||||
| 6 | our $flatten=0; | |||||
| 7 | ||||||
| 8 | 2 2 2 | 109384 3 4 | use utf8; | |||
| 9 | 2 2 2 | 213 1 464 | use CheckSpelling::Util; | |||
| 10 | ||||||
| 11 | my %homoglyph_map; | |||||
| 12 | my $homoglyphs; | |||||
| 13 | ||||||
| 14 | my %homoglyph_to_glyph; | |||||
| 15 | ||||||
| 16 | sub init { | |||||
| 17 | 13 | 180 | my ($file) = @_; | |||
| 18 | 13 1 1 1 | 171 3 0 3 | return unless open (my $fh, '<:encoding(UTF-8)', $file); | |||
| 19 | 12 | 733 | my $upper_pattern = CheckSpelling::Util::get_file_from_env_utf8('INPUT_UPPER_PATTERN', '[A-Z]'); | |||
| 20 | 12 | 34 | my $lower_pattern = CheckSpelling::Util::get_file_from_env_utf8('INPUT_LOWER_PATTERN', '[a-z]'); | |||
| 21 | 12 | 24 | my $punctuation_pattern = CheckSpelling::Util::get_file_from_env_utf8('INPUT_PUNCTUATION_PATTERN', q<'>); | |||
| 22 | 12 | 32 | local $/ = "\n"; | |||
| 23 | 12 | 102 | while (<$fh>) { | |||
| 24 | 545 | 650 | next if /^#/; | |||
| 25 | 544 | 286 | s/^\\//; | |||
| 26 | 544 | 1787 | next unless /^($upper_pattern|$lower_pattern|$punctuation_pattern)((?:(?!$upper_pattern|$lower_pattern|$punctuation_pattern).)+)$/; | |||
| 27 | 542 | 423 | my ($expected, $aliases) = ($1, $2); | |||
| 28 | 542 | 621 | my @chars = split('', $aliases); | |||
| 29 | 542 | 222 | my @unexpected_chars; | |||
| 30 | 542 | 268 | for my $ch (@chars) { | |||
| 31 | 12742 | 5372 | my %ref; | |||
| 32 | 12742 | 4884 | my $known_aliases = \%ref; | |||
| 33 | 12742 | 8247 | if (defined $homoglyph_map{$ch}) { | |||
| 34 | 11571 | 5341 | $known_aliases = $homoglyph_map{$ch}; | |||
| 35 | } | |||||
| 36 | 12742 | 6019 | $known_aliases->{$expected} = 1; | |||
| 37 | 12742 | 9152 | $homoglyph_map{$ch} = $known_aliases; | |||
| 38 | } | |||||
| 39 | } | |||||
| 40 | 12 | 60 | close $fh; | |||
| 41 | 12 | 3393 | my @glyphs = sort keys %homoglyph_map; | |||
| 42 | 12 | 356 | our $homoglyphs = join '', @glyphs; | |||
| 43 | 12 | 9 | our %homoglyph_to_glyph; | |||
| 44 | 12 | 10 | for my $ch (@glyphs) { | |||
| 45 | 12741 12741 | 5044 7393 | my @known_aliases = keys %{$homoglyph_map{$ch}}; | |||
| 46 | 12741 | 6880 | if (scalar @known_aliases == 1) { | |||
| 47 | 12740 | 7741 | $homoglyph_to_glyph{$ch} = $known_aliases[0]; | |||
| 48 | } else { | |||||
| 49 | 1 | 1 | $homoglyph_to_glyph{$ch} = $ch; | |||
| 50 | } | |||||
| 51 | } | |||||
| 52 | } | |||||
| 53 | ||||||
| 54 | sub dump_aliases { | |||||
| 55 | 1 | 595 | my $a; | |||
| 56 | 1 | 2 | for my $ch (keys %homoglyph_map) { | |||
| 57 | 14 | 6 | $a = $ch; | |||
| 58 | 14 | 6 | my $b = $homoglyph_map{$a}; | |||
| 59 | 14 | 62 | print "$a: ".(join " ", (sort keys %$b))."\n"; | |||
| 60 | } | |||||
| 61 | 1 | 1 | our $homoglyphs; | |||
| 62 | 1 | 3 | print "\n"."homoglyphs: $homoglyphs\n"; | |||
| 63 | 1 | 4 | 0; | |||
| 64 | } | |||||
| 65 | 1; | |||||