| File: | lib/CheckSpelling/Homoglyph.pm |
| Coverage: | 100.0% |
| line | stmt | bran | cond | sub | time | code |
|---|---|---|---|---|---|---|
| 1 | #! -*-perl-*- | |||||
| 2 | ||||||
| 3 | package CheckSpelling::Homoglyph; | |||||
| 4 | ||||||
| 5 | our $VERSION='0.1.0'; | |||||
| 6 | our $flatten=0; | |||||
| 7 | ||||||
| 8 | 2 2 2 | 116258 1 6 | use utf8; | |||
| 9 | 2 2 2 | 195 2 435 | use CheckSpelling::Util; | |||
| 10 | ||||||
| 11 | my %homoglyph_map; | |||||
| 12 | my $homoglyphs; | |||||
| 13 | ||||||
| 14 | my %homoglyph_to_glyph; | |||||
| 15 | ||||||
| 16 | sub init { | |||||
| 17 | 13 | 225 | my ($file) = @_; | |||
| 18 | 13 1 1 1 | 177 2 0 3 | return unless open (my $fh, '<:encoding(UTF-8)', $file); | |||
| 19 | 12 | 820 | my $upper_pattern = CheckSpelling::Util::get_file_from_env_utf8('INPUT_UPPER_PATTERN', '[A-Z]'); | |||
| 20 | 12 | 33 | my $lower_pattern = CheckSpelling::Util::get_file_from_env_utf8('INPUT_LOWER_PATTERN', '[a-z]'); | |||
| 21 | 12 | 22 | my $punctuation_pattern = CheckSpelling::Util::get_file_from_env_utf8('INPUT_PUNCTUATION_PATTERN', q<'>); | |||
| 22 | 12 | 35 | local $/ = "\n"; | |||
| 23 | 12 | 119 | while (<$fh>) { | |||
| 24 | 545 | 727 | next if /^#/; | |||
| 25 | 544 | 259 | s/^\\//; | |||
| 26 | 544 | 1780 | next unless /^($upper_pattern|$lower_pattern|$punctuation_pattern)((?:(?!$upper_pattern|$lower_pattern|$punctuation_pattern).)+)$/; | |||
| 27 | 542 | 431 | my ($expected, $aliases) = ($1, $2); | |||
| 28 | 542 | 596 | my @chars = split('', $aliases); | |||
| 29 | 542 | 215 | my @unexpected_chars; | |||
| 30 | 542 | 270 | for my $ch (@chars) { | |||
| 31 | 12742 | 5391 | my %ref; | |||
| 32 | 12742 | 4913 | my $known_aliases = \%ref; | |||
| 33 | 12742 | 8033 | if (defined $homoglyph_map{$ch}) { | |||
| 34 | 11571 | 5613 | $known_aliases = $homoglyph_map{$ch}; | |||
| 35 | } | |||||
| 36 | 12742 | 5998 | $known_aliases->{$expected} = 1; | |||
| 37 | 12742 | 9422 | $homoglyph_map{$ch} = $known_aliases; | |||
| 38 | } | |||||
| 39 | } | |||||
| 40 | 12 | 64 | close $fh; | |||
| 41 | 12 | 3462 | my @glyphs = sort keys %homoglyph_map; | |||
| 42 | 12 | 333 | our $homoglyphs = join '', @glyphs; | |||
| 43 | 12 | 5 | our %homoglyph_to_glyph; | |||
| 44 | 12 | 12 | for my $ch (@glyphs) { | |||
| 45 | 12741 12741 | 4711 7783 | my @known_aliases = keys %{$homoglyph_map{$ch}}; | |||
| 46 | 12741 | 6389 | if (scalar @known_aliases == 1) { | |||
| 47 | 12740 | 8427 | $homoglyph_to_glyph{$ch} = $known_aliases[0]; | |||
| 48 | } else { | |||||
| 49 | 1 | 1 | $homoglyph_to_glyph{$ch} = $ch; | |||
| 50 | } | |||||
| 51 | } | |||||
| 52 | } | |||||
| 53 | ||||||
| 54 | sub dump_aliases { | |||||
| 55 | 1 | 696 | my $a; | |||
| 56 | 1 | 2 | for my $ch (keys %homoglyph_map) { | |||
| 57 | 14 | 7 | $a = $ch; | |||
| 58 | 14 | 5 | my $b = $homoglyph_map{$a}; | |||
| 59 | 14 | 61 | print "$a: ".(join " ", (sort keys %$b))."\n"; | |||
| 60 | } | |||||
| 61 | 1 | 1 | our $homoglyphs; | |||
| 62 | 1 | 3 | print "\n"."homoglyphs: $homoglyphs\n"; | |||
| 63 | 1 | 2 | 0; | |||
| 64 | } | |||||
| 65 | 1; | |||||