File Coverage

File:lib/CheckSpelling/DictionaryCoverage.pm
Coverage:95.9%

linestmtbrancondsubtimecode
1#! -*-perl-*-
2
3package CheckSpelling::DictionaryCoverage;
4
5our $VERSION='0.1.0';
6
1
1
1
107731
2
30
use File::Basename;
7
1
1
1
2
0
29
use Encode qw/decode_utf8 encode FB_DEFAULT/;
8
1
1
1
174
1
23
use CheckSpelling::Util;
9
10use constant {
11
1
554
  NO_MATCHES => -1,
12  NOT_UNIQUE => -2,
13  NO_WORD_SEEN_YET => '0'
14
1
1
2
0
};
15
16my $hunspell;
17
18sub entry {
19
6
185
  my ($name) = @_;
20
6
4
  my $handle;
21
6
52
  unless (open ($handle, '<:utf8', $name)) {
22
1
10
    print STDERR "Couldn't open dictionary `$name` (dictionary-not-found)\n";
23
1
3
    return 0;
24  }
25  return {
26
5
13
    name => $name,
27    handle => $handle,
28    word => NO_WORD_SEEN_YET,
29    uniq => 0,
30    covered => 0
31  }
32}
33
34sub hunspell_entry {
35
3
2
  my ($name) = @_;
36
3
22
  unless (open ($handle, '<:utf8', $name)) {
37
1
16
    print STDERR "Couldn't open dictionary `$name` (dictionary-not-found)\n";
38
1
2
    return 0;
39  }
40
2
12
  my $lines = <$handle>;
41
2
3
  chomp $lines;
42
2
6
  close $handle;
43
2
1
  my $aff = $name;
44
2
2
  my $encoding;
45
2
2
  $aff =~ s/dic$/aff/;
46
2
14
  if (open AFF, '<', $aff) {
47
1
6
    while (<AFF>) {
48
1
2
      next unless /^SET\s+(\S+)/;
49
1
3
      $encoding = $1 if ($1 !~ /utf-8/i);
50
1
1
      last;
51    }
52
1
2
    close AFF;
53  }
54
2
1
  my %map;
55  return {
56
2
491
    name => $name,
57    handle => undef,
58    encoding => $encoding,
59    engine => Text::Hunspell->new($aff, $name),
60    word => NO_WORD_SEEN_YET,
61    uniq => 0,
62    coverage => \%map,
63    lines => $lines
64  }
65}
66
67sub update_unique {
68
10
6
  my ($uniq, $file_id) = @_;
69
10
6
  if ($uniq == NO_MATCHES) {
70
9
6
    $uniq = $file_id;
71  } elsif ($uniq > NO_MATCHES) {
72
1
1
    $uniq = NOT_UNIQUE;
73  }
74
10
7
  return $uniq;
75}
76
77sub main {
78
7
8632
  my ($check, @dictionaries) = @_;
79
7
5
  my @files;
80  my $unknown_words;
81
7
58
  unless (open($unknown_words, '<:utf8', $check)) {
82
1
11
    print STDERR "Could not read $check\n";
83
1
3
    return 0;
84  }
85
86
6
3
  our $hunspell;
87
6
6
  for my $name (@dictionaries) {
88
8
12
    if ($name =~ /\.dic$/) {
89
3
8
      unless ($hunspell) {
90
1
1
1
1
159
942
10
27
        unless (eval 'use Text::Hunspell; 1') {
91
0
0
          print STDERR "Could not load Text::Hunspell for \`$name\` (hunspell-unavailable)\n";
92
0
0
          next;
93        }
94
1
0
        $hunspell = 1;
95      }
96
3
4
      push @files, hunspell_entry($name);
97    } else {
98
5
6
      push @files, entry($name);
99    }
100  }
101
102
6
6
  my @results=@files;
103
6
5
  while (@files) {
104
19
42
    last if eof($unknown_words);
105
14
12
    my $unknown = <$unknown_words>;
106
14
11
    chomp $unknown;
107
14
11
    last if ($unknown eq '');
108
14
7
    my @drop;
109
14
8
    my $uniq = NO_MATCHES;
110
14
9
    for (my $file_id = 0; $file_id < scalar @files; $file_id++) {
111
17
9
      my $current = $files[$file_id];
112      my ($word, $handle, $engine) = (
113        $current->{'word'},
114        $current->{'handle'},
115
17
18
        $current->{'engine'},
116      );
117
17
34
      while ($word ne '' && $word lt $unknown) {
118
18
12
        if ($engine) {
119          my $token_encoded = defined $hunspell_dictionary->{'encoding'} ?
120
10
11
            encode($hunspell_dictionary->{'encoding'}, $unknown) : $unknown;
121
10
26
          if ($engine->check($token_encoded)) {
122
5
41
            my $stem = $engine->stem($token_encoded);
123
5
12
            $current->{'coverage'}->{$stem} = 1;
124
5
5
            $uniq = update_unique($uniq, $file_id);
125          }
126
10
6
          last;
127        }
128
8
28
        if (eof $handle) {
129
1
2
          $word = '';
130        } else {
131
7
6
          $word = <$handle>;
132
7
17
          chomp $word;
133        }
134      }
135
17
17
      if ($word eq $unknown) {
136
5
3
        ++$current->{'covered'};
137
5
3
        $uniq = update_unique($uniq, $file_id);
138
5
5
        if (eof $handle) {
139
2
1
          $word = '';
140        } else {
141
3
3
          $word = <$handle>;
142
3
2
          chomp $word;
143        }
144      }
145
17
13
      $current->{'word'} = $word;
146
17
28
      if ($word eq '') {
147
3
5
        push @drop, $file_id;
148      }
149    }
150
14
6
    if ($uniq > NO_MATCHES) {
151
8
5
      my $current = $files[$uniq];
152
8
5
      ++$current->{'uniq'};
153    }
154
14
13
    if (@drop) {
155
3
3
      for $file_id (reverse @drop) {
156
3
5
        splice @files, $file_id, 1;
157      }
158    }
159  }
160
6
7
  my $re=CheckSpelling::Util::get_file_from_env('aliases', '');
161
6
9
  my $extra_dictionaries = CheckSpelling::Util::get_file_from_env('extra_dictionaries', '');
162
6
12
  @dictionaries=split /\n/, $extra_dictionaries;
163
6
7
  for (my $file_id = 0; $file_id < scalar @results; $file_id++) {
164
8
7
    my $current = $results[$file_id];
165    my $covered = $current->{'coverage'}
166
2
3
      ? scalar(keys %{$current->{'coverage'}})
167
8
7
      : $current->{'covered'};
168
8
17
    next unless $covered;
169
170
5
3
    my $name = $current->{'name'};
171
5
4
    my $source_link;
172
5
134
    my ($source_link_dir, $source_link_name) = (dirname($name), basename($name));
173
5
57
    if (open($source_link, '<', "$source_link_dir/.$source_link_name")) {
174
2
12
      $name = <$source_link>;
175
2
2
      chomp $name;
176
2
5
      close $source_link;
177    }
178
179
5
3
    my $uniq = $current->{'uniq'};
180
5
5
    my $handle = $current->{'handle'};
181
5
0
    my $lines;
182
5
8
    if ($handle) {
183
3
2
      my $word = $current->{'word'};
184
3
4
      $word = <$handle> while !eof($handle);
185
3
5
      $lines = $handle->input_line_number();
186    } else {
187
2
2
      $lines = $current->{'lines'};
188    }
189
190
5
27
    local $_ = $name;
191
5
200
    eval $re;
192
5
8
    my $url = $_;
193
194
5
1
    my $name_without_spaces = $name;
195
5
5
    $name_without_spaces =~ s/\s+/_/g;
196
197
5
5
    my $unique = '';
198
5
4
    if ($uniq) {
199
4
3
      $unique = " ($uniq uniquely)";
200    } else {
201
1
0
      $uniq = 0;
202    }
203
5
163
    print "$covered-$lines-$uniq-$name_without_spaces [$name]($url) ($lines) covers $covered of them$unique\n";
204  }
205}
206
2071;