File Coverage

File:lib/CheckSpelling/DictionaryCoverage.pm
Coverage:95.9%

linestmtbrancondsubtimecode
1#! -*-perl-*-
2
3package CheckSpelling::DictionaryCoverage;
4
5our $VERSION='0.1.0';
6
1
1
1
112933
3
45
use File::Basename;
7
1
1
1
2
0
37
use Encode qw/decode_utf8 encode FB_DEFAULT/;
8
1
1
1
183
1
24
use CheckSpelling::Util;
9
10use constant {
11
1
577
  NO_MATCHES => -1,
12  NOT_UNIQUE => -2,
13  NO_WORD_SEEN_YET => '0'
14
1
1
2
1
};
15
16my $hunspell;
17
18sub entry {
19
6
228
  my ($name) = @_;
20
6
2
  my $handle;
21
6
63
  unless (open ($handle, '<:utf8', $name)) {
22
1
13
    print STDERR "Couldn't open dictionary `$name` (dictionary-not-found)\n";
23
1
3
    return 0;
24  }
25  return {
26
5
13
    name => $name,
27    handle => $handle,
28    word => NO_WORD_SEEN_YET,
29    uniq => 0,
30    covered => 0
31  }
32}
33
34sub hunspell_entry {
35
3
3
  my ($name) = @_;
36
3
23
  unless (open ($handle, '<:utf8', $name)) {
37
1
16
    print STDERR "Couldn't open dictionary `$name` (dictionary-not-found)\n";
38
1
2
    return 0;
39  }
40
2
12
  my $lines = <$handle>;
41
2
3
  chomp $lines;
42
2
7
  close $handle;
43
2
1
  my $aff = $name;
44
2
1
  my $encoding;
45
2
4
  $aff =~ s/dic$/aff/;
46
2
14
  if (open AFF, '<', $aff) {
47
1
8
    while (<AFF>) {
48
1
2
      next unless /^SET\s+(\S+)/;
49
1
3
      $encoding = $1 if ($1 !~ /utf-8/i);
50
1
1
      last;
51    }
52
1
2
    close AFF;
53  }
54
2
2
  my %map;
55  return {
56
2
487
    name => $name,
57    handle => undef,
58    encoding => $encoding,
59    engine => Text::Hunspell->new($aff, $name),
60    word => NO_WORD_SEEN_YET,
61    uniq => 0,
62    coverage => \%map,
63    lines => $lines
64  }
65}
66
67sub update_unique {
68
10
8
  my ($uniq, $file_id) = @_;
69
10
5
  if ($uniq == NO_MATCHES) {
70
9
6
    $uniq = $file_id;
71  } elsif ($uniq > NO_MATCHES) {
72
1
1
    $uniq = NOT_UNIQUE;
73  }
74
10
4
  return $uniq;
75}
76
77sub main {
78
7
9822
  my ($check, @dictionaries) = @_;
79
7
8
  my @files;
80  my $unknown_words;
81
7
62
  unless (open($unknown_words, '<:utf8', $check)) {
82
1
10
    print STDERR "Could not read $check\n";
83
1
3
    return 0;
84  }
85
86
6
7
  our $hunspell;
87
6
5
  for my $name (@dictionaries) {
88
8
11
    if ($name =~ /\.dic$/) {
89
3
9
      unless ($hunspell) {
90
1
1
1
1
208
1031
11
30
        unless (eval 'use Text::Hunspell; 1') {
91
0
0
          print STDERR "Could not load Text::Hunspell for \`$name\` (hunspell-unavailable)\n";
92
0
0
          next;
93        }
94
1
1
        $hunspell = 1;
95      }
96
3
3
      push @files, hunspell_entry($name);
97    } else {
98
5
5
      push @files, entry($name);
99    }
100  }
101
102
6
6
  my @results=@files;
103
6
6
  while (@files) {
104
19
49
    last if eof($unknown_words);
105
14
15
    my $unknown = <$unknown_words>;
106
14
10
    chomp $unknown;
107
14
30
    last if ($unknown eq '');
108
14
8
    my @drop;
109
14
6
    my $uniq = NO_MATCHES;
110
14
13
    for (my $file_id = 0; $file_id < scalar @files; $file_id++) {
111
17
7
      my $current = $files[$file_id];
112      my ($word, $handle, $engine) = (
113        $current->{'word'},
114        $current->{'handle'},
115
17
19
        $current->{'engine'},
116      );
117
17
38
      while ($word ne '' && $word lt $unknown) {
118
18
19
        if ($engine) {
119          my $token_encoded = defined $hunspell_dictionary->{'encoding'} ?
120
10
9
            encode($hunspell_dictionary->{'encoding'}, $unknown) : $unknown;
121
10
31
          if ($engine->check($token_encoded)) {
122
5
35
            my $stem = $engine->stem($token_encoded);
123
5
7
            $current->{'coverage'}->{$stem} = 1;
124
5
4
            $uniq = update_unique($uniq, $file_id);
125          }
126
10
13
          last;
127        }
128
8
23
        if (eof $handle) {
129
1
2
          $word = '';
130        } else {
131
7
9
          $word = <$handle>;
132
7
20
          chomp $word;
133        }
134      }
135
17
17
      if ($word eq $unknown) {
136
5
3
        ++$current->{'covered'};
137
5
5
        $uniq = update_unique($uniq, $file_id);
138
5
7
        if (eof $handle) {
139
2
2
          $word = '';
140        } else {
141
3
2
          $word = <$handle>;
142
3
3
          chomp $word;
143        }
144      }
145
17
11
      $current->{'word'} = $word;
146
17
26
      if ($word eq '') {
147
3
3
        push @drop, $file_id;
148      }
149    }
150
14
10
    if ($uniq > NO_MATCHES) {
151
8
6
      my $current = $files[$uniq];
152
8
3
      ++$current->{'uniq'};
153    }
154
14
18
    if (@drop) {
155
3
2
      for $file_id (reverse @drop) {
156
3
6
        splice @files, $file_id, 1;
157      }
158    }
159  }
160
6
7
  my $re=CheckSpelling::Util::get_file_from_env('aliases', '');
161
6
4
  my $extra_dictionaries = CheckSpelling::Util::get_file_from_env('extra_dictionaries', '');
162
6
11
  @dictionaries=split /\n/, $extra_dictionaries;
163
6
10
  for (my $file_id = 0; $file_id < scalar @results; $file_id++) {
164
8
4
    my $current = $results[$file_id];
165    my $covered = $current->{'coverage'}
166
2
3
      ? scalar(keys %{$current->{'coverage'}})
167
8
7
      : $current->{'covered'};
168
8
19
    next unless $covered;
169
170
5
5
    my $name = $current->{'name'};
171
5
2
    my $source_link;
172
5
159
    my ($source_link_dir, $source_link_name) = (dirname($name), basename($name));
173
5
60
    if (open($source_link, '<', "$source_link_dir/.$source_link_name")) {
174
2
13
      $name = <$source_link>;
175
2
3
      chomp $name;
176
2
6
      close $source_link;
177    }
178
179
5
4
    my $uniq = $current->{'uniq'};
180
5
3
    my $handle = $current->{'handle'};
181
5
2
    my $lines;
182
5
3
    if ($handle) {
183
3
3
      my $word = $current->{'word'};
184
3
5
      $word = <$handle> while !eof($handle);
185
3
7
      $lines = $handle->input_line_number();
186    } else {
187
2
2
      $lines = $current->{'lines'};
188    }
189
190
5
34
    local $_ = $name;
191
5
208
    eval $re;
192
5
7
    my $url = $_;
193
194
5
4
    my $name_without_spaces = $name;
195
5
4
    $name_without_spaces =~ s/\s+/_/g;
196
197
5
5
    my $unique = '';
198
5
4
    if ($uniq) {
199
4
4
      $unique = " ($uniq uniquely)";
200    } else {
201
1
1
      $uniq = 0;
202    }
203
5
166
    print "$covered-$lines-$uniq-$name_without_spaces [$name]($url) ($lines) covers $covered of them$unique\n";
204  }
205}
206
2071;