File Coverage

File:lib/CheckSpelling/Sarif.pm
Coverage:86.8%

linestmtbrancondsubtimecode
1#! -*-perl-*-
2
3package CheckSpelling::Sarif;
4
5our $VERSION='0.1.0';
6our $flatten=0;
7
8
1
1
1
109874
1
2
use utf8;
9
10
1
1
1
15
1
22
use File::Basename;
11
1
1
1
1
1
15
use File::Spec;
12
1
1
1
468
1257
61
use Digest::SHA qw($errmsg);
13
1
1
1
3
0
28
use JSON::PP;
14
1
1
1
177
3758
25
use Hash::Merge qw( merge );
15
1
1
1
168
4
15
use CheckSpelling::Util;
16
1
1
1
161
1
1883
use CheckSpelling::GitSources;
17
18sub encode_low_ascii {
19
13
186
    $_ = shift;
20
13
1
16
3
    s/([\x{0}-\x{9}\x{0b}\x{1f}#%])/"\\u".sprintf("%04x",ord($1))/eg;
21
13
12
    return $_;
22}
23
24sub url_encode {
25
11
13
    $_ = shift;
26
11
0
31
0
    s<([^-!\$&'()*+,/:;=?\@A-Za-z0-9_.~])><"%".sprintf("%02x",ord($1))>eg;
27
11
14
    return $_;
28}
29
30sub double_slash_escape {
31
12
9
    $_ = shift;
32
12
58
    s/(["()\[\]\\])/\\\\$1/g;
33
12
12
    return $_;
34}
35
36sub fingerprintLocations {
37
7
6
    my ($locations, $encoded_files_ref, $line_hashes_ref, $hashes_needed_for_files_ref, $message, $hashed_message) = @_;
38
7
6
    my @locations_json = ();
39
7
2
    my @fingerprints = ();
40
7
9
    for my $location (@$locations) {
41
10
6
        my $encoded_file = $location->{uri};
42
10
5
        my $line = $location->{startLine};
43
10
5
        my $column = $location->{startColumn};
44
10
4
        my $endColumn = $location->{endColumn};
45
10
7
        my $partialFingerprint = '';
46
10
7
        my $file = $encoded_files_ref->{$encoded_file};
47
10
5
        if (defined $line_hashes_ref->{$file}) {
48
8
8
            my $line_hash = $line_hashes_ref->{$file}{$line};
49
8
6
            if (defined $line_hash) {
50
4
4
3
7
                my @instances = sort keys %{$hashes_needed_for_files_ref->{$file}{$line}{$hashed_message}};
51
4
2
                my $hit = scalar @instances;
52
4
4
                while (--$hit > 0) {
53
0
0
                    last if $instances[$hit] == $column;
54                }
55
4
8
                $partialFingerprint = Digest::SHA::sha1_base64("$line_hash:$message:$hit");
56            }
57        }
58
10
8
        push @fingerprints, $partialFingerprint;
59
10
9
        my $startColumn = $column ? qq<, "startColumn": $column> : '';
60
10
6
        $endColumn = $endColumn ? qq<, "endColumn": $endColumn> : '';
61
10
7
        $line = 1 unless $line;
62
10
7
        my $json_fragment = qq<{ "physicalLocation": { "artifactLocation": { "uri": "$encoded_file", "uriBaseId": "%SRCROOT%" }, "region": { "startLine": $line$startColumn$endColumn } } }>;
63
10
6
        push @locations_json, $json_fragment;
64    }
65
7
12
    return { locations_json => \@locations_json, fingerprints => \@fingerprints };
66}
67
68sub hashFiles {
69
2
3
    my ($hashes_needed_for_files_ref, $line_hashes_ref, $directoryToRepo_ref, $used_hashes_ref) = @_;
70
2
6
    for my $file (sort keys %$hashes_needed_for_files_ref) {
71
4
4
        $line_hashes_ref->{$file} = ();
72
4
22
        unless (-e $file) {
73
2
2
            delete $hashes_needed_for_files_ref->{$file};
74
2
1
            next;
75        }
76
2
2
1
4
        my @lines = sort (keys %{$hashes_needed_for_files_ref->{$file}});
77
2
49
        unless (defined $directoryToRepo_ref->{dirname($file)}) {
78
2
8
            my ($parsed_file, $git_base_dir, $prefix, $remote_url, $rev, $branch) = CheckSpelling::GitSources::git_source_and_rev($file);
79        }
80
2
32
        open $file_fh, '<', $file;
81
2
2
        my $line = shift @lines;
82
2
4
        $line = 2 if $line == 1;
83
2
4
        my $buffer = '';
84
2
13
        while (<$file_fh>) {
85
10
14
            if ($line == $.) {
86
5
15
                my $sample = substr $buffer, -100, 100;
87
5
8
                my $hash = Digest::SHA::sha1_base64($sample);
88
5
5
                for (; $line == $.; $line = shift @lines) {
89
6
8
                    my $hit = $used_hashes_ref->{$hash}++;
90
6
4
                    $hash = "$hash:$hit" if $hit;
91
6
6
                    $line_hashes_ref->{$file}{$line} = $hash;
92
6
10
                    last unless @lines;
93                }
94            }
95
10
38
            $buffer .= $_;
96
10
25
            $buffer =~ s/\s+/ /g;
97
10
15
            $buffer = substr $buffer, -100, 100;
98        }
99
2
9
        close $file_fh;
100    }
101}
102
103sub addToHashesNeededForFiles {
104
10
7
    my ($file, $line, $column, $message, $hashes_needed_for_files_ref) = @_;
105
10
29
    my $hashed_message = Digest::SHA::sha1_base64($message);
106
10
8
    $hashes_needed_for_files_ref->{$file} = () unless defined $hashes_needed_for_files_ref->{$file};
107
10
24
    $hashes_needed_for_files_ref->{$file}{$line} = () unless defined $hashes_needed_for_files_ref->{$file}{$line};
108
10
19
    $hashes_needed_for_files_ref->{$file}{$line}{$hashed_message} = () unless defined $hashes_needed_for_files_ref->{$file}{$line}{$hashed_message};
109
10
20
    $hashes_needed_for_files_ref->{$file}{$line}{$hashed_message}{$column} = '1';
110}
111
112sub encode_message {
113
12
9
    my ($message) = @_;
114
115    # single-slash-escape `"` and `\`
116
12
27
    $message =~ s/(["\\])/\\$1/g;
117    # encode `message` to protect against low ascii`
118
12
8
    $message = encode_low_ascii $message;
119    # double-slash-escape `"`, `(`, `)`, `]`
120
12
12
    $message = double_slash_escape $message;
121    # hack to make the first `...` identifier a link (that goes nowhere, but is probably blue and underlined) in GitHub's SARIF view
122
12
18
    if ($message =~ /(`{2,})/) {
123
2
2
        my $backticks = $1;
124
2
22
        while ($message =~ /($backticks`+)(?=[`].*?\g{-1})/gs) {
125
0
0
            $backticks = $1 if length($1) > length($backticks);
126        }
127
2
20
        $message =~ s/(^|[^\\])$backticks(.+?)$backticks/${1}[${2}](#security-tab)/;
128    } else {
129
10
37
        $message =~ s/(^|[^\\])\`((?:[^`\\]|\\(?!`))+)\`/${1}[${2}](#security-tab)/;
130    }
131
132    # replace '`' with `\`+`'` because GitHub's SARIF parser doesn't like them
133
12
17
    $message =~ s/\`/'/g;
134    # escape '*'/`_` for markdown
135
12
8
    my $slash = '\\';
136
12
12
    $message =~ s/([*])(.*?)([*])/$slash$1$2$slash$3/g;
137
12
8
    $message =~ s/([_])(.*?)([_])/$slash$1$2$slash$3/g;
138
12
13
    return $message;
139}
140
141sub parse_warnings {
142
1
1
    my ($warnings) = @_;
143
1
0
    our $flatten;
144
1
1
    our %directoryToRepo;
145
1
0
    our $provenanceInsertion;
146
1
0
    our %provenanceStringToIndex;
147
1
1
    our %directoryToProvenanceInsertion;
148
1
0
    my @results;
149
1
12
    unless (open WARNINGS, '<', $warnings) {
150
0
0
        print STDERR "Could not open $warnings\n";
151
0
0
        return [];
152    }
153
1
2
    my $rules = ();
154
1
1
    my %encoded_files = ();
155
1
0
    my %hashes_needed_for_files = ();
156
1
11
    while (<WARNINGS>) {
157
11
10
        next if m{^https://};
158
10
39
        next unless m{^(.+):(\d+):(\d+) \.\.\. (\d+),\s(Error|Warning|Notice)\s-\s(.+\s\((.+)\))$};
159
9
27
        my ($file, $line, $column, $endColumn, $severity, $message, $code) = ($1, $2, $3, $4, $5, $6, $7);
160
9
94
        my $directory = dirname($file);
161
9
8
        unless (defined $directoryToProvenanceInsertion{$directory}) {
162
2
3
            my $provenanceString = collectVersionControlProvenance($file);
163
2
165
            if ($provenanceString) {
164
2
4
                if (defined $provenanceStringToIndex{$provenanceString}) {
165
0
0
                    $directoryToProvenanceInsertion{$directory} = $provenanceStringToIndex{$provenanceString};
166                } else {
167
2
5
                    $provenanceStringToIndex{$provenanceString} = $provenanceInsertion;
168
2
4
                    $directoryToProvenanceInsertion{$directory} = $provenanceInsertion;
169
2
3
                    ++$provenanceInsertion;
170                }
171            }
172        }
173        # encode `file` to protect against low ascii`
174
9
8
        my $encoded_file = url_encode $file;
175
9
11
        $encoded_files{$encoded_file} = $file;
176
177
9
7
        $message = encode_message($message);
178
179
9
7
        unless (defined $rules->{$code}) {
180
3
5
            $rules->{$code} = {};
181        }
182
9
5
        my $rule = $rules->{$code};
183
9
6
        unless (defined $rule->{$message}) {
184
6
16
            $rule->{$message} = [];
185        }
186
9
8
        addToHashesNeededForFiles($file, $line, $column, $message, \%hashes_needed_for_files);
187
9
8
        my $locations = $rule->{$message};
188
9
17
        my $physicalLocation = {
189            'uri' => $encoded_file,
190            'startLine' => $line,
191            'startColumn' => $column,
192            'endColumn' => $endColumn,
193        };
194
9
4
        push @$locations, $physicalLocation;
195
9
30
        $rule->{$message} = $locations;
196    }
197
1
1
    my %line_hashes = ();
198
1
1
    my %used_hashes = ();
199
1
4
    hashFiles(\%hashes_needed_for_files, \%line_hashes, \%directoryToRepo, \%used_hashes);
200
1
1
1
4
    for my $code (sort keys %{$rules}) {
201
3
3
        my $rule = $rules->{$code};
202
3
3
1
4
        for my $message (sort keys %{$rule}) {
203
6
12
            my $hashed_message = Digest::SHA::sha1_base64($message);
204
6
5
            my $locations = $rule->{$message};
205
6
4
            my $fingerprintResults = fingerprintLocations($locations, \%encoded_files, \%line_hashes, \%hashes_needed_for_files, $message, $hashed_message);
206
6
6
5
4
            my @locations_json = @{$fingerprintResults->{locations_json}};
207
6
6
2
4
            my @fingerprints = @{$fingerprintResults->{fingerprints}};
208
6
4
            if ($flatten) {
209
0
0
                my $locations_json_flat = join ',', @locations_json;
210
0
0
                my $partialFingerprints;
211
0
0
                my $partialFingerprint = (sort @fingerprints)[0];
212
0
0
                if ($partialFingerprint ne '') {
213
0
0
                    $partialFingerprints = qq<"partialFingerprints": { "cs0" : "$partialFingerprint" },>;
214                }
215
0
0
                $message =~ s/\s\\\\\([^()]+?\\\)$//g;
216
0
0
                my $result_json = qq<{"ruleId": "$code", $partialFingerprints "message": { "text": "$message" }, "locations": [ $locations_json_flat ] }>;
217
0
0
                my $result = decode_json $result_json;
218
0
0
                push @results, $result;
219            } else {
220
6
1
                my $limit = scalar @locations_json;
221
6
6
                for (my $i = 0; $i < $limit; ++$i) {
222
9
7
                    my $locations_json_flat = $locations_json[$i];
223
9
4
                    my $partialFingerprints = '';
224
9
4
                    my $partialFingerprint = $fingerprints[$i];
225
9
7
                    if ($partialFingerprint ne '') {
226
4
3
                        $partialFingerprints = qq<"partialFingerprints": { "cs0" : "$partialFingerprint" },>;
227                    }
228
9
20
                    $message =~ s/\s\\\\\([^()]+?\\\)$//g;
229
9
7
                    my $result_json = qq<{"ruleId": "$code", $partialFingerprints "message": { "text": "$message" }, "locations": [ $locations_json_flat ] }>;
230
9
10
                    my $result = decode_json $result_json;
231
9
9815
                    push @results, $result;
232                }
233            }
234        }
235    }
236
1
7
    close WARNINGS;
237
1
11
    return \@results;
238}
239
240sub get_runs_from_sarif {
241
2
16
    my ($sarif_json) = @_;
242
2
0
    my %runs_view;
243
2
3
    return %runs_view unless $sarif_json->{'runs'};
244
2
2
1
2
    my @sarif_json_runs=@{$sarif_json->{'runs'}};
245
2
3
    foreach my $sarif_json_run (@sarif_json_runs) {
246
2
2
0
4
        my %sarif_json_run_hash=%{$sarif_json_run};
247
2
2
        next unless defined $sarif_json_run_hash{'tool'};
248
249
2
2
2
1
        my %sarif_json_run_tool_hash = %{$sarif_json_run_hash{'tool'}};
250
2
2
        next unless defined $sarif_json_run_tool_hash{'driver'};
251
252
2
2
2
4
        my %sarif_json_run_tool_driver_hash = %{$sarif_json_run_tool_hash{'driver'}};
253        next unless defined $sarif_json_run_tool_driver_hash{'name'} &&
254
2
7
            defined $sarif_json_run_tool_driver_hash{'rules'};
255
256
2
2
        my $driver_name = $sarif_json_run_tool_driver_hash{'name'};
257
2
2
0
6
        my @sarif_json_run_tool_driver_rules = @{$sarif_json_run_tool_driver_hash{'rules'}};
258
2
0
        my %driver_view;
259
2
2
        for my $driver_rule (@sarif_json_run_tool_driver_rules) {
260
45
31
            next unless defined $driver_rule->{'id'};
261
45
32
            $driver_view{$driver_rule->{'id'}} = $driver_rule;
262        }
263
2
4
        $runs_view{$sarif_json_run_tool_driver_hash{'name'}} = \%driver_view;
264    }
265
2
2
    return %runs_view;
266}
267
268sub collectVersionControlProvenance {
269
2
1
    my ($file) = @_;
270
2
6
    my ($parsed_file, $git_base_dir, $prefix, $remote_url, $rev, $branch) = CheckSpelling::GitSources::git_source_and_rev($file);
271
2
10
    return '' unless $remote_url;
272
2
2
    my $base = substr $parsed_file, 0, length($file);
273
2
3
    my $provenance = [$remote_url, $rev, $branch, $git_base_dir];
274
2
8
    return JSON::PP::encode_json($provenance);
275}
276
277sub generateVersionControlProvenance {
278
1
1
    my ($versionControlProvenanceList, $run) = @_;
279
1
0
    my %provenance;
280    sub buildVersionControlProvenance {
281
1
1
        my $d = $_;
282
1
1
1
1
        my ($remote_url, $rev, $branch, $git_base_dir) = @{JSON::PP::decode_json($d)};
283
1
305
        my $dir = $git_base_dir eq '.' ? '%SRCROOT%' : "DIR_$provenanceStringToIndex{$d}";
284
1
1
        my $mappedTo = {
285            "uriBaseId" => $dir
286        };
287
1
3
        my $versionControlProvenance = {
288            "mappedTo" => $mappedTo
289        };
290
1
1
        $versionControlProvenance->{"revisionId"} = $rev if defined $rev;
291
1
2
        $versionControlProvenance->{"branch"} = $branch if defined $branch;
292
1
2
        $versionControlProvenance->{"repositoryUri"} = $remote_url if defined $remote_url;
293
1
1
        return $versionControlProvenance;
294    }
295
1
1
    @provenanceList = map(buildVersionControlProvenance,@$versionControlProvenanceList);
296
1
1
    $run->{"versionControlProvenance"} = \@provenanceList;
297}
298
299my $provenanceInsertion = 0;
300my %provenanceStringToIndex = ();
301my %directoryToProvenanceInsertion = ();
302
303sub main {
304
1
22190
    my ($sarif_template_file, $sarif_template_overlay_file, $category) = @_;
305
1
7
    unless (-f $sarif_template_file) {
306
0
0
        warn "Could not find sarif template";
307
0
0
        return '';
308    }
309
310
1
2
    $ENV{GITHUB_SERVER_URL} = '' unless defined $ENV{GITHUB_SERVER_URL};
311
1
4
    $ENV{GITHUB_REPOSITORY} = '' unless defined $ENV{GITHUB_REPOSITORY};
312
1
5
    my $sarif_template = CheckSpelling::Util::read_file $sarif_template_file;
313
1
1
    die "sarif template is empty" unless $sarif_template;
314
315
1
0
9
0
    my $json = JSON::PP->new->utf8->pretty->sort_by(sub { $JSON::PP::a cmp $JSON::PP::b });
316
1
76
    my $sarif_json = $json->decode($sarif_template);
317
318
1
124775
    if (defined $sarif_template_overlay_file && -s $sarif_template_overlay_file) {
319
1
8
        my $merger = Hash::Merge->new();
320
1
118
        my $merge_behaviors = $merger->{'behaviors'}->{$merger->get_behavior()};
321
1
6
        my $merge_arrays = $merge_behaviors->{'ARRAY'}->{'ARRAY'};
322
323        $merge_behaviors->{'ARRAY'}->{'ARRAY'} = sub {
324
41
4180
            return $merge_arrays->(@_) if ref($_[0][0]).ref($_[1][0]);
325
41
41
22
46
            return [@{$_[1]}];
326
1
2
        };
327
328
1
2
        my $sarif_template_overlay = CheckSpelling::Util::read_file $sarif_template_overlay_file;
329
1
2
        my %runs_base = get_runs_from_sarif($sarif_json);
330
331
1
1
        my $sarif_template_hash = $json->decode($sarif_template_overlay);
332
1
1792
        my %runs_overlay = get_runs_from_sarif($sarif_template_hash);
333
1
2
        for my $run_id (keys %runs_overlay) {
334
1
1
            if (defined $runs_base{$run_id}) {
335
1
0
                my $run_base_hash = $runs_base{$run_id};
336
1
4
                my $run_overlay_hash = $runs_overlay{$run_id};
337
1
1
                for my $overlay_id (keys %$run_overlay_hash) {
338                    $run_base_hash->{$overlay_id} = $merger->merge(
339                        $run_overlay_hash->{$overlay_id},
340
1
3
                        $run_base_hash->{$overlay_id}
341                    );
342                }
343            } else {
344
0
0
                $runs_base{$run_id} = $runs_overlay{$run_id};
345            }
346        }
347        #$sarif_json->
348
1
1
42
1
        my @sarif_json_runs = @{$sarif_json->{'runs'}};
349
1
0
        foreach my $sarif_json_run (@sarif_json_runs) {
350
1
1
1
1
            my %sarif_json_run_hash=%{$sarif_json_run};
351
1
1
            next unless defined $sarif_json_run_hash{'tool'};
352
353
1
1
1
7
            my %sarif_json_run_tool_hash = %{$sarif_json_run_hash{'tool'}};
354
1
1
            next unless defined $sarif_json_run_tool_hash{'driver'};
355
356
1
1
1
2
            my %sarif_json_run_tool_driver_hash = %{$sarif_json_run_tool_hash{'driver'}};
357
1
1
            my $driver_name = $sarif_json_run_tool_driver_hash{'name'};
358            next unless defined $driver_name &&
359
1
4
                defined $sarif_json_run_tool_driver_hash{'rules'};
360
361
1
0
            my $driver_view_hash = $runs_base{$driver_name};
362
1
1
            next unless defined $driver_view_hash;
363
364
1
1
0
6
            my @sarif_json_run_tool_driver_rules = @{$sarif_json_run_tool_driver_hash{'rules'}};
365
1
1
            for my $driver_rule_number (0 .. scalar @sarif_json_run_tool_driver_rules) {
366
45
3192
                my $driver_rule = $sarif_json_run_tool_driver_rules[$driver_rule_number];
367
45
21
                my $driver_rule_id = $driver_rule->{'id'};
368                next unless defined $driver_rule_id &&
369
45
69
                    defined $driver_view_hash->{$driver_rule_id};
370
44
32
                $sarif_json_run_tool_driver_hash{'rules'}[$driver_rule_number] = $merger->merge($driver_view_hash->{$driver_rule_id}, $driver_rule);
371            }
372        }
373
1
1
        delete $sarif_template_hash->{'runs'};
374
1
1
        $sarif_json = $merger->merge($sarif_json, $sarif_template_hash);
375    }
376    {
377
1
1
1
631
1
1
        my @sarif_json_runs = @{$sarif_json->{'runs'}};
378
1
1
        foreach my $sarif_json_run (@sarif_json_runs) {
379
1
0
            my %sarif_json_run_automationDetails;
380
1
1
            $sarif_json_run_automationDetails{id} = $category;
381
1
1
            $sarif_json_run->{'automationDetails'} = \%sarif_json_run_automationDetails;
382        }
383    }
384
385
1
1
0
2
    my %sarif = %{$sarif_json};
386
387
1
1
    $sarif{'runs'}[0]{'tool'}{'driver'}{'version'} = $ENV{CHECK_SPELLING_VERSION};
388
389
1
42
    my $results = parse_warnings $ENV{warning_output};
390
1
1
    if ($results) {
391
1
1
        $sarif{'runs'}[0]{'results'} = $results;
392
1
1
        our %provenanceStringToIndex;
393
1
1
        my @provenanceList = keys %provenanceStringToIndex;
394
1
2
        generateVersionControlProvenance(\@provenanceList, $sarif{'runs'}[0]);
395
1
1
        my %codes;
396
1
2
        for my $result_ref (@$results) {
397
9
9
6
8
            my %result = %{$result_ref};
398
9
5
            $codes{$result{'ruleId'}} = 1;
399        }
400
1
1
        my $rules_ref = $sarif{'runs'}[0]{'tool'}{'driver'}{'rules'};
401
1
1
1
7
        my @rules = @{$rules_ref};
402
1
2
        my $missing_rule_definition_id = 'missing-rule-definition';
403
1
44
1
23
        my ($missing_rule_definition_ref) = grep { $_->{'id'} eq $missing_rule_definition_id } @rules;
404
1
44
1
23
        @rules = grep { defined $codes{$_->{'id'}} } @rules;
405
1
0
        my $code_index = 0;
406
1
2
1
2
        my %defined_codes = map { $_->{'id'} => $code_index++ } @rules;
407
1
3
1
2
        my @missing_codes = grep { !defined $defined_codes{$_}} keys %codes;
408
1
1
        my $missing_rule_definition_index;
409
1
1
        if (@missing_codes) {
410
1
1
            push @rules, $missing_rule_definition_ref;
411
1
0
            $missing_rule_definition_index = $defined_codes{$missing_rule_definition_id} = $code_index++;
412
1
28
            my $spellchecker = $ENV{spellchecker} || dirname(dirname(dirname(__FILE__)));
413
1
1
            my %hashes_needed_for_files = ();
414
1
0
            my %line_hashes = ();
415
1
1
            my %used_hashes = ();
416
1
0
            our %directoryToRepo;
417
1
1
            for my $missing_code (@missing_codes) {
418
1
1
                my $message = "No rule definition for `$missing_code`";
419
1
9856
                my $code_locations = `find '$spellchecker' -name '.git*' -prune -o \\( -name '*.sh' -o -name '*.pl' -o -name '*.pm' \\) -type f -print0|xargs -0 grep -n '$missing_code' | perl -pe 's<^\./><>'`;
420
1
7
                my @locations;
421
1
9
                for my $line (split /\n/, $code_locations) {
422
1
4
                    chomp $line;
423
1
12
                    my ($file, $lineno, $code) = $line =~ /^(.+?):(\d+):(.+)$/;
424
1
7
                    next unless defined $file;
425
1
27
                    $code =~ /^(.*?)\b$missing_code\b/;
426
1
5
                    my $startColumn = length($1) + 1;
427
1
3
                    my $endColumn = length($1) + length($missing_code) + 1;
428
1
10
                    my $location = {
429                        'uri' => url_encode($file),
430                        'startLine' => $lineno,
431                        'startColumn' => $startColumn,
432                        'endColumn' => $endColumn,
433                    };
434
1
94
                    my $relative = File::Spec->abs2rel($file, $spellchecker);
435
1
19
                    print STDERR "::notice title=${missing_rule_definition_id}::$relative:$lineno:$startColumn ... $endColumn, Notice - $message ($missing_rule_definition_id)\n";
436
1
4
                    push @locations, $location;
437
1
2
                    my $encoded_file = url_encode $file;
438
1
6
                    $encoded_files{$encoded_file} = $file;
439
1
2
                    addToHashesNeededForFiles($file, $lineno, $startColumn, $message, \%hashes_needed_for_files);
440                }
441
1
4
                hashFiles(\%hashes_needed_for_files, \%line_hashes, \%directoryToRepo, \%used_hashes);
442
1
2
                my $fingerprintResults = fingerprintLocations(\@locations, \%encoded_files, \%encoded_files, \%line_hashes, $message, Digest::SHA::sha1_base64($message));
443
1
1
0
2
                my @locations_json = @{$fingerprintResults->{locations_json}};
444
1
1
0
1
                my @fingerprints = @{$fingerprintResults->{fingerprints}};
445
1
2
                my $locations_json_flat = join ',', @locations_json;
446
1
1
                my $partialFingerprints = '';
447
1
3
                my $locations = $locations_json_flat ? qq<, "locations": [ $locations_json_flat ]> : '';
448
1
2
                my $result_json = qq<{"ruleId": "$missing_rule_definition_id", $partialFingerprints "message": { "text": "$message" }$locations }>;
449
1
8
                my $result = decode_json $result_json;
450
1
1
1009
6
                push @{$results}, $result;
451            }
452        }
453
1
1
        $sarif{'runs'}[0]{'tool'}{'driver'}{'rules'} = \@rules;
454
1
1
1
1
        for my $result_index (0 .. scalar @{$results}) {
455
11
6
            my $result = $results->[$result_index];
456
11
11
            my $ruleId = $result->{'ruleId'};
457
11
51
            next if defined $ruleId && defined $defined_codes{$ruleId};
458
2
98
            $result->{'ruleId'} = $missing_rule_definition_id;
459        }
460    }
461
462
1
9
    return JSON::PP->new->canonical([1])->utf8->encode(\%sarif);
463}
464
4651;