File Coverage

File:lib/CheckSpelling/Sarif.pm
Coverage:86.4%

linestmtbrancondsubtimecode
1#! -*-perl-*-
2
3package CheckSpelling::Sarif;
4
5our $VERSION='0.1.0';
6our $flatten=0;
7
8
1
1
1
110510
0
2
use utf8;
9
10
1
1
1
15
1
25
use File::Basename;
11
1
1
1
1
1
15
use File::Spec;
12
1
1
1
177
1231
60
use Digest::SHA qw($errmsg);
13
1
1
1
2
0
23
use JSON::PP;
14
1
1
1
190
3659
25
use Hash::Merge qw( merge );
15
1
1
1
167
4
16
use CheckSpelling::Util;
16
1
1
1
155
0
1915
use CheckSpelling::GitSources;
17
18sub encode_low_ascii {
19
13
185
    $_ = shift;
20
13
1
23
4
    s/([\x{0}-\x{9}\x{0b}\x{1f}#%])/"\\u".sprintf("%04x",ord($1))/eg;
21
13
11
    return $_;
22}
23
24sub url_encode {
25
11
12
    $_ = shift;
26
11
0
31
0
    s<([^-!\$&'()*+,/:;=?\@A-Za-z0-9_.~])><"%".sprintf("%02x",ord($1))>eg;
27
11
15
    return $_;
28}
29
30sub double_slash_escape {
31
12
7
    $_ = shift;
32
12
58
    s/(["()\[\]\\])/\\\\$1/g;
33
12
14
    return $_;
34}
35
36sub fingerprintLocations {
37
7
7
    my ($locations, $encoded_files_ref, $line_hashes_ref, $hashes_needed_for_files_ref, $message, $hashed_message) = @_;
38
7
3
    my @locations_json = ();
39
7
5
    my @fingerprints = ();
40
7
4
    for my $location (@$locations) {
41
10
5
        my $encoded_file = $location->{uri};
42
10
6
        my $line = $location->{startLine};
43
10
5
        my $column = $location->{startColumn};
44
10
6
        my $endColumn = $location->{endColumn};
45
10
7
        my $partialFingerprint = '';
46
10
4
        my $file = $encoded_files_ref->{$encoded_file};
47
10
8
        if (defined $line_hashes_ref->{$file}) {
48
8
8
            my $line_hash = $line_hashes_ref->{$file}{$line};
49
8
6
            if (defined $line_hash) {
50
4
4
2
7
                my @instances = sort keys %{$hashes_needed_for_files_ref->{$file}{$line}{$hashed_message}};
51
4
0
                my $hit = scalar @instances;
52
4
5
                while (--$hit > 0) {
53
0
0
                    last if $instances[$hit] == $column;
54                }
55
4
9
                $partialFingerprint = Digest::SHA::sha1_base64("$line_hash:$message:$hit");
56            }
57        }
58
10
8
        push @fingerprints, $partialFingerprint;
59
10
7
        my $startColumn = $column ? qq<, "startColumn": $column> : '';
60
10
8
        $endColumn = $endColumn ? qq<, "endColumn": $endColumn> : '';
61
10
4
        $line = 1 unless $line;
62
10
9
        my $json_fragment = qq<{ "physicalLocation": { "artifactLocation": { "uri": "$encoded_file", "uriBaseId": "%SRCROOT%" }, "region": { "startLine": $line$startColumn$endColumn } } }>;
63
10
9
        push @locations_json, $json_fragment;
64    }
65
7
11
    return { locations_json => \@locations_json, fingerprints => \@fingerprints };
66}
67
68sub hashFiles {
69
2
3
    my ($hashes_needed_for_files_ref, $line_hashes_ref, $directoryToRepo_ref, $used_hashes_ref) = @_;
70
2
6
    for my $file (sort keys %$hashes_needed_for_files_ref) {
71
4
4
        $line_hashes_ref->{$file} = ();
72
4
19
        unless (-e $file) {
73
2
4
            delete $hashes_needed_for_files_ref->{$file};
74
2
1
            next;
75        }
76
2
2
1
5
        my @lines = sort (keys %{$hashes_needed_for_files_ref->{$file}});
77
2
38
        unless (defined $directoryToRepo_ref->{dirname($file)}) {
78
2
7
            my ($parsed_file, $git_base_dir, $prefix, $remote_url, $rev, $branch) = CheckSpelling::GitSources::git_source_and_rev($file);
79        }
80
2
30
        open $file_fh, '<', $file;
81
2
1
        my $line = shift @lines;
82
2
3
        $line = 2 if $line == 1;
83
2
3
        my $buffer = '';
84
2
9
        while (<$file_fh>) {
85
10
11
            if ($line == $.) {
86
5
3
                my $sample = substr $buffer, -100, 100;
87
5
7
                my $hash = Digest::SHA::sha1_base64($sample);
88
5
16
                for (; $line == $.; $line = shift @lines) {
89
6
7
                    my $hit = $used_hashes_ref->{$hash}++;
90
6
4
                    $hash = "$hash:$hit" if $hit;
91
6
7
                    $line_hashes_ref->{$file}{$line} = $hash;
92
6
7
                    last unless @lines;
93                }
94            }
95
10
4
            $buffer .= $_;
96
10
23
            $buffer =~ s/\s+/ /g;
97
10
15
            $buffer = substr $buffer, -100, 100;
98        }
99
2
7
        close $file_fh;
100    }
101}
102
103sub addToHashesNeededForFiles {
104
10
7
    my ($file, $line, $column, $message, $hashes_needed_for_files_ref) = @_;
105
10
27
    my $hashed_message = Digest::SHA::sha1_base64($message);
106
10
11
    $hashes_needed_for_files_ref->{$file} = () unless defined $hashes_needed_for_files_ref->{$file};
107
10
21
    $hashes_needed_for_files_ref->{$file}{$line} = () unless defined $hashes_needed_for_files_ref->{$file}{$line};
108
10
20
    $hashes_needed_for_files_ref->{$file}{$line}{$hashed_message} = () unless defined $hashes_needed_for_files_ref->{$file}{$line}{$hashed_message};
109
10
18
    $hashes_needed_for_files_ref->{$file}{$line}{$hashed_message}{$column} = '1';
110}
111
112sub encode_message {
113
12
11
    my ($message) = @_;
114
115    # single-slash-escape `"` and `\`
116
12
21
    $message =~ s/(["\\])/\\$1/g;
117    # escape '*'/`_` for markdown
118    # encode `message` to protect against low ascii`
119
12
13
    $message = encode_low_ascii $message;
120    # double-slash-escape `"`, `(`, `)`, `]`
121
12
13
    $message = double_slash_escape $message;
122
12
7
    my $doubled_slash = '\\\\';
123
12
20
    $message =~ s/([*])(.*?)([*])/$doubled_slash$1$2$doubled_slash$3/g;
124
12
7
    $message =~ s/([_])(.*?)([_])/$doubled_slash$1$2$doubled_slash$3/g;
125    # hack to make the first `...` identifier a link (that goes nowhere, but is probably blue and underlined) in GitHub's SARIF view
126
12
19
    if ($message =~ /(`{2,})/) {
127
2
5
        my $backticks = $1;
128
2
30
        while ($message =~ /($backticks`+)(?=[`].*?\g{-1})/gs) {
129
0
0
            $backticks = $1 if length($1) > length($backticks);
130        }
131
2
21
        $message =~ s/(^|[^\\])$backticks(.+?)$backticks/${1}[${2}](#security-tab)/;
132    } else {
133
10
32
        $message =~ s/(^|[^\\])\`((?:[^`\\]|\\(?!`))+)\`/${1}[${2}](#security-tab)/;
134    }
135
136    # replace '`' with `\`+`'` because GitHub's SARIF parser doesn't like them
137
12
16
    $message =~ s/\`/'/g;
138
12
16
    return $message;
139}
140
141sub parse_warnings {
142
1
2
    my ($warnings) = @_;
143
1
0
    our $flatten;
144
1
0
    our %directoryToRepo;
145
1
1
    our $provenanceInsertion;
146
1
0
    our %provenanceStringToIndex;
147
1
1
    our %directoryToProvenanceInsertion;
148
1
0
    my @results;
149
1
12
    unless (open WARNINGS, '<', $warnings) {
150
0
0
        print STDERR "Could not open $warnings\n";
151
0
0
        return [];
152    }
153
1
2
    my $rules = ();
154
1
1
    my %encoded_files = ();
155
1
2
    my %hashes_needed_for_files = ();
156
1
33
    while (<WARNINGS>) {
157
11
13
        next if m{^https://};
158
10
37
        next unless m{^(.+):(\d+):(\d+) \.\.\. (\d+),\s(Error|Warning|Notice)\s-\s(.+\s\((.+)\))$};
159
9
27
        my ($file, $line, $column, $endColumn, $severity, $message, $code) = ($1, $2, $3, $4, $5, $6, $7);
160
9
99
        my $directory = dirname($file);
161
9
8
        unless (defined $directoryToProvenanceInsertion{$directory}) {
162
2
3
            my $provenanceString = collectVersionControlProvenance($file);
163
2
171
            if ($provenanceString) {
164
2
1
                if (defined $provenanceStringToIndex{$provenanceString}) {
165
0
0
                    $directoryToProvenanceInsertion{$directory} = $provenanceStringToIndex{$provenanceString};
166                } else {
167
2
4
                    $provenanceStringToIndex{$provenanceString} = $provenanceInsertion;
168
2
3
                    $directoryToProvenanceInsertion{$directory} = $provenanceInsertion;
169
2
0
                    ++$provenanceInsertion;
170                }
171            }
172        }
173        # encode `file` to protect against low ascii`
174
9
6
        my $encoded_file = url_encode $file;
175
9
5
        $encoded_files{$encoded_file} = $file;
176
177
9
8
        $message = encode_message($message);
178
179
9
6
        unless (defined $rules->{$code}) {
180
3
6
            $rules->{$code} = {};
181        }
182
9
7
        my $rule = $rules->{$code};
183
9
4
        unless (defined $rule->{$message}) {
184
6
16
            $rule->{$message} = [];
185        }
186
9
9
        addToHashesNeededForFiles($file, $line, $column, $message, \%hashes_needed_for_files);
187
9
6
        my $locations = $rule->{$message};
188
9
14
        my $physicalLocation = {
189            'uri' => $encoded_file,
190            'startLine' => $line,
191            'startColumn' => $column,
192            'endColumn' => $endColumn,
193        };
194
9
6
        push @$locations, $physicalLocation;
195
9
24
        $rule->{$message} = $locations;
196    }
197
1
1
    my %line_hashes = ();
198
1
1
    my %used_hashes = ();
199
1
2
    hashFiles(\%hashes_needed_for_files, \%line_hashes, \%directoryToRepo, \%used_hashes);
200
1
1
2
2
    for my $code (sort keys %{$rules}) {
201
3
3
        my $rule = $rules->{$code};
202
3
3
0
5
        for my $message (sort keys %{$rule}) {
203
6
12
            my $hashed_message = Digest::SHA::sha1_base64($message);
204
6
2
            my $locations = $rule->{$message};
205
6
7
            my $fingerprintResults = fingerprintLocations($locations, \%encoded_files, \%line_hashes, \%hashes_needed_for_files, $message, $hashed_message);
206
6
6
4
4
            my @locations_json = @{$fingerprintResults->{locations_json}};
207
6
6
3
4
            my @fingerprints = @{$fingerprintResults->{fingerprints}};
208
6
3
            if ($flatten) {
209
0
0
                my $locations_json_flat = join ',', @locations_json;
210
0
0
                my $partialFingerprints;
211
0
0
                my $partialFingerprint = (sort @fingerprints)[0];
212
0
0
                if ($partialFingerprint ne '') {
213
0
0
                    $partialFingerprints = qq<"partialFingerprints": { "cs0" : "$partialFingerprint" },>;
214                }
215
0
0
                $message =~ s/\s\\\\\([^()]+?\\\)$//g;
216
0
0
                my $result_json = qq<{"ruleId": "$code", $partialFingerprints "message": { "text": "$message" }, "locations": [ $locations_json_flat ] }>;
217
0
0
                my $result = decode_json $result_json;
218
0
0
                push @results, $result;
219            } else {
220
6
4
                my $limit = scalar @locations_json;
221
6
4
                for (my $i = 0; $i < $limit; ++$i) {
222
9
8
                    my $locations_json_flat = $locations_json[$i];
223
9
1
                    my $partialFingerprints = '';
224
9
8
                    my $partialFingerprint = $fingerprints[$i];
225
9
4
                    if ($partialFingerprint ne '') {
226
4
3
                        $partialFingerprints = qq<"partialFingerprints": { "cs0" : "$partialFingerprint" },>;
227                    }
228
9
21
                    $message =~ s/\s\\\\\([^()]+?\\\)$//g;
229
9
8
                    my $result_json = qq<{"ruleId": "$code", $partialFingerprints "message": { "text": "$message" }, "locations": [ $locations_json_flat ] }>;
230
9
3
                    my $result = eval {
231
9
10
                        decode_json $result_json;
232                    };
233
9
9849
                    if ($@) {
234
0
0
                        print STDERR "$@";
235
0
0
                        print STDERR "$result_json\n";
236                    } else {
237
9
19
                        push @results, $result;
238                    }
239                }
240            }
241        }
242    }
243
1
4
    close WARNINGS;
244
1
11
    return \@results;
245}
246
247sub get_runs_from_sarif {
248
2
16
    my ($sarif_json) = @_;
249
2
2
    my %runs_view;
250
2
3
    return %runs_view unless $sarif_json->{'runs'};
251
2
2
1
3
    my @sarif_json_runs=@{$sarif_json->{'runs'}};
252
2
2
    foreach my $sarif_json_run (@sarif_json_runs) {
253
2
2
1
4
        my %sarif_json_run_hash=%{$sarif_json_run};
254
2
2
        next unless defined $sarif_json_run_hash{'tool'};
255
256
2
2
1
2
        my %sarif_json_run_tool_hash = %{$sarif_json_run_hash{'tool'}};
257
2
2
        next unless defined $sarif_json_run_tool_hash{'driver'};
258
259
2
2
2
3
        my %sarif_json_run_tool_driver_hash = %{$sarif_json_run_tool_hash{'driver'}};
260        next unless defined $sarif_json_run_tool_driver_hash{'name'} &&
261
2
8
            defined $sarif_json_run_tool_driver_hash{'rules'};
262
263
2
1
        my $driver_name = $sarif_json_run_tool_driver_hash{'name'};
264
2
2
0
4
        my @sarif_json_run_tool_driver_rules = @{$sarif_json_run_tool_driver_hash{'rules'}};
265
2
1
        my %driver_view;
266
2
2
        for my $driver_rule (@sarif_json_run_tool_driver_rules) {
267
46
25
            next unless defined $driver_rule->{'id'};
268
46
67
            $driver_view{$driver_rule->{'id'}} = $driver_rule;
269        }
270
2
5
        $runs_view{$sarif_json_run_tool_driver_hash{'name'}} = \%driver_view;
271    }
272
2
2
    return %runs_view;
273}
274
275sub collectVersionControlProvenance {
276
2
2
    my ($file) = @_;
277
2
4
    my ($parsed_file, $git_base_dir, $prefix, $remote_url, $rev, $branch) = CheckSpelling::GitSources::git_source_and_rev($file);
278
2
4
    return '' unless $remote_url;
279
2
2
    my $base = substr $parsed_file, 0, length($file);
280
2
3
    my $provenance = [$remote_url, $rev, $branch, $git_base_dir];
281
2
6
    return JSON::PP::encode_json($provenance);
282}
283
284sub generateVersionControlProvenance {
285
1
2
    my ($versionControlProvenanceList, $run) = @_;
286
1
0
    my %provenance;
287    sub buildVersionControlProvenance {
288
1
0
        my $d = $_;
289
1
1
1
1
        my ($remote_url, $rev, $branch, $git_base_dir) = @{JSON::PP::decode_json($d)};
290
1
372
        my $dir = $git_base_dir eq '.' ? '%SRCROOT%' : "DIR_$provenanceStringToIndex{$d}";
291
1
2
        my $mappedTo = {
292            "uriBaseId" => $dir
293        };
294
1
2
        my $versionControlProvenance = {
295            "mappedTo" => $mappedTo
296        };
297
1
1
        $versionControlProvenance->{"revisionId"} = $rev if defined $rev;
298
1
3
        $versionControlProvenance->{"branch"} = $branch if defined $branch;
299
1
1
        $versionControlProvenance->{"repositoryUri"} = $remote_url if defined $remote_url;
300
1
1
        return $versionControlProvenance;
301    }
302
1
3
    @provenanceList = map(buildVersionControlProvenance,@$versionControlProvenanceList);
303
1
2
    $run->{"versionControlProvenance"} = \@provenanceList;
304}
305
306my $provenanceInsertion = 0;
307my %provenanceStringToIndex = ();
308my %directoryToProvenanceInsertion = ();
309
310sub main {
311
1
20003
    my ($sarif_template_file, $sarif_template_overlay_file, $category) = @_;
312
1
6
    unless (-f $sarif_template_file) {
313
0
0
        warn "Could not find sarif template";
314
0
0
        return '';
315    }
316
317
1
3
    $ENV{GITHUB_SERVER_URL} = '' unless defined $ENV{GITHUB_SERVER_URL};
318
1
4
    $ENV{GITHUB_REPOSITORY} = '' unless defined $ENV{GITHUB_REPOSITORY};
319
1
5
    my $sarif_template = CheckSpelling::Util::read_file $sarif_template_file;
320
1
2
    die "sarif template is empty" unless $sarif_template;
321
322
1
0
8
0
    my $json = JSON::PP->new->utf8->pretty->sort_by(sub { $JSON::PP::a cmp $JSON::PP::b });
323
1
67
    my $sarif_json = $json->decode($sarif_template);
324
325
1
129456
    if (defined $sarif_template_overlay_file && -s $sarif_template_overlay_file) {
326
1
9
        my $merger = Hash::Merge->new();
327
1
112
        my $merge_behaviors = $merger->{'behaviors'}->{$merger->get_behavior()};
328
1
5
        my $merge_arrays = $merge_behaviors->{'ARRAY'}->{'ARRAY'};
329
330        $merge_behaviors->{'ARRAY'}->{'ARRAY'} = sub {
331
42
4211
            return $merge_arrays->(@_) if ref($_[0][0]).ref($_[1][0]);
332
42
42
13
48
            return [@{$_[1]}];
333
1
2
        };
334
335
1
2
        my $sarif_template_overlay = CheckSpelling::Util::read_file $sarif_template_overlay_file;
336
1
5
        my %runs_base = get_runs_from_sarif($sarif_json);
337
338
1
3
        my $sarif_template_hash = $json->decode($sarif_template_overlay);
339
1
1795
        my %runs_overlay = get_runs_from_sarif($sarif_template_hash);
340
1
2
        for my $run_id (keys %runs_overlay) {
341
1
2
            if (defined $runs_base{$run_id}) {
342
1
1
                my $run_base_hash = $runs_base{$run_id};
343
1
0
                my $run_overlay_hash = $runs_overlay{$run_id};
344
1
2
                for my $overlay_id (keys %$run_overlay_hash) {
345                    $run_base_hash->{$overlay_id} = $merger->merge(
346                        $run_overlay_hash->{$overlay_id},
347
1
4
                        $run_base_hash->{$overlay_id}
348                    );
349                }
350            } else {
351
0
0
                $runs_base{$run_id} = $runs_overlay{$run_id};
352            }
353        }
354        #$sarif_json->
355
1
1
21
1
        my @sarif_json_runs = @{$sarif_json->{'runs'}};
356
1
1
        foreach my $sarif_json_run (@sarif_json_runs) {
357
1
1
1
1
            my %sarif_json_run_hash=%{$sarif_json_run};
358
1
1
            next unless defined $sarif_json_run_hash{'tool'};
359
360
1
1
1
11
            my %sarif_json_run_tool_hash = %{$sarif_json_run_hash{'tool'}};
361
1
2
            next unless defined $sarif_json_run_tool_hash{'driver'};
362
363
1
1
0
3
            my %sarif_json_run_tool_driver_hash = %{$sarif_json_run_tool_hash{'driver'}};
364
1
1
            my $driver_name = $sarif_json_run_tool_driver_hash{'name'};
365            next unless defined $driver_name &&
366
1
4
                defined $sarif_json_run_tool_driver_hash{'rules'};
367
368
1
1
            my $driver_view_hash = $runs_base{$driver_name};
369
1
5
            next unless defined $driver_view_hash;
370
371
1
1
0
4
            my @sarif_json_run_tool_driver_rules = @{$sarif_json_run_tool_driver_hash{'rules'}};
372
1
1
            for my $driver_rule_number (0 .. scalar @sarif_json_run_tool_driver_rules) {
373
46
3475
                my $driver_rule = $sarif_json_run_tool_driver_rules[$driver_rule_number];
374
46
21
                my $driver_rule_id = $driver_rule->{'id'};
375                next unless defined $driver_rule_id &&
376
46
64
                    defined $driver_view_hash->{$driver_rule_id};
377
45
31
                $sarif_json_run_tool_driver_hash{'rules'}[$driver_rule_number] = $merger->merge($driver_view_hash->{$driver_rule_id}, $driver_rule);
378            }
379        }
380
1
2
        delete $sarif_template_hash->{'runs'};
381
1
4
        $sarif_json = $merger->merge($sarif_json, $sarif_template_hash);
382    }
383    {
384
1
1
1
622
0
2
        my @sarif_json_runs = @{$sarif_json->{'runs'}};
385
1
1
        foreach my $sarif_json_run (@sarif_json_runs) {
386
1
0
            my %sarif_json_run_automationDetails;
387
1
1
            $sarif_json_run_automationDetails{id} = $category;
388
1
2
            $sarif_json_run->{'automationDetails'} = \%sarif_json_run_automationDetails;
389        }
390    }
391
392
1
1
0
2
    my %sarif = %{$sarif_json};
393
394
1
2
    $sarif{'runs'}[0]{'tool'}{'driver'}{'version'} = $ENV{CHECK_SPELLING_VERSION};
395
396
1
3
    my $results = parse_warnings $ENV{warning_output};
397
1
1
    if ($results) {
398
1
1
        $sarif{'runs'}[0]{'results'} = $results;
399
1
1
        our %provenanceStringToIndex;
400
1
1
        my @provenanceList = keys %provenanceStringToIndex;
401
1
2
        generateVersionControlProvenance(\@provenanceList, $sarif{'runs'}[0]);
402
1
1
        my %codes;
403
1
2
        for my $result_ref (@$results) {
404
9
9
4
8
            my %result = %{$result_ref};
405
9
8
            $codes{$result{'ruleId'}} = 1;
406        }
407
1
1
        my $rules_ref = $sarif{'runs'}[0]{'tool'}{'driver'}{'rules'};
408
1
1
1
5
        my @rules = @{$rules_ref};
409
1
1
        my $missing_rule_definition_id = 'missing-rule-definition';
410
1
45
1
25
        my ($missing_rule_definition_ref) = grep { $_->{'id'} eq $missing_rule_definition_id } @rules;
411
1
45
1
23
        @rules = grep { defined $codes{$_->{'id'}} } @rules;
412
1
1
        my $code_index = 0;
413
1
2
1
2
        my %defined_codes = map { $_->{'id'} => $code_index++ } @rules;
414
1
3
2
2
        my @missing_codes = grep { !defined $defined_codes{$_}} keys %codes;
415
1
1
        my $missing_rule_definition_index;
416
1
1
        if (@missing_codes) {
417
1
0
            push @rules, $missing_rule_definition_ref;
418
1
2
            $missing_rule_definition_index = $defined_codes{$missing_rule_definition_id} = $code_index++;
419
1
30
            my $spellchecker = $ENV{spellchecker} || dirname(dirname(dirname(__FILE__)));
420
1
2
            my %hashes_needed_for_files = ();
421
1
0
            my %line_hashes = ();
422
1
1
            my %used_hashes = ();
423
1
1
            our %directoryToRepo;
424
1
1
            for my $missing_code (@missing_codes) {
425
1
1
                my $message = "No rule definition for `$missing_code`";
426
1
9003
                my $code_locations = `find '$spellchecker' -name '.git*' -prune -o \\( -name '*.sh' -o -name '*.pl' -o -name '*.pm' \\) -type f -print0|xargs -0 grep -n '$missing_code' | perl -pe 's<^\./><>'`;
427
1
5
                my @locations;
428
1
7
                for my $line (split /\n/, $code_locations) {
429
1
2
                    chomp $line;
430
1
12
                    my ($file, $lineno, $code) = $line =~ /^(.+?):(\d+):(.+)$/;
431
1
4
                    next unless defined $file;
432
1
29
                    $code =~ /^(.*?)\b$missing_code\b/;
433
1
3
                    my $startColumn = length($1) + 1;
434
1
2
                    my $endColumn = length($1) + length($missing_code) + 1;
435
1
5
                    my $location = {
436                        'uri' => url_encode($file),
437                        'startLine' => $lineno,
438                        'startColumn' => $startColumn,
439                        'endColumn' => $endColumn,
440                    };
441
1
103
                    my $relative = File::Spec->abs2rel($file, $spellchecker);
442
1
23
                    print STDERR "::notice title=${missing_rule_definition_id}::$relative:$lineno:$startColumn ... $endColumn, Notice - $message ($missing_rule_definition_id)\n";
443
1
2
                    push @locations, $location;
444
1
3
                    my $encoded_file = url_encode $file;
445
1
3
                    $encoded_files{$encoded_file} = $file;
446
1
4
                    addToHashesNeededForFiles($file, $lineno, $startColumn, $message, \%hashes_needed_for_files);
447                }
448
1
3
                hashFiles(\%hashes_needed_for_files, \%line_hashes, \%directoryToRepo, \%used_hashes);
449
1
4
                my $fingerprintResults = fingerprintLocations(\@locations, \%encoded_files, \%encoded_files, \%line_hashes, $message, Digest::SHA::sha1_base64($message));
450
1
1
1
2
                my @locations_json = @{$fingerprintResults->{locations_json}};
451
1
1
1
3
                my @fingerprints = @{$fingerprintResults->{fingerprints}};
452
1
2
                my $locations_json_flat = join ',', @locations_json;
453
1
1
                my $partialFingerprints = '';
454
1
2
                my $locations = $locations_json_flat ? qq<, "locations": [ $locations_json_flat ]> : '';
455
1
2
                my $result_json = qq<{"ruleId": "$missing_rule_definition_id", $partialFingerprints "message": { "text": "$message" }$locations }>;
456
1
5
                my $result = decode_json $result_json;
457
1
1
1040
6
                push @{$results}, $result;
458            }
459        }
460
1
1
        $sarif{'runs'}[0]{'tool'}{'driver'}{'rules'} = \@rules;
461
1
1
1
2
        for my $result_index (0 .. scalar @{$results}) {
462
11
7
            my $result = $results->[$result_index];
463
11
7
            my $ruleId = $result->{'ruleId'};
464
11
17
            next if defined $ruleId && defined $defined_codes{$ruleId};
465
2
86
            $result->{'ruleId'} = $missing_rule_definition_id;
466        }
467    }
468
469
1
8
    return JSON::PP->new->canonical([1])->utf8->encode(\%sarif);
470}
471
4721;