File Coverage

File: lib/CheckSpelling/Sarif.pm
Coverage: 86.8%

line  stmt  bran  cond  sub  time  code
1#! -*-perl-*-
2
3package CheckSpelling::Sarif;
4
5our $VERSION='0.1.0';
6our $flatten=0;
7
8
1
1
1
110994
1
2
use utf8;
9
10
1
1
1
13
1
22
use File::Basename;
11
1
1
1
3
1
15
use File::Spec;
12
1
1
1
424
1287
62
use Digest::SHA qw($errmsg);
13
1
1
1
2
1
23
use JSON::PP;
14
1
1
1
179
3814
27
use Hash::Merge qw( merge );
15
1
1
1
170
3
17
use CheckSpelling::Util;
16
1
1
1
184
1
1876
use CheckSpelling::GitSources;
17
18sub encode_low_ascii {
19
12
176
    $_ = shift;
20
12
1
11
3
    s/([\x{0}-\x{9}\x{0b}\x{1f}#%])/"\\u".sprintf("%04x",ord($1))/eg;
21
12
11
    return $_;
22}
23
24sub url_encode {
25
11
13
    $_ = shift;
26
11
0
9
0
    s<([^-!\$&'()*+,/:;=?\@A-Za-z0-9_.~])><"%".sprintf("%02x",ord($1))>eg;
27
11
14
    return $_;
28}
29
30sub double_slash_escape {
31
11
4
    $_ = shift;
32
11
50
    s/(["()\[\]\\])/\\\\$1/g;
33
11
13
    return $_;
34}
35
36sub fingerprintLocations {
37
7
8
    my ($locations, $encoded_files_ref, $line_hashes_ref, $hashes_needed_for_files_ref, $message, $hashed_message) = @_;
38
7
2
    my @locations_json = ();
39
7
5
    my @fingerprints = ();
40
7
6
    for my $location (@$locations) {
41
10
6
        my $encoded_file = $location->{uri};
42
10
4
        my $line = $location->{startLine};
43
10
7
        my $column = $location->{startColumn};
44
10
3
        my $endColumn = $location->{endColumn};
45
10
9
        my $partialFingerprint = '';
46
10
6
        my $file = $encoded_files_ref->{$encoded_file};
47
10
21
        if (defined $line_hashes_ref->{$file}) {
48
8
11
            my $line_hash = $line_hashes_ref->{$file}{$line};
49
8
4
            if (defined $line_hash) {
50
4
4
4
5
                my @instances = sort keys %{$hashes_needed_for_files_ref->{$file}{$line}{$hashed_message}};
51
4
3
                my $hit = scalar @instances;
52
4
4
                while (--$hit > 0) {
53
0
0
                    last if $instances[$hit] == $column;
54                }
55
4
10
                $partialFingerprint = Digest::SHA::sha1_base64("$line_hash:$message:$hit");
56            }
57        }
58
10
5
        push @fingerprints, $partialFingerprint;
59
10
11
        my $startColumn = $column ? qq<, "startColumn": $column> : '';
60
10
7
        $endColumn = $endColumn ? qq<, "endColumn": $endColumn> : '';
61
10
6
        $line = 1 unless $line;
62
10
9
        my $json_fragment = qq<{ "physicalLocation": { "artifactLocation": { "uri": "$encoded_file", "uriBaseId": "%SRCROOT%" }, "region": { "startLine": $line$startColumn$endColumn } } }>;
63
10
9
        push @locations_json, $json_fragment;
64    }
65
7
14
    return { locations_json => \@locations_json, fingerprints => \@fingerprints };
66}
67
68sub hashFiles {
69
2
2
    my ($hashes_needed_for_files_ref, $line_hashes_ref, $directoryToRepo_ref, $used_hashes_ref) = @_;
70
2
8
    for my $file (sort keys %$hashes_needed_for_files_ref) {
71
4
4
        $line_hashes_ref->{$file} = ();
72
4
21
        unless (-e $file) {
73
2
3
            delete $hashes_needed_for_files_ref->{$file};
74
2
1
            next;
75        }
76
2
2
2
3
        my @lines = sort (keys %{$hashes_needed_for_files_ref->{$file}});
77
2
50
        unless (defined $directoryToRepo_ref->{dirname($file)}) {
78
2
9
            my ($parsed_file, $git_base_dir, $prefix, $remote_url, $rev, $branch) = CheckSpelling::GitSources::git_source_and_rev($file);
79        }
80
2
36
        open $file_fh, '<', $file;
81
2
4
        my $line = shift @lines;
82
2
3
        $line = 2 if $line == 1;
83
2
3
        my $buffer = '';
84
2
11
        while (<$file_fh>) {
85
10
11
            if ($line == $.) {
86
5
3
                my $sample = substr $buffer, -100, 100;
87
5
10
                my $hash = Digest::SHA::sha1_base64($sample);
88
5
3
                for (; $line == $.; $line = shift @lines) {
89
6
9
                    my $hit = $used_hashes_ref->{$hash}++;
90
6
15
                    $hash = "$hash:$hit" if $hit;
91
6
7
                    $line_hashes_ref->{$file}{$line} = $hash;
92
6
6
                    last unless @lines;
93                }
94            }
95
10
5
            $buffer .= $_;
96
10
25
            $buffer =~ s/\s+/ /g;
97
10
12
            $buffer = substr $buffer, -100, 100;
98        }
99
2
8
        close $file_fh;
100    }
101}
102
103sub addToHashesNeededForFiles {
104
10
8
    my ($file, $line, $column, $message, $hashes_needed_for_files_ref) = @_;
105
10
27
    my $hashed_message = Digest::SHA::sha1_base64($message);
106
10
11
    $hashes_needed_for_files_ref->{$file} = () unless defined $hashes_needed_for_files_ref->{$file};
107
10
20
    $hashes_needed_for_files_ref->{$file}{$line} = () unless defined $hashes_needed_for_files_ref->{$file}{$line};
108
10
17
    $hashes_needed_for_files_ref->{$file}{$line}{$hashed_message} = () unless defined $hashes_needed_for_files_ref->{$file}{$line}{$hashed_message};
109
10
19
    $hashes_needed_for_files_ref->{$file}{$line}{$hashed_message}{$column} = '1';
110}
111
112sub encode_message {
113
11
9
    my ($message) = @_;
114
115    # single-slash-escape `"` and `\`
116
11
19
    $message =~ s/(["\\])/\\$1/g;
117    # encode `message` to protect against low ascii`
118
11
9
    $message = encode_low_ascii $message;
119    # double-slash-escape `"`, `(`, `)`, `]`
120
11
9
    $message = double_slash_escape $message;
121    # hack to make the first `...` identifier a link (that goes nowhere, but is probably blue and underlined) in GitHub's SARIF view
122
11
15
    if ($message =~ /(`{2,})/) {
123
2
2
        my $backticks = $1;
124
2
16
        while ($message =~ /($backticks`+)(?=[`].*?\g{-1})/gs) {
125
0
0
            $backticks = $1 if length($1) > length($backticks);
126        }
127
2
14
        $message =~ s/(^|[^\\])$backticks(.+?)$backticks/${1}[${2}](#security-tab)/;
128    } else {
129
9
32
        $message =~ s/(^|[^\\])\`((?:[^`\\]|\\(?!`))+)\`/${1}[${2}](#security-tab)/;
130    }
131
132    # replace '`' with `\`+`'` because GitHub's SARIF parser doesn't like them
133
11
16
    $message =~ s/\`/'/g;
134
11
8
    return $message;
135}
136
137sub parse_warnings {
138
1
1
    my ($warnings) = @_;
139
1
1
    our $flatten;
140
1
0
    our %directoryToRepo;
141
1
0
    our $provenanceInsertion;
142
1
1
    our %provenanceStringToIndex;
143
1
0
    our %directoryToProvenanceInsertion;
144
1
1
    my @results;
145
1
11
    unless (open WARNINGS, '<', $warnings) {
146
0
0
        print STDERR "Could not open $warnings\n";
147
0
0
        return [];
148    }
149
1
1
    my $rules = ();
150
1
0
    my %encoded_files = ();
151
1
1
    my %hashes_needed_for_files = ();
152
1
45
    while (<WARNINGS>) {
153
11
13
        next if m{^https://};
154
10
31
        next unless m{^(.+):(\d+):(\d+) \.\.\. (\d+),\s(Error|Warning|Notice)\s-\s(.+\s\((.+)\))$};
155
9
25
        my ($file, $line, $column, $endColumn, $severity, $message, $code) = ($1, $2, $3, $4, $5, $6, $7);
156
9
97
        my $directory = dirname($file);
157
9
7
        unless (defined $directoryToProvenanceInsertion{$directory}) {
158
2
1
            my $provenanceString = collectVersionControlProvenance($file);
159
2
154
            if ($provenanceString) {
160
2
4
                if (defined $provenanceStringToIndex{$provenanceString}) {
161
0
0
                    $directoryToProvenanceInsertion{$directory} = $provenanceStringToIndex{$provenanceString};
162                } else {
163
2
2
                    $provenanceStringToIndex{$provenanceString} = $provenanceInsertion;
164
2
4
                    $directoryToProvenanceInsertion{$directory} = $provenanceInsertion;
165
2
2
                    ++$provenanceInsertion;
166                }
167            }
168        }
169        # encode `file` to protect against low ascii`
170
9
77
        my $encoded_file = url_encode $file;
171
9
10
        $encoded_files{$encoded_file} = $file;
172
173
9
6
        $message = encode_message($message);
174
175
9
29
        unless (defined $rules->{$code}) {
176
3
8
            $rules->{$code} = {};
177        }
178
9
7
        my $rule = $rules->{$code};
179
9
5
        unless (defined $rule->{$message}) {
180
6
15
            $rule->{$message} = [];
181        }
182
9
8
        addToHashesNeededForFiles($file, $line, $column, $message, \%hashes_needed_for_files);
183
9
8
        my $locations = $rule->{$message};
184
9
15
        my $physicalLocation = {
185            'uri' => $encoded_file,
186            'startLine' => $line,
187            'startColumn' => $column,
188            'endColumn' => $endColumn,
189        };
190
9
7
        push @$locations, $physicalLocation;
191
9
24
        $rule->{$message} = $locations;
192    }
193
1
2
    my %line_hashes = ();
194
1
1
    my %used_hashes = ();
195
1
2
    hashFiles(\%hashes_needed_for_files, \%line_hashes, \%directoryToRepo, \%used_hashes);
196
1
1
1
3
    for my $code (sort keys %{$rules}) {
197
3
2
        my $rule = $rules->{$code};
198
3
3
1
5
        for my $message (sort keys %{$rule}) {
199
6
11
            my $hashed_message = Digest::SHA::sha1_base64($message);
200
6
3
            my $locations = $rule->{$message};
201
6
6
            my $fingerprintResults = fingerprintLocations($locations, \%encoded_files, \%line_hashes, \%hashes_needed_for_files, $message, $hashed_message);
202
6
6
5
2
            my @locations_json = @{$fingerprintResults->{locations_json}};
203
6
6
4
2
            my @fingerprints = @{$fingerprintResults->{fingerprints}};
204
6
6
            if ($flatten) {
205
0
0
                my $locations_json_flat = join ',', @locations_json;
206
0
0
                my $partialFingerprints;
207
0
0
                my $partialFingerprint = (sort @fingerprints)[0];
208
0
0
                if ($partialFingerprint ne '') {
209
0
0
                    $partialFingerprints = qq<"partialFingerprints": { "cs0" : "$partialFingerprint" },>;
210                }
211
0
0
                $message =~ s/\s\\\\\([^()]+?\\\)$//g;
212
0
0
                my $result_json = qq<{"ruleId": "$code", $partialFingerprints "message": { "text": "$message" }, "locations": [ $locations_json_flat ] }>;
213
0
0
                my $result = decode_json $result_json;
214
0
0
                push @results, $result;
215            } else {
216
6
1
                my $limit = scalar @locations_json;
217
6
6
                for (my $i = 0; $i < $limit; ++$i) {
218
9
7
                    my $locations_json_flat = $locations_json[$i];
219
9
2
                    my $partialFingerprints = '';
220
9
6
                    my $partialFingerprint = $fingerprints[$i];
221
9
4
                    if ($partialFingerprint ne '') {
222
4
4
                        $partialFingerprints = qq<"partialFingerprints": { "cs0" : "$partialFingerprint" },>;
223                    }
224
9
20
                    $message =~ s/\s\\\\\([^()]+?\\\)$//g;
225
9
7
                    my $result_json = qq<{"ruleId": "$code", $partialFingerprints "message": { "text": "$message" }, "locations": [ $locations_json_flat ] }>;
226
9
7
                    my $result = decode_json $result_json;
227
9
9703
                    push @results, $result;
228                }
229            }
230        }
231    }
232
1
5
    close WARNINGS;
233
1
8
    return \@results;
234}
235
236sub get_runs_from_sarif {
237
2
2
    my ($sarif_json) = @_;
238
2
1
    my %runs_view;
239
2
3
    return %runs_view unless $sarif_json->{'runs'};
240
2
2
0
17
    my @sarif_json_runs=@{$sarif_json->{'runs'}};
241
2
4
    foreach my $sarif_json_run (@sarif_json_runs) {
242
2
2
1
3
        my %sarif_json_run_hash=%{$sarif_json_run};
243
2
2
        next unless defined $sarif_json_run_hash{'tool'};
244
245
2
2
1
2
        my %sarif_json_run_tool_hash = %{$sarif_json_run_hash{'tool'}};
246
2
2
        next unless defined $sarif_json_run_tool_hash{'driver'};
247
248
2
2
2
3
        my %sarif_json_run_tool_driver_hash = %{$sarif_json_run_tool_hash{'driver'}};
249        next unless defined $sarif_json_run_tool_driver_hash{'name'} &&
250
2
4
            defined $sarif_json_run_tool_driver_hash{'rules'};
251
252
2
2
        my $driver_name = $sarif_json_run_tool_driver_hash{'name'};
253
2
2
6
4
        my @sarif_json_run_tool_driver_rules = @{$sarif_json_run_tool_driver_hash{'rules'}};
254
2
1
        my %driver_view;
255
2
2
        for my $driver_rule (@sarif_json_run_tool_driver_rules) {
256
42
21
            next unless defined $driver_rule->{'id'};
257
42
46
            $driver_view{$driver_rule->{'id'}} = $driver_rule;
258        }
259
2
3
        $runs_view{$sarif_json_run_tool_driver_hash{'name'}} = \%driver_view;
260    }
261
2
3
    return %runs_view;
262}
263
264sub collectVersionControlProvenance {
265
2
2
    my ($file) = @_;
266
2
6
    my ($parsed_file, $git_base_dir, $prefix, $remote_url, $rev, $branch) = CheckSpelling::GitSources::git_source_and_rev($file);
267
2
8
    return '' unless $remote_url;
268
2
1
    my $base = substr $parsed_file, 0, length($file);
269
2
2
    my $provenance = [$remote_url, $rev, $branch, $git_base_dir];
270
2
9
    return JSON::PP::encode_json($provenance);
271}
272
273sub generateVersionControlProvenance {
274
1
1
    my ($versionControlProvenanceList, $run) = @_;
275
1
0
    my %provenance;
276    sub buildVersionControlProvenance {
277
1
1
        my $d = $_;
278
1
1
0
1
        my ($remote_url, $rev, $branch, $git_base_dir) = @{JSON::PP::decode_json($d)};
279
1
299
        my $dir = $git_base_dir eq '.' ? '%SRCROOT%' : "DIR_$provenanceStringToIndex{$d}";
280
1
1
        my $mappedTo = {
281            "uriBaseId" => $dir
282        };
283
1
3
        my $versionControlProvenance = {
284            "mappedTo" => $mappedTo
285        };
286
1
2
        $versionControlProvenance->{"revisionId"} = $rev if defined $rev;
287
1
2
        $versionControlProvenance->{"branch"} = $branch if defined $branch;
288
1
2
        $versionControlProvenance->{"repositoryUri"} = $remote_url if defined $remote_url;
289
1
1
        return $versionControlProvenance;
290    }
291
1
1
    @provenanceList = map(buildVersionControlProvenance,@$versionControlProvenanceList);
292
1
1
    $run->{"versionControlProvenance"} = \@provenanceList;
293}
294
295my $provenanceInsertion = 0;
296my %provenanceStringToIndex = ();
297my %directoryToProvenanceInsertion = ();
298
299sub main {
300
1
21765
    my ($sarif_template_file, $sarif_template_overlay_file, $category) = @_;
301
1
6
    unless (-f $sarif_template_file) {
302
0
0
        warn "Could not find sarif template";
303
0
0
        return '';
304    }
305
306
1
2
    $ENV{GITHUB_SERVER_URL} = '' unless defined $ENV{GITHUB_SERVER_URL};
307
1
1
    $ENV{GITHUB_REPOSITORY} = '' unless defined $ENV{GITHUB_REPOSITORY};
308
1
7
    my $sarif_template = CheckSpelling::Util::read_file $sarif_template_file;
309
1
3
    die "sarif template is empty" unless $sarif_template;
310
311
1
0
10
0
    my $json = JSON::PP->new->utf8->pretty->sort_by(sub { $JSON::PP::a cmp $JSON::PP::b });
312
1
152
    my $sarif_json = $json->decode($sarif_template);
313
314
1
114366
    if (defined $sarif_template_overlay_file && -s $sarif_template_overlay_file) {
315
1
8
        my $merger = Hash::Merge->new();
316
1
108
        my $merge_behaviors = $merger->{'behaviors'}->{$merger->get_behavior()};
317
1
8
        my $merge_arrays = $merge_behaviors->{'ARRAY'}->{'ARRAY'};
318
319        $merge_behaviors->{'ARRAY'}->{'ARRAY'} = sub {
320
38
4011
            return $merge_arrays->(@_) if ref($_[0][0]).ref($_[1][0]);
321
38
38
12
50
            return [@{$_[1]}];
322
1
5
        };
323
324
1
1
        my $sarif_template_overlay = CheckSpelling::Util::read_file $sarif_template_overlay_file;
325
1
1
        my %runs_base = get_runs_from_sarif($sarif_json);
326
327
1
2
        my $sarif_template_hash = $json->decode($sarif_template_overlay);
328
1
1794
        my %runs_overlay = get_runs_from_sarif($sarif_template_hash);
329
1
3
        for my $run_id (keys %runs_overlay) {
330
1
1
            if (defined $runs_base{$run_id}) {
331
1
1
                my $run_base_hash = $runs_base{$run_id};
332
1
1
                my $run_overlay_hash = $runs_overlay{$run_id};
333
1
1
                for my $overlay_id (keys %$run_overlay_hash) {
334                    $run_base_hash->{$overlay_id} = $merger->merge(
335                        $run_overlay_hash->{$overlay_id},
336
1
3
                        $run_base_hash->{$overlay_id}
337                    );
338                }
339            } else {
340
0
0
                $runs_base{$run_id} = $runs_overlay{$run_id};
341            }
342        }
343        #$sarif_json->
344
1
1
20
1
        my @sarif_json_runs = @{$sarif_json->{'runs'}};
345
1
1
        foreach my $sarif_json_run (@sarif_json_runs) {
346
1
1
0
2
            my %sarif_json_run_hash=%{$sarif_json_run};
347
1
1
            next unless defined $sarif_json_run_hash{'tool'};
348
349
1
1
1
1
            my %sarif_json_run_tool_hash = %{$sarif_json_run_hash{'tool'}};
350
1
1
            next unless defined $sarif_json_run_tool_hash{'driver'};
351
352
1
1
1
2
            my %sarif_json_run_tool_driver_hash = %{$sarif_json_run_tool_hash{'driver'}};
353
1
0
            my $driver_name = $sarif_json_run_tool_driver_hash{'name'};
354            next unless defined $driver_name &&
355
1
7
                defined $sarif_json_run_tool_driver_hash{'rules'};
356
357
1
8
            my $driver_view_hash = $runs_base{$driver_name};
358
1
3
            next unless defined $driver_view_hash;
359
360
1
1
1
3
            my @sarif_json_run_tool_driver_rules = @{$sarif_json_run_tool_driver_hash{'rules'}};
361
1
1
            for my $driver_rule_number (0 .. scalar @sarif_json_run_tool_driver_rules) {
362
42
2961
                my $driver_rule = $sarif_json_run_tool_driver_rules[$driver_rule_number];
363
42
15
                my $driver_rule_id = $driver_rule->{'id'};
364                next unless defined $driver_rule_id &&
365
42
61
                    defined $driver_view_hash->{$driver_rule_id};
366
41
27
                $sarif_json_run_tool_driver_hash{'rules'}[$driver_rule_number] = $merger->merge($driver_view_hash->{$driver_rule_id}, $driver_rule);
367            }
368        }
369
1
2
        delete $sarif_template_hash->{'runs'};
370
1
1
        $sarif_json = $merger->merge($sarif_json, $sarif_template_hash);
371    }
372    {
373
1
1
1
597
0
1
        my @sarif_json_runs = @{$sarif_json->{'runs'}};
374
1
1
        foreach my $sarif_json_run (@sarif_json_runs) {
375
1
0
            my %sarif_json_run_automationDetails;
376
1
1
            $sarif_json_run_automationDetails{id} = $category;
377
1
3
            $sarif_json_run->{'automationDetails'} = \%sarif_json_run_automationDetails;
378        }
379    }
380
381
1
1
0
2
    my %sarif = %{$sarif_json};
382
383
1
2
    $sarif{'runs'}[0]{'tool'}{'driver'}{'version'} = $ENV{CHECK_SPELLING_VERSION};
384
385
1
3
    my $results = parse_warnings $ENV{warning_output};
386
1
1
    if ($results) {
387
1
4
        $sarif{'runs'}[0]{'results'} = $results;
388
1
1
        our %provenanceStringToIndex;
389
1
1
        my @provenanceList = keys %provenanceStringToIndex;
390
1
1
        generateVersionControlProvenance(\@provenanceList, $sarif{'runs'}[0]);
391
1
1
        my %codes;
392
1
2
        for my $result_ref (@$results) {
393
9
9
4
7
            my %result = %{$result_ref};
394
9
8
            $codes{$result{'ruleId'}} = 1;
395        }
396
1
1
        my $rules_ref = $sarif{'runs'}[0]{'tool'}{'driver'}{'rules'};
397
1
1
1
5
        my @rules = @{$rules_ref};
398
1
1
        my $missing_rule_definition_id = 'missing-rule-definition';
399
1
41
1
22
        my ($missing_rule_definition_ref) = grep { $_->{'id'} eq $missing_rule_definition_id } @rules;
400
1
41
1
21
        @rules = grep { defined $codes{$_->{'id'}} } @rules;
401
1
1
        my $code_index = 0;
402
1
2
1
2
        my %defined_codes = map { $_->{'id'} => $code_index++ } @rules;
403
1
3
2
2
        my @missing_codes = grep { !defined $defined_codes{$_}} keys %codes;
404
1
1
        my $missing_rule_definition_index;
405
1
1
        if (@missing_codes) {
406
1
1
            push @rules, $missing_rule_definition_ref;
407
1
1
            $missing_rule_definition_index = $defined_codes{$missing_rule_definition_id} = $code_index++;
408
1
28
            my $spellchecker = $ENV{spellchecker} || dirname(dirname(dirname(__FILE__)));
409
1
3
            my %hashes_needed_for_files = ();
410
1
0
            my %line_hashes = ();
411
1
1
            my %used_hashes = ();
412
1
0
            our %directoryToRepo;
413
1
1
            for my $missing_code (@missing_codes) {
414
1
1
                my $message = "No rule definition for `$missing_code`";
415
1
9833
                my $code_locations = `find '$spellchecker' -name '.git*' -prune -o \\( -name '*.sh' -o -name '*.pl' -o -name '*.pm' \\) -type f -print0|xargs -0 grep -n '$missing_code' | perl -pe 's<^\./><>'`;
416
1
7
                my @locations;
417
1
8
                for my $line (split /\n/, $code_locations) {
418
1
2
                    chomp $line;
419
1
13
                    my ($file, $lineno, $code) = $line =~ /^(.+?):(\d+):(.+)$/;
420
1
4
                    next unless defined $file;
421
1
30
                    $code =~ /^(.*?)\b$missing_code\b/;
422
1
5
                    my $startColumn = length($1) + 1;
423
1
2
                    my $endColumn = length($1) + length($missing_code) + 1;
424
1
5
                    my $location = {
425                        'uri' => url_encode($file),
426                        'startLine' => $lineno,
427                        'startColumn' => $startColumn,
428                        'endColumn' => $endColumn,
429                    };
430
1
99
                    my $relative = File::Spec->abs2rel($file, $spellchecker);
431
1
19
                    print STDERR "::notice title=${missing_rule_definition_id}::$relative:$lineno:$startColumn ... $endColumn, Notice - $message ($missing_rule_definition_id)\n";
432
1
3
                    push @locations, $location;
433
1
1
                    my $encoded_file = url_encode $file;
434
1
5
                    $encoded_files{$encoded_file} = $file;
435
1
3
                    addToHashesNeededForFiles($file, $lineno, $startColumn, $message, \%hashes_needed_for_files);
436                }
437
1
3
                hashFiles(\%hashes_needed_for_files, \%line_hashes, \%directoryToRepo, \%used_hashes);
438
1
3
                my $fingerprintResults = fingerprintLocations(\@locations, \%encoded_files, \%encoded_files, \%line_hashes, $message, Digest::SHA::sha1_base64($message));
439
1
1
1
1
                my @locations_json = @{$fingerprintResults->{locations_json}};
440
1
1
2
1
                my @fingerprints = @{$fingerprintResults->{fingerprints}};
441
1
3
                my $locations_json_flat = join ',', @locations_json;
442
1
1
                my $partialFingerprints = '';
443
1
1
                my $locations = $locations_json_flat ? qq<, "locations": [ $locations_json_flat ]> : '';
444
1
5
                my $result_json = qq<{"ruleId": "$missing_rule_definition_id", $partialFingerprints "message": { "text": "$message" }$locations }>;
445
1
6
                my $result = decode_json $result_json;
446
1
1
1080
6
                push @{$results}, $result;
447            }
448        }
449
1
2
        $sarif{'runs'}[0]{'tool'}{'driver'}{'rules'} = \@rules;
450
1
1
1
3
        for my $result_index (0 .. scalar @{$results}) {
451
11
10
            my $result = $results->[$result_index];
452
11
7
            my $ruleId = $result->{'ruleId'};
453
11
21
            next if defined $ruleId && defined $defined_codes{$ruleId};
454
2
90
            $result->{'ruleId'} = $missing_rule_definition_id;
455        }
456    }
457
458
1
6
    return JSON::PP->new->canonical([1])->utf8->encode(\%sarif);
459}
460
4611;