| File: | lib/CheckSpelling/Sarif.pm |
|---|---|
| Coverage: | 86.5% |

| line | stmt | bran | cond | sub | time | code |
|---|---|---|---|---|---|---|
| 1 | #! -*-perl-*- | |||||
| 2 | ||||||
| 3 | package CheckSpelling::Sarif; | |||||
| 4 | ||||||
| 5 | our $VERSION='0.1.0'; | |||||
| 6 | our $flatten=0; | |||||
| 7 | ||||||
| 8 | 1 1 1 | 106208 2 2 | use utf8; | |||
| 9 | ||||||
| 10 | 1 1 1 | 13 1 46 | use File::Basename; | |||
| 11 | 1 1 1 | 193 1217 71 | use Digest::SHA qw($errmsg); | |||
| 12 | 1 1 1 | 2 1 25 | use JSON::PP; | |||
| 13 | 1 1 1 | 170 3741 25 | use Hash::Merge qw( merge ); | |||
| 14 | 1 1 1 | 139 0 18 | use CheckSpelling::Util; | |||
| 15 | 1 1 1 | 146 0 1774 | use CheckSpelling::GitSources; | |||
| 16 | ||||||
| 17 | sub encode_low_ascii { | |||||
| 18 | 9 | 203 | local $_ = shift; | |||
| 19 | 9 1 | 10 2 | s/([\x{0}-\x{9}\x{0b}\x{1f}#%])/"\\u".sprintf("%04x",ord($1))/eg; | |||
| 20 | 9 | 8 | return $_; | |||
| 21 | } | |||||
| 22 | ||||||
| 23 | sub url_encode { | |||||
| 24 | 10 | 9 | local $_ = shift; | |||
| 25 | 10 0 | 12 0 | s<([^-!\$&'()*+,/:;=?\@A-Za-z0-9_.~])><"%".sprintf("%02x",ord($1))>eg; | |||
| 26 | 10 | 12 | return $_; | |||
| 27 | } | |||||
| 28 | ||||||
| 29 | sub double_slash_escape { | |||||
| 30 | 8 | 6 | local $_ = shift; | |||
| 31 | 8 | 26 | s/(["()\]\\])/\\\\$1/g; | |||
| 32 | 8 | 6 | return $_; | |||
| 33 | } | |||||
| 34 | ||||||
| 35 | sub fingerprintLocations { # build per-location JSON fragments and partial fingerprints | |||||
| 36 | 6 | 7 | my ($locations, $encoded_files_ref, $line_hashes_ref, $hashes_needed_for_files_ref, $message, $hashed_message) = @_; | |||
| 37 | 6 | 4 | my @locations_json = (); | |||
| 38 | 6 | 2 | my @fingerprints = (); | |||
| 39 | 6 | 7 | for my $location (@$locations) { | |||
| 40 | 9 | 5 | my $encoded_file = $location->{uri}; | |||
| 41 | 9 | 6 | my $line = $location->{startLine}; | |||
| 42 | 9 | 6 | my $column = $location->{startColumn}; | |||
| 43 | 9 | 5 | my $endColumn = $location->{endColumn}; | |||
| 44 | 9 | 6 | my $partialFingerprint = ''; | |||
| 45 | 9 | 6 | my $file = $encoded_files_ref->{$encoded_file}; | |||
| 46 | 9 | 5 | if (defined $line_hashes_ref->{$file}) { | |||
| 47 | 8 | 12 | my $line_hash = $line_hashes_ref->{$file}{$line}; | |||
| 48 | 8 | 7 | if (defined $line_hash) { | |||
| 49 | 4 4 | 1 8 | my @instances = sort { $a <=> $b } keys %{$hashes_needed_for_files_ref->{$file}{$line}{$hashed_message}}; # columns are numeric | |||
| 50 | 4 | 3 | my $hit = scalar @instances; | |||
| 51 | 4 | 3 | while (--$hit > 0) { | |||
| 52 | 0 | 0 | last if $instances[$hit] == $column; | |||
| 53 | } | |||||
| 54 | 4 | 8 | $partialFingerprint = Digest::SHA::sha1_base64("$line_hash:$message:$hit"); | |||
| 55 | } | |||||
| 56 | } | |||||
| 57 | 9 | 7 | push @fingerprints, $partialFingerprint; | |||
| 58 | 9 | 9 | my $startColumn = $column ? qq<, "startColumn": $column> : ''; | |||
| 59 | 9 | 9 | $endColumn = $endColumn ? qq<, "endColumn": $endColumn> : ''; | |||
| 60 | 9 | 6 | $line = 1 unless $line; | |||
| 61 | 9 | 10 | my $json_fragment = qq<{ "physicalLocation": { "artifactLocation": { "uri": "$encoded_file", "uriBaseId": "%SRCROOT%" }, "region": { "startLine": $line$startColumn$endColumn } } }>; | |||
| 62 | 9 | 8 | push @locations_json, $json_fragment; | |||
| 63 | } | |||||
| 64 | 6 | 14 | return { locations_json => \@locations_json, fingerprints => \@fingerprints }; | |||
| 65 | } | |||||
| 66 | ||||||
| 67 | sub hashFiles { # hash ~100 chars of normalized context preceding each warned line | |||||
| 68 | 2 | 4 | my ($hashes_needed_for_files_ref, $line_hashes_ref, $directoryToRepo_ref, $used_hashes_ref) = @_; | |||
| 69 | 2 | 10 | for my $file (sort keys %$hashes_needed_for_files_ref) { | |||
| 70 | 3 | 2 | $line_hashes_ref->{$file} = {}; | |||
| 71 | 3 | 21 | unless (-e $file) { | |||
| 72 | 1 | 3 | delete $hashes_needed_for_files_ref->{$file}; | |||
| 73 | 1 | 1 | next; | |||
| 74 | } | |||||
| 75 | 2 2 | 3 6 | my @lines = sort { $a <=> $b } keys %{$hashes_needed_for_files_ref->{$file}}; # numeric order: the file is read sequentially | |||
| 76 | 2 | 66 | unless (defined $directoryToRepo_ref->{dirname($file)}) { | |||
| 77 | 2 | 8 | my ($parsed_file, $git_base_dir, $prefix, $remote_url, $rev, $branch) = CheckSpelling::GitSources::git_source_and_rev($file); | |||
| 78 | } | |||||
| 79 | 2 | 28 | open my $file_fh, '<', $file or next; | |||
| 80 | 2 | 3 | my $line = shift @lines; | |||
| 81 | 2 | 4 | $line = 2 if $line == 1; | |||
| 82 | 2 | 4 | my $buffer = ''; | |||
| 83 | 2 | 14 | while (<$file_fh>) { | |||
| 84 | 108 | 66 | if ($line == $.) { | |||
| 85 | 5 | 5 | my $sample = substr $buffer, -100, 100; | |||
| 86 | 5 | 7 | my $hash = Digest::SHA::sha1_base64($sample); | |||
| 87 | 5 | 4 | for (; $line == $.; $line = shift @lines) { | |||
| 88 | 6 | 8 | my $hit = $used_hashes_ref->{$hash}++; | |||
| 89 | 6 | 7 | $hash = "$hash:$hit" if $hit; | |||
| 90 | 6 | 3 | $line_hashes_ref->{$file}{$line} = $hash; | |||
| 91 | 6 | 7 | last unless @lines; | |||
| 92 | } | |||||
| 93 | } | |||||
| 94 | 108 | 43 | $buffer .= $_; | |||
| 95 | 108 | 314 | $buffer =~ s/\s+/ /g; | |||
| 96 | 108 | 116 | $buffer = substr $buffer, -100, 100; | |||
| 97 | } | |||||
| 98 | 2 | 8 | close $file_fh; | |||
| 99 | } | |||||
| 100 | } | |||||
| 101 | ||||||
| 102 | sub addToHashesNeededForFiles { # record that (file, line, column, message) needs a context hash | |||||
| 103 | 9 | 13 | my ($file, $line, $column, $message, $hashes_needed_for_files_ref) = @_; | |||
| 104 | 9 | 28 | my $hashed_message = Digest::SHA::sha1_base64($message); | |||
| 105 | 9 | 14 | $hashes_needed_for_files_ref->{$file} = {} unless defined $hashes_needed_for_files_ref->{$file}; | |||
| 106 | 9 | 20 | $hashes_needed_for_files_ref->{$file}{$line} = {} unless defined $hashes_needed_for_files_ref->{$file}{$line}; | |||
| 107 | 9 | 16 | $hashes_needed_for_files_ref->{$file}{$line}{$hashed_message} = {} unless defined $hashes_needed_for_files_ref->{$file}{$line}{$hashed_message}; | |||
| 108 | 9 | 19 | $hashes_needed_for_files_ref->{$file}{$line}{$hashed_message}{$column} = '1'; | |||
| 109 | } | |||||
| 110 | ||||||
| 111 | sub parse_warnings { | |||||
| 112 | 1 | 2 | my ($warnings) = @_; | |||
| 113 | 1 | 0 | our $flatten; | |||
| 114 | 1 | 0 | our %directoryToRepo; | |||
| 115 | 1 | 1 | our $provenanceInsertion; | |||
| 116 | 1 | 0 | our %provenanceStringToIndex; | |||
| 117 | 1 | 1 | our %directoryToProvenanceInsertion; | |||
| 118 | 1 | 0 | my @results; | |||
| 119 | 1 | 14 | unless (open WARNINGS, '<', $warnings) { | |||
| 120 | 0 | 0 | print STDERR "Could not open $warnings\n"; | |||
| 121 | 0 | 0 | return []; | |||
| 122 | } | |||||
| 123 | 1 | 3 | my $rules = {}; | |||
| 124 | 1 | 1 | my %encoded_files = (); | |||
| 125 | 1 | 2 | my %hashes_needed_for_files = (); | |||
| 126 | 1 | 15 | while (<WARNINGS>) { | |||
| 127 | 10 | 9 | next if m{^https://}; | |||
| 128 | 9 | 30 | next unless m{^(.+):(\d+):(\d+) \.\.\. (\d+),\s(Error|Warning|Notice)\s-\s(.+\s\((.+)\))$}; | |||
| 129 | 8 | 28 | my ($file, $line, $column, $endColumn, $severity, $message, $code) = ($1, $2, $3, $4, $5, $6, $7); | |||
| 130 | 8 | 93 | my $directory = dirname($file); | |||
| 131 | 8 | 5 | unless (defined $directoryToProvenanceInsertion{$directory}) { | |||
| 132 | 2 | 3 | my $provenanceString = collectVersionControlProvenance($file); | |||
| 133 | 2 | 163 | if ($provenanceString) { | |||
| 134 | 2 | 4 | if (defined $provenanceStringToIndex{$provenanceString}) { | |||
| 135 | 0 | 0 | $directoryToProvenanceInsertion{$directory} = $provenanceStringToIndex{$provenanceString}; | |||
| 136 | } else { | |||||
| 137 | 2 | 3 | $provenanceStringToIndex{$provenanceString} = $provenanceInsertion; | |||
| 138 | 2 | 4 | $directoryToProvenanceInsertion{$directory} = $provenanceInsertion; | |||
| 139 | 2 | 2 | ++$provenanceInsertion; | |||
| 140 | } | |||||
| 141 | } | |||||
| 142 | } | |||||
| 143 | # single-slash-escape `"` and `\` | |||||
| 144 | 8 | 9 | $message =~ s/(["\\])/\\$1/g; | |||
| 145 | 8 | 5 | $message = encode_low_ascii $message; | |||
| 146 | # double-slash-escape `"`, `(`, `)`, `]`, and `\` | |||||
| 147 | 8 | 6 | $message = double_slash_escape $message; | |||
| 148 | # encode `message` and `file` to protect against low ASCII | |||||
| 149 | 8 | 6 | my $encoded_file = url_encode $file; | |||
| 150 | 8 | 9 | $encoded_files{$encoded_file} = $file; | |||
| 151 | # hack: turn the first backquoted token into a markdown link so GitHub's SARIF view styles it (the link target itself goes nowhere) | |||||
| 152 | 8 | 9 | if ($message =~ /(`{2,})/) { | |||
| 153 | 1 | 0 | my $backticks = $1; | |||
| 154 | 1 | 46 | while ($message =~ /($backticks`+)(?=[`].*?\g{-1})/gs) { | |||
| 155 | 0 | 0 | $backticks = $1 if length($1) > length($backticks); | |||
| 156 | } | |||||
| 157 | 1 | 16 | $message =~ s/(^|[^\\])$backticks(.+?)$backticks/${1}[${2}](#security-tab)/; | |||
| 158 | } else { | |||||
| 159 | 7 | 18 | $message =~ s/(^|[^\\])\`([^`]+[^`\\])\`/${1}[${2}](#security-tab)/; | |||
| 160 | } | |||||
| 161 | # replace remaining backticks with `'` because GitHub's SARIF parser doesn't like them | |||||
| 162 | 8 | 7 | $message =~ s/\`/'/g; | |||
| 163 | 8 | 7 | unless (defined $rules->{$code}) { | |||
| 164 | 2 | 4 | $rules->{$code} = {}; | |||
| 165 | } | |||||
| 166 | 8 | 4 | my $rule = $rules->{$code}; | |||
| 167 | 8 | 7 | unless (defined $rule->{$message}) { | |||
| 168 | 5 | 9 | $rule->{$message} = []; | |||
| 169 | } | |||||
| 170 | 8 | 8 | addToHashesNeededForFiles($file, $line, $column, $message, \%hashes_needed_for_files); | |||
| 171 | 8 | 5 | my $locations = $rule->{$message}; | |||
| 172 | 8 | 13 | my $physicalLocation = { | |||
| 173 | 'uri' => $encoded_file, | |||||
| 174 | 'startLine' => $line, | |||||
| 175 | 'startColumn' => $column, | |||||
| 176 | 'endColumn' => $endColumn, | |||||
| 177 | }; | |||||
| 178 | 8 | 4 | push @$locations, $physicalLocation; | |||
| 179 | 8 | 20 | $rule->{$message} = $locations; | |||
| 180 | } | |||||
| 181 | 1 | 1 | my %line_hashes = (); | |||
| 182 | 1 | 1 | my %used_hashes = (); | |||
| 183 | 1 | 2 | hashFiles(\%hashes_needed_for_files, \%line_hashes, \%directoryToRepo, \%used_hashes); | |||
| 184 | 1 1 | 1 3 | for my $code (sort keys %{$rules}) { | |||
| 185 | 2 | 2 | my $rule = $rules->{$code}; | |||
| 186 | 2 2 | 1 4 | for my $message (sort keys %{$rule}) { | |||
| 187 | 5 | 9 | my $hashed_message = Digest::SHA::sha1_base64($message); | |||
| 188 | 5 | 13 | my $locations = $rule->{$message}; | |||
| 189 | 5 | 5 | my $fingerprintResults = fingerprintLocations($locations, \%encoded_files, \%line_hashes, \%hashes_needed_for_files, $message, $hashed_message); | |||
| 190 | 5 5 | 2 5 | my @locations_json = @{$fingerprintResults->{locations_json}}; | |||
| 191 | 5 5 | 2 5 | my @fingerprints = @{$fingerprintResults->{fingerprints}}; | |||
| 192 | 5 | 3 | if ($flatten) { | |||
| 193 | 0 | 0 | my $locations_json_flat = join ',', @locations_json; | |||
| 194 | 0 | 0 | my $partialFingerprints = ''; | |||
| 195 | 0 | 0 | my $partialFingerprint = (sort @fingerprints)[0]; | |||
| 196 | 0 | 0 | if ($partialFingerprint ne '') { | |||
| 197 | 0 | 0 | $partialFingerprints = qq<"partialFingerprints": { "cs0" : "$partialFingerprint" },>; | |||
| 198 | } | |||||
| 199 | 0 | 0 | $message =~ s/\s\\\\\([^()]+?\\\)$//g; | |||
| 200 | 0 | 0 | my $result_json = qq<{"ruleId": "$code", $partialFingerprints "message": { "text": "$message" }, "locations": [ $locations_json_flat ] }>; | |||
| 201 | 0 | 0 | my $result = decode_json $result_json; | |||
| 202 | 0 | 0 | push @results, $result; | |||
| 203 | } else { | |||||
| 204 | 5 | 4 | my $limit = scalar @locations_json; | |||
| 205 | 5 | 2 | for (my $i = 0; $i < $limit; ++$i) { | |||
| 206 | 8 | 6 | my $locations_json_flat = $locations_json[$i]; | |||
| 207 | 8 | 2 | my $partialFingerprints = ''; | |||
| 208 | 8 | 5 | my $partialFingerprint = $fingerprints[$i]; | |||
| 209 | 8 | 4 | if ($partialFingerprint ne '') { | |||
| 210 | 4 | 2 | $partialFingerprints = qq<"partialFingerprints": { "cs0" : "$partialFingerprint" },>; | |||
| 211 | } | |||||
| 212 | 8 | 15 | $message =~ s/\s\\\\\([^()]+?\\\)$//g; | |||
| 213 | 8 | 8 | my $result_json = qq<{"ruleId": "$code", $partialFingerprints "message": { "text": "$message" }, "locations": [ $locations_json_flat ] }>; | |||
| 214 | 8 | 8 | my $result = decode_json $result_json; | |||
| 215 | 8 | 8134 | push @results, $result; | |||
| 216 | } | |||||
| 217 | } | |||||
| 218 | } | |||||
| 219 | } | |||||
| 220 | 1 | 7 | close WARNINGS; | |||
| 221 | 1 | 9 | return \@results; | |||
| 222 | } | |||||
| 223 | ||||||
| 224 | sub get_runs_from_sarif { | |||||
| 225 | 2 | 2 | my ($sarif_json) = @_; | |||
| 226 | 2 | 2 | my %runs_view; | |||
| 227 | 2 | 4 | return %runs_view unless $sarif_json->{'runs'}; | |||
| 228 | 2 2 | 1 2 | my @sarif_json_runs=@{$sarif_json->{'runs'}}; | |||
| 229 | 2 | 2 | foreach my $sarif_json_run (@sarif_json_runs) { | |||
| 230 | 2 2 | 3 6 | my %sarif_json_run_hash=%{$sarif_json_run}; | |||
| 231 | 2 | 2 | next unless defined $sarif_json_run_hash{'tool'}; | |||
| 232 | ||||||
| 233 | 2 2 | 2 4 | my %sarif_json_run_tool_hash = %{$sarif_json_run_hash{'tool'}}; | |||
| 234 | 2 | 2 | next unless defined $sarif_json_run_tool_hash{'driver'}; | |||
| 235 | ||||||
| 236 | 2 2 | 1 4 | my %sarif_json_run_tool_driver_hash = %{$sarif_json_run_tool_hash{'driver'}}; | |||
| 237 | next unless defined $sarif_json_run_tool_driver_hash{'name'} && | |||||
| 238 | 2 | 9 | defined $sarif_json_run_tool_driver_hash{'rules'}; | |||
| 239 | ||||||
| 240 | 2 | 1 | my $driver_name = $sarif_json_run_tool_driver_hash{'name'}; | |||
| 241 | 2 2 | 2 4 | my @sarif_json_run_tool_driver_rules = @{$sarif_json_run_tool_driver_hash{'rules'}}; | |||
| 242 | 2 | 2 | my %driver_view; | |||
| 243 | 2 | 2 | for my $driver_rule (@sarif_json_run_tool_driver_rules) { | |||
| 244 | 38 | 24 | next unless defined $driver_rule->{'id'}; | |||
| 245 | 38 | 49 | $driver_view{$driver_rule->{'id'}} = $driver_rule; | |||
| 246 | } | |||||
| 247 | 2 | 4 | $runs_view{$sarif_json_run_tool_driver_hash{'name'}} = \%driver_view; | |||
| 248 | } | |||||
| 249 | 2 | 16 | return %runs_view; | |||
| 250 | } | |||||
| 251 | ||||||
| 252 | sub collectVersionControlProvenance { # serialize (remote, rev, branch, base dir) for a file's checkout | |||||
| 253 | 2 | 2 | my ($file) = @_; | |||
| 254 | 2 | 6 | my ($parsed_file, $git_base_dir, $prefix, $remote_url, $rev, $branch) = CheckSpelling::GitSources::git_source_and_rev($file); | |||
| 255 | 2 | 6 | return '' unless $remote_url; | |||
| 256 | 2 | 1 | my $base = substr $parsed_file, 0, length($file); | |||
| 257 | 2 | 3 | my $provenance = [$remote_url, $rev, $branch, $git_base_dir]; | |||
| 258 | 2 | 6 | return JSON::PP::encode_json($provenance); | |||
| 259 | } | |||||
| 260 | ||||||
| 261 | sub generateVersionControlProvenance { | |||||
| 262 | 1 | 1 | my ($versionControlProvenanceList, $run) = @_; | |||
| 263 | 1 | 1 | my %provenance; | |||
| 264 | sub buildVersionControlProvenance { | |||||
| 265 | 1 | 1 | my $d = $_; | |||
| 266 | 1 1 | 0 2 | my ($remote_url, $rev, $branch, $git_base_dir) = @{JSON::PP::decode_json($d)}; | |||
| 267 | 1 | 294 | my $dir = $git_base_dir eq '.' ? '%SRCROOT%' : "DIR_$provenanceStringToIndex{$d}"; | |||
| 268 | 1 | 2 | my $mappedTo = { | |||
| 269 | "uriBaseId" => $dir | |||||
| 270 | }; | |||||
| 271 | 1 | 1 | my $versionControlProvenance = { | |||
| 272 | "mappedTo" => $mappedTo | |||||
| 273 | }; | |||||
| 274 | 1 | 2 | $versionControlProvenance->{"revisionId"} = $rev if defined $rev; | |||
| 275 | 1 | 3 | $versionControlProvenance->{"branch"} = $branch if defined $branch; | |||
| 276 | 1 | 2 | $versionControlProvenance->{"repositoryUri"} = $remote_url if defined $remote_url; | |||
| 277 | 1 | 2 | return $versionControlProvenance; | |||
| 278 | } | |||||
| 279 | 1 | 1 | my @provenanceList = map(buildVersionControlProvenance, @$versionControlProvenanceList); | |||
| 280 | 1 | 1 | $run->{"versionControlProvenance"} = \@provenanceList; | |||
| 281 | } | |||||
| 282 | ||||||
| 283 | our $provenanceInsertion = 0; # package state shared with parse_warnings() via our | |||||
| 284 | our %provenanceStringToIndex = (); | |||||
| 285 | our %directoryToProvenanceInsertion = (); | |||||
| 286 | ||||||
| 287 | sub main { | |||||
| 288 | 1 | 18241 | my ($sarif_template_file, $sarif_template_overlay_file, $category) = @_; | |||
| 289 | 1 | 7 | unless (-f $sarif_template_file) { | |||
| 290 | 0 | 0 | warn "Could not find sarif template"; | |||
| 291 | 0 | 0 | return ''; | |||
| 292 | } | |||||
| 293 | ||||||
| 294 | 1 | 2 | $ENV{GITHUB_SERVER_URL} = '' unless defined $ENV{GITHUB_SERVER_URL}; | |||
| 295 | 1 | 10 | $ENV{GITHUB_REPOSITORY} = '' unless defined $ENV{GITHUB_REPOSITORY}; | |||
| 296 | 1 | 4 | my $sarif_template = CheckSpelling::Util::read_file $sarif_template_file; | |||
| 297 | 1 | 1 | die "sarif template is empty" unless $sarif_template; | |||
| 298 | ||||||
| 299 | 1 0 | 8 0 | my $json = JSON::PP->new->utf8->pretty->sort_by(sub { $JSON::PP::a cmp $JSON::PP::b }); | |||
| 300 | 1 | 79 | my $sarif_json = $json->decode($sarif_template); | |||
| 301 | ||||||
| 302 | 1 | 102399 | if (defined $sarif_template_overlay_file && -s $sarif_template_overlay_file) { | |||
| 303 | 1 | 13 | my $merger = Hash::Merge->new(); | |||
| 304 | 1 | 112 | my $merge_behaviors = $merger->{'behaviors'}->{$merger->get_behavior()}; | |||
| 305 | 1 | 7 | my $merge_arrays = $merge_behaviors->{'ARRAY'}->{'ARRAY'}; | |||
| 306 | ||||||
| 307 | $merge_behaviors->{'ARRAY'}->{'ARRAY'} = sub { | |||||
| 308 | 36 | 3822 | return $merge_arrays->(@_) if ref($_[0][0]).ref($_[1][0]); # truthy when either side holds refs | |||
| 309 | 36 36 | 16 56 | return [@{$_[1]}]; # plain lists: the right-hand array wins outright | |||
| 310 | 1 | 3 | }; | |||
| 311 | ||||||
| 312 | 1 | 4 | my $sarif_template_overlay = CheckSpelling::Util::read_file $sarif_template_overlay_file; | |||
| 313 | 1 | 3 | my %runs_base = get_runs_from_sarif($sarif_json); | |||
| 314 | ||||||
| 315 | 1 | 3 | my $sarif_template_hash = $json->decode($sarif_template_overlay); | |||
| 316 | 1 | 1792 | my %runs_overlay = get_runs_from_sarif($sarif_template_hash); | |||
| 317 | 1 | 1 | for my $run_id (keys %runs_overlay) { | |||
| 318 | 1 | 2 | if (defined $runs_base{$run_id}) { | |||
| 319 | 1 | 1 | my $run_base_hash = $runs_base{$run_id}; | |||
| 320 | 1 | 0 | my $run_overlay_hash = $runs_overlay{$run_id}; | |||
| 321 | 1 | 2 | for my $overlay_id (keys %$run_overlay_hash) { | |||
| 322 | $run_base_hash->{$overlay_id} = $merger->merge( | |||||
| 323 | $run_overlay_hash->{$overlay_id}, | |||||
| 324 | 1 | 3 | $run_base_hash->{$overlay_id} | |||
| 325 | ); | |||||
| 326 | } | |||||
| 327 | } else { | |||||
| 328 | 0 | 0 | $runs_base{$run_id} = $runs_overlay{$run_id}; | |||
| 329 | } | |||||
| 330 | } | |||||
| 332 | 1 1 | 63 1 | my @sarif_json_runs = @{$sarif_json->{'runs'}}; | |||
| 333 | 1 | 1 | foreach my $sarif_json_run (@sarif_json_runs) { | |||
| 334 | 1 1 | 1 1 | my %sarif_json_run_hash=%{$sarif_json_run}; | |||
| 335 | 1 | 2 | next unless defined $sarif_json_run_hash{'tool'}; | |||
| 336 | ||||||
| 337 | 1 1 | 0 1 | my %sarif_json_run_tool_hash = %{$sarif_json_run_hash{'tool'}}; | |||
| 338 | 1 | 1 | next unless defined $sarif_json_run_tool_hash{'driver'}; | |||
| 339 | ||||||
| 340 | 1 1 | 1 3 | my %sarif_json_run_tool_driver_hash = %{$sarif_json_run_tool_hash{'driver'}}; | |||
| 341 | 1 | 1 | my $driver_name = $sarif_json_run_tool_driver_hash{'name'}; | |||
| 342 | next unless defined $driver_name && | |||||
| 343 | 1 | 4 | defined $sarif_json_run_tool_driver_hash{'rules'}; | |||
| 344 | ||||||
| 345 | 1 | 1 | my $driver_view_hash = $runs_base{$driver_name}; | |||
| 346 | 1 | 1 | next unless defined $driver_view_hash; | |||
| 347 | ||||||
| 348 | 1 1 | 1 3 | my @sarif_json_run_tool_driver_rules = @{$sarif_json_run_tool_driver_hash{'rules'}}; | |||
| 349 | 1 | 1 | for my $driver_rule_number (0 .. $#sarif_json_run_tool_driver_rules) { | |||
| 350 | 38 | 2936 | my $driver_rule = $sarif_json_run_tool_driver_rules[$driver_rule_number]; | |||
| 351 | 38 | 17 | my $driver_rule_id = $driver_rule->{'id'}; | |||
| 352 | next unless defined $driver_rule_id && | |||||
| 353 | 38 | 55 | defined $driver_view_hash->{$driver_rule_id}; | |||
| 354 | 37 | 28 | $sarif_json_run_tool_driver_hash{'rules'}[$driver_rule_number] = $merger->merge($driver_view_hash->{$driver_rule_id}, $driver_rule); | |||
| 355 | } | |||||
| 356 | } | |||||
| 357 | 1 | 2 | delete $sarif_template_hash->{'runs'}; | |||
| 358 | 1 | 1 | $sarif_json = $merger->merge($sarif_json, $sarif_template_hash); | |||
| 359 | } | |||||
| 360 | { | |||||
| 361 | 1 1 1 | 626 1 3 | my @sarif_json_runs = @{$sarif_json->{'runs'}}; | |||
| 362 | 1 | 1 | foreach my $sarif_json_run (@sarif_json_runs) { | |||
| 363 | 1 | 0 | my %sarif_json_run_automationDetails; | |||
| 364 | 1 | 1 | $sarif_json_run_automationDetails{id} = $category; | |||
| 365 | 1 | 2 | $sarif_json_run->{'automationDetails'} = \%sarif_json_run_automationDetails; | |||
| 366 | } | |||||
| 367 | } | |||||
| 368 | ||||||
| 369 | 1 1 | 41 2 | my %sarif = %{$sarif_json}; | |||
| 370 | ||||||
| 371 | 1 | 3 | $sarif{'runs'}[0]{'tool'}{'driver'}{'version'} = $ENV{CHECK_SPELLING_VERSION}; | |||
| 372 | ||||||
| 373 | 1 | 2 | my $results = parse_warnings $ENV{warning_output}; | |||
| 374 | 1 | 1 | if ($results) { | |||
| 375 | 1 | 1 | $sarif{'runs'}[0]{'results'} = $results; | |||
| 376 | 1 | 1 | our %provenanceStringToIndex; | |||
| 377 | 1 | 1 | my @provenanceList = keys %provenanceStringToIndex; | |||
| 378 | 1 | 2 | generateVersionControlProvenance(\@provenanceList, $sarif{'runs'}[0]); | |||
| 379 | 1 | 1 | my %codes; | |||
| 380 | 1 | 2 | for my $result_ref (@$results) { | |||
| 381 | 8 8 | 3 8 | my %result = %{$result_ref}; | |||
| 382 | 8 | 7 | $codes{$result{'ruleId'}} = 1; | |||
| 383 | } | |||||
| 384 | 1 | 1 | my $rules_ref = $sarif{'runs'}[0]{'tool'}{'driver'}{'rules'}; | |||
| 385 | 1 1 | 1 5 | my @rules = @{$rules_ref}; | |||
| 386 | 1 | 0 | my $missing_rule_definition_id = 'missing-rule-definition'; | |||
| 387 | 1 37 | 2 23 | my ($missing_rule_definition_ref) = grep { $_->{'id'} eq $missing_rule_definition_id } @rules; | |||
| 388 | 1 37 | 1 19 | @rules = grep { defined $codes{$_->{'id'}} } @rules; | |||
| 389 | 1 | 1 | my $code_index = 0; | |||
| 390 | 1 1 | 0 2 | my %defined_codes = map { $_->{'id'} => $code_index++ } @rules; | |||
| 391 | 1 2 | 1 2 | my @missing_codes = grep { !defined $defined_codes{$_}} keys %codes; | |||
| 392 | 1 | 1 | my $missing_rule_definition_index; | |||
| 393 | 1 | 1 | if (@missing_codes) { | |||
| 394 | 1 | 0 | push @rules, $missing_rule_definition_ref; | |||
| 395 | 1 | 1 | $missing_rule_definition_index = $defined_codes{$missing_rule_definition_id} = $code_index++; | |||
| 396 | 1 | 29 | my $spellchecker = $ENV{spellchecker} || dirname(dirname(dirname(__FILE__))); | |||
| 397 | 1 | 2 | my %hashes_needed_for_files = (); | |||
| 398 | 1 | 1 | my %line_hashes = (); | |||
| 399 | 1 | 1 | my %used_hashes = (); my %encoded_files = (); | |||
| 400 | 1 | 0 | our %directoryToRepo; | |||
| 401 | 1 | 2 | for my $missing_code (@missing_codes) { | |||
| 402 | 1 | 1 | my $message = "No rule definition for `$missing_code`"; | |||
| 403 | 1 | 156855 | my $code_locations = `find '$spellchecker' -name '.git*' -prune -type f -name '*.sh' -o -name '*.pl' -o -name '*.pm' -o -name '*.t' -print0|xargs -0 grep -n '$missing_code' | perl -pe 's<^\./><>'`; | |||
| 404 | 1 | 8 | my @locations; | |||
| 405 | 1 | 9 | for my $line (split /\n/, $code_locations) { | |||
| 406 | 1 | 2 | chomp $line; | |||
| 407 | 1 | 11 | my ($file, $lineno, $code) = $line =~ /^(.+?):(\d+):(.+)$/; | |||
| 408 | 1 | 8 | next unless defined $file; | |||
| 409 | 1 | 30 | $code =~ /^(.*?)\b$missing_code\b/; | |||
| 410 | 1 | 4 | my $startColumn = length($1) + 1; | |||
| 411 | 1 | 9 | my $location = { | |||
| 412 | 'uri' => url_encode($file), | |||||
| 413 | 'startLine' => $lineno, | |||||
| 414 | 'startColumn' => $startColumn, | |||||
| 415 | 'endColumn' => length($1) + length($missing_code) + 1, | |||||
| 416 | }; | |||||
| 417 | 1 | 3 | push @locations, $location; | |||
| 418 | 1 | 2 | my $encoded_file = url_encode $file; | |||
| 419 | 1 | 6 | $encoded_files{$encoded_file} = $file; | |||
| 420 | 1 | 6 | addToHashesNeededForFiles($file, $lineno, $startColumn, $message, \%hashes_needed_for_files); | |||
| 421 | } | |||||
| 422 | 1 | 7 | hashFiles(\%hashes_needed_for_files, \%line_hashes, \%directoryToRepo, \%used_hashes); | |||
| 423 | 1 | 4 | my $fingerprintResults = fingerprintLocations(\@locations, \%encoded_files, \%line_hashes, \%hashes_needed_for_files, $message, Digest::SHA::sha1_base64($message)); | |||
| 424 | 1 1 | 1 2 | my @locations_json = @{$fingerprintResults->{locations_json}}; | |||
| 425 | 1 1 | 1 1 | my @fingerprints = @{$fingerprintResults->{fingerprints}}; | |||
| 426 | 1 | 2 | my $locations_json_flat = join ',', @locations_json; | |||
| 427 | 1 | 1 | my $partialFingerprints = ''; | |||
| 428 | 1 | 3 | my $locations = $locations_json_flat ? qq<, "locations": [ $locations_json_flat ]> : ''; | |||
| 429 | 1 | 3 | my $result_json = qq<{"ruleId": "$missing_rule_definition_id", $partialFingerprints "message": { "text": "$message" }$locations }>; | |||
| 430 | 1 | 7 | my $result = decode_json $result_json; | |||
| 431 | 1 1 | 1024 6 | push @{$results}, $result; | |||
| 432 | } | |||||
| 433 | } | |||||
| 434 | 1 | 5 | $sarif{'runs'}[0]{'tool'}{'driver'}{'rules'} = \@rules; | |||
| 435 | 1 1 | 2 1 | for my $result_index (0 .. $#{$results}) { | |||
| 436 | 10 | 7 | my $result = $results->[$result_index]; | |||
| 437 | 10 | 8 | my $ruleId = $result->{'ruleId'}; | |||
| 438 | 10 | 19 | next if defined $ruleId && defined $defined_codes{$ruleId}; | |||
| 439 | 2 | 154 | $result->{'ruleId'} = $missing_rule_definition_id; | |||
| 440 | } | |||||
| 441 | } | |||||
| 442 | ||||||
| 443 | 1 | 5 | return encode_json \%sarif; | |||
| 444 | } | |||||
| 445 | ||||||
| 446 | 1; | |||||
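
The fingerprinting scheme above (`hashFiles`, source lines 67–100) keys each result to a SHA-1 of the whitespace-normalized text, up to about 100 characters, preceding the warned line, so fingerprints survive edits that only shift line numbers. Below is a minimal standalone sketch of that idea; `context_hash` and the sample message are illustrative names, not part of the module:

```perl
#!/usr/bin/perl
# Standalone sketch of the context-hash fingerprinting idea;
# context_hash() is a hypothetical helper, not the module's API.
use strict;
use warnings;
use Digest::SHA ();

sub context_hash {
    my ($file, $target) = @_;
    open my $fh, '<', $file or return '';
    my $buffer = '';
    while (my $line = <$fh>) {
        if ($. == $target) {
            close $fh;
            # hash the normalized text accumulated before the target line
            return Digest::SHA::sha1_base64($buffer);
        }
        $buffer .= $line;
        $buffer =~ s/\s+/ /g;                                     # collapse whitespace
        $buffer = substr $buffer, -100 if length($buffer) > 100;  # keep a sliding window
    }
    close $fh;
    return '';
}

# Identical contexts are disambiguated with an occurrence counter,
# mirroring the $used_hashes_ref bookkeeping in hashFiles().
my %used;
my $context = context_hash($0, 5);
my $hit = $used{$context}++;
$context = "$context:$hit" if $hit;
print 'partial fingerprint: ', Digest::SHA::sha1_base64("$context:some message:0"), "\n";
```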
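`parse_warnings` (source line 111) consumes one record per line from the `warning_output` file, in the shape matched by the regex at source line 128: `file:line:column ... endColumn, Severity - message (code)`. A small sketch of that parse, with a hypothetical sample record (only the regex comes from the module):

```perl
#!/usr/bin/perl
# Illustrative parse of the warning format consumed by parse_warnings();
# the sample record below is hypothetical.
use strict;
use warnings;

my $record = 'src/demo.txt:12:8 ... 11, Warning - `teh` is not a recognized word (unrecognized-spelling)';
if ($record =~ m{^(.+):(\d+):(\d+) \.\.\. (\d+),\s(Error|Warning|Notice)\s-\s(.+\s\((.+)\))$}) {
    my ($file, $line, $column, $endColumn, $severity, $message, $code) =
        ($1, $2, $3, $4, $5, $6, $7);
    print "$severity [$code] $file:$line:$column-$endColumn: $message\n";
}
```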
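The `ARRAY`/`ARRAY` override installed in `main` (source lines 307–310) switches between Hash::Merge's stock behavior and wholesale replacement: arrays of references (such as SARIF `runs` and `rules`) merge element-wise, while arrays of plain scalars are taken from the right-hand operand. The isolated sketch below shows the effect; the sample hashes are illustrative:

```perl
#!/usr/bin/perl
# Isolated sketch of the ARRAY/ARRAY override used when overlaying
# SARIF templates; the sample data is illustrative.
use strict;
use warnings;
use Hash::Merge;
use Data::Dumper;

my $merger       = Hash::Merge->new();
my $behaviors    = $merger->{'behaviors'}{$merger->get_behavior()};
my $merge_arrays = $behaviors->{'ARRAY'}{'ARRAY'};

$behaviors->{'ARRAY'}{'ARRAY'} = sub {
    # ref() yields '' for plain scalars, so the concatenation is true
    # only when at least one side is an array of references.
    return $merge_arrays->(@_) if ref($_[0][0]) . ref($_[1][0]);
    return [ @{ $_[1] } ];  # plain lists: right-hand operand wins outright
};

my $left   = { tags => ['a', 'b'], rules => [ { id => 'x' } ] };
my $right  = { tags => ['c'],      rules => [ { id => 'y' } ] };
my $merged = $merger->merge($left, $right);
print Dumper($merged);
# tags  => ['c']                    (replaced)
# rules => [{id=>'x'}, {id=>'y'}]   (concatenated)
```

Poking `$merger->{'behaviors'}` directly mirrors what the module itself does; `Hash::Merge::specify_behavior` would be the documented route to the same result.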