From 161f26de4ee094177eb1bbd344acdbfd35b9e395 Mon Sep 17 00:00:00 2001 From: Gareth Jones Date: Thu, 9 Nov 2023 15:18:27 +1300 Subject: [PATCH] fix: properly handle file/url paths on Windows (#645) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Turns out that file -> url translation on Windows is busted, and that this is a hard problem that Go has an internal util for that has not yet been made public - I've done what apparently a number of other packages have done which is copying that helper into here and hoping one day it actually becomes public 😢 Note that until #646 is landed, there is no way to actually verify this is fixing the problem - #553 shows the result of both PRs being merged. --- internal/output/sarif.go | 8 ++- internal/sourceanalysis/go.go | 7 ++- internal/url/url.go | 70 +++++++++++++++++++++++ internal/url/url_other_test.go | 38 +++++++++++++ internal/url/url_test.go | 50 +++++++++++++++++ internal/url/url_windows_test.go | 96 ++++++++++++++++++++++++++++++++ 6 files changed, 266 insertions(+), 3 deletions(-) create mode 100644 internal/url/url.go create mode 100644 internal/url/url_other_test.go create mode 100644 internal/url/url_test.go create mode 100644 internal/url/url_windows_test.go diff --git a/internal/output/sarif.go b/internal/output/sarif.go index e1c7c28855b..09a12f104d7 100644 --- a/internal/output/sarif.go +++ b/internal/output/sarif.go @@ -8,6 +8,7 @@ import ( "strings" "text/template" + "github.com/google/osv-scanner/internal/url" "github.com/google/osv-scanner/internal/utility/results" "github.com/google/osv-scanner/internal/version" "github.com/google/osv-scanner/pkg/models" @@ -284,8 +285,11 @@ func PrintSARIFReport(vulnResult *models.VulnerabilityResults, outputWriter io.W for pws := range gv.PkgSource { artifactPath := stripGitHubWorkspace(pws.Source.Path) if filepath.IsAbs(artifactPath) { - // Support absolute paths. - artifactPath = "file://" + artifactPath + // this only errors if the file path is not absolute, + // which we've already confirmed is not the case + p, _ := url.FromFilePath(artifactPath) + + artifactPath = p.String() } run.AddDistinctArtifact(artifactPath) diff --git a/internal/sourceanalysis/go.go b/internal/sourceanalysis/go.go index 0258c56efdf..4c8d119fe3c 100644 --- a/internal/sourceanalysis/go.go +++ b/internal/sourceanalysis/go.go @@ -10,6 +10,7 @@ import ( "path/filepath" "github.com/google/osv-scanner/internal/sourceanalysis/govulncheck" + "github.com/google/osv-scanner/internal/url" "github.com/google/osv-scanner/pkg/models" "github.com/google/osv-scanner/pkg/reporter" "golang.org/x/vuln/scan" @@ -109,9 +110,13 @@ func runGovulncheck(moddir string, vulns []models.Vulnerability) (map[string][]* } } + // this only errors if the file path is not absolute, + // which paths from os.MkdirTemp should always be + dbdirURL, _ := url.FromFilePath(dbdir) + // Run govulncheck on the module at moddir and vulnerability database that // was just created. - cmd := scan.Command(context.Background(), "-db", fmt.Sprintf("file://%s", dbdir), "-C", moddir, "-json", "./...") + cmd := scan.Command(context.Background(), "-db", dbdirURL.String(), "-C", moddir, "-json", "./...") var b bytes.Buffer cmd.Stdout = &b if err := cmd.Start(); err != nil { diff --git a/internal/url/url.go b/internal/url/url.go new file mode 100644 index 00000000000..08f73d33d11 --- /dev/null +++ b/internal/url/url.go @@ -0,0 +1,70 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package url + +import ( + "errors" + "net/url" + "path/filepath" + "strings" +) + +// Code copied from https://github.com/golang/go/blob/7c2b69080a0b9e35174cc9c93497b6e7176f8275/src/cmd/go/internal/web/url.go +// TODO(golang.org/issue/32456): If accepted, move these functions into the +// net/url package. + +var errNotAbsolute = errors.New("path is not absolute") + +func FromFilePath(path string) (*url.URL, error) { + if !filepath.IsAbs(path) { + return nil, errNotAbsolute + } + + // If path has a Windows volume name, convert the volume to a host and prefix + // per https://blogs.msdn.microsoft.com/ie/2006/12/06/file-uris-in-windows/. + if vol := filepath.VolumeName(path); vol != "" { + if strings.HasPrefix(vol, `\\`) { + path = filepath.ToSlash(path[2:]) + i := strings.IndexByte(path, '/') + + if i < 0 { + // A degenerate case. + // \\host.example.com (without a share name) + // becomes + // file://host.example.com/ + return &url.URL{ + Scheme: "file", + Host: path, + Path: "/", + }, nil + } + + // \\host.example.com\Share\path\to\file + // becomes + // file://host.example.com/Share/path/to/file + return &url.URL{ + Scheme: "file", + Host: path[:i], + Path: filepath.ToSlash(path[i:]), + }, nil + } + + // C:\path\to\file + // becomes + // file:///C:/path/to/file + return &url.URL{ + Scheme: "file", + Path: "/" + filepath.ToSlash(path), + }, nil + } + + // /path/to/file + // becomes + // file:///path/to/file + return &url.URL{ + Scheme: "file", + Path: filepath.ToSlash(path), + }, nil +} diff --git a/internal/url/url_other_test.go b/internal/url/url_other_test.go new file mode 100644 index 00000000000..392cf5ddc75 --- /dev/null +++ b/internal/url/url_other_test.go @@ -0,0 +1,38 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !windows + +package url + +// Code copied from https://github.com/golang/go/blob/7c2b69080a0b9e35174cc9c93497b6e7176f8275/src/cmd/go/internal/web/url_other_test.go + +var urlTests = []struct { + url string + filePath string + canonicalURL string // If empty, assume equal to url. + wantErr string +}{ + // Examples from RFC 8089: + { + url: `file:///path/to/file`, + filePath: `/path/to/file`, + }, + { + url: `file:/path/to/file`, + filePath: `/path/to/file`, + canonicalURL: `file:///path/to/file`, + }, + { + url: `file://localhost/path/to/file`, + filePath: `/path/to/file`, + canonicalURL: `file:///path/to/file`, + }, + + // We reject non-local files. + { + url: `file://host.example.com/path/to/file`, + wantErr: "file URL specifies non-local host", + }, +} diff --git a/internal/url/url_test.go b/internal/url/url_test.go new file mode 100644 index 00000000000..595786ad507 --- /dev/null +++ b/internal/url/url_test.go @@ -0,0 +1,50 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package url + +import ( + "testing" +) + +// Code copied from https://github.com/golang/go/blob/7c2b69080a0b9e35174cc9c93497b6e7176f8275/src/cmd/go/internal/web/url_test.go + +func TestURLFromFilePath(t *testing.T) { + t.Parallel() + + for _, tc := range urlTests { + if tc.filePath == "" { + continue + } + tc := tc + + t.Run(tc.filePath, func(t *testing.T) { + t.Parallel() + + u, err := FromFilePath(tc.filePath) + if err != nil { + if err.Error() == tc.wantErr { + return + } + if tc.wantErr == "" { + t.Fatalf("urlFromFilePath(%v): %v; want ", tc.filePath, err) + } else { + t.Fatalf("urlFromFilePath(%v): %v; want %s", tc.filePath, err, tc.wantErr) + } + } + + if tc.wantErr != "" { + t.Fatalf("urlFromFilePath(%v) = ; want error: %s", tc.filePath, tc.wantErr) + } + + wantURL := tc.url + if tc.canonicalURL != "" { + wantURL = tc.canonicalURL + } + if u.String() != wantURL { + t.Errorf("urlFromFilePath(%v) = %v; want %s", tc.filePath, u, wantURL) + } + }) + } +} diff --git a/internal/url/url_windows_test.go b/internal/url/url_windows_test.go new file mode 100644 index 00000000000..1cf7e5ab9ab --- /dev/null +++ b/internal/url/url_windows_test.go @@ -0,0 +1,96 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package url + +// Code copied from https://github.com/golang/go/blob/7c2b69080a0b9e35174cc9c93497b6e7176f8275/src/cmd/go/internal/web/url_windows_test.go + +var urlTests = []struct { + url string + filePath string + canonicalURL string // If empty, assume equal to url. + wantErr string +}{ + // Examples from https://blogs.msdn.microsoft.com/ie/2006/12/06/file-uris-in-windows/: + + { + url: `file://laptop/My%20Documents/FileSchemeURIs.doc`, + filePath: `\\laptop\My Documents\FileSchemeURIs.doc`, + }, + { + url: `file:///C:/Documents%20and%20Settings/davris/FileSchemeURIs.doc`, + filePath: `C:\Documents and Settings\davris\FileSchemeURIs.doc`, + }, + { + url: `file:///D:/Program%20Files/Viewer/startup.htm`, + filePath: `D:\Program Files\Viewer\startup.htm`, + }, + { + url: `file:///C:/Program%20Files/Music/Web%20Sys/main.html?REQUEST=RADIO`, + filePath: `C:\Program Files\Music\Web Sys\main.html`, + canonicalURL: `file:///C:/Program%20Files/Music/Web%20Sys/main.html`, + }, + { + url: `file://applib/products/a-b/abc_9/4148.920a/media/start.swf`, + filePath: `\\applib\products\a-b\abc_9\4148.920a\media\start.swf`, + }, + { + url: `file:////applib/products/a%2Db/abc%5F9/4148.920a/media/start.swf`, + wantErr: "file URL missing drive letter", + }, + { + url: `C:\Program Files\Music\Web Sys\main.html?REQUEST=RADIO`, + wantErr: "non-file URL", + }, + + // The example "file://D:\Program Files\Viewer\startup.htm" errors out in + // url.Parse, so we substitute a slash-based path for testing instead. + { + url: `file://D:/Program Files/Viewer/startup.htm`, + wantErr: "file URL encodes volume in host field: too few slashes?", + }, + + // The blog post discourages the use of non-ASCII characters because they + // depend on the user's current codepage. However, when we are working with Go + // strings we assume UTF-8 encoding, and our url package refuses to encode + // URLs to non-ASCII strings. + { + url: `file:///C:/exampleㄓ.txt`, + filePath: `C:\exampleㄓ.txt`, + canonicalURL: `file:///C:/example%E3%84%93.txt`, + }, + { + url: `file:///C:/example%E3%84%93.txt`, + filePath: `C:\exampleㄓ.txt`, + }, + + // Examples from RFC 8089: + + // We allow the drive-letter variation from section E.2, because it is + // simpler to support than not to. However, we do not generate the shorter + // form in the reverse direction. + { + url: `file:c:/path/to/file`, + filePath: `c:\path\to\file`, + canonicalURL: `file:///c:/path/to/file`, + }, + + // We encode the UNC share name as the authority following section E.3.1, + // because that is what the Microsoft blog post explicitly recommends. + { + url: `file://host.example.com/Share/path/to/file.txt`, + filePath: `\\host.example.com\Share\path\to\file.txt`, + }, + + // We decline the four- and five-slash variations from section E.3.2. + // The paths in these URLs would change meaning under path.Clean. + { + url: `file:////host.example.com/path/to/file`, + wantErr: "file URL missing drive letter", + }, + { + url: `file://///host.example.com/path/to/file`, + wantErr: "file URL missing drive letter", + }, +}