Skip to content

Commit

Permalink
avoid relying on as_bytes() in OsStr::starts_with() on Windows
Browse files Browse the repository at this point in the history
Previously, starts_with() would panic on Windows if the OsStr contained
any invalid Unicode. Because this method is in the critical path for
parsing command line arguments, this meant that Clap up to v2.33.0 on
Windows would panic on non-Unicode filenames, even when the application
tried to use OsString specifically to account for this case. As a
workaround, this commit adds a custom implementation of
OsStr::starts_with() for Windows that doesn't rely on as_bytes().

Fixes clap-rs#1905.
  • Loading branch information
oconnor663 committed May 5, 2020
1 parent 3f1a9b7 commit eb27b4a
Showing 1 changed file with 75 additions and 0 deletions.
75 changes: 75 additions & 0 deletions src/osstringext.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,70 @@ pub trait OsStrExt2 {
fn split(&self, b: u8) -> OsSplit;
}

// A starts-with implementation that does not panic when the OsStr is invalid
// Unicode.
//
// A Windows OsStr is usually UTF-16. If `prefix` is valid UTF-8, we can
// re-encode it as UTF-16, and ask whether `osstr` starts with the same series
// of u16 code units. If `prefix` is not valid UTF-8, then this comparison
// isn't meaningful, and we just return false.
#[cfg(target_os = "windows")]
fn windows_osstr_starts_with(osstr: &OsStr, prefix: &[u8]) -> bool {
use std::os::windows::ffi::OsStrExt;
let prefix_str = if let Ok(s) = std::str::from_utf8(prefix) {
s
} else {
return false;
};
let mut osstr_units = osstr.encode_wide();
let mut prefix_units = prefix_str.encode_utf16();
loop {
match (osstr_units.next(), prefix_units.next()) {
(Some(o), Some(p)) if o == p => continue,
(_, None) => return true,
_ => return false,
}
}
}

#[test]
#[cfg(target_os = "windows")]
fn test_windows_osstr_starts_with() {
use std::ffi::OsString;
use std::os::windows::ffi::OsStringExt;

fn from_ascii(ascii: &[u8]) -> OsString {
let u16_vec: Vec<u16> = ascii.iter().map(|&c| c as u16).collect();
OsString::from_wide(&u16_vec)
}

// Test all the basic cases.
assert!(windows_osstr_starts_with(&from_ascii(b"abcdef"), b"abc"));
assert!(windows_osstr_starts_with(&from_ascii(b"abcdef"), b"abcdef"));
assert!(!windows_osstr_starts_with(&from_ascii(b"abcdef"), b"def"));
assert!(!windows_osstr_starts_with(&from_ascii(b"abc"), b"abcd"));

// Test the case where the candidate prefix is not valid UTF-8. Note that a
// standalone \xff byte is valid ASCII but not valid UTF-8. Thus although
// these byte strings look identical, they do not match.
assert!(!windows_osstr_starts_with(&from_ascii(b"\xff"), b"\xff"));

// Test the case where the OsString is not valid UTF-16. It should still be
// possible to match the valid characters at the front.
//
// UTF-16 surrogate characters are only valid in pairs. Including one on
// the end by itself makes this invalid UTF-16.
let surrogate_char: u16 = 0xDC00;
let mut invalid_unicode =
OsString::from_wide(&['a' as u16, 'b' as u16, 'c' as u16, surrogate_char]);
assert!(
invalid_unicode.to_str().is_none(),
"This string is invalid Unicode, and conversion to &str should fail.",
);
assert!(windows_osstr_starts_with(&invalid_unicode, b"abc"));
assert!(!windows_osstr_starts_with(&invalid_unicode, b"abcd"));
}

#[cfg(any(target_os = "windows", target_arch = "wasm32"))]
impl OsStrExt3 for OsStr {
fn from_bytes(b: &[u8]) -> &Self {
Expand All @@ -33,6 +97,17 @@ impl OsStrExt3 for OsStr {

impl OsStrExt2 for OsStr {
fn starts_with(&self, s: &[u8]) -> bool {
#[cfg(target_os = "windows")]
{
// On Windows, the as_bytes() method will panic if the OsStr
// contains invalid Unicode. To avoid this, we use a
// Windows-specific starts-with function that doesn't rely on
// as_bytes(). This is necessary for Windows command line
// applications to handle non-Unicode arguments successfully. Note
// that this entire module is replaced in Clap 3.x, so this
// workaround is specific to the 2.x branch.
return windows_osstr_starts_with(self, s);
}
self.as_bytes().starts_with(s)
}

Expand Down

0 comments on commit eb27b4a

Please sign in to comment.