diff --git a/Sources/SKCore/CompilationDatabase.swift b/Sources/SKCore/CompilationDatabase.swift index efdd1752b..67e02ae39 100644 --- a/Sources/SKCore/CompilationDatabase.swift +++ b/Sources/SKCore/CompilationDatabase.swift @@ -221,7 +221,11 @@ extension CompilationDatabase.Command: Codable { if let arguments = try container.decodeIfPresent([String].self, forKey: .arguments) { self.commandLine = arguments } else if let command = try container.decodeIfPresent(String.self, forKey: .command) { + #if os(Windows) + self.commandLine = splitWindowsCommandLine(command, initialCommandName: true) + #else self.commandLine = splitShellEscapedCommand(command) + #endif } else { throw CompilationDatabaseDecodingError.missingCommandOrArguments } @@ -355,3 +359,216 @@ public func splitShellEscapedCommand(_ cmd: String) -> [String] { var parser = Parser(cmd[...]) return parser.parse() } + +// MARK: - Windows + +fileprivate extension Character { + var isWhitespace: Bool { + switch self { + case " ", "\t": + return true + default: + return false + } + } + + var isWhitespaceOrNull: Bool { + return self.isWhitespace || self == "\0" + } + + func isWindowsSpecialChar(inCommandName: Bool) -> Bool { + if isWhitespace { + return true + } + if self == #"""# { + return true + } + if !inCommandName && self == #"\"# { + return true + } + return false + } +} + +fileprivate struct WindowsCommandParser { + /// The content of the entire command that shall be parsed. + private let content: String + + /// Whether we are parsing the initial command name. In this mode `\` is not treated as escaping the quote + /// character. + private var parsingCommandName: Bool + + /// An index into `content`, pointing to the character that we are currently parsing. + private var currentCharacterIndex: String.UTF8View.Index + + /// The split command line arguments. + private var result: [String] = [] + + /// The character that is currently being parsed. + /// + /// `nil` if we have reached the end of `content`. 
+ private var currentCharacter: Character? { + guard currentCharacterIndex < content.endIndex else { + return nil + } + return self.content[currentCharacterIndex] + } + + /// The character after `currentCharacter`. + /// + /// `nil` if we have reached the end of `content`. + private var peek: Character? { + let nextIndex = content.index(after: currentCharacterIndex) + if nextIndex < content.endIndex { + return content[nextIndex] + } else { + return nil + } + } + + init(_ string: String, initialCommandName: Bool) { + self.content = string + self.currentCharacterIndex = self.content.startIndex + self.parsingCommandName = initialCommandName + } + + /// Designated entry point to split a Windows command line invocation. + mutating func parse() -> [String] { + while let currentCharacter { + if currentCharacter.isWhitespaceOrNull { + // Consume any whitespace or NUL characters separating arguments. + _ = consume() + } else { + result.append(parseSingleArgument()) + } + } + return result + } + + /// Consume the current character. + private mutating func consume() -> Character { + guard let character = currentCharacter else { + preconditionFailure("Nothing to consume") + } + currentCharacterIndex = content.index(after: currentCharacterIndex) + return character + } + + /// Consume the current character, asserting that it is `expectedCharacter`. + private mutating func consume(expect expectedCharacter: Character) { + assert(currentCharacter == expectedCharacter) + _ = consume() + } + + /// Parses a single argument, consuming its characters and returns the parsed argument with all escaping unfolded + /// (e.g. `\"` gets returned as `"`) + /// + /// Afterwards the parser points to the character after the argument. 
+ mutating func parseSingleArgument() -> String { + var str = "" + while let currentCharacter { + if !currentCharacter.isWindowsSpecialChar(inCommandName: parsingCommandName) { + str.append(consume()) + continue + } + if currentCharacter.isWhitespaceOrNull { + parsingCommandName = false + return str + } else if currentCharacter == "\"" { + str += parseQuoted() + } else if currentCharacter == #"\"# { + assert(!parsingCommandName, "else we'd have treated it as a normal char"); + str.append(parseBackslash()) + } else { + preconditionFailure("unexpected special character"); + } + } + return str + } + + /// Assuming that we are positioned at a `"`, parse a quoted string and return the string contents without the + /// quotes. + mutating func parseQuoted() -> String { + // Discard the opening quote. It's not part of the unescaped text. + consume(expect: "\"") + + var str = "" + while let currentCharacter { + switch currentCharacter { + case "\"": + if peek == "\"" { + // Two adjacent quotes inside a quoted string are a single escaped double quote. For example + // `" a "" b "` + // represents the string + // ` a " b ` + consume(expect: "\"") + consume(expect: "\"") + str += "\"" + } else { + // We have found the closing quote. Discard it and return. + consume(expect: "\"") + return str + } + case "\\" where !parsingCommandName: + str.append(parseBackslash()) + default: + str.append(consume()) + } + } + return str + } + + /// Backslashes are interpreted in a rather complicated way in the Windows-style + /// command line, because backslashes are used both to separate path components and to + /// escape double quotes. This method consumes runs of backslashes as well as the + /// following double quote if it's escaped. + /// + /// * If an even number of backslashes is followed by a double quote, one + /// backslash is output for every pair of backslashes, and the last double + /// quote remains unconsumed. 
The double quote will later be interpreted as + /// the start or end of a quoted string in the main loop outside of this + /// function. + /// + /// * If an odd number of backslashes is followed by a double quote, one + /// backslash is output for every pair of backslashes, and a double quote is + /// output for the last pair of backslash-double quote. The double quote is + /// consumed in this case. + /// + /// * Otherwise, backslashes are interpreted literally. + mutating func parseBackslash() -> String { + var str: String = "" + + let firstNonBackslashIndex = content[currentCharacterIndex...].firstIndex(where: { $0 != "\\" }) ?? content.endIndex + let numberOfBackslashes = content.distance(from: currentCharacterIndex, to: firstNonBackslashIndex) + + if firstNonBackslashIndex != content.endIndex && content[firstNonBackslashIndex] == "\"" { + str += String(repeating: "\\", count: numberOfBackslashes / 2) + if numberOfBackslashes.isMultiple(of: 2) { + // We have an even number of backslashes. Just add the escaped backslashes to `str` and return to parse the + // quote in the outer function. + currentCharacterIndex = firstNonBackslashIndex + } else { + // We have an odd number of backslashes. The last backslash escapes the quote. + str += "\"" + currentCharacterIndex = content.index(after: firstNonBackslashIndex) + } + return str + } + + // The sequence of backslashes is not followed by quotes. Interpret them literally. + str += String(repeating: "\\", count: numberOfBackslashes) + currentCharacterIndex = firstNonBackslashIndex + return str + } +} + +// Sometimes, this function will be handling a full command line including an +// executable pathname at the start. In that situation, the initial pathname +// needs different handling from the following arguments, because when +// CreateProcess or cmd.exe scans the pathname, it doesn't treat \ as +// escaping the quote character, whereas when libc scans the rest of the +// command line, it does. 
+public func splitWindowsCommandLine(_ cmd: String, initialCommandName: Bool) -> [String] { + var parser = WindowsCommandParser(cmd, initialCommandName: initialCommandName) + return parser.parse() +} diff --git a/Tests/SKCoreTests/CompilationDatabaseTests.swift b/Tests/SKCoreTests/CompilationDatabaseTests.swift index c7e4d753d..411b92c7f 100644 --- a/Tests/SKCoreTests/CompilationDatabaseTests.swift +++ b/Tests/SKCoreTests/CompilationDatabaseTests.swift @@ -57,7 +57,80 @@ final class CompilationDatabaseTests: XCTestCase { check("\"a\"bcd\"ef\"\"\"\"g\"", ["abcdefg"]) check("a'\\b \"c\"'", ["a\\b \"c\""]) } + + func testSplitShellEscapedCommandBasic() { + assertEscapedCommand("", []) + assertEscapedCommand(" ", []) + assertEscapedCommand("a", ["a"]) + assertEscapedCommand("abc", ["abc"]) + assertEscapedCommand("a😀c", ["a😀c"]) + assertEscapedCommand("😀c", ["😀c"]) + assertEscapedCommand("abc def", ["abc", "def"]) + assertEscapedCommand("abc def", ["abc", "def"]) + } + + func testSplitShellEscapedCommandDoubleQuotes() { + assertEscapedCommand("\"", [""]) + assertEscapedCommand(#""a"#, ["a"]) + assertEscapedCommand("\"\"", [""]) + assertEscapedCommand(#""a""#, ["a"]) + assertEscapedCommand(#""a\"""#, [#"a""#]) + assertEscapedCommand(#""a b c ""#, ["a b c "]) + assertEscapedCommand(#""a " "#, ["a "]) + assertEscapedCommand(#""a " b"#, ["a ", "b"]) + assertEscapedCommand(#""a "b"#, ["a b"]) + assertEscapedCommand(#"a"x ""b"#, ["ax b"], windows: [#"ax "b"#]) + + assertEscapedCommand(#""a"bcd"ef""""g""#, ["abcdefg"], windows: [#"abcdef""g"#]) + } + func testSplitShellEscapedCommandSingleQuotes() { + assertEscapedCommand("'", [""], windows: ["'"]) + assertEscapedCommand("'a", ["a"], windows: ["'a"]) + assertEscapedCommand("''", [""], windows: ["''"]) + assertEscapedCommand("'a'", ["a"], windows: ["'a'"]) + assertEscapedCommand(#"'a\"'"#, [#"a\""#], windows: [#"'a"'"#]) + assertEscapedCommand(#"'a b c '"#, ["a b c "], windows: ["'a", "b", "c", "'"]) + assertEscapedCommand(#"'a 
' "#, ["a "], windows: ["'a", "'"]) + assertEscapedCommand(#"'a ' b"#, ["a ", "b"], windows: ["'a", "'", "b"]) + assertEscapedCommand(#"'a 'b"#, ["a b"], windows: ["'a", "'b"]) + assertEscapedCommand(#"a'x ''b"#, ["ax b"], windows: ["a'x", "''b"]) + } + + func testSplitShellEscapedCommandBackslash() { + assertEscapedCommand(#"a\\"#, [#"a\"#], windows: [#"a\\"#]) + assertEscapedCommand(#"a'\b "c"'"#, ["a\\b \"c\""], windows: [#"a'\b"#, #"c'"#]) + + assertEscapedCommand(#"\""#, ["\""]) + assertEscapedCommand(#"\\""#, [#"\"#]) + assertEscapedCommand(#"\\\""#, [#"\""#]) + assertEscapedCommand(#"\\ "#, [#"\"#], windows: [#"\\"#]) + assertEscapedCommand(#"\\\ "#, [#"\ "#], windows: [#"\\\"#]) + } + + func testSplitShellEscapedCommandWindowsCommand() { + assertEscapedCommand(#"C:\swift.exe"#, [#"C:swift.exe"#], windows: [#"C:\swift.exe"#], initialCommandName: true) + assertEscapedCommand( + #"C:\ swift.exe"#, + [#"C: swift.exe"#], + windows: [#"C:\"#, #"swift.exe"#], + initialCommandName: true + ) + assertEscapedCommand( + #"C:\ swift.exe"#, + [#"C: swift.exe"#], + windows: [#"C:\"#, #"swift.exe"#], + initialCommandName: false + ) + assertEscapedCommand(#"C:\"swift.exe""#, [#"C:"swift.exe"#], windows: [#"C:\swift.exe"#], initialCommandName: true) + assertEscapedCommand(#"C:\"swift.exe""#, [#"C:"swift.exe"#], windows: [#"C:"swift.exe"#], initialCommandName: false) + } + + func testSplitShellEscapedCommandWindowsTwoDoubleQuotes() { + assertEscapedCommand(#"" test with "" quote""#, [" test with quote"], windows: [#" test with " quote"#]) + assertEscapedCommand(#"" test with "" quote""#, [" test with quote"], windows: [#" test with " quote"#]) + } + func testEncodeCompDBCommand() throws { // Requires JSONEncoder.OutputFormatting.sortedKeys func check(_ cmd: CompilationDatabase.Command, _ expected: String, file: StaticString = #filePath, line: UInt = #line) throws { @@ -332,3 +405,33 @@ private func checkCompilationDatabaseBuildSystem(_ compdb: ByteString, file: Sta let 
buildSystem = CompilationDatabaseBuildSystem(projectRoot: try AbsolutePath(validating: "/a"), fileSystem: fs) try block(buildSystem) } + +/// Assert that splitting `str` into its command line components results in `expected`. +/// +/// By default assert that escaping using Unix and Windows rules results in the same split. If `windows` is specified, +/// assert that escaping with Windows rules produces `windows` and escaping using Unix rules results in `expected`. +/// +/// `initialCommandName` is passed through to the Windows split function. +private func assertEscapedCommand( + _ str: String, + _ expected: [String], + windows: [String]? = nil, + initialCommandName: Bool = false, + file: StaticString = #filePath, + line: UInt = #line +) { + XCTAssertEqual( + splitShellEscapedCommand(str), + expected, + "Splitting Unix command line arguments", + file: file, + line: line + ) + XCTAssertEqual( + splitWindowsCommandLine(str, initialCommandName: initialCommandName), + windows ?? expected, + "Splitting Windows command line arguments", + file: file, + line: line + ) +}