Skip to content

Commit

Permalink
bugfix: Process escaped colon in GCC depfiles. (ninja-build#1774)
Browse files Browse the repository at this point in the history
* Added ability to parse escaped colons in GCC Dep files enabling ninja to parse dep files of GCC 10 on Windows

* Added generated depfile_parser.cc

* Addressed formatting

* Added extra tests with real world examples of paths produced by both GCC 10 and Clang and GCC pre 10. Adjusted one test so it doesn't fail

* Adjusted regular expression to not match \: if the character following the : is either EOF or whitespace

* Fixed typo in regex (should be 0x20 for space not 0xa)

* Changed regular expression from using lookahead to instead matching a separate expression. This was needed as re2c pre version 1.17 is broken when using lookaheads. Also added tests for \: followed by whitespace

* Addressed formatting

* Forgot a missing std::

* Fixed formatting for spaces after , as well as respecting column width
  • Loading branch information
zero9178 authored May 20, 2020
1 parent cf021f3 commit 52649de
Show file tree
Hide file tree
Showing 3 changed files with 121 additions and 24 deletions.
87 changes: 63 additions & 24 deletions src/depfile_parser.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* Generated by re2c 1.1.1 */
/* Generated by re2c 1.3 */
// Copyright 2011 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
Expand Down Expand Up @@ -166,22 +166,23 @@ bool DepfileParser::Parse(string* content, string* err) {
goto yy5;
yy13:
yych = *(yymarker = ++in);
if (yych <= 0x1F) {
if (yych <= ' ') {
if (yych <= '\n') {
if (yych <= 0x00) goto yy5;
if (yych <= '\t') goto yy16;
goto yy17;
} else {
if (yych == '\r') goto yy19;
goto yy16;
if (yych <= 0x1F) goto yy16;
goto yy21;
}
} else {
if (yych <= '#') {
if (yych <= ' ') goto yy21;
if (yych <= '"') goto yy16;
goto yy23;
if (yych <= '9') {
if (yych == '#') goto yy23;
goto yy16;
} else {
if (yych == '\\') goto yy25;
if (yych <= ':') goto yy25;
if (yych == '\\') goto yy27;
goto yy16;
}
}
Expand Down Expand Up @@ -231,26 +232,63 @@ bool DepfileParser::Parse(string* content, string* err) {
}
yy25:
yych = *++in;
if (yych <= 0x1F) {
if (yych <= '\f') {
if (yych <= 0x00) goto yy28;
if (yych <= 0x08) goto yy26;
if (yych <= '\n') goto yy28;
} else {
if (yych <= '\r') goto yy28;
if (yych == ' ') goto yy28;
}
yy26:
{
// De-escape colon sign, but preserve other leading backslashes.
// Regular expression uses lookahead to make sure that no whitespace
// nor EOF follows. In that case it'd be the : at the end of a target
int len = (int)(in - start);
if (len > 2 && out < start)
memset(out, '\\', len - 2);
out += len - 2;
*out++ = ':';
continue;
}
yy27:
yych = *++in;
if (yych <= ' ') {
if (yych <= '\n') {
if (yych <= 0x00) goto yy11;
if (yych <= '\t') goto yy16;
goto yy11;
} else {
if (yych == '\r') goto yy11;
goto yy16;
if (yych <= 0x1F) goto yy16;
goto yy30;
}
} else {
if (yych <= '#') {
if (yych <= ' ') goto yy26;
if (yych <= '"') goto yy16;
goto yy23;
if (yych <= '9') {
if (yych == '#') goto yy23;
goto yy16;
} else {
if (yych == '\\') goto yy28;
if (yych <= ':') goto yy25;
if (yych == '\\') goto yy32;
goto yy16;
}
}
yy26:
yy28:
++in;
{
// Backslash followed by : and whitespace.
// It is therefore normal text and not an escaped colon
int len = (int)(in - start - 1);
// Need to shift it over if we're overwriting backslashes.
if (out < start)
memmove(out, start, len);
out += len;
if (*(in - 1) == '\n')
have_newline = true;
break;
}
yy30:
++in;
{
// 2N backslashes plus space -> 2N backslashes, end of filename.
Expand All @@ -260,24 +298,25 @@ bool DepfileParser::Parse(string* content, string* err) {
out += len - 1;
break;
}
yy28:
yy32:
yych = *++in;
if (yych <= 0x1F) {
if (yych <= ' ') {
if (yych <= '\n') {
if (yych <= 0x00) goto yy11;
if (yych <= '\t') goto yy16;
goto yy11;
} else {
if (yych == '\r') goto yy11;
goto yy16;
if (yych <= 0x1F) goto yy16;
goto yy21;
}
} else {
if (yych <= '#') {
if (yych <= ' ') goto yy21;
if (yych <= '"') goto yy16;
goto yy23;
if (yych <= '9') {
if (yych == '#') goto yy23;
goto yy16;
} else {
if (yych == '\\') goto yy25;
if (yych <= ':') goto yy25;
if (yych == '\\') goto yy27;
goto yy16;
}
}
Expand Down
23 changes: 23 additions & 0 deletions src/depfile_parser.in.cc
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,29 @@ bool DepfileParser::Parse(string* content, string* err) {
*out++ = '#';
continue;
}
'\\'+ ':' [\x00\x20\r\n\t] {
// Matches one or more backslashes, a colon, and then exactly one
// terminator character: NUL, space, CR, LF or tab.
// Backslash followed by : and whitespace.
// It is therefore normal text and not an escaped colon, so the
// backslashes and the colon are copied through unchanged.
// len is the matched length minus the trailing terminator character.
int len = (int)(in - start - 1);
// Need to shift it over if we're overwriting backslashes.
// (out lagging behind start means the output is shorter than the input
// consumed so far, so the text has to be moved down to out.)
if (out < start)
memmove(out, start, len);
out += len;
// Record when the terminator that was just consumed is a line feed.
if (*(in - 1) == '\n')
have_newline = true;
// NOTE(review): presumably leaves the filename-scanning loop; the
// enclosing loop is outside this hunk, confirm against the full function.
break;
}
'\\'+ ':' {
// De-escape colon sign, but preserve other leading backslashes:
// N backslashes followed by : become N-1 backslashes followed by :.
// This rule does not use lookahead. A colon that is directly followed by
// whitespace or NUL is matched by the separate backslash-colon-whitespace
// rule instead, because that rule consumes one more character and re2c
// prefers the longer match. In that case it would be the : at the end of
// a target.
// len is the full matched length (all backslashes plus the colon).
int len = (int)(in - start);
// Only rewrite the kept backslashes when the output position lags behind
// the input; otherwise they are already in place.
if (len > 2 && out < start)
memset(out, '\\', len - 2);
out += len - 2;
*out++ = ':';
continue;
}
'$$' {
// De-escape dollar character.
*out++ = '$';
Expand Down
35 changes: 35 additions & 0 deletions src/depfile_parser_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,41 @@ TEST_F(DepfileParserTest, Escapes) {
ASSERT_EQ(0u, parser_.ins_.size());
}

TEST_F(DepfileParserTest, EscapedColons) {
  // Depfiles as produced on Windows by Clang, GCC before 10, and GCC 10:
  // the drive-letter colon shows up escaped ("c\:") in the target and
  // unescaped ("c:") in the dependency. Both spellings must parse to the
  // same plain "c:" path.
  static const char kDepfile[] =
      "c\\:\\gcc\\x86_64-w64-mingw32\\include\\stddef.o: \\\n"
      " c:\\gcc\\x86_64-w64-mingw32\\include\\stddef.h \n";

  std::string err;
  EXPECT_TRUE(Parse(kDepfile, &err));
  ASSERT_EQ("", err);

  // Exactly one target, with the escaping backslash before the colon removed.
  ASSERT_EQ(1u, parser_.outs_.size());
  EXPECT_EQ("c:\\gcc\\x86_64-w64-mingw32\\include\\stddef.o",
            parser_.outs_[0].AsString());

  // Exactly one dependency, passed through unchanged.
  ASSERT_EQ(1u, parser_.ins_.size());
  EXPECT_EQ("c:\\gcc\\x86_64-w64-mingw32\\include\\stddef.h",
            parser_.ins_[0].AsString());
}

TEST_F(DepfileParserTest, EscapedTargetColon) {
  // A backslash-colon that is immediately followed by whitespace or by the
  // end of input is not an escaped colon: the colon terminates the target
  // and the backslash stays part of the target name. Each line below ends
  // the target with a different follower: space, LF, CR LF, tab, and EOF.
  static const char kDepfile[] =
      "foo1\\: x\n"
      "foo1\\:\n"
      "foo1\\:\r\n"
      "foo1\\:\t\n"
      "foo1\\:";

  std::string err;
  EXPECT_TRUE(Parse(kDepfile, &err));
  ASSERT_EQ("", err);

  // All five lines name the same target, so a single output is expected.
  ASSERT_EQ(1u, parser_.outs_.size());
  EXPECT_EQ("foo1\\", parser_.outs_[0].AsString());

  // Only the first line lists a dependency.
  ASSERT_EQ(1u, parser_.ins_.size());
  EXPECT_EQ("x", parser_.ins_[0].AsString());
}

TEST_F(DepfileParserTest, SpecialChars) {
// See filenames like istreambuf.iterator_op!= in
// https://github.com/google/libcxx/tree/master/test/iterators/stream.iterators/istreambuf.iterator/
Expand Down

0 comments on commit 52649de

Please sign in to comment.