Skip to content

Commit

Permalink
commonlib: Add ANTLR
Browse files Browse the repository at this point in the history
Signed-off-by: kingthorin <[email protected]>
  • Loading branch information
kingthorin committed Feb 6, 2025
1 parent dc146bf commit 3b2ad77
Show file tree
Hide file tree
Showing 7 changed files with 1,332 additions and 0 deletions.
32 changes: 32 additions & 0 deletions addOns/commonlib/commonlib.gradle.kts
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
import net.ltgt.gradle.errorprone.errorprone
import org.zaproxy.gradle.addon.AddOnStatus

description = "A common library, for use by other add-ons."

plugins {
antlr
}

zapAddOn {
addOnName.set("Common Library")
addOnStatus.set(AddOnStatus.RELEASE)
Expand Down Expand Up @@ -32,9 +37,36 @@ dependencies {
api("com.fasterxml.jackson.datatype:jackson-datatype-jdk8")
api("com.fasterxml.jackson.datatype:jackson-datatype-jsr310")

val antlrVersion = "4.13.2"
antlr("org.antlr:antlr4:$antlrVersion")
implementation("org.antlr:antlr4-runtime:$antlrVersion")

implementation("commons-io:commons-io:2.16.1")
implementation("org.apache.commons:commons-csv:1.10.0")
implementation("org.apache.commons:commons-collections4:4.4")

testImplementation(project(":testutils"))
}

val jsParserPkg = "org.zaproxy.addon.commonlib.parserapi.impl"
val jsParserDir = jsParserPkg.replace('.', '/')
val generateGrammarSource by tasks.existing(AntlrTask::class) {
val libDir = "$outputDirectory/$jsParserDir"
arguments = arguments + listOf("-package", jsParserPkg, "-lib", libDir)

doFirst {
mkdir(libDir)
}
}

tasks.withType<JavaCompile>().configureEach {
options.errorprone.excludedPaths.set(".*/(generated-src|$jsParserDir)/.*")
}

spotless {
javaWith3rdPartyFormatted(
project,
listOf(),
listOf("src/**/$jsParserDir/*.java")
)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,285 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2014 by Bart Kiers (original author) and Alexandre Vitorelli (contributor -> ported to CSharp)
* Copyright (c) 2017-2020 by Ivan Kochurkin (Positive Technologies):
added ECMAScript 6 support, cleared and transformed to the universal grammar.
* Copyright (c) 2018 by Juan Alvarez (contributor -> ported to Go)
* Copyright (c) 2019 by Student Main (contributor -> ES2020)
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/

// $antlr-format alignTrailingComments true, columnLimit 150, maxEmptyLinesToKeep 1, reflowComments false, useTab false
// $antlr-format allowShortRulesOnASingleLine true, allowShortBlocksOnASingleLine true, minEmptyLines 0, alignSemicolons ownLine
// $antlr-format alignColons trailing, singleLineOverrulesHangingColon true, alignLexerCommands true, alignLabels true, alignTrailers true

lexer grammar JavaScriptLexer;

channels {
ERROR
}

options {
superClass = JavaScriptLexerBase;
}

// Insert here @header for C++ lexer.

HashBangLine : { this.IsStartOfFile()}? '#!' ~[\r\n\u2028\u2029]*; // only allowed at start
MultiLineComment : '/*' .*? '*/' -> channel(HIDDEN);
SingleLineComment : '//' ~[\r\n\u2028\u2029]* -> channel(HIDDEN);
RegularExpressionLiteral:
'/' RegularExpressionFirstChar RegularExpressionChar* {this.IsRegexPossible()}? '/' IdentifierPart*
;

OpenBracket : '[';
CloseBracket : ']';
OpenParen : '(';
CloseParen : ')';
OpenBrace : '{' {this.ProcessOpenBrace();};
TemplateCloseBrace : {this.IsInTemplateString()}? '}' // Break lines here to ensure proper transformation by Go/transformGrammar.py
{this.ProcessTemplateCloseBrace();} -> popMode;
CloseBrace : '}' {this.ProcessCloseBrace();};
SemiColon : ';';
Comma : ',';
Assign : '=';
QuestionMark : '?';
QuestionMarkDot : '?.';
Colon : ':';
Ellipsis : '...';
Dot : '.';
PlusPlus : '++';
MinusMinus : '--';
Plus : '+';
Minus : '-';
BitNot : '~';
Not : '!';
Multiply : '*';
Divide : '/';
Modulus : '%';
Power : '**';
NullCoalesce : '??';
Hashtag : '#';
RightShiftArithmetic : '>>';
LeftShiftArithmetic : '<<';
RightShiftLogical : '>>>';
LessThan : '<';
MoreThan : '>';
LessThanEquals : '<=';
GreaterThanEquals : '>=';
Equals_ : '==';
NotEquals : '!=';
IdentityEquals : '===';
IdentityNotEquals : '!==';
BitAnd : '&';
BitXOr : '^';
BitOr : '|';
And : '&&';
Or : '||';
MultiplyAssign : '*=';
DivideAssign : '/=';
ModulusAssign : '%=';
PlusAssign : '+=';
MinusAssign : '-=';
LeftShiftArithmeticAssign : '<<=';
RightShiftArithmeticAssign : '>>=';
RightShiftLogicalAssign : '>>>=';
BitAndAssign : '&=';
BitXorAssign : '^=';
BitOrAssign : '|=';
PowerAssign : '**=';
NullishCoalescingAssign : '??=';
ARROW : '=>';

/// Null Literals

NullLiteral: 'null';

/// Boolean Literals

BooleanLiteral: 'true' | 'false';

/// Numeric Literals

DecimalLiteral:
DecimalIntegerLiteral '.' [0-9] [0-9_]* ExponentPart?
| '.' [0-9] [0-9_]* ExponentPart?
| DecimalIntegerLiteral ExponentPart?
;

/// Numeric Literals

HexIntegerLiteral : '0' [xX] [0-9a-fA-F] HexDigit*;
OctalIntegerLiteral : '0' [0-7]+ {!this.IsStrictMode()}?;
OctalIntegerLiteral2 : '0' [oO] [0-7] [_0-7]*;
BinaryIntegerLiteral : '0' [bB] [01] [_01]*;

BigHexIntegerLiteral : '0' [xX] [0-9a-fA-F] HexDigit* 'n';
BigOctalIntegerLiteral : '0' [oO] [0-7] [_0-7]* 'n';
BigBinaryIntegerLiteral : '0' [bB] [01] [_01]* 'n';
BigDecimalIntegerLiteral : DecimalIntegerLiteral 'n';

/// Keywords

Break : 'break';
Do : 'do';
Instanceof : 'instanceof';
Typeof : 'typeof';
Case : 'case';
Else : 'else';
New : 'new';
Var : 'var';
Catch : 'catch';
Finally : 'finally';
Return : 'return';
Void : 'void';
Continue : 'continue';
For : 'for';
Switch : 'switch';
While : 'while';
Debugger : 'debugger';
Function_ : 'function';
This : 'this';
With : 'with';
Default : 'default';
If : 'if';
Throw : 'throw';
Delete : 'delete';
In : 'in';
Try : 'try';
As : 'as';
From : 'from';
Of : 'of';
Yield : 'yield';
YieldStar : 'yield*';

/// Future Reserved Words

Class : 'class';
Enum : 'enum';
Extends : 'extends';
Super : 'super';
Const : 'const';
Export : 'export';
Import : 'import';

Async : 'async';
Await : 'await';

/// The following tokens are also considered to be FutureReservedWords
/// when parsing strict mode

Implements : 'implements' {this.IsStrictMode()}?;
StrictLet : 'let' {this.IsStrictMode()}?;
NonStrictLet : 'let' {!this.IsStrictMode()}?;
Private : 'private' {this.IsStrictMode()}?;
Public : 'public' {this.IsStrictMode()}?;
Interface : 'interface' {this.IsStrictMode()}?;
Package : 'package' {this.IsStrictMode()}?;
Protected : 'protected' {this.IsStrictMode()}?;
Static : 'static' {this.IsStrictMode()}?;

/// Identifier Names and Identifiers

Identifier: IdentifierStart IdentifierPart*;
/// String Literals
StringLiteral:
('"' DoubleStringCharacter* '"' | '\'' SingleStringCharacter* '\'') {this.ProcessStringLiteral();}
;

BackTick: '`' -> pushMode(TEMPLATE);

WhiteSpaces: [\t\u000B\u000C\u0020\u00A0]+ -> channel(HIDDEN);

LineTerminator: [\r\n\u2028\u2029] -> channel(HIDDEN);

/// Comments

HtmlComment : '<!--' .*? '-->' -> channel(HIDDEN);
CDataComment : '<![CDATA[' .*? ']]>' -> channel(HIDDEN);
UnexpectedCharacter : . -> channel(ERROR);

mode TEMPLATE;

BackTickInside : '`' -> type(BackTick), popMode;
TemplateStringStartExpression : '${' {this.ProcessTemplateOpenBrace();} -> pushMode(DEFAULT_MODE);
TemplateStringAtom : ~[`];

// Fragment rules

fragment DoubleStringCharacter: ~["\\\r\n] | '\\' EscapeSequence | LineContinuation;
fragment SingleStringCharacter: ~['\\\r\n] | '\\' EscapeSequence | LineContinuation;
fragment EscapeSequence:
CharacterEscapeSequence
| '0' // no digit ahead! TODO
| HexEscapeSequence
| UnicodeEscapeSequence
| ExtendedUnicodeEscapeSequence
;
fragment CharacterEscapeSequence: SingleEscapeCharacter | NonEscapeCharacter;
fragment HexEscapeSequence: 'x' HexDigit HexDigit;
fragment UnicodeEscapeSequence:
'u' HexDigit HexDigit HexDigit HexDigit
| 'u' '{' HexDigit HexDigit+ '}'
;
fragment ExtendedUnicodeEscapeSequence: 'u' '{' HexDigit+ '}';
fragment SingleEscapeCharacter: ['"\\bfnrtv];

fragment NonEscapeCharacter: ~['"\\bfnrtv0-9xu\r\n];
fragment EscapeCharacter: SingleEscapeCharacter | [0-9] | [xu];
fragment LineContinuation: '\\' [\r\n\u2028\u2029]+;
fragment HexDigit: [_0-9a-fA-F];
fragment DecimalIntegerLiteral: '0' | [1-9] [0-9_]*;
fragment ExponentPart: [eE] [+-]? [0-9_]+;
fragment IdentifierPart: IdentifierStart | [\p{Mn}] | [\p{Nd}] | [\p{Pc}] | '\u200C' | '\u200D';
fragment IdentifierStart: [\p{L}] | [$_] | '\\' UnicodeEscapeSequence;
fragment RegularExpressionFirstChar:
~[*\r\n\u2028\u2029\\/[]
| RegularExpressionBackslashSequence
| '[' RegularExpressionClassChar* ']'
;
fragment RegularExpressionChar:
~[\r\n\u2028\u2029\\/[]
| RegularExpressionBackslashSequence
| '[' RegularExpressionClassChar* ']'
;
fragment RegularExpressionClassChar: ~[\r\n\u2028\u2029\]\\] | RegularExpressionBackslashSequence;
fragment RegularExpressionBackslashSequence: '\\' ~[\r\n\u2028\u2029];
Loading

0 comments on commit 3b2ad77

Please sign in to comment.