/*
 * [The "BSD license"]
 * Copyright (c) 2014 Terence Parr
 * Copyright (c) 2014 Sam Harwell
 * Copyright (c) 2017 Chan Chung Kwong
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * A Java 9 grammar for ANTLR 4 derived from the Java Language Specification
 * chapter 19.
 *
 * NOTE: This grammar results in a generated parser that is much slower
 *       than the Java 7 grammar in the grammars-v4/java directory. This
 *       one is, however, extremely close to the spec.
 *
 * You can test with
 *
 *  $ antlr4 Java9.g4
 *  $ javac *.java
 *  $ grun Java9 compilationUnit *.java
 *
 * Or,
~/antlr/code/grammars-v4/java9 $ java Test .
/Users/parrt/antlr/code/grammars-v4/java9/./Java9BaseListener.java
/Users/parrt/antlr/code/grammars-v4/java9/./Java9Lexer.java
/Users/parrt/antlr/code/grammars-v4/java9/./Java9Listener.java
/Users/parrt/antlr/code/grammars-v4/java9/./Java9Parser.java
/Users/parrt/antlr/code/grammars-v4/java9/./Test.java
Total lexer+parser time 30844ms.
~/antlr/code/grammars-v4/java9 $ java Test examples/module-info.java
/home/kwong/projects/grammars-v4/java9/examples/module-info.java
Total lexer+parser time 914ms.
~/antlr/code/grammars-v4/java9 $ java Test examples/TryWithResourceDemo.java
/home/kwong/projects/grammars-v4/java9/examples/TryWithResourceDemo.java
Total lexer+parser time 3634ms.
~/antlr/code/grammars-v4/java9 $ java Test examples/helloworld.java
/home/kwong/projects/grammars-v4/java9/examples/helloworld.java
Total lexer+parser time 2497ms.

 */

// $antlr-format alignTrailingComments true, columnLimit 150, maxEmptyLinesToKeep 1, reflowComments false, useTab false
// $antlr-format allowShortRulesOnASingleLine true, allowShortBlocksOnASingleLine true, minEmptyLines 0, alignSemicolons ownLine
// $antlr-format alignColons trailing, singleLineOverrulesHangingColon true, alignLexerCommands true, alignLabels true, alignTrailers true

lexer grammar Java9Lexer;

// The generated lexer class extends Java9LexerBase, which is not defined in this file.
// NOTE(review): the Check1()..Check4() predicates used by the identifier rules are presumably
// implemented in that base class — confirm.
options {
    superClass = Java9LexerBase;
}

// LEXER

// §3.9 Keywords
//
// One token type per keyword. These rules are declared before Identifier so that, when a keyword
// and Identifier match the same text, the keyword rule wins (the lexer breaks equal-length ties in
// favor of the rule declared first). Longer text such as 'abstracts' still lexes as Identifier
// because the longest match is preferred.
//
// EXPORTS, MODULE, OPEN, OPERNS, PROVIDES, REQUIRES, TO, TRANSITIVE, USES and WITH are the words
// used by Java 9 module declarations.
//
// NOTE(review): OPERNS matches 'opens'; the name looks like a misspelling of OPENS. It is left
// unchanged here because other grammars refer to tokens by name — confirm before renaming.

ABSTRACT     : 'abstract';
ASSERT       : 'assert';
BOOLEAN      : 'boolean';
BREAK        : 'break';
BYTE         : 'byte';
CASE         : 'case';
CATCH        : 'catch';
CHAR         : 'char';
CLASS        : 'class';
CONST        : 'const';
CONTINUE     : 'continue';
DEFAULT      : 'default';
DO           : 'do';
DOUBLE       : 'double';
ELSE         : 'else';
ENUM         : 'enum';
EXPORTS      : 'exports';
EXTENDS      : 'extends';
FINAL        : 'final';
FINALLY      : 'finally';
FLOAT        : 'float';
FOR          : 'for';
IF           : 'if';
GOTO         : 'goto';
IMPLEMENTS   : 'implements';
IMPORT       : 'import';
INSTANCEOF   : 'instanceof';
INT          : 'int';
INTERFACE    : 'interface';
LONG         : 'long';
MODULE       : 'module';
NATIVE       : 'native';
NEW          : 'new';
OPEN         : 'open';
OPERNS       : 'opens';
PACKAGE      : 'package';
PRIVATE      : 'private';
PROTECTED    : 'protected';
PROVIDES     : 'provides';
PUBLIC       : 'public';
REQUIRES     : 'requires';
RETURN       : 'return';
SHORT        : 'short';
STATIC       : 'static';
STRICTFP     : 'strictfp';
SUPER        : 'super';
SWITCH       : 'switch';
SYNCHRONIZED : 'synchronized';
THIS         : 'this';
THROW        : 'throw';
THROWS       : 'throws';
TO           : 'to';
TRANSIENT    : 'transient';
TRANSITIVE   : 'transitive';
TRY          : 'try';
USES         : 'uses';
VOID         : 'void';
VOLATILE     : 'volatile';
WHILE        : 'while';
WITH         : 'with';
UNDER_SCORE  : '_'; //Introduced in Java 9

// §3.10.1 Integer Literals

// An integer literal is a decimal, hexadecimal, octal or binary numeral, each optionally followed
// by an 'l' / 'L' suffix. Only IntegerLiteral produces a token; the fragment rules are reusable
// pieces that never produce tokens on their own.
IntegerLiteral:
    DecimalIntegerLiteral
    | HexIntegerLiteral
    | OctalIntegerLiteral
    | BinaryIntegerLiteral
;

fragment DecimalIntegerLiteral: DecimalNumeral IntegerTypeSuffix?;

fragment HexIntegerLiteral: HexNumeral IntegerTypeSuffix?;

fragment OctalIntegerLiteral: OctalNumeral IntegerTypeSuffix?;

fragment BinaryIntegerLiteral: BinaryNumeral IntegerTypeSuffix?;

// Suffix marking the literal as long.
fragment IntegerTypeSuffix: [lL];

// Decimal numeral: a lone '0', or a non-zero digit optionally followed by further digits.
// Underscores may appear between digits but never as the first or last character.
fragment DecimalNumeral: '0' | NonZeroDigit (Digits? | Underscores Digits);

// A run that begins and ends with a digit; digits and underscores may be mixed in between.
fragment Digits: Digit (DigitsAndUnderscores? Digit)?;

fragment Digit: '0' | NonZeroDigit;

fragment NonZeroDigit: [1-9];

fragment DigitsAndUnderscores: DigitOrUnderscore+;

fragment DigitOrUnderscore: Digit | '_';

fragment Underscores: '_'+;

// Hexadecimal numeral: '0x' / '0X' followed by at least one hex digit.
fragment HexNumeral: '0' [xX] HexDigits;

// A run that begins and ends with a hex digit; hex digits and underscores may be mixed in between.
fragment HexDigits: HexDigit (HexDigitsAndUnderscores? HexDigit)?;

fragment HexDigit: [0-9a-fA-F];

fragment HexDigitsAndUnderscores: HexDigitOrUnderscore+;

fragment HexDigitOrUnderscore: HexDigit | '_';

// Octal numeral: a leading '0', optional underscores, then at least one octal digit.
fragment OctalNumeral: '0' Underscores? OctalDigits;

// A run that begins and ends with an octal digit; octal digits and underscores may be mixed in between.
fragment OctalDigits: OctalDigit (OctalDigitsAndUnderscores? OctalDigit)?;

fragment OctalDigit: [0-7];

fragment OctalDigitsAndUnderscores: OctalDigitOrUnderscore+;

fragment OctalDigitOrUnderscore: OctalDigit | '_';

// Binary numeral: '0b' / '0B' followed by at least one binary digit.
fragment BinaryNumeral: '0' [bB] BinaryDigits;

// A run that begins and ends with a binary digit; binary digits and underscores may be mixed in between.
fragment BinaryDigits: BinaryDigit (BinaryDigitsAndUnderscores? BinaryDigit)?;

fragment BinaryDigit: [01];

fragment BinaryDigitsAndUnderscores: BinaryDigitOrUnderscore+;

fragment BinaryDigitOrUnderscore: BinaryDigit | '_';

// §3.10.2 Floating-Point Literals

// Only FloatingPointLiteral produces a token; everything else in this section is a fragment.
FloatingPointLiteral: DecimalFloatingPointLiteral | HexadecimalFloatingPointLiteral;

// Four accepted shapes, in order: digits followed by '.', a leading '.', digits with an exponent,
// and digits with only a type suffix. Plain digits with none of these are not matched here.
fragment DecimalFloatingPointLiteral:
    Digits '.' Digits? ExponentPart? FloatTypeSuffix?
    | '.' Digits ExponentPart? FloatTypeSuffix?
    | Digits ExponentPart FloatTypeSuffix?
    | Digits FloatTypeSuffix
;

fragment ExponentPart: ExponentIndicator SignedInteger;

fragment ExponentIndicator: [eE];

fragment SignedInteger: Sign? Digits;

fragment Sign: [+-];

fragment FloatTypeSuffix: [fFdD];

// The binary exponent ('p' / 'P' part) is mandatory for hexadecimal floating-point literals.
fragment HexadecimalFloatingPointLiteral: HexSignificand BinaryExponent FloatTypeSuffix?;

// Either a hex numeral with an optional trailing '.', or '0x' with optional integer digits,
// a '.', and at least one fractional hex digit.
fragment HexSignificand: HexNumeral '.'? | '0' [xX] HexDigits? '.' HexDigits;

fragment BinaryExponent: BinaryExponentIndicator SignedInteger;

fragment BinaryExponentIndicator: [pP];

// §3.10.3 Boolean Literals

// Declared before Identifier so that 'true' and 'false' lex as BooleanLiteral rather than Identifier.
BooleanLiteral: 'true' | 'false';

// §3.10.4 Character Literals

// A single-quoted literal holding either one ordinary character or one escape sequence.
CharacterLiteral: '\'' SingleCharacter '\'' | '\'' EscapeSequence '\'';

// Any character except a single quote, a backslash, CR or LF.
fragment SingleCharacter: ~['\\\r\n];

// §3.10.5 String Literals

// A double-quoted literal; the contents may be empty.
StringLiteral: '"' StringCharacters? '"';

fragment StringCharacters: StringCharacter+;

// Any character except a double quote, a backslash, CR or LF, or else an escape sequence.
fragment StringCharacter: ~["\\\r\n] | EscapeSequence;

// §3.10.6 Escape Sequences for Character and String Literals

// The optional 'u005c' after the leading backslash also accepts the form in which the escape's
// backslash is spelled as the Unicode escape \u005c (for example, \u005cn is matched like \n).
fragment EscapeSequence:
    '\\' 'u005c'? [btnfr"'\\]
    | OctalEscape
    | UnicodeEscape // This is not in the spec but prevents having to preprocess the input
;

// One, two or three octal digits after the backslash; the three-digit form must start with 0-3.
fragment OctalEscape:
    '\\' 'u005c'? OctalDigit
    | '\\' 'u005c'? OctalDigit OctalDigit
    | '\\' 'u005c'? ZeroToThree OctalDigit OctalDigit
;

fragment ZeroToThree: [0-3];

// This is not in the spec but prevents having to preprocess the input
// A backslash, one or more 'u' characters, then exactly four hex digits.
fragment UnicodeEscape: '\\' 'u'+ HexDigit HexDigit HexDigit HexDigit;

// §3.10.7 The Null Literal

// Declared before Identifier so that 'null' lexes as NullLiteral rather than Identifier.
NullLiteral: 'null';

// §3.11 Separators

// ELLIPSIS and COLONCOLON need no special ordering relative to DOT and COLON: the lexer always
// prefers the longest match.
LPAREN     : '(';
RPAREN     : ')';
LBRACE     : '{';
RBRACE     : '}';
LBRACK     : '[';
RBRACK     : ']';
SEMI       : ';';
COMMA      : ',';
DOT        : '.';
ELLIPSIS   : '...';
AT         : '@';
COLONCOLON : '::';

// §3.12 Operators

// Multi-character operators may follow their single-character prefixes in this list because the
// lexer always prefers the longest match.
ASSIGN   : '=';
GT       : '>';
LT       : '<';
BANG     : '!';
TILDE    : '~';
QUESTION : '?';
COLON    : ':';
ARROW    : '->';
EQUAL    : '==';
LE       : '<=';
GE       : '>=';
NOTEQUAL : '!=';
AND      : '&&';
OR       : '||';
INC      : '++';
DEC      : '--';
ADD      : '+';
SUB      : '-';
MUL      : '*';
DIV      : '/';
BITAND   : '&';
BITOR    : '|';
CARET    : '^';
MOD      : '%';

// The shift operators are intentionally not tokens, so '<<', '>>' and '>>>' lex as runs of LT / GT.
// NOTE(review): presumably this keeps the '>' characters closing nested type arguments from being
// merged into a shift token, with the parser assembling shifts from LT / GT — confirm against the
// parser grammar.
//LSHIFT : '<<';
//RSHIFT : '>>';
//URSHIFT : '>>>';

ADD_ASSIGN     : '+=';
SUB_ASSIGN     : '-=';
MUL_ASSIGN     : '*=';
DIV_ASSIGN     : '/=';
AND_ASSIGN     : '&=';
OR_ASSIGN      : '|=';
XOR_ASSIGN     : '^=';
MOD_ASSIGN     : '%=';
LSHIFT_ASSIGN  : '<<=';
RSHIFT_ASSIGN  : '>>=';
URSHIFT_ASSIGN : '>>>=';

// §3.8 Identifiers (must appear after all keywords in the grammar)

// A Java letter followed by any number of Java letters or digits. For text that a keyword or
// literal rule matches with the same length, the earlier rule wins, which is why this rule is
// placed after them.
Identifier: JavaLetter JavaLetterOrDigit*;

// Check1() and Check2() are semantic predicates supplied outside this file; an alternative is
// only viable when its predicate returns true.
// NOTE(review): presumably they test whether the character(s) just matched may start a Java
// identifier — confirm in the lexer base class.
// NOTE(review): the negated set in the second alternative removes only the high surrogates
// (D800-DBFF); low surrogates (DC00-DFFF) are still passed to Check1().
fragment JavaLetter:
    [a-zA-Z$_] // these are the "java letters" below 0x7F
    | // covers all characters above 0x7F which are not a surrogate
    ~[\u0000-\u007F\uD800-\uDBFF] { Check1() }?
    | // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF
    [\uD800-\uDBFF] [\uDC00-\uDFFF] { Check2() }?
;

// Same structure as JavaLetter, with ASCII digits added and Check3() / Check4() as the predicates.
// NOTE(review): presumably they test whether the character(s) may continue a Java identifier —
// confirm in the lexer base class.
fragment JavaLetterOrDigit:
    [a-zA-Z0-9$_] // these are the "java letters or digits" below 0x7F
    | // covers all characters above 0x7F which are not a surrogate
    ~[\u0000-\u007F\uD800-\uDBFF] { Check3() }?
    | // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF
    [\uD800-\uDBFF] [\uDC00-\uDFFF] { Check4() }?
;

//
// Whitespace and comments
//

// Runs of space, tab, CR, LF and form feed are discarded and never reach the parser.
WS: [ \t\r\n\u000C]+ -> skip;

// Block comment: the non-greedy '.*?' stops at the first '*/'. The token is kept, but on the
// HIDDEN channel.
COMMENT: '/*' .*? '*/' -> channel(HIDDEN);

// Line comment: everything up to, but not including, the line terminator; also on the HIDDEN channel.
LINE_COMMENT: '//' ~[\r\n]* -> channel(HIDDEN);