Files
ironjava/IronJava.Core/Grammar/Java9Lexer.g4
2025-07-21 22:29:22 -04:00

361 lines
10 KiB
ANTLR

/*
* [The "BSD license"]
* Copyright (c) 2014 Terence Parr
* Copyright (c) 2014 Sam Harwell
* Copyright (c) 2017 Chan Chung Kwong
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* A Java 9 grammar for ANTLR 4 derived from the Java Language Specification
* chapter 19.
*
* NOTE: This grammar results in a generated parser that is much slower
* than the Java 7 grammar in the grammars-v4/java directory. This
* one is, however, extremely close to the spec.
*
* You can test with
*
* $ antlr4 Java9.g4
* $ javac *.java
* $ grun Java9 compilationUnit *.java
*
* Or,
~/antlr/code/grammars-v4/java9 $ java Test .
/Users/parrt/antlr/code/grammars-v4/java9/./Java9BaseListener.java
/Users/parrt/antlr/code/grammars-v4/java9/./Java9Lexer.java
/Users/parrt/antlr/code/grammars-v4/java9/./Java9Listener.java
/Users/parrt/antlr/code/grammars-v4/java9/./Java9Parser.java
/Users/parrt/antlr/code/grammars-v4/java9/./Test.java
Total lexer+parser time 30844ms.
~/antlr/code/grammars-v4/java9 $ java Test examples/module-info.java
/home/kwong/projects/grammars-v4/java9/examples/module-info.java
Total lexer+parser time 914ms.
~/antlr/code/grammars-v4/java9 $ java Test examples/TryWithResourceDemo.java
/home/kwong/projects/grammars-v4/java9/examples/TryWithResourceDemo.java
Total lexer+parser time 3634ms.
~/antlr/code/grammars-v4/java9 $ java Test examples/helloworld.java
/home/kwong/projects/grammars-v4/java9/examples/helloworld.java
Total lexer+parser time 2497ms.
*/
// $antlr-format alignTrailingComments true, columnLimit 150, maxEmptyLinesToKeep 1, reflowComments false, useTab false
// $antlr-format allowShortRulesOnASingleLine true, allowShortBlocksOnASingleLine true, minEmptyLines 0, alignSemicolons ownLine
// $antlr-format alignColons trailing, singleLineOverrulesHangingColon true, alignLexerCommands true, alignLabels true, alignTrailers true
lexer grammar Java9Lexer;
options
{
superClass = Java9LexerBase;
}
// LEXER
// §3.9 Keywords
ABSTRACT : 'abstract';
ASSERT : 'assert';
BOOLEAN : 'boolean';
BREAK : 'break';
BYTE : 'byte';
CASE : 'case';
CATCH : 'catch';
CHAR : 'char';
CLASS : 'class';
CONST : 'const';
CONTINUE : 'continue';
DEFAULT : 'default';
DO : 'do';
DOUBLE : 'double';
ELSE : 'else';
ENUM : 'enum';
EXPORTS : 'exports';
EXTENDS : 'extends';
FINAL : 'final';
FINALLY : 'finally';
FLOAT : 'float';
FOR : 'for';
IF : 'if';
GOTO : 'goto';
IMPLEMENTS : 'implements';
IMPORT : 'import';
INSTANCEOF : 'instanceof';
INT : 'int';
INTERFACE : 'interface';
LONG : 'long';
MODULE : 'module';
NATIVE : 'native';
NEW : 'new';
OPEN : 'open';
OPERNS : 'opens';
PACKAGE : 'package';
PRIVATE : 'private';
PROTECTED : 'protected';
PROVIDES : 'provides';
PUBLIC : 'public';
REQUIRES : 'requires';
RETURN : 'return';
SHORT : 'short';
STATIC : 'static';
STRICTFP : 'strictfp';
SUPER : 'super';
SWITCH : 'switch';
SYNCHRONIZED : 'synchronized';
THIS : 'this';
THROW : 'throw';
THROWS : 'throws';
TO : 'to';
TRANSIENT : 'transient';
TRANSITIVE : 'transitive';
TRY : 'try';
USES : 'uses';
VOID : 'void';
VOLATILE : 'volatile';
WHILE : 'while';
WITH : 'with';
UNDER_SCORE : '_'; //Introduced in Java 9
// §3.10.1 Integer Literals
IntegerLiteral:
DecimalIntegerLiteral
| HexIntegerLiteral
| OctalIntegerLiteral
| BinaryIntegerLiteral
;
fragment DecimalIntegerLiteral: DecimalNumeral IntegerTypeSuffix?;
fragment HexIntegerLiteral: HexNumeral IntegerTypeSuffix?;
fragment OctalIntegerLiteral: OctalNumeral IntegerTypeSuffix?;
fragment BinaryIntegerLiteral: BinaryNumeral IntegerTypeSuffix?;
fragment IntegerTypeSuffix: [lL];
fragment DecimalNumeral: '0' | NonZeroDigit (Digits? | Underscores Digits);
fragment Digits: Digit (DigitsAndUnderscores? Digit)?;
fragment Digit: '0' | NonZeroDigit;
fragment NonZeroDigit: [1-9];
fragment DigitsAndUnderscores: DigitOrUnderscore+;
fragment DigitOrUnderscore: Digit | '_';
fragment Underscores: '_'+;
fragment HexNumeral: '0' [xX] HexDigits;
fragment HexDigits: HexDigit (HexDigitsAndUnderscores? HexDigit)?;
fragment HexDigit: [0-9a-fA-F];
fragment HexDigitsAndUnderscores: HexDigitOrUnderscore+;
fragment HexDigitOrUnderscore: HexDigit | '_';
fragment OctalNumeral: '0' Underscores? OctalDigits;
fragment OctalDigits: OctalDigit (OctalDigitsAndUnderscores? OctalDigit)?;
fragment OctalDigit: [0-7];
fragment OctalDigitsAndUnderscores: OctalDigitOrUnderscore+;
fragment OctalDigitOrUnderscore: OctalDigit | '_';
fragment BinaryNumeral: '0' [bB] BinaryDigits;
fragment BinaryDigits: BinaryDigit (BinaryDigitsAndUnderscores? BinaryDigit)?;
fragment BinaryDigit: [01];
fragment BinaryDigitsAndUnderscores: BinaryDigitOrUnderscore+;
fragment BinaryDigitOrUnderscore: BinaryDigit | '_';
// §3.10.2 Floating-Point Literals
FloatingPointLiteral: DecimalFloatingPointLiteral | HexadecimalFloatingPointLiteral;
fragment DecimalFloatingPointLiteral:
Digits '.' Digits? ExponentPart? FloatTypeSuffix?
| '.' Digits ExponentPart? FloatTypeSuffix?
| Digits ExponentPart FloatTypeSuffix?
| Digits FloatTypeSuffix
;
fragment ExponentPart: ExponentIndicator SignedInteger;
fragment ExponentIndicator: [eE];
fragment SignedInteger: Sign? Digits;
fragment Sign: [+-];
fragment FloatTypeSuffix: [fFdD];
fragment HexadecimalFloatingPointLiteral: HexSignificand BinaryExponent FloatTypeSuffix?;
fragment HexSignificand: HexNumeral '.'? | '0' [xX] HexDigits? '.' HexDigits;
fragment BinaryExponent: BinaryExponentIndicator SignedInteger;
fragment BinaryExponentIndicator: [pP];
// §3.10.3 Boolean Literals
BooleanLiteral: 'true' | 'false';
// §3.10.4 Character Literals
CharacterLiteral: '\'' SingleCharacter '\'' | '\'' EscapeSequence '\'';
fragment SingleCharacter: ~['\\\r\n];
// §3.10.5 String Literals
StringLiteral: '"' StringCharacters? '"';
fragment StringCharacters: StringCharacter+;
fragment StringCharacter: ~["\\\r\n] | EscapeSequence;
// §3.10.6 Escape Sequences for Character and String Literals
fragment EscapeSequence:
'\\' 'u005c'? [btnfr"'\\]
| OctalEscape
| UnicodeEscape // This is not in the spec but prevents having to preprocess the input
;
fragment OctalEscape:
'\\' 'u005c'? OctalDigit
| '\\' 'u005c'? OctalDigit OctalDigit
| '\\' 'u005c'? ZeroToThree OctalDigit OctalDigit
;
fragment ZeroToThree: [0-3];
// This is not in the spec but prevents having to preprocess the input
fragment UnicodeEscape: '\\' 'u'+ HexDigit HexDigit HexDigit HexDigit;
// §3.10.7 The Null Literal
NullLiteral: 'null';
// §3.11 Separators
LPAREN : '(';
RPAREN : ')';
LBRACE : '{';
RBRACE : '}';
LBRACK : '[';
RBRACK : ']';
SEMI : ';';
COMMA : ',';
DOT : '.';
ELLIPSIS : '...';
AT : '@';
COLONCOLON : '::';
// §3.12 Operators
ASSIGN : '=';
GT : '>';
LT : '<';
BANG : '!';
TILDE : '~';
QUESTION : '?';
COLON : ':';
ARROW : '->';
EQUAL : '==';
LE : '<=';
GE : '>=';
NOTEQUAL : '!=';
AND : '&&';
OR : '||';
INC : '++';
DEC : '--';
ADD : '+';
SUB : '-';
MUL : '*';
DIV : '/';
BITAND : '&';
BITOR : '|';
CARET : '^';
MOD : '%';
//LSHIFT : '<<';
//RSHIFT : '>>';
//URSHIFT : '>>>';
ADD_ASSIGN : '+=';
SUB_ASSIGN : '-=';
MUL_ASSIGN : '*=';
DIV_ASSIGN : '/=';
AND_ASSIGN : '&=';
OR_ASSIGN : '|=';
XOR_ASSIGN : '^=';
MOD_ASSIGN : '%=';
LSHIFT_ASSIGN : '<<=';
RSHIFT_ASSIGN : '>>=';
URSHIFT_ASSIGN : '>>>=';
// §3.8 Identifiers (must appear after all keywords in the grammar)
Identifier: JavaLetter JavaLetterOrDigit*;
fragment JavaLetter:
[a-zA-Z$_] // these are the "java letters" below 0x7F
| // covers all characters above 0x7F which are not a surrogate
~[\u0000-\u007F\uD800-\uDBFF] { Check1() }?
| // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF
[\uD800-\uDBFF] [\uDC00-\uDFFF] { Check2() }?
;
fragment JavaLetterOrDigit:
[a-zA-Z0-9$_] // these are the "java letters or digits" below 0x7F
| // covers all characters above 0x7F which are not a surrogate
~[\u0000-\u007F\uD800-\uDBFF] { Check3() }?
| // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF
[\uD800-\uDBFF] [\uDC00-\uDFFF] { Check4() }?
;
//
// Whitespace and comments
//
WS: [ \t\r\n\u000C]+ -> skip;
COMMENT: '/*' .*? '*/' -> channel(HIDDEN);
LINE_COMMENT: '//' ~[\r\n]* -> channel(HIDDEN);