/*
 * [The "BSD license"]
 *  Copyright (c) 2014 Terence Parr
 *  Copyright (c) 2014 Sam Harwell
 *  Copyright (c) 2017 Chan Chung Kwong
 *  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *
 *  1. Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *  3. The name of the author may not be used to endorse or promote products
 *     derived from this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * A Java 9 grammar for ANTLR 4 derived from the Java Language Specification
 * chapter 19.
 *
 * NOTE: This grammar results in a generated parser that is much slower
 *       than the Java 7 grammar in the grammars-v4/java directory. This
 *       one is, however, extremely close to the spec.
 *
 * You can test with
 *
 *  $ antlr4 Java9.g4
 *  $ javac *.java
 *  $ grun Java9 compilationUnit *.java
 *
 * Or,
 *
    ~/antlr/code/grammars-v4/java9 $ java Test .
    /Users/parrt/antlr/code/grammars-v4/java9/./Java9BaseListener.java
    /Users/parrt/antlr/code/grammars-v4/java9/./Java9Lexer.java
    /Users/parrt/antlr/code/grammars-v4/java9/./Java9Listener.java
    /Users/parrt/antlr/code/grammars-v4/java9/./Java9Parser.java
    /Users/parrt/antlr/code/grammars-v4/java9/./Test.java
    Total lexer+parser time 30844ms.
    ~/antlr/code/grammars-v4/java9 $ java Test examples/module-info.java
    /home/kwong/projects/grammars-v4/java9/examples/module-info.java
    Total lexer+parser time 914ms.
    ~/antlr/code/grammars-v4/java9 $ java Test examples/TryWithResourceDemo.java
    /home/kwong/projects/grammars-v4/java9/examples/TryWithResourceDemo.java
    Total lexer+parser time 3634ms.
    ~/antlr/code/grammars-v4/java9 $ java Test examples/helloworld.java
    /home/kwong/projects/grammars-v4/java9/examples/helloworld.java
    Total lexer+parser time 2497ms.
 */

// $antlr-format alignTrailingComments true, columnLimit 150, maxEmptyLinesToKeep 1, reflowComments false, useTab false
// $antlr-format allowShortRulesOnASingleLine true, allowShortBlocksOnASingleLine true, minEmptyLines 0, alignSemicolons ownLine
// $antlr-format alignColons trailing, singleLineOverrulesHangingColon true, alignLexerCommands true, alignLabels true, alignTrailers true

lexer grammar Java9Lexer;

// The generated lexer extends Java9LexerBase. The Check1()..Check4() predicates
// used by the identifier fragments are not defined in this grammar; presumably
// they are supplied by that superclass -- TODO confirm.
options {
    superClass = Java9LexerBase;
}

// LEXER

// §3.9 Keywords
//
// Every keyword rule is declared ahead of Identifier. When two rules match the
// same longest text, ANTLR picks the rule that appears first, so these literals
// are tokenized as keywords rather than as identifiers.

ABSTRACT     : 'abstract';
ASSERT       : 'assert';
BOOLEAN      : 'boolean';
BREAK        : 'break';
BYTE         : 'byte';
CASE         : 'case';
CATCH        : 'catch';
CHAR         : 'char';
CLASS        : 'class';
CONST        : 'const';
CONTINUE     : 'continue';
DEFAULT      : 'default';
DO           : 'do';
DOUBLE       : 'double';
ELSE         : 'else';
ENUM         : 'enum';
EXPORTS      : 'exports';
EXTENDS      : 'extends';
FINAL        : 'final';
FINALLY      : 'finally';
FLOAT        : 'float';
FOR          : 'for';
IF           : 'if';
GOTO         : 'goto';
IMPLEMENTS   : 'implements';
IMPORT       : 'import';
INSTANCEOF   : 'instanceof';
INT          : 'int';
INTERFACE    : 'interface';
LONG         : 'long';
MODULE       : 'module';
NATIVE       : 'native';
NEW          : 'new';
OPEN         : 'open';
// NOTE(review): OPERNS looks like a misspelling of OPENS. The name is left
// untouched because token names are part of the generated lexer's interface.
OPERNS       : 'opens';
PACKAGE      : 'package';
PRIVATE      : 'private';
PROTECTED    : 'protected';
PROVIDES     : 'provides';
PUBLIC       : 'public';
REQUIRES     : 'requires';
RETURN       : 'return';
SHORT        : 'short';
STATIC       : 'static';
STRICTFP     : 'strictfp';
SUPER        : 'super';
SWITCH       : 'switch';
SYNCHRONIZED : 'synchronized';
THIS         : 'this';
THROW        : 'throw';
THROWS       : 'throws';
TO           : 'to';
TRANSIENT    : 'transient';
TRANSITIVE   : 'transitive';
TRY          : 'try';
USES         : 'uses';
VOID         : 'void';
VOLATILE     : 'volatile';
WHILE        : 'while';
WITH         : 'with';
UNDER_SCORE  : '_'; //Introduced in Java 9

// §3.10.1 Integer Literals
//
// Only IntegerLiteral is emitted as a token; the fragments below are building
// blocks. Each numeral may be followed by an optional 'l'/'L' suffix.

IntegerLiteral:
    DecimalIntegerLiteral
    | HexIntegerLiteral
    | OctalIntegerLiteral
    | BinaryIntegerLiteral
;

fragment DecimalIntegerLiteral: DecimalNumeral IntegerTypeSuffix?;

fragment HexIntegerLiteral: HexNumeral IntegerTypeSuffix?;

fragment OctalIntegerLiteral: OctalNumeral IntegerTypeSuffix?;

fragment BinaryIntegerLiteral: BinaryNumeral IntegerTypeSuffix?;

fragment IntegerTypeSuffix: [lL];

// A lone '0', or a non-zero digit optionally followed by more digits
// (underscores may separate digits but a numeral never ends with one).
fragment DecimalNumeral: '0' | NonZeroDigit (Digits? | Underscores Digits);

// One digit, or a run that starts and ends with a digit and may contain
// digits and underscores in between.
fragment Digits: Digit (DigitsAndUnderscores? Digit)?;

fragment Digit: '0' | NonZeroDigit;

fragment NonZeroDigit: [1-9];

fragment DigitsAndUnderscores: DigitOrUnderscore+;

fragment DigitOrUnderscore: Digit | '_';

fragment Underscores: '_'+;

fragment HexNumeral: '0' [xX] HexDigits;

fragment HexDigits: HexDigit (HexDigitsAndUnderscores? HexDigit)?;

fragment HexDigit: [0-9a-fA-F];

fragment HexDigitsAndUnderscores: HexDigitOrUnderscore+;

fragment HexDigitOrUnderscore: HexDigit | '_';

fragment OctalNumeral: '0' Underscores? OctalDigits;

fragment OctalDigits: OctalDigit (OctalDigitsAndUnderscores? OctalDigit)?;

fragment OctalDigit: [0-7];

fragment OctalDigitsAndUnderscores: OctalDigitOrUnderscore+;

fragment OctalDigitOrUnderscore: OctalDigit | '_';

fragment BinaryNumeral: '0' [bB] BinaryDigits;

fragment BinaryDigits: BinaryDigit (BinaryDigitsAndUnderscores? BinaryDigit)?;

fragment BinaryDigit: [01];

fragment BinaryDigitsAndUnderscores: BinaryDigitOrUnderscore+;

fragment BinaryDigitOrUnderscore: BinaryDigit | '_';

// §3.10.2 Floating-Point Literals

FloatingPointLiteral: DecimalFloatingPointLiteral | HexadecimalFloatingPointLiteral;

// Four shapes: digits with a dot, leading dot, digits with an exponent, and
// digits with only a type suffix. Plain digits without any of these are left
// to IntegerLiteral.
fragment DecimalFloatingPointLiteral:
    Digits '.' Digits? ExponentPart? FloatTypeSuffix?
    | '.' Digits ExponentPart? FloatTypeSuffix?
    | Digits ExponentPart FloatTypeSuffix?
    | Digits FloatTypeSuffix
;

fragment ExponentPart: ExponentIndicator SignedInteger;

fragment ExponentIndicator: [eE];

fragment SignedInteger: Sign? Digits;

fragment Sign: [+-];

fragment FloatTypeSuffix: [fFdD];

// The binary exponent is mandatory for hexadecimal floating-point literals.
fragment HexadecimalFloatingPointLiteral: HexSignificand BinaryExponent FloatTypeSuffix?;

fragment HexSignificand: HexNumeral '.'? | '0' [xX] HexDigits? '.' HexDigits;

fragment BinaryExponent: BinaryExponentIndicator SignedInteger;

fragment BinaryExponentIndicator: [pP];

// §3.10.3 Boolean Literals

BooleanLiteral: 'true' | 'false';

// §3.10.4 Character Literals

CharacterLiteral: '\'' SingleCharacter '\'' | '\'' EscapeSequence '\'';

// Any character except a single quote, a backslash, CR or LF.
fragment SingleCharacter: ~['\\\r\n];

// §3.10.5 String Literals

StringLiteral: '"' StringCharacters? '"';

fragment StringCharacters: StringCharacter+;

// Any character except a double quote, a backslash, CR or LF, or an escape.
fragment StringCharacter: ~["\\\r\n] | EscapeSequence;

// §3.10.6 Escape Sequences for Character and String Literals
//
// The optional 'u005c' after the backslash also accepts the escape when its
// backslash is written as the Unicode-escape spelling of a backslash.

fragment EscapeSequence:
    '\\' 'u005c'? [btnfr"'\\]
    | OctalEscape
    | UnicodeEscape // This is not in the spec but prevents having to preprocess the input
;

// One, two, or three octal digits; the three-digit form must start with 0-3.
fragment OctalEscape:
    '\\' 'u005c'? OctalDigit
    | '\\' 'u005c'? OctalDigit OctalDigit
    | '\\' 'u005c'? ZeroToThree OctalDigit OctalDigit
;

fragment ZeroToThree: [0-3];

// This is not in the spec but prevents having to preprocess the input
fragment UnicodeEscape: '\\' 'u'+ HexDigit HexDigit HexDigit HexDigit;

// §3.10.7 The Null Literal

NullLiteral: 'null';

// §3.11 Separators

LPAREN     : '(';
RPAREN     : ')';
LBRACE     : '{';
RBRACE     : '}';
LBRACK     : '[';
RBRACK     : ']';
SEMI       : ';';
COMMA      : ',';
DOT        : '.';
ELLIPSIS   : '...';
AT         : '@';
COLONCOLON : '::';

// §3.12 Operators

ASSIGN   : '=';
GT       : '>';
LT       : '<';
BANG     : '!';
TILDE    : '~';
QUESTION : '?';
COLON    : ':';
ARROW    : '->';
EQUAL    : '==';
LE       : '<=';
GE       : '>=';
NOTEQUAL : '!=';
AND      : '&&';
OR       : '||';
INC      : '++';
DEC      : '--';
ADD      : '+';
SUB      : '-';
MUL      : '*';
DIV      : '/';
BITAND   : '&';
BITOR    : '|';
CARET    : '^';
MOD      : '%';

// The shift operators are intentionally left disabled, so '<<', '>>' and '>>>'
// reach the parser as consecutive LT / GT tokens.
// NOTE(review): presumably this keeps '>>' usable as two closing angle brackets
// of nested generics -- confirm against the parser grammar.
//LSHIFT : '<<';
//RSHIFT : '>>';
//URSHIFT : '>>>';

ADD_ASSIGN     : '+=';
SUB_ASSIGN     : '-=';
MUL_ASSIGN     : '*=';
DIV_ASSIGN     : '/=';
AND_ASSIGN     : '&=';
OR_ASSIGN      : '|=';
XOR_ASSIGN     : '^=';
MOD_ASSIGN     : '%=';
LSHIFT_ASSIGN  : '<<=';
RSHIFT_ASSIGN  : '>>=';
URSHIFT_ASSIGN : '>>>=';

// §3.8 Identifiers (must appear after all keywords in the grammar)

Identifier: JavaLetter JavaLetterOrDigit*;

// Check1()/Check2() are semantic predicates evaluated after the characters are
// matched; their implementation is outside this file (see the options note).
fragment JavaLetter:
    [a-zA-Z$_] // these are the "java letters" below 0x7F
    | // covers all characters above 0x7F which are not a surrogate
    ~[\u0000-\u007F\uD800-\uDBFF] { Check1() }?
    | // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF
    [\uD800-\uDBFF] [\uDC00-\uDFFF] { Check2() }?
;

// Same structure as JavaLetter, with ASCII digits added and Check3()/Check4()
// as the predicates.
fragment JavaLetterOrDigit:
    [a-zA-Z0-9$_] // these are the "java letters or digits" below 0x7F
    | // covers all characters above 0x7F which are not a surrogate
    ~[\u0000-\u007F\uD800-\uDBFF] { Check3() }?
    | // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF
    [\uD800-\uDBFF] [\uDC00-\uDFFF] { Check4() }?
;

//
// Whitespace and comments
//

// Whitespace (space, tab, CR, LF, form feed) is discarded entirely.
WS: [ \t\r\n\u000C]+ -> skip;

// Comments are kept in the token stream but routed to the HIDDEN channel.
// The block-comment body is matched non-greedily, so it ends at the first
// closing delimiter.
COMMENT: '/*' .*? '*/' -> channel(HIDDEN);

LINE_COMMENT: '//' ~[\r\n]* -> channel(HIDDEN);