diff --git a/src/main/antlr/PhpLexer.g4 b/src/main/antlr/PhpLexer.g4
new file mode 100644
index 00000000..02152399
--- /dev/null
+++ b/src/main/antlr/PhpLexer.g4
@@ -0,0 +1,347 @@
+/*
+PHP grammar.
+The MIT License (MIT).
+Copyright (c) 2015-2020, Ivan Kochurkin (kvanttt@gmail.com), Positive Technologies.
+Copyright (c) 2019, Thierry Marianne (thierry.marianne@weaving-the-web.org)
+Copyright (c) 2019-2020, Student Main for php7, php8 support.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+lexer grammar PhpLexer;
+
+channels { PhpComments, ErrorLexem, SkipChannel }
+
+options {
+    superClass=PhpLexerBase;
+}
+
+// Default mode: text outside of any tag. Opening constructs push a dedicated mode.
+// All literals in this grammar are lower case; the input is presumably fed through a
+// lower-casing CharStream (see CaseChangingCharStream) - confirm at the call site.
+
+SeaWhitespace:  [ \t\r\n]+ -> channel(HIDDEN);
+HtmlText:       ~[<#]+;
+XmlStart:       '<?xml' -> pushMode(XML);
+PHPStartEcho:   PhpStartEchoFragment -> type(Echo), pushMode(PHP);
+PHPStart:       PhpStartFragment -> channel(SkipChannel), pushMode(PHP);
+// The two flags set below are consumed by PhpLexerBase.PushModeOnHtmlClose(), which
+// selects SCRIPT/PHP or STYLE once the opening tag is closed with '>'.
+HtmlScriptOpen: '<script' { this._scriptTag = true; } -> pushMode(INSIDE);
+HtmlStyleOpen:  '<style' { this._styleTag = true; } -> pushMode(INSIDE);
+HtmlComment:    '<!' '--' .*? '-->' -> channel(HIDDEN);
+HtmlDtd:        '<!' .*? '>';
+HtmlOpen:       '<' -> pushMode(INSIDE);
+// '#!' is a shebang only when the '#' is at the start of input or follows a line break.
+Shebang
+    : '#' { this.IsNewLineOrStart(-2) }? '!' ~[\r\n]*
+    ;
+NumberSign:     '#' ~'<'* -> more;
+Error:          . -> channel(ErrorLexem);
+
+// TODO: parse xml attributes.
+mode XML;
+
+XmlText:        ~'?'+;
+XmlClose:       '?>' -> popMode;
+XmlText2:       '?' -> type(XmlText);
+
+// Inside an HTML tag, between '<' and '>' or '/>'.
+mode INSIDE;
+
+PHPStartEchoInside: PhpStartEchoFragment -> type(Echo), pushMode(PHP);
+PHPStartInside:     PhpStartFragment -> channel(SkipChannel), pushMode(PHP);
+HtmlClose:          '>' { this.PushModeOnHtmlClose(); };
+HtmlSlashClose:     '/>' -> popMode;
+HtmlSlash:          '/';
+HtmlEquals:         '=';
+
+HtmlStartQuoteString:       '\\'? '\'' -> pushMode(HtmlQuoteStringMode);
+HtmlStartDoubleQuoteString: '\\'? '"' -> pushMode(HtmlDoubleQuoteStringMode);
+HtmlHex:                    '#' HexDigit+ ;
+HtmlDecimal:                Digit+;
+HtmlSpace:                  [ \t\r\n]+ -> channel(HIDDEN);
+HtmlName:                   HtmlNameStartChar HtmlNameChar*;
+ErrorInside:                . -> channel(ErrorLexem);
+
+// Single-quoted attribute value; PHP blocks may still open inside it.
+mode HtmlQuoteStringMode;
+
+PHPStartEchoInsideQuoteString: PhpStartEchoFragment -> type(Echo), pushMode(PHP);
+PHPStartInsideQuoteString:     PhpStartFragment -> channel(SkipChannel), pushMode(PHP);
+HtmlEndQuoteString:            '\'' '\''? -> popMode;
+HtmlQuoteString:               ~[<']+;
+ErrorHtmlQuote:                . -> channel(ErrorLexem);
+
+// Double-quoted attribute value; PHP blocks may still open inside it.
+mode HtmlDoubleQuoteStringMode;
+
+PHPStartEchoDoubleQuoteString: PhpStartEchoFragment -> type(Echo), pushMode(PHP);
+PHPStartDoubleQuoteString:     PhpStartFragment -> channel(SkipChannel), pushMode(PHP);
+HtmlEndDoubleQuoteString:      '"' '"'? -> popMode;
+HtmlDoubleQuoteString:         ~[<"]+;
+ErrorHtmlDoubleQuote:          . -> channel(ErrorLexem);
+
+// Parse JavaScript with https://github.com/antlr/grammars-v4/tree/master/javascript if necessary.
+// Php blocks can exist inside Script blocks too.
+mode SCRIPT;
+
+ScriptText:               ~'<'+;
+// TODO: handle JS strings, but handle <?php tags inside them
+//ScriptString:             '"' (~'"' | '\\' ('\r'? '\n' | .))* '"' -> type(ScriptText);
+//ScriptString2:            '\'' (~'\'' | '\\' ('\r'? '\n' | .))* '\'' -> type(ScriptText);
+HtmlScriptClose:          '</script>' -> popMode;
+PHPStartInsideScriptEcho: PhpStartEchoFragment -> type(Echo), pushMode(PHP);
+PHPStartInsideScript:     PhpStartFragment -> channel(SkipChannel), pushMode(PHP);
+ScriptText2:              '<' -> type(ScriptText);
+
+// Everything up to and including the closing style tag is a single token.
+mode STYLE;
+
+StyleBody: .*? '</style>' -> popMode;
+
+mode PHP;
+
+// A PHP block ends with '?>', with '%>' when ASP tags are enabled, or with the closing
+// script tag when the block was opened as a PHP script element. PhpLexerBase.nextToken()
+// post-processes this token.
+PHPEnd:             ('?' | '%' {this.HasAspTags()}?) '>'
+      |             '</script>' {this.HasPhpScriptTag()}?;
+Whitespace:         [ \t\r\n]+ -> channel(SkipChannel);
+MultiLineComment:   '/*' .*? '*/' -> channel(PhpComments);
+SingleLineComment:  '//' -> channel(SkipChannel), pushMode(SingleLineCommentMode);
+ShellStyleComment:  '#' -> channel(SkipChannel), pushMode(SingleLineCommentMode);
+AttributeStart:     '#[';
+
+// Keywords.
+Abstract:           'abstract';
+Array:              'array';
+As:                 'as';
+BinaryCast:         'binary';
+BoolType:           'bool' 'ean'?;
+BooleanConstant:    'true'
+               |    'false';
+Break:              'break';
+Callable:           'callable';
+Case:               'case';
+Catch:              'catch';
+Class:              'class';
+Clone:              'clone';
+Const:              'const';
+Continue:           'continue';
+Declare:            'declare';
+Default:            'default';
+Do:                 'do';
+DoubleCast:         'real';
+DoubleType:         'double';
+Echo:               'echo';
+Else:               'else';
+ElseIf:             'elseif';
+Empty:              'empty';
+EndDeclare:         'enddeclare';
+EndFor:             'endfor';
+EndForeach:         'endforeach';
+EndIf:              'endif';
+EndSwitch:          'endswitch';
+EndWhile:           'endwhile';
+Eval:               'eval';
+Exit:               'die';
+Extends:            'extends';
+Final:              'final';
+Finally:            'finally';
+FloatCast:          'float';
+For:                'for';
+Foreach:            'foreach';
+Function_:          'function';
+Global:             'global';
+Goto:               'goto';
+If:                 'if';
+Implements:         'implements';
+Import:             'import';
+Include:            'include';
+IncludeOnce:        'include_once';
+InstanceOf:         'instanceof';
+InsteadOf:          'insteadof';
+Int8Cast:           'int8';
+Int16Cast:          'int16';
+Int64Type: 'int64'; +IntType: 'int' 'eger'?; +Interface: 'interface'; +IsSet: 'isset'; +List: 'list'; +LogicalAnd: 'and'; +LogicalOr: 'or'; +LogicalXor: 'xor'; +Match: 'match'; +Namespace: 'namespace'; +New: 'new'; +Null: 'null'; +ObjectType: 'object'; +Parent_: 'parent'; +Partial: 'partial'; +Print: 'print'; +Private: 'private'; +Protected: 'protected'; +Public: 'public'; +Require: 'require'; +RequireOnce: 'require_once'; +Resource: 'resource'; +Return: 'return'; +Static: 'static'; +StringType: 'string'; +Switch: 'switch'; +Throw: 'throw'; +Trait: 'trait'; +Try: 'try'; +Typeof: 'clrtypeof'; +UintCast: 'uint' ('8' | '16' | '64')?; +UnicodeCast: 'unicode'; +Unset: 'unset'; +Use: 'use'; +Var: 'var'; +While: 'while'; +Yield: 'yield'; +From: 'from'; +LambdaFn: 'fn'; +/* Magic method names; the parser groups these tokens in its magicMethod rule. */ +Get: '__get'; +Set: '__set'; +Call: '__call'; +CallStatic: '__callstatic'; +Constructor: '__construct'; +Destruct: '__destruct'; +Wakeup: '__wakeup'; +Sleep: '__sleep'; +Autoload: '__autoload'; +IsSet__: '__isset'; +Unset__: '__unset'; +ToString__: '__tostring'; +Invoke: '__invoke'; +SetState: '__set_state'; +Clone__: '__clone'; +DebugInfo: '__debuginfo'; +/* Magic constants; the parser groups these tokens in its magicConstant rule. Traic__ (sic) is the token name for '__trait__'. */ +Namespace__: '__namespace__'; +Class__: '__class__'; +Traic__: '__trait__'; +Function__: '__function__'; +Method__: '__method__'; +Line__: '__line__'; +File__: '__file__'; +Dir__: '__dir__'; +/* Operators and punctuation. */ +Spaceship: '<=>'; +Lgeneric: '<:'; +Rgeneric: ':>'; +DoubleArrow: '=>'; +Inc: '++'; +Dec: '--'; +IsIdentical: '==='; +IsNoidentical: '!=='; +IsEqual: '=='; +IsNotEq: '<>' + | '!='; +IsSmallerOrEqual: '<='; +IsGreaterOrEqual: '>='; +PlusEqual: '+='; +MinusEqual: '-='; +MulEqual: '*='; +Pow: '**'; +PowEqual: '**='; +DivEqual: '/='; +Concaequal: '.='; +ModEqual: '%='; +ShiftLeftEqual: '<<='; +ShiftRightEqual: '>>='; +AndEqual: '&='; +OrEqual: '|='; +XorEqual: '^='; +BooleanOr: '||'; +BooleanAnd: '&&'; +NullCoalescing: '??'; +NullCoalescingEqual:'??='; +ShiftLeft: '<<'; +ShiftRight: '>>'; +DoubleColon: '::'; +ObjectOperator: '->'; +NamespaceSeparator: '\\'; +Ellipsis: 
'...'; +Less: '<'; +Greater: '>'; +Ampersand: '&'; +Pipe: '|'; +Bang: '!'; +Caret: '^'; +Plus: '+'; +Minus: '-'; +Asterisk: '*'; +Percent: '%'; +Divide: '/'; +Tilde: '~'; +SuppressWarnings: '@'; +Dollar: '$'; +Dot: '.'; +QuestionMark: '?'; +OpenRoundBracket: '('; +CloseRoundBracket: ')'; +OpenSquareBracket: '['; +CloseSquareBracket: ']'; +OpenCurlyBracket: '{'; +CloseCurlyBracket: '}' +{ this.PopModeOnCurlyBracketClose(); }; +Comma: ','; +Colon: ':'; +SemiColon: ';'; +Eq: '='; +Quote: '\''; +BackQuote: '`'; +VarName: '$' NameString; +Label: [a-z_][a-z_0-9]*; +Octal: '0' [0-7]+; +Decimal: '0' | NonZeroDigit Digit*; +Real: (Digit+ '.' Digit* | '.' Digit+) ExponentPart? + | Digit+ ExponentPart; +Hex: '0x' HexDigit+; +Binary: '0b' [01_]+; +BackQuoteString: '`' ~'`'* '`'; +SingleQuoteString: '\'' (~('\'' | '\\') | '\\' . )* '\''; +DoubleQuote: '"' -> pushMode(InterpolationString); +/* Both heredoc openers are gated on the next character being a line break (ShouldPushHereDocMode). */ +StartNowDoc + : '<<<' [ \t]* '\'' NameString '\'' { this.ShouldPushHereDocMode(1) }? -> pushMode(HereDoc) + ; +StartHereDoc + : '<<<' [ \t]* NameString { this.ShouldPushHereDocMode(1) }? -> pushMode(HereDoc) + ; +ErrorPhp: . -> channel(ErrorLexem); + +/* Body of a double-quoted string: entered from DoubleQuote and left on the closing quote; '{$' re-enters PHP mode. */ +mode InterpolationString; + +VarNameInInterpolation: '$' NameString -> type(VarName); // TODO: fix such cases: "$people->john" +DollarString: '$' -> type(StringPart); +CurlyDollar: '{' { this.IsCurlyDollar(1) }? { this.SetInsideString(); } -> channel(SkipChannel), pushMode(PHP); +CurlyString: '{' -> type(StringPart); +EscapedChar: '\\' . -> type(StringPart); +DoubleQuoteInInterpolation: '"' -> type(DoubleQuote), popMode; +UnicodeEscape: '\\u{' [a-zA-Z0-9][a-zA-Z0-9]+ '}'; +StringPart: ~[${\\"]+; +mode SingleLineCommentMode; +Comment: ~[\r\n?]+ -> channel(PhpComments); +PHPEndSingleLineComment: '?' '>'; +CommentQuestionMark: '?' -> type(Comment), channel(PhpComments); +CommentEnd: [\r\n] -> channel(SkipChannel), popMode; // exit from comment. +mode HereDoc; // TODO: interpolation for heredoc strings. +/* One token per physical line; PhpLexerBase.nextToken() compares each line with the heredoc identifier to find the end. */ +HereDocText: ~[\r\n]*? ('\r'? 
'\n' | '\r'); +// fragments. +// '<?=' will be transformed to 'echo' token. +// '<?= "Hello world"; ?>' will be transformed to '<?php echo "Hello world"; ?>' +fragment PhpStartEchoFragment: '<' ('?' '=' | { this.HasAspTags() }? '%' '='); +fragment PhpStartFragment: '<' ('?' 'php'? | { this.HasAspTags() }? '%'); +fragment NameString: [a-zA-Z_\u0080-\ufffe][a-zA-Z0-9_\u0080-\ufffe]*; +fragment HtmlNameChar + : HtmlNameStartChar + | '-' + | '_' + | '.' + | Digit + | '\u00B7' + | '\u0300'..'\u036F' + | '\u203F'..'\u2040' + ; +fragment HtmlNameStartChar + : [:a-z] + | '\u2070'..'\u218F' + | '\u2C00'..'\u2FEF' + | '\u3001'..'\uD7FF' + | '\uF900'..'\uFDCF' + | '\uFDF0'..'\uFFFD' + ; +fragment ExponentPart: 'e' [+-]? Digit+; +/* The digit classes below all include '_', so the numeric rules built on them accept underscores inside literals. */ +fragment NonZeroDigit: [1-9_]; +fragment Digit: [0-9_]; +fragment HexDigit: [a-f0-9_]; diff --git a/src/main/antlr/PhpParser.g4 b/src/main/antlr/PhpParser.g4 new file mode 100644 index 00000000..cc319905 --- /dev/null +++ b/src/main/antlr/PhpParser.g4 @@ -0,0 +1,925 @@ +/* +PHP grammar. +The MIT License (MIT). +Copyright (c) 2015-2020, Ivan Kochurkin (kvanttt@gmail.com), Positive Technologies. +Copyright (c) 2019-2020, Student Main for php7, php8 support. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +parser grammar PhpParser; + +options { tokenVocab=PhpLexer; } + +// HTML +// Also see here: https://github.com/antlr/grammars-v4/tree/master/html + +/* Entry rule: an optional shebang line followed by any mix of inline HTML and PHP blocks, up to EOF. */ +htmlDocument + : Shebang? (inlineHtml | phpBlock)* EOF + ; + +inlineHtml + : htmlElement+ + | scriptText + ; + +// TODO: split into html, css and xml elements +htmlElement + : HtmlDtd + | HtmlClose + | HtmlStyleOpen + | HtmlOpen + | HtmlName + | HtmlSlashClose + | HtmlSlash + | HtmlText + | HtmlEquals + | HtmlStartQuoteString + | HtmlEndQuoteString + | HtmlStartDoubleQuoteString + | HtmlEndDoubleQuoteString + | HtmlHex + | HtmlDecimal + | HtmlQuoteString + | HtmlDoubleQuoteString + + | StyleBody + + | HtmlScriptOpen + | HtmlScriptClose + + | XmlStart XmlText* XmlClose + ; + +// Script +// Parse JavaScript with https://github.com/antlr/grammars-v4/tree/master/javascript if necessary. + +scriptText + : ScriptText+ + ; + +// PHP + +phpBlock + : importStatement* topStatement+ + ; + +importStatement + : Import Namespace namespaceNameList SemiColon + ; + +topStatement + : statement + | useDeclaration + | namespaceDeclaration + | functionDeclaration + | classDeclaration + | globalConstantDeclaration + ; + +useDeclaration + : Use (Function_ | Const)? useDeclarationContentList SemiColon + ; + +useDeclarationContentList + : '\\'? useDeclarationContent (',' '\\'? useDeclarationContent)* + ; + +useDeclarationContent + : namespaceNameList + ; + +namespaceDeclaration + : Namespace (namespaceNameList? OpenCurlyBracket namespaceStatement* CloseCurlyBracket | namespaceNameList SemiColon) + ; + +namespaceStatement + : statement + | useDeclaration + | functionDeclaration + | classDeclaration + | globalConstantDeclaration + ; + +functionDeclaration + : attributes? 
Function_ '&'? identifier typeParameterListInBrackets? '(' formalParameterList ')' (':' QuestionMark? typeHint)? blockStatement + ; + +classDeclaration + : attributes? Private? modifier? Partial? ( + classEntryType identifier typeParameterListInBrackets? (Extends qualifiedStaticTypeRef)? (Implements interfaceList)? + | Interface identifier typeParameterListInBrackets? (Extends interfaceList)? ) + OpenCurlyBracket classStatement* CloseCurlyBracket + ; + +classEntryType + : Class + | Trait + ; + +interfaceList + : qualifiedStaticTypeRef (',' qualifiedStaticTypeRef)* + ; + +typeParameterListInBrackets + : '<:' typeParameterList ':>' + | '<:' typeParameterWithDefaultsList ':>' + | '<:' typeParameterList ',' typeParameterWithDefaultsList ':>' + ; + +typeParameterList + : typeParameterDecl (',' typeParameterDecl)* + ; + +typeParameterWithDefaultsList + : typeParameterWithDefaultDecl (',' typeParameterWithDefaultDecl)* + ; + +typeParameterDecl + : attributes? identifier + ; + +typeParameterWithDefaultDecl + : attributes? identifier Eq (qualifiedStaticTypeRef | primitiveType) + ; + +genericDynamicArgs + : '<:' typeRef (',' typeRef)* ':>' + ; + +attributes + : attributeGroup+ + ; + +attributeGroup + : AttributeStart (identifier ':')? attribute (',' attribute)* ']' + ; + +attribute + : qualifiedNamespaceName arguments? 
+ ; + +innerStatementList + : innerStatement* + ; + +innerStatement + : statement + | functionDeclaration + | classDeclaration + ; + +// Statements + +statement + : identifier ':' + | blockStatement + | ifStatement + | whileStatement + | doWhileStatement + | forStatement + | switchStatement + | breakStatement + | continueStatement + | returnStatement + | yieldExpression SemiColon + | globalStatement + | staticVariableStatement + | echoStatement + | expressionStatement + | unsetStatement + | foreachStatement + | tryCatchFinally + | throwStatement + | gotoStatement + | declareStatement + | emptyStatement + | inlineHtmlStatement + ; + +emptyStatement + : SemiColon + ; + +blockStatement + : OpenCurlyBracket innerStatementList CloseCurlyBracket + ; + +/* The control-flow statements below accept both the brace/statement form and the alternative colon form closed by an end-keyword (endif, endwhile, endfor, ...). */ +ifStatement + : If parentheses statement elseIfStatement* elseStatement? + | If parentheses ':' innerStatementList elseIfColonStatement* elseColonStatement? EndIf SemiColon + ; + +elseIfStatement + : ElseIf parentheses statement + ; + +elseIfColonStatement + : ElseIf parentheses ':' innerStatementList + ; + +elseStatement + : Else statement + ; + +elseColonStatement + : Else ':' innerStatementList + ; + +whileStatement + : While parentheses (statement | ':' innerStatementList EndWhile SemiColon) + ; + +doWhileStatement + : Do statement While parentheses SemiColon + ; + +forStatement + : For '(' forInit? SemiColon expressionList? SemiColon forUpdate? ')' (statement | ':' innerStatementList EndFor SemiColon ) + ; + +forInit + : expressionList + ; + +forUpdate + : expressionList + ; + +switchStatement + : Switch parentheses (OpenCurlyBracket SemiColon? switchBlock* CloseCurlyBracket | ':' SemiColon? switchBlock* EndSwitch SemiColon) + ; + +switchBlock + : ((Case expression | Default) (':' | SemiColon))+ innerStatementList + ; + +breakStatement + : Break expression? SemiColon + ; + +continueStatement + : Continue expression? SemiColon + ; + +returnStatement + : Return expression? 
SemiColon + ; + +expressionStatement + : expression SemiColon + ; + +unsetStatement + : Unset '(' chainList ')' SemiColon + ; + +foreachStatement + : Foreach + ( '(' chain As '&'? assignable ('=>' '&'? chain)? ')' + | '(' expression As assignable ('=>' '&'? chain)? ')' + | '(' chain As List '(' assignmentList ')' ')' ) + (statement | ':' innerStatementList EndForeach SemiColon) + ; + +tryCatchFinally + : Try blockStatement (catchClause+ finallyStatement? | catchClause* finallyStatement) + ; + +catchClause + : Catch '(' qualifiedStaticTypeRef ('|' qualifiedStaticTypeRef)* VarName ')' blockStatement + ; + +finallyStatement + : Finally blockStatement + ; + +throwStatement + : Throw expression SemiColon + ; + +gotoStatement + : Goto identifier SemiColon + ; + +declareStatement + : Declare '(' declareList ')' (statement | ':' innerStatementList EndDeclare SemiColon) + ; + +inlineHtmlStatement + : inlineHtml+ + ; + +declareList + : identifierInitializer (',' identifierInitializer)* + ; + +formalParameterList + : formalParameter? (',' formalParameter)* ','? + ; + +formalParameter + : attributes? memberModifier? QuestionMark? typeHint? '&'? '...'? variableInitializer + ; + +typeHint + : qualifiedStaticTypeRef + | Callable + | primitiveType + | typeHint '|' typeHint + ; + +globalStatement + : Global globalVar (',' globalVar)* SemiColon + ; + +globalVar + : VarName + | Dollar chain + | Dollar OpenCurlyBracket expression CloseCurlyBracket + ; + +echoStatement + : Echo expressionList SemiColon + ; + +staticVariableStatement + : Static variableInitializer (',' variableInitializer)* SemiColon + ; + +classStatement + : attributes? ( propertyModifiers typeHint? variableInitializer (',' variableInitializer)* SemiColon + | memberModifiers? ( Const typeHint? identifierInitializer (',' identifierInitializer)* SemiColon + | Function_ '&'? identifier typeParameterListInBrackets? '(' formalParameterList ')' + baseCtorCall? 
methodBody)) + | Use qualifiedNamespaceNameList traitAdaptations + ; + +traitAdaptations + : SemiColon + | OpenCurlyBracket traitAdaptationStatement* CloseCurlyBracket + ; + +traitAdaptationStatement + : traitPrecedence + | traitAlias + ; + +traitPrecedence + : qualifiedNamespaceName '::' identifier InsteadOf qualifiedNamespaceNameList SemiColon + ; + +traitAlias + : traitMethodReference As (memberModifier | memberModifier? identifier) SemiColon + ; + +traitMethodReference + : (qualifiedNamespaceName '::')? identifier + ; + +baseCtorCall + : ':' identifier arguments? + ; + +methodBody + : SemiColon + | blockStatement + ; + +propertyModifiers + : memberModifiers + | Var + ; + +memberModifiers + : memberModifier+ + ; + +variableInitializer + : VarName (Eq constantInitializer)? + ; + +identifierInitializer + : identifier Eq constantInitializer + ; + +globalConstantDeclaration + : attributes? Const identifierInitializer (',' identifierInitializer)* SemiColon + ; + +expressionList + : expression (',' expression)* + ; + +parentheses + : '(' (expression | yieldExpression) ')' + ; + +// Expressions +// Grouped by priorities: http://php.net/manual/en/language.operators.precedence.php +/* Every alternative carries a #Label; several alternatives deliberately share one label (e.g. #SpecialWordExpression, #ArithmeticExpression). NOTE(review): '&&' and '||' are labelled #BitwiseExpression. */ +expression + : Clone expression #CloneExpression + | newExpr #NewExpression + + | stringConstant '[' expression ']' #IndexerExpression + + | '(' castOperation ')' expression #CastExpression + | ('~' | '@') expression #UnaryOperatorExpression + + | ('!' 
| '+' | '-') expression #UnaryOperatorExpression + + | ('++' | '--') chain #PrefixIncDecExpression + | chain ('++' | '--') #PostfixIncDecExpression + + | Print expression #PrintExpression + + | chain #ChainExpression + | constant #ScalarExpression + | string #ScalarExpression + | Label #ScalarExpression + + | BackQuoteString #BackQuoteStringExpression + | parentheses #ParenthesisExpression + | arrayCreation #ArrayCreationExpression + + | Yield #SpecialWordExpression + | List '(' assignmentList ')' Eq expression #SpecialWordExpression + | IsSet '(' chainList ')' #SpecialWordExpression + | Empty '(' chain ')' #SpecialWordExpression + | Eval '(' expression ')' #SpecialWordExpression + | Exit ( '(' ')' | parentheses )? #SpecialWordExpression + | (Include | IncludeOnce) expression #SpecialWordExpression + | (Require | RequireOnce) expression #SpecialWordExpression + + | lambdaFunctionExpr #LambdaFunctionExpression + | matchExpr #MatchExpression + + | expression op='**' expression #ArithmeticExpression + | expression InstanceOf typeRef #InstanceOfExpression + | expression op=('*' | Divide | '%') expression #ArithmeticExpression + + | expression op=('+' | '-' | '.') expression #ArithmeticExpression + + | expression op=('<<' | '>>') expression #ComparisonExpression + | expression op=(Less | '<=' | Greater | '>=') expression #ComparisonExpression + | expression op=('===' | '!==' | '==' | IsNotEq) expression #ComparisonExpression + + | expression op='&' expression #BitwiseExpression + | expression op='^' expression #BitwiseExpression + | expression op='|' expression #BitwiseExpression + | expression op='&&' expression #BitwiseExpression + | expression op='||' expression #BitwiseExpression + + | expression op=QuestionMark expression? ':' expression #ConditionalExpression + | expression op='??' 
expression #NullCoalescingExpression + | expression op='<=>' expression #SpaceshipExpression + + | Throw expression #SpecialWordExpression + + | assignable assignmentOperator attributes? expression #AssignmentExpression + | assignable Eq attributes? '&' (chain | newExpr) #AssignmentExpression + + | expression op=LogicalAnd expression #LogicalExpression + | expression op=LogicalXor expression #LogicalExpression + | expression op=LogicalOr expression #LogicalExpression + ; + +assignable + : chain + | arrayCreation + ; + +arrayCreation + : (Array '(' arrayItemList? ')' | '[' arrayItemList? ']') ('[' expression ']')? + ; + +lambdaFunctionExpr + : Static? Function_ '&'? '(' formalParameterList ')' lambdaFunctionUseVars? (':' typeHint)? blockStatement + | LambdaFn '(' formalParameterList')' '=>' expression + ; + +matchExpr + : Match '(' expression ')' OpenCurlyBracket matchItem (',' matchItem)* ','? CloseCurlyBracket + ; + +matchItem + : expression (',' expression)* '=>' expression + ; + +newExpr + : New typeRef arguments? + ; + +assignmentOperator + : Eq + | '+=' + | '-=' + | '*=' + | '**=' + | '/=' + | '.=' + | '%=' + | '&=' + | '|=' + | '^=' + | '<<=' + | '>>=' + | '??=' + ; + +yieldExpression + : Yield (expression ('=>' expression)? | From expression) + ; + +arrayItemList + : arrayItem (',' arrayItem)* ','? + ; + +arrayItem + : expression ('=>' expression)? + | (expression '=>')? '&' chain + ; + +lambdaFunctionUseVars + : Use '(' lambdaFunctionUseVar (',' lambdaFunctionUseVar)* ')' + ; + +lambdaFunctionUseVar + : '&'? VarName + ; + +qualifiedStaticTypeRef + : qualifiedNamespaceName genericDynamicArgs? + | Static + ; + +typeRef + : (qualifiedNamespaceName | indirectTypeRef) genericDynamicArgs? + | primitiveType + | Static + | anonymousClass + ; + +anonymousClass + : attributes? Private? modifier? Partial? ( + classEntryType typeParameterListInBrackets? (Extends qualifiedStaticTypeRef)? (Implements interfaceList)? + | Interface identifier typeParameterListInBrackets? 
(Extends interfaceList)? ) + OpenCurlyBracket classStatement* CloseCurlyBracket + ; + +indirectTypeRef + : chainBase ('->' keyedFieldName)* + ; + +qualifiedNamespaceName + : Namespace? '\\'? namespaceNameList + ; + +namespaceNameList + : identifier + | identifier ('\\' identifier)* ('\\' namespaceNameTail)? + ; + +namespaceNameTail + : identifier (As identifier)? + | OpenCurlyBracket namespaceNameTail (','namespaceNameTail)* ','? CloseCurlyBracket + ; + +qualifiedNamespaceNameList + : qualifiedNamespaceName (',' qualifiedNamespaceName)* + ; + +arguments + : '(' ( actualArgument (',' actualArgument)* | yieldExpression)? ','? ')' + ; + +actualArgument + : argumentName? '...'? expression + | '&' chain + ; + +argumentName + : identifier ':' + ; + +constantInitializer + : constant + | string + | Array '(' (arrayItemList ','?)? ')' + | '[' (arrayItemList ','?)? ']' + | ('+' | '-') constantInitializer + ; + +constant + : Null + | literalConstant + | magicConstant + | classConstant + | qualifiedNamespaceName + ; + +literalConstant + : Real + | BooleanConstant + | numericConstant + | stringConstant + ; + +numericConstant + : Octal + | Decimal + | Hex + | Binary + ; + +classConstant + : (Class | Parent_) '::' (identifier | Constructor | Get | Set) + | (qualifiedStaticTypeRef | keyedVariable | string) '::' (identifier | keyedVariable) // 'foo'::$bar works in php7 + ; + +stringConstant + : Label + ; + +string + : StartHereDoc HereDocText+ + | StartNowDoc HereDocText+ + | SingleQuoteString + | DoubleQuote interpolatedStringPart* DoubleQuote + ; + +interpolatedStringPart + : StringPart + | UnicodeEscape + | chain + ; + +chainList + : chain (',' chain)* + ; + +chain + : chainOrigin memberAccess* + //| arrayCreation // [$a,$b]=$c + ; + +chainOrigin + : chainBase + | functionCall + | '(' newExpr ')' + ; + +memberAccess + : '->' keyedFieldName actualArguments? 
+ ; + +functionCall + : functionCallName actualArguments + ; + +functionCallName + : qualifiedNamespaceName + | classConstant + | chainBase + | parentheses + ; + +actualArguments + : genericDynamicArgs? arguments squareCurlyExpression* + ; + +chainBase + : keyedVariable ('::' keyedVariable)? + | qualifiedStaticTypeRef '::' keyedVariable + ; + +keyedFieldName + : keyedSimpleFieldName + | keyedVariable + ; + +keyedSimpleFieldName + : (identifier | OpenCurlyBracket expression CloseCurlyBracket) squareCurlyExpression* + ; + +keyedVariable + : Dollar* (VarName | Dollar OpenCurlyBracket expression CloseCurlyBracket) squareCurlyExpression* + ; + +squareCurlyExpression + : '[' expression? ']' + | OpenCurlyBracket expression CloseCurlyBracket + ; + +assignmentList + : assignmentListElement? (',' assignmentListElement?)* + ; + +assignmentListElement + : chain + | List '(' assignmentList ')' + | arrayItem + ; + +modifier + : Abstract + | Final + ; + +/* An identifier is a plain Label or any of the keyword / magic-name tokens listed here, i.e. keyword tokens are accepted wherever an identifier is expected. */ +identifier + : Label + + | Abstract + | Array + | As + | BinaryCast + | BoolType + | BooleanConstant + | Break + | Callable + | Case + | Catch + | Class + | Clone + | Const + | Continue + | Declare + | Default + | Do + | DoubleCast + | DoubleType + | Echo + | Else + | ElseIf + | Empty + | EndDeclare + | EndFor + | EndForeach + | EndIf + | EndSwitch + | EndWhile + | Eval + | Exit + | Extends + | Final + | Finally + | FloatCast + | For + | Foreach + | Function_ + | Global + | Goto + | If + | Implements + | Import + | Include + | IncludeOnce + | InstanceOf + | InsteadOf + | Int16Cast + | Int64Type + | Int8Cast + | Interface + | IntType + | IsSet + | List + | LogicalAnd + | LogicalOr + | LogicalXor + | Namespace + | New + | Null + | ObjectType + | Parent_ + | Partial + | Print + | Private + | Protected + | Public + | Require + | RequireOnce + | Resource + | Return + | Static + | StringType + | Switch + | Throw + | Trait + | Try + | Typeof + | UintCast + | UnicodeCast + | Unset + | Use + | Var + | While + | Yield + | From + + | Get + 
| Set + | Call + | CallStatic + | Constructor + | Destruct + | Wakeup + | Sleep + | Autoload + | IsSet__ + | Unset__ + | ToString__ + | Invoke + | SetState + | Clone__ + | DebugInfo + | Namespace__ + | Class__ + | Traic__ + | Function__ + | Method__ + | Line__ + | File__ + | Dir__ + ; + +memberModifier + : Public + | Protected + | Private + | Static + | Abstract + | Final + ; + +magicConstant + : Namespace__ + | Class__ + | Traic__ + | Function__ + | Method__ + | Line__ + | File__ + | Dir__ + ; + +/* NOTE(review): magicMethod is not referenced by any other rule in this grammar. */ +magicMethod + : Get + | Set + | Call + | CallStatic + | Constructor + | Destruct + | Wakeup + | Sleep + | Autoload + | IsSet__ + | Unset__ + | ToString__ + | Invoke + | SetState + | Clone__ + | DebugInfo + ; + +primitiveType + : BoolType + | IntType + | Int64Type + | DoubleType + | StringType + | Resource + | ObjectType + | Array + ; + +/* Tokens accepted between the parentheses of a cast expression (see #CastExpression). */ +castOperation + : BoolType + | Int8Cast + | Int16Cast + | IntType + | Int64Type + | UintCast + | DoubleCast + | DoubleType + | FloatCast + | StringType + | BinaryCast + | UnicodeCast + | Array + | ObjectType + | Resource + | Unset + ; \ No newline at end of file diff --git a/src/main/java/me/vovak/antlr/parser/CaseChangingCharStream.java b/src/main/java/me/vovak/antlr/parser/CaseChangingCharStream.java new file mode 100644 index 00000000..c91537b3 --- /dev/null +++ b/src/main/java/me/vovak/antlr/parser/CaseChangingCharStream.java @@ -0,0 +1,82 @@ +package me.vovak.antlr.parser; + +import org.antlr.v4.runtime.CharStream; +import org.antlr.v4.runtime.misc.Interval; + +/** + * This class supports case-insensitive lexing by wrapping an existing + * {@link CharStream} and forcing the lexer to see either upper or + * lowercase characters. Grammar literals should then be either upper or + * lower case such as 'BEGIN' or 'begin'. The text of the character + * stream is unaffected. Example: input 'BeGiN' would match lexer rule + * 'BEGIN' if constructor parameter upper=true but getText() would return + * 'BeGiN'. 
+ */
+public class CaseChangingCharStream implements CharStream {
+
+    /** The wrapped stream; every operation except {@link #LA(int)} is forwarded to it untouched. */
+    final CharStream stream;
+
+    /** {@code true} to present upper-case symbols to the lexer, {@code false} for lower case. */
+    final boolean upper;
+
+    /**
+     * Wraps {@code stream} so that look-ahead symbols are reported in a single case.
+     *
+     * @param stream The stream to wrap.
+     * @param upper  If true force each symbol to upper case, otherwise force to lower.
+     */
+    public CaseChangingCharStream(CharStream stream, boolean upper) {
+        this.stream = stream;
+        this.upper = upper;
+    }
+
+    /**
+     * Look-ahead with case folding applied.
+     *
+     * @param i look-ahead offset, passed straight to the wrapped stream
+     * @return the wrapped stream's symbol converted to the configured case; values
+     *         {@code <= 0} (such as EOF) are returned unchanged
+     */
+    @Override
+    public int LA(int i) {
+        final int symbol = stream.LA(i);
+        if (symbol <= 0) {
+            return symbol;
+        }
+        return upper ? Character.toUpperCase(symbol) : Character.toLowerCase(symbol);
+    }
+
+    /** Returns the original, un-folded text of the wrapped stream for {@code interval}. */
+    @Override
+    public String getText(Interval interval) {
+        return stream.getText(interval);
+    }
+
+    // Everything below is plain delegation to the wrapped stream.
+
+    @Override
+    public void consume() {
+        stream.consume();
+    }
+
+    @Override
+    public int mark() {
+        return stream.mark();
+    }
+
+    @Override
+    public void release(int marker) {
+        stream.release(marker);
+    }
+
+    @Override
+    public int index() {
+        return stream.index();
+    }
+
+    @Override
+    public void seek(int index) {
+        stream.seek(index);
+    }
+
+    @Override
+    public int size() {
+        return stream.size();
+    }
+
+    @Override
+    public String getSourceName() {
+        return stream.getSourceName();
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/me/vovak/antlr/parser/PhpLexerBase.java b/src/main/java/me/vovak/antlr/parser/PhpLexerBase.java
new file mode 100644
index 00000000..92a8bed3
--- /dev/null
+++ b/src/main/java/me/vovak/antlr/parser/PhpLexerBase.java
@@ -0,0 +1,192 @@
+package me.vovak.antlr.parser;
+
+/*
+PHP grammar.
+The MIT License (MIT).
+Copyright (c) 2015-2019, Ivan Kochurkin (kvanttt@gmail.com), Positive Technologies.
+Copyright (c) 2019, Thierry Marianne (thierry.marianne@weaving-the-web.org)
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+import org.antlr.v4.runtime.*;
+
+import java.util.Stack;
+
+/**
+ * Base class of the generated {@code PhpLexer}. It holds the state and helper
+ * methods that the grammar's actions and semantic predicates call, and it
+ * post-processes tokens in {@link #nextToken()}.
+ */
+public abstract class PhpLexerBase extends Lexer
+{
+    /** Whether ASP-style tags are recognised; returned by {@link #HasAspTags()}. */
+    protected boolean AspTags = true;
+    /** Consumed and cleared by {@link #PushModeOnHtmlClose()} to enter SCRIPT (or PHP) mode. */
+    protected boolean _scriptTag;
+    /** Consumed and cleared by {@link #PushModeOnHtmlClose()} to enter STYLE mode. */
+    protected boolean _styleTag;
+    /** Identifier of the heredoc/nowdoc currently being lexed. */
+    protected String _heredocIdentifier;
+    /** Type of the last token seen in PHP mode (recorded while the channel is not HIDDEN). */
+    protected int _prevTokenType;
+    /** Text of the most recent HtmlName token. */
+    protected String _htmlNameText;
+    /** Set when a {@code language="php"} attribute was seen; cleared on the closing script tag. */
+    protected boolean _phpScript;
+    /** Set by {@link #SetInsideString()}; cleared by {@link #PopModeOnCurlyBracketClose()}. */
+    protected boolean _insideString;
+
+    public PhpLexerBase(CharStream input) {
+        super(input);
+    }
+
+    /**
+     * Fetches the next token and applies PHP-specific fix-ups:
+     * <ul>
+     *   <li>on a PHP end tag, pops the PHP mode (and the single-line-comment mode when
+     *       active) and either retypes the token as HtmlScriptClose, hides it, or replaces
+     *       it with a synthetic SemiColon;</li>
+     *   <li>remembers HTML attribute names to detect {@code language="php"};</li>
+     *   <li>tracks the heredoc identifier and leaves HereDoc mode on the terminating line;</li>
+     *   <li>records the previous token type while in PHP mode.</li>
+     * </ul>
+     *
+     * @return the possibly modified or replaced token
+     */
+    @Override
+    public Token nextToken() {
+        CommonToken token = (CommonToken)super.nextToken();
+
+        if (token.getType() == PhpLexer.PHPEnd || token.getType() == PhpLexer.PHPEndSingleLineComment)
+        {
+            if (_mode == PhpLexer.SingleLineCommentMode)
+            {
+                // SingleLineCommentMode for such allowed syntax:
+                // <?php echo "Hello world"; // comment ?>
+                popMode(); // exit from SingleLineComment mode.
+            }
+            popMode(); // exit from PHP mode.
+
+            // getText() returns the source text in its original case, while the lexer may be
+            // matching case-insensitively (CaseChangingCharStream), so compare ignoring case.
+            if ("</script>".equalsIgnoreCase(token.getText()))
+            {
+                _phpScript = false;
+                token.setType(PhpLexer.HtmlScriptClose);
+            }
+            else
+            {
+                // Add semicolon to the end of statement if it is absent.
+                // For example: <?php echo "Hello world" ?>
+                if (_prevTokenType == PhpLexer.SemiColon || _prevTokenType == PhpLexer.Colon
+                    || _prevTokenType == PhpLexer.OpenCurlyBracket || _prevTokenType == PhpLexer.CloseCurlyBracket)
+                {
+                    token.setChannel(PhpLexer.SkipChannel);
+                }
+                else
+                {
+                    // NOTE(review): the synthetic token carries no text or position information.
+                    token = new CommonToken(PhpLexer.SemiColon);
+                }
+            }
+        }
+        else if (token.getType() == PhpLexer.HtmlName)
+        {
+            _htmlNameText = token.getText();
+        }
+        else if (token.getType() == PhpLexer.HtmlDoubleQuoteString)
+        {
+            if ("php".equals(token.getText()) && "language".equals(_htmlNameText))
+            {
+                _phpScript = true;
+            }
+        }
+        else if (_mode == PhpLexer.HereDoc)
+        {
+            // Heredoc and Nowdoc syntax support: http://php.net/manual/en/language.types.string.php#language.types.string.syntax.heredoc
+            switch (token.getType())
+            {
+                case PhpLexer.StartHereDoc:
+                case PhpLexer.StartNowDoc:
+                    // Strip the leading "<<<", surrounding blanks and the nowdoc quotes.
+                    _heredocIdentifier = token.getText().substring(3).trim().replace("'","");
+                    break;
+
+                case PhpLexer.HereDocText:
+                    if (CheckHeredocEnd(token.getText()))
+                    {
+                        popMode();
+
+                        String heredocIdentifier = GetHeredocIdentifier(token.getText());
+                        if (token.getText().trim().endsWith(";"))
+                        {
+                            token = new CommonToken(PhpLexer.SemiColon, heredocIdentifier + ";\n");
+                        }
+                        else
+                        {
+                            token = (CommonToken)super.nextToken();
+                            token.setText(heredocIdentifier + "\n;");
+                        }
+                    }
+                    break;
+            }
+        }
+        else if (_mode == PhpLexer.PHP)
+        {
+            if (_channel != PhpLexer.HIDDEN)
+            {
+                _prevTokenType = token.getType();
+            }
+        }
+
+        return token;
+    }
+
+    /**
+     * @param text one heredoc line
+     * @return {@code text} trimmed, with a single trailing ';' removed when present
+     */
+    private String GetHeredocIdentifier(String text) {
+        String trimmedText = text.trim();
+        return trimmedText.endsWith(";")
+            ? trimmedText.substring(0, trimmedText.length() - 1)
+            : trimmedText;
+    }
+
+    /** @return true when {@code text} is the line that terminates the current heredoc. */
+    private boolean CheckHeredocEnd(String text) {
+        return GetHeredocIdentifier(text).equals(_heredocIdentifier);
+    }
+
+    /**
+     * @param pos look-ahead offset handed to the input stream
+     * @return true when the symbol at {@code pos} is absent ({@code <= 0}) or a line break
+     */
+    protected boolean IsNewLineOrStart(int pos) {
+        int symbol = this._input.LA(pos);
+        return symbol <= 0 || symbol == '\r' || symbol == '\n';
+    }
+
+    /**
+     * Called when a tag is closed with '>': leaves the current mode, then enters SCRIPT
+     * (or PHP for a PHP script element) or STYLE when the corresponding flag is set.
+     */
+    protected void PushModeOnHtmlClose() {
+        popMode();
+        if (_scriptTag)
+        {
+            if (!_phpScript)
+            {
+                pushMode(PhpLexer.SCRIPT);
+            }
+            else
+            {
+                pushMode(PhpLexer.PHP);
+            }
+            _scriptTag = false;
+        }
+        else if (_styleTag)
+        {
+            pushMode(PhpLexer.STYLE);
+            _styleTag = false;
+        }
+    }
+
+    /** Predicate used by the grammar for the '%'-based tag alternatives. */
+    protected boolean HasAspTags() {
+        return this.AspTags;
+    }
+
+    /** Predicate used by the grammar for the script-tag alternative of PHPEnd. */
+    protected boolean HasPhpScriptTag() {
+        return this._phpScript;
+    }
+
+    /**
+     * Action of CloseCurlyBracket: when the brace closes a block opened inside an
+     * interpolated string, hide the token and return to the enclosing mode.
+     */
+    protected void PopModeOnCurlyBracketClose() {
+        if (_insideString)
+        {
+            _insideString = false;
+            setChannel(PhpLexer.SkipChannel);
+            popMode();
+        }
+    }
+
+    /** @return true when the symbol at {@code pos} is a line break. */
+    protected boolean ShouldPushHereDocMode(int pos) {
+        int symbol = _input.LA(pos);
+        return symbol == '\r' || symbol == '\n';
+    }
+
+    /** @return true when the symbol at {@code pos} is '$'. */
+    protected boolean IsCurlyDollar(int pos) {
+        return _input.LA(pos) == '$';
+    }
+
+    /** Marks that PHP mode was entered from within an interpolated string. */
+    protected void SetInsideString() {
+        _insideString = true;
+    }
+}
\ No newline at end of file
diff --git a/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt b/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt
index 2a53e548..d3c270fe 100644
--- a/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt
+++ b/src/main/kotlin/astminer/parse/antlr/AntlrHandler.kt
@@ -1,12 +1,12 @@
 package astminer.parse.antlr
 
-import astminer.common.model.ParseResult
-import astminer.common.model.HandlerFactory
-import astminer.common.model.LanguageHandler
+import astminer.common.model.*
 import astminer.parse.antlr.java.JavaFunctionSplitter
 import astminer.parse.antlr.java.JavaParser
 import astminer.parse.antlr.javascript.JavaScriptFunctionSplitter
 import astminer.parse.antlr.javascript.JavaScriptParser
+import astminer.parse.antlr.php.PHPFunctionSplitter
+import
/** Creates ANTLR-based handlers for PHP files. */
object AntlrPHPHandlerFactory : HandlerFactory {
    override fun createHandler(file: File): LanguageHandler<AntlrNode> = AntlrPHPHandler(file)

    /** Parses [file] with [PHPParser] on construction and exposes a [PHPFunctionSplitter]. */
    class AntlrPHPHandler(file: File) : LanguageHandler<AntlrNode>() {
        override val parseResult: ParseResult<AntlrNode> = PHPParser().parseFile(file)
        override val splitter: TreeFunctionSplitter<AntlrNode> = PHPFunctionSplitter()
    }
}

/**
 * Function description extracted from the subtree rooted at [root]: name node, return type,
 * parameters and the closest enclosing function, class or assignment expression.
 *
 * All properties are computed eagerly when the object is constructed.
 */
class ANTLRPHPFunctionInfo(override val root: AntlrNode) : FunctionInfo<AntlrNode> {
    override val returnType = getElementType(root)
    override val nameNode: AntlrNode? = root.getChildOfType(FUNCTION_NAME)

    override val parameters: List<FunctionInfoParameter> = collectParameters()
    override val enclosingElement: EnclosingElement<AntlrNode>? = collectEnclosingElement()

    companion object {
        const val PARAMETERS_LIST = "formalParameterList"
        const val PARAMETER = "formalParameter"
        const val TYPE = "typeHint"
        const val PARAMETER_NAME = "VarName"
        const val CLASS_MEMBER = "classStatement"
        const val FUNCTION_NAME = "identifier"
        const val CLASS_DECLARATION = "classDeclaration"
        const val VAR_DECLARATION = "variableInitializer"
        const val ELLIPSIS = "Ellipsis"
        const val EXPRESSION = "expression"
        const val ASSIGN_OP = "assignmentOperator"
        const val LAMBDA_TOKEN = "LambdaFn"
        const val FUNCTION_TOKEN = "Function_"
        const val REFERENCE = "Ampersand"
    }

    /**
     * Collects the parameters found under the `formalParameterList` child of [root].
     *
     * @return an empty list when there is no parameter list; otherwise one entry per parameter.
     *         In the multi-parameter path, parameters that fail with [IllegalStateException]
     *         are skipped.
     */
    private fun collectParameters(): List<FunctionInfoParameter> {
        // Parameters in this grammar have following structure (children order may be wrong):
        //formal parameter list -> formal parameter -> Ampersand
        //                                          | -> type hint
        //                                          | -> ellipsis
        //                                          | -> var init -> var name
        //                                                      | -> equal
        //                                                      | -> default value

        // No parameters
        val parameterList = root.getChildOfType(PARAMETERS_LIST) ?: return emptyList()

        // Checking if function have only one parameter
        // without ellipsis, type hint or default value
        if (parameterList.hasLastLabel(PARAMETER_NAME) || parameterList.hasLastLabel(VAR_DECLARATION)) {
            return listOf(assembleParameter(parameterList))
        }

        // Otherwise find all parameters
        return parameterList.getItOrChildrenOfType(PARAMETER).mapNotNull {
            try { assembleParameter(it) } catch (e: IllegalStateException) { return@mapNotNull null }
        }
    }

    /** Builds a [FunctionInfoParameter] from the name and type hint found under [parameterNode]. */
    private fun assembleParameter(parameterNode: AntlrNode): FunctionInfoParameter {
        return FunctionInfoParameter(
            name = getParameterName(parameterNode),
            type = getElementType(parameterNode)
        )
    }

    /**
     * Returns the parameter name, prefixed with "&" when an `Ampersand` child is present and
     * with "..." when an `Ellipsis` child is present ("...$args" in PHP is the equivalent of
     * `*args` in Python).
     *
     * @throws IllegalStateException when no name can be found. Missing nodes are reported this
     *         way (instead of `NoSuchElementException` from `first()`) so that the handler in
     *         [collectParameters] can skip the parameter.
     */
    private fun getParameterName(parameterNode: AntlrNode): String {
        // The node itself is the variable name: its token is returned without any prefix.
        if (parameterNode.hasLastLabel(PARAMETER_NAME)) {
            return parameterNode.originalToken
                ?: throw IllegalStateException("No name was found for a parameter")
        }

        val varInit = parameterNode.getItOrChildrenOfType(VAR_DECLARATION).firstOrNull()
            ?: throw IllegalStateException("No name was found for a parameter")

        val name = varInit.getItOrChildrenOfType(PARAMETER_NAME).firstOrNull()?.originalToken
            ?: throw IllegalStateException("No name was found for a parameter")

        val isPassedByReference = parameterNode.getChildOfType(REFERENCE) != null
        val isSplattedArg = parameterNode.getChildOfType(ELLIPSIS) != null

        return (if (isPassedByReference) "&" else "") + (if (isSplattedArg) "..." else "") + name
    }

    /** Returns the token of the `typeHint` child of [element], or null when there is none. */
    private fun getElementType(element: AntlrNode): String? {
        return element.getChildOfType(TYPE)?.originalToken
    }

    /**
     * Finds the closest enclosing function, class or assignment expression of [root].
     *
     * @return null when nothing encloses [root] or when its name or type cannot be determined.
     */
    private fun collectEnclosingElement(): EnclosingElement<AntlrNode>? {
        val enclosing = root.findEnclosingElementBy { it.isPossibleEnclosing() } ?: return null
        return try {
            EnclosingElement(
                root = enclosing,
                name = getEnclosingElementName(enclosing),
                type = getEnclosingType(enclosing)
            )
        } catch (e: IllegalStateException) {
            null
        }
    }

    /** Maps [enclosing] to its element type; the method check must come before the function check. */
    private fun getEnclosingType(enclosing: AntlrNode): EnclosingElementType {
        return when {
            enclosing.isMethod() -> EnclosingElementType.Method
            enclosing.isFunction() -> EnclosingElementType.Function
            enclosing.isClass() -> EnclosingElementType.Class
            enclosing.isAssignExpression() -> EnclosingElementType.VariableDeclaration
            else -> throw IllegalStateException("No type can be associated")
        }
    }

    /**
     * Returns the `identifier` token of a function or class, or the `VarName` token found among
     * the direct children of an assignment expression.
     */
    private fun getEnclosingElementName(enclosing: AntlrNode): String? {
        return when {
            enclosing.isFunction() || enclosing.isClass() -> enclosing.getChildOfType(FUNCTION_NAME)?.originalToken
            enclosing.isAssignExpression() -> enclosing.children.find { it.hasLastLabel(PARAMETER_NAME) }?.originalToken
            else -> throw IllegalStateException("No type can be associated")
        }
    }

    // No check for method because method is a function
    private fun AntlrNode.isPossibleEnclosing() = isFunction() || isClass() || isAssignExpression()

    private fun AntlrNode.isMethod() = isFunction() && (hasFirstLabel(CLASS_MEMBER))

    private fun AntlrNode.isFunction() = getChildOfType(LAMBDA_TOKEN) != null || getChildOfType(FUNCTION_TOKEN) != null

    private fun AntlrNode.isAssignExpression() = hasFirstLabel(EXPRESSION) && (getChildOfType(ASSIGN_OP) != null)

    private fun AntlrNode.isClass(): Boolean = hasLastLabel(CLASS_DECLARATION)
}

/**
 * Finds functions in a PHP tree by looking for `LambdaFn` and `Function_` nodes; the parent of
 * each such node is taken as the function root.
 */
class PHPFunctionSplitter : TreeFunctionSplitter<AntlrNode> {
    companion object {
        const val LAMBDA_TOKEN = "LambdaFn"
        const val FUNCTION_TOKEN = "Function_"
    }

    /**
     * @return one [ANTLRPHPFunctionInfo] per function/lambda keyword node that has a parent,
     *         in pre-order of the keyword nodes.
     */
    override fun splitIntoFunctions(root: AntlrNode): Collection<FunctionInfo<AntlrNode>> {
        return root.preOrder()
            .filter { it.typeLabel == LAMBDA_TOKEN || it.typeLabel == FUNCTION_TOKEN }
            .mapNotNull { node -> node.parent?.let { statement -> ANTLRPHPFunctionInfo(statement) } }
    }
}
/**
 * Parses PHP sources with the ANTLR-generated `PhpLexer` / `PhpParser`.
 *
 * Be aware that this parser can have some troubles with parsing function modifiers and
 * string concatenation via dot (AST just falls apart when class field contain dot
 * concatenation). More details can be found in the corresponding issue:
 * https://github.com/antlr/grammars-v4/issues/1991
 */
class PHPParser : Parser<AntlrNode> {
    /**
     * Reads [content] and converts the resulting `htmlDocument` parse tree into an [AntlrNode] tree.
     *
     * The default ANTLR error listeners are removed from both lexer and parser.
     *
     * @throws ParsingException wrapping the message of any exception raised while parsing.
     */
    override fun parseInputStream(content: InputStream): AntlrNode {
        return try {
            val stream = CharStreams.fromStream(content)
            // Php keywords are case-insensitive, so case changing stream must be used
            // Tokens won't be in lower case in resulting tree
            val lexer = PhpLexer(CaseChangingCharStream(stream, false))
            lexer.removeErrorListeners()
            val tokens = CommonTokenStream(lexer)
            val parser = PhpParser(tokens)
            parser.removeErrorListeners()
            val context = parser.htmlDocument()
            convertAntlrTree(context, PhpParser.ruleNames, PhpParser.VOCABULARY)
        } catch (e: Exception) {
            throw ParsingException("ANTLR", "PHP", e.message)
        }
    }
}
/**
 * Checks that [PHPFunctionSplitter] finds every function in the fixture file and that the
 * extracted info matches the `info : {...}` annotations written in the fixture's comments.
 */
internal class ANTLRPHPFunctionSplitterTest {
    companion object {
        const val N_METHODS = 18
        const val testFilePath = "src/test/resources/methodSplitting/testMethodSplitting.php"
        val functionSplitter = PHPFunctionSplitter()
        val parser = PHPParser()
    }

    private var functionInfos: Collection<FunctionInfo<AntlrNode>> = listOf()

    /** Parses the fixture and splits it into functions before each test. */
    @BeforeTest
    fun parseTree() {
        val testTree = parser.parseInputStream(File(testFilePath).inputStream())
        assertNotNull(testTree)
        functionInfos = functionSplitter.splitIntoFunctions(testTree)
    }

    @Test
    fun testValidSplitting() {
        assertEquals(N_METHODS, functionInfos.size, "Test file contains $N_METHODS methods")
    }

    @Test
    fun testValidMethodInfo() {
        // Spelling of each enclosing element type as used in the fixture annotations.
        fun EnclosingElementType.getEnclosingElementType(): String {
            return when (this) {
                EnclosingElementType.Function -> "function"
                EnclosingElementType.Class -> "class"
                EnclosingElementType.Method -> "method"
                EnclosingElementType.VariableDeclaration -> "variable"
                else -> ""
            }
        }

        // Renders a function info in the exact format of the fixture annotations.
        fun FunctionInfo<AntlrNode>.getJsonInfo(): String {
            return listOf(
                "info : {",
                "name: ${name}, ",
                "args: ${parameters.joinToString(", ") { listOfNotNull(it.type, it.name).joinToString(" ") }}, ",
                "enclosing element: ${enclosingElement?.type?.getEnclosingElementType()}, ",
                "enclosing element name: ${enclosingElement?.name}, ",
                "return type: $returnType",
                "}"
            ).joinToString("")
        }

        val actualJsonInfos = functionInfos.map { it.getJsonInfo() + '\n' }.sorted()

        // Expected values are the annotations embedded in the fixture itself.
        val text = File(testFilePath).readText()
        val expectedJsonInfos = Regex("info : \\{.*\\}").findAll(text).toList().map { it.value + '\n' }.sorted()

        assertEquals(expectedJsonInfos, actualJsonInfos)
    }
}

/** Smoke test: parsing the example PHP file yields a non-null tree. */
internal class ANTLRPHPParserText {

    @Test
    fun testNodeIsNotNull() {
        val parser = PHPParser()
        val file = File("src/test/resources/examples/1.php")
        val node = parser.parseInputStream(FileInputStream(file))
        assertNotNull(node)
    }
}
enclosing element name: null, return type: null} +$arrow2 = fn($x) => fn($y) => $x * $y; + +// #11 info : {name: null, args: $x, enclosing element: null, enclosing element name: null, return type: null} +fn($x = 42) => $x; + +// #12 info : {name: null, args: &$x, enclosing element: null, enclosing element name: null, return type: null} +fn(&$x) => $x; + +// #13 info : {name: null, args: $x, enclosing element: null, enclosing element name: null, return type: null} +fn&($x) => $x; + +// #14 info : {name: null, args: $x, ...$rest, enclosing element: null, enclosing element name: null, return type: null} +fn($x, ...$rest) => $rest; + +////////////////// METHOD FUNCTIONS ////////////////// + +class someClass { + // #15 info : {name: someFunc, args: , enclosing element: class, enclosing element name: someClass, return type: null} + public function someFunc() { + return 42; + } + + // #16 info : {name: funcWithParams, args: $a, $b, enclosing element: class, enclosing element name: someClass, return type: null} + public function funcWithParams($a, $b) { + + // #17 info : {name: innerFunction, args: , enclosing element: method, enclosing element name: funcWithParams, return type: null} + function innerFunction() { + + // #18 info : {name: superInnerFunction, args: , enclosing element: function, enclosing element name: innerFunction, return type: null} + function superInnerFunction() { + return 42; + } + return 42; + } + return 42; + } +} \ No newline at end of file