Skip to content

Commit

Permalink
This PR adds the following new Metapath map features.
Browse files Browse the repository at this point in the history
- Added support for map construction.
- Added support for maps in function calls and in postfix and unary lookups.
- Added support and unit tests for the following Metapath functions:
  - map:get
  - map:merge
  - map:entry
  - map:size
  - map:keys
  - map:contains
  - map:find
  - map:put
  - map:remove
- Added missing characteristics on a number of existing Metapath functions.
- Cleaned up a bunch of PMD warnings.
  • Loading branch information
david-waltermire committed Jun 3, 2024
1 parent f29d5d7 commit 0235e9a
Show file tree
Hide file tree
Showing 103 changed files with 4,453 additions and 506 deletions.
17 changes: 7 additions & 10 deletions core/src/main/antlr4/Metapath10.g4
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,8 @@ parser grammar Metapath10;

options { tokenVocab=Metapath10Lexer; superClass=Metapath10ParserBase; }

// Metapath extensions
metapath : expr EOF ;

// [1]
// xpath : expr EOF ;
metapath : expr EOF ;
// paramlist : param ( COMMA param)* ;
// param : DOLLAR eqname typedeclaration? ;
// functionbody : enclosedexpr ;
Expand Down Expand Up @@ -81,7 +78,7 @@ keyspecifier : NCName | IntegerLiteral | parenthesizedexpr | STAR ;
//arrowfunctionspecifier : eqname | varref | parenthesizedexpr ;
arrowfunctionspecifier : eqname;
// primaryexpr : literal | varref | parenthesizedexpr | contextitemexpr | functioncall | functionitemexpr | mapconstructor | arrayconstructor | unarylookup ;
primaryexpr : literal | varref | parenthesizedexpr | contextitemexpr | functioncall | arrayconstructor | unarylookup;
primaryexpr : literal | varref | parenthesizedexpr | contextitemexpr | functioncall | mapconstructor | arrayconstructor | unarylookup;
literal : numericliteral | StringLiteral ;
numericliteral : IntegerLiteral | DecimalLiteral | DoubleLiteral ;
varref : DOLLAR varname ;
Expand All @@ -97,11 +94,11 @@ argument : exprsingle ;
// functionitemexpr : namedfunctionref | inlinefunctionexpr ;
// namedfunctionref : eqname POUND IntegerLiteral /* xgc: reserved-function-names */;
// inlinefunctionexpr : KW_FUNCTION OP paramlist? CP ( KW_AS sequencetype)? functionbody ;
// mapconstructor : KW_MAP OC (mapconstructorentry ( COMMA mapconstructorentry)*)? CC ;
mapconstructor : KW_MAP OC (mapconstructorentry ( COMMA mapconstructorentry)*)? CC ;
// [70]
// mapconstructorentry : mapkeyexpr COLON mapvalueexpr ;
// mapkeyexpr : exprsingle ;
// mapvalueexpr : exprsingle ;
mapconstructorentry : mapkeyexpr COLON mapvalueexpr ;
mapkeyexpr : exprsingle ;
mapvalueexpr : exprsingle ;
arrayconstructor : squarearrayconstructor | curlyarrayconstructor ;
squarearrayconstructor : OB (exprsingle ( COMMA exprsingle)*)? CB ;
// [75]
Expand Down Expand Up @@ -152,7 +149,7 @@ unarylookup : QM keyspecifier ;


// Error in the spec. EQName also includes acceptable keywords.
eqname : QName | URIQualifiedName
eqname : NCName | QName | URIQualifiedName
| KW_ANCESTOR
| KW_ANCESTOR_OR_SELF
| KW_AND
Expand Down
298 changes: 156 additions & 142 deletions core/src/main/antlr4/Metapath10Lexer.g4
Original file line number Diff line number Diff line change
@@ -1,166 +1,180 @@
// This grammar is derived from the XPath 3.1 grammar produced by Ken Domino, et al (https://github.com/antlr/grammars-v4/blob/63359bd91593ece31a384acd507ae860d6cf7ff7/xpath/xpath31/XPath31Lexer.g4).

// This is a faithful implementation of the XPath version 3.1 grammar
// from the spec at https://www.w3.org/TR/2017/REC-xpath-31-20170321/

// $antlr-format alignTrailingComments true, columnLimit 150, maxEmptyLinesToKeep 1, reflowComments false, useTab false
// $antlr-format allowShortRulesOnASingleLine true, allowShortBlocksOnASingleLine true, minEmptyLines 0, alignSemicolons ownLine
// $antlr-format alignColons trailing, singleLineOverrulesHangingColon true, alignLexerCommands true, alignLabels true, alignTrailers true

lexer grammar Metapath10Lexer;

AT : '@' ;
BANG : '!' ;
CB : ']' ;
CC : '}' ;
CEQ : ':=' ;
COLON : ':' ;
COLONCOLON : '::' ;
COMMA : ',' ;
CP : ')' ;
CS : ':*' ;
D : '.' ;
DD : '..' ;
DOLLAR : '$' ;
EG : '=>' ;
EQ : '=' ;
GE : '>=' ;
GG : '>>' ;
GT : '>' ;
LE : '<=' ;
LL : '<<' ;
LT : '<' ;
MINUS : '-' ;
NE : '!=' ;
OB : '[' ;
OC : '{' ;
OP : '(' ;
P : '|' ;
PLUS : '+' ;
POUND : '#' ;
PP : '||' ;
QM : '?' ;
SC : '*:' ;
SLASH : '/' ;
SS : '//' ;
STAR : '*' ;
AT : '@';
BANG : '!';
CB : ']';
CC : '}';
CEQ : ':=';
COLON : ':';
COLONCOLON : '::';
COMMA : ',';
CP : ')';
CS : ':*';
D : '.';
DD : '..';
DOLLAR : '$';
EG : '=>';
EQ : '=';
GE : '>=';
GG : '>>';
GT : '>';
LE : '<=';
LL : '<<';
LT : '<';
MINUS : '-';
NE : '!=';
OB : '[';
OC : '{';
OP : '(';
P : '|';
PLUS : '+';
POUND : '#';
PP : '||';
QM : '?';
SC : '*:';
SLASH : '/';
SS : '//';
STAR : '*';

// KEYWORDS

KW_ANCESTOR : 'ancestor' ;
KW_ANCESTOR_OR_SELF : 'ancestor-or-self' ;
KW_AND : 'and' ;
KW_ARRAY : 'array' ;
KW_AS : 'as' ;
KW_ATTRIBUTE : 'attribute' ;
KW_CAST : 'cast' ;
KW_CASTABLE : 'castable' ;
KW_CHILD : 'child' ;
KW_COMMENT : 'comment' ;
KW_DESCENDANT : 'descendant' ;
KW_DESCENDANT_OR_SELF : 'descendant-or-self' ;
KW_DIV : 'div' ;
KW_DOCUMENT_NODE : 'document-node' ;
KW_ELEMENT : 'element' ;
KW_ELSE : 'else' ;
KW_EMPTY_SEQUENCE : 'empty-sequence' ;
KW_EQ : 'eq' ;
KW_EVERY : 'every' ;
KW_EXCEPT : 'except' ;
KW_FOLLOWING : 'following' ;
KW_FOLLOWING_SIBLING : 'following-sibling' ;
KW_FOR : 'for' ;
KW_FUNCTION : 'function' ;
KW_GE : 'ge' ;
KW_GT : 'gt' ;
KW_IDIV : 'idiv' ;
KW_IF : 'if' ;
KW_IN : 'in' ;
KW_INSTANCE : 'instance' ;
KW_INTERSECT : 'intersect' ;
KW_IS : 'is' ;
KW_ITEM : 'item' ;
KW_LE : 'le' ;
KW_LET : 'let' ;
KW_LT : 'lt' ;
KW_MAP : 'map' ;
KW_MOD : 'mod' ;
KW_NAMESPACE : 'namespace' ;
KW_NAMESPACE_NODE : 'namespace-node' ;
KW_NE : 'ne' ;
KW_NODE : 'node' ;
KW_OF : 'of' ;
KW_OR : 'or' ;
KW_PARENT : 'parent' ;
KW_PRECEDING : 'preceding' ;
KW_PRECEDING_SIBLING : 'preceding-sibling' ;
KW_PROCESSING_INSTRUCTION : 'processing-instruction' ;
KW_RETURN : 'return' ;
KW_SATISFIES : 'satisfies' ;
KW_SCHEMA_ATTRIBUTE : 'schema-attribute' ;
KW_SCHEMA_ELEMENT : 'schema-element' ;
KW_SELF : 'self' ;
KW_SOME : 'some' ;
KW_TEXT : 'text' ;
KW_THEN : 'then' ;
KW_TO : 'to' ;
KW_TREAT : 'treat' ;
KW_UNION : 'union' ;
KW_ANCESTOR : 'ancestor';
KW_ANCESTOR_OR_SELF : 'ancestor-or-self';
KW_AND : 'and';
KW_ARRAY : 'array';
KW_AS : 'as';
KW_ATTRIBUTE : 'attribute';
KW_CAST : 'cast';
KW_CASTABLE : 'castable';
KW_CHILD : 'child';
KW_COMMENT : 'comment';
KW_DESCENDANT : 'descendant';
KW_DESCENDANT_OR_SELF : 'descendant-or-self';
KW_DIV : 'div';
KW_DOCUMENT_NODE : 'document-node';
KW_ELEMENT : 'element';
KW_ELSE : 'else';
KW_EMPTY_SEQUENCE : 'empty-sequence';
KW_EQ : 'eq';
KW_EVERY : 'every';
KW_EXCEPT : 'except';
KW_FOLLOWING : 'following';
KW_FOLLOWING_SIBLING : 'following-sibling';
KW_FOR : 'for';
KW_FUNCTION : 'function';
KW_GE : 'ge';
KW_GT : 'gt';
KW_IDIV : 'idiv';
KW_IF : 'if';
KW_IN : 'in';
KW_INSTANCE : 'instance';
KW_INTERSECT : 'intersect';
KW_IS : 'is';
KW_ITEM : 'item';
KW_LE : 'le';
KW_LET : 'let';
KW_LT : 'lt';
KW_MAP : 'map';
KW_MOD : 'mod';
KW_NAMESPACE : 'namespace';
KW_NAMESPACE_NODE : 'namespace-node';
KW_NE : 'ne';
KW_NODE : 'node';
KW_OF : 'of';
KW_OR : 'or';
KW_PARENT : 'parent';
KW_PRECEDING : 'preceding';
KW_PRECEDING_SIBLING : 'preceding-sibling';
KW_PROCESSING_INSTRUCTION : 'processing-instruction';
KW_RETURN : 'return';
KW_SATISFIES : 'satisfies';
KW_SCHEMA_ATTRIBUTE : 'schema-attribute';
KW_SCHEMA_ELEMENT : 'schema-element';
KW_SELF : 'self';
KW_SOME : 'some';
KW_TEXT : 'text';
KW_THEN : 'then';
KW_TO : 'to';
KW_TREAT : 'treat';
KW_UNION : 'union';

// A.2.1. TERMINAL SYMBOLS
// This isn't a complete list of tokens in the language.
// Keywords and symbols are terminals.

IntegerLiteral : FragDigits ;
DecimalLiteral : '.' FragDigits | FragDigits '.' [0-9]* ;
DoubleLiteral : ('.' FragDigits | FragDigits ('.' [0-9]*)?) [eE] [+-]? FragDigits ;
StringLiteral : '"' (~["] | FragEscapeQuot)* '"' | '\'' (~['] | FragEscapeApos)* '\'' ;
URIQualifiedName : BracedURILiteral NCName ;
BracedURILiteral : 'Q' '{' [^{}]* '}' ;
IntegerLiteral : FragDigits;
DecimalLiteral : '.' FragDigits | FragDigits '.' [0-9]*;
DoubleLiteral : ('.' FragDigits | FragDigits ('.' [0-9]*)?) [eE] [+-]? FragDigits;
StringLiteral : '"' (~["] | FragEscapeQuot)* '"' | '\'' (~['] | FragEscapeApos)* '\'';
URIQualifiedName : BracedURILiteral NCName;
// The spec's "[^{}]" is regex-style negation; ANTLR4 negates a set with '~'.
// Written as '[^{}]' the set would match only the literal characters '^', '{' and '}'.
BracedURILiteral : 'Q' '{' ~[{}]* '}';
// Error in spec: EscapeQuot and EscapeApos are not terminals!
fragment FragEscapeQuot : '""' ;
fragment FragEscapeQuot : '""';
fragment FragEscapeApos : '\'\'';
// Error in spec: Comment isn't really a terminal, but an off-channel object.
Comment : '(:' (Comment | CommentContents)*? ':)' -> skip ;
QName : FragQName ;
NCName : FragmentNCName ;
Comment : '(:' (Comment | CommentContents)*? ':)' -> skip;
NCName : FragmentNCName;
QName : FragQName;
// Error in spec: Char is not a terminal!
fragment Char : FragChar ;
fragment FragDigits : [0-9]+ ;
fragment CommentContents : Char ;
fragment Char : FragChar;
fragment FragDigits : [0-9]+;
fragment CommentContents : Char;
// https://www.w3.org/TR/REC-xml-names/#NT-QName
fragment FragQName : FragPrefixedName | FragUnprefixedName ;
fragment FragPrefixedName : FragPrefix ':' FragLocalPart ;
fragment FragUnprefixedName : FragLocalPart ;
fragment FragPrefix : FragmentNCName ;
fragment FragLocalPart : FragmentNCName ;
fragment FragNCNameStartChar
: 'A'..'Z'
| '_'
| 'a'..'z'
| '\u00C0'..'\u00D6'
| '\u00D8'..'\u00F6'
| '\u00F8'..'\u02FF'
| '\u0370'..'\u037D'
| '\u037F'..'\u1FFF'
| '\u200C'..'\u200D'
| '\u2070'..'\u218F'
| '\u2C00'..'\u2FEF'
| '\u3001'..'\uD7FF'
| '\uF900'..'\uFDCF'
| '\uFDF0'..'\uFFFD'
| '\u{10000}'..'\u{EFFFF}'
;
fragment FragNCNameChar
: FragNCNameStartChar | '-' | '.' | '0'..'9'
| '\u00B7' | '\u0300'..'\u036F'
| '\u203F'..'\u2040'
;
fragment FragmentNCName : FragNCNameStartChar FragNCNameChar* ;
fragment FragQName : FragPrefixedName | FragUnprefixedName;
fragment FragPrefixedName : FragPrefix ':' FragLocalPart;
fragment FragUnprefixedName : FragLocalPart;
fragment FragPrefix : FragmentNCName;
fragment FragLocalPart : FragmentNCName;
fragment FragNCNameStartChar:
'A' ..'Z'
| '_'
| 'a' ..'z'
| '\u00C0' ..'\u00D6'
| '\u00D8' ..'\u00F6'
| '\u00F8' ..'\u02FF'
| '\u0370' ..'\u037D'
| '\u037F' ..'\u1FFF'
| '\u200C' ..'\u200D'
| '\u2070' ..'\u218F'
| '\u2C00' ..'\u2FEF'
| '\u3001' ..'\uD7FF'
| '\uF900' ..'\uFDCF'
| '\uFDF0' ..'\uFFFD'
| '\u{10000}' ..'\u{EFFFF}'
;
fragment FragNCNameChar:
FragNCNameStartChar
| '-'
| '.'
| '0' ..'9'
| '\u00B7'
| '\u0300' ..'\u036F'
| '\u203F' ..'\u2040'
;
fragment FragmentNCName: FragNCNameStartChar FragNCNameChar*;

// https://www.w3.org/TR/REC-xml/#NT-Char

fragment FragChar : '\u0009' | '\u000a' | '\u000d'
| '\u0020'..'\ud7ff'
| '\ue000'..'\ufffd'
| '\u{10000}'..'\u{10ffff}'
;
fragment FragChar:
'\u0009'
| '\u000a'
| '\u000d'
| '\u0020' ..'\ud7ff'
| '\ue000' ..'\ufffd'
| '\u{10000}' ..'\u{10ffff}'
;

// https://github.com/antlr/grammars-v4/blob/17d3db3fd6a8fc319a12176e0bb735b066ec0616/xpath/xpath31/XPath31.g4#L389
Whitespace : ('\u000d' | '\u000a' | '\u0020' | '\u0009')+ -> skip ;
Whitespace: ('\u000d' | '\u000a' | '\u0020' | '\u0009')+ -> skip;

// Not per spec. Specified for testing.
SEMI : ';' ;
SEMI: ';';
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,8 @@
import com.vladsch.flexmark.util.sequence.BasedSequence;
import com.vladsch.flexmark.util.sequence.Escaping;

import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.IMarkupWriter;
import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.HtmlQuoteTagExtension.DoubleQuoteNode;
import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.IMarkupWriter.ChildHandler;
import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.IMarkupWriter;
import gov.nist.secauto.metaschema.core.datatype.markup.flexmark.InsertAnchorExtension.InsertAnchorNode;
import gov.nist.secauto.metaschema.core.util.CollectionUtil;
import gov.nist.secauto.metaschema.core.util.ObjectUtils;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@ public interface ICollectionValue {

@NonNull
static Stream<? extends IItem> normalizeAsItems(@NonNull ICollectionValue value) {
return value instanceof IItem ? ObjectUtils.notNull(Stream.of((IItem) value)) : value.asSequence().stream();
return value instanceof IItem
? ObjectUtils.notNull(Stream.of((IItem) value))
: value.asSequence().stream();
}

@NonNull
Expand Down
Loading

0 comments on commit 0235e9a

Please sign in to comment.