From cc3a3b4aeadcd312ad8862abe86435227dc5a983 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Sun, 4 Dec 2022 18:12:15 +0100 Subject: [PATCH 001/132] Generate Java assign token more often --- .../java/TokenGeneratingTreeScanner.java | 58 ++++++------------- 1 file changed, 19 insertions(+), 39 deletions(-) diff --git a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java index ee2d053b6..99222ff75 100644 --- a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java +++ b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java @@ -3,46 +3,9 @@ import java.io.File; import java.util.ArrayList; import java.util.List; +import java.util.Set; -import com.sun.source.tree.AnnotationTree; -import com.sun.source.tree.AssertTree; -import com.sun.source.tree.AssignmentTree; -import com.sun.source.tree.BlockTree; -import com.sun.source.tree.BreakTree; -import com.sun.source.tree.CaseTree; -import com.sun.source.tree.CatchTree; -import com.sun.source.tree.ClassTree; -import com.sun.source.tree.CompilationUnitTree; -import com.sun.source.tree.ConditionalExpressionTree; -import com.sun.source.tree.ContinueTree; -import com.sun.source.tree.DefaultCaseLabelTree; -import com.sun.source.tree.DoWhileLoopTree; -import com.sun.source.tree.EnhancedForLoopTree; -import com.sun.source.tree.ErroneousTree; -import com.sun.source.tree.ExportsTree; -import com.sun.source.tree.ForLoopTree; -import com.sun.source.tree.IfTree; -import com.sun.source.tree.ImportTree; -import com.sun.source.tree.LineMap; -import com.sun.source.tree.MethodInvocationTree; -import com.sun.source.tree.MethodTree; -import com.sun.source.tree.ModuleTree; -import com.sun.source.tree.NewArrayTree; -import com.sun.source.tree.NewClassTree; -import com.sun.source.tree.PackageTree; -import com.sun.source.tree.ProvidesTree; -import com.sun.source.tree.RequiresTree; -import 
com.sun.source.tree.ReturnTree; -import com.sun.source.tree.SwitchExpressionTree; -import com.sun.source.tree.SwitchTree; -import com.sun.source.tree.SynchronizedTree; -import com.sun.source.tree.ThrowTree; -import com.sun.source.tree.Tree; -import com.sun.source.tree.TryTree; -import com.sun.source.tree.TypeParameterTree; -import com.sun.source.tree.VariableTree; -import com.sun.source.tree.WhileLoopTree; -import com.sun.source.tree.YieldTree; +import com.sun.source.tree.*; import com.sun.source.util.SourcePositions; import com.sun.source.util.TreeScanner; @@ -335,6 +298,23 @@ public Object visitAssignment(AssignmentTree node, Object p) { return super.visitAssignment(node, p); } + @Override + public Object visitCompoundAssignment(CompoundAssignmentTree node, Object p) { + long start = positions.getStartPosition(ast, node); + addToken(JavaTokenType.J_ASSIGN, start, 1); + return super.visitCompoundAssignment(node, p); + } + + @Override + public Object visitUnary(UnaryTree node, Object p) { + if (Set.of(Tree.Kind.PREFIX_INCREMENT, Tree.Kind.POSTFIX_INCREMENT, Tree.Kind.PREFIX_DECREMENT, Tree.Kind.POSTFIX_DECREMENT) + .contains(node.getKind())) { + long start = positions.getStartPosition(ast, node); + addToken(JavaTokenType.J_ASSIGN, start, 1); + } + return super.visitUnary(node, p); + } + @Override public Object visitAssert(AssertTree node, Object p) { long start = positions.getStartPosition(ast, node); From 232b84966b88fec7946d11400cb9003ccd4b2818 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Tue, 6 Dec 2022 15:34:07 +0100 Subject: [PATCH 002/132] Re-add imports --- .../src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java index bc70ec68c..36742c7b9 100644 --- a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java 
+++ b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java @@ -16,6 +16,7 @@ import com.sun.source.tree.CatchTree; import com.sun.source.tree.ClassTree; import com.sun.source.tree.CompilationUnitTree; +import com.sun.source.tree.CompoundAssignmentTree; import com.sun.source.tree.ConditionalExpressionTree; import com.sun.source.tree.ContinueTree; import com.sun.source.tree.DefaultCaseLabelTree; @@ -43,6 +44,7 @@ import com.sun.source.tree.Tree; import com.sun.source.tree.TryTree; import com.sun.source.tree.TypeParameterTree; +import com.sun.source.tree.UnaryTree; import com.sun.source.tree.VariableTree; import com.sun.source.tree.WhileLoopTree; import com.sun.source.tree.YieldTree; From c24bfd7877dc170ad77c7cba292ff9a97c66ef39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Tue, 6 Dec 2022 16:29:56 +0100 Subject: [PATCH 003/132] Make TreeScanner parameters Void --- .../java/TokenGeneratingTreeScanner.java | 207 ++++++++++-------- 1 file changed, 116 insertions(+), 91 deletions(-) diff --git a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java index 36742c7b9..9d917613a 100644 --- a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java +++ b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java @@ -51,7 +51,7 @@ import com.sun.source.util.SourcePositions; import com.sun.source.util.TreeScanner; -final class TokenGeneratingTreeScanner extends TreeScanner { +final class TokenGeneratingTreeScanner extends TreeScanner { private final File file; private final Parser parser; private final LineMap map; @@ -93,17 +93,17 @@ private void addToken(JavaTokenType tokenType, long start, long end) { } @Override - public Object visitBlock(BlockTree node, Object p) { + public Void visitBlock(BlockTree node, Void unused) { long start = positions.getStartPosition(ast, node); long end = 
positions.getEndPosition(ast, node) - 1; addToken(JavaTokenType.J_INIT_BEGIN, start, 1); - Object result = super.visitBlock(node, p); + super.visitBlock(node, unused); addToken(JavaTokenType.J_INIT_END, end, 1); - return result; + return null; } @Override - public Object visitClass(ClassTree node, Object p) { + public Void visitClass(ClassTree node, Void unused) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; @@ -118,7 +118,7 @@ public Object visitClass(ClassTree node, Object p) { } else if (node.getKind() == Tree.Kind.CLASS) { addToken(JavaTokenType.J_CLASS_BEGIN, start, 5); } - Object result = super.visitClass(node, p); + super.visitClass(node, unused); if (node.getKind() == Tree.Kind.ENUM) { addToken(JavaTokenType.J_ENUM_END, end, 1); } else if (node.getKind() == Tree.Kind.INTERFACE) { @@ -130,112 +130,115 @@ public Object visitClass(ClassTree node, Object p) { } else if (node.getKind() == Tree.Kind.CLASS) { addToken(JavaTokenType.J_CLASS_END, end, 1); } - return result; + return null; } @Override - public Object visitImport(ImportTree node, Object p) { + public Void visitImport(ImportTree node, Void unused) { long start = positions.getStartPosition(ast, node); addToken(JavaTokenType.J_IMPORT, start, 6); - return super.visitImport(node, p); + super.visitImport(node, unused); + return null; } @Override - public Object visitPackage(PackageTree node, Object p) { + public Void visitPackage(PackageTree node, Void unused) { long start = positions.getStartPosition(ast, node); addToken(JavaTokenType.J_PACKAGE, start, 7); - return super.visitPackage(node, p); + super.visitPackage(node, unused); + return null; } @Override - public Object visitMethod(MethodTree node, Object p) { + public Void visitMethod(MethodTree node, Void unused) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; addToken(JavaTokenType.J_METHOD_BEGIN, start, node.getName().length()); - 
Object result = super.visitMethod(node, p); + super.visitMethod(node, unused); addToken(JavaTokenType.J_METHOD_END, end, 1); - return result; + return null; } @Override - public Object visitSynchronized(SynchronizedTree node, Object p) { + public Void visitSynchronized(SynchronizedTree node, Void unused) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; addToken(JavaTokenType.J_SYNC_BEGIN, start, 12); - Object result = super.visitSynchronized(node, p); + super.visitSynchronized(node, unused); addToken(JavaTokenType.J_SYNC_END, end, 1); - return result; + return null; } @Override - public Object visitDoWhileLoop(DoWhileLoopTree node, Object p) { + public Void visitDoWhileLoop(DoWhileLoopTree node, Void unused) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; addToken(JavaTokenType.J_DO_BEGIN, start, 2); - Object result = super.visitDoWhileLoop(node, p); + super.visitDoWhileLoop(node, unused); addToken(JavaTokenType.J_DO_END, end, 1); - return result; + return null; } @Override - public Object visitWhileLoop(WhileLoopTree node, Object p) { + public Void visitWhileLoop(WhileLoopTree node, Void unused) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; addToken(JavaTokenType.J_WHILE_BEGIN, start, 5); - Object result = super.visitWhileLoop(node, p); + super.visitWhileLoop(node, unused); addToken(JavaTokenType.J_WHILE_END, end, 1); - return result; + return null; } @Override - public Object visitForLoop(ForLoopTree node, Object p) { + public Void visitForLoop(ForLoopTree node, Void unused) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; addToken(JavaTokenType.J_FOR_BEGIN, start, 3); - Object result = super.visitForLoop(node, p); + super.visitForLoop(node, unused); addToken(JavaTokenType.J_FOR_END, end, 1); - return result; + return null; } @Override 
- public Object visitEnhancedForLoop(EnhancedForLoopTree node, Object p) { + public Void visitEnhancedForLoop(EnhancedForLoopTree node, Void unused) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; addToken(JavaTokenType.J_FOR_BEGIN, start, 3); - Object result = super.visitEnhancedForLoop(node, p); + super.visitEnhancedForLoop(node, unused); addToken(JavaTokenType.J_FOR_END, end, 1); - return result; + return null; } @Override - public Object visitSwitch(SwitchTree node, Object p) { + public Void visitSwitch(SwitchTree node, Void unused) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; addToken(JavaTokenType.J_SWITCH_BEGIN, start, 6); - Object result = super.visitSwitch(node, p); + super.visitSwitch(node, unused); addToken(JavaTokenType.J_SWITCH_END, end, 1); - return result; + return null; } @Override - public Object visitSwitchExpression(SwitchExpressionTree node, Object parameterValue) { + public Void visitSwitchExpression(SwitchExpressionTree node, Void unused) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; addToken(JavaTokenType.J_SWITCH_BEGIN, start, 6); - Object result = super.visitSwitchExpression(node, parameterValue); + super.visitSwitchExpression(node, unused); addToken(JavaTokenType.J_SWITCH_END, end, 1); - return result; + return null; } @Override - public Object visitCase(CaseTree node, Object p) { + public Void visitCase(CaseTree node, Void unused) { long start = positions.getStartPosition(ast, node); addToken(JavaTokenType.J_CASE, start, 4); - return super.visitCase(node, p); + super.visitCase(node, unused); + return null; } @Override - public Object visitTry(TryTree node, Object p) { + public Void visitTry(TryTree node, Void unused) { long start = positions.getStartPosition(ast, node); if (node.getResources().isEmpty()) addToken(JavaTokenType.J_TRY_BEGIN, start, 3); @@ -243,83 
+246,90 @@ public Object visitTry(TryTree node, Object p) { addToken(JavaTokenType.J_TRY_WITH_RESOURCE, start, 3); if (node.getFinallyBlock() != null) addToken(JavaTokenType.J_FINALLY, start, 3); - return super.visitTry(node, p); + super.visitTry(node, unused); + return null; } @Override - public Object visitCatch(CatchTree node, Object p) { + public Void visitCatch(CatchTree node, Void unused) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; addToken(JavaTokenType.J_CATCH_BEGIN, start, 5); - Object result = super.visitCatch(node, p); + super.visitCatch(node, unused); addToken(JavaTokenType.J_CATCH_END, end, 1); - return result; + return null; } @Override - public Object visitIf(IfTree node, Object p) { + public Void visitIf(IfTree node, Void unused) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; addToken(JavaTokenType.J_IF_BEGIN, start, 2); - node.getCondition().accept(this, p); - node.getThenStatement().accept(this, p); + node.getCondition().accept(this, unused); + node.getThenStatement().accept(this, unused); if (node.getElseStatement() != null) { start = positions.getStartPosition(ast, node.getElseStatement()); addToken(JavaTokenType.J_ELSE, start, 4); - node.getElseStatement().accept(this, p); + node.getElseStatement().accept(this, unused); } addToken(JavaTokenType.J_IF_END, end, 1); return null; } @Override - public Object visitBreak(BreakTree node, Object p) { + public Void visitBreak(BreakTree node, Void unused) { long start = positions.getStartPosition(ast, node); addToken(JavaTokenType.J_BREAK, start, 5); - return super.visitBreak(node, p); + super.visitBreak(node, unused); + return null; } @Override - public Object visitContinue(ContinueTree node, Object p) { + public Void visitContinue(ContinueTree node, Void unused) { long start = positions.getStartPosition(ast, node); addToken(JavaTokenType.J_CONTINUE, start, 8); - return 
super.visitContinue(node, p); + super.visitContinue(node, unused); + return null; } @Override - public Object visitReturn(ReturnTree node, Object p) { + public Void visitReturn(ReturnTree node, Void unused) { long start = positions.getStartPosition(ast, node); addToken(JavaTokenType.J_RETURN, start, 6); - return super.visitReturn(node, p); + super.visitReturn(node, unused); + return null; } @Override - public Object visitThrow(ThrowTree node, Object p) { + public Void visitThrow(ThrowTree node, Void unused) { long start = positions.getStartPosition(ast, node); addToken(JavaTokenType.J_THROW, start, 5); - return super.visitThrow(node, p); + super.visitThrow(node, unused); + return null; } @Override - public Object visitNewClass(NewClassTree node, Object p) { + public Void visitNewClass(NewClassTree node, Void unused) { long start = positions.getStartPosition(ast, node); if (node.getTypeArguments().size() > 0) { addToken(JavaTokenType.J_GENERIC, start, 3 + node.getIdentifier().toString().length()); } addToken(JavaTokenType.J_NEWCLASS, start, 3); - return super.visitNewClass(node, p); + super.visitNewClass(node, unused); + return null; } @Override - public Object visitTypeParameter(TypeParameterTree node, Object p) { + public Void visitTypeParameter(TypeParameterTree node, Void unused) { long start = positions.getStartPosition(ast, node); // This is odd, but also done like this in Java17 addToken(JavaTokenType.J_GENERIC, start, 1); - return super.visitTypeParameter(node, p); + super.visitTypeParameter(node, unused); + return null; } @Override - public Object visitNewArray(NewArrayTree node, Object arg1) { + public Void visitNewArray(NewArrayTree node, Void unused) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; addToken(JavaTokenType.J_NEWARRAY, start, 3); @@ -328,118 +338,133 @@ public Object visitNewArray(NewArrayTree node, Object arg1) { addToken(JavaTokenType.J_ARRAY_INIT_BEGIN, start, 1); 
addToken(JavaTokenType.J_ARRAY_INIT_END, end, 1); } - return super.visitNewArray(node, arg1); + super.visitNewArray(node, unused); + return null; } @Override - public Object visitAssignment(AssignmentTree node, Object p) { + public Void visitAssignment(AssignmentTree node, Void unused) { long start = positions.getStartPosition(ast, node); addToken(JavaTokenType.J_ASSIGN, start, 1); - return super.visitAssignment(node, p); + super.visitAssignment(node, unused); + return null; } @Override - public Object visitCompoundAssignment(CompoundAssignmentTree node, Object p) { + public Void visitCompoundAssignment(CompoundAssignmentTree node, Void unused) { long start = positions.getStartPosition(ast, node); addToken(JavaTokenType.J_ASSIGN, start, 1); - return super.visitCompoundAssignment(node, p); + super.visitCompoundAssignment(node, unused); + return null; } @Override - public Object visitUnary(UnaryTree node, Object p) { + public Void visitUnary(UnaryTree node, Void unused) { if (Set.of(Tree.Kind.PREFIX_INCREMENT, Tree.Kind.POSTFIX_INCREMENT, Tree.Kind.PREFIX_DECREMENT, Tree.Kind.POSTFIX_DECREMENT) .contains(node.getKind())) { long start = positions.getStartPosition(ast, node); addToken(JavaTokenType.J_ASSIGN, start, 1); } - return super.visitUnary(node, p); + super.visitUnary(node, unused); + return null; } @Override - public Object visitAssert(AssertTree node, Object p) { + public Void visitAssert(AssertTree node, Void unused) { long start = positions.getStartPosition(ast, node); addToken(JavaTokenType.J_ASSERT, start, 6); - return super.visitAssert(node, p); + super.visitAssert(node, unused); + return null; } @Override - public Object visitVariable(VariableTree node, Object p) { + public Void visitVariable(VariableTree node, Void unused) { long start = positions.getStartPosition(ast, node); addToken(JavaTokenType.J_VARDEF, start, node.toString().length()); - return super.visitVariable(node, p); + super.visitVariable(node, unused); + return null; } @Override - public 
Object visitConditionalExpression(ConditionalExpressionTree node, Object p) { + public Void visitConditionalExpression(ConditionalExpressionTree node, Void unused) { long start = positions.getStartPosition(ast, node); addToken(JavaTokenType.J_COND, start, 1); - return super.visitConditionalExpression(node, p); + super.visitConditionalExpression(node, unused); + return null; } @Override - public Object visitMethodInvocation(MethodInvocationTree node, Object p) { + public Void visitMethodInvocation(MethodInvocationTree node, Void unused) { long start = positions.getStartPosition(ast, node); addToken(JavaTokenType.J_APPLY, start, positions.getEndPosition(ast, node.getMethodSelect()) - start); - return super.visitMethodInvocation(node, p); + super.visitMethodInvocation(node, unused); + return null; } @Override - public Object visitAnnotation(AnnotationTree node, Object p) { + public Void visitAnnotation(AnnotationTree node, Void unused) { long start = positions.getStartPosition(ast, node); addToken(JavaTokenType.J_ANNO, start, 1); - return super.visitAnnotation(node, p); + super.visitAnnotation(node, unused); + return null; } @Override - public Object visitModule(ModuleTree node, Object p) { + public Void visitModule(ModuleTree node, Void unused) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; addToken(JavaTokenType.J_MODULE_BEGIN, start, 6); - Object result = super.visitModule(node, p); + super.visitModule(node, unused); addToken(JavaTokenType.J_MODULE_END, end, 1); - return result; + return null; } @Override - public Object visitRequires(RequiresTree node, Object p) { + public Void visitRequires(RequiresTree node, Void unused) { long start = positions.getStartPosition(ast, node); addToken(JavaTokenType.J_REQUIRES, start, 8); - return super.visitRequires(node, p); + super.visitRequires(node, unused); + return null; } @Override - public Object visitProvides(ProvidesTree node, Object p) { + public Void 
visitProvides(ProvidesTree node, Void unused) { long start = positions.getStartPosition(ast, node); addToken(JavaTokenType.J_PROVIDES, start, 8); - return super.visitProvides(node, p); + super.visitProvides(node, unused); + return null; } @Override - public Object visitExports(ExportsTree node, Object p) { + public Void visitExports(ExportsTree node, Void unused) { long start = positions.getStartPosition(ast, node); addToken(JavaTokenType.J_EXPORTS, start, 7); - return super.visitExports(node, p); + super.visitExports(node, unused); + return null; } @Override - public Object visitErroneous(ErroneousTree node, Object p) { + public Void visitErroneous(ErroneousTree node, Void unused) { parsingExceptions.add(new ParsingException(file, "error while visiting %s".formatted(node))); - return super.visitErroneous(node, p); + super.visitErroneous(node, unused); + return null; } @Override - public Object visitYield(YieldTree node, Object p) { + public Void visitYield(YieldTree node, Void unused) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node); addToken(JavaTokenType.J_YIELD, start, end); - return super.visitYield(node, p); + super.visitYield(node, unused); + return null; } @Override - public Object visitDefaultCaseLabel(DefaultCaseLabelTree node, Object p) { + public Void visitDefaultCaseLabel(DefaultCaseLabelTree node, Void unused) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node); addToken(JavaTokenType.J_DEFAULT, start, end); - return super.visitDefaultCaseLabel(node, p); + super.visitDefaultCaseLabel(node, unused); + return null; } } From 2d9b8230e6c75c36871206222964104b0b590632 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Wed, 7 Dec 2022 14:06:34 +0100 Subject: [PATCH 004/132] Generate unique variable ids --- .../src/main/java/de/jplag/java/Parser.java | 5 - .../java/TokenGeneratingTreeScanner.java | 103 ++++++++++++++++-- 2 files changed, 91 
insertions(+), 17 deletions(-) diff --git a/languages/java/src/main/java/de/jplag/java/Parser.java b/languages/java/src/main/java/de/jplag/java/Parser.java index ffeaa4802..816ce3203 100644 --- a/languages/java/src/main/java/de/jplag/java/Parser.java +++ b/languages/java/src/main/java/de/jplag/java/Parser.java @@ -8,7 +8,6 @@ import de.jplag.AbstractParser; import de.jplag.ParsingException; import de.jplag.Token; -import de.jplag.TokenType; public class Parser extends AbstractParser { private List tokens; @@ -26,10 +25,6 @@ public List parse(Set files) throws ParsingException { return tokens; } - public void add(TokenType type, File file, long line, long column, long length) { - add(new Token(type, file, (int) line, (int) column, (int) length)); - } - public void add(Token token) { tokens.add(token); } diff --git a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java index 9d917613a..7bb65c17e 100644 --- a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java +++ b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java @@ -2,10 +2,16 @@ import java.io.File; import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; +import java.util.Stack; import de.jplag.ParsingException; +import de.jplag.Token; +import de.jplag.TokenType; import com.sun.source.tree.AnnotationTree; import com.sun.source.tree.AssertTree; @@ -58,6 +64,11 @@ final class TokenGeneratingTreeScanner extends TreeScanner { private final SourcePositions positions; private final CompilationUnitTree ast; + private int variableCount; + private Map memberVariables; // map member variable's name to id + private Map> localVariables; // map local variable's name to id + private Stack> scopeVariables; // stack of local variable names in scope + private List parsingExceptions = new 
ArrayList<>(); public TokenGeneratingTreeScanner(File file, Parser parser, LineMap map, SourcePositions positions, CompilationUnitTree ast) { @@ -66,12 +77,20 @@ public TokenGeneratingTreeScanner(File file, Parser parser, LineMap map, SourceP this.map = map; this.positions = positions; this.ast = ast; + this.variableCount = 0; + this.memberVariables = new HashMap<>(); + this.localVariables = new HashMap<>(); + this.scopeVariables = new Stack<>(); } public List getParsingExceptions() { return parsingExceptions; } + public void addToken(TokenType type, File file, long line, long column, long length) { + parser.add(new Token(type, file, (int) line, (int) column, (int) length)); + } + /** * Convenience method that adds a specific token. * @param tokenType is the type of the token. @@ -79,7 +98,7 @@ public List getParsingExceptions() { * @param length is the length of the token. */ private void addToken(JavaTokenType tokenType, long position, int length) { - parser.add(tokenType, file, map.getLineNumber(position), map.getColumnNumber(position), length); + addToken(tokenType, file, map.getLineNumber(position), map.getColumnNumber(position), length); } /** @@ -89,21 +108,65 @@ private void addToken(JavaTokenType tokenType, long position, int length) { * @param end is the end position of the token for the calculation of the length. 
*/ private void addToken(JavaTokenType tokenType, long start, long end) { - parser.add(tokenType, file, map.getLineNumber(start), map.getColumnNumber(start), (end - start)); + addToken(tokenType, file, map.getLineNumber(start), map.getColumnNumber(start), (end - start)); + } + + private String variableId() { + return Integer.toString(variableCount++); + } + + private String getVariableId(String variableName) { + Stack variableIdStack = localVariables.getOrDefault(variableName, null); + if (variableIdStack != null) + return variableIdStack.peek(); + return memberVariables.getOrDefault(variableName, null); + } + + public void enterLocalScope() { + scopeVariables.add(new HashSet<>()); + } + + public void exitLocalScope() { + for (String variableName : scopeVariables.pop()) { + Stack variableIdStack = localVariables.get(variableName); + variableIdStack.pop(); + if (variableIdStack.isEmpty()) + localVariables.remove(variableName); + } } @Override public Void visitBlock(BlockTree node, Void unused) { + // classes are an obvious exception since members are treated differently + Set classKinds = Set.of(Tree.Kind.ENUM, Tree.Kind.INTERFACE, Tree.Kind.RECORD, Tree.Kind.ANNOTATION_TYPE, Tree.Kind.CLASS); + boolean isClass = classKinds.contains(node.getKind()); + // for loops are also an exception since a scope can be induced without a block visit (without brackets) + boolean isForLoop = Set.of(Tree.Kind.FOR_LOOP, Tree.Kind.ENHANCED_FOR_LOOP).contains(node.getKind()); + // methods and catches are also an exception since variables can be declared before the block begins + if (!(isClass || isForLoop || Set.of(Tree.Kind.METHOD, Tree.Kind.CATCH).contains(node.getKind()))) { + enterLocalScope(); + } long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; addToken(JavaTokenType.J_INIT_BEGIN, start, 1); super.visitBlock(node, unused); addToken(JavaTokenType.J_INIT_END, end, 1); + if (!(isClass || isForLoop)) { + exitLocalScope(); + } 
return null; } @Override public Void visitClass(ClassTree node, Void unused) { + for (var member : node.getMembers()) { + if (member.getKind() == Tree.Kind.VARIABLE) { + String variableName = ((VariableTree) member).getName().toString(); + String variableId = variableId(); + System.out.println("new member variable " + variableName + " with id " + variableId); + memberVariables.put(variableName, variableId); + } + } long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; @@ -119,17 +182,18 @@ public Void visitClass(ClassTree node, Void unused) { addToken(JavaTokenType.J_CLASS_BEGIN, start, 5); } super.visitClass(node, unused); - if (node.getKind() == Tree.Kind.ENUM) { - addToken(JavaTokenType.J_ENUM_END, end, 1); - } else if (node.getKind() == Tree.Kind.INTERFACE) { - addToken(JavaTokenType.J_INTERFACE_END, end, 1); - } else if (node.getKind() == Tree.Kind.RECORD) { - addToken(JavaTokenType.J_RECORD_END, end, 1); - } else if (node.getKind() == Tree.Kind.ANNOTATION_TYPE) { - addToken(JavaTokenType.J_ANNO_T_END, end, 1); - } else if (node.getKind() == Tree.Kind.CLASS) { - addToken(JavaTokenType.J_CLASS_END, end, 1); + JavaTokenType tokenType = switch (node.getKind()) { + case ENUM -> JavaTokenType.J_ENUM_END; + case INTERFACE -> JavaTokenType.J_INTERFACE_END; + case RECORD -> JavaTokenType.J_RECORD_END; + case ANNOTATION_TYPE -> JavaTokenType.J_ANNO_T_END; + case CLASS -> JavaTokenType.J_CLASS_END; + default -> null; + }; + if (tokenType != null) { + addToken(tokenType, end, 1); } + memberVariables.clear(); return null; } @@ -151,6 +215,7 @@ public Void visitPackage(PackageTree node, Void unused) { @Override public Void visitMethod(MethodTree node, Void unused) { + enterLocalScope(); long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; addToken(JavaTokenType.J_METHOD_BEGIN, start, node.getName().length()); @@ -191,21 +256,25 @@ public Void visitWhileLoop(WhileLoopTree 
node, Void unused) { @Override public Void visitForLoop(ForLoopTree node, Void unused) { + enterLocalScope(); long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; addToken(JavaTokenType.J_FOR_BEGIN, start, 3); super.visitForLoop(node, unused); addToken(JavaTokenType.J_FOR_END, end, 1); + exitLocalScope(); return null; } @Override public Void visitEnhancedForLoop(EnhancedForLoopTree node, Void unused) { + enterLocalScope(); long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; addToken(JavaTokenType.J_FOR_BEGIN, start, 3); super.visitEnhancedForLoop(node, unused); addToken(JavaTokenType.J_FOR_END, end, 1); + exitLocalScope(); return null; } @@ -252,6 +321,7 @@ public Void visitTry(TryTree node, Void unused) { @Override public Void visitCatch(CatchTree node, Void unused) { + enterLocalScope(); long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; addToken(JavaTokenType.J_CATCH_BEGIN, start, 5); @@ -379,6 +449,15 @@ public Void visitAssert(AssertTree node, Void unused) { @Override public Void visitVariable(VariableTree node, Void unused) { + if (!scopeVariables.isEmpty()) { // local scope + String variableName = node.getName().toString(); + String variableId = variableId(); + System.out.println("new local variable " + variableName + " with id " + variableId); + if (!localVariables.containsKey(variableName)) + localVariables.put(variableName, new Stack<>()); + localVariables.get(variableName).push(variableId); + scopeVariables.peek().add(variableName); + } long start = positions.getStartPosition(ast, node); addToken(JavaTokenType.J_VARDEF, start, node.toString().length()); super.visitVariable(node, unused); From 624acea29ab094e6f77957c1e08d48f8664c94ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Sat, 10 Dec 2022 22:42:34 +0100 Subject: [PATCH 005/132] Register variable reads --- 
.../java/TokenGeneratingTreeScanner.java | 68 +++++++++++++++++-- 1 file changed, 61 insertions(+), 7 deletions(-) diff --git a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java index 7bb65c17e..e85a6138b 100644 --- a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java +++ b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java @@ -30,10 +30,13 @@ import com.sun.source.tree.EnhancedForLoopTree; import com.sun.source.tree.ErroneousTree; import com.sun.source.tree.ExportsTree; +import com.sun.source.tree.ExpressionTree; import com.sun.source.tree.ForLoopTree; +import com.sun.source.tree.IdentifierTree; import com.sun.source.tree.IfTree; import com.sun.source.tree.ImportTree; import com.sun.source.tree.LineMap; +import com.sun.source.tree.MemberSelectTree; import com.sun.source.tree.MethodInvocationTree; import com.sun.source.tree.MethodTree; import com.sun.source.tree.ModuleTree; @@ -68,6 +71,7 @@ final class TokenGeneratingTreeScanner extends TreeScanner { private Map memberVariables; // map member variable's name to id private Map> localVariables; // map local variable's name to id private Stack> scopeVariables; // stack of local variable names in scope + private boolean ignoreNextIdentifier; private List parsingExceptions = new ArrayList<>(); @@ -81,6 +85,7 @@ public TokenGeneratingTreeScanner(File file, Parser parser, LineMap map, SourceP this.memberVariables = new HashMap<>(); this.localVariables = new HashMap<>(); this.scopeVariables = new Stack<>(); + this.ignoreNextIdentifier = false; } public List getParsingExceptions() { @@ -115,11 +120,34 @@ private String variableId() { return Integer.toString(variableCount++); } + private String getMemberVariableId(String variableName) { + return memberVariables.getOrDefault(variableName, null); + } + private String getVariableId(String variableName) { Stack variableIdStack = 
localVariables.getOrDefault(variableName, null); - if (variableIdStack != null) + if (variableIdStack != null) { return variableIdStack.peek(); - return memberVariables.getOrDefault(variableName, null); + } + return getMemberVariableId(variableName); + } + + private boolean isOwnMemberSelect(MemberSelectTree memberSelect) { + return memberSelect.getExpression().toString().equals("this"); + } + + private void registerVariableWrite(String variableId) { + if (variableId != null && !ignoreNextIdentifier) { + System.out.println("write variable with id " + variableId); + } + ignoreNextIdentifier = true; // next identifier (this one) has already been accounted for + } + + private void registerVariableRead(String variableId) { + if (variableId != null && !ignoreNextIdentifier) { + System.out.println("read variable with id " + variableId); + } + ignoreNextIdentifier = false; // next identifier is this one so... } public void enterLocalScope() { @@ -335,12 +363,12 @@ public Void visitIf(IfTree node, Void unused) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; addToken(JavaTokenType.J_IF_BEGIN, start, 2); - node.getCondition().accept(this, unused); - node.getThenStatement().accept(this, unused); + scan(node.getCondition(), unused); + scan(node.getThenStatement(), unused); if (node.getElseStatement() != null) { start = positions.getStartPosition(ast, node.getElseStatement()); addToken(JavaTokenType.J_ELSE, start, 4); - node.getElseStatement().accept(this, unused); + scan(node.getElseStatement(), unused); } addToken(JavaTokenType.J_IF_END, end, 1); return null; @@ -453,8 +481,9 @@ public Void visitVariable(VariableTree node, Void unused) { String variableName = node.getName().toString(); String variableId = variableId(); System.out.println("new local variable " + variableName + " with id " + variableId); - if (!localVariables.containsKey(variableName)) + if (!localVariables.containsKey(variableName)) { 
localVariables.put(variableName, new Stack<>()); + } localVariables.get(variableName).push(variableId); scopeVariables.peek().add(variableName); } @@ -476,7 +505,16 @@ public Void visitConditionalExpression(ConditionalExpressionTree node, Void unus public Void visitMethodInvocation(MethodInvocationTree node, Void unused) { long start = positions.getStartPosition(ast, node); addToken(JavaTokenType.J_APPLY, start, positions.getEndPosition(ast, node.getMethodSelect()) - start); - super.visitMethodInvocation(node, unused); + // from super method, would need to be changed if return value were to be used + scan(node.getTypeArguments(), unused); + ExpressionTree methodSelect = node.getMethodSelect(); + if (methodSelect.getKind() == Tree.Kind.IDENTIFIER + || (methodSelect.getKind() == Tree.Kind.MEMBER_SELECT && isOwnMemberSelect((MemberSelectTree) methodSelect))) { + // todo is the next identifier always a method identifier? + ignoreNextIdentifier = true; // to differentiate bar() and this.bar() (ignore) from bar.foo() (don't ignore) + } + scan(node.getMethodSelect(), unused); + scan(node.getArguments(), unused); return null; } @@ -546,4 +584,20 @@ public Void visitDefaultCaseLabel(DefaultCaseLabelTree node, Void unused) { super.visitDefaultCaseLabel(node, unused); return null; } + + @Override + public Void visitMemberSelect(MemberSelectTree node, Void unused) { + if (isOwnMemberSelect(node)) { + registerVariableRead(getMemberVariableId(node.getIdentifier().toString())); // getIdentifier returns a Name, not an identifier :)) + } + super.visitMemberSelect(node, unused); + return null; + } + + @Override + public Void visitIdentifier(IdentifierTree node, Void unused) { + registerVariableRead(getVariableId(node.getName().toString())); + super.visitIdentifier(node, unused); + return null; + } } From 4ab3ae5a1832aed6ce2ab59e7d3406f936c23d21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Sat, 17 Dec 2022 13:09:03 +0100 Subject: [PATCH 006/132] Register 
variable writes --- .../java/TokenGeneratingTreeScanner.java | 89 ++++++++++++------- 1 file changed, 56 insertions(+), 33 deletions(-) diff --git a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java index e85a6138b..4085de3a2 100644 --- a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java +++ b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java @@ -60,6 +60,8 @@ import com.sun.source.util.SourcePositions; import com.sun.source.util.TreeScanner; +import javax.lang.model.element.Name; + final class TokenGeneratingTreeScanner extends TreeScanner { private final File file; private final Parser parser; @@ -67,14 +69,22 @@ final class TokenGeneratingTreeScanner extends TreeScanner { private final SourcePositions positions; private final CompilationUnitTree ast; - private int variableCount; - private Map memberVariables; // map member variable's name to id - private Map> localVariables; // map local variable's name to id - private Stack> scopeVariables; // stack of local variable names in scope - private boolean ignoreNextIdentifier; - private List parsingExceptions = new ArrayList<>(); + private int variableCount; + private Map memberVariables; // map member variable name to id + private Map> localVariables; // map local variable name to id + private Map variableNames; // map variable id to name for debugging purposes, inverse of two maps above + private Stack> scopeVariables; // stack of local variable names in scope + private NextOperation nextOperation; + + enum NextOperation { + NONE, + READ, + WRITE, + READ_WRITE + } + public TokenGeneratingTreeScanner(File file, Parser parser, LineMap map, SourcePositions positions, CompilationUnitTree ast) { this.file = file; this.parser = parser; @@ -84,8 +94,9 @@ public TokenGeneratingTreeScanner(File file, Parser parser, LineMap map, SourceP this.variableCount = 0; 
this.memberVariables = new HashMap<>(); this.localVariables = new HashMap<>(); + this.variableNames = new HashMap<>(); this.scopeVariables = new Stack<>(); - this.ignoreNextIdentifier = false; + this.nextOperation = NextOperation.READ; // the default } public List getParsingExceptions() { @@ -120,11 +131,11 @@ private String variableId() { return Integer.toString(variableCount++); } - private String getMemberVariableId(String variableName) { + private String getMemberVariableId(Name variableName) { return memberVariables.getOrDefault(variableName, null); } - private String getVariableId(String variableName) { + private String getVariableId(Name variableName) { Stack variableIdStack = localVariables.getOrDefault(variableName, null); if (variableIdStack != null) { return variableIdStack.peek(); @@ -132,22 +143,29 @@ private String getVariableId(String variableName) { return getMemberVariableId(variableName); } + private boolean isVariable(ExpressionTree expressionTree) { + return expressionTree.getKind() == Tree.Kind.IDENTIFIER + || (expressionTree.getKind() == Tree.Kind.MEMBER_SELECT && isOwnMemberSelect((MemberSelectTree) expressionTree)); + } + private boolean isOwnMemberSelect(MemberSelectTree memberSelect) { return memberSelect.getExpression().toString().equals("this"); } - private void registerVariableWrite(String variableId) { - if (variableId != null && !ignoreNextIdentifier) { - System.out.println("write variable with id " + variableId); - } - ignoreNextIdentifier = true; // next identifier (this one) has already been accounted for + private String formatVariable(String variableId) { + return variableNames.get(variableId) + " [" + variableId + "]"; } - private void registerVariableRead(String variableId) { - if (variableId != null && !ignoreNextIdentifier) { - System.out.println("read variable with id " + variableId); + private void registerVariable(String variableId) { + if (variableId != null) { + if (Set.of(NextOperation.WRITE, 
NextOperation.READ_WRITE).contains(nextOperation)) { + System.out.println("write " + formatVariable(variableId)); + } + if (Set.of(NextOperation.READ, NextOperation.READ_WRITE).contains(nextOperation)) { + System.out.println("read " + formatVariable(variableId)); + } } - ignoreNextIdentifier = false; // next identifier is this one so... + nextOperation = NextOperation.READ; } public void enterLocalScope() { @@ -155,7 +173,7 @@ public void enterLocalScope() { } public void exitLocalScope() { - for (String variableName : scopeVariables.pop()) { + for (Name variableName : scopeVariables.pop()) { Stack variableIdStack = localVariables.get(variableName); variableIdStack.pop(); if (variableIdStack.isEmpty()) @@ -189,10 +207,11 @@ public Void visitBlock(BlockTree node, Void unused) { public Void visitClass(ClassTree node, Void unused) { for (var member : node.getMembers()) { if (member.getKind() == Tree.Kind.VARIABLE) { - String variableName = ((VariableTree) member).getName().toString(); + Name variableName = ((VariableTree) member).getName(); String variableId = variableId(); - System.out.println("new member variable " + variableName + " with id " + variableId); + // System.out.println("new member " + formatVariable(variableId)); memberVariables.put(variableName, variableId); + variableNames.put(variableId, variableName); } } long start = positions.getStartPosition(ast, node); @@ -444,6 +463,7 @@ public Void visitNewArray(NewArrayTree node, Void unused) { public Void visitAssignment(AssignmentTree node, Void unused) { long start = positions.getStartPosition(ast, node); addToken(JavaTokenType.J_ASSIGN, start, 1); + nextOperation = NextOperation.WRITE; super.visitAssignment(node, unused); return null; } @@ -452,6 +472,7 @@ public Void visitAssignment(AssignmentTree node, Void unused) { public Void visitCompoundAssignment(CompoundAssignmentTree node, Void unused) { long start = positions.getStartPosition(ast, node); addToken(JavaTokenType.J_ASSIGN, start, 1); + 
nextOperation = NextOperation.READ_WRITE; super.visitCompoundAssignment(node, unused); return null; } @@ -462,6 +483,7 @@ public Void visitUnary(UnaryTree node, Void unused) { .contains(node.getKind())) { long start = positions.getStartPosition(ast, node); addToken(JavaTokenType.J_ASSIGN, start, 1); + nextOperation = NextOperation.READ_WRITE; } super.visitUnary(node, unused); return null; @@ -477,15 +499,18 @@ public Void visitAssert(AssertTree node, Void unused) { @Override public Void visitVariable(VariableTree node, Void unused) { + nextOperation = NextOperation.WRITE; if (!scopeVariables.isEmpty()) { // local scope - String variableName = node.getName().toString(); + Name variableName = node.getName(); String variableId = variableId(); - System.out.println("new local variable " + variableName + " with id " + variableId); - if (!localVariables.containsKey(variableName)) { - localVariables.put(variableName, new Stack<>()); - } + // System.out.println("new local " + formatVariable(variableId)); + localVariables.putIfAbsent(variableName, new Stack<>()); localVariables.get(variableName).push(variableId); + variableNames.put(variableId, variableName); scopeVariables.peek().add(variableName); + registerVariable(variableId); // somewhat special case, identifier isn't visited + } else { + registerVariable(getMemberVariableId(node.getName())); } long start = positions.getStartPosition(ast, node); addToken(JavaTokenType.J_VARDEF, start, node.toString().length()); @@ -507,11 +532,9 @@ public Void visitMethodInvocation(MethodInvocationTree node, Void unused) { addToken(JavaTokenType.J_APPLY, start, positions.getEndPosition(ast, node.getMethodSelect()) - start); // from super method, would need to be changed if return value were to be used scan(node.getTypeArguments(), unused); - ExpressionTree methodSelect = node.getMethodSelect(); - if (methodSelect.getKind() == Tree.Kind.IDENTIFIER - || (methodSelect.getKind() == Tree.Kind.MEMBER_SELECT && 
isOwnMemberSelect((MemberSelectTree) methodSelect))) { - // todo is the next identifier always a method identifier? - ignoreNextIdentifier = true; // to differentiate bar() and this.bar() (ignore) from bar.foo() (don't ignore) + // to differentiate bar() and this.bar() (ignore) from bar.foo() (don't ignore) + if (isVariable(node.getMethodSelect())) { + nextOperation = NextOperation.NONE; } scan(node.getMethodSelect(), unused); scan(node.getArguments(), unused); @@ -588,7 +611,7 @@ public Void visitDefaultCaseLabel(DefaultCaseLabelTree node, Void unused) { @Override public Void visitMemberSelect(MemberSelectTree node, Void unused) { if (isOwnMemberSelect(node)) { - registerVariableRead(getMemberVariableId(node.getIdentifier().toString())); // getIdentifier returns a Name, not an identifier :)) + registerVariable(getMemberVariableId(node.getIdentifier())); } super.visitMemberSelect(node, unused); return null; @@ -596,7 +619,7 @@ public Void visitMemberSelect(MemberSelectTree node, Void unused) { @Override public Void visitIdentifier(IdentifierTree node, Void unused) { - registerVariableRead(getVariableId(node.getName().toString())); + registerVariable(getVariableId(node.getName())); super.visitIdentifier(node, unused); return null; } From 82de37e52c6da37d16de3a46b82e6bbfb0f9d252 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Sat, 17 Dec 2022 14:45:45 +0100 Subject: [PATCH 007/132] Register additional variables writes for mutables --- .../java/TokenGeneratingTreeScanner.java | 29 ++++++++++++++++--- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java index 4085de3a2..fc5160642 100644 --- a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java +++ b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java @@ -75,9 +75,18 @@ final class 
TokenGeneratingTreeScanner extends TreeScanner { private Map memberVariables; // map member variable name to id private Map> localVariables; // map local variable name to id private Map variableNames; // map variable id to name for debugging purposes, inverse of two maps above + private Map variableIsMutable; // map variable id to whether it is immutable private Stack> scopeVariables; // stack of local variable names in scope private NextOperation nextOperation; - + private boolean mutableWrite; + + private static final Set IMMUTABLES = Set.of( + // from https://medium.com/@bpnorlander/java-understanding-primitive-types-and-wrapper-objects-a6798fb2afe9 + "byte", "short", "int", "long", "float", "double", "boolean", "char", + "Byte", "Short", "Integer", "Long", "Float", "Double", "Boolean", "Character", + "String" + ); + enum NextOperation { NONE, READ, @@ -95,8 +104,10 @@ public TokenGeneratingTreeScanner(File file, Parser parser, LineMap map, SourceP this.memberVariables = new HashMap<>(); this.localVariables = new HashMap<>(); this.variableNames = new HashMap<>(); + this.variableIsMutable = new HashMap<>(); this.scopeVariables = new Stack<>(); this.nextOperation = NextOperation.READ; // the default + this.mutableWrite = false; } public List getParsingExceptions() { @@ -156,9 +167,14 @@ private String formatVariable(String variableId) { return variableNames.get(variableId) + " [" + variableId + "]"; } + private boolean isMutable(Tree classTree) { + return classTree != null && !IMMUTABLES.contains(classTree); + } + private void registerVariable(String variableId) { if (variableId != null) { - if (Set.of(NextOperation.WRITE, NextOperation.READ_WRITE).contains(nextOperation)) { + if (Set.of(NextOperation.WRITE, NextOperation.READ_WRITE).contains(nextOperation) + || mutableWrite && variableIsMutable.get(variableId)) { System.out.println("write " + formatVariable(variableId)); } if (Set.of(NextOperation.READ, NextOperation.READ_WRITE).contains(nextOperation)) { @@ 
-207,11 +223,13 @@ public Void visitBlock(BlockTree node, Void unused) { public Void visitClass(ClassTree node, Void unused) { for (var member : node.getMembers()) { if (member.getKind() == Tree.Kind.VARIABLE) { - Name variableName = ((VariableTree) member).getName(); + VariableTree variable = (VariableTree) member; + Name variableName = variable.getName(); String variableId = variableId(); // System.out.println("new member " + formatVariable(variableId)); memberVariables.put(variableName, variableId); variableNames.put(variableId, variableName); + variableIsMutable.put(variableId, isMutable(variable.getType())); } } long start = positions.getStartPosition(ast, node); @@ -508,6 +526,7 @@ public Void visitVariable(VariableTree node, Void unused) { localVariables.get(variableName).push(variableId); variableNames.put(variableId, variableName); scopeVariables.peek().add(variableName); + variableIsMutable.put(variableId, isMutable(node.getType())); registerVariable(variableId); // somewhat special case, identifier isn't visited } else { registerVariable(getMemberVariableId(node.getName())); @@ -532,12 +551,14 @@ public Void visitMethodInvocation(MethodInvocationTree node, Void unused) { addToken(JavaTokenType.J_APPLY, start, positions.getEndPosition(ast, node.getMethodSelect()) - start); // from super method, would need to be changed if return value were to be used scan(node.getTypeArguments(), unused); - // to differentiate bar() and this.bar() (ignore) from bar.foo() (don't ignore) + // to differentiate bar() and this.bar() (ignore) from bar.foo() (don't ignore), different namespace for variables and methods if (isVariable(node.getMethodSelect())) { nextOperation = NextOperation.NONE; } + mutableWrite = true; // when mentioned here, mutable variables can be written to scan(node.getMethodSelect(), unused); scan(node.getArguments(), unused); + mutableWrite = false; return null; } From 6e92ceb8aa50e82f2972553de463fe222861288e Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Sat, 17 Dec 2022 23:17:55 +0100 Subject: [PATCH 008/132] Add SemanticToken class --- .../de/jplag/semantics/SemanticToken.java | 38 ++++++++++++++ .../de/jplag/semantics/TokenSemantics.java | 38 ++++++++++++++ .../semantics/TokenSemanticsBuilder.java | 50 +++++++++++++++++++ 3 files changed, 126 insertions(+) create mode 100644 language-api/src/main/java/de/jplag/semantics/SemanticToken.java create mode 100644 language-api/src/main/java/de/jplag/semantics/TokenSemantics.java create mode 100644 language-api/src/main/java/de/jplag/semantics/TokenSemanticsBuilder.java diff --git a/language-api/src/main/java/de/jplag/semantics/SemanticToken.java b/language-api/src/main/java/de/jplag/semantics/SemanticToken.java new file mode 100644 index 000000000..c3c4e6823 --- /dev/null +++ b/language-api/src/main/java/de/jplag/semantics/SemanticToken.java @@ -0,0 +1,38 @@ +package de.jplag.semantics; + +import java.io.File; + +import de.jplag.SharedTokenType; +import de.jplag.Token; +import de.jplag.TokenType; + +public class SemanticToken extends Token { + + private TokenSemantics semantics; + + /** + * @return a record containing semantic information about the token. + */ + public TokenSemantics semantics() { + return semantics; + } + + /** + * Creates a token with column, length and semantic information. + * @param type is the token type. + * @param file is the name of the source code file. + * @param line is the line index in the source code where the token resides. Index is 1-based. + * @param column is the column index, meaning where the token starts in the line. Index is 1-based. + * @param length is the length of the token in the source code. + * @param semantics is a record containing semantic information about the token. 
+ */ + public SemanticToken(TokenType type, File file, int line, int column, int length, TokenSemantics semantics) { + super(type, file, line, column, length); + this.semantics = semantics; + } + + public static SemanticToken fileEnd(File file) { + TokenSemantics semantics = new TokenSemanticsBuilder().control().critical().build(); // todo + return new SemanticToken(SharedTokenType.FILE_END, file, NO_VALUE, NO_VALUE, NO_VALUE, semantics); + } +} diff --git a/language-api/src/main/java/de/jplag/semantics/TokenSemantics.java b/language-api/src/main/java/de/jplag/semantics/TokenSemantics.java new file mode 100644 index 000000000..074ad0450 --- /dev/null +++ b/language-api/src/main/java/de/jplag/semantics/TokenSemantics.java @@ -0,0 +1,38 @@ +package de.jplag.semantics; + +import java.util.Collections; +import java.util.Set; + +/** + * This record contains semantic information about the token. + * @param critical Whether the token is critical, e.g. whether it (potentially) has any non-local effects. + * @param control Whether the token controls the program flow. + * @param loopBegin Whether the token marks the beginning of a loop. + * @param loopEnd Whether the token marks the end of a loop + * @param writes A set of the variable names which were (potentially) written to in this token. + * @param reads A set of the variable names which were (potentially) read from in this token. + */ +public record TokenSemantics(boolean critical, boolean control, boolean loopBegin, boolean loopEnd, Set writes, Set reads) { + + public void addWrite(String write) { + writes.add(write); + } + + public void addRead(String read) { + reads.add(read); + } + + /** + * @return an unmodifiable set of the variable names which were (potentially) written to in this token. + */ + public Set writes() { + return Collections.unmodifiableSet(writes); + } + + /** + * @return an unmodifiable set of the variable names which were (potentially) read from in this token. 
+ */ + public Set reads() { + return Collections.unmodifiableSet(reads); + } +} \ No newline at end of file diff --git a/language-api/src/main/java/de/jplag/semantics/TokenSemanticsBuilder.java b/language-api/src/main/java/de/jplag/semantics/TokenSemanticsBuilder.java new file mode 100644 index 000000000..8325dd542 --- /dev/null +++ b/language-api/src/main/java/de/jplag/semantics/TokenSemanticsBuilder.java @@ -0,0 +1,50 @@ +package de.jplag.semantics; + +import java.util.HashSet; +import java.util.Set; + +/** + * A builder class for the TokenSemantics record. + */ +public class TokenSemanticsBuilder { + private boolean critical; + private boolean control; + private boolean loopBegin; + private boolean loopEnd; + private Set writes; + private Set reads; + + public TokenSemanticsBuilder() { + this.writes = new HashSet<>(); + this.reads = new HashSet<>(); + } + + public TokenSemantics build() { + if (loopBegin && loopEnd) { + throw new IllegalStateException("Token can't mark both the beginning and end of a loop"); + } + return new TokenSemantics(critical, control, loopBegin, loopEnd, writes, reads); + } + + public TokenSemanticsBuilder critical() { + this.critical = true; + return this; + } + + public TokenSemanticsBuilder control() { + this.control = true; + return this; + } + + public TokenSemanticsBuilder loopBegin() { + this.loopBegin = true; + this.control = true; + return this; + } + + public TokenSemanticsBuilder loopEnd() { + this.loopEnd = true; + this.control = true; + return this; + } +} From 49b875aaafc8c935f23c4ab6aca79e3b77f9aa9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Sat, 17 Dec 2022 23:22:56 +0100 Subject: [PATCH 009/132] Generate semantic tokens for Java --- .../main/java/de/jplag/java/JavacAdapter.java | 4 +- .../src/main/java/de/jplag/java/Language.java | 2 +- .../src/main/java/de/jplag/java/Parser.java | 8 +- .../java/TokenGeneratingTreeScanner.java | 488 +++++++++++------- 4 files changed, 299 insertions(+), 203 
deletions(-) diff --git a/languages/java/src/main/java/de/jplag/java/JavacAdapter.java b/languages/java/src/main/java/de/jplag/java/JavacAdapter.java index bd6972b87..bd7c4954c 100644 --- a/languages/java/src/main/java/de/jplag/java/JavacAdapter.java +++ b/languages/java/src/main/java/de/jplag/java/JavacAdapter.java @@ -19,7 +19,7 @@ import org.slf4j.Logger; import de.jplag.ParsingException; -import de.jplag.Token; +import de.jplag.semantics.SemanticToken; import com.sun.source.tree.CompilationUnitTree; import com.sun.source.tree.LineMap; @@ -49,7 +49,7 @@ public void parseFiles(Set files, final Parser parser) throws ParsingExcep var scanner = new TokenGeneratingTreeScanner(file, parser, map, positions, ast); ast.accept(scanner, null); parsingExceptions.addAll(scanner.getParsingExceptions()); - parser.add(Token.fileEnd(file)); + parser.add(SemanticToken.fileEnd(file)); } } catch (IOException exception) { throw new ParsingException(null, exception.getMessage(), exception); diff --git a/languages/java/src/main/java/de/jplag/java/Language.java b/languages/java/src/main/java/de/jplag/java/Language.java index fd0d2a577..f3b13589b 100644 --- a/languages/java/src/main/java/de/jplag/java/Language.java +++ b/languages/java/src/main/java/de/jplag/java/Language.java @@ -44,6 +44,6 @@ public int minimumTokenMatch() { @Override public List parse(Set files) throws ParsingException { - return this.parser.parse(files); + return this.parser.parse(files).stream().map(Token.class::cast).toList(); // todo } } diff --git a/languages/java/src/main/java/de/jplag/java/Parser.java b/languages/java/src/main/java/de/jplag/java/Parser.java index 816ce3203..1e623116d 100644 --- a/languages/java/src/main/java/de/jplag/java/Parser.java +++ b/languages/java/src/main/java/de/jplag/java/Parser.java @@ -7,10 +7,10 @@ import de.jplag.AbstractParser; import de.jplag.ParsingException; -import de.jplag.Token; +import de.jplag.semantics.SemanticToken; public class Parser extends AbstractParser { - 
private List tokens; + private List tokens; /** * Creates the parser. @@ -19,13 +19,13 @@ public Parser() { super(); } - public List parse(Set files) throws ParsingException { + public List parse(Set files) throws ParsingException { tokens = new ArrayList<>(); new JavacAdapter().parseFiles(files, this); return tokens; } - public void add(Token token) { + public void add(SemanticToken token) { tokens.add(token); } } diff --git a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java index fc5160642..19e8c499c 100644 --- a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java +++ b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java @@ -8,10 +8,15 @@ import java.util.Map; import java.util.Set; import java.util.Stack; +import java.util.function.Function; + +import javax.lang.model.element.Name; import de.jplag.ParsingException; -import de.jplag.Token; import de.jplag.TokenType; +import de.jplag.semantics.SemanticToken; +import de.jplag.semantics.TokenSemantics; +import de.jplag.semantics.TokenSemanticsBuilder; import com.sun.source.tree.AnnotationTree; import com.sun.source.tree.AssertTree; @@ -60,9 +65,7 @@ import com.sun.source.util.SourcePositions; import com.sun.source.util.TreeScanner; -import javax.lang.model.element.Name; - -final class TokenGeneratingTreeScanner extends TreeScanner { +final class TokenGeneratingTreeScanner extends TreeScanner { private final File file; private final Parser parser; private final LineMap map; @@ -72,20 +75,19 @@ final class TokenGeneratingTreeScanner extends TreeScanner { private List parsingExceptions = new ArrayList<>(); private int variableCount; - private Map memberVariables; // map member variable name to id - private Map> localVariables; // map local variable name to id - private Map variableNames; // map variable id to name for debugging purposes, inverse of two maps above + private 
Map memberVariableIds; // map member variable name to id + private Map> localVariableIdMap; // map local variable name to id + private Set localVariables; + private Map variableNameMap; // map variable id to name for debugging purposes, inverse of two maps above private Map variableIsMutable; // map variable id to whether it is immutable private Stack> scopeVariables; // stack of local variable names in scope private NextOperation nextOperation; private boolean mutableWrite; private static final Set IMMUTABLES = Set.of( - // from https://medium.com/@bpnorlander/java-understanding-primitive-types-and-wrapper-objects-a6798fb2afe9 - "byte", "short", "int", "long", "float", "double", "boolean", "char", - "Byte", "Short", "Integer", "Long", "Float", "Double", "Boolean", "Character", - "String" - ); + // from https://medium.com/@bpnorlander/java-understanding-primitive-types-and-wrapper-objects-a6798fb2afe9 + "byte", "short", "int", "long", "float", "double", "boolean", "char", "Byte", "Short", "Integer", "Long", "Float", "Double", "Boolean", + "Character", "String"); enum NextOperation { NONE, @@ -101,9 +103,10 @@ public TokenGeneratingTreeScanner(File file, Parser parser, LineMap map, SourceP this.positions = positions; this.ast = ast; this.variableCount = 0; - this.memberVariables = new HashMap<>(); - this.localVariables = new HashMap<>(); - this.variableNames = new HashMap<>(); + this.memberVariableIds = new HashMap<>(); + this.localVariableIdMap = new HashMap<>(); + this.variableNameMap = new HashMap<>(); + this.localVariables = new HashSet<>(); this.variableIsMutable = new HashMap<>(); this.scopeVariables = new Stack<>(); this.nextOperation = NextOperation.READ; // the default @@ -114,8 +117,8 @@ public List getParsingExceptions() { return parsingExceptions; } - public void addToken(TokenType type, File file, long line, long column, long length) { - parser.add(new Token(type, file, (int) line, (int) column, (int) length)); + public void addToken(TokenType type, 
File file, long line, long column, long length, TokenSemantics semantics) { + parser.add(new SemanticToken(type, file, (int) line, (int) column, (int) length, semantics)); } /** @@ -124,8 +127,8 @@ public void addToken(TokenType type, File file, long line, long column, long len * @param position is the start position of the token. * @param length is the length of the token. */ - private void addToken(JavaTokenType tokenType, long position, int length) { - addToken(tokenType, file, map.getLineNumber(position), map.getColumnNumber(position), length); + private void addToken(JavaTokenType tokenType, long position, int length, TokenSemantics semantics) { + addToken(tokenType, file, map.getLineNumber(position), map.getColumnNumber(position), length, semantics); } /** @@ -134,8 +137,8 @@ private void addToken(JavaTokenType tokenType, long position, int length) { * @param start is the start position of the token. * @param end is the end position of the token for the calculation of the length. */ - private void addToken(JavaTokenType tokenType, long start, long end) { - addToken(tokenType, file, map.getLineNumber(start), map.getColumnNumber(start), (end - start)); + private void addToken(JavaTokenType tokenType, long start, long end, TokenSemantics semantics) { + addToken(tokenType, file, map.getLineNumber(start), map.getColumnNumber(start), (end - start), semantics); } private String variableId() { @@ -143,11 +146,11 @@ private String variableId() { } private String getMemberVariableId(Name variableName) { - return memberVariables.getOrDefault(variableName, null); + return memberVariableIds.getOrDefault(variableName, null); } private String getVariableId(Name variableName) { - Stack variableIdStack = localVariables.getOrDefault(variableName, null); + Stack variableIdStack = localVariableIdMap.getOrDefault(variableName, null); if (variableIdStack != null) { return variableIdStack.peek(); } @@ -159,26 +162,31 @@ private boolean isVariable(ExpressionTree expressionTree) { || 
(expressionTree.getKind() == Tree.Kind.MEMBER_SELECT && isOwnMemberSelect((MemberSelectTree) expressionTree)); } + private boolean isNotExistingLocalVariable(ExpressionTree expressionTree) { + return !(expressionTree.getKind() == Tree.Kind.IDENTIFIER && localVariables.contains(((IdentifierTree) expressionTree).getName().toString())); + } + private boolean isOwnMemberSelect(MemberSelectTree memberSelect) { return memberSelect.getExpression().toString().equals("this"); } private String formatVariable(String variableId) { - return variableNames.get(variableId) + " [" + variableId + "]"; + return variableNameMap.get(variableId) + " [" + variableId + "]"; } private boolean isMutable(Tree classTree) { return classTree != null && !IMMUTABLES.contains(classTree); } - private void registerVariable(String variableId) { + private void registerVariable(String variableId, TokenSemantics semantics) { if (variableId != null) { - if (Set.of(NextOperation.WRITE, NextOperation.READ_WRITE).contains(nextOperation) - || mutableWrite && variableIsMutable.get(variableId)) { - System.out.println("write " + formatVariable(variableId)); + if (Set.of(NextOperation.WRITE, NextOperation.READ_WRITE).contains(nextOperation) || mutableWrite && variableIsMutable.get(variableId)) { + // System.out.println("write " + formatVariable(variableId)); + semantics.addWrite(variableId); } if (Set.of(NextOperation.READ, NextOperation.READ_WRITE).contains(nextOperation)) { - System.out.println("read " + formatVariable(variableId)); + // System.out.println("read " + formatVariable(variableId)); + semantics.addRead(variableId); // todo change order it's read/write not write/read } } nextOperation = NextOperation.READ; @@ -190,15 +198,15 @@ public void enterLocalScope() { public void exitLocalScope() { for (Name variableName : scopeVariables.pop()) { - Stack variableIdStack = localVariables.get(variableName); + Stack variableIdStack = localVariableIdMap.get(variableName); variableIdStack.pop(); if 
(variableIdStack.isEmpty()) - localVariables.remove(variableName); + localVariableIdMap.remove(variableName); } } @Override - public Void visitBlock(BlockTree node, Void unused) { + public Void visitBlock(BlockTree node, TokenSemantics semantics) { // classes are an obvious exception since members are treated differently Set classKinds = Set.of(Tree.Kind.ENUM, Tree.Kind.INTERFACE, Tree.Kind.RECORD, Tree.Kind.ANNOTATION_TYPE, Tree.Kind.CLASS); boolean isClass = classKinds.contains(node.getKind()); @@ -210,9 +218,11 @@ public Void visitBlock(BlockTree node, Void unused) { } long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - addToken(JavaTokenType.J_INIT_BEGIN, start, 1); - super.visitBlock(node, unused); - addToken(JavaTokenType.J_INIT_END, end, 1); + semantics = new TokenSemanticsBuilder().control().build(); + addToken(JavaTokenType.J_INIT_BEGIN, start, 1, semantics); + super.visitBlock(node, null); + semantics = new TokenSemanticsBuilder().control().build(); + addToken(JavaTokenType.J_INIT_END, end, 1, semantics); if (!(isClass || isForLoop)) { exitLocalScope(); } @@ -220,33 +230,35 @@ public Void visitBlock(BlockTree node, Void unused) { } @Override - public Void visitClass(ClassTree node, Void unused) { + public Void visitClass(ClassTree node, TokenSemantics semantics) { for (var member : node.getMembers()) { if (member.getKind() == Tree.Kind.VARIABLE) { VariableTree variable = (VariableTree) member; Name variableName = variable.getName(); String variableId = variableId(); // System.out.println("new member " + formatVariable(variableId)); - memberVariables.put(variableName, variableId); - variableNames.put(variableId, variableName); + memberVariableIds.put(variableName, variableId); + variableNameMap.put(variableId, variableName); variableIsMutable.put(variableId, isMutable(variable.getType())); } } + long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - + 
semantics = new TokenSemanticsBuilder().control().critical().build(); if (node.getKind() == Tree.Kind.ENUM) { - addToken(JavaTokenType.J_ENUM_BEGIN, start, 4); + addToken(JavaTokenType.J_ENUM_BEGIN, start, 4, semantics); } else if (node.getKind() == Tree.Kind.INTERFACE) { - addToken(JavaTokenType.J_INTERFACE_BEGIN, start, 9); + addToken(JavaTokenType.J_INTERFACE_BEGIN, start, 9, semantics); } else if (node.getKind() == Tree.Kind.RECORD) { - addToken(JavaTokenType.J_RECORD_BEGIN, start, 1); + addToken(JavaTokenType.J_RECORD_BEGIN, start, 1, semantics); } else if (node.getKind() == Tree.Kind.ANNOTATION_TYPE) { - addToken(JavaTokenType.J_ANNO_T_BEGIN, start, 10); + addToken(JavaTokenType.J_ANNO_T_BEGIN, start, 10, semantics); } else if (node.getKind() == Tree.Kind.CLASS) { - addToken(JavaTokenType.J_CLASS_BEGIN, start, 5); + addToken(JavaTokenType.J_CLASS_BEGIN, start, 5, semantics); } - super.visitClass(node, unused); + super.visitClass(node, null); + JavaTokenType tokenType = switch (node.getKind()) { case ENUM -> JavaTokenType.J_ENUM_END; case INTERFACE -> JavaTokenType.J_INTERFACE_END; @@ -256,392 +268,476 @@ public Void visitClass(ClassTree node, Void unused) { default -> null; }; if (tokenType != null) { - addToken(tokenType, end, 1); + semantics = new TokenSemanticsBuilder().control().critical().build(); + addToken(tokenType, end, 1, semantics); } - memberVariables.clear(); + memberVariableIds.clear(); return null; } @Override - public Void visitImport(ImportTree node, Void unused) { + public Void visitImport(ImportTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - addToken(JavaTokenType.J_IMPORT, start, 6); - super.visitImport(node, unused); + semantics = new TokenSemanticsBuilder().control().critical().build(); + addToken(JavaTokenType.J_IMPORT, start, 6, semantics); + super.visitImport(node, semantics); return null; } @Override - public Void visitPackage(PackageTree node, Void unused) { + public Void 
visitPackage(PackageTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - addToken(JavaTokenType.J_PACKAGE, start, 7); - super.visitPackage(node, unused); + semantics = new TokenSemanticsBuilder().control().critical().build(); + addToken(JavaTokenType.J_PACKAGE, start, 7, semantics); + super.visitPackage(node, semantics); return null; } @Override - public Void visitMethod(MethodTree node, Void unused) { + public Void visitMethod(MethodTree node, TokenSemantics semantics) { enterLocalScope(); long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - addToken(JavaTokenType.J_METHOD_BEGIN, start, node.getName().length()); - super.visitMethod(node, unused); - addToken(JavaTokenType.J_METHOD_END, end, 1); + semantics = new TokenSemanticsBuilder().control().critical().build(); + addToken(JavaTokenType.J_METHOD_BEGIN, start, node.getName().length(), semantics); + super.visitMethod(node, null); + semantics = new TokenSemanticsBuilder().control().critical().build(); + addToken(JavaTokenType.J_METHOD_END, end, 1, semantics); return null; } @Override - public Void visitSynchronized(SynchronizedTree node, Void unused) { + public Void visitSynchronized(SynchronizedTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - addToken(JavaTokenType.J_SYNC_BEGIN, start, 12); - super.visitSynchronized(node, unused); - addToken(JavaTokenType.J_SYNC_END, end, 1); + semantics = new TokenSemanticsBuilder().control().critical().build(); + addToken(JavaTokenType.J_SYNC_BEGIN, start, 12, semantics); + super.visitSynchronized(node, semantics); + semantics = new TokenSemanticsBuilder().control().critical().build(); + addToken(JavaTokenType.J_SYNC_END, end, 1, semantics); return null; } @Override - public Void visitDoWhileLoop(DoWhileLoopTree node, Void unused) { + public Void visitDoWhileLoop(DoWhileLoopTree node, 
TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - addToken(JavaTokenType.J_DO_BEGIN, start, 2); - super.visitDoWhileLoop(node, unused); - addToken(JavaTokenType.J_DO_END, end, 1); + semantics = new TokenSemanticsBuilder().loopBegin().build(); + addToken(JavaTokenType.J_DO_BEGIN, start, 2, semantics); + scan(node.getStatement(), null); + semantics = new TokenSemanticsBuilder().loopEnd().build(); + addToken(JavaTokenType.J_DO_END, end, 1, semantics); + scan(node.getCondition(), semantics); return null; } @Override - public Void visitWhileLoop(WhileLoopTree node, Void unused) { + public Void visitWhileLoop(WhileLoopTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - addToken(JavaTokenType.J_WHILE_BEGIN, start, 5); - super.visitWhileLoop(node, unused); - addToken(JavaTokenType.J_WHILE_END, end, 1); + semantics = new TokenSemanticsBuilder().loopBegin().build(); + addToken(JavaTokenType.J_WHILE_BEGIN, start, 5, semantics); + scan(node.getCondition(), semantics); + scan(node.getStatement(), null); + semantics = new TokenSemanticsBuilder().loopEnd().build(); + addToken(JavaTokenType.J_WHILE_END, end, 1, semantics); return null; } @Override - public Void visitForLoop(ForLoopTree node, Void unused) { + public Void visitForLoop(ForLoopTree node, TokenSemantics semantics) { enterLocalScope(); long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - addToken(JavaTokenType.J_FOR_BEGIN, start, 3); - super.visitForLoop(node, unused); - addToken(JavaTokenType.J_FOR_END, end, 1); + semantics = new TokenSemanticsBuilder().loopBegin().build(); + addToken(JavaTokenType.J_FOR_BEGIN, start, 3, semantics); + scan(node.getInitializer(), semantics); + scan(node.getCondition(), semantics); + scan(node.getUpdate(), semantics); + scan(node.getStatement(), null); + 
semantics = new TokenSemanticsBuilder().loopEnd().build(); + addToken(JavaTokenType.J_FOR_END, end, 1, semantics); exitLocalScope(); return null; } @Override - public Void visitEnhancedForLoop(EnhancedForLoopTree node, Void unused) { + public Void visitEnhancedForLoop(EnhancedForLoopTree node, TokenSemantics semantics) { enterLocalScope(); long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - addToken(JavaTokenType.J_FOR_BEGIN, start, 3); - super.visitEnhancedForLoop(node, unused); - addToken(JavaTokenType.J_FOR_END, end, 1); + semantics = new TokenSemanticsBuilder().loopBegin().build(); + addToken(JavaTokenType.J_FOR_BEGIN, start, 3, semantics); + scan(node.getVariable(), semantics); + scan(node.getExpression(), semantics); + scan(node.getStatement(), null); + semantics = new TokenSemanticsBuilder().loopEnd().build(); + addToken(JavaTokenType.J_FOR_END, end, 1, semantics); exitLocalScope(); return null; } @Override - public Void visitSwitch(SwitchTree node, Void unused) { + public Void visitSwitch(SwitchTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - addToken(JavaTokenType.J_SWITCH_BEGIN, start, 6); - super.visitSwitch(node, unused); - addToken(JavaTokenType.J_SWITCH_END, end, 1); + semantics = new TokenSemanticsBuilder().control().build(); + addToken(JavaTokenType.J_SWITCH_BEGIN, start, 6, semantics); + scan(node.getExpression(), semantics); + scan(node.getCases(), null); + semantics = new TokenSemanticsBuilder().control().build(); + addToken(JavaTokenType.J_SWITCH_END, end, 1, semantics); return null; } @Override - public Void visitSwitchExpression(SwitchExpressionTree node, Void unused) { + public Void visitSwitchExpression(SwitchExpressionTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - 
addToken(JavaTokenType.J_SWITCH_BEGIN, start, 6); - super.visitSwitchExpression(node, unused); - addToken(JavaTokenType.J_SWITCH_END, end, 1); + semantics = new TokenSemanticsBuilder().control().build(); + addToken(JavaTokenType.J_SWITCH_BEGIN, start, 6, semantics); + scan(node.getExpression(), semantics); + scan(node.getCases(), null); + semantics = new TokenSemanticsBuilder().control().build(); + addToken(JavaTokenType.J_SWITCH_END, end, 1, semantics); return null; } @Override - public Void visitCase(CaseTree node, Void unused) { + public Void visitCase(CaseTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - addToken(JavaTokenType.J_CASE, start, 4); - super.visitCase(node, unused); + semantics = new TokenSemanticsBuilder().control().build(); + addToken(JavaTokenType.J_CASE, start, 4, semantics); + scan(node.getExpressions(), semantics); + if (node.getCaseKind() == CaseTree.CaseKind.RULE) { + scan(node.getBody(), semantics); // case -> result, in switch expression + } else { + scan(node.getStatements(), null); // in normal switch + } return null; } @Override - public Void visitTry(TryTree node, Void unused) { + public Void visitTry(TryTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - if (node.getResources().isEmpty()) - addToken(JavaTokenType.J_TRY_BEGIN, start, 3); - else - addToken(JavaTokenType.J_TRY_WITH_RESOURCE, start, 3); - if (node.getFinallyBlock() != null) - addToken(JavaTokenType.J_FINALLY, start, 3); - super.visitTry(node, unused); + semantics = new TokenSemanticsBuilder().control().build(); + if (node.getResources().isEmpty()) { + addToken(JavaTokenType.J_TRY_BEGIN, start, 3, semantics); + } else { + addToken(JavaTokenType.J_TRY_WITH_RESOURCE, start, 3, semantics); + } + if (node.getFinallyBlock() != null) { // todo fix location (breaks tests) + semantics = new TokenSemanticsBuilder().control().build(); + addToken(JavaTokenType.J_FINALLY, start, 3, semantics); + } + 
scan(node.getResources(), semantics); + scan(node.getBlock(), null); + scan(node.getCatches(), null); + scan(node.getFinallyBlock(), null); return null; } @Override - public Void visitCatch(CatchTree node, Void unused) { + public Void visitCatch(CatchTree node, TokenSemantics semantics) { enterLocalScope(); long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - addToken(JavaTokenType.J_CATCH_BEGIN, start, 5); - super.visitCatch(node, unused); - addToken(JavaTokenType.J_CATCH_END, end, 1); + semantics = new TokenSemanticsBuilder().control().build(); + addToken(JavaTokenType.J_CATCH_BEGIN, start, 5, semantics); + super.visitCatch(node, null); // can leave this since catch parameter is variable declaration and thus always generates a token + semantics = new TokenSemanticsBuilder().control().build(); + addToken(JavaTokenType.J_CATCH_END, end, 1, semantics); return null; } @Override - public Void visitIf(IfTree node, Void unused) { + public Void visitIf(IfTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - addToken(JavaTokenType.J_IF_BEGIN, start, 2); - scan(node.getCondition(), unused); - scan(node.getThenStatement(), unused); + semantics = new TokenSemanticsBuilder().control().build(); + addToken(JavaTokenType.J_IF_BEGIN, start, 2, semantics); + scan(node.getCondition(), semantics); + scan(node.getThenStatement(), null); if (node.getElseStatement() != null) { start = positions.getStartPosition(ast, node.getElseStatement()); - addToken(JavaTokenType.J_ELSE, start, 4); - scan(node.getElseStatement(), unused); + semantics = new TokenSemanticsBuilder().control().build(); + addToken(JavaTokenType.J_ELSE, start, 4, semantics); } - addToken(JavaTokenType.J_IF_END, end, 1); + scan(node.getElseStatement(), null); + semantics = new TokenSemanticsBuilder().control().build(); + addToken(JavaTokenType.J_IF_END, end, 1, semantics); return 
null; } @Override - public Void visitBreak(BreakTree node, Void unused) { + public Void visitBreak(BreakTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - addToken(JavaTokenType.J_BREAK, start, 5); - super.visitBreak(node, unused); + semantics = new TokenSemanticsBuilder().control().build(); + addToken(JavaTokenType.J_BREAK, start, 5, semantics); + super.visitBreak(node, semantics); return null; } @Override - public Void visitContinue(ContinueTree node, Void unused) { + public Void visitContinue(ContinueTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - addToken(JavaTokenType.J_CONTINUE, start, 8); - super.visitContinue(node, unused); + semantics = new TokenSemanticsBuilder().control().build(); + addToken(JavaTokenType.J_CONTINUE, start, 8, semantics); + super.visitContinue(node, semantics); return null; } @Override - public Void visitReturn(ReturnTree node, Void unused) { + public Void visitReturn(ReturnTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - addToken(JavaTokenType.J_RETURN, start, 6); - super.visitReturn(node, unused); + semantics = new TokenSemanticsBuilder().control().critical().build(); + addToken(JavaTokenType.J_RETURN, start, 6, semantics); + super.visitReturn(node, semantics); return null; } @Override - public Void visitThrow(ThrowTree node, Void unused) { + public Void visitThrow(ThrowTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - addToken(JavaTokenType.J_THROW, start, 5); - super.visitThrow(node, unused); + semantics = new TokenSemanticsBuilder().control().critical().build(); + addToken(JavaTokenType.J_THROW, start, 5, semantics); + super.visitThrow(node, semantics); return null; } @Override - public Void visitNewClass(NewClassTree node, Void unused) { + public Void visitNewClass(NewClassTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, 
node); if (node.getTypeArguments().size() > 0) { - addToken(JavaTokenType.J_GENERIC, start, 3 + node.getIdentifier().toString().length()); + semantics = new TokenSemanticsBuilder().build(); + addToken(JavaTokenType.J_GENERIC, start, 3 + node.getIdentifier().toString().length(), semantics); } - addToken(JavaTokenType.J_NEWCLASS, start, 3); - super.visitNewClass(node, unused); + semantics = new TokenSemanticsBuilder().build(); + addToken(JavaTokenType.J_NEWCLASS, start, 3, semantics); + super.visitNewClass(node, semantics); return null; } @Override - public Void visitTypeParameter(TypeParameterTree node, Void unused) { + public Void visitTypeParameter(TypeParameterTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); // This is odd, but also done like this in Java17 - addToken(JavaTokenType.J_GENERIC, start, 1); - super.visitTypeParameter(node, unused); + semantics = new TokenSemanticsBuilder().build(); + addToken(JavaTokenType.J_GENERIC, start, 1, semantics); + super.visitTypeParameter(node, semantics); return null; } @Override - public Void visitNewArray(NewArrayTree node, Void unused) { + public Void visitNewArray(NewArrayTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - addToken(JavaTokenType.J_NEWARRAY, start, 3); - if (node.getInitializers() != null && !node.getInitializers().isEmpty()) { + semantics = new TokenSemanticsBuilder().build(); + addToken(JavaTokenType.J_NEWARRAY, start, 3, semantics); + boolean hasInit = node.getInitializers() != null && !node.getInitializers().isEmpty(); + if (hasInit) { start = positions.getStartPosition(ast, node.getInitializers().get(0)); - addToken(JavaTokenType.J_ARRAY_INIT_BEGIN, start, 1); - addToken(JavaTokenType.J_ARRAY_INIT_END, end, 1); + semantics = new TokenSemanticsBuilder().build(); + addToken(JavaTokenType.J_ARRAY_INIT_BEGIN, start, 1, semantics); + } + super.visitNewArray(node, 
semantics); // doesn't break tests :) + if (hasInit) { + semantics = new TokenSemanticsBuilder().build(); + addToken(JavaTokenType.J_ARRAY_INIT_END, end, 1, semantics); } - super.visitNewArray(node, unused); return null; } + private TokenSemantics conditionalCriticalSemantics(ExpressionTree expressionTree, Function conditional) { + TokenSemanticsBuilder semanticsBuilder = new TokenSemanticsBuilder(); + if (conditional.apply(expressionTree)) { + semanticsBuilder.critical(); + } + return semanticsBuilder.build(); + } + @Override - public Void visitAssignment(AssignmentTree node, Void unused) { + public Void visitAssignment(AssignmentTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - addToken(JavaTokenType.J_ASSIGN, start, 1); + semantics = conditionalCriticalSemantics(node.getVariable(), this::isNotExistingLocalVariable); + addToken(JavaTokenType.J_ASSIGN, start, 1, semantics); nextOperation = NextOperation.WRITE; - super.visitAssignment(node, unused); + super.visitAssignment(node, semantics); return null; } @Override - public Void visitCompoundAssignment(CompoundAssignmentTree node, Void unused) { + public Void visitCompoundAssignment(CompoundAssignmentTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - addToken(JavaTokenType.J_ASSIGN, start, 1); + semantics = conditionalCriticalSemantics(node.getVariable(), this::isNotExistingLocalVariable); + addToken(JavaTokenType.J_ASSIGN, start, 1, semantics); nextOperation = NextOperation.READ_WRITE; - super.visitCompoundAssignment(node, unused); + super.visitCompoundAssignment(node, semantics); return null; } @Override - public Void visitUnary(UnaryTree node, Void unused) { + public Void visitUnary(UnaryTree node, TokenSemantics semantics) { + semantics = conditionalCriticalSemantics(node.getExpression(), this::isNotExistingLocalVariable); if (Set.of(Tree.Kind.PREFIX_INCREMENT, Tree.Kind.POSTFIX_INCREMENT, Tree.Kind.PREFIX_DECREMENT, 
Tree.Kind.POSTFIX_DECREMENT) .contains(node.getKind())) { long start = positions.getStartPosition(ast, node); - addToken(JavaTokenType.J_ASSIGN, start, 1); + addToken(JavaTokenType.J_ASSIGN, start, 1, semantics); nextOperation = NextOperation.READ_WRITE; } - super.visitUnary(node, unused); + super.visitUnary(node, semantics); return null; } @Override - public Void visitAssert(AssertTree node, Void unused) { + public Void visitAssert(AssertTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - addToken(JavaTokenType.J_ASSERT, start, 6); - super.visitAssert(node, unused); + semantics = new TokenSemanticsBuilder().control().critical().build(); + addToken(JavaTokenType.J_ASSERT, start, 6, semantics); + super.visitAssert(node, semantics); return null; } @Override - public Void visitVariable(VariableTree node, Void unused) { - nextOperation = NextOperation.WRITE; + public Void visitVariable(VariableTree node, TokenSemantics semantics) { + long start = positions.getStartPosition(ast, node); + semantics = conditionalCriticalSemantics(node.getNameExpression(), (n) -> scopeVariables.isEmpty()); // member variable defs are critical + if (!scopeVariables.isEmpty()) { // local scope Name variableName = node.getName(); String variableId = variableId(); // System.out.println("new local " + formatVariable(variableId)); - localVariables.putIfAbsent(variableName, new Stack<>()); - localVariables.get(variableName).push(variableId); - variableNames.put(variableId, variableName); + localVariableIdMap.putIfAbsent(variableName, new Stack<>()); + localVariableIdMap.get(variableName).push(variableId); + variableNameMap.put(variableId, variableName); + localVariables.add(variableId); scopeVariables.peek().add(variableName); variableIsMutable.put(variableId, isMutable(node.getType())); - registerVariable(variableId); // somewhat special case, identifier isn't visited - } else { - registerVariable(getMemberVariableId(node.getName())); - } - long start = 
positions.getStartPosition(ast, node); - addToken(JavaTokenType.J_VARDEF, start, node.toString().length()); - super.visitVariable(node, unused); + registerVariable(variableId, semantics); // somewhat special case, identifier isn't visited + } // no else, don't want to register member variable defs since the location doesn't matter (also they're going to be up + // top 99% of the time) + + addToken(JavaTokenType.J_VARDEF, start, node.toString().length(), semantics); + nextOperation = NextOperation.WRITE; + super.visitVariable(node, semantics); return null; } @Override - public Void visitConditionalExpression(ConditionalExpressionTree node, Void unused) { + public Void visitConditionalExpression(ConditionalExpressionTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - addToken(JavaTokenType.J_COND, start, 1); - super.visitConditionalExpression(node, unused); + semantics = new TokenSemanticsBuilder().build(); + addToken(JavaTokenType.J_COND, start, 1, semantics); + super.visitConditionalExpression(node, semantics); return null; } @Override - public Void visitMethodInvocation(MethodInvocationTree node, Void unused) { + public Void visitMethodInvocation(MethodInvocationTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - addToken(JavaTokenType.J_APPLY, start, positions.getEndPosition(ast, node.getMethodSelect()) - start); - // from super method, would need to be changed if return value were to be used - scan(node.getTypeArguments(), unused); - // to differentiate bar() and this.bar() (ignore) from bar.foo() (don't ignore), different namespace for variables and methods + semantics = new TokenSemanticsBuilder().critical().control().build(); + addToken(JavaTokenType.J_APPLY, start, positions.getEndPosition(ast, node.getMethodSelect()) - start, semantics); + scan(node.getTypeArguments(), semantics); + // to differentiate bar() and this.bar() (ignore) from bar.foo() (don't ignore), different 
namespace for variables and + // methods if (isVariable(node.getMethodSelect())) { nextOperation = NextOperation.NONE; } mutableWrite = true; // when mentioned here, mutable variables can be written to - scan(node.getMethodSelect(), unused); - scan(node.getArguments(), unused); + scan(node.getMethodSelect(), semantics); + scan(node.getArguments(), semantics); mutableWrite = false; return null; } @Override - public Void visitAnnotation(AnnotationTree node, Void unused) { + public Void visitAnnotation(AnnotationTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - addToken(JavaTokenType.J_ANNO, start, 1); - super.visitAnnotation(node, unused); + semantics = new TokenSemanticsBuilder().build(); + addToken(JavaTokenType.J_ANNO, start, 1, semantics); + super.visitAnnotation(node, semantics); return null; } @Override - public Void visitModule(ModuleTree node, Void unused) { + public Void visitModule(ModuleTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - addToken(JavaTokenType.J_MODULE_BEGIN, start, 6); - super.visitModule(node, unused); - addToken(JavaTokenType.J_MODULE_END, end, 1); + semantics = new TokenSemanticsBuilder().critical().control().build(); + addToken(JavaTokenType.J_MODULE_BEGIN, start, 6, semantics); + super.visitModule(node, null); + semantics = new TokenSemanticsBuilder().critical().control().build(); + addToken(JavaTokenType.J_MODULE_END, end, 1, semantics); return null; } @Override - public Void visitRequires(RequiresTree node, Void unused) { + public Void visitRequires(RequiresTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - addToken(JavaTokenType.J_REQUIRES, start, 8); - super.visitRequires(node, unused); + semantics = new TokenSemanticsBuilder().critical().control().build(); + addToken(JavaTokenType.J_REQUIRES, start, 8, semantics); + super.visitRequires(node, semantics); 
return null; } @Override - public Void visitProvides(ProvidesTree node, Void unused) { + public Void visitProvides(ProvidesTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - addToken(JavaTokenType.J_PROVIDES, start, 8); - super.visitProvides(node, unused); + semantics = new TokenSemanticsBuilder().critical().control().build(); + addToken(JavaTokenType.J_PROVIDES, start, 8, semantics); + super.visitProvides(node, semantics); return null; } @Override - public Void visitExports(ExportsTree node, Void unused) { + public Void visitExports(ExportsTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - addToken(JavaTokenType.J_EXPORTS, start, 7); - super.visitExports(node, unused); + semantics = new TokenSemanticsBuilder().critical().control().build(); + addToken(JavaTokenType.J_EXPORTS, start, 7, semantics); + super.visitExports(node, semantics); return null; } @Override - public Void visitErroneous(ErroneousTree node, Void unused) { + public Void visitErroneous(ErroneousTree node, TokenSemantics semantics) { parsingExceptions.add(new ParsingException(file, "error while visiting %s".formatted(node))); - super.visitErroneous(node, unused); + super.visitErroneous(node, semantics); return null; } @Override - public Void visitYield(YieldTree node, Void unused) { + public Void visitYield(YieldTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node); - addToken(JavaTokenType.J_YIELD, start, end); - super.visitYield(node, unused); + semantics = new TokenSemanticsBuilder().control().build(); + addToken(JavaTokenType.J_YIELD, start, end, semantics); + super.visitYield(node, semantics); return null; } @Override - public Void visitDefaultCaseLabel(DefaultCaseLabelTree node, Void unused) { + public Void visitDefaultCaseLabel(DefaultCaseLabelTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, 
node); long end = positions.getEndPosition(ast, node); - addToken(JavaTokenType.J_DEFAULT, start, end); - super.visitDefaultCaseLabel(node, unused); + semantics = new TokenSemanticsBuilder().control().build(); + addToken(JavaTokenType.J_DEFAULT, start, end, semantics); + super.visitDefaultCaseLabel(node, semantics); return null; } @Override - public Void visitMemberSelect(MemberSelectTree node, Void unused) { + public Void visitMemberSelect(MemberSelectTree node, TokenSemantics semantics) { if (isOwnMemberSelect(node)) { - registerVariable(getMemberVariableId(node.getIdentifier())); + registerVariable(getMemberVariableId(node.getIdentifier()), semantics); } - super.visitMemberSelect(node, unused); + super.visitMemberSelect(node, semantics); return null; } @Override - public Void visitIdentifier(IdentifierTree node, Void unused) { - registerVariable(getVariableId(node.getName())); - super.visitIdentifier(node, unused); + public Void visitIdentifier(IdentifierTree node, TokenSemantics semantics) { + registerVariable(getVariableId(node.getName()), semantics); + super.visitIdentifier(node, semantics); return null; } } From 60f3c98412d04e3e3ddbeb3c9d315a7fe21b4cb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Sun, 18 Dec 2022 22:06:23 +0100 Subject: [PATCH 010/132] Add Variable class, refactor --- .../de/jplag/semantics/TokenSemantics.java | 46 +++++-- .../semantics/TokenSemanticsBuilder.java | 8 +- .../java/de/jplag/semantics/Variable.java | 20 +++ .../java/de/jplag/semantics/VariableId.java | 14 ++ .../src/main/java/de/jplag/java/Parser.java | 15 +++ .../java/TokenGeneratingTreeScanner.java | 125 ++++++++---------- 6 files changed, 140 insertions(+), 88 deletions(-) create mode 100644 language-api/src/main/java/de/jplag/semantics/Variable.java create mode 100644 language-api/src/main/java/de/jplag/semantics/VariableId.java diff --git a/language-api/src/main/java/de/jplag/semantics/TokenSemantics.java 
b/language-api/src/main/java/de/jplag/semantics/TokenSemantics.java index 074ad0450..6ad5baf53 100644 --- a/language-api/src/main/java/de/jplag/semantics/TokenSemantics.java +++ b/language-api/src/main/java/de/jplag/semantics/TokenSemantics.java @@ -1,6 +1,8 @@ package de.jplag.semantics; import java.util.Collections; +import java.util.LinkedList; +import java.util.List; import java.util.Set; /** @@ -9,30 +11,48 @@ * @param control Whether the token controls the program flow. * @param loopBegin Whether the token marks the beginning of a loop. * @param loopEnd Whether the token marks the end of a loop - * @param writes A set of the variable names which were (potentially) written to in this token. - * @param reads A set of the variable names which were (potentially) read from in this token. + * @param reads A set of the variables which were (potentially) read from in this token. + * @param writes A set of the variables which were (potentially) written to in this token. */ -public record TokenSemantics(boolean critical, boolean control, boolean loopBegin, boolean loopEnd, Set writes, Set reads) { +public record TokenSemantics(boolean critical, boolean control, boolean loopBegin, boolean loopEnd, Set reads, Set writes) { - public void addWrite(String write) { - writes.add(write); + public void addRead(Variable read) { + reads.add(read); } - public void addRead(String read) { - reads.add(read); + public void addWrite(Variable write) { + writes.add(write); } /** - * @return an unmodifiable set of the variable names which were (potentially) written to in this token. + * @return an unmodifiable set of the variables which were (potentially) read from in this token. */ - public Set writes() { - return Collections.unmodifiableSet(writes); + public Set reads() { + return Collections.unmodifiableSet(reads); } /** - * @return an unmodifiable set of the variable names which were (potentially) read from in this token. 
+ * @return an unmodifiable set of the variables which were (potentially) written to in this token. */ - public Set reads() { - return Collections.unmodifiableSet(reads); + public Set writes() { + return Collections.unmodifiableSet(writes); + } + + @Override + public String toString() { + List properties = new LinkedList<>(); + if (critical) + properties.add("critical"); + if (control) + properties.add("control"); + if (loopBegin) + properties.add("loop begin"); + if (loopEnd) + properties.add("loop end"); + if (!reads.isEmpty()) + properties.add("read " + String.join(" ", reads.stream().map(Variable::toString).toList())); + if (!writes.isEmpty()) + properties.add("write " + String.join(" ", writes.stream().map(Variable::toString).toList())); + return String.join(", ", properties); } } \ No newline at end of file diff --git a/language-api/src/main/java/de/jplag/semantics/TokenSemanticsBuilder.java b/language-api/src/main/java/de/jplag/semantics/TokenSemanticsBuilder.java index 8325dd542..1d5e0184f 100644 --- a/language-api/src/main/java/de/jplag/semantics/TokenSemanticsBuilder.java +++ b/language-api/src/main/java/de/jplag/semantics/TokenSemanticsBuilder.java @@ -11,19 +11,19 @@ public class TokenSemanticsBuilder { private boolean control; private boolean loopBegin; private boolean loopEnd; - private Set writes; - private Set reads; + private Set reads; + private Set writes; public TokenSemanticsBuilder() { - this.writes = new HashSet<>(); this.reads = new HashSet<>(); + this.writes = new HashSet<>(); } public TokenSemantics build() { if (loopBegin && loopEnd) { throw new IllegalStateException("Token can't mark both the beginning and end of a loop"); } - return new TokenSemantics(critical, control, loopBegin, loopEnd, writes, reads); + return new TokenSemantics(critical, control, loopBegin, loopEnd, reads, writes); } public TokenSemanticsBuilder critical() { diff --git a/language-api/src/main/java/de/jplag/semantics/Variable.java 
b/language-api/src/main/java/de/jplag/semantics/Variable.java new file mode 100644 index 000000000..a9e87a419 --- /dev/null +++ b/language-api/src/main/java/de/jplag/semantics/Variable.java @@ -0,0 +1,20 @@ +package de.jplag.semantics; + +import javax.lang.model.element.Name; + +public record Variable(Name name, VariableId id) { + + public Variable(Name name) { + this(name, new VariableId()); + } + + @Override + public String toString() { + return name + "[" + id + "]"; + } + + @Override + public int hashCode() { + return id.hashCode(); + } +} diff --git a/language-api/src/main/java/de/jplag/semantics/VariableId.java b/language-api/src/main/java/de/jplag/semantics/VariableId.java new file mode 100644 index 000000000..bd12bf1eb --- /dev/null +++ b/language-api/src/main/java/de/jplag/semantics/VariableId.java @@ -0,0 +1,14 @@ +package de.jplag.semantics; + +public record VariableId(String id) { + private static long counter; + + public VariableId() { + this(Long.toString(counter++)); + } + + @Override + public String toString() { + return id; + } +} diff --git a/languages/java/src/main/java/de/jplag/java/Parser.java b/languages/java/src/main/java/de/jplag/java/Parser.java index 1e623116d..231dcad59 100644 --- a/languages/java/src/main/java/de/jplag/java/Parser.java +++ b/languages/java/src/main/java/de/jplag/java/Parser.java @@ -22,10 +22,25 @@ public Parser() { public List parse(Set files) throws ParsingException { tokens = new ArrayList<>(); new JavacAdapter().parseFiles(files, this); + // print(); return tokens; } public void add(SemanticToken token) { tokens.add(token); } + + public void print() { + long currentLine = 0; + for (SemanticToken t : tokens) { + if (t.getLine() != currentLine) { + currentLine = t.getLine(); + System.out.println(); + System.out.println(t.getLine()); + } + System.out.print(t.getType().getDescription()); + System.out.print(" | "); + System.out.println(t.semantics()); + } + } } diff --git 
a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java index 19e8c499c..7e71e2bad 100644 --- a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java +++ b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java @@ -17,6 +17,7 @@ import de.jplag.semantics.SemanticToken; import de.jplag.semantics.TokenSemantics; import de.jplag.semantics.TokenSemanticsBuilder; +import de.jplag.semantics.Variable; import com.sun.source.tree.AnnotationTree; import com.sun.source.tree.AssertTree; @@ -74,15 +75,12 @@ final class TokenGeneratingTreeScanner extends TreeScanner private List parsingExceptions = new ArrayList<>(); - private int variableCount; - private Map memberVariableIds; // map member variable name to id - private Map> localVariableIdMap; // map local variable name to id - private Set localVariables; - private Map variableNameMap; // map variable id to name for debugging purposes, inverse of two maps above - private Map variableIsMutable; // map variable id to whether it is immutable - private Stack> scopeVariables; // stack of local variable names in scope - private NextOperation nextOperation; + private Map memberVariables; // map member variable name to variable + private Map> localVariables; // map local variable name to variable + private Stack> localVariablesByScope; // stack of local variable names in scope + private Map isMutable; // map variable to whether it is mutable private boolean mutableWrite; + private NextOperation nextOperation; private static final Set IMMUTABLES = Set.of( // from https://medium.com/@bpnorlander/java-understanding-primitive-types-and-wrapper-objects-a6798fb2afe9 @@ -102,15 +100,12 @@ public TokenGeneratingTreeScanner(File file, Parser parser, LineMap map, SourceP this.map = map; this.positions = positions; this.ast = ast; - this.variableCount = 0; - this.memberVariableIds = new HashMap<>(); - 
this.localVariableIdMap = new HashMap<>(); - this.variableNameMap = new HashMap<>(); - this.localVariables = new HashSet<>(); - this.variableIsMutable = new HashMap<>(); - this.scopeVariables = new Stack<>(); - this.nextOperation = NextOperation.READ; // the default + this.memberVariables = new HashMap<>(); + this.localVariables = new HashMap<>(); + this.localVariablesByScope = new Stack<>(); + this.isMutable = new HashMap<>(); this.mutableWrite = false; + this.nextOperation = NextOperation.READ; // the default } public List getParsingExceptions() { @@ -141,20 +136,16 @@ private void addToken(JavaTokenType tokenType, long start, long end, TokenSemant addToken(tokenType, file, map.getLineNumber(start), map.getColumnNumber(start), (end - start), semantics); } - private String variableId() { - return Integer.toString(variableCount++); + private Variable getMemberVariable(Name variableName) { + return memberVariables.getOrDefault(variableName, null); } - private String getMemberVariableId(Name variableName) { - return memberVariableIds.getOrDefault(variableName, null); - } - - private String getVariableId(Name variableName) { - Stack variableIdStack = localVariableIdMap.getOrDefault(variableName, null); + private Variable getVariable(Name variableName) { + Stack variableIdStack = localVariables.getOrDefault(variableName, null); if (variableIdStack != null) { return variableIdStack.peek(); } - return getMemberVariableId(variableName); + return getMemberVariable(variableName); } private boolean isVariable(ExpressionTree expressionTree) { @@ -163,45 +154,44 @@ private boolean isVariable(ExpressionTree expressionTree) { } private boolean isNotExistingLocalVariable(ExpressionTree expressionTree) { - return !(expressionTree.getKind() == Tree.Kind.IDENTIFIER && localVariables.contains(((IdentifierTree) expressionTree).getName().toString())); + if (expressionTree.getKind() != Tree.Kind.IDENTIFIER) { + return true; + } + Name variableName = ((IdentifierTree) 
expressionTree).getName(); + return !localVariables.containsKey(variableName); } private boolean isOwnMemberSelect(MemberSelectTree memberSelect) { return memberSelect.getExpression().toString().equals("this"); } - private String formatVariable(String variableId) { - return variableNameMap.get(variableId) + " [" + variableId + "]"; - } - private boolean isMutable(Tree classTree) { - return classTree != null && !IMMUTABLES.contains(classTree); + return classTree != null && !IMMUTABLES.contains(classTree.toString()); } - private void registerVariable(String variableId, TokenSemantics semantics) { - if (variableId != null) { - if (Set.of(NextOperation.WRITE, NextOperation.READ_WRITE).contains(nextOperation) || mutableWrite && variableIsMutable.get(variableId)) { - // System.out.println("write " + formatVariable(variableId)); - semantics.addWrite(variableId); - } + private void registerVariable(Variable variable, TokenSemantics semantics) { + if (variable != null) { if (Set.of(NextOperation.READ, NextOperation.READ_WRITE).contains(nextOperation)) { - // System.out.println("read " + formatVariable(variableId)); - semantics.addRead(variableId); // todo change order it's read/write not write/read + semantics.addRead(variable); + } + if (Set.of(NextOperation.WRITE, NextOperation.READ_WRITE).contains(nextOperation) || (mutableWrite && isMutable.get(variable))) { + semantics.addWrite(variable); } } nextOperation = NextOperation.READ; } public void enterLocalScope() { - scopeVariables.add(new HashSet<>()); + localVariablesByScope.add(new HashSet<>()); } public void exitLocalScope() { - for (Name variableName : scopeVariables.pop()) { - Stack variableIdStack = localVariableIdMap.get(variableName); - variableIdStack.pop(); - if (variableIdStack.isEmpty()) - localVariableIdMap.remove(variableName); + for (Name variableName : localVariablesByScope.pop()) { + Stack variableStack = localVariables.get(variableName); + variableStack.pop(); + if (variableStack.isEmpty()) { + 
localVariables.remove(variableName); + } } } @@ -233,13 +223,10 @@ public Void visitBlock(BlockTree node, TokenSemantics semantics) { public Void visitClass(ClassTree node, TokenSemantics semantics) { for (var member : node.getMembers()) { if (member.getKind() == Tree.Kind.VARIABLE) { - VariableTree variable = (VariableTree) member; - Name variableName = variable.getName(); - String variableId = variableId(); - // System.out.println("new member " + formatVariable(variableId)); - memberVariableIds.put(variableName, variableId); - variableNameMap.put(variableId, variableName); - variableIsMutable.put(variableId, isMutable(variable.getType())); + VariableTree variableTree = (VariableTree) member; + Variable variable = new Variable(variableTree.getName()); + memberVariables.put(variable.name(), variable); + isMutable.put(variable, isMutable(variableTree.getType())); } } @@ -271,7 +258,7 @@ public Void visitClass(ClassTree node, TokenSemantics semantics) { semantics = new TokenSemanticsBuilder().control().critical().build(); addToken(tokenType, end, 1, semantics); } - memberVariableIds.clear(); + memberVariables.clear(); return null; } @@ -601,24 +588,20 @@ public Void visitAssert(AssertTree node, TokenSemantics semantics) { @Override public Void visitVariable(VariableTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - semantics = conditionalCriticalSemantics(node.getNameExpression(), (n) -> scopeVariables.isEmpty()); // member variable defs are critical - - if (!scopeVariables.isEmpty()) { // local scope - Name variableName = node.getName(); - String variableId = variableId(); - // System.out.println("new local " + formatVariable(variableId)); - localVariableIdMap.putIfAbsent(variableName, new Stack<>()); - localVariableIdMap.get(variableName).push(variableId); - variableNameMap.put(variableId, variableName); - localVariables.add(variableId); - scopeVariables.peek().add(variableName); - variableIsMutable.put(variableId, 
isMutable(node.getType())); - registerVariable(variableId, semantics); // somewhat special case, identifier isn't visited - } // no else, don't want to register member variable defs since the location doesn't matter (also they're going to be up - // top 99% of the time) + // member variable defs are critical + semantics = conditionalCriticalSemantics(node.getNameExpression(), n -> localVariablesByScope.isEmpty()); + + if (!localVariablesByScope.isEmpty()) { // local scope + Variable variable = new Variable(node.getName()); + localVariables.putIfAbsent(variable.name(), new Stack<>()); + localVariables.get(variable.name()).push(variable); + localVariablesByScope.peek().add(variable.name()); + isMutable.put(variable, isMutable(node.getType())); + semantics.addWrite(variable); // somewhat special case, identifier isn't visited + } // no else, don't want to register member variable defs since the location doesn't matter + // (also they're going to be up top 99% of the time) addToken(JavaTokenType.J_VARDEF, start, node.toString().length(), semantics); - nextOperation = NextOperation.WRITE; super.visitVariable(node, semantics); return null; } @@ -728,7 +711,7 @@ public Void visitDefaultCaseLabel(DefaultCaseLabelTree node, TokenSemantics sema @Override public Void visitMemberSelect(MemberSelectTree node, TokenSemantics semantics) { if (isOwnMemberSelect(node)) { - registerVariable(getMemberVariableId(node.getIdentifier()), semantics); + registerVariable(getMemberVariable(node.getIdentifier()), semantics); } super.visitMemberSelect(node, semantics); return null; @@ -736,7 +719,7 @@ public Void visitMemberSelect(MemberSelectTree node, TokenSemantics semantics) { @Override public Void visitIdentifier(IdentifierTree node, TokenSemantics semantics) { - registerVariable(getVariableId(node.getName()), semantics); + registerVariable(getVariable(node.getName()), semantics); super.visitIdentifier(node, semantics); return null; } From b18eff098a905c9ad5a8cb821c31b5c1d92b4009 Mon Sep 
17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Tue, 20 Dec 2022 17:39:49 +0100 Subject: [PATCH 011/132] Remove SemanticToken class, use composition instead --- .../src/main/java/de/jplag/Token.java | 52 ++++++++++++++++--- .../de/jplag/semantics/SemanticToken.java | 38 -------------- .../main/java/de/jplag/java/JavacAdapter.java | 4 +- .../src/main/java/de/jplag/java/Language.java | 2 +- .../src/main/java/de/jplag/java/Parser.java | 16 +++--- .../java/TokenGeneratingTreeScanner.java | 8 +-- 6 files changed, 59 insertions(+), 61 deletions(-) delete mode 100644 language-api/src/main/java/de/jplag/semantics/SemanticToken.java diff --git a/language-api/src/main/java/de/jplag/Token.java b/language-api/src/main/java/de/jplag/Token.java index 8d0420e82..7eceb696c 100644 --- a/language-api/src/main/java/de/jplag/Token.java +++ b/language-api/src/main/java/de/jplag/Token.java @@ -5,6 +5,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import de.jplag.semantics.TokenSemantics; +import de.jplag.semantics.TokenSemanticsBuilder; + /** * This class represents a token in a source code. It can represent keywords, identifiers, syntactical structures etc. * What types of tokens there are depends on the specific language, meaning JPlag does not enforce a specific token set. @@ -20,14 +23,7 @@ public class Token { private int length; private File file; private TokenType type; - - /** - * Creates a token of type {@link SharedTokenType#FILE_END FILE_END} without information about line, column, and length. - * @param file is the name of the source code file. - */ - public static Token fileEnd(File file) { - return new Token(SharedTokenType.FILE_END, file, NO_VALUE, NO_VALUE, NO_VALUE); - } + private TokenSemantics semantics; // value null if no semantics /** * Creates a token with column and length information. 
@@ -49,6 +45,39 @@ public Token(TokenType type, File file, int line, int column, int length) { this.line = line; this.column = column; this.length = length; + this.semantics = null; + } + + /** + * Creates a token with column, length and semantic information. + * @param type is the token type. + * @param file is the name of the source code file. + * @param line is the line index in the source code where the token resides. Index is 1-based. + * @param column is the column index, meaning where the token starts in the line. Index is 1-based. + * @param length is the length of the token in the source code. + * @param semantics is a record containing semantic information about the token. + */ + public Token(TokenType type, File file, int line, int column, int length, TokenSemantics semantics) { + this(type, file, line, column, length); + this.semantics = semantics; + } + + /** + * Creates a token of type {@link SharedTokenType#FILE_END FILE_END} without information about line, column, and length. + * @param file is the name of the source code file. + */ + public static Token fileEnd(File file) { + return new Token(SharedTokenType.FILE_END, file, NO_VALUE, NO_VALUE, NO_VALUE); + } + + /** + * Creates a token of type {@link SharedTokenType#FILE_END FILE_END} without information about line, column, and length, + * but with semantic information. + * @param file is the name of the source code file. + */ + public static Token semanticFileEnd(File file) { + TokenSemantics semantics = new TokenSemanticsBuilder().control().critical().build(); + return new Token(SharedTokenType.FILE_END, file, NO_VALUE, NO_VALUE, NO_VALUE, semantics); } /** @@ -88,4 +117,11 @@ public int getLine() { public TokenType getType() { return type; } + + /** + * @return the semantics of the token. 
+ */ + public TokenSemantics getSemantics() { + return semantics; + } } diff --git a/language-api/src/main/java/de/jplag/semantics/SemanticToken.java b/language-api/src/main/java/de/jplag/semantics/SemanticToken.java deleted file mode 100644 index c3c4e6823..000000000 --- a/language-api/src/main/java/de/jplag/semantics/SemanticToken.java +++ /dev/null @@ -1,38 +0,0 @@ -package de.jplag.semantics; - -import java.io.File; - -import de.jplag.SharedTokenType; -import de.jplag.Token; -import de.jplag.TokenType; - -public class SemanticToken extends Token { - - private TokenSemantics semantics; - - /** - * @return a record containing semantic information about the token. - */ - public TokenSemantics semantics() { - return semantics; - } - - /** - * Creates a token with column, length and semantic information. - * @param type is the token type. - * @param file is the name of the source code file. - * @param line is the line index in the source code where the token resides. Index is 1-based. - * @param column is the column index, meaning where the token starts in the line. Index is 1-based. - * @param length is the length of the token in the source code. - * @param semantics is a record containing semantic information about the token. 
- */ - public SemanticToken(TokenType type, File file, int line, int column, int length, TokenSemantics semantics) { - super(type, file, line, column, length); - this.semantics = semantics; - } - - public static SemanticToken fileEnd(File file) { - TokenSemantics semantics = new TokenSemanticsBuilder().control().critical().build(); // todo - return new SemanticToken(SharedTokenType.FILE_END, file, NO_VALUE, NO_VALUE, NO_VALUE, semantics); - } -} diff --git a/languages/java/src/main/java/de/jplag/java/JavacAdapter.java b/languages/java/src/main/java/de/jplag/java/JavacAdapter.java index bd7c4954c..54668026c 100644 --- a/languages/java/src/main/java/de/jplag/java/JavacAdapter.java +++ b/languages/java/src/main/java/de/jplag/java/JavacAdapter.java @@ -19,7 +19,7 @@ import org.slf4j.Logger; import de.jplag.ParsingException; -import de.jplag.semantics.SemanticToken; +import de.jplag.Token; import com.sun.source.tree.CompilationUnitTree; import com.sun.source.tree.LineMap; @@ -49,7 +49,7 @@ public void parseFiles(Set files, final Parser parser) throws ParsingExcep var scanner = new TokenGeneratingTreeScanner(file, parser, map, positions, ast); ast.accept(scanner, null); parsingExceptions.addAll(scanner.getParsingExceptions()); - parser.add(SemanticToken.fileEnd(file)); + parser.add(Token.semanticFileEnd(file)); } } catch (IOException exception) { throw new ParsingException(null, exception.getMessage(), exception); diff --git a/languages/java/src/main/java/de/jplag/java/Language.java b/languages/java/src/main/java/de/jplag/java/Language.java index f3b13589b..fd0d2a577 100644 --- a/languages/java/src/main/java/de/jplag/java/Language.java +++ b/languages/java/src/main/java/de/jplag/java/Language.java @@ -44,6 +44,6 @@ public int minimumTokenMatch() { @Override public List parse(Set files) throws ParsingException { - return this.parser.parse(files).stream().map(Token.class::cast).toList(); // todo + return this.parser.parse(files); } } diff --git 
a/languages/java/src/main/java/de/jplag/java/Parser.java b/languages/java/src/main/java/de/jplag/java/Parser.java index 231dcad59..eecc3ae05 100644 --- a/languages/java/src/main/java/de/jplag/java/Parser.java +++ b/languages/java/src/main/java/de/jplag/java/Parser.java @@ -7,10 +7,10 @@ import de.jplag.AbstractParser; import de.jplag.ParsingException; -import de.jplag.semantics.SemanticToken; +import de.jplag.Token; public class Parser extends AbstractParser { - private List tokens; + private List tokens; /** * Creates the parser. @@ -19,20 +19,20 @@ public Parser() { super(); } - public List parse(Set files) throws ParsingException { + public List parse(Set files) throws ParsingException { tokens = new ArrayList<>(); new JavacAdapter().parseFiles(files, this); - // print(); + // printSemantics(); return tokens; } - public void add(SemanticToken token) { + public void add(Token token) { tokens.add(token); } - public void print() { + public void printSemantics() { long currentLine = 0; - for (SemanticToken t : tokens) { + for (Token t : tokens) { if (t.getLine() != currentLine) { currentLine = t.getLine(); System.out.println(); @@ -40,7 +40,7 @@ public void print() { } System.out.print(t.getType().getDescription()); System.out.print(" | "); - System.out.println(t.semantics()); + System.out.println(t.getSemantics()); } } } diff --git a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java index 08c4bb07c..f4e4f46e6 100644 --- a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java +++ b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java @@ -13,8 +13,8 @@ import javax.lang.model.element.Name; import de.jplag.ParsingException; +import de.jplag.Token; import de.jplag.TokenType; -import de.jplag.semantics.SemanticToken; import de.jplag.semantics.TokenSemantics; import de.jplag.semantics.TokenSemanticsBuilder; import 
de.jplag.semantics.Variable; @@ -84,8 +84,8 @@ final class TokenGeneratingTreeScanner extends TreeScanner private static final Set IMMUTABLES = Set.of( // from https://medium.com/@bpnorlander/java-understanding-primitive-types-and-wrapper-objects-a6798fb2afe9 - "byte", "short", "int", "long", "float", "double", "boolean", "char", "Byte", "Short", "Integer", "Long", "Float", "Double", "Boolean", - "Character", "String"); + "byte", "short", "int", "long", "float", "double", "boolean", "char", // + "Byte", "Short", "Integer", "Long", "Float", "Double", "Boolean", "Character", "String"); enum NextOperation { NONE, @@ -113,7 +113,7 @@ public List getParsingExceptions() { } public void addToken(TokenType type, File file, long line, long column, long length, TokenSemantics semantics) { - parser.add(new SemanticToken(type, file, (int) line, (int) column, (int) length, semantics)); + parser.add(new Token(type, file, (int) line, (int) column, (int) length, semantics)); } /** From a74dad20040c8a0b600665f2b75983dd4fb9f2a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Tue, 20 Dec 2022 17:41:58 +0100 Subject: [PATCH 012/132] Add normalization skeleton --- core/src/main/java/de/jplag/JPlag.java | 6 ++++++ core/src/main/java/de/jplag/Submission.java | 6 ++++++ core/src/main/java/de/jplag/SubmissionSet.java | 4 ++++ .../java/de/jplag/normalization/Normalizer.java | 17 +++++++++++++++++ .../src/main/java/de/jplag/Language.java | 8 ++++++++ .../src/main/java/de/jplag/java/Language.java | 5 +++++ 6 files changed, 46 insertions(+) create mode 100644 core/src/main/java/de/jplag/normalization/Normalizer.java diff --git a/core/src/main/java/de/jplag/JPlag.java b/core/src/main/java/de/jplag/JPlag.java index 137e6b11c..af645f35d 100644 --- a/core/src/main/java/de/jplag/JPlag.java +++ b/core/src/main/java/de/jplag/JPlag.java @@ -59,6 +59,12 @@ public JPlagResult run() throws ExitException { throw new SubmissionException("Not enough valid submissions! 
(found " + submissionCount + " valid submissions)"); } + // better solution long-term: pull this into submissionSet constructor, option for normalization, can only be true if + // language supports it + if (language.tokensHaveSemantics()) { + submissionSet.normalizeSubmissions(); + } + // Compare valid submissions. JPlagResult result = comparisonStrategy.compareSubmissions(submissionSet); if (logger.isInfoEnabled()) diff --git a/core/src/main/java/de/jplag/Submission.java b/core/src/main/java/de/jplag/Submission.java index 576af0fbd..1910cef2a 100644 --- a/core/src/main/java/de/jplag/Submission.java +++ b/core/src/main/java/de/jplag/Submission.java @@ -13,6 +13,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import de.jplag.normalization.Normalizer; + /** * Represents a single submission. A submission can contain multiple files. */ @@ -265,4 +267,8 @@ private static File createErrorDirectory(String... subdirectoryNames) { } return true; } + + void normalize() { + tokenList = Normalizer.normalize(tokenList); + } } diff --git a/core/src/main/java/de/jplag/SubmissionSet.java b/core/src/main/java/de/jplag/SubmissionSet.java index 5fd637324..5fac01093 100644 --- a/core/src/main/java/de/jplag/SubmissionSet.java +++ b/core/src/main/java/de/jplag/SubmissionSet.java @@ -90,6 +90,10 @@ public List getInvalidSubmissions() { return invalidSubmissions; } + public void normalizeSubmissions() { + submissions.forEach(Submission::normalize); + } + private List filterValidSubmissions() { return allSubmissions.stream().filter(submission -> !submission.hasErrors()).collect(Collectors.toCollection(ArrayList::new)); } diff --git a/core/src/main/java/de/jplag/normalization/Normalizer.java b/core/src/main/java/de/jplag/normalization/Normalizer.java new file mode 100644 index 000000000..703f45035 --- /dev/null +++ b/core/src/main/java/de/jplag/normalization/Normalizer.java @@ -0,0 +1,17 @@ +package de.jplag.normalization; + +import java.util.List; + +import de.jplag.Token; + 
+public class Normalizer { + + private Normalizer() { + } + + public static List normalize(List tokens) { + // Graph graph = constructGraph(tokens); + // return linearizeGraph(graph); + return tokens; + } +} diff --git a/language-api/src/main/java/de/jplag/Language.java b/language-api/src/main/java/de/jplag/Language.java index 4f4d94d78..892e30a8d 100644 --- a/language-api/src/main/java/de/jplag/Language.java +++ b/language-api/src/main/java/de/jplag/Language.java @@ -37,6 +37,14 @@ public interface Language { */ List parse(Set files) throws ParsingException; + /** + * Indicates whether the tokens returned by parse have semantic information added to them, i.e. whether the token + * attribute semantics is null or not. + */ + default boolean tokensHaveSemantics() { + return false; + } + /** * Determines whether a fixed-width font should be used to display that language. */ diff --git a/languages/java/src/main/java/de/jplag/java/Language.java b/languages/java/src/main/java/de/jplag/java/Language.java index fd0d2a577..86d49fbed 100644 --- a/languages/java/src/main/java/de/jplag/java/Language.java +++ b/languages/java/src/main/java/de/jplag/java/Language.java @@ -46,4 +46,9 @@ public int minimumTokenMatch() { public List parse(Set files) throws ParsingException { return this.parser.parse(files); } + + @Override + public boolean tokensHaveSemantics() { + return true; + } } From 13cd78907ad39b2bab1804083b69a57ebbb6ecb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Wed, 21 Dec 2022 18:21:36 +0100 Subject: [PATCH 013/132] Add normalization data structures --- .../de/jplag/normalization/Dependency.java | 32 ++++++++++ .../jplag/normalization/DependencyType.java | 6 ++ .../de/jplag/normalization/TokenGroup.java | 63 +++++++++++++++++++ .../src/main/java/de/jplag/Token.java | 4 ++ .../de/jplag/semantics/TokenSemantics.java | 19 ++++++ .../semantics/TokenSemanticsBuilder.java | 5 -- .../java/TokenGeneratingTreeScanner.java | 16 ++--- 7 files changed, 132 
insertions(+), 13 deletions(-) create mode 100644 core/src/main/java/de/jplag/normalization/Dependency.java create mode 100644 core/src/main/java/de/jplag/normalization/DependencyType.java create mode 100644 core/src/main/java/de/jplag/normalization/TokenGroup.java diff --git a/core/src/main/java/de/jplag/normalization/Dependency.java b/core/src/main/java/de/jplag/normalization/Dependency.java new file mode 100644 index 000000000..be8fc4229 --- /dev/null +++ b/core/src/main/java/de/jplag/normalization/Dependency.java @@ -0,0 +1,32 @@ +package de.jplag.normalization; + +import java.util.Objects; + +import org.jgrapht.graph.DefaultEdge; + +import de.jplag.semantics.Variable; + +public class Dependency extends DefaultEdge { // for optimization + private final DependencyType type; + private final Variable cause; + + public Dependency(DependencyType type, Variable cause) { + this.type = type; + this.cause = cause; + } + + @Override + public boolean equals(Object o) { + if (this == o) + return true; + if (o == null || getClass() != o.getClass()) + return false; + Dependency that = (Dependency) o; + return type == that.type && Objects.equals(cause, that.cause); + } + + @Override + public int hashCode() { + return Objects.hash(type, cause, super.getSource(), super.getTarget()); + } +} diff --git a/core/src/main/java/de/jplag/normalization/DependencyType.java b/core/src/main/java/de/jplag/normalization/DependencyType.java new file mode 100644 index 000000000..af2b6c672 --- /dev/null +++ b/core/src/main/java/de/jplag/normalization/DependencyType.java @@ -0,0 +1,6 @@ +package de.jplag.normalization; + +public enum DependencyType { + DATA, + ORDER +} diff --git a/core/src/main/java/de/jplag/normalization/TokenGroup.java b/core/src/main/java/de/jplag/normalization/TokenGroup.java new file mode 100644 index 000000000..bbb993205 --- /dev/null +++ b/core/src/main/java/de/jplag/normalization/TokenGroup.java @@ -0,0 +1,63 @@ +package de.jplag.normalization; + +import 
java.util.Collections; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; + +import de.jplag.Token; +import de.jplag.semantics.TokenSemantics; + +public class TokenGroup implements Comparable { + + List tokens; + TokenSemantics joinedSemantics; + + public TokenGroup(List tokens) { + this.tokens = Collections.unmodifiableList(tokens); + this.joinedSemantics = TokenSemantics.join(tokens.stream().map(Token::getSemantics).toList()); + } + + public static List group(List tokens) { + List tokenGroups = new LinkedList<>(); + List groupTokens = new LinkedList<>(); + int currentLine = 0; + for (Token t : tokens) { + groupTokens.add(t); + if (t.getLine() != currentLine) { + currentLine = t.getLine(); + tokenGroups.add(new TokenGroup(new LinkedList<>(groupTokens))); + groupTokens.clear(); + } + } + return tokenGroups; + } + + public static List ungroup(List tokenGroups) { + return tokenGroups.stream().flatMap(tg -> tg.tokens.stream()).toList(); + } + + private int tokenOrdinal(Token token) { + return ((Enum) token.getType()).ordinal(); // reflects the order the enums were declared in + } + + @Override + public int compareTo(TokenGroup other) { + int sizeComp = Integer.compare(this.tokens.size(), other.tokens.size()); + if (sizeComp != 0) + return -sizeComp; // bigger size should come first + Iterator tokens = this.tokens.iterator(); + Iterator otherTokens = other.tokens.iterator(); + for (int i = 0; i < this.tokens.size(); i++) { + int tokenComp = Integer.compare(tokenOrdinal(tokens.next()), tokenOrdinal(otherTokens.next())); + if (tokenComp != 0) + return tokenComp; + } + return 0; + } + + @Override + public String toString() { + return String.join(" ", tokens.stream().map(Token::toString).toList()); + } +} diff --git a/language-api/src/main/java/de/jplag/Token.java b/language-api/src/main/java/de/jplag/Token.java index 7eceb696c..36f82d0a8 100644 --- a/language-api/src/main/java/de/jplag/Token.java +++ 
b/language-api/src/main/java/de/jplag/Token.java @@ -124,4 +124,8 @@ public TokenType getType() { public TokenSemantics getSemantics() { return semantics; } + + public String toString() { + return type.getDescription(); + } } diff --git a/language-api/src/main/java/de/jplag/semantics/TokenSemantics.java b/language-api/src/main/java/de/jplag/semantics/TokenSemantics.java index 6ad5baf53..462aa44dd 100644 --- a/language-api/src/main/java/de/jplag/semantics/TokenSemantics.java +++ b/language-api/src/main/java/de/jplag/semantics/TokenSemantics.java @@ -38,6 +38,25 @@ public Set writes() { return Collections.unmodifiableSet(writes); } + public static TokenSemantics join(List semanticsList) { + TokenSemanticsBuilder semanticsBuilder = new TokenSemanticsBuilder(); + for (TokenSemantics semantics : semanticsList) { + if (semantics.critical) + semanticsBuilder.critical(); + if (semantics.control) + semanticsBuilder.control(); + if (semantics.loopBegin) + semanticsBuilder.loopBegin(); + if (semantics.loopEnd) + semanticsBuilder.loopEnd(); + for (Variable r : semantics.reads) + semantics.addRead(r); + for (Variable w : semantics.writes) + semantics.addWrite(w); + } + return semanticsBuilder.build(); + } + @Override public String toString() { List properties = new LinkedList<>(); diff --git a/language-api/src/main/java/de/jplag/semantics/TokenSemanticsBuilder.java b/language-api/src/main/java/de/jplag/semantics/TokenSemanticsBuilder.java index 1d5e0184f..b53f7bbe0 100644 --- a/language-api/src/main/java/de/jplag/semantics/TokenSemanticsBuilder.java +++ b/language-api/src/main/java/de/jplag/semantics/TokenSemanticsBuilder.java @@ -20,9 +20,6 @@ public TokenSemanticsBuilder() { } public TokenSemantics build() { - if (loopBegin && loopEnd) { - throw new IllegalStateException("Token can't mark both the beginning and end of a loop"); - } return new TokenSemantics(critical, control, loopBegin, loopEnd, reads, writes); } @@ -38,13 +35,11 @@ public TokenSemanticsBuilder control() { 
public TokenSemanticsBuilder loopBegin() { this.loopBegin = true; - this.control = true; return this; } public TokenSemanticsBuilder loopEnd() { this.loopEnd = true; - this.control = true; return this; } } diff --git a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java index f4e4f46e6..e3287c76b 100644 --- a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java +++ b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java @@ -309,10 +309,10 @@ public Void visitSynchronized(SynchronizedTree node, TokenSemantics semantics) { public Void visitDoWhileLoop(DoWhileLoopTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = new TokenSemanticsBuilder().loopBegin().build(); + semantics = new TokenSemanticsBuilder().control().loopBegin().build(); addToken(JavaTokenType.J_DO_BEGIN, start, 2, semantics); scan(node.getStatement(), null); - semantics = new TokenSemanticsBuilder().loopEnd().build(); + semantics = new TokenSemanticsBuilder().control().loopEnd().build(); addToken(JavaTokenType.J_DO_END, end, 1, semantics); scan(node.getCondition(), semantics); return null; @@ -322,11 +322,11 @@ public Void visitDoWhileLoop(DoWhileLoopTree node, TokenSemantics semantics) { public Void visitWhileLoop(WhileLoopTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = new TokenSemanticsBuilder().loopBegin().build(); + semantics = new TokenSemanticsBuilder().control().loopBegin().build(); addToken(JavaTokenType.J_WHILE_BEGIN, start, 5, semantics); scan(node.getCondition(), semantics); scan(node.getStatement(), null); - semantics = new TokenSemanticsBuilder().loopEnd().build(); + semantics = new TokenSemanticsBuilder().control().loopEnd().build(); 
addToken(JavaTokenType.J_WHILE_END, end, 1, semantics); return null; } @@ -336,13 +336,13 @@ public Void visitForLoop(ForLoopTree node, TokenSemantics semantics) { enterLocalScope(); long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = new TokenSemanticsBuilder().loopBegin().build(); + semantics = new TokenSemanticsBuilder().control().loopBegin().build(); addToken(JavaTokenType.J_FOR_BEGIN, start, 3, semantics); scan(node.getInitializer(), semantics); scan(node.getCondition(), semantics); scan(node.getUpdate(), semantics); scan(node.getStatement(), null); - semantics = new TokenSemanticsBuilder().loopEnd().build(); + semantics = new TokenSemanticsBuilder().control().loopEnd().build(); addToken(JavaTokenType.J_FOR_END, end, 1, semantics); exitLocalScope(); return null; @@ -353,12 +353,12 @@ public Void visitEnhancedForLoop(EnhancedForLoopTree node, TokenSemantics semant enterLocalScope(); long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = new TokenSemanticsBuilder().loopBegin().build(); + semantics = new TokenSemanticsBuilder().control().loopBegin().build(); addToken(JavaTokenType.J_FOR_BEGIN, start, 3, semantics); scan(node.getVariable(), semantics); scan(node.getExpression(), semantics); scan(node.getStatement(), null); - semantics = new TokenSemanticsBuilder().loopEnd().build(); + semantics = new TokenSemanticsBuilder().control().loopEnd().build(); addToken(JavaTokenType.J_FOR_END, end, 1, semantics); exitLocalScope(); return null; From e059ed0e4a3a3b33c216186eabcac032d298216f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Wed, 21 Dec 2022 18:21:44 +0100 Subject: [PATCH 014/132] Add normalization dummy implementation --- core/pom.xml | 5 ++ .../de/jplag/normalization/Normalizer.java | 50 +++++++++++++++++-- 2 files changed, 52 insertions(+), 3 deletions(-) diff --git a/core/pom.xml b/core/pom.xml index 
f3ca51810..def3a8024 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -11,6 +11,11 @@ jplag + + org.jgrapht + jgrapht-core + 1.5.1 + com.fasterxml.jackson.core jackson-databind diff --git a/core/src/main/java/de/jplag/normalization/Normalizer.java b/core/src/main/java/de/jplag/normalization/Normalizer.java index 703f45035..d45dc5677 100644 --- a/core/src/main/java/de/jplag/normalization/Normalizer.java +++ b/core/src/main/java/de/jplag/normalization/Normalizer.java @@ -1,6 +1,12 @@ package de.jplag.normalization; +import java.util.LinkedList; import java.util.List; +import java.util.PriorityQueue; +import java.util.stream.Collectors; + +import org.jgrapht.Graphs; +import org.jgrapht.graph.DirectedMultigraph; import de.jplag.Token; @@ -10,8 +16,46 @@ private Normalizer() { } public static List normalize(List tokens) { - // Graph graph = constructGraph(tokens); - // return linearizeGraph(graph); - return tokens; + List tokenGroups = TokenGroup.group(tokens); + List originalTokenGroups = new LinkedList<>(tokenGroups); + DirectedMultigraph graph = constructGraph(tokenGroups); + tokenGroups = linearizeGraph(graph); + assert tokenGroups.equals(originalTokenGroups); + return TokenGroup.ungroup(tokenGroups); + } + + private static DirectedMultigraph constructGraph(List tokenGroups) { + DirectedMultigraph graph = new DirectedMultigraph<>(Dependency.class); + TokenGroup startGroup = tokenGroups.remove(0); + graph.addVertex(startGroup); + for (TokenGroup endGroup : tokenGroups) { + graph.addVertex(endGroup); + graph.addEdge(startGroup, endGroup, new Dependency(DependencyType.DATA, null)); + startGroup = endGroup; + } + return graph; + } + + private static List linearizeGraph(DirectedMultigraph graph) { + PriorityQueue roots = graph.vertexSet().stream() // + .filter(v -> !Graphs.vertexHasPredecessors(graph, v)) // + .collect(Collectors.toCollection(PriorityQueue::new)); + List tokenGroups = new LinkedList<>(); + while (!roots.isEmpty()) { + PriorityQueue newRoots = new 
PriorityQueue<>(); + do { + TokenGroup group = roots.poll(); + tokenGroups.add(group); + for (TokenGroup successorGroup : Graphs.successorListOf(graph, group)) { + graph.removeAllEdges(group, successorGroup); + if (!Graphs.vertexHasPredecessors(graph, successorGroup)) { + newRoots.add(successorGroup); + } + } + } while (!roots.isEmpty()); + roots = newRoots; + } + assert tokenGroups.size() == graph.vertexSet().size(); + return tokenGroups; } } From e59cc19fa4d01c633481dc47277262f10540ce52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Fri, 23 Dec 2022 01:25:34 +0100 Subject: [PATCH 015/132] Add normalization graph construction --- .../de/jplag/normalization/Dependency.java | 32 ++---- .../jplag/normalization/DependencyItem.java | 6 ++ .../jplag/normalization/DependencyType.java | 3 +- .../jplag/normalization/GraphConstructor.java | 98 +++++++++++++++++++ .../de/jplag/normalization/Normalizer.java | 22 +---- .../de/jplag/normalization/TokenGroup.java | 9 +- .../java/de/jplag/semantics/Variable.java | 5 - .../java/TokenGeneratingTreeScanner.java | 3 + 8 files changed, 128 insertions(+), 50 deletions(-) create mode 100644 core/src/main/java/de/jplag/normalization/DependencyItem.java create mode 100644 core/src/main/java/de/jplag/normalization/GraphConstructor.java diff --git a/core/src/main/java/de/jplag/normalization/Dependency.java b/core/src/main/java/de/jplag/normalization/Dependency.java index be8fc4229..4fc50426d 100644 --- a/core/src/main/java/de/jplag/normalization/Dependency.java +++ b/core/src/main/java/de/jplag/normalization/Dependency.java @@ -1,32 +1,20 @@ package de.jplag.normalization; -import java.util.Objects; - -import org.jgrapht.graph.DefaultEdge; +import java.util.HashSet; +import java.util.Set; import de.jplag.semantics.Variable; -public class Dependency extends DefaultEdge { // for optimization - private final DependencyType type; - private final Variable cause; - - public Dependency(DependencyType type, Variable cause) { 
- this.type = type; - this.cause = cause; - } +// only purpose is debugging/explainability, edges could be anonymous otherwise +// not a record because JGraphT wants unique edges and we don't... +public class Dependency { + private Set items; - @Override - public boolean equals(Object o) { - if (this == o) - return true; - if (o == null || getClass() != o.getClass()) - return false; - Dependency that = (Dependency) o; - return type == that.type && Objects.equals(cause, that.cause); + public Dependency() { + items = new HashSet<>(); } - @Override - public int hashCode() { - return Objects.hash(type, cause, super.getSource(), super.getTarget()); + public void addItem(DependencyType type, Variable cause) { + items.add(new DependencyItem(type, cause)); } } diff --git a/core/src/main/java/de/jplag/normalization/DependencyItem.java b/core/src/main/java/de/jplag/normalization/DependencyItem.java new file mode 100644 index 000000000..aefca06ac --- /dev/null +++ b/core/src/main/java/de/jplag/normalization/DependencyItem.java @@ -0,0 +1,6 @@ +package de.jplag.normalization; + +import de.jplag.semantics.Variable; + +public record DependencyItem(DependencyType type, Variable cause) { +} diff --git a/core/src/main/java/de/jplag/normalization/DependencyType.java b/core/src/main/java/de/jplag/normalization/DependencyType.java index af2b6c672..9109c84c8 100644 --- a/core/src/main/java/de/jplag/normalization/DependencyType.java +++ b/core/src/main/java/de/jplag/normalization/DependencyType.java @@ -2,5 +2,6 @@ public enum DependencyType { DATA, - ORDER + ORDER, + CONTROL } diff --git a/core/src/main/java/de/jplag/normalization/GraphConstructor.java b/core/src/main/java/de/jplag/normalization/GraphConstructor.java new file mode 100644 index 000000000..91246eb34 --- /dev/null +++ b/core/src/main/java/de/jplag/normalization/GraphConstructor.java @@ -0,0 +1,98 @@ +package de.jplag.normalization; + +import java.util.Collection; +import java.util.HashMap; +import java.util.LinkedList; 
+import java.util.List; +import java.util.Map; + +import org.jgrapht.graph.SimpleDirectedGraph; + +import de.jplag.semantics.Variable; + +public class GraphConstructor { + private SimpleDirectedGraph graph; + private int loopCount; + private Collection controlAffected; + private TokenGroup lastControl; + private Map> variableReads; + private Map> variableWrites; + private TokenGroup current; + + public GraphConstructor(List tokenGroups) { + graph = new SimpleDirectedGraph<>(Dependency.class); + loopCount = 0; + controlAffected = new LinkedList<>(); + lastControl = null; + variableReads = new HashMap<>(); + variableWrites = new HashMap<>(); + for (TokenGroup current : tokenGroups) { + graph.addVertex(current); + this.current = current; + processLoops(); + processControl(); + processReads(); + processWrites(); + } + } + + public SimpleDirectedGraph get() { + return graph; + } + + private void processLoops() { + if (current.semantics.loopBegin()) + loopCount++; + if (current.semantics.loopEnd()) + loopCount--; + } + + private void processControl() { + if (current.semantics.control()) { + addCurrentEdges(controlAffected, DependencyType.CONTROL, null); + controlAffected.clear(); + lastControl = current; + } else { + addCurrentEdge(lastControl, DependencyType.CONTROL, null); + } + controlAffected.add(current); + } + + private void processReads() { + for (Variable r : current.semantics.reads()) { + addCurrentEdgesVar(DependencyType.DATA, r, variableWrites); + addVarToMap(r, variableReads); + } + } + + private void processWrites() { + DependencyType writeToReadDependencyType = loopCount > 0 ? 
DependencyType.DATA : DependencyType.ORDER; + for (Variable w : current.semantics.writes()) { + addCurrentEdgesVar(DependencyType.ORDER, w, variableWrites); + addCurrentEdgesVar(writeToReadDependencyType, w, variableReads); + addVarToMap(w, variableWrites); + } + } + + private void addCurrentEdgesVar(DependencyType type, Variable var, Map> varMap) { + addCurrentEdges(varMap.getOrDefault(var, new LinkedList<>()), type, var); + } + + private void addCurrentEdges(Collection starts, DependencyType type, Variable cause) { + starts.forEach(s -> addCurrentEdge(s, type, cause)); + } + + private void addCurrentEdge(TokenGroup start, DependencyType type, Variable cause) { + Dependency dependency = graph.getEdge(start, current); + if (dependency == null) { + dependency = new Dependency(); + graph.addEdge(start, current, dependency); + } + dependency.addItem(type, cause); + } + + private void addVarToMap(Variable var, Map> varMap) { + varMap.putIfAbsent(var, new LinkedList<>()); + varMap.get(var).add(current); + } +} diff --git a/core/src/main/java/de/jplag/normalization/Normalizer.java b/core/src/main/java/de/jplag/normalization/Normalizer.java index d45dc5677..298a6fc5d 100644 --- a/core/src/main/java/de/jplag/normalization/Normalizer.java +++ b/core/src/main/java/de/jplag/normalization/Normalizer.java @@ -6,7 +6,7 @@ import java.util.stream.Collectors; import org.jgrapht.Graphs; -import org.jgrapht.graph.DirectedMultigraph; +import org.jgrapht.graph.SimpleDirectedGraph; import de.jplag.Token; @@ -17,26 +17,12 @@ private Normalizer() { public static List normalize(List tokens) { List tokenGroups = TokenGroup.group(tokens); - List originalTokenGroups = new LinkedList<>(tokenGroups); - DirectedMultigraph graph = constructGraph(tokenGroups); + SimpleDirectedGraph graph = new GraphConstructor(tokenGroups).get(); tokenGroups = linearizeGraph(graph); - assert tokenGroups.equals(originalTokenGroups); return TokenGroup.ungroup(tokenGroups); } - private static DirectedMultigraph 
constructGraph(List tokenGroups) { - DirectedMultigraph graph = new DirectedMultigraph<>(Dependency.class); - TokenGroup startGroup = tokenGroups.remove(0); - graph.addVertex(startGroup); - for (TokenGroup endGroup : tokenGroups) { - graph.addVertex(endGroup); - graph.addEdge(startGroup, endGroup, new Dependency(DependencyType.DATA, null)); - startGroup = endGroup; - } - return graph; - } - - private static List linearizeGraph(DirectedMultigraph graph) { + private static List linearizeGraph(SimpleDirectedGraph graph) { PriorityQueue roots = graph.vertexSet().stream() // .filter(v -> !Graphs.vertexHasPredecessors(graph, v)) // .collect(Collectors.toCollection(PriorityQueue::new)); @@ -47,7 +33,7 @@ private static List linearizeGraph(DirectedMultigraph { List tokens; - TokenSemantics joinedSemantics; + TokenSemantics semantics; public TokenGroup(List tokens) { this.tokens = Collections.unmodifiableList(tokens); - this.joinedSemantics = TokenSemantics.join(tokens.stream().map(Token::getSemantics).toList()); + this.semantics = TokenSemantics.join(tokens.stream().map(Token::getSemantics).toList()); } public static List group(List tokens) { List tokenGroups = new LinkedList<>(); List groupTokens = new LinkedList<>(); - int currentLine = 0; + int currentLine = tokens.get(0).getLine(); for (Token t : tokens) { - groupTokens.add(t); if (t.getLine() != currentLine) { currentLine = t.getLine(); tokenGroups.add(new TokenGroup(new LinkedList<>(groupTokens))); groupTokens.clear(); } + groupTokens.add(t); } + tokenGroups.add(new TokenGroup(new LinkedList<>(groupTokens))); return tokenGroups; } diff --git a/language-api/src/main/java/de/jplag/semantics/Variable.java b/language-api/src/main/java/de/jplag/semantics/Variable.java index a9e87a419..11b196896 100644 --- a/language-api/src/main/java/de/jplag/semantics/Variable.java +++ b/language-api/src/main/java/de/jplag/semantics/Variable.java @@ -12,9 +12,4 @@ public Variable(Name name) { public String toString() { return name + "[" 
+ id + "]"; } - - @Override - public int hashCode() { - return id.hashCode(); - } } diff --git a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java index e3287c76b..a8fc1bc84 100644 --- a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java +++ b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java @@ -623,6 +623,9 @@ public Void visitConditionalExpression(ConditionalExpressionTree node, TokenSema public Void visitMethodInvocation(MethodInvocationTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); semantics = new TokenSemanticsBuilder().critical().control().build(); + for (Variable mv : memberVariables.values()) { + semantics.addRead(mv); + } addToken(JavaTokenType.J_APPLY, start, positions.getEndPosition(ast, node.getMethodSelect()) - start, semantics); scan(node.getTypeArguments(), semantics); // to differentiate bar() and this.bar() (ignore) from bar.foo() (don't ignore), different namespace for variables and From fdcd1946d6a6f67056df37ca914f043e5e83ef42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Fri, 23 Dec 2022 23:38:24 +0100 Subject: [PATCH 016/132] Fix bugs --- .../de/jplag/normalization/TokenGroup.java | 38 +++++++++++++++---- .../de/jplag/semantics/TokenSemantics.java | 16 +++++--- 2 files changed, 42 insertions(+), 12 deletions(-) diff --git a/core/src/main/java/de/jplag/normalization/TokenGroup.java b/core/src/main/java/de/jplag/normalization/TokenGroup.java index 11099e3b8..ab26e7773 100644 --- a/core/src/main/java/de/jplag/normalization/TokenGroup.java +++ b/core/src/main/java/de/jplag/normalization/TokenGroup.java @@ -10,12 +10,36 @@ public class TokenGroup implements Comparable { - List tokens; - TokenSemantics semantics; + private List tokens; + private int line; + private TokenSemantics semantics; + private boolean keep; - public 
TokenGroup(List tokens) { + public TokenGroup(List tokens, int line) { this.tokens = Collections.unmodifiableList(tokens); + this.line = line; this.semantics = TokenSemantics.join(tokens.stream().map(Token::getSemantics).toList()); + this.keep = semantics.critical() || semantics.control(); + } + + public List tokens() { + return tokens; + } + + public int line() { + return line; + } + + public TokenSemantics semantics() { + return semantics; + } + + public boolean keep() { + return keep; + } + + public void markKeep() { + keep = true; } public static List group(List tokens) { @@ -24,13 +48,13 @@ public static List group(List tokens) { int currentLine = tokens.get(0).getLine(); for (Token t : tokens) { if (t.getLine() != currentLine) { - currentLine = t.getLine(); - tokenGroups.add(new TokenGroup(new LinkedList<>(groupTokens))); + tokenGroups.add(new TokenGroup(new LinkedList<>(groupTokens), currentLine)); groupTokens.clear(); + currentLine = t.getLine(); } groupTokens.add(t); } - tokenGroups.add(new TokenGroup(new LinkedList<>(groupTokens))); + tokenGroups.add(new TokenGroup(new LinkedList<>(groupTokens), currentLine)); return tokenGroups; } @@ -59,6 +83,6 @@ public int compareTo(TokenGroup other) { @Override public String toString() { - return String.join(" ", tokens.stream().map(Token::toString).toList()); + return line + ": " + String.join(" ", tokens.stream().map(Token::toString).toList()); } } diff --git a/language-api/src/main/java/de/jplag/semantics/TokenSemantics.java b/language-api/src/main/java/de/jplag/semantics/TokenSemantics.java index 462aa44dd..4e3d7fc5a 100644 --- a/language-api/src/main/java/de/jplag/semantics/TokenSemantics.java +++ b/language-api/src/main/java/de/jplag/semantics/TokenSemantics.java @@ -1,6 +1,7 @@ package de.jplag.semantics; import java.util.Collections; +import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Set; @@ -39,6 +40,8 @@ public Set writes() { } public static TokenSemantics 
join(List semanticsList) { + Set reads = new HashSet<>(); + Set writes = new HashSet<>(); TokenSemanticsBuilder semanticsBuilder = new TokenSemanticsBuilder(); for (TokenSemantics semantics : semanticsList) { if (semantics.critical) @@ -49,12 +52,15 @@ public static TokenSemantics join(List semanticsList) { semanticsBuilder.loopBegin(); if (semantics.loopEnd) semanticsBuilder.loopEnd(); - for (Variable r : semantics.reads) - semantics.addRead(r); - for (Variable w : semantics.writes) - semantics.addWrite(w); + reads.addAll(semantics.reads); + writes.addAll(semantics.writes); } - return semanticsBuilder.build(); + TokenSemantics semantics = semanticsBuilder.build(); + for (Variable r : reads) + semantics.addRead(r); + for (Variable w : writes) + semantics.addWrite(w); + return semantics; } @Override From 0b6eac4cd2fb822281d78f8cfa612f4afa04f741 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Fri, 23 Dec 2022 23:38:41 +0100 Subject: [PATCH 017/132] Add normalization graph linearization --- .../de/jplag/normalization/Dependency.java | 16 ++++- .../jplag/normalization/DependencyType.java | 4 +- .../normalization/NormalizationGraph.java | 67 +++++++++++++++++++ ...ava => NormalizationGraphConstructor.java} | 31 ++++++--- .../de/jplag/normalization/Normalizer.java | 33 +-------- .../java/TokenGeneratingTreeScanner.java | 15 +++-- 6 files changed, 117 insertions(+), 49 deletions(-) create mode 100644 core/src/main/java/de/jplag/normalization/NormalizationGraph.java rename core/src/main/java/de/jplag/normalization/{GraphConstructor.java => NormalizationGraphConstructor.java} (75%) diff --git a/core/src/main/java/de/jplag/normalization/Dependency.java b/core/src/main/java/de/jplag/normalization/Dependency.java index 4fc50426d..a869a0819 100644 --- a/core/src/main/java/de/jplag/normalization/Dependency.java +++ b/core/src/main/java/de/jplag/normalization/Dependency.java @@ -5,16 +5,30 @@ import de.jplag.semantics.Variable; -// only purpose is 
debugging/explainability, edges could be anonymous otherwise // not a record because JGraphT wants unique edges and we don't... public class Dependency { private Set items; + private boolean isData; + private boolean isDataThroughLoop; public Dependency() { items = new HashSet<>(); + isData = false; + } + + public boolean isData() { + return isData; + } + + public boolean isDataThroughLoop() { + return isDataThroughLoop; } public void addItem(DependencyType type, Variable cause) { + if (type == DependencyType.DATA) + isData = true; + if (type == DependencyType.DATA_THROUGH_LOOP) + isDataThroughLoop = true; items.add(new DependencyItem(type, cause)); } } diff --git a/core/src/main/java/de/jplag/normalization/DependencyType.java b/core/src/main/java/de/jplag/normalization/DependencyType.java index 9109c84c8..227d1fd67 100644 --- a/core/src/main/java/de/jplag/normalization/DependencyType.java +++ b/core/src/main/java/de/jplag/normalization/DependencyType.java @@ -2,6 +2,8 @@ public enum DependencyType { DATA, + DATA_THROUGH_LOOP, ORDER, - CONTROL + CONTROL, + CRITICAL } diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java new file mode 100644 index 000000000..e7b4cbabd --- /dev/null +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java @@ -0,0 +1,67 @@ +package de.jplag.normalization; + +import java.util.Deque; +import java.util.LinkedList; +import java.util.List; +import java.util.PriorityQueue; +import java.util.stream.Collectors; + +import org.jgrapht.Graphs; +import org.jgrapht.graph.SimpleDirectedGraph; + +public class NormalizationGraph { + private SimpleDirectedGraph graph; + + public NormalizationGraph(List tokenGroups) { + graph = new NormalizationGraphConstructor(tokenGroups).get(); + } + + public List linearize() { + spreadKeep(); + PriorityQueue roots = graph.vertexSet().stream() // + .filter(v -> !Graphs.vertexHasPredecessors(graph, v)) // + 
.collect(Collectors.toCollection(PriorityQueue::new)); + List tokenGroups = new LinkedList<>(); + while (!roots.isEmpty()) { + PriorityQueue newRoots = new PriorityQueue<>(); + do { + TokenGroup group = roots.poll(); + if (!group.keep()) { + System.out.println("removed " + group); + } + tokenGroups.add(group); + for (TokenGroup successorGroup : Graphs.successorListOf(graph, group)) { + graph.removeEdge(group, successorGroup); + if (!Graphs.vertexHasPredecessors(graph, successorGroup)) { + newRoots.add(successorGroup); + } + } + } while (!roots.isEmpty()); + roots = newRoots; + } + assert tokenGroups.size() == graph.vertexSet().size(); + return tokenGroups; + } + + private void spreadKeep() { + Deque visit = new LinkedList<>(graph.vertexSet().stream().filter(TokenGroup::keep).toList()); + while (!visit.isEmpty()) { + TokenGroup current = visit.pop(); + for (TokenGroup pred : Graphs.predecessorListOf(graph, current)) { // performance? + if (graph.getEdge(pred, current).isData() && !pred.keep()) { + pred.markKeep(); + visit.add(pred); + } + } + // not great performance-wise but I doubt it matters at this stage... 
+ // could instead insert data-through-loop edges the other way around, which arguably makes more sense semantically + // and turn them around here, but too much code for me to bother right now + for (TokenGroup succ : Graphs.successorListOf(graph, current)) { + if (graph.getEdge(current, succ).isDataThroughLoop() && !succ.keep()) { + succ.markKeep(); + visit.add(succ); + } + } + } + } +} diff --git a/core/src/main/java/de/jplag/normalization/GraphConstructor.java b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java similarity index 75% rename from core/src/main/java/de/jplag/normalization/GraphConstructor.java rename to core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java index 91246eb34..2175cdc19 100644 --- a/core/src/main/java/de/jplag/normalization/GraphConstructor.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java @@ -10,20 +10,22 @@ import de.jplag.semantics.Variable; -public class GraphConstructor { +public class NormalizationGraphConstructor { private SimpleDirectedGraph graph; private int loopCount; private Collection controlAffected; private TokenGroup lastControl; + private TokenGroup lastCritical; private Map> variableReads; private Map> variableWrites; private TokenGroup current; - public GraphConstructor(List tokenGroups) { + public NormalizationGraphConstructor(List tokenGroups) { graph = new SimpleDirectedGraph<>(Dependency.class); loopCount = 0; controlAffected = new LinkedList<>(); lastControl = null; + lastCritical = null; variableReads = new HashMap<>(); variableWrites = new HashMap<>(); for (TokenGroup current : tokenGroups) { @@ -31,8 +33,11 @@ public GraphConstructor(List tokenGroups) { this.current = current; processLoops(); processControl(); + processCritical(); processReads(); processWrites(); + current.semantics().reads().forEach(r -> addVarToMap(r, variableReads)); + current.semantics().writes().forEach(w -> addVarToMap(w, variableWrites)); } } @@ 
-41,33 +46,39 @@ public SimpleDirectedGraph get() { } private void processLoops() { - if (current.semantics.loopBegin()) + if (current.semantics().loopBegin()) loopCount++; - if (current.semantics.loopEnd()) + if (current.semantics().loopEnd()) loopCount--; } private void processControl() { - if (current.semantics.control()) { + if (current.semantics().control()) { addCurrentEdges(controlAffected, DependencyType.CONTROL, null); controlAffected.clear(); lastControl = current; - } else { + } else if (lastControl != null) { addCurrentEdge(lastControl, DependencyType.CONTROL, null); } controlAffected.add(current); } + private void processCritical() { + if (current.semantics().critical() && lastCritical != null) { + addCurrentEdge(lastCritical, DependencyType.CRITICAL, null); + lastCritical = current; + } + } + private void processReads() { - for (Variable r : current.semantics.reads()) { + for (Variable r : current.semantics().reads()) { addCurrentEdgesVar(DependencyType.DATA, r, variableWrites); - addVarToMap(r, variableReads); } } private void processWrites() { - DependencyType writeToReadDependencyType = loopCount > 0 ? DependencyType.DATA : DependencyType.ORDER; - for (Variable w : current.semantics.writes()) { + DependencyType writeToReadDependencyType = loopCount > 0 ? 
DependencyType.DATA_THROUGH_LOOP : DependencyType.ORDER; + for (Variable w : current.semantics().writes()) { addCurrentEdgesVar(DependencyType.ORDER, w, variableWrites); addCurrentEdgesVar(writeToReadDependencyType, w, variableReads); addVarToMap(w, variableWrites); diff --git a/core/src/main/java/de/jplag/normalization/Normalizer.java b/core/src/main/java/de/jplag/normalization/Normalizer.java index 298a6fc5d..a7775c61e 100644 --- a/core/src/main/java/de/jplag/normalization/Normalizer.java +++ b/core/src/main/java/de/jplag/normalization/Normalizer.java @@ -1,12 +1,6 @@ package de.jplag.normalization; -import java.util.LinkedList; import java.util.List; -import java.util.PriorityQueue; -import java.util.stream.Collectors; - -import org.jgrapht.Graphs; -import org.jgrapht.graph.SimpleDirectedGraph; import de.jplag.Token; @@ -17,31 +11,8 @@ private Normalizer() { public static List normalize(List tokens) { List tokenGroups = TokenGroup.group(tokens); - SimpleDirectedGraph graph = new GraphConstructor(tokenGroups).get(); - tokenGroups = linearizeGraph(graph); + NormalizationGraph graph = new NormalizationGraph(tokenGroups); + tokenGroups = graph.linearize(); return TokenGroup.ungroup(tokenGroups); } - - private static List linearizeGraph(SimpleDirectedGraph graph) { - PriorityQueue roots = graph.vertexSet().stream() // - .filter(v -> !Graphs.vertexHasPredecessors(graph, v)) // - .collect(Collectors.toCollection(PriorityQueue::new)); - List tokenGroups = new LinkedList<>(); - while (!roots.isEmpty()) { - PriorityQueue newRoots = new PriorityQueue<>(); - do { - TokenGroup group = roots.poll(); - tokenGroups.add(group); - for (TokenGroup successorGroup : Graphs.successorListOf(graph, group)) { - graph.removeEdge(group, successorGroup); - if (!Graphs.vertexHasPredecessors(graph, successorGroup)) { - newRoots.add(successorGroup); - } - } - } while (!roots.isEmpty()); - roots = newRoots; - } - assert tokenGroups.size() == graph.vertexSet().size(); - return tokenGroups; - } 
} diff --git a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java index a8fc1bc84..74f8ee8fb 100644 --- a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java +++ b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java @@ -153,12 +153,12 @@ private boolean isVariable(ExpressionTree expressionTree) { || (expressionTree.getKind() == Tree.Kind.MEMBER_SELECT && isOwnMemberSelect((MemberSelectTree) expressionTree)); } - private boolean isNotExistingLocalVariable(ExpressionTree expressionTree) { + private boolean isNotExistingVariable(ExpressionTree expressionTree) { if (expressionTree.getKind() != Tree.Kind.IDENTIFIER) { return true; } - Name variableName = ((IdentifierTree) expressionTree).getName(); - return !localVariables.containsKey(variableName); + Name name = ((IdentifierTree) expressionTree).getName(); + return getVariable(name) == null; } private boolean isOwnMemberSelect(MemberSelectTree memberSelect) { @@ -289,6 +289,9 @@ public Void visitMethod(MethodTree node, TokenSemantics semantics) { addToken(JavaTokenType.J_METHOD_BEGIN, start, node.getName().length(), semantics); super.visitMethod(node, null); semantics = new TokenSemanticsBuilder().control().critical().build(); + for (Variable mv : memberVariables.values()) { + semantics.addRead(mv); + } addToken(JavaTokenType.J_METHOD_END, end, 1, semantics); return null; } @@ -550,7 +553,7 @@ private TokenSemantics conditionalCriticalSemantics(ExpressionTree expressionTre @Override public Void visitAssignment(AssignmentTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - semantics = conditionalCriticalSemantics(node.getVariable(), this::isNotExistingLocalVariable); + semantics = conditionalCriticalSemantics(node.getVariable(), this::isNotExistingVariable); addToken(JavaTokenType.J_ASSIGN, start, 1, semantics); nextOperation = 
NextOperation.WRITE; super.visitAssignment(node, semantics); @@ -560,7 +563,7 @@ public Void visitAssignment(AssignmentTree node, TokenSemantics semantics) { @Override public Void visitCompoundAssignment(CompoundAssignmentTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - semantics = conditionalCriticalSemantics(node.getVariable(), this::isNotExistingLocalVariable); + semantics = conditionalCriticalSemantics(node.getVariable(), this::isNotExistingVariable); addToken(JavaTokenType.J_ASSIGN, start, 1, semantics); nextOperation = NextOperation.READ_WRITE; super.visitCompoundAssignment(node, semantics); @@ -569,7 +572,7 @@ public Void visitCompoundAssignment(CompoundAssignmentTree node, TokenSemantics @Override public Void visitUnary(UnaryTree node, TokenSemantics semantics) { - semantics = conditionalCriticalSemantics(node.getExpression(), this::isNotExistingLocalVariable); + semantics = conditionalCriticalSemantics(node.getExpression(), this::isNotExistingVariable); if (Set.of(Tree.Kind.PREFIX_INCREMENT, Tree.Kind.POSTFIX_INCREMENT, Tree.Kind.PREFIX_DECREMENT, Tree.Kind.POSTFIX_DECREMENT) .contains(node.getKind())) { long start = positions.getStartPosition(ast, node); From 856d95e560ae713b0c300f77cc9472fb9c060b5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Sun, 25 Dec 2022 11:53:27 +0100 Subject: [PATCH 018/132] Change normalization interface --- core/src/main/java/de/jplag/Submission.java | 4 +- .../de/jplag/normalization/Dependency.java | 10 ++--- .../jplag/normalization/DependencyItem.java | 2 +- .../jplag/normalization/DependencyType.java | 2 +- .../normalization/NormalizationGraph.java | 15 ++++---- .../NormalizationGraphConstructor.java | 36 +++++++++++------- .../de/jplag/normalization/Normalizer.java | 18 --------- .../de/jplag/normalization/TokenGroup.java | 37 +++---------------- 8 files changed, 46 insertions(+), 78 deletions(-) delete mode 100644 
core/src/main/java/de/jplag/normalization/Normalizer.java diff --git a/core/src/main/java/de/jplag/Submission.java b/core/src/main/java/de/jplag/Submission.java index 1910cef2a..7ca9cd484 100644 --- a/core/src/main/java/de/jplag/Submission.java +++ b/core/src/main/java/de/jplag/Submission.java @@ -13,7 +13,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import de.jplag.normalization.Normalizer; +import de.jplag.normalization.NormalizationGraph; /** * Represents a single submission. A submission can contain multiple files. @@ -269,6 +269,6 @@ private static File createErrorDirectory(String... subdirectoryNames) { } void normalize() { - tokenList = Normalizer.normalize(tokenList); + tokenList = new NormalizationGraph(tokenList).linearize(); } } diff --git a/core/src/main/java/de/jplag/normalization/Dependency.java b/core/src/main/java/de/jplag/normalization/Dependency.java index a869a0819..cfae3620b 100644 --- a/core/src/main/java/de/jplag/normalization/Dependency.java +++ b/core/src/main/java/de/jplag/normalization/Dependency.java @@ -6,25 +6,25 @@ import de.jplag.semantics.Variable; // not a record because JGraphT wants unique edges and we don't... 
-public class Dependency { +class Dependency { private Set items; private boolean isData; private boolean isDataThroughLoop; - public Dependency() { + Dependency() { items = new HashSet<>(); isData = false; } - public boolean isData() { + boolean isData() { return isData; } - public boolean isDataThroughLoop() { + boolean isDataThroughLoop() { return isDataThroughLoop; } - public void addItem(DependencyType type, Variable cause) { + void addItem(DependencyType type, Variable cause) { if (type == DependencyType.DATA) isData = true; if (type == DependencyType.DATA_THROUGH_LOOP) diff --git a/core/src/main/java/de/jplag/normalization/DependencyItem.java b/core/src/main/java/de/jplag/normalization/DependencyItem.java index aefca06ac..7219557ad 100644 --- a/core/src/main/java/de/jplag/normalization/DependencyItem.java +++ b/core/src/main/java/de/jplag/normalization/DependencyItem.java @@ -2,5 +2,5 @@ import de.jplag.semantics.Variable; -public record DependencyItem(DependencyType type, Variable cause) { +record DependencyItem(DependencyType type, Variable cause) { } diff --git a/core/src/main/java/de/jplag/normalization/DependencyType.java b/core/src/main/java/de/jplag/normalization/DependencyType.java index 227d1fd67..a0ccd17a2 100644 --- a/core/src/main/java/de/jplag/normalization/DependencyType.java +++ b/core/src/main/java/de/jplag/normalization/DependencyType.java @@ -1,6 +1,6 @@ package de.jplag.normalization; -public enum DependencyType { +enum DependencyType { DATA, DATA_THROUGH_LOOP, ORDER, diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java index e7b4cbabd..1954944ca 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java @@ -9,19 +9,21 @@ import org.jgrapht.Graphs; import org.jgrapht.graph.SimpleDirectedGraph; +import de.jplag.Token; + public class NormalizationGraph { 
private SimpleDirectedGraph graph; - public NormalizationGraph(List tokenGroups) { - graph = new NormalizationGraphConstructor(tokenGroups).get(); + public NormalizationGraph(List tokens) { + graph = new NormalizationGraphConstructor(tokens).get(); } - public List linearize() { + public List linearize() { spreadKeep(); PriorityQueue roots = graph.vertexSet().stream() // .filter(v -> !Graphs.vertexHasPredecessors(graph, v)) // .collect(Collectors.toCollection(PriorityQueue::new)); - List tokenGroups = new LinkedList<>(); + List tokens = new LinkedList<>(); while (!roots.isEmpty()) { PriorityQueue newRoots = new PriorityQueue<>(); do { @@ -29,7 +31,7 @@ public List linearize() { if (!group.keep()) { System.out.println("removed " + group); } - tokenGroups.add(group); + tokens.addAll(group.tokens()); for (TokenGroup successorGroup : Graphs.successorListOf(graph, group)) { graph.removeEdge(group, successorGroup); if (!Graphs.vertexHasPredecessors(graph, successorGroup)) { @@ -39,8 +41,7 @@ public List linearize() { } while (!roots.isEmpty()); roots = newRoots; } - assert tokenGroups.size() == graph.vertexSet().size(); - return tokenGroups; + return tokens; } private void spreadKeep() { diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java index 2175cdc19..6d65f5a33 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java @@ -8,9 +8,10 @@ import org.jgrapht.graph.SimpleDirectedGraph; +import de.jplag.Token; import de.jplag.semantics.Variable; -public class NormalizationGraphConstructor { +class NormalizationGraphConstructor { private SimpleDirectedGraph graph; private int loopCount; private Collection controlAffected; @@ -20,7 +21,7 @@ public class NormalizationGraphConstructor { private Map> variableWrites; private TokenGroup current; - public 
NormalizationGraphConstructor(List tokenGroups) { + NormalizationGraphConstructor(List tokens) { graph = new SimpleDirectedGraph<>(Dependency.class); loopCount = 0; controlAffected = new LinkedList<>(); @@ -28,20 +29,29 @@ public NormalizationGraphConstructor(List tokenGroups) { lastCritical = null; variableReads = new HashMap<>(); variableWrites = new HashMap<>(); - for (TokenGroup current : tokenGroups) { - graph.addVertex(current); - this.current = current; - processLoops(); - processControl(); - processCritical(); - processReads(); - processWrites(); - current.semantics().reads().forEach(r -> addVarToMap(r, variableReads)); - current.semantics().writes().forEach(w -> addVarToMap(w, variableWrites)); + List unitTokens = new LinkedList<>(); + int currentLine = tokens.get(0).getLine(); + for (Token token : tokens) { + if (token.getLine() != currentLine) { // if (tokenGroupEnd) + TokenGroup group = new TokenGroup(new LinkedList<>(unitTokens), currentLine); + graph.addVertex(group); + unitTokens.clear(); + currentLine = token.getLine(); + this.current = group; + + processLoops(); + processControl(); + processCritical(); + processReads(); + processWrites(); + current.semantics().reads().forEach(r -> addVarToMap(r, variableReads)); + current.semantics().writes().forEach(w -> addVarToMap(w, variableWrites)); + } + unitTokens.add(token); } } - public SimpleDirectedGraph get() { + SimpleDirectedGraph get() { return graph; } diff --git a/core/src/main/java/de/jplag/normalization/Normalizer.java b/core/src/main/java/de/jplag/normalization/Normalizer.java deleted file mode 100644 index a7775c61e..000000000 --- a/core/src/main/java/de/jplag/normalization/Normalizer.java +++ /dev/null @@ -1,18 +0,0 @@ -package de.jplag.normalization; - -import java.util.List; - -import de.jplag.Token; - -public class Normalizer { - - private Normalizer() { - } - - public static List normalize(List tokens) { - List tokenGroups = TokenGroup.group(tokens); - NormalizationGraph graph = new 
NormalizationGraph(tokenGroups); - tokenGroups = graph.linearize(); - return TokenGroup.ungroup(tokenGroups); - } -} diff --git a/core/src/main/java/de/jplag/normalization/TokenGroup.java b/core/src/main/java/de/jplag/normalization/TokenGroup.java index ab26e7773..7190a86a0 100644 --- a/core/src/main/java/de/jplag/normalization/TokenGroup.java +++ b/core/src/main/java/de/jplag/normalization/TokenGroup.java @@ -2,34 +2,29 @@ import java.util.Collections; import java.util.Iterator; -import java.util.LinkedList; import java.util.List; import de.jplag.Token; import de.jplag.semantics.TokenSemantics; -public class TokenGroup implements Comparable { +class TokenGroup implements Comparable { private List tokens; - private int line; + private int beginLine; private TokenSemantics semantics; private boolean keep; - public TokenGroup(List tokens, int line) { + TokenGroup(List tokens, int beginLine) { this.tokens = Collections.unmodifiableList(tokens); - this.line = line; + this.beginLine = beginLine; this.semantics = TokenSemantics.join(tokens.stream().map(Token::getSemantics).toList()); - this.keep = semantics.critical() || semantics.control(); + keep = semantics.critical() || semantics.control(); } public List tokens() { return tokens; } - public int line() { - return line; - } - public TokenSemantics semantics() { return semantics; } @@ -42,26 +37,6 @@ public void markKeep() { keep = true; } - public static List group(List tokens) { - List tokenGroups = new LinkedList<>(); - List groupTokens = new LinkedList<>(); - int currentLine = tokens.get(0).getLine(); - for (Token t : tokens) { - if (t.getLine() != currentLine) { - tokenGroups.add(new TokenGroup(new LinkedList<>(groupTokens), currentLine)); - groupTokens.clear(); - currentLine = t.getLine(); - } - groupTokens.add(t); - } - tokenGroups.add(new TokenGroup(new LinkedList<>(groupTokens), currentLine)); - return tokenGroups; - } - - public static List ungroup(List tokenGroups) { - return tokenGroups.stream().flatMap(tg 
-> tg.tokens.stream()).toList(); - } - private int tokenOrdinal(Token token) { return ((Enum) token.getType()).ordinal(); // reflects the order the enums were declared in } @@ -83,6 +58,6 @@ public int compareTo(TokenGroup other) { @Override public String toString() { - return line + ": " + String.join(" ", tokens.stream().map(Token::toString).toList()); + return beginLine + ": " + String.join(" ", tokens.stream().map(Token::toString).toList()); } } From dcf0c428169d0c5a85c2e95570ccda6dfa202acb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Mon, 26 Dec 2022 12:58:01 +0100 Subject: [PATCH 019/132] Fix scope bug --- .../normalization/NormalizationGraph.java | 26 +++++++------- .../NormalizationGraphConstructor.java | 36 +++++++++---------- .../{TokenGroup.java => TokenLine.java} | 12 +++---- .../java/TokenGeneratingTreeScanner.java | 33 +++++++++-------- 4 files changed, 56 insertions(+), 51 deletions(-) rename core/src/main/java/de/jplag/normalization/{TokenGroup.java => TokenLine.java} (82%) diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java index 1954944ca..709ff6384 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java @@ -12,7 +12,7 @@ import de.jplag.Token; public class NormalizationGraph { - private SimpleDirectedGraph graph; + private SimpleDirectedGraph graph; public NormalizationGraph(List tokens) { graph = new NormalizationGraphConstructor(tokens).get(); @@ -20,20 +20,20 @@ public NormalizationGraph(List tokens) { public List linearize() { spreadKeep(); - PriorityQueue roots = graph.vertexSet().stream() // + PriorityQueue roots = graph.vertexSet().stream() // .filter(v -> !Graphs.vertexHasPredecessors(graph, v)) // .collect(Collectors.toCollection(PriorityQueue::new)); List tokens = new LinkedList<>(); while (!roots.isEmpty()) { - PriorityQueue 
newRoots = new PriorityQueue<>(); + PriorityQueue newRoots = new PriorityQueue<>(); do { - TokenGroup group = roots.poll(); - if (!group.keep()) { - System.out.println("removed " + group); + TokenLine tokenLine = roots.poll(); + if (!tokenLine.keep()) { + System.out.println("removed " + tokenLine); } - tokens.addAll(group.tokens()); - for (TokenGroup successorGroup : Graphs.successorListOf(graph, group)) { - graph.removeEdge(group, successorGroup); + tokens.addAll(tokenLine.tokens()); + for (TokenLine successorGroup : Graphs.successorListOf(graph, tokenLine)) { + graph.removeEdge(tokenLine, successorGroup); if (!Graphs.vertexHasPredecessors(graph, successorGroup)) { newRoots.add(successorGroup); } @@ -45,10 +45,10 @@ public List linearize() { } private void spreadKeep() { - Deque visit = new LinkedList<>(graph.vertexSet().stream().filter(TokenGroup::keep).toList()); + Deque visit = new LinkedList<>(graph.vertexSet().stream().filter(TokenLine::keep).toList()); while (!visit.isEmpty()) { - TokenGroup current = visit.pop(); - for (TokenGroup pred : Graphs.predecessorListOf(graph, current)) { // performance? + TokenLine current = visit.pop(); + for (TokenLine pred : Graphs.predecessorListOf(graph, current)) { // performance? if (graph.getEdge(pred, current).isData() && !pred.keep()) { pred.markKeep(); visit.add(pred); @@ -57,7 +57,7 @@ private void spreadKeep() { // not great performance-wise but I doubt it matters at this stage... 
// could instead insert data-through-loop edges the other way around, which arguably makes more sense semantically // and turn them around here, but too much code for me to bother right now - for (TokenGroup succ : Graphs.successorListOf(graph, current)) { + for (TokenLine succ : Graphs.successorListOf(graph, current)) { if (graph.getEdge(current, succ).isDataThroughLoop() && !succ.keep()) { succ.markKeep(); visit.add(succ); diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java index 6d65f5a33..c3ac6777f 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java @@ -12,14 +12,14 @@ import de.jplag.semantics.Variable; class NormalizationGraphConstructor { - private SimpleDirectedGraph graph; + private SimpleDirectedGraph graph; private int loopCount; - private Collection controlAffected; - private TokenGroup lastControl; - private TokenGroup lastCritical; - private Map> variableReads; - private Map> variableWrites; - private TokenGroup current; + private Collection controlAffected; + private TokenLine lastControl; + private TokenLine lastCritical; + private Map> variableReads; + private Map> variableWrites; + private TokenLine current; NormalizationGraphConstructor(List tokens) { graph = new SimpleDirectedGraph<>(Dependency.class); @@ -30,14 +30,14 @@ class NormalizationGraphConstructor { variableReads = new HashMap<>(); variableWrites = new HashMap<>(); List unitTokens = new LinkedList<>(); - int currentLine = tokens.get(0).getLine(); + int lineNumber = tokens.get(0).getLine(); for (Token token : tokens) { - if (token.getLine() != currentLine) { // if (tokenGroupEnd) - TokenGroup group = new TokenGroup(new LinkedList<>(unitTokens), currentLine); - graph.addVertex(group); + if (token.getLine() != lineNumber) { // if (tokenGroupEnd) + 
TokenLine tokenLine = new TokenLine(new LinkedList<>(unitTokens), lineNumber); + graph.addVertex(tokenLine); unitTokens.clear(); - currentLine = token.getLine(); - this.current = group; + lineNumber = token.getLine(); + this.current = tokenLine; processLoops(); processControl(); @@ -51,7 +51,7 @@ class NormalizationGraphConstructor { } } - SimpleDirectedGraph get() { + SimpleDirectedGraph get() { return graph; } @@ -95,15 +95,15 @@ private void processWrites() { } } - private void addCurrentEdgesVar(DependencyType type, Variable var, Map> varMap) { + private void addCurrentEdgesVar(DependencyType type, Variable var, Map> varMap) { addCurrentEdges(varMap.getOrDefault(var, new LinkedList<>()), type, var); } - private void addCurrentEdges(Collection starts, DependencyType type, Variable cause) { + private void addCurrentEdges(Collection starts, DependencyType type, Variable cause) { starts.forEach(s -> addCurrentEdge(s, type, cause)); } - private void addCurrentEdge(TokenGroup start, DependencyType type, Variable cause) { + private void addCurrentEdge(TokenLine start, DependencyType type, Variable cause) { Dependency dependency = graph.getEdge(start, current); if (dependency == null) { dependency = new Dependency(); @@ -112,7 +112,7 @@ private void addCurrentEdge(TokenGroup start, DependencyType type, Variable caus dependency.addItem(type, cause); } - private void addVarToMap(Variable var, Map> varMap) { + private void addVarToMap(Variable var, Map> varMap) { varMap.putIfAbsent(var, new LinkedList<>()); varMap.get(var).add(current); } diff --git a/core/src/main/java/de/jplag/normalization/TokenGroup.java b/core/src/main/java/de/jplag/normalization/TokenLine.java similarity index 82% rename from core/src/main/java/de/jplag/normalization/TokenGroup.java rename to core/src/main/java/de/jplag/normalization/TokenLine.java index 7190a86a0..ea69cc9f8 100644 --- a/core/src/main/java/de/jplag/normalization/TokenGroup.java +++ 
b/core/src/main/java/de/jplag/normalization/TokenLine.java @@ -7,16 +7,16 @@ import de.jplag.Token; import de.jplag.semantics.TokenSemantics; -class TokenGroup implements Comparable { +class TokenLine implements Comparable { private List tokens; - private int beginLine; + private int lineNumber; private TokenSemantics semantics; private boolean keep; - TokenGroup(List tokens, int beginLine) { + TokenLine(List tokens, int lineNumber) { this.tokens = Collections.unmodifiableList(tokens); - this.beginLine = beginLine; + this.lineNumber = lineNumber; this.semantics = TokenSemantics.join(tokens.stream().map(Token::getSemantics).toList()); keep = semantics.critical() || semantics.control(); } @@ -42,7 +42,7 @@ private int tokenOrdinal(Token token) { } @Override - public int compareTo(TokenGroup other) { + public int compareTo(TokenLine other) { int sizeComp = Integer.compare(this.tokens.size(), other.tokens.size()); if (sizeComp != 0) return -sizeComp; // bigger size should come first @@ -58,6 +58,6 @@ public int compareTo(TokenGroup other) { @Override public String toString() { - return beginLine + ": " + String.join(" ", tokens.stream().map(Token::toString).toList()); + return lineNumber + ": " + String.join(" ", tokens.stream().map(Token::toString).toList()); } } diff --git a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java index 74f8ee8fb..3be439c16 100644 --- a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java +++ b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java @@ -197,15 +197,9 @@ public void exitLocalScope() { @Override public Void visitBlock(BlockTree node, TokenSemantics semantics) { - // classes are an obvious exception since members are treated differently - Set classKinds = Set.of(Tree.Kind.ENUM, Tree.Kind.INTERFACE, Tree.Kind.RECORD, Tree.Kind.ANNOTATION_TYPE, Tree.Kind.CLASS); - boolean isClass = 
classKinds.contains(node.getKind()); - // for loops are also an exception since a scope can be induced without a block visit (without brackets) - boolean isForLoop = Set.of(Tree.Kind.FOR_LOOP, Tree.Kind.ENHANCED_FOR_LOOP).contains(node.getKind()); - // methods and catches are also an exception since variables can be declared before the block begins - if (!(isClass || isForLoop || Set.of(Tree.Kind.METHOD, Tree.Kind.CATCH).contains(node.getKind()))) { - enterLocalScope(); - } + // kind of weird since in the case of for loops and catches, two scopes are introduced + // but I'm pretty sure that's how Java does it internally as well + enterLocalScope(); long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; semantics = new TokenSemanticsBuilder().control().build(); @@ -213,9 +207,7 @@ public Void visitBlock(BlockTree node, TokenSemantics semantics) { super.visitBlock(node, null); semantics = new TokenSemanticsBuilder().control().build(); addToken(JavaTokenType.J_INIT_END, end, 1, semantics); - if (!(isClass || isForLoop)) { - exitLocalScope(); - } + exitLocalScope(); return null; } @@ -244,7 +236,12 @@ public Void visitClass(ClassTree node, TokenSemantics semantics) { } else if (node.getKind() == Tree.Kind.CLASS) { addToken(JavaTokenType.J_CLASS_BEGIN, start, 5, semantics); } - super.visitClass(node, null); + scan(node.getModifiers(), semantics); + scan(node.getTypeParameters(), semantics); + scan(node.getExtendsClause(), semantics); + scan(node.getImplementsClause(), semantics); + scan(node.getPermitsClause(), semantics); + scan(node.getMembers(), null); JavaTokenType tokenType = switch (node.getKind()) { case ENUM -> JavaTokenType.J_ENUM_END; @@ -287,12 +284,19 @@ public Void visitMethod(MethodTree node, TokenSemantics semantics) { long end = positions.getEndPosition(ast, node) - 1; semantics = new TokenSemanticsBuilder().control().critical().build(); addToken(JavaTokenType.J_METHOD_BEGIN, start, 
node.getName().length(), semantics); - super.visitMethod(node, null); + scan(node.getModifiers(), semantics); + scan(node.getReturnType(), semantics); + scan(node.getTypeParameters(), semantics); + scan(node.getParameters(), semantics); + scan(node.getReceiverParameter(), semantics); + scan(node.getThrows(), semantics); + scan(node.getBody(), null); semantics = new TokenSemanticsBuilder().control().critical().build(); for (Variable mv : memberVariables.values()) { semantics.addRead(mv); } addToken(JavaTokenType.J_METHOD_END, end, 1, semantics); + exitLocalScope(); return null; } @@ -438,6 +442,7 @@ public Void visitCatch(CatchTree node, TokenSemantics semantics) { super.visitCatch(node, null); // can leave this since catch parameter is variable declaration and thus always generates a token semantics = new TokenSemanticsBuilder().control().build(); addToken(JavaTokenType.J_CATCH_END, end, 1, semantics); + exitLocalScope(); return null; } From 3df89fc69c9925fef3f2f2d1a2ba44be2b4572d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Mon, 26 Dec 2022 13:28:53 +0100 Subject: [PATCH 020/132] Add block attributes --- .../de/jplag/semantics/TokenSemantics.java | 29 ++++++++++++------- .../semantics/TokenSemanticsBuilder.java | 20 ++++++++----- 2 files changed, 31 insertions(+), 18 deletions(-) diff --git a/language-api/src/main/java/de/jplag/semantics/TokenSemantics.java b/language-api/src/main/java/de/jplag/semantics/TokenSemantics.java index 4e3d7fc5a..5a8a1b578 100644 --- a/language-api/src/main/java/de/jplag/semantics/TokenSemantics.java +++ b/language-api/src/main/java/de/jplag/semantics/TokenSemantics.java @@ -10,12 +10,15 @@ * This record contains semantic information about the token. * @param critical Whether the token is critical, e.g. whether it (potentially) has any non-local effects. * @param control Whether the token controls the program flow. - * @param loopBegin Whether the token marks the beginning of a loop. 
- * @param loopEnd Whether the token marks the end of a loop + * @param blockBegin Whether the token marks the beginning of a block. + * @param blockEnd Whether the token marks the end of a block. + * @param blockIsLoop Whether the block is a loop (ignored if blockBegin is false). * @param reads A set of the variables which were (potentially) read from in this token. * @param writes A set of the variables which were (potentially) written to in this token. */ -public record TokenSemantics(boolean critical, boolean control, boolean loopBegin, boolean loopEnd, Set reads, Set writes) { +public record TokenSemantics(boolean critical, boolean control, + boolean blockBegin, boolean blockEnd, boolean blockIsLoop, + Set reads, Set writes) { public void addRead(Variable read) { reads.add(read); @@ -48,10 +51,12 @@ public static TokenSemantics join(List semanticsList) { semanticsBuilder.critical(); if (semantics.control) semanticsBuilder.control(); - if (semantics.loopBegin) - semanticsBuilder.loopBegin(); - if (semantics.loopEnd) - semanticsBuilder.loopEnd(); + if (semantics.blockBegin) + semanticsBuilder.blockBegin(); + if (semantics.blockEnd) + semanticsBuilder.blockEnd(); + if (semantics.blockIsLoop) + semanticsBuilder.blockIsLoop(); reads.addAll(semantics.reads); writes.addAll(semantics.writes); } @@ -70,10 +75,12 @@ public String toString() { properties.add("critical"); if (control) properties.add("control"); - if (loopBegin) - properties.add("loop begin"); - if (loopEnd) - properties.add("loop end"); + if (blockBegin) + properties.add("block begin"); + if (blockBegin) + properties.add("block end"); + if (blockIsLoop) + properties.add("block is loop"); if (!reads.isEmpty()) properties.add("read " + String.join(" ", reads.stream().map(Variable::toString).toList())); if (!writes.isEmpty()) diff --git a/language-api/src/main/java/de/jplag/semantics/TokenSemanticsBuilder.java b/language-api/src/main/java/de/jplag/semantics/TokenSemanticsBuilder.java index 
b53f7bbe0..5ae1d2f1e 100644 --- a/language-api/src/main/java/de/jplag/semantics/TokenSemanticsBuilder.java +++ b/language-api/src/main/java/de/jplag/semantics/TokenSemanticsBuilder.java @@ -9,8 +9,9 @@ public class TokenSemanticsBuilder { private boolean critical; private boolean control; - private boolean loopBegin; - private boolean loopEnd; + private boolean blockBegin; + private boolean blockEnd; + private boolean blockIsLoop; private Set reads; private Set writes; @@ -20,7 +21,7 @@ public TokenSemanticsBuilder() { } public TokenSemantics build() { - return new TokenSemantics(critical, control, loopBegin, loopEnd, reads, writes); + return new TokenSemantics(critical, control, blockBegin, blockEnd, blockIsLoop, reads, writes); } public TokenSemanticsBuilder critical() { @@ -33,13 +34,18 @@ public TokenSemanticsBuilder control() { return this; } - public TokenSemanticsBuilder loopBegin() { - this.loopBegin = true; + public TokenSemanticsBuilder blockBegin() { + this.blockBegin = true; return this; } - public TokenSemanticsBuilder loopEnd() { - this.loopEnd = true; + public TokenSemanticsBuilder blockEnd() { + this.blockEnd = true; + return this; + } + + public TokenSemanticsBuilder blockIsLoop() { + this.blockIsLoop = true; return this; } } From 51e379bfc0b3ff16705cce7fdd9775d39c4b9f38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Mon, 26 Dec 2022 13:29:00 +0100 Subject: [PATCH 021/132] Revert "Add block attributes" This reverts commit 3df89fc69c9925fef3f2f2d1a2ba44be2b4572d6. 
--- .../de/jplag/semantics/TokenSemantics.java | 29 +++++++------------ .../semantics/TokenSemanticsBuilder.java | 20 +++++-------- 2 files changed, 18 insertions(+), 31 deletions(-) diff --git a/language-api/src/main/java/de/jplag/semantics/TokenSemantics.java b/language-api/src/main/java/de/jplag/semantics/TokenSemantics.java index 5a8a1b578..4e3d7fc5a 100644 --- a/language-api/src/main/java/de/jplag/semantics/TokenSemantics.java +++ b/language-api/src/main/java/de/jplag/semantics/TokenSemantics.java @@ -10,15 +10,12 @@ * This record contains semantic information about the token. * @param critical Whether the token is critical, e.g. whether it (potentially) has any non-local effects. * @param control Whether the token controls the program flow. - * @param blockBegin Whether the token marks the beginning of a block. - * @param blockEnd Whether the token marks the end of a block. - * @param blockIsLoop Whether the block is a loop (ignored if blockBegin is false). + * @param loopBegin Whether the token marks the beginning of a loop. + * @param loopEnd Whether the token marks the end of a loop * @param reads A set of the variables which were (potentially) read from in this token. * @param writes A set of the variables which were (potentially) written to in this token. 
*/ -public record TokenSemantics(boolean critical, boolean control, - boolean blockBegin, boolean blockEnd, boolean blockIsLoop, - Set reads, Set writes) { +public record TokenSemantics(boolean critical, boolean control, boolean loopBegin, boolean loopEnd, Set reads, Set writes) { public void addRead(Variable read) { reads.add(read); @@ -51,12 +48,10 @@ public static TokenSemantics join(List semanticsList) { semanticsBuilder.critical(); if (semantics.control) semanticsBuilder.control(); - if (semantics.blockBegin) - semanticsBuilder.blockBegin(); - if (semantics.blockEnd) - semanticsBuilder.blockEnd(); - if (semantics.blockIsLoop) - semanticsBuilder.blockIsLoop(); + if (semantics.loopBegin) + semanticsBuilder.loopBegin(); + if (semantics.loopEnd) + semanticsBuilder.loopEnd(); reads.addAll(semantics.reads); writes.addAll(semantics.writes); } @@ -75,12 +70,10 @@ public String toString() { properties.add("critical"); if (control) properties.add("control"); - if (blockBegin) - properties.add("block begin"); - if (blockBegin) - properties.add("block end"); - if (blockIsLoop) - properties.add("block is loop"); + if (loopBegin) + properties.add("loop begin"); + if (loopEnd) + properties.add("loop end"); if (!reads.isEmpty()) properties.add("read " + String.join(" ", reads.stream().map(Variable::toString).toList())); if (!writes.isEmpty()) diff --git a/language-api/src/main/java/de/jplag/semantics/TokenSemanticsBuilder.java b/language-api/src/main/java/de/jplag/semantics/TokenSemanticsBuilder.java index 5ae1d2f1e..b53f7bbe0 100644 --- a/language-api/src/main/java/de/jplag/semantics/TokenSemanticsBuilder.java +++ b/language-api/src/main/java/de/jplag/semantics/TokenSemanticsBuilder.java @@ -9,9 +9,8 @@ public class TokenSemanticsBuilder { private boolean critical; private boolean control; - private boolean blockBegin; - private boolean blockEnd; - private boolean blockIsLoop; + private boolean loopBegin; + private boolean loopEnd; private Set reads; private Set writes; @@ 
-21,7 +20,7 @@ public TokenSemanticsBuilder() { } public TokenSemantics build() { - return new TokenSemantics(critical, control, blockBegin, blockEnd, blockIsLoop, reads, writes); + return new TokenSemantics(critical, control, loopBegin, loopEnd, reads, writes); } public TokenSemanticsBuilder critical() { @@ -34,18 +33,13 @@ public TokenSemanticsBuilder control() { return this; } - public TokenSemanticsBuilder blockBegin() { - this.blockBegin = true; + public TokenSemanticsBuilder loopBegin() { + this.loopBegin = true; return this; } - public TokenSemanticsBuilder blockEnd() { - this.blockEnd = true; - return this; - } - - public TokenSemanticsBuilder blockIsLoop() { - this.blockIsLoop = true; + public TokenSemanticsBuilder loopEnd() { + this.loopEnd = true; return this; } } From 10a260287a4c1240b43891edc2219eabe44b3cea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Thu, 26 Jan 2023 22:00:21 +0100 Subject: [PATCH 022/132] Fix bug in graph construction --- .../normalization/NormalizationGraph.java | 5 +-- .../NormalizationGraphConstructor.java | 37 ++++++++++--------- .../de/jplag/normalization/TokenLine.java | 6 +-- .../jplag/normalization/TokenLineBuilder.java | 29 +++++++++++++++ 4 files changed, 54 insertions(+), 23 deletions(-) create mode 100644 core/src/main/java/de/jplag/normalization/TokenLineBuilder.java diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java index 709ff6384..c7bbdb2a9 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java @@ -28,10 +28,9 @@ public List linearize() { PriorityQueue newRoots = new PriorityQueue<>(); do { TokenLine tokenLine = roots.poll(); - if (!tokenLine.keep()) { - System.out.println("removed " + tokenLine); + if (tokenLine.keep()) { + tokens.addAll(tokenLine.tokens()); } - tokens.addAll(tokenLine.tokens()); for 
(TokenLine successorGroup : Graphs.successorListOf(graph, tokenLine)) { graph.removeEdge(tokenLine, successorGroup); if (!Graphs.vertexHasPredecessors(graph, successorGroup)) { diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java index c3ac6777f..4122c146c 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java @@ -29,26 +29,29 @@ class NormalizationGraphConstructor { lastCritical = null; variableReads = new HashMap<>(); variableWrites = new HashMap<>(); - List unitTokens = new LinkedList<>(); - int lineNumber = tokens.get(0).getLine(); + TokenLineBuilder currentLine = new TokenLineBuilder(tokens.get(0).getLine()); for (Token token : tokens) { - if (token.getLine() != lineNumber) { // if (tokenGroupEnd) - TokenLine tokenLine = new TokenLine(new LinkedList<>(unitTokens), lineNumber); - graph.addVertex(tokenLine); - unitTokens.clear(); - lineNumber = token.getLine(); - this.current = tokenLine; - - processLoops(); - processControl(); - processCritical(); - processReads(); - processWrites(); - current.semantics().reads().forEach(r -> addVarToMap(r, variableReads)); - current.semantics().writes().forEach(w -> addVarToMap(w, variableWrites)); + if (token.getLine() != currentLine.lineNumber()) { + addTokenLine(currentLine.build()); + currentLine = new TokenLineBuilder(token.getLine()); } - unitTokens.add(token); + currentLine.addToken(token); } + addTokenLine(currentLine.build()); + } + + private void addTokenLine(TokenLine tokenLine) { + graph.addVertex(tokenLine); + this.current = tokenLine; + + processLoops(); + processControl(); + processCritical(); + processReads(); + processWrites(); + current.semantics().reads().forEach(r -> addVarToMap(r, variableReads)); + current.semantics().writes().forEach(w -> addVarToMap(w, variableWrites)); + } 
SimpleDirectedGraph get() { diff --git a/core/src/main/java/de/jplag/normalization/TokenLine.java b/core/src/main/java/de/jplag/normalization/TokenLine.java index ea69cc9f8..110500900 100644 --- a/core/src/main/java/de/jplag/normalization/TokenLine.java +++ b/core/src/main/java/de/jplag/normalization/TokenLine.java @@ -9,9 +9,9 @@ class TokenLine implements Comparable { - private List tokens; - private int lineNumber; - private TokenSemantics semantics; + private final List tokens; + private final int lineNumber; + private final TokenSemantics semantics; private boolean keep; TokenLine(List tokens, int lineNumber) { diff --git a/core/src/main/java/de/jplag/normalization/TokenLineBuilder.java b/core/src/main/java/de/jplag/normalization/TokenLineBuilder.java new file mode 100644 index 000000000..d8510869f --- /dev/null +++ b/core/src/main/java/de/jplag/normalization/TokenLineBuilder.java @@ -0,0 +1,29 @@ +package de.jplag.normalization; + +import java.util.LinkedList; +import java.util.List; + +import de.jplag.Token; + +class TokenLineBuilder { + + private List tokens; + private final int lineNumber; + + TokenLineBuilder(int lineNumber) { + this.lineNumber = lineNumber; + this.tokens = new LinkedList<>(); + } + + int lineNumber() { + return lineNumber; + } + + void addToken(Token tok) { + tokens.add(tok); + } + + TokenLine build() { + return new TokenLine(tokens, lineNumber); + } +} From c64797041ac27c51785361bf64dc6dd518824ae4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Thu, 26 Jan 2023 22:02:34 +0100 Subject: [PATCH 023/132] Improve prints --- core/src/main/java/de/jplag/Submission.java | 17 +++++++++++++++++ .../src/main/java/de/jplag/java/Parser.java | 2 ++ 2 files changed, 19 insertions(+) diff --git a/core/src/main/java/de/jplag/Submission.java b/core/src/main/java/de/jplag/Submission.java index 7ca9cd484..9deddd5ac 100644 --- a/core/src/main/java/de/jplag/Submission.java +++ b/core/src/main/java/de/jplag/Submission.java @@ -7,6 +7,7 
@@ import java.util.Collection; import java.util.Collections; import java.util.HashSet; +import java.util.LinkedList; import java.util.List; import java.util.Objects; @@ -269,6 +270,22 @@ private static File createErrorDirectory(String... subdirectoryNames) { } void normalize() { + // System.out.println(); + // System.out.println("original: " + getOrder(tokenList)); tokenList = new NormalizationGraph(tokenList).linearize(); + // System.out.println("normalized: " + getOrder(tokenList)); + } + + private List getOrder(List tokenList) { + List order = new LinkedList<>(); + int currentLineNumber = tokenList.get(0).getLine(); + order.add(currentLineNumber); + for (Token token : tokenList) { + if (token.getLine() != currentLineNumber) { + currentLineNumber = token.getLine(); + order.add(currentLineNumber); + } + } + return order; } } diff --git a/languages/java/src/main/java/de/jplag/java/Parser.java b/languages/java/src/main/java/de/jplag/java/Parser.java index eecc3ae05..ddf74453f 100644 --- a/languages/java/src/main/java/de/jplag/java/Parser.java +++ b/languages/java/src/main/java/de/jplag/java/Parser.java @@ -42,5 +42,7 @@ public void printSemantics() { System.out.print(" | "); System.out.println(t.getSemantics()); } + System.out.println(); + System.out.println("=".repeat(100)); } } From 2090725e900493d82ed01e73097c1677787e44d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Thu, 5 Jan 2023 17:13:02 +0100 Subject: [PATCH 024/132] Add and use VariableHelper --- .../java/de/jplag/semantics/Variable.java | 6 +- .../de/jplag/semantics/VariableHelper.java | 115 ++++++++++++++ .../java/TokenGeneratingTreeScanner.java | 141 +++++------------- 3 files changed, 157 insertions(+), 105 deletions(-) create mode 100644 language-api/src/main/java/de/jplag/semantics/VariableHelper.java diff --git a/language-api/src/main/java/de/jplag/semantics/Variable.java b/language-api/src/main/java/de/jplag/semantics/Variable.java index 11b196896..17afe73c4 100644 --- 
a/language-api/src/main/java/de/jplag/semantics/Variable.java +++ b/language-api/src/main/java/de/jplag/semantics/Variable.java @@ -1,10 +1,8 @@ package de.jplag.semantics; -import javax.lang.model.element.Name; +public record Variable(String name, VariableId id) { -public record Variable(Name name, VariableId id) { - - public Variable(Name name) { + public Variable(String name) { this(name, new VariableId()); } diff --git a/language-api/src/main/java/de/jplag/semantics/VariableHelper.java b/language-api/src/main/java/de/jplag/semantics/VariableHelper.java new file mode 100644 index 000000000..cb66fe105 --- /dev/null +++ b/language-api/src/main/java/de/jplag/semantics/VariableHelper.java @@ -0,0 +1,115 @@ +package de.jplag.semantics; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.Stack; + +/** + * Variable helper class to assist generating token semantics. For languages similar in structure to Java/C/etc + */ +public class VariableHelper { + private Map memberVariables; // map member variable name to variable + private Map> localVariables; // map local variable name to variable + private Stack> localVariablesByScope; // stack of local variable names in scope + private Map isMutable; // map variable to whether it is mutable + private boolean mutableWrite; + private NextOperation nextOperation; + + public enum NextOperation { + NONE, + READ, + WRITE, + READ_WRITE + } + + public VariableHelper() { + this.memberVariables = new HashMap<>(); + this.localVariables = new HashMap<>(); + this.localVariablesByScope = new Stack<>(); + this.isMutable = new HashMap<>(); + this.mutableWrite = false; + this.nextOperation = NextOperation.READ; // the default + } + + public void mutableWrite() { + mutableWrite = true; + } + + public void noMutableWrite() { + mutableWrite = false; + } + + public void setNextOperation(NextOperation nextOperation) { + this.nextOperation = nextOperation; + } + + public boolean 
inLocalScope() { + return !localVariablesByScope.isEmpty(); + } + + public Variable getMemberVariable(String variableName) { + return memberVariables.getOrDefault(variableName, null); + } + + public Variable getVariable(String variableName) { + Stack variableIdStack = localVariables.getOrDefault(variableName, null); + if (variableIdStack != null) { + return variableIdStack.peek(); + } + return getMemberVariable(variableName); + } + + public Variable registerMemberVariable(String name, boolean mutable) { + Variable variable = new Variable(name); + memberVariables.put(variable.name(), variable); + this.isMutable.put(variable, mutable); + return variable; + } + + public Variable registerLocalVariable(String name, boolean mutable) { + Variable variable = new Variable(name); + localVariables.putIfAbsent(variable.name(), new Stack<>()); + localVariables.get(variable.name()).push(variable); + localVariablesByScope.peek().add(variable.name()); + this.isMutable.put(variable, mutable); + return variable; + } + + public void addAllMemberVariablesAsReads(TokenSemantics semantics) { + for (Variable mv : memberVariables.values()) { + semantics.addRead(mv); + } + } + + public void clearMemberVariables() { + memberVariables.clear(); + } + + public void registerVariableOperation(Variable variable, TokenSemantics semantics) { + if (variable != null) { + if (Set.of(NextOperation.READ, NextOperation.READ_WRITE).contains(nextOperation)) { + semantics.addRead(variable); + } + if (Set.of(NextOperation.WRITE, NextOperation.READ_WRITE).contains(nextOperation) || (mutableWrite && isMutable.get(variable))) { + semantics.addWrite(variable); + } + } + nextOperation = NextOperation.READ; + } + + public void enterLocalScope() { + localVariablesByScope.add(new HashSet<>()); + } + + public void exitLocalScope() { + for (String variableName : localVariablesByScope.pop()) { + Stack variableStack = localVariables.get(variableName); + variableStack.pop(); + if (variableStack.isEmpty()) { + 
localVariables.remove(variableName); + } + } + } +} diff --git a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java index 3be439c16..918c83608 100644 --- a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java +++ b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java @@ -2,12 +2,8 @@ import java.io.File; import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; import java.util.List; -import java.util.Map; import java.util.Set; -import java.util.Stack; import java.util.function.Function; import javax.lang.model.element.Name; @@ -18,6 +14,7 @@ import de.jplag.semantics.TokenSemantics; import de.jplag.semantics.TokenSemanticsBuilder; import de.jplag.semantics.Variable; +import de.jplag.semantics.VariableHelper; import com.sun.source.tree.AnnotationTree; import com.sun.source.tree.AssertTree; @@ -75,37 +72,20 @@ final class TokenGeneratingTreeScanner extends TreeScanner private List parsingExceptions = new ArrayList<>(); - private Map memberVariables; // map member variable name to variable - private Map> localVariables; // map local variable name to variable - private Stack> localVariablesByScope; // stack of local variable names in scope - private Map isMutable; // map variable to whether it is mutable - private boolean mutableWrite; - private NextOperation nextOperation; + private VariableHelper variableHelper; private static final Set IMMUTABLES = Set.of( // from https://medium.com/@bpnorlander/java-understanding-primitive-types-and-wrapper-objects-a6798fb2afe9 "byte", "short", "int", "long", "float", "double", "boolean", "char", // "Byte", "Short", "Integer", "Long", "Float", "Double", "Boolean", "Character", "String"); - enum NextOperation { - NONE, - READ, - WRITE, - READ_WRITE - } - public TokenGeneratingTreeScanner(File file, Parser parser, LineMap map, SourcePositions positions, 
CompilationUnitTree ast) { this.file = file; this.parser = parser; this.map = map; this.positions = positions; this.ast = ast; - this.memberVariables = new HashMap<>(); - this.localVariables = new HashMap<>(); - this.localVariablesByScope = new Stack<>(); - this.isMutable = new HashMap<>(); - this.mutableWrite = false; - this.nextOperation = NextOperation.READ; // the default + this.variableHelper = new VariableHelper(); } public List getParsingExceptions() { @@ -136,18 +116,6 @@ private void addToken(JavaTokenType tokenType, long start, long end, TokenSemant addToken(tokenType, file, map.getLineNumber(start), map.getColumnNumber(start), (end - start), semantics); } - private Variable getMemberVariable(Name variableName) { - return memberVariables.getOrDefault(variableName, null); - } - - private Variable getVariable(Name variableName) { - Stack variableIdStack = localVariables.getOrDefault(variableName, null); - if (variableIdStack != null) { - return variableIdStack.peek(); - } - return getMemberVariable(variableName); - } - private boolean isVariable(ExpressionTree expressionTree) { return expressionTree.getKind() == Tree.Kind.IDENTIFIER || (expressionTree.getKind() == Tree.Kind.MEMBER_SELECT && isOwnMemberSelect((MemberSelectTree) expressionTree)); @@ -158,7 +126,7 @@ private boolean isNotExistingVariable(ExpressionTree expressionTree) { return true; } Name name = ((IdentifierTree) expressionTree).getName(); - return getVariable(name) == null; + return variableHelper.getVariable(name.toString()) == null; } private boolean isOwnMemberSelect(MemberSelectTree memberSelect) { @@ -169,37 +137,11 @@ private boolean isMutable(Tree classTree) { return classTree != null && !IMMUTABLES.contains(classTree.toString()); } - private void registerVariable(Variable variable, TokenSemantics semantics) { - if (variable != null) { - if (Set.of(NextOperation.READ, NextOperation.READ_WRITE).contains(nextOperation)) { - semantics.addRead(variable); - } - if 
(Set.of(NextOperation.WRITE, NextOperation.READ_WRITE).contains(nextOperation) || (mutableWrite && isMutable.get(variable))) { - semantics.addWrite(variable); - } - } - nextOperation = NextOperation.READ; - } - - public void enterLocalScope() { - localVariablesByScope.add(new HashSet<>()); - } - - public void exitLocalScope() { - for (Name variableName : localVariablesByScope.pop()) { - Stack variableStack = localVariables.get(variableName); - variableStack.pop(); - if (variableStack.isEmpty()) { - localVariables.remove(variableName); - } - } - } - @Override public Void visitBlock(BlockTree node, TokenSemantics semantics) { // kind of weird since in the case of for loops and catches, two scopes are introduced // but I'm pretty sure that's how Java does it internally as well - enterLocalScope(); + variableHelper.enterLocalScope(); long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; semantics = new TokenSemanticsBuilder().control().build(); @@ -207,7 +149,7 @@ public Void visitBlock(BlockTree node, TokenSemantics semantics) { super.visitBlock(node, null); semantics = new TokenSemanticsBuilder().control().build(); addToken(JavaTokenType.J_INIT_END, end, 1, semantics); - exitLocalScope(); + variableHelper.exitLocalScope(); return null; } @@ -216,9 +158,9 @@ public Void visitClass(ClassTree node, TokenSemantics semantics) { for (var member : node.getMembers()) { if (member.getKind() == Tree.Kind.VARIABLE) { VariableTree variableTree = (VariableTree) member; - Variable variable = new Variable(variableTree.getName()); - memberVariables.put(variable.name(), variable); - isMutable.put(variable, isMutable(variableTree.getType())); + String name = variableTree.getName().toString(); + boolean mutable = isMutable(variableTree.getType()); + variableHelper.registerMemberVariable(name, mutable); } } @@ -255,7 +197,7 @@ public Void visitClass(ClassTree node, TokenSemantics semantics) { semantics = new 
TokenSemanticsBuilder().control().critical().build(); addToken(tokenType, end, 1, semantics); } - memberVariables.clear(); + variableHelper.clearMemberVariables(); return null; } @@ -279,7 +221,7 @@ public Void visitPackage(PackageTree node, TokenSemantics semantics) { @Override public Void visitMethod(MethodTree node, TokenSemantics semantics) { - enterLocalScope(); + variableHelper.enterLocalScope(); long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; semantics = new TokenSemanticsBuilder().control().critical().build(); @@ -292,11 +234,9 @@ public Void visitMethod(MethodTree node, TokenSemantics semantics) { scan(node.getThrows(), semantics); scan(node.getBody(), null); semantics = new TokenSemanticsBuilder().control().critical().build(); - for (Variable mv : memberVariables.values()) { - semantics.addRead(mv); - } + variableHelper.addAllMemberVariablesAsReads(semantics); addToken(JavaTokenType.J_METHOD_END, end, 1, semantics); - exitLocalScope(); + variableHelper.exitLocalScope(); return null; } @@ -340,7 +280,7 @@ public Void visitWhileLoop(WhileLoopTree node, TokenSemantics semantics) { @Override public Void visitForLoop(ForLoopTree node, TokenSemantics semantics) { - enterLocalScope(); + variableHelper.enterLocalScope(); long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; semantics = new TokenSemanticsBuilder().control().loopBegin().build(); @@ -351,13 +291,13 @@ public Void visitForLoop(ForLoopTree node, TokenSemantics semantics) { scan(node.getStatement(), null); semantics = new TokenSemanticsBuilder().control().loopEnd().build(); addToken(JavaTokenType.J_FOR_END, end, 1, semantics); - exitLocalScope(); + variableHelper.exitLocalScope(); return null; } @Override public Void visitEnhancedForLoop(EnhancedForLoopTree node, TokenSemantics semantics) { - enterLocalScope(); + variableHelper.enterLocalScope(); long start = positions.getStartPosition(ast, node); 
long end = positions.getEndPosition(ast, node) - 1; semantics = new TokenSemanticsBuilder().control().loopBegin().build(); @@ -367,7 +307,7 @@ public Void visitEnhancedForLoop(EnhancedForLoopTree node, TokenSemantics semant scan(node.getStatement(), null); semantics = new TokenSemanticsBuilder().control().loopEnd().build(); addToken(JavaTokenType.J_FOR_END, end, 1, semantics); - exitLocalScope(); + variableHelper.exitLocalScope(); return null; } @@ -434,7 +374,7 @@ public Void visitTry(TryTree node, TokenSemantics semantics) { @Override public Void visitCatch(CatchTree node, TokenSemantics semantics) { - enterLocalScope(); + variableHelper.enterLocalScope(); long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; semantics = new TokenSemanticsBuilder().control().build(); @@ -442,7 +382,7 @@ public Void visitCatch(CatchTree node, TokenSemantics semantics) { super.visitCatch(node, null); // can leave this since catch parameter is variable declaration and thus always generates a token semantics = new TokenSemanticsBuilder().control().build(); addToken(JavaTokenType.J_CATCH_END, end, 1, semantics); - exitLocalScope(); + variableHelper.exitLocalScope(); return null; } @@ -560,7 +500,7 @@ public Void visitAssignment(AssignmentTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); semantics = conditionalCriticalSemantics(node.getVariable(), this::isNotExistingVariable); addToken(JavaTokenType.J_ASSIGN, start, 1, semantics); - nextOperation = NextOperation.WRITE; + variableHelper.setNextOperation(VariableHelper.NextOperation.WRITE); super.visitAssignment(node, semantics); return null; } @@ -570,7 +510,7 @@ public Void visitCompoundAssignment(CompoundAssignmentTree node, TokenSemantics long start = positions.getStartPosition(ast, node); semantics = conditionalCriticalSemantics(node.getVariable(), this::isNotExistingVariable); addToken(JavaTokenType.J_ASSIGN, start, 1, semantics); - 
nextOperation = NextOperation.READ_WRITE; + variableHelper.setNextOperation(VariableHelper.NextOperation.READ_WRITE); super.visitCompoundAssignment(node, semantics); return null; } @@ -582,7 +522,7 @@ public Void visitUnary(UnaryTree node, TokenSemantics semantics) { .contains(node.getKind())) { long start = positions.getStartPosition(ast, node); addToken(JavaTokenType.J_ASSIGN, start, 1, semantics); - nextOperation = NextOperation.READ_WRITE; + variableHelper.setNextOperation(VariableHelper.NextOperation.READ_WRITE); } super.visitUnary(node, semantics); return null; @@ -601,17 +541,16 @@ public Void visitAssert(AssertTree node, TokenSemantics semantics) { public Void visitVariable(VariableTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); // member variable defs are critical - semantics = conditionalCriticalSemantics(node.getNameExpression(), n -> localVariablesByScope.isEmpty()); - - if (!localVariablesByScope.isEmpty()) { // local scope - Variable variable = new Variable(node.getName()); - localVariables.putIfAbsent(variable.name(), new Stack<>()); - localVariables.get(variable.name()).push(variable); - localVariablesByScope.peek().add(variable.name()); - isMutable.put(variable, isMutable(node.getType())); - semantics.addWrite(variable); // somewhat special case, identifier isn't visited - } // no else, don't want to register member variable defs since the location doesn't matter - // (also they're going to be up top 99% of the time) + boolean inLocalScope = variableHelper.inLocalScope(); + semantics = conditionalCriticalSemantics(node.getNameExpression(), n -> !inLocalScope); + + if (inLocalScope) { + String name = node.getName().toString(); + boolean mutable = isMutable(node.getType()); + Variable variable = variableHelper.registerLocalVariable(name, mutable); + // manually add variable to semantics since identifier isn't visited + semantics.addWrite(variable); + } // no else since member variable defs are registered on 
class visit addToken(JavaTokenType.J_VARDEF, start, node.toString().length(), semantics); super.visitVariable(node, semantics); @@ -631,20 +570,18 @@ public Void visitConditionalExpression(ConditionalExpressionTree node, TokenSema public Void visitMethodInvocation(MethodInvocationTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); semantics = new TokenSemanticsBuilder().critical().control().build(); - for (Variable mv : memberVariables.values()) { - semantics.addRead(mv); - } + variableHelper.addAllMemberVariablesAsReads(semantics); addToken(JavaTokenType.J_APPLY, start, positions.getEndPosition(ast, node.getMethodSelect()) - start, semantics); scan(node.getTypeArguments(), semantics); // to differentiate bar() and this.bar() (ignore) from bar.foo() (don't ignore), different namespace for variables and // methods if (isVariable(node.getMethodSelect())) { - nextOperation = NextOperation.NONE; + variableHelper.setNextOperation(VariableHelper.NextOperation.NONE); } - mutableWrite = true; // when mentioned here, mutable variables can be written to + variableHelper.mutableWrite(); // when mentioned here, mutable variables can be written to scan(node.getMethodSelect(), semantics); scan(node.getArguments(), semantics); - mutableWrite = false; + variableHelper.noMutableWrite(); return null; } @@ -726,7 +663,8 @@ public Void visitDefaultCaseLabel(DefaultCaseLabelTree node, TokenSemantics sema @Override public Void visitMemberSelect(MemberSelectTree node, TokenSemantics semantics) { if (isOwnMemberSelect(node)) { - registerVariable(getMemberVariable(node.getIdentifier()), semantics); + Variable variable = variableHelper.getMemberVariable(node.getIdentifier().toString()); + variableHelper.registerVariableOperation(variable, semantics); } super.visitMemberSelect(node, semantics); return null; @@ -734,7 +672,8 @@ public Void visitMemberSelect(MemberSelectTree node, TokenSemantics semantics) { @Override public Void 
visitIdentifier(IdentifierTree node, TokenSemantics semantics) { - registerVariable(getVariable(node.getName()), semantics); + Variable variable = variableHelper.getVariable(node.getName().toString()); + variableHelper.registerVariableOperation(variable, semantics); super.visitIdentifier(node, semantics); return null; } From 0573880e7969a800183c01a796f54aa0fc03de8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Sat, 7 Jan 2023 11:45:00 +0100 Subject: [PATCH 025/132] Simplify conditional critical semantics --- .../semantics/TokenSemanticsBuilder.java | 10 ++++++++-- .../java/TokenGeneratingTreeScanner.java | 20 +++++++------------ 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/language-api/src/main/java/de/jplag/semantics/TokenSemanticsBuilder.java b/language-api/src/main/java/de/jplag/semantics/TokenSemanticsBuilder.java index b53f7bbe0..8f33617d3 100644 --- a/language-api/src/main/java/de/jplag/semantics/TokenSemanticsBuilder.java +++ b/language-api/src/main/java/de/jplag/semantics/TokenSemanticsBuilder.java @@ -23,11 +23,17 @@ public TokenSemantics build() { return new TokenSemantics(critical, control, loopBegin, loopEnd, reads, writes); } - public TokenSemanticsBuilder critical() { - this.critical = true; + public TokenSemanticsBuilder critical(boolean condition) { + if (condition) { + this.critical = true; + } return this; } + public TokenSemanticsBuilder critical() { + return critical(true); + } + public TokenSemanticsBuilder control() { this.control = true; return this; diff --git a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java index 918c83608..ca6c164f5 100644 --- a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java +++ b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java @@ -4,7 +4,6 @@ import java.util.ArrayList; import java.util.List; import java.util.Set; -import 
java.util.function.Function; import javax.lang.model.element.Name; @@ -487,18 +486,11 @@ public Void visitNewArray(NewArrayTree node, TokenSemantics semantics) { return null; } - private TokenSemantics conditionalCriticalSemantics(ExpressionTree expressionTree, Function conditional) { - TokenSemanticsBuilder semanticsBuilder = new TokenSemanticsBuilder(); - if (conditional.apply(expressionTree)) { - semanticsBuilder.critical(); - } - return semanticsBuilder.build(); - } - @Override public Void visitAssignment(AssignmentTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - semantics = conditionalCriticalSemantics(node.getVariable(), this::isNotExistingVariable); + boolean criticalCondition = isNotExistingVariable(node.getVariable()); + semantics = new TokenSemanticsBuilder().critical(criticalCondition).build(); addToken(JavaTokenType.J_ASSIGN, start, 1, semantics); variableHelper.setNextOperation(VariableHelper.NextOperation.WRITE); super.visitAssignment(node, semantics); @@ -508,7 +500,8 @@ public Void visitAssignment(AssignmentTree node, TokenSemantics semantics) { @Override public Void visitCompoundAssignment(CompoundAssignmentTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - semantics = conditionalCriticalSemantics(node.getVariable(), this::isNotExistingVariable); + boolean criticalCondition = isNotExistingVariable(node.getVariable()); + semantics = new TokenSemanticsBuilder().critical(criticalCondition).build(); addToken(JavaTokenType.J_ASSIGN, start, 1, semantics); variableHelper.setNextOperation(VariableHelper.NextOperation.READ_WRITE); super.visitCompoundAssignment(node, semantics); @@ -517,7 +510,8 @@ public Void visitCompoundAssignment(CompoundAssignmentTree node, TokenSemantics @Override public Void visitUnary(UnaryTree node, TokenSemantics semantics) { - semantics = conditionalCriticalSemantics(node.getExpression(), this::isNotExistingVariable); + boolean criticalCondition 
= isNotExistingVariable(node.getExpression()); + semantics = new TokenSemanticsBuilder().critical(criticalCondition).build(); if (Set.of(Tree.Kind.PREFIX_INCREMENT, Tree.Kind.POSTFIX_INCREMENT, Tree.Kind.PREFIX_DECREMENT, Tree.Kind.POSTFIX_DECREMENT) .contains(node.getKind())) { long start = positions.getStartPosition(ast, node); @@ -542,7 +536,7 @@ public Void visitVariable(VariableTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); // member variable defs are critical boolean inLocalScope = variableHelper.inLocalScope(); - semantics = conditionalCriticalSemantics(node.getNameExpression(), n -> !inLocalScope); + semantics = new TokenSemanticsBuilder().critical(!inLocalScope).build(); if (inLocalScope) { String name = node.getName().toString(); From 7068b216aff3522b3a306d3eb9539cb98c3949f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Sun, 29 Jan 2023 16:06:45 +0100 Subject: [PATCH 026/132] Fix variable next operation bug --- .../src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java | 1 + 1 file changed, 1 insertion(+) diff --git a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java index ca6c164f5..834f92623 100644 --- a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java +++ b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java @@ -660,6 +660,7 @@ public Void visitMemberSelect(MemberSelectTree node, TokenSemantics semantics) { Variable variable = variableHelper.getMemberVariable(node.getIdentifier().toString()); variableHelper.registerVariableOperation(variable, semantics); } + variableHelper.setNextOperation(VariableHelper.NextOperation.READ); super.visitMemberSelect(node, semantics); return null; } From a62a1ab940c4d0fdfd1d21496424709fc46fb184 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Fri, 3 Feb 2023 12:06:15 +0100 
Subject: [PATCH 027/132] Fix bugs, improve readability --- .../de/jplag/normalization/Dependency.java | 1 - .../normalization/NormalizationGraph.java | 7 ++- .../NormalizationGraphConstructor.java | 28 +++++---- .../jplag/normalization/TokenLineBuilder.java | 4 +- .../src/main/java/de/jplag/Token.java | 1 - .../de/jplag/semantics/NextOperation.java | 8 +++ ...iableHelper.java => VariableRegistry.java} | 17 ++--- .../src/main/java/de/jplag/java/Parser.java | 12 ++-- .../java/TokenGeneratingTreeScanner.java | 63 ++++++++++--------- 9 files changed, 75 insertions(+), 66 deletions(-) create mode 100644 language-api/src/main/java/de/jplag/semantics/NextOperation.java rename language-api/src/main/java/de/jplag/semantics/{VariableHelper.java => VariableRegistry.java} (90%) diff --git a/core/src/main/java/de/jplag/normalization/Dependency.java b/core/src/main/java/de/jplag/normalization/Dependency.java index cfae3620b..0a6ed8ef1 100644 --- a/core/src/main/java/de/jplag/normalization/Dependency.java +++ b/core/src/main/java/de/jplag/normalization/Dependency.java @@ -5,7 +5,6 @@ import de.jplag.semantics.Variable; -// not a record because JGraphT wants unique edges and we don't... class Dependency { private Set items; private boolean isData; diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java index c7bbdb2a9..065b24534 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java @@ -18,6 +18,7 @@ public NormalizationGraph(List tokens) { graph = new NormalizationGraphConstructor(tokens).get(); } + // todo java doc public List linearize() { spreadKeep(); PriorityQueue roots = graph.vertexSet().stream() // @@ -48,7 +49,8 @@ private void spreadKeep() { while (!visit.isEmpty()) { TokenLine current = visit.pop(); for (TokenLine pred : Graphs.predecessorListOf(graph, current)) { // performance? 
- if (graph.getEdge(pred, current).isData() && !pred.keep()) { + Dependency dependency = graph.getEdge(pred, current); + if ((dependency.isData() || dependency.isDataThroughLoop()) && !pred.keep()) { pred.markKeep(); visit.add(pred); } @@ -57,7 +59,8 @@ private void spreadKeep() { // could instead insert data-through-loop edges the other way around, which arguably makes more sense semantically // and turn them around here, but too much code for me to bother right now for (TokenLine succ : Graphs.successorListOf(graph, current)) { - if (graph.getEdge(current, succ).isDataThroughLoop() && !succ.keep()) { + Dependency dependency = graph.getEdge(current, succ); + if (dependency.isDataThroughLoop() && !succ.keep()) { succ.markKeep(); visit.add(succ); } diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java index 4122c146c..eaaea8b6e 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java @@ -13,7 +13,7 @@ class NormalizationGraphConstructor { private SimpleDirectedGraph graph; - private int loopCount; + private int loopDepth; private Collection controlAffected; private TokenLine lastControl; private TokenLine lastCritical; @@ -23,10 +23,8 @@ class NormalizationGraphConstructor { NormalizationGraphConstructor(List tokens) { graph = new SimpleDirectedGraph<>(Dependency.class); - loopCount = 0; + loopDepth = 0; controlAffected = new LinkedList<>(); - lastControl = null; - lastCritical = null; variableReads = new HashMap<>(); variableWrites = new HashMap<>(); TokenLineBuilder currentLine = new TokenLineBuilder(tokens.get(0).getLine()); @@ -60,25 +58,27 @@ SimpleDirectedGraph get() { private void processLoops() { if (current.semantics().loopBegin()) - loopCount++; + loopDepth++; if (current.semantics().loopEnd()) - loopCount--; + loopDepth--; } 
private void processControl() { if (current.semantics().control()) { - addCurrentEdges(controlAffected, DependencyType.CONTROL, null); + addCurrentEdges(controlAffected, DependencyType.CONTROL, null); // edges to control lines controlAffected.clear(); lastControl = current; } else if (lastControl != null) { - addCurrentEdge(lastControl, DependencyType.CONTROL, null); + addCurrentEdge(lastControl, DependencyType.CONTROL, null); // edge from control lines } controlAffected.add(current); } private void processCritical() { - if (current.semantics().critical() && lastCritical != null) { - addCurrentEdge(lastCritical, DependencyType.CRITICAL, null); + if (current.semantics().critical()) { + if (lastCritical != null) { + addCurrentEdge(lastCritical, DependencyType.CRITICAL, null); + } lastCritical = current; } } @@ -90,7 +90,7 @@ private void processReads() { } private void processWrites() { - DependencyType writeToReadDependencyType = loopCount > 0 ? DependencyType.DATA_THROUGH_LOOP : DependencyType.ORDER; + DependencyType writeToReadDependencyType = loopDepth > 0 ? DependencyType.DATA_THROUGH_LOOP : DependencyType.ORDER; for (Variable w : current.semantics().writes()) { addCurrentEdgesVar(DependencyType.ORDER, w, variableWrites); addCurrentEdgesVar(writeToReadDependencyType, w, variableReads); @@ -106,6 +106,12 @@ private void addCurrentEdges(Collection starts, DependencyType type, starts.forEach(s -> addCurrentEdge(s, type, cause)); } + /** + * Adds an ingoing edge to the current node. 
+ * @param start the start of the edge + * @param type the type of the edge + * @param cause the variable that caused the edge, may be null + */ private void addCurrentEdge(TokenLine start, DependencyType type, Variable cause) { Dependency dependency = graph.getEdge(start, current); if (dependency == null) { diff --git a/core/src/main/java/de/jplag/normalization/TokenLineBuilder.java b/core/src/main/java/de/jplag/normalization/TokenLineBuilder.java index d8510869f..538d1877c 100644 --- a/core/src/main/java/de/jplag/normalization/TokenLineBuilder.java +++ b/core/src/main/java/de/jplag/normalization/TokenLineBuilder.java @@ -19,8 +19,8 @@ int lineNumber() { return lineNumber; } - void addToken(Token tok) { - tokens.add(tok); + void addToken(Token token) { + tokens.add(token); } TokenLine build() { diff --git a/language-api/src/main/java/de/jplag/Token.java b/language-api/src/main/java/de/jplag/Token.java index 36f82d0a8..73a3c07c0 100644 --- a/language-api/src/main/java/de/jplag/Token.java +++ b/language-api/src/main/java/de/jplag/Token.java @@ -45,7 +45,6 @@ public Token(TokenType type, File file, int line, int column, int length) { this.line = line; this.column = column; this.length = length; - this.semantics = null; } /** diff --git a/language-api/src/main/java/de/jplag/semantics/NextOperation.java b/language-api/src/main/java/de/jplag/semantics/NextOperation.java new file mode 100644 index 000000000..88ad3f169 --- /dev/null +++ b/language-api/src/main/java/de/jplag/semantics/NextOperation.java @@ -0,0 +1,8 @@ +package de.jplag.semantics; + +public enum NextOperation { + NONE, + READ, + WRITE, + READ_WRITE +} diff --git a/language-api/src/main/java/de/jplag/semantics/VariableHelper.java b/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java similarity index 90% rename from language-api/src/main/java/de/jplag/semantics/VariableHelper.java rename to language-api/src/main/java/de/jplag/semantics/VariableRegistry.java index cb66fe105..9f3c94761 100644 
--- a/language-api/src/main/java/de/jplag/semantics/VariableHelper.java +++ b/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java @@ -7,9 +7,9 @@ import java.util.Stack; /** - * Variable helper class to assist generating token semantics. For languages similar in structure to Java/C/etc + * Variable helper class to assist generating token semantics. For languages similar in structure to Java/C */ -public class VariableHelper { +public class VariableRegistry { private Map memberVariables; // map member variable name to variable private Map> localVariables; // map local variable name to variable private Stack> localVariablesByScope; // stack of local variable names in scope @@ -17,14 +17,7 @@ public class VariableHelper { private boolean mutableWrite; private NextOperation nextOperation; - public enum NextOperation { - NONE, - READ, - WRITE, - READ_WRITE - } - - public VariableHelper() { + public VariableRegistry() { this.memberVariables = new HashMap<>(); this.localVariables = new HashMap<>(); this.localVariablesByScope = new Stack<>(); @@ -50,11 +43,11 @@ public boolean inLocalScope() { } public Variable getMemberVariable(String variableName) { - return memberVariables.getOrDefault(variableName, null); + return memberVariables.get(variableName); } public Variable getVariable(String variableName) { - Stack variableIdStack = localVariables.getOrDefault(variableName, null); + Stack variableIdStack = localVariables.get(variableName); if (variableIdStack != null) { return variableIdStack.peek(); } diff --git a/languages/java/src/main/java/de/jplag/java/Parser.java b/languages/java/src/main/java/de/jplag/java/Parser.java index ddf74453f..ee2a96306 100644 --- a/languages/java/src/main/java/de/jplag/java/Parser.java +++ b/languages/java/src/main/java/de/jplag/java/Parser.java @@ -32,15 +32,15 @@ public void add(Token token) { public void printSemantics() { long currentLine = 0; - for (Token t : tokens) { - if (t.getLine() != currentLine) { - currentLine = 
t.getLine(); + for (Token token : tokens) { + if (token.getLine() != currentLine) { + currentLine = token.getLine(); System.out.println(); - System.out.println(t.getLine()); + System.out.println(token.getLine()); } - System.out.print(t.getType().getDescription()); + System.out.print(token.getType().getDescription()); System.out.print(" | "); - System.out.println(t.getSemantics()); + System.out.println(token.getSemantics()); } System.out.println(); System.out.println("=".repeat(100)); diff --git a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java index 834f92623..9943725c6 100644 --- a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java +++ b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java @@ -10,10 +10,11 @@ import de.jplag.ParsingException; import de.jplag.Token; import de.jplag.TokenType; +import de.jplag.semantics.NextOperation; import de.jplag.semantics.TokenSemantics; import de.jplag.semantics.TokenSemanticsBuilder; import de.jplag.semantics.Variable; -import de.jplag.semantics.VariableHelper; +import de.jplag.semantics.VariableRegistry; import com.sun.source.tree.AnnotationTree; import com.sun.source.tree.AssertTree; @@ -71,7 +72,7 @@ final class TokenGeneratingTreeScanner extends TreeScanner private List parsingExceptions = new ArrayList<>(); - private VariableHelper variableHelper; + private VariableRegistry variableRegistry; private static final Set IMMUTABLES = Set.of( // from https://medium.com/@bpnorlander/java-understanding-primitive-types-and-wrapper-objects-a6798fb2afe9 @@ -84,7 +85,7 @@ public TokenGeneratingTreeScanner(File file, Parser parser, LineMap map, SourceP this.map = map; this.positions = positions; this.ast = ast; - this.variableHelper = new VariableHelper(); + this.variableRegistry = new VariableRegistry(); } public List getParsingExceptions() { @@ -125,7 +126,7 @@ private boolean 
isNotExistingVariable(ExpressionTree expressionTree) { return true; } Name name = ((IdentifierTree) expressionTree).getName(); - return variableHelper.getVariable(name.toString()) == null; + return variableRegistry.getVariable(name.toString()) == null; } private boolean isOwnMemberSelect(MemberSelectTree memberSelect) { @@ -140,7 +141,7 @@ private boolean isMutable(Tree classTree) { public Void visitBlock(BlockTree node, TokenSemantics semantics) { // kind of weird since in the case of for loops and catches, two scopes are introduced // but I'm pretty sure that's how Java does it internally as well - variableHelper.enterLocalScope(); + variableRegistry.enterLocalScope(); long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; semantics = new TokenSemanticsBuilder().control().build(); @@ -148,7 +149,7 @@ public Void visitBlock(BlockTree node, TokenSemantics semantics) { super.visitBlock(node, null); semantics = new TokenSemanticsBuilder().control().build(); addToken(JavaTokenType.J_INIT_END, end, 1, semantics); - variableHelper.exitLocalScope(); + variableRegistry.exitLocalScope(); return null; } @@ -159,7 +160,7 @@ public Void visitClass(ClassTree node, TokenSemantics semantics) { VariableTree variableTree = (VariableTree) member; String name = variableTree.getName().toString(); boolean mutable = isMutable(variableTree.getType()); - variableHelper.registerMemberVariable(name, mutable); + variableRegistry.registerMemberVariable(name, mutable); } } @@ -196,7 +197,7 @@ public Void visitClass(ClassTree node, TokenSemantics semantics) { semantics = new TokenSemanticsBuilder().control().critical().build(); addToken(tokenType, end, 1, semantics); } - variableHelper.clearMemberVariables(); + variableRegistry.clearMemberVariables(); return null; } @@ -220,7 +221,7 @@ public Void visitPackage(PackageTree node, TokenSemantics semantics) { @Override public Void visitMethod(MethodTree node, TokenSemantics semantics) { - 
variableHelper.enterLocalScope(); + variableRegistry.enterLocalScope(); long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; semantics = new TokenSemanticsBuilder().control().critical().build(); @@ -233,9 +234,9 @@ public Void visitMethod(MethodTree node, TokenSemantics semantics) { scan(node.getThrows(), semantics); scan(node.getBody(), null); semantics = new TokenSemanticsBuilder().control().critical().build(); - variableHelper.addAllMemberVariablesAsReads(semantics); + variableRegistry.addAllMemberVariablesAsReads(semantics); addToken(JavaTokenType.J_METHOD_END, end, 1, semantics); - variableHelper.exitLocalScope(); + variableRegistry.exitLocalScope(); return null; } @@ -279,7 +280,7 @@ public Void visitWhileLoop(WhileLoopTree node, TokenSemantics semantics) { @Override public Void visitForLoop(ForLoopTree node, TokenSemantics semantics) { - variableHelper.enterLocalScope(); + variableRegistry.enterLocalScope(); long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; semantics = new TokenSemanticsBuilder().control().loopBegin().build(); @@ -290,13 +291,13 @@ public Void visitForLoop(ForLoopTree node, TokenSemantics semantics) { scan(node.getStatement(), null); semantics = new TokenSemanticsBuilder().control().loopEnd().build(); addToken(JavaTokenType.J_FOR_END, end, 1, semantics); - variableHelper.exitLocalScope(); + variableRegistry.exitLocalScope(); return null; } @Override public Void visitEnhancedForLoop(EnhancedForLoopTree node, TokenSemantics semantics) { - variableHelper.enterLocalScope(); + variableRegistry.enterLocalScope(); long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; semantics = new TokenSemanticsBuilder().control().loopBegin().build(); @@ -306,7 +307,7 @@ public Void visitEnhancedForLoop(EnhancedForLoopTree node, TokenSemantics semant scan(node.getStatement(), null); semantics = new 
TokenSemanticsBuilder().control().loopEnd().build(); addToken(JavaTokenType.J_FOR_END, end, 1, semantics); - variableHelper.exitLocalScope(); + variableRegistry.exitLocalScope(); return null; } @@ -373,7 +374,7 @@ public Void visitTry(TryTree node, TokenSemantics semantics) { @Override public Void visitCatch(CatchTree node, TokenSemantics semantics) { - variableHelper.enterLocalScope(); + variableRegistry.enterLocalScope(); long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; semantics = new TokenSemanticsBuilder().control().build(); @@ -381,7 +382,7 @@ public Void visitCatch(CatchTree node, TokenSemantics semantics) { super.visitCatch(node, null); // can leave this since catch parameter is variable declaration and thus always generates a token semantics = new TokenSemanticsBuilder().control().build(); addToken(JavaTokenType.J_CATCH_END, end, 1, semantics); - variableHelper.exitLocalScope(); + variableRegistry.exitLocalScope(); return null; } @@ -492,7 +493,7 @@ public Void visitAssignment(AssignmentTree node, TokenSemantics semantics) { boolean criticalCondition = isNotExistingVariable(node.getVariable()); semantics = new TokenSemanticsBuilder().critical(criticalCondition).build(); addToken(JavaTokenType.J_ASSIGN, start, 1, semantics); - variableHelper.setNextOperation(VariableHelper.NextOperation.WRITE); + variableRegistry.setNextOperation(NextOperation.WRITE); super.visitAssignment(node, semantics); return null; } @@ -503,7 +504,7 @@ public Void visitCompoundAssignment(CompoundAssignmentTree node, TokenSemantics boolean criticalCondition = isNotExistingVariable(node.getVariable()); semantics = new TokenSemanticsBuilder().critical(criticalCondition).build(); addToken(JavaTokenType.J_ASSIGN, start, 1, semantics); - variableHelper.setNextOperation(VariableHelper.NextOperation.READ_WRITE); + variableRegistry.setNextOperation(NextOperation.READ_WRITE); super.visitCompoundAssignment(node, semantics); return null; } @@ 
-516,7 +517,7 @@ public Void visitUnary(UnaryTree node, TokenSemantics semantics) { .contains(node.getKind())) { long start = positions.getStartPosition(ast, node); addToken(JavaTokenType.J_ASSIGN, start, 1, semantics); - variableHelper.setNextOperation(VariableHelper.NextOperation.READ_WRITE); + variableRegistry.setNextOperation(NextOperation.READ_WRITE); } super.visitUnary(node, semantics); return null; @@ -535,13 +536,13 @@ public Void visitAssert(AssertTree node, TokenSemantics semantics) { public Void visitVariable(VariableTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); // member variable defs are critical - boolean inLocalScope = variableHelper.inLocalScope(); + boolean inLocalScope = variableRegistry.inLocalScope(); semantics = new TokenSemanticsBuilder().critical(!inLocalScope).build(); if (inLocalScope) { String name = node.getName().toString(); boolean mutable = isMutable(node.getType()); - Variable variable = variableHelper.registerLocalVariable(name, mutable); + Variable variable = variableRegistry.registerLocalVariable(name, mutable); // manually add variable to semantics since identifier isn't visited semantics.addWrite(variable); } // no else since member variable defs are registered on class visit @@ -564,18 +565,18 @@ public Void visitConditionalExpression(ConditionalExpressionTree node, TokenSema public Void visitMethodInvocation(MethodInvocationTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); semantics = new TokenSemanticsBuilder().critical().control().build(); - variableHelper.addAllMemberVariablesAsReads(semantics); + variableRegistry.addAllMemberVariablesAsReads(semantics); addToken(JavaTokenType.J_APPLY, start, positions.getEndPosition(ast, node.getMethodSelect()) - start, semantics); scan(node.getTypeArguments(), semantics); // to differentiate bar() and this.bar() (ignore) from bar.foo() (don't ignore), different namespace for variables and // methods if 
(isVariable(node.getMethodSelect())) { - variableHelper.setNextOperation(VariableHelper.NextOperation.NONE); + variableRegistry.setNextOperation(NextOperation.NONE); } - variableHelper.mutableWrite(); // when mentioned here, mutable variables can be written to + variableRegistry.mutableWrite(); // when mentioned here, mutable variables can be written to scan(node.getMethodSelect(), semantics); scan(node.getArguments(), semantics); - variableHelper.noMutableWrite(); + variableRegistry.noMutableWrite(); return null; } @@ -657,18 +658,18 @@ public Void visitDefaultCaseLabel(DefaultCaseLabelTree node, TokenSemantics sema @Override public Void visitMemberSelect(MemberSelectTree node, TokenSemantics semantics) { if (isOwnMemberSelect(node)) { - Variable variable = variableHelper.getMemberVariable(node.getIdentifier().toString()); - variableHelper.registerVariableOperation(variable, semantics); + Variable variable = variableRegistry.getMemberVariable(node.getIdentifier().toString()); + variableRegistry.registerVariableOperation(variable, semantics); } - variableHelper.setNextOperation(VariableHelper.NextOperation.READ); + variableRegistry.setNextOperation(NextOperation.READ); super.visitMemberSelect(node, semantics); return null; } @Override public Void visitIdentifier(IdentifierTree node, TokenSemantics semantics) { - Variable variable = variableHelper.getVariable(node.getName().toString()); - variableHelper.registerVariableOperation(variable, semantics); + Variable variable = variableRegistry.getVariable(node.getName().toString()); + variableRegistry.registerVariableOperation(variable, semantics); super.visitIdentifier(node, semantics); return null; } From f1c83a1fb6d9b7059968c7578fd2659df2c7be26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Sat, 25 Feb 2023 22:23:46 +0100 Subject: [PATCH 028/132] Simplify token generation --- .../de/jplag/semantics/NextOperation.java | 16 +++-- .../java/de/jplag/semantics/Variable.java | 17 ++++-- 
.../java/de/jplag/semantics/VariableId.java | 14 ----- .../de/jplag/semantics/VariableRegistry.java | 49 +++++++-------- .../java/TokenGeneratingTreeScanner.java | 59 ++++++------------- 5 files changed, 64 insertions(+), 91 deletions(-) delete mode 100644 language-api/src/main/java/de/jplag/semantics/VariableId.java diff --git a/language-api/src/main/java/de/jplag/semantics/NextOperation.java b/language-api/src/main/java/de/jplag/semantics/NextOperation.java index 88ad3f169..eb2fb3a7f 100644 --- a/language-api/src/main/java/de/jplag/semantics/NextOperation.java +++ b/language-api/src/main/java/de/jplag/semantics/NextOperation.java @@ -1,8 +1,16 @@ package de.jplag.semantics; public enum NextOperation { - NONE, - READ, - WRITE, - READ_WRITE + NONE(false, false), + READ(true, false), + WRITE(false, true), + READ_WRITE(true, true); + + final boolean isRead; + final boolean isWrite; + + NextOperation(boolean isRead, boolean isWrite) { + this.isRead = isRead; + this.isWrite = isWrite; + } } diff --git a/language-api/src/main/java/de/jplag/semantics/Variable.java b/language-api/src/main/java/de/jplag/semantics/Variable.java index 17afe73c4..3758512a7 100644 --- a/language-api/src/main/java/de/jplag/semantics/Variable.java +++ b/language-api/src/main/java/de/jplag/semantics/Variable.java @@ -1,13 +1,22 @@ package de.jplag.semantics; -public record Variable(String name, VariableId id) { +public class Variable { + private final String name; + private final boolean isOwnMember; + private final boolean isMutable; - public Variable(String name) { - this(name, new VariableId()); + Variable(String name, boolean isOwnMember, boolean isMutable) { + this.name = name; + this.isOwnMember = isOwnMember; + this.isMutable = isMutable; + } + + boolean isMutable() { + return isMutable; } @Override public String toString() { - return name + "[" + id + "]"; + return "%s[%s]".formatted(isOwnMember ? "this." : "" + name, isMutable ? 
"mut" : "non"); } } diff --git a/language-api/src/main/java/de/jplag/semantics/VariableId.java b/language-api/src/main/java/de/jplag/semantics/VariableId.java deleted file mode 100644 index bd12bf1eb..000000000 --- a/language-api/src/main/java/de/jplag/semantics/VariableId.java +++ /dev/null @@ -1,14 +0,0 @@ -package de.jplag.semantics; - -public record VariableId(String id) { - private static long counter; - - public VariableId() { - this(Long.toString(counter++)); - } - - @Override - public String toString() { - return id; - } -} diff --git a/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java b/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java index 9f3c94761..3add9db21 100644 --- a/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java +++ b/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java @@ -7,31 +7,25 @@ import java.util.Stack; /** - * Variable helper class to assist generating token semantics. For languages similar in structure to Java/C + * Helper class to assist in generating token semantics. 
For languages similar in structure to Java/C */ public class VariableRegistry { private Map memberVariables; // map member variable name to variable private Map> localVariables; // map local variable name to variable private Stack> localVariablesByScope; // stack of local variable names in scope - private Map isMutable; // map variable to whether it is mutable - private boolean mutableWrite; private NextOperation nextOperation; + private boolean mutableWrite; public VariableRegistry() { this.memberVariables = new HashMap<>(); this.localVariables = new HashMap<>(); this.localVariablesByScope = new Stack<>(); - this.isMutable = new HashMap<>(); - this.mutableWrite = false; this.nextOperation = NextOperation.READ; // the default + this.mutableWrite = false; } - public void mutableWrite() { - mutableWrite = true; - } - - public void noMutableWrite() { - mutableWrite = false; + public void setMutableWrite(boolean mutableWrite) { + this.mutableWrite = mutableWrite; } public void setNextOperation(NextOperation nextOperation) { @@ -42,11 +36,11 @@ public boolean inLocalScope() { return !localVariablesByScope.isEmpty(); } - public Variable getMemberVariable(String variableName) { + private Variable getMemberVariable(String variableName) { return memberVariables.get(variableName); } - public Variable getVariable(String variableName) { + private Variable getVariable(String variableName) { Stack variableIdStack = localVariables.get(variableName); if (variableIdStack != null) { return variableIdStack.peek(); @@ -54,25 +48,23 @@ public Variable getVariable(String variableName) { return getMemberVariable(variableName); } - public Variable registerMemberVariable(String name, boolean mutable) { - Variable variable = new Variable(name); - memberVariables.put(variable.name(), variable); - this.isMutable.put(variable, mutable); + public Variable registerMemberVariable(String variableName, boolean mutable) { + Variable variable = new Variable(variableName, true, mutable); + 
memberVariables.put(variableName, variable); return variable; } - public Variable registerLocalVariable(String name, boolean mutable) { - Variable variable = new Variable(name); - localVariables.putIfAbsent(variable.name(), new Stack<>()); - localVariables.get(variable.name()).push(variable); - localVariablesByScope.peek().add(variable.name()); - this.isMutable.put(variable, mutable); + public Variable registerLocalVariable(String variableName, boolean mutable) { + Variable variable = new Variable(variableName, false, mutable); + localVariables.putIfAbsent(variableName, new Stack<>()); + localVariables.get(variableName).push(variable); + localVariablesByScope.peek().add(variableName); return variable; } public void addAllMemberVariablesAsReads(TokenSemantics semantics) { - for (Variable mv : memberVariables.values()) { - semantics.addRead(mv); + for (Variable memberVar : memberVariables.values()) { + semantics.addRead(memberVar); } } @@ -80,12 +72,13 @@ public void clearMemberVariables() { memberVariables.clear(); } - public void registerVariableOperation(Variable variable, TokenSemantics semantics) { + public void registerVariableOperation(String variableName, boolean isOwnMember, TokenSemantics semantics) { + Variable variable = isOwnMember ? 
getMemberVariable(variableName) : getVariable(variableName); if (variable != null) { - if (Set.of(NextOperation.READ, NextOperation.READ_WRITE).contains(nextOperation)) { + if (nextOperation.isRead) { semantics.addRead(variable); } - if (Set.of(NextOperation.WRITE, NextOperation.READ_WRITE).contains(nextOperation) || (mutableWrite && isMutable.get(variable))) { + if (nextOperation.isWrite || (nextOperation.isRead && mutableWrite && variable.isMutable())) { semantics.addWrite(variable); } } diff --git a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java index 8cb28f15b..efaf82849 100644 --- a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java +++ b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java @@ -5,8 +5,6 @@ import java.util.List; import java.util.Set; -import javax.lang.model.element.Name; - import de.jplag.ParsingException; import de.jplag.Token; import de.jplag.TokenType; @@ -33,7 +31,6 @@ import com.sun.source.tree.EnhancedForLoopTree; import com.sun.source.tree.ErroneousTree; import com.sun.source.tree.ExportsTree; -import com.sun.source.tree.ExpressionTree; import com.sun.source.tree.ForLoopTree; import com.sun.source.tree.IdentifierTree; import com.sun.source.tree.IfTree; @@ -76,7 +73,7 @@ final class TokenGeneratingTreeScanner extends TreeScanner private static final Set IMMUTABLES = Set.of( // from https://medium.com/@bpnorlander/java-understanding-primitive-types-and-wrapper-objects-a6798fb2afe9 - "byte", "short", "int", "long", "float", "double", "boolean", "char", // + "byte", "short", "int", "long", "float", "double", "boolean", "char", // primitives "Byte", "Short", "Integer", "Long", "Float", "Double", "Boolean", "Character", "String"); public TokenGeneratingTreeScanner(File file, Parser parser, LineMap map, SourcePositions positions, CompilationUnitTree ast) { @@ -116,25 +113,13 @@ private void 
addToken(JavaTokenType tokenType, long start, long end, TokenSemant addToken(tokenType, file, map.getLineNumber(start), map.getColumnNumber(start), (end - start), semantics); } - private boolean isVariable(ExpressionTree expressionTree) { - return expressionTree.getKind() == Tree.Kind.IDENTIFIER - || (expressionTree.getKind() == Tree.Kind.MEMBER_SELECT && isOwnMemberSelect((MemberSelectTree) expressionTree)); - } - - private boolean isNotExistingVariable(ExpressionTree expressionTree) { - if (expressionTree.getKind() != Tree.Kind.IDENTIFIER) { - return true; - } - Name name = ((IdentifierTree) expressionTree).getName(); - return variableRegistry.getVariable(name.toString()) == null; - } - private boolean isOwnMemberSelect(MemberSelectTree memberSelect) { - return memberSelect.getExpression().toString().equals("this"); + return memberSelect.toString().equals("this"); } private boolean isMutable(Tree classTree) { - return classTree != null && !IMMUTABLES.contains(classTree.toString()); + // classTree is null if `var` keyword is used + return classTree == null || !IMMUTABLES.contains(classTree.toString()); } @Override @@ -484,19 +469,19 @@ public Void visitNewArray(NewArrayTree node, TokenSemantics semantics) { @Override public Void visitAssignment(AssignmentTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - boolean criticalCondition = isNotExistingVariable(node.getVariable()); - semantics = new TokenSemanticsBuilder().critical(criticalCondition).build(); + // todo may need to be critical when non-registered (global) variables are involved, not sure how to check + semantics = new TokenSemanticsBuilder().build(); addToken(JavaTokenType.J_ASSIGN, start, 1, semantics); variableRegistry.setNextOperation(NextOperation.WRITE); super.visitAssignment(node, semantics); + // if (this.assignedVariableWasRegistered) makeSemanticsCritical(semantics) return null; } @Override public Void visitCompoundAssignment(CompoundAssignmentTree node, 
TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - boolean criticalCondition = isNotExistingVariable(node.getVariable()); - semantics = new TokenSemanticsBuilder().critical(criticalCondition).build(); + semantics = new TokenSemanticsBuilder().build(); addToken(JavaTokenType.J_ASSIGN, start, 1, semantics); variableRegistry.setNextOperation(NextOperation.READ_WRITE); super.visitCompoundAssignment(node, semantics); @@ -505,8 +490,7 @@ public Void visitCompoundAssignment(CompoundAssignmentTree node, TokenSemantics @Override public Void visitUnary(UnaryTree node, TokenSemantics semantics) { - boolean criticalCondition = isNotExistingVariable(node.getExpression()); - semantics = new TokenSemanticsBuilder().critical(criticalCondition).build(); + semantics = new TokenSemanticsBuilder().build(); if (Set.of(Tree.Kind.PREFIX_INCREMENT, Tree.Kind.POSTFIX_INCREMENT, Tree.Kind.PREFIX_DECREMENT, Tree.Kind.POSTFIX_DECREMENT) .contains(node.getKind())) { long start = positions.getStartPosition(ast, node); @@ -537,8 +521,7 @@ public Void visitVariable(VariableTree node, TokenSemantics semantics) { String name = node.getName().toString(); boolean mutable = isMutable(node.getType()); Variable variable = variableRegistry.registerLocalVariable(name, mutable); - // manually add variable to semantics since identifier isn't visited - semantics.addWrite(variable); + semantics.addWrite(variable); // manually add variable to semantics since identifier isn't visited } // no else since member variable defs are registered on class visit addToken(JavaTokenType.J_VARDEF, start, node.toString().length(), semantics); @@ -562,15 +545,12 @@ public Void visitMethodInvocation(MethodInvocationTree node, TokenSemantics sema variableRegistry.addAllMemberVariablesAsReads(semantics); addToken(JavaTokenType.J_APPLY, start, positions.getEndPosition(ast, node.getMethodSelect()) - start, semantics); scan(node.getTypeArguments(), semantics); - // to differentiate bar() and 
this.bar() (ignore) from bar.foo() (don't ignore), different namespace for variables and - // methods - if (isVariable(node.getMethodSelect())) { - variableRegistry.setNextOperation(NextOperation.NONE); - } - variableRegistry.mutableWrite(); // when mentioned here, mutable variables can be written to - scan(node.getMethodSelect(), semantics); - scan(node.getArguments(), semantics); - variableRegistry.noMutableWrite(); + // differentiate bar() and this.bar() (ignore) from bar.foo() (don't ignore) + variableRegistry.setNextOperation(NextOperation.NONE); + variableRegistry.setMutableWrite(true); + scan(node.getMethodSelect(), semantics); // foo.bar() is a write to foo + scan(node.getArguments(), semantics); // foo(bar) is a write to bar + variableRegistry.setMutableWrite(false); return null; } @@ -652,18 +632,15 @@ public Void visitDefaultCaseLabel(DefaultCaseLabelTree node, TokenSemantics sema @Override public Void visitMemberSelect(MemberSelectTree node, TokenSemantics semantics) { if (isOwnMemberSelect(node)) { - Variable variable = variableRegistry.getMemberVariable(node.getIdentifier().toString()); - variableRegistry.registerVariableOperation(variable, semantics); + variableRegistry.registerVariableOperation(node.getIdentifier().toString(), true, semantics); } - variableRegistry.setNextOperation(NextOperation.READ); super.visitMemberSelect(node, semantics); return null; } @Override public Void visitIdentifier(IdentifierTree node, TokenSemantics semantics) { - Variable variable = variableRegistry.getVariable(node.getName().toString()); - variableRegistry.registerVariableOperation(variable, semantics); + variableRegistry.registerVariableOperation(node.toString(), false, semantics); super.visitIdentifier(node, semantics); return null; } From 28ea93ef15d71b9a53390bac7e970f11c6470e99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Sat, 25 Feb 2023 23:09:20 +0100 Subject: [PATCH 029/132] Fix bug regarding foo.bar() case --- 
.../de/jplag/semantics/VariableRegistry.java | 33 ++++++++++++------- .../java/TokenGeneratingTreeScanner.java | 4 ++- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java b/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java index 3add9db21..d72e5ef67 100644 --- a/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java +++ b/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java @@ -14,6 +14,7 @@ public class VariableRegistry { private Map> localVariables; // map local variable name to variable private Stack> localVariablesByScope; // stack of local variable names in scope private NextOperation nextOperation; + private boolean ignoreNextOperation; private boolean mutableWrite; public VariableRegistry() { @@ -21,17 +22,22 @@ public VariableRegistry() { this.localVariables = new HashMap<>(); this.localVariablesByScope = new Stack<>(); this.nextOperation = NextOperation.READ; // the default + this.ignoreNextOperation = false; this.mutableWrite = false; } - public void setMutableWrite(boolean mutableWrite) { - this.mutableWrite = mutableWrite; - } - public void setNextOperation(NextOperation nextOperation) { this.nextOperation = nextOperation; } + public void setIgnoreNextOperation(boolean ignoreNextOperation) { + this.ignoreNextOperation = ignoreNextOperation; + } + + public void setMutableWrite(boolean mutableWrite) { + this.mutableWrite = mutableWrite; + } + public boolean inLocalScope() { return !localVariablesByScope.isEmpty(); } @@ -73,16 +79,19 @@ public void clearMemberVariables() { } public void registerVariableOperation(String variableName, boolean isOwnMember, TokenSemantics semantics) { - Variable variable = isOwnMember ? 
getMemberVariable(variableName) : getVariable(variableName); - if (variable != null) { - if (nextOperation.isRead) { - semantics.addRead(variable); - } - if (nextOperation.isWrite || (nextOperation.isRead && mutableWrite && variable.isMutable())) { - semantics.addWrite(variable); + if (!ignoreNextOperation) { + Variable variable = isOwnMember ? getMemberVariable(variableName) : getVariable(variableName); + if (variable != null) { + if (nextOperation.isRead) { + semantics.addRead(variable); + } + if (nextOperation.isWrite || (nextOperation.isRead && mutableWrite && variable.isMutable())) { + semantics.addWrite(variable); + } } + nextOperation = NextOperation.READ; } - nextOperation = NextOperation.READ; + ignoreNextOperation = false; } public void enterLocalScope() { diff --git a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java index 972fc25ce..8ae98838b 100644 --- a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java +++ b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java @@ -548,7 +548,8 @@ public Void visitMethodInvocation(MethodInvocationTree node, TokenSemantics sema addToken(JavaTokenType.J_APPLY, start, positions.getEndPosition(ast, node.getMethodSelect()) - start, semantics); scan(node.getTypeArguments(), semantics); // differentiate bar() and this.bar() (ignore) from bar.foo() (don't ignore) - variableRegistry.setNextOperation(NextOperation.NONE); + // look at cases foo.bar()++ and foo().bar++ + variableRegistry.setIgnoreNextOperation(true); variableRegistry.setMutableWrite(true); scan(node.getMethodSelect(), semantics); // foo.bar() is a write to foo scan(node.getArguments(), semantics); // foo(bar) is a write to bar @@ -636,6 +637,7 @@ public Void visitMemberSelect(MemberSelectTree node, TokenSemantics semantics) { if (isOwnMemberSelect(node)) { 
variableRegistry.registerVariableOperation(node.getIdentifier().toString(), true, semantics); } + variableRegistry.setIgnoreNextOperation(false); // don't ignore the foo in foo.bar() super.visitMemberSelect(node, semantics); return null; } From b83e4e46970f9e02627f83e4a3486ecb27384657 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Sat, 4 Mar 2023 18:20:13 +0100 Subject: [PATCH 030/132] Rework TokenSemantics --- core/src/main/java/de/jplag/Submission.java | 2 +- .../NormalizationGraphConstructor.java | 53 +++---- .../de/jplag/normalization/TokenLine.java | 4 +- .../src/main/java/de/jplag/Token.java | 3 +- .../de/jplag/semantics/BlockRelation.java | 19 +++ .../de/jplag/semantics/NextOperation.java | 1 - .../java/de/jplag/semantics/Ordering.java | 29 ++++ .../de/jplag/semantics/TokenSemantics.java | 147 +++++++++++++----- .../semantics/TokenSemanticsBuilder.java | 51 ------ .../java/de/jplag/semantics/Variable.java | 5 +- .../de/jplag/semantics/VariableRegistry.java | 6 +- .../java/TokenGeneratingTreeScanner.java | 129 ++++++++------- 12 files changed, 259 insertions(+), 190 deletions(-) create mode 100644 language-api/src/main/java/de/jplag/semantics/BlockRelation.java create mode 100644 language-api/src/main/java/de/jplag/semantics/Ordering.java delete mode 100644 language-api/src/main/java/de/jplag/semantics/TokenSemanticsBuilder.java diff --git a/core/src/main/java/de/jplag/Submission.java b/core/src/main/java/de/jplag/Submission.java index 9deddd5ac..36f485dfe 100644 --- a/core/src/main/java/de/jplag/Submission.java +++ b/core/src/main/java/de/jplag/Submission.java @@ -271,7 +271,7 @@ private static File createErrorDirectory(String... 
subdirectoryNames) { void normalize() { // System.out.println(); - // System.out.println("original: " + getOrder(tokenList)); + // System.out.println("original: " + getOrder(tokenList)); tokenList = new NormalizationGraph(tokenList).linearize(); // System.out.println("normalized: " + getOrder(tokenList)); } diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java index eaaea8b6e..abf333b27 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java @@ -9,14 +9,16 @@ import org.jgrapht.graph.SimpleDirectedGraph; import de.jplag.Token; +import de.jplag.semantics.BlockRelation; +import de.jplag.semantics.Ordering; import de.jplag.semantics.Variable; class NormalizationGraphConstructor { private SimpleDirectedGraph graph; private int loopDepth; - private Collection controlAffected; - private TokenLine lastControl; - private TokenLine lastCritical; + private Collection fullOrderingIngoing; + private TokenLine lastFullOrdering; + private TokenLine lastPartialOrdering; private Map> variableReads; private Map> variableWrites; private TokenLine current; @@ -24,7 +26,7 @@ class NormalizationGraphConstructor { NormalizationGraphConstructor(List tokens) { graph = new SimpleDirectedGraph<>(Dependency.class); loopDepth = 0; - controlAffected = new LinkedList<>(); + fullOrderingIngoing = new LinkedList<>(); variableReads = new HashMap<>(); variableWrites = new HashMap<>(); TokenLineBuilder currentLine = new TokenLineBuilder(tokens.get(0).getLine()); @@ -38,13 +40,16 @@ class NormalizationGraphConstructor { addTokenLine(currentLine.build()); } + SimpleDirectedGraph get() { + return graph; + } + private void addTokenLine(TokenLine tokenLine) { graph.addVertex(tokenLine); this.current = tokenLine; - processLoops(); - processControl(); - processCritical(); + 
processFullOrdering(); + processPartialOrdering(); processReads(); processWrites(); current.semantics().reads().forEach(r -> addVarToMap(r, variableReads)); @@ -52,34 +57,30 @@ private void addTokenLine(TokenLine tokenLine) { } - SimpleDirectedGraph get() { - return graph; - } - private void processLoops() { - if (current.semantics().loopBegin()) + if (current.semantics().bidirectionalBlockRelation() == BlockRelation.BEGINS_BLOCK) loopDepth++; - if (current.semantics().loopEnd()) + if (current.semantics().bidirectionalBlockRelation() == BlockRelation.ENDS_BLOCK) loopDepth--; } - private void processControl() { - if (current.semantics().control()) { - addCurrentEdges(controlAffected, DependencyType.CONTROL, null); // edges to control lines - controlAffected.clear(); - lastControl = current; - } else if (lastControl != null) { - addCurrentEdge(lastControl, DependencyType.CONTROL, null); // edge from control lines + private void processFullOrdering() { + if (current.semantics().ordering() == Ordering.FULL) { + addCurrentEdges(fullOrderingIngoing, DependencyType.CONTROL, null); // ingoing edges + fullOrderingIngoing.clear(); + lastFullOrdering = current; + } else if (lastFullOrdering != null) { + addCurrentEdge(lastFullOrdering, DependencyType.CONTROL, null); // outgoing edges } - controlAffected.add(current); + fullOrderingIngoing.add(current); } - private void processCritical() { - if (current.semantics().critical()) { - if (lastCritical != null) { - addCurrentEdge(lastCritical, DependencyType.CRITICAL, null); + private void processPartialOrdering() { + if (current.semantics().ordering() == Ordering.PARTIAL) { + if (lastPartialOrdering != null) { + addCurrentEdge(lastPartialOrdering, DependencyType.CRITICAL, null); } - lastCritical = current; + lastPartialOrdering = current; } } diff --git a/core/src/main/java/de/jplag/normalization/TokenLine.java b/core/src/main/java/de/jplag/normalization/TokenLine.java index 110500900..e6ceb99c0 100644 --- 
a/core/src/main/java/de/jplag/normalization/TokenLine.java +++ b/core/src/main/java/de/jplag/normalization/TokenLine.java @@ -17,8 +17,8 @@ class TokenLine implements Comparable { TokenLine(List tokens, int lineNumber) { this.tokens = Collections.unmodifiableList(tokens); this.lineNumber = lineNumber; - this.semantics = TokenSemantics.join(tokens.stream().map(Token::getSemantics).toList()); - keep = semantics.critical() || semantics.control(); + semantics = TokenSemantics.join(tokens.stream().map(Token::getSemantics).toList()); + keep = semantics.keep(); } public List tokens() { diff --git a/language-api/src/main/java/de/jplag/Token.java b/language-api/src/main/java/de/jplag/Token.java index a1d665d21..89f425ed1 100644 --- a/language-api/src/main/java/de/jplag/Token.java +++ b/language-api/src/main/java/de/jplag/Token.java @@ -6,7 +6,6 @@ import org.slf4j.LoggerFactory; import de.jplag.semantics.TokenSemantics; -import de.jplag.semantics.TokenSemanticsBuilder; /** * This class represents a token in a source code. It can represent keywords, identifiers, syntactical structures etc. @@ -75,7 +74,7 @@ public static Token fileEnd(File file) { * @param file is the name of the source code file. */ public static Token semanticFileEnd(File file) { - TokenSemantics semantics = new TokenSemanticsBuilder().control().critical().build(); + TokenSemantics semantics = TokenSemantics.createControl(); return new Token(SharedTokenType.FILE_END, file, NO_VALUE, NO_VALUE, NO_VALUE, semantics); } diff --git a/language-api/src/main/java/de/jplag/semantics/BlockRelation.java b/language-api/src/main/java/de/jplag/semantics/BlockRelation.java new file mode 100644 index 000000000..c219f0c49 --- /dev/null +++ b/language-api/src/main/java/de/jplag/semantics/BlockRelation.java @@ -0,0 +1,19 @@ +package de.jplag.semantics; + +/** + * Enumerates the relationships a code snippet can have with a code block. + */ +public enum BlockRelation { + /** + * This code snippet begins the block. 
+ */ + BEGINS_BLOCK, + /** + * This code snippet ends the block. + */ + ENDS_BLOCK, + /** + * This code snippet neither begins nor ends the block. + */ + NONE +} diff --git a/language-api/src/main/java/de/jplag/semantics/NextOperation.java b/language-api/src/main/java/de/jplag/semantics/NextOperation.java index eb2fb3a7f..283ca27a5 100644 --- a/language-api/src/main/java/de/jplag/semantics/NextOperation.java +++ b/language-api/src/main/java/de/jplag/semantics/NextOperation.java @@ -1,7 +1,6 @@ package de.jplag.semantics; public enum NextOperation { - NONE(false, false), READ(true, false), WRITE(false, true), READ_WRITE(true, true); diff --git a/language-api/src/main/java/de/jplag/semantics/Ordering.java b/language-api/src/main/java/de/jplag/semantics/Ordering.java new file mode 100644 index 000000000..24835e231 --- /dev/null +++ b/language-api/src/main/java/de/jplag/semantics/Ordering.java @@ -0,0 +1,29 @@ +package de.jplag.semantics; + +/** + * Enumerates how the order of an item in a sequence relative to other items may be relevant. + */ +public enum Ordering { + /** + * The order of the item relative to other items in the sequence is not relevant. + */ + NONE(0), + /** + * The order of the item relative to other items that also have partial ordering is relevant. + */ + PARTIAL(1), + /** + * The order of the item to all other items is relevant. 
+ */ + FULL(2); + + private final int strength; + + Ordering(int strength) { + this.strength = strength; + } + + boolean isStrongerThan(Ordering other) { + return this.strength > other.strength; + } +} diff --git a/language-api/src/main/java/de/jplag/semantics/TokenSemantics.java b/language-api/src/main/java/de/jplag/semantics/TokenSemantics.java index 4e3d7fc5a..9a1fc9ec5 100644 --- a/language-api/src/main/java/de/jplag/semantics/TokenSemantics.java +++ b/language-api/src/main/java/de/jplag/semantics/TokenSemantics.java @@ -7,73 +7,146 @@ import java.util.Set; /** - * This record contains semantic information about the token. - * @param critical Whether the token is critical, e.g. whether it (potentially) has any non-local effects. - * @param control Whether the token controls the program flow. - * @param loopBegin Whether the token marks the beginning of a loop. - * @param loopEnd Whether the token marks the end of a loop - * @param reads A set of the variables which were (potentially) read from in this token. - * @param writes A set of the variables which were (potentially) written to in this token. + * This record contains semantic information about a code snippet, in our case either a token or a line of code. + * @param keep Whether the code snippet must be kept or if it may be removed. + * @param ordering In which way the ordering of the code snippet relative to other code snippets of the same type is + * relevant. For the possible options see {@link Ordering}. + * @param bidirectionalBlockRelation Which relation (if any) the code snippet has to bidirectional block, meaning a + * block where any statement within it may be executed after any other. This will typically be a loop. For the possible + * options see {@link BlockRelation}. + * @param reads A set of the variables which were (potentially) read from in the code snippet. + * @param writes A set of the variables which were (potentially) written to in the code snippet. 
*/ -public record TokenSemantics(boolean critical, boolean control, boolean loopBegin, boolean loopEnd, Set reads, Set writes) { +public record TokenSemantics(boolean keep, Ordering ordering, BlockRelation bidirectionalBlockRelation, Set reads, Set writes) { - public void addRead(Variable read) { - reads.add(read); + private TokenSemantics(boolean keep, Ordering ordering, BlockRelation bidirectionalBlockRelation) { + this(keep, ordering, bidirectionalBlockRelation, new HashSet<>(), new HashSet<>()); } - public void addWrite(Variable write) { - writes.add(write); + /** + * Creates a new TokenSemantics instance with the following meaning: The token may be removed, and its order relative to + * other tokens may change. Example: An assignment to a local variable. + */ + public TokenSemantics() { + this(false, Ordering.NONE, BlockRelation.NONE); } /** - * @return an unmodifiable set of the variables which were (potentially) read from in this token. + * @return A new TokenSemantics instance with the following meaning: The token may not be removed, and its order + * relative to other tokens may change. Example: An attribute declaration. + */ + public static TokenSemantics createKeep() { + return new TokenSemantics(true, Ordering.NONE, BlockRelation.NONE); + } + + /** + * @return A new TokenSemantics instance with the following meaning: The token may not be removed, and its order must + * stay invariant to other tokens of the same type. Example: A method call which is guaranteed to not result in an + * exception. + */ + public static TokenSemantics createCritical() { + return new TokenSemantics(true, Ordering.PARTIAL, BlockRelation.NONE); + } + + /** + * @return A new TokenSemantics instance with the following meaning: The token may not be removed, and its order must + * stay invariant to all other tokens. Example: A return statement. 
+ */ + public static TokenSemantics createControl() { + return new TokenSemantics(true, Ordering.FULL, BlockRelation.NONE); + } + + /** + * @return A new TokenSemantics instance with the following meaning: The token may not be removed, and its order must + * stay invariant to all other tokens, which also begins a bidirectional block. Example: The beginning of a while loop. + */ + public static TokenSemantics createLoopBegin() { + return new TokenSemantics(true, Ordering.FULL, BlockRelation.BEGINS_BLOCK); + } + + /** + * @return A new TokenSemantics instance with the following meaning: The token may not be removed, and its order must + * stay invariant to all other tokens, which also ends a bidirectional block. Example: The end of a while loop. + */ + public static TokenSemantics createLoopEnd() { + return new TokenSemantics(true, Ordering.FULL, BlockRelation.ENDS_BLOCK); + } + + /** + * Add a variable to the set of variables which were (potentially) read from in this code snippet. + * @param variable The variable which is added. + */ + public void addRead(Variable variable) { + reads.add(variable); + } + + /** + * Add a variable to the set of variables which were (potentially) written to in this code snippet. + * @param variable The variable which is added. + */ + public void addWrite(Variable variable) { + writes.add(variable); + } + + /** + * @return an unmodifiable set of the variables which were (potentially) read from in this code snippet. */ public Set reads() { return Collections.unmodifiableSet(reads); } /** - * @return an unmodifiable set of the variables which were (potentially) written to in this token. + * @return an unmodifiable set of the variables which were (potentially) written to in this code snippet. */ public Set writes() { return Collections.unmodifiableSet(writes); } + /** + * Create new joint semantics by joining a number of existing ones. It has the following properties: + *
<ul>
+ * <li>keep is the disjunction of all keeps</li>
+ * <li>ordering is the strongest ordering out of all orderings</li>
+ * <li>bidirectionalBlockRelation is the one that is not NONE out of all bidirectionalBlockRelations if it exists.
+ * It's assumed that there is at most one. If there isn't one bidirectionalBlockRelation is NONE.</li>
+ * <li>reads is the union of all reads</li>
+ * <li>writes is the union of all writes</li>
+ * </ul>
+ * @param semanticsList A list of the semantics which should be joined. + * @return New semantics which were created by joining the elements in the semanticsList. + */ public static TokenSemantics join(List semanticsList) { + boolean keep = false; + Ordering ordering = Ordering.NONE; + BlockRelation bidirectionalBlockRelation = BlockRelation.NONE; Set reads = new HashSet<>(); Set writes = new HashSet<>(); - TokenSemanticsBuilder semanticsBuilder = new TokenSemanticsBuilder(); for (TokenSemantics semantics : semanticsList) { - if (semantics.critical) - semanticsBuilder.critical(); - if (semantics.control) - semanticsBuilder.control(); - if (semantics.loopBegin) - semanticsBuilder.loopBegin(); - if (semantics.loopEnd) - semanticsBuilder.loopEnd(); + keep = keep || semantics.keep(); + if (semantics.ordering.isStrongerThan(ordering)) { + ordering = semantics.ordering; + } + if (semantics.bidirectionalBlockRelation != BlockRelation.NONE) { + assert bidirectionalBlockRelation == BlockRelation.NONE; // only one block begin/end per line + bidirectionalBlockRelation = semantics.bidirectionalBlockRelation; + } reads.addAll(semantics.reads); writes.addAll(semantics.writes); } - TokenSemantics semantics = semanticsBuilder.build(); - for (Variable r : reads) - semantics.addRead(r); - for (Variable w : writes) - semantics.addWrite(w); - return semantics; + return new TokenSemantics(keep, ordering, bidirectionalBlockRelation, reads, writes); } @Override public String toString() { List properties = new LinkedList<>(); - if (critical) - properties.add("critical"); - if (control) - properties.add("control"); - if (loopBegin) - properties.add("loop begin"); - if (loopEnd) - properties.add("loop end"); + if (keep) + properties.add("keep"); + if (ordering != Ordering.NONE) + properties.add(ordering.name().toLowerCase() + " ordering"); + if (bidirectionalBlockRelation != BlockRelation.NONE) { + String keyword = bidirectionalBlockRelation.name().toLowerCase().split("_")[0]; + 
properties.add(keyword + " bidirectional block"); + } if (!reads.isEmpty()) properties.add("read " + String.join(" ", reads.stream().map(Variable::toString).toList())); if (!writes.isEmpty()) diff --git a/language-api/src/main/java/de/jplag/semantics/TokenSemanticsBuilder.java b/language-api/src/main/java/de/jplag/semantics/TokenSemanticsBuilder.java deleted file mode 100644 index 8f33617d3..000000000 --- a/language-api/src/main/java/de/jplag/semantics/TokenSemanticsBuilder.java +++ /dev/null @@ -1,51 +0,0 @@ -package de.jplag.semantics; - -import java.util.HashSet; -import java.util.Set; - -/** - * A builder class for the TokenSemantics record. - */ -public class TokenSemanticsBuilder { - private boolean critical; - private boolean control; - private boolean loopBegin; - private boolean loopEnd; - private Set reads; - private Set writes; - - public TokenSemanticsBuilder() { - this.reads = new HashSet<>(); - this.writes = new HashSet<>(); - } - - public TokenSemantics build() { - return new TokenSemantics(critical, control, loopBegin, loopEnd, reads, writes); - } - - public TokenSemanticsBuilder critical(boolean condition) { - if (condition) { - this.critical = true; - } - return this; - } - - public TokenSemanticsBuilder critical() { - return critical(true); - } - - public TokenSemanticsBuilder control() { - this.control = true; - return this; - } - - public TokenSemanticsBuilder loopBegin() { - this.loopBegin = true; - return this; - } - - public TokenSemanticsBuilder loopEnd() { - this.loopEnd = true; - return this; - } -} diff --git a/language-api/src/main/java/de/jplag/semantics/Variable.java b/language-api/src/main/java/de/jplag/semantics/Variable.java index 3758512a7..d41eb49be 100644 --- a/language-api/src/main/java/de/jplag/semantics/Variable.java +++ b/language-api/src/main/java/de/jplag/semantics/Variable.java @@ -1,5 +1,8 @@ package de.jplag.semantics; +/** + * Each variable has its unique identity, important for tracing in graph 
(NormalizationGraph::spreadKeep). + */ public class Variable { private final String name; private final boolean isOwnMember; @@ -17,6 +20,6 @@ boolean isMutable() { @Override public String toString() { - return "%s[%s]".formatted(isOwnMember ? "this." : "" + name, isMutable ? "mut" : "non"); + return (isOwnMember ? "this." : "") + name + (isMutable ? "*" : ""); } } diff --git a/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java b/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java index d72e5ef67..91c993229 100644 --- a/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java +++ b/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java @@ -54,18 +54,16 @@ private Variable getVariable(String variableName) { return getMemberVariable(variableName); } - public Variable registerMemberVariable(String variableName, boolean mutable) { + public void registerMemberVariable(String variableName, boolean mutable) { Variable variable = new Variable(variableName, true, mutable); memberVariables.put(variableName, variable); - return variable; } - public Variable registerLocalVariable(String variableName, boolean mutable) { + public void registerLocalVariable(String variableName, boolean mutable) { Variable variable = new Variable(variableName, false, mutable); localVariables.putIfAbsent(variableName, new Stack<>()); localVariables.get(variableName).push(variable); localVariablesByScope.peek().add(variableName); - return variable; } public void addAllMemberVariablesAsReads(TokenSemantics semantics) { diff --git a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java index 8ae98838b..7b0d34daa 100644 --- a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java +++ b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java @@ -10,8 +10,6 @@ import de.jplag.TokenType; import 
de.jplag.semantics.NextOperation; import de.jplag.semantics.TokenSemantics; -import de.jplag.semantics.TokenSemanticsBuilder; -import de.jplag.semantics.Variable; import de.jplag.semantics.VariableRegistry; import com.sun.source.tree.AnnotationTree; @@ -145,7 +143,7 @@ public Void visitClass(ClassTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = new TokenSemanticsBuilder().control().critical().build(); + semantics = TokenSemantics.createControl(); if (node.getKind() == Tree.Kind.ENUM) { addToken(JavaTokenType.J_ENUM_BEGIN, start, 4, semantics); } else if (node.getKind() == Tree.Kind.INTERFACE) { @@ -173,7 +171,7 @@ public Void visitClass(ClassTree node, TokenSemantics semantics) { default -> null; }; if (tokenType != null) { - semantics = new TokenSemanticsBuilder().control().critical().build(); + semantics = TokenSemantics.createControl(); addToken(tokenType, end, 1, semantics); } variableRegistry.clearMemberVariables(); @@ -183,7 +181,7 @@ public Void visitClass(ClassTree node, TokenSemantics semantics) { @Override public Void visitImport(ImportTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - semantics = new TokenSemanticsBuilder().control().critical().build(); + semantics = TokenSemantics.createKeep(); addToken(JavaTokenType.J_IMPORT, start, 6, semantics); super.visitImport(node, semantics); return null; @@ -192,7 +190,7 @@ public Void visitImport(ImportTree node, TokenSemantics semantics) { @Override public Void visitPackage(PackageTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - semantics = new TokenSemanticsBuilder().control().critical().build(); + semantics = TokenSemantics.createControl(); addToken(JavaTokenType.J_PACKAGE, start, 7, semantics); super.visitPackage(node, semantics); return null; @@ -203,7 +201,7 @@ public Void visitMethod(MethodTree node, 
TokenSemantics semantics) { variableRegistry.enterLocalScope(); long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = new TokenSemanticsBuilder().control().critical().build(); + semantics = TokenSemantics.createControl(); addToken(JavaTokenType.J_METHOD_BEGIN, start, node.getName().length(), semantics); scan(node.getModifiers(), semantics); scan(node.getReturnType(), semantics); @@ -212,7 +210,7 @@ public Void visitMethod(MethodTree node, TokenSemantics semantics) { scan(node.getReceiverParameter(), semantics); scan(node.getThrows(), semantics); scan(node.getBody(), null); - semantics = new TokenSemanticsBuilder().control().critical().build(); + semantics = TokenSemantics.createControl(); variableRegistry.addAllMemberVariablesAsReads(semantics); addToken(JavaTokenType.J_METHOD_END, end, 1, semantics); variableRegistry.exitLocalScope(); @@ -223,10 +221,10 @@ public Void visitMethod(MethodTree node, TokenSemantics semantics) { public Void visitSynchronized(SynchronizedTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = new TokenSemanticsBuilder().control().critical().build(); + semantics = TokenSemantics.createControl(); addToken(JavaTokenType.J_SYNC_BEGIN, start, 12, semantics); super.visitSynchronized(node, semantics); - semantics = new TokenSemanticsBuilder().control().critical().build(); + semantics = TokenSemantics.createControl(); addToken(JavaTokenType.J_SYNC_END, end, 1, semantics); return null; } @@ -235,10 +233,10 @@ public Void visitSynchronized(SynchronizedTree node, TokenSemantics semantics) { public Void visitDoWhileLoop(DoWhileLoopTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = new TokenSemanticsBuilder().control().loopBegin().build(); + semantics = 
TokenSemantics.createLoopBegin(); addToken(JavaTokenType.J_DO_BEGIN, start, 2, semantics); scan(node.getStatement(), null); - semantics = new TokenSemanticsBuilder().control().loopEnd().build(); + semantics = TokenSemantics.createLoopEnd(); addToken(JavaTokenType.J_DO_END, end, 1, semantics); scan(node.getCondition(), semantics); return null; @@ -248,11 +246,11 @@ public Void visitDoWhileLoop(DoWhileLoopTree node, TokenSemantics semantics) { public Void visitWhileLoop(WhileLoopTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = new TokenSemanticsBuilder().control().loopBegin().build(); + semantics = TokenSemantics.createLoopBegin(); addToken(JavaTokenType.J_WHILE_BEGIN, start, 5, semantics); scan(node.getCondition(), semantics); scan(node.getStatement(), null); - semantics = new TokenSemanticsBuilder().control().loopEnd().build(); + semantics = TokenSemantics.createLoopEnd(); addToken(JavaTokenType.J_WHILE_END, end, 1, semantics); return null; } @@ -262,13 +260,13 @@ public Void visitForLoop(ForLoopTree node, TokenSemantics semantics) { variableRegistry.enterLocalScope(); long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = new TokenSemanticsBuilder().control().loopBegin().build(); + semantics = TokenSemantics.createLoopBegin(); addToken(JavaTokenType.J_FOR_BEGIN, start, 3, semantics); scan(node.getInitializer(), semantics); scan(node.getCondition(), semantics); scan(node.getUpdate(), semantics); scan(node.getStatement(), null); - semantics = new TokenSemanticsBuilder().control().loopEnd().build(); + semantics = TokenSemantics.createLoopEnd(); addToken(JavaTokenType.J_FOR_END, end, 1, semantics); variableRegistry.exitLocalScope(); return null; @@ -279,12 +277,12 @@ public Void visitEnhancedForLoop(EnhancedForLoopTree node, TokenSemantics semant variableRegistry.enterLocalScope(); long start = 
positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = new TokenSemanticsBuilder().control().loopBegin().build(); + semantics = TokenSemantics.createLoopBegin(); addToken(JavaTokenType.J_FOR_BEGIN, start, 3, semantics); scan(node.getVariable(), semantics); scan(node.getExpression(), semantics); scan(node.getStatement(), null); - semantics = new TokenSemanticsBuilder().control().loopEnd().build(); + semantics = TokenSemantics.createLoopEnd(); addToken(JavaTokenType.J_FOR_END, end, 1, semantics); variableRegistry.exitLocalScope(); return null; @@ -294,11 +292,11 @@ public Void visitEnhancedForLoop(EnhancedForLoopTree node, TokenSemantics semant public Void visitSwitch(SwitchTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = new TokenSemanticsBuilder().control().build(); + semantics = TokenSemantics.createControl(); addToken(JavaTokenType.J_SWITCH_BEGIN, start, 6, semantics); scan(node.getExpression(), semantics); scan(node.getCases(), null); - semantics = new TokenSemanticsBuilder().control().build(); + semantics = TokenSemantics.createControl(); addToken(JavaTokenType.J_SWITCH_END, end, 1, semantics); return null; } @@ -307,11 +305,11 @@ public Void visitSwitch(SwitchTree node, TokenSemantics semantics) { public Void visitSwitchExpression(SwitchExpressionTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = new TokenSemanticsBuilder().control().build(); + semantics = TokenSemantics.createControl(); addToken(JavaTokenType.J_SWITCH_BEGIN, start, 6, semantics); scan(node.getExpression(), semantics); scan(node.getCases(), null); - semantics = new TokenSemanticsBuilder().control().build(); + semantics = TokenSemantics.createControl(); addToken(JavaTokenType.J_SWITCH_END, end, 1, semantics); return null; } @@ -319,7 
+317,7 @@ public Void visitSwitchExpression(SwitchExpressionTree node, TokenSemantics sema @Override public Void visitCase(CaseTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - semantics = new TokenSemanticsBuilder().control().build(); + semantics = TokenSemantics.createControl(); addToken(JavaTokenType.J_CASE, start, 4, semantics); scan(node.getExpressions(), semantics); if (node.getCaseKind() == CaseTree.CaseKind.RULE) { @@ -333,21 +331,21 @@ public Void visitCase(CaseTree node, TokenSemantics semantics) { @Override public Void visitTry(TryTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - semantics = new TokenSemanticsBuilder().control().build(); + semantics = TokenSemantics.createControl(); addToken(JavaTokenType.J_TRY_BEGIN, start, 3, semantics); scan(node.getResources(), semantics); scan(node.getBlock(), null); long end = positions.getEndPosition(ast, node); - semantics = new TokenSemanticsBuilder().control().build(); + semantics = TokenSemantics.createControl(); addToken(JavaTokenType.J_TRY_END, end, 1, semantics); scan(node.getCatches(), null); if (node.getFinallyBlock() != null) { start = positions.getStartPosition(ast, node.getFinallyBlock()); - semantics = new TokenSemanticsBuilder().control().build(); + semantics = TokenSemantics.createControl(); addToken(JavaTokenType.J_FINALLY_BEGIN, start, 3, semantics); scan(node.getFinallyBlock(), null); end = positions.getEndPosition(ast, node.getFinallyBlock()); - semantics = new TokenSemanticsBuilder().control().build(); + semantics = TokenSemantics.createControl(); addToken(JavaTokenType.J_FINALLY_END, end, 1, semantics); } return null; // return value isn't used @@ -358,10 +356,10 @@ public Void visitCatch(CatchTree node, TokenSemantics semantics) { variableRegistry.enterLocalScope(); long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = new 
TokenSemanticsBuilder().control().build(); + semantics = TokenSemantics.createControl(); addToken(JavaTokenType.J_CATCH_BEGIN, start, 5, semantics); super.visitCatch(node, null); // can leave this since catch parameter is variable declaration and thus always generates a token - semantics = new TokenSemanticsBuilder().control().build(); + semantics = TokenSemantics.createControl(); addToken(JavaTokenType.J_CATCH_END, end, 1, semantics); variableRegistry.exitLocalScope(); return null; @@ -371,17 +369,17 @@ public Void visitCatch(CatchTree node, TokenSemantics semantics) { public Void visitIf(IfTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = new TokenSemanticsBuilder().control().build(); + semantics = TokenSemantics.createControl(); addToken(JavaTokenType.J_IF_BEGIN, start, 2, semantics); scan(node.getCondition(), semantics); scan(node.getThenStatement(), null); if (node.getElseStatement() != null) { start = positions.getStartPosition(ast, node.getElseStatement()); - semantics = new TokenSemanticsBuilder().control().build(); + semantics = TokenSemantics.createControl(); addToken(JavaTokenType.J_ELSE, start, 4, semantics); } scan(node.getElseStatement(), null); - semantics = new TokenSemanticsBuilder().control().build(); + semantics = TokenSemantics.createControl(); addToken(JavaTokenType.J_IF_END, end, 1, semantics); return null; } @@ -389,7 +387,7 @@ public Void visitIf(IfTree node, TokenSemantics semantics) { @Override public Void visitBreak(BreakTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - semantics = new TokenSemanticsBuilder().control().build(); + semantics = TokenSemantics.createControl(); addToken(JavaTokenType.J_BREAK, start, 5, semantics); super.visitBreak(node, semantics); return null; @@ -398,7 +396,7 @@ public Void visitBreak(BreakTree node, TokenSemantics semantics) { @Override public Void 
visitContinue(ContinueTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - semantics = new TokenSemanticsBuilder().control().build(); + semantics = TokenSemantics.createControl(); addToken(JavaTokenType.J_CONTINUE, start, 8, semantics); super.visitContinue(node, semantics); return null; @@ -407,7 +405,7 @@ public Void visitContinue(ContinueTree node, TokenSemantics semantics) { @Override public Void visitReturn(ReturnTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - semantics = new TokenSemanticsBuilder().control().critical().build(); + semantics = TokenSemantics.createControl(); addToken(JavaTokenType.J_RETURN, start, 6, semantics); super.visitReturn(node, semantics); return null; @@ -416,7 +414,7 @@ public Void visitReturn(ReturnTree node, TokenSemantics semantics) { @Override public Void visitThrow(ThrowTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - semantics = new TokenSemanticsBuilder().control().critical().build(); + semantics = TokenSemantics.createControl(); addToken(JavaTokenType.J_THROW, start, 5, semantics); super.visitThrow(node, semantics); return null; @@ -426,10 +424,10 @@ public Void visitThrow(ThrowTree node, TokenSemantics semantics) { public Void visitNewClass(NewClassTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); if (node.getTypeArguments().size() > 0) { - semantics = new TokenSemanticsBuilder().build(); + semantics = new TokenSemantics(); addToken(JavaTokenType.J_GENERIC, start, 3 + node.getIdentifier().toString().length(), semantics); } - semantics = new TokenSemanticsBuilder().build(); + semantics = new TokenSemantics(); addToken(JavaTokenType.J_NEWCLASS, start, 3, semantics); super.visitNewClass(node, semantics); return null; @@ -439,7 +437,7 @@ public Void visitNewClass(NewClassTree node, TokenSemantics semantics) { public Void visitTypeParameter(TypeParameterTree node, 
TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); // This is odd, but also done like this in Java 1.7 - semantics = new TokenSemanticsBuilder().build(); + semantics = new TokenSemantics(); addToken(JavaTokenType.J_GENERIC, start, 1, semantics); super.visitTypeParameter(node, semantics); return null; @@ -449,20 +447,20 @@ public Void visitTypeParameter(TypeParameterTree node, TokenSemantics semantics) public Void visitNewArray(NewArrayTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = new TokenSemanticsBuilder().build(); + semantics = new TokenSemantics(); addToken(JavaTokenType.J_NEWARRAY, start, 3, semantics); scan(node.getType(), semantics); scan(node.getDimensions(), semantics); boolean hasInit = node.getInitializers() != null && !node.getInitializers().isEmpty(); if (hasInit) { start = positions.getStartPosition(ast, node.getInitializers().get(0)); - semantics = new TokenSemanticsBuilder().build(); + semantics = new TokenSemantics(); addToken(JavaTokenType.J_ARRAY_INIT_BEGIN, start, 1, semantics); } scan(node.getInitializers(), semantics); // super method has annotation processing but we have it disabled anyways if (hasInit) { - semantics = new TokenSemanticsBuilder().build(); + semantics = new TokenSemantics(); addToken(JavaTokenType.J_ARRAY_INIT_END, end, 1, semantics); } return null; @@ -471,8 +469,8 @@ public Void visitNewArray(NewArrayTree node, TokenSemantics semantics) { @Override public Void visitAssignment(AssignmentTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - // todo may need to be critical when non-registered (global) variables are involved, not sure how to check - semantics = new TokenSemanticsBuilder().build(); + // todo may need to be keep when non-registered (global) variables are involved, not sure how to check + semantics = new TokenSemantics(); 
addToken(JavaTokenType.J_ASSIGN, start, 1, semantics); variableRegistry.setNextOperation(NextOperation.WRITE); super.visitAssignment(node, semantics); @@ -483,7 +481,7 @@ public Void visitAssignment(AssignmentTree node, TokenSemantics semantics) { @Override public Void visitCompoundAssignment(CompoundAssignmentTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - semantics = new TokenSemanticsBuilder().build(); + semantics = new TokenSemantics(); addToken(JavaTokenType.J_ASSIGN, start, 1, semantics); variableRegistry.setNextOperation(NextOperation.READ_WRITE); super.visitCompoundAssignment(node, semantics); @@ -492,7 +490,7 @@ public Void visitCompoundAssignment(CompoundAssignmentTree node, TokenSemantics @Override public Void visitUnary(UnaryTree node, TokenSemantics semantics) { - semantics = new TokenSemanticsBuilder().build(); + semantics = new TokenSemantics(); if (Set.of(Tree.Kind.PREFIX_INCREMENT, Tree.Kind.POSTFIX_INCREMENT, Tree.Kind.PREFIX_DECREMENT, Tree.Kind.POSTFIX_DECREMENT) .contains(node.getKind())) { long start = positions.getStartPosition(ast, node); @@ -506,7 +504,7 @@ public Void visitUnary(UnaryTree node, TokenSemantics semantics) { @Override public Void visitAssert(AssertTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - semantics = new TokenSemanticsBuilder().control().critical().build(); + semantics = TokenSemantics.createControl(); addToken(JavaTokenType.J_ASSERT, start, 6, semantics); super.visitAssert(node, semantics); return null; @@ -515,17 +513,18 @@ public Void visitAssert(AssertTree node, TokenSemantics semantics) { @Override public Void visitVariable(VariableTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - // member variable defs are critical + String name = node.getName().toString(); boolean inLocalScope = variableRegistry.inLocalScope(); - semantics = new 
TokenSemanticsBuilder().critical(!inLocalScope).build(); - if (inLocalScope) { - String name = node.getName().toString(); boolean mutable = isMutable(node.getType()); - Variable variable = variableRegistry.registerLocalVariable(name, mutable); - semantics.addWrite(variable); // manually add variable to semantics since identifier isn't visited - } // no else since member variable defs are registered on class visit - + variableRegistry.registerLocalVariable(name, mutable); + semantics = new TokenSemantics(); + } else { + semantics = TokenSemantics.createKeep(); + } + variableRegistry.setNextOperation(NextOperation.WRITE); + // manually add variable to semantics since identifier isn't visited + variableRegistry.registerVariableOperation(name, !inLocalScope, semantics); addToken(JavaTokenType.J_VARDEF, start, node.toString().length(), semantics); super.visitVariable(node, semantics); return null; @@ -534,7 +533,7 @@ public Void visitVariable(VariableTree node, TokenSemantics semantics) { @Override public Void visitConditionalExpression(ConditionalExpressionTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - semantics = new TokenSemanticsBuilder().build(); + semantics = new TokenSemantics(); addToken(JavaTokenType.J_COND, start, 1, semantics); super.visitConditionalExpression(node, semantics); return null; @@ -543,7 +542,7 @@ public Void visitConditionalExpression(ConditionalExpressionTree node, TokenSema @Override public Void visitMethodInvocation(MethodInvocationTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - semantics = new TokenSemanticsBuilder().critical().control().build(); + semantics = TokenSemantics.createControl(); variableRegistry.addAllMemberVariablesAsReads(semantics); addToken(JavaTokenType.J_APPLY, start, positions.getEndPosition(ast, node.getMethodSelect()) - start, semantics); scan(node.getTypeArguments(), semantics); @@ -560,7 +559,7 @@ public Void 
visitMethodInvocation(MethodInvocationTree node, TokenSemantics sema @Override public Void visitAnnotation(AnnotationTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - semantics = new TokenSemanticsBuilder().build(); + semantics = new TokenSemantics(); addToken(JavaTokenType.J_ANNO, start, 1, semantics); super.visitAnnotation(node, semantics); return null; @@ -570,10 +569,10 @@ public Void visitAnnotation(AnnotationTree node, TokenSemantics semantics) { public Void visitModule(ModuleTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = new TokenSemanticsBuilder().critical().control().build(); + semantics = TokenSemantics.createControl(); addToken(JavaTokenType.J_MODULE_BEGIN, start, 6, semantics); super.visitModule(node, null); - semantics = new TokenSemanticsBuilder().critical().control().build(); + semantics = TokenSemantics.createControl(); addToken(JavaTokenType.J_MODULE_END, end, 1, semantics); return null; } @@ -581,7 +580,7 @@ public Void visitModule(ModuleTree node, TokenSemantics semantics) { @Override public Void visitRequires(RequiresTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - semantics = new TokenSemanticsBuilder().critical().control().build(); + semantics = TokenSemantics.createControl(); addToken(JavaTokenType.J_REQUIRES, start, 8, semantics); super.visitRequires(node, semantics); return null; @@ -590,7 +589,7 @@ public Void visitRequires(RequiresTree node, TokenSemantics semantics) { @Override public Void visitProvides(ProvidesTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - semantics = new TokenSemanticsBuilder().critical().control().build(); + semantics = TokenSemantics.createControl(); addToken(JavaTokenType.J_PROVIDES, start, 8, semantics); super.visitProvides(node, semantics); return null; @@ -599,7 +598,7 @@ 
public Void visitProvides(ProvidesTree node, TokenSemantics semantics) { @Override public Void visitExports(ExportsTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); - semantics = new TokenSemanticsBuilder().critical().control().build(); + semantics = TokenSemantics.createControl(); addToken(JavaTokenType.J_EXPORTS, start, 7, semantics); super.visitExports(node, semantics); return null; @@ -616,7 +615,7 @@ public Void visitErroneous(ErroneousTree node, TokenSemantics semantics) { public Void visitYield(YieldTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node); - semantics = new TokenSemanticsBuilder().control().build(); + semantics = TokenSemantics.createControl(); addToken(JavaTokenType.J_YIELD, start, end, semantics); super.visitYield(node, semantics); return null; @@ -626,7 +625,7 @@ public Void visitYield(YieldTree node, TokenSemantics semantics) { public Void visitDefaultCaseLabel(DefaultCaseLabelTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node); - semantics = new TokenSemanticsBuilder().control().build(); + semantics = TokenSemantics.createControl(); addToken(JavaTokenType.J_DEFAULT, start, end, semantics); super.visitDefaultCaseLabel(node, semantics); return null; From 51f234a0ba573131dfdedc77e1681fd69155739d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Sat, 4 Mar 2023 18:28:05 +0100 Subject: [PATCH 031/132] Rename TokenSemantics to CodeSemantics --- .../de/jplag/normalization/TokenLine.java | 8 +- .../src/main/java/de/jplag/Token.java | 10 +- ...TokenSemantics.java => CodeSemantics.java} | 67 +++--- .../de/jplag/semantics/VariableRegistry.java | 4 +- .../java/TokenGeneratingTreeScanner.java | 202 +++++++++--------- 5 files changed, 145 insertions(+), 146 deletions(-) rename 
language-api/src/main/java/de/jplag/semantics/{TokenSemantics.java => CodeSemantics.java} (58%) diff --git a/core/src/main/java/de/jplag/normalization/TokenLine.java b/core/src/main/java/de/jplag/normalization/TokenLine.java index e6ceb99c0..95325b186 100644 --- a/core/src/main/java/de/jplag/normalization/TokenLine.java +++ b/core/src/main/java/de/jplag/normalization/TokenLine.java @@ -5,19 +5,19 @@ import java.util.List; import de.jplag.Token; -import de.jplag.semantics.TokenSemantics; +import de.jplag.semantics.CodeSemantics; class TokenLine implements Comparable { private final List tokens; private final int lineNumber; - private final TokenSemantics semantics; + private final CodeSemantics semantics; private boolean keep; TokenLine(List tokens, int lineNumber) { this.tokens = Collections.unmodifiableList(tokens); this.lineNumber = lineNumber; - semantics = TokenSemantics.join(tokens.stream().map(Token::getSemantics).toList()); + semantics = CodeSemantics.join(tokens.stream().map(Token::getSemantics).toList()); keep = semantics.keep(); } @@ -25,7 +25,7 @@ public List tokens() { return tokens; } - public TokenSemantics semantics() { + public CodeSemantics semantics() { return semantics; } diff --git a/language-api/src/main/java/de/jplag/Token.java b/language-api/src/main/java/de/jplag/Token.java index 89f425ed1..29a91610b 100644 --- a/language-api/src/main/java/de/jplag/Token.java +++ b/language-api/src/main/java/de/jplag/Token.java @@ -5,7 +5,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import de.jplag.semantics.TokenSemantics; +import de.jplag.semantics.CodeSemantics; /** * This class represents a token in a source code. It can represent keywords, identifiers, syntactical structures etc. 
@@ -22,7 +22,7 @@ public class Token { private int length; private File file; private TokenType type; - private TokenSemantics semantics; // value null if no semantics + private CodeSemantics semantics; // value null if no semantics /** * Creates a token with column and length information. @@ -55,7 +55,7 @@ public Token(TokenType type, File file, int line, int column, int length) { * @param length is the length of the token in the source code. * @param semantics is a record containing semantic information about the token. */ - public Token(TokenType type, File file, int line, int column, int length, TokenSemantics semantics) { + public Token(TokenType type, File file, int line, int column, int length, CodeSemantics semantics) { this(type, file, line, column, length); this.semantics = semantics; } @@ -74,7 +74,7 @@ public static Token fileEnd(File file) { * @param file is the name of the source code file. */ public static Token semanticFileEnd(File file) { - TokenSemantics semantics = TokenSemantics.createControl(); + CodeSemantics semantics = CodeSemantics.createControl(); return new Token(SharedTokenType.FILE_END, file, NO_VALUE, NO_VALUE, NO_VALUE, semantics); } @@ -124,7 +124,7 @@ public String toString() { /** * @return the semantics of the token. */ - public TokenSemantics getSemantics() { + public CodeSemantics getSemantics() { return semantics; } } diff --git a/language-api/src/main/java/de/jplag/semantics/TokenSemantics.java b/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java similarity index 58% rename from language-api/src/main/java/de/jplag/semantics/TokenSemantics.java rename to language-api/src/main/java/de/jplag/semantics/CodeSemantics.java index 9a1fc9ec5..55e6ef79c 100644 --- a/language-api/src/main/java/de/jplag/semantics/TokenSemantics.java +++ b/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java @@ -11,65 +11,64 @@ * @param keep Whether the code snippet must be kept or if it may be removed. 
* @param ordering In which way the ordering of the code snippet relative to other code snippets of the same type is * relevant. For the possible options see {@link Ordering}. - * @param bidirectionalBlockRelation Which relation (if any) the code snippet has to bidirectional block, meaning a - * block where any statement within it may be executed after any other. This will typically be a loop. For the possible - * options see {@link BlockRelation}. + * @param bidirectionalBlockRelation Which relation the code snippet has to bidirectional block, meaning a block where + * any statement within it may be executed after any other. This will typically be a loop. For the possible options see + * {@link BlockRelation}. * @param reads A set of the variables which were (potentially) read from in the code snippet. * @param writes A set of the variables which were (potentially) written to in the code snippet. */ -public record TokenSemantics(boolean keep, Ordering ordering, BlockRelation bidirectionalBlockRelation, Set reads, Set writes) { +public record CodeSemantics(boolean keep, Ordering ordering, BlockRelation bidirectionalBlockRelation, Set reads, Set writes) { - private TokenSemantics(boolean keep, Ordering ordering, BlockRelation bidirectionalBlockRelation) { + private CodeSemantics(boolean keep, Ordering ordering, BlockRelation bidirectionalBlockRelation) { this(keep, ordering, bidirectionalBlockRelation, new HashSet<>(), new HashSet<>()); } /** - * Creates a new TokenSemantics instance with the following meaning: The token may be removed, and its order relative to - * other tokens may change. Example: An assignment to a local variable. + * Creates new semantics with the following meaning: The token may be removed, and its order relative to other tokens + * may change. Example: An assignment to a local variable. 
*/ - public TokenSemantics() { + public CodeSemantics() { this(false, Ordering.NONE, BlockRelation.NONE); } /** - * @return A new TokenSemantics instance with the following meaning: The token may not be removed, and its order - * relative to other tokens may change. Example: An attribute declaration. + * @return new semantics with the following meaning: The token may not be removed, and its order relative to other + * tokens may change. Example: An attribute declaration. */ - public static TokenSemantics createKeep() { - return new TokenSemantics(true, Ordering.NONE, BlockRelation.NONE); + public static CodeSemantics createKeep() { + return new CodeSemantics(true, Ordering.NONE, BlockRelation.NONE); } /** - * @return A new TokenSemantics instance with the following meaning: The token may not be removed, and its order must - * stay invariant to other tokens of the same type. Example: A method call which is guaranteed to not result in an - * exception. + * @return new semantics with the following meaning: The token may not be removed, and its order must stay invariant to + * other tokens of the same type. Example: A method call which is guaranteed to not result in an exception. */ - public static TokenSemantics createCritical() { - return new TokenSemantics(true, Ordering.PARTIAL, BlockRelation.NONE); + public static CodeSemantics createCritical() { + return new CodeSemantics(true, Ordering.PARTIAL, BlockRelation.NONE); } /** - * @return A new TokenSemantics instance with the following meaning: The token may not be removed, and its order must - * stay invariant to all other tokens. Example: A return statement. + * @return new semantics with the following meaning: The token may not be removed, and its order must stay invariant to + * all other tokens. Example: A return statement. 
*/ - public static TokenSemantics createControl() { - return new TokenSemantics(true, Ordering.FULL, BlockRelation.NONE); + public static CodeSemantics createControl() { + return new CodeSemantics(true, Ordering.FULL, BlockRelation.NONE); } /** - * @return A new TokenSemantics instance with the following meaning: The token may not be removed, and its order must - * stay invariant to all other tokens, which also begins a bidirectional block. Example: The beginning of a while loop. + * @return new semantics with the following meaning: The token may not be removed, and its order must stay invariant to + * all other tokens, which also begins a bidirectional block. Example: The beginning of a while loop. */ - public static TokenSemantics createLoopBegin() { - return new TokenSemantics(true, Ordering.FULL, BlockRelation.BEGINS_BLOCK); + public static CodeSemantics createLoopBegin() { + return new CodeSemantics(true, Ordering.FULL, BlockRelation.BEGINS_BLOCK); } /** - * @return A new TokenSemantics instance with the following meaning: The token may not be removed, and its order must - * stay invariant to all other tokens, which also ends a bidirectional block. Example: The end of a while loop. + * @return new semantics with the following meaning: The token may not be removed, and its order must stay invariant to + * all other tokens, which also ends a bidirectional block. Example: The end of a while loop. */ - public static TokenSemantics createLoopEnd() { - return new TokenSemantics(true, Ordering.FULL, BlockRelation.ENDS_BLOCK); + public static CodeSemantics createLoopEnd() { + return new CodeSemantics(true, Ordering.FULL, BlockRelation.ENDS_BLOCK); } /** @@ -107,21 +106,21 @@ public Set writes() { *
    *
  • keep is the disjunction of all keeps
  • *
  • ordering is the strongest ordering out of all orderings
  • - *
  • bidirectionalBlockRelation is the one that is not NONE out of all bidirectionalBlockRelations if it exists. - * It's assumed that there is at most one. If there isn't one bidirectionalBlockRelation is NONE.
  • + *
  • bidirectionalBlockRelation is the one that is not NONE out of all bidirectionalBlockRelations if it exists. It's + * assumed that there is at most one. If there isn't one bidirectionalBlockRelation is NONE.
  • *
  • reads is the union of all reads
  • *
  • writes is the union of all writes
  • *
* @param semanticsList A list of the semantics which should be joined. * @return New semantics which were created by joining the elements in the semanticsList. */ - public static TokenSemantics join(List semanticsList) { + public static CodeSemantics join(List semanticsList) { boolean keep = false; Ordering ordering = Ordering.NONE; BlockRelation bidirectionalBlockRelation = BlockRelation.NONE; Set reads = new HashSet<>(); Set writes = new HashSet<>(); - for (TokenSemantics semantics : semanticsList) { + for (CodeSemantics semantics : semanticsList) { keep = keep || semantics.keep(); if (semantics.ordering.isStrongerThan(ordering)) { ordering = semantics.ordering; @@ -133,7 +132,7 @@ public static TokenSemantics join(List semanticsList) { reads.addAll(semantics.reads); writes.addAll(semantics.writes); } - return new TokenSemantics(keep, ordering, bidirectionalBlockRelation, reads, writes); + return new CodeSemantics(keep, ordering, bidirectionalBlockRelation, reads, writes); } @Override diff --git a/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java b/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java index 91c993229..bf19cc99a 100644 --- a/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java +++ b/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java @@ -66,7 +66,7 @@ public void registerLocalVariable(String variableName, boolean mutable) { localVariablesByScope.peek().add(variableName); } - public void addAllMemberVariablesAsReads(TokenSemantics semantics) { + public void addAllMemberVariablesAsReads(CodeSemantics semantics) { for (Variable memberVar : memberVariables.values()) { semantics.addRead(memberVar); } @@ -76,7 +76,7 @@ public void clearMemberVariables() { memberVariables.clear(); } - public void registerVariableOperation(String variableName, boolean isOwnMember, TokenSemantics semantics) { + public void registerVariableOperation(String variableName, boolean isOwnMember, CodeSemantics 
semantics) { if (!ignoreNextOperation) { Variable variable = isOwnMember ? getMemberVariable(variableName) : getVariable(variableName); if (variable != null) { diff --git a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java index 7b0d34daa..2f24a4e08 100644 --- a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java +++ b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java @@ -8,8 +8,8 @@ import de.jplag.ParsingException; import de.jplag.Token; import de.jplag.TokenType; +import de.jplag.semantics.CodeSemantics; import de.jplag.semantics.NextOperation; -import de.jplag.semantics.TokenSemantics; import de.jplag.semantics.VariableRegistry; import com.sun.source.tree.AnnotationTree; @@ -58,7 +58,7 @@ import com.sun.source.util.SourcePositions; import com.sun.source.util.TreeScanner; -final class TokenGeneratingTreeScanner extends TreeScanner { +final class TokenGeneratingTreeScanner extends TreeScanner { private final File file; private final Parser parser; private final LineMap map; @@ -87,7 +87,7 @@ public List getParsingExceptions() { return parsingExceptions; } - public void addToken(TokenType type, File file, long line, long column, long length, TokenSemantics semantics) { + public void addToken(TokenType type, File file, long line, long column, long length, CodeSemantics semantics) { parser.add(new Token(type, file, (int) line, (int) column, (int) length, semantics)); } @@ -97,7 +97,7 @@ public void addToken(TokenType type, File file, long line, long column, long len * @param position is the start position of the token. * @param length is the length of the token. 
*/ - private void addToken(JavaTokenType tokenType, long position, int length, TokenSemantics semantics) { + private void addToken(JavaTokenType tokenType, long position, int length, CodeSemantics semantics) { addToken(tokenType, file, map.getLineNumber(position), map.getColumnNumber(position), length, semantics); } @@ -107,7 +107,7 @@ private void addToken(JavaTokenType tokenType, long position, int length, TokenS * @param start is the start position of the token. * @param end is the end position of the token for the calculation of the length. */ - private void addToken(JavaTokenType tokenType, long start, long end, TokenSemantics semantics) { + private void addToken(JavaTokenType tokenType, long start, long end, CodeSemantics semantics) { addToken(tokenType, file, map.getLineNumber(start), map.getColumnNumber(start), (end - start), semantics); } @@ -121,7 +121,7 @@ private boolean isMutable(Tree classTree) { } @Override - public Void visitBlock(BlockTree node, TokenSemantics semantics) { + public Void visitBlock(BlockTree node, CodeSemantics semantics) { // kind of weird since in the case of for loops and catches, two scopes are introduced // but I'm pretty sure that's how Java does it internally as well variableRegistry.enterLocalScope(); @@ -131,7 +131,7 @@ public Void visitBlock(BlockTree node, TokenSemantics semantics) { } @Override - public Void visitClass(ClassTree node, TokenSemantics semantics) { + public Void visitClass(ClassTree node, CodeSemantics semantics) { for (var member : node.getMembers()) { if (member.getKind() == Tree.Kind.VARIABLE) { VariableTree variableTree = (VariableTree) member; @@ -143,7 +143,7 @@ public Void visitClass(ClassTree node, TokenSemantics semantics) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = TokenSemantics.createControl(); + semantics = CodeSemantics.createControl(); if (node.getKind() == Tree.Kind.ENUM) { addToken(JavaTokenType.J_ENUM_BEGIN, start, 
4, semantics); } else if (node.getKind() == Tree.Kind.INTERFACE) { @@ -171,7 +171,7 @@ public Void visitClass(ClassTree node, TokenSemantics semantics) { default -> null; }; if (tokenType != null) { - semantics = TokenSemantics.createControl(); + semantics = CodeSemantics.createControl(); addToken(tokenType, end, 1, semantics); } variableRegistry.clearMemberVariables(); @@ -179,29 +179,29 @@ public Void visitClass(ClassTree node, TokenSemantics semantics) { } @Override - public Void visitImport(ImportTree node, TokenSemantics semantics) { + public Void visitImport(ImportTree node, CodeSemantics semantics) { long start = positions.getStartPosition(ast, node); - semantics = TokenSemantics.createKeep(); + semantics = CodeSemantics.createKeep(); addToken(JavaTokenType.J_IMPORT, start, 6, semantics); super.visitImport(node, semantics); return null; } @Override - public Void visitPackage(PackageTree node, TokenSemantics semantics) { + public Void visitPackage(PackageTree node, CodeSemantics semantics) { long start = positions.getStartPosition(ast, node); - semantics = TokenSemantics.createControl(); + semantics = CodeSemantics.createControl(); addToken(JavaTokenType.J_PACKAGE, start, 7, semantics); super.visitPackage(node, semantics); return null; } @Override - public Void visitMethod(MethodTree node, TokenSemantics semantics) { + public Void visitMethod(MethodTree node, CodeSemantics semantics) { variableRegistry.enterLocalScope(); long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = TokenSemantics.createControl(); + semantics = CodeSemantics.createControl(); addToken(JavaTokenType.J_METHOD_BEGIN, start, node.getName().length(), semantics); scan(node.getModifiers(), semantics); scan(node.getReturnType(), semantics); @@ -210,7 +210,7 @@ public Void visitMethod(MethodTree node, TokenSemantics semantics) { scan(node.getReceiverParameter(), semantics); scan(node.getThrows(), semantics); scan(node.getBody(), 
null); - semantics = TokenSemantics.createControl(); + semantics = CodeSemantics.createControl(); variableRegistry.addAllMemberVariablesAsReads(semantics); addToken(JavaTokenType.J_METHOD_END, end, 1, semantics); variableRegistry.exitLocalScope(); @@ -218,106 +218,106 @@ public Void visitMethod(MethodTree node, TokenSemantics semantics) { } @Override - public Void visitSynchronized(SynchronizedTree node, TokenSemantics semantics) { + public Void visitSynchronized(SynchronizedTree node, CodeSemantics semantics) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = TokenSemantics.createControl(); + semantics = CodeSemantics.createControl(); addToken(JavaTokenType.J_SYNC_BEGIN, start, 12, semantics); super.visitSynchronized(node, semantics); - semantics = TokenSemantics.createControl(); + semantics = CodeSemantics.createControl(); addToken(JavaTokenType.J_SYNC_END, end, 1, semantics); return null; } @Override - public Void visitDoWhileLoop(DoWhileLoopTree node, TokenSemantics semantics) { + public Void visitDoWhileLoop(DoWhileLoopTree node, CodeSemantics semantics) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = TokenSemantics.createLoopBegin(); + semantics = CodeSemantics.createLoopBegin(); addToken(JavaTokenType.J_DO_BEGIN, start, 2, semantics); scan(node.getStatement(), null); - semantics = TokenSemantics.createLoopEnd(); + semantics = CodeSemantics.createLoopEnd(); addToken(JavaTokenType.J_DO_END, end, 1, semantics); scan(node.getCondition(), semantics); return null; } @Override - public Void visitWhileLoop(WhileLoopTree node, TokenSemantics semantics) { + public Void visitWhileLoop(WhileLoopTree node, CodeSemantics semantics) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = TokenSemantics.createLoopBegin(); + semantics = CodeSemantics.createLoopBegin(); 
addToken(JavaTokenType.J_WHILE_BEGIN, start, 5, semantics); scan(node.getCondition(), semantics); scan(node.getStatement(), null); - semantics = TokenSemantics.createLoopEnd(); + semantics = CodeSemantics.createLoopEnd(); addToken(JavaTokenType.J_WHILE_END, end, 1, semantics); return null; } @Override - public Void visitForLoop(ForLoopTree node, TokenSemantics semantics) { + public Void visitForLoop(ForLoopTree node, CodeSemantics semantics) { variableRegistry.enterLocalScope(); long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = TokenSemantics.createLoopBegin(); + semantics = CodeSemantics.createLoopBegin(); addToken(JavaTokenType.J_FOR_BEGIN, start, 3, semantics); scan(node.getInitializer(), semantics); scan(node.getCondition(), semantics); scan(node.getUpdate(), semantics); scan(node.getStatement(), null); - semantics = TokenSemantics.createLoopEnd(); + semantics = CodeSemantics.createLoopEnd(); addToken(JavaTokenType.J_FOR_END, end, 1, semantics); variableRegistry.exitLocalScope(); return null; } @Override - public Void visitEnhancedForLoop(EnhancedForLoopTree node, TokenSemantics semantics) { + public Void visitEnhancedForLoop(EnhancedForLoopTree node, CodeSemantics semantics) { variableRegistry.enterLocalScope(); long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = TokenSemantics.createLoopBegin(); + semantics = CodeSemantics.createLoopBegin(); addToken(JavaTokenType.J_FOR_BEGIN, start, 3, semantics); scan(node.getVariable(), semantics); scan(node.getExpression(), semantics); scan(node.getStatement(), null); - semantics = TokenSemantics.createLoopEnd(); + semantics = CodeSemantics.createLoopEnd(); addToken(JavaTokenType.J_FOR_END, end, 1, semantics); variableRegistry.exitLocalScope(); return null; } @Override - public Void visitSwitch(SwitchTree node, TokenSemantics semantics) { + public Void visitSwitch(SwitchTree node, 
CodeSemantics semantics) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = TokenSemantics.createControl(); + semantics = CodeSemantics.createControl(); addToken(JavaTokenType.J_SWITCH_BEGIN, start, 6, semantics); scan(node.getExpression(), semantics); scan(node.getCases(), null); - semantics = TokenSemantics.createControl(); + semantics = CodeSemantics.createControl(); addToken(JavaTokenType.J_SWITCH_END, end, 1, semantics); return null; } @Override - public Void visitSwitchExpression(SwitchExpressionTree node, TokenSemantics semantics) { + public Void visitSwitchExpression(SwitchExpressionTree node, CodeSemantics semantics) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = TokenSemantics.createControl(); + semantics = CodeSemantics.createControl(); addToken(JavaTokenType.J_SWITCH_BEGIN, start, 6, semantics); scan(node.getExpression(), semantics); scan(node.getCases(), null); - semantics = TokenSemantics.createControl(); + semantics = CodeSemantics.createControl(); addToken(JavaTokenType.J_SWITCH_END, end, 1, semantics); return null; } @Override - public Void visitCase(CaseTree node, TokenSemantics semantics) { + public Void visitCase(CaseTree node, CodeSemantics semantics) { long start = positions.getStartPosition(ast, node); - semantics = TokenSemantics.createControl(); + semantics = CodeSemantics.createControl(); addToken(JavaTokenType.J_CASE, start, 4, semantics); scan(node.getExpressions(), semantics); if (node.getCaseKind() == CaseTree.CaseKind.RULE) { @@ -329,148 +329,148 @@ public Void visitCase(CaseTree node, TokenSemantics semantics) { } @Override - public Void visitTry(TryTree node, TokenSemantics semantics) { + public Void visitTry(TryTree node, CodeSemantics semantics) { long start = positions.getStartPosition(ast, node); - semantics = TokenSemantics.createControl(); + semantics = CodeSemantics.createControl(); 
addToken(JavaTokenType.J_TRY_BEGIN, start, 3, semantics); scan(node.getResources(), semantics); scan(node.getBlock(), null); long end = positions.getEndPosition(ast, node); - semantics = TokenSemantics.createControl(); + semantics = CodeSemantics.createControl(); addToken(JavaTokenType.J_TRY_END, end, 1, semantics); scan(node.getCatches(), null); if (node.getFinallyBlock() != null) { start = positions.getStartPosition(ast, node.getFinallyBlock()); - semantics = TokenSemantics.createControl(); + semantics = CodeSemantics.createControl(); addToken(JavaTokenType.J_FINALLY_BEGIN, start, 3, semantics); scan(node.getFinallyBlock(), null); end = positions.getEndPosition(ast, node.getFinallyBlock()); - semantics = TokenSemantics.createControl(); + semantics = CodeSemantics.createControl(); addToken(JavaTokenType.J_FINALLY_END, end, 1, semantics); } return null; // return value isn't used } @Override - public Void visitCatch(CatchTree node, TokenSemantics semantics) { + public Void visitCatch(CatchTree node, CodeSemantics semantics) { variableRegistry.enterLocalScope(); long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = TokenSemantics.createControl(); + semantics = CodeSemantics.createControl(); addToken(JavaTokenType.J_CATCH_BEGIN, start, 5, semantics); super.visitCatch(node, null); // can leave this since catch parameter is variable declaration and thus always generates a token - semantics = TokenSemantics.createControl(); + semantics = CodeSemantics.createControl(); addToken(JavaTokenType.J_CATCH_END, end, 1, semantics); variableRegistry.exitLocalScope(); return null; } @Override - public Void visitIf(IfTree node, TokenSemantics semantics) { + public Void visitIf(IfTree node, CodeSemantics semantics) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = TokenSemantics.createControl(); + semantics = CodeSemantics.createControl(); 
addToken(JavaTokenType.J_IF_BEGIN, start, 2, semantics); scan(node.getCondition(), semantics); scan(node.getThenStatement(), null); if (node.getElseStatement() != null) { start = positions.getStartPosition(ast, node.getElseStatement()); - semantics = TokenSemantics.createControl(); + semantics = CodeSemantics.createControl(); addToken(JavaTokenType.J_ELSE, start, 4, semantics); } scan(node.getElseStatement(), null); - semantics = TokenSemantics.createControl(); + semantics = CodeSemantics.createControl(); addToken(JavaTokenType.J_IF_END, end, 1, semantics); return null; } @Override - public Void visitBreak(BreakTree node, TokenSemantics semantics) { + public Void visitBreak(BreakTree node, CodeSemantics semantics) { long start = positions.getStartPosition(ast, node); - semantics = TokenSemantics.createControl(); + semantics = CodeSemantics.createControl(); addToken(JavaTokenType.J_BREAK, start, 5, semantics); super.visitBreak(node, semantics); return null; } @Override - public Void visitContinue(ContinueTree node, TokenSemantics semantics) { + public Void visitContinue(ContinueTree node, CodeSemantics semantics) { long start = positions.getStartPosition(ast, node); - semantics = TokenSemantics.createControl(); + semantics = CodeSemantics.createControl(); addToken(JavaTokenType.J_CONTINUE, start, 8, semantics); super.visitContinue(node, semantics); return null; } @Override - public Void visitReturn(ReturnTree node, TokenSemantics semantics) { + public Void visitReturn(ReturnTree node, CodeSemantics semantics) { long start = positions.getStartPosition(ast, node); - semantics = TokenSemantics.createControl(); + semantics = CodeSemantics.createControl(); addToken(JavaTokenType.J_RETURN, start, 6, semantics); super.visitReturn(node, semantics); return null; } @Override - public Void visitThrow(ThrowTree node, TokenSemantics semantics) { + public Void visitThrow(ThrowTree node, CodeSemantics semantics) { long start = positions.getStartPosition(ast, node); - semantics = 
TokenSemantics.createControl(); + semantics = CodeSemantics.createControl(); addToken(JavaTokenType.J_THROW, start, 5, semantics); super.visitThrow(node, semantics); return null; } @Override - public Void visitNewClass(NewClassTree node, TokenSemantics semantics) { + public Void visitNewClass(NewClassTree node, CodeSemantics semantics) { long start = positions.getStartPosition(ast, node); if (node.getTypeArguments().size() > 0) { - semantics = new TokenSemantics(); + semantics = new CodeSemantics(); addToken(JavaTokenType.J_GENERIC, start, 3 + node.getIdentifier().toString().length(), semantics); } - semantics = new TokenSemantics(); + semantics = new CodeSemantics(); addToken(JavaTokenType.J_NEWCLASS, start, 3, semantics); super.visitNewClass(node, semantics); return null; } @Override - public Void visitTypeParameter(TypeParameterTree node, TokenSemantics semantics) { + public Void visitTypeParameter(TypeParameterTree node, CodeSemantics semantics) { long start = positions.getStartPosition(ast, node); // This is odd, but also done like this in Java 1.7 - semantics = new TokenSemantics(); + semantics = new CodeSemantics(); addToken(JavaTokenType.J_GENERIC, start, 1, semantics); super.visitTypeParameter(node, semantics); return null; } @Override - public Void visitNewArray(NewArrayTree node, TokenSemantics semantics) { + public Void visitNewArray(NewArrayTree node, CodeSemantics semantics) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = new TokenSemantics(); + semantics = new CodeSemantics(); addToken(JavaTokenType.J_NEWARRAY, start, 3, semantics); scan(node.getType(), semantics); scan(node.getDimensions(), semantics); boolean hasInit = node.getInitializers() != null && !node.getInitializers().isEmpty(); if (hasInit) { start = positions.getStartPosition(ast, node.getInitializers().get(0)); - semantics = new TokenSemantics(); + semantics = new CodeSemantics(); 
addToken(JavaTokenType.J_ARRAY_INIT_BEGIN, start, 1, semantics); } scan(node.getInitializers(), semantics); // super method has annotation processing but we have it disabled anyways if (hasInit) { - semantics = new TokenSemantics(); + semantics = new CodeSemantics(); addToken(JavaTokenType.J_ARRAY_INIT_END, end, 1, semantics); } return null; } @Override - public Void visitAssignment(AssignmentTree node, TokenSemantics semantics) { + public Void visitAssignment(AssignmentTree node, CodeSemantics semantics) { long start = positions.getStartPosition(ast, node); // todo may need to be keep when non-registered (global) variables are involved, not sure how to check - semantics = new TokenSemantics(); + semantics = new CodeSemantics(); addToken(JavaTokenType.J_ASSIGN, start, 1, semantics); variableRegistry.setNextOperation(NextOperation.WRITE); super.visitAssignment(node, semantics); @@ -479,9 +479,9 @@ public Void visitAssignment(AssignmentTree node, TokenSemantics semantics) { } @Override - public Void visitCompoundAssignment(CompoundAssignmentTree node, TokenSemantics semantics) { + public Void visitCompoundAssignment(CompoundAssignmentTree node, CodeSemantics semantics) { long start = positions.getStartPosition(ast, node); - semantics = new TokenSemantics(); + semantics = new CodeSemantics(); addToken(JavaTokenType.J_ASSIGN, start, 1, semantics); variableRegistry.setNextOperation(NextOperation.READ_WRITE); super.visitCompoundAssignment(node, semantics); @@ -489,8 +489,8 @@ public Void visitCompoundAssignment(CompoundAssignmentTree node, TokenSemantics } @Override - public Void visitUnary(UnaryTree node, TokenSemantics semantics) { - semantics = new TokenSemantics(); + public Void visitUnary(UnaryTree node, CodeSemantics semantics) { + semantics = new CodeSemantics(); if (Set.of(Tree.Kind.PREFIX_INCREMENT, Tree.Kind.POSTFIX_INCREMENT, Tree.Kind.PREFIX_DECREMENT, Tree.Kind.POSTFIX_DECREMENT) .contains(node.getKind())) { long start = positions.getStartPosition(ast, 
node); @@ -502,25 +502,25 @@ public Void visitUnary(UnaryTree node, TokenSemantics semantics) { } @Override - public Void visitAssert(AssertTree node, TokenSemantics semantics) { + public Void visitAssert(AssertTree node, CodeSemantics semantics) { long start = positions.getStartPosition(ast, node); - semantics = TokenSemantics.createControl(); + semantics = CodeSemantics.createControl(); addToken(JavaTokenType.J_ASSERT, start, 6, semantics); super.visitAssert(node, semantics); return null; } @Override - public Void visitVariable(VariableTree node, TokenSemantics semantics) { + public Void visitVariable(VariableTree node, CodeSemantics semantics) { long start = positions.getStartPosition(ast, node); String name = node.getName().toString(); boolean inLocalScope = variableRegistry.inLocalScope(); if (inLocalScope) { boolean mutable = isMutable(node.getType()); variableRegistry.registerLocalVariable(name, mutable); - semantics = new TokenSemantics(); + semantics = new CodeSemantics(); } else { - semantics = TokenSemantics.createKeep(); + semantics = CodeSemantics.createKeep(); } variableRegistry.setNextOperation(NextOperation.WRITE); // manually add variable to semantics since identifier isn't visited @@ -531,18 +531,18 @@ public Void visitVariable(VariableTree node, TokenSemantics semantics) { } @Override - public Void visitConditionalExpression(ConditionalExpressionTree node, TokenSemantics semantics) { + public Void visitConditionalExpression(ConditionalExpressionTree node, CodeSemantics semantics) { long start = positions.getStartPosition(ast, node); - semantics = new TokenSemantics(); + semantics = new CodeSemantics(); addToken(JavaTokenType.J_COND, start, 1, semantics); super.visitConditionalExpression(node, semantics); return null; } @Override - public Void visitMethodInvocation(MethodInvocationTree node, TokenSemantics semantics) { + public Void visitMethodInvocation(MethodInvocationTree node, CodeSemantics semantics) { long start = 
positions.getStartPosition(ast, node); - semantics = TokenSemantics.createControl(); + semantics = CodeSemantics.createControl(); variableRegistry.addAllMemberVariablesAsReads(semantics); addToken(JavaTokenType.J_APPLY, start, positions.getEndPosition(ast, node.getMethodSelect()) - start, semantics); scan(node.getTypeArguments(), semantics); @@ -557,82 +557,82 @@ public Void visitMethodInvocation(MethodInvocationTree node, TokenSemantics sema } @Override - public Void visitAnnotation(AnnotationTree node, TokenSemantics semantics) { + public Void visitAnnotation(AnnotationTree node, CodeSemantics semantics) { long start = positions.getStartPosition(ast, node); - semantics = new TokenSemantics(); + semantics = new CodeSemantics(); addToken(JavaTokenType.J_ANNO, start, 1, semantics); super.visitAnnotation(node, semantics); return null; } @Override - public Void visitModule(ModuleTree node, TokenSemantics semantics) { + public Void visitModule(ModuleTree node, CodeSemantics semantics) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = TokenSemantics.createControl(); + semantics = CodeSemantics.createControl(); addToken(JavaTokenType.J_MODULE_BEGIN, start, 6, semantics); super.visitModule(node, null); - semantics = TokenSemantics.createControl(); + semantics = CodeSemantics.createControl(); addToken(JavaTokenType.J_MODULE_END, end, 1, semantics); return null; } @Override - public Void visitRequires(RequiresTree node, TokenSemantics semantics) { + public Void visitRequires(RequiresTree node, CodeSemantics semantics) { long start = positions.getStartPosition(ast, node); - semantics = TokenSemantics.createControl(); + semantics = CodeSemantics.createControl(); addToken(JavaTokenType.J_REQUIRES, start, 8, semantics); super.visitRequires(node, semantics); return null; } @Override - public Void visitProvides(ProvidesTree node, TokenSemantics semantics) { + public Void visitProvides(ProvidesTree node, 
CodeSemantics semantics) { long start = positions.getStartPosition(ast, node); - semantics = TokenSemantics.createControl(); + semantics = CodeSemantics.createControl(); addToken(JavaTokenType.J_PROVIDES, start, 8, semantics); super.visitProvides(node, semantics); return null; } @Override - public Void visitExports(ExportsTree node, TokenSemantics semantics) { + public Void visitExports(ExportsTree node, CodeSemantics semantics) { long start = positions.getStartPosition(ast, node); - semantics = TokenSemantics.createControl(); + semantics = CodeSemantics.createControl(); addToken(JavaTokenType.J_EXPORTS, start, 7, semantics); super.visitExports(node, semantics); return null; } @Override - public Void visitErroneous(ErroneousTree node, TokenSemantics semantics) { + public Void visitErroneous(ErroneousTree node, CodeSemantics semantics) { parsingExceptions.add(new ParsingException(file, "error while visiting %s".formatted(node))); super.visitErroneous(node, semantics); return null; } @Override - public Void visitYield(YieldTree node, TokenSemantics semantics) { + public Void visitYield(YieldTree node, CodeSemantics semantics) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node); - semantics = TokenSemantics.createControl(); + semantics = CodeSemantics.createControl(); addToken(JavaTokenType.J_YIELD, start, end, semantics); super.visitYield(node, semantics); return null; } @Override - public Void visitDefaultCaseLabel(DefaultCaseLabelTree node, TokenSemantics semantics) { + public Void visitDefaultCaseLabel(DefaultCaseLabelTree node, CodeSemantics semantics) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node); - semantics = TokenSemantics.createControl(); + semantics = CodeSemantics.createControl(); addToken(JavaTokenType.J_DEFAULT, start, end, semantics); super.visitDefaultCaseLabel(node, semantics); return null; } @Override - public Void 
visitMemberSelect(MemberSelectTree node, TokenSemantics semantics) { + public Void visitMemberSelect(MemberSelectTree node, CodeSemantics semantics) { if (isOwnMemberSelect(node)) { variableRegistry.registerVariableOperation(node.getIdentifier().toString(), true, semantics); } @@ -642,7 +642,7 @@ public Void visitMemberSelect(MemberSelectTree node, TokenSemantics semantics) { } @Override - public Void visitIdentifier(IdentifierTree node, TokenSemantics semantics) { + public Void visitIdentifier(IdentifierTree node, CodeSemantics semantics) { variableRegistry.registerVariableOperation(node.toString(), false, semantics); super.visitIdentifier(node, semantics); return null; From ba046cff151a37ac501519277438b2cf86d1aa74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Sun, 5 Mar 2023 14:41:53 +0100 Subject: [PATCH 032/132] Clean up naming in normalization module --- .../de/jplag/normalization/Dependency.java | 12 ++--- .../jplag/normalization/DependencyType.java | 10 ++-- .../normalization/NormalizationGraph.java | 11 ++--- .../NormalizationGraphConstructor.java | 46 +++++++++---------- 4 files changed, 37 insertions(+), 42 deletions(-) diff --git a/core/src/main/java/de/jplag/normalization/Dependency.java b/core/src/main/java/de/jplag/normalization/Dependency.java index 0a6ed8ef1..41da16d22 100644 --- a/core/src/main/java/de/jplag/normalization/Dependency.java +++ b/core/src/main/java/de/jplag/normalization/Dependency.java @@ -8,7 +8,7 @@ class Dependency { private Set items; private boolean isData; - private boolean isDataThroughLoop; + private boolean isReverseData; Dependency() { items = new HashSet<>(); @@ -19,15 +19,15 @@ boolean isData() { return isData; } - boolean isDataThroughLoop() { - return isDataThroughLoop; + boolean isReverseData() { + return isReverseData; } void addItem(DependencyType type, Variable cause) { - if (type == DependencyType.DATA) + if (type == DependencyType.VARIABLE_DATA) isData = true; - if (type == 
DependencyType.DATA_THROUGH_LOOP) - isDataThroughLoop = true; + if (type == DependencyType.VARIABLE_REVERSE_DATA) + isReverseData = true; items.add(new DependencyItem(type, cause)); } } diff --git a/core/src/main/java/de/jplag/normalization/DependencyType.java b/core/src/main/java/de/jplag/normalization/DependencyType.java index a0ccd17a2..626e51666 100644 --- a/core/src/main/java/de/jplag/normalization/DependencyType.java +++ b/core/src/main/java/de/jplag/normalization/DependencyType.java @@ -1,9 +1,9 @@ package de.jplag.normalization; enum DependencyType { - DATA, - DATA_THROUGH_LOOP, - ORDER, - CONTROL, - CRITICAL + VARIABLE_DATA, + VARIABLE_REVERSE_DATA, + VARIABLE_ORDER, + ORDERING_FULL, + ORDERING_PARTIAL } diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java index 065b24534..a2138ddbf 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java @@ -48,19 +48,14 @@ private void spreadKeep() { Deque visit = new LinkedList<>(graph.vertexSet().stream().filter(TokenLine::keep).toList()); while (!visit.isEmpty()) { TokenLine current = visit.pop(); - for (TokenLine pred : Graphs.predecessorListOf(graph, current)) { // performance? - Dependency dependency = graph.getEdge(pred, current); - if ((dependency.isData() || dependency.isDataThroughLoop()) && !pred.keep()) { + for (TokenLine pred : Graphs.predecessorListOf(graph, current)) { // performance of iteration? + if (!pred.keep() && graph.getEdge(pred, current).isData()) { pred.markKeep(); visit.add(pred); } } - // not great performance-wise but I doubt it matters at this stage... 
- // could instead insert data-through-loop edges the other way around, which arguably makes more sense semantically - // and turn them around here, but too much code for me to bother right now for (TokenLine succ : Graphs.successorListOf(graph, current)) { - Dependency dependency = graph.getEdge(current, succ); - if (dependency.isDataThroughLoop() && !succ.keep()) { + if (!succ.keep() && graph.getEdge(current, succ).isReverseData()) { succ.markKeep(); visit.add(succ); } diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java index abf333b27..55b4721ad 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java @@ -15,7 +15,7 @@ class NormalizationGraphConstructor { private SimpleDirectedGraph graph; - private int loopDepth; + private int bidirectionalBlockDepth; private Collection fullOrderingIngoing; private TokenLine lastFullOrdering; private TokenLine lastPartialOrdering; @@ -25,7 +25,7 @@ class NormalizationGraphConstructor { NormalizationGraphConstructor(List tokens) { graph = new SimpleDirectedGraph<>(Dependency.class); - loopDepth = 0; + bidirectionalBlockDepth = 0; fullOrderingIngoing = new LinkedList<>(); variableReads = new HashMap<>(); variableWrites = new HashMap<>(); @@ -47,30 +47,30 @@ SimpleDirectedGraph get() { private void addTokenLine(TokenLine tokenLine) { graph.addVertex(tokenLine); this.current = tokenLine; - processLoops(); + processBidirectionalBlocks(); processFullOrdering(); processPartialOrdering(); processReads(); processWrites(); - current.semantics().reads().forEach(r -> addVarToMap(r, variableReads)); - current.semantics().writes().forEach(w -> addVarToMap(w, variableWrites)); + current.semantics().reads().forEach(variable -> addVariableToMap(variableReads, variable)); + 
current.semantics().writes().forEach(variable -> addVariableToMap(variableWrites, variable)); } - private void processLoops() { + private void processBidirectionalBlocks() { if (current.semantics().bidirectionalBlockRelation() == BlockRelation.BEGINS_BLOCK) - loopDepth++; + bidirectionalBlockDepth++; if (current.semantics().bidirectionalBlockRelation() == BlockRelation.ENDS_BLOCK) - loopDepth--; + bidirectionalBlockDepth--; } private void processFullOrdering() { if (current.semantics().ordering() == Ordering.FULL) { - addCurrentEdges(fullOrderingIngoing, DependencyType.CONTROL, null); // ingoing edges + addCurrentEdges(fullOrderingIngoing, DependencyType.ORDERING_FULL, null); // ingoing edges fullOrderingIngoing.clear(); lastFullOrdering = current; } else if (lastFullOrdering != null) { - addCurrentEdge(lastFullOrdering, DependencyType.CONTROL, null); // outgoing edges + addCurrentEdge(lastFullOrdering, DependencyType.ORDERING_FULL, null); // outgoing edges } fullOrderingIngoing.add(current); } @@ -78,29 +78,29 @@ private void processFullOrdering() { private void processPartialOrdering() { if (current.semantics().ordering() == Ordering.PARTIAL) { if (lastPartialOrdering != null) { - addCurrentEdge(lastPartialOrdering, DependencyType.CRITICAL, null); + addCurrentEdge(lastPartialOrdering, DependencyType.ORDERING_PARTIAL, null); } lastPartialOrdering = current; } } private void processReads() { - for (Variable r : current.semantics().reads()) { - addCurrentEdgesVar(DependencyType.DATA, r, variableWrites); + for (Variable variable : current.semantics().reads()) { + addCurrentEdgesByVariable(variableWrites, variable, DependencyType.VARIABLE_DATA); } } private void processWrites() { - DependencyType writeToReadDependencyType = loopDepth > 0 ? 
DependencyType.DATA_THROUGH_LOOP : DependencyType.ORDER; - for (Variable w : current.semantics().writes()) { - addCurrentEdgesVar(DependencyType.ORDER, w, variableWrites); - addCurrentEdgesVar(writeToReadDependencyType, w, variableReads); - addVarToMap(w, variableWrites); + DependencyType readToWriteDependencyType = bidirectionalBlockDepth > 0 ? DependencyType.VARIABLE_REVERSE_DATA : DependencyType.VARIABLE_ORDER; + for (Variable variable : current.semantics().writes()) { + addCurrentEdgesByVariable(variableWrites, variable, DependencyType.VARIABLE_ORDER); + addCurrentEdgesByVariable(variableReads, variable, readToWriteDependencyType); + addVariableToMap(variableWrites, variable); } } - private void addCurrentEdgesVar(DependencyType type, Variable var, Map> varMap) { - addCurrentEdges(varMap.getOrDefault(var, new LinkedList<>()), type, var); + private void addCurrentEdgesByVariable(Map> variableMap, Variable variable, DependencyType type) { + addCurrentEdges(variableMap.getOrDefault(variable, new LinkedList<>()), type, variable); } private void addCurrentEdges(Collection starts, DependencyType type, Variable cause) { @@ -122,8 +122,8 @@ private void addCurrentEdge(TokenLine start, DependencyType type, Variable cause dependency.addItem(type, cause); } - private void addVarToMap(Variable var, Map> varMap) { - varMap.putIfAbsent(var, new LinkedList<>()); - varMap.get(var).add(current); + private void addVariableToMap(Map> variableMap, Variable variable) { + variableMap.putIfAbsent(variable, new LinkedList<>()); + variableMap.get(variable).add(current); } } From 3698d4995d8ead33572d391815cd7c60bfbcee01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Sun, 5 Mar 2023 15:35:37 +0100 Subject: [PATCH 033/132] Handle writes to non-local variables --- .../normalization/NormalizationGraph.java | 24 ++-- .../NormalizationGraphConstructor.java | 10 +- .../de/jplag/normalization/TokenLine.java | 10 +- .../de/jplag/semantics/BlockRelation.java | 2 +- 
.../de/jplag/semantics/CodeSemantics.java | 128 ++++++++++++++---- .../java/de/jplag/semantics/Ordering.java | 2 +- .../de/jplag/semantics/VariableRegistry.java | 5 +- .../java/TokenGeneratingTreeScanner.java | 2 - 8 files changed, 121 insertions(+), 62 deletions(-) diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java index a2138ddbf..ccd4c2697 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java @@ -4,6 +4,7 @@ import java.util.LinkedList; import java.util.List; import java.util.PriorityQueue; +import java.util.Set; import java.util.stream.Collectors; import org.jgrapht.Graphs; @@ -29,7 +30,7 @@ public List linearize() { PriorityQueue newRoots = new PriorityQueue<>(); do { TokenLine tokenLine = roots.poll(); - if (tokenLine.keep()) { + if (tokenLine.semantics().keep()) { tokens.addAll(tokenLine.tokens()); } for (TokenLine successorGroup : Graphs.successorListOf(graph, tokenLine)) { @@ -45,20 +46,17 @@ public List linearize() { } private void spreadKeep() { - Deque visit = new LinkedList<>(graph.vertexSet().stream().filter(TokenLine::keep).toList()); + Set originalKeep = graph.vertexSet().stream() // + .filter(tl -> tl.semantics().keep()).collect(Collectors.toSet()); + Deque visit = new LinkedList<>(originalKeep); while (!visit.isEmpty()) { TokenLine current = visit.pop(); - for (TokenLine pred : Graphs.predecessorListOf(graph, current)) { // performance of iteration? 
- if (!pred.keep() && graph.getEdge(pred, current).isData()) { - pred.markKeep(); - visit.add(pred); - } - } - for (TokenLine succ : Graphs.successorListOf(graph, current)) { - if (!succ.keep() && graph.getEdge(current, succ).isReverseData()) { - succ.markKeep(); - visit.add(succ); - } + if (originalKeep.contains(current) || !current.semantics().keep()) { + current.markKeep(); + visit.addAll(Graphs.predecessorListOf(graph, current).stream() // + .filter(pred -> graph.getEdge(pred, current).isData()).toList()); + visit.addAll(Graphs.successorListOf(graph, current).stream() // + .filter(succ -> graph.getEdge(current, succ).isReverseData()).toList()); } } } diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java index 55b4721ad..a187d9da8 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java @@ -9,8 +9,6 @@ import org.jgrapht.graph.SimpleDirectedGraph; import de.jplag.Token; -import de.jplag.semantics.BlockRelation; -import de.jplag.semantics.Ordering; import de.jplag.semantics.Variable; class NormalizationGraphConstructor { @@ -58,14 +56,14 @@ private void addTokenLine(TokenLine tokenLine) { } private void processBidirectionalBlocks() { - if (current.semantics().bidirectionalBlockRelation() == BlockRelation.BEGINS_BLOCK) + if (current.semantics().isBidirectionalBlockBegin()) bidirectionalBlockDepth++; - if (current.semantics().bidirectionalBlockRelation() == BlockRelation.ENDS_BLOCK) + if (current.semantics().isBidirectionalBlockEnd()) bidirectionalBlockDepth--; } private void processFullOrdering() { - if (current.semantics().ordering() == Ordering.FULL) { + if (current.semantics().isFullOrdering()) { addCurrentEdges(fullOrderingIngoing, DependencyType.ORDERING_FULL, null); // ingoing edges fullOrderingIngoing.clear(); lastFullOrdering = 
current; @@ -76,7 +74,7 @@ private void processFullOrdering() { } private void processPartialOrdering() { - if (current.semantics().ordering() == Ordering.PARTIAL) { + if (current.semantics().isPartialOrdering()) { if (lastPartialOrdering != null) { addCurrentEdge(lastPartialOrdering, DependencyType.ORDERING_PARTIAL, null); } diff --git a/core/src/main/java/de/jplag/normalization/TokenLine.java b/core/src/main/java/de/jplag/normalization/TokenLine.java index 95325b186..d6833b533 100644 --- a/core/src/main/java/de/jplag/normalization/TokenLine.java +++ b/core/src/main/java/de/jplag/normalization/TokenLine.java @@ -12,13 +12,11 @@ class TokenLine implements Comparable { private final List tokens; private final int lineNumber; private final CodeSemantics semantics; - private boolean keep; TokenLine(List tokens, int lineNumber) { this.tokens = Collections.unmodifiableList(tokens); this.lineNumber = lineNumber; - semantics = CodeSemantics.join(tokens.stream().map(Token::getSemantics).toList()); - keep = semantics.keep(); + this.semantics = CodeSemantics.join(tokens.stream().map(Token::getSemantics).toList()); } public List tokens() { @@ -29,12 +27,8 @@ public CodeSemantics semantics() { return semantics; } - public boolean keep() { - return keep; - } - public void markKeep() { - keep = true; + semantics.markKeep(); } private int tokenOrdinal(Token token) { diff --git a/language-api/src/main/java/de/jplag/semantics/BlockRelation.java b/language-api/src/main/java/de/jplag/semantics/BlockRelation.java index c219f0c49..9ee12f843 100644 --- a/language-api/src/main/java/de/jplag/semantics/BlockRelation.java +++ b/language-api/src/main/java/de/jplag/semantics/BlockRelation.java @@ -3,7 +3,7 @@ /** * Enumerates the relationships a code snippet can have with a code block. */ -public enum BlockRelation { +enum BlockRelation { /** * This code snippet begins the block. 
*/ diff --git a/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java b/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java index 55e6ef79c..917a5960b 100644 --- a/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java +++ b/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java @@ -7,24 +7,41 @@ import java.util.Set; /** - * This record contains semantic information about a code snippet, in our case either a token or a line of code. - * @param keep Whether the code snippet must be kept or if it may be removed. - * @param ordering In which way the ordering of the code snippet relative to other code snippets of the same type is - * relevant. For the possible options see {@link Ordering}. - * @param bidirectionalBlockRelation Which relation the code snippet has to bidirectional block, meaning a block where - * any statement within it may be executed after any other. This will typically be a loop. For the possible options see - * {@link BlockRelation}. - * @param reads A set of the variables which were (potentially) read from in the code snippet. - * @param writes A set of the variables which were (potentially) written to in the code snippet. + * This class contains semantic information about a code snippet, in our case either a token or a line of code. */ -public record CodeSemantics(boolean keep, Ordering ordering, BlockRelation bidirectionalBlockRelation, Set reads, Set writes) { +public class CodeSemantics { + + private boolean keep; + private Ordering ordering; + private final BlockRelation bidirectionalBlockRelation; + private Set reads; + private Set writes; + + /** + * Creates new semantics. reads and writes, which each contain the variables which were (potentially) read from/written to in this code snippet, are created empty. + * @param keep Whether the code snippet must be kept or if it may be removed. 
+ * @param ordering In which way the ordering of the code snippet relative to other code snippets of the same type is + * relevant. For the possible options see {@link Ordering}. + * @param bidirectionalBlockRelation Which relation the code snippet has to bidirectional block, meaning a block where + * any statement within it may be executed after any other. This will typically be a loop. For the possible options see + * {@link BlockRelation}. + * @param reads A set of the variables which were (potentially) read from in the code snippet. + * @param writes A set of the variables which were (potentially) written to in the code snippet. + */ + private CodeSemantics(boolean keep, Ordering ordering, BlockRelation bidirectionalBlockRelation, Set reads, Set writes) { + this.keep = keep; + this.ordering = ordering; + this.bidirectionalBlockRelation = bidirectionalBlockRelation; + this.reads = reads; + this.writes = writes; + } private CodeSemantics(boolean keep, Ordering ordering, BlockRelation bidirectionalBlockRelation) { this(keep, ordering, bidirectionalBlockRelation, new HashSet<>(), new HashSet<>()); } /** - * Creates new semantics with the following meaning: The token may be removed, and its order relative to other tokens + * Creates new semantics with the following meaning: The code snippet may be removed, and its order relative to other code snippets * may change. Example: An assignment to a local variable. */ public CodeSemantics() { @@ -32,59 +49,94 @@ public CodeSemantics() { } /** - * @return new semantics with the following meaning: The token may not be removed, and its order relative to other - * tokens may change. Example: An attribute declaration. + * @return new semantics with the following meaning: The code snippet may not be removed, and its order relative to other + * code snippets may change. Example: An attribute declaration. 
*/ public static CodeSemantics createKeep() { return new CodeSemantics(true, Ordering.NONE, BlockRelation.NONE); } /** - * @return new semantics with the following meaning: The token may not be removed, and its order must stay invariant to - * other tokens of the same type. Example: A method call which is guaranteed to not result in an exception. + * @return new semantics with the following meaning: The code snippet may not be removed, and its order must stay invariant to + * other code snippets of the same type. Example: A method call which is guaranteed to not result in an exception. */ public static CodeSemantics createCritical() { return new CodeSemantics(true, Ordering.PARTIAL, BlockRelation.NONE); } /** - * @return new semantics with the following meaning: The token may not be removed, and its order must stay invariant to - * all other tokens. Example: A return statement. + * @return new semantics with the following meaning: The code snippet may not be removed, and its order must stay invariant to + * all other code snippets. Example: A return statement. */ public static CodeSemantics createControl() { return new CodeSemantics(true, Ordering.FULL, BlockRelation.NONE); } /** - * @return new semantics with the following meaning: The token may not be removed, and its order must stay invariant to - * all other tokens, which also begins a bidirectional block. Example: The beginning of a while loop. + * @return new semantics with the following meaning: The code snippet may not be removed, and its order must stay invariant to + * all other code snippets, which also begins a bidirectional block. Example: The beginning of a while loop. */ public static CodeSemantics createLoopBegin() { return new CodeSemantics(true, Ordering.FULL, BlockRelation.BEGINS_BLOCK); } /** - * @return new semantics with the following meaning: The token may not be removed, and its order must stay invariant to - * all other tokens, which also ends a bidirectional block. 
Example: The end of a while loop. + * @return new semantics with the following meaning: The code snippet may not be removed, and its order must stay invariant to + * all other code snippets, which also ends a bidirectional block. Example: The end of a while loop. */ public static CodeSemantics createLoopEnd() { return new CodeSemantics(true, Ordering.FULL, BlockRelation.ENDS_BLOCK); } /** - * Add a variable to the set of variables which were (potentially) read from in this code snippet. - * @param variable The variable which is added. + * @return whether this code snippet must be kept. */ - public void addRead(Variable variable) { - reads.add(variable); + public boolean keep() { + return keep; } /** - * Add a variable to the set of variables which were (potentially) written to in this code snippet. - * @param variable The variable which is added. + * Mark this code snippet as having to be kept. */ - public void addWrite(Variable variable) { - writes.add(variable); + public void markKeep() { + keep = true; + } + + /** + * Mark this code snippet as having partial ordering. + */ + void markPartialOrdering() { + if (Ordering.PARTIAL.isStrongerThan(ordering)) { + ordering = Ordering.PARTIAL; + } + } + + /** + * @return whether this code snippet begins a bidirectional block. + */ + public boolean isBidirectionalBlockBegin() { + return bidirectionalBlockRelation == BlockRelation.BEGINS_BLOCK; + } + + /** + * @return whether this code snippet ends a bidirectional block. + */ + public boolean isBidirectionalBlockEnd() { + return bidirectionalBlockRelation == BlockRelation.ENDS_BLOCK; + } + + /** + * @return whether this code snippet has the partial ordering type. + */ + public boolean isPartialOrdering() { + return ordering == Ordering.PARTIAL; + } + + /** + * @return whether this code snippet has the full ordering type. 
+ */ + public boolean isFullOrdering() { + return ordering == Ordering.FULL; } /** @@ -101,6 +153,22 @@ public Set writes() { return Collections.unmodifiableSet(writes); } + /** + * Add a variable to the set of variables which were (potentially) read from in this code snippet. + * @param variable The variable which is added. + */ + public void addRead(Variable variable) { + reads.add(variable); + } + + /** + * Add a variable to the set of variables which were (potentially) written to in this code snippet. + * @param variable The variable which is added. + */ + public void addWrite(Variable variable) { + writes.add(variable); + } + /** * Create new joint semantics by joining a number of existing ones. It has the following properties: *
    @@ -121,7 +189,7 @@ public static CodeSemantics join(List semanticsList) { Set reads = new HashSet<>(); Set writes = new HashSet<>(); for (CodeSemantics semantics : semanticsList) { - keep = keep || semantics.keep(); + keep = keep || semantics.keep; if (semantics.ordering.isStrongerThan(ordering)) { ordering = semantics.ordering; } diff --git a/language-api/src/main/java/de/jplag/semantics/Ordering.java b/language-api/src/main/java/de/jplag/semantics/Ordering.java index 24835e231..b014ad025 100644 --- a/language-api/src/main/java/de/jplag/semantics/Ordering.java +++ b/language-api/src/main/java/de/jplag/semantics/Ordering.java @@ -3,7 +3,7 @@ /** * Enumerates how the order of an item in a sequence relative to other items may be relevant. */ -public enum Ordering { +enum Ordering { /** * The order of the item relative to other items in the sequence is not relevant. */ diff --git a/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java b/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java index bf19cc99a..868b67833 100644 --- a/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java +++ b/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java @@ -83,9 +83,12 @@ public void registerVariableOperation(String variableName, boolean isOwnMember, if (nextOperation.isRead) { semantics.addRead(variable); } - if (nextOperation.isWrite || (nextOperation.isRead && mutableWrite && variable.isMutable())) { + if (nextOperation.isWrite || (mutableWrite && variable.isMutable())) { semantics.addWrite(variable); } + } else if (nextOperation.isWrite) { + semantics.markKeep(); // non-registered variable is written to -> keep! 
+ semantics.markPartialOrdering(); // could have an effect on other such writes and method calls } nextOperation = NextOperation.READ; } diff --git a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java index 2f24a4e08..cd41ac55a 100644 --- a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java +++ b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java @@ -469,12 +469,10 @@ public Void visitNewArray(NewArrayTree node, CodeSemantics semantics) { @Override public Void visitAssignment(AssignmentTree node, CodeSemantics semantics) { long start = positions.getStartPosition(ast, node); - // todo may need to be keep when non-registered (global) variables are involved, not sure how to check semantics = new CodeSemantics(); addToken(JavaTokenType.J_ASSIGN, start, 1, semantics); variableRegistry.setNextOperation(NextOperation.WRITE); super.visitAssignment(node, semantics); - // if (this.assignedVariableWasRegistered) makeSemanticsCritical(semantics) return null; } From 61900707e82d8ed5e538322fbfed72be6dffe732 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Sun, 5 Mar 2023 17:04:07 +0100 Subject: [PATCH 034/132] Fix bug in handling of member variables --- .../main/java/de/jplag/java/TokenGeneratingTreeScanner.java | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java index cd41ac55a..cf9f2464d 100644 --- a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java +++ b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java @@ -111,10 +111,6 @@ private void addToken(JavaTokenType tokenType, long start, long end, CodeSemanti addToken(tokenType, file, map.getLineNumber(start), map.getColumnNumber(start), (end - 
start), semantics); } - private boolean isOwnMemberSelect(MemberSelectTree memberSelect) { - return memberSelect.toString().equals("this"); - } - private boolean isMutable(Tree classTree) { // classTree is null if `var` keyword is used return classTree == null || !IMMUTABLES.contains(classTree.toString()); @@ -631,7 +627,7 @@ public Void visitDefaultCaseLabel(DefaultCaseLabelTree node, CodeSemantics seman @Override public Void visitMemberSelect(MemberSelectTree node, CodeSemantics semantics) { - if (isOwnMemberSelect(node)) { + if (node.getExpression().toString().equals("this")) { variableRegistry.registerVariableOperation(node.getIdentifier().toString(), true, semantics); } variableRegistry.setIgnoreNextOperation(false); // don't ignore the foo in foo.bar() From 7ed62348955789ed8811d4aaecc8edb702a29469 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Sun, 5 Mar 2023 17:43:03 +0100 Subject: [PATCH 035/132] Fix endless loop --- .../normalization/NormalizationGraph.java | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java index ccd4c2697..011c07adb 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java @@ -46,17 +46,21 @@ public List linearize() { } private void spreadKeep() { - Set originalKeep = graph.vertexSet().stream() // - .filter(tl -> tl.semantics().keep()).collect(Collectors.toSet()); - Deque visit = new LinkedList<>(originalKeep); + Deque visit = new LinkedList<>(graph.vertexSet().stream() + .filter(tl -> tl.semantics().keep()).toList()); while (!visit.isEmpty()) { TokenLine current = visit.pop(); - if (originalKeep.contains(current) || !current.semantics().keep()) { - current.markKeep(); - visit.addAll(Graphs.predecessorListOf(graph, current).stream() // - .filter(pred -> 
graph.getEdge(pred, current).isData()).toList()); - visit.addAll(Graphs.successorListOf(graph, current).stream() // - .filter(succ -> graph.getEdge(current, succ).isReverseData()).toList()); + for (TokenLine pred : Graphs.predecessorListOf(graph, current)) { // performance of iteration? + if (!pred.semantics().keep() && graph.getEdge(pred, current).isData()) { + pred.markKeep(); + visit.add(pred); + } + } + for (TokenLine succ : Graphs.successorListOf(graph, current)) { + if (!succ.semantics().keep() && graph.getEdge(current, succ).isReverseData()) { + succ.markKeep(); + visit.add(succ); + } } } } From 175a3644eab4397ecc37a2dcfffea63d7afb5163 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Sun, 5 Mar 2023 17:54:01 +0100 Subject: [PATCH 036/132] Fix bug in tracking of loop depth --- .../NormalizationGraphConstructor.java | 16 ++---- .../de/jplag/semantics/CodeSemantics.java | 55 +++++++------------ 2 files changed, 26 insertions(+), 45 deletions(-) diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java index a187d9da8..5e791520c 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java @@ -45,21 +45,15 @@ SimpleDirectedGraph get() { private void addTokenLine(TokenLine tokenLine) { graph.addVertex(tokenLine); this.current = tokenLine; - processBidirectionalBlocks(); + bidirectionalBlockDepth += tokenLine.semantics().bidirectionalBlockDepthChange(); processFullOrdering(); processPartialOrdering(); processReads(); processWrites(); - current.semantics().reads().forEach(variable -> addVariableToMap(variableReads, variable)); - current.semantics().writes().forEach(variable -> addVariableToMap(variableWrites, variable)); - - } - - private void processBidirectionalBlocks() { - if 
(current.semantics().isBidirectionalBlockBegin()) - bidirectionalBlockDepth++; - if (current.semantics().isBidirectionalBlockEnd()) - bidirectionalBlockDepth--; + for (Variable variable: current.semantics().reads()) + addVariableToMap(variableReads, variable); + for (Variable variable: current.semantics().writes()) + addVariableToMap(variableWrites, variable); } private void processFullOrdering() { diff --git a/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java b/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java index 917a5960b..f6b2dc4b2 100644 --- a/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java +++ b/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java @@ -13,7 +13,7 @@ public class CodeSemantics { private boolean keep; private Ordering ordering; - private final BlockRelation bidirectionalBlockRelation; + private final int bidirectionalBlockDepthChange; private Set reads; private Set writes; @@ -22,22 +22,21 @@ public class CodeSemantics { * @param keep Whether the code snippet must be kept or if it may be removed. * @param ordering In which way the ordering of the code snippet relative to other code snippets of the same type is * relevant. For the possible options see {@link Ordering}. - * @param bidirectionalBlockRelation Which relation the code snippet has to bidirectional block, meaning a block where - * any statement within it may be executed after any other. This will typically be a loop. For the possible options see - * {@link BlockRelation}. + * @param bidirectionalBlockDepthChange How the code snippet affects the depth of bidirectional blocks, meaning blocks where + * any statement within it may be executed after any other. This will typically be a loop. * @param reads A set of the variables which were (potentially) read from in the code snippet. * @param writes A set of the variables which were (potentially) written to in the code snippet. 
*/ - private CodeSemantics(boolean keep, Ordering ordering, BlockRelation bidirectionalBlockRelation, Set reads, Set writes) { + private CodeSemantics(boolean keep, Ordering ordering, int bidirectionalBlockDepthChange, Set reads, Set writes) { this.keep = keep; this.ordering = ordering; - this.bidirectionalBlockRelation = bidirectionalBlockRelation; + this.bidirectionalBlockDepthChange = bidirectionalBlockDepthChange; this.reads = reads; this.writes = writes; } - private CodeSemantics(boolean keep, Ordering ordering, BlockRelation bidirectionalBlockRelation) { - this(keep, ordering, bidirectionalBlockRelation, new HashSet<>(), new HashSet<>()); + private CodeSemantics(boolean keep, Ordering ordering, int bidirectionalBlockDepthChange) { + this(keep, ordering, bidirectionalBlockDepthChange, new HashSet<>(), new HashSet<>()); } /** @@ -45,7 +44,7 @@ private CodeSemantics(boolean keep, Ordering ordering, BlockRelation bidirection * may change. Example: An assignment to a local variable. */ public CodeSemantics() { - this(false, Ordering.NONE, BlockRelation.NONE); + this(false, Ordering.NONE, 0); } /** @@ -53,7 +52,7 @@ public CodeSemantics() { * code snippets may change. Example: An attribute declaration. */ public static CodeSemantics createKeep() { - return new CodeSemantics(true, Ordering.NONE, BlockRelation.NONE); + return new CodeSemantics(true, Ordering.NONE, 0); } /** @@ -61,7 +60,7 @@ public static CodeSemantics createKeep() { * other code snippets of the same type. Example: A method call which is guaranteed to not result in an exception. */ public static CodeSemantics createCritical() { - return new CodeSemantics(true, Ordering.PARTIAL, BlockRelation.NONE); + return new CodeSemantics(true, Ordering.PARTIAL, 0); } /** @@ -69,7 +68,7 @@ public static CodeSemantics createCritical() { * all other code snippets. Example: A return statement. 
*/ public static CodeSemantics createControl() { - return new CodeSemantics(true, Ordering.FULL, BlockRelation.NONE); + return new CodeSemantics(true, Ordering.FULL, 0); } /** @@ -77,7 +76,7 @@ public static CodeSemantics createControl() { * all other code snippets, which also begins a bidirectional block. Example: The beginning of a while loop. */ public static CodeSemantics createLoopBegin() { - return new CodeSemantics(true, Ordering.FULL, BlockRelation.BEGINS_BLOCK); + return new CodeSemantics(true, Ordering.FULL, 1); } /** @@ -85,7 +84,7 @@ public static CodeSemantics createLoopBegin() { * all other code snippets, which also ends a bidirectional block. Example: The end of a while loop. */ public static CodeSemantics createLoopEnd() { - return new CodeSemantics(true, Ordering.FULL, BlockRelation.ENDS_BLOCK); + return new CodeSemantics(true, Ordering.FULL, -1); } /** @@ -112,17 +111,10 @@ void markPartialOrdering() { } /** - * @return whether this code snippet begins a bidirectional block. + * @return the change this code snippet causes in the depth of bidirectional loops. */ - public boolean isBidirectionalBlockBegin() { - return bidirectionalBlockRelation == BlockRelation.BEGINS_BLOCK; - } - - /** - * @return whether this code snippet ends a bidirectional block. 
- */ - public boolean isBidirectionalBlockEnd() { - return bidirectionalBlockRelation == BlockRelation.ENDS_BLOCK; + public int bidirectionalBlockDepthChange() { + return bidirectionalBlockDepthChange; } /** @@ -185,7 +177,7 @@ public void addWrite(Variable variable) { public static CodeSemantics join(List semanticsList) { boolean keep = false; Ordering ordering = Ordering.NONE; - BlockRelation bidirectionalBlockRelation = BlockRelation.NONE; + int bidirectionalBlockDepthChange = 0; Set reads = new HashSet<>(); Set writes = new HashSet<>(); for (CodeSemantics semantics : semanticsList) { @@ -193,14 +185,11 @@ public static CodeSemantics join(List semanticsList) { if (semantics.ordering.isStrongerThan(ordering)) { ordering = semantics.ordering; } - if (semantics.bidirectionalBlockRelation != BlockRelation.NONE) { - assert bidirectionalBlockRelation == BlockRelation.NONE; // only one block begin/end per line - bidirectionalBlockRelation = semantics.bidirectionalBlockRelation; - } + bidirectionalBlockDepthChange += semantics.bidirectionalBlockDepthChange(); reads.addAll(semantics.reads); writes.addAll(semantics.writes); } - return new CodeSemantics(keep, ordering, bidirectionalBlockRelation, reads, writes); + return new CodeSemantics(keep, ordering, bidirectionalBlockDepthChange, reads, writes); } @Override @@ -210,10 +199,8 @@ public String toString() { properties.add("keep"); if (ordering != Ordering.NONE) properties.add(ordering.name().toLowerCase() + " ordering"); - if (bidirectionalBlockRelation != BlockRelation.NONE) { - String keyword = bidirectionalBlockRelation.name().toLowerCase().split("_")[0]; - properties.add(keyword + " bidirectional block"); - } + if (bidirectionalBlockDepthChange != 0) + properties.add("change bidirectional block depth by " + bidirectionalBlockDepthChange); if (!reads.isEmpty()) properties.add("read " + String.join(" ", reads.stream().map(Variable::toString).toList())); if (!writes.isEmpty()) From 
22c76e120d47a4db8c92abeb72cd9c107b4edba5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Sun, 5 Mar 2023 18:01:17 +0100 Subject: [PATCH 037/132] Apply spotless --- .../normalization/NormalizationGraph.java | 3 +- .../NormalizationGraphConstructor.java | 4 +-- .../de/jplag/semantics/CodeSemantics.java | 33 ++++++++++--------- 3 files changed, 21 insertions(+), 19 deletions(-) diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java index 011c07adb..4b6f18512 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java @@ -4,7 +4,6 @@ import java.util.LinkedList; import java.util.List; import java.util.PriorityQueue; -import java.util.Set; import java.util.stream.Collectors; import org.jgrapht.Graphs; @@ -46,7 +45,7 @@ public List linearize() { } private void spreadKeep() { - Deque visit = new LinkedList<>(graph.vertexSet().stream() + Deque visit = new LinkedList<>(graph.vertexSet().stream() // .filter(tl -> tl.semantics().keep()).toList()); while (!visit.isEmpty()) { TokenLine current = visit.pop(); diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java index 5e791520c..32d0a38b8 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java @@ -50,9 +50,9 @@ private void addTokenLine(TokenLine tokenLine) { processPartialOrdering(); processReads(); processWrites(); - for (Variable variable: current.semantics().reads()) + for (Variable variable : current.semantics().reads()) addVariableToMap(variableReads, variable); - for (Variable variable: current.semantics().writes()) + for (Variable variable : current.semantics().writes()) 
addVariableToMap(variableWrites, variable); } diff --git a/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java b/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java index f6b2dc4b2..199c69c2e 100644 --- a/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java +++ b/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java @@ -18,12 +18,13 @@ public class CodeSemantics { private Set writes; /** - * Creates new semantics. reads and writes, which each contain the variables which were (potentially) read from/written to in this code snippet, are created empty. + * Creates new semantics. reads and writes, which each contain the variables which were (potentially) read from/written + * to in this code snippet, are created empty. * @param keep Whether the code snippet must be kept or if it may be removed. * @param ordering In which way the ordering of the code snippet relative to other code snippets of the same type is * relevant. For the possible options see {@link Ordering}. - * @param bidirectionalBlockDepthChange How the code snippet affects the depth of bidirectional blocks, meaning blocks where - * any statement within it may be executed after any other. This will typically be a loop. + * @param bidirectionalBlockDepthChange How the code snippet affects the depth of bidirectional blocks, meaning blocks + * where any statement within it may be executed after any other. This will typically be a loop. * @param reads A set of the variables which were (potentially) read from in the code snippet. * @param writes A set of the variables which were (potentially) written to in the code snippet. */ @@ -40,48 +41,50 @@ private CodeSemantics(boolean keep, Ordering ordering, int bidirectionalBlockDep } /** - * Creates new semantics with the following meaning: The code snippet may be removed, and its order relative to other code snippets - * may change. Example: An assignment to a local variable. 
+ * Creates new semantics with the following meaning: The code snippet may be removed, and its order relative to other + * code snippets may change. Example: An assignment to a local variable. */ public CodeSemantics() { this(false, Ordering.NONE, 0); } /** - * @return new semantics with the following meaning: The code snippet may not be removed, and its order relative to other - * code snippets may change. Example: An attribute declaration. + * @return new semantics with the following meaning: The code snippet may not be removed, and its order relative to + * other code snippets may change. Example: An attribute declaration. */ public static CodeSemantics createKeep() { return new CodeSemantics(true, Ordering.NONE, 0); } /** - * @return new semantics with the following meaning: The code snippet may not be removed, and its order must stay invariant to - * other code snippets of the same type. Example: A method call which is guaranteed to not result in an exception. + * @return new semantics with the following meaning: The code snippet may not be removed, and its order must stay + * invariant to other code snippets of the same type. Example: A method call which is guaranteed to not result in an + * exception. */ public static CodeSemantics createCritical() { return new CodeSemantics(true, Ordering.PARTIAL, 0); } /** - * @return new semantics with the following meaning: The code snippet may not be removed, and its order must stay invariant to - * all other code snippets. Example: A return statement. + * @return new semantics with the following meaning: The code snippet may not be removed, and its order must stay + * invariant to all other code snippets. Example: A return statement. 
*/ public static CodeSemantics createControl() { return new CodeSemantics(true, Ordering.FULL, 0); } /** - * @return new semantics with the following meaning: The code snippet may not be removed, and its order must stay invariant to - * all other code snippets, which also begins a bidirectional block. Example: The beginning of a while loop. + * @return new semantics with the following meaning: The code snippet may not be removed, and its order must stay + * invariant to all other code snippets, which also begins a bidirectional block. Example: The beginning of a while + * loop. */ public static CodeSemantics createLoopBegin() { return new CodeSemantics(true, Ordering.FULL, 1); } /** - * @return new semantics with the following meaning: The code snippet may not be removed, and its order must stay invariant to - * all other code snippets, which also ends a bidirectional block. Example: The end of a while loop. + * @return new semantics with the following meaning: The code snippet may not be removed, and its order must stay + * invariant to all other code snippets, which also ends a bidirectional block. Example: The end of a while loop. 
*/ public static CodeSemantics createLoopEnd() { return new CodeSemantics(true, Ordering.FULL, -1); From 9ff98ff18a73f32cc6b9eba1b13d082ebb1e2dea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Mon, 6 Mar 2023 13:16:24 +0100 Subject: [PATCH 038/132] Track global variables, fix bug in tracking of member variables --- .../de/jplag/semantics/CodeSemantics.java | 11 +- .../de/jplag/semantics/VariableRegistry.java | 100 ++++++++++-------- .../java/TokenGeneratingTreeScanner.java | 7 +- 3 files changed, 62 insertions(+), 56 deletions(-) diff --git a/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java b/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java index 199c69c2e..c7f248044 100644 --- a/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java +++ b/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java @@ -12,7 +12,7 @@ public class CodeSemantics { private boolean keep; - private Ordering ordering; + private final Ordering ordering; private final int bidirectionalBlockDepthChange; private Set reads; private Set writes; @@ -104,15 +104,6 @@ public void markKeep() { keep = true; } - /** - * Mark this code snippet as having partial ordering. - */ - void markPartialOrdering() { - if (Ordering.PARTIAL.isStrongerThan(ordering)) { - ordering = Ordering.PARTIAL; - } - } - /** * @return the change this code snippet causes in the depth of bidirectional loops. 
*/ diff --git a/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java b/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java index 868b67833..e8c3d01d7 100644 --- a/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java +++ b/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java @@ -1,26 +1,29 @@ package de.jplag.semantics; +import java.util.Deque; import java.util.HashMap; import java.util.HashSet; +import java.util.LinkedList; import java.util.Map; import java.util.Set; -import java.util.Stack; /** * Helper class to assist in generating token semantics. For languages similar in structure to Java/C */ public class VariableRegistry { - private Map memberVariables; // map member variable name to variable - private Map> localVariables; // map local variable name to variable - private Stack> localVariablesByScope; // stack of local variable names in scope + private Map globalVariables; // map global variable name to variable + private Deque> memberVariables; // map member variable name to stack of variables + private Map> localVariables; // map local variable name to stack of variables + private Deque> localVariablesByScope; // stack of local variable names in scope private NextOperation nextOperation; private boolean ignoreNextOperation; private boolean mutableWrite; public VariableRegistry() { - this.memberVariables = new HashMap<>(); + this.globalVariables = new HashMap<>(); + this.memberVariables = new LinkedList<>(); this.localVariables = new HashMap<>(); - this.localVariablesByScope = new Stack<>(); + this.localVariablesByScope = new LinkedList<>(); this.nextOperation = NextOperation.READ; // the default this.ignoreNextOperation = false; this.mutableWrite = false; @@ -43,69 +46,80 @@ public boolean inLocalScope() { } private Variable getMemberVariable(String variableName) { - return memberVariables.get(variableName); + Map currentMemberVariables = memberVariables.peek(); + return currentMemberVariables != 
null ? memberVariables.getLast().get(variableName) : null; } private Variable getVariable(String variableName) { - Stack variableIdStack = localVariables.get(variableName); - if (variableIdStack != null) { - return variableIdStack.peek(); - } - return getMemberVariable(variableName); + // get local variable if exists + Deque variableIdStack = localVariables.get(variableName); + if (variableIdStack != null) + return variableIdStack.getLast(); + // get member variable if exists + Variable variable = getMemberVariable(variableName); + if (variable != null) + return variable; + // if (nextOperation.isWrite) System.err.println(variableName); <- can uncover bugs + // get global variable, register if it doesn't exist + variable = globalVariables.get(variableName); + if (variable != null) + return variable; + variable = new Variable(variableName, false, true); + globalVariables.put(variableName, variable); + return variable; } public void registerMemberVariable(String variableName, boolean mutable) { Variable variable = new Variable(variableName, true, mutable); - memberVariables.put(variableName, variable); + memberVariables.getLast().put(variableName, variable); } public void registerLocalVariable(String variableName, boolean mutable) { Variable variable = new Variable(variableName, false, mutable); - localVariables.putIfAbsent(variableName, new Stack<>()); - localVariables.get(variableName).push(variable); - localVariablesByScope.peek().add(variableName); + localVariables.putIfAbsent(variableName, new LinkedList<>()); + localVariables.get(variableName).addLast(variable); + localVariablesByScope.getLast().add(variableName); } - public void addAllMemberVariablesAsReads(CodeSemantics semantics) { - for (Variable memberVar : memberVariables.values()) { - semantics.addRead(memberVar); - } + public void addAllNonLocalVariablesAsReads(CodeSemantics semantics) { + Set nonLocalVariables = new HashSet<>(globalVariables.values()); + for (Map classMemberVariables: memberVariables) 
+ nonLocalVariables.addAll(classMemberVariables.values()); + for (Variable variable : nonLocalVariables) + semantics.addRead(variable); + } + + public void enterClass() { + memberVariables.addLast(new HashMap<>()); } - public void clearMemberVariables() { - memberVariables.clear(); + public void exitClass() { + memberVariables.removeLast(); } public void registerVariableOperation(String variableName, boolean isOwnMember, CodeSemantics semantics) { - if (!ignoreNextOperation) { - Variable variable = isOwnMember ? getMemberVariable(variableName) : getVariable(variableName); - if (variable != null) { - if (nextOperation.isRead) { - semantics.addRead(variable); - } - if (nextOperation.isWrite || (mutableWrite && variable.isMutable())) { - semantics.addWrite(variable); - } - } else if (nextOperation.isWrite) { - semantics.markKeep(); // non-registered variable is written to -> keep! - semantics.markPartialOrdering(); // could have an effect on other such writes and method calls - } - nextOperation = NextOperation.READ; + if (ignoreNextOperation) { + ignoreNextOperation = false; + return; } - ignoreNextOperation = false; + Variable variable = isOwnMember ? 
getMemberVariable(variableName) : getVariable(variableName); + if (nextOperation.isRead) + semantics.addRead(variable); + if (nextOperation.isWrite || (mutableWrite && variable.isMutable())) + semantics.addWrite(variable); + nextOperation = NextOperation.READ; } public void enterLocalScope() { - localVariablesByScope.add(new HashSet<>()); + localVariablesByScope.addLast(new HashSet<>()); } public void exitLocalScope() { - for (String variableName : localVariablesByScope.pop()) { - Stack variableStack = localVariables.get(variableName); - variableStack.pop(); - if (variableStack.isEmpty()) { + for (String variableName : localVariablesByScope.removeLast()) { + Deque variableStack = localVariables.get(variableName); + variableStack.removeLast(); + if (variableStack.isEmpty()) localVariables.remove(variableName); - } } } } diff --git a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java index cf9f2464d..0706f4580 100644 --- a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java +++ b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java @@ -128,6 +128,7 @@ public Void visitBlock(BlockTree node, CodeSemantics semantics) { @Override public Void visitClass(ClassTree node, CodeSemantics semantics) { + variableRegistry.enterClass(); for (var member : node.getMembers()) { if (member.getKind() == Tree.Kind.VARIABLE) { VariableTree variableTree = (VariableTree) member; @@ -170,7 +171,7 @@ public Void visitClass(ClassTree node, CodeSemantics semantics) { semantics = CodeSemantics.createControl(); addToken(tokenType, end, 1, semantics); } - variableRegistry.clearMemberVariables(); + variableRegistry.exitClass(); return null; } @@ -207,7 +208,7 @@ public Void visitMethod(MethodTree node, CodeSemantics semantics) { scan(node.getThrows(), semantics); scan(node.getBody(), null); semantics = CodeSemantics.createControl(); - 
variableRegistry.addAllMemberVariablesAsReads(semantics); + variableRegistry.addAllNonLocalVariablesAsReads(semantics); addToken(JavaTokenType.J_METHOD_END, end, 1, semantics); variableRegistry.exitLocalScope(); return null; @@ -537,7 +538,7 @@ public Void visitConditionalExpression(ConditionalExpressionTree node, CodeSeman public Void visitMethodInvocation(MethodInvocationTree node, CodeSemantics semantics) { long start = positions.getStartPosition(ast, node); semantics = CodeSemantics.createControl(); - variableRegistry.addAllMemberVariablesAsReads(semantics); + variableRegistry.addAllNonLocalVariablesAsReads(semantics); addToken(JavaTokenType.J_APPLY, start, positions.getEndPosition(ast, node.getMethodSelect()) - start, semantics); scan(node.getTypeArguments(), semantics); // differentiate bar() and this.bar() (ignore) from bar.foo() (don't ignore) From 376acdb19b0431dde165a742c2efc31fdf541849 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Mon, 6 Mar 2023 21:41:42 +0100 Subject: [PATCH 039/132] Untrack global variables, track unscoped variables instead --- .../de/jplag/semantics/CodeSemantics.java | 9 +++- .../main/java/de/jplag/semantics/Scope.java | 7 +++ .../java/de/jplag/semantics/Variable.java | 8 +-- .../de/jplag/semantics/VariableRegistry.java | 49 ++++++++++--------- .../java/TokenGeneratingTreeScanner.java | 6 ++- 5 files changed, 50 insertions(+), 29 deletions(-) create mode 100644 language-api/src/main/java/de/jplag/semantics/Scope.java diff --git a/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java b/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java index c7f248044..f2702b617 100644 --- a/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java +++ b/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java @@ -12,7 +12,7 @@ public class CodeSemantics { private boolean keep; - private final Ordering ordering; + private Ordering ordering; private final int 
bidirectionalBlockDepthChange; private Set reads; private Set writes; @@ -125,6 +125,13 @@ public boolean isFullOrdering() { return ordering == Ordering.FULL; } + /** + * Mark this code snippet as having to be fully ordered with regards to all other code snippets. + */ + public void markFullOrdering() { + ordering = Ordering.FULL; + } + /** * @return an unmodifiable set of the variables which were (potentially) read from in this code snippet. */ diff --git a/language-api/src/main/java/de/jplag/semantics/Scope.java b/language-api/src/main/java/de/jplag/semantics/Scope.java new file mode 100644 index 000000000..760c42ff1 --- /dev/null +++ b/language-api/src/main/java/de/jplag/semantics/Scope.java @@ -0,0 +1,7 @@ +package de.jplag.semantics; + +public enum Scope { + FILE, + CLASS, + LOCAL +} diff --git a/language-api/src/main/java/de/jplag/semantics/Variable.java b/language-api/src/main/java/de/jplag/semantics/Variable.java index d41eb49be..f41d63fb9 100644 --- a/language-api/src/main/java/de/jplag/semantics/Variable.java +++ b/language-api/src/main/java/de/jplag/semantics/Variable.java @@ -5,12 +5,12 @@ */ public class Variable { private final String name; - private final boolean isOwnMember; + private final Scope scope; private final boolean isMutable; - Variable(String name, boolean isOwnMember, boolean isMutable) { + Variable(String name, Scope scope, boolean isMutable) { this.name = name; - this.isOwnMember = isOwnMember; + this.scope = scope; this.isMutable = isMutable; } @@ -20,6 +20,6 @@ boolean isMutable() { @Override public String toString() { - return (isOwnMember ? "this." : "") + name + (isMutable ? "*" : ""); + return name + (isMutable ? 
"*" : "") + " [scope: " + scope.name().toLowerCase() + "]"; } } diff --git a/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java b/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java index e8c3d01d7..f9c6fd65b 100644 --- a/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java +++ b/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java @@ -11,7 +11,7 @@ * Helper class to assist in generating token semantics. For languages similar in structure to Java/C */ public class VariableRegistry { - private Map globalVariables; // map global variable name to variable + private Map unscopedVariables; private Deque> memberVariables; // map member variable name to stack of variables private Map> localVariables; // map local variable name to stack of variables private Deque> localVariablesByScope; // stack of local variable names in scope @@ -20,7 +20,7 @@ public class VariableRegistry { private boolean mutableWrite; public VariableRegistry() { - this.globalVariables = new HashMap<>(); + this.unscopedVariables = new HashMap<>(); this.memberVariables = new LinkedList<>(); this.localVariables = new HashMap<>(); this.localVariablesByScope = new LinkedList<>(); @@ -51,15 +51,13 @@ private Variable getMemberVariable(String variableName) { } private Variable getVariable(String variableName) { - // get local variable if exists Deque variableIdStack = localVariables.get(variableName); if (variableIdStack != null) return variableIdStack.getLast(); - // get member variable if exists Variable variable = getMemberVariable(variableName); - if (variable != null) - return variable; - // if (nextOperation.isWrite) System.err.println(variableName); <- can uncover bugs + return variable != null ? variable : unscopedVariables.get(variableName); + /* todo track global variables -> hard, how to differentiate SomeClass.staticAttr++ from String.join(...) 
+ // problem here: all String.joins (for example) are registered as writes to String // get global variable, register if it doesn't exist variable = globalVariables.get(variableName); if (variable != null) @@ -67,22 +65,24 @@ private Variable getVariable(String variableName) { variable = new Variable(variableName, false, true); globalVariables.put(variableName, variable); return variable; + */ } - public void registerMemberVariable(String variableName, boolean mutable) { - Variable variable = new Variable(variableName, true, mutable); - memberVariables.getLast().put(variableName, variable); - } - - public void registerLocalVariable(String variableName, boolean mutable) { - Variable variable = new Variable(variableName, false, mutable); - localVariables.putIfAbsent(variableName, new LinkedList<>()); - localVariables.get(variableName).addLast(variable); - localVariablesByScope.getLast().add(variableName); + public void registerVariable(String variableName, Scope scope, boolean mutable) { + Variable variable = new Variable(variableName, scope, mutable); + switch (scope) { + case FILE -> unscopedVariables.put(variableName, variable); + case CLASS -> memberVariables.getLast().put(variableName, variable); + case LOCAL -> { + localVariables.putIfAbsent(variableName, new LinkedList<>()); + localVariables.get(variableName).addLast(variable); + localVariablesByScope.getLast().add(variableName); + } + } } public void addAllNonLocalVariablesAsReads(CodeSemantics semantics) { - Set nonLocalVariables = new HashSet<>(globalVariables.values()); + Set nonLocalVariables = new HashSet<>(); for (Map classMemberVariables: memberVariables) nonLocalVariables.addAll(classMemberVariables.values()); for (Variable variable : nonLocalVariables) @@ -103,10 +103,15 @@ public void registerVariableOperation(String variableName, boolean isOwnMember, return; } Variable variable = isOwnMember ? 
getMemberVariable(variableName) : getVariable(variableName); - if (nextOperation.isRead) - semantics.addRead(variable); - if (nextOperation.isWrite || (mutableWrite && variable.isMutable())) - semantics.addWrite(variable); + if (variable != null) { + if (nextOperation.isRead) + semantics.addRead(variable); + if (nextOperation.isWrite || (mutableWrite && variable.isMutable())) + semantics.addWrite(variable); + } else if (nextOperation.isWrite || mutableWrite) { + semantics.markKeep(); + semantics.markFullOrdering(); // since we don't track reads... + } nextOperation = NextOperation.READ; } diff --git a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java index 0706f4580..5be49faeb 100644 --- a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java +++ b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java @@ -10,6 +10,7 @@ import de.jplag.TokenType; import de.jplag.semantics.CodeSemantics; import de.jplag.semantics.NextOperation; +import de.jplag.semantics.Scope; import de.jplag.semantics.VariableRegistry; import com.sun.source.tree.AnnotationTree; @@ -128,13 +129,14 @@ public Void visitBlock(BlockTree node, CodeSemantics semantics) { @Override public Void visitClass(ClassTree node, CodeSemantics semantics) { + variableRegistry.registerVariable(node.getSimpleName().toString(), Scope.FILE, true); variableRegistry.enterClass(); for (var member : node.getMembers()) { if (member.getKind() == Tree.Kind.VARIABLE) { VariableTree variableTree = (VariableTree) member; String name = variableTree.getName().toString(); boolean mutable = isMutable(variableTree.getType()); - variableRegistry.registerMemberVariable(name, mutable); + variableRegistry.registerVariable(name, Scope.CLASS, mutable); } } @@ -512,7 +514,7 @@ public Void visitVariable(VariableTree node, CodeSemantics semantics) { boolean inLocalScope = 
variableRegistry.inLocalScope(); if (inLocalScope) { boolean mutable = isMutable(node.getType()); - variableRegistry.registerLocalVariable(name, mutable); + variableRegistry.registerVariable(name, Scope.LOCAL, mutable); semantics = new CodeSemantics(); } else { semantics = CodeSemantics.createKeep(); From 8d0dc9bfaf702f88d1dd93b741442bd008eba678 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Wed, 15 Mar 2023 17:41:02 +0100 Subject: [PATCH 040/132] Fix rare edge case regarding reverse data dependencies --- .../normalization/NormalizationGraph.java | 10 ++--- .../NormalizationGraphConstructor.java | 40 ++++++++++++------- 2 files changed, 30 insertions(+), 20 deletions(-) diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java index 4b6f18512..379efd1c6 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java @@ -32,10 +32,10 @@ public List linearize() { if (tokenLine.semantics().keep()) { tokens.addAll(tokenLine.tokens()); } - for (TokenLine successorGroup : Graphs.successorListOf(graph, tokenLine)) { - graph.removeEdge(tokenLine, successorGroup); - if (!Graphs.vertexHasPredecessors(graph, successorGroup)) { - newRoots.add(successorGroup); + for (TokenLine succ : Graphs.successorListOf(graph, tokenLine)) { + graph.removeEdge(tokenLine, succ); + if (!Graphs.vertexHasPredecessors(graph, succ)) { + newRoots.add(succ); } } } while (!roots.isEmpty()); @@ -49,7 +49,7 @@ private void spreadKeep() { .filter(tl -> tl.semantics().keep()).toList()); while (!visit.isEmpty()) { TokenLine current = visit.pop(); - for (TokenLine pred : Graphs.predecessorListOf(graph, current)) { // performance of iteration? + for (TokenLine pred : Graphs.predecessorListOf(graph, current)) { // performance of iteration? 
if (!pred.semantics().keep() && graph.getEdge(pred, current).isData()) { pred.markKeep(); visit.add(pred); diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java index 32d0a38b8..80ce92f7f 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java @@ -2,9 +2,11 @@ import java.util.Collection; import java.util.HashMap; +import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Set; import org.jgrapht.graph.SimpleDirectedGraph; @@ -19,6 +21,7 @@ class NormalizationGraphConstructor { private TokenLine lastPartialOrdering; private Map> variableReads; private Map> variableWrites; + private Set inCurrentBidirectionalBlock; private TokenLine current; NormalizationGraphConstructor(List tokens) { @@ -27,6 +30,7 @@ class NormalizationGraphConstructor { fullOrderingIngoing = new LinkedList<>(); variableReads = new HashMap<>(); variableWrites = new HashMap<>(); + inCurrentBidirectionalBlock = new HashSet<>(); TokenLineBuilder currentLine = new TokenLineBuilder(tokens.get(0).getLine()); for (Token token : tokens) { if (token.getLine() != currentLine.lineNumber()) { @@ -45,7 +49,7 @@ SimpleDirectedGraph get() { private void addTokenLine(TokenLine tokenLine) { graph.addVertex(tokenLine); this.current = tokenLine; - bidirectionalBlockDepth += tokenLine.semantics().bidirectionalBlockDepthChange(); + processBidirectionalBlock(); processFullOrdering(); processPartialOrdering(); processReads(); @@ -56,13 +60,22 @@ private void addTokenLine(TokenLine tokenLine) { addVariableToMap(variableWrites, variable); } + private void processBidirectionalBlock() { + bidirectionalBlockDepth += current.semantics().bidirectionalBlockDepthChange(); + if (bidirectionalBlockDepth > 0) + 
inCurrentBidirectionalBlock.add(current); + else + inCurrentBidirectionalBlock.clear(); + } + private void processFullOrdering() { if (current.semantics().isFullOrdering()) { - addCurrentEdges(fullOrderingIngoing, DependencyType.ORDERING_FULL, null); // ingoing edges + for (TokenLine node: fullOrderingIngoing) + addCurrentEdge(node, DependencyType.ORDERING_FULL, null); fullOrderingIngoing.clear(); lastFullOrdering = current; } else if (lastFullOrdering != null) { - addCurrentEdge(lastFullOrdering, DependencyType.ORDERING_FULL, null); // outgoing edges + addCurrentEdge(lastFullOrdering, DependencyType.ORDERING_FULL, null); } fullOrderingIngoing.add(current); } @@ -78,27 +91,24 @@ private void processPartialOrdering() { private void processReads() { for (Variable variable : current.semantics().reads()) { - addCurrentEdgesByVariable(variableWrites, variable, DependencyType.VARIABLE_DATA); + for (TokenLine node : variableWrites.getOrDefault(variable, Set.of())) + addCurrentEdge(node, DependencyType.VARIABLE_DATA, variable); } } private void processWrites() { - DependencyType readToWriteDependencyType = bidirectionalBlockDepth > 0 ? DependencyType.VARIABLE_REVERSE_DATA : DependencyType.VARIABLE_ORDER; for (Variable variable : current.semantics().writes()) { - addCurrentEdgesByVariable(variableWrites, variable, DependencyType.VARIABLE_ORDER); - addCurrentEdgesByVariable(variableReads, variable, readToWriteDependencyType); + for (TokenLine node : variableWrites.getOrDefault(variable, Set.of())) + addCurrentEdge(node, DependencyType.VARIABLE_ORDER, variable); + for (TokenLine node : variableReads.getOrDefault(variable, Set.of())) { + DependencyType dependencyType = inCurrentBidirectionalBlock.contains(node) ? 
// + DependencyType.VARIABLE_REVERSE_DATA : DependencyType.VARIABLE_ORDER; + addCurrentEdge(node, dependencyType, variable); + } addVariableToMap(variableWrites, variable); } } - private void addCurrentEdgesByVariable(Map> variableMap, Variable variable, DependencyType type) { - addCurrentEdges(variableMap.getOrDefault(variable, new LinkedList<>()), type, variable); - } - - private void addCurrentEdges(Collection starts, DependencyType type, Variable cause) { - starts.forEach(s -> addCurrentEdge(s, type, cause)); - } - /** * Adds an ingoing edge to the current node. * @param start the start of the edge From 0e6702859bbdcde81190bf1ff8921eb2a0546d1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Sat, 18 Mar 2023 20:19:37 +0100 Subject: [PATCH 041/132] Rename "ordering" to "position significance" --- .../jplag/normalization/DependencyType.java | 4 +- .../NormalizationGraphConstructor.java | 48 ++++++------- .../de/jplag/semantics/CodeSemantics.java | 70 +++++++++---------- .../java/de/jplag/semantics/Ordering.java | 29 -------- .../jplag/semantics/PositionSignificance.java | 19 +++++ .../de/jplag/semantics/VariableRegistry.java | 2 +- 6 files changed, 81 insertions(+), 91 deletions(-) delete mode 100644 language-api/src/main/java/de/jplag/semantics/Ordering.java create mode 100644 language-api/src/main/java/de/jplag/semantics/PositionSignificance.java diff --git a/core/src/main/java/de/jplag/normalization/DependencyType.java b/core/src/main/java/de/jplag/normalization/DependencyType.java index 626e51666..386a5334e 100644 --- a/core/src/main/java/de/jplag/normalization/DependencyType.java +++ b/core/src/main/java/de/jplag/normalization/DependencyType.java @@ -4,6 +4,6 @@ enum DependencyType { VARIABLE_DATA, VARIABLE_REVERSE_DATA, VARIABLE_ORDER, - ORDERING_FULL, - ORDERING_PARTIAL + POSITION_SIGNIFICANCE_FULL, + POSITION_SIGNIFICANCE_PARTIAL } diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java 
b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java index 80ce92f7f..f0a9013d3 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java @@ -16,9 +16,9 @@ class NormalizationGraphConstructor { private SimpleDirectedGraph graph; private int bidirectionalBlockDepth; - private Collection fullOrderingIngoing; - private TokenLine lastFullOrdering; - private TokenLine lastPartialOrdering; + private Collection fullPositionalSignificanceIngoing; + private TokenLine lastFullPositionalSignificance; + private TokenLine lastPartialPositionalSignificance; private Map> variableReads; private Map> variableWrites; private Set inCurrentBidirectionalBlock; @@ -27,7 +27,7 @@ class NormalizationGraphConstructor { NormalizationGraphConstructor(List tokens) { graph = new SimpleDirectedGraph<>(Dependency.class); bidirectionalBlockDepth = 0; - fullOrderingIngoing = new LinkedList<>(); + fullPositionalSignificanceIngoing = new LinkedList<>(); variableReads = new HashMap<>(); variableWrites = new HashMap<>(); inCurrentBidirectionalBlock = new HashSet<>(); @@ -50,8 +50,8 @@ private void addTokenLine(TokenLine tokenLine) { graph.addVertex(tokenLine); this.current = tokenLine; processBidirectionalBlock(); - processFullOrdering(); - processPartialOrdering(); + processFullPositionalSignificance(); + processPartialPositionalSignificance(); processReads(); processWrites(); for (Variable variable : current.semantics().reads()) @@ -68,42 +68,42 @@ private void processBidirectionalBlock() { inCurrentBidirectionalBlock.clear(); } - private void processFullOrdering() { - if (current.semantics().isFullOrdering()) { - for (TokenLine node: fullOrderingIngoing) - addCurrentEdge(node, DependencyType.ORDERING_FULL, null); - fullOrderingIngoing.clear(); - lastFullOrdering = current; - } else if (lastFullOrdering != null) { - addCurrentEdge(lastFullOrdering, 
DependencyType.ORDERING_FULL, null); + private void processFullPositionalSignificance() { + if (current.semantics().hasFullPositionSignificance()) { + for (TokenLine node: fullPositionalSignificanceIngoing) + addIngoingEdgeToCurrent(node, DependencyType.POSITION_SIGNIFICANCE_FULL, null); + fullPositionalSignificanceIngoing.clear(); + lastFullPositionalSignificance = current; + } else if (lastFullPositionalSignificance != null) { + addIngoingEdgeToCurrent(lastFullPositionalSignificance, DependencyType.POSITION_SIGNIFICANCE_FULL, null); } - fullOrderingIngoing.add(current); + fullPositionalSignificanceIngoing.add(current); } - private void processPartialOrdering() { - if (current.semantics().isPartialOrdering()) { - if (lastPartialOrdering != null) { - addCurrentEdge(lastPartialOrdering, DependencyType.ORDERING_PARTIAL, null); + private void processPartialPositionalSignificance() { + if (current.semantics().hasPartialPositionSignificance()) { + if (lastPartialPositionalSignificance != null) { + addIngoingEdgeToCurrent(lastPartialPositionalSignificance, DependencyType.POSITION_SIGNIFICANCE_PARTIAL, null); } - lastPartialOrdering = current; + lastPartialPositionalSignificance = current; } } private void processReads() { for (Variable variable : current.semantics().reads()) { for (TokenLine node : variableWrites.getOrDefault(variable, Set.of())) - addCurrentEdge(node, DependencyType.VARIABLE_DATA, variable); + addIngoingEdgeToCurrent(node, DependencyType.VARIABLE_DATA, variable); } } private void processWrites() { for (Variable variable : current.semantics().writes()) { for (TokenLine node : variableWrites.getOrDefault(variable, Set.of())) - addCurrentEdge(node, DependencyType.VARIABLE_ORDER, variable); + addIngoingEdgeToCurrent(node, DependencyType.VARIABLE_ORDER, variable); for (TokenLine node : variableReads.getOrDefault(variable, Set.of())) { DependencyType dependencyType = inCurrentBidirectionalBlock.contains(node) ? 
// DependencyType.VARIABLE_REVERSE_DATA : DependencyType.VARIABLE_ORDER; - addCurrentEdge(node, dependencyType, variable); + addIngoingEdgeToCurrent(node, dependencyType, variable); } addVariableToMap(variableWrites, variable); } @@ -115,7 +115,7 @@ private void processWrites() { * @param type the type of the edge * @param cause the variable that caused the edge, may be null */ - private void addCurrentEdge(TokenLine start, DependencyType type, Variable cause) { + private void addIngoingEdgeToCurrent(TokenLine start, DependencyType type, Variable cause) { Dependency dependency = graph.getEdge(start, current); if (dependency == null) { dependency = new Dependency(); diff --git a/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java b/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java index f2702b617..660e862d0 100644 --- a/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java +++ b/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java @@ -12,7 +12,7 @@ public class CodeSemantics { private boolean keep; - private Ordering ordering; + private PositionSignificance positionSignificance; private final int bidirectionalBlockDepthChange; private Set reads; private Set writes; @@ -21,73 +21,73 @@ public class CodeSemantics { * Creates new semantics. reads and writes, which each contain the variables which were (potentially) read from/written * to in this code snippet, are created empty. * @param keep Whether the code snippet must be kept or if it may be removed. - * @param ordering In which way the ordering of the code snippet relative to other code snippets of the same type is - * relevant. For the possible options see {@link Ordering}. + * @param positionSignificance In which way the position of the code snippet relative to other code snippets of the same type is + * significant. For the possible options see {@link PositionSignificance}. 
* @param bidirectionalBlockDepthChange How the code snippet affects the depth of bidirectional blocks, meaning blocks * where any statement within it may be executed after any other. This will typically be a loop. * @param reads A set of the variables which were (potentially) read from in the code snippet. * @param writes A set of the variables which were (potentially) written to in the code snippet. */ - private CodeSemantics(boolean keep, Ordering ordering, int bidirectionalBlockDepthChange, Set reads, Set writes) { + private CodeSemantics(boolean keep, PositionSignificance positionSignificance, int bidirectionalBlockDepthChange, Set reads, Set writes) { this.keep = keep; - this.ordering = ordering; + this.positionSignificance = positionSignificance; this.bidirectionalBlockDepthChange = bidirectionalBlockDepthChange; this.reads = reads; this.writes = writes; } - private CodeSemantics(boolean keep, Ordering ordering, int bidirectionalBlockDepthChange) { - this(keep, ordering, bidirectionalBlockDepthChange, new HashSet<>(), new HashSet<>()); + private CodeSemantics(boolean keep, PositionSignificance positionSignificance, int bidirectionalBlockDepthChange) { + this(keep, positionSignificance, bidirectionalBlockDepthChange, new HashSet<>(), new HashSet<>()); } /** - * Creates new semantics with the following meaning: The code snippet may be removed, and its order relative to other + * Creates new semantics with the following meaning: The code snippet may be removed, and its position relative to other * code snippets may change. Example: An assignment to a local variable. */ public CodeSemantics() { - this(false, Ordering.NONE, 0); + this(false, PositionSignificance.NONE, 0); } /** - * @return new semantics with the following meaning: The code snippet may not be removed, and its order relative to + * @return new semantics with the following meaning: The code snippet may not be removed, and its position relative to * other code snippets may change. 
Example: An attribute declaration. */ public static CodeSemantics createKeep() { - return new CodeSemantics(true, Ordering.NONE, 0); + return new CodeSemantics(true, PositionSignificance.NONE, 0); } /** - * @return new semantics with the following meaning: The code snippet may not be removed, and its order must stay + * @return new semantics with the following meaning: The code snippet may not be removed, and its position must stay * invariant to other code snippets of the same type. Example: A method call which is guaranteed to not result in an * exception. */ public static CodeSemantics createCritical() { - return new CodeSemantics(true, Ordering.PARTIAL, 0); + return new CodeSemantics(true, PositionSignificance.PARTIAL, 0); } /** - * @return new semantics with the following meaning: The code snippet may not be removed, and its order must stay + * @return new semantics with the following meaning: The code snippet may not be removed, and its position must stay * invariant to all other code snippets. Example: A return statement. */ public static CodeSemantics createControl() { - return new CodeSemantics(true, Ordering.FULL, 0); + return new CodeSemantics(true, PositionSignificance.FULL, 0); } /** - * @return new semantics with the following meaning: The code snippet may not be removed, and its order must stay + * @return new semantics with the following meaning: The code snippet may not be removed, and its position must stay * invariant to all other code snippets, which also begins a bidirectional block. Example: The beginning of a while * loop. 
*/ public static CodeSemantics createLoopBegin() { - return new CodeSemantics(true, Ordering.FULL, 1); + return new CodeSemantics(true, PositionSignificance.FULL, 1); } /** - * @return new semantics with the following meaning: The code snippet may not be removed, and its order must stay + * @return new semantics with the following meaning: The code snippet may not be removed, and its position must stay * invariant to all other code snippets, which also ends a bidirectional block. Example: The end of a while loop. */ public static CodeSemantics createLoopEnd() { - return new CodeSemantics(true, Ordering.FULL, -1); + return new CodeSemantics(true, PositionSignificance.FULL, -1); } /** @@ -112,24 +112,24 @@ public int bidirectionalBlockDepthChange() { } /** - * @return whether this code snippet has the partial ordering type. + * @return whether this code snippet has partial position significance. */ - public boolean isPartialOrdering() { - return ordering == Ordering.PARTIAL; + public boolean hasPartialPositionSignificance() { + return positionSignificance == PositionSignificance.PARTIAL; } /** - * @return whether this code snippet has the full ordering type. + * @return whether this code snippet has full position significance. */ - public boolean isFullOrdering() { - return ordering == Ordering.FULL; + public boolean hasFullPositionSignificance() { + return positionSignificance == PositionSignificance.FULL; } /** - * Mark this code snippet as having to be fully ordered with regards to all other code snippets. + * Mark this code snippet as having full position significance. */ - public void markFullOrdering() { - ordering = Ordering.FULL; + public void markFullPositionSignificance() { + positionSignificance = PositionSignificance.FULL; } /** @@ -166,7 +166,7 @@ public void addWrite(Variable variable) { * Create new joint semantics by joining a number of existing ones. It has the following properties: *
      *
    • keep is the disjunction of all keeps
    • - *
    • ordering is the strongest ordering out of all orderings
    • + *
    • position significance is the most significant
    • *
    • bidirectionalBlockRelation is the one that is not NONE out of all bidirectionalBlockRelations if it exists. It's * assumed that there is at most one. If there isn't one bidirectionalBlockRelation is NONE.
    • *
    • reads is the union of all reads
    • @@ -177,20 +177,20 @@ public void addWrite(Variable variable) { */ public static CodeSemantics join(List semanticsList) { boolean keep = false; - Ordering ordering = Ordering.NONE; + PositionSignificance positionSignificance = PositionSignificance.NONE; int bidirectionalBlockDepthChange = 0; Set reads = new HashSet<>(); Set writes = new HashSet<>(); for (CodeSemantics semantics : semanticsList) { keep = keep || semantics.keep; - if (semantics.ordering.isStrongerThan(ordering)) { - ordering = semantics.ordering; + if (semantics.positionSignificance.compareTo(positionSignificance) > 0) { + positionSignificance = semantics.positionSignificance; } bidirectionalBlockDepthChange += semantics.bidirectionalBlockDepthChange(); reads.addAll(semantics.reads); writes.addAll(semantics.writes); } - return new CodeSemantics(keep, ordering, bidirectionalBlockDepthChange, reads, writes); + return new CodeSemantics(keep, positionSignificance, bidirectionalBlockDepthChange, reads, writes); } @Override @@ -198,8 +198,8 @@ public String toString() { List properties = new LinkedList<>(); if (keep) properties.add("keep"); - if (ordering != Ordering.NONE) - properties.add(ordering.name().toLowerCase() + " ordering"); + if (positionSignificance != PositionSignificance.NONE) + properties.add(positionSignificance.name().toLowerCase() + " position significance"); if (bidirectionalBlockDepthChange != 0) properties.add("change bidirectional block depth by " + bidirectionalBlockDepthChange); if (!reads.isEmpty()) diff --git a/language-api/src/main/java/de/jplag/semantics/Ordering.java b/language-api/src/main/java/de/jplag/semantics/Ordering.java deleted file mode 100644 index b014ad025..000000000 --- a/language-api/src/main/java/de/jplag/semantics/Ordering.java +++ /dev/null @@ -1,29 +0,0 @@ -package de.jplag.semantics; - -/** - * Enumerates how the order of an item in a sequence relative to other items may be relevant. 
- */ -enum Ordering { - /** - * The order of the item relative to other items in the sequence is not relevant. - */ - NONE(0), - /** - * The order of the item relative to other items that also have partial ordering is relevant. - */ - PARTIAL(1), - /** - * The order of the item to all other items is relevant. - */ - FULL(2); - - private final int strength; - - Ordering(int strength) { - this.strength = strength; - } - - boolean isStrongerThan(Ordering other) { - return this.strength > other.strength; - } -} diff --git a/language-api/src/main/java/de/jplag/semantics/PositionSignificance.java b/language-api/src/main/java/de/jplag/semantics/PositionSignificance.java new file mode 100644 index 000000000..b4a6a4ccc --- /dev/null +++ b/language-api/src/main/java/de/jplag/semantics/PositionSignificance.java @@ -0,0 +1,19 @@ +package de.jplag.semantics; + +/** + * Enumerates how the position of an item in a sequence relative to other items may be significant. + */ +enum PositionSignificance { + /** + * The position of the item relative to other items is insignificant. + */ + NONE, + /** + * The position of the item relative to other items with partial position significance is significant. + */ + PARTIAL, + /** + * The position of the item to all other items is significant. + */ + FULL +} diff --git a/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java b/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java index f9c6fd65b..9c280a2d8 100644 --- a/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java +++ b/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java @@ -110,7 +110,7 @@ public void registerVariableOperation(String variableName, boolean isOwnMember, semantics.addWrite(variable); } else if (nextOperation.isWrite || mutableWrite) { semantics.markKeep(); - semantics.markFullOrdering(); // since we don't track reads... + semantics.markFullPositionSignificance(); // since we don't track reads... 
} nextOperation = NextOperation.READ; } From 5f635f956b0c79ec27354810c973aafa231ce775 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Sun, 19 Mar 2023 21:14:21 +0100 Subject: [PATCH 042/132] Rename "variable data" dependency type to "variable flow" --- .../de/jplag/normalization/Dependency.java | 22 +++++++++---------- .../jplag/normalization/DependencyType.java | 4 ++-- .../normalization/NormalizationGraph.java | 4 ++-- .../NormalizationGraphConstructor.java | 4 ++-- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/core/src/main/java/de/jplag/normalization/Dependency.java b/core/src/main/java/de/jplag/normalization/Dependency.java index 41da16d22..95a880783 100644 --- a/core/src/main/java/de/jplag/normalization/Dependency.java +++ b/core/src/main/java/de/jplag/normalization/Dependency.java @@ -7,27 +7,27 @@ class Dependency { private Set items; - private boolean isData; - private boolean isReverseData; + private boolean isVariableFlow; + private boolean isVariableReverseFlow; Dependency() { items = new HashSet<>(); - isData = false; + isVariableFlow = false; } - boolean isData() { - return isData; + boolean isVariableFlow() { + return isVariableFlow; } - boolean isReverseData() { - return isReverseData; + boolean isVariableReverseFlow() { + return isVariableReverseFlow; } void addItem(DependencyType type, Variable cause) { - if (type == DependencyType.VARIABLE_DATA) - isData = true; - if (type == DependencyType.VARIABLE_REVERSE_DATA) - isReverseData = true; + if (type == DependencyType.VARIABLE_FLOW) + isVariableFlow = true; + if (type == DependencyType.VARIABLE_REVERSE_FLOW) + isVariableReverseFlow = true; items.add(new DependencyItem(type, cause)); } } diff --git a/core/src/main/java/de/jplag/normalization/DependencyType.java b/core/src/main/java/de/jplag/normalization/DependencyType.java index 386a5334e..605af009e 100644 --- a/core/src/main/java/de/jplag/normalization/DependencyType.java +++ 
b/core/src/main/java/de/jplag/normalization/DependencyType.java @@ -1,8 +1,8 @@ package de.jplag.normalization; enum DependencyType { - VARIABLE_DATA, - VARIABLE_REVERSE_DATA, + VARIABLE_FLOW, + VARIABLE_REVERSE_FLOW, VARIABLE_ORDER, POSITION_SIGNIFICANCE_FULL, POSITION_SIGNIFICANCE_PARTIAL diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java index 379efd1c6..e0bdea618 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java @@ -50,13 +50,13 @@ private void spreadKeep() { while (!visit.isEmpty()) { TokenLine current = visit.pop(); for (TokenLine pred : Graphs.predecessorListOf(graph, current)) { // performance of iteration? - if (!pred.semantics().keep() && graph.getEdge(pred, current).isData()) { + if (!pred.semantics().keep() && graph.getEdge(pred, current).isVariableFlow()) { pred.markKeep(); visit.add(pred); } } for (TokenLine succ : Graphs.successorListOf(graph, current)) { - if (!succ.semantics().keep() && graph.getEdge(current, succ).isReverseData()) { + if (!succ.semantics().keep() && graph.getEdge(current, succ).isVariableReverseFlow()) { succ.markKeep(); visit.add(succ); } diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java index f0a9013d3..9fb4052b1 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java @@ -92,7 +92,7 @@ private void processPartialPositionalSignificance() { private void processReads() { for (Variable variable : current.semantics().reads()) { for (TokenLine node : variableWrites.getOrDefault(variable, Set.of())) - addIngoingEdgeToCurrent(node, DependencyType.VARIABLE_DATA, variable); + addIngoingEdgeToCurrent(node, 
DependencyType.VARIABLE_FLOW, variable); } } @@ -102,7 +102,7 @@ private void processWrites() { addIngoingEdgeToCurrent(node, DependencyType.VARIABLE_ORDER, variable); for (TokenLine node : variableReads.getOrDefault(variable, Set.of())) { DependencyType dependencyType = inCurrentBidirectionalBlock.contains(node) ? // - DependencyType.VARIABLE_REVERSE_DATA : DependencyType.VARIABLE_ORDER; + DependencyType.VARIABLE_REVERSE_FLOW : DependencyType.VARIABLE_ORDER; addIngoingEdgeToCurrent(node, dependencyType, variable); } addVariableToMap(variableWrites, variable); From 3321bf2f7de5c03869e5b0d90887478e2327013a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Wed, 22 Mar 2023 18:28:33 +0100 Subject: [PATCH 043/132] Rename variable "operation" to "access" --- ...Operation.java => VariableAccessType.java} | 4 +- .../de/jplag/semantics/VariableRegistry.java | 41 ++++++++++++------- .../java/TokenGeneratingTreeScanner.java | 20 ++++----- 3 files changed, 38 insertions(+), 27 deletions(-) rename language-api/src/main/java/de/jplag/semantics/{NextOperation.java => VariableAccessType.java} (71%) diff --git a/language-api/src/main/java/de/jplag/semantics/NextOperation.java b/language-api/src/main/java/de/jplag/semantics/VariableAccessType.java similarity index 71% rename from language-api/src/main/java/de/jplag/semantics/NextOperation.java rename to language-api/src/main/java/de/jplag/semantics/VariableAccessType.java index 283ca27a5..15245916c 100644 --- a/language-api/src/main/java/de/jplag/semantics/NextOperation.java +++ b/language-api/src/main/java/de/jplag/semantics/VariableAccessType.java @@ -1,6 +1,6 @@ package de.jplag.semantics; -public enum NextOperation { +public enum VariableAccessType { READ(true, false), WRITE(false, true), READ_WRITE(true, true); @@ -8,7 +8,7 @@ public enum NextOperation { final boolean isRead; final boolean isWrite; - NextOperation(boolean isRead, boolean isWrite) { + VariableAccessType(boolean isRead, boolean isWrite) { 
this.isRead = isRead; this.isWrite = isWrite; } diff --git a/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java b/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java index 9c280a2d8..c5f4f6fd0 100644 --- a/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java +++ b/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java @@ -15,26 +15,37 @@ public class VariableRegistry { private Deque> memberVariables; // map member variable name to stack of variables private Map> localVariables; // map local variable name to stack of variables private Deque> localVariablesByScope; // stack of local variable names in scope - private NextOperation nextOperation; - private boolean ignoreNextOperation; + private VariableAccessType nextVariableAccessType; + private boolean ignoreNextVariableAccess; private boolean mutableWrite; + /** + * Initialize a new variable registry. + */ public VariableRegistry() { this.unscopedVariables = new HashMap<>(); this.memberVariables = new LinkedList<>(); this.localVariables = new HashMap<>(); this.localVariablesByScope = new LinkedList<>(); - this.nextOperation = NextOperation.READ; // the default - this.ignoreNextOperation = false; + this.nextVariableAccessType = VariableAccessType.READ; // the default + this.ignoreNextVariableAccess = false; this.mutableWrite = false; } - public void setNextOperation(NextOperation nextOperation) { - this.nextOperation = nextOperation; + /** + * Set the next variable acc. This only influences the very next call of registerVariableOperation. 
+ * @param nextVariableAccessType the new value + */ + public void setNextVariableAccessType(VariableAccessType nextVariableAccessType) { + this.nextVariableAccessType = nextVariableAccessType; } - public void setIgnoreNextOperation(boolean ignoreNextOperation) { - this.ignoreNextOperation = ignoreNextOperation; + /** + * + * @param ignoreNextVariableAccess + */ + public void setIgnoreNextVariableAccess(boolean ignoreNextVariableAccess) { + this.ignoreNextVariableAccess = ignoreNextVariableAccess; } public void setMutableWrite(boolean mutableWrite) { @@ -97,22 +108,22 @@ public void exitClass() { memberVariables.removeLast(); } - public void registerVariableOperation(String variableName, boolean isOwnMember, CodeSemantics semantics) { - if (ignoreNextOperation) { - ignoreNextOperation = false; + public void registerVariableAccess(String variableName, boolean isOwnMember, CodeSemantics semantics) { + if (ignoreNextVariableAccess) { + ignoreNextVariableAccess = false; return; } Variable variable = isOwnMember ? getMemberVariable(variableName) : getVariable(variableName); if (variable != null) { - if (nextOperation.isRead) + if (nextVariableAccessType.isRead) semantics.addRead(variable); - if (nextOperation.isWrite || (mutableWrite && variable.isMutable())) + if (nextVariableAccessType.isWrite || (mutableWrite && variable.isMutable())) semantics.addWrite(variable); - } else if (nextOperation.isWrite || mutableWrite) { + } else if (nextVariableAccessType.isWrite || mutableWrite) { semantics.markKeep(); semantics.markFullPositionSignificance(); // since we don't track reads... 
} - nextOperation = NextOperation.READ; + nextVariableAccessType = VariableAccessType.READ; } public void enterLocalScope() { diff --git a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java index 5be49faeb..037a71f3e 100644 --- a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java +++ b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java @@ -9,7 +9,7 @@ import de.jplag.Token; import de.jplag.TokenType; import de.jplag.semantics.CodeSemantics; -import de.jplag.semantics.NextOperation; +import de.jplag.semantics.VariableAccessType; import de.jplag.semantics.Scope; import de.jplag.semantics.VariableRegistry; @@ -470,7 +470,7 @@ public Void visitAssignment(AssignmentTree node, CodeSemantics semantics) { long start = positions.getStartPosition(ast, node); semantics = new CodeSemantics(); addToken(JavaTokenType.J_ASSIGN, start, 1, semantics); - variableRegistry.setNextOperation(NextOperation.WRITE); + variableRegistry.setNextVariableAccessType(VariableAccessType.WRITE); super.visitAssignment(node, semantics); return null; } @@ -480,7 +480,7 @@ public Void visitCompoundAssignment(CompoundAssignmentTree node, CodeSemantics s long start = positions.getStartPosition(ast, node); semantics = new CodeSemantics(); addToken(JavaTokenType.J_ASSIGN, start, 1, semantics); - variableRegistry.setNextOperation(NextOperation.READ_WRITE); + variableRegistry.setNextVariableAccessType(VariableAccessType.READ_WRITE); super.visitCompoundAssignment(node, semantics); return null; } @@ -492,7 +492,7 @@ public Void visitUnary(UnaryTree node, CodeSemantics semantics) { .contains(node.getKind())) { long start = positions.getStartPosition(ast, node); addToken(JavaTokenType.J_ASSIGN, start, 1, semantics); - variableRegistry.setNextOperation(NextOperation.READ_WRITE); + variableRegistry.setNextVariableAccessType(VariableAccessType.READ_WRITE); } 
super.visitUnary(node, semantics); return null; @@ -519,9 +519,9 @@ public Void visitVariable(VariableTree node, CodeSemantics semantics) { } else { semantics = CodeSemantics.createKeep(); } - variableRegistry.setNextOperation(NextOperation.WRITE); + variableRegistry.setNextVariableAccessType(VariableAccessType.WRITE); // manually add variable to semantics since identifier isn't visited - variableRegistry.registerVariableOperation(name, !inLocalScope, semantics); + variableRegistry.registerVariableAccess(name, !inLocalScope, semantics); addToken(JavaTokenType.J_VARDEF, start, node.toString().length(), semantics); super.visitVariable(node, semantics); return null; @@ -545,7 +545,7 @@ public Void visitMethodInvocation(MethodInvocationTree node, CodeSemantics seman scan(node.getTypeArguments(), semantics); // differentiate bar() and this.bar() (ignore) from bar.foo() (don't ignore) // look at cases foo.bar()++ and foo().bar++ - variableRegistry.setIgnoreNextOperation(true); + variableRegistry.setIgnoreNextVariableAccess(true); variableRegistry.setMutableWrite(true); scan(node.getMethodSelect(), semantics); // foo.bar() is a write to foo scan(node.getArguments(), semantics); // foo(bar) is a write to bar @@ -631,16 +631,16 @@ public Void visitDefaultCaseLabel(DefaultCaseLabelTree node, CodeSemantics seman @Override public Void visitMemberSelect(MemberSelectTree node, CodeSemantics semantics) { if (node.getExpression().toString().equals("this")) { - variableRegistry.registerVariableOperation(node.getIdentifier().toString(), true, semantics); + variableRegistry.registerVariableAccess(node.getIdentifier().toString(), true, semantics); } - variableRegistry.setIgnoreNextOperation(false); // don't ignore the foo in foo.bar() + variableRegistry.setIgnoreNextVariableAccess(false); // don't ignore the foo in foo.bar() super.visitMemberSelect(node, semantics); return null; } @Override public Void visitIdentifier(IdentifierTree node, CodeSemantics semantics) { - 
variableRegistry.registerVariableOperation(node.toString(), false, semantics); + variableRegistry.registerVariableAccess(node.toString(), false, semantics); super.visitIdentifier(node, semantics); return null; } From 1d756c93b1fe994763e637ccf5ff9dcd3d623ef4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Wed, 22 Mar 2023 22:56:47 +0100 Subject: [PATCH 044/132] Clean up variable registry --- .../de/jplag/semantics/VariableRegistry.java | 108 +++++++++--------- 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java b/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java index c5f4f6fd0..fef1ccdf2 100644 --- a/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java +++ b/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java @@ -11,8 +11,8 @@ * Helper class to assist in generating token semantics. For languages similar in structure to Java/C */ public class VariableRegistry { - private Map unscopedVariables; - private Deque> memberVariables; // map member variable name to stack of variables + private Map fileVariables; + private Deque> classVariables; // map class name to map of variable names to variables private Map> localVariables; // map local variable name to stack of variables private Deque> localVariablesByScope; // stack of local variable names in scope private VariableAccessType nextVariableAccessType; @@ -23,8 +23,8 @@ public class VariableRegistry { * Initialize a new variable registry. 
*/ public VariableRegistry() { - this.unscopedVariables = new HashMap<>(); - this.memberVariables = new LinkedList<>(); + this.fileVariables = new HashMap<>(); + this.classVariables = new LinkedList<>(); this.localVariables = new HashMap<>(); this.localVariablesByScope = new LinkedList<>(); this.nextVariableAccessType = VariableAccessType.READ; // the default @@ -32,6 +32,10 @@ public VariableRegistry() { this.mutableWrite = false; } + public boolean inLocalScope() { + return !localVariablesByScope.isEmpty(); + } + /** * Set the next variable acc. This only influences the very next call of registerVariableOperation. * @param nextVariableAccessType the new value @@ -52,38 +56,32 @@ public void setMutableWrite(boolean mutableWrite) { this.mutableWrite = mutableWrite; } - public boolean inLocalScope() { - return !localVariablesByScope.isEmpty(); + public void enterClass() { + classVariables.addLast(new HashMap<>()); } - private Variable getMemberVariable(String variableName) { - Map currentMemberVariables = memberVariables.peek(); - return currentMemberVariables != null ? memberVariables.getLast().get(variableName) : null; + public void exitClass() { + classVariables.removeLast(); } - private Variable getVariable(String variableName) { - Deque variableIdStack = localVariables.get(variableName); - if (variableIdStack != null) - return variableIdStack.getLast(); - Variable variable = getMemberVariable(variableName); - return variable != null ? variable : unscopedVariables.get(variableName); - /* todo track global variables -> hard, how to differentiate SomeClass.staticAttr++ from String.join(...) 
- // problem here: all String.joins (for example) are registered as writes to String - // get global variable, register if it doesn't exist - variable = globalVariables.get(variableName); - if (variable != null) - return variable; - variable = new Variable(variableName, false, true); - globalVariables.put(variableName, variable); - return variable; - */ + public void enterLocalScope() { + localVariablesByScope.addLast(new HashSet<>()); + } + + public void exitLocalScope() { + for (String variableName : localVariablesByScope.removeLast()) { + Deque variableStack = localVariables.get(variableName); + variableStack.removeLast(); + if (variableStack.isEmpty()) + localVariables.remove(variableName); + } } public void registerVariable(String variableName, Scope scope, boolean mutable) { Variable variable = new Variable(variableName, scope, mutable); switch (scope) { - case FILE -> unscopedVariables.put(variableName, variable); - case CLASS -> memberVariables.getLast().put(variableName, variable); + case FILE -> fileVariables.put(variableName, variable); + case CLASS -> classVariables.getLast().put(variableName, variable); case LOCAL -> { localVariables.putIfAbsent(variableName, new LinkedList<>()); localVariables.get(variableName).addLast(variable); @@ -92,28 +90,12 @@ public void registerVariable(String variableName, Scope scope, boolean mutable) } } - public void addAllNonLocalVariablesAsReads(CodeSemantics semantics) { - Set nonLocalVariables = new HashSet<>(); - for (Map classMemberVariables: memberVariables) - nonLocalVariables.addAll(classMemberVariables.values()); - for (Variable variable : nonLocalVariables) - semantics.addRead(variable); - } - - public void enterClass() { - memberVariables.addLast(new HashMap<>()); - } - - public void exitClass() { - memberVariables.removeLast(); - } - - public void registerVariableAccess(String variableName, boolean isOwnMember, CodeSemantics semantics) { + public void registerVariableAccess(String variableName, boolean 
isClassVariable, CodeSemantics semantics) { if (ignoreNextVariableAccess) { ignoreNextVariableAccess = false; return; } - Variable variable = isOwnMember ? getMemberVariable(variableName) : getVariable(variableName); + Variable variable = isClassVariable ? getClassVariable(variableName) : getVariable(variableName); if (variable != null) { if (nextVariableAccessType.isRead) semantics.addRead(variable); @@ -126,16 +108,34 @@ public void registerVariableAccess(String variableName, boolean isOwnMember, Cod nextVariableAccessType = VariableAccessType.READ; } - public void enterLocalScope() { - localVariablesByScope.addLast(new HashSet<>()); + public void addAllNonLocalVariablesAsReads(CodeSemantics semantics) { + Set nonLocalVariables = new HashSet<>(fileVariables.values()); + for (Map specificClassVariables: classVariables) + nonLocalVariables.addAll(specificClassVariables.values()); + for (Variable variable : nonLocalVariables) + semantics.addRead(variable); } - public void exitLocalScope() { - for (String variableName : localVariablesByScope.removeLast()) { - Deque variableStack = localVariables.get(variableName); - variableStack.removeLast(); - if (variableStack.isEmpty()) - localVariables.remove(variableName); - } + private Variable getVariable(String variableName) { + Deque variableIdStack = localVariables.get(variableName); + if (variableIdStack != null) + return variableIdStack.getLast(); + Variable variable = getClassVariable(variableName); + return variable != null ? variable : fileVariables.get(variableName); + /* todo track global variables -> hard, how to differentiate SomeClass.staticAttr++ from String.join(...) 
+ // problem here: all String.joins (for example) are registered as writes to String + // get global variable, register if it doesn't exist + variable = globalVariables.get(variableName); + if (variable != null) + return variable; + variable = new Variable(variableName, false, true); + globalVariables.put(variableName, variable); + return variable; + */ + } + + private Variable getClassVariable(String variableName) { + Map currentClassVariables = classVariables.peek(); + return currentClassVariables != null ? classVariables.getLast().get(variableName) : null; } } From 9209ba0d4bb2d2fc2c1606d856aad34ae8316f12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Sat, 25 Mar 2023 16:33:42 +0100 Subject: [PATCH 045/132] Rename normalization graph "dependency" to "edge" --- .../{Dependency.java => Edge.java} | 14 ++++---- .../{DependencyItem.java => EdgeItem.java} | 2 +- .../{DependencyType.java => EdgeType.java} | 2 +- .../normalization/NormalizationGraph.java | 2 +- .../NormalizationGraphConstructor.java | 34 +++++++++---------- .../de/jplag/semantics/VariableRegistry.java | 9 +++-- .../java/TokenGeneratingTreeScanner.java | 1 + 7 files changed, 32 insertions(+), 32 deletions(-) rename core/src/main/java/de/jplag/normalization/{Dependency.java => Edge.java} (63%) rename core/src/main/java/de/jplag/normalization/{DependencyItem.java => EdgeItem.java} (54%) rename core/src/main/java/de/jplag/normalization/{DependencyType.java => EdgeType.java} (88%) diff --git a/core/src/main/java/de/jplag/normalization/Dependency.java b/core/src/main/java/de/jplag/normalization/Edge.java similarity index 63% rename from core/src/main/java/de/jplag/normalization/Dependency.java rename to core/src/main/java/de/jplag/normalization/Edge.java index 95a880783..106b44baf 100644 --- a/core/src/main/java/de/jplag/normalization/Dependency.java +++ b/core/src/main/java/de/jplag/normalization/Edge.java @@ -5,12 +5,12 @@ import de.jplag.semantics.Variable; -class Dependency { - 
private Set items; +class Edge { + private Set items; private boolean isVariableFlow; private boolean isVariableReverseFlow; - Dependency() { + Edge() { items = new HashSet<>(); isVariableFlow = false; } @@ -23,11 +23,11 @@ boolean isVariableReverseFlow() { return isVariableReverseFlow; } - void addItem(DependencyType type, Variable cause) { - if (type == DependencyType.VARIABLE_FLOW) + void addItem(EdgeType type, Variable cause) { + if (type == EdgeType.VARIABLE_FLOW) isVariableFlow = true; - if (type == DependencyType.VARIABLE_REVERSE_FLOW) + if (type == EdgeType.VARIABLE_REVERSE_FLOW) isVariableReverseFlow = true; - items.add(new DependencyItem(type, cause)); + items.add(new EdgeItem(type, cause)); } } diff --git a/core/src/main/java/de/jplag/normalization/DependencyItem.java b/core/src/main/java/de/jplag/normalization/EdgeItem.java similarity index 54% rename from core/src/main/java/de/jplag/normalization/DependencyItem.java rename to core/src/main/java/de/jplag/normalization/EdgeItem.java index 7219557ad..376839c08 100644 --- a/core/src/main/java/de/jplag/normalization/DependencyItem.java +++ b/core/src/main/java/de/jplag/normalization/EdgeItem.java @@ -2,5 +2,5 @@ import de.jplag.semantics.Variable; -record DependencyItem(DependencyType type, Variable cause) { +record EdgeItem(EdgeType type, Variable cause) { } diff --git a/core/src/main/java/de/jplag/normalization/DependencyType.java b/core/src/main/java/de/jplag/normalization/EdgeType.java similarity index 88% rename from core/src/main/java/de/jplag/normalization/DependencyType.java rename to core/src/main/java/de/jplag/normalization/EdgeType.java index 605af009e..43c5f87dd 100644 --- a/core/src/main/java/de/jplag/normalization/DependencyType.java +++ b/core/src/main/java/de/jplag/normalization/EdgeType.java @@ -1,6 +1,6 @@ package de.jplag.normalization; -enum DependencyType { +enum EdgeType { VARIABLE_FLOW, VARIABLE_REVERSE_FLOW, VARIABLE_ORDER, diff --git 
a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java index e0bdea618..ab785e18a 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java @@ -12,7 +12,7 @@ import de.jplag.Token; public class NormalizationGraph { - private SimpleDirectedGraph graph; + private SimpleDirectedGraph graph; public NormalizationGraph(List tokens) { graph = new NormalizationGraphConstructor(tokens).get(); diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java index 9fb4052b1..8cbdee1d9 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java @@ -14,7 +14,7 @@ import de.jplag.semantics.Variable; class NormalizationGraphConstructor { - private SimpleDirectedGraph graph; + private SimpleDirectedGraph graph; private int bidirectionalBlockDepth; private Collection fullPositionalSignificanceIngoing; private TokenLine lastFullPositionalSignificance; @@ -25,7 +25,7 @@ class NormalizationGraphConstructor { private TokenLine current; NormalizationGraphConstructor(List tokens) { - graph = new SimpleDirectedGraph<>(Dependency.class); + graph = new SimpleDirectedGraph<>(Edge.class); bidirectionalBlockDepth = 0; fullPositionalSignificanceIngoing = new LinkedList<>(); variableReads = new HashMap<>(); @@ -42,7 +42,7 @@ class NormalizationGraphConstructor { addTokenLine(currentLine.build()); } - SimpleDirectedGraph get() { + SimpleDirectedGraph get() { return graph; } @@ -71,11 +71,11 @@ private void processBidirectionalBlock() { private void processFullPositionalSignificance() { if (current.semantics().hasFullPositionSignificance()) { for (TokenLine node: fullPositionalSignificanceIngoing) - 
addIngoingEdgeToCurrent(node, DependencyType.POSITION_SIGNIFICANCE_FULL, null); + addIngoingEdgeToCurrent(node, EdgeType.POSITION_SIGNIFICANCE_FULL, null); fullPositionalSignificanceIngoing.clear(); lastFullPositionalSignificance = current; } else if (lastFullPositionalSignificance != null) { - addIngoingEdgeToCurrent(lastFullPositionalSignificance, DependencyType.POSITION_SIGNIFICANCE_FULL, null); + addIngoingEdgeToCurrent(lastFullPositionalSignificance, EdgeType.POSITION_SIGNIFICANCE_FULL, null); } fullPositionalSignificanceIngoing.add(current); } @@ -83,7 +83,7 @@ private void processFullPositionalSignificance() { private void processPartialPositionalSignificance() { if (current.semantics().hasPartialPositionSignificance()) { if (lastPartialPositionalSignificance != null) { - addIngoingEdgeToCurrent(lastPartialPositionalSignificance, DependencyType.POSITION_SIGNIFICANCE_PARTIAL, null); + addIngoingEdgeToCurrent(lastPartialPositionalSignificance, EdgeType.POSITION_SIGNIFICANCE_PARTIAL, null); } lastPartialPositionalSignificance = current; } @@ -92,18 +92,18 @@ private void processPartialPositionalSignificance() { private void processReads() { for (Variable variable : current.semantics().reads()) { for (TokenLine node : variableWrites.getOrDefault(variable, Set.of())) - addIngoingEdgeToCurrent(node, DependencyType.VARIABLE_FLOW, variable); + addIngoingEdgeToCurrent(node, EdgeType.VARIABLE_FLOW, variable); } } private void processWrites() { for (Variable variable : current.semantics().writes()) { for (TokenLine node : variableWrites.getOrDefault(variable, Set.of())) - addIngoingEdgeToCurrent(node, DependencyType.VARIABLE_ORDER, variable); + addIngoingEdgeToCurrent(node, EdgeType.VARIABLE_ORDER, variable); for (TokenLine node : variableReads.getOrDefault(variable, Set.of())) { - DependencyType dependencyType = inCurrentBidirectionalBlock.contains(node) ? 
// - DependencyType.VARIABLE_REVERSE_FLOW : DependencyType.VARIABLE_ORDER; - addIngoingEdgeToCurrent(node, dependencyType, variable); + EdgeType edgeType = inCurrentBidirectionalBlock.contains(node) ? // + EdgeType.VARIABLE_REVERSE_FLOW : EdgeType.VARIABLE_ORDER; + addIngoingEdgeToCurrent(node, edgeType, variable); } addVariableToMap(variableWrites, variable); } @@ -115,13 +115,13 @@ private void processWrites() { * @param type the type of the edge * @param cause the variable that caused the edge, may be null */ - private void addIngoingEdgeToCurrent(TokenLine start, DependencyType type, Variable cause) { - Dependency dependency = graph.getEdge(start, current); - if (dependency == null) { - dependency = new Dependency(); - graph.addEdge(start, current, dependency); + private void addIngoingEdgeToCurrent(TokenLine start, EdgeType type, Variable cause) { + Edge edge = graph.getEdge(start, current); + if (edge == null) { + edge = new Edge(); + graph.addEdge(start, current, edge); } - dependency.addItem(type, cause); + edge.addItem(type, cause); } private void addVariableToMap(Map> variableMap, Variable variable) { diff --git a/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java b/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java index fef1ccdf2..f033ac9d9 100644 --- a/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java +++ b/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java @@ -110,8 +110,7 @@ public void registerVariableAccess(String variableName, boolean isClassVariable, public void addAllNonLocalVariablesAsReads(CodeSemantics semantics) { Set nonLocalVariables = new HashSet<>(fileVariables.values()); - for (Map specificClassVariables: classVariables) - nonLocalVariables.addAll(specificClassVariables.values()); + nonLocalVariables.addAll(classVariables.getLast().values()); for (Variable variable : nonLocalVariables) semantics.addRead(variable); } @@ -119,7 +118,7 @@ public void 
addAllNonLocalVariablesAsReads(CodeSemantics semantics) { private Variable getVariable(String variableName) { Deque variableIdStack = localVariables.get(variableName); if (variableIdStack != null) - return variableIdStack.getLast(); + return variableIdStack.getLast(); // stack is never empty Variable variable = getClassVariable(variableName); return variable != null ? variable : fileVariables.get(variableName); /* todo track global variables -> hard, how to differentiate SomeClass.staticAttr++ from String.join(...) @@ -135,7 +134,7 @@ private Variable getVariable(String variableName) { } private Variable getClassVariable(String variableName) { - Map currentClassVariables = classVariables.peek(); - return currentClassVariables != null ? classVariables.getLast().get(variableName) : null; + Map currentClassVariables = classVariables.peekLast(); + return currentClassVariables != null ? currentClassVariables.get(variableName) : null; } } diff --git a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java index 037a71f3e..999139a99 100644 --- a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java +++ b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java @@ -129,6 +129,7 @@ public Void visitBlock(BlockTree node, CodeSemantics semantics) { @Override public Void visitClass(ClassTree node, CodeSemantics semantics) { + // not super accurate variableRegistry.registerVariable(node.getSimpleName().toString(), Scope.FILE, true); variableRegistry.enterClass(); for (var member : node.getMembers()) { From 650cf6f8dee18992f88f2019ad3d1032b76a3f3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Sat, 25 Mar 2023 16:56:42 +0100 Subject: [PATCH 046/132] Rename "token line" to "statement" --- .../normalization/NormalizationGraph.java | 24 +++++----- .../NormalizationGraphConstructor.java | 48 +++++++++---------- 
.../{TokenLine.java => Statement.java} | 6 +-- ...LineBuilder.java => StatementBuilder.java} | 8 ++-- .../de/jplag/semantics/CodeSemantics.java | 2 +- 5 files changed, 44 insertions(+), 44 deletions(-) rename core/src/main/java/de/jplag/normalization/{TokenLine.java => Statement.java} (91%) rename core/src/main/java/de/jplag/normalization/{TokenLineBuilder.java => StatementBuilder.java} (74%) diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java index ab785e18a..2c7c545ae 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java @@ -12,7 +12,7 @@ import de.jplag.Token; public class NormalizationGraph { - private SimpleDirectedGraph graph; + private SimpleDirectedGraph graph; public NormalizationGraph(List tokens) { graph = new NormalizationGraphConstructor(tokens).get(); @@ -21,19 +21,19 @@ public NormalizationGraph(List tokens) { // todo java doc public List linearize() { spreadKeep(); - PriorityQueue roots = graph.vertexSet().stream() // + PriorityQueue roots = graph.vertexSet().stream() // .filter(v -> !Graphs.vertexHasPredecessors(graph, v)) // .collect(Collectors.toCollection(PriorityQueue::new)); List tokens = new LinkedList<>(); while (!roots.isEmpty()) { - PriorityQueue newRoots = new PriorityQueue<>(); + PriorityQueue newRoots = new PriorityQueue<>(); do { - TokenLine tokenLine = roots.poll(); - if (tokenLine.semantics().keep()) { - tokens.addAll(tokenLine.tokens()); + Statement statement = roots.poll(); + if (statement.semantics().keep()) { + tokens.addAll(statement.tokens()); } - for (TokenLine succ : Graphs.successorListOf(graph, tokenLine)) { - graph.removeEdge(tokenLine, succ); + for (Statement succ : Graphs.successorListOf(graph, statement)) { + graph.removeEdge(statement, succ); if (!Graphs.vertexHasPredecessors(graph, succ)) { newRoots.add(succ); } @@ -45,17 +45,17 
@@ public List linearize() { } private void spreadKeep() { - Deque visit = new LinkedList<>(graph.vertexSet().stream() // + Deque visit = new LinkedList<>(graph.vertexSet().stream() // .filter(tl -> tl.semantics().keep()).toList()); while (!visit.isEmpty()) { - TokenLine current = visit.pop(); - for (TokenLine pred : Graphs.predecessorListOf(graph, current)) { // performance of iteration? + Statement current = visit.pop(); + for (Statement pred : Graphs.predecessorListOf(graph, current)) { // performance of iteration? if (!pred.semantics().keep() && graph.getEdge(pred, current).isVariableFlow()) { pred.markKeep(); visit.add(pred); } } - for (TokenLine succ : Graphs.successorListOf(graph, current)) { + for (Statement succ : Graphs.successorListOf(graph, current)) { if (!succ.semantics().keep() && graph.getEdge(current, succ).isVariableReverseFlow()) { succ.markKeep(); visit.add(succ); diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java index 8cbdee1d9..5bba3784f 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java @@ -14,15 +14,15 @@ import de.jplag.semantics.Variable; class NormalizationGraphConstructor { - private SimpleDirectedGraph graph; + private SimpleDirectedGraph graph; private int bidirectionalBlockDepth; - private Collection fullPositionalSignificanceIngoing; - private TokenLine lastFullPositionalSignificance; - private TokenLine lastPartialPositionalSignificance; - private Map> variableReads; - private Map> variableWrites; - private Set inCurrentBidirectionalBlock; - private TokenLine current; + private Collection fullPositionalSignificanceIngoing; + private Statement lastFullPositionalSignificance; + private Statement lastPartialPositionalSignificance; + private Map> variableReads; + private Map> variableWrites; + 
private Set inCurrentBidirectionalBlock; + private Statement current; NormalizationGraphConstructor(List tokens) { graph = new SimpleDirectedGraph<>(Edge.class); @@ -31,24 +31,24 @@ class NormalizationGraphConstructor { variableReads = new HashMap<>(); variableWrites = new HashMap<>(); inCurrentBidirectionalBlock = new HashSet<>(); - TokenLineBuilder currentLine = new TokenLineBuilder(tokens.get(0).getLine()); + StatementBuilder current = new StatementBuilder(tokens.get(0).getLine()); for (Token token : tokens) { - if (token.getLine() != currentLine.lineNumber()) { - addTokenLine(currentLine.build()); - currentLine = new TokenLineBuilder(token.getLine()); + if (token.getLine() != current.lineNumber()) { + addStatement(current.build()); + current = new StatementBuilder(token.getLine()); } - currentLine.addToken(token); + current.addToken(token); } - addTokenLine(currentLine.build()); + addStatement(current.build()); } - SimpleDirectedGraph get() { + SimpleDirectedGraph get() { return graph; } - private void addTokenLine(TokenLine tokenLine) { - graph.addVertex(tokenLine); - this.current = tokenLine; + private void addStatement(Statement statement) { + graph.addVertex(statement); + this.current = statement; processBidirectionalBlock(); processFullPositionalSignificance(); processPartialPositionalSignificance(); @@ -70,7 +70,7 @@ private void processBidirectionalBlock() { private void processFullPositionalSignificance() { if (current.semantics().hasFullPositionSignificance()) { - for (TokenLine node: fullPositionalSignificanceIngoing) + for (Statement node: fullPositionalSignificanceIngoing) addIngoingEdgeToCurrent(node, EdgeType.POSITION_SIGNIFICANCE_FULL, null); fullPositionalSignificanceIngoing.clear(); lastFullPositionalSignificance = current; @@ -91,16 +91,16 @@ private void processPartialPositionalSignificance() { private void processReads() { for (Variable variable : current.semantics().reads()) { - for (TokenLine node : variableWrites.getOrDefault(variable, 
Set.of())) + for (Statement node : variableWrites.getOrDefault(variable, Set.of())) addIngoingEdgeToCurrent(node, EdgeType.VARIABLE_FLOW, variable); } } private void processWrites() { for (Variable variable : current.semantics().writes()) { - for (TokenLine node : variableWrites.getOrDefault(variable, Set.of())) + for (Statement node : variableWrites.getOrDefault(variable, Set.of())) addIngoingEdgeToCurrent(node, EdgeType.VARIABLE_ORDER, variable); - for (TokenLine node : variableReads.getOrDefault(variable, Set.of())) { + for (Statement node : variableReads.getOrDefault(variable, Set.of())) { EdgeType edgeType = inCurrentBidirectionalBlock.contains(node) ? // EdgeType.VARIABLE_REVERSE_FLOW : EdgeType.VARIABLE_ORDER; addIngoingEdgeToCurrent(node, edgeType, variable); @@ -115,7 +115,7 @@ private void processWrites() { * @param type the type of the edge * @param cause the variable that caused the edge, may be null */ - private void addIngoingEdgeToCurrent(TokenLine start, EdgeType type, Variable cause) { + private void addIngoingEdgeToCurrent(Statement start, EdgeType type, Variable cause) { Edge edge = graph.getEdge(start, current); if (edge == null) { edge = new Edge(); @@ -124,7 +124,7 @@ private void addIngoingEdgeToCurrent(TokenLine start, EdgeType type, Variable ca edge.addItem(type, cause); } - private void addVariableToMap(Map> variableMap, Variable variable) { + private void addVariableToMap(Map> variableMap, Variable variable) { variableMap.putIfAbsent(variable, new LinkedList<>()); variableMap.get(variable).add(current); } diff --git a/core/src/main/java/de/jplag/normalization/TokenLine.java b/core/src/main/java/de/jplag/normalization/Statement.java similarity index 91% rename from core/src/main/java/de/jplag/normalization/TokenLine.java rename to core/src/main/java/de/jplag/normalization/Statement.java index d6833b533..a6e6800ef 100644 --- a/core/src/main/java/de/jplag/normalization/TokenLine.java +++ 
b/core/src/main/java/de/jplag/normalization/Statement.java @@ -7,13 +7,13 @@ import de.jplag.Token; import de.jplag.semantics.CodeSemantics; -class TokenLine implements Comparable { +class Statement implements Comparable { private final List tokens; private final int lineNumber; private final CodeSemantics semantics; - TokenLine(List tokens, int lineNumber) { + Statement(List tokens, int lineNumber) { this.tokens = Collections.unmodifiableList(tokens); this.lineNumber = lineNumber; this.semantics = CodeSemantics.join(tokens.stream().map(Token::getSemantics).toList()); @@ -36,7 +36,7 @@ private int tokenOrdinal(Token token) { } @Override - public int compareTo(TokenLine other) { + public int compareTo(Statement other) { int sizeComp = Integer.compare(this.tokens.size(), other.tokens.size()); if (sizeComp != 0) return -sizeComp; // bigger size should come first diff --git a/core/src/main/java/de/jplag/normalization/TokenLineBuilder.java b/core/src/main/java/de/jplag/normalization/StatementBuilder.java similarity index 74% rename from core/src/main/java/de/jplag/normalization/TokenLineBuilder.java rename to core/src/main/java/de/jplag/normalization/StatementBuilder.java index 538d1877c..b582940f3 100644 --- a/core/src/main/java/de/jplag/normalization/TokenLineBuilder.java +++ b/core/src/main/java/de/jplag/normalization/StatementBuilder.java @@ -5,12 +5,12 @@ import de.jplag.Token; -class TokenLineBuilder { +class StatementBuilder { private List tokens; private final int lineNumber; - TokenLineBuilder(int lineNumber) { + StatementBuilder(int lineNumber) { this.lineNumber = lineNumber; this.tokens = new LinkedList<>(); } @@ -23,7 +23,7 @@ void addToken(Token token) { tokens.add(token); } - TokenLine build() { - return new TokenLine(tokens, lineNumber); + Statement build() { + return new Statement(tokens, lineNumber); } } diff --git a/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java b/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java index 
660e862d0..70c40dc82 100644 --- a/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java +++ b/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java @@ -7,7 +7,7 @@ import java.util.Set; /** - * This class contains semantic information about a code snippet, in our case either a token or a line of code. + * This class contains semantic information about a code snippet, in our case either a token or a statement. */ public class CodeSemantics { From a2c500a626d2ab74b7793cee9ce9eca6779937f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Sat, 25 Mar 2023 20:18:41 +0100 Subject: [PATCH 047/132] Change "positional" to "position" in normalization graph constructor --- .../NormalizationGraphConstructor.java | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java index 5bba3784f..ffed626af 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java @@ -16,9 +16,9 @@ class NormalizationGraphConstructor { private SimpleDirectedGraph graph; private int bidirectionalBlockDepth; - private Collection fullPositionalSignificanceIngoing; - private Statement lastFullPositionalSignificance; - private Statement lastPartialPositionalSignificance; + private Collection fullPositionSignificanceIngoing; + private Statement lastFullPositionSignificance; + private Statement lastPartialPositionSignificance; private Map> variableReads; private Map> variableWrites; private Set inCurrentBidirectionalBlock; @@ -27,7 +27,7 @@ class NormalizationGraphConstructor { NormalizationGraphConstructor(List tokens) { graph = new SimpleDirectedGraph<>(Edge.class); bidirectionalBlockDepth = 0; - fullPositionalSignificanceIngoing = new LinkedList<>(); + 
fullPositionSignificanceIngoing = new LinkedList<>(); variableReads = new HashMap<>(); variableWrites = new HashMap<>(); inCurrentBidirectionalBlock = new HashSet<>(); @@ -50,8 +50,8 @@ private void addStatement(Statement statement) { graph.addVertex(statement); this.current = statement; processBidirectionalBlock(); - processFullPositionalSignificance(); - processPartialPositionalSignificance(); + processFullPositionSignificance(); + processPartialPositionSignificance(); processReads(); processWrites(); for (Variable variable : current.semantics().reads()) @@ -68,24 +68,24 @@ private void processBidirectionalBlock() { inCurrentBidirectionalBlock.clear(); } - private void processFullPositionalSignificance() { + private void processFullPositionSignificance() { if (current.semantics().hasFullPositionSignificance()) { - for (Statement node: fullPositionalSignificanceIngoing) + for (Statement node: fullPositionSignificanceIngoing) addIngoingEdgeToCurrent(node, EdgeType.POSITION_SIGNIFICANCE_FULL, null); - fullPositionalSignificanceIngoing.clear(); - lastFullPositionalSignificance = current; - } else if (lastFullPositionalSignificance != null) { - addIngoingEdgeToCurrent(lastFullPositionalSignificance, EdgeType.POSITION_SIGNIFICANCE_FULL, null); + fullPositionSignificanceIngoing.clear(); + lastFullPositionSignificance = current; + } else if (lastFullPositionSignificance != null) { + addIngoingEdgeToCurrent(lastFullPositionSignificance, EdgeType.POSITION_SIGNIFICANCE_FULL, null); } - fullPositionalSignificanceIngoing.add(current); + fullPositionSignificanceIngoing.add(current); } - private void processPartialPositionalSignificance() { + private void processPartialPositionSignificance() { if (current.semantics().hasPartialPositionSignificance()) { - if (lastPartialPositionalSignificance != null) { - addIngoingEdgeToCurrent(lastPartialPositionalSignificance, EdgeType.POSITION_SIGNIFICANCE_PARTIAL, null); + if (lastPartialPositionSignificance != null) { + 
addIngoingEdgeToCurrent(lastPartialPositionSignificance, EdgeType.POSITION_SIGNIFICANCE_PARTIAL, null); } - lastPartialPositionalSignificance = current; + lastPartialPositionSignificance = current; } } From 3a22f7abb77c17eb52df7f5388702d93a7ad0832 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Sat, 25 Mar 2023 20:47:32 +0100 Subject: [PATCH 048/132] Remove small redundancy from graph construction --- .../de/jplag/normalization/NormalizationGraphConstructor.java | 1 - 1 file changed, 1 deletion(-) diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java index ffed626af..fe258d8ee 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java @@ -105,7 +105,6 @@ private void processWrites() { EdgeType.VARIABLE_REVERSE_FLOW : EdgeType.VARIABLE_ORDER; addIngoingEdgeToCurrent(node, edgeType, variable); } - addVariableToMap(variableWrites, variable); } } From 6365f317afea3f98f2faf5f43d18dde63c3cb2b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Sat, 25 Mar 2023 22:16:42 +0100 Subject: [PATCH 049/132] Add JavaDoc --- .../java/de/jplag/normalization/EdgeType.java | 20 ++++++ .../normalization/NormalizationGraph.java | 13 +++- .../NormalizationGraphConstructor.java | 2 +- .../de/jplag/normalization/Statement.java | 6 +- .../de/jplag/semantics/BlockRelation.java | 19 ------ .../de/jplag/semantics/CodeSemantics.java | 10 +-- .../main/java/de/jplag/semantics/Scope.java | 7 -- .../java/de/jplag/semantics/Variable.java | 4 +- .../jplag/semantics/VariableAccessType.java | 12 ++++ .../de/jplag/semantics/VariableRegistry.java | 64 ++++++++++++++----- .../de/jplag/semantics/VariableScope.java | 19 ++++++ .../java/TokenGeneratingTreeScanner.java | 9 +-- 12 files changed, 128 insertions(+), 57 deletions(-) delete mode 
100644 language-api/src/main/java/de/jplag/semantics/BlockRelation.java delete mode 100644 language-api/src/main/java/de/jplag/semantics/Scope.java create mode 100644 language-api/src/main/java/de/jplag/semantics/VariableScope.java diff --git a/core/src/main/java/de/jplag/normalization/EdgeType.java b/core/src/main/java/de/jplag/normalization/EdgeType.java index 43c5f87dd..a377d97c3 100644 --- a/core/src/main/java/de/jplag/normalization/EdgeType.java +++ b/core/src/main/java/de/jplag/normalization/EdgeType.java @@ -1,9 +1,29 @@ package de.jplag.normalization; +/** + * Enum for types of edges in normalization graph. Given two statements A and B, A comes before B, there is such an edge + * between A and B if... + */ enum EdgeType { + /** + * B reads from a variable A writes. + */ VARIABLE_FLOW, + /** + * A reads from a variable B writes, and A and B are in the same bidirectional block. + */ VARIABLE_REVERSE_FLOW, + /** + * A and B access the same variable, and at least one of the two accesses is not a read. + */ VARIABLE_ORDER, + /** + * A or B have full position significance, and there is no statement C with full position significance between them. + */ POSITION_SIGNIFICANCE_FULL, + /** + * A and B have partial position significance, and there is no statement C with partial position significance between + * them. + */ POSITION_SIGNIFICANCE_PARTIAL } diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java index 2c7c545ae..51da8b0df 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java @@ -14,11 +14,19 @@ public class NormalizationGraph { private SimpleDirectedGraph graph; + /** + * Construct a new normalization graph from the tokens. + * @param tokens The tokens used to construct the normalization graph. 
+ */ public NormalizationGraph(List tokens) { graph = new NormalizationGraphConstructor(tokens).get(); } - // todo java doc + /** + * Turns this normalization graph back into a list of tokens. Tokens representing dead code have been eliminated and + * tokens representing subsequent independent statements have been put in a fixed order. + * @return the normalized list of tokens. + */ public List linearize() { spreadKeep(); PriorityQueue roots = graph.vertexSet().stream() // @@ -44,6 +52,9 @@ public List linearize() { return tokens; } + /** + * Spread keep status to every node that does not represent dead code. Nodes without keep status are later eliminated. + */ private void spreadKeep() { Deque visit = new LinkedList<>(graph.vertexSet().stream() // .filter(tl -> tl.semantics().keep()).toList()); diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java index fe258d8ee..a59164602 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java @@ -70,7 +70,7 @@ private void processBidirectionalBlock() { private void processFullPositionSignificance() { if (current.semantics().hasFullPositionSignificance()) { - for (Statement node: fullPositionSignificanceIngoing) + for (Statement node : fullPositionSignificanceIngoing) addIngoingEdgeToCurrent(node, EdgeType.POSITION_SIGNIFICANCE_FULL, null); fullPositionSignificanceIngoing.clear(); lastFullPositionSignificance = current; diff --git a/core/src/main/java/de/jplag/normalization/Statement.java b/core/src/main/java/de/jplag/normalization/Statement.java index a6e6800ef..f1d1f0e2c 100644 --- a/core/src/main/java/de/jplag/normalization/Statement.java +++ b/core/src/main/java/de/jplag/normalization/Statement.java @@ -19,15 +19,15 @@ class Statement implements Comparable { this.semantics = 
CodeSemantics.join(tokens.stream().map(Token::getSemantics).toList()); } - public List tokens() { + List tokens() { return tokens; } - public CodeSemantics semantics() { + CodeSemantics semantics() { return semantics; } - public void markKeep() { + void markKeep() { semantics.markKeep(); } diff --git a/language-api/src/main/java/de/jplag/semantics/BlockRelation.java b/language-api/src/main/java/de/jplag/semantics/BlockRelation.java deleted file mode 100644 index 9ee12f843..000000000 --- a/language-api/src/main/java/de/jplag/semantics/BlockRelation.java +++ /dev/null @@ -1,19 +0,0 @@ -package de.jplag.semantics; - -/** - * Enumerates the relationships a code snippet can have with a code block. - */ -enum BlockRelation { - /** - * This code snippet begins the block. - */ - BEGINS_BLOCK, - /** - * This code snippet ends the block. - */ - ENDS_BLOCK, - /** - * This code snippet neither begins nor ends the block. - */ - NONE -} diff --git a/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java b/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java index 70c40dc82..f6c97186c 100644 --- a/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java +++ b/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java @@ -21,14 +21,15 @@ public class CodeSemantics { * Creates new semantics. reads and writes, which each contain the variables which were (potentially) read from/written * to in this code snippet, are created empty. * @param keep Whether the code snippet must be kept or if it may be removed. - * @param positionSignificance In which way the position of the code snippet relative to other code snippets of the same type is - * significant. For the possible options see {@link PositionSignificance}. + * @param positionSignificance In which way the position of the code snippet relative to other code snippets of the same + * type is significant. For the possible options see {@link PositionSignificance}. 
* @param bidirectionalBlockDepthChange How the code snippet affects the depth of bidirectional blocks, meaning blocks * where any statement within it may be executed after any other. This will typically be a loop. * @param reads A set of the variables which were (potentially) read from in the code snippet. * @param writes A set of the variables which were (potentially) written to in the code snippet. */ - private CodeSemantics(boolean keep, PositionSignificance positionSignificance, int bidirectionalBlockDepthChange, Set reads, Set writes) { + private CodeSemantics(boolean keep, PositionSignificance positionSignificance, int bidirectionalBlockDepthChange, Set reads, + Set writes) { this.keep = keep; this.positionSignificance = positionSignificance; this.bidirectionalBlockDepthChange = bidirectionalBlockDepthChange; @@ -167,8 +168,7 @@ public void addWrite(Variable variable) { *
        *
      • keep is the disjunction of all keeps
      • *
      • position significance is the most significant
      • - *
      • bidirectionalBlockRelation is the one that is not NONE out of all bidirectionalBlockRelations if it exists. It's - * assumed that there is at most one. If there isn't one bidirectionalBlockRelation is NONE.
      • + *
      • bidirectionalBlockDepthChange is the sum of all bidirectionalBlockDepthChanges
      • *
      • reads is the union of all reads
      • *
      • writes is the union of all writes
      • *
      diff --git a/language-api/src/main/java/de/jplag/semantics/Scope.java b/language-api/src/main/java/de/jplag/semantics/Scope.java deleted file mode 100644 index 760c42ff1..000000000 --- a/language-api/src/main/java/de/jplag/semantics/Scope.java +++ /dev/null @@ -1,7 +0,0 @@ -package de.jplag.semantics; - -public enum Scope { - FILE, - CLASS, - LOCAL -} diff --git a/language-api/src/main/java/de/jplag/semantics/Variable.java b/language-api/src/main/java/de/jplag/semantics/Variable.java index f41d63fb9..bf1588a93 100644 --- a/language-api/src/main/java/de/jplag/semantics/Variable.java +++ b/language-api/src/main/java/de/jplag/semantics/Variable.java @@ -5,10 +5,10 @@ */ public class Variable { private final String name; - private final Scope scope; + private final VariableScope scope; private final boolean isMutable; - Variable(String name, Scope scope, boolean isMutable) { + Variable(String name, VariableScope scope, boolean isMutable) { this.name = name; this.scope = scope; this.isMutable = isMutable; diff --git a/language-api/src/main/java/de/jplag/semantics/VariableAccessType.java b/language-api/src/main/java/de/jplag/semantics/VariableAccessType.java index 15245916c..9829050fb 100644 --- a/language-api/src/main/java/de/jplag/semantics/VariableAccessType.java +++ b/language-api/src/main/java/de/jplag/semantics/VariableAccessType.java @@ -1,8 +1,20 @@ package de.jplag.semantics; +/** + * The ways a variable can be accessed. + */ public enum VariableAccessType { + /** + * The variable is read from. + */ READ(true, false), + /** + * The variable is written to. + */ WRITE(false, true), + /** + * The variable is read from and written to. 
+ */ READ_WRITE(true, true); final boolean isRead; diff --git a/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java b/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java index f033ac9d9..a3209cea1 100644 --- a/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java +++ b/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java @@ -8,7 +8,7 @@ import java.util.Set; /** - * Helper class to assist in generating token semantics. For languages similar in structure to Java/C + * Registry of variables to assist in generating token semantics. */ public class VariableRegistry { private Map fileVariables; @@ -32,42 +32,62 @@ public VariableRegistry() { this.mutableWrite = false; } + /** + * @return If we are currently in a local scope. + */ public boolean inLocalScope() { return !localVariablesByScope.isEmpty(); } /** - * Set the next variable acc. This only influences the very next call of registerVariableOperation. - * @param nextVariableAccessType the new value + * Set the type of the next variable access. This only influences the very next call of registerVariableOperation. + * @param nextVariableAccessType The type of the next variable access. */ public void setNextVariableAccessType(VariableAccessType nextVariableAccessType) { this.nextVariableAccessType = nextVariableAccessType; } /** - * - * @param ignoreNextVariableAccess + * Set whether the next variable access is ignored. This only influences the very next call of + * registerVariableOperation. + * @param ignoreNextVariableAccess Whether the next variable access is ignored. */ public void setIgnoreNextVariableAccess(boolean ignoreNextVariableAccess) { this.ignoreNextVariableAccess = ignoreNextVariableAccess; } + /** + * Set whether accesses to mutable variables are writes from this point on. + * @param mutableWrite Whether accesses to mutable variables are writes from this point on. 
+ */ public void setMutableWrite(boolean mutableWrite) { this.mutableWrite = mutableWrite; } + /** + * Enter a class. + */ public void enterClass() { classVariables.addLast(new HashMap<>()); } + /** + * Exit a class. This causes all variables bound to the current class to no longer be visible. + */ public void exitClass() { classVariables.removeLast(); } + /** + * Enter a local scope. + */ public void enterLocalScope() { localVariablesByScope.addLast(new HashSet<>()); } + /** + * Exit a local scope. This causes all variables bound to the current local scope to no longer be visible. + */ public void exitLocalScope() { for (String variableName : localVariablesByScope.removeLast()) { Deque variableStack = localVariables.get(variableName); @@ -77,7 +97,13 @@ public void exitLocalScope() { } } - public void registerVariable(String variableName, Scope scope, boolean mutable) { + /** + * Register a variable. + * @param variableName The variable's name. + * @param scope The variable's scope. + * @param mutable Whether the variable is mutable. + */ + public void registerVariable(String variableName, VariableScope scope, boolean mutable) { Variable variable = new Variable(variableName, scope, mutable); switch (scope) { case FILE -> fileVariables.put(variableName, variable); @@ -90,6 +116,14 @@ public void registerVariable(String variableName, Scope scope, boolean mutable) } } + /** + * Register a variable access, more precisely: Add a variable access to a CodeSemantics instance. The type of the access + * can be set with setNextVariableAccessType. By default, its type is read. + * @param variableName The variable's name. + * @param isClassVariable Whether the variable is a class variable. This is true if a variable is qualified with the + * "this" keyword in Java, for example. + * @param semantics The CodeSemantics instance the variable access is added to. 
+ */ public void registerVariableAccess(String variableName, boolean isClassVariable, CodeSemantics semantics) { if (ignoreNextVariableAccess) { ignoreNextVariableAccess = false; @@ -108,6 +142,10 @@ public void registerVariableAccess(String variableName, boolean isClassVariable, nextVariableAccessType = VariableAccessType.READ; } + /** + * Add all non-local visible variables as reads to the CodeSemantics instance. + * @param semantics The CodeSemantics instance. + */ public void addAllNonLocalVariablesAsReads(CodeSemantics semantics) { Set nonLocalVariables = new HashSet<>(fileVariables.values()); nonLocalVariables.addAll(classVariables.getLast().values()); @@ -121,15 +159,11 @@ private Variable getVariable(String variableName) { return variableIdStack.getLast(); // stack is never empty Variable variable = getClassVariable(variableName); return variable != null ? variable : fileVariables.get(variableName); - /* todo track global variables -> hard, how to differentiate SomeClass.staticAttr++ from String.join(...) - // problem here: all String.joins (for example) are registered as writes to String - // get global variable, register if it doesn't exist - variable = globalVariables.get(variableName); - if (variable != null) - return variable; - variable = new Variable(variableName, false, true); - globalVariables.put(variableName, variable); - return variable; + /* + * todo track global variables -> hard, how to differentiate SomeClass.staticAttr++ from String.join(...) 
// problem + * here: all String.joins (for example) are registered as writes to String // get global variable, register if it + * doesn't exist variable = globalVariables.get(variableName); if (variable != null) return variable; variable = new + * Variable(variableName, false, true); globalVariables.put(variableName, variable); return variable; */ } diff --git a/language-api/src/main/java/de/jplag/semantics/VariableScope.java b/language-api/src/main/java/de/jplag/semantics/VariableScope.java new file mode 100644 index 000000000..8f86407a8 --- /dev/null +++ b/language-api/src/main/java/de/jplag/semantics/VariableScope.java @@ -0,0 +1,19 @@ +package de.jplag.semantics; + +/** + * The scopes a variable can have. Scopes dictate a variable's visibility. + */ +public enum VariableScope { + /** + * The variable is visible in the entire file. + */ + FILE, + /** + * The variable is only visible in the class it was declared in. + */ + CLASS, + /** + * The variable is only visible in the local scope it was declared in. 
+ */ + LOCAL +} diff --git a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java index 999139a99..504f70369 100644 --- a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java +++ b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java @@ -10,8 +10,8 @@ import de.jplag.TokenType; import de.jplag.semantics.CodeSemantics; import de.jplag.semantics.VariableAccessType; -import de.jplag.semantics.Scope; import de.jplag.semantics.VariableRegistry; +import de.jplag.semantics.VariableScope; import com.sun.source.tree.AnnotationTree; import com.sun.source.tree.AssertTree; @@ -130,14 +130,14 @@ public Void visitBlock(BlockTree node, CodeSemantics semantics) { @Override public Void visitClass(ClassTree node, CodeSemantics semantics) { // not super accurate - variableRegistry.registerVariable(node.getSimpleName().toString(), Scope.FILE, true); + variableRegistry.registerVariable(node.getSimpleName().toString(), VariableScope.FILE, true); variableRegistry.enterClass(); for (var member : node.getMembers()) { if (member.getKind() == Tree.Kind.VARIABLE) { VariableTree variableTree = (VariableTree) member; String name = variableTree.getName().toString(); boolean mutable = isMutable(variableTree.getType()); - variableRegistry.registerVariable(name, Scope.CLASS, mutable); + variableRegistry.registerVariable(name, VariableScope.CLASS, mutable); } } @@ -513,9 +513,10 @@ public Void visitVariable(VariableTree node, CodeSemantics semantics) { long start = positions.getStartPosition(ast, node); String name = node.getName().toString(); boolean inLocalScope = variableRegistry.inLocalScope(); + // this presents a problem when classes are declared in local scopes, which can happen in ad-hoc implementations if (inLocalScope) { boolean mutable = isMutable(node.getType()); - variableRegistry.registerVariable(name, Scope.LOCAL, mutable); + 
variableRegistry.registerVariable(name, VariableScope.LOCAL, mutable); semantics = new CodeSemantics(); } else { semantics = CodeSemantics.createKeep(); From bb48fa110eaf9f69f7e153efe7e8e7e87e3f4072 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Sun, 2 Apr 2023 15:27:01 +0200 Subject: [PATCH 050/132] Fix performance --- .../main/java/de/jplag/normalization/NormalizationGraph.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java index 51da8b0df..fd823b670 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java @@ -1,5 +1,6 @@ package de.jplag.normalization; +import java.util.ArrayList; import java.util.Deque; import java.util.LinkedList; import java.util.List; @@ -32,7 +33,7 @@ public List linearize() { PriorityQueue roots = graph.vertexSet().stream() // .filter(v -> !Graphs.vertexHasPredecessors(graph, v)) // .collect(Collectors.toCollection(PriorityQueue::new)); - List tokens = new LinkedList<>(); + List tokens = new ArrayList<>(); while (!roots.isEmpty()) { PriorityQueue newRoots = new PriorityQueue<>(); do { From 7fde233538f91349d51bde3b10d1e7eedd339b32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Sun, 2 Apr 2023 23:36:59 +0200 Subject: [PATCH 051/132] Comment out token string normalization --- core/src/main/java/de/jplag/JPlag.java | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/core/src/main/java/de/jplag/JPlag.java b/core/src/main/java/de/jplag/JPlag.java index af645f35d..3480e8f86 100644 --- a/core/src/main/java/de/jplag/JPlag.java +++ b/core/src/main/java/de/jplag/JPlag.java @@ -59,11 +59,9 @@ public JPlagResult run() throws ExitException { throw new SubmissionException("Not enough valid submissions! 
(found " + submissionCount + " valid submissions)"); } - // better solution long-term: pull this into submissionSet constructor, option for normalization, can only be true if - // language supports it - if (language.tokensHaveSemantics()) { - submissionSet.normalizeSubmissions(); - } + // better solution long-term: pull this into SubmissionSet constructor, use language-specific option + // if (language.tokensHaveSemantics()) + // submissionSet.normalizeSubmissions(); // Compare valid submissions. JPlagResult result = comparisonStrategy.compareSubmissions(submissionSet); From 0153c06b4d6813f4ce34a0023844a26229a39776 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dominik=20Fuch=C3=9F?= Date: Wed, 5 Apr 2023 23:26:16 +0200 Subject: [PATCH 052/132] Added Data from Wiki with Workflow --- .github/workflows/docs.yml | 36 +++ .gitignore | 3 +- docs/1.-How-to-Use-JPlag.md | 207 +++++++++++++ docs/2.-Supported-Languages.md | 11 + docs/3.-Contributing-to-JPlag.md | 22 ++ docs/4.-Adding-New-Languages.md | 426 ++++++++++++++++++++++++++ docs/5.-End-to-End-Testing.md | 433 +++++++++++++++++++++++++++ docs/6.-Report-File-Generation.md | 85 ++++++ docs/7.-Clustering-of-Submissions.md | 91 ++++++ docs/Home.md | 40 +++ 10 files changed, 1353 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/docs.yml create mode 100644 docs/1.-How-to-Use-JPlag.md create mode 100644 docs/2.-Supported-Languages.md create mode 100644 docs/3.-Contributing-to-JPlag.md create mode 100644 docs/4.-Adding-New-Languages.md create mode 100644 docs/5.-End-to-End-Testing.md create mode 100644 docs/6.-Report-File-Generation.md create mode 100644 docs/7.-Clustering-of-Submissions.md create mode 100644 docs/Home.md diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 000000000..1b4a7f0f7 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,36 @@ +name: Documentation + +on: + push: + branches: + - main + tags: "v**" + paths: + - 'docs/**' + - 
'.github/workflows/docs.yml' + workflow_dispatch: + +jobs: + docs: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3.5.0 + with: + token: ${{ secrets.SDQ_DEV_DEPLOY_TOKEN }} + - uses: actions/checkout@v3.5.0 + with: + repository: ${{ github.repository }}.wiki + path: wiki + token: ${{ secrets.SDQ_DEV_DEPLOY_TOKEN }} + + - name: Remove contents in Wiki + working-directory: wiki + run: ls -A1 | grep -v '.git' | xargs rm -r + + - name: Copy Wiki from Docs folder + run: cp -r ./docs/. ./wiki + + - name: Deploy 🚀 + uses: stefanzweifel/git-auto-commit-action@v4 + with: + repository: wiki diff --git a/.gitignore b/.gitignore index 9db0f9ba9..582412849 100644 --- a/.gitignore +++ b/.gitignore @@ -5,7 +5,8 @@ target/ *.class - +# GitHub +wiki # Mobile Tools for Java (J2ME) .mtj.tmp/ diff --git a/docs/1.-How-to-Use-JPlag.md b/docs/1.-How-to-Use-JPlag.md new file mode 100644 index 000000000..9cb84e86b --- /dev/null +++ b/docs/1.-How-to-Use-JPlag.md @@ -0,0 +1,207 @@ +JPlag can be used via the Command Line Interface (CLI) or programmatically via the Java API. + +## Using JPlag via the CLI +JPlag can be used via the Command Line Interface by executing the JAR file. + +Example: `java -jar jplag.jar path/to/the/submissions` + +The following arguments can be used to control JPlag: +``` +positional arguments: + rootDir Root-directory with submissions to check for plagiarism + +named arguments: + -h, --help show this help message and exit + -new NEW [NEW ...] Root-directory with submissions to check for plagiarism (same as the root directory) + -old OLD [OLD ...] 
Root-directory with prior submissions to compare against + -l {cpp,csharp,emf,go,java,kotlin,python3,rlang,scala,scheme,swift,text} + Select the language to parse the submissions (default: java) + -bc BC Path of the directory containing the base code (common framework used in all + submissions) + -t T Tunes the comparison sensitivity by adjusting the minimum token required to be counted + as a matching section. A smaller increases the sensitivity but might lead to more + false-positives + -n N The maximum number of comparisons that will be shown in the generated report, if set + to -1 all comparisons will be shown (default: 100) + -r R Name of the directory in which the comparison results will be stored (default: result) + +Advanced: + -d Debug parser. Non-parsable files will be stored (default: false) + -s S Look in directories /*/ for programs + -p P comma-separated list of all filename suffixes that are included + -x X All files named in this file will be ignored in the comparison (line-separated list) + -m M Comparison similarity threshold [0.0-1.0]: All comparisons above this threshold will + be saved (default: 0.0) + +Clustering: + --cluster-skip Skips the clustering (default: false) + --cluster-alg {AGGLOMERATIVE,SPECTRAL} + Which clustering algorithm to use. Agglomerative merges similar submissions bottom up. + Spectral clustering is combined with Bayesian Optimization to execute the k-Means + clustering algorithm multiple times, hopefully finding a "good" clustering + automatically. (default: spectral) + --cluster-metric {AVG,MIN,MAX,INTERSECTION} + The metric used for clustering. AVG is intersection over union, MAX can expose some + attempts of obfuscation. (default: MAX) +``` + +*Note that the [legacy CLI](https://github.com/jplag/jplag/blob/legacy/README.md) is varying slightly.* + +## Using JPlag programmatically +The new API makes it easy to integrate JPlag's plagiarism detection into external Java projects. 
+ +**Example:** + +```java +Language language = new de.jplag.java.Language(); +Set submissionDirectories = Set.of(new File("/path/to/rootDir")); +File baseCode = new File("/path/to/baseCode"); +JPlagOptions options = new JPlagOptions(language, submissionDirectories, Set.of()).withBaseCodeSubmissionDirectory(baseCode); + +JPlag jplag = new JPlag(options); +try { + JPlagResult result = jplag.run(); + + // Optional + ReportObjectFactory reportObjectFactory = new ReportObjectFactory(); + reportObjectFactory.createAndSaveReport(result, "/path/to/output"); +} catch (ExitException e) { + // error handling here +} +``` + +## Report File Generation + +After a JPlag run a zipped result report is automatically created. +The target location of the report can be specified with the `-r` flag. + +If the `-r` is not specified, the location defaults `result.zip`. Specifying the `-r` flag with a path `/path/to/desiredFolder` results in the report being created as `/path/to/desiredFolder.zip`. + +Unless there is an error during the zipping process, the report will always be zipped. If the zipping process fails, the report will be available as unzipped under the specified location. + +## Viewing Reports + +The newest version of the report viewer is always accessible at https://jplag.github.io/JPlag/. Simply drop your `result.zip` folder on the page to start inspecting the results of your JPlag run. Your submissions will neither be uploaded to a server nor stored permanently. They are saved in the application as long as you view them. Once you refresh the page, all information will be erased. + + +## Basic Concepts + +This section explains some fundamental concepts about JPlag that make it easier to understand and use. + +* **Root directory:** This is the directory in which JPlag will scan for submissions. +* **Submissions:** Submissions contain the source code that JPlag will parse and compare. 
They have to be direct children of the root directory and can either be single files or directories. + +### Single-file submissions + +``` +/path/to/root-directory +├── Submission-1.java +├── ... +└── Submission-n.java +``` + +### Directory submissions + +JPlag will read submission directories recursively, so they can contain multiple (nested) source code files. + +``` +/path/to/root-directory +├── Submission-1 +│ ├── Main.java +│ └── util +│ └── Utils.java +├── ... +└── Submission-n + ├── Main.java + └── util + └── Utils.java +``` + +If you want JPlag to scan only one specific subdirectory of the submissions for source code files (e.g. `src`), you can configure that with the argument `-s`: + +``` +/path/to/root-directory +├── Submission-1 +│ ├── src +│ │ ├── Main.java # Included +│ │ └── util +│ │ └── Utils.java # Included +│ ├── lib +│ │ └── Library.java # Ignored +│ └── Other.java # Ignored +└── ... +``` + + +### Base Code + +The base code is a special kind of submission. It is the template that all other submissions are based on. JPlag will ignore all matches between two submissions, where the matches are also part of the base code. Like any other submission, the base code has to be a single file or directory in the root directory. + +``` +/path/to/root-directory +├── BaseCode +│ └── Solution.java +├── Submission-1 +│ └── Solution.java +├── ... +└── Submission-n + └── Solution.java +``` + +In this example, students have to solve a given problem by implementing the `run` method in the template below. Because they are not supposed to modify the `main` function, it will be identical for each student. + +```java +// BaseCode/Solution.java +public class Solution { + + // DO NOT MODIFY + public static void main(String[] args) { + Solution solution = new Solution(); + solution.run(); + } + + public void run() { + // TODO: Implement your solution here. 
+ } +} +``` + +To prevent JPlag from detecting similarities in the `main` function (and other parts of the template), we can instruct JPlag to ignore matches with the given base code by providing the `--bc=` option followed by the name of the base code submission. +The name of the base code submission in the example above is `BaseCode`. + +### Multiple Root Directories +* You can run JPlag with multiple root directories; JPlag compares submissions from all of them +* JPlag distinguishes between old and new root directories +  * Submissions in new root directories are checked amongst themselves and against submissions from other root directories +  * Submissions in old root directories are only checked against submissions from new root directories +* You need at least one new root directory to run JPlag + +This allows you to check submissions against those of previous years: +``` +/path/to/root-new +└── ... +/path/to/root-old1 +└── ... +/path/to/root-old2 +└── ... +``` + +### Details + +The following diagram shows all the relations between root directories, submissions, and files: + +```mermaid +classDiagram + direction LR + + Input -->"1..*" RootDirectory : consists of + RootDirectory + RootDirectory <|-- NewDirectory: is a + RootDirectory <|-- OldDirectory : is a + + + RootDirectory --> "1..*" Submission : contains + Directory --> "1..*" File : contains + Submission <|-- File : is a + Submission <|-- Directory : is a +``` diff --git a/docs/2.-Supported-Languages.md b/docs/2.-Supported-Languages.md new file mode 100644 index 000000000..524b2a48c --- /dev/null +++ b/docs/2.-Supported-Languages.md @@ -0,0 +1,11 @@ +JPlag currently supports Java, C/C++, C#, Go, Kotlin, Python, R, Rust, Scala, Swift, and Scheme. Additionally, it has primitive support for text and prototypical support for EMF metamodels. A detailed list, including the supported language versions, can be found in the [project readme](https://github.com/jplag/JPlag/blob/main/README.md#supported-languages). 
+ +The language modules differ in their maturity due to their age and different usage frequencies. +Thus, each frontend has a state label: +- `mature`: This module is tried and tested, as well as up to date with a current language version. +- `beta`: This module is relatively new and up to date. However, it is not as well tested. **Feedback welcome!** +- `alpha`: This module is very new and not yet finished. Use with caution! +- `legacy`: This module is old and may only support outdated language versions. It needs an update. +- `unknown`: It is very much unclear in which state this module is. + +All language modules can be found [here](https://github.com/jplag/JPlag/tree/master/languages). \ No newline at end of file diff --git a/docs/3.-Contributing-to-JPlag.md b/docs/3.-Contributing-to-JPlag.md new file mode 100644 index 000000000..dc967f659 --- /dev/null +++ b/docs/3.-Contributing-to-JPlag.md @@ -0,0 +1,22 @@ +We're happy to incorporate all improvements to JPlag into this codebase. Feel free to fork the project and send pull requests. +If you are new to JPlag, maybe check the [good first issues](https://github.com/jplag/jplag/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22). + +Please try to make well-documented and clear structured submissions: +* All artifacts (code, comments...) should be in English +* Please avoid abbreviations! 
+* Make use of JavaDoc to document classes and public methods +* We provide a [formatter configuration](https://github.com/jplag/JPlag/blob/master/formatter.xml), which is enforced by spotless + * Eclipse/IntelliJ users can use it directly + * It can always be applied via maven with `mvn spotless:apply` +* Use well-explained pull requests to propose your features +* When re-using code from other projects mark them accordingly and make sure their license allows the re-use +* Your changes should always improve the code quality of the codebase, especially when working on older components +* Your git messages should be concise but more importantly descriptive +* Ensure your git history is clean, or else your PR may get squashed while merging + +## Building from sources +1. Download or clone the code from this repository. +2. Run `mvn clean package` from the root of the repository to compile and build all submodules. + Run `mvn clean package assembly:single` instead if you need the full jar which includes all dependencies. +5. You will find the generated JARs in the subdirectory `jplag.cli/target`. + diff --git a/docs/4.-Adding-New-Languages.md b/docs/4.-Adding-New-Languages.md new file mode 100644 index 000000000..a38742490 --- /dev/null +++ b/docs/4.-Adding-New-Languages.md @@ -0,0 +1,426 @@ +# JPlag Frontend Design + +To add support for a new language to JPlag, a JPlag frontend needs to be created for that specific language. The core purpose of a frontend is to transform each submission to a list of _Tokens_, an abstraction of the content of the submission files independent of the language of the submissions.
      +The token lists of the different submissions are then passed on to a comparison algorithm that checks the token lists for matching sequences. + +## How are submissions represented? — Notion of _Token_ + +In the context of JPlag, a Token does not represent a lexical unit, such as identifiers, keywords or operators. Instead, Tokens represent syntactic entities, like statements or control structures. More than one token might be needed to represent the nested structure of a statement or expression in a linear token list. + +```java + class MyClass extends SuperClass { private String name; } + //CLASS_DECLARATION //CLASS_BODY_BEGIN + //FIELD_DECLARATION + //CLASS_BODY_END +``` +Each comment is intended to represent one token. + +From this example in Java, you may be able to see the following things: + - a class declaration is represented by three tokens of different _types_: `CLASS_DECLARATION`, `CLASS_BODY_BEGIN` and `CLASS_BODY_END` + - a token is associated with a _position_ in a code file. + - the abstraction is incomplete, many details of the code are omitted. The original code cannot be reconstructed from the token list, but its structure can. + +A few more points about Tokens in JPlag: + - a token list contains the Tokens from _all files of one submission_. For that reason, Tokens save the _filename_ of their origin in addition to their position. + - Token types are represented by the `TokenType` interface which has to be adapted for each language individually. + - For brevity, each token type is also associated with a String description, usually shorter than their name. Looking at the String representations used in existing frontends, you may recognize a kind of convention about how they are formed. The example above uses the full names of token types. + +## How does the transformation work? + +Here is an outline of the transformation process. + - each submitted file is _parsed_. The result is a set of ASTs for each submission. 
+ - each AST is now _traversed_ depth-first. The nodes of the AST represent the grammatical units of the language. + - upon entering and exiting a node, Tokens can be created that match the type of the node. They are added to the current token list. + - for block-type nodes like bodies of classes or if expressions, the point of entry and exit correspond to the respective `BEGIN` and `END` token types. If done correctly, the token list should contain balanced pairs of matching `BEGIN` and `END` tokens. + +```java +@Override +public void enterClassDeclaration(ClassDeclarationContext context) { + Token token = new Token(CLASS_DECLARATION, /* more parameters ... */); + addToken(token); +} + +@Override +public void exitClassDeclaration(ClassDeclarationContext context) { + // class declarations get no end token -> do nothing +} + +@Override +public void enterClassBody(ClassBodyContext context) { + Token token = new Token(CLASS_BODY_BEGIN, /* more parameters ... */); + addToken(token); +} + +@Override +public void exitClassBody(ClassBodyContext context) { + Token token = new Token(CLASS_BODY_END, /* more parameters ... */); + addToken(token); +} +``` +The way the traversal works and how you can interact with the process depends on the parser technology used. In the example above, **ANTLR-generated parsers** were used, as is the case in most of the current JPlag frontends. We recommend using ANTLR for any new frontend. + +If a hard-coded (as opposed to dynamically generated) parser library is available for your language, it may make sense to use it. An implementation of the visitor pattern for the resulting AST should be included. 
+ +# Frontend Structure + +A frontend consists of these parts: + +| Component/Class | Superclass | Function | How to get there | +|-----------------------------------------|---------------------------|--------------------------------------------------|-------------------------------------------------------------| +| Language class | `de.jplag.Language` | access point for the frontend | copy with small adjustments | +| `pom.xml` | - | Maven submodule descriptor | copy with small adjustments;
      add dependencies for parser | +| `README.md` | - | documentation for the frontend | copy for consistent structure; adjust from there | +| TokenType class | `de.jplag.TokenType` | contains the language-specific token types | **implement new** | +| | | | +| Lexer and Parser | - | transform code into AST | depends on technology | +| ParserAdapter class | `de.jplag.AbstractParser` | sets up Parser and calls Traverser | depends on technology | +| Traverser/
      TraverserListener classes | - | creates tokens traversing the AST | depends on technology | + +For example, if ANTLR is used, the setup is as follows: + +| Antlr specific parts/files | Superclass | Function | How to get there | +|----------------------------|--------------------------------|------------------------------------|----------------------------------------------------------------------------------------------| +| Grammar files (`.g4`) | - | capture syntax of the language | most likely available in [ANTLRv4 Grammar Repository](https://github.com/antlr/grammars-v4/) | +| Lexer and Parser | `Lexer`, `Parser` (ANTLR) | transform code into AST | generated from grammar files by antlr4-maven-plugin | +| Traverser | `ParseTreeWalker` (ANTLR) | traverses AST and calls listener | included in antlr4-runtime library, can be used as is | +| TraverserListener class | `ParseTreeListener` (ANTLR) | creates tokens when called | **implement new** | +| ParserAdapter class | `de.jplag.AbstractParser` | sets up Parser and calls Traverser | copy with small adjustments | + +As the table shows, much of a frontend can be reused, especially when using ANTLR. The only parts left to implement specifically for each frontend are + - the ParserAdapter (for custom parsers) + - the TokenTypes, and + - the TraverserListener. + +**Note** for parser libraries other than ANTLR: + - It should still be rather easy to implement the ParserAdapter from the library documentation. + - Instead of using a listener pattern, the library may require you to do the token extraction in a _Visitor subclass_. In that case, there is only one method call per element, called e.g. `traverseClassDeclaration`. The advantage of this version is that the traversal of the subtrees can be controlled freely. See the Scala frontend for an example. 
+ +### Basic procedure outline + +```mermaid +flowchart LR + JPlag -->|"parse(files)"| Language + subgraph frontend[LanguageFrontend] + Language -->|"parse(files)"| ParserAdapter + ParserAdapter -->|"parse(files)"| Parser -.->|ASTs| ParserAdapter + ParserAdapter -->|"walk(ASTs)"| Traverser + Traverser -->|"enterXYNode()"| TraverserListener + Traverser -->|"exitXYNode()"| TraverserListener + TraverserListener -.-> action{{"tokenList.add(new Token())"}} + end +``` + +Note: In existing frontends, the token list is managed by the ParserAdapter, and from there it is returned to the +Language class and then to JPlag. + +### Integration into JPlag + +The following adjustments have to be made beyond creating the frontend submodule itself: + +- Register the submodule in the aggregator POM for every build profile. +```xml + + + ... + jplag.frontend.my-frontend + ... + +``` +- Add a dependency from the aggregator module to the new frontend +- Add a dependency from the jplag module to the new frontend +```xml + + + + ... + + de.jplag + jplag.frontend.my-frontend + ${revision} + + ... + +``` + +That's it! The new frontend should now be usable as described in the main README. The name of the frontend used with the CLI `-l` option is the `IDENTIFIER` set in the Language class. + +# Token Selection + +Apart from extracting the tokens correctly, the task of deciding which syntactical elements should be assigned a token is the essential part when designing a frontend.
      +This guideline is solely based on experience and intuition – this "worked well" so far. More research might hint towards a more systematic process of token selection. + +The goal of the abstraction is to create a token list that is + - _accurate_: a fair representation of the code as input to the comparison algorithm + - _consistent per se_: insensitive to small changes in the code that might obfuscate plagiarism; constructs are represented equally throughout the file + - _consistent_ with the output of other trusted frontends—only to the extent that their respective languages are comparable, naturally. + +To create a set of tokens in line with these objectives, we offer the tips below. + +### Quick Word on Notation + +Elements with `BIG_AND_FAT` text represent tokens, while elements in lowercase surrounded by `` represent subexpressions that may produce any number of tokens themselves.
      +? marks optional parts which may occur zero or one times, * marks elements that may occur any number of times. +
      + +1) Use a separate token for both ends of every type of _block_ or _body_. + + +2) More generally, for any type of composite expression or statement, the number of designated token types needed to separate them in the token list is the number of subexpressions + 1. +Additional tokens may be needed in certain locations, like optional parts. + +| Expression type | #expressions | #tokens | Example code and tokenization pattern | +|--------------------|--------------|---------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| _loop_ (Rust) | 1 | 2 | `loop { println!("{}", 1) }`

      `LOOP{` `` `}LOOP` | +| _if_ (C) | 2 | 3 | `if (true) { printf("1"); } `

      `IF` `` `IF{` `` `}IF` | +| _do-while_ (C) | 2 | 3 | `do { printf("1") } while (true);`

      `DO{` `` `}DO-WHILE(` `` `)WHILE`
      alt.: `DO{` `` `}DO` `WHILE(` `` `)WHILE` | +| Ternary – _?:_ (C) | 3 | 4 | `true ? 1 : 0`

      `COND(` `` `IF_TRUE:` `` `IF_FALSE:` `` `)COND` | + +In the do-while example above, the `}DO-WHILE(` marks the end of the loop block and the beginning of the condition expression at once. For the sake of having a designated token for the ending of the loop block, a possible alternative tokenization with an extra token is given. + +3) For _list_ subtrees, a single token to mark the beginning of each element may suffice.
      +Note: If lists of the same type are nested, the end of the inner list may become unclear. Additional tokens for both ends of the list may be appropriate in that case. + + +4) For _leaf_ subtrees (that do not subdivide further), a single token may suffice. + + +5) For _optional_ subtrees, a single token may suffice to indicate that it occurred. + +| Optional expression type | #expressions | #tokens | Example code and tokenization pattern | +|--------------------------------------------|--------------|---------|------------------------------------------------------------------------------------------------------------------------------------------| +| Class declaration: generic type parameters | _n_ + 1 | _n_ + 2 | `class Map { ... }`

      `CLASS` (`TYPE_PARAM`)* `CLASS{` `` `}CLASS` | +| Method invocation: arguments | _n_ | _n_ + 1 | `printf("%d: %s", 1, "one");`

      `APPLY` (`ARG` ``)* | +| _if_ statement: _else_ block | 2 (+ 1) | 3 (+ 2) | `if (true) { printf("1"); } else { printf("0"); }`

      `IF` `` `IF{` `` `}IF` (`ELSE{` `` `}ELSE`)? | + + +6) Keywords that influence the control flow receive a token, for example + - `return`, `break`, `continue` + + +7) Semantic information, references and concrete values are generally omitted, for example + - identifiers + - type information + - `final` modifier + - access modifiers + - instructions to the compiler/VM: `transient`, `volatile` + - references to classes, objects, fields, array accesses + - numbers and other literals, as well as built-in operations + + +8) Statements with side effects generally receive a token, for example + - constructor calls + - declarations of variables and fields + - assignments + - method calls + + +9) Regarding sensitivity: Very similar constructs may receive the same token even if they are syntactically distinct, for example + - variable and constant declarations + - method and function declarations + - different variations of `for`-loops + - (See the documentation about token extraction for more examples) + +# Token Extraction + +The token extraction is the most time-consuming part of the frontend design. +How difficult it is depends largely on the underlying **grammar** of the parser. + +This article deals with the implementation of the listener which is called at every stage of traversal of the AST. The examples center around tokenizing the Java language, using a grammar written in ANTLR4. + +```mermaid +flowchart LR + AstVisitor -->|"enterNode(Node)"| Listener + AstVisitor -->|"exitNode(Node)"| Listener + + Listener --> |"add(Token)"| List["List<Token>"] +``` +In the actual listener, there will be concrete `enter` and `exit` implementations for each syntactic category, e.g. `enterClassDeclaration`, `enterIfStatement` and so on. + +## Basic case +The basic case is that a syntactic category of the grammar corresponds to a token directly. 
+```java +@Override +public void enterIfStatement(IfStatementContext context) { + addToken(IF_STATEMENT, context); +} + +@Override +public void enterIfBody(IfBodyContext context) { + addToken(IF_BODY_START, context); +} + +private void addToken(TokenType tokenType, ParserRuleContext context) { + tokens.add(new Token(tokenType, context.getFile(), context.getLine(), context.getColumn(), context.getLength())); +} +``` +## Complex case: Ambiguity +The complexity comes from the degree to which syntactic categories are _reused_ in different contexts in the grammar. Instead of a distinct `ifBody` category, for example, there may be only one category for 'control structure bodies' or 'block expressions' of any kind. + +```java +@Override +public void enterBlockExpression(BlockExpressionContext context) { + // Now, is this the body of a method, a for/while/if/try/catch expression? +} +``` + +```antlrv4 + // unedited grammar + ifStatement: + 'if' '(' expression ')' statement + ('else' statement))? ; + + statement: + blockStatement | ifStatement | forStatement | ... ; +``` + +### Approach 1: Edit the grammar +If you can alter the grammar definition, it may be well worth considering. Introduce a separate rule for each use of rules that you want to differentiate. Example in antlr4: + +```antlrv4 + // after editing + ifStatement: + 'if' '(' expression ')' ifBody ('else' elseBody)? ; + + ifBody: + statement ; + elseBody: + statement ; + + statement: + blockStatement | ifStatement | forStatement | ... ; +``` +This does introduce some kind of redundancy to the grammar, but it makes the implementation of the listener methods much easier. The caveat that comes with this approach is that updated grammars will have to be edited again. The licence of the grammar should also be considered. + +### Approach 2: Manage a context stack +If you are stuck with the grammar and parser as they are, you can mimic what the parser does and introduce a _stack machine_ to the listener. 
Then, anytime a relevant syntactical structure is entered, you add a context to the stack, and when you enter an ambiguous subtree, the current context will help distinguish the different cases. + +```java +@Override +public void enterIfStatement(IfStatementContext context) { + addToken(IF_STATEMENT, context); + contexts.enterContext(IF_CONTEXT); +} + +@Override +public void enterBlockExpression(BlockExpressionContext context) { + switch (contexts.peek()) { + case IF_BODY -> addToken(IF_BODY_START, context); + case FOR_BODY -> addToken(FOR_BODY_START, context); + ... + } +} + +@Override +public void exitIfStatement(IfStatementContext context) { + contexts.popContext(); +} +``` + +The management of the context makes the listener much more complicated to read and maintain. + + + +Notes: +- If there are optional parts in the grammar rule, the index of terminals may not be static. A more sophisticated search method may be necessary, possibly using the text content of the child nodes (`ParserRuleContext::getText`). +- In this example, the curly braces themselves are optional. The case where they are omitted needs to be covered as well. + +## Additional notes + +### Using the `exit` methods + +The `exit` methods can be used to add `END` tokens for bodies and blocks. If you put the `enter` and `exit` methods of a kind directly next to each other in the code as a pair, there should be little room for confusion about which token types should be added there. + +### Using terminals + +Depending on the implementation of the grammar, some keywords or symbols may not have a rule of their own. Using ANTLR, you can always catch their occurrences in the `visitTerminal(TerminalNode)` method. + +```java +@Override +public void visitTerminal(TerminalNode node) { + switch (node.getText()) { + case "catch" -> addToken(CATCH, node.getToken()); + //...
+ } +} +``` + +### Preventing simple attacks + +The token extraction process can support the prevention of simple refactoring attacks by treating equivalent constructs the same. For example, a language may support multi-definitions: + +```java +var a, b, c = 1; +``` +This statement could translate to the token list `VAR_DEF VAR_DEF VAR_DEF ASSIGN`. An easy refactoring would produce a different token list: +```java +var a = 1; +var b = 1; +var c = 1; +``` +Now, this looks more like `VAR_DEF ASSIGN VAR_DEF ASSIGN VAR_DEF ASSIGN`. It might be a good idea to duplicate the `ASSIGN` token in the first case as well, so that this simple attack is overcome easily. (The resulting token list may be unsorted, but that is acceptable.) + +This almost goes in the direction of partly compiling the code in your mind. Another example would be the treatment of control structures, which can be refactored into each other fairly easily as well: +```java +for (; ; ) { + ; +} + +// is the same as: + +while () { + ; + ; +} +``` +```java + = ? : ; + +// is the same as +if () { + = ; +} else { + = ; +} +``` + +The degree to which the Token extraction process should try to preemptively generalize over similar grammatical elements is unclear and heavily dependent on the language. + +# Frontend Test + +To check the output of your frontend against the input, the `TokenPrinter` can be helpful. The `TokenPrinter` prints the input line by line, and the tokens of each line below it. + +```java +10 public class Example { + |CLASS |CLASS{ + +11 private int number; + |FIELD + +12 public int getNumber() { + |METHOD |METHOD{ + +13 return number; + |RETURN + +14 } + |}METHOD + +15 } + |}CLASS +``` +To test a frontend, set up a JUnit test class where the `TokenPrinter` prints the output of the `parse` method of the frontend. Read through the output and check whether the `List` satisfies the given requirements. 
+ +### Test files + +The frontend should be tested with 'authentic' sample code as well as a 'complete' test file that covers all syntactic elements that the frontend should take into account. If you are using an ANTLR parser, such a complete test file may be included in the parser test files in the ANTLRv4 Grammar Repository. + +### Sanity check suggestions + +- The token list represents the input code correctly. + - In particular, the nesting tokens are correctly nested and balanced. + +- The token list represents the input code with an acceptable coverage + —how that can be measured and what coverage is acceptable depends on the language. One approach would be line coverage, e.g. 90 percent of code lines should contain a token. + +- There are no `TokenTypes` that can never be produced by the frontend for any input. + - Put another way, the complete test code produces a token list that contains every type of token. diff --git a/docs/5.-End-to-End-Testing.md b/docs/5.-End-to-End-Testing.md new file mode 100644 index 000000000..98a4d3857 --- /dev/null +++ b/docs/5.-End-to-End-Testing.md @@ -0,0 +1,433 @@ +## Basics +The basic structure of the end-to-end testing module is discussed in the [corresponding readme file](https://github.com/jplag/JPlag/blob/master/endtoend-testing/README.md). + +## Rationale behind the test data +To be able to create the test data, some examples from science, which have addressed the concealment of plagiarism, were used to ensure the greatest possible coverage of the JPlag functionality. +Here, the changes were split as finely as possible and applied to the levies. 
+The following elaborations were used for this: +- [Detecting source code plagiarism on introductory programming course assignments using a bytecode approach](https://ieeexplore.ieee.org/abstract/document/7910274) +- [Detecting Disguised Plagiarism](https://arxiv.org/abs/1711.02149) + +These elaborations provide basic ideas on how a modification of the plagiarized source code can look like or be adapted. +These code adaptations refer to a wide range of changes starting from +adding/removing comments to architectural changes in the deliverables. + +- (1) Inserting comments or empty lines +- (2) Changing variable names or function names +- (3) Insertion of unnecessary or changed code lines +- (4) Changing the program flow (statements and functions must be independent from each other) + - (1) Variable declaration at the beginning of the program + - (2) Combining declarations of variables + - (3) Reuse of the same variable for other functions +- (5) Changing control structures + - (1) for(...) to while(...) + - (2) if(...) to switch-case +- (6) Modification of expressions + - (1) (X < Y) to !(X >= Y) and ++x to x = x + 1 +- (7) Splitting and merging statements + - (1) x = getSomeValue(); y = x- z; to y = (getSomeValue() - Z; +- (8) Inserting unnecessary casts + +These changes were now transferred to a base class and thus the plagiarism was created. The named base class was provided with the individual changes. The numbers in the list shown above are intended for the traceability of the test data. Here the test data filenames were named with the respective changes. Example: SortAlgo4d1 contains the changes "Variable declaration at the beginning of the program". If several points are combined, this is separated by "_" e.g.: SortAlgo1_3 contains "(1) Inserting comments or empty lines" and "(3) Insertion of unnecessary or changed code lines". + +The following code examples show how these changes affect the program code and also how the detection of JPLag behaves. 
+All the code examples shown and more can be found at [testdata-resources-SortAlgo](https://github.com/jplag/JPlag/tree/main/endtoend-testing/src/test/resources/languageTestFiles/java/sortAlgo). + +### (1) Inserting comments or empty lines + +Adding empty lines or comments affects the normalization of the output. If the End-To-End tests fail with these changes, it means that something has changed in the normalization, e.g. removing empty lines or recognizing comments no longer works. + +In the following, the modified base class looks like this: + +Original: +``` java + + public void BubbleSortWithoutRecursion(Integer arr[]) { + for(int i = arr.length; i > 1 ; i--) { +``` + +Plagiarized: +``` java +/* + + Unnecessary comment + */ + public void BubbleSortWithoutRecursion(Integer arr[]) { + //Unnecessary comment + for(int i = arr.length; i > 1 ; i--) { +``` + +As expected, the resulting outputs have a match of 100% (JPLag result): + +``` json +"SortAlgo-SortAlgo1" : { + "minimal_similarity" : 100.0, + "maximum_similarity" : 100.0, + "matched_token_number" : 56 + }, +``` + +### (2) Changing variable names or function names + +Changing variable names and function names has, like point 1, also the goal of detecting adjustments in the normalization level. +If the End-To-End tests fail with these changes, it means that something has changed in the normalization, e.g. creating constants function and variable names. 
+ +Orginal: + +``` java + private final void swap(T[] arr, int i, int j) { + T t = arr[i]; + arr[i] = arr[j]; + arr[j] = t; + } +``` + +Plagiarized: + +``` java + private final void paws(T[] otherArr, int i, int j) { + T t = otherArr[i]; + otherArr[i] = otherArr[j]; + otherArr[j] = t; + } +``` + +As expected, the resulting outputs have a match of 100% (JPLag result): + +``` json +"SortAlgo-SortAlgo2" : { + "minimal_similarity" : 100.0, + "maximum_similarity" : 100.0, + "matched_token_number" : 56 + }, +``` + +### (3) Insertion of unnecessary or changed code lines + +In contrast to points 1 and 2, adding unnecessary code lines reduces the token generation. This has the consequence that the recognition can no longer be 100% sure whether plagiarism is present or not. The failure of the end-to-end tests in these cases means that either the tokens have been adjusted, the normalization has changed the function separation or something has changed in the minimum token numbers. This can be easily seen by running the end-to-end tests in different options. this will be shown in the next result examples. + +Original: +``` java + private final void swap(T[] arr, int i, int j) { + T t = arr[i]; + arr[i] = arr[j]; + arr[j] = t; + } +``` +Plagiarized: +``` java +private final void swap(T[] arr, int i, int j) { + var tempVar1 = 0; + if (true) { + T t = arr[i]; + arr[i] = arr[j]; + arr[j] = t; + var tempVar2 = 0; + tempVar2++; + tempVar2 = tempVar2 + 1; + } + } +``` + +The results for the recognition already allow first recognition changes. Here the change of the `minimum_token_match` also has an effect on the result, which was not the case with (1) and (2). 
+ +``` json +[{"options" : { + "minimum_token_match" : 1 + }, +"SortAlgo-SortAlgo3" : { + "minimal_similarity" : 81.159424, + "maximum_similarity" : 100.0, + "matched_token_number" : 56 + }, +}] +``` + +``` json +[{"options" : { + "minimum_token_match" : 15 + }, +"SortAlgo-SortAlgo3" : { + "minimal_similarity" : 57.971016, + "maximum_similarity" : 71.42857, + "matched_token_number" : 40 + }, +}] +``` + +### (4) Changing the program flow (statements and functions must be independent from each other) + +This subitem breaks down into three more change methods to maintain fine granularity: +- (1) Variable declaration at the beginning of the program +```java +public class SortAlgo4d1 { + private int firstCounter; + private int arrayLenght; + private int swapVarI; + private int swapVarJ; + +``` + +- (2) Combining declarations of variables +``` java +public class SortAlgo4d2 { + private int firstCounter,swapVarJ,arrayLenght ,swapVarI; +``` + +- (3) Reuse of the same variable for other functions +``` java +public class SortAlgo4d3 { + private int firstCounterAndArrayLenghtAndswapVarJ ,swapVarI; +``` + +The adjustments to the program flow with the previous instantiation of the variables were also made: +Original: +``` java + if (n == 1) + { + return; + } +``` + +Plagiarized: +``` java + firstCounter = n; + if (firstCounter == 1) { + return; + } +``` + +The results of the individual adjustment are as follows: +```json + "SortAlgo-SortAlgo4d1" : { + "minimal_similarity" : 87.30159, + "maximum_similarity" : 98.21429, + "matched_token_number" : 55 + }, + "SortAlgo-SortAlgo4d2" : { + "minimal_similarity" : 87.5, + "maximum_similarity" : 100.0, + "matched_token_number" : 56 + }, + "SortAlgo-SortAlgo4d3" : { + "minimal_similarity" : 90.32258, + "maximum_similarity" : 100.0, + "matched_token_number" : 56 + }, +``` + +### (5) Changing control structures + +The change of the control structure in the program also indicates a change of the token generation in case of faulty tests. 
In contrast to (4), however, these are specially designed for other tokens that are made for if, else, ... structures. + +These changes were made to the SortAlgo test data in a plagiarized form. + +Original: +``` java + public void BubbleSortRecursion(Integer arr[], int n) { + if (n == 1) + { + return; + } + + for (int i = 0; i < n - 1; i++) + { + if (arr[i] > arr[i + 1]) + { + swap(arr, i , i+1); + } + } + BubbleSortRecursion(arr, n - 1); + } +``` + +Plagiarized: +``` java + public void BubbleSortRecursion(Integer arr[], int n) { + switch (n) { + case 1: + return; + } + + int i = 0; + while(i < n-1) + { + var tempBool = arr[i] > arr[i + 1]; + if (tempBool) { + swap(arr, i, i + 1); + } + i++; + } + + BubbleSortRecursion(arr, n - 1); + } +``` + +Here it is remarkable which affects the adjustment of the `minimum_token_match` has on the recognition of the plagiarism. +Changes of the token generation as well as the `minimum_token_match` have an effect on this kind of End-To-End test. + +``` json + "options" : { + "minimum_token_match" : 1 + }, + "tests" : { + "SortAlgo-SortAlgo5" : { + "minimal_similarity" : 82.14286, + "maximum_similarity" : 82.14286, + "matched_token_number" : 46 + }, +``` + +``` json + "options" : { + "minimum_token_match" : 15 + }, + "tests" : { + "SortAlgo-SortAlgo5" : { + "minimal_similarity" : 0.0, + "maximum_similarity" : 0.0, + "matched_token_number" : 0 + }, +``` + +### (6) Modification of expressions +Changing the order of compare also changes the order of the program flow which is difficult to determine the exact effect of plagiarism. Here the statements (X < Y) to !(X >= Y) and ++x to x = x + 1 are changed. Since the syntax should be recognized however as expression, the pure change of the expression has little effect on their plagiarism recognition. 
+ +Orginal: +``` java + public void BubbleSortRecursion(Integer arr[], int n) { + if (n == 1) + { + return; + } + + for (int i = 0; i < n - 1; i++) + { + if (arr[i] > arr[i + 1]) + { + swap(arr, i , i+1); + } + } + BubbleSortRecursion(arr, n - 1); + } +``` + +Plagiarized: +``` java +public void BubbleSortRecursion(Integer arr[], int n) { + if (n != 1) + { + for (int i = 0; !(i >= (n - 1));) + { + if (!(arr[i] <= arr[i + 1])) + { + swap(arr, i , i+1); + } + i = i + 1; + } + BubbleSortRecursion(arr, n - 1); + } + else + { + return; + } + } +``` + +Results: +``` json + { + "options" : { + "minimum_token_match" : 1 + }, + "SortAlgo-SortAlgo6" : { + "minimal_similarity" : 83.58209, + "maximum_similarity" : 100.0, + "matched_token_number" : 56 + }, +``` + +``` json + "options" : { + "minimum_token_match" : 15 + }, + "SortAlgo-SortAlgo6" : { + "minimal_similarity" : 43.28358, + "maximum_similarity" : 51.785713, + "matched_token_number" : 29 + }, +``` + +### (7) Splitting and merging statements +The merging or splitting of statements results in changing the token for the respective plagiarism detection. +Here code lines are either fetched from functions or stored in functions like `x = getSomeValue(); y = x- z;` to `y = (getSomeValue() - Z`. + +Original: +``` java +[...] + swap(arr, i , i+1); +[...] + if (arr[innerCounter] > arr[innerCounter + 1]) { +[...] +``` + +Plagiarized: +``` java +[...] + swap(arr, i, add(i , 1)); +[...] + if (arr[innerCounter] > arr[add(innerCounter , 1)]) { +[...] 
+ private int add(int value1, int value2) + { + return value1 + value2; + } + + private int subtract(int value1, int value2) + { + return value1 - value2; + } +``` + +Results: +``` json + "options" : { + "minimum_token_match" : 1 + }, + "tests" : { + "SortAlgo-SortAlgo7" : { + "minimal_similarity" : 76.712326, + "maximum_similarity" : 100.0, + "matched_token_number" : 56 + }, +``` + +``` json + "options" : { + "minimum_token_match" : 15 + }, + "tests" : { + "SortAlgo-SortAlgo7" : { + "minimal_similarity" : 49.315067, + "maximum_similarity" : 64.28571, + "matched_token_number" : 36 + }, +``` + +## Summary + +The results and the test coverage of the end-to-end tests strongly depend on the tested plagiarisms. It is also important to use and test many different options of the API offered by JPlag, as these have a direct influence on the detection and are therefore also important for the change detection. + +To summarize +- (1) and (2) test normalization level +- (3) to (7) the token generation level + +If a result differs only in the options, it is very likely that the change is in the configuration of the `minimum_token_match`. +This means that if Option1 does not change in the result of the detection, but the result in Option2 does, this is the basis of the `minimum_token_match`. +``` +java: (1)SortAlgo-SortAlgo5 --> passed +java: (15)SortAlgo-SortAlgo5 --> failed +``` + + + diff --git a/docs/6.-Report-File-Generation.md b/docs/6.-Report-File-Generation.md new file mode 100644 index 000000000..9d303333b --- /dev/null +++ b/docs/6.-Report-File-Generation.md @@ -0,0 +1,85 @@ +`ReportObjectFactory` is the starting point and anchor for the reporting process. `CLI` calls `ReportObjectFactory#createAndSaveReport(result: JPlagResult, path: String)`, providing the factory with the result of the JPlag run and the desired target path for the report. 
+`ReportObjectFactory#createAndSaveReport` then goes on and delegates the creation of all necessary files and folders and consequently zips the report. + +## Report File Structure + +``` +result.zip +│ overview.json +│ +└───submissions +│ └───submissionId1 +│ │ submission1File1 +│ │ submission1File2 +│ │ ... +│ └───submissionId2 +│ │ submission2File1 +│ │ submission2File2 +│ │ ... +│ └───submissionId... +│ │ ... +│ └───submissionIdN +│ │ submissionNFile1 +│ │ submissionNFile2 +│ │ ... +│ submission1-submission2.json +│ submission1-submission3.json +│ submission1-submission....json +│ submission1-submissionN.json +│ submission2-submission3.json +│ submission2-submission....json +│ submission2-submissionN.json +│ ... +``` + +The report zip contains + +- overview.json + + - The `overview.json` encapsulates the main information from a JPlagResult such as base directory path, language, min- and max-metric, etc. The `overview.json` provides data to the `OverviewView.vue` that is first displayed after the report is dropped into the viewer. Corresponds to the Java record `OverviewReport`. + +- submissions + + - This folder contains all files of all submissions JPlag was run with. For each submission the `submissions` folder contains a subfolder with the name of the corresponding submission id. A subfolder for a submission contains all files of said submission. + These files are displayed in the `ComparisonView.vue` + +- comparison files + + - For each submission pair submission1 submission2 with ids submissionId1 and submissionId2, the report contains either submissionId1-submissionId2.json or submissionId2-submissionId1.json. This file contains information the comparison between the two submissions, such as the similarity and concrete matches. Corresponds to the Java record `ComparisonReport`. + +## Submission ids + +### Report Viewer +The `overview.json` contains a map that associates a submission id to its display name. +For internal use in the report viewer use only(!) 
the submission id. Whenever the name of a submission has to be displayed in the report viewer, the id has to be resolved to its display name first. The report viewer's vuex store provides a getter for this resolution. + +### JPlag +At the beginning of report generation a map and a function that associate a JPlag `Submission` to a submission id is built. Whenever you reference a submission in a report viewer DTO use this map/function to resolve the submission to its id. + +## Adding and displaying new attributes from JPlagResult + +The new design of JPlag reporting and viewing enables the easy addition of new attributes. Adding a new attribute follows the pattern: + +In JPlag: + +1. Introduce a new attribute to the Java DTO that represents the file you aim to change. +2. Define how the attribute is obtained from the JPlagResult. Do so either by introducing a new component that extracts the attribute from the `JPlagResult` or by modifying an existing component. `ReportObjectFactory` then calls this component and assigns the result to the attribute defined in 1. + +In the Report Viewer: + +3. Introduce the new attribute to the Typescript DTO. +4. Define how the attribute is extracted from the JSON file. +5. Display the attribute in the desired Vue component. + +### Example + +An example is provided in the following section which explains how new attributes can be introduced to the JPlagReport and then processed in the report viewer. In the following example we add the number of tokens per match to the JPlag report and view. + +Task: Adding the number of tokens in a match, which has to be displayed in the MatchesTable in the ComparisonView. + +1. Add `int tokens` to `Match.java` (sub-model of `ComparisonReport.java`) +2. Modify the existing component `ComparisonReportWriter.java` to additionally extract the number of tokens in a match from the `JPlagResult.java` + and save it in the Match DTO +3. Add `tokens: number` to `Match.ts` +4. 
Edit `ComparisonFactory.ts` to get the number of tokens from the JSON report file. [report-viewer] +5. Edit `MatchTable.vue` to display the number of tokens in the `ComparisonView.vue`. \ No newline at end of file diff --git a/docs/7.-Clustering-of-Submissions.md b/docs/7.-Clustering-of-Submissions.md new file mode 100644 index 000000000..be1aee66a --- /dev/null +++ b/docs/7.-Clustering-of-Submissions.md @@ -0,0 +1,91 @@ +## Clustering Usage + +By default, JPlag is configured to perform a clustering of the submissions. +The clustering partitions the set of submissions into groups of similar submissions. +The found clusters can be used as candidates for _potentially_ colluding groups. Each cluster has a strength score that measures how _suspicious_ the cluster is compared to other clusters. + +### Disabling Clustering + +Clustering can take a long time when there is a large number of submissions. +Users who are not interested in clustering can safely disable it with the `--cluster-skip` option. + +### Clustering Configuration + +Clustering can either be configured using the CLI options or programmatically using the `ClusteringOptions` class. Both options work analogously and share the same default values. + +__The clustering is designed to work out-of-the-box for running within the magnitude of about 50-500 submissions__, but it can be tweaked when problems occur. For more submissions it might be necessary to increase `Max-Runs` or `Bandwidth`, so that an appropriate number of clusters can be determined. + +| Group | Option | Description | Default | +| --- | --- | ----------- | - | +| General | Enable | Controls whether the clustering is run at all. | `true` | +| General | Algorithm | Which clustering algorithm to use.
      Agglomerative Clustering
      Agglomerative Clustering iteratively merges similar submissions bottom-up. It usually requires manual tuning of its parameters to yield helpful clusters.
      Spectral Clustering
      Spectral Clustering is combined with Bayesian Optimization to execute the k-Means clustering algorithm multiple times, hopefully finding a \"good\" clustering automatically. Its default parameters should work O.K. in most cases.
      | Agglomerative Clustering | +| General | Metric | The similarity score between submissions to use during clustering. Each score is expressed in terms of the size of the submissions `A` and `B` and the size of their matched intersection `A ∩ B`.
      AVG (aka. Dice's coefficient)
      `AVG = 2 * (A ∩ B) / (A + B)`
      MAX (aka. overlap coefficient)
      `MAX = (A ∩ B) / min(A, B)` Compared to AVG, this prevents obfuscation when a collaborator bloats their submission with unrelated code.
      MIN (_deprecated_)
      `MIN = (A ∩ B) / max(A, B)`
      INTERSECTION (_experimental_)
      `INTERSECTION = A ∩ B`
      | AVG | +| Spectral | Bandwidth | For Spectral Clustering, Bayesian Optimization is used to determine a fitting number of clusters. If a good clustering result is found during the search, numbers of clusters that differ from it by an amount within the bandwidth are also expected to be good. Low values result in more exploration of the search space, high values in more exploitation of known results. | 20.0 | +| Spectral | Noise | The result of each k-Means run in the search for good clusterings is random. The noise level models the variance in the \"worth\" of these results. It also acts as a regularization constant. | 0.0025 | +| Spectral | Min-Runs | Minimum number of k-Means executions for spectral clustering. With these initial runs clustering sizes are explored. | 5 | +| Spectral | Max-Runs | Maximum number of k-Means executions during spectral clustering. Any execution after the initial (min-) runs tries to balance between exploration of unknown clustering sizes and exploitation of clustering sizes known as good. | 50 | +| Spectral | K-Means Iterations | Maximum number of iterations during each execution of the k-Means algorithm. | 200 | +| Agglomerative | Threshold | Only clusters with an inter-cluster-similarity greater than this threshold are merged during agglomerative clustering. | 0.2 | +| Agglomerative | inter-cluster-similarity | How to measure the similarity of two clusters during agglomerative clustering.
      MIN (aka. complete-linkage)
      Clusters are merged if all their submissions are similar.
      MAX (aka. single-linkage)
      Clusters are merged if there is a similar submission in both.
      AVERAGE (aka. average-linkage)
      Clusters are merged if their submissions are similar on average.
      | AVERAGE | +| Preprocessing | Pre-Processor | How the similarities are preprocessed prior to clustering. Spectral Clustering will probably not have good results without it.
      None
      No preprocessing.
      Cumulative Distribution Function (CDF)
      Before clustering, the value of the cumulative distribution function of all similarities is estimated. The similarities are multiplied with these estimates. This has the effect of suppressing similarities that are low compared to other similarities.
      Percentile
      Any similarity smaller than the given percentile will be suppressed during clustering.
      Threshold
      Any similarity smaller than the given threshold will be suppressed during clustering.
      | CDF | + +## Clustering Architecture + +All clustering related classes are contained within the `de.jplag.clustering(.*)` packages in the core project. + +The central idea behind the structure of clustering is the ease of use: To use the clustering calling code should only ever interact with the `ClusteringOptions`, `ClusteringFactory`, and `ClusteringResult` classes: + +```mermaid + +classDiagram + ClusteringFactory <.. CallingCode + ClusteringOptions <.. CallingCode : creates + ClusteringAdapter <.. ClusteringFactory + ClusteringAlgorithm <.. ClusteringAdapter : runs + ClusteringAlgorithm <.. ClusteringFactory : creates instances + ClusteringPreprocessor <.. ClusteringFactory : creates instances + PreprocessedClusteringAlgorithm <.. ClusteringFactory : creates + ClusteringOptions <-- ClusteringFactory + ClusteringAlgorithm <|-- PreprocessedClusteringAlgorithm + ClusteringAlgorithm <-- PreprocessedClusteringAlgorithm : delegates to + ClusteringPreprocessor ..o PreprocessedClusteringAlgorithm + class ClusteringFactory{ + getClusterings(List~JPlagComparison~ comparisons, ClusteringOptions options)$ ClusteringResult~Submission~ + } + class ClusteringOptions{ + } + class ClusteringAlgorithm { + <> + cluster(Matrix similarities) ClusteringResult~Integer~ + } + class ClusteringPreprocessor { + <> + preprocess(Matrix similarities) Matrix + } + class ClusteringAdapter{ + ClusteringAdapter(List~JplagComparison~ comparisons) + doClustering(ClusteringAlgorithm algorithm) ClusteringResult~Submission~ + } + class PreprocessedClusteringAlgorithm{ + cluster(Matrix similarities) ClusteringResult~Integer~ + } + class CallingCode{ + + } + +``` + +New clustering algorithms and preprocessors can be implemented using the `GenericClusteringAlgorithm` and `ClusteringPreprocessor` interfaces which operate on similarity matrices only. `ClusteringAdapter` handles the conversion between `de.jplag` classes and matrices. 
`PreprocessedClusteringAlgorithm` adds a preprocessor onto another `ClusteringAlgorithm`. +### Remarks on Spectral Clustering + +* based on [On Spectral Clustering: Analysis and an algorithm (Ng, Jordan & Weiss, 2001)](https://proceedings.neurips.cc/paper/2001/file/801272ee79cfde7fa5960571fee36b9b-Paper.pdf) +* automatic hyper-parameter search using Bayesian Optimization with a Gaussian Process as the surrogate model and L-BFGS for optimization on the surrogate +* the L-BFGS implementation is a pit of technical debt, [see here](https://github.com/jplag/JPlag/pull/281#discussion_r810171986). + + +### Integration Tests + +There are integration tests for the Spectral Clustering to verify that, at least in the case of two known sets of similarities, the groups known to be colluders are found. However, these are considered to be sensitive data. The datasets are not available to the public and these tests can only be run by maintainers with access. + +To run these tests the contents of the [PseudonymizedReports](https://github.com/jplag/PseudonymizedReports) repository must be added to the folder `jplag/src/test/resources/de/jplag/PseudonymizedReports`. + diff --git a/docs/Home.md b/docs/Home.md new file mode 100644 index 000000000..e7e2cbd2c --- /dev/null +++ b/docs/Home.md @@ -0,0 +1,40 @@ +

      + JPlag logo +

      + +## What is JPlag +JPlag is a system that finds similarities among multiple sets of source code files. This way it can detect software plagiarism and collusion in software development. JPlag does not merely compare bytes of text but is aware of programming language syntax and program structure and hence is robust against many kinds of attempts to disguise similarities between plagiarized files. JPlag currently supports Java, C#, C/C++, Python 3, Go, Rust, Kotlin, Swift, Scala, Scheme, EMF, and natural language text. + +JPlag is typically used to detect and thus discourage the unallowed copying of student exercise programs in programming education. But in principle, it can also be used to detect stolen software parts among large amounts of source text or modules that have been duplicated (and only slightly modified). JPlag has already played a part in several intellectual property cases where it has been successfully used by expert witnesses. + +[TODO]: <> (Link or visualize example report) + +**Just to make it clear**: JPlag does not compare to the internet! It is designed to find similarities among the student solutions, which is usually sufficient for computer programs. + +## History +Originally, JPlag was developed in 1996 by Guido Mahlpohl and others at the chair of Prof. Walter Tichy at Karlsruhe Institute of Technology (KIT). It was first documented in a [Tech Report](https://publikationen.bibliothek.kit.edu/542000) in 2000 and later more formally in the [Journal of Universal Computer Science](http://www.ipd.kit.edu/tichy/uploads/publikationen/16/finding_plagiarisms_among_a_set_of_progr_638847.pdf). Since 2015, JPlag has been hosted here on GitHub. More than 25 years after its creation, JPlag is still used frequently in many universities in different countries around the world. + +## Download JPlag +Download the latest version of JPlag [here](https://github.com/jplag/jplag/releases). 
If you encounter bugs or other issues, please report them [here](https://github.com/jplag/jplag/issues). + +## Using JPlag +Use JPlag via the CLI to analyze your set of source codes. You can display your results via [jplag.github.io](https://jplag.github.io/JPlag/). No data will leave your computer! More usage information can be found [here](https://github.com/jplag/JPlag/wiki/1.-How-to-Use-JPlag). + +## Include JPlag as a Dependency +JPlag is released on [Maven Central](https://search.maven.org/search?q=de.jplag), it can be included as follows: +```xml + + de.jplag + jplag + +``` + +## JPlag legacy version +In case you depend on the legacy version of JPlag we refer to the [legacy release v2.12.1](https://github.com/jplag/jplag/releases/tag/v2.12.1-SNAPSHOT) and the [legacy branch](https://github.com/jplag/jplag/tree/legacy). Note that the legacy CLI usage is slightly different. + +The following features are only available in version v4.0.0 and onwards: +* a modern web-based UI +* a simplified command-line interface +* support for Kotlin, Scala, Go, Rust, and R +* support for Java 17 language features +* a Java API for third-party integration From 44a177ab4f3de4bb8ea076fc648d070a44068dce Mon Sep 17 00:00:00 2001 From: Alexander Milster Date: Tue, 11 Apr 2023 13:37:42 +0200 Subject: [PATCH 053/132] Basic implementation for file encoding detection --- .../java/de/jplag/options/JPlagOptions.java | 4 +- language-api/pom.xml | 5 + .../main/java/de/jplag/util/FileUtils.java | 91 ++++++++++++++++++ .../test/java/de/jplag/util/FileUtilTest.java | 35 +++++++ .../de/jplag/fileReaderTests/ISO-8859-1 | 1 + .../de/jplag/fileReaderTests/UTF-16LE | Bin 0 -> 106 bytes .../de/jplag/fileReaderTests/UTF-32BE | Bin 0 -> 208 bytes .../resources/de/jplag/fileReaderTests/UTF-8 | 1 + 8 files changed, 135 insertions(+), 2 deletions(-) create mode 100644 language-api/src/main/java/de/jplag/util/FileUtils.java create mode 100644 language-api/src/test/java/de/jplag/util/FileUtilTest.java 
create mode 100644 language-api/src/test/resources/de/jplag/fileReaderTests/ISO-8859-1 create mode 100644 language-api/src/test/resources/de/jplag/fileReaderTests/UTF-16LE create mode 100644 language-api/src/test/resources/de/jplag/fileReaderTests/UTF-32BE create mode 100644 language-api/src/test/resources/de/jplag/fileReaderTests/UTF-8 diff --git a/core/src/main/java/de/jplag/options/JPlagOptions.java b/core/src/main/java/de/jplag/options/JPlagOptions.java index e392fefdf..87da296c8 100644 --- a/core/src/main/java/de/jplag/options/JPlagOptions.java +++ b/core/src/main/java/de/jplag/options/JPlagOptions.java @@ -2,7 +2,6 @@ import java.io.BufferedReader; import java.io.File; -import java.io.FileReader; import java.io.IOException; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; @@ -20,6 +19,7 @@ import de.jplag.Language; import de.jplag.clustering.ClusteringOptions; import de.jplag.exceptions.BasecodeException; +import de.jplag.util.FileUtils; /** * This record defines the options to configure {@link JPlag}. 
@@ -184,7 +184,7 @@ public Integer minimumTokenMatch() { } private Set readExclusionFile(final String exclusionFileName) { - try (BufferedReader reader = new BufferedReader(new FileReader(exclusionFileName, JPlagOptions.CHARSET))) { + try (BufferedReader reader = FileUtils.openFileReader(new File(exclusionFileName))) { final var excludedFileNames = reader.lines().collect(Collectors.toSet()); if (logger.isDebugEnabled()) { logger.debug("Excluded files:{}{}", System.lineSeparator(), String.join(System.lineSeparator(), excludedFileNames)); diff --git a/language-api/pom.xml b/language-api/pom.xml index 2476d5e21..623efc192 100644 --- a/language-api/pom.xml +++ b/language-api/pom.xml @@ -15,5 +15,10 @@ org.kohsuke.metainf-services metainf-services + + com.ibm.icu + icu4j-charset + 68.1 + diff --git a/language-api/src/main/java/de/jplag/util/FileUtils.java b/language-api/src/main/java/de/jplag/util/FileUtils.java new file mode 100644 index 000000000..6f0748b6b --- /dev/null +++ b/language-api/src/main/java/de/jplag/util/FileUtils.java @@ -0,0 +1,91 @@ +package de.jplag.util; + +import java.io.*; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.stream.Collectors; + +import com.ibm.icu.text.CharsetDetector; +import com.ibm.icu.text.CharsetMatch; + +/** + * Encapsulates various interactions with files to prevent issues with file encodings. + */ +public class FileUtils { + private static final Charset defaultOutputCharset = StandardCharsets.UTF_8; + + /** + * Opens a file reader, guessing the charset from the content. Also, if the file is encoded in a UTF* encoding and a bom + * exists, it is removed from the reader. 
+ * @param file The file to open for read + * @return The reader, configured with the best matching charset + * @throws IOException If the file does not exist for is not readable + */ + public static BufferedReader openFileReader(File file) throws IOException { + InputStream stream = new BufferedInputStream(new FileInputStream(file)); + Charset charset = detectCharset(stream); + BufferedReader reader = new BufferedReader(new FileReader(file, charset)); + removeBom(reader, charset); + return reader; + } + + /** + * Reads the contents of a file into a single string. + * @param file The file to read + * @return The files content as a string + * @throws IOException If an IO error occurs + * @see FileUtils#openFileReader(File) + */ + public static String readFileContent(File file) throws IOException { + try (BufferedReader reader = openFileReader(file)) { + return reader.lines().collect(Collectors.joining(System.lineSeparator())); + } + } + + /** + * Removes the byte order mark from the beginning of the stream, if it exists and the charset is a UTF* charset. For + * details see: Wikipedia + * @param reader The reader to remove the bom from + * @throws IOException If an IO error occurs. + */ + private static void removeBom(BufferedReader reader, Charset charset) throws IOException { + if (charset.name().toUpperCase().startsWith("UTF")) { + reader.mark(10); + if (reader.read() != '\uFEFF') { + reader.reset(); + } + } + } + + /** + * Detects the charset of a file. Prefer using {@link #openFileReader(File)} or {@link #readFileContent(File)} if you + * are only interested in the content. 
+ * @param file The file to detect + * @return The most probable charset + * @throws IOException If an IO error occurs + */ + public static Charset detectCharset(File file) throws IOException { + try (InputStream stream = new BufferedInputStream(new FileInputStream((file)))) { + return detectCharset(stream); + } + } + + private static Charset detectCharset(InputStream stream) throws IOException { + CharsetDetector charsetDetector = new CharsetDetector(); + + charsetDetector.setText(stream); + + CharsetMatch match = charsetDetector.detect(); + return Charset.forName(match.getName()); + } + + /** + * Opens a file writer, using the default charset for JPlag + * @param file The file to write + * @return The file writer, configured with the default charset + * @throws IOException If the file does not exist or is not writable + */ + public static Writer openFileWriter(File file) throws IOException { + return new BufferedWriter(new FileWriter(file, defaultOutputCharset)); + } +} diff --git a/language-api/src/test/java/de/jplag/util/FileUtilTest.java b/language-api/src/test/java/de/jplag/util/FileUtilTest.java new file mode 100644 index 000000000..52ab58e6a --- /dev/null +++ b/language-api/src/test/java/de/jplag/util/FileUtilTest.java @@ -0,0 +1,35 @@ +package de.jplag.util; + +import java.io.File; +import java.io.IOException; +import java.nio.charset.Charset; +import java.nio.file.Path; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; + +public class FileUtilTest { + private static final Path TEST_FILE_LOCATION = Path.of("src", "test", "resources", "de", "jplag", "fileReaderTests"); + + private static final String expectedFileContent = "Some ascii characters and some others: ä#+öü%&(/)?=?"; + + @ParameterizedTest + @MethodSource("searchTestFiles") + public void testReadFile(File file) throws IOException { + String found = FileUtils.readFileContent(file); + + 
Assertions.assertEquals(expectedFileContent, found, "File contains unexpected content: " + file.getAbsolutePath()); + } + + @ParameterizedTest + @MethodSource("searchTestFiles") + public void testCharsetDetection(File file) throws IOException { + Assertions.assertEquals(Charset.forName(file.getName()), FileUtils.detectCharset(file), + "Wrong charset assumed for: " + file.getAbsolutePath()); + } + + public static File[] searchTestFiles() { + return TEST_FILE_LOCATION.toFile().listFiles(); + } +} diff --git a/language-api/src/test/resources/de/jplag/fileReaderTests/ISO-8859-1 b/language-api/src/test/resources/de/jplag/fileReaderTests/ISO-8859-1 new file mode 100644 index 000000000..292674145 --- /dev/null +++ b/language-api/src/test/resources/de/jplag/fileReaderTests/ISO-8859-1 @@ -0,0 +1 @@ +Some ascii characters and some others: �#+��%&(/)?=? \ No newline at end of file diff --git a/language-api/src/test/resources/de/jplag/fileReaderTests/UTF-16LE b/language-api/src/test/resources/de/jplag/fileReaderTests/UTF-16LE new file mode 100644 index 0000000000000000000000000000000000000000..642cfe74c2f26dbb6d465c9f38d3f6fd48cc4acc GIT binary patch literal 106 zcmezWFPI^pA(tVQL4hHWp_n0=ArlA{fOH0sUj)R-3?)F>BA^^dMIJ*6kS>Pn$p^}2 iK;^A~GEW$k8MGO`G5ldrWl&?#V9;mKWUyzj1!4fvxD?v} literal 0 HcmV?d00001 diff --git a/language-api/src/test/resources/de/jplag/fileReaderTests/UTF-32BE b/language-api/src/test/resources/de/jplag/fileReaderTests/UTF-32BE new file mode 100644 index 0000000000000000000000000000000000000000..f3cd324242dc40132ca74ac4943fd8c1c4576489 GIT binary patch literal 208 zcmZ9_K?*=n5XSL2%9MyK?3EnC3FIjjC>+@ttStP$dJ9?nnr7a7^USO>vkePIIC>ly yFh?HyiK_QKR_&ZnRx{-s+F23c(ER_8dS!L Date: Tue, 11 Apr 2023 13:17:13 +0200 Subject: [PATCH 054/132] Treat deque as stack --- .../de/jplag/semantics/VariableRegistry.java | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java 
b/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java index a3209cea1..4860b1af5 100644 --- a/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java +++ b/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java @@ -11,6 +11,7 @@ * Registry of variables to assist in generating token semantics. */ public class VariableRegistry { + // private CodeSemantics semantics; private Map fileVariables; private Deque> classVariables; // map class name to map of variable names to variables private Map> localVariables; // map local variable name to stack of variables @@ -68,30 +69,30 @@ public void setMutableWrite(boolean mutableWrite) { * Enter a class. */ public void enterClass() { - classVariables.addLast(new HashMap<>()); + classVariables.push(new HashMap<>()); } /** * Exit a class. This causes all variables bound to the current class to no longer be visible. */ public void exitClass() { - classVariables.removeLast(); + classVariables.pop(); } /** * Enter a local scope. */ public void enterLocalScope() { - localVariablesByScope.addLast(new HashSet<>()); + localVariablesByScope.push(new HashSet<>()); } /** * Exit a local scope. This causes all variables bound to the current local scope to no longer be visible. 
*/ public void exitLocalScope() { - for (String variableName : localVariablesByScope.removeLast()) { + for (String variableName : localVariablesByScope.pop()) { Deque variableStack = localVariables.get(variableName); - variableStack.removeLast(); + variableStack.pop(); if (variableStack.isEmpty()) localVariables.remove(variableName); } @@ -107,11 +108,11 @@ public void registerVariable(String variableName, VariableScope scope, boolean m Variable variable = new Variable(variableName, scope, mutable); switch (scope) { case FILE -> fileVariables.put(variableName, variable); - case CLASS -> classVariables.getLast().put(variableName, variable); + case CLASS -> classVariables.getFirst().put(variableName, variable); case LOCAL -> { localVariables.putIfAbsent(variableName, new LinkedList<>()); - localVariables.get(variableName).addLast(variable); - localVariablesByScope.getLast().add(variableName); + localVariables.get(variableName).push(variable); + localVariablesByScope.getFirst().add(variableName); } } } @@ -148,7 +149,7 @@ public void registerVariableAccess(String variableName, boolean isClassVariable, */ public void addAllNonLocalVariablesAsReads(CodeSemantics semantics) { Set nonLocalVariables = new HashSet<>(fileVariables.values()); - nonLocalVariables.addAll(classVariables.getLast().values()); + nonLocalVariables.addAll(classVariables.getFirst().values()); for (Variable variable : nonLocalVariables) semantics.addRead(variable); } @@ -156,7 +157,7 @@ public void addAllNonLocalVariablesAsReads(CodeSemantics semantics) { private Variable getVariable(String variableName) { Deque variableIdStack = localVariables.get(variableName); if (variableIdStack != null) - return variableIdStack.getLast(); // stack is never empty + return variableIdStack.getFirst(); // stack is never empty Variable variable = getClassVariable(variableName); return variable != null ? 
variable : fileVariables.get(variableName); /* @@ -168,7 +169,7 @@ private Variable getVariable(String variableName) { } private Variable getClassVariable(String variableName) { - Map currentClassVariables = classVariables.peekLast(); + Map currentClassVariables = classVariables.peek(); return currentClassVariables != null ? currentClassVariables.get(variableName) : null; } } From 0995248ddfa807e4511696a47f2b272a011258fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Tue, 11 Apr 2023 13:59:56 +0200 Subject: [PATCH 055/132] Simplify token semantics generation --- .../de/jplag/semantics/VariableRegistry.java | 20 +- .../java/TokenGeneratingTreeScanner.java | 389 +++++++----------- 2 files changed, 159 insertions(+), 250 deletions(-) diff --git a/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java b/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java index 4860b1af5..a411bab32 100644 --- a/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java +++ b/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java @@ -11,7 +11,7 @@ * Registry of variables to assist in generating token semantics. */ public class VariableRegistry { - // private CodeSemantics semantics; + private CodeSemantics semantics; private Map fileVariables; private Deque> classVariables; // map class name to map of variable names to variables private Map> localVariables; // map local variable name to stack of variables @@ -98,6 +98,14 @@ public void exitLocalScope() { } } + /** + * Update the current semantics. + * @param semantics are the new current semantics. + */ + public void updateSemantics(CodeSemantics semantics) { + this.semantics = semantics; + } + /** * Register a variable. * @param variableName The variable's name. 
@@ -118,14 +126,13 @@ public void registerVariable(String variableName, VariableScope scope, boolean m } /** - * Register a variable access, more precisely: Add a variable access to a CodeSemantics instance. The type of the access + * Register a variable access, more precisely: Add a variable access to the current CodeSemantics instance. The type of the access * can be set with setNextVariableAccessType. By default, its type is read. * @param variableName The variable's name. * @param isClassVariable Whether the variable is a class variable. This is true if a variable is qualified with the * "this" keyword in Java, for example. - * @param semantics The CodeSemantics instance the variable access is added to. */ - public void registerVariableAccess(String variableName, boolean isClassVariable, CodeSemantics semantics) { + public void registerVariableAccess(String variableName, boolean isClassVariable) { if (ignoreNextVariableAccess) { ignoreNextVariableAccess = false; return; @@ -144,10 +151,9 @@ public void registerVariableAccess(String variableName, boolean isClassVariable, } /** - * Add all non-local visible variables as reads to the CodeSemantics instance. - * @param semantics The CodeSemantics instance. + * Add all non-local visible variables as reads to the current CodeSemantics instance. 
*/ - public void addAllNonLocalVariablesAsReads(CodeSemantics semantics) { + public void addAllNonLocalVariablesAsReads() { Set nonLocalVariables = new HashSet<>(fileVariables.values()); nonLocalVariables.addAll(classVariables.getFirst().values()); for (Variable variable : nonLocalVariables) diff --git a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java index 504f70369..f9cb262f8 100644 --- a/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java +++ b/languages/java/src/main/java/de/jplag/java/TokenGeneratingTreeScanner.java @@ -59,7 +59,7 @@ import com.sun.source.util.SourcePositions; import com.sun.source.util.TreeScanner; -final class TokenGeneratingTreeScanner extends TreeScanner { +final class TokenGeneratingTreeScanner extends TreeScanner { private final File file; private final Parser parser; private final LineMap map; @@ -90,6 +90,7 @@ public List getParsingExceptions() { public void addToken(TokenType type, File file, long line, long column, long length, CodeSemantics semantics) { parser.add(new Token(type, file, (int) line, (int) column, (int) length, semantics)); + variableRegistry.updateSemantics(semantics); } /** @@ -118,7 +119,7 @@ private boolean isMutable(Tree classTree) { } @Override - public Void visitBlock(BlockTree node, CodeSemantics semantics) { + public Void visitBlock(BlockTree node, Void unused) { // kind of weird since in the case of for loops and catches, two scopes are introduced // but I'm pretty sure that's how Java does it internally as well variableRegistry.enterLocalScope(); @@ -128,7 +129,7 @@ public Void visitBlock(BlockTree node, CodeSemantics semantics) { } @Override - public Void visitClass(ClassTree node, CodeSemantics semantics) { + public Void visitClass(ClassTree node, Void unused) { // not super accurate variableRegistry.registerVariable(node.getSimpleName().toString(), VariableScope.FILE, true); 
variableRegistry.enterClass(); @@ -143,7 +144,7 @@ public Void visitClass(ClassTree node, CodeSemantics semantics) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = CodeSemantics.createControl(); + CodeSemantics semantics = CodeSemantics.createControl(); if (node.getKind() == Tree.Kind.ENUM) { addToken(JavaTokenType.J_ENUM_BEGIN, start, 4, semantics); } else if (node.getKind() == Tree.Kind.INTERFACE) { @@ -155,12 +156,7 @@ public Void visitClass(ClassTree node, CodeSemantics semantics) { } else if (node.getKind() == Tree.Kind.CLASS) { addToken(JavaTokenType.J_CLASS_BEGIN, start, 5, semantics); } - scan(node.getModifiers(), semantics); - scan(node.getTypeParameters(), semantics); - scan(node.getExtendsClause(), semantics); - scan(node.getImplementsClause(), semantics); - scan(node.getPermitsClause(), semantics); - scan(node.getMembers(), null); + super.visitClass(node, null); JavaTokenType tokenType = switch (node.getKind()) { case ENUM -> JavaTokenType.J_ENUM_END; @@ -179,341 +175,269 @@ public Void visitClass(ClassTree node, CodeSemantics semantics) { } @Override - public Void visitImport(ImportTree node, CodeSemantics semantics) { + public Void visitImport(ImportTree node, Void unused) { long start = positions.getStartPosition(ast, node); - semantics = CodeSemantics.createKeep(); - addToken(JavaTokenType.J_IMPORT, start, 6, semantics); - super.visitImport(node, semantics); - return null; + addToken(JavaTokenType.J_IMPORT, start, 6, CodeSemantics.createKeep()); + return super.visitImport(node, null); } @Override - public Void visitPackage(PackageTree node, CodeSemantics semantics) { + public Void visitPackage(PackageTree node, Void unused) { long start = positions.getStartPosition(ast, node); - semantics = CodeSemantics.createControl(); - addToken(JavaTokenType.J_PACKAGE, start, 7, semantics); - super.visitPackage(node, semantics); - return null; + addToken(JavaTokenType.J_PACKAGE, start, 7, 
CodeSemantics.createControl()); + return super.visitPackage(node, null); } @Override - public Void visitMethod(MethodTree node, CodeSemantics semantics) { + public Void visitMethod(MethodTree node, Void unused) { variableRegistry.enterLocalScope(); long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = CodeSemantics.createControl(); - addToken(JavaTokenType.J_METHOD_BEGIN, start, node.getName().length(), semantics); - scan(node.getModifiers(), semantics); - scan(node.getReturnType(), semantics); - scan(node.getTypeParameters(), semantics); - scan(node.getParameters(), semantics); - scan(node.getReceiverParameter(), semantics); - scan(node.getThrows(), semantics); - scan(node.getBody(), null); - semantics = CodeSemantics.createControl(); - variableRegistry.addAllNonLocalVariablesAsReads(semantics); - addToken(JavaTokenType.J_METHOD_END, end, 1, semantics); + addToken(JavaTokenType.J_METHOD_BEGIN, start, node.getName().length(), CodeSemantics.createControl()); + super.visitMethod(node, null); + addToken(JavaTokenType.J_METHOD_END, end, 1, CodeSemantics.createControl()); + variableRegistry.addAllNonLocalVariablesAsReads(); variableRegistry.exitLocalScope(); return null; } @Override - public Void visitSynchronized(SynchronizedTree node, CodeSemantics semantics) { + public Void visitSynchronized(SynchronizedTree node, Void unused) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = CodeSemantics.createControl(); - addToken(JavaTokenType.J_SYNC_BEGIN, start, 12, semantics); - super.visitSynchronized(node, semantics); - semantics = CodeSemantics.createControl(); - addToken(JavaTokenType.J_SYNC_END, end, 1, semantics); + addToken(JavaTokenType.J_SYNC_BEGIN, start, 12, CodeSemantics.createControl()); + super.visitSynchronized(node, null); + addToken(JavaTokenType.J_SYNC_END, end, 1, CodeSemantics.createControl()); return null; } @Override - 
public Void visitDoWhileLoop(DoWhileLoopTree node, CodeSemantics semantics) { + public Void visitDoWhileLoop(DoWhileLoopTree node, Void unused) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = CodeSemantics.createLoopBegin(); - addToken(JavaTokenType.J_DO_BEGIN, start, 2, semantics); + addToken(JavaTokenType.J_DO_BEGIN, start, 2, CodeSemantics.createLoopBegin()); scan(node.getStatement(), null); - semantics = CodeSemantics.createLoopEnd(); - addToken(JavaTokenType.J_DO_END, end, 1, semantics); - scan(node.getCondition(), semantics); + addToken(JavaTokenType.J_DO_END, end, 1, CodeSemantics.createLoopEnd()); + scan(node.getCondition(), null); return null; } @Override - public Void visitWhileLoop(WhileLoopTree node, CodeSemantics semantics) { + public Void visitWhileLoop(WhileLoopTree node, Void unused) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = CodeSemantics.createLoopBegin(); - addToken(JavaTokenType.J_WHILE_BEGIN, start, 5, semantics); - scan(node.getCondition(), semantics); - scan(node.getStatement(), null); - semantics = CodeSemantics.createLoopEnd(); - addToken(JavaTokenType.J_WHILE_END, end, 1, semantics); + addToken(JavaTokenType.J_WHILE_BEGIN, start, 5, CodeSemantics.createLoopBegin()); + super.visitWhileLoop(node, null); + addToken(JavaTokenType.J_WHILE_END, end, 1, CodeSemantics.createLoopEnd()); return null; } @Override - public Void visitForLoop(ForLoopTree node, CodeSemantics semantics) { + public Void visitForLoop(ForLoopTree node, Void unused) { variableRegistry.enterLocalScope(); long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = CodeSemantics.createLoopBegin(); - addToken(JavaTokenType.J_FOR_BEGIN, start, 3, semantics); - scan(node.getInitializer(), semantics); - scan(node.getCondition(), semantics); - scan(node.getUpdate(), semantics); - 
scan(node.getStatement(), null); - semantics = CodeSemantics.createLoopEnd(); - addToken(JavaTokenType.J_FOR_END, end, 1, semantics); + addToken(JavaTokenType.J_FOR_BEGIN, start, 3, CodeSemantics.createLoopBegin()); + super.visitForLoop(node, null); + addToken(JavaTokenType.J_FOR_END, end, 1, CodeSemantics.createLoopEnd()); variableRegistry.exitLocalScope(); return null; } @Override - public Void visitEnhancedForLoop(EnhancedForLoopTree node, CodeSemantics semantics) { + public Void visitEnhancedForLoop(EnhancedForLoopTree node, Void unused) { variableRegistry.enterLocalScope(); long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = CodeSemantics.createLoopBegin(); - addToken(JavaTokenType.J_FOR_BEGIN, start, 3, semantics); - scan(node.getVariable(), semantics); - scan(node.getExpression(), semantics); - scan(node.getStatement(), null); - semantics = CodeSemantics.createLoopEnd(); - addToken(JavaTokenType.J_FOR_END, end, 1, semantics); + addToken(JavaTokenType.J_FOR_BEGIN, start, 3, CodeSemantics.createLoopBegin()); + super.visitEnhancedForLoop(node, null); + addToken(JavaTokenType.J_FOR_END, end, 1, CodeSemantics.createLoopEnd()); variableRegistry.exitLocalScope(); return null; } @Override - public Void visitSwitch(SwitchTree node, CodeSemantics semantics) { + public Void visitSwitch(SwitchTree node, Void unused) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = CodeSemantics.createControl(); - addToken(JavaTokenType.J_SWITCH_BEGIN, start, 6, semantics); - scan(node.getExpression(), semantics); - scan(node.getCases(), null); - semantics = CodeSemantics.createControl(); - addToken(JavaTokenType.J_SWITCH_END, end, 1, semantics); + addToken(JavaTokenType.J_SWITCH_BEGIN, start, 6, CodeSemantics.createControl()); + super.visitSwitch(node, null); + addToken(JavaTokenType.J_SWITCH_END, end, 1, CodeSemantics.createControl()); return null; } 
@Override - public Void visitSwitchExpression(SwitchExpressionTree node, CodeSemantics semantics) { + public Void visitSwitchExpression(SwitchExpressionTree node, Void unused) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = CodeSemantics.createControl(); - addToken(JavaTokenType.J_SWITCH_BEGIN, start, 6, semantics); - scan(node.getExpression(), semantics); - scan(node.getCases(), null); - semantics = CodeSemantics.createControl(); - addToken(JavaTokenType.J_SWITCH_END, end, 1, semantics); + addToken(JavaTokenType.J_SWITCH_BEGIN, start, 6, CodeSemantics.createControl()); + super.visitSwitchExpression(node, null); + addToken(JavaTokenType.J_SWITCH_END, end, 1, CodeSemantics.createControl()); return null; } @Override - public Void visitCase(CaseTree node, CodeSemantics semantics) { + public Void visitCase(CaseTree node, Void unused) { long start = positions.getStartPosition(ast, node); - semantics = CodeSemantics.createControl(); - addToken(JavaTokenType.J_CASE, start, 4, semantics); - scan(node.getExpressions(), semantics); - if (node.getCaseKind() == CaseTree.CaseKind.RULE) { - scan(node.getBody(), semantics); // case -> result, in switch expression - } else { - scan(node.getStatements(), null); // in normal switch - } - return null; + addToken(JavaTokenType.J_CASE, start, 4, CodeSemantics.createControl()); + return super.visitCase(node, null); } @Override - public Void visitTry(TryTree node, CodeSemantics semantics) { + public Void visitTry(TryTree node, Void unused) { long start = positions.getStartPosition(ast, node); - semantics = CodeSemantics.createControl(); - addToken(JavaTokenType.J_TRY_BEGIN, start, 3, semantics); - scan(node.getResources(), semantics); + addToken(JavaTokenType.J_TRY_BEGIN, start, 3, CodeSemantics.createControl()); + scan(node.getResources(), null); scan(node.getBlock(), null); long end = positions.getEndPosition(ast, node); - semantics = CodeSemantics.createControl(); 
- addToken(JavaTokenType.J_TRY_END, end, 1, semantics); + addToken(JavaTokenType.J_TRY_END, end, 1, CodeSemantics.createControl()); scan(node.getCatches(), null); if (node.getFinallyBlock() != null) { start = positions.getStartPosition(ast, node.getFinallyBlock()); - semantics = CodeSemantics.createControl(); - addToken(JavaTokenType.J_FINALLY_BEGIN, start, 3, semantics); + addToken(JavaTokenType.J_FINALLY_BEGIN, start, 3, CodeSemantics.createControl()); scan(node.getFinallyBlock(), null); end = positions.getEndPosition(ast, node.getFinallyBlock()); - semantics = CodeSemantics.createControl(); - addToken(JavaTokenType.J_FINALLY_END, end, 1, semantics); + addToken(JavaTokenType.J_FINALLY_END, end, 1, CodeSemantics.createControl()); } - return null; // return value isn't used + return null; } @Override - public Void visitCatch(CatchTree node, CodeSemantics semantics) { + public Void visitCatch(CatchTree node, Void unused) { variableRegistry.enterLocalScope(); long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = CodeSemantics.createControl(); - addToken(JavaTokenType.J_CATCH_BEGIN, start, 5, semantics); - super.visitCatch(node, null); // can leave this since catch parameter is variable declaration and thus always generates a token - semantics = CodeSemantics.createControl(); - addToken(JavaTokenType.J_CATCH_END, end, 1, semantics); + addToken(JavaTokenType.J_CATCH_BEGIN, start, 5, CodeSemantics.createControl()); + super.visitCatch(node, null); + addToken(JavaTokenType.J_CATCH_END, end, 1, CodeSemantics.createControl()); variableRegistry.exitLocalScope(); return null; } @Override - public Void visitIf(IfTree node, CodeSemantics semantics) { + public Void visitIf(IfTree node, Void unused) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = CodeSemantics.createControl(); - addToken(JavaTokenType.J_IF_BEGIN, start, 2, semantics); - 
scan(node.getCondition(), semantics); + addToken(JavaTokenType.J_IF_BEGIN, start, 2, CodeSemantics.createControl()); + scan(node.getCondition(), null); scan(node.getThenStatement(), null); if (node.getElseStatement() != null) { start = positions.getStartPosition(ast, node.getElseStatement()); - semantics = CodeSemantics.createControl(); - addToken(JavaTokenType.J_ELSE, start, 4, semantics); + addToken(JavaTokenType.J_ELSE, start, 4, CodeSemantics.createControl()); } scan(node.getElseStatement(), null); - semantics = CodeSemantics.createControl(); - addToken(JavaTokenType.J_IF_END, end, 1, semantics); + addToken(JavaTokenType.J_IF_END, end, 1, CodeSemantics.createControl()); return null; } @Override - public Void visitBreak(BreakTree node, CodeSemantics semantics) { + public Void visitBreak(BreakTree node, Void unused) { long start = positions.getStartPosition(ast, node); - semantics = CodeSemantics.createControl(); - addToken(JavaTokenType.J_BREAK, start, 5, semantics); - super.visitBreak(node, semantics); - return null; + addToken(JavaTokenType.J_BREAK, start, 5, CodeSemantics.createControl()); + return super.visitBreak(node, null); } @Override - public Void visitContinue(ContinueTree node, CodeSemantics semantics) { + public Void visitContinue(ContinueTree node, Void unused) { long start = positions.getStartPosition(ast, node); - semantics = CodeSemantics.createControl(); - addToken(JavaTokenType.J_CONTINUE, start, 8, semantics); - super.visitContinue(node, semantics); - return null; + addToken(JavaTokenType.J_CONTINUE, start, 8, CodeSemantics.createControl()); + return super.visitContinue(node, null); } @Override - public Void visitReturn(ReturnTree node, CodeSemantics semantics) { + public Void visitReturn(ReturnTree node, Void unused) { long start = positions.getStartPosition(ast, node); - semantics = CodeSemantics.createControl(); - addToken(JavaTokenType.J_RETURN, start, 6, semantics); - super.visitReturn(node, semantics); - return null; + 
addToken(JavaTokenType.J_RETURN, start, 6, CodeSemantics.createControl()); + return super.visitReturn(node, null); } @Override - public Void visitThrow(ThrowTree node, CodeSemantics semantics) { + public Void visitThrow(ThrowTree node, Void unused) { long start = positions.getStartPosition(ast, node); - semantics = CodeSemantics.createControl(); - addToken(JavaTokenType.J_THROW, start, 5, semantics); - super.visitThrow(node, semantics); - return null; + addToken(JavaTokenType.J_THROW, start, 5, CodeSemantics.createControl()); + return super.visitThrow(node, null); } @Override - public Void visitNewClass(NewClassTree node, CodeSemantics semantics) { + public Void visitNewClass(NewClassTree node, Void unused) { long start = positions.getStartPosition(ast, node); if (node.getTypeArguments().size() > 0) { - semantics = new CodeSemantics(); - addToken(JavaTokenType.J_GENERIC, start, 3 + node.getIdentifier().toString().length(), semantics); + addToken(JavaTokenType.J_GENERIC, start, 3 + node.getIdentifier().toString().length(), new CodeSemantics()); } - semantics = new CodeSemantics(); - addToken(JavaTokenType.J_NEWCLASS, start, 3, semantics); - super.visitNewClass(node, semantics); + addToken(JavaTokenType.J_NEWCLASS, start, 3, new CodeSemantics()); + super.visitNewClass(node, null); return null; } @Override - public Void visitTypeParameter(TypeParameterTree node, CodeSemantics semantics) { + public Void visitTypeParameter(TypeParameterTree node, Void unused) { long start = positions.getStartPosition(ast, node); // This is odd, but also done like this in Java 1.7 - semantics = new CodeSemantics(); - addToken(JavaTokenType.J_GENERIC, start, 1, semantics); - super.visitTypeParameter(node, semantics); - return null; + addToken(JavaTokenType.J_GENERIC, start, 1, new CodeSemantics()); + return super.visitTypeParameter(node, null); } @Override - public Void visitNewArray(NewArrayTree node, CodeSemantics semantics) { + public Void visitNewArray(NewArrayTree node, Void unused) 
{ long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = new CodeSemantics(); - addToken(JavaTokenType.J_NEWARRAY, start, 3, semantics); - scan(node.getType(), semantics); - scan(node.getDimensions(), semantics); + addToken(JavaTokenType.J_NEWARRAY, start, 3, new CodeSemantics()); + scan(node.getType(), null); + scan(node.getDimensions(), null); boolean hasInit = node.getInitializers() != null && !node.getInitializers().isEmpty(); if (hasInit) { start = positions.getStartPosition(ast, node.getInitializers().get(0)); - semantics = new CodeSemantics(); - addToken(JavaTokenType.J_ARRAY_INIT_BEGIN, start, 1, semantics); + addToken(JavaTokenType.J_ARRAY_INIT_BEGIN, start, 1, new CodeSemantics()); } - scan(node.getInitializers(), semantics); + scan(node.getInitializers(), null); // super method has annotation processing but we have it disabled anyways if (hasInit) { - semantics = new CodeSemantics(); - addToken(JavaTokenType.J_ARRAY_INIT_END, end, 1, semantics); + addToken(JavaTokenType.J_ARRAY_INIT_END, end, 1, new CodeSemantics()); } return null; } @Override - public Void visitAssignment(AssignmentTree node, CodeSemantics semantics) { + public Void visitAssignment(AssignmentTree node, Void unused) { long start = positions.getStartPosition(ast, node); - semantics = new CodeSemantics(); - addToken(JavaTokenType.J_ASSIGN, start, 1, semantics); + addToken(JavaTokenType.J_ASSIGN, start, 1, new CodeSemantics()); variableRegistry.setNextVariableAccessType(VariableAccessType.WRITE); - super.visitAssignment(node, semantics); - return null; + return super.visitAssignment(node, null); } @Override - public Void visitCompoundAssignment(CompoundAssignmentTree node, CodeSemantics semantics) { + public Void visitCompoundAssignment(CompoundAssignmentTree node, Void unused) { long start = positions.getStartPosition(ast, node); - semantics = new CodeSemantics(); - addToken(JavaTokenType.J_ASSIGN, start, 1, semantics); + 
addToken(JavaTokenType.J_ASSIGN, start, 1, new CodeSemantics()); variableRegistry.setNextVariableAccessType(VariableAccessType.READ_WRITE); - super.visitCompoundAssignment(node, semantics); - return null; + return super.visitCompoundAssignment(node, null); } @Override - public Void visitUnary(UnaryTree node, CodeSemantics semantics) { - semantics = new CodeSemantics(); + public Void visitUnary(UnaryTree node, Void unused) { if (Set.of(Tree.Kind.PREFIX_INCREMENT, Tree.Kind.POSTFIX_INCREMENT, Tree.Kind.PREFIX_DECREMENT, Tree.Kind.POSTFIX_DECREMENT) .contains(node.getKind())) { long start = positions.getStartPosition(ast, node); - addToken(JavaTokenType.J_ASSIGN, start, 1, semantics); + addToken(JavaTokenType.J_ASSIGN, start, 1, new CodeSemantics()); variableRegistry.setNextVariableAccessType(VariableAccessType.READ_WRITE); } - super.visitUnary(node, semantics); - return null; + return super.visitUnary(node, null); } @Override - public Void visitAssert(AssertTree node, CodeSemantics semantics) { + public Void visitAssert(AssertTree node, Void unused) { long start = positions.getStartPosition(ast, node); - semantics = CodeSemantics.createControl(); - addToken(JavaTokenType.J_ASSERT, start, 6, semantics); - super.visitAssert(node, semantics); - return null; + addToken(JavaTokenType.J_ASSERT, start, 6, CodeSemantics.createControl()); + return super.visitAssert(node, null); } @Override - public Void visitVariable(VariableTree node, CodeSemantics semantics) { + public Void visitVariable(VariableTree node, Void unused) { long start = positions.getStartPosition(ast, node); String name = node.getName().toString(); boolean inLocalScope = variableRegistry.inLocalScope(); // this presents a problem when classes are declared in local scopes, which can happen in ad-hoc implementations + CodeSemantics semantics; if (inLocalScope) { boolean mutable = isMutable(node.getType()); variableRegistry.registerVariable(name, VariableScope.LOCAL, mutable); @@ -521,129 +445,108 @@ public Void 
visitVariable(VariableTree node, CodeSemantics semantics) { } else { semantics = CodeSemantics.createKeep(); } - variableRegistry.setNextVariableAccessType(VariableAccessType.WRITE); - // manually add variable to semantics since identifier isn't visited - variableRegistry.registerVariableAccess(name, !inLocalScope, semantics); addToken(JavaTokenType.J_VARDEF, start, node.toString().length(), semantics); - super.visitVariable(node, semantics); - return null; + // manually add variable to semantics since identifier isn't visited + variableRegistry.setNextVariableAccessType(VariableAccessType.WRITE); + variableRegistry.registerVariableAccess(name, !inLocalScope); + return super.visitVariable(node, null); } @Override - public Void visitConditionalExpression(ConditionalExpressionTree node, CodeSemantics semantics) { + public Void visitConditionalExpression(ConditionalExpressionTree node, Void unused) { long start = positions.getStartPosition(ast, node); - semantics = new CodeSemantics(); - addToken(JavaTokenType.J_COND, start, 1, semantics); - super.visitConditionalExpression(node, semantics); - return null; + addToken(JavaTokenType.J_COND, start, 1, new CodeSemantics()); + return super.visitConditionalExpression(node, null); } @Override - public Void visitMethodInvocation(MethodInvocationTree node, CodeSemantics semantics) { + public Void visitMethodInvocation(MethodInvocationTree node, Void unused) { long start = positions.getStartPosition(ast, node); - semantics = CodeSemantics.createControl(); - variableRegistry.addAllNonLocalVariablesAsReads(semantics); - addToken(JavaTokenType.J_APPLY, start, positions.getEndPosition(ast, node.getMethodSelect()) - start, semantics); - scan(node.getTypeArguments(), semantics); + addToken(JavaTokenType.J_APPLY, start, positions.getEndPosition(ast, node.getMethodSelect()) - start, CodeSemantics.createControl()); + variableRegistry.addAllNonLocalVariablesAsReads(); + scan(node.getTypeArguments(), null); // differentiate bar() and 
this.bar() (ignore) from bar.foo() (don't ignore) // look at cases foo.bar()++ and foo().bar++ variableRegistry.setIgnoreNextVariableAccess(true); variableRegistry.setMutableWrite(true); - scan(node.getMethodSelect(), semantics); // foo.bar() is a write to foo - scan(node.getArguments(), semantics); // foo(bar) is a write to bar + scan(node.getMethodSelect(), null); // foo.bar() is a write to foo + scan(node.getArguments(), null); // foo(bar) is a write to bar variableRegistry.setMutableWrite(false); return null; } @Override - public Void visitAnnotation(AnnotationTree node, CodeSemantics semantics) { + public Void visitAnnotation(AnnotationTree node, Void unused) { long start = positions.getStartPosition(ast, node); - semantics = new CodeSemantics(); - addToken(JavaTokenType.J_ANNO, start, 1, semantics); - super.visitAnnotation(node, semantics); - return null; + addToken(JavaTokenType.J_ANNO, start, 1, new CodeSemantics()); + return super.visitAnnotation(node, null); } @Override - public Void visitModule(ModuleTree node, CodeSemantics semantics) { + public Void visitModule(ModuleTree node, Void unused) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node) - 1; - semantics = CodeSemantics.createControl(); - addToken(JavaTokenType.J_MODULE_BEGIN, start, 6, semantics); + addToken(JavaTokenType.J_MODULE_BEGIN, start, 6, CodeSemantics.createControl()); super.visitModule(node, null); - semantics = CodeSemantics.createControl(); - addToken(JavaTokenType.J_MODULE_END, end, 1, semantics); + addToken(JavaTokenType.J_MODULE_END, end, 1, CodeSemantics.createControl()); return null; } @Override - public Void visitRequires(RequiresTree node, CodeSemantics semantics) { + public Void visitRequires(RequiresTree node, Void unused) { long start = positions.getStartPosition(ast, node); - semantics = CodeSemantics.createControl(); - addToken(JavaTokenType.J_REQUIRES, start, 8, semantics); - super.visitRequires(node, semantics); - return 
null; + addToken(JavaTokenType.J_REQUIRES, start, 8, CodeSemantics.createControl()); + return super.visitRequires(node, null); } @Override - public Void visitProvides(ProvidesTree node, CodeSemantics semantics) { + public Void visitProvides(ProvidesTree node, Void unused) { long start = positions.getStartPosition(ast, node); - semantics = CodeSemantics.createControl(); - addToken(JavaTokenType.J_PROVIDES, start, 8, semantics); - super.visitProvides(node, semantics); - return null; + addToken(JavaTokenType.J_PROVIDES, start, 8, CodeSemantics.createControl()); + return super.visitProvides(node, null); } @Override - public Void visitExports(ExportsTree node, CodeSemantics semantics) { + public Void visitExports(ExportsTree node, Void unused) { long start = positions.getStartPosition(ast, node); - semantics = CodeSemantics.createControl(); - addToken(JavaTokenType.J_EXPORTS, start, 7, semantics); - super.visitExports(node, semantics); - return null; + addToken(JavaTokenType.J_EXPORTS, start, 7, CodeSemantics.createControl()); + return super.visitExports(node, null); } @Override - public Void visitErroneous(ErroneousTree node, CodeSemantics semantics) { + public Void visitErroneous(ErroneousTree node, Void unused) { parsingExceptions.add(new ParsingException(file, "error while visiting %s".formatted(node))); - super.visitErroneous(node, semantics); - return null; + return super.visitErroneous(node, null); } @Override - public Void visitYield(YieldTree node, CodeSemantics semantics) { + public Void visitYield(YieldTree node, Void unused) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node); - semantics = CodeSemantics.createControl(); - addToken(JavaTokenType.J_YIELD, start, end, semantics); - super.visitYield(node, semantics); - return null; + addToken(JavaTokenType.J_YIELD, start, end, CodeSemantics.createControl()); + return super.visitYield(node, null); } @Override - public Void 
visitDefaultCaseLabel(DefaultCaseLabelTree node, CodeSemantics semantics) { + public Void visitDefaultCaseLabel(DefaultCaseLabelTree node, Void unused) { long start = positions.getStartPosition(ast, node); long end = positions.getEndPosition(ast, node); - semantics = CodeSemantics.createControl(); - addToken(JavaTokenType.J_DEFAULT, start, end, semantics); - super.visitDefaultCaseLabel(node, semantics); - return null; + addToken(JavaTokenType.J_DEFAULT, start, end, CodeSemantics.createControl()); + return super.visitDefaultCaseLabel(node, null); } @Override - public Void visitMemberSelect(MemberSelectTree node, CodeSemantics semantics) { + public Void visitMemberSelect(MemberSelectTree node, Void unused) { if (node.getExpression().toString().equals("this")) { - variableRegistry.registerVariableAccess(node.getIdentifier().toString(), true, semantics); + variableRegistry.registerVariableAccess(node.getIdentifier().toString(), true); } variableRegistry.setIgnoreNextVariableAccess(false); // don't ignore the foo in foo.bar() - super.visitMemberSelect(node, semantics); - return null; + return super.visitMemberSelect(node, null); } @Override - public Void visitIdentifier(IdentifierTree node, CodeSemantics semantics) { - variableRegistry.registerVariableAccess(node.toString(), false, semantics); - super.visitIdentifier(node, semantics); - return null; + public Void visitIdentifier(IdentifierTree node, Void unused) { + variableRegistry.registerVariableAccess(node.toString(), false); + return super.visitIdentifier(node, null); } } From 2048f846214f3c7d00698c72306d033a51d8ec5e Mon Sep 17 00:00:00 2001 From: Alexander Milster Date: Wed, 12 Apr 2023 12:24:36 +0200 Subject: [PATCH 056/132] Using FileUtils for file accesses. 
--- .../main/java/de/jplag/util/FileUtils.java | 50 +++++++++++++++++++ .../java/de/jplag/cpp2/CPPParserAdapter.java | 4 +- .../de/jplag/csharp/CSharpParserAdapter.java | 7 +-- .../java/de/jplag/golang/GoParserAdapter.java | 7 +-- .../main/java/de/jplag/java/JavacAdapter.java | 6 ++- .../main/java/de/jplag/python3/Parser.java | 7 +-- .../java/de/jplag/rlang/RParserAdapter.java | 7 +-- .../java/de/jplag/rust/RustParserAdapter.java | 7 +-- .../main/scala/de/jplag/scala/Parser.scala | 5 +- languages/scheme/src/main/javacc/Scheme.jj | 8 ++- .../de/jplag/swift/SwiftParserAdapter.java | 7 +-- .../java/de/jplag/text/ParserAdapter.java | 4 +- 12 files changed, 91 insertions(+), 28 deletions(-) diff --git a/language-api/src/main/java/de/jplag/util/FileUtils.java b/language-api/src/main/java/de/jplag/util/FileUtils.java index 6f0748b6b..63f4f282c 100644 --- a/language-api/src/main/java/de/jplag/util/FileUtils.java +++ b/language-api/src/main/java/de/jplag/util/FileUtils.java @@ -3,6 +3,8 @@ import java.io.*; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; +import java.util.*; +import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Collectors; import com.ibm.icu.text.CharsetDetector; @@ -70,6 +72,46 @@ public static Charset detectCharset(File file) throws IOException { } } + /** + * Detects the most probable charset over the whole set of files. 
+ * @param files The files to check + * @return The most probable charset + */ + public static Charset detectCharsetFromMultiple(Collection<File> files) { + Map<String, List<Integer>> charsetValues = new HashMap<>(); + + files.stream().map(it -> { + try (InputStream stream = new BufferedInputStream(new FileInputStream(it))) { + return detectAllCharsets(stream); + } catch (IOException e) { + throw new RuntimeException(e); + } + }).forEach(matches -> { + Set<String> remaining = new HashSet<>(Set.of(CharsetDetector.getAllDetectableCharsets())); + for (CharsetMatch match : matches) { + charsetValues.putIfAbsent(match.getName(), new ArrayList<>()); + charsetValues.get(match.getName()).add(match.getConfidence()); + remaining.remove(match.getName()); + } + remaining.forEach(it -> { + charsetValues.putIfAbsent(it, new ArrayList<>()); + charsetValues.get(it).add(0); + }); + }); + + AtomicReference<Charset> mostProbable = new AtomicReference<>(StandardCharsets.UTF_8); + AtomicReference<Double> mostProbableConfidence = new AtomicReference<>((double) 0); + charsetValues.forEach((charset, confidenceValues) -> { + double average = confidenceValues.stream().mapToInt(it -> it).average().orElse(0); + if (average > mostProbableConfidence.get()) { + mostProbable.set(Charset.forName(charset)); + mostProbableConfidence.set(average); + } + }); + + return mostProbable.get(); + } + private static Charset detectCharset(InputStream stream) throws IOException { CharsetDetector charsetDetector = new CharsetDetector(); @@ -79,6 +121,14 @@ private static Charset detectCharset(InputStream stream) throws IOException { return Charset.forName(match.getName()); } + private static CharsetMatch[] detectAllCharsets(InputStream stream) throws IOException { + CharsetDetector charsetDetector = new CharsetDetector(); + + charsetDetector.setText(stream); + + return charsetDetector.detectAll(); + } + /** * Opens a file writer, using the default charset for JPlag * @param file The file to write diff --git
a/languages/cpp2/src/main/java/de/jplag/cpp2/CPPParserAdapter.java b/languages/cpp2/src/main/java/de/jplag/cpp2/CPPParserAdapter.java index c123dc29e..ed46d18e0 100644 --- a/languages/cpp2/src/main/java/de/jplag/cpp2/CPPParserAdapter.java +++ b/languages/cpp2/src/main/java/de/jplag/cpp2/CPPParserAdapter.java @@ -2,7 +2,6 @@ import java.io.File; import java.io.IOException; -import java.nio.file.Files; import java.util.ArrayList; import java.util.List; import java.util.Set; @@ -17,6 +16,7 @@ import de.jplag.TokenType; import de.jplag.cpp2.grammar.CPP14Lexer; import de.jplag.cpp2.grammar.CPP14Parser; +import de.jplag.util.FileUtils; /** * The adapter between {@link AbstractParser} and the ANTLR based parser of this language module. @@ -37,7 +37,7 @@ public List scan(Set files) throws ParsingException { this.currentFile = file; logger.trace("Parsing file {}", currentFile); try { - CPP14Lexer lexer = new CPP14Lexer(CharStreams.fromStream(Files.newInputStream(file.toPath()))); + CPP14Lexer lexer = new CPP14Lexer(CharStreams.fromReader(FileUtils.openFileReader(file))); // create a buffer of tokens pulled from the lexer CommonTokenStream tokenStream = new CommonTokenStream(lexer); CPP14Parser parser = new CPP14Parser(tokenStream); diff --git a/languages/csharp/src/main/java/de/jplag/csharp/CSharpParserAdapter.java b/languages/csharp/src/main/java/de/jplag/csharp/CSharpParserAdapter.java index a3668b1a2..3b2b0d8a2 100644 --- a/languages/csharp/src/main/java/de/jplag/csharp/CSharpParserAdapter.java +++ b/languages/csharp/src/main/java/de/jplag/csharp/CSharpParserAdapter.java @@ -1,7 +1,7 @@ package de.jplag.csharp; +import java.io.BufferedReader; import java.io.File; -import java.io.FileInputStream; import java.io.IOException; import java.util.ArrayList; import java.util.List; @@ -19,6 +19,7 @@ import de.jplag.TokenType; import de.jplag.csharp.grammar.CSharpLexer; import de.jplag.csharp.grammar.CSharpParser; +import de.jplag.util.FileUtils; /** * Parser adapter for the ANTLR 
4 CSharp Parser and Lexer. It receives file to parse and passes them to the ANTLR @@ -51,11 +52,11 @@ public List parse(Set files) throws ParsingException { } private void parseFile(File file) throws ParsingException { - try (FileInputStream inputStream = new FileInputStream(file)) { + try (BufferedReader reader = FileUtils.openFileReader(file)) { currentFile = file; // create a lexer, a parser and a buffer between them. - CSharpLexer lexer = new CSharpLexer(CharStreams.fromStream(inputStream)); + CSharpLexer lexer = new CSharpLexer(CharStreams.fromReader(reader)); CommonTokenStream tokens = new CommonTokenStream(lexer); CSharpParser parser = new CSharpParser(tokens); diff --git a/languages/golang/src/main/java/de/jplag/golang/GoParserAdapter.java b/languages/golang/src/main/java/de/jplag/golang/GoParserAdapter.java index b179e0640..3d9da82c6 100644 --- a/languages/golang/src/main/java/de/jplag/golang/GoParserAdapter.java +++ b/languages/golang/src/main/java/de/jplag/golang/GoParserAdapter.java @@ -1,7 +1,7 @@ package de.jplag.golang; +import java.io.BufferedReader; import java.io.File; -import java.io.FileInputStream; import java.io.IOException; import java.util.ArrayList; import java.util.List; @@ -19,6 +19,7 @@ import de.jplag.TokenType; import de.jplag.golang.grammar.GoLexer; import de.jplag.golang.grammar.GoParser; +import de.jplag.util.FileUtils; public class GoParserAdapter extends AbstractParser { private File currentFile; @@ -34,10 +35,10 @@ public List parse(Set files) throws ParsingException { } private void parseFile(File file) throws ParsingException { - try (FileInputStream inputStream = new FileInputStream(file)) { + try (BufferedReader reader = FileUtils.openFileReader(file)) { currentFile = file; - GoLexer lexer = new GoLexer(CharStreams.fromStream(inputStream)); + GoLexer lexer = new GoLexer(CharStreams.fromReader(reader)); CommonTokenStream tokenStream = new CommonTokenStream(lexer); GoParser parser = new GoParser(tokenStream); diff --git 
a/languages/java/src/main/java/de/jplag/java/JavacAdapter.java b/languages/java/src/main/java/de/jplag/java/JavacAdapter.java index bd6972b87..07a501761 100644 --- a/languages/java/src/main/java/de/jplag/java/JavacAdapter.java +++ b/languages/java/src/main/java/de/jplag/java/JavacAdapter.java @@ -2,7 +2,7 @@ import java.io.File; import java.io.IOException; -import java.nio.charset.StandardCharsets; +import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -20,6 +20,7 @@ import de.jplag.ParsingException; import de.jplag.Token; +import de.jplag.util.FileUtils; import com.sun.source.tree.CompilationUnitTree; import com.sun.source.tree.LineMap; @@ -35,7 +36,8 @@ public void parseFiles(Set files, final Parser parser) throws ParsingExcep var listener = new DiagnosticCollector<>(); List parsingExceptions = new ArrayList<>(); - try (final StandardJavaFileManager fileManager = javac.getStandardFileManager(listener, null, StandardCharsets.UTF_8)) { + final Charset guessedCharset = FileUtils.detectCharsetFromMultiple(files); + try (final StandardJavaFileManager fileManager = javac.getStandardFileManager(listener, null, guessedCharset)) { var javaFiles = fileManager.getJavaFileObjectsFromFiles(files); // We need to disable annotation processing, see diff --git a/languages/python-3/src/main/java/de/jplag/python3/Parser.java b/languages/python-3/src/main/java/de/jplag/python3/Parser.java index a24ccc15c..2dc352bfe 100644 --- a/languages/python-3/src/main/java/de/jplag/python3/Parser.java +++ b/languages/python-3/src/main/java/de/jplag/python3/Parser.java @@ -1,7 +1,7 @@ package de.jplag.python3; +import java.io.BufferedReader; import java.io.File; -import java.io.FileInputStream; import java.io.IOException; import java.util.ArrayList; import java.util.List; @@ -19,6 +19,7 @@ import de.jplag.python3.grammar.Python3Lexer; import de.jplag.python3.grammar.Python3Parser; import 
de.jplag.python3.grammar.Python3Parser.File_inputContext; +import de.jplag.util.FileUtils; public class Parser extends AbstractParser { @@ -43,11 +44,11 @@ public List parse(Set files) throws ParsingException { } private void parseFile(File file) throws ParsingException { - try (FileInputStream fileInputStream = new FileInputStream((file))) { + try (BufferedReader reader = FileUtils.openFileReader(file)) { currentFile = file; // create a lexer that feeds off of input CharStream - Python3Lexer lexer = new Python3Lexer(CharStreams.fromStream(fileInputStream)); + Python3Lexer lexer = new Python3Lexer(CharStreams.fromReader(reader)); // create a buffer of tokens pulled from the lexer CommonTokenStream tokens = new CommonTokenStream(lexer); diff --git a/languages/rlang/src/main/java/de/jplag/rlang/RParserAdapter.java b/languages/rlang/src/main/java/de/jplag/rlang/RParserAdapter.java index 6da61e4c1..e3c3aa6bc 100644 --- a/languages/rlang/src/main/java/de/jplag/rlang/RParserAdapter.java +++ b/languages/rlang/src/main/java/de/jplag/rlang/RParserAdapter.java @@ -1,7 +1,7 @@ package de.jplag.rlang; +import java.io.BufferedReader; import java.io.File; -import java.io.FileInputStream; import java.io.IOException; import java.util.ArrayList; import java.util.List; @@ -20,6 +20,7 @@ import de.jplag.rlang.grammar.RFilter; import de.jplag.rlang.grammar.RLexer; import de.jplag.rlang.grammar.RParser; +import de.jplag.util.FileUtils; /** * This class sets up the lexer and parser generated by ANTLR4, feeds the submissions through them and passes the @@ -52,11 +53,11 @@ public List parse(Set files) throws ParsingException { } private void parseFile(File file) throws ParsingException { - try (FileInputStream inputStream = new FileInputStream(file)) { + try (BufferedReader reader = FileUtils.openFileReader(file)) { currentFile = file; // create a lexer, a parser and a buffer between them. 
- RLexer lexer = new RLexer(CharStreams.fromStream(inputStream)); + RLexer lexer = new RLexer(CharStreams.fromReader(reader)); CommonTokenStream tokens = new CommonTokenStream(lexer); RFilter filter = new RFilter(tokens); diff --git a/languages/rust/src/main/java/de/jplag/rust/RustParserAdapter.java b/languages/rust/src/main/java/de/jplag/rust/RustParserAdapter.java index 5a4d9920c..092889054 100644 --- a/languages/rust/src/main/java/de/jplag/rust/RustParserAdapter.java +++ b/languages/rust/src/main/java/de/jplag/rust/RustParserAdapter.java @@ -1,7 +1,7 @@ package de.jplag.rust; +import java.io.BufferedReader; import java.io.File; -import java.io.FileInputStream; import java.io.IOException; import java.util.ArrayList; import java.util.List; @@ -18,6 +18,7 @@ import de.jplag.Token; import de.jplag.rust.grammar.RustLexer; import de.jplag.rust.grammar.RustParser; +import de.jplag.util.FileUtils; public class RustParserAdapter extends AbstractParser { @@ -39,11 +40,11 @@ public List parse(Set files) throws ParsingException { } private void parseFile(File file) throws ParsingException { - try (FileInputStream inputStream = new FileInputStream(file)) { + try (BufferedReader reader = FileUtils.openFileReader(file)) { currentFile = file; // create a lexer, a parser and a buffer between them. 
- RustLexer lexer = new RustLexer(CharStreams.fromStream(inputStream)); + RustLexer lexer = new RustLexer(CharStreams.fromReader(reader)); CommonTokenStream tokenStream = new CommonTokenStream(lexer); RustParser parser = new RustParser(tokenStream); diff --git a/languages/scala/src/main/scala/de/jplag/scala/Parser.scala b/languages/scala/src/main/scala/de/jplag/scala/Parser.scala index c4436eb5e..c5e2f396c 100644 --- a/languages/scala/src/main/scala/de/jplag/scala/Parser.scala +++ b/languages/scala/src/main/scala/de/jplag/scala/Parser.scala @@ -1,9 +1,11 @@ package de.jplag.scala import de.jplag.scala.ScalaTokenType._ +import de.jplag.util.FileUtils import de.jplag.{AbstractParser, ParsingException, Token} import java.io.File +import java.util.stream.Collectors import scala.collection.mutable.ListBuffer import scala.meta._ @@ -343,8 +345,7 @@ class Parser extends AbstractParser { currentFile = file try { - val bytes = java.nio.file.Files.readAllBytes(file.toPath) - val text = new String(bytes, "UTF-8") + val text = FileUtils.readFileContent(file) val input = Input.VirtualFile(file.getPath, text) val ast = input.parse[Source].get traverser(ast) diff --git a/languages/scheme/src/main/javacc/Scheme.jj b/languages/scheme/src/main/javacc/Scheme.jj index de2797daf..1b8384992 100644 --- a/languages/scheme/src/main/javacc/Scheme.jj +++ b/languages/scheme/src/main/javacc/Scheme.jj @@ -58,7 +58,10 @@ import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; +import java.nio.charset.Charset; + import de.jplag.ParsingException; +import de.jplag.util.FileUtils; public class SchemeParser { /* used for context in the template production rule */ @@ -68,10 +71,11 @@ public class SchemeParser { public static void parseFile(File file, SchemeParser parser, Parser parserX) throws ParsingException { try { FileInputStream in = new FileInputStream(file); + Charset charset = FileUtils.detectCharset(file); if (parser == null) { - parser = new 
SchemeParser(in, "UTF-8"); + parser = new SchemeParser(in, charset.name()); } else { - parser.ReInit(in, "UTF-8"); + parser.ReInit(in, charset.name()); } parser.parser2 = parserX; } catch (FileNotFoundException e) { diff --git a/languages/swift/src/main/java/de/jplag/swift/SwiftParserAdapter.java b/languages/swift/src/main/java/de/jplag/swift/SwiftParserAdapter.java index a6614257f..26c8025a8 100644 --- a/languages/swift/src/main/java/de/jplag/swift/SwiftParserAdapter.java +++ b/languages/swift/src/main/java/de/jplag/swift/SwiftParserAdapter.java @@ -1,7 +1,7 @@ package de.jplag.swift; +import java.io.BufferedReader; import java.io.File; -import java.io.FileInputStream; import java.io.IOException; import java.util.ArrayList; import java.util.List; @@ -18,6 +18,7 @@ import de.jplag.Token; import de.jplag.swift.grammar.Swift5Lexer; import de.jplag.swift.grammar.Swift5Parser; +import de.jplag.util.FileUtils; public class SwiftParserAdapter extends AbstractParser { @@ -47,10 +48,10 @@ public List parse(Set files) throws ParsingException { } private void parse(File file) throws ParsingException { - try (FileInputStream inputStream = new FileInputStream(file)) { + try (BufferedReader reader = FileUtils.openFileReader(file)) { currentFile = file; - Swift5Lexer lexer = new Swift5Lexer(CharStreams.fromStream(inputStream)); + Swift5Lexer lexer = new Swift5Lexer(CharStreams.fromReader(reader)); CommonTokenStream tokenStream = new CommonTokenStream(lexer); Swift5Parser parser = new Swift5Parser(tokenStream); diff --git a/languages/text/src/main/java/de/jplag/text/ParserAdapter.java b/languages/text/src/main/java/de/jplag/text/ParserAdapter.java index 975dbe7e3..c3d1ccc83 100644 --- a/languages/text/src/main/java/de/jplag/text/ParserAdapter.java +++ b/languages/text/src/main/java/de/jplag/text/ParserAdapter.java @@ -2,7 +2,6 @@ import java.io.File; import java.io.IOException; -import java.nio.file.Files; import java.util.ArrayList; import java.util.List; import 
java.util.Properties; @@ -11,6 +10,7 @@ import de.jplag.AbstractParser; import de.jplag.ParsingException; import de.jplag.Token; +import de.jplag.util.FileUtils; import edu.stanford.nlp.ling.CoreLabel; import edu.stanford.nlp.pipeline.CoreDocument; @@ -100,7 +100,7 @@ private void addToken(CoreLabel label) { private String readFile(File file) throws ParsingException { try { - return Files.readString(file.toPath()); + return FileUtils.readFileContent(file); } catch (IOException e) { throw new ParsingException(file, e.getMessage(), e); } From 48f27eb2017f9861b17f9ca9aded2c3643068143 Mon Sep 17 00:00:00 2001 From: Alexander Milster Date: Wed, 12 Apr 2023 12:46:08 +0200 Subject: [PATCH 057/132] Added a test case for guessing the charset from multiple files. --- .../src/test/java/de/jplag/util/FileUtilTest.java | 11 +++++++++++ .../test/resources/de/jplag/fileSetEncoding/ascii1 | 1 + .../test/resources/de/jplag/fileSetEncoding/ascii2 | 1 + .../test/resources/de/jplag/fileSetEncoding/notAscii | 1 + 4 files changed, 14 insertions(+) create mode 100644 language-api/src/test/resources/de/jplag/fileSetEncoding/ascii1 create mode 100644 language-api/src/test/resources/de/jplag/fileSetEncoding/ascii2 create mode 100644 language-api/src/test/resources/de/jplag/fileSetEncoding/notAscii diff --git a/language-api/src/test/java/de/jplag/util/FileUtilTest.java b/language-api/src/test/java/de/jplag/util/FileUtilTest.java index 52ab58e6a..ff66c0b49 100644 --- a/language-api/src/test/java/de/jplag/util/FileUtilTest.java +++ b/language-api/src/test/java/de/jplag/util/FileUtilTest.java @@ -3,14 +3,18 @@ import java.io.File; import java.io.IOException; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.nio.file.Path; +import java.util.Set; import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.MethodSource; public class FileUtilTest { 
private static final Path TEST_FILE_LOCATION = Path.of("src", "test", "resources", "de", "jplag", "fileReaderTests"); + private static final Path TEST_FILE_SET_LOCATION = Path.of("src", "test", "resources", "de", "jplag", "fileSetEncoding"); private static final String expectedFileContent = "Some ascii characters and some others: ä#+öü%&(/)?=?"; @@ -29,6 +33,13 @@ public void testCharsetDetection(File file) throws IOException { "Wrong charset assumed for: " + file.getAbsolutePath()); } + @Test + public void testDetectFromFileSet() { + Set files = Set.of(TEST_FILE_SET_LOCATION.toFile().listFiles()); + Charset encoding = FileUtils.detectCharsetFromMultiple(files); + Assertions.assertEquals(StandardCharsets.ISO_8859_1, encoding); + } + public static File[] searchTestFiles() { return TEST_FILE_LOCATION.toFile().listFiles(); } diff --git a/language-api/src/test/resources/de/jplag/fileSetEncoding/ascii1 b/language-api/src/test/resources/de/jplag/fileSetEncoding/ascii1 new file mode 100644 index 000000000..4d039c8df --- /dev/null +++ b/language-api/src/test/resources/de/jplag/fileSetEncoding/ascii1 @@ -0,0 +1 @@ +some simple ascii characters \ No newline at end of file diff --git a/language-api/src/test/resources/de/jplag/fileSetEncoding/ascii2 b/language-api/src/test/resources/de/jplag/fileSetEncoding/ascii2 new file mode 100644 index 000000000..72af43005 --- /dev/null +++ b/language-api/src/test/resources/de/jplag/fileSetEncoding/ascii2 @@ -0,0 +1 @@ +some more ascii characters \ No newline at end of file diff --git a/language-api/src/test/resources/de/jplag/fileSetEncoding/notAscii b/language-api/src/test/resources/de/jplag/fileSetEncoding/notAscii new file mode 100644 index 000000000..3cfd99b9d --- /dev/null +++ b/language-api/src/test/resources/de/jplag/fileSetEncoding/notAscii @@ -0,0 +1 @@ +this contains a non ascii character: � From be51ee2441db7e1d3b55da144a25c3735de18570 Mon Sep 17 00:00:00 2001 From: Alexander Milster Date: Wed, 12 Apr 2023 12:59:17 +0200 
Subject: [PATCH 058/132] Small error I missed when first commiting. --- languages/scheme/src/main/javacc/Scheme.jj | 2 ++ 1 file changed, 2 insertions(+) diff --git a/languages/scheme/src/main/javacc/Scheme.jj b/languages/scheme/src/main/javacc/Scheme.jj index 1b8384992..94d9b1a90 100644 --- a/languages/scheme/src/main/javacc/Scheme.jj +++ b/languages/scheme/src/main/javacc/Scheme.jj @@ -81,6 +81,8 @@ public class SchemeParser { } catch (FileNotFoundException e) { System.out.println("Scheme Parser R4RS: File " + file.getName() + " not found."); throw new ParsingException(file, e.getMessage(), e); + } catch (IOException e) { + throw new ParsingException(file, e.getMessage(), e); } try { parser.Program(); From 8e95667b1ee3cb4abecab7acda85e5d02bb84163 Mon Sep 17 00:00:00 2001 From: Alexander Milster Date: Wed, 12 Apr 2023 12:59:17 +0200 Subject: [PATCH 059/132] Small error I missed when first commiting. --- languages/scheme/src/main/javacc/Scheme.jj | 3 +++ 1 file changed, 3 insertions(+) diff --git a/languages/scheme/src/main/javacc/Scheme.jj b/languages/scheme/src/main/javacc/Scheme.jj index 1b8384992..9d09407d8 100644 --- a/languages/scheme/src/main/javacc/Scheme.jj +++ b/languages/scheme/src/main/javacc/Scheme.jj @@ -58,6 +58,7 @@ import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; +import java.io.IOException; import java.nio.charset.Charset; import de.jplag.ParsingException; @@ -81,6 +82,8 @@ public class SchemeParser { } catch (FileNotFoundException e) { System.out.println("Scheme Parser R4RS: File " + file.getName() + " not found."); throw new ParsingException(file, e.getMessage(), e); + } catch (IOException e) { + throw new ParsingException(file, e.getMessage(), e); } try { parser.Program(); From d4a49695fbe0b0bb4f9d880ac04184bbedf1090d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Thu, 13 Apr 2023 12:46:39 +0200 Subject: [PATCH 060/132] Replace prints with logger calls --- 
core/src/main/java/de/jplag/Submission.java | 16 +++++++++----- .../normalization/NormalizationGraph.java | 6 +++--- .../src/main/java/de/jplag/java/Parser.java | 21 ++++--------------- 3 files changed, 18 insertions(+), 25 deletions(-) diff --git a/core/src/main/java/de/jplag/Submission.java b/core/src/main/java/de/jplag/Submission.java index 36f485dfe..3e09bb8ee 100644 --- a/core/src/main/java/de/jplag/Submission.java +++ b/core/src/main/java/de/jplag/Submission.java @@ -4,12 +4,13 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashSet; -import java.util.LinkedList; import java.util.List; import java.util.Objects; +import java.util.Set; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -270,14 +271,19 @@ private static File createErrorDirectory(String... subdirectoryNames) { } void normalize() { - // System.out.println(); - // System.out.println("original: " + getOrder(tokenList)); + List originalOrder = getOrder(tokenList); tokenList = new NormalizationGraph(tokenList).linearize(); - // System.out.println("normalized: " + getOrder(tokenList)); + List normalizedOrder = getOrder(tokenList); + + logger.debug("original line order: " + originalOrder); + logger.debug("line order after normalization: " + normalizedOrder); + Set normalizedSet = new HashSet<>(normalizedOrder); + List removed = originalOrder.stream().filter(l -> !normalizedSet.contains(l)).toList(); + logger.debug("removed %d line(s): %s".formatted(removed.size(), removed)); } private List getOrder(List tokenList) { - List order = new LinkedList<>(); + List order = new ArrayList<>(tokenList.size()); // a little too big int currentLineNumber = tokenList.get(0).getLine(); order.add(currentLineNumber); for (Token token : tokenList) { diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java 
b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java index fd823b670..ee285f284 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java @@ -1,7 +1,7 @@ package de.jplag.normalization; import java.util.ArrayList; -import java.util.Deque; +import java.util.Queue; import java.util.LinkedList; import java.util.List; import java.util.PriorityQueue; @@ -57,10 +57,10 @@ public List linearize() { * Spread keep status to every node that does not represent dead code. Nodes without keep status are later eliminated. */ private void spreadKeep() { - Deque visit = new LinkedList<>(graph.vertexSet().stream() // + Queue visit = new LinkedList<>(graph.vertexSet().stream() // .filter(tl -> tl.semantics().keep()).toList()); while (!visit.isEmpty()) { - Statement current = visit.pop(); + Statement current = visit.remove(); for (Statement pred : Graphs.predecessorListOf(graph, current)) { // performance of iteration? 
if (!pred.semantics().keep() && graph.getEdge(pred, current).isVariableFlow()) { pred.markKeep(); diff --git a/languages/java/src/main/java/de/jplag/java/Parser.java b/languages/java/src/main/java/de/jplag/java/Parser.java index ee2a96306..805e0686c 100644 --- a/languages/java/src/main/java/de/jplag/java/Parser.java +++ b/languages/java/src/main/java/de/jplag/java/Parser.java @@ -22,27 +22,14 @@ public Parser() { public List parse(Set files) throws ParsingException { tokens = new ArrayList<>(); new JavacAdapter().parseFiles(files, this); - // printSemantics(); + logger.debug("--- token semantics ---"); + for (Token token: tokens) { + logger.debug("%d | %s | %s".formatted(token.getLine(), token.getType().getDescription(), token.getSemantics())); + } return tokens; } public void add(Token token) { tokens.add(token); } - - public void printSemantics() { - long currentLine = 0; - for (Token token : tokens) { - if (token.getLine() != currentLine) { - currentLine = token.getLine(); - System.out.println(); - System.out.println(token.getLine()); - } - System.out.print(token.getType().getDescription()); - System.out.print(" | "); - System.out.println(token.getSemantics()); - } - System.out.println(); - System.out.println("=".repeat(100)); - } } From 8ba324db1395aa11a8b93f983a6eceff2a86ac13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Thu, 13 Apr 2023 12:54:28 +0200 Subject: [PATCH 061/132] Replace LinkedList with ArrayList where appropriate --- .../jplag/normalization/NormalizationGraphConstructor.java | 6 +++--- .../main/java/de/jplag/normalization/StatementBuilder.java | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java index a59164602..46ec19635 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java +++ 
b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java @@ -1,9 +1,9 @@ package de.jplag.normalization; +import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; -import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; @@ -27,7 +27,7 @@ class NormalizationGraphConstructor { NormalizationGraphConstructor(List tokens) { graph = new SimpleDirectedGraph<>(Edge.class); bidirectionalBlockDepth = 0; - fullPositionSignificanceIngoing = new LinkedList<>(); + fullPositionSignificanceIngoing = new ArrayList<>(); variableReads = new HashMap<>(); variableWrites = new HashMap<>(); inCurrentBidirectionalBlock = new HashSet<>(); @@ -124,7 +124,7 @@ private void addIngoingEdgeToCurrent(Statement start, EdgeType type, Variable ca } private void addVariableToMap(Map> variableMap, Variable variable) { - variableMap.putIfAbsent(variable, new LinkedList<>()); + variableMap.putIfAbsent(variable, new ArrayList<>()); variableMap.get(variable).add(current); } } diff --git a/core/src/main/java/de/jplag/normalization/StatementBuilder.java b/core/src/main/java/de/jplag/normalization/StatementBuilder.java index b582940f3..5b7099f41 100644 --- a/core/src/main/java/de/jplag/normalization/StatementBuilder.java +++ b/core/src/main/java/de/jplag/normalization/StatementBuilder.java @@ -1,6 +1,6 @@ package de.jplag.normalization; -import java.util.LinkedList; +import java.util.ArrayList; import java.util.List; import de.jplag.Token; @@ -12,7 +12,7 @@ class StatementBuilder { StatementBuilder(int lineNumber) { this.lineNumber = lineNumber; - this.tokens = new LinkedList<>(); + this.tokens = new ArrayList<>(); } int lineNumber() { From e9e046b96a4b7c9e39cb4c1b5131b84c940408fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Thu, 13 Apr 2023 13:28:47 +0200 Subject: [PATCH 062/132] Remove token string normalization comment from JPlag::run --- 
core/src/main/java/de/jplag/JPlag.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/core/src/main/java/de/jplag/JPlag.java b/core/src/main/java/de/jplag/JPlag.java index 3480e8f86..137e6b11c 100644 --- a/core/src/main/java/de/jplag/JPlag.java +++ b/core/src/main/java/de/jplag/JPlag.java @@ -59,10 +59,6 @@ public JPlagResult run() throws ExitException { throw new SubmissionException("Not enough valid submissions! (found " + submissionCount + " valid submissions)"); } - // better solution long-term: pull this into SubmissionSet constructor, use language-specific option - // if (language.tokensHaveSemantics()) - // submissionSet.normalizeSubmissions(); - // Compare valid submissions. JPlagResult result = comparisonStrategy.compareSubmissions(submissionSet); if (logger.isInfoEnabled()) From b8ed525f2ee74f279f10b21b8953cc3bc7bdb35b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Thu, 13 Apr 2023 13:48:10 +0200 Subject: [PATCH 063/132] Satisfy Sonar --- core/src/main/java/de/jplag/Submission.java | 12 ++++++------ .../normalization/NormalizationGraph.java | 2 +- .../NormalizationGraphConstructor.java | 12 ++++++------ .../java/de/jplag/normalization/Statement.java | 18 ++++++++++++++++-- .../de/jplag/semantics/VariableRegistry.java | 10 ++-------- .../src/main/java/de/jplag/java/Parser.java | 4 ++-- 6 files changed, 33 insertions(+), 25 deletions(-) diff --git a/core/src/main/java/de/jplag/Submission.java b/core/src/main/java/de/jplag/Submission.java index 3e09bb8ee..cf94b6548 100644 --- a/core/src/main/java/de/jplag/Submission.java +++ b/core/src/main/java/de/jplag/Submission.java @@ -87,11 +87,11 @@ public int compareTo(Submission other) { } @Override - public boolean equals(Object obj) { - if (obj == this) { + public boolean equals(Object object) { + if (object == this) { return true; } - if (!(obj instanceof Submission otherSubmission)) { + if (!(object instanceof Submission otherSubmission)) { return false; } return 
otherSubmission.getName().equals(name); @@ -275,11 +275,11 @@ void normalize() { tokenList = new NormalizationGraph(tokenList).linearize(); List normalizedOrder = getOrder(tokenList); - logger.debug("original line order: " + originalOrder); - logger.debug("line order after normalization: " + normalizedOrder); + logger.debug("original line order: {}", originalOrder); + logger.debug("line order after normalization: {}", normalizedOrder); Set normalizedSet = new HashSet<>(normalizedOrder); List removed = originalOrder.stream().filter(l -> !normalizedSet.contains(l)).toList(); - logger.debug("removed %d line(s): %s".formatted(removed.size(), removed)); + logger.debug("removed {} line(s): {}", removed.size(), removed); } private List getOrder(List tokenList) { diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java index ee285f284..b0bafa15f 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java @@ -1,10 +1,10 @@ package de.jplag.normalization; import java.util.ArrayList; -import java.util.Queue; import java.util.LinkedList; import java.util.List; import java.util.PriorityQueue; +import java.util.Queue; import java.util.stream.Collectors; import org.jgrapht.Graphs; diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java index 46ec19635..4b0c2a54c 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java @@ -31,15 +31,15 @@ class NormalizationGraphConstructor { variableReads = new HashMap<>(); variableWrites = new HashMap<>(); inCurrentBidirectionalBlock = new HashSet<>(); - StatementBuilder current = new StatementBuilder(tokens.get(0).getLine()); + 
StatementBuilder builderForCurrent = new StatementBuilder(tokens.get(0).getLine()); for (Token token : tokens) { - if (token.getLine() != current.lineNumber()) { - addStatement(current.build()); - current = new StatementBuilder(token.getLine()); + if (token.getLine() != builderForCurrent.lineNumber()) { + addStatement(builderForCurrent.build()); + builderForCurrent = new StatementBuilder(token.getLine()); } - current.addToken(token); + builderForCurrent.addToken(token); } - addStatement(current.build()); + addStatement(builderForCurrent.build()); } SimpleDirectedGraph get() { diff --git a/core/src/main/java/de/jplag/normalization/Statement.java b/core/src/main/java/de/jplag/normalization/Statement.java index f1d1f0e2c..87fb49544 100644 --- a/core/src/main/java/de/jplag/normalization/Statement.java +++ b/core/src/main/java/de/jplag/normalization/Statement.java @@ -40,16 +40,30 @@ public int compareTo(Statement other) { int sizeComp = Integer.compare(this.tokens.size(), other.tokens.size()); if (sizeComp != 0) return -sizeComp; // bigger size should come first - Iterator tokens = this.tokens.iterator(); + Iterator myTokens = this.tokens.iterator(); Iterator otherTokens = other.tokens.iterator(); for (int i = 0; i < this.tokens.size(); i++) { - int tokenComp = Integer.compare(tokenOrdinal(tokens.next()), tokenOrdinal(otherTokens.next())); + int tokenComp = Integer.compare(tokenOrdinal(myTokens.next()), tokenOrdinal(otherTokens.next())); if (tokenComp != 0) return tokenComp; } return 0; } + @Override + public boolean equals(Object object) { + if (this == object) + return true; + if (object == null || getClass() != object.getClass()) + return false; + return tokens.equals(((Statement) object).tokens); + } + + @Override + public int hashCode() { + return tokens.hashCode(); + } + @Override public String toString() { return lineNumber + ": " + String.join(" ", tokens.stream().map(Token::toString).toList()); diff --git 
a/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java b/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java index a411bab32..e338adb61 100644 --- a/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java +++ b/language-api/src/main/java/de/jplag/semantics/VariableRegistry.java @@ -126,8 +126,8 @@ public void registerVariable(String variableName, VariableScope scope, boolean m } /** - * Register a variable access, more precisely: Add a variable access to the current CodeSemantics instance. The type of the access - * can be set with setNextVariableAccessType. By default, its type is read. + * Register a variable access, more precisely: Add a variable access to the current CodeSemantics instance. The type of + * the access can be set with setNextVariableAccessType. By default, its type is read. * @param variableName The variable's name. * @param isClassVariable Whether the variable is a class variable. This is true if a variable is qualified with the * "this" keyword in Java, for example. @@ -166,12 +166,6 @@ private Variable getVariable(String variableName) { return variableIdStack.getFirst(); // stack is never empty Variable variable = getClassVariable(variableName); return variable != null ? variable : fileVariables.get(variableName); - /* - * todo track global variables -> hard, how to differentiate SomeClass.staticAttr++ from String.join(...) 
// problem - * here: all String.joins (for example) are registered as writes to String // get global variable, register if it - * doesn't exist variable = globalVariables.get(variableName); if (variable != null) return variable; variable = new - * Variable(variableName, false, true); globalVariables.put(variableName, variable); return variable; - */ } private Variable getClassVariable(String variableName) { diff --git a/languages/java/src/main/java/de/jplag/java/Parser.java b/languages/java/src/main/java/de/jplag/java/Parser.java index 805e0686c..f6a35e370 100644 --- a/languages/java/src/main/java/de/jplag/java/Parser.java +++ b/languages/java/src/main/java/de/jplag/java/Parser.java @@ -23,8 +23,8 @@ public List parse(Set files) throws ParsingException { tokens = new ArrayList<>(); new JavacAdapter().parseFiles(files, this); logger.debug("--- token semantics ---"); - for (Token token: tokens) { - logger.debug("%d | %s | %s".formatted(token.getLine(), token.getType().getDescription(), token.getSemantics())); + for (Token token : tokens) { + logger.debug("{} | {} | {}", token.getLine(), token.getType().getDescription(), token.getSemantics()); } return tokens; } From 78e3036e996bdc92d1c6b87aa06cd008289473dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Thu, 13 Apr 2023 15:00:34 +0200 Subject: [PATCH 064/132] Make normalization static --- core/src/main/java/de/jplag/Submission.java | 4 +- .../normalization/NormalizationGraph.java | 78 ------------------- .../normalization/TokenStringNormalizer.java | 74 ++++++++++++++++++ 3 files changed, 76 insertions(+), 80 deletions(-) delete mode 100644 core/src/main/java/de/jplag/normalization/NormalizationGraph.java create mode 100644 core/src/main/java/de/jplag/normalization/TokenStringNormalizer.java diff --git a/core/src/main/java/de/jplag/Submission.java b/core/src/main/java/de/jplag/Submission.java index cf94b6548..af1aa3614 100644 --- a/core/src/main/java/de/jplag/Submission.java +++ 
b/core/src/main/java/de/jplag/Submission.java @@ -15,7 +15,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import de.jplag.normalization.NormalizationGraph; +import de.jplag.normalization.TokenStringNormalizer; /** * Represents a single submission. A submission can contain multiple files. @@ -272,7 +272,7 @@ private static File createErrorDirectory(String... subdirectoryNames) { void normalize() { List originalOrder = getOrder(tokenList); - tokenList = new NormalizationGraph(tokenList).linearize(); + TokenStringNormalizer.normalize(tokenList); List normalizedOrder = getOrder(tokenList); logger.debug("original line order: {}", originalOrder); diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java b/core/src/main/java/de/jplag/normalization/NormalizationGraph.java deleted file mode 100644 index b0bafa15f..000000000 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraph.java +++ /dev/null @@ -1,78 +0,0 @@ -package de.jplag.normalization; - -import java.util.ArrayList; -import java.util.LinkedList; -import java.util.List; -import java.util.PriorityQueue; -import java.util.Queue; -import java.util.stream.Collectors; - -import org.jgrapht.Graphs; -import org.jgrapht.graph.SimpleDirectedGraph; - -import de.jplag.Token; - -public class NormalizationGraph { - private SimpleDirectedGraph graph; - - /** - * Construct a new normalization graph from the tokens. - * @param tokens The tokens used to construct the normalization graph. - */ - public NormalizationGraph(List tokens) { - graph = new NormalizationGraphConstructor(tokens).get(); - } - - /** - * Turns this normalization graph back into a list of tokens. Tokens representing dead code have been eliminated and - * tokens representing subsequent independent statements have been put in a fixed order. - * @return the normalized list of tokens. 
- */ - public List linearize() { - spreadKeep(); - PriorityQueue roots = graph.vertexSet().stream() // - .filter(v -> !Graphs.vertexHasPredecessors(graph, v)) // - .collect(Collectors.toCollection(PriorityQueue::new)); - List tokens = new ArrayList<>(); - while (!roots.isEmpty()) { - PriorityQueue newRoots = new PriorityQueue<>(); - do { - Statement statement = roots.poll(); - if (statement.semantics().keep()) { - tokens.addAll(statement.tokens()); - } - for (Statement succ : Graphs.successorListOf(graph, statement)) { - graph.removeEdge(statement, succ); - if (!Graphs.vertexHasPredecessors(graph, succ)) { - newRoots.add(succ); - } - } - } while (!roots.isEmpty()); - roots = newRoots; - } - return tokens; - } - - /** - * Spread keep status to every node that does not represent dead code. Nodes without keep status are later eliminated. - */ - private void spreadKeep() { - Queue visit = new LinkedList<>(graph.vertexSet().stream() // - .filter(tl -> tl.semantics().keep()).toList()); - while (!visit.isEmpty()) { - Statement current = visit.remove(); - for (Statement pred : Graphs.predecessorListOf(graph, current)) { // performance of iteration? 
- if (!pred.semantics().keep() && graph.getEdge(pred, current).isVariableFlow()) { - pred.markKeep(); - visit.add(pred); - } - } - for (Statement succ : Graphs.successorListOf(graph, current)) { - if (!succ.semantics().keep() && graph.getEdge(current, succ).isVariableReverseFlow()) { - succ.markKeep(); - visit.add(succ); - } - } - } - } -} diff --git a/core/src/main/java/de/jplag/normalization/TokenStringNormalizer.java b/core/src/main/java/de/jplag/normalization/TokenStringNormalizer.java new file mode 100644 index 000000000..6b8e373a9 --- /dev/null +++ b/core/src/main/java/de/jplag/normalization/TokenStringNormalizer.java @@ -0,0 +1,74 @@ +package de.jplag.normalization; + +import java.util.LinkedList; +import java.util.List; +import java.util.PriorityQueue; +import java.util.Queue; +import java.util.stream.Collectors; + +import org.jgrapht.Graphs; +import org.jgrapht.graph.SimpleDirectedGraph; + +import de.jplag.Token; + +/** + * The class that performs token string normalization. + */ +public class TokenStringNormalizer { + + private TokenStringNormalizer() { + } + + /** + * Normalizes the token string it receives inplace. Tokens representing dead code have been eliminated and tokens + * representing subsequent independent statements have been put in a fixed order. Works by first constructing a + * Normalization Graph and then turning it back into a token string. 
+ */ + public static void normalize(List tokens) { + SimpleDirectedGraph normalizationGraph = new NormalizationGraphConstructor(tokens).get(); + tokens.clear(); + spreadKeep(normalizationGraph); + PriorityQueue roots = normalizationGraph.vertexSet().stream() // + .filter(v -> !Graphs.vertexHasPredecessors(normalizationGraph, v)) // + .collect(Collectors.toCollection(PriorityQueue::new)); + while (!roots.isEmpty()) { + PriorityQueue newRoots = new PriorityQueue<>(); + do { + Statement statement = roots.poll(); + if (statement.semantics().keep()) { + tokens.addAll(statement.tokens()); + } + for (Statement successor : Graphs.successorListOf(normalizationGraph, statement)) { + normalizationGraph.removeEdge(statement, successor); + if (!Graphs.vertexHasPredecessors(normalizationGraph, successor)) { + newRoots.add(successor); + } + } + } while (!roots.isEmpty()); + roots = newRoots; + } + } + + /** + * Spread keep status to every node that does not represent dead code. Nodes without keep status are later eliminated. + */ + private static void spreadKeep(SimpleDirectedGraph normalizationGraph) { + Queue visit = new LinkedList<>(normalizationGraph.vertexSet().stream() // + .filter(tl -> tl.semantics().keep()).toList()); + while (!visit.isEmpty()) { + Statement current = visit.remove(); + for (Statement predecessor : Graphs.predecessorListOf(normalizationGraph, current)) { // performance of iteration? 
+ if (!predecessor.semantics().keep() && normalizationGraph.getEdge(predecessor, current).isVariableFlow()) { + predecessor.markKeep(); + visit.add(predecessor); + } + } + for (Statement successor : Graphs.successorListOf(normalizationGraph, current)) { + if (!successor.semantics().keep() && normalizationGraph.getEdge(current, successor).isVariableReverseFlow()) { + successor.markKeep(); + visit.add(successor); + } + } + } + } +} From 2f22bfc761bad610fa90ee1c706b8b7ad42b4cb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Sat, 15 Apr 2023 12:52:54 +0200 Subject: [PATCH 065/132] Improve naming & comments --- .../java/de/jplag/normalization/Edge.java | 29 +------------ .../java/de/jplag/normalization/EdgeItem.java | 6 --- .../java/de/jplag/normalization/EdgeType.java | 14 +++---- .../de/jplag/normalization/MultipleEdge.java | 33 +++++++++++++++ .../NormalizationGraphConstructor.java | 42 +++++++++---------- .../normalization/TokenStringNormalizer.java | 8 ++-- .../de/jplag/semantics/CodeSemantics.java | 2 +- 7 files changed, 67 insertions(+), 67 deletions(-) delete mode 100644 core/src/main/java/de/jplag/normalization/EdgeItem.java create mode 100644 core/src/main/java/de/jplag/normalization/MultipleEdge.java diff --git a/core/src/main/java/de/jplag/normalization/Edge.java b/core/src/main/java/de/jplag/normalization/Edge.java index 106b44baf..5f6922db3 100644 --- a/core/src/main/java/de/jplag/normalization/Edge.java +++ b/core/src/main/java/de/jplag/normalization/Edge.java @@ -1,33 +1,6 @@ package de.jplag.normalization; -import java.util.HashSet; -import java.util.Set; - import de.jplag.semantics.Variable; -class Edge { - private Set items; - private boolean isVariableFlow; - private boolean isVariableReverseFlow; - - Edge() { - items = new HashSet<>(); - isVariableFlow = false; - } - - boolean isVariableFlow() { - return isVariableFlow; - } - - boolean isVariableReverseFlow() { - return isVariableReverseFlow; - } - - void addItem(EdgeType 
type, Variable cause) { - if (type == EdgeType.VARIABLE_FLOW) - isVariableFlow = true; - if (type == EdgeType.VARIABLE_REVERSE_FLOW) - isVariableReverseFlow = true; - items.add(new EdgeItem(type, cause)); - } +record Edge(EdgeType type, Variable cause) { } diff --git a/core/src/main/java/de/jplag/normalization/EdgeItem.java b/core/src/main/java/de/jplag/normalization/EdgeItem.java deleted file mode 100644 index 376839c08..000000000 --- a/core/src/main/java/de/jplag/normalization/EdgeItem.java +++ /dev/null @@ -1,6 +0,0 @@ -package de.jplag.normalization; - -import de.jplag.semantics.Variable; - -record EdgeItem(EdgeType type, Variable cause) { -} diff --git a/core/src/main/java/de/jplag/normalization/EdgeType.java b/core/src/main/java/de/jplag/normalization/EdgeType.java index a377d97c3..f75b30ffe 100644 --- a/core/src/main/java/de/jplag/normalization/EdgeType.java +++ b/core/src/main/java/de/jplag/normalization/EdgeType.java @@ -1,28 +1,28 @@ package de.jplag.normalization; /** - * Enum for types of edges in normalization graph. Given two statements A and B, A comes before B, there is such an edge - * between A and B if... + * Enum for types of edges in normalization graph. Given two statements S and T, S comes before T, there is such an edge + * between S and T if... */ enum EdgeType { /** - * B reads from a variable A writes. + * S writes a variable T reads. */ VARIABLE_FLOW, /** - * A reads from a variable B writes, and A and B are in the same bidirectional block. + * S reads a variable T writes, and S and T are in the same bidirectional block. */ VARIABLE_REVERSE_FLOW, /** - * A and B access the same variable, and at least one of the two accesses is not a read. + * S and T access the same variable, and at least one of the two accesses is not a read. */ VARIABLE_ORDER, /** - * A or B have full position significance, and there is no statement C with full position significance between them. 
+ * S or T have full position significance, and there is no statement C with full position significance between them. */ POSITION_SIGNIFICANCE_FULL, /** - * A and B have partial position significance, and there is no statement C with partial position significance between + * S and T have partial position significance, and there is no statement C with partial position significance between * them. */ POSITION_SIGNIFICANCE_PARTIAL diff --git a/core/src/main/java/de/jplag/normalization/MultipleEdge.java b/core/src/main/java/de/jplag/normalization/MultipleEdge.java new file mode 100644 index 000000000..f42e2953b --- /dev/null +++ b/core/src/main/java/de/jplag/normalization/MultipleEdge.java @@ -0,0 +1,33 @@ +package de.jplag.normalization; + +import java.util.HashSet; +import java.util.Set; + +import de.jplag.semantics.Variable; + +class MultipleEdge { + private Set edges; + private boolean isVariableFlow; + private boolean isVariableReverseFlow; + + MultipleEdge() { + edges = new HashSet<>(); + isVariableFlow = false; + } + + boolean isVariableFlow() { + return isVariableFlow; + } + + boolean isVariableReverseFlow() { + return isVariableReverseFlow; + } + + void addEdge(EdgeType type, Variable cause) { + if (type == EdgeType.VARIABLE_FLOW) + isVariableFlow = true; + if (type == EdgeType.VARIABLE_REVERSE_FLOW) + isVariableReverseFlow = true; + edges.add(new Edge(type, cause)); + } +} diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java index 4b0c2a54c..d83049ad8 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java @@ -14,9 +14,9 @@ import de.jplag.semantics.Variable; class NormalizationGraphConstructor { - private SimpleDirectedGraph graph; + private SimpleDirectedGraph graph; private int bidirectionalBlockDepth; - private Collection 
fullPositionSignificanceIngoing; + private Collection fullPositionSignificanceIncoming; private Statement lastFullPositionSignificance; private Statement lastPartialPositionSignificance; private Map> variableReads; @@ -25,9 +25,9 @@ class NormalizationGraphConstructor { private Statement current; NormalizationGraphConstructor(List tokens) { - graph = new SimpleDirectedGraph<>(Edge.class); + graph = new SimpleDirectedGraph<>(MultipleEdge.class); bidirectionalBlockDepth = 0; - fullPositionSignificanceIngoing = new ArrayList<>(); + fullPositionSignificanceIncoming = new ArrayList<>(); variableReads = new HashMap<>(); variableWrites = new HashMap<>(); inCurrentBidirectionalBlock = new HashSet<>(); @@ -42,7 +42,7 @@ class NormalizationGraphConstructor { addStatement(builderForCurrent.build()); } - SimpleDirectedGraph get() { + SimpleDirectedGraph get() { return graph; } @@ -70,20 +70,20 @@ private void processBidirectionalBlock() { private void processFullPositionSignificance() { if (current.semantics().hasFullPositionSignificance()) { - for (Statement node : fullPositionSignificanceIngoing) - addIngoingEdgeToCurrent(node, EdgeType.POSITION_SIGNIFICANCE_FULL, null); - fullPositionSignificanceIngoing.clear(); + for (Statement node : fullPositionSignificanceIncoming) + addIncomingEdgeToCurrent(node, EdgeType.POSITION_SIGNIFICANCE_FULL, null); + fullPositionSignificanceIncoming.clear(); lastFullPositionSignificance = current; } else if (lastFullPositionSignificance != null) { - addIngoingEdgeToCurrent(lastFullPositionSignificance, EdgeType.POSITION_SIGNIFICANCE_FULL, null); + addIncomingEdgeToCurrent(lastFullPositionSignificance, EdgeType.POSITION_SIGNIFICANCE_FULL, null); } - fullPositionSignificanceIngoing.add(current); + fullPositionSignificanceIncoming.add(current); } private void processPartialPositionSignificance() { if (current.semantics().hasPartialPositionSignificance()) { if (lastPartialPositionSignificance != null) { - 
addIngoingEdgeToCurrent(lastPartialPositionSignificance, EdgeType.POSITION_SIGNIFICANCE_PARTIAL, null); + addIncomingEdgeToCurrent(lastPartialPositionSignificance, EdgeType.POSITION_SIGNIFICANCE_PARTIAL, null); } lastPartialPositionSignificance = current; } @@ -92,35 +92,35 @@ private void processPartialPositionSignificance() { private void processReads() { for (Variable variable : current.semantics().reads()) { for (Statement node : variableWrites.getOrDefault(variable, Set.of())) - addIngoingEdgeToCurrent(node, EdgeType.VARIABLE_FLOW, variable); + addIncomingEdgeToCurrent(node, EdgeType.VARIABLE_FLOW, variable); } } private void processWrites() { for (Variable variable : current.semantics().writes()) { for (Statement node : variableWrites.getOrDefault(variable, Set.of())) - addIngoingEdgeToCurrent(node, EdgeType.VARIABLE_ORDER, variable); + addIncomingEdgeToCurrent(node, EdgeType.VARIABLE_ORDER, variable); for (Statement node : variableReads.getOrDefault(variable, Set.of())) { EdgeType edgeType = inCurrentBidirectionalBlock.contains(node) ? // EdgeType.VARIABLE_REVERSE_FLOW : EdgeType.VARIABLE_ORDER; - addIngoingEdgeToCurrent(node, edgeType, variable); + addIncomingEdgeToCurrent(node, edgeType, variable); } } } /** - * Adds an ingoing edge to the current node. + * Adds an incoming edge to the current node. 
* @param start the start of the edge * @param type the type of the edge * @param cause the variable that caused the edge, may be null */ - private void addIngoingEdgeToCurrent(Statement start, EdgeType type, Variable cause) { - Edge edge = graph.getEdge(start, current); - if (edge == null) { - edge = new Edge(); - graph.addEdge(start, current, edge); + private void addIncomingEdgeToCurrent(Statement start, EdgeType type, Variable cause) { + MultipleEdge multipleEdge = graph.getEdge(start, current); + if (multipleEdge == null) { + multipleEdge = new MultipleEdge(); + graph.addEdge(start, current, multipleEdge); } - edge.addItem(type, cause); + multipleEdge.addEdge(type, cause); } private void addVariableToMap(Map> variableMap, Variable variable) { diff --git a/core/src/main/java/de/jplag/normalization/TokenStringNormalizer.java b/core/src/main/java/de/jplag/normalization/TokenStringNormalizer.java index 6b8e373a9..47a053d0f 100644 --- a/core/src/main/java/de/jplag/normalization/TokenStringNormalizer.java +++ b/core/src/main/java/de/jplag/normalization/TokenStringNormalizer.java @@ -12,7 +12,7 @@ import de.jplag.Token; /** - * The class that performs token string normalization. + * Performs token string normalization. */ public class TokenStringNormalizer { @@ -20,12 +20,12 @@ private TokenStringNormalizer() { } /** - * Normalizes the token string it receives inplace. Tokens representing dead code have been eliminated and tokens + * Normalizes the token string it receives in place. Tokens representing dead code have been eliminated and tokens * representing subsequent independent statements have been put in a fixed order. Works by first constructing a * Normalization Graph and then turning it back into a token string. 
*/ public static void normalize(List tokens) { - SimpleDirectedGraph normalizationGraph = new NormalizationGraphConstructor(tokens).get(); + SimpleDirectedGraph normalizationGraph = new NormalizationGraphConstructor(tokens).get(); tokens.clear(); spreadKeep(normalizationGraph); PriorityQueue roots = normalizationGraph.vertexSet().stream() // @@ -52,7 +52,7 @@ public static void normalize(List tokens) { /** * Spread keep status to every node that does not represent dead code. Nodes without keep status are later eliminated. */ - private static void spreadKeep(SimpleDirectedGraph normalizationGraph) { + private static void spreadKeep(SimpleDirectedGraph normalizationGraph) { Queue visit = new LinkedList<>(normalizationGraph.vertexSet().stream() // .filter(tl -> tl.semantics().keep()).toList()); while (!visit.isEmpty()) { diff --git a/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java b/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java index f6c97186c..2ef9f0883 100644 --- a/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java +++ b/language-api/src/main/java/de/jplag/semantics/CodeSemantics.java @@ -7,7 +7,7 @@ import java.util.Set; /** - * This class contains semantic information about a code snippet, in our case either a token or a statement. + * Contains semantic information about a code snippet, in our case either a token or a statement. 
*/ public class CodeSemantics { From e3d3aecb5ad513919bcfa9bd50dbdb552394612f Mon Sep 17 00:00:00 2001 From: smjonas Date: Sat, 15 Apr 2023 20:13:53 +0200 Subject: [PATCH 066/132] Add SCXML language module --- cli/pom.xml | 5 + .../test/java/de/jplag/cli/LanguageTest.java | 4 +- languages/pom.xml | 1 + languages/scxml/README.md | 22 +++ languages/scxml/pom.xml | 19 ++ .../java/de/jplag/scxml/ScxmlLanguage.java | 65 +++++++ .../main/java/de/jplag/scxml/ScxmlToken.java | 50 ++++++ .../java/de/jplag/scxml/ScxmlTokenType.java | 71 ++++++++ .../HandcraftedScxmlTokenGenerator.java | 63 +++++++ .../de/jplag/scxml/parser/PeekAdapter.java | 51 ++++++ .../de/jplag/scxml/parser/ScxmlParser.java | 141 +++++++++++++++ .../scxml/parser/ScxmlParserAdapter.java | 96 ++++++++++ .../parser/SimpleScxmlTokenGenerator.java | 164 ++++++++++++++++++ .../de/jplag/scxml/parser/model/State.java | 139 +++++++++++++++ .../jplag/scxml/parser/model/Statechart.java | 17 ++ .../scxml/parser/model/StatechartElement.java | 8 + .../jplag/scxml/parser/model/Transition.java | 47 +++++ .../model/executable_content/Action.java | 22 +++ .../model/executable_content/Cancel.java | 9 + .../parser/model/executable_content/Else.java | 11 ++ .../model/executable_content/ElseIf.java | 11 ++ .../executable_content/ExecutableContent.java | 52 ++++++ .../model/executable_content/ForEach.java | 11 ++ .../parser/model/executable_content/If.java | 104 +++++++++++ .../parser/model/executable_content/Send.java | 9 + .../SimpleExecutableContent.java | 23 +++ .../de/jplag/scxml/parser/util/NodeUtil.java | 95 ++++++++++ .../scxml/sorting/NoOpSortingStrategy.java | 17 ++ .../sorting/RecursiveSortingStrategy.java | 38 ++++ .../scxml/sorting/SimpleSortingStrategy.java | 36 ++++ .../jplag/scxml/sorting/SortingStrategy.java | 18 ++ .../scxml/util/AbstractScxmlVisitor.java | 109 ++++++++++++ .../java/de/jplag/scxml/util/ScxmlView.java | 68 ++++++++ .../scxml/ConfigurableScxmlParserAdapter.java | 17 ++ 
.../java/de/jplag/scxml/ScxmlParserTest.java | 135 ++++++++++++++ .../jplag/scxml/ScxmlTokenGeneratorTest.java | 64 +++++++ .../de/jplag/scxml/util/StateBuilder.java | 58 +++++++ .../de/jplag/statecharts/complex.scxml | 50 ++++++ .../de/jplag/statecharts/conditional.scxml | 20 +++ .../de/jplag/statecharts/coverage.scxml | 77 ++++++++ .../de/jplag/statecharts/reordered.scxml | 49 ++++++ .../de/jplag/statecharts/simple.scxml | 16 ++ .../jplag/statecharts/timed_transition.scxml | 14 ++ 43 files changed, 2094 insertions(+), 2 deletions(-) create mode 100644 languages/scxml/README.md create mode 100644 languages/scxml/pom.xml create mode 100644 languages/scxml/src/main/java/de/jplag/scxml/ScxmlLanguage.java create mode 100644 languages/scxml/src/main/java/de/jplag/scxml/ScxmlToken.java create mode 100644 languages/scxml/src/main/java/de/jplag/scxml/ScxmlTokenType.java create mode 100644 languages/scxml/src/main/java/de/jplag/scxml/parser/HandcraftedScxmlTokenGenerator.java create mode 100644 languages/scxml/src/main/java/de/jplag/scxml/parser/PeekAdapter.java create mode 100644 languages/scxml/src/main/java/de/jplag/scxml/parser/ScxmlParser.java create mode 100644 languages/scxml/src/main/java/de/jplag/scxml/parser/ScxmlParserAdapter.java create mode 100644 languages/scxml/src/main/java/de/jplag/scxml/parser/SimpleScxmlTokenGenerator.java create mode 100644 languages/scxml/src/main/java/de/jplag/scxml/parser/model/State.java create mode 100644 languages/scxml/src/main/java/de/jplag/scxml/parser/model/Statechart.java create mode 100644 languages/scxml/src/main/java/de/jplag/scxml/parser/model/StatechartElement.java create mode 100644 languages/scxml/src/main/java/de/jplag/scxml/parser/model/Transition.java create mode 100644 languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/Action.java create mode 100644 languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/Cancel.java create mode 100644 
languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/Else.java create mode 100644 languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/ElseIf.java create mode 100644 languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/ExecutableContent.java create mode 100644 languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/ForEach.java create mode 100644 languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/If.java create mode 100644 languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/Send.java create mode 100644 languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/SimpleExecutableContent.java create mode 100644 languages/scxml/src/main/java/de/jplag/scxml/parser/util/NodeUtil.java create mode 100644 languages/scxml/src/main/java/de/jplag/scxml/sorting/NoOpSortingStrategy.java create mode 100644 languages/scxml/src/main/java/de/jplag/scxml/sorting/RecursiveSortingStrategy.java create mode 100644 languages/scxml/src/main/java/de/jplag/scxml/sorting/SimpleSortingStrategy.java create mode 100644 languages/scxml/src/main/java/de/jplag/scxml/sorting/SortingStrategy.java create mode 100644 languages/scxml/src/main/java/de/jplag/scxml/util/AbstractScxmlVisitor.java create mode 100644 languages/scxml/src/main/java/de/jplag/scxml/util/ScxmlView.java create mode 100644 languages/scxml/src/test/java/de/jplag/scxml/ConfigurableScxmlParserAdapter.java create mode 100644 languages/scxml/src/test/java/de/jplag/scxml/ScxmlParserTest.java create mode 100644 languages/scxml/src/test/java/de/jplag/scxml/ScxmlTokenGeneratorTest.java create mode 100644 languages/scxml/src/test/java/de/jplag/scxml/util/StateBuilder.java create mode 100644 languages/scxml/src/test/resources/de/jplag/statecharts/complex.scxml create mode 100644 languages/scxml/src/test/resources/de/jplag/statecharts/conditional.scxml create mode 100644 
languages/scxml/src/test/resources/de/jplag/statecharts/coverage.scxml create mode 100644 languages/scxml/src/test/resources/de/jplag/statecharts/reordered.scxml create mode 100644 languages/scxml/src/test/resources/de/jplag/statecharts/simple.scxml create mode 100644 languages/scxml/src/test/resources/de/jplag/statecharts/timed_transition.scxml diff --git a/cli/pom.xml b/cli/pom.xml index 791736d02..60fd9621b 100644 --- a/cli/pom.xml +++ b/cli/pom.xml @@ -82,6 +82,11 @@ scheme ${revision} + + de.jplag + scxml + ${revision} + de.jplag swift diff --git a/cli/src/test/java/de/jplag/cli/LanguageTest.java b/cli/src/test/java/de/jplag/cli/LanguageTest.java index 307d414e4..7ed7b9608 100644 --- a/cli/src/test/java/de/jplag/cli/LanguageTest.java +++ b/cli/src/test/java/de/jplag/cli/LanguageTest.java @@ -28,7 +28,7 @@ void testInvalidLanguage() throws Exception { @Test void testLoading() { var languages = LanguageLoader.getAllAvailableLanguages(); - assertEquals(14, languages.size(), "Loaded Languages: " + languages.keySet()); + assertEquals(15, languages.size(), "Loaded Languages: " + languages.keySet()); } @Test @@ -49,4 +49,4 @@ void testCustomSuffixes() { assertEquals(suffixes, options.fileSuffixes()); } -} \ No newline at end of file +} diff --git a/languages/pom.xml b/languages/pom.xml index 48acb72c6..d1932bf7b 100644 --- a/languages/pom.xml +++ b/languages/pom.xml @@ -22,6 +22,7 @@ rust scala scheme + scxml swift text diff --git a/languages/scxml/README.md b/languages/scxml/README.md new file mode 100644 index 000000000..f186f61dd --- /dev/null +++ b/languages/scxml/README.md @@ -0,0 +1,22 @@ +## SCXML language module + +This language module enables the use of JPlag with SCXML submissions. +It works by first parsing the XML files using a SAX parser and transforming them into an intermediate Java object +structure. The tokens are extracted by iterating over this structure. 
+ +### Token Extraction + +There are two token extraction strategies available: the SimpleScxmlTokenGenerator and the +HandcraftedScxmlTokenGenerator. + +The SimpleScxmlTokenGenerator extracts tokens by recursively traversing the +Statechart object, using the elements outlined in the [SCXML specification](https://www.w3.org/TR/scxml). +The HandcraftedScxmlTokenGenerator utilizes a larger token set and extracts tokens based on the attributes of the +StatechartElement, for example extracting a `PARALLEL_STATE` token for the State object if it is parallel. + +The set of possible tokens can be found here: +https://github.com/smjonas/JPlag/blob/statecharts/languages/scxml/src/main/java/de/jplag/scxml/ScxmlTokenType.java + +## Usage + +To use the new module, add the `-l scxml` flag in the CLI. diff --git a/languages/scxml/pom.xml b/languages/scxml/pom.xml new file mode 100644 index 000000000..78001f1f4 --- /dev/null +++ b/languages/scxml/pom.xml @@ -0,0 +1,19 @@ + + + 4.0.0 + + de.jplag + languages + ${revision} + + scxml + + + + org.assertj + assertj-core + 3.23.1 + test + + + diff --git a/languages/scxml/src/main/java/de/jplag/scxml/ScxmlLanguage.java b/languages/scxml/src/main/java/de/jplag/scxml/ScxmlLanguage.java new file mode 100644 index 000000000..c315830c6 --- /dev/null +++ b/languages/scxml/src/main/java/de/jplag/scxml/ScxmlLanguage.java @@ -0,0 +1,65 @@ +package de.jplag.scxml; + +import de.jplag.ParsingException; +import de.jplag.Token; +import de.jplag.scxml.parser.ScxmlParserAdapter; +import org.kohsuke.MetaInfServices; + +import java.io.File; +import java.util.List; +import java.util.Set; + +/** + * Language for statecharts in the State Chart XML (SCXML) format. 
+ */ +@MetaInfServices(de.jplag.Language.class) +public class ScxmlLanguage implements de.jplag.Language { + + public static final String FILE_ENDING = ".scxml"; + public static final String VIEW_FILE_SUFFIX = ".scxmlview"; + + private static final String NAME = "SCXML (Statechart XML)"; + private static final String IDENTIFIER = "scxml"; + private static final int DEFAULT_MIN_TOKEN_MATCH = 6; + + protected final ScxmlParserAdapter parser; + + public ScxmlLanguage() { + this.parser = new ScxmlParserAdapter(); + } + + @Override + public String[] suffixes() { + return new String[]{FILE_ENDING}; + } + + @Override + public String getName() { + return NAME; + } + + @Override + public String getIdentifier() { + return IDENTIFIER; + } + + @Override + public int minimumTokenMatch() { + return DEFAULT_MIN_TOKEN_MATCH; + } + + @Override + public List parse(Set files) throws ParsingException { + return parser.parse(files); + } + + @Override + public boolean useViewFiles() { + return true; + } + + @Override + public String viewFileSuffix() { + return VIEW_FILE_SUFFIX; + } +} diff --git a/languages/scxml/src/main/java/de/jplag/scxml/ScxmlToken.java b/languages/scxml/src/main/java/de/jplag/scxml/ScxmlToken.java new file mode 100644 index 000000000..13361a4aa --- /dev/null +++ b/languages/scxml/src/main/java/de/jplag/scxml/ScxmlToken.java @@ -0,0 +1,50 @@ +package de.jplag.scxml; + +import de.jplag.Token; +import de.jplag.TokenType; +import de.jplag.scxml.parser.model.StatechartElement; + +import java.io.File; + +/** + * Represents a SCXML token. + */ +public class ScxmlToken extends Token { + + private final StatechartElement element; + + /** + * Creates an SCXML token that corresponds to a StatechartElement. 
+ * + * @param type the type of the token + * @param file the source statechart file + * @param element the corresponding StatechartElement this token was extracted from + */ + public ScxmlToken(TokenType type, File file, StatechartElement element) { + super(type, file, NO_VALUE, NO_VALUE, NO_VALUE); + this.element = element; + } + + /** + * Creates an SCXML token that corresponds to a StatechartElement including file information. + * + * @param type the type of the token + * @param file the source statechart file + * @param line the line index in the source code where the token resides, 1-based + * @param column the column index, meaning where the token starts in the line, 1-based + * @param length the length of the token in the view file + * @param element the corresponding StatechartElement this token was extracted from + */ + // TODO: use TokenTrace + public ScxmlToken(TokenType type, File file, int line, int column, int length, StatechartElement element) { + super(type, file, line, column, length); + this.element = element; + } + + /** + * @return the StatechartElement corresponding to the token + */ + public StatechartElement getStatechartElement() { + return element; + } +} diff --git a/languages/scxml/src/main/java/de/jplag/scxml/ScxmlTokenType.java b/languages/scxml/src/main/java/de/jplag/scxml/ScxmlTokenType.java new file mode 100644 index 000000000..a4e1946b0 --- /dev/null +++ b/languages/scxml/src/main/java/de/jplag/scxml/ScxmlTokenType.java @@ -0,0 +1,71 @@ +package de.jplag.scxml; + +import de.jplag.TokenType; + +/** + * SCXML token type. Defines which tokens can be extracted from a statechart. 
+ */ +public enum ScxmlTokenType implements TokenType { + + TRANSITION("Transition"), + TRANSITION_END("Transition end", true), + GUARDED_TRANSITION("Guarded transition"), + TIMED_TRANSITION("Timed transition"), + STATE("State begin"), + STATE_END("State end", true), + REGION("Region"), + INITIAL_STATE("Initial state"), + PARALLEL_STATE("Parallel state"), + ON_ENTRY("OnEntry"), + ON_EXIT("OnExit"), + ACTION_END("Action end", true), + // Simple executable content + RAISE("Raise"), + IF("If"), + IF_END("If end", true), + ELSE_IF("Else if"), + ELSE_IF_END("Else if end", true), + ELSE("Else"), + FOREACH("For each"), + // LOG("Log"), + // TODO: fix comments + // Other executable content + ASSIGNMENT("Assignment"), + CANCEL("Cancel"), + SCRIPT("Script"), + SEND("Send"); + + private final String description; + private boolean isEndToken = false; + + ScxmlTokenType(String description) { + this.description = description; + } + + /** + * Creates a statechart token type that may be an end token. + * An end token represents a token that is always added after + * all child tokens for a nested token such as STATE. 
+ * + * @param isEndToken indicates that the token is an end token + */ + ScxmlTokenType(String description, boolean isEndToken) { + this(description); + this.isEndToken = isEndToken; + } + + /** + * @return the description for this token type + */ + public String getDescription() { + return description; + } + + /** + * @return whether this token is an end token + */ + public boolean isEndToken() { + return isEndToken; + } + +} diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/HandcraftedScxmlTokenGenerator.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/HandcraftedScxmlTokenGenerator.java new file mode 100644 index 000000000..d19292f92 --- /dev/null +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/HandcraftedScxmlTokenGenerator.java @@ -0,0 +1,63 @@ +package de.jplag.scxml.parser; + +import de.jplag.scxml.parser.model.State; +import de.jplag.scxml.parser.model.Transition; +import de.jplag.scxml.parser.model.executable_content.ExecutableContent; + +import static de.jplag.scxml.ScxmlTokenType.*; + +/** + * Visits a statechart and its contained elements to extract tokens + * using a handcrafted strategy, i.e. a larger token set than for the + * simple strategy (see {@link SimpleScxmlTokenGenerator}). + * Additional tokens are extracted depending on the attributes of + * the statechart elements. + */ +public class HandcraftedScxmlTokenGenerator extends SimpleScxmlTokenGenerator { + + /** + * Creates the visitor. + * + * @param adapter is the parser adapter which receives the generated tokens. + */ + public HandcraftedScxmlTokenGenerator(ScxmlParserAdapter adapter) { + super(adapter); + } + + protected void visitStateAttributes(State state) { + if (state.initial()) { + adapter.addToken(INITIAL_STATE, state); + } + if (state.parallel()) { + adapter.addToken(PARALLEL_STATE, state); + } + } + + @Override + public void visitState(State state) { + adapter.addToken(state.isRegion() ? 
REGION : STATE, state); + depth++; + visitStateAttributes(state); + visitStateContents(state); + depth--; + adapter.addToken(STATE_END, state); + } + + @Override + public void visitTransition(Transition transition) { + if (transition.isTimed()) { + adapter.addToken(TIMED_TRANSITION, transition); + } else if (transition.isGuarded()) { + adapter.addToken(GUARDED_TRANSITION, transition); + } else { + adapter.addToken(TRANSITION, transition); + } + + depth++; + for (ExecutableContent content : transition.contents()) { + visitExecutableContent(content); + } + depth--; + adapter.addToken(TRANSITION_END, transition); + } +} diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/PeekAdapter.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/PeekAdapter.java new file mode 100644 index 000000000..c7d711853 --- /dev/null +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/PeekAdapter.java @@ -0,0 +1,51 @@ +package de.jplag.scxml.parser; + +import de.jplag.scxml.ScxmlTokenType; +import de.jplag.scxml.parser.model.StatechartElement; + +import java.util.ArrayList; +import java.util.List; + +/** + * A parser adapter that provides a way to retrieve a list of + * token types. When a token is added, only the ordinal of its type + * is stored. This can be used to "peek" at a list of token types + * that are extracted when visiting a statechart. + */ +public class PeekAdapter extends ScxmlParserAdapter { + + private final List tokenTypes = new ArrayList<>(); + + /** + * Lexicographically compares two lists of integer representations / + * ordinals of token types. 
+ * + * @param first the first list of ordinals of token types + * @param second the second list of ordinals of token types + * @return 0 if the lists are equal, a negative integer if the first list is lexicographically + * less than the second list, or a positive integer if the first list is lexicographically + * greater than the second list + */ + public static int compareTokenTypeLists(List first, List second) { + int size = Math.min(first.size(), second.size()); + for (int i = 0; i < size; i++) { + int result = Integer.compare(first.get(i), second.get(i)); + if (result != 0) { + return result; + } + } + return Integer.compare(first.size(), second.size()); + } + + @Override + public void addToken(ScxmlTokenType type, StatechartElement source) { + tokenTypes.add(type.ordinal()); + } + + /** + * @return the currently extracted list of token types + */ + public List getTokenTypes() { + return tokenTypes; + } +} diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/ScxmlParser.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/ScxmlParser.java new file mode 100644 index 000000000..867175e8a --- /dev/null +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/ScxmlParser.java @@ -0,0 +1,141 @@ +package de.jplag.scxml.parser; + +import de.jplag.ParsingException; +import de.jplag.scxml.parser.model.State; +import de.jplag.scxml.parser.model.Statechart; +import de.jplag.scxml.parser.model.Transition; +import de.jplag.scxml.parser.model.executable_content.Action; +import de.jplag.scxml.parser.model.executable_content.ExecutableContent; +import de.jplag.scxml.parser.util.NodeUtil; +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.xml.sax.SAXException; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; 
+import java.util.Set;
+import java.util.function.Function;
+
+/**
+ * An SCXML parser implementation based on the JAXP DOM parser ({@link DocumentBuilder}).
+ * Constructs a Statechart object during the parse.
+ */
+public class ScxmlParser {
+
+    private static final String STATE_ELEMENT = "state";
+    private static final String PARALLEL_STATE_ELEMENT = "parallel";
+    private static final String INITIAL_ELEMENT = "initial";
+    private static final String ONENTRY_ELEMENT = "onentry";
+    private static final String ONEXIT_ELEMENT = "onexit";
+    private static final String TRANSITION_ELEMENT = "transition";
+
+    private static final String NAME_ATTRIBUTE = "name";
+    private static final String ID_ATTRIBUTE = "id";
+    private static final String INITIAL_ATTRIBUTE = "initial";
+    private static final String TARGET_ATTRIBUTE = "target";
+    private static final String EVENT_ATTRIBUTE = "event";
+    private static final String CONDITION_ATTRIBUTE = "cond";
+
+    private final DocumentBuilder builder;
+    private final List<String> initialStateTargets = new ArrayList<>();
+
+    /**
+     * Constructs a new ScxmlParser used to parse SCXML documents.
+     * The DOM parser is configured to neither resolve external entities nor load
+     * external DTDs, so that a crafted document cannot make the parser read local
+     * files or contact remote hosts (XXE).
+     *
+     * @throws ParserConfigurationException if the parser cannot be created or does
+     * not support one of the hardening features
+     */
+    public ScxmlParser() throws ParserConfigurationException {
+        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+        // Documents with a DOCTYPE are still accepted; only external resolution is switched off
+        factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
+        factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
+        factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
+        builder = factory.newDocumentBuilder();
+    }
+
+    /**
+     * Parses the given SCXML file using Javax and constructs a Statechart object.
+     * Two passes through the document are performed:
+     * In the first pass, all {@literal <initial>} elements within states are iterated over
+     * to resolve initial states. In the second pass, the whole document is visited.
+     * This is necessary because an initial state may occur in the document prior to
+     * the transitions pointing to it. 
+ * + * @param file the SCXML file to parse + */ + public Statechart parse(File file) throws IOException, SAXException, ParsingException { + Document document = builder.parse(file); + try { + Element element = document.getDocumentElement(); + resolveInitialStates(element); + return visitRoot(element); + } catch (IllegalArgumentException e) { + throw new ParsingException(file, "failed to parse statechart: " + e.getMessage()); + } + } + + private void resolveInitialStates(Node root) { + List initialElements = NodeUtil.getNodesRecursive(root, INITIAL_ELEMENT); + List transitions = initialElements.stream().map(this::visitInitialTransition).toList(); + initialStateTargets.addAll(transitions.stream().map(Transition::target).toList()); + } + + private ArrayList visitChildElements(Node root, Set childNames, Function visitorFunction) { + return new ArrayList<>(NodeUtil.getChildNodes(root, childNames).stream().map(visitorFunction).toList()); + } + + private Statechart visitRoot(Node node) { + String name = NodeUtil.getAttribute(node, NAME_ATTRIBUTE); + assert name != null : "statechart element must have name attribute"; + + List states = visitChildElements(node, Set.of(STATE_ELEMENT, PARALLEL_STATE_ELEMENT), this::visitState); + return new Statechart(name, states); + } + + private State visitState(Node node) { + String id = NodeUtil.getAttribute(node, ID_ATTRIBUTE); + assert id != null : "state element must have id attribute"; + + boolean initial = initialStateTargets.contains(id) || NodeUtil.getAttribute(node, INITIAL_ATTRIBUTE) != null; + boolean parallel = node.getNodeName().equals(PARALLEL_STATE_ELEMENT); + + Node child = NodeUtil.getFirstChild(node, INITIAL_ELEMENT); + assert !(parallel && child != null) : "parallel state " + id + " must not have initial element"; + + ArrayList actions = visitChildElements(node, Set.of(ONENTRY_ELEMENT, ONEXIT_ELEMENT), this::visitAction); + ArrayList transitions = visitChildElements(node, Set.of(TRANSITION_ELEMENT), 
this::visitTransition); + List states = visitChildElements(node, Set.of(STATE_ELEMENT, PARALLEL_STATE_ELEMENT), this::visitState); + return new State(id, transitions, states, actions, initial, parallel); + } + + private Action visitAction(Node node) throws IllegalArgumentException { + if (node == null) { + return null; + } + Action.Type type = node.getNodeName().equals(ONENTRY_ELEMENT) ? Action.Type.ON_ENTRY : Action.Type.ON_EXIT; + return new Action(type, visitExecutableContents(node)); + } + + private List visitExecutableContents(Node node) throws IllegalArgumentException { + return visitChildElements(node, ExecutableContent.ALLOWED_XML_ELEMENTS, ExecutableContent::fromNode); + } + + private Transition visitInitialTransition(Node node) { + List transitionNodes = NodeUtil.getChildNodes(node, TRANSITION_ELEMENT); + assert !transitionNodes.isEmpty() : "initial element must contain transition child"; + Transition transition = visitTransition(transitionNodes.get(0)); + assert transition.isInitial() : "transition is not an initial transition"; + return transition; + } + + private Transition visitTransition(Node node) throws IllegalArgumentException { + return new Transition( + NodeUtil.getAttribute(node, TARGET_ATTRIBUTE), + NodeUtil.getAttribute(node, EVENT_ATTRIBUTE), + NodeUtil.getAttribute(node, CONDITION_ATTRIBUTE), + visitExecutableContents(node), + // Set timed attribute to false initially, may be updated later in the State class + false + ); + } +} diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/ScxmlParserAdapter.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/ScxmlParserAdapter.java new file mode 100644 index 000000000..658a4c801 --- /dev/null +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/ScxmlParserAdapter.java @@ -0,0 +1,96 @@ +package de.jplag.scxml.parser; + +import de.jplag.AbstractParser; +import de.jplag.ParsingException; +import de.jplag.Token; +import de.jplag.scxml.ScxmlLanguage; +import 
de.jplag.scxml.ScxmlToken; +import de.jplag.scxml.ScxmlTokenType; +import de.jplag.scxml.parser.model.Statechart; +import de.jplag.scxml.parser.model.StatechartElement; +import de.jplag.scxml.util.AbstractScxmlVisitor; +import de.jplag.scxml.util.ScxmlView; +import org.xml.sax.SAXException; + +import javax.xml.parsers.ParserConfigurationException; +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +/** + * Parser adapter for SCXML statecharts that uses a Statechart object + * obtained from an instance of ScxmlParser to extract tokens. + */ +public class ScxmlParserAdapter extends AbstractParser { + + protected List tokens; + protected File currentFile; + protected AbstractScxmlVisitor visitor; + protected ScxmlView view; + + public ScxmlParserAdapter() { + this.visitor = new HandcraftedScxmlTokenGenerator(this); + } + + /** + * Parses all tokens from a set of files. + * + * @param files the set of files. + * @return the list of parsed tokens. + */ + public List parse(Set files) throws ParsingException { + tokens = new ArrayList<>(); + for (File file : files) { + parseModelFile(file); + } + return tokens; + } + + /** + * Loads a statechart from a file, parses it and extracts tokens from it. + * + * @param file is the statechart file. + */ + protected void parseModelFile(File file) throws ParsingException { + currentFile = file; + Statechart statechart; + view = new ScxmlView(file); + + try { + statechart = new ScxmlParser().parse(file); + } catch (ParserConfigurationException | IOException | SAXException e) { + throw new ParsingException(file, "failed to parse statechart:\n" + e.getMessage()); + } + + visitor.visit(statechart); + tokens.add(Token.fileEnd(currentFile)); + view.writeToFile(ScxmlLanguage.VIEW_FILE_SUFFIX); + } + + /** + * Creates a token from the given type plus the associated statechart element + * and adds it to the token stream. 
+ * The token is enhanced with view information (see {@link ScxmlView}). + * + * @param type the type of the token + * @param source the statechart element associated with the token + */ + public void addToken(ScxmlTokenType type, StatechartElement source) { + ScxmlToken token = new ScxmlToken(type, currentFile, source); + Token enhancedToken = view.enhanceToken(token, visitor.getCurrentStatechartDepth()); + tokens.add(enhancedToken); + } + + /** + * Creates a token from the given type without an associated statechart element. + * The token is enhanced with view information (see {@link ScxmlView}). + * + * @param type the type of the token + */ + public void addEndToken(ScxmlTokenType type) { + addToken(type, null); + } + +} diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/SimpleScxmlTokenGenerator.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/SimpleScxmlTokenGenerator.java new file mode 100644 index 000000000..65f605c81 --- /dev/null +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/SimpleScxmlTokenGenerator.java @@ -0,0 +1,164 @@ +package de.jplag.scxml.parser; + +import de.jplag.scxml.ScxmlTokenType; +import de.jplag.scxml.parser.model.State; +import de.jplag.scxml.parser.model.Statechart; +import de.jplag.scxml.parser.model.StatechartElement; +import de.jplag.scxml.parser.model.Transition; +import de.jplag.scxml.parser.model.executable_content.*; +import de.jplag.scxml.util.AbstractScxmlVisitor; + +import java.util.List; +import java.util.Map; + +import static de.jplag.scxml.ScxmlTokenType.*; + +/** + * Visits a statechart and its contained elements to extract tokens + * using a simple strategy, i.e. a smaller token set than for the + * handcrafted strategy (see {@link HandcraftedScxmlTokenGenerator}). + */ +public class SimpleScxmlTokenGenerator extends AbstractScxmlVisitor { + + /** + * Creates the visitor. 
+ * + * @param adapter the parser adapter which receives the generated tokens + */ + public SimpleScxmlTokenGenerator(ScxmlParserAdapter adapter) { + super(adapter); + } + + @Override + public void visitStatechart(Statechart statechart) { + for (State state : sorter.sort(statechart.states())) { + visitState(state); + } + } + + protected void visitStateContents(State state) { + visitActions(state.actions()); + for (Transition transition : sorter.sort(state.transitions())) { + visitTransition(transition); + } + for (State substate : sorter.sort(state.substates())) { + visitState(substate); + } + } + + @Override + public void visitState(State state) { + adapter.addToken(STATE, state); + depth++; + visitStateContents(state); + depth--; + adapter.addEndToken(STATE_END); + } + + @Override + public void visitActions(List actions) { + // Group actions by their type + List onEntries = actions.stream().filter(a -> a.type() == Action.Type.ON_ENTRY).toList(); + List onExits = actions.stream().filter(a -> a.type() == Action.Type.ON_EXIT).toList(); + visitActions(onEntries, ON_ENTRY); + visitActions(onExits, ON_EXIT); + } + + private void visitActions(List actions, ScxmlTokenType tokenType) { + if (!actions.isEmpty()) { + // Only extract a single ENTRY / EXIT token even if the state contains multiple. + // Functionally, this makes no difference. 
+            adapter.addToken(tokenType, null);
+            List<ExecutableContent> actionContents = actions.stream().flatMap(a -> a.contents().stream()).toList();
+            depth++;
+            // Do not sort executable content because the order is important
+            for (ExecutableContent content : actionContents) {
+                visitExecutableContent(content);
+            }
+            depth--;
+            adapter.addEndToken(ACTION_END);
+        }
+    }
+
+    @Override
+    public void visitTransition(Transition transition) {
+        adapter.addToken(TRANSITION, transition);
+        depth++;
+        // Do not sort executable content because the order is important
+        for (ExecutableContent content : transition.contents()) {
+            visitExecutableContent(content);
+        }
+        depth--;
+        adapter.addEndToken(TRANSITION_END);
+    }
+
+    @Override
+    public void visitIf(If if_) {
+        adapter.addToken(IF, if_);
+        depth++;
+        for (ExecutableContent content : if_.contents()) {
+            visitExecutableContent(content);
+        }
+        for (ElseIf elseIf : if_.elseIfs()) {
+            visitElseIf(elseIf);
+        }
+        visitElse(if_.else_());
+        depth--; // restore the nesting level raised after the IF token, as every other visit method does
+        adapter.addEndToken(IF_END);
+    }
+
+    @Override
+    public void visitElseIf(ElseIf elseIf) {
+        adapter.addToken(ELSE_IF, elseIf);
+        for (ExecutableContent content : elseIf.contents()) {
+            visitExecutableContent(content);
+        }
+        adapter.addEndToken(ELSE_IF_END);
+    }
+
+    @Override
+    public void visitElse(Else else_) {
+        if (else_ != null) {
+            adapter.addToken(ELSE, else_);
+            for (ExecutableContent content : else_.contents()) {
+                visitExecutableContent(content);
+            }
+        }
+    }
+
+    @Override
+    public void visitExecutableContent(ExecutableContent content) {
+        if (content instanceof SimpleExecutableContent simpleExecutableContent) {
+            visitSimpleExecutableContent(simpleExecutableContent);
+            return;
+        }
+
+        if (content instanceof If if_) {
+            visitIf(if_);
+            return;
+        }
+
+        Map<Class<? extends ExecutableContent>, ScxmlTokenType> tokenTypeMap = Map.of(
+                Send.class, SEND,
+                Cancel.class, CANCEL
+        );
+        ScxmlTokenType type = tokenTypeMap.get(content.getClass());
+        adapter.addToken(type, content);
+    }
+
+    @Override
+    public void 
visitSimpleExecutableContent(SimpleExecutableContent content) { + ScxmlTokenType type = switch (content.type()) { + case RAISE -> RAISE; + case ASSIGNMENT -> ASSIGNMENT; + case SCRIPT -> SCRIPT; + case FOREACH -> FOREACH; + // Don't extract a token for log elements + case LOG -> null; + }; + if (type != null) { + adapter.addToken(type, content); + } + } + +} diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/State.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/State.java new file mode 100644 index 000000000..a6eb46e25 --- /dev/null +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/State.java @@ -0,0 +1,139 @@ +package de.jplag.scxml.parser.model; + +import de.jplag.scxml.parser.model.executable_content.Action; +import de.jplag.scxml.parser.model.executable_content.Cancel; +import de.jplag.scxml.parser.model.executable_content.ExecutableContent; +import de.jplag.scxml.parser.model.executable_content.Send; + +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Stream; + +public record State(String id, List transitions, List substates, List actions, + boolean initial, boolean parallel) implements StatechartElement { + + /** + * Constructs a new state. 
+ * + * @param id the ID of the state + * @param transitions a non-null list of outgoing transitions of this state + * @param substates a non-null list of substates of this state + * @param actions a non-null list of actions associated with this state + * @param initial whether this state is an initial state + * @param parallel whether this state is a parallel state + */ + public State(String id, List transitions, List substates, List actions, boolean initial, boolean parallel) { + this.id = id; + assert transitions != null : "State.transitions must not be null"; + this.transitions = transitions; + assert substates != null : "State.substates must not be null"; + this.substates = substates; + this.actions = actions; + this.initial = initial; + this.parallel = parallel; + updateTimedTransitions(); + } + + /** + * Constructs a state with an ID, setting all other variables + * to default values. + * + * @param id the ID of the state + */ + public State(String id) { + this(id, new ArrayList<>(), new ArrayList<>(), new ArrayList<>(), false, false); + } + + /** + * @return whether this state is a region, i.e. 
it contains at least one substate + */ + public boolean isRegion() { + return !substates.isEmpty(); + } + + /** + * @return whether this state is a simple state, meaning that is + * neither an initial state nor a parallel state + */ + public boolean isSimple() { + return !initial && !parallel; + } + + private Stream onEntries() { + return actions.stream().filter(a -> a.type() == Action.Type.ON_ENTRY); + } + + private Stream onExits() { + return actions.stream().filter(a -> a.type() == Action.Type.ON_EXIT); + } + + private List getOnEntrySends() { + Stream> onEntryContents = this.onEntries().map(Action::contents); + return onEntryContents.flatMap(List::stream).filter(c -> c instanceof Send).map(s -> (Send) s).toList(); + } + + private void removeTimedTransitionElements(Action onEntry, Send send, Action onExit, Cancel cancel, Transition transition) { + List filteredContents = onEntry.contents().stream().filter(c -> !(c instanceof Send && c.equals(send))).toList(); + if (filteredContents.isEmpty()) { + // Remove onEntry entirely if it is now empty + actions.remove(onEntry); + } else { + // Only remove the matching onEntry.send + Action filteredOnEntry = new Action(Action.Type.ON_ENTRY, filteredContents); + actions.set(actions.indexOf(onEntry), filteredOnEntry); + } + + // Do something similar for onExit + filteredContents = onExit.contents().stream().filter(c -> !(c instanceof Cancel && c.equals(cancel))).toList(); + if (filteredContents.isEmpty()) { + actions.remove(onExit); + } else { + Action filteredOnExit = new Action(Action.Type.ON_EXIT, filteredContents); + actions.set(actions.indexOf(onExit), filteredOnExit); + } + } + + /** + * Sets the timed attribute of each transition of this state that is timed. + * To model a timed transition, itemis Create adds onentry.send, onexit.cancel + * and transition elements with matching IDs. + * These elements will be removed if they are part of a timed transition. 
+ **/ + private void updateTimedTransitions() { + if (this.transitions().isEmpty() || this.actions().isEmpty()) { + return; + } + List onEntrySends = getOnEntrySends(); + + for (Action onExit : onExits().toList()) { + for (Cancel cancel : onExit.contents().stream().filter(c -> c instanceof Cancel).map(c -> (Cancel) c).toList()) { + String sendId = cancel.sendid(); + // First check if there is a matching transition for the sendid + for (Transition transition : transitions) { + boolean foundTimedTransition = false; + if (transition.event() != null && transition.event().equals(sendId)) { + // Then check if there is also a matching send element in + for (Action onEntry : onEntries().toList()) { + for (Send send : onEntrySends) { + if (send.event().equals(sendId)) { + foundTimedTransition = true; + // Finally, replace the transition + removeTimedTransitionElements(onEntry, send, onExit, cancel, transition); + } + } + } + } + if (foundTimedTransition) { + transitions.set(transitions.indexOf(transition), Transition.makeTimed(transition)); + } + } + } + } + } + + @Override + public String toString() { + return String.format("%s: %s {", id, isRegion() ? "Region" : "State"); + } + +} diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/Statechart.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/Statechart.java new file mode 100644 index 000000000..379409b6f --- /dev/null +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/Statechart.java @@ -0,0 +1,17 @@ +package de.jplag.scxml.parser.model; + +import java.util.List; + +/** + * Represents an SCXML statechart. 
+ *
+ * @param name the name of the statechart
+ * @param states a list of states comprising this statechart
+ */
+public record Statechart(String name, List<State> states) implements StatechartElement {
+
+    @Override
+    public String toString() {
+        return String.format("%s: Statechart {", name);
+    }
+}
diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/StatechartElement.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/StatechartElement.java
new file mode 100644
index 000000000..16ab8a610
--- /dev/null
+++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/StatechartElement.java
@@ -0,0 +1,8 @@
+package de.jplag.scxml.parser.model;
+
+/**
+ * Defines a common supertype of all elements in an SCXML statechart.
+ * This is used to be able to handle any concrete statechart element.
+ */
+public interface StatechartElement {
+}
diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/Transition.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/Transition.java
new file mode 100644
index 000000000..fd3c26f2c
--- /dev/null
+++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/Transition.java
@@ -0,0 +1,47 @@
+package de.jplag.scxml.parser.model;
+
+import de.jplag.scxml.parser.model.executable_content.ExecutableContent;
+
+import java.util.List;
+import java.util.Objects;
+
+public record Transition(String target, String event, String cond, List<ExecutableContent> contents,
+                         boolean timed) implements StatechartElement {
+
+    public static Transition makeTimed(Transition transition) {
+        return new Transition(transition.target, null, transition.cond, transition.contents, true);
+    }
+
+    public boolean isInitial() {
+        return target != null && event == null && cond == null;
+    }
+
+    public boolean isGuarded() {
+        return cond != null;
+    }
+
+    public boolean isTimed() {
+        return timed;
+    }
+
+    @Override
+    public String toString() {
+        String prefix = isTimed() ? 
"Timed t" : "T"; + String suffix; + if (event == null && cond == null) { + suffix = ""; + } else if (event != null && cond != null) { + suffix = String.format("(event='%s', cond='%s')", event, cond); + } else if (event != null) { + suffix = String.format("(event='%s')", event); + } else { + suffix = String.format("(cond='%s')", cond); + } + return String.format("%sransition (-> %s) %s {", prefix, target, suffix); + } + + @Override + public int hashCode() { + return Objects.hash(target, event, cond, contents, timed); + } +} diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/Action.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/Action.java new file mode 100644 index 000000000..d0195e0a0 --- /dev/null +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/Action.java @@ -0,0 +1,22 @@ +package de.jplag.scxml.parser.model.executable_content; + +import java.util.List; +import java.util.Objects; + +public record Action(Type type, List contents) implements ExecutableContent { + + @Override + public int hashCode() { + return Objects.hash(type, contents); + } + + @Override + public String toString() { + return String.format("Action (type=%s) {", type == Type.ON_ENTRY ? 
"OnEntry" : "OnExit"); + } + + public enum Type { + ON_ENTRY, + ON_EXIT, + } +} diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/Cancel.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/Cancel.java new file mode 100644 index 000000000..b6852236c --- /dev/null +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/Cancel.java @@ -0,0 +1,9 @@ +package de.jplag.scxml.parser.model.executable_content; + +public record Cancel(String sendid) implements ExecutableContent { + + @Override + public String toString() { + return "Cancel"; + } +} diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/Else.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/Else.java new file mode 100644 index 000000000..45d120864 --- /dev/null +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/Else.java @@ -0,0 +1,11 @@ +package de.jplag.scxml.parser.model.executable_content; + +import java.util.List; + +public record Else(List contents) implements ExecutableContent { + + @Override + public String toString() { + return "Else"; + } +} diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/ElseIf.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/ElseIf.java new file mode 100644 index 000000000..a9deea351 --- /dev/null +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/ElseIf.java @@ -0,0 +1,11 @@ +package de.jplag.scxml.parser.model.executable_content; + +import java.util.List; + +public record ElseIf(List contents) implements ExecutableContent { + + @Override + public String toString() { + return "ElseIf"; + } +} diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/ExecutableContent.java 
b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/ExecutableContent.java new file mode 100644 index 000000000..892ba09fc --- /dev/null +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/ExecutableContent.java @@ -0,0 +1,52 @@ +package de.jplag.scxml.parser.model.executable_content; + +import de.jplag.scxml.parser.model.StatechartElement; +import de.jplag.scxml.parser.util.NodeUtil; +import org.w3c.dom.Node; + +import java.util.Set; + +import static de.jplag.scxml.parser.model.executable_content.SimpleExecutableContent.Type.*; + +public interface ExecutableContent extends StatechartElement { + + /** + * Defines the set of allowed XML element names that are considered + * valid executable content. + * and elements are not allowed as they may only present + * as children of an element. + */ + Set ALLOWED_XML_ELEMENTS = Set.of( + "raise", "if", "foreach", "log", "assign", "script", "send", "cancel" + ); + + String ELSE_ELEMENT = "else"; + String EVENT_ATTRIBUTE = "event"; + String SEND_ID_ATTRIBUTE = "sendid"; + String DELAY_ATTRIBUTE = "delay"; + + /** + * Constructs a concrete instance of ExecutableContent based on the name + * of the given node. 
+     *
+     * @param node the node to create the ExecutableContent from
+     * @return the constructed ExecutableContent
+     * @throws IllegalArgumentException if the node name is not allowed or the
+     * executable content could not be created
+     */
+    static ExecutableContent fromNode(Node node) throws IllegalArgumentException {
+        return switch (node.getNodeName()) {
+            case "if" -> If.fromNode(node);
+            case "raise" -> new SimpleExecutableContent(RAISE);
+            case "assign" -> new SimpleExecutableContent(ASSIGNMENT);
+            case "script" -> new SimpleExecutableContent(SCRIPT);
+            case "foreach" -> new SimpleExecutableContent(FOREACH);
+            case "log" -> new SimpleExecutableContent(LOG);
+            case "send" ->
+                    new Send(NodeUtil.getAttribute(node, EVENT_ATTRIBUTE), NodeUtil.getAttribute(node, DELAY_ATTRIBUTE));
+            case "cancel" -> new Cancel(NodeUtil.getAttribute(node, SEND_ID_ATTRIBUTE));
+            default ->
+                    throw new IllegalArgumentException("ExecutableContent.fromNode: invalid node '" + node.getNodeName() + "' in parent '" + node.getParentNode().getNodeName() + "'");
+        };
+    }
+}
diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/ForEach.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/ForEach.java
new file mode 100644
index 000000000..9db18cc2c
--- /dev/null
+++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/ForEach.java
@@ -0,0 +1,11 @@
+package de.jplag.scxml.parser.model.executable_content;
+
+import java.util.List;
+
+public record ForEach(List<ExecutableContent> contents) implements ExecutableContent {
+
+    @Override
+    public String toString() {
+        return "ForEach";
+    }
+}
diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/If.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/If.java
new file mode 100644
index 000000000..ffee68db0
--- /dev/null
+++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/If.java
@@ -0,0 +1,104 @@
+package 
de.jplag.scxml.parser.model.executable_content; + +import de.jplag.scxml.parser.util.NodeUtil; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; + +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +public record If(String cond, List contents, List elseIfs, + Else else_) implements ExecutableContent { + + private static final Set ALLOWED_CONTENTS = Set.of( + "raise", "if", "foreach", "log", "assign", "script", "send", "cancel" + ); + + private static final String IF_ELEMENT = "if"; + private static final String ELSEIF_ELEMENT = "elseif"; + private static final String COND_ATTRIBUTE = "cond"; + + public If(String cond, ExecutableContent... contents) { + this(cond, new ArrayList<>(List.of(contents)), new ArrayList<>(), null); + } + + private static void addBranch(String branch, List contents, List elseIfs, List elses) { + if (branch.equals(ELSEIF_ELEMENT)) { + elseIfs.add(new ElseIf(contents)); + } else if (branch.equals(ELSE_ELEMENT)) { + elses.add(new Else(contents)); + } + } + + /** + * Constructs an If statechart element from a given node + * with optional ElseIf or Else branches. + * The W3C SCXML specification defines a valid {@literal } element as follows: + *

      + *

      +     * {@code
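      +     * <if cond="cond1">
      +     *   <!-- selected when "cond1" is true -->
      +     * <elseif cond="cond2"/>
      +     *   <!-- selected when "cond1" is false and "cond2" is true -->
      +     * <elseif cond="cond3"/>
      +     *   <!-- selected when "cond1" and "cond2" are false and "cond3" is true -->
      +     * <else/>
      +     *   <!-- selected when "cond1", "cond2", and "cond3" are false -->
      +     * </if>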
      +     * }
      +     * 
      + *

      + * This syntax requires more complicated parsing as the branches and + * executable contents within each branch are defined on the same level. + * + * @param node the node to create the If object from. Must + * contain at least one {@literal } element and optionally + * {@literal } or {@literal } tags. + * @throws IllegalArgumentException when more than one {@literal } + * statement is present + */ + public static If fromNode(Node node) throws IllegalArgumentException { + NodeList childNodes = node.getChildNodes(); + List elseIfs = new ArrayList<>(); + List elses = new ArrayList<>(); + + String curBranch = IF_ELEMENT; + List curContents = new ArrayList<>(); + List ifContents = new ArrayList<>(); + + for (int i = 0; i < childNodes.getLength(); i++) { + Node childNode = childNodes.item(i); + String nodeName = childNode.getNodeName(); + if (nodeName.equals(ELSEIF_ELEMENT) || nodeName.equals(ELSE_ELEMENT)) { + if (curBranch.equals(IF_ELEMENT)) { + ifContents = new ArrayList<>(curContents); + } + + addBranch(curBranch, curContents, elseIfs, elses); + curBranch = nodeName; + curContents.clear(); + } else if (ALLOWED_CONTENTS.contains(nodeName)) { + curContents.add(ExecutableContent.fromNode(childNode)); + } + } + + if (curBranch.equals(IF_ELEMENT)) { + ifContents = curContents; + } else { + // Close the last branch, if there is any + addBranch(curBranch, curContents, elseIfs, elses); + } + + if (elses.size() > 1) { + throw new IllegalArgumentException(" element may only contain at most one else branch"); + } + return new If(NodeUtil.getAttribute(node, COND_ATTRIBUTE), ifContents, elseIfs, elses.isEmpty() ? 
null : elses.get(0)); + } + + @Override + public String toString() { + return "If"; + } +} diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/Send.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/Send.java new file mode 100644 index 000000000..d2a8fff8c --- /dev/null +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/Send.java @@ -0,0 +1,9 @@ +package de.jplag.scxml.parser.model.executable_content; + +public record Send(String event, String delay) implements ExecutableContent { + + @Override + public String toString() { + return String.format("Send (event='%s', delay='%s') {", event, delay); + } +} diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/SimpleExecutableContent.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/SimpleExecutableContent.java new file mode 100644 index 000000000..569b5a6a9 --- /dev/null +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/SimpleExecutableContent.java @@ -0,0 +1,23 @@ +package de.jplag.scxml.parser.model.executable_content; + +/** + * Represents simple executable content as defined in + * sections 4.2 - 4.7 of the SCXML specification. + * Other executable content is defined in the subclasses {@link Action}, {@link Send}, {@link Cancel}, + * {@link If}, {@link ElseIf} and {@link Else}. 
+ */ +public record SimpleExecutableContent(Type type) implements ExecutableContent { + + @Override + public String toString() { + return String.format("SimpleExecutableContent (type=%s) {", type); + } + + public enum Type { + RAISE, + ASSIGNMENT, + SCRIPT, + FOREACH, + LOG + } +} diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/util/NodeUtil.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/util/NodeUtil.java new file mode 100644 index 000000000..e8d0ae89e --- /dev/null +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/util/NodeUtil.java @@ -0,0 +1,95 @@ +package de.jplag.scxml.parser.util; + +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; + +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +/** + * Utility class providing methods for conveniently iterating over nodes in the DOM. + */ +public final class NodeUtil { + + /** + * Iterates over all immediate child nodes of the given root node and returns + * a list of child nodes whose node names match any of the provided node names. + * + * @param root the root node + * @param childNames a set of child node names to consider + * @return a list of matching child nodes + */ + public static List getChildNodes(Node root, Set childNames) { + List matchingChildren = new ArrayList<>(); + NodeList children = root.getChildNodes(); + for (int i = 0; i < children.getLength(); i++) { + Node child = children.item(i); + if (childNames.contains(child.getNodeName())) { + matchingChildren.add(children.item(i)); + } + } + // matchingChildren.sort(Comparator.comparing(Node::getNodeName)); + return matchingChildren; + } + + /** + * Iterates over all immediate direct child nodes of the given root node and returns + * a list of child nodes whose node names match the provided node name. 
+ * + * @param root the root node + * @param childName the child node name to consider + * @return a list of matching child nodes + */ + public static List getChildNodes(Node root, String childName) { + return getChildNodes(root, Set.of(childName)); + } + + /** + * Iterates over all immediate direct child nodes of the given root node and returns + * the first child node whose node name matches the provided node name. + * If there are no matching nodes, null is returned. + * + * @param root the root node + * @param childName the node name to consider + * @return the first matching child node, or null if none are found + */ + public static Node getFirstChild(Node root, String childName) { + List children = getChildNodes(root, Set.of(childName)); + return children.isEmpty() ? null : children.get(0); + } + + /** + * Recursively iterates over all child nodes of the given root node and returns + * a list of child nodes whose node names match the provided node name. + * + * @param root the root node + * @param childName the node name to consider + * @return a list of matching child nodes + */ + public static List getNodesRecursive(Node root, String childName) { + List matchingNodes = new ArrayList<>(); + if (root.getNodeName().equals(childName)) { + matchingNodes.add(root); + } + NodeList children = root.getChildNodes(); + for (int i = 0; i < children.getLength(); i++) { + Node child = children.item(i); + matchingNodes.addAll(getNodesRecursive(child, childName)); + } + return matchingNodes; + } + + /** + * @return the value of the attribute specified by name + * of the given node or null if the node does not contain + * an attribute with this name + */ + public static String getAttribute(Node node, String name) { + Node attribute = node.getAttributes().getNamedItem(name); + if (attribute != null) { + return attribute.getNodeValue(); + } + return null; + } +} diff --git a/languages/scxml/src/main/java/de/jplag/scxml/sorting/NoOpSortingStrategy.java 
b/languages/scxml/src/main/java/de/jplag/scxml/sorting/NoOpSortingStrategy.java new file mode 100644 index 000000000..cfd5816d4 --- /dev/null +++ b/languages/scxml/src/main/java/de/jplag/scxml/sorting/NoOpSortingStrategy.java @@ -0,0 +1,17 @@ +package de.jplag.scxml.sorting; + +import de.jplag.scxml.parser.model.StatechartElement; + +import java.util.List; + +/** + * A sorting strategy that returns the provided statechart elements unchanged. + * Can be used in the parser adapter to disable sorting entirely. + */ +public class NoOpSortingStrategy implements SortingStrategy { + + @Override + public List sort(List statechartElements) { + return statechartElements; + } +} diff --git a/languages/scxml/src/main/java/de/jplag/scxml/sorting/RecursiveSortingStrategy.java b/languages/scxml/src/main/java/de/jplag/scxml/sorting/RecursiveSortingStrategy.java new file mode 100644 index 000000000..cc3efdaa8 --- /dev/null +++ b/languages/scxml/src/main/java/de/jplag/scxml/sorting/RecursiveSortingStrategy.java @@ -0,0 +1,38 @@ +package de.jplag.scxml.sorting; + +import de.jplag.scxml.parser.PeekAdapter; +import de.jplag.scxml.parser.model.StatechartElement; +import de.jplag.scxml.util.AbstractScxmlVisitor; + +import java.util.List; + +/** + * This sorting strategy lexicographically sorts the list of statechart elements by + * the token streams they were to produce without affecting the main token stream. + * This implies that child elements of nested model objects have an effect on the + * final token order. The tokens are sorted by the ordinals of their types + * using {@link PeekAdapter#compareTokenTypeLists(List, List)}. + */ +public class RecursiveSortingStrategy implements SortingStrategy { + + private final AbstractScxmlVisitor visitor; + + /** + * Constructs a new sorter based on the recursive strategy. 
+ * + * @param visitor the visitor used to peek tokens + */ + public RecursiveSortingStrategy(AbstractScxmlVisitor visitor) { + this.visitor = visitor; + } + + @Override + public List sort(List statechartElements) { + statechartElements.sort((v1, v2) -> { + List v1TokenOrdinals = visitor.peekTokens(v1); + List v2TokenOrdinals = visitor.peekTokens(v2); + return PeekAdapter.compareTokenTypeLists(v1TokenOrdinals, v2TokenOrdinals); + }); + return statechartElements; + } +} diff --git a/languages/scxml/src/main/java/de/jplag/scxml/sorting/SimpleSortingStrategy.java b/languages/scxml/src/main/java/de/jplag/scxml/sorting/SimpleSortingStrategy.java new file mode 100644 index 000000000..9069a9402 --- /dev/null +++ b/languages/scxml/src/main/java/de/jplag/scxml/sorting/SimpleSortingStrategy.java @@ -0,0 +1,36 @@ +package de.jplag.scxml.sorting; + +import de.jplag.scxml.parser.model.StatechartElement; +import de.jplag.scxml.util.AbstractScxmlVisitor; + +import java.util.List; + + +/** + * This sorting strategy sorts the list of StatechartElements by the ordinal + * of the first token that was to be extracted without affecting the main token stream. + * This implies that child elements of nested model objects do not change the token order. + */ +public class SimpleSortingStrategy implements SortingStrategy { + + private final AbstractScxmlVisitor visitor; + + /** + * Constructs a new sorter based on the simple strategy. 
+ * + * @param visitor the visitor used to peek tokens + */ + public SimpleSortingStrategy(AbstractScxmlVisitor visitor) { + this.visitor = visitor; + } + + @Override + public List sort(List statechartElements) { + statechartElements.sort((v1, v2) -> { + int v1FirstTokenOrdinal = visitor.peekTokens(v1).get(0); + int v2FirstTokenOrdinal = visitor.peekTokens(v2).get(0); + return Integer.compare(v1FirstTokenOrdinal, v2FirstTokenOrdinal); + }); + return statechartElements; + } +} diff --git a/languages/scxml/src/main/java/de/jplag/scxml/sorting/SortingStrategy.java b/languages/scxml/src/main/java/de/jplag/scxml/sorting/SortingStrategy.java new file mode 100644 index 000000000..ccb62a4b8 --- /dev/null +++ b/languages/scxml/src/main/java/de/jplag/scxml/sorting/SortingStrategy.java @@ -0,0 +1,18 @@ +package de.jplag.scxml.sorting; + +import de.jplag.scxml.parser.model.StatechartElement; + +import java.util.List; + +/** + * Represents a sorting strategy for sorting statechart elements. + */ +public interface SortingStrategy { + + /** + * Sorts a list of statechart elements. 
+ * + * @param statechartElements the list of statechart elements to sort + */ + List sort(List statechartElements); +} diff --git a/languages/scxml/src/main/java/de/jplag/scxml/util/AbstractScxmlVisitor.java b/languages/scxml/src/main/java/de/jplag/scxml/util/AbstractScxmlVisitor.java new file mode 100644 index 000000000..612a7f3f7 --- /dev/null +++ b/languages/scxml/src/main/java/de/jplag/scxml/util/AbstractScxmlVisitor.java @@ -0,0 +1,109 @@ +package de.jplag.scxml.util; + +import de.jplag.scxml.parser.PeekAdapter; +import de.jplag.scxml.parser.ScxmlParserAdapter; +import de.jplag.scxml.parser.model.State; +import de.jplag.scxml.parser.model.Statechart; +import de.jplag.scxml.parser.model.StatechartElement; +import de.jplag.scxml.parser.model.Transition; +import de.jplag.scxml.parser.model.executable_content.*; +import de.jplag.scxml.sorting.RecursiveSortingStrategy; +import de.jplag.scxml.sorting.SortingStrategy; + +import java.util.List; +import java.util.Map; +import java.util.function.Consumer; + +/** + * Visitor for all StatechartElements in a Statechart object. + */ +public abstract class AbstractScxmlVisitor { + + protected ScxmlParserAdapter adapter; + protected SortingStrategy sorter; + protected int depth; + + public AbstractScxmlVisitor(ScxmlParserAdapter adapter) { + this.adapter = adapter; + this.sorter = new RecursiveSortingStrategy(this); + } + + /** + * Sets the current sorting strategy for this visitor. + * + * @param sorter the sorter to use for sorting nested + * statechart elements before extracting tokens for them + */ + public void setSorter(SortingStrategy sorter) { + this.sorter = sorter; + } + + /** + * Visits a statechart element without effecting the main + * token stream by temporarily swapping out the current parser + * adapter. Returns a list of collected token type ordinals. 
+ * + * @param element the statechart element to visit + */ + public List peekTokens(StatechartElement element) { + ScxmlParserAdapter prevAdapter = this.adapter; + PeekAdapter peekAdapter = new PeekAdapter(); + // Switch out the main adapter for the peek adapter + // so that the main token stream is not affected + this.adapter = peekAdapter; + visit(element); + this.adapter = prevAdapter; + return peekAdapter.getTokenTypes(); + } + + /** + * Returns the current depth in the statechart. The depth is incremented + * whenever child elements of a nested statechart element are visited + * and decremented after all child elements have been visited. + * + * @return the current depth in the statechart + */ + public int getCurrentStatechartDepth() { + return depth; + } + + /** + * Visits the given statechart element while adding extracted tokens + * to the current parser adapter. + * + * @throws IllegalArgumentException when the statechart element is of a type that is not currently handled + */ + public final void visit(StatechartElement element) throws IllegalArgumentException { + Map, Consumer> visitorMap = Map.of( + Statechart.class, e -> visitStatechart((Statechart) e), + State.class, e -> visitState((State) e), + If.class, e -> visitIf((If) e), + SimpleExecutableContent.class, e -> visitSimpleExecutableContent((SimpleExecutableContent) e), + ExecutableContent.class, e -> visitExecutableContent((ExecutableContent) e), + Transition.class, e -> visitTransition((Transition) e) + ); + if (!visitorMap.containsKey(element.getClass())) { + throw new IllegalArgumentException("AbstractScxmlVisitor.visit: unhandled class " + element.getClass()); + } + visitorMap.get(element.getClass()).accept(element); + } + + protected abstract void visitStatechart(Statechart statechart); + + protected abstract void visitState(State state); + + protected abstract void visitActions(List actions); + + protected abstract void visitIf(If if_); + + protected abstract void visitElseIf(ElseIf elseIf); 
+ + protected abstract void visitElse(Else else_); + + protected abstract void visitExecutableContent(ExecutableContent content); + + protected abstract void visitSimpleExecutableContent(SimpleExecutableContent content); + + protected abstract void visitTransition(Transition transition); + +} diff --git a/languages/scxml/src/main/java/de/jplag/scxml/util/ScxmlView.java b/languages/scxml/src/main/java/de/jplag/scxml/util/ScxmlView.java new file mode 100644 index 000000000..c3c81512f --- /dev/null +++ b/languages/scxml/src/main/java/de/jplag/scxml/util/ScxmlView.java @@ -0,0 +1,68 @@ +package de.jplag.scxml.util; + +import de.jplag.scxml.ScxmlToken; +import de.jplag.scxml.ScxmlTokenType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; + +public class ScxmlView { + + private final File file; + private final StringBuilder builder; + private final Logger logger; + private int line; + + /** + * Constructs a new ScxmlView that turns tokens into a textual representation. + * The provided input file determines the path of the output view file. + * + * @param file the input file corresponding to this view + */ + public ScxmlView(File file) { + this.file = file; + this.builder = new StringBuilder(); + this.logger = LoggerFactory.getLogger(this.getClass()); + this.line = 1; + } + + /** + * Writes the current view file contents to the file specified in the constructor. 
+ * + * @param fileExtension the extension to use for the name of the view file + */ + public void writeToFile(String fileExtension) { + File viewFile = new File(file.toString() + fileExtension); + try (BufferedWriter writer = new BufferedWriter(new FileWriter(viewFile))) { + if (!viewFile.createNewFile()) { + logger.warn("Overwriting statechart view file: {}", viewFile); + } + writer.append(builder.toString()); + } catch (IOException exception) { + logger.error("Could not write statechart view file!", exception); + } + } + + /** + * Enhances the given token by adding information about the + * line and column numbers in the view file. + * At the same time, the contents of the file are constructed. + * + * @param token the token to enhance and add to the view file + * @param depth current depth in the statechart to determine the indent + * in the view file + */ + public ScxmlToken enhanceToken(ScxmlToken token, int depth) { + String prefix = " ".repeat(depth); + ScxmlTokenType type = (ScxmlTokenType) token.getType(); + String element = token.getStatechartElement() == null ? "" : token.getStatechartElement().toString(); + String content = type.isEndToken() ? 
"}" : element; + builder.append(prefix).append(content).append("\n"); + // TokenTrace trace = new TokenTrace(line++, prefix.length() + 1, content.length()); + return new ScxmlToken(token.getType(), token.getFile(), line++, prefix.length() + 1, content.length(), token.getStatechartElement()); + } +} diff --git a/languages/scxml/src/test/java/de/jplag/scxml/ConfigurableScxmlParserAdapter.java b/languages/scxml/src/test/java/de/jplag/scxml/ConfigurableScxmlParserAdapter.java new file mode 100644 index 000000000..931844560 --- /dev/null +++ b/languages/scxml/src/test/java/de/jplag/scxml/ConfigurableScxmlParserAdapter.java @@ -0,0 +1,17 @@ +package de.jplag.scxml; + +import de.jplag.scxml.parser.ScxmlParserAdapter; +import de.jplag.scxml.sorting.SortingStrategy; +import de.jplag.scxml.util.AbstractScxmlVisitor; + +public class ConfigurableScxmlParserAdapter extends ScxmlParserAdapter { + + public void configure(AbstractScxmlVisitor visitor, SortingStrategy sorter) { + visitor.setSorter(sorter); + this.visitor = visitor; + } + + public void setSorter(SortingStrategy sortingStrategy) { + this.visitor.setSorter(sortingStrategy); + } +} diff --git a/languages/scxml/src/test/java/de/jplag/scxml/ScxmlParserTest.java b/languages/scxml/src/test/java/de/jplag/scxml/ScxmlParserTest.java new file mode 100644 index 000000000..3a3263247 --- /dev/null +++ b/languages/scxml/src/test/java/de/jplag/scxml/ScxmlParserTest.java @@ -0,0 +1,135 @@ +package de.jplag.scxml; + +import de.jplag.ParsingException; +import de.jplag.scxml.parser.ScxmlParser; +import de.jplag.scxml.parser.model.State; +import de.jplag.scxml.parser.model.Statechart; +import de.jplag.scxml.parser.model.Transition; +import de.jplag.scxml.parser.model.executable_content.*; +import de.jplag.scxml.util.StateBuilder; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.xml.sax.SAXException; + +import javax.xml.parsers.ParserConfigurationException; +import java.io.File; +import 
java.io.IOException; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; + +import static de.jplag.scxml.parser.model.executable_content.SimpleExecutableContent.Type.ASSIGNMENT; +import static de.jplag.scxml.parser.model.executable_content.SimpleExecutableContent.Type.SCRIPT; +import static org.assertj.core.api.Assertions.assertThat; + +class ScxmlParserTest { + + private static final Path BASE_PATH = Path.of("src", "test", "resources", "de", "jplag", "statecharts"); + private final File baseDirectory = BASE_PATH.toFile(); + + private static final String[] TEST_SUBJECTS = {"simple.scxml", "timed_transition.scxml", "conditional.scxml", "complex.scxml"}; + + // Helper methods for less verbose construction of transitions + + private static Transition transition(String target, String event, List contents) { + return new Transition(target, event, null, contents, false); + } + + private static Transition transition(String target, List contents) { + return transition(target, null, contents); + } + + private static Transition transition(String target, String event, String cond) { + return new Transition(target, event, cond, new ArrayList<>(), false); + } + + private static Transition transition(String target, String event) { + return transition(target, event, new ArrayList<>()); + } + + private static Transition transition(String target) { + return transition(target, (String) null); + } + + @BeforeEach + public void setUp() { + ScxmlLanguage language = new ScxmlLanguage(); + // FileUtil.assertDirectory(baseDirectory, TEST_SUBJECTS); + } + + @Test + void canParseSimpleStatechart() throws ParsingException, ParserConfigurationException, SAXException, IOException { + File testFile = new File(baseDirectory, TEST_SUBJECTS[0]); + Statechart actual = new ScxmlParser().parse(testFile); + + State start = new StateBuilder("Start").setInitial().addTransitions(transition("Blinking", "user.press_button")).build(); + State mainRegion = new 
StateBuilder("main_region").addSubstates(start).build(); + Statechart expected = new Statechart("Statechart", List.of(mainRegion)); + assertThat(actual).usingRecursiveComparison().isEqualTo(expected); + } + + @Test + void canParseTimedTransition() throws ParsingException, ParserConfigurationException, SAXException, IOException { + File testFile = new File(baseDirectory, TEST_SUBJECTS[1]); + Statechart actual = new ScxmlParser().parse(testFile); + + State start = new StateBuilder("Start").addTransitions(Transition.makeTimed(transition("Next", List.of(new SimpleExecutableContent(SCRIPT))))).build(); + Statechart expected = new Statechart("Statechart", List.of(start)); + assertThat(actual).usingRecursiveComparison().isEqualTo(expected); + } + + @Test + void canParseConditional() throws ParserConfigurationException, ParsingException, IOException, SAXException { + File testFile = new File(baseDirectory, TEST_SUBJECTS[2]); + Statechart actual = new ScxmlParser().parse(testFile); + ElseIf elseIf = new ElseIf(List.of(new SimpleExecutableContent(SimpleExecutableContent.Type.RAISE))); + Else _else = new Else(List.of(new SimpleExecutableContent(SimpleExecutableContent.Type.RAISE))); + If firstIf = new If("counter % 3 == 0", List.of(new Send("toggleB", "1s")), List.of(elseIf), _else); + If secondIf = new If("cond", List.of(new SimpleExecutableContent(ASSIGNMENT)), new ArrayList<>(), null); + + State start = new StateBuilder("Start").addOnEntry(firstIf, secondIf).build(); + Statechart expected = new Statechart("statechart", List.of(start)); + assertThat(actual).usingRecursiveComparison().isEqualTo(expected); + } + + @Test + void canParseComplexStatechart() throws ParsingException, ParserConfigurationException, SAXException, IOException { + File testFile = new File(baseDirectory, TEST_SUBJECTS[3]); + Statechart actual = new ScxmlParser().parse(testFile); + + State start = new StateBuilder("Start").setInitial() + .addTransitions(transition("Blinking", "user.press_button", 
List.of(new SimpleExecutableContent(ASSIGNMENT)))).build(); + + State light = new StateBuilder("Light") + .addTransitions(transition("Dark")) + .addOnEntry(new If("true", new SimpleExecutableContent(ASSIGNMENT))).build(); + + State dark = new StateBuilder("Dark") + .addTransitions( + transition("Start", null, "t == 5"), + transition("Light", "C") + ) + .addOnEntry(new Send("A", "1s")) + .addOnExit(new Cancel("B")).build(); + + State blinking = new StateBuilder("Blinking") + .addSubstates(light, dark) + .addTransitions(transition("Start", "user.press_button")) + .addOnEntry(new SimpleExecutableContent(ASSIGNMENT)).build(); + + State mainRegion = new StateBuilder("main_region").addSubstates(start, blinking).build(); + Statechart expected = new Statechart("Statechart", List.of(mainRegion)); + assertThat(actual).usingRecursiveComparison().isEqualTo(expected); + } + +} + + + + + + + + + + diff --git a/languages/scxml/src/test/java/de/jplag/scxml/ScxmlTokenGeneratorTest.java b/languages/scxml/src/test/java/de/jplag/scxml/ScxmlTokenGeneratorTest.java new file mode 100644 index 000000000..d966418db --- /dev/null +++ b/languages/scxml/src/test/java/de/jplag/scxml/ScxmlTokenGeneratorTest.java @@ -0,0 +1,64 @@ +package de.jplag.scxml; + +import de.jplag.ParsingException; +import de.jplag.Token; +import de.jplag.TokenType; +import de.jplag.scxml.parser.ScxmlParserAdapter; +import de.jplag.scxml.parser.SimpleScxmlTokenGenerator; +import de.jplag.scxml.sorting.NoOpSortingStrategy; +import de.jplag.scxml.sorting.RecursiveSortingStrategy; +import de.jplag.scxml.util.AbstractScxmlVisitor; +import org.junit.jupiter.api.Test; + +import java.io.File; +import java.nio.file.Path; +import java.util.List; +import java.util.Set; + +import static de.jplag.SharedTokenType.FILE_END; +import static de.jplag.scxml.ScxmlTokenType.*; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.assertj.core.api.Assertions.assertThat; + +public class ScxmlTokenGeneratorTest { + 
+ private static final Path BASE_PATH = Path.of("src", "test", "resources", "de", "jplag", "statecharts"); + private static final String[] TEST_SUBJECTS = {"complex.scxml", "reordered.scxml", "coverage.scxml"}; + private final File baseDirectory = BASE_PATH.toFile(); + + private List getTokenTypes(ScxmlParserAdapter adapter, File testFile) throws ParsingException { + return adapter.parse(Set.of(testFile)).stream().map(Token::getType).toList(); + } + + @Test + void testRecursiveSorter() throws ParsingException { + File originalTestFile = new File(baseDirectory, TEST_SUBJECTS[0]); + ConfigurableScxmlParserAdapter adapter = new ConfigurableScxmlParserAdapter(); + AbstractScxmlVisitor visitor = new SimpleScxmlTokenGenerator(adapter); + adapter.configure(visitor, new NoOpSortingStrategy()); + + List expectedTokenTypes = List.of( + STATE, STATE, TRANSITION, ASSIGNMENT, TRANSITION_END, STATE_END, STATE, ON_ENTRY, ASSIGNMENT, + ACTION_END, TRANSITION, TRANSITION_END, STATE, ON_ENTRY, IF, ASSIGNMENT, IF_END, ACTION_END, TRANSITION, + TRANSITION_END, STATE_END, STATE, ON_ENTRY, SEND, ACTION_END, ON_EXIT, CANCEL, ACTION_END, TRANSITION, + TRANSITION_END, TRANSITION, TRANSITION_END, STATE_END, STATE_END, STATE_END, FILE_END + ); + + List originalTokenTypes = getTokenTypes(adapter, originalTestFile); + assertEquals(expectedTokenTypes, originalTokenTypes); + adapter.setSorter(new RecursiveSortingStrategy(visitor)); + + File reorderedTestFile = new File(baseDirectory, TEST_SUBJECTS[1]); + List reorderedTokenTypes = getTokenTypes(adapter, reorderedTestFile); + // Check that the token sequences is the same when applying the recursive sorter on the reordered file + assertEquals(expectedTokenTypes, reorderedTokenTypes); + } + + @Test + void testCoverage() throws ParsingException { + File testFile = new File(baseDirectory, TEST_SUBJECTS[2]); + ScxmlParserAdapter adapter = new ScxmlParserAdapter(); + List tokenTypes = getTokenTypes(adapter, testFile); + 
assertThat(tokenTypes).containsExactlyInAnyOrder(ScxmlTokenType.values()); + } +} diff --git a/languages/scxml/src/test/java/de/jplag/scxml/util/StateBuilder.java b/languages/scxml/src/test/java/de/jplag/scxml/util/StateBuilder.java new file mode 100644 index 000000000..23da9f296 --- /dev/null +++ b/languages/scxml/src/test/java/de/jplag/scxml/util/StateBuilder.java @@ -0,0 +1,58 @@ +package de.jplag.scxml.util; + +import de.jplag.scxml.parser.model.State; +import de.jplag.scxml.parser.model.Transition; +import de.jplag.scxml.parser.model.executable_content.Action; +import de.jplag.scxml.parser.model.executable_content.ExecutableContent; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class StateBuilder { + + private final String id; + private final List actions = new ArrayList<>(); + private ArrayList transitions = new ArrayList<>(); + private List substates = new ArrayList<>(); + private boolean initial; + private boolean parallel; + + public StateBuilder(String id) { + this.id = id; + } + + public StateBuilder setParallel() { + parallel = true; + return this; + } + + public StateBuilder setInitial() { + initial = true; + return this; + } + + public StateBuilder addTransitions(Transition... transitions) { + this.transitions = new ArrayList<>(List.of(transitions)); + return this; + } + + public StateBuilder addSubstates(State... substates) { + this.substates = Arrays.asList(substates); + return this; + } + + public StateBuilder addOnEntry(ExecutableContent... contents) { + this.actions.add(new Action(Action.Type.ON_ENTRY, List.of(contents))); + return this; + } + + public StateBuilder addOnExit(ExecutableContent... 
contents) { + this.actions.add(new Action(Action.Type.ON_EXIT, List.of(contents))); + return this; + } + + public State build() { + return new State(id, transitions, substates, actions, initial, parallel); + } +} diff --git a/languages/scxml/src/test/resources/de/jplag/statecharts/complex.scxml b/languages/scxml/src/test/resources/de/jplag/statecharts/complex.scxml new file mode 100644 index 000000000..5b8a93d7f --- /dev/null +++ b/languages/scxml/src/test/resources/de/jplag/statecharts/complex.scxml @@ -0,0 +1,50 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/languages/scxml/src/test/resources/de/jplag/statecharts/conditional.scxml b/languages/scxml/src/test/resources/de/jplag/statecharts/conditional.scxml new file mode 100644 index 000000000..e5cbee3ea --- /dev/null +++ b/languages/scxml/src/test/resources/de/jplag/statecharts/conditional.scxml @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/languages/scxml/src/test/resources/de/jplag/statecharts/coverage.scxml b/languages/scxml/src/test/resources/de/jplag/statecharts/coverage.scxml new file mode 100644 index 000000000..e37c013d7 --- /dev/null +++ b/languages/scxml/src/test/resources/de/jplag/statecharts/coverage.scxml @@ -0,0 +1,77 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/languages/scxml/src/test/resources/de/jplag/statecharts/reordered.scxml b/languages/scxml/src/test/resources/de/jplag/statecharts/reordered.scxml new file mode 100644 index 000000000..328450903 --- /dev/null +++ b/languages/scxml/src/test/resources/de/jplag/statecharts/reordered.scxml @@ -0,0 +1,49 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/languages/scxml/src/test/resources/de/jplag/statecharts/simple.scxml 
b/languages/scxml/src/test/resources/de/jplag/statecharts/simple.scxml new file mode 100644 index 000000000..895c56c58 --- /dev/null +++ b/languages/scxml/src/test/resources/de/jplag/statecharts/simple.scxml @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/languages/scxml/src/test/resources/de/jplag/statecharts/timed_transition.scxml b/languages/scxml/src/test/resources/de/jplag/statecharts/timed_transition.scxml new file mode 100644 index 000000000..da609a5ff --- /dev/null +++ b/languages/scxml/src/test/resources/de/jplag/statecharts/timed_transition.scxml @@ -0,0 +1,14 @@ + + + + + + + + + + + + + + From 5d56168e0673100610c9a0142a7ed12109afb952 Mon Sep 17 00:00:00 2001 From: smjonas Date: Mon, 17 Apr 2023 14:29:32 +0200 Subject: [PATCH 067/132] fixup: run spotless:apply --- .../java/de/jplag/scxml/ScxmlLanguage.java | 13 ++-- .../main/java/de/jplag/scxml/ScxmlToken.java | 14 ++-- .../java/de/jplag/scxml/ScxmlTokenType.java | 4 +- .../HandcraftedScxmlTokenGenerator.java | 13 ++-- .../de/jplag/scxml/parser/PeekAdapter.java | 24 +++---- .../de/jplag/scxml/parser/ScxmlParser.java | 55 +++++++--------- .../scxml/parser/ScxmlParserAdapter.java | 39 +++++------ .../parser/SimpleScxmlTokenGenerator.java | 21 +++--- .../de/jplag/scxml/parser/model/State.java | 37 +++++------ .../jplag/scxml/parser/model/Statechart.java | 3 +- .../scxml/parser/model/StatechartElement.java | 4 +- .../jplag/scxml/parser/model/Transition.java | 7 +- .../executable_content/ExecutableContent.java | 33 ++++------ .../parser/model/executable_content/If.java | 35 +++++----- .../SimpleExecutableContent.java | 7 +- .../de/jplag/scxml/parser/util/NodeUtil.java | 42 +++++------- .../scxml/sorting/NoOpSortingStrategy.java | 8 +-- .../sorting/RecursiveSortingStrategy.java | 14 ++-- .../scxml/sorting/SimpleSortingStrategy.java | 12 ++-- .../jplag/scxml/sorting/SortingStrategy.java | 5 +- .../scxml/util/AbstractScxmlVisitor.java | 38 ++++------- 
.../java/de/jplag/scxml/util/ScxmlView.java | 26 ++++---- .../java/de/jplag/scxml/ScxmlParserTest.java | 65 +++++++------------ .../jplag/scxml/ScxmlTokenGeneratorTest.java | 33 +++++----- .../de/jplag/scxml/util/StateBuilder.java | 8 +-- 25 files changed, 237 insertions(+), 323 deletions(-) diff --git a/languages/scxml/src/main/java/de/jplag/scxml/ScxmlLanguage.java b/languages/scxml/src/main/java/de/jplag/scxml/ScxmlLanguage.java index c315830c6..73bc7e54c 100644 --- a/languages/scxml/src/main/java/de/jplag/scxml/ScxmlLanguage.java +++ b/languages/scxml/src/main/java/de/jplag/scxml/ScxmlLanguage.java @@ -1,14 +1,15 @@ package de.jplag.scxml; -import de.jplag.ParsingException; -import de.jplag.Token; -import de.jplag.scxml.parser.ScxmlParserAdapter; -import org.kohsuke.MetaInfServices; - import java.io.File; import java.util.List; import java.util.Set; +import org.kohsuke.MetaInfServices; + +import de.jplag.ParsingException; +import de.jplag.Token; +import de.jplag.scxml.parser.ScxmlParserAdapter; + /** * Language for statecharts in the State Chart XML (SCXML) format. */ @@ -30,7 +31,7 @@ public ScxmlLanguage() { @Override public String[] suffixes() { - return new String[]{FILE_ENDING}; + return new String[] {FILE_ENDING}; } @Override diff --git a/languages/scxml/src/main/java/de/jplag/scxml/ScxmlToken.java b/languages/scxml/src/main/java/de/jplag/scxml/ScxmlToken.java index 13361a4aa..e33edfc28 100644 --- a/languages/scxml/src/main/java/de/jplag/scxml/ScxmlToken.java +++ b/languages/scxml/src/main/java/de/jplag/scxml/ScxmlToken.java @@ -1,11 +1,11 @@ package de.jplag.scxml; +import java.io.File; + import de.jplag.Token; import de.jplag.TokenType; import de.jplag.scxml.parser.model.StatechartElement; -import java.io.File; - /** * Represents a SCXML token. */ @@ -15,9 +15,8 @@ public class ScxmlToken extends Token { /** * Creates an SCXML token that corresponds to a StatechartElement. 
- * - * @param type the type of the token - * @param file the source statechart file + * @param type the type of the token + * @param file the source statechart file * @param element the corresponding StatechartElement this token was extracted from */ public ScxmlToken(TokenType type, File file, StatechartElement element) { @@ -27,9 +26,8 @@ public ScxmlToken(TokenType type, File file, StatechartElement element) { /** * Creates an SCXML token that corresponds to a StatechartElement including file information. - * - * @param type the type of the token - * @param file the source statechart file + * @param type the type of the token + * @param file the source statechart file * @param line the line index in the source code where the token resides, 1-based * @param column the column index, meaning where the token starts in the line, 1-based * @param length the length of the token in the view file diff --git a/languages/scxml/src/main/java/de/jplag/scxml/ScxmlTokenType.java b/languages/scxml/src/main/java/de/jplag/scxml/ScxmlTokenType.java index a4e1946b0..2ae588a7a 100644 --- a/languages/scxml/src/main/java/de/jplag/scxml/ScxmlTokenType.java +++ b/languages/scxml/src/main/java/de/jplag/scxml/ScxmlTokenType.java @@ -43,10 +43,8 @@ public enum ScxmlTokenType implements TokenType { } /** - * Creates a statechart token type that may be an end token. - * An end token represents a token that is always added after + * Creates a statechart token type that may be an end token. An end token represents a token that is always added after * all child tokens for a nested token such as STATE. 
- * * @param isEndToken indicates that the token is an end token */ ScxmlTokenType(String description, boolean isEndToken) { diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/HandcraftedScxmlTokenGenerator.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/HandcraftedScxmlTokenGenerator.java index d19292f92..8460a045e 100644 --- a/languages/scxml/src/main/java/de/jplag/scxml/parser/HandcraftedScxmlTokenGenerator.java +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/HandcraftedScxmlTokenGenerator.java @@ -1,23 +1,20 @@ package de.jplag.scxml.parser; +import static de.jplag.scxml.ScxmlTokenType.*; + import de.jplag.scxml.parser.model.State; import de.jplag.scxml.parser.model.Transition; import de.jplag.scxml.parser.model.executable_content.ExecutableContent; -import static de.jplag.scxml.ScxmlTokenType.*; - /** - * Visits a statechart and its contained elements to extract tokens - * using a handcrafted strategy, i.e. a larger token set than for the - * simple strategy (see {@link SimpleScxmlTokenGenerator}). - * Additional tokens are extracted depending on the attributes of - * the statechart elements. + * Visits a statechart and its contained elements to extract tokens using a handcrafted strategy, i.e. a larger token + * set than for the simple strategy (see {@link SimpleScxmlTokenGenerator}). Additional tokens are extracted depending + * on the attributes of the statechart elements. */ public class HandcraftedScxmlTokenGenerator extends SimpleScxmlTokenGenerator { /** * Creates the visitor. - * * @param adapter is the parser adapter which receives the generated tokens. 
*/ public HandcraftedScxmlTokenGenerator(ScxmlParserAdapter adapter) { diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/PeekAdapter.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/PeekAdapter.java index c7d711853..87105836d 100644 --- a/languages/scxml/src/main/java/de/jplag/scxml/parser/PeekAdapter.java +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/PeekAdapter.java @@ -1,30 +1,26 @@ package de.jplag.scxml.parser; -import de.jplag.scxml.ScxmlTokenType; -import de.jplag.scxml.parser.model.StatechartElement; - import java.util.ArrayList; import java.util.List; +import de.jplag.scxml.ScxmlTokenType; +import de.jplag.scxml.parser.model.StatechartElement; + /** - * A parser adapter that provides a way to retrieve a list of - * token types. When a token is added, only the ordinal of its type - * is stored. This can be used to "peek" at a list of token types - * that are extracted when visiting a statechart. + * A parser adapter that provides a way to retrieve a list of token types. When a token is added, only the ordinal of + * its type is stored. This can be used to "peek" at a list of token types that are extracted when visiting a + * statechart. */ public class PeekAdapter extends ScxmlParserAdapter { private final List tokenTypes = new ArrayList<>(); /** - * Lexicographically compares two lists of integer representations / - * ordinals of token types. - * - * @param first the first list of ordinals of token types + * Lexicographically compares two lists of integer representations / ordinals of token types. 
+ * @param first the first list of ordinals of token types * @param second the second list of ordinals of token types - * @return 0 if the lists are equal, a negative integer if the first list is lexicographically - * less than the second list, or a positive integer if the first list is lexicographically - * greater than the second list + * @return 0 if the lists are equal, a negative integer if the first list is lexicographically less than the second + * list, or a positive integer if the first list is lexicographically greater than the second list */ public static int compareTokenTypeLists(List first, List second) { int size = Math.min(first.size(), second.size()); diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/ScxmlParser.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/ScxmlParser.java index 867175e8a..a19ddfe83 100644 --- a/languages/scxml/src/main/java/de/jplag/scxml/parser/ScxmlParser.java +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/ScxmlParser.java @@ -1,5 +1,21 @@ package de.jplag.scxml.parser; +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; +import java.util.function.Function; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; + +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.xml.sax.SAXException; + import de.jplag.ParsingException; import de.jplag.scxml.parser.model.State; import de.jplag.scxml.parser.model.Statechart; @@ -7,24 +23,10 @@ import de.jplag.scxml.parser.model.executable_content.Action; import de.jplag.scxml.parser.model.executable_content.ExecutableContent; import de.jplag.scxml.parser.util.NodeUtil; -import org.w3c.dom.Document; -import org.w3c.dom.Element; -import org.w3c.dom.Node; -import org.xml.sax.SAXException; - -import javax.xml.parsers.DocumentBuilder; -import 
javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.ParserConfigurationException; -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Set; -import java.util.function.Function; /** - * An SCXML parser implementation based on a Simple API for XML (SAX) parser. - * Constructs a Statechart object during the parse. + * An SCXML parser implementation based on a Simple API for XML (SAX) parser. Constructs a Statechart object during the + * parse. */ public class ScxmlParser { @@ -54,13 +56,10 @@ public ScxmlParser() throws ParserConfigurationException { } /** - * Parses the given SCXML file using Javax and constructs a Statechart object. - * Two passes through the document are performed: - * In the first pass, all {@literal } elements within states are iterated over - * to resolve initial states. In the second pass, the whole document is visited. - * This is necessary because an initial state may occur in the document prior to - * the transitions pointing to it. - * + * Parses the given SCXML file using Javax and constructs a Statechart object. Two passes through the document are + * performed: In the first pass, all {@literal } elements within states are iterated over to resolve initial + * states. In the second pass, the whole document is visited. This is necessary because an initial state may occur in + * the document prior to the transitions pointing to it. 
* @param file the SCXML file to parse */ public Statechart parse(File file) throws IOException, SAXException, ParsingException { @@ -129,13 +128,9 @@ private Transition visitInitialTransition(Node node) { } private Transition visitTransition(Node node) throws IllegalArgumentException { - return new Transition( - NodeUtil.getAttribute(node, TARGET_ATTRIBUTE), - NodeUtil.getAttribute(node, EVENT_ATTRIBUTE), - NodeUtil.getAttribute(node, CONDITION_ATTRIBUTE), - visitExecutableContents(node), + return new Transition(NodeUtil.getAttribute(node, TARGET_ATTRIBUTE), NodeUtil.getAttribute(node, EVENT_ATTRIBUTE), + NodeUtil.getAttribute(node, CONDITION_ATTRIBUTE), visitExecutableContents(node), // Set timed attribute to false initially, may be updated later in the State class - false - ); + false); } } diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/ScxmlParserAdapter.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/ScxmlParserAdapter.java index 658a4c801..e7aecb504 100644 --- a/languages/scxml/src/main/java/de/jplag/scxml/parser/ScxmlParserAdapter.java +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/ScxmlParserAdapter.java @@ -1,5 +1,15 @@ package de.jplag.scxml.parser; +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +import javax.xml.parsers.ParserConfigurationException; + +import org.xml.sax.SAXException; + import de.jplag.AbstractParser; import de.jplag.ParsingException; import de.jplag.Token; @@ -10,18 +20,10 @@ import de.jplag.scxml.parser.model.StatechartElement; import de.jplag.scxml.util.AbstractScxmlVisitor; import de.jplag.scxml.util.ScxmlView; -import org.xml.sax.SAXException; - -import javax.xml.parsers.ParserConfigurationException; -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Set; /** - * Parser adapter for SCXML statecharts that uses a Statechart object - * obtained 
from an instance of ScxmlParser to extract tokens. + * Parser adapter for SCXML statecharts that uses a Statechart object obtained from an instance of ScxmlParser to + * extract tokens. */ public class ScxmlParserAdapter extends AbstractParser { @@ -36,7 +38,6 @@ public ScxmlParserAdapter() { /** * Parses all tokens from a set of files. - * * @param files the set of files. * @return the list of parsed tokens. */ @@ -50,7 +51,6 @@ public List parse(Set files) throws ParsingException { /** * Loads a statechart from a file, parses it and extracts tokens from it. - * * @param file is the statechart file. */ protected void parseModelFile(File file) throws ParsingException { @@ -70,11 +70,9 @@ protected void parseModelFile(File file) throws ParsingException { } /** - * Creates a token from the given type plus the associated statechart element - * and adds it to the token stream. - * The token is enhanced with view information (see {@link ScxmlView}). - * - * @param type the type of the token + * Creates a token from the given type plus the associated statechart element and adds it to the token stream. The token + * is enhanced with view information (see {@link ScxmlView}). + * @param type the type of the token * @param source the statechart element associated with the token */ public void addToken(ScxmlTokenType type, StatechartElement source) { @@ -84,10 +82,9 @@ public void addToken(ScxmlTokenType type, StatechartElement source) { } /** - * Creates a token from the given type without an associated statechart element. - * The token is enhanced with view information (see {@link ScxmlView}). - * - * @param type the type of the token + * Creates a token from the given type without an associated statechart element. The token is enhanced with view + * information (see {@link ScxmlView}). 
+ * @param type the type of the token */ public void addEndToken(ScxmlTokenType type) { addToken(type, null); diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/SimpleScxmlTokenGenerator.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/SimpleScxmlTokenGenerator.java index 65f605c81..4a3c56ce4 100644 --- a/languages/scxml/src/main/java/de/jplag/scxml/parser/SimpleScxmlTokenGenerator.java +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/SimpleScxmlTokenGenerator.java @@ -1,5 +1,10 @@ package de.jplag.scxml.parser; +import static de.jplag.scxml.ScxmlTokenType.*; + +import java.util.List; +import java.util.Map; + import de.jplag.scxml.ScxmlTokenType; import de.jplag.scxml.parser.model.State; import de.jplag.scxml.parser.model.Statechart; @@ -8,21 +13,14 @@ import de.jplag.scxml.parser.model.executable_content.*; import de.jplag.scxml.util.AbstractScxmlVisitor; -import java.util.List; -import java.util.Map; - -import static de.jplag.scxml.ScxmlTokenType.*; - /** - * Visits a statechart and its contained elements to extract tokens - * using a simple strategy, i.e. a smaller token set than for the - * handcrafted strategy (see {@link HandcraftedScxmlTokenGenerator}). + * Visits a statechart and its contained elements to extract tokens using a simple strategy, i.e. a smaller token set + * than for the handcrafted strategy (see {@link HandcraftedScxmlTokenGenerator}). */ public class SimpleScxmlTokenGenerator extends AbstractScxmlVisitor { /** * Creates the visitor. 
- * * @param adapter the parser adapter which receives the generated tokens */ public SimpleScxmlTokenGenerator(ScxmlParserAdapter adapter) { @@ -137,10 +135,7 @@ public void visitExecutableContent(ExecutableContent content) { return; } - Map, ScxmlTokenType> tokenTypeMap = Map.of( - Send.class, SEND, - Cancel.class, CANCEL - ); + Map, ScxmlTokenType> tokenTypeMap = Map.of(Send.class, SEND, Cancel.class, CANCEL); ScxmlTokenType type = tokenTypeMap.get(content.getClass()); System.out.println(content.getClass()); adapter.addToken(type, content); diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/State.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/State.java index a6eb46e25..817e11d6d 100644 --- a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/State.java +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/State.java @@ -1,26 +1,25 @@ package de.jplag.scxml.parser.model; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Stream; + import de.jplag.scxml.parser.model.executable_content.Action; import de.jplag.scxml.parser.model.executable_content.Cancel; import de.jplag.scxml.parser.model.executable_content.ExecutableContent; import de.jplag.scxml.parser.model.executable_content.Send; -import java.util.ArrayList; -import java.util.List; -import java.util.stream.Stream; - -public record State(String id, List transitions, List substates, List actions, - boolean initial, boolean parallel) implements StatechartElement { +public record State(String id, List transitions, List substates, List actions, boolean initial, boolean parallel) + implements StatechartElement { /** * Constructs a new state. 
- * - * @param id the ID of the state + * @param id the ID of the state * @param transitions a non-null list of outgoing transitions of this state - * @param substates a non-null list of substates of this state - * @param actions a non-null list of actions associated with this state - * @param initial whether this state is an initial state - * @param parallel whether this state is a parallel state + * @param substates a non-null list of substates of this state + * @param actions a non-null list of actions associated with this state + * @param initial whether this state is an initial state + * @param parallel whether this state is a parallel state */ public State(String id, List transitions, List substates, List actions, boolean initial, boolean parallel) { this.id = id; @@ -35,9 +34,7 @@ public State(String id, List transitions, List substates, Lis } /** - * Constructs a state with an ID, setting all other variables - * to default values. - * + * Constructs a state with an ID, setting all other variables to default values. * @param id the ID of the state */ public State(String id) { @@ -52,8 +49,7 @@ public boolean isRegion() { } /** - * @return whether this state is a simple state, meaning that is - * neither an initial state nor a parallel state + * @return whether this state is a simple state, meaning that is neither an initial state nor a parallel state */ public boolean isSimple() { return !initial && !parallel; @@ -94,10 +90,9 @@ private void removeTimedTransitionElements(Action onEntry, Send send, Action onE } /** - * Sets the timed attribute of each transition of this state that is timed. - * To model a timed transition, itemis Create adds onentry.send, onexit.cancel - * and transition elements with matching IDs. - * These elements will be removed if they are part of a timed transition. + * Sets the timed attribute of each transition of this state that is timed. 
To model a timed transition, itemis Create + * adds onentry.send, onexit.cancel and transition elements with matching IDs. These elements will be removed if they + * are part of a timed transition. **/ private void updateTimedTransitions() { if (this.transitions().isEmpty() || this.actions().isEmpty()) { diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/Statechart.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/Statechart.java index 379409b6f..738f3e974 100644 --- a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/Statechart.java +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/Statechart.java @@ -4,8 +4,7 @@ /** * Represents an SCXML statechart. - * - * @param name the name of the statechart + * @param name the name of the statechart * @param states a list of states comprising this statechart */ public record Statechart(String name, List states) implements StatechartElement { diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/StatechartElement.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/StatechartElement.java index 16ab8a610..9696edd07 100644 --- a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/StatechartElement.java +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/StatechartElement.java @@ -1,8 +1,8 @@ package de.jplag.scxml.parser.model; /** - * Defines a common supertype of all elements in an SCXML statechart. - * This is used to be able to handle any concrete statechart element. + * Defines a common supertype of all elements in an SCXML statechart. This is used to be able to handle any concrete + * statechart element. 
*/ public interface StatechartElement { } diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/Transition.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/Transition.java index fd3c26f2c..1b3844e99 100644 --- a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/Transition.java +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/Transition.java @@ -1,12 +1,11 @@ package de.jplag.scxml.parser.model; -import de.jplag.scxml.parser.model.executable_content.ExecutableContent; - import java.util.List; import java.util.Objects; -public record Transition(String target, String event, String cond, List contents, - boolean timed) implements StatechartElement { +import de.jplag.scxml.parser.model.executable_content.ExecutableContent; + +public record Transition(String target, String event, String cond, List contents, boolean timed) implements StatechartElement { public static Transition makeTimed(Transition transition) { return new Transition(transition.target, null, transition.cond, transition.contents, true); diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/ExecutableContent.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/ExecutableContent.java index 892ba09fc..07c027a3f 100644 --- a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/ExecutableContent.java +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/ExecutableContent.java @@ -1,24 +1,21 @@ package de.jplag.scxml.parser.model.executable_content; -import de.jplag.scxml.parser.model.StatechartElement; -import de.jplag.scxml.parser.util.NodeUtil; -import org.w3c.dom.Node; +import static de.jplag.scxml.parser.model.executable_content.SimpleExecutableContent.Type.*; import java.util.Set; -import static de.jplag.scxml.parser.model.executable_content.SimpleExecutableContent.Type.*; +import org.w3c.dom.Node; + +import 
de.jplag.scxml.parser.model.StatechartElement; +import de.jplag.scxml.parser.util.NodeUtil; public interface ExecutableContent extends StatechartElement { /** - * Defines the set of allowed XML element names that are considered - * valid executable content. - * and elements are not allowed as they may only present - * as children of an element. + * Defines the set of allowed XML element names that are considered valid executable content. and + * elements are not allowed as they may only present as children of an element. */ - Set ALLOWED_XML_ELEMENTS = Set.of( - "raise", "if", "foreach", "log", "assign", "script", "send", "cancel" - ); + Set ALLOWED_XML_ELEMENTS = Set.of("raise", "if", "foreach", "log", "assign", "script", "send", "cancel"); String ELSE_ELEMENT = "else"; String EVENT_ATTRIBUTE = "event"; @@ -26,13 +23,10 @@ public interface ExecutableContent extends StatechartElement { String DELAY_ATTRIBUTE = "delay"; /** - * Constructs a concrete instance of ExecutableContent based on the name - * of the given node. - * + * Constructs a concrete instance of ExecutableContent based on the name of the given node. 
* @param node the node to create the ExecutableContent from * @return the constructed ExecutableContent - * @throws IllegalArgumentException if the node name is not allowed or the - * executable content could not be created + * @throws IllegalArgumentException if the node name is not allowed or the executable content could not be created */ static ExecutableContent fromNode(Node node) throws IllegalArgumentException { return switch (node.getNodeName()) { @@ -42,11 +36,10 @@ static ExecutableContent fromNode(Node node) throws IllegalArgumentException { case "script" -> new SimpleExecutableContent(SCRIPT); case "foreach" -> new SimpleExecutableContent(FOREACH); case "log" -> new SimpleExecutableContent(LOG); - case "send" -> - new Send(NodeUtil.getAttribute(node, EVENT_ATTRIBUTE), NodeUtil.getAttribute(node, DELAY_ATTRIBUTE)); + case "send" -> new Send(NodeUtil.getAttribute(node, EVENT_ATTRIBUTE), NodeUtil.getAttribute(node, DELAY_ATTRIBUTE)); case "cancel" -> new Cancel(NodeUtil.getAttribute(node, SEND_ID_ATTRIBUTE)); - default -> - throw new IllegalArgumentException("ExecutableContent.fromNode: invalid node " + node.getNodeName() + node.getParentNode().getNodeName()); + default -> throw new IllegalArgumentException( + "ExecutableContent.fromNode: invalid node " + node.getNodeName() + node.getParentNode().getNodeName()); }; } } diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/If.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/If.java index ffee68db0..2e7817b3b 100644 --- a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/If.java +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/If.java @@ -1,19 +1,17 @@ package de.jplag.scxml.parser.model.executable_content; -import de.jplag.scxml.parser.util.NodeUtil; -import org.w3c.dom.Node; -import org.w3c.dom.NodeList; - import java.util.ArrayList; import java.util.List; import java.util.Set; 
-public record If(String cond, List contents, List elseIfs, - Else else_) implements ExecutableContent { +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; + +import de.jplag.scxml.parser.util.NodeUtil; + +public record If(String cond, List contents, List elseIfs, Else else_) implements ExecutableContent { - private static final Set ALLOWED_CONTENTS = Set.of( - "raise", "if", "foreach", "log", "assign", "script", "send", "cancel" - ); + private static final Set ALLOWED_CONTENTS = Set.of("raise", "if", "foreach", "log", "assign", "script", "send", "cancel"); private static final String IF_ELEMENT = "if"; private static final String ELSEIF_ELEMENT = "elseif"; @@ -32,10 +30,10 @@ private static void addBranch(String branch, List contents, L } /** - * Constructs an If statechart element from a given node - * with optional ElseIf or Else branches. - * The W3C SCXML specification defines a valid {@literal } element as follows: + * Constructs an If statechart element from a given node with optional ElseIf or Else branches. The W3C SCXML + * specification defines a valid {@literal } element as follows: *

      + * *

            * {@code
            * 
      @@ -50,14 +48,11 @@ private static void addBranch(String branch, List contents, L
            * }
            * 
      *

      - * This syntax requires more complicated parsing as the branches and - * executable contents within each branch are defined on the same level. - * - * @param node the node to create the If object from. Must - * contain at least one {@literal } element and optionally - * {@literal } or {@literal } tags. - * @throws IllegalArgumentException when more than one {@literal } - * statement is present + * This syntax requires more complicated parsing as the branches and executable contents within each branch are defined + * on the same level. + * @param node the node to create the If object from. Must contain at least one {@literal } element and optionally + * {@literal } or {@literal } tags. + * @throws IllegalArgumentException when more than one {@literal } statement is present */ public static If fromNode(Node node) throws IllegalArgumentException { NodeList childNodes = node.getChildNodes(); diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/SimpleExecutableContent.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/SimpleExecutableContent.java index 569b5a6a9..5ba4dbcf9 100644 --- a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/SimpleExecutableContent.java +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/SimpleExecutableContent.java @@ -1,10 +1,9 @@ package de.jplag.scxml.parser.model.executable_content; /** - * Represents simple executable content as defined in - * sections 4.2 - 4.7 of the SCXML specification. - * Other executable content is defined in the subclasses {@link Action}, {@link Send}, {@link Cancel}, - * {@link If}, {@link ElseIf} and {@link Else}. + * Represents simple executable content as defined in sections 4.2 - + * 4.7 of the SCXML specification. Other executable content is defined in the subclasses {@link Action}, + * {@link Send}, {@link Cancel}, {@link If}, {@link ElseIf} and {@link Else}. 
*/ public record SimpleExecutableContent(Type type) implements ExecutableContent { diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/util/NodeUtil.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/util/NodeUtil.java index e8d0ae89e..3a530cac2 100644 --- a/languages/scxml/src/main/java/de/jplag/scxml/parser/util/NodeUtil.java +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/util/NodeUtil.java @@ -1,22 +1,21 @@ package de.jplag.scxml.parser.util; -import org.w3c.dom.Node; -import org.w3c.dom.NodeList; - import java.util.ArrayList; import java.util.List; import java.util.Set; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; + /** * Utility class providing methods for conveniently iterating over nodes in the DOM. */ public final class NodeUtil { /** - * Iterates over all immediate child nodes of the given root node and returns - * a list of child nodes whose node names match any of the provided node names. - * - * @param root the root node + * Iterates over all immediate child nodes of the given root node and returns a list of child nodes whose node names + * match any of the provided node names. + * @param root the root node * @param childNames a set of child node names to consider * @return a list of matching child nodes */ @@ -34,11 +33,10 @@ public static List getChildNodes(Node root, Set childNames) { } /** - * Iterates over all immediate direct child nodes of the given root node and returns - * a list of child nodes whose node names match the provided node name. - * - * @param root the root node - * @param childName the child node name to consider + * Iterates over all immediate direct child nodes of the given root node and returns a list of child nodes whose node + * names match the provided node name. 
+ * @param root the root node + * @param childName the child node name to consider * @return a list of matching child nodes */ public static List getChildNodes(Node root, String childName) { @@ -46,11 +44,9 @@ public static List getChildNodes(Node root, String childName) { } /** - * Iterates over all immediate direct child nodes of the given root node and returns - * the first child node whose node name matches the provided node name. - * If there are no matching nodes, null is returned. - * - * @param root the root node + * Iterates over all immediate direct child nodes of the given root node and returns the first child node whose node + * name matches the provided node name. If there are no matching nodes, null is returned. + * @param root the root node * @param childName the node name to consider * @return the first matching child node, or null if none are found */ @@ -60,10 +56,9 @@ public static Node getFirstChild(Node root, String childName) { } /** - * Recursively iterates over all child nodes of the given root node and returns - * a list of child nodes whose node names match the provided node name. - * - * @param root the root node + * Recursively iterates over all child nodes of the given root node and returns a list of child nodes whose node names + * match the provided node name. 
+ * @param root the root node * @param childName the node name to consider * @return a list of matching child nodes */ @@ -81,9 +76,8 @@ public static List getNodesRecursive(Node root, String childName) { } /** - * @return the value of the attribute specified by name - * of the given node or null if the node does not contain - * an attribute with this name + * @return the value of the attribute specified by name of the given node or null if the node does not contain an + * attribute with this name */ public static String getAttribute(Node node, String name) { Node attribute = node.getAttributes().getNamedItem(name); diff --git a/languages/scxml/src/main/java/de/jplag/scxml/sorting/NoOpSortingStrategy.java b/languages/scxml/src/main/java/de/jplag/scxml/sorting/NoOpSortingStrategy.java index cfd5816d4..451eebd71 100644 --- a/languages/scxml/src/main/java/de/jplag/scxml/sorting/NoOpSortingStrategy.java +++ b/languages/scxml/src/main/java/de/jplag/scxml/sorting/NoOpSortingStrategy.java @@ -1,12 +1,12 @@ package de.jplag.scxml.sorting; -import de.jplag.scxml.parser.model.StatechartElement; - import java.util.List; +import de.jplag.scxml.parser.model.StatechartElement; + /** - * A sorting strategy that returns the provided statechart elements unchanged. - * Can be used in the parser adapter to disable sorting entirely. + * A sorting strategy that returns the provided statechart elements unchanged. Can be used in the parser adapter to + * disable sorting entirely. 
*/ public class NoOpSortingStrategy implements SortingStrategy { diff --git a/languages/scxml/src/main/java/de/jplag/scxml/sorting/RecursiveSortingStrategy.java b/languages/scxml/src/main/java/de/jplag/scxml/sorting/RecursiveSortingStrategy.java index cc3efdaa8..0df07845d 100644 --- a/languages/scxml/src/main/java/de/jplag/scxml/sorting/RecursiveSortingStrategy.java +++ b/languages/scxml/src/main/java/de/jplag/scxml/sorting/RecursiveSortingStrategy.java @@ -1,17 +1,16 @@ package de.jplag.scxml.sorting; +import java.util.List; + import de.jplag.scxml.parser.PeekAdapter; import de.jplag.scxml.parser.model.StatechartElement; import de.jplag.scxml.util.AbstractScxmlVisitor; -import java.util.List; - /** - * This sorting strategy lexicographically sorts the list of statechart elements by - * the token streams they were to produce without affecting the main token stream. - * This implies that child elements of nested model objects have an effect on the - * final token order. The tokens are sorted by the ordinals of their types - * using {@link PeekAdapter#compareTokenTypeLists(List, List)}. + * This sorting strategy lexicographically sorts the list of statechart elements by the token streams they were to + * produce without affecting the main token stream. This implies that child elements of nested model objects have an + * effect on the final token order. The tokens are sorted by the ordinals of their types using + * {@link PeekAdapter#compareTokenTypeLists(List, List)}. */ public class RecursiveSortingStrategy implements SortingStrategy { @@ -19,7 +18,6 @@ public class RecursiveSortingStrategy implements SortingStrategy { /** * Constructs a new sorter based on the recursive strategy. 
- * * @param visitor the visitor used to peek tokens */ public RecursiveSortingStrategy(AbstractScxmlVisitor visitor) { diff --git a/languages/scxml/src/main/java/de/jplag/scxml/sorting/SimpleSortingStrategy.java b/languages/scxml/src/main/java/de/jplag/scxml/sorting/SimpleSortingStrategy.java index 9069a9402..1e322f32c 100644 --- a/languages/scxml/src/main/java/de/jplag/scxml/sorting/SimpleSortingStrategy.java +++ b/languages/scxml/src/main/java/de/jplag/scxml/sorting/SimpleSortingStrategy.java @@ -1,15 +1,14 @@ package de.jplag.scxml.sorting; -import de.jplag.scxml.parser.model.StatechartElement; -import de.jplag.scxml.util.AbstractScxmlVisitor; - import java.util.List; +import de.jplag.scxml.parser.model.StatechartElement; +import de.jplag.scxml.util.AbstractScxmlVisitor; /** - * This sorting strategy sorts the list of StatechartElements by the ordinal - * of the first token that was to be extracted without affecting the main token stream. - * This implies that child elements of nested model objects do not change the token order. + * This sorting strategy sorts the list of StatechartElements by the ordinal of the first token that was to be extracted + * without affecting the main token stream. This implies that child elements of nested model objects do not change the + * token order. */ public class SimpleSortingStrategy implements SortingStrategy { @@ -17,7 +16,6 @@ public class SimpleSortingStrategy implements SortingStrategy { /** * Constructs a new sorter based on the simple strategy. 
- * * @param visitor the visitor used to peek tokens */ public SimpleSortingStrategy(AbstractScxmlVisitor visitor) { diff --git a/languages/scxml/src/main/java/de/jplag/scxml/sorting/SortingStrategy.java b/languages/scxml/src/main/java/de/jplag/scxml/sorting/SortingStrategy.java index ccb62a4b8..ea0f66491 100644 --- a/languages/scxml/src/main/java/de/jplag/scxml/sorting/SortingStrategy.java +++ b/languages/scxml/src/main/java/de/jplag/scxml/sorting/SortingStrategy.java @@ -1,9 +1,9 @@ package de.jplag.scxml.sorting; -import de.jplag.scxml.parser.model.StatechartElement; - import java.util.List; +import de.jplag.scxml.parser.model.StatechartElement; + /** * Represents a sorting strategy for sorting statechart elements. */ @@ -11,7 +11,6 @@ public interface SortingStrategy { /** * Sorts a list of statechart elements. - * * @param statechartElements the list of statechart elements to sort */ List sort(List statechartElements); diff --git a/languages/scxml/src/main/java/de/jplag/scxml/util/AbstractScxmlVisitor.java b/languages/scxml/src/main/java/de/jplag/scxml/util/AbstractScxmlVisitor.java index 612a7f3f7..7f0a5eed1 100644 --- a/languages/scxml/src/main/java/de/jplag/scxml/util/AbstractScxmlVisitor.java +++ b/languages/scxml/src/main/java/de/jplag/scxml/util/AbstractScxmlVisitor.java @@ -1,5 +1,9 @@ package de.jplag.scxml.util; +import java.util.List; +import java.util.Map; +import java.util.function.Consumer; + import de.jplag.scxml.parser.PeekAdapter; import de.jplag.scxml.parser.ScxmlParserAdapter; import de.jplag.scxml.parser.model.State; @@ -10,10 +14,6 @@ import de.jplag.scxml.sorting.RecursiveSortingStrategy; import de.jplag.scxml.sorting.SortingStrategy; -import java.util.List; -import java.util.Map; -import java.util.function.Consumer; - /** * Visitor for all StatechartElements in a Statechart object. */ @@ -30,19 +30,15 @@ public AbstractScxmlVisitor(ScxmlParserAdapter adapter) { /** * Sets the current sorting strategy for this visitor. 
- * - * @param sorter the sorter to use for sorting nested - * statechart elements before extracting tokens for them + * @param sorter the sorter to use for sorting nested statechart elements before extracting tokens for them */ public void setSorter(SortingStrategy sorter) { this.sorter = sorter; } /** - * Visits a statechart element without effecting the main - * token stream by temporarily swapping out the current parser + * Visits a statechart element without effecting the main token stream by temporarily swapping out the current parser * adapter. Returns a list of collected token type ordinals. - * * @param element the statechart element to visit */ public List peekTokens(StatechartElement element) { @@ -57,10 +53,8 @@ public List peekTokens(StatechartElement element) { } /** - * Returns the current depth in the statechart. The depth is incremented - * whenever child elements of a nested statechart element are visited - * and decremented after all child elements have been visited. - * + * Returns the current depth in the statechart. The depth is incremented whenever child elements of a nested statechart + * element are visited and decremented after all child elements have been visited. * @return the current depth in the statechart */ public int getCurrentStatechartDepth() { @@ -68,20 +62,14 @@ public int getCurrentStatechartDepth() { } /** - * Visits the given statechart element while adding extracted tokens - * to the current parser adapter. - * + * Visits the given statechart element while adding extracted tokens to the current parser adapter. 
* @throws IllegalArgumentException when the statechart element is of a type that is not currently handled */ public final void visit(StatechartElement element) throws IllegalArgumentException { - Map, Consumer> visitorMap = Map.of( - Statechart.class, e -> visitStatechart((Statechart) e), - State.class, e -> visitState((State) e), - If.class, e -> visitIf((If) e), - SimpleExecutableContent.class, e -> visitSimpleExecutableContent((SimpleExecutableContent) e), - ExecutableContent.class, e -> visitExecutableContent((ExecutableContent) e), - Transition.class, e -> visitTransition((Transition) e) - ); + Map, Consumer> visitorMap = Map.of(Statechart.class, + e -> visitStatechart((Statechart) e), State.class, e -> visitState((State) e), If.class, e -> visitIf((If) e), + SimpleExecutableContent.class, e -> visitSimpleExecutableContent((SimpleExecutableContent) e), ExecutableContent.class, + e -> visitExecutableContent((ExecutableContent) e), Transition.class, e -> visitTransition((Transition) e)); if (!visitorMap.containsKey(element.getClass())) { throw new IllegalArgumentException("AbstractScxmlVisitor.visit: unhandled class " + element.getClass()); } diff --git a/languages/scxml/src/main/java/de/jplag/scxml/util/ScxmlView.java b/languages/scxml/src/main/java/de/jplag/scxml/util/ScxmlView.java index c3c81512f..de8e1b83e 100644 --- a/languages/scxml/src/main/java/de/jplag/scxml/util/ScxmlView.java +++ b/languages/scxml/src/main/java/de/jplag/scxml/util/ScxmlView.java @@ -1,15 +1,16 @@ package de.jplag.scxml.util; -import de.jplag.scxml.ScxmlToken; -import de.jplag.scxml.ScxmlTokenType; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; import java.io.IOException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import de.jplag.scxml.ScxmlToken; +import de.jplag.scxml.ScxmlTokenType; + public class ScxmlView { private final File file; @@ -18,9 +19,8 @@ public class 
ScxmlView { private int line; /** - * Constructs a new ScxmlView that turns tokens into a textual representation. - * The provided input file determines the path of the output view file. - * + * Constructs a new ScxmlView that turns tokens into a textual representation. The provided input file determines the + * path of the output view file. * @param file the input file corresponding to this view */ public ScxmlView(File file) { @@ -32,7 +32,6 @@ public ScxmlView(File file) { /** * Writes the current view file contents to the file specified in the constructor. - * * @param fileExtension the extension to use for the name of the view file */ public void writeToFile(String fileExtension) { @@ -48,13 +47,10 @@ public void writeToFile(String fileExtension) { } /** - * Enhances the given token by adding information about the - * line and column numbers in the view file. - * At the same time, the contents of the file are constructed. - * + * Enhances the given token by adding information about the line and column numbers in the view file. At the same time, + * the contents of the file are constructed. 
* @param token the token to enhance and add to the view file - * @param depth current depth in the statechart to determine the indent - * in the view file + * @param depth current depth in the statechart to determine the indent in the view file */ public ScxmlToken enhanceToken(ScxmlToken token, int depth) { String prefix = " ".repeat(depth); diff --git a/languages/scxml/src/test/java/de/jplag/scxml/ScxmlParserTest.java b/languages/scxml/src/test/java/de/jplag/scxml/ScxmlParserTest.java index 3a3263247..67dc2ff05 100644 --- a/languages/scxml/src/test/java/de/jplag/scxml/ScxmlParserTest.java +++ b/languages/scxml/src/test/java/de/jplag/scxml/ScxmlParserTest.java @@ -1,26 +1,28 @@ package de.jplag.scxml; -import de.jplag.ParsingException; -import de.jplag.scxml.parser.ScxmlParser; -import de.jplag.scxml.parser.model.State; -import de.jplag.scxml.parser.model.Statechart; -import de.jplag.scxml.parser.model.Transition; -import de.jplag.scxml.parser.model.executable_content.*; -import de.jplag.scxml.util.StateBuilder; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.xml.sax.SAXException; +import static de.jplag.scxml.parser.model.executable_content.SimpleExecutableContent.Type.ASSIGNMENT; +import static de.jplag.scxml.parser.model.executable_content.SimpleExecutableContent.Type.SCRIPT; +import static org.assertj.core.api.Assertions.assertThat; -import javax.xml.parsers.ParserConfigurationException; import java.io.File; import java.io.IOException; import java.nio.file.Path; import java.util.ArrayList; import java.util.List; -import static de.jplag.scxml.parser.model.executable_content.SimpleExecutableContent.Type.ASSIGNMENT; -import static de.jplag.scxml.parser.model.executable_content.SimpleExecutableContent.Type.SCRIPT; -import static org.assertj.core.api.Assertions.assertThat; +import javax.xml.parsers.ParserConfigurationException; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import 
org.xml.sax.SAXException; + +import de.jplag.ParsingException; +import de.jplag.scxml.parser.ScxmlParser; +import de.jplag.scxml.parser.model.State; +import de.jplag.scxml.parser.model.Statechart; +import de.jplag.scxml.parser.model.Transition; +import de.jplag.scxml.parser.model.executable_content.*; +import de.jplag.scxml.util.StateBuilder; class ScxmlParserTest { @@ -73,7 +75,8 @@ void canParseTimedTransition() throws ParsingException, ParserConfigurationExcep File testFile = new File(baseDirectory, TEST_SUBJECTS[1]); Statechart actual = new ScxmlParser().parse(testFile); - State start = new StateBuilder("Start").addTransitions(Transition.makeTimed(transition("Next", List.of(new SimpleExecutableContent(SCRIPT))))).build(); + State start = new StateBuilder("Start").addTransitions(Transition.makeTimed(transition("Next", List.of(new SimpleExecutableContent(SCRIPT))))) + .build(); Statechart expected = new Statechart("Statechart", List.of(start)); assertThat(actual).usingRecursiveComparison().isEqualTo(expected); } @@ -100,21 +103,13 @@ void canParseComplexStatechart() throws ParsingException, ParserConfigurationExc State start = new StateBuilder("Start").setInitial() .addTransitions(transition("Blinking", "user.press_button", List.of(new SimpleExecutableContent(ASSIGNMENT)))).build(); - State light = new StateBuilder("Light") - .addTransitions(transition("Dark")) - .addOnEntry(new If("true", new SimpleExecutableContent(ASSIGNMENT))).build(); - - State dark = new StateBuilder("Dark") - .addTransitions( - transition("Start", null, "t == 5"), - transition("Light", "C") - ) - .addOnEntry(new Send("A", "1s")) - .addOnExit(new Cancel("B")).build(); - - State blinking = new StateBuilder("Blinking") - .addSubstates(light, dark) - .addTransitions(transition("Start", "user.press_button")) + State light = new StateBuilder("Light").addTransitions(transition("Dark")).addOnEntry(new If("true", new SimpleExecutableContent(ASSIGNMENT))) + .build(); + + State dark = new 
StateBuilder("Dark").addTransitions(transition("Start", null, "t == 5"), transition("Light", "C")) + .addOnEntry(new Send("A", "1s")).addOnExit(new Cancel("B")).build(); + + State blinking = new StateBuilder("Blinking").addSubstates(light, dark).addTransitions(transition("Start", "user.press_button")) .addOnEntry(new SimpleExecutableContent(ASSIGNMENT)).build(); State mainRegion = new StateBuilder("main_region").addSubstates(start, blinking).build(); @@ -123,13 +118,3 @@ void canParseComplexStatechart() throws ParsingException, ParserConfigurationExc } } - - - - - - - - - - diff --git a/languages/scxml/src/test/java/de/jplag/scxml/ScxmlTokenGeneratorTest.java b/languages/scxml/src/test/java/de/jplag/scxml/ScxmlTokenGeneratorTest.java index d966418db..797ebc1d8 100644 --- a/languages/scxml/src/test/java/de/jplag/scxml/ScxmlTokenGeneratorTest.java +++ b/languages/scxml/src/test/java/de/jplag/scxml/ScxmlTokenGeneratorTest.java @@ -1,5 +1,17 @@ package de.jplag.scxml; +import static de.jplag.SharedTokenType.FILE_END; +import static de.jplag.scxml.ScxmlTokenType.*; +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.File; +import java.nio.file.Path; +import java.util.List; +import java.util.Set; + +import org.junit.jupiter.api.Test; + import de.jplag.ParsingException; import de.jplag.Token; import de.jplag.TokenType; @@ -8,17 +20,6 @@ import de.jplag.scxml.sorting.NoOpSortingStrategy; import de.jplag.scxml.sorting.RecursiveSortingStrategy; import de.jplag.scxml.util.AbstractScxmlVisitor; -import org.junit.jupiter.api.Test; - -import java.io.File; -import java.nio.file.Path; -import java.util.List; -import java.util.Set; - -import static de.jplag.SharedTokenType.FILE_END; -import static de.jplag.scxml.ScxmlTokenType.*; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.assertj.core.api.Assertions.assertThat; public class ScxmlTokenGeneratorTest { @@ -37,12 
+38,10 @@ void testRecursiveSorter() throws ParsingException { AbstractScxmlVisitor visitor = new SimpleScxmlTokenGenerator(adapter); adapter.configure(visitor, new NoOpSortingStrategy()); - List expectedTokenTypes = List.of( - STATE, STATE, TRANSITION, ASSIGNMENT, TRANSITION_END, STATE_END, STATE, ON_ENTRY, ASSIGNMENT, - ACTION_END, TRANSITION, TRANSITION_END, STATE, ON_ENTRY, IF, ASSIGNMENT, IF_END, ACTION_END, TRANSITION, - TRANSITION_END, STATE_END, STATE, ON_ENTRY, SEND, ACTION_END, ON_EXIT, CANCEL, ACTION_END, TRANSITION, - TRANSITION_END, TRANSITION, TRANSITION_END, STATE_END, STATE_END, STATE_END, FILE_END - ); + List expectedTokenTypes = List.of(STATE, STATE, TRANSITION, ASSIGNMENT, TRANSITION_END, STATE_END, STATE, ON_ENTRY, ASSIGNMENT, + ACTION_END, TRANSITION, TRANSITION_END, STATE, ON_ENTRY, IF, ASSIGNMENT, IF_END, ACTION_END, TRANSITION, TRANSITION_END, STATE_END, + STATE, ON_ENTRY, SEND, ACTION_END, ON_EXIT, CANCEL, ACTION_END, TRANSITION, TRANSITION_END, TRANSITION, TRANSITION_END, STATE_END, + STATE_END, STATE_END, FILE_END); List originalTokenTypes = getTokenTypes(adapter, originalTestFile); assertEquals(expectedTokenTypes, originalTokenTypes); diff --git a/languages/scxml/src/test/java/de/jplag/scxml/util/StateBuilder.java b/languages/scxml/src/test/java/de/jplag/scxml/util/StateBuilder.java index 23da9f296..6fc232a9d 100644 --- a/languages/scxml/src/test/java/de/jplag/scxml/util/StateBuilder.java +++ b/languages/scxml/src/test/java/de/jplag/scxml/util/StateBuilder.java @@ -1,14 +1,14 @@ package de.jplag.scxml.util; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + import de.jplag.scxml.parser.model.State; import de.jplag.scxml.parser.model.Transition; import de.jplag.scxml.parser.model.executable_content.Action; import de.jplag.scxml.parser.model.executable_content.ExecutableContent; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - public class StateBuilder { private final String id; 
From ee0f7ed65f679cca91227cb3de6aac864c998291 Mon Sep 17 00:00:00 2001 From: smjonas Date: Mon, 17 Apr 2023 15:07:13 +0200 Subject: [PATCH 068/132] Fix coverage test --- .../de/jplag/scxml/parser/SimpleScxmlTokenGenerator.java | 1 - .../test/java/de/jplag/scxml/ScxmlTokenGeneratorTest.java | 5 +++-- .../test/resources/de/jplag/statecharts/coverage.scxml | 8 ++++++++ 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/SimpleScxmlTokenGenerator.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/SimpleScxmlTokenGenerator.java index 4a3c56ce4..30b921600 100644 --- a/languages/scxml/src/main/java/de/jplag/scxml/parser/SimpleScxmlTokenGenerator.java +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/SimpleScxmlTokenGenerator.java @@ -137,7 +137,6 @@ public void visitExecutableContent(ExecutableContent content) { Map, ScxmlTokenType> tokenTypeMap = Map.of(Send.class, SEND, Cancel.class, CANCEL); ScxmlTokenType type = tokenTypeMap.get(content.getClass()); - System.out.println(content.getClass()); adapter.addToken(type, content); } diff --git a/languages/scxml/src/test/java/de/jplag/scxml/ScxmlTokenGeneratorTest.java b/languages/scxml/src/test/java/de/jplag/scxml/ScxmlTokenGeneratorTest.java index 797ebc1d8..3a8edb0c5 100644 --- a/languages/scxml/src/test/java/de/jplag/scxml/ScxmlTokenGeneratorTest.java +++ b/languages/scxml/src/test/java/de/jplag/scxml/ScxmlTokenGeneratorTest.java @@ -57,7 +57,8 @@ void testRecursiveSorter() throws ParsingException { void testCoverage() throws ParsingException { File testFile = new File(baseDirectory, TEST_SUBJECTS[2]); ScxmlParserAdapter adapter = new ScxmlParserAdapter(); - List tokenTypes = getTokenTypes(adapter, testFile); - assertThat(tokenTypes).containsExactlyInAnyOrder(ScxmlTokenType.values()); + List actualUniqueTokenTypes = getTokenTypes(adapter, testFile).stream().filter(x -> x != FILE_END).distinct().toList(); + + 
assertThat(actualUniqueTokenTypes).containsExactlyInAnyOrder(ScxmlTokenType.values()); } } diff --git a/languages/scxml/src/test/resources/de/jplag/statecharts/coverage.scxml b/languages/scxml/src/test/resources/de/jplag/statecharts/coverage.scxml index e37c013d7..8f98c30e2 100644 --- a/languages/scxml/src/test/resources/de/jplag/statecharts/coverage.scxml +++ b/languages/scxml/src/test/resources/de/jplag/statecharts/coverage.scxml @@ -23,10 +23,18 @@ + + + + + + From 698c5923fa09fbb648910e27a02092ab5282e1ac Mon Sep 17 00:00:00 2001 From: smjonas Date: Wed, 19 Apr 2023 08:38:23 +0200 Subject: [PATCH 069/132] Update coverage-report/pom.xml --- coverage-report/pom.xml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/coverage-report/pom.xml b/coverage-report/pom.xml index 223bed320..21b94cc95 100644 --- a/coverage-report/pom.xml +++ b/coverage-report/pom.xml @@ -86,6 +86,11 @@ scheme ${revision} + + de.jplag + scxml + ${revision} + de.jplag swift From 731c51d6e349007f97eaa276fa8e3253f944f184 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Br=C3=B6del?= Date: Wed, 19 Apr 2023 12:01:43 +0200 Subject: [PATCH 070/132] Improve naming & comments further --- core/src/main/java/de/jplag/Submission.java | 9 ++++++--- .../java/de/jplag/normalization/MultipleEdge.java | 11 +++++++---- .../normalization/NormalizationGraphConstructor.java | 3 +++ .../main/java/de/jplag/normalization/Statement.java | 11 +++++++---- .../java/de/jplag/normalization/StatementBuilder.java | 3 +++ 5 files changed, 26 insertions(+), 11 deletions(-) diff --git a/core/src/main/java/de/jplag/Submission.java b/core/src/main/java/de/jplag/Submission.java index af1aa3614..4d0fe2363 100644 --- a/core/src/main/java/de/jplag/Submission.java +++ b/core/src/main/java/de/jplag/Submission.java @@ -87,11 +87,11 @@ public int compareTo(Submission other) { } @Override - public boolean equals(Object object) { - if (object == this) { + public boolean equals(Object obj) { + if (obj == this) { return true; } - if 
(!(object instanceof Submission otherSubmission)) { + if (!(obj instanceof Submission otherSubmission)) { return false; } return otherSubmission.getName().equals(name); @@ -270,6 +270,9 @@ private static File createErrorDirectory(String... subdirectoryNames) { return true; } + /** + * Perform token string normalization, which makes the token string invariant to dead code insertion and independent statement reordering. + */ void normalize() { List originalOrder = getOrder(tokenList); TokenStringNormalizer.normalize(tokenList); diff --git a/core/src/main/java/de/jplag/normalization/MultipleEdge.java b/core/src/main/java/de/jplag/normalization/MultipleEdge.java index f42e2953b..4f7b16c3e 100644 --- a/core/src/main/java/de/jplag/normalization/MultipleEdge.java +++ b/core/src/main/java/de/jplag/normalization/MultipleEdge.java @@ -5,6 +5,9 @@ import de.jplag.semantics.Variable; +/** + * Models a multiple edge in the normalization graph. Contains multiple edges. + */ class MultipleEdge { private Set edges; private boolean isVariableFlow; @@ -24,10 +27,10 @@ boolean isVariableReverseFlow() { } void addEdge(EdgeType type, Variable cause) { - if (type == EdgeType.VARIABLE_FLOW) - isVariableFlow = true; - if (type == EdgeType.VARIABLE_REVERSE_FLOW) - isVariableReverseFlow = true; + switch(type) { + case VARIABLE_FLOW -> isVariableFlow = true; + case VARIABLE_REVERSE_FLOW -> isVariableReverseFlow = true; + } edges.add(new Edge(type, cause)); } } diff --git a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java index d83049ad8..a5f4e496e 100644 --- a/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java +++ b/core/src/main/java/de/jplag/normalization/NormalizationGraphConstructor.java @@ -13,6 +13,9 @@ import de.jplag.Token; import de.jplag.semantics.Variable; +/** + * Constructs the normalization graph. 
+ */ class NormalizationGraphConstructor { private SimpleDirectedGraph graph; private int bidirectionalBlockDepth; diff --git a/core/src/main/java/de/jplag/normalization/Statement.java b/core/src/main/java/de/jplag/normalization/Statement.java index 87fb49544..c7086bb74 100644 --- a/core/src/main/java/de/jplag/normalization/Statement.java +++ b/core/src/main/java/de/jplag/normalization/Statement.java @@ -7,6 +7,9 @@ import de.jplag.Token; import de.jplag.semantics.CodeSemantics; +/** + * Models statements, which are the nodes of the normalization graph. + */ class Statement implements Comparable { private final List tokens; @@ -51,12 +54,12 @@ public int compareTo(Statement other) { } @Override - public boolean equals(Object object) { - if (this == object) + public boolean equals(Object obj) { + if (this == obj) return true; - if (object == null || getClass() != object.getClass()) + if (obj == null || getClass() != obj.getClass()) return false; - return tokens.equals(((Statement) object).tokens); + return tokens.equals(((Statement) obj).tokens); } @Override diff --git a/core/src/main/java/de/jplag/normalization/StatementBuilder.java b/core/src/main/java/de/jplag/normalization/StatementBuilder.java index 5b7099f41..1afaa3eb3 100644 --- a/core/src/main/java/de/jplag/normalization/StatementBuilder.java +++ b/core/src/main/java/de/jplag/normalization/StatementBuilder.java @@ -5,6 +5,9 @@ import de.jplag.Token; +/** + * Builds statements, which are the nodes of the normalization graph. 
+ */ class StatementBuilder { private List tokens; From 585a96860d10ecfd1901d51b306f1a6339cdc970 Mon Sep 17 00:00:00 2001 From: smjonas Date: Wed, 19 Apr 2023 12:20:41 +0200 Subject: [PATCH 071/132] Add more Javadoc --- .../HandcraftedScxmlTokenGenerator.java | 10 ++- .../parser/SimpleScxmlTokenGenerator.java | 3 +- .../model/executable_content/Action.java | 15 +++++ .../model/executable_content/Cancel.java | 4 ++ .../executable_content/ExecutableContent.java | 23 ++++++- .../model/executable_content/ForEach.java | 6 ++ .../parser/model/executable_content/If.java | 23 ++++++- .../de/jplag/scxml/parser/util/NodeUtil.java | 9 ++- .../jplag/scxml/sorting/SortingStrategy.java | 1 + .../scxml/util/AbstractScxmlVisitor.java | 62 ++++++++++++++++++- 10 files changed, 144 insertions(+), 12 deletions(-) diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/HandcraftedScxmlTokenGenerator.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/HandcraftedScxmlTokenGenerator.java index 8460a045e..0f0b0b197 100644 --- a/languages/scxml/src/main/java/de/jplag/scxml/parser/HandcraftedScxmlTokenGenerator.java +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/HandcraftedScxmlTokenGenerator.java @@ -14,13 +14,19 @@ public class HandcraftedScxmlTokenGenerator extends SimpleScxmlTokenGenerator { /** - * Creates the visitor. - * @param adapter is the parser adapter which receives the generated tokens. + * Creates the token generator. + * @param adapter the parser adapter which receives the generated tokens */ public HandcraftedScxmlTokenGenerator(ScxmlParserAdapter adapter) { super(adapter); } + /** + * Visits a state and extracts tokens based on whether its {@code initial} + * and {@code parallel} attributes are set to {@code true}. 
+ * + * @param state the state to visit + */ protected void visitStateAttributes(State state) { if (state.initial()) { adapter.addToken(INITIAL_STATE, state); diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/SimpleScxmlTokenGenerator.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/SimpleScxmlTokenGenerator.java index 30b921600..d8c8fb78e 100644 --- a/languages/scxml/src/main/java/de/jplag/scxml/parser/SimpleScxmlTokenGenerator.java +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/SimpleScxmlTokenGenerator.java @@ -20,7 +20,7 @@ public class SimpleScxmlTokenGenerator extends AbstractScxmlVisitor { /** - * Creates the visitor. + * Creates the token generator. * @param adapter the parser adapter which receives the generated tokens */ public SimpleScxmlTokenGenerator(ScxmlParserAdapter adapter) { @@ -135,6 +135,7 @@ public void visitExecutableContent(ExecutableContent content) { return; } + // TODO: use entries Map, ScxmlTokenType> tokenTypeMap = Map.of(Send.class, SEND, Cancel.class, CANCEL); ScxmlTokenType type = tokenTypeMap.get(content.getClass()); adapter.addToken(type, content); diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/Action.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/Action.java index d0195e0a0..9e058afda 100644 --- a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/Action.java +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/Action.java @@ -3,6 +3,12 @@ import java.util.List; import java.util.Objects; +/** + * Represents and SCXML elements which contain + * executable content to be executed when a state is entered / exited. 
+ * @param type the type of the action ({@link Type#ON_ENTRY} or {@link Type#ON_EXIT}) + * @param contents the list of executable contents within the action + */ public record Action(Type type, List contents) implements ExecutableContent { @Override @@ -15,8 +21,17 @@ public String toString() { return String.format("Action (type=%s) {", type == Type.ON_ENTRY ? "OnEntry" : "OnExit"); } + /** + * The type of the action. + */ public enum Type { + /** + * Represents an SCXML element. + */ ON_ENTRY, + /** + * Represents an SCXML element. + */ ON_EXIT, } } diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/Cancel.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/Cancel.java index b6852236c..2f12923cf 100644 --- a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/Cancel.java +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/Cancel.java @@ -1,5 +1,9 @@ package de.jplag.scxml.parser.model.executable_content; +/** + * Represents a SCXML element. 
+ * @param sendid represents the sendid attribute of the SCXML element which is the ID of the event to be cancelled + */ public record Cancel(String sendid) implements ExecutableContent { @Override diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/ExecutableContent.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/ExecutableContent.java index 07c027a3f..d0ab01328 100644 --- a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/ExecutableContent.java +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/ExecutableContent.java @@ -9,17 +9,36 @@ import de.jplag.scxml.parser.model.StatechartElement; import de.jplag.scxml.parser.util.NodeUtil; +/** + * Represents executable content in an SCXML statechart, which are elements that can be executed + * during state transitions, state entry, state exit or in conditional statements. + */ public interface ExecutableContent extends StatechartElement { /** - * Defines the set of allowed XML element names that are considered valid executable content. and - * elements are not allowed as they may only present as children of an element. + * Defines the set of allowed XML element names that are considered valid executable content. {@literal } and + * {@literal } elements are not allowed as they may only present as children of an {@literal } element. */ Set ALLOWED_XML_ELEMENTS = Set.of("raise", "if", "foreach", "log", "assign", "script", "send", "cancel"); + /** + * String constant for the element. + */ String ELSE_ELEMENT = "else"; + + /** + * String constant for the "event" attribute. + */ String EVENT_ATTRIBUTE = "event"; + + /** + * String constant for the "sendid" attribute. + */ String SEND_ID_ATTRIBUTE = "sendid"; + + /** + * String constant for the "delay" attribute. 
+ */ String DELAY_ATTRIBUTE = "delay"; /** diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/ForEach.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/ForEach.java index 9db18cc2c..70be409fa 100644 --- a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/ForEach.java +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/ForEach.java @@ -2,6 +2,12 @@ import java.util.List; +/** + * Represents a {@code } SCXML element, which is executable content + * that executes its contents for each item in a given data set. + * + * @param contents the list of executable contents within the {@code } element + */ public record ForEach(List contents) implements ExecutableContent { @Override diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/If.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/If.java index 2e7817b3b..1ca55135d 100644 --- a/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/If.java +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/model/executable_content/If.java @@ -9,6 +9,16 @@ import de.jplag.scxml.parser.util.NodeUtil; +/** + * Represents an {@literal } SCXML element, which is an executable content element used for conditional execution. + * The {@literal } element can contain {@literal } and {@literal } branches for handling multiple conditions. 
+ * + * @param cond the cond attribute of the {@literal } element which is the condition expression + * for the contained executable contents to be executed + * @param contents the list of executable contents to be executed when the condition is met + * @param elseIfs represents the list of {@literal } branches in the {@literal } element + * @param else_ the {@literal } branch corresponding to the {@literal } element, or {@code null} if not present + */ public record If(String cond, List contents, List elseIfs, Else else_) implements ExecutableContent { private static final Set ALLOWED_CONTENTS = Set.of("raise", "if", "foreach", "log", "assign", "script", "send", "cancel"); @@ -17,6 +27,15 @@ public record If(String cond, List contents, List els private static final String ELSEIF_ELEMENT = "elseif"; private static final String COND_ATTRIBUTE = "cond"; + + /** + * Constructs an If instance with the specified condition and a list of executable contents. + * The {@code elseIf} attribute is set to an empty list and the {@code else} is set to null. + * + * @param cond the cond attribute of the {@literal } element which is the condition expression + * for the contained executable contents to be executed + * @param contents the list of executable contents to be executed when the condition is met + */ public If(String cond, ExecutableContent... contents) { this(cond, new ArrayList<>(List.of(contents)), new ArrayList<>(), null); } @@ -32,8 +51,7 @@ private static void addBranch(String branch, List contents, L /** * Constructs an If statechart element from a given node with optional ElseIf or Else branches. The W3C SCXML * specification defines a valid {@literal } element as follows: - *

      - * + * *

            * {@code
            * 
      @@ -52,6 +70,7 @@ private static void addBranch(String branch, List contents, L
            * on the same level.
            * @param node the node to create the If object from. Must contain at least one {@literal } element and optionally
            * {@literal } or {@literal } tags.
      +     * @return an instance of If created from the node
            * @throws IllegalArgumentException when more than one {@literal } statement is present
            */
           public static If fromNode(Node node) throws IllegalArgumentException {
      diff --git a/languages/scxml/src/main/java/de/jplag/scxml/parser/util/NodeUtil.java b/languages/scxml/src/main/java/de/jplag/scxml/parser/util/NodeUtil.java
      index 3a530cac2..003d71304 100644
      --- a/languages/scxml/src/main/java/de/jplag/scxml/parser/util/NodeUtil.java
      +++ b/languages/scxml/src/main/java/de/jplag/scxml/parser/util/NodeUtil.java
      @@ -76,8 +76,13 @@ public static List getNodesRecursive(Node root, String childName) {
           }
       
           /**
      -     * @return the value of the attribute specified by name of the given node or null if the node does not contain an
      -     * attribute with this name
      +     * Retrieves the value of an attribute with the specified name from the given node. If the attribute is not present,
      +     * {@code null} is returned.
      +     *
      +     * @param node the node containing the attribute
      +     * @param name the name of the attribute to retrieve
      +     * @return the value of the attribute, or {@code null} if the node does not contain an
      +     * attribute with the given name
            */
           public static String getAttribute(Node node, String name) {
               Node attribute = node.getAttributes().getNamedItem(name);
      diff --git a/languages/scxml/src/main/java/de/jplag/scxml/sorting/SortingStrategy.java b/languages/scxml/src/main/java/de/jplag/scxml/sorting/SortingStrategy.java
      index ea0f66491..095ecffcb 100644
      --- a/languages/scxml/src/main/java/de/jplag/scxml/sorting/SortingStrategy.java
      +++ b/languages/scxml/src/main/java/de/jplag/scxml/sorting/SortingStrategy.java
      @@ -13,5 +13,6 @@ public interface SortingStrategy {
            * Sorts a list of statechart elements.
            * @param statechartElements the list of statechart elements to sort
            */
      +    // TODO: try without type parameter
            List sort(List statechartElements);
       }
      diff --git a/languages/scxml/src/main/java/de/jplag/scxml/util/AbstractScxmlVisitor.java b/languages/scxml/src/main/java/de/jplag/scxml/util/AbstractScxmlVisitor.java
      index 7f0a5eed1..31ba4c5bb 100644
      --- a/languages/scxml/src/main/java/de/jplag/scxml/util/AbstractScxmlVisitor.java
      +++ b/languages/scxml/src/main/java/de/jplag/scxml/util/AbstractScxmlVisitor.java
      @@ -19,8 +19,19 @@
        */
       public abstract class AbstractScxmlVisitor {
       
      +    /**
      +     * The current parser adapter that is called to add new tokens.
      +     */
           protected ScxmlParserAdapter adapter;
      +
      +    /**
      +     * The sorting strategy to use for visiting nested statechart elements.
      +     */
           protected SortingStrategy sorter;
      +
      +    /**
      +     * The current depth in the statechart.
      +     */
           protected int depth;
       
           public AbstractScxmlVisitor(ScxmlParserAdapter adapter) {
      @@ -40,6 +51,7 @@ public void setSorter(SortingStrategy sorter) {
            * Visits a statechart element without effecting the main token stream by temporarily swapping out the current parser
            * adapter. Returns a list of collected token type ordinals.
            * @param element the statechart element to visit
      +     * @return a list of visited token type ordinals
            */
           public List peekTokens(StatechartElement element) {
               ScxmlParserAdapter prevAdapter = this.adapter;
      @@ -76,22 +88,66 @@ public final void visit(StatechartElement element) throws IllegalArgumentExcepti
               visitorMap.get(element.getClass()).accept(element);
           }
       
      +    /**
      +     * Recursively visits a statechart.
      +     *
      +     * @param statechart the statechart to visit
      +     */
           protected abstract void visitStatechart(Statechart statechart);
       
      +    /**
      +     * Recursively visits a state.
      +     *
      +     * @param state the state to visit
      +     */
           protected abstract void visitState(State state);
       
      +    /**
      +     * Recursively visits a transition.
      +     *
      +     * @param transition the transition to visit
      +     */
      +    protected abstract void visitTransition(Transition transition);
      +
      +    /**
      +     * Recursively visits a list of actions.
      +     *
      +     * @param actions the list of actions to visit
      +     */
           protected abstract void visitActions(List actions);
       
      +    /**
      +     * Recursively visits an if statechart element.
      +     *
      +     * @param if_ the if element to visit
      +     */
           protected abstract void visitIf(If if_);
       
      +    /**
      +     * Recursively visits an elseIf statechart element.
      +     *
      +     * @param elseIf the elseIf element to visit
      +     */
           protected abstract void visitElseIf(ElseIf elseIf);
       
      +    /**
      +     * Recursively visits an else statechart element.
      +     *
      +     * @param else_ the else element to visit
      +     */
           protected abstract void visitElse(Else else_);
       
      +    /**
      +     * Recursively visits executable content.
      +     *
      +     * @param content the executable content to visit
      +     */
           protected abstract void visitExecutableContent(ExecutableContent content);
       
      +    /**
      +     * Visits simple executable content.
      +     *
      +     * @param content the simple executable content to visit
      +     */
           protected abstract void visitSimpleExecutableContent(SimpleExecutableContent content);
      -
      -    protected abstract void visitTransition(Transition transition);
      -
       }
      
      From e75819b07e89c5d7d6f69361a9eb38b4830c1f19 Mon Sep 17 00:00:00 2001
      From: =?UTF-8?q?Moritz=20Br=C3=B6del?= 
      Date: Fri, 21 Apr 2023 16:31:15 +0200
      Subject: [PATCH 072/132] Apply spotless
      
      ---
       core/src/main/java/de/jplag/Submission.java                 | 3 ++-
       core/src/main/java/de/jplag/normalization/MultipleEdge.java | 2 +-
       2 files changed, 3 insertions(+), 2 deletions(-)
      
      diff --git a/core/src/main/java/de/jplag/Submission.java b/core/src/main/java/de/jplag/Submission.java
      index 4d0fe2363..0684d371c 100644
      --- a/core/src/main/java/de/jplag/Submission.java
      +++ b/core/src/main/java/de/jplag/Submission.java
      @@ -271,7 +271,8 @@ private static File createErrorDirectory(String... subdirectoryNames) {
           }
       
           /**
      -     * Perform token string normalization, which makes the token string invariant to dead code insertion and independent statement reordering.
      +     * Perform token string normalization, which makes the token string invariant to dead code insertion and independent
      +     * statement reordering.
            */
           void normalize() {
               List originalOrder = getOrder(tokenList);
      diff --git a/core/src/main/java/de/jplag/normalization/MultipleEdge.java b/core/src/main/java/de/jplag/normalization/MultipleEdge.java
      index 4f7b16c3e..c088fa242 100644
      --- a/core/src/main/java/de/jplag/normalization/MultipleEdge.java
      +++ b/core/src/main/java/de/jplag/normalization/MultipleEdge.java
      @@ -27,7 +27,7 @@ boolean isVariableReverseFlow() {
           }
       
           void addEdge(EdgeType type, Variable cause) {
      -        switch(type) {
      +        switch (type) {
                   case VARIABLE_FLOW -> isVariableFlow = true;
                   case VARIABLE_REVERSE_FLOW -> isVariableReverseFlow = true;
               }
      
      From f3f061e94d7baad5032f2812bcdf3e12c0cd48d7 Mon Sep 17 00:00:00 2001
      From: =?UTF-8?q?Moritz=20Br=C3=B6del?= 
      Date: Fri, 21 Apr 2023 17:05:52 +0200
      Subject: [PATCH 073/132] Remove switch again
      
      ---
       .../main/java/de/jplag/normalization/MultipleEdge.java    | 8 ++++----
       1 file changed, 4 insertions(+), 4 deletions(-)
      
      diff --git a/core/src/main/java/de/jplag/normalization/MultipleEdge.java b/core/src/main/java/de/jplag/normalization/MultipleEdge.java
      index c088fa242..6265c5371 100644
      --- a/core/src/main/java/de/jplag/normalization/MultipleEdge.java
      +++ b/core/src/main/java/de/jplag/normalization/MultipleEdge.java
      @@ -27,10 +27,10 @@ boolean isVariableReverseFlow() {
           }
       
           void addEdge(EdgeType type, Variable cause) {
      -        switch (type) {
      -            case VARIABLE_FLOW -> isVariableFlow = true;
      -            case VARIABLE_REVERSE_FLOW -> isVariableReverseFlow = true;
      -        }
      +        if (type == EdgeType.VARIABLE_FLOW)
      +            isVariableFlow = true;
      +        if (type == EdgeType.VARIABLE_REVERSE_FLOW)
      +            isVariableReverseFlow = true;
               edges.add(new Edge(type, cause));
           }
       }
      
      From 97e79ade11dbfa24953794cd80cb7e07c68ff1b7 Mon Sep 17 00:00:00 2001
      From: Alexander Vogt 
      Date: Sat, 22 Apr 2023 10:50:04 +0200
      Subject: [PATCH 074/132] Documented View files
      
      ---
       report-viewer/src/views/ComparisonView.vue | 41 ++++++++++++------
       report-viewer/src/views/FileUploadView.vue | 37 ++++++++++++----
       report-viewer/src/views/OverviewView.vue   | 49 +++++++++++-----------
       3 files changed, 83 insertions(+), 44 deletions(-)
      
      diff --git a/report-viewer/src/views/ComparisonView.vue b/report-viewer/src/views/ComparisonView.vue
      index af44687c4..ae746c197 100644
      --- a/report-viewer/src/views/ComparisonView.vue
      +++ b/report-viewer/src/views/ComparisonView.vue
      @@ -3,6 +3,7 @@
       -->