diff --git a/vtl-prov/pom.xml b/vtl-prov/pom.xml
index 5e74fa512..82db9daba 100644
--- a/vtl-prov/pom.xml
+++ b/vtl-prov/pom.xml
@@ -34,6 +34,21 @@
vtl-parser
1.8.0-SNAPSHOT
+
+ fr.insee.trevas
+ vtl-engine
+ 1.8.0-SNAPSHOT
+
+
+ fr.insee.trevas
+ vtl-spark
+ 1.8.0-SNAPSHOT
+
+
+ fr.insee.trevas
+ vtl-model
+ 1.8.0-SNAPSHOT
+
@@ -45,4 +60,17 @@
+
+
+
+ org.apache.maven.plugins
+ maven-surefire-plugin
+ 3.3.0
+
+ --add-exports java.base/sun.nio.ch=ALL-UNNAMED
+
+
+
+
+
\ No newline at end of file
diff --git a/vtl-prov/src/main/java/fr/insee/vtl/prov/ProvenanceListener.java b/vtl-prov/src/main/java/fr/insee/vtl/prov/ProvenanceListener.java
index c44fdc8e5..f4c42df53 100644
--- a/vtl-prov/src/main/java/fr/insee/vtl/prov/ProvenanceListener.java
+++ b/vtl-prov/src/main/java/fr/insee/vtl/prov/ProvenanceListener.java
@@ -1,5 +1,6 @@
package fr.insee.vtl.prov;
+import fr.insee.vtl.model.Dataset;
import fr.insee.vtl.parser.VtlBaseListener;
import fr.insee.vtl.parser.VtlLexer;
import fr.insee.vtl.parser.VtlParser;
@@ -7,11 +8,15 @@
import fr.insee.vtl.prov.prov.Program;
import fr.insee.vtl.prov.prov.ProgramStep;
import fr.insee.vtl.prov.prov.VariableInstance;
+import fr.insee.vtl.prov.utils.ProvenanceUtils;
import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.misc.Interval;
import org.antlr.v4.runtime.tree.ParseTreeWalker;
-import java.util.Set;
+import javax.script.ScriptEngine;
+import javax.script.ScriptException;
+import java.util.*;
+import java.util.concurrent.atomic.AtomicInteger;
/**
* ANTLR Listener that create provenance objects.
@@ -26,6 +31,22 @@ public class ProvenanceListener extends VtlBaseListener {
private String currentComponentID;
+ private String currentDataframeID;
+
+ private int stepIndex = 1;
+
+ private boolean rootAssignment = true;
+
+ // Map of label/UUID id
+ private final Map availableDataframeUUID = new HashMap<>();
+
+ private Map currentAvailableDataframeUUID = new HashMap<>();
+
+ // Map of label/UUID id
+ private final Map availableVariableUUID = new HashMap<>();
+
+ private Map currentAvailableVariableUUID = new HashMap<>();
+
public ProvenanceListener(String id, String programName) {
program.setId(id);
program.setLabel(programName);
@@ -44,11 +65,14 @@ public void enterStart(VtlParser.StartContext ctx) {
@Override
public void enterTemporaryAssignment(VtlParser.TemporaryAssignmentContext ctx) {
- String id = getText(ctx.varID());
+ String label = getText(ctx.varID());
String sourceCode = getText(ctx);
- currentProgramStep = id;
- ProgramStep programStep = new ProgramStep(id, id, sourceCode);
- DataframeInstance df = new DataframeInstance(id, id);
+ currentProgramStep = label;
+ ProgramStep programStep = new ProgramStep(label, sourceCode, stepIndex);
+ stepIndex++;
+ String dfId = UUID.randomUUID().toString();
+ currentAvailableDataframeUUID.put(label, dfId);
+ DataframeInstance df = new DataframeInstance(dfId, label);
programStep.setProducedDataframe(df);
program.getProgramSteps().add(programStep);
}
@@ -56,15 +80,21 @@ public void enterTemporaryAssignment(VtlParser.TemporaryAssignmentContext ctx) {
@Override
public void exitTemporaryAssignment(VtlParser.TemporaryAssignmentContext ctx) {
currentProgramStep = null;
+ availableDataframeUUID.putAll(currentAvailableDataframeUUID);
+ currentAvailableDataframeUUID = new HashMap<>();
+ rootAssignment = true;
}
@Override
public void enterPersistAssignment(VtlParser.PersistAssignmentContext ctx) {
- String id = getText(ctx.varID());
+ String label = getText(ctx.varID());
String sourceCode = getText(ctx);
- currentProgramStep = id;
- ProgramStep programStep = new ProgramStep(id, id, sourceCode);
- DataframeInstance df = new DataframeInstance(id, id);
+ currentProgramStep = label;
+ ProgramStep programStep = new ProgramStep(label, sourceCode, stepIndex);
+ stepIndex++;
+ String dfId = UUID.randomUUID().toString();
+ currentAvailableDataframeUUID.put(label, dfId);
+ DataframeInstance df = new DataframeInstance(dfId, label);
programStep.setProducedDataframe(df);
program.getProgramSteps().add(programStep);
}
@@ -72,23 +102,33 @@ public void enterPersistAssignment(VtlParser.PersistAssignmentContext ctx) {
@Override
public void exitPersistAssignment(VtlParser.PersistAssignmentContext ctx) {
currentProgramStep = null;
+ availableDataframeUUID.putAll(currentAvailableDataframeUUID);
+ currentAvailableDataframeUUID = new HashMap<>();
+ rootAssignment = true;
}
@Override
public void enterVarID(VtlParser.VarIDContext ctx) {
- String id = ctx.IDENTIFIER().getText();
- if (!id.equals(currentProgramStep)) {
- ProgramStep programStep = program.getProgramStepById(currentProgramStep);
+ String label = ctx.IDENTIFIER().getText();
+ if (!rootAssignment) {
+ ProgramStep programStep = program.getProgramStepByLabel(currentProgramStep);
if (!isInDatasetClause) {
Set consumedDataframe = programStep.getConsumedDataframe();
- DataframeInstance df = new DataframeInstance(id, id);
+ String dfId = ProvenanceUtils.getOrBuildUUID(availableDataframeUUID, label);
+ DataframeInstance df = new DataframeInstance(dfId, label);
consumedDataframe.add(df);
+ // Certainly don't need to reset? To check!
+ currentDataframeID = label;
}
if (isInDatasetClause && null != currentComponentID) {
Set usedVariables = programStep.getUsedVariables();
- VariableInstance v = new VariableInstance(id, id);
+ String varUUID = ProvenanceUtils.getOrBuildUUID(availableVariableUUID, currentDataframeID + "|" + label);
+ VariableInstance v = new VariableInstance(varUUID, label);
+ v.setParentDataframe(currentDataframeID);
usedVariables.add(v);
}
+ } else {
+ rootAssignment = false;
}
}
@@ -104,11 +144,18 @@ public void exitDatasetClause(VtlParser.DatasetClauseContext ctx) {
@Override
public void enterComponentID(VtlParser.ComponentIDContext ctx) {
- String id = ctx.getText();
- ProgramStep programStep = program.getProgramStepById(currentProgramStep);
+ String label = ctx.getText();
+ ProgramStep programStep = program.getProgramStepByLabel(currentProgramStep);
Set assignedVariables = programStep.getAssignedVariables();
- VariableInstance v = new VariableInstance(id, id);
+ String variableUUID = ProvenanceUtils.getOrBuildUUID(availableDataframeUUID, label);
+ VariableInstance v = new VariableInstance(variableUUID, label);
assignedVariables.add(v);
+ currentAvailableVariableUUID.put(currentDataframeID + "|" + label, variableUUID);
+ }
+
+ @Override
+ public void exitComponentID(VtlParser.ComponentIDContext ctx) {
+ System.out.println(ctx.IDENTIFIER());
}
@Override
@@ -119,6 +166,8 @@ public void enterCalcClauseItem(VtlParser.CalcClauseItemContext ctx) {
@Override
public void exitCalcClauseItem(VtlParser.CalcClauseItemContext ctx) {
currentComponentID = null;
+ availableVariableUUID.putAll(currentAvailableVariableUUID);
+ currentAvailableVariableUUID = new HashMap<>();
}
@Override
@@ -148,4 +197,63 @@ public static Program run(String expr, String id, String programName) {
return provenanceListener.getProgram();
}
+ public static Program runWithBindings(ScriptEngine engine, String expr, String id, String programName) {
+ Program program = run(expr, id, programName);
+ // 0 check if input dataset are empty?
+ // Keep already handled dataset
+ List dsHandled = new ArrayList<>();
+ // Split script to loop over program steps and run them
+ AtomicInteger index = new AtomicInteger(1);
+ Arrays.stream(expr.split(";"))
+ .map(e -> e + ";")
+ .forEach(stepScript -> {
+ int i = index.getAndIncrement();
+ // 1 - Handle input dataset
+ ProgramStep step = program.getProgramStepByIndex(i);
+ Set consumedDataframe = step.getConsumedDataframe();
+ consumedDataframe.forEach(d -> {
+ if (!dsHandled.contains(d.getLabel())) {
+ Dataset ds = (Dataset) engine.getContext().getAttribute(d.getLabel());
+ ds.getDataStructure().values().forEach(c -> {
+ VariableInstance variableInstance = step.getUsedVariables()
+ .stream()
+ .filter(v ->
+ v.getParentDataframe().equals(d.getLabel()) &&
+ v.getLabel().equals(c.getName()))
+ .findFirst()
+ .orElse(new VariableInstance(c.getName()));
+ variableInstance.setRole(c.getRole());
+ variableInstance.setType(c.getType());
+ d.getHasVariableInstances().add(variableInstance);
+ });
+ }
+ });
+ try {
+ engine.eval(stepScript);
+ } catch (ScriptException e) {
+ throw new RuntimeException(e);
+ }
+ // Improve built variables attributes
+ DataframeInstance producedDataframe = step.getProducedDataframe();
+ Dataset ds = (Dataset) engine.getContext().getAttribute(producedDataframe.getLabel());
+
+ ds.getDataStructure().values().forEach(c -> {
+ VariableInstance variableInstance = step.getAssignedVariables()
+ .stream()
+ .filter(v -> v.getLabel().equals(c.getName()))
+ .findFirst()
+ // TODO: refine variable detection in usedVariable
+ .orElse(new VariableInstance(c.getName()));
+ variableInstance.setRole(c.getRole());
+ variableInstance.setType(c.getType());
+ producedDataframe.getHasVariableInstances().add(variableInstance);
+ });
+ dsHandled.add((producedDataframe.getLabel()));
+ // Correct usedVariables ID checking ds/var of last assignment
+ });
+
+
+ return program;
+ }
+
}
diff --git a/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/DataframeInstance.java b/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/DataframeInstance.java
index 2860dc6e4..c25c4218b 100644
--- a/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/DataframeInstance.java
+++ b/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/DataframeInstance.java
@@ -1,9 +1,14 @@
package fr.insee.vtl.prov.prov;
+import java.util.HashSet;
+import java.util.Set;
+
public class DataframeInstance {
String id;
String label;
+ Set hasVariableInstances = new HashSet<>();
+
public DataframeInstance(String id, String label) {
this.id = id;
this.label = label;
@@ -24,4 +29,14 @@ public String getLabel() {
public void setLabel(String label) {
this.label = label;
}
+
+ public Set getHasVariableInstances() {
+ return hasVariableInstances;
+ }
+
+ public void setHasVariableInstances(Set hasVariableInstances) {
+ this.hasVariableInstances = hasVariableInstances;
+ }
+
+
}
diff --git a/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/Program.java b/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/Program.java
index ad11c7204..50a7571b5 100644
--- a/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/Program.java
+++ b/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/Program.java
@@ -3,12 +3,16 @@
import java.util.HashSet;
import java.util.Set;
+/* Filled thanks to listener, except for dataframInstances */
public class Program {
String id;
String label;
Set programSteps = new HashSet<>();
+ /* Provided running preview mode */
+ Set dataframeInstances = new HashSet<>();
+
String sourceCode;
public Program() {
@@ -43,6 +47,14 @@ public void setProgramSteps(Set programSteps) {
this.programSteps = programSteps;
}
+ public Set getDataframeInstances() {
+ return dataframeInstances;
+ }
+
+ public void setDataframeInstances(Set dataframeInstances) {
+ this.dataframeInstances = dataframeInstances;
+ }
+
public String getSourceCode() {
return sourceCode;
}
@@ -51,9 +63,16 @@ public void setSourceCode(String sourceCode) {
this.sourceCode = sourceCode;
}
- public ProgramStep getProgramStepById(String id) {
+ public ProgramStep getProgramStepByLabel(String label) {
+ return programSteps.stream()
+ .filter(p -> p.getLabel().equals(label))
+ .findFirst()
+ .orElse(null);
+ }
+
+ public ProgramStep getProgramStepByIndex(int index) {
return programSteps.stream()
- .filter(p -> p.getId().equals(id))
+ .filter(p -> p.getIndex() == index)
.findFirst()
.orElse(null);
}
diff --git a/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/ProgramStep.java b/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/ProgramStep.java
index 421999d72..8f2cad975 100644
--- a/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/ProgramStep.java
+++ b/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/ProgramStep.java
@@ -2,26 +2,24 @@
import java.util.HashSet;
import java.util.Set;
+import java.util.UUID;
public class ProgramStep {
String id;
String label;
String sourceCode;
- Set usedVariables = new HashSet<>();;
- Set assignedVariables = new HashSet<>();;
-
- Set consumedDataframe = new HashSet<>();;
+ int index;
+ Set usedVariables = new HashSet<>();
+ Set assignedVariables = new HashSet<>();
+ Set consumedDataframe = new HashSet<>();
DataframeInstance producedDataframe;
-
- public ProgramStep() {
- }
-
- public ProgramStep(String id, String label, String sourceCode) {
- this.id = id;
+ public ProgramStep(String label, String sourceCode, int index) {
+ this.id = UUID.randomUUID().toString();
this.label = label;
this.sourceCode = sourceCode;
+ this.index = index;
}
public String getId() {
@@ -44,6 +42,14 @@ public String getSourceCode() {
return sourceCode;
}
+ public int getIndex() {
+ return index;
+ }
+
+ public void setIndex(int index) {
+ this.index = index;
+ }
+
public void setSourceCode(String sourceCode) {
this.sourceCode = sourceCode;
}
diff --git a/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/VariableInstance.java b/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/VariableInstance.java
index b7cc00707..92e150f6d 100644
--- a/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/VariableInstance.java
+++ b/vtl-prov/src/main/java/fr/insee/vtl/prov/prov/VariableInstance.java
@@ -1,8 +1,20 @@
package fr.insee.vtl.prov.prov;
+import fr.insee.vtl.model.Dataset;
+
+import java.util.UUID;
+
public class VariableInstance {
String id;
String label;
+ Dataset.Role role;
+ String parentDataframe;
+ Class> type;
+
+ public VariableInstance(String label) {
+ this.id = UUID.randomUUID().toString();
+ this.label = label;
+ }
public VariableInstance(String id, String label) {
this.id = id;
@@ -24,4 +36,29 @@ public String getLabel() {
public void setLabel(String label) {
this.label = label;
}
+
+ public Dataset.Role getRole() {
+ return role;
+ }
+
+ public void setRole(Dataset.Role role) {
+ this.role = role;
+ }
+
+ public Class> getType() {
+ return type;
+ }
+
+ public void setType(Class> type) {
+ this.type = type;
+ }
+
+
+ public String getParentDataframe() {
+ return parentDataframe;
+ }
+
+ public void setParentDataframe(String parentDataframe) {
+ this.parentDataframe = parentDataframe;
+ }
}
diff --git a/vtl-prov/src/main/java/fr/insee/vtl/prov/ProvenanceUtils.java b/vtl-prov/src/main/java/fr/insee/vtl/prov/utils/ProvenanceUtils.java
similarity index 68%
rename from vtl-prov/src/main/java/fr/insee/vtl/prov/ProvenanceUtils.java
rename to vtl-prov/src/main/java/fr/insee/vtl/prov/utils/ProvenanceUtils.java
index 80651142c..1a6b624bb 100644
--- a/vtl-prov/src/main/java/fr/insee/vtl/prov/ProvenanceUtils.java
+++ b/vtl-prov/src/main/java/fr/insee/vtl/prov/utils/ProvenanceUtils.java
@@ -1,17 +1,17 @@
-package fr.insee.vtl.prov;
+package fr.insee.vtl.prov.utils;
-import fr.insee.vtl.parser.VtlLexer;
-import fr.insee.vtl.parser.VtlParser;
-import fr.insee.vtl.prov.model.VTLDataset;
-import org.antlr.v4.runtime.CharStreams;
-import org.antlr.v4.runtime.CodePointCharStream;
-import org.antlr.v4.runtime.CommonTokenStream;
-import org.antlr.v4.runtime.tree.ParseTreeWalker;
-
-import java.util.*;
+import java.util.Map;
+import java.util.UUID;
public class ProvenanceUtils {
+ public static String getOrBuildUUID(Map availableUUID, String label) {
+ if (null != availableUUID.get(label)) {
+ return availableUUID.get(label);
+ }
+ return UUID.randomUUID().toString();
+ }
+
//public static List