From 51691e7a209cb01911756ff54037dd756a5d439e Mon Sep 17 00:00:00 2001
From: Todd Fincannon <todd@toddfincannon.com>
Date: Fri, 23 Jul 2021 15:58:35 -0700
Subject: [PATCH] feat: implement GET DIRECT SUBSCRIPT for CSV (#79)

Fixes #77

Co-authored-by: Chris Campbell <ccampbell@climateinteractive.org>
---
 models/directsubs/b_subs.csv          |  4 ++
 models/directsubs/c_subs.csv          |  2 +
 models/directsubs/directsubs.dat      | 30 ++++++++++
 models/directsubs/directsubs.mdl      | 61 ++++++++++++++++++++
 models/directsubs/directsubs_subs.txt | 63 ++++++++++++++++++++
 models/directsubs/directsubs_vars.txt | 82 +++++++++++++++++++++++++++
 package-lock.json                     |  4 +-
 package.json                          |  2 +-
 src/CodeGen.js                        |  4 +-
 src/Helpers.js                        | 58 +++++++++++++------
 src/Model.js                          |  8 +--
 src/SubscriptRangeReader.js           | 50 +++++++++++++++-
 src/sde-generate.js                   |  2 +-
 13 files changed, 343 insertions(+), 27 deletions(-)
 create mode 100644 models/directsubs/b_subs.csv
 create mode 100644 models/directsubs/c_subs.csv
 create mode 100644 models/directsubs/directsubs.dat
 create mode 100644 models/directsubs/directsubs.mdl
 create mode 100644 models/directsubs/directsubs_subs.txt
 create mode 100644 models/directsubs/directsubs_vars.txt

diff --git a/models/directsubs/b_subs.csv b/models/directsubs/b_subs.csv
new file mode 100644
index 00000000..2516ad76
--- /dev/null
+++ b/models/directsubs/b_subs.csv
@@ -0,0 +1,4 @@
+DimB
+B1
+B2
+B3
\ No newline at end of file
diff --git a/models/directsubs/c_subs.csv b/models/directsubs/c_subs.csv
new file mode 100644
index 00000000..b5b62b93
--- /dev/null
+++ b/models/directsubs/c_subs.csv
@@ -0,0 +1,2 @@
+DimC,,
+C1,C2,C3
\ No newline at end of file
diff --git a/models/directsubs/directsubs.dat b/models/directsubs/directsubs.dat
new file mode 100644
index 00000000..72008991
--- /dev/null
+++ b/models/directsubs/directsubs.dat
@@ -0,0 +1,30 @@
+a[A1]
+0	10
+a[A2]
+0	20
+a[A3]
+0	30
+b[B1]
+0	1
+b[B2]
+0	2
+b[B3]
+0	3
+c[C1]
+0	11
+1	11
+c[C2]
+0	21
+1	21
+c[C3]
+0	31
+1	31
+FINAL TIME
+0	1
+INITIAL TIME
+0	0
+SAVEPER
+0	1
+1	1
+TIME STEP
+0	1
diff --git a/models/directsubs/directsubs.mdl b/models/directsubs/directsubs.mdl
new file mode 100644
index 00000000..9f51ee78
--- /dev/null
+++ b/models/directsubs/directsubs.mdl
@@ -0,0 +1,61 @@
+{UTF-8}
+DimA: A1, A2, A3 -> DimB, DimC ~~|
+DimB:
+	GET DIRECT SUBSCRIPT(
+	   'b_subs.csv',
+	   ',',
+	   'A2',
+	   'A',
+	   ''
+	)
+  ~~|
+DimC:
+	GET DIRECT SUBSCRIPT(
+	   'c_subs.csv',
+	   ',',
+	   'A2',
+	   '2',
+	   ''
+	)
+  ~~|
+a[DimA] = 10, 20, 30
+  ~~|
+b[DimB] = 1, 2, 3
+  ~~~:SUPPLEMENTARY|
+c[DimC] = a[DimA] + 1
+  ~~~:SUPPLEMENTARY|
+
+********************************************************
+	.Control
+********************************************************~
+		Simulation Control Parameters
+	|
+
+FINAL TIME = 1 ~~|
+INITIAL TIME = 0 ~~|
+SAVEPER = TIME STEP ~~|
+TIME STEP = 1 ~~|
+
+\\\---/// Sketch information - do not modify anything except names
+V300  Do not put anything below this section - it will be ignored
+*View 1
+$0-0-0,0,|0||0-0-0|0-0-0|0-0-0|0-0-0|0-0-0|0,0,100,0
+///---\\\
+:L<%^E!@
+9:Current
+15:0,0,0,0,0,0
+19:100,0
+27:2,
+34:0,
+5:FINAL TIME
+35:Date
+36:YYYY-MM-DD
+37:2000
+38:1
+39:1
+40:2
+41:0
+42:1
+24:0
+25:0
+26:0
diff --git a/models/directsubs/directsubs_subs.txt b/models/directsubs/directsubs_subs.txt
new file mode 100644
index 00000000..c420a758
--- /dev/null
+++ b/models/directsubs/directsubs_subs.txt
@@ -0,0 +1,63 @@
+_dima:
+{
+  modelName: 'DimA',
+  modelValue: [ 'A1', 'A2', 'A3' ],
+  modelMappings: [ { toDim: 'DimB', value: [] }, { toDim: 'DimC', value: [] } ],
+  name: '_dima',
+  value: [ '_a1', '_a2', '_a3' ],
+  size: 3,
+  family: '_dima',
+  mappings: { _dimb: [ '_a1', '_a2', '_a3' ], _dimc: [ '_a1', '_a2', '_a3' ] }
+}
+
+_dimb:
+{
+  modelName: 'DimB',
+  modelValue: [ 'B1', 'B2', 'B3' ],
+  modelMappings: [],
+  name: '_dimb',
+  value: [ '_b1', '_b2', '_b3' ],
+  size: 3,
+  family: '_dimb',
+  mappings: {}
+}
+
+_dimc:
+{
+  modelName: 'DimC',
+  modelValue: [ 'C1', 'C2', 'C3' ],
+  modelMappings: [],
+  name: '_dimc',
+  value: [ '_c1', '_c2', '_c3' ],
+  size: 3,
+  family: '_dimc',
+  mappings: {}
+}
+
+_a1:
+{ name: '_a1', value: 0, size: 1, family: '_dima', mappings: {} }
+
+_a2:
+{ name: '_a2', value: 1, size: 1, family: '_dima', mappings: {} }
+
+_a3:
+{ name: '_a3', value: 2, size: 1, family: '_dima', mappings: {} }
+
+_b1:
+{ name: '_b1', value: 0, size: 1, family: '_dimb', mappings: {} }
+
+_b2:
+{ name: '_b2', value: 1, size: 1, family: '_dimb', mappings: {} }
+
+_b3:
+{ name: '_b3', value: 2, size: 1, family: '_dimb', mappings: {} }
+
+_c1:
+{ name: '_c1', value: 0, size: 1, family: '_dimc', mappings: {} }
+
+_c2:
+{ name: '_c2', value: 1, size: 1, family: '_dimc', mappings: {} }
+
+_c3:
+{ name: '_c3', value: 2, size: 1, family: '_dimc', mappings: {} }
+
diff --git a/models/directsubs/directsubs_vars.txt b/models/directsubs/directsubs_vars.txt
new file mode 100644
index 00000000..cb77bbcf
--- /dev/null
+++ b/models/directsubs/directsubs_vars.txt
@@ -0,0 +1,82 @@
+a[DimA]: const (non-apply-to-all)
+= 10,20,30
+refId(_a[_a1])
+families(_dima)
+subscripts(_a1)
+separationDims(_dima)
+hasInitValue(false)
+
+a[DimA]: const (non-apply-to-all)
+= 10,20,30
+refId(_a[_a2])
+families(_dima)
+subscripts(_a2)
+separationDims(_dima)
+hasInitValue(false)
+
+a[DimA]: const (non-apply-to-all)
+= 10,20,30
+refId(_a[_a3])
+families(_dima)
+subscripts(_a3)
+separationDims(_dima)
+hasInitValue(false)
+
+b[DimB]: const (non-apply-to-all)
+= 1,2,3
+refId(_b[_b1])
+families(_dimb)
+subscripts(_b1)
+separationDims(_dimb)
+hasInitValue(false)
+
+b[DimB]: const (non-apply-to-all)
+= 1,2,3
+refId(_b[_b2])
+families(_dimb)
+subscripts(_b2)
+separationDims(_dimb)
+hasInitValue(false)
+
+b[DimB]: const (non-apply-to-all)
+= 1,2,3
+refId(_b[_b3])
+families(_dimb)
+subscripts(_b3)
+separationDims(_dimb)
+hasInitValue(false)
+
+c[DimC]: aux
+= a[DimA]+1
+refId(_c)
+families(_dimc)
+subscripts(_dimc)
+hasInitValue(false)
+refs(_a[_a1], _a[_a2], _a[_a3])
+
+FINAL TIME: const
+= 1
+refId(_final_time)
+hasInitValue(false)
+
+INITIAL TIME: const
+= 0
+refId(_initial_time)
+hasInitValue(false)
+
+SAVEPER: aux
+= TIME STEP
+refId(_saveper)
+hasInitValue(false)
+refs(_time_step)
+
+Time: const
+= 
+refId(_time)
+hasInitValue(false)
+
+TIME STEP: const
+= 1
+refId(_time_step)
+hasInitValue(false)
+
diff --git a/package-lock.json b/package-lock.json
index 849c74db..deb73c6b 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -42,8 +42,8 @@
       "integrity": "sha512-UjMSlenUORL+a+6g4RNZxRh5LcFWybRi2g0ASDBpgXBY6nlavg0BRVAVEQF0dz8jH6SyX3lV7uP5y/krJzc+Hw=="
     },
     "antlr4-vensim": {
-      "version": "git+https://github.com/climateinteractive/antlr4-vensim.git#a1d8e0b234e8424b31639aa5727c9bf5ae141387",
-      "from": "git+https://github.com/climateinteractive/antlr4-vensim.git#a1d8e0b",
+      "version": "git+https://github.com/climateinteractive/antlr4-vensim.git#d0087772d97c846015d3f323c6234e397884feae",
+      "from": "git+https://github.com/climateinteractive/antlr4-vensim.git#d008777",
       "requires": {
         "antlr4": "4.9.2"
       }
diff --git a/package.json b/package.json
index f9028ddc..5f7cb7be 100644
--- a/package.json
+++ b/package.json
@@ -19,7 +19,7 @@
   },
   "dependencies": {
     "antlr4": "4.9.2",
-    "antlr4-vensim": "https://github.com/climateinteractive/antlr4-vensim#a1d8e0b",
+    "antlr4-vensim": "https://github.com/climateinteractive/antlr4-vensim#d008777",
     "bufx": "^1.0.5",
     "byline": "^5.0.0",
     "chart.js": "^2.9.4",
diff --git a/src/CodeGen.js b/src/CodeGen.js
index 44351959..786b1fc4 100644
--- a/src/CodeGen.js
+++ b/src/CodeGen.js
@@ -6,7 +6,7 @@ import { sub, allDimensions, allMappings, subscriptFamilies } from './Subscript.
 import { asort, lines, strlist, abend, mapIndexed } from './Helpers.js'
 
 export let codeGenerator = (parseTree, opts) => {
-  const { spec, operation, extData, directData } = opts
+  const { spec, operation, extData, directData, modelDirname } = opts
   // Set to 'decl', 'init-lookups', 'eval', etc depending on the section being generated.
   let mode = ''
   // Set to true to output all variables when there is no model run spec.
@@ -25,7 +25,7 @@ export let codeGenerator = (parseTree, opts) => {
     // Read variables and subscript ranges from the model parse tree.
     // This is the main entry point for code generation and is called just once.
     try {
-      Model.read(parseTree, spec, extData, directData)
+      Model.read(parseTree, spec, extData, directData, modelDirname)
       // In list mode, print variables to the console instead of generating code.
       if (operation === 'printRefIdTest') {
         Model.printRefIdTest()
diff --git a/src/Helpers.js b/src/Helpers.js
index 8a023dab..ca229fdd 100644
--- a/src/Helpers.js
+++ b/src/Helpers.js
@@ -6,6 +6,7 @@ import sh from 'shelljs'
 import split from 'split-string'
 import byline from 'byline'
 import XLSX from 'xlsx'
+import parseCsv from 'csv-parse/lib/sync.js'
 import B from 'bufx'
 
 // Set true to print a stack trace in vlog
@@ -237,7 +238,7 @@ export let modelPathProps = model => {
   return {
     modelDirname: p.dir,
     modelName: p.name,
-    modelPathname: path.format(p),
+    modelPathname: path.format(p)
   }
 }
 export let execCmd = cmd => {
@@ -293,7 +294,9 @@ export let readDat = async (pathname, prefix = '') => {
         if (Number.isNaN(t)) {
           console.error(`DAT file ${pathname}:${lineNum} time value is NaN`)
         } else if (Number.isNaN(value)) {
-          console.error(`DAT file ${pathname}:${lineNum} var "${varName}" value is NaN at time=${t}`)
+          console.error(
+            `DAT file ${pathname}:${lineNum} var "${varName}" value is NaN at time=${t}`
+          )
         } else {
           varValues.set(t, value)
         }
@@ -310,6 +313,26 @@ export let readDat = async (pathname, prefix = '') => {
 export let readXlsx = pathname => {
   return XLSX.readFile(pathname, { cellDates: true })
 }
+export let readCsv = (pathname, delimiter = ',') => {
+  // Read the CSV file at the pathname and parse it with the given delimiter.
+  // Return an array of rows that are each an array of columns.
+  // If there is a header row, it is returned as the first row.
+  // If the file cannot be read or parsed, log the cause and return null.
+  const CSV_PARSE_OPTS = {
+    delimiter,
+    columns: false,
+    trim: true,
+    skip_empty_lines: true,
+    skip_lines_with_empty_values: true
+  }
+  try {
+    return parseCsv(B.read(pathname), CSV_PARSE_OPTS)
+  } catch (error) {
+    // Not every failure is a missing file (e.g. malformed CSV), so report it.
+    console.error(`ERROR: CSV file ${pathname} could not be read: ${error.message}`)
+    return null
+  }
+}
 // Convert the var name and subscript names to canonical form separately.
 export let canonicalVensimName = vname => {
   let result = vname
@@ -334,7 +357,9 @@ export let mapIndexed = R.addIndex(R.map)
 // Function to sort an array of strings
 export let asort = R.sort((a, b) => (a > b ? 1 : a < b ? -1 : 0))
 // Function to alpha sort an array of variables on the model LHS
-export let vsort = R.sort((a, b) => (a.modelLHS > b.modelLHS ? 1 : a.modelLHS < b.modelLHS ? -1 : 0))
+export let vsort = R.sort((a, b) =>
+  a.modelLHS > b.modelLHS ? 1 : a.modelLHS < b.modelLHS ? -1 : 0
+)
 // Function to list an array to stderr
 export let printArray = R.forEach(x => console.error(x))
 // Function to expand an array of strings into a comma-delimited list of strings
@@ -407,13 +432,14 @@ export let replaceDelimitedStrings = (str, open, close, newStr) => {
  * This can be used in place of nested for loops and has the benefit of working
  * for multi-dimensional inputs.
  */
- export const cartesianProductOf = arr => {
+export const cartesianProductOf = arr => {
   // Implementation based on: https://stackoverflow.com/a/36234242
-  return arr.reduce((a, b) => {
-    return a
-      .map(x => b.map(y => x.concat([y])))
-      .reduce((v, w) => v.concat(w), [])
-  }, [[]])
+  return arr.reduce(
+    (a, b) => {
+      return a.map(x => b.map(y => x.concat([y]))).reduce((v, w) => v.concat(w), [])
+    },
+    [[]]
+  )
 }
 
 /**
@@ -424,14 +450,14 @@ export let replaceDelimitedStrings = (str, open, close, newStr) => {
  * this function will return all the permutations, e.g.:
  *   [ [1,2,3], [1,3,2], [2,1,3], [2,3,1], [3,1,2], [3,2,1] ]
  */
- export const permutationsOf = (elems, subperms = [[]]) => {
+export const permutationsOf = (elems, subperms = [[]]) => {
   // Implementation based on: https://gist.github.com/CrossEye/f7c2f77f7db7a94af209
-  return R.isEmpty(elems) ?
-    subperms :
-    R.addIndex(R.chain)((elem, idx) => permutationsOf(
-      R.remove(idx, 1, elems),
-      R.map(R.append(elem), subperms)
-    ), elems)
+  return R.isEmpty(elems)
+    ? subperms
+    : R.addIndex(R.chain)(
+        (elem, idx) => permutationsOf(R.remove(idx, 1, elems), R.map(R.append(elem), subperms)),
+        elems
+      )
 }
 
 //
diff --git a/src/Model.js b/src/Model.js
index 057d2d8b..b516ba7c 100644
--- a/src/Model.js
+++ b/src/Model.js
@@ -34,12 +34,12 @@ const PRINT_INIT_GRAPH = false
 const PRINT_AUX_GRAPH = false
 const PRINT_LEVEL_GRAPH = false
 
-function read(parseTree, spec, extData, directData) {
+function read(parseTree, spec, extData, directData, modelDirname) {
   // Some arrays need to be separated into variables with individual indices to
   // prevent eval cycles. They are manually added to the spec file.
   let specialSeparationDims = spec.specialSeparationDims
   // Subscript ranges must be defined before reading variables that use them.
-  readSubscriptRanges(parseTree, spec.dimensionFamilies, spec.indexFamilies)
+  readSubscriptRanges(parseTree, spec.dimensionFamilies, spec.indexFamilies, modelDirname)
   // Read variables from the model parse tree.
   readVariables(parseTree, specialSeparationDims, directData)
   // Analyze model equations to fill in more details about variables.
@@ -49,9 +49,9 @@ function read(parseTree, spec, extData, directData) {
   // Remove variables that are not referenced by an input or output variable.
   removeUnusedVariables(spec)
 }
-function readSubscriptRanges(tree, dimensionFamilies, indexFamilies) {
+function readSubscriptRanges(tree, dimensionFamilies, indexFamilies, modelDirname) {
   // Read subscript ranges from the model.
-  let subscriptRangeReader = new SubscriptRangeReader()
+  let subscriptRangeReader = new SubscriptRangeReader(modelDirname)
   subscriptRangeReader.visitModel(tree)
   let allDims = allDimensions()
 
diff --git a/src/SubscriptRangeReader.js b/src/SubscriptRangeReader.js
index ebe7e3f4..086f387b 100644
--- a/src/SubscriptRangeReader.js
+++ b/src/SubscriptRangeReader.js
@@ -1,11 +1,20 @@
+import path from 'path';
 import { ModelParser } from 'antlr4-vensim'
 import R from 'ramda'
+import XLSX from 'xlsx'
 import ModelReader from './ModelReader.js'
 import { Subscript } from './Subscript.js'
+import { cFunctionName, matchRegex, readCsv } from './Helpers.js'
 
 export default class SubscriptRangeReader extends ModelReader {
-  constructor() {
+  constructor(modelDirname) {
     super()
+    // Directory of the model file; relative GET DIRECT SUBSCRIPT paths resolve against it.
+    this.modelDirname = modelDirname
+    // Index names collected from a subscript list or a GET DIRECT SUBSCRIPT data file
+    this.indNames = []
+    // Dimension mappings with model names
+    this.modelMappings = []
   }
   visitModel(ctx) {
     let subscriptRanges = ctx.subscriptRange()
@@ -62,4 +71,43 @@ export default class SubscriptRangeReader extends ModelReader {
       }
     }
   }
+  visitCall(ctx) {
+    // The RHS of a subscript range may be a GET DIRECT SUBSCRIPT call. Only
+    // that function is passed on to the base class; other calls are ignored.
+    const isDirectSubscript = cFunctionName(ctx.Id().getText()) === '_GET_DIRECT_SUBSCRIPT'
+    if (!isDirectSubscript) return
+    super.visitCall(ctx)
+  }
+  visitExprList(ctx) {
+    // Read index names for a subscript range defined by GET DIRECT SUBSCRIPT.
+    // We assume the only call that ends up here is GET DIRECT SUBSCRIPT, with
+    // quoted args: pathname, delimiter, first cell, last cell, and name prefix.
+    // Names are appended to this.indNames. Nothing is added when the file is
+    // unreadable or the first cell lies outside the data.
+    let args = R.map(expr => matchRegex(expr.getText(), /'(.*)'/), ctx.expr())
+    let [pathname, delimiter, firstCell, lastCell, prefix] = args
+    // If lastCell is a column letter, scan down the column, else scan the row.
+    let scanColumn = Number.isNaN(Number.parseInt(lastCell, 10))
+    // decode_cell only recognizes upper case column letters, so normalize the
+    // address before converting it to zero-based column and row numbers.
+    let { c: col, r: row } = XLSX.utils.decode_cell(firstCell.toUpperCase())
+    // Read subscript names from the CSV file at the given position.
+    let csvPathname = path.resolve(this.modelDirname, pathname)
+    // readCsv returns null after logging an error when the file is unusable.
+    let data = readCsv(csvPathname, delimiter)
+    // Stop at the first missing or empty cell so that a blank name is never
+    // added as an index and a position outside the data does not throw.
+    while (data != null && data[row] != null && data[row][col]) {
+      // The prefix arg, when given, is prepended to each name from the file.
+      // Default to no prefix if the fifth arg did not yield a value.
+      this.indNames.push((prefix || '') + data[row][col])
+      if (scanColumn) {
+        row++
+      } else {
+        col++
+      }
+    }
+    // Hand the expression list to the base class implementation.
+    super.visitExprList(ctx)
+  }
 }
diff --git a/src/sde-generate.js b/src/sde-generate.js
index be96aa73..78fa4406 100644
--- a/src/sde-generate.js
+++ b/src/sde-generate.js
@@ -136,7 +136,7 @@ export let generate = async (model, opts) => {
     operation = 'printRefIdTest'
   }
   let parseTree = parseModel(input)
-  let code = codeGenerator(parseTree, { spec, operation, extData, directData }).generate()
+  let code = codeGenerator(parseTree, { spec, operation, extData, directData, modelDirname }).generate()
   if (opts.genc || opts.genhtml) {
     let outputPathname = path.join(buildDirname, `${modelName}.c`)
     writeOutput(outputPathname, code)