Skip to content

Commit

Permalink
fix: remove duplicate implementations of canonical[Var]Name functions (
Browse files Browse the repository at this point in the history
…#580)

Fixes #578
  • Loading branch information
chrispcampbell authored Dec 7, 2024
1 parent c04e0ca commit e215d7d
Show file tree
Hide file tree
Showing 24 changed files with 275 additions and 280 deletions.
22 changes: 22 additions & 0 deletions packages/build/docs/classes/BuildContext.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,3 +107,25 @@ Spawn a child process that runs the given command.
`Promise`<`ProcessOutput`\>

The output of the process.

___

### canonicalVarId

**canonicalVarId**(`name`): `string`

Format a (subscripted or non-subscripted) model variable name into a canonical
identifier (with special characters converted to underscore, and subscript/dimension
parts separated by commas).

#### Parameters

| Name | Type | Description |
| :------ | :------ | :------ |
| `name` | `string` | The name of the variable in the source model, e.g., `Variable name[DimA, B2]`. |

#### Returns

`string`

The canonical identifier for the given name, e.g., `_variable_name[_dima,_b2]`.
1 change: 1 addition & 0 deletions packages/build/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
"ci:build": "run-s clean lint prettier:check type-check build test:ci docs"
},
"dependencies": {
"@sdeverywhere/parse": "^0.1.1",
"chokidar": "^3.5.3",
"cross-spawn": "^7.0.3",
"folder-hash": "^4.0.2",
Expand Down
13 changes: 13 additions & 0 deletions packages/build/src/context/context.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// Copyright (c) 2022 Climate Interactive / New Venture Fund

import { canonicalVarId } from '@sdeverywhere/parse'
import type { LogLevel } from '../_shared/log'
import { log } from '../_shared/log'

Expand Down Expand Up @@ -84,4 +85,16 @@ export class BuildContext {
spawnChild(cwd: string, command: string, args: string[], opts?: ProcessOptions): Promise<ProcessOutput> {
  // Forward to the free `spawnChild` function, supplying this context's abort signal
  // in addition to the caller's arguments
  const { abortSignal } = this
  return spawnChild(cwd, command, args, abortSignal, opts)
}

/**
 * Convert a model variable name, with or without subscripts, into its canonical
 * identifier. Special characters become underscores, and the subscript/dimension
 * parts are joined with commas.
 *
 * This is a convenience wrapper that delegates to `canonicalVarId` from
 * `@sdeverywhere/parse`.
 *
 * @param name The name of the variable in the source model, e.g., `Variable name[DimA, B2]`.
 * @returns The canonical identifier for the given name, e.g., `_variable_name[_dima,_b2]`.
 */
canonicalVarId(name: string): string {
  const id = canonicalVarId(name)
  return id
}
}
1 change: 0 additions & 1 deletion packages/compile/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
"csv-parse": "^5.3.3",
"js-yaml": "^3.13.1",
"ramda": "^0.27.0",
"split-string": "^6.0.0",
"xlsx": "https://cdn.sheetjs.com/xlsx-0.20.2/xlsx-0.20.2.tgz"
},
"author": "Climate Interactive",
Expand Down
47 changes: 8 additions & 39 deletions packages/compile/src/_shared/helpers.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@ import util from 'util'
import B from 'bufx'
import { parse as parseCsv } from 'csv-parse/sync'
import * as R from 'ramda'
import split from 'split-string'
import XLSX from 'xlsx'

import { canonicalId, canonicalVarId } from '@sdeverywhere/parse'

// Set true to print a stack trace in vlog
export const PRINT_VLOG_TRACE = false

Expand Down Expand Up @@ -40,25 +41,11 @@ export function resetHelperState() {
}

export let canonicalName = name => {
// Format a model variable name into a valid C identifier.
// NOTE(review): this span appears to show both the earlier inline implementation and
// its `canonicalId` replacement; as written, the first `return` always wins and the
// second one below is unreachable -- confirm which one is intended to remain.
return (
'_' +
name
.trim()
.replace(/"/g, '_')
.replace(/\s+!$/g, '!')
.replace(/\s/g, '_')
.replace(/,/g, '_')
.replace(/-/g, '_')
.replace(/\./g, '_')
.replace(/\$/g, '_')
.replace(/'/g, '_')
.replace(/&/g, '_')
.replace(/%/g, '_')
.replace(/\//g, '_')
.replace(/\|/g, '_')
.toLowerCase()
)
// Format a model variable or subscript/dimension name into a valid C identifier.
// In the case where you have a full variable name that includes subscripts/dimensions
// (e.g., 'Variable name[DimA,B2]'), use `canonicalVensimName` to convert the
// base variable name and subscript/dimension parts to canonical form independently.
return canonicalId(name)
}
export let decanonicalize = name => {
// Decanonicalize the var name.
Expand All @@ -71,9 +58,6 @@ export let decanonicalize = name => {
}
return name
}
// Canonicalize the given name and convert the result to upper case.
export let cFunctionName = name => canonicalName(name).toUpperCase()
// Return true when the variable has at least one entry in `separationDims`.
export let isSeparatedVar = v => v.separationDims.length > 0
Expand Down Expand Up @@ -244,22 +228,7 @@ export let readCsv = (pathname, delimiter = ',') => {
}
// Canonicalize the base variable name and each subscript name independently,
// then reassemble them as `base[sub1,sub2,...]`.
export let canonicalVensimName = vname => {
  // Group 1 is everything before the first '['; group 2 (optional) is the text
  // between the brackets
  const parts = vname.match(/([^[]+)(?:\[([^\]]+)\])?/)
  if (!parts) {
    // Nothing matched, so hand the input back unchanged
    return vname
  }

  const [, baseName, subscriptList] = parts
  const canonicalBase = canonicalName(baseName)
  if (!subscriptList) {
    return canonicalBase
  }

  const canonicalSubs = subscriptList.split(',').map(sub => canonicalName(sub))
  return `${canonicalBase}[${canonicalSubs.join(',')}]`
}
// Split a model string into an array of equations without the "|" terminator.
// Allow "|" to occur in quoted variable names across line breaks.
// Retain the backslash character.
export let splitEquations = mdl => {
return split(mdl, { separator: '|', quotes: ['"'], keep: () => true })
return canonicalVarId(vname)
}
// Function to map over lists's value and index
export let mapIndexed = R.addIndex(R.map)
Expand Down
79 changes: 79 additions & 0 deletions packages/parse/src/_shared/canonical-id.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// Copyright (c) 2023 Climate Interactive / New Venture Fund

// Matches the '!' mark at the very end of a marked dimension name when it is
// preceded by one or more whitespace characters
const reTrailingMark = new RegExp('\\s+!$', 'g')

// Matches a run of one or more whitespace and/or underscore characters
const reWhitespace = new RegExp('(\\s|_)+', 'g')

// Matches a single special punctuation character
// TODO: We do not currently include '!' characters in this set; we should only replace these
// when they don't appear at the end of a (marked) dimension
const reSpecialChars = new RegExp(`['"\\.,\\-\\$&%\\/\\|]`, 'g')

/**
 * Convert a single model variable name or subscript/dimension name into a valid
 * C identifier: the name is normalized, lower-cased, and prefixed with an underscore.
 *
 * This is intended for an individual base name (e.g., 'Variable name') or an individual
 * subscript/dimension name (e.g., 'DimA'). For a full variable name that includes
 * subscripts/dimensions (e.g., 'Variable name[DimA,B2]'), use `canonicalVarId`, which
 * converts the base name and each subscript/dimension part independently.
 *
 * @param {string} name The name of the variable in the source model, e.g., "Variable name".
 * @returns {string} The C identifier for the given name, e.g., "_variable_name".
 */
export function canonicalId(name) {
  // Leading and trailing whitespace is not significant
  const trimmed = name.trim()

  // Keep the '!' that marks a dimension, but drop any whitespace directly before it
  const marked = trimmed.replace(reTrailingMark, '!')

  // Collapse each run of whitespace/underscore characters to one underscore; this
  // matches the behavior of Vensim documented here:
  //   https://www.vensim.com/documentation/ref_variable_names.html
  const collapsed = marked.replace(reWhitespace, '_')

  // Each special punctuation character becomes its own underscore (no collapsing)
  const sanitized = collapsed.replace(reSpecialChars, '_')

  return `_${sanitized.toLowerCase()}`
}

/**
 * Convert a (subscripted or non-subscripted) model variable name into a canonical
 * identifier. The base name and each subscript/dimension part are passed through
 * `canonicalId` separately, and the subscript/dimension parts are rejoined with commas
 * (no spaces) inside square brackets.
 *
 * @param {string} name The name of the variable in the source model, e.g., "Variable name[DimA, B2]".
 * @returns {string} The canonical identifier for the given name, e.g., "_variable_name[_dima,_b2]".
 * @throws {Error} If the name does not match the expected pattern.
 */
export function canonicalVarId(name) {
  // Group 1 is everything before the first '['; group 2 (optional) is the text
  // between the brackets
  const match = name.match(/([^[]+)(?:\[([^\]]+)\])?/)
  if (match === null) {
    throw new Error(`Invalid variable name: ${name}`)
  }

  const [, baseName, subscriptList] = match
  const baseId = canonicalId(baseName)
  if (!subscriptList) {
    // Non-subscripted variable: the base identifier is the whole result
    return baseId
  }

  const subscriptIds = subscriptList.split(',').map(part => canonicalId(part))
  return `${baseId}[${subscriptIds.join(',')}]`
}

/**
 * Convert a model function name into a valid C identifier. The name is first
 * canonicalized with `canonicalId` (special characters converted to underscore),
 * and the resulting identifier is then converted to uppercase.
 *
 * @param {string} name The name of the function in the source model, e.g., "FUNCTION name".
 * @returns {string} The C identifier for the given name, e.g., "_FUNCTION_NAME".
 */
export function canonicalFunctionId(name) {
  const id = canonicalId(name)
  return id.toUpperCase()
}
85 changes: 85 additions & 0 deletions packages/parse/src/_shared/canonical-id.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// Copyright (c) 2024 Climate Interactive / New Venture Fund

import { describe, expect, it } from 'vitest'

import { canonicalFunctionId, canonicalId, canonicalVarId } from './canonical-id'

// Tests for `canonicalId`, which converts a single base name or subscript/dimension
// name (not a full subscripted variable name) into a C identifier.
describe('canonicalId', () => {
  it('should collapse multiple consecutive whitespace or underscore characters to a single underscore', () => {
    // The following examples are taken from the Vensim documentation under "Rules for Variable Names":
    //   https://www.vensim.com/documentation/ref_variable_names.html
    expect(canonicalId('Hello There')).toBe('_hello_there')
    expect(canonicalId('Hello_There')).toBe('_hello_there')
    expect(canonicalId('Hello __ ___ There')).toBe('_hello_there')
  })

  it('should replace each special character with a single underscore', () => {
    // Build up one long quoted name containing every special character, along with
    // the identifier we expect for it.  Each ` name<char>` chunk contributes one
    // underscore for the whitespace and one for the special character.
    let input = '"Special'
    let expected = '__special'
    function add(name: string, char: string) {
      input += ` ${name}${char}`
      expected += `_${name}_`
    }
    add('period', '.')
    add('comma', ',')
    add('dash', '-')
    add('dollar', '$')
    add('amp', '&')
    add('pct', '%')
    add('slash', '/')
    // The pipe character is part of the special character set, so cover it here too
    add('pipe', '|')
    // TODO: Handle backslashes
    // add('bslash', '\\')
    // TODO: Handle parentheses
    // add('lparen', '(')
    // add('rparen', ')')
    input += ' characters"'
    expected += '_characters_'
    expect(canonicalId(input)).toBe(expected)

    // The following examples are taken from the Vensim documentation under "Rules for Variable Names":
    //   https://www.vensim.com/documentation/ref_variable_names.html
    expect(canonicalId('"HiRes TV/Web Sets"')).toBe('__hires_tv_web_sets_')
    // TODO: Handle backslashes
    // expect(canonicalId('"The \\"Final\\" Frontier"')).toBe('')
    expect(canonicalId("érosion d'action")).toBe('_érosion_d_action')
  })

  // The mark tests call `canonicalId` directly (rather than going through
  // `canonicalVarId`) so that this suite exercises the function it is named for
  it('should preserve mark when preceded by whitespace', () => {
    expect(canonicalId(`DimA !`)).toBe('_dima!')
  })

  it('should preserve mark when split over multiple lines', () => {
    const name = `DimA
      !
    `
    expect(canonicalId(name)).toBe('_dima!')
  })
})

// Tests for `canonicalVarId`, which handles full variable names with optional subscripts.
describe('canonicalVarId', () => {
  // Each entry is [test title, source variable name, expected canonical identifier]
  const cases: Array<[string, string, string]> = [
    ['should work for non-subscripted variable', 'Hello There', '_hello_there'],
    ['should work for variable with 1 subscript', 'Variable name[A1]', '_variable_name[_a1]'],
    ['should work for variable with 2 subscripts', 'Variable name[A1, DimB]', '_variable_name[_a1,_dimb]'],
    ['should work for variable with 3 subscripts', 'Variable name[A1, DimB,C2]', '_variable_name[_a1,_dimb,_c2]']
  ]

  for (const [title, input, expected] of cases) {
    it(title, () => {
      expect(canonicalVarId(input)).toBe(expected)
    })
  }
})

// Tests for `canonicalFunctionId`, which always yields an uppercase identifier.
describe('canonicalFunctionId', () => {
  // Both spellings of the function name should produce the same identifier
  const expectedId = '_FUNCTION_NAME'
  const cases: Array<[string, string]> = [
    ['should work for uppercase function name', 'FUNCTION NAME'],
    ['should work for mixed case function name', 'function name']
  ]

  cases.forEach(([title, input]) => {
    it(title, () => {
      expect(canonicalFunctionId(input)).toBe(expectedId)
    })
  })
})
42 changes: 0 additions & 42 deletions packages/parse/src/_shared/names.js

This file was deleted.

Loading

0 comments on commit e215d7d

Please sign in to comment.