diff --git a/count-substring.mjs b/count-substring.mjs
new file mode 100644
index 0000000..3767260
--- /dev/null
+++ b/count-substring.mjs
@@ -0,0 +1,12 @@
+/**
+ * Count the non-overlapping matches of a regular expression in a string.
+ *
+ * @param {string} string - text to search
+ * @param {string} substring - regular-expression source (not a literal); compiled with the "g" flag
+ * @returns {number} number of matches, or 0 when there are none
+ */
+export default function countSubstring(string, substring) {
+  const pattern = new RegExp(substring, "g");
+  const match = string.match(pattern);
+  return match ? match.length : 0;
+}
diff --git a/find-tag-by-name.mjs b/find-tag-by-name.mjs
new file mode 100644
index 0000000..f79a400
--- /dev/null
+++ b/find-tag-by-name.mjs
@@ -0,0 +1,75 @@
+import indexOfMatch from "./index-of-match.mjs";
+import indexOfMatchEnd from "./index-of-match-end.mjs";
+import countSubstring from "./count-substring.mjs";
+
+/**
+ * Find the first occurrence of a tag in an XML string.
+ *
+ * @param {string} xml - text to search
+ * @param {string} tagName - tag name; interpolated into regular expressions as-is
+ * @param {Object} [options]
+ * @param {boolean} [options.debug=false] - log progress with console.log
+ * @param {boolean} [options.nested=true] - balance opening and closing tags so that
+ *   same-named child tags do not end the match early; pass false to stop at the first closing tag
+ * @param {number} [options.startIndex=0] - index in xml at which to start searching
+ * @returns {{inner: (string|null), outer: string, start: number, end: number}|undefined}
+ *   the tag (inner is null when it is self-closing), or undefined when no complete tag is found
+ */
+export default function findTagByName(xml, tagName, options) {
+  const debug = (options && options.debug) || false;
+  // nested stays on unless the caller passes exactly `nested: false`
+  const nested = !(options && options.nested === false);
+
+  const startIndex = (options && options.startIndex) || 0;
+
+  if (debug) console.log("[xml-utils] starting findTagByName with", tagName, " and ", options);
+
+  const start = indexOfMatch(xml, `\<${tagName}[ \n\>\/]`, startIndex);
+  if (debug) console.log("[xml-utils] start:", start);
+  if (start === -1) return undefined;
+
+  // afterStart begins at the last character of the tag name; relativeEnd is an offset into it
+  const afterStart = xml.slice(start + tagName.length);
+
+  let relativeEnd = indexOfMatchEnd(afterStart, "^[^<]*[ /]>", 0);
+
+  const selfClosing = relativeEnd !== -1 && afterStart[relativeEnd - 1] === "/";
+  if (debug) console.log("[xml-utils] selfClosing:", selfClosing);
+
+  if (selfClosing === false) {
+    // check if tag has subtags with the same name
+    if (nested) {
+      let searchFrom = 0;
+      let openings = 1;
+      let closings = 0;
+      while ((relativeEnd = indexOfMatchEnd(afterStart, "[ /]" + tagName + ">", searchFrom)) !== -1) {
+        const clip = afterStart.substring(searchFrom, relativeEnd + 1);
+        openings += countSubstring(clip, "<" + tagName + "[ \n\t>]");
+        closings += countSubstring(clip, "</" + tagName + ">");
+        // we can't have more openings than closings
+        if (closings >= openings) break;
+        searchFrom = relativeEnd;
+      }
+    } else {
+      relativeEnd = indexOfMatchEnd(afterStart, "[ /]" + tagName + ">", 0);
+    }
+  }
+
+  // no closing tag was found, so there is no complete tag to return
+  if (relativeEnd === -1) return undefined;
+
+  const end = start + tagName.length + relativeEnd + 1;
+  if (debug) console.log("[xml-utils] end:", end);
+
+  const outer = xml.slice(start, end);
+  // outer is the whole element, e.g. <tag ...>urn:ogc:def:crs:EPSG::32617</tag>
+
+  let inner;
+  if (selfClosing) {
+    inner = null;
+  } else {
+    inner = outer.slice(outer.indexOf(">") + 1, outer.lastIndexOf("<"));
+  }
+
+  return { inner, outer, start, end };
+}
diff --git a/find-tag-by-path.mjs b/find-tag-by-path.mjs
new file mode 100644
index 0000000..5c63b3e
--- /dev/null
+++ b/find-tag-by-path.mjs
@@ -0,0 +1,17 @@
+import findTagsByPath from "./find-tags-by-path.mjs";
+
+/**
+ * Find a single tag by following a path of tag names.
+ *
+ * @param {string} xml - text to search
+ * @param {string[]} path - tag names from outermost to innermost
+ * @param {Object} [options]
+ * @param {boolean} [options.debug=false] - log progress with console.log
+ * @returns {Object|undefined} the tag, or undefined unless findTagsByPath returns exactly one tag
+ */
+export default function findTagByPath(xml, path, options) {
+  const debug = (options && options.debug) || false;
+  const found = findTagsByPath(xml, path, { debug, returnOnFirst: true });
+  if (Array.isArray(found) && found.length === 1) return found[0];
+  else return undefined;
+}
diff --git a/find-tags-by-name.mjs b/find-tags-by-name.mjs
new file mode 100644
index 0000000..a86a2b7
--- /dev/null
+++ b/find-tags-by-name.mjs
@@ -0,0 +1,31 @@
+import findTagByName from "./find-tag-by-name.mjs";
+
+/**
+ * Find every occurrence of a tag in an XML string.
+ *
+ * @param {string} xml - text to search
+ * @param {string} tagName - tag name to look for
+ * @param {Object} [options]
+ * @param {boolean} [options.debug=false] - log progress with console.log
+ * @param {boolean} [options.nested=true] - when true, resume searching just inside each match so
+ *   same-named descendants are returned too; when false, resume after the end of each match
+ * @param {number} [options.startIndex=0] - index in xml at which to start searching
+ * @returns {Array<{inner: (string|null), outer: string, start: number, end: number}>} tags in the order found
+ */
+export default function findTagsByName(xml, tagName, options) {
+  const tags = [];
+  const debug = (options && options.debug) || false;
+  const nested = options && typeof options.nested === "boolean" ? options.nested : true;
+  let startIndex = (options && options.startIndex) || 0;
+  let tag;
+  while ((tag = findTagByName(xml, tagName, { debug, startIndex }))) {
+    if (nested) {
+      startIndex = tag.start + 1 + tagName.length;
+    } else {
+      startIndex = tag.end;
+    }
+    tags.push(tag);
+  }
+  if (debug) console.log("findTagsByName found", tags.length, "tags");
+  return tags;
+}
diff --git a/find-tags-by-path.mjs b/find-tags-by-path.mjs
new file mode 100644
index 0000000..49e7379
--- /dev/null
+++ b/find-tags-by-path.mjs
@@ -0,0 +1,45 @@
+import findTagsByName from "./find-tags-by-name.mjs";
+
+/**
+ * Find all tags reached by following a path of tag names.
+ *
+ * @param {string} xml - text to search
+ * @param {string[]} path - tag names from outermost to innermost; not modified
+ * @param {Object} [options]
+ * @param {boolean} [options.debug=false] - log progress with console.log
+ * @param {boolean} [options.returnOnFirst=false] - return as soon as a tag matching the last
+ *   path element is found, as a one-element array
+ * @returns {Array<{inner: (string|null), outer: string, start: number, end: number}>} matching tags;
+ *   start and end of sub-tags are shifted by the start of the tag they were found in
+ */
+export default function findTagsByPath(xml, path, options) {
+  const debug = (options && options.debug) || false;
+  const returnOnFirst = (options && options.returnOnFirst) || false;
+  // destructure instead of path.shift() so the caller's array is left untouched
+  const [firstTagName, ...restOfPath] = path;
+  let tags = findTagsByName(xml, firstTagName, { debug, nested: false });
+  if (debug) console.log("first tags are:", tags);
+  for (let pathIndex = 0; pathIndex < restOfPath.length; pathIndex++) {
+    const tagName = restOfPath[pathIndex];
+    if (debug) console.log("tagName:", tagName);
+    let allSubTags = [];
+    for (let tagIndex = 0; tagIndex < tags.length; tagIndex++) {
+      const tag = tags[tagIndex];
+      const subTags = findTagsByName(tag.outer, tagName, {
+        debug,
+        startIndex: 1
+      });
+      if (debug) console.log("subTags.length:", subTags.length);
+      if (subTags.length > 0) {
+        subTags.forEach(subTag => {
+          subTag.start += tag.start;
+          subTag.end += tag.start;
+        });
+        if (returnOnFirst && pathIndex === restOfPath.length - 1) return [subTags[0]];
+        allSubTags = allSubTags.concat(subTags);
+      }
+    }
+    tags = allSubTags;
+  }
+  return tags;
+}
diff --git a/get-attribute.mjs b/get-attribute.mjs
new file mode 100644
index 0000000..1a7a371
--- /dev/null
+++ b/get-attribute.mjs
@@ -0,0 +1,30 @@
+/**
+ * Read the value of an attribute from a tag's opening tag.
+ *
+ * @param {string|{outer: string}} tag - XML text, or a tag object whose outer property holds it
+ * @param {string} attributeName - attribute name; interpolated into a regular expression as-is
+ * @param {Object} [options]
+ * @param {boolean} [options.debug=false] - log progress with console.log
+ * @returns {string|undefined} the text between the quotes, or undefined when there is no match
+ */
+export default function getAttribute(tag, attributeName, options) {
+  const debug = (options && options.debug) || false;
+  if (debug) console.log("[xml-utils] getting " + attributeName + " in " + tag);
+
+  const xml = typeof tag === "object" ? tag.outer : tag;
+
+  // only search for attributes in the opening tag
+  const opening = xml.slice(0, xml.indexOf(">") + 1);
+
+  const quotechars = ['"', "'"];
+  for (let i = 0; i < quotechars.length; i++) {
+    const char = quotechars[i];
+    const pattern = attributeName + "\\=" + char + "([^" + char + "]*)" + char;
+    if (debug) console.log("[xml-utils] pattern:", pattern);
+
+    const re = new RegExp(pattern);
+    const match = re.exec(opening);
+    if (debug) console.log("[xml-utils] match:", match);
+    if (match) return match[1];
+  }
+}
diff --git a/index-of-match-end.mjs b/index-of-match-end.mjs
new file mode 100644
index 0000000..e249e32
--- /dev/null
+++ b/index-of-match-end.mjs
@@ -0,0 +1,14 @@
+/**
+ * Find the index of the last character of the first match of a pattern.
+ *
+ * @param {string} xml - text to search
+ * @param {string} pattern - regular-expression source; matched against xml.slice(startIndex)
+ * @param {number} startIndex - index in xml at which to start searching
+ * @returns {number} index in xml of the last matched character, or -1 when there is no match
+ */
+export default function indexOfMatchEnd(xml, pattern, startIndex) {
+  const re = new RegExp(pattern);
+  const match = re.exec(xml.slice(startIndex));
+  if (match) return startIndex + match.index + match[0].length - 1;
+  else return -1;
+}
diff --git a/index-of-match.mjs b/index-of-match.mjs
new file mode 100644
index 0000000..941091b
--- /dev/null
+++ b/index-of-match.mjs
@@ -0,0 +1,14 @@
+/**
+ * Find the index of the first character of the first match of a pattern.
+ *
+ * @param {string} xml - text to search
+ * @param {string} pattern - regular-expression source; matched against xml.slice(startIndex)
+ * @param {number} startIndex - index in xml at which to start searching
+ * @returns {number} index in xml where the match begins, or -1 when there is no match
+ */
+export default function indexOfMatch(xml, pattern, startIndex) {
+  const re = new RegExp(pattern);
+  const match = re.exec(xml.slice(startIndex));
+  if (match) return startIndex + match.index;
+  else return -1;
+}
diff --git a/index.mjs b/index.mjs
index 9c5b710..cd22086 100644
--- a/index.mjs
+++ b/index.mjs
@@ -1,6 +1,6 @@
-export { default as getAttribute } from "./get-attribute.js";
-export { default as findTagByName } from "./find-tag-by-name.js";
-export { default as findTagsByName } from "./find-tags-by-name.js";
-export { default as findTagByPath } from "./find-tag-by-path.js";
-export { default as findTagsByPath } from "./find-tags-by-path.js";
-export { default as removeComments } from "./remove-comments.js";
+export { default as getAttribute } from "./get-attribute.mjs";
+export { default as findTagByName } from "./find-tag-by-name.mjs";
+export { default as findTagsByName } from "./find-tags-by-name.mjs";
+export { default as findTagByPath } from "./find-tag-by-path.mjs";
+export { default as findTagsByPath } from "./find-tags-by-path.mjs";
+export { default as removeComments } from "./remove-comments.mjs";
diff --git a/package.json b/package.json
index b13cacd..849dfbc 100644
--- a/package.json
+++ b/package.json
@@ -44,11 +44,21 @@
     "index-of-match.js",
     "index-of-match-end.js",
     "remove-comments.js",
-    "remove-tags-by-name.js"
+    "remove-tags-by-name.js",
+    "count-substring.mjs",
+    "find-tag-by-name.mjs",
+    "find-tag-by-path.mjs",
+    "find-tags-by-name.mjs",
+    "find-tags-by-path.mjs",
+    "get-attribute.mjs",
+    "index-of-match.mjs",
+    "index-of-match-end.mjs",
+    "remove-comments.mjs",
+    "remove-tags-by-name.mjs"
   ],
   "scripts": {
     "f": "npm run format",
-    "format": "npx prettier --arrow-parens=avoid --print-width=140 --trailing-comma=none --write *.js */*.js */*.mjs */*.ts",
+    "format": "npx prettier --arrow-parens=avoid --print-width=140 --trailing-comma=none --write *.js *.mjs */*.js */*.mjs */*.ts",
     "setup": "cd test/data && bash setup.sh",
     "test": "npm run test:js && npm run test:ts && npm run test:tsc && npm run test:esm",
     "test:js": "node ./test/test.js",
diff --git a/remove-comments.mjs b/remove-comments.mjs
new file mode 100644
index 0000000..93cdcc5
--- /dev/null
+++ b/remove-comments.mjs
@@ -0,0 +1,9 @@
+/**
+ * Remove every XML comment from a string.
+ *
+ * @param {string} xml - text to clean
+ * @returns {string} xml without its comments
+ */
+export default function removeComments(xml) {
+  return xml.replace(/<!--[\s\S]*?-->/g, "");
+}
diff --git a/remove-tags-by-name.mjs b/remove-tags-by-name.mjs
new file mode 100644
index 0000000..164269a
--- /dev/null
+++ b/remove-tags-by-name.mjs
@@ -0,0 +1,20 @@
+import findTagByName from "./find-tag-by-name.mjs";
+
+/**
+ * Remove every occurrence of a tag, including its content, from an XML string.
+ *
+ * @param {string} xml - text to clean
+ * @param {string} tagName - name of the tag to remove
+ * @param {Object} [options]
+ * @param {boolean} [options.debug=false] - log progress with console.log
+ * @returns {string} xml without the matching tags
+ */
+export default function removeTagsByName(xml, tagName, options) {
+  const debug = (options && options.debug) || false;
+  let tag;
+  while ((tag = findTagByName(xml, tagName, { debug }))) {
+    xml = xml.substring(0, tag.start) + xml.substring(tag.end);
+    if (debug) console.log("[xml-utils] removed:", tag);
+  }
+  return xml;
+}
diff --git a/test/test.mjs b/test/test.mjs
index 2152f30..232e0b0 100644
--- a/test/test.mjs
+++ b/test/test.mjs
@@ -1,7 +1,7 @@
 import test from "flug";
 import { readFileSync } from "node:fs";
 import { findTagByName, findTagsByName, findTagByPath, findTagsByPath, getAttribute } from "../index.mjs";
-import { default as findTagsByNameDirect } from "../find-tags-by-name.js";
+import { default as findTagsByNameDirect } from "../find-tags-by-name.mjs";
 
 const iso = readFileSync("test/data/iso.xml", "utf-8");
 const mrf = readFileSync("test/data/m_3008501_ne_16_1_20171018.mrf", "utf-8");