-
Notifications
You must be signed in to change notification settings - Fork 3
/
poindexter.js
65 lines (57 loc) · 2.12 KB
/
poindexter.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
const { readdir, stat, readFile, outputFileSync } = require('fs-extra')
const { resolve, relative } = require('path')
const { configent } = require('configent')
const cheerio = require('cheerio')
/**
 * Builds a FlexSearch index from the `.html` files found beneath the
 * configured `docs` directory and writes it, as JSON, to every configured
 * `output` path.
 *
 * The effective configuration is produced by `configent`, which combines
 * `./defaults` with the supplied `options`.
 *
 * @param {Partial<import('./defaults.js')>} options overrides for the defaults
 * @returns {Promise<void>} settles after the walk has finished and all outputs are written
 */
async function poindexter(options = {}) {
    const config = configent(require('./defaults'), options, { useDetectDefaults: true, module })
    const { flexsearch, docs, output, scrape } = config

    /** @ts-ignore */
    /** @type {import('flexsearch')['default']} */
    const FlexSearch = (require("flexsearch"))
    /** @ts-ignore */
    const index = FlexSearch.create(flexsearch)

    // Fill the index first; the helpers below are function declarations and
    // therefore already available here.
    await walker(docs)

    // `output` may be one path or an array of paths - normalise to a flat list.
    const targets = [output].flat()
    for (const target of targets) {
        const payload = JSON.stringify({
            config: flexsearch,
            /** @ts-ignore */
            dump: index.export({ serialize: false })
        })
        outputFileSync(target, payload)
    }

    /**
     * Reads `dir` and processes all of its entries concurrently, descending
     * into sub-directories.
     *
     * @param {string} dir directory to read
     * @param {string} [root] directory that URL paths are made relative to;
     *   falls back to `dir` when falsy
     */
    async function walker(dir, root) {
        const base = root || dir
        const entries = await readdir(dir)
        const pending = entries.map(entry => visit(entry, dir, base))
        await Promise.all(pending)
    }

    /**
     * Handles a single directory entry: recurses into directories, scrapes and
     * indexes `.html` files, and ignores everything else.
     *
     * @param {string} filename entry name as returned by `readdir`
     * @param {string} dir directory containing the entry
     * @param {string} base directory that URL paths are made relative to
     */
    async function visit(filename, dir, base) {
        const filepath = resolve(dir, filename)
        const stats = await stat(filepath)

        if (stats.isDirectory()) {
            await walker(filepath, base)
            return
        }
        if (!filename.endsWith('.html')) {
            return
        }

        const urlPath = toUrlPath(base, filepath)
        const html = await readFile(filepath, 'utf-8')
        const doc = cheerio.load(html)
        // NOTE(review): the second argument is the directory being walked, not
        // the file path - confirm this is what `scrape` expects.
        const data = await scrape.bind(config)(doc, dir, config)

        // A falsy scrape result means the page is left out of the index.
        if (!data) {
            console.log(`[poindexter] skipped ${urlPath}`)
            return
        }
        console.log(`[poindexter] indexed ${urlPath}`)
        index.add([{ path: urlPath, ...data }])
    }

    /**
     * Derives the indexed path for a file: relative to `base`, forward
     * slashes only, without the `.html` extension and without a trailing
     * `/index` segment.
     *
     * @param {string} base directory the result is relative to
     * @param {string} filepath resolved path of the html file
     * @returns {string}
     */
    function toUrlPath(base, filepath) {
        const unixStyle = relative(base, filepath).replace(/\\/g, '/') //force unix paths
        const withoutExtension = unixStyle.replace(/\.html$/, '')
        return withoutExtension.replace(/\/index$/, '')
    }
}
// Expose `poindexter` as a named CommonJS export.
module.exports.poindexter = poindexter