-
Notifications
You must be signed in to change notification settings - Fork 21
/
util.js
113 lines (94 loc) · 3.39 KB
/
util.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
const fs = require('fs')
const path = require('path')
const { DateTime } = require('luxon')
/**
 * Renders a JS Date as a Pacific Time string.
 *
 * The Date is loaded into Luxon as UTC, moved to the America/Los_Angeles
 * zone, and formatted with the pattern `yyyy-MM-dd HH:mm:ss ZZZZ`.
 *
 * @param {Date} date - JS Date to format; any falsy value is accepted.
 * @returns {string|null} The formatted string, or null when `date` is falsy.
 */
const toPacificTimeString = (date) => {
  // Nothing to format: callers get null rather than an "Invalid DateTime" string.
  if (!date) {
    return null
  }
  const utcDateTime = DateTime.fromJSDate(date, { zone: 'utc' })
  const pacificDateTime = utcDateTime.setZone('America/Los_Angeles')
  return pacificDateTime.toFormat('yyyy-MM-dd HH:mm:ss ZZZZ')
}
/**
 * Flattens an array by exactly one level.
 *
 * Elements that are arrays are expanded in place; everything else is copied
 * through unchanged. Deeper nesting is preserved. The input is not mutated.
 *
 * @param {Array} arr - Array whose elements may themselves be arrays.
 * @returns {Array} A new array one level flatter than `arr`.
 */
const flatten = arr => {
  // Build into a single output array instead of re-spreading the accumulator
  // on every step, which made the previous version quadratic in the output size.
  const flat = []
  arr.forEach(cur => {
    if (Array.isArray(cur)) {
      for (const item of cur) {
        flat.push(item)
      }
    } else {
      flat.push(cur)
    }
  })
  return flat
}
/**
 * Arithmetic mean of an array of numbers.
 *
 * @param {number[]} arr - Values to average.
 * @returns {number} The mean, or 0 when `arr` is empty.
 */
const average = (arr) => {
  const count = arr.length
  // An empty list has no mean; report 0 instead of dividing by zero.
  if (count === 0) return 0
  let total = 0
  arr.forEach((value) => {
    total = total + value
  })
  return total / count
}
/**
 * Median of an array of numbers.
 *
 * For an even number of values the two middle values are averaged.
 * The input array is not mutated.
 *
 * @param {number[]} arr - Values to take the median of.
 * @returns {number} The median, or 0 when `arr` is empty.
 */
const median = (arr) => {
  // An empty list has no middle element; return 0 rather than NaN.
  if (arr.length === 0) return 0
  // A numeric comparator is required: the default sort compares values as
  // strings, which would order [1, 10, 2] as 1, 10, 2.
  const sorted = [...arr].sort((a, b) => a - b)
  const mid = Math.floor(sorted.length / 2)
  return sorted.length % 2 !== 0
    ? sorted[mid]
    : (sorted[mid - 1] + sorted[mid]) / 2
}
/**
 * Wraps a comment in double quotes, first turning any embedded double quotes
 * into single quotes so the result contains no inner `"`.
 *
 * @param {string} comment - Text to quote; any falsy value is accepted.
 * @returns {string} The quoted text, or an empty string when `comment` is falsy.
 */
const escapeComment = comment => {
  if (!comment) return ''
  const withoutDoubleQuotes = comment.replace(/"/g, "'")
  return `"${withoutDoubleQuotes}"`
}
/**
 * Removes HTML tags from a string and normalizes non-breaking spaces.
 *
 * Anything of the form `<...>` is deleted. Non-breaking spaces, whether
 * written as the `&nbsp;` entity or as the U+00A0 character, become ordinary
 * spaces. Other HTML entities are left as they are.
 *
 * @param {string} comment - Text that may contain HTML; any falsy value is accepted.
 * @returns {string} The text without tags, or an empty string when `comment` is falsy.
 */
const stripHTML = comment => {
  if (!comment) return ''
  return comment
    .replace(/<[^>]+>/g, '')
    // Spelled out explicitly: a bare whitespace character inside the pattern is
    // indistinguishable from a normal space when reading the code.
    .replace(/&nbsp;|\u00A0/g, ' ')
}
/**
 * Writes the header line to a file, replacing any existing content.
 *
 * @param {string} pathToFile - Destination file path.
 * @param {string} headers - Header line text; a CRLF is appended to it.
 * @returns {undefined} Whatever `fs.writeFileSync` returns.
 */
const writeHeader = (pathToFile, headers) => {
  const headerLine = headers + '\r\n'
  return fs.writeFileSync(pathToFile, headerLine)
}
/**
 * Appends one row to a file as a comma-joined line terminated by CRLF.
 *
 * Values are joined exactly as given; no quoting or escaping happens here.
 *
 * @param {string} pathToFile - Destination file path.
 * @param {Array} row - Cell values for the row.
 * @returns {undefined} Whatever `fs.appendFileSync` returns.
 */
const appendRow = (pathToFile, row) => {
  const rowLine = row.join(',') + '\r\n'
  return fs.appendFileSync(pathToFile, rowLine)
}
/**
 * Counts the whitespace-separated words in a message, ignoring HTML tags.
 *
 * The text is counted as-is after tag removal. It is deliberately not wrapped
 * in quotes first: surrounding quote characters turn leading or trailing
 * whitespace into phantom words and make an empty message count as one word.
 *
 * @param {string} str - Message text, possibly containing HTML; falsy values are accepted.
 * @returns {number} Number of words; 0 when nothing but tags/whitespace remains.
 */
const getWordCount = (str) => {
  const text = stripHTML(str).trim()
  // ''.split(/\s+/) yields [''] (length 1), so the empty case is handled explicitly.
  if (text === '') return 0
  return text.split(/\s+/).length
}
/**
 * Computes summary statistics for a list of posts.
 *
 * Each post is read for `postMessage`, `postTimestamp` and `postAuthorId`.
 * Timestamps are subtracted and compared directly, so they are expected to be
 * Date objects or epoch-millisecond numbers (NOTE(review): confirm with callers).
 *
 * @param {Array<Object>} posts - Posts to summarize.
 * @param {Date|number} referenceTimestamp - Instant each post time is measured
 *   against (presumably the topic creation time - verify against callers).
 * @returns {Object} Summary with:
 *   - numberOfPosts: count of posts
 *   - medianWordCount: median words per post, rounded to one decimal
 *   - averageTimeDiff: mean hours from `referenceTimestamp` to each post, one decimal
 *   - firstReplyTimestamp: Date of the earliest post
 *   - averageTimeToPostFromFirst: mean hours from the earliest post to each
 *     later post, one decimal; 0 when there are no later posts
 *   - averagePostsPerAuthor: mean posts per distinct author, one decimal
 *   With no posts, the counts are 0 and every other field is null.
 */
const postStatistics = (posts, referenceTimestamp) => {
  const MS_PER_HOUR = 1000 * 60 * 60
  const roundToTenth = (value) => parseFloat(value.toFixed(1))

  const numberOfPosts = posts.length
  if (numberOfPosts === 0) {
    return {
      numberOfPosts: 0,
      medianWordCount: 0,
      averageTimeDiff: null,
      firstReplyTimestamp: null,
      averageTimeToPostFromFirst: null,
      averagePostsPerAuthor: null
    }
  }

  // Median word count across posts
  const wordCounts = posts.map(post => getWordCount(post.postMessage))
  const medianWordCount = Math.round(median(wordCounts) * 10) / 10

  // Average time in hours from referenceTimestamp to each post.
  // Convert to hours BEFORE rounding; rounding the millisecond value first
  // leaves the hours figure effectively unrounded.
  const timeDiffs = posts.map(post => post.postTimestamp - referenceTimestamp)
  const averageTimeDiff = roundToTenth(average(timeDiffs) / MS_PER_HOUR)

  // Average number of posts per author. The tally uses a prototype-less
  // object so an author id such as "__proto__" is counted like any other key.
  const postCountsByAuthor = posts.reduce((acc, post) => {
    acc[post.postAuthorId] = (acc[post.postAuthorId] || 0) + 1
    return acc
  }, Object.create(null))
  const postCounts = Object.values(postCountsByAuthor)
  const averagePostsPerAuthor = roundToTenth(average(postCounts))

  // Earliest post time. Folded pairwise rather than spread into Math.min so
  // very large post lists cannot exceed the engine's argument limit.
  const earliestMs = posts.reduce(
    (min, post) => Math.min(min, new Date(post.postTimestamp)),
    Infinity
  )
  const firstReplyTimestamp = new Date(earliestMs)

  // Hours from the earliest post to every strictly later post
  const timeDiffsFromFirst = posts
    .filter(post => post.postTimestamp > firstReplyTimestamp)
    .map(post => (post.postTimestamp - firstReplyTimestamp) / MS_PER_HOUR)
  const averageTimeToPostFromFirst = timeDiffsFromFirst.length > 0
    ? roundToTenth(average(timeDiffsFromFirst))
    : 0

  return {
    numberOfPosts,
    medianWordCount,
    averageTimeDiff,
    firstReplyTimestamp,
    averageTimeToPostFromFirst,
    averagePostsPerAuthor
  }
}
// Public API of this module. The helpers average, median and getWordCount
// are not listed here and therefore stay private to this file.
module.exports = {
flatten,
escapeComment,
stripHTML,
writeHeader,
appendRow,
postStatistics,
toPacificTimeString
}