Skip to content

Commit

Permalink
feat(app, app-shell-odd): add Flex resource monitor
Browse files Browse the repository at this point in the history
  • Loading branch information
mjhuff committed Oct 24, 2024
1 parent bbe06f1 commit 4d32231
Show file tree
Hide file tree
Showing 8 changed files with 351 additions and 2 deletions.
2 changes: 2 additions & 0 deletions app-shell-odd/src/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import {
closeBrokerConnection,
} from './notifications'
import { setUserDataPath } from './early'
import { registerResourceMonitor } from './monitor'

import type { OTLogger } from './log'
import type { BrowserWindow } from 'electron'
Expand Down Expand Up @@ -135,6 +136,7 @@ function startUp(): void {
registerConfig(dispatch),
registerDiscovery(dispatch),
registerRobotSystemUpdate(dispatch),
registerResourceMonitor(dispatch),
registerAppRestart(),
registerUpdateBrightness(),
registerNotify(dispatch, mainWindow),
Expand Down
291 changes: 291 additions & 0 deletions app-shell-odd/src/monitor/ResourceMonitor.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,291 @@
import { exec } from 'child_process'
import { promises as fs } from 'fs'

import { createLogger } from '../log'
import { UI_INITIALIZED } from '../constants'

import type { Action, Dispatch } from '../types'

/** systemd units whose main process (and descendants) are inspected. */
const PARENT_PROCESSES = [
  'opentrons-robot-server.service',
  'opentrons-robot-app.service',
] as const

/** Delay between consecutive resource reports: one hour, in milliseconds. */
const REPORTING_INTERVAL_MS = 60 * 60 * 1000

/** Command-line strings longer than this are truncated before reporting. */
const MAX_CMD_STR_LENGTH = 100

/** Upper bound on the number of per-process entries in a single report. */
const MAX_REPORTED_PROCESSES = 15

/** One process plus the processes it spawned. */
interface ProcessTreeNode {
  pid: number
  /** Command line of the process (or a `PID <n>` placeholder if unreadable). */
  cmd: string
  children: ProcessTreeNode[]
}

/** Reported metrics for a single process. */
interface ProcessDetails {
  name: string
  /** Resident set size in MB, formatted with two decimals. */
  memRssMb: string
}

/** Full payload produced by one scrape. */
interface ResourceMonitorDetails {
  /** System available memory in MB, formatted with two decimals. */
  systemAvailMemMb: string
  /** System uptime in hours, formatted with two decimals. */
  systemUptimeHrs: string
  processesDetails: ProcessDetails[]
}

// TODO(jh 10-24-24): Add testing, making proper affordances for mocking fs.readFile.

/**
 * Scrapes system and select process resource metrics and reports them to the
 * browser layer as an `analytics:RESOURCE_MONITOR_REPORT` action.
 *
 * The monitored processes are the main PIDs of the systemd units listed in
 * PARENT_PROCESSES plus every descendant discovered through `pgrep -P`.
 * Note that only MAX_REPORTED_PROCESSES entries are actually dispatched.
 */
export class ResourceMonitor {
  private readonly monitoredProcesses: Set<string>
  private readonly log: ReturnType<typeof createLogger>
  private intervalId: NodeJS.Timeout | null

  constructor() {
    this.monitoredProcesses = new Set(PARENT_PROCESSES)
    this.log = createLogger('monitor')
    this.intervalId = null
  }

  /**
   * Builds the action handler that arms the monitor.
   *
   * @param dispatch Used to send each report to the browser layer.
   * @returns A handler that starts the reporting interval when it receives
   *   UI_INITIALIZED. Repeated UI_INITIALIZED actions only log a warning.
   */
  start(dispatch: Dispatch): Dispatch {
    // Scrape and report metrics on an interval. setInterval does not fire
    // immediately, so the first report is produced one interval after start.
    const beginMonitor = (): void => {
      if (this.intervalId == null) {
        this.intervalId = setInterval(() => {
          this.getResourceDetails()
            .then(resourceDetails => {
              this.log.debug('resource monitor report', {
                resourceDetails,
              })
              this.dispatchResourceDetails(resourceDetails, dispatch)
            })
            .catch(error => {
              this.log.error('Error monitoring process: ', error)
            })
        }, REPORTING_INTERVAL_MS)
      } else {
        this.log.warn(
          'Attempted to start an already started instance of ResourceMonitor.'
        )
      }
    }

    return function handleAction(action: Action): void {
      switch (action.type) {
        case UI_INITIALIZED:
          beginMonitor()
      }
    }
  }

  /** Dispatches one report, capping the number of per-process entries. */
  private dispatchResourceDetails(
    details: ResourceMonitorDetails,
    dispatch: Dispatch
  ): void {
    const { processesDetails, systemUptimeHrs, systemAvailMemMb } = details
    dispatch({
      type: 'analytics:RESOURCE_MONITOR_REPORT',
      payload: {
        systemUptimeHrs,
        systemAvailMemMb,
        processesDetails: processesDetails.slice(0, MAX_REPORTED_PROCESSES), // don't accidentally send too many items to mixpanel.
      },
    })
  }

  /**
   * Collects all metrics for one report. Rejects if either system-level scrape
   * fails; per-process failures are tolerated by getProcessDetails.
   */
  private getResourceDetails(): Promise<ResourceMonitorDetails> {
    return Promise.all([
      this.getSystemAvailableMemory(),
      this.getSystemUptimeHrs(),
      this.getProcessDetails(),
    ]).then(([systemAvailMemMb, systemUptimeHrs, processesDetails]) => ({
      systemAvailMemMb,
      systemUptimeHrs,
      processesDetails,
    }))
  }

  /** Scrapes system uptime from /proc/uptime, returned in hours (2 decimals). */
  private getSystemUptimeHrs(): Promise<string> {
    return fs
      .readFile('/proc/uptime', 'utf8')
      .then(uptime => {
        // First value is uptime in seconds, second is idle time
        const uptimeSeconds = Math.floor(parseFloat(uptime.split(' ')[0]))
        return (uptimeSeconds / 3600).toFixed(2)
      })
      .catch(error => {
        throw new Error(
          `Failed to read system uptime: ${
            error instanceof Error ? error.message : String(error)
          }`
        )
      })
  }

  /** Scrapes MemAvailable from /proc/meminfo, returned in MB (2 decimals). */
  private getSystemAvailableMemory(): Promise<string> {
    return fs
      .readFile('/proc/meminfo', 'utf8')
      .then(meminfo => {
        const match = meminfo.match(/MemAvailable:\s+(\d+)\s+kB/)
        if (match == null) {
          throw new Error('Could not find MemAvailable in meminfo file')
        } else {
          const memInKb = parseInt(match[1], 10)
          return (memInKb / 1024).toFixed(2)
        }
      })
      .catch(error => {
        throw new Error(
          `Failed to read available memory info: ${
            error instanceof Error ? error.message : String(error)
          }`
        )
      })
  }

  /**
   * Given parent process names, gets metrics for each parent and all spawned
   * processes. A failure for one parent is logged and contributes no entries;
   * it does not fail the other parents.
   */
  private getProcessDetails(): Promise<ProcessDetails[]> {
    return Promise.all(
      Array.from(this.monitoredProcesses).map(parentProcess =>
        this.getProcessTree(parentProcess)
          .then(processTree => {
            if (processTree == null) {
              return []
            } else {
              return this.getProcessDetailsFlattened(processTree)
            }
          })
          .catch(error => {
            this.log.error('Failed to get process tree', {
              parentProcess,
              error,
            })
            return []
          })
      )
    ).then(detailsArrays => detailsArrays.flat())
  }

  /** Resolves the process tree rooted at a service, or null if it has no PID. */
  private getProcessTree(
    parentProcess: string
  ): Promise<ProcessTreeNode | null> {
    return this.getProcessPid(parentProcess).then(parentPid => {
      if (parentPid == null) {
        return null
      } else {
        return this.buildProcessTree(parentPid)
      }
    })
  }

  /**
   * Asks systemd for the MainPID of a unit.
   *
   * @returns The PID, or null when no MainPID line is present or the PID is
   *   0 or 1 (treated as "no usable main process").
   */
  private getProcessPid(serviceName: string): Promise<number | null> {
    return new Promise((resolve, reject) => {
      exec(`systemctl show ${serviceName} -p MainPID`, (error, stdout) => {
        if (error != null) {
          reject(
            new Error(`Failed to get PID for ${serviceName}: ${error.message}`)
          )
        } else {
          const match = stdout.match(/MainPID=(\d+)/)

          if (match == null) {
            resolve(null)
          } else {
            const pid = parseInt(match[1], 10)
            resolve(pid > 1 ? pid : null)
          }
        }
      })
    })
  }

  /** Recursively builds the process tree, scraping the cmdline string for each pid. */
  private buildProcessTree(pid: number): Promise<ProcessTreeNode> {
    return Promise.all([
      this.getProcessCmdline(pid),
      this.getChildProcessesFrom(pid),
    ]).then(([cmd, childPids]) => {
      return Promise.all(
        childPids.map(childPid => this.buildProcessTree(childPid))
      ).then(children => ({
        pid,
        cmd,
        children,
      }))
    })
  }

  /**
   * Gets the cmdline string for the given pid, truncating if necessary.
   * Falls back to `PID <pid>` when the cmdline cannot be read.
   */
  private getProcessCmdline(pid: number): Promise<string> {
    return fs
      .readFile(`/proc/${pid}/cmdline`, 'utf8')
      .then(cmdline => {
        // Arguments in /proc/<pid>/cmdline are NUL-separated.
        const cmd = cmdline.replace(/\0/g, ' ').trim()
        return cmd.length > MAX_CMD_STR_LENGTH
          ? `${cmd.substring(0, MAX_CMD_STR_LENGTH)}...`
          : cmd
      })
      .catch(error => {
        this.log.error(`Failed to read cmdline for PID ${pid}`, error)
        return `PID ${pid}`
      })
  }

  /** Lists the direct child PIDs of a process using `pgrep -P`. */
  private getChildProcessesFrom(parentPid: number): Promise<number[]> {
    return new Promise((resolve, reject) => {
      exec(`pgrep -P ${parentPid}`, (error, stdout) => {
        // code 1 means no children found
        if (error != null && error.code !== 1) {
          reject(error)
        } else {
          const children = stdout
            .trim()
            .split('\n')
            .filter(line => line.length > 0)
            .map(pid => parseInt(pid, 10))
            // Drop anything that did not parse to a PID.
            .filter(pid => Number.isInteger(pid))

          resolve(children)
        }
      })
    })
  }

  /**
   * Gets the metric(s) for a given node and, recursively, for all child nodes.
   * The result lists the node first, followed by its descendants.
   *
   * A node whose memory cannot be read (for example because the process exited
   * after the tree was built) is logged and omitted; its descendants and the
   * rest of the tree are still reported.
   */
  private getProcessDetailsFlattened(
    node: ProcessTreeNode
  ): Promise<ProcessDetails[]> {
    const ownDetails: Promise<ProcessDetails[]> = this.getProcessMemory(
      node.pid
    )
      .then((memRssMb): ProcessDetails[] => [{ name: node.cmd, memRssMb }])
      .catch(
        (error: unknown): ProcessDetails[] => {
          this.log.warn('Skipping process with unreadable memory info', {
            pid: node.pid,
            error,
          })
          return []
        }
      )

    return Promise.all([
      ownDetails,
      ...node.children.map(child => this.getProcessDetailsFlattened(child)),
    ]).then(detailsArrays => detailsArrays.flat())
  }

  /**
   * Scrapes VmRSS from /proc/pid/status for a given pid, returned in MB
   * (2 decimals). Rejects when the file is unreadable or has no VmRSS line.
   */
  private getProcessMemory(pid: number): Promise<string> {
    return fs
      .readFile(`/proc/${pid}/status`, 'utf8')
      .then(status => {
        const match = status.match(/VmRSS:\s+(\d+)\s+kB/)
        if (match == null) {
          throw new Error('Could not find VmRSS in status file')
        } else {
          const memInKb = parseInt(match[1], 10)
          return (memInKb / 1024).toFixed(2)
        }
      })
      .catch(error => {
        throw new Error(
          `Failed to read memory info for PID ${pid}: ${
            error instanceof Error ? error.message : String(error)
          }`
        )
      })
  }
}
8 changes: 8 additions & 0 deletions app-shell-odd/src/monitor/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import { ResourceMonitor } from './ResourceMonitor'

import type { Dispatch } from '../types'

/**
 * Creates a ResourceMonitor and wires it to the given dispatch.
 *
 * @param dispatch Passed through to `ResourceMonitor.start`.
 * @returns The action handler returned by `ResourceMonitor.start`.
 */
export function registerResourceMonitor(dispatch: Dispatch): Dispatch {
  return new ResourceMonitor().start(dispatch)
}
19 changes: 19 additions & 0 deletions app/src/redux/analytics/__tests__/make-event.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -121,4 +121,23 @@ describe('analytics events map', () => {
})
})
})

// The resource monitor report is forwarded as-is: the event properties are the
// action payload.
describe('events with resource monitor data', () => {
  it('analytics:RESOURCE_MONITOR_REPORT -> resourceMonitorReport event', () => {
    const state = {} as any
    const action = {
      type: 'analytics:RESOURCE_MONITOR_REPORT',
      payload: {
        systemAvailMemMb: '500',
        systemUptimeHrs: '111',
        processesDetails: [],
      },
    } as any

    return expect(makeEvent(action, state)).resolves.toEqual({
      name: 'resourceMonitorReport',
      properties: { ...action.payload },
    })
  })
})
})
6 changes: 6 additions & 0 deletions app/src/redux/analytics/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,9 @@ export const ANALYTICS_QUICK_TRANSFER_DETAILS_PAGE = 'quickTransferDetailsPage'
export const ANALYTICS_QUICK_TRANSFER_RUN_FROM_DETAILS =
'quickTransferRunFromDetails'
export const ANALYTICS_QUICK_TRANSFER_RERUN = 'quickTransferReRunFromSummary'

/**
* Resource Monitor Analytics
*/
// Action type carrying a resource monitor report.
export const ANALYTICS_RESOURCE_MONITOR_REPORT = 'analytics:RESOURCE_MONITOR_REPORT' as const
9 changes: 9 additions & 0 deletions app/src/redux/analytics/make-event.ts
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,15 @@ export function makeEvent(
})
}

case Constants.ANALYTICS_RESOURCE_MONITOR_REPORT: {
return Promise.resolve({
name: 'resourceMonitorReport',
properties: {
...action.payload,
},
})
}

case RobotAdmin.RESET_CONFIG: {
const { resets } = action.payload
return Promise.resolve({
Expand Down
7 changes: 5 additions & 2 deletions app/src/redux/analytics/mixpanel.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,12 @@ export function trackEvent(

log.debug('Trackable event', { event, optedIn })
if (MIXPANEL_ID != null && optedIn) {
if (event.superProperties != null) mixpanel.register(event.superProperties)
if ('name' in event && event.name != null)
if (event.superProperties != null) {
mixpanel.register(event.superProperties)
}
if ('name' in event && event.name != null) {
mixpanel.track(event.name, event.properties)
}
}
}

Expand Down
Loading

0 comments on commit 4d32231

Please sign in to comment.