diff --git a/.github/workflows/tests-vm-pr.yml b/.github/workflows/tests-vm-pr.yml
index bf8ca2dfd..f16f01261 100644
--- a/.github/workflows/tests-vm-pr.yml
+++ b/.github/workflows/tests-vm-pr.yml
@@ -9,6 +9,7 @@ jobs:
     permissions:
       packages: write
       contents: read
+      pull-requests: write
     steps:
       - name: 'Checkout repository'
         uses: actions/checkout@v3
@@ -38,6 +39,5 @@ jobs:
         with:
           task: display-results
           branch: main
-
-
-
+          pr-comment: true
+
\ No newline at end of file
diff --git a/api/api.py b/api/api.py
index 956012df3..ebbca4193 100644
--- a/api/api.py
+++ b/api/api.py
@@ -8,6 +8,7 @@ import sys
 import os
+from xml.sax.saxutils import escape as xml_escape
 from fastapi import FastAPI, Request, Response, status
 from fastapi.responses import ORJSONResponse
 from fastapi.encoders import jsonable_encoder
@@ -31,7 +32,6 @@
     sanitize, get_phase_stats, get_phase_stats_object,
     is_valid_uuid, rescale_energy_value)
-
 # It seems like FastAPI already enables faulthandler as it shows stacktrace on SEGFAULT
 # Is the redundant call problematic?
 faulthandler.enable()  # will catch segfaults and write to STDERR
@@ -152,7 +152,7 @@ async def get_network(project_id):
 @app.get('/v1/machines/')
 async def get_machines():
     query = """
-            SELECT id, description
+            SELECT id, description, available
             FROM machines
             ORDER BY description ASC
             """
@@ -165,14 +165,28 @@ async def get_machines():
 # A route to return all of the available entries in our catalog.
 @app.get('/v1/projects')
-async def get_projects():
+async def get_projects(repo: str, filename: str):
     query = """
             SELECT a.id, a.name, a.uri, COALESCE(a.branch, 'main / master'), a.end_measurement, a.last_run, a.invalid_project, a.filename, b.description, a.commit_hash
             FROM projects as a
             LEFT JOIN machines as b on a.machine_id = b.id
-            ORDER BY a.created_at DESC  -- important to order here, the charting library in JS cannot do that automatically!
+            WHERE 1=1
             """
-    data = DB().fetch_all(query)
+    params = []
+
+    filename = filename.strip()
+    if filename not in ('', 'null'):
+        query = f"{query} AND a.filename LIKE %s \n"
+        params.append(f"%{filename}%")
+
+    repo = repo.strip()
+    if repo not in ('', 'null'):
+        query = f"{query} AND a.uri LIKE %s \n"
+        params.append(f"%{repo}%")
+
+    query = f"{query} ORDER BY a.created_at DESC  -- important to order here, the charting library in JS cannot do that automatically!"
+
+    data = DB().fetch_all(query, params=tuple(params))
     if data is None or data == []:
         return Response(status_code=204)  # No-Content
@@ -316,42 +330,46 @@ async def get_badge_single(project_id: str, metric: str = 'ml-estimated'):
         return ORJSONResponse({'success': False, 'err': 'Project ID is not a valid UUID or empty'}, status_code=400)

     query = '''
-        WITH times AS (
-            SELECT start_measurement, end_measurement FROM projects WHERE id = %s
-        ) SELECT
-            (SELECT start_measurement FROM times), (SELECT end_measurement FROM times),
-            SUM(measurements.value), measurements.unit
-        FROM measurements
+        SELECT
+            SUM(value), MAX(unit)
+        FROM
+            phase_stats
         WHERE
-            measurements.project_id = %s
-            AND measurements.time >= (SELECT start_measurement FROM times)
-            AND measurements.time <= (SELECT end_measurement FROM times)
-            AND measurements.metric LIKE %s
-        GROUP BY measurements.unit
+            project_id = %s
+            AND metric LIKE %s
+            AND phase LIKE '%%_[RUNTIME]'
         '''

     value = None
+    label = 'Energy Cost'
+    via = ''
     if metric == 'ml-estimated':
         value = 'psu_energy_ac_xgboost_machine'
+        via = 'via XGBoost ML'
     elif metric == 'RAPL':
-        value = '%_rapl_%'
+        value = '%_energy_rapl_%'
+        via = 'via RAPL'
     elif metric == 'AC':
         value = 'psu_energy_ac_%'
+        via = 'via PSU (AC)'
+    elif metric == 'SCI':
+        label = 'SCI'
+        value = 'software_carbon_intensity_global'
     else:
         return ORJSONResponse({'success': False, 'err': f"Unknown metric '{metric}' submitted"}, status_code=400)

-    params = (project_id, project_id, value)
+    params = (project_id, value)
     data = DB().fetch_one(query, params=params)

-    if data is None or data == []:
+    if data is None or data == [] or not data[1]:
         badge_value = 'No energy data yet'
     else:
-        [energy_value, energy_unit] = rescale_energy_value(data[2], data[3])
-        badge_value= f"{energy_value:.2f} {energy_unit} via {metric}"
+        [energy_value, energy_unit] = rescale_energy_value(data[0], data[1])
+        badge_value = f"{energy_value:.2f} {energy_unit} {via}"

     badge = anybadge.Badge(
-        label='Energy cost',
-        value=badge_value,
+        label=xml_escape(label),
+        value=xml_escape(badge_value),
         num_value_padding_chars=1,
         default_color='cornflowerblue')
     return Response(content=str(badge), media_type="image/svg+xml")
@@ -553,7 +571,7 @@ async def get_ci_badge_get(repo: str, branch: str, workflow:str):
     badge = anybadge.Badge(
         label='Energy Used',
-        value=badge_value,
+        value=xml_escape(badge_value),
         num_value_padding_chars=1,
         default_color='green')
     return Response(content=str(badge), media_type="image/svg+xml")
diff --git a/api/api_helpers.py b/api/api_helpers.py
index 36c87b109..0f54e88a1 100644
--- a/api/api_helpers.py
+++ b/api/api_helpers.py
@@ -21,6 +21,17 @@
 METRIC_MAPPINGS = {
+
+    'embodied_carbon_share_machine': {
+        'clean_name': 'Embodied Carbon',
+        'source': 'formula',
+        'explanation': 'Embodied carbon attributed by time share of the life-span and total embodied carbon',
+    },
+    'software_carbon_intensity_global': {
+        'clean_name': 'SCI',
+        'source': 'formula',
+        'explanation': 'SCI metric by the Green Software Foundation',
+    },
     'phase_time_syscall_system': {
         'clean_name': 'Phase Duration',
         'source': 'Syscall',
@@ -223,17 +234,21 @@ def rescale_energy_value(value, unit):
     # We only expect values to be mJ for energy!
-    if unit != 'mJ':
-        raise RuntimeError('Unexpected unit occured for energy rescaling: ', unit)
+    if unit in ['mJ', 'ug'] or unit.startswith('ugCO2e/'):
+        unit_type = unit[1:]

-    energy_rescaled = [value, unit]
+        energy_rescaled = [value, unit]
+
+        # pylint: disable=multiple-statements
+        if value > 1_000_000_000_000: energy_rescaled = [value/(10**12), f"G{unit_type}"]
+        elif value > 1_000_000_000: energy_rescaled = [value/(10**9), f"M{unit_type}"]
+        elif value > 1_000_000: energy_rescaled = [value/(10**6), f"k{unit_type}"]
+        elif value > 1_000: energy_rescaled = [value/(10**3), f"{unit_type}"]
+        elif value < 0.001: energy_rescaled = [value*(10**3), f"n{unit_type}"]
+
+    else:
+        raise RuntimeError('Unexpected unit occurred for energy rescaling: ', unit)

-    # pylint: disable=multiple-statements
-    if value > 1_000_000_000: energy_rescaled = [value/(10**12), 'GJ']
-    elif value > 1_000_000_000: energy_rescaled = [value/(10**9), 'MJ']
-    elif value > 1_000_000: energy_rescaled = [value/(10**6), 'kJ']
-    elif value > 1_000: energy_rescaled = [value/(10**3), 'J']
-    elif value < 0.001: energy_rescaled = [value*(10**3), 'nJ']

     return energy_rescaled
diff --git a/config.yml.example b/config.yml.example
index c3ec48012..bc09229c6 100644
--- a/config.yml.example
+++ b/config.yml.example
@@ -27,6 +27,7 @@ admin:
   notify_admin_for_own_project_ready: False
+
 cluster:
   api_url: __API_URL__
   metrics_url: __METRICS_URL__
@@ -122,3 +123,31 @@ measurement:
 #      HW_MemAmountGB: 16
 #      Hardware_Availability_Year: 2011
 #--- END
+
+
+sci:
+    # https://github.com/Green-Software-Foundation/sci/blob/main/Software_Carbon_Intensity/Software_Carbon_Intensity_Specification.md
+
+    # The values specific to the machine are set here. The values that are specific to the
+    # software, like R – the functional unit, are set in the usage_scenario.yml
+
+    # EL = Expected Lifespan; the anticipated time that the equipment will be installed. Value is in years.
+    # The number 3.5 comes from a typical developer machine (Apple Macbook 16" 2023 - https://dataviz.boavizta.org/manufacturerdata?lifetime=3.5&name=14-inch%20MacBook%20Pro%20with%2064GB)
+    EL: 3.5
+    # RS = Resource Share; the share of the total available resources of the hardware reserved for use by the software.
+    # This ratio is typically 1 with the Green Metrics Tool unless you use a custom distributed orchestrator.
+    RS: 1
+    # TE = Total Embodied Emissions; the sum of Life Cycle Assessment (LCA) emissions for all hardware components.
+    # Value is in gCO2eq.
+    # The value has to be identified from vendor datasheets. Here are some example sources:
+    # https://dataviz.boavizta.org/manufacturerdata
+    # https://tco.exploresurface.com/sustainability/calculator
+    # https://www.delltechnologies.com/asset/en-us/products/servers/technical-support/Full_LCA_Dell_R740.pdf
+    # The default is the value for a developer machine (Apple Macbook 16" 2023 - https://dataviz.boavizta.org/manufacturerdata?lifetime=3.5&name=14-inch%20MacBook%20Pro%20with%2064GB)
+    TE: 194000
+    # I is the carbon intensity at the location of this machine.
+    # The value can either be a number in gCO2e/kWh or a carbon intensity provider that fetches this number dynamically:
+    # https://docs.green-coding.berlin/docs/measuring/carbon-intensity-providers/carbon-intensity-providers-overview/
+    # For fixed values get the number from https://ember-climate.org/insights/research/global-electricity-review-2022/
+    # The default of 475 is the value for Germany from 2022.
+    I: 475
\ No newline at end of file
diff --git a/docker/Dockerfile-gunicorn b/docker/Dockerfile-gunicorn
index 3a608025b..2f77559c4 100644
--- a/docker/Dockerfile-gunicorn
+++ b/docker/Dockerfile-gunicorn
@@ -1,14 +1,8 @@
 # syntax=docker/dockerfile:1
-FROM ubuntu:22.04
+FROM python:3.11.4-slim-bookworm
 ENV DEBIAN_FRONTEND=noninteractive

-RUN rm -rf /var/lib/apt/lists/*
-RUN apt update && \
-    apt install python3 python3-pip gunicorn -y
-
 COPY requirements.txt requirements.txt
 RUN pip3 install -r requirements.txt

-RUN rm -rf /var/lib/apt/lists/*
-
-ENTRYPOINT ["/bin/gunicorn", "--workers=2", "--access-logfile=-", "--error-logfile=-", "--worker-tmp-dir=/dev/shm", "--threads=4", "--worker-class=gthread", "--bind", "unix:/tmp/green-coding-api.sock", "-m", "007", "--user", "www-data", "--chdir", "/var/www/green-metrics-tool/api", "-k", "uvicorn.workers.UvicornWorker", "api:app"]
\ No newline at end of file
+ENTRYPOINT ["/usr/local/bin/gunicorn", "--workers=2", "--access-logfile=-", "--error-logfile=-", "--worker-tmp-dir=/dev/shm", "--threads=4", "--worker-class=gthread", "--bind", "unix:/tmp/green-coding-api.sock", "-m", "007", "--user", "www-data", "--chdir", "/var/www/green-metrics-tool/api", "-k", "uvicorn.workers.UvicornWorker", "api:app"]
\ No newline at end of file
diff --git a/docker/requirements.txt b/docker/requirements.txt
index 91706445f..afefcff8b 100644
--- a/docker/requirements.txt
+++ b/docker/requirements.txt
@@ -7,7 +7,5 @@ PyYAML==6.0.1
 anybadge==1.14.0
 uvicorn==0.23.2
 orjson==3.9.2
-pyserial==3.5
-psutil==5.9.5
 scipy==1.11.1
-schema==0.7.5
\ No newline at end of file
+schema==0.7.5
diff --git a/frontend/css/green-coding.css b/frontend/css/green-coding.css
index dee643bd5..b03199f83 100644
--- a/frontend/css/green-coding.css
+++ b/frontend/css/green-coding.css
@@ -160,6 +160,7 @@ a,
     text-overflow: ellipsis;
     overflow-wrap: normal;
     overflow: hidden;
+    white-space: nowrap;
 }

 .si-unit {
diff --git a/frontend/index.html b/frontend/index.html
index 4ab15ac55..61f7c9500 100644
--- a/frontend/index.html
+++ b/frontend/index.html
@@ -53,6 +53,13 @@

 -->
+
+<!-- new filters warning banner; its markup was stripped during extraction.
+     It is the .ui.warning.message element with a <ul> that showActiveFilters()
+     in index.js unhides and fills. -->
+
diff --git a/frontend/js/ci.js b/frontend/js/ci.js
index 2ad6829de..90a904cc7 100644
--- a/frontend/js/ci.js
+++ b/frontend/js/ci.js
@@ -171,7 +171,7 @@ const getChartOptions = (runs, chart_element) => {
 const displayGraph = (runs) => {
     const element = createChartContainer("#chart-container", "run-energy", runs);
-    
+
     const options = getChartOptions(runs, element);

     const chart_instance = echarts.init(element);
@@ -241,26 +241,26 @@ const displayCITable = (runs, url_params) => {
         var run_link = ''

         if(source == 'github') {
-            run_link = `https://github.com/${url_params.get('repo')}/actions/runs/${run_id}`;
+            run_link = `https://github.com/${escapeString(url_params.get('repo'))}/actions/runs/${escapeString(run_id)}`;
         }
         else if (source == 'gitlab') {
-            run_link = `https://gitlab.com/${url_params.get('repo')}/-/pipelines/${run_id}`
+            run_link = `https://gitlab.com/${escapeString(url_params.get('repo'))}/-/pipelines/${escapeString(run_id)}`
         }
-        const run_link_node = `<a href="${run_link}">${run_id}</a>`
+        const run_link_node = `<a href="${run_link}">${escapeString(run_id)}</a>`

         const created_at = el[3]
         const label = el[4]
         const duration = el[7]

-        li_node.innerHTML = `\
-            \
+        li_node.innerHTML = `\
+            \
             \
-            \
-            \
-            \
-            `;
+            \
+            \
+            \
+            `;
         document.querySelector("#ci-table").appendChild(li_node);
     });
     $('table').tablesort();
@@ -318,17 +318,17 @@ $(document).ready((e) => {
             let repo_link = ''

             if(badges_data.data[0][8] == 'github') {
-                repo_link = `https://github.com/${url_params.get('repo')}`;
+                repo_link = `https://github.com/${escapeString(url_params.get('repo'))}`;
             }
             else if(badges_data.data[0][8] == 'gitlab') {
-                repo_link = `https://gitlab.com/${url_params.get('repo')}`;
+                repo_link = `https://gitlab.com/${escapeString(url_params.get('repo'))}`;
             }
             //${repo_link}
-            const repo_link_node = `<a href="${repo_link}">${url_params.get('repo')}</a>`
+            const repo_link_node = `<a href="${repo_link}">${escapeString(url_params.get('repo'))}</a>`

             document.querySelector('#ci-data').insertAdjacentHTML('afterbegin', `<tr><td>Repository:</td><td>${repo_link_node}</td></tr>`)
-            document.querySelector('#ci-data').insertAdjacentHTML('afterbegin', `<tr><td>Branch:</td><td>${url_params.get('branch')}</td></tr>`)
-            document.querySelector('#ci-data').insertAdjacentHTML('afterbegin', `<tr><td>Workflow:</td><td>${url_params.get('workflow')}</td></tr>`)
+            document.querySelector('#ci-data').insertAdjacentHTML('afterbegin', `<tr><td>Branch:</td><td>${escapeString(url_params.get('branch'))}</td></tr>`)
+            document.querySelector('#ci-data').insertAdjacentHTML('afterbegin', `<tr><td>Workflow:</td><td>${escapeString(url_params.get('workflow'))}</td></tr>`)
+
             displayCITable(badges_data.data, url_params);
             chart_instance = displayGraph(badges_data.data)
             displayAveragesTable(badges_data.data)
diff --git a/frontend/js/helpers/config.js.example b/frontend/js/helpers/config.js.example
index 264559e43..42f01fad8 100644
--- a/frontend/js/helpers/config.js.example
+++ b/frontend/js/helpers/config.js.example
@@ -60,4 +60,15 @@ const machine_power_metric_condition = (metric) => {
 const machine_energy_metric_condition = (metric) => {
     if(metric.match(/^.*_energy_.*_machine$/) !== null) return true;
     return false;
-}
\ No newline at end of file
+}
+
+const sci_metric_condition = (metric) => {
+    if(metric == 'software_carbon_intensity_global') return true;
+    return false;
+}
+
+const embodied_carbon_share_metric_condition = (metric) => {
+    if(metric == 'embodied_carbon_share_machine') return true;
+    return false;
+}
+
diff --git a/frontend/js/helpers/main.js b/frontend/js/helpers/main.js
index 6100db7de..5cf56e78e 100644
--- a/frontend/js/helpers/main.js
+++ b/frontend/js/helpers/main.js
@@ -33,18 +33,30 @@ class GMTMenu extends HTMLElement {
 customElements.define('gmt-menu', GMTMenu);

 const replaceRepoIcon = (uri) => {
-    if (uri.startsWith("https://www.github.com") || uri.startsWith("https://github.com")) {
-        uri = uri.replace("https://www.github.com", '');
-        uri = uri.replace("https://github.com", '');
-    } else if (uri.startsWith("https://www.bitbucket.com") || uri.startsWith("https://bitbucket.com")) {
-        uri = uri.replace("https://www.bitbucket.com", '');
-        uri = uri.replace("https://bitbucket.com", '');
-    } else if (uri.startsWith("https://www.gitlab.com") || uri.startsWith("https://gitlab.com")) {
-        uri = uri.replace("https://www.gitlab.com", '');
-        uri = uri.replace("https://gitlab.com", '');
-    }
-    return uri;
-}
+
+    if(!uri.startsWith('http')) return uri; // ignore filesystem paths
+
+    const url = new URL(uri);
+
+    let iconClass = "";
+    switch (url.host) {
+        case "github.com":
+        case "www.github.com":
+            iconClass = "github";
+            break;
+        case "bitbucket.com":
+        case "www.bitbucket.com":
+            iconClass = "bitbucket";
+            break;
+        case "gitlab.com":
+        case "www.gitlab.com":
+            iconClass = "gitlab";
+            break;
+        default:
+            return uri;
+    }
+    return `<i class="${iconClass} icon"></i>` + uri.substring(url.origin.length);
+};

 const showNotification = (message_title, message_text, type='warning') => {
     $('body')
@@ -82,6 +94,18 @@ const dateToYMD = (date, short=false) => {
     return ` ${date.getFullYear()}-${month}-${day} ${hours}:${minutes} UTC${offset}`;
 }

+const escapeString = (string) => {
+    const map = {
+        '&': '&amp;',
+        '<': '&lt;',
+        '>': '&gt;',
+        '"': '&quot;',
+        "'": '&#x27;'
+    };
+    const reg = /[&<>"']/ig;
+    return string.replace(reg, (match) => map[match]);
+}
+
 async function makeAPICall(path, values=null) {

     if(values != null ) {
diff --git a/frontend/js/helpers/metric-boxes.js b/frontend/js/helpers/metric-boxes.js
index c3e44ce28..bed8ba641 100644
--- a/frontend/js/helpers/metric-boxes.js
+++ b/frontend/js/helpers/metric-boxes.js
@@ -139,6 +139,25 @@ class PhaseMetrics extends HTMLElement {
+        <!-- new SCI metric card; the original markup was stripped during extraction.
+             Recoverable content: a card titled "SCI" with default value "N/A" and
+             source "via Formula". It is the box targeted by the
+             .software-carbon-intensity selector in updateKeyMetric below. -->
@@ -348,6 +367,10 @@ const updateKeyMetric = (phase, metric_name, clean_name, detail_name, value, std
         selector = '.phase-duration';
     } else if(network_co2_metric_condition(metric)) {
         selector = '.network-co2';
+    } else if(embodied_carbon_share_metric_condition(metric)) {
+        selector = '.embodied-carbon';
+    } else if(sci_metric_condition(metric)) {
+        selector = '.software-carbon-intensity';
     } else if(machine_power_metric_condition(metric)) {
         selector = '.machine-power';
     } else if(machine_co2_metric_condition(metric)) {
@@ -360,7 +383,7 @@
     document.querySelector(`div.tab[data-tab='${phase}'] ${selector} .value span`).innerText = `${(value)} ${std_dev_text}`
     document.querySelector(`div.tab[data-tab='${phase}'] ${selector} .si-unit`).innerText = `[${unit}]`
     if(std_dev_text != '') document.querySelector(`div.tab[data-tab='${phase}'] ${selector} .metric-type`).innerText = `(AVG + STD.DEV)`;
-    else if(value.indexOf('%') !== -1) document.querySelector(`div.tab[data-tab='${phase}'] ${selector} .metric-type`).innerText = `(Diff. in %)`;
+    else if(String(value).indexOf('%') !== -1) document.querySelector(`div.tab[data-tab='${phase}'] ${selector} .metric-type`).innerText = `(Diff. in %)`;

     node = document.querySelector(`div.tab[data-tab='${phase}'] ${selector} .source`)
     if (node !== null) node.innerText = source // not every key metric shall have a custom detail_name
diff --git a/frontend/js/helpers/phase-stats.js b/frontend/js/helpers/phase-stats.js
index 5eed23214..e1bcfee00 100644
--- a/frontend/js/helpers/phase-stats.js
+++ b/frontend/js/helpers/phase-stats.js
@@ -75,13 +75,13 @@ const createPhaseTab = (phase) => {
         cloned_tab_node.style.display = '';
         cloned_tab_node.innerText = phase;
         cloned_tab_node.setAttribute('data-tab', phase);
-        runtime_tab_node.parentNode.insertBefore(cloned_tab_node, runtime_tab_node)
+        runtime_tab_node.parentNode.appendChild(cloned_tab_node)

         let phase_step_node = document.querySelector('.runtime-tab');
         let cloned_step_node = phase_step_node.cloneNode(true);
         cloned_step_node.style.display = '';
         cloned_step_node.setAttribute('data-tab', phase);
-        phase_step_node.parentNode.insertBefore(cloned_step_node, phase_step_node)
+        phase_step_node.parentNode.appendChild(cloned_step_node)
     }
 }
diff --git a/frontend/js/index.js b/frontend/js/index.js
index ad094a85d..002bb9c72 100644
--- a/frontend/js/index.js
+++ b/frontend/js/index.js
@@ -13,7 +13,23 @@ const updateCompareCount = () => {
     countButton.textContent = `Compare: ${checkedCount} Run(s)`;
 }

-function allow_group_select_checkboxes(checkbox_wrapper_id){
+const removeFilter = (paramName) => {
+    const urlSearchParams = new URLSearchParams(window.location.search);
+    urlSearchParams.delete(paramName);
+    const newUrl = `${window.location.pathname}?${urlSearchParams.toString()}`;
+    window.location.href = newUrl;
+}
+
+const showActiveFilters = (key, value) => {
+    document.querySelector(`.ui.warning.message`).classList.remove('hidden');
+    const newListItem = document.createElement("span");
+    newListItem.innerHTML = `
+        ${escapeString(key)}: ${escapeString(value)}
+    `;
+    document.querySelector(`.ui.warning.message ul`).appendChild(newListItem);
+}
+
+
+const allow_group_select_checkboxes = (checkbox_wrapper_id) => {
     let lastChecked = null;
     let checkboxes = document.querySelectorAll(checkbox_wrapper_id);
@@ -42,10 +58,21 @@

 (async () => {
     try {
-        var api_data = await makeAPICall('/v1/projects')
+        const url_params = (new URLSearchParams(window.location.search))
+        let repo_filter = '';
+        if (url_params.get('repo') != null && url_params.get('repo').trim() != '') {
+            repo_filter = url_params.get('repo').trim()
+            showActiveFilters('repo', repo_filter)
+        }
+        let filename_filter = '';
+        if (url_params.get('filename') != null && url_params.get('filename').trim() != '') {
+            filename_filter = url_params.get('filename').trim()
+            showActiveFilters('filename', filename_filter)
+        }
+        var api_data = await makeAPICall(`/v1/projects?repo=${repo_filter}&filename=${filename_filter}`)
     } catch (err) {
-            showNotification('Could not get data from API', err);
-            return;
+        showNotification('Could not get data from API', err);
+        return;
     }
     api_data.data.forEach(el => {
diff --git a/frontend/js/request.js b/frontend/js/request.js
index 02e6d0cbb..1ed7058c8 100644
--- a/frontend/js/request.js
+++ b/frontend/js/request.js
@@ -3,6 +3,7 @@
 var machines_json = await makeAPICall('/v1/machines/');

 machines_json.data.forEach(machine => {
+    if(machine[2] == false) return;
     let newOption = new Option(machine[1],machine[0]);
     const select = document.querySelector('select');
     select.add(newOption,undefined);
diff --git a/frontend/stats.html b/frontend/stats.html
index f1763fa74..05aefeaf7 100644
--- a/frontend/stats.html
+++ b/frontend/stats.html
@@ -72,7 +72,7 @@

Project Data

-                    XGBoost estimated AC energy
+                    XGBoost estimated AC energy (Runtime)
@@ -80,7 +80,7 @@

Project Data

-                    RAPL component energy
+                    RAPL component energy (Runtime)
@@ -88,7 +88,15 @@

Project Data

-                    Measured AC energy
+                    Measured AC energy (Runtime)
+
+ +
+
+
+ +
+ SCI (Runtime)
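
For context on how the new SCI badge above is consumed: a minimal sketch in Python. The `/v1/badge/single/{project_id}` route path is an assumption inferred from the `get_badge_single` handler changed earlier in this diff (the path itself is not shown in these hunks), and the base URL and project id are made up:

```python
import requests

API_URL = 'http://localhost:8000'  # assumption: locally reachable GMT API
PROJECT_ID = '00000000-0000-0000-0000-000000000000'  # hypothetical project id

# metric must be one of 'ml-estimated', 'RAPL', 'AC', 'SCI' per get_badge_single
response = requests.get(
    f"{API_URL}/v1/badge/single/{PROJECT_ID}",  # route path assumed, see note above
    params={'metric': 'SCI'},
    timeout=15,
)
response.raise_for_status()

# The handler returns anybadge-generated SVG (media type image/svg+xml);
# label and value are XML-escaped server-side via xml.sax.saxutils.escape.
print(response.headers.get('content-type'))
print(response.text[:80])
```
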
diff --git a/install_linux.sh b/install_linux.sh
index d06d24363..a872f6f6f 100755
--- a/install_linux.sh
+++ b/install_linux.sh
@@ -172,6 +172,9 @@ if [[ $no_build != true ]] ; then
     print_message "Building / Updating docker containers"
     docker compose -f docker/compose.yml down
     docker compose -f docker/compose.yml build
+
+    print_message "Updating python requirements"
+    python3 -m pip install -r requirements.txt
 fi

 echo ""
diff --git a/install_mac.sh b/install_mac.sh
index fe35782b9..f9f547d6e 100755
--- a/install_mac.sh
+++ b/install_mac.sh
@@ -75,6 +75,7 @@ git submodule update --init
 print_message "Adding hardware_info_root.py to sudoers file"
 echo "ALL ALL=(ALL) NOPASSWD:/usr/bin/powermetrics" | sudo tee /etc/sudoers.d/green_coding_powermetrics
 echo "ALL ALL=(ALL) NOPASSWD:/usr/bin/killall powermetrics" | sudo tee /etc/sudoers.d/green_coding_kill_powermetrics
+echo "ALL ALL=(ALL) NOPASSWD:/usr/bin/killall -9 powermetrics" | sudo tee /etc/sudoers.d/green_coding_kill_powermetrics_sigkill

 etc_hosts_line_1="127.0.0.1 green-coding-postgres-container"
@@ -102,5 +103,8 @@
 print_message "Building / Updating docker containers"
 docker compose -f docker/compose.yml down
 docker compose -f docker/compose.yml build

+print_message "Updating python requirements"
+python3 -m pip install -r requirements.txt
+
 echo ""
 echo -e "${GREEN}Successfully installed Green Metrics Tool!${NC}"
diff --git a/metric_providers/base.py b/metric_providers/base.py
index ea4587b57..5bb845b01 100644
--- a/metric_providers/base.py
+++ b/metric_providers/base.py
@@ -76,7 +76,7 @@ def read_metrics(self, project_id, containers):
         elif self._metrics.get('container_id') is not None:
             df['detail_name'] = df.container_id
             for container_id in containers:
-                df.loc[df.detail_name == container_id, 'detail_name'] = containers[container_id]
+                df.loc[df.detail_name == container_id, 'detail_name'] = containers[container_id]['name']
             df = df.drop('container_id', axis=1)
         else: # We use the default granularity from the name of the provider eg. "..._machine" => [MACHINE]
             df['detail_name'] = f"[{self._metric_name.split('_')[-1]}]"
@@ -137,6 +137,7 @@ def stop_profiling(self):
         except subprocess.TimeoutExpired:
             # If the process hasn't gracefully exited after 5 seconds we kill it
             os.killpg(ps_group_id, signal.SIGKILL)
+            print("Killed the process with SIGKILL. This could lead to corrupted metric log files!")

         except ProcessLookupError:
             print(f"Could not find process-group for {self._ps.pid}",
diff --git a/metric_providers/powermetrics/provider.py b/metric_providers/powermetrics/provider.py
index c15dee513..2923dd7c6 100644
--- a/metric_providers/powermetrics/provider.py
+++ b/metric_providers/powermetrics/provider.py
@@ -2,6 +2,8 @@
 import subprocess
 import plistlib
 from datetime import timezone
+import time
+import xml.parsers.expat

 import pandas #pylint: disable=import-error
@@ -33,13 +35,35 @@ def __init__(self, resolution):
                         '-o', self._filename]

+    def is_powermetrics_running(self):
+        try:
+            output = subprocess.check_output('pgrep -x powermetrics', shell=True)
+            return bool(output.strip())  # If the output is not empty, the process is running.
+        except subprocess.CalledProcessError:  # If the process is not running, 'pgrep' returns a non-zero exit code.
+            return False
+
     def stop_profiling(self):
         try:
             # We try calling the parent method but if this doesn't work we use the more hardcore approach
             super().stop_profiling()
         except PermissionError:
-            # This isn't the nicest way of doing this but there isn't really any other way that is nicer
+            #This isn't the nicest way of doing this but there isn't really any other way that is nicer
             subprocess.check_output('sudo /usr/bin/killall powermetrics', shell=True)
+            print('Killed powermetrics process with killall!')
+
+            # As killall returns right after sending the SIGKILL we need to wait and make sure that the process
+            # had time to flush everything to disk
+            count = 0
+            while self.is_powermetrics_running():
+                time.sleep(1)
+                count += 1
+                if count >= 5:
+                    subprocess.check_output('sudo /usr/bin/killall -9 powermetrics', shell=True)
+                    raise RuntimeError('powermetrics was stopped with kill -9. Values can not be trusted!')
+
+            # We need to give the OS a second to flush
+            time.sleep(1)

         self._ps = None
@@ -56,8 +80,15 @@ def read_metrics(self, project_id, containers=None):
         dfs = []

         cum_time = None
-        for data in datas:
-            data = plistlib.loads(data)
+        for count, data in enumerate(datas, start=1):
+            try:
+                data = plistlib.loads(data)
+            except xml.parsers.expat.ExpatError as e:
+                print('There was an error parsing the powermetrics data!')
+                print(f"Iteration count: {count}")
+                print(f"Number of items in datas: {len(datas)}")
+                print(data)
+                raise e

             if cum_time is None:
                 # Convert seconds to nano seconds
diff --git a/runner.py b/runner.py
index adc2e16ec..b91aba5e7 100755
--- a/runner.py
+++ b/runner.py
@@ -59,6 +59,9 @@ def arrows(text):
 def join_paths(path, path2, mode=None):
     filename = os.path.realpath(os.path.join(path, path2))

+    # If the original path is a symlink we need to resolve it.
+    path = os.path.realpath(path)
+
     # This is a special case in which the file is '.'
     if filename == path.rstrip('/'):
         return filename
@@ -118,6 +121,8 @@ def __init__(self,
         self._tmp_folder = '/tmp/green-metrics-tool'
         self._usage_scenario = {}
         self._architecture = utils.get_architecture()
+        self._sci = {'R_d': None, 'R': 0}
+
         # transient variables that are created by the runner itself
         # these are accessed and processed on cleanup and then reset
@@ -162,6 +167,7 @@ def check_system(self):

     def checkout_repository(self):
+        print(TerminalColors.HEADER, '\nChecking out repository', TerminalColors.ENDC)

         if self._uri_type == 'URL':
             # always remove the folder if URL provided, cause -v directory binding always creates it
@@ -319,6 +325,15 @@ def merge_dicts(dict1, dict2):
             del yml_obj['compose-file']
             yml_obj.update(new_dict)
+
+            # If a service is defined as None we remove it. This is so we can have a compose file that starts
+            # all the various services, but disable some of them in the usage_scenario. This is quite useful
+            # when creating benchmarking scripts where you want all options in the compose file but not in each
+            # benchmark. The cleaner way would be to handle an empty service key throughout the code, but that
+            # would get quite messy, so we chose to remove such services right at the start.
+            for key in [sname for sname, content in yml_obj['services'].items() if content is None]:
+                del yml_obj['services'][key]
+
         self._usage_scenario = yml_obj

     def initial_parse(self):
@@ -338,6 +353,8 @@
         if self._usage_scenario.get('architecture') is not None and self._architecture != self._usage_scenario['architecture'].lower():
             raise RuntimeError(f"Specified architecture does not match system architecture: system ({self._architecture}) != specified ({self._usage_scenario.get('architecture')})")

+        self._sci['R_d'] = self._usage_scenario.get('sci', {}).get('R_d', None)
+
     def check_running_containers(self):
         result = subprocess.run(['docker', 'ps' ,'--format', '{{.Names}}'],
                                 stdout=subprocess.PIPE,
@@ -437,6 +454,9 @@ def update_and_insert_specs(self):
         machine_specs.update(machine_specs_root)

+        keys = ["measurement", "sci"]
+        measurement_config = {key: config.get(key, None) for key in keys}
+
         # Insert auxiliary info for the run. Not critical.
         DB().query("""
             UPDATE projects
@@ -447,7 +467,7 @@
             """, params=(
                 config['machine']['id'],
                 escape(json.dumps(machine_specs), quote=False),
-                json.dumps(config['measurement']),
+                json.dumps(measurement_config),
                 escape(json.dumps(self._usage_scenario), quote=False),
                 self._original_filename,
                 gmt_hash,
@@ -770,13 +790,19 @@ def setup_services(self):
             ps = subprocess.run(
                 docker_run_string,
                 check=True,
-                stderr=subprocess.PIPE,
                 stdout=subprocess.PIPE,
+                #stderr=subprocess.DEVNULL,  # not setting stderr makes it show in the CLI
                 encoding='UTF-8'
             )

             container_id = ps.stdout.strip()
-            self.__containers[container_id] = container_name
+            self.__containers[container_id] = {
+                'name': container_name,
+                'log-stdout': service.get('log-stdout', False),
+                'log-stderr': service.get('log-stderr', True),
+                'read-sci-stdout': service.get('read-sci-stdout', False),
+            }
+
             print('Stdout:', container_id)

             if 'setup-commands' not in service:
@@ -966,6 +992,7 @@ def run_flows(self):
                     'container_name': el['container'],
                     'read-notes-stdout': inner_el.get('read-notes-stdout', False),
                     'ignore-errors': inner_el.get('ignore-errors', False),
+                    'read-sci-stdout': inner_el.get('read-sci-stdout', False),
                     'detail_name': el['container'],
                     'detach': inner_el.get('detach', False),
                 })
@@ -1022,14 +1049,17 @@ def read_and_cleanup_processes(self):
         stdout = ps['ps'].stdout
         stderr = ps['ps'].stderr
         if stdout:
-            stdout = stdout.splitlines()
-            for line in stdout:
+            for line in stdout.splitlines():
                 print('stdout from process:', ps['cmd'], line)
                 self.add_to_log(ps['container_name'], f"stdout: {line}", ps['cmd'])

                 if ps['read-notes-stdout']:
                     if note := self.__notes_helper.parse_note(line):
                         self.__notes_helper.add_note({'note': note[1], 'detail_name': ps['detail_name'], 'timestamp': note[0]})
+
+                if ps['read-sci-stdout']:
+                    if match := re.findall(r'GMT_SCI_R=(\d+)', line):
+                        self._sci['R'] += int(match[0])
         if stderr:
             stderr = stderr.splitlines()
             for line in stderr:
@@ -1076,18 +1106,32 @@ def store_phases(self):

     def read_container_logs(self):
         print(TerminalColors.HEADER, '\nCapturing container logs', TerminalColors.ENDC)
-        for container_name in self.__containers.values():
+        for container_id, container_info in self.__containers.items():
+
+            stderr_behaviour = stdout_behaviour = subprocess.DEVNULL
+            if container_info['log-stdout'] is True:
+                stdout_behaviour = subprocess.PIPE
+            if container_info['log-stderr'] is True:
+                stderr_behaviour = subprocess.PIPE
+
             log = subprocess.run(
-                ['docker', 'logs', '-t', container_name],
+                ['docker', 'logs', '-t', container_id],
                 check=True,
                 encoding='UTF-8',
-                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE,
+                stdout=stdout_behaviour,
+                stderr=stderr_behaviour,
             )
+
             if log.stdout:
-                self.add_to_log(container_name, f"stdout: {log.stdout}")
+                self.add_to_log(container_id, f"stdout: {log.stdout}")
+                if container_info['read-sci-stdout']:
+                    for line in log.stdout.splitlines():
+                        if match := re.findall(r'GMT_SCI_R=(\d+)', line):
+                            self._sci['R'] += int(match[0])
+
             if log.stderr:
-                self.add_to_log(container_name, f"stderr: {log.stderr}")
+                self.add_to_log(container_id, f"stderr: {log.stderr}")

     def save_stdout_logs(self):
         print(TerminalColors.HEADER, '\nSaving logs to DB', TerminalColors.ENDC)
@@ -1110,8 +1154,8 @@ def cleanup(self):
             metric_provider.stop_profiling()

         print('Stopping containers')
-        for container_name in self.__containers.values():
-            subprocess.run(['docker', 'rm', '-f', container_name], check=True, stderr=subprocess.DEVNULL)
+        for container_id in self.__containers:
+            subprocess.run(['docker', 'rm', '-f', container_id], check=True, stderr=subprocess.DEVNULL)

         print('Removing network')
         for network_name in self.__networks:
@@ -1367,7 +1411,7 @@ def run(self):
         # get all the metrics from the measurements table grouped by metric
         # loop over them issuing separate queries to the DB
         from phase_stats import build_and_store_phase_stats
-        build_and_store_phase_stats(project_id)
+        build_and_store_phase_stats(project_id, runner._sci)

         print(TerminalColors.OKGREEN,'\n\n####################################################################################')
diff --git a/test/api/test_api.py b/test/api/test_api.py
index f351e88f1..d2e2d274c 100644
--- a/test/api/test_api.py
+++ b/test/api/test_api.py
@@ -47,7 +47,7 @@ def test_get_projects(cleanup_projects):
     pid = DB().fetch_one('INSERT INTO "projects" ("name","uri","email","last_run","created_at") \
                 VALUES \
                 (%s,%s,\'manual\',NULL,NOW()) RETURNING id;', params=(project_name, uri))[0]
-    response = requests.get(f"{API_URL}/v1/projects", timeout=15)
+    response = requests.get(f"{API_URL}/v1/projects?repo=&filename=", timeout=15)
     res_json = response.json()
     assert response.status_code == 200
     assert res_json['data'][0][0] == str(pid)
diff --git a/tools/jobs.py b/tools/jobs.py
index 7a3ccff81..6a103f69d 100644
--- a/tools/jobs.py
+++ b/tools/jobs.py
@@ -2,6 +2,7 @@
 import sys
 import os
 import faulthandler
+from datetime import datetime

 faulthandler.enable()  # will catch segfaults and write to STDERR
@@ -125,7 +126,7 @@ def _do_project_job(job_id, project_id, skip_system_checks=False, docker_prune=F
     try:
         # Start main code. Only URL is allowed for cron jobs
         runner.run()
-        build_and_store_phase_stats(project_id)
+        build_and_store_phase_stats(project_id, runner._sci)
         insert_job('email', project_id=project_id)
         delete_job(job_id)
     except Exception as exc:
@@ -163,7 +164,7 @@
     try:
         job = get_job(args.type)
         if (job is None or job == []):
-            print('No job to process. Exiting')
+            print(datetime.now().strftime("%Y-%m-%d %H:%M:%S"), 'No job to process. Exiting')
             sys.exit(0)
         p_id = job[2]
         process_job(job[0], job[1], job[2], args.skip_system_checks, args.docker_prune, args.full_docker_prune)
diff --git a/tools/phase_stats.py b/tools/phase_stats.py
index 128047cf5..1f1f51fce 100644
--- a/tools/phase_stats.py
+++ b/tools/phase_stats.py
@@ -2,8 +2,8 @@
 from io import StringIO
 import sys
 import os
+import decimal
 import faulthandler
-
 faulthandler.enable()  # will catch segfaults and write to STDERR

 CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
@@ -11,12 +11,15 @@
 sys.path.append(f"{CURRENT_DIR}/../lib")

 from db import DB
+from global_config import GlobalConfig

 def generate_csv_line(project_id, metric, detail_name, phase_name, value, value_type, max_value, min_value, unit):
     return f"{project_id},{metric},{detail_name},{phase_name},{round(value)},{value_type},{round(max_value) if max_value is not None else ''},{round(min_value) if min_value is not None else ''},{unit},NOW()\n"

-def build_and_store_phase_stats(project_id):
+def build_and_store_phase_stats(project_id, sci=None):
+    config = GlobalConfig().config
+
     query = """
         SELECT metric, unit, detail_name
         FROM measurements
@@ -38,6 +41,8 @@
     for idx, phase in enumerate(phases[0]):
         network_io_bytes_total = []  # reset; we use an array here and sum later, because checking for 0 alone is not enough

+        machine_co2 = None  # reset
+
         select_query = """
             SELECT SUM(value), MAX(value), MIN(value), AVG(value), COUNT(value)
             FROM measurements
@@ -99,7 +104,7 @@
                     csv_buffer.write(generate_csv_line(project_id, f"{metric.replace('_energy_', '_power_')}", detail_name, f"{idx:03}_{phase['name']}", power_sum, 'MEAN', power_max, power_min, 'mW'))

                     if metric.endswith('_machine'):
-                        machine_co2 = (value_sum / 3_600) * 475
+                        machine_co2 = (value_sum / 3_600) * config['sci']['I']
                         csv_buffer.write(generate_csv_line(project_id, f"{metric.replace('_energy_', '_co2_')}", detail_name, f"{idx:03}_{phase['name']}", machine_co2, 'TOTAL', None, None, 'ug'))

@@ -114,11 +119,21 @@
             network_io_in_mJ = network_io_in_kWh * 3_600_000_000
             csv_buffer.write(generate_csv_line(project_id, 'network_energy_formula_global', '[FORMULA]', f"{idx:03}_{phase['name']}", network_io_in_mJ, 'TOTAL', None, None, 'mJ'))
             # co2 calculations
-            network_io_co2_in_ug = network_io_in_kWh * 475 * 1_000_000
+            network_io_co2_in_ug = network_io_in_kWh * config['sci']['I'] * 1_000_000
             csv_buffer.write(generate_csv_line(project_id, 'network_co2_formula_global', '[FORMULA]', f"{idx:03}_{phase['name']}", network_io_co2_in_ug, 'TOTAL', None, None, 'ug'))

-        # also create the phase time metric
-        csv_buffer.write(generate_csv_line(project_id, 'phase_time_syscall_system', '[SYSTEM]', f"{idx:03}_{phase['name']}", phase['end']-phase['start'], 'TOTAL', None, None, 'us'))
+        duration = phase['end']-phase['start']
+        csv_buffer.write(generate_csv_line(project_id, 'phase_time_syscall_system', '[SYSTEM]', f"{idx:03}_{phase['name']}", duration, 'TOTAL', None, None, 'us'))
+
+        duration_in_years = duration / (1_000_000 * 60 * 60 * 24 * 365)
+        embodied_carbon_share_g = (duration_in_years / (config['sci']['EL']) ) * config['sci']['TE'] * config['sci']['RS']
+        embodied_carbon_share_ug = decimal.Decimal(embodied_carbon_share_g * 1_000_000)
+        csv_buffer.write(generate_csv_line(project_id, 'embodied_carbon_share_machine', '[SYSTEM]', f"{idx:03}_{phase['name']}", embodied_carbon_share_ug, 'TOTAL', None, None, 'ug'))
+
+        if phase['name'] == '[RUNTIME]' and machine_co2 is not None and sci is not None \
+                and sci.get('R', None) is not None and sci['R'] != 0:
+            csv_buffer.write(generate_csv_line(project_id, 'software_carbon_intensity_global', '[SYSTEM]', f"{idx:03}_{phase['name']}", (machine_co2 + embodied_carbon_share_ug) / sci['R'], 'TOTAL', None, None, f"ugCO2e/{sci['R_d']}"))
+

     csv_buffer.seek(0)  # Reset buffer position to the beginning
     DB().copy_from(
[Extraction residue, relocated here from the frontend/js/ci.js hunk above: the stripped ci-table cell templates. The old row interpolated ${value}, ${label}, ${run_link_node}, ${dateToYMD(new Date(created_at))}, ${short_hash}, ${cpu} and ${duration} seconds directly; the new row wraps each user-controlled value in escapeString(...).]
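
As a sanity check on the carbon arithmetic added to tools/phase_stats.py, here is a small self-contained recomputation using the defaults from config.yml.example (EL=3.5 years, RS=1, TE=194000 gCO2eq, I=475 gCO2e/kWh). The duration, energy and R inputs below are made-up illustration values, not measurements:

```python
# Mirrors the formulas in build_and_store_phase_stats; inputs are illustrative only.
EL = 3.5         # expected lifespan in years (config.yml.example default)
RS = 1           # resource share (default)
TE = 194_000     # total embodied emissions in gCO2eq (default)
I  = 475         # grid carbon intensity in gCO2e/kWh (default)

duration_us = 120 * 1_000_000   # hypothetical 2-minute [RUNTIME] phase, in microseconds
energy_mJ   = 1_500_000         # hypothetical machine energy for the phase, in mJ
R           = 50                # hypothetical functional-unit count summed from GMT_SCI_R= lines

# Embodied share: the phase's time share of the machine lifetime, applied to TE, in ug
duration_in_years = duration_us / (1_000_000 * 60 * 60 * 24 * 365)
embodied_carbon_share_ug = (duration_in_years / EL) * TE * RS * 1_000_000

# Operational carbon: mJ / 3_600 yields mWh, and mWh * gCO2e/kWh yields ugCO2e,
# because the 10**-6 (mWh -> kWh) and 10**6 (g -> ug) factors cancel out.
machine_co2_ug = (energy_mJ / 3_600) * I

# SCI = (operational + embodied) / functional units, in ugCO2e per unit (R_d)
sci = (machine_co2_ug + embodied_carbon_share_ug) / R
print(f"embodied: {embodied_carbon_share_ug:.0f} ug, "
      f"operational: {machine_co2_ug:.0f} ug, SCI: {sci:.0f} ugCO2e/unit")
```
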