Fixes the massive data usage problem
Fixes #22
ribalba committed Jan 8, 2024
commit d08a02e (1 parent: ad3f8c6)
Showing 3 changed files with 150 additions and 11 deletions.
8 changes: 4 additions & 4 deletions app/hog/hog.xcodeproj/project.pbxproj
@@ -493,8 +493,8 @@
"@executable_path/../Frameworks",
);
MACOSX_DEPLOYMENT_TARGET = 13.0;
-MARKETING_VERSION = 0.4;
-PRODUCT_BUNDLE_IDENTIFIER = "berlin.green-coding.hog";
+MARKETING_VERSION = 0.5;
+PRODUCT_BUNDLE_IDENTIFIER = "io.green-coding.hog";
PRODUCT_NAME = "$(TARGET_NAME)";
SWIFT_EMIT_LOC_STRINGS = YES;
SWIFT_VERSION = 5.0;
@@ -529,8 +529,8 @@
"@executable_path/../Frameworks",
);
MACOSX_DEPLOYMENT_TARGET = 13.0;
-MARKETING_VERSION = 0.4;
-PRODUCT_BUNDLE_IDENTIFIER = "berlin.green-coding.hog";
+MARKETING_VERSION = 0.5;
+PRODUCT_BUNDLE_IDENTIFIER = "io.green-coding.hog";
PRODUCT_NAME = "$(TARGET_NAME)";
SWIFT_EMIT_LOC_STRINGS = YES;
SWIFT_VERSION = 5.0;
29 changes: 23 additions & 6 deletions app/hog/hog/DetailView.swift
@@ -331,12 +331,17 @@ class TopProcessData: LoadingClass, Identifiable, ObservableObject, RandomAccess
let queryString: String
if self.lookBackTime == 0 {
queryString = """
-        SELECT name, SUM(energy_impact), AVG(cputime_per)
-        FROM top_processes
-        GROUP BY name
-        ORDER BY SUM(energy_impact) DESC
+        SELECT
+            name,
+            SUM(energy_impact) AS total_energy_impact,
+            AVG(cputime_per) AS average_cputime_per
+        FROM
+            top_processes
+        GROUP BY
+            name
+        ORDER BY
+            total_energy_impact DESC
LIMIT 50;
"""
} else {
queryString = """
@@ -413,7 +418,19 @@ class ChartData: LoadingClass, ObservableObject, RandomAccessCollection {

let queryString: String
if self.lookBackTime == 0 {
queryString = "SELECT * FROM power_measurements;"
queryString = """
SELECT
strftime('%s', date(time / 1000, 'unixepoch')) * 1000 AS day_epoch,
SUM(combined_energy),
SUM(cpu_energy),
SUM(gpu_energy),
SUM(ane_energy),
SUM(energy_impact)
FROM
power_measurements
GROUP BY
day_epoch;
"""
} else {
queryString = "SELECT * FROM power_measurements WHERE time >= ((CAST(strftime('%s', 'now') AS INTEGER) * 1000) - \(self.lookBackTime));"
}
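The day_epoch expression in the new query buckets millisecond timestamps into whole days: the value is divided by 1000 to get Unix seconds, truncated to a calendar date, converted back to the epoch second of that day's midnight, and scaled back to milliseconds. A standalone check of that arithmetic (an illustrative sketch, not part of the commit):

    import sqlite3

    conn = sqlite3.connect(":memory:")
    ms = 1704722400000  # 2024-01-08 14:00:00 UTC, in milliseconds

    (day_epoch,) = conn.execute(
        "SELECT strftime('%s', date(? / 1000, 'unixepoch')) * 1000;", (ms,)
    ).fetchone()

    print(day_epoch)  # 1704672000000, i.e. 2024-01-08 00:00:00 UTC
    conn.close()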
124 changes: 123 additions & 1 deletion power_logger.py
@@ -221,7 +221,7 @@ def upload_data_to_endpoint(local_stop_signal):
with urllib.request.urlopen(req, timeout=30) as response:
if response.status == 204:
for p in payload:
-                    tc.execute('UPDATE measurements SET uploaded = ?, data = NULL WHERE id = ?;', (int(time.time()), p['row_id']))
+                    tc.execute('DELETE FROM measurements WHERE id = ?;', (p['row_id'],))
thread_conn.commit()
upload_delta = time.time() - start_time
logging.debug(f"Uploaded. Took {upload_delta:.2f} seconds")
@@ -407,6 +407,123 @@ def check_DB(local_stop_signal, stime: SharedTime):
thread_conn.close()


def optimize_DB(local_stop_signal):
while not local_stop_signal.is_set():

sleeper(local_stop_signal, 3600) # We only need to optimize every hour

logging.debug("Starting DB optimization for power_measurements")

thread_conn = sqlite3.connect(DATABASE_FILE)
tc = thread_conn.cursor()

# This is for legacy systems. We just make sure that there are no values left
tc.execute('DELETE FROM measurements WHERE data IS NULL;')

one_week_ago = int(time.time() * 1000) - 7 * 24 * 60 * 60 * 1000 # Adjusted for milliseconds

aggregate_query = """
SELECT
strftime('%s', date(time / 1000, 'unixepoch')) * 1000 AS day_epoch,
SUM(combined_energy),
SUM(cpu_energy),
SUM(gpu_energy),
SUM(ane_energy),
SUM(energy_impact)
FROM
power_measurements
WHERE
time < ?
GROUP BY
day_epoch;
"""
tc.execute(aggregate_query, (one_week_ago,))
aggregated_data = tc.fetchall()

tc.execute("""
CREATE TEMPORARY TABLE temp_power_measurements (
time INT,
combined_energy INT,
cpu_energy INT,
gpu_energy INT,
ane_energy INT,
energy_impact INT
);
""")

insert_temp_query = """
INSERT INTO temp_power_measurements (time, combined_energy, cpu_energy, gpu_energy, ane_energy, energy_impact)
VALUES (?, ?, ?, ?, ?, ?);
"""
tc.executemany(insert_temp_query, aggregated_data)

delete_query = """
DELETE FROM power_measurements WHERE time < ?;
"""
tc.execute(delete_query, (one_week_ago,))

insert_back_query = """
INSERT INTO power_measurements (time, combined_energy, cpu_energy, gpu_energy, ane_energy, energy_impact)
SELECT * FROM temp_power_measurements;
"""
tc.execute(insert_back_query)

tc.execute("DROP TABLE temp_power_measurements;")

logging.debug("Starting DB optimization for top_processes")

# Do the same with processes
aggregate_query = """
SELECT
name,
SUM(energy_impact) AS total_energy_impact,
AVG(cputime_per) AS average_cputime_per
FROM
top_processes
WHERE
time < ?
GROUP BY
name;
"""
tc.execute(aggregate_query, (one_week_ago,))
aggregated_data = tc.fetchall()

tc.execute("""
CREATE TEMPORARY TABLE temp_top_processes (
name STRING,
total_energy_impact INT,
average_cputime_per INT
);
""")

insert_temp_query = """
INSERT INTO temp_top_processes (name, total_energy_impact, average_cputime_per)
VALUES (?, ?, ?);
"""
tc.executemany(insert_temp_query, aggregated_data)

tc.execute("DELETE FROM top_processes WHERE time < ?;", (one_week_ago,))

insert_back_query = """
INSERT INTO top_processes (time, name, energy_impact, cputime_per)
SELECT ?, name, total_energy_impact, average_cputime_per FROM temp_top_processes;
"""
tc.execute(insert_back_query, (one_week_ago,))

# Drop the temporary table
tc.execute("DROP TABLE temp_top_processes;")

thread_conn.commit()

        # We vacuum to actually reduce the file size. We probably don't need to vacuum this often, but I would rather
        # do it here than have another thread.
tc.execute("VACUUM;")

@ArneTR (Member) commented on Jan 9, 2024:

I think you need to do a VACUUM FULL: https://postgrespro.com/docs/postgresql/9.4/sql-vacuum.html

"Plain VACUUM (without FULL) simply reclaims space and makes it available for re-use. This form of the command can operate in parallel with normal reading and writing of the table, as an exclusive lock is not obtained. However, extra space is not returned to the operating system (in most cases); it's just kept available for re-use within the same table. VACUUM FULL rewrites the entire contents of the table into a new disk file with no extra space, allowing unused space to be returned to the operating system. This form is much slower and requires an exclusive lock on each table while it is being processed."

@ribalba (Author, Member) replied on Jan 9, 2024:

I also looked at that. But I didn't want to vacuum on every commit. And incremental needs a call again from the user.

"When the auto-vacuum mode is 1 or 'full', the freelist pages are moved to the end of the database file and the database file is truncated to remove the freelist pages at every transaction commit."

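For reference, the SQLite counterparts to the PostgreSQL behaviour quoted above are the VACUUM statement and the auto_vacuum pragma discussed in this thread. A minimal illustrative sketch, not part of this commit (the database file name is hypothetical):

    import sqlite3

    conn = sqlite3.connect("measurements.db")  # hypothetical file name
    cur = conn.cursor()

    # auto_vacuum mode 1 ("full"): free pages are moved to the end of the file
    # and the file is truncated at every transaction commit, as quoted above.
    # Mode 2 ("incremental"): free pages are only tracked; space is returned
    # when the application explicitly runs incremental_vacuum.
    cur.execute("PRAGMA auto_vacuum = 2;")
    cur.execute("VACUUM;")  # needed once for the auto_vacuum change to take effect
    cur.execute("PRAGMA incremental_vacuum;")  # reclaim free pages on demand

    conn.close()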
logging.debug("Ending DB optimization")

thread_conn.close()


def is_power_logger_running():
try:
subprocess.check_output(['pgrep', '-f', sys.argv[0]])
@@ -550,6 +667,11 @@ def get_settings(debug = False):
ticker_thread.start()
logging.debug('Ticker thread started')

db_optimizer_thread = threading.Thread(target=optimize_DB, args=(stop_signal,), daemon=True)
db_optimizer_thread.start()
logging.debug('DB optimizer thread started')


run_powermetrics(stop_signal, args.file)

c.close()
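The optimizer runs as a daemon thread and, like the other background threads in power_logger.py, loops until a shared stop Event fires. A self-contained sketch of that pattern (the sleeper behaviour shown here is an assumption based on how it is called; names are illustrative):

    import threading

    def sleeper(stop_signal, seconds):
        # Assumed behaviour: block up to `seconds`, returning early on shutdown.
        stop_signal.wait(timeout=seconds)

    def optimize_db(stop_signal):
        while not stop_signal.is_set():
            sleeper(stop_signal, 3600)  # at most one pass per hour
            print("optimizing ...")     # stands in for the real DB work

    stop_signal = threading.Event()
    worker = threading.Thread(target=optimize_db, args=(stop_signal,), daemon=True)
    worker.start()
    # ... main work happens here ...
    stop_signal.set()       # request shutdown
    worker.join(timeout=5)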
