Commit
Merge pull request #15 from Rafsan7238/redo-tests
Redo tests
chillingo117 authored May 21, 2024
2 parents a89f2ea + cbdae87 commit beeebaf
Showing 5 changed files with 105 additions and 16 deletions.
4 changes: 3 additions & 1 deletion backend/backend.py
@@ -210,7 +210,9 @@ def sentiment_weather_queries_endpoint():
     if resource == AVG_MONTHLY_ANALYSIS:
         return jsonify({'result': get_averaged_by_month(es)}), 200
     if resource == UPDATING_ANALYSIS:
-        return jsonify({'result': get_recent_averaged_by_daily(es)}), 200
+        return jsonify({'result': get_recent_averaged_sentiment_by_hourly(es)}), 200
+    if resource == MESSAGE_COUNTS_HOURLY:
+        return jsonify({'result': get_recent_total_sentiment_by_hourly(es)}), 200
     else:
         return jsonify({'Resource in headers is not valid': resource}), 400

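The new MESSAGE_COUNTS_HOURLY branch makes this endpoint serve a second hourly aggregation. As a rough illustration of how a client might exercise it, here is a minimal sketch in Python; the base URL and route path are assumptions (the diff only confirms that the resource name arrives via request headers, so the 'resource' header key is also a guess), and the header value comes from query_path_constants.py below.

import requests

BASE_URL = 'http://localhost:5000'  # assumed backend address, not confirmed by this diff

resp = requests.get(
    f'{BASE_URL}/analysis',                         # hypothetical route path
    headers={'resource': 'message_counts_hourly'},  # constant value from query_path_constants.py
)
if resp.status_code == 200:
    result = resp.json()['result']
    print(result['columns'])   # expected: hour, weather stats, message counts
    print(result['rows'][:3])  # first few joined hourly rows
else:
    # the endpoint answers 400 when the resource header is not recognised
    print('Bad resource header:', resp.json())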
9 changes: 5 additions & 4 deletions backend/harvesters/Mastodon/mharvester.py
@@ -90,7 +90,8 @@ def ingest(recents_only = True, max_id = None):
         since_date = datetime.now().replace(tzinfo=utc) - timedelta(minutes=5)
         if len(doc)>0:
             val = doc[0]
-            since_id = val['_id']
+            print(val)
+            since_id = val['_source']['id']
         else:
             print('backfilling old')
             since_date = datetime.now().replace(tzinfo=utc) - timedelta(days=5) # a random post on 15/05/2024
@@ -104,12 +105,12 @@ def ingest(recents_only = True, max_id = None):
         oldest_doc = response['rows'][0]
         print(oldest_doc)
         print(f'continuing retrieval of old data from {oldest_doc[0]}')
-        max_id = 112475922112584533 #oldest_doc[1]
+        max_id = max_id if max_id else oldest_doc[1]
 
     since_date = since_date.replace(tzinfo=utc)
     done = False
     while not done:
-        print(f'fetching toots with since_id:{since_id}, max_id:{max_id}, and up_to:{since_date}')
+        print(f'fetching toots with since_id:{since_id}, max_id:{max_id}, and since the date:{since_date}')
         # Returns toots more recent than since_id, less recent than max_id
         toots = m.timeline(timeline='public', since_id=since_id, max_id=max_id, limit=100)
         to_add = []
@@ -132,7 +133,7 @@ def ingest(recents_only = True, max_id = None):
 
         if toots and len(toots) > 0:
             max_id = toots[-1]['id']
-            print(f'Toots had oldest date {created_at} and oldest id {max_id}')
+            print(f'Toots had oldest date {created_at} and largest id {max_id}')
         else:
             print(f'toots was empty')

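The changes above fix two paging bugs: since_id is now read from the document's _source instead of the Elasticsearch metadata _id, and max_id is only seeded from the stored oldest document when the caller did not supply one. For orientation, here is a minimal sketch of the since_id/max_id walk that m.timeline relies on, using Mastodon.py; the instance URL is an assumption and no authentication is shown.

from mastodon import Mastodon

m = Mastodon(api_base_url='https://mastodon.au')  # assumed instance; a real token may be required

since_id = None  # lower bound: only toots newer than this id
max_id = None    # upper bound: only toots older than this id

while True:
    # Each page returns up to `limit` toots with since_id < id < max_id, newest first.
    toots = m.timeline(timeline='public', since_id=since_id, max_id=max_id, limit=100)
    if not toots:
        break
    # The last toot in a page is the oldest, so reusing its id as max_id
    # steps the window backwards through the timeline.
    max_id = toots[-1]['id']
    print(f'fetched {len(toots)} toots, oldest id {max_id}')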
3 changes: 2 additions & 1 deletion backend/querying/query_path_constants.py
@@ -4,4 +4,5 @@
 FOR_STATISTICAL_ANALYSIS = 'stat_analysis'
 FOR_SPATIAL_ANALYSIS = 'spatial_analysis'
 AVG_MONTHLY_ANALYSIS = 'avg_monthly_analysis'
-UPDATING_ANALYSIS = 'updateing_analysis'
+UPDATING_ANALYSIS = 'updateing_analysis'
+MESSAGE_COUNTS_HOURLY = 'message_counts_hourly'
43 changes: 40 additions & 3 deletions backend/querying/sentiment_weather_queries.py
@@ -5,7 +5,7 @@
 from querying.make_query import make_query
 import numpy as np
 
-def get_recent_averaged_by_daily(es):
+def get_recent_averaged_sentiment_by_hourly(es):
     data_start = datetime(2024, 5, 17, tzinfo=UTC)
     joined = {}
 
@@ -26,7 +26,7 @@ def get_recent_averaged_by_daily(es):
     query = f"""
     SELECT
         HISTOGRAM(created_at, INTERVAL 1 HOUR) as hour,
-        SUM(sentiment) as avg_sentiment
+        SUM(sentiment) as sum_sentiment
     FROM {MASTODON}
     WHERE sentiment <> 0.0
     GROUP BY hour
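make_query itself is not shown in this diff; judging by how its result is indexed ('rows' here, and 'rows' again in mharvester.py above), a plausible minimal version over the standard elasticsearch-py SQL API could look like the following sketch. The exact signature the project uses is an assumption.

from elasticsearch import Elasticsearch

def make_query(es: Elasticsearch, query: str) -> dict:
    # Elasticsearch SQL responds with {'columns': [{'name': ..., 'type': ...}, ...],
    # 'rows': [[...], ...]}; flatten the column objects to bare names.
    resp = es.sql.query(query=query)  # elasticsearch-py 8.x keyword form
    return {'columns': [c['name'] for c in resp['columns']], 'rows': resp['rows']}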
@@ -38,7 +38,44 @@ def get_recent_averaged_by_daily(es):
             joined[hour][2] = avg_sent
 
     rows = [[hour]+stats for hour, stats in joined.items()]
-    columns = ['hour', 'average air temp', 'total rainfall', 'average sentiment']
+    columns = ['hour', 'average air temp', 'total rainfall', 'sum sentiment']
     return {'columns': columns, 'rows': rows }
+
+
+def get_recent_total_sentiment_by_hourly(es):
+    data_start = datetime(2024, 5, 17, tzinfo=UTC)
+    joined = {}
+
+    query = f"""
+    SELECT
+        HISTOGRAM(DATETIME_PARSE(aifstime_utc,'yyyyMMddHHmmss'), INTERVAL 1 HOUR) as hour,
+        AVG(air_temp) as air_temp,
+        SUM(rain_trace) as total_rain
+    FROM {BOM_OBSERVATIONS}
+    GROUP BY hour
+    """
+    weather_rows = make_query(es, query)['rows']
+    for hour, avg_temp, total_rain in weather_rows:
+        if datetime.strptime(hour, "%Y-%m-%dT%H:%M:%S.%fZ").replace(tzinfo=UTC) >= data_start:
+            joined[hour] = joined.get(hour, [avg_temp, total_rain, 0])
+
+    query = f"""
+    SELECT
+        HISTOGRAM(created_at, INTERVAL 1 HOUR) as hour,
+        SUM(1) as message_count
+    FROM {MASTODON}
+    WHERE sentiment <> 0.0
+    GROUP BY hour
+    """
+    sentiment_rows = make_query(es, query)['rows']
+    for hour, avg_sent in sentiment_rows:
+        if datetime.strptime(hour, "%Y-%m-%dT%H:%M:%S.%fZ").replace(tzinfo=UTC) >= data_start:
+            if hour in joined.keys():
+                joined[hour][2] = avg_sent
+
+    rows = [[hour]+stats for hour, stats in joined.items()]
+    columns = ['hour', 'average air temp', 'total rainfall', 'message counts']
+    return {'columns': columns, 'rows': rows }
 
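The new function returns the same {'columns': ..., 'rows': ...} shape as its sibling, which the notebook below presumably consumes. A minimal sketch of turning that payload into a pandas DataFrame; the sample values are illustrative only, not real data.

import pandas as pd

# `result` stands in for the parsed JSON body of a MESSAGE_COUNTS_HOURLY response.
result = {
    'columns': ['hour', 'average air temp', 'total rainfall', 'message counts'],
    'rows': [
        ['2024-05-17T00:00:00.000Z', 12.4, 0.2, 131],  # illustrative values only
        ['2024-05-17T01:00:00.000Z', 11.9, 0.0, 98],
    ],
}

df = pd.DataFrame(result['rows'], columns=result['columns'])
df['hour'] = pd.to_datetime(df['hour'])
df = df.set_index('hour').sort_index()
print(df.head())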
62 changes: 55 additions & 7 deletions frontend/Weather vs Sentiment Analysis.ipynb

Large diffs are not rendered by default.
