Skip to content

Commit

Permalink
Merge pull request #432 from rgao/dev
Browse files Browse the repository at this point in the history
Updated Frequency Module
  • Loading branch information
sellnat77 authored Mar 23, 2020
2 parents d630c13 + aa5218d commit c88655d
Show file tree
Hide file tree
Showing 4 changed files with 251 additions and 130 deletions.
15 changes: 8 additions & 7 deletions server/src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from multiprocessing import cpu_count

from services.time_to_close import time_to_close
from services.frequency import frequency
from services.frequency import FrequencyService
from services.pinService import PinService
from services.requestCountsService import RequestCountsService
from services.requestDetailService import RequestDetailService
Expand Down Expand Up @@ -68,16 +68,17 @@ async def timetoclose(request):
return json(data)


@app.route('/requestfrequency')
@app.route('/requestfrequency', methods=["POST"])
@compress.compress()
async def requestfrequency(request):
freq_worker = frequency(app.config['Settings'])
freq_worker = FrequencyService(app.config['Settings'])

data = freq_worker.freq_view_data(service=True,
councils=[],
aggregate=True)
return_data = await freq_worker.get_frequency(startDate='2019-01-01',
endDate='2020-12-31',
ncList=['SHERMAN OAKS NC'],
requestTypes=['Other'])

return json(data)
return json(return_data)


@app.route('/sample-data')
Expand Down
293 changes: 173 additions & 120 deletions server/src/services/frequency.py
Original file line number Diff line number Diff line change
@@ -1,133 +1,186 @@
from configparser import ConfigParser
import sqlalchemy as db
from .dataService import DataService
import pandas as pd
import json


class frequency(object):
class FrequencyService(object):
def __init__(self, config=None, tableName="ingest_staging_table"):
self.config = config
self.dbString = None if not self.config \
else self.config['Database']['DB_CONNECTION_STRING']
self.dataAccess = DataService(config, tableName)

self.table = tableName
self.data = None
pass
async def get_frequency(self,
startDate=None,
endDate=None,
ncList=[],
requestTypes=[],
window='month'):

def freq_view_all(self, serviced=False, aggregate=True):
"""
Returns the request type and associated dates for all data
Sorted by request type, followed by created date,
service date (if applicable), and then closed date
{
"lastPulled": "NOW",
"data": [{
"bucketStartDates": ["2015-01-01", "2015-01-04",
"2015-01-07", "2015-01-10",
"2015-01-13", "2015-01-16"],
"requestTypes": [{
"type": "Homeless Encampment",
"numRequests": [200, 250, 12, 143, 200, 250]
}, {
"type": "Bulky Items",
"numRequests": [2, 25, 682, 333, 444, 666]
}]
}]
}
"""
# Todo: implement condition for serviced date
engine = db.create_engine(self.dbString)

if serviced:
query = "SELECT \
requesttype,\
createddate,\
closeddate,\
servicedate\
FROM %s" % self.table
else:
query = "SELECT \
requesttype,\
createddate,\
closeddate\
FROM %s" % self.table

df = pd.read_sql_query(query, con=engine)

if serviced:
df['servicedate'] = pd.to_datetime(df['servicedate'])

df['closeddate'] = pd.to_datetime(df['closeddate'])
df = df.sort_values(by=['requesttype', 'createddate', 'closeddate'])

return df.to_json(orient="records")
filters = self.dataAccess.standardFilters(
startDate, endDate, ncList, requestTypes)

def freq_aggregate(self, df):
request_counts = df['requesttype'].value_counts()
fields = ['createddate', 'requesttype']

return request_counts.to_json()

def freq_view_data(self,
service=False,
aggregate=True,
councils=[],
startdate="",
enddate=""):
"""
Returns the request type, neighborhood council, created and
closed dates for all data sorted by request type, followed by
neighborhood council #, then created date, and then closed date
Returns serviced date as well if service is set to True
Returns data for all councils if councils=[], otherwise returns data
for only the array of neighborhood council #s
Returns summary data as well if aggregate is set to True
Returns only entries created between startdate and enddate if values
are set for those parameters
Format of startdate and enddate should be a string in
the form 2019-12-01 23:02:05
"""
engine = db.create_engine(self.dbString)

if service:
df = pd.read_sql_query("SELECT\
requesttype,\
createddate,\
closeddate,\
servicedate,\
nc,\
ncname\
FROM %s" % self.table, con=engine)
df['servicedate'] = pd.to_datetime(df['servicedate'])
filteredData = self.dataAccess.query(fields, filters)
df = pd.DataFrame(data=filteredData['data'])

if window == 'month':
numBins = 10
else:
df = pd.read_sql_query("SELECT\
requesttype,\
createddate,\
closeddate,\
nc,\
ncname\
FROM %s" % self.table, con=engine)

df['closeddate'] = pd.to_datetime(df['closeddate'])

if councils != []:
df = df[df.nc.isin(councils)]

if startdate != "":
start = pd.to_datetime(startdate)
df = df[(df['createddate'] >= start)]

if enddate != "":
end = pd.to_datetime(enddate)
df = df[df['createddate'] <= end]

df = df.sort_values(by=['requesttype',
'nc',
'createddate',
'closeddate'])
df_json = json.loads(df.to_json(orient="records"))

if aggregate:
summary = self.freq_aggregate(df)
json_data = []
json_data.append(json.loads(summary))
json_data.append(df_json)
return json_data

return df_json

# Todo: filter by NC at the sql request stage instead of afterwards


if __name__ == "__main__":
freq = frequency()
config = ConfigParser()
config.read("../setting.cfg")
freq.config = config
freq.dbString = config['Database']['DB_CONNECTION_STRING']
freq.freq_view_data(service=True, aggregate=True)
numBins = 12

df['buckets'] = pd.qcut(df['createddate'], q=numBins, precision=0)
bucketStartDates = [str(df['buckets'].unique()[i].left)
for i in range(numBins)]

return [{
'bucketStartDates': bucketStartDates,
'requestTypes': [{
'type': request,
'numRequests': df['buckets'][df['requesttype'] == request]
.value_counts(sort=False).values.tolist()
} for request in requestTypes]
}]

# The following class is deprecated; it is kept here for reference only.

# class frequency(object):
# def __init__(self, config=None, tableName="ingest_staging_table"):
# self.config = config
# self.dbString = None if not self.config \
# else self.config['Database']['DB_CONNECTION_STRING']

# self.table = tableName
# self.data = None
# pass

# The following code is deprecated; it is kept for reference in the meantime.

# def freq_view_all(self, serviced=False, aggregate=True):
# """
# Returns the request type and associated dates for all data
# Sorted by request type, followed by created date,
# service date (if applicable), and then closed date
# """
# engine = db.create_engine(self.dbString)

# if serviced:
# query = "SELECT \
# requesttype,\
# createddate,\
# closeddate,\
# servicedate\
# FROM %s" % self.table
# else:
# query = "SELECT \
# requesttype,\
# createddate,\
# closeddate\
# FROM %s" % self.table

# df = pd.read_sql_query(query, con=engine)

# if serviced:
# df['servicedate'] = pd.to_datetime(df['servicedate'])

# df['closeddate'] = pd.to_datetime(df['closeddate'])
# df = df.sort_values(by=['requesttype', 'createddate', 'closeddate'])

# return df.to_json(orient="records")

# def freq_aggregate(self, df):
# request_counts = df['requesttype'].value_counts()

# return request_counts.to_json()

# def freq_view_data(self,
# service=False,
# aggregate=True,
# councils=[],
# startdate="",
# enddate=""):
# """
# Returns the request type, neighborhood council, created and
# closed dates for all data sorted by request type, followed by
# neighborhood council #, then created date, and then closed date
# Returns serviced date as well if service is set to True
# Returns data for all councils if councils=[], otherwise returns data
# for only the array of neighborhood council #s
# Returns summary data as well if aggregate is set to True
# Returns only entries created between startdate and enddate if values
# are set for those parameters
# Format of startdate and enddate should be a string in
# the form 2019-12-01 23:02:05
# """
# engine = db.create_engine(self.dbString)

# if service:
# df = pd.read_sql_query("SELECT\
# requesttype,\
# createddate,\
# closeddate,\
# servicedate,\
# nc,\
# ncname\
# FROM %s" % self.table, con=engine)
# df['servicedate'] = pd.to_datetime(df['servicedate'])

# else:
# df = pd.read_sql_query("SELECT\
# requesttype,\
# createddate,\
# closeddate,\
# nc,\
# ncname\
# FROM %s" % self.table, con=engine)

# df['closeddate'] = pd.to_datetime(df['closeddate'])

# if councils != []:
# df = df[df.nc.isin(councils)]

# if startdate != "":
# start = pd.to_datetime(startdate)
# df = df[(df['createddate'] >= start)]

# if enddate != "":
# end = pd.to_datetime(enddate)
# df = df[df['createddate'] <= end]

# df = df.sort_values(by=['requesttype',
# 'nc',
# 'createddate',
# 'closeddate'])
# df_json = json.loads(df.to_json(orient="records"))

# if aggregate:
# summary = self.freq_aggregate(df)
# json_data = []
# json_data.append(json.loads(summary))
# json_data.append(df_json)
# return json_data

# return df_json

# if __name__ == "__main__":
# freq = frequency()
# config = ConfigParser()
# config.read("../setting.cfg")
# freq.config = config
# freq.dbString = config['Database']['DB_CONNECTION_STRING']
# freq.freq_view_data(service=True, aggregate=True)
Loading

0 comments on commit c88655d

Please sign in to comment.