From 076146eb1545d86136273b554a59045d731a6558 Mon Sep 17 00:00:00 2001
From: Ariel
Date: Fri, 21 Sep 2018 16:43:15 -0400
Subject: [PATCH 1/4] Modify stats endpoint to include all data we need on the dashboard

---
 api/data_refinery_api/views.py | 110 ++++++++++++++++++++++++++-------
 1 file changed, 89 insertions(+), 21 deletions(-)

diff --git a/api/data_refinery_api/views.py b/api/data_refinery_api/views.py
index 6af50f3ac..0090f105b 100644
--- a/api/data_refinery_api/views.py
+++ b/api/data_refinery_api/views.py
@@ -57,6 +57,9 @@
     APITokenSerializer
 )
 
+import datetime as dt
+from django.utils import timezone
+
 ##
 # Custom Views
 ##
@@ -606,34 +609,99 @@ def get(self, request, format=None):
 class Stats(APIView):
     """
     Statistics about the health of the system.
+
+    ?range=week includes statistics for the last week
     """
 
     def get(self, request, format=None):
+        range_param = request.query_params.dict().pop('range', None)
+
         data = {}
-        data['survey_jobs'] = {}
-        data['survey_jobs']['total'] = SurveyJob.objects.count()
-        data['survey_jobs']['pending'] = SurveyJob.objects.filter(start_time__isnull=True).count()
-        data['survey_jobs']['completed'] = SurveyJob.objects.filter(end_time__isnull=False).count()
-        data['survey_jobs']['open'] = SurveyJob.objects.filter(start_time__isnull=False, end_time__isnull=True).count()
-        # via https://stackoverflow.com/questions/32520655/get-average-of-difference-of-datetime-fields-in-django
-        data['survey_jobs']['average_time'] = SurveyJob.objects.filter(start_time__isnull=False, end_time__isnull=False).aggregate(average_time=Avg(F('end_time') - F('start_time')))['average_time']
-
-        data['downloader_jobs'] = {}
-        data['downloader_jobs']['total'] = DownloaderJob.objects.count()
-        data['downloader_jobs']['pending'] = DownloaderJob.objects.filter(start_time__isnull=True).count()
-        data['downloader_jobs']['completed'] = DownloaderJob.objects.filter(end_time__isnull=False).count()
-        data['downloader_jobs']['open'] = DownloaderJob.objects.filter(start_time__isnull=False, end_time__isnull=True).count()
-        data['downloader_jobs']['average_time'] = DownloaderJob.objects.filter(start_time__isnull=False, end_time__isnull=False).aggregate(average_time=Avg(F('end_time') - F('start_time')))['average_time']
-
-        data['processor_jobs'] = {}
-        data['processor_jobs']['total'] = ProcessorJob.objects.count()
-        data['processor_jobs']['pending'] = ProcessorJob.objects.filter(start_time__isnull=True).count()
-        data['processor_jobs']['completed'] = ProcessorJob.objects.filter(end_time__isnull=False).count()
-        data['processor_jobs']['open'] = ProcessorJob.objects.filter(start_time__isnull=False, end_time__isnull=True).count()
-        data['processor_jobs']['average_time'] = ProcessorJob.objects.filter(start_time__isnull=False, end_time__isnull=False).aggregate(average_time=Avg(F('end_time') - F('start_time')))['average_time']
+        data['survey_jobs'] = self._get_job_stats(SurveyJob.objects, range_param)
+        data['downloader_jobs'] = self._get_job_stats(DownloaderJob.objects, range_param)
+        data['processor_jobs'] = self._get_job_stats(ProcessorJob.objects, range_param)
+        data['samples'] = self._get_object_stats(Sample.objects, range_param)
+        data['experiments'] = self._get_object_stats(Experiment.objects, range_param)
 
         return Response(data)
 
+    def _get_job_stats(self, jobs, range_param):
+        result = {
+            'total': jobs.count(),
+            'pending': jobs.filter(start_time__isnull=True).count(),
+            'completed': jobs.filter(end_time__isnull=False).count(),
+            'open': jobs.filter(start_time__isnull=False, end_time__isnull=True).count(),
+            # via https://stackoverflow.com/questions/32520655/get-average-of-difference-of-datetime-fields-in-django
+            'average_time': jobs.filter(start_time__isnull=False, end_time__isnull=False).aggregate(
+                                average_time=Avg(F('end_time') - F('start_time')))['average_time']
+        }
+
+        if result['average_time'] is None:
+            result['average_time'] = 0
+        else:
+            result['average_time'] = result['average_time'].total_seconds()
+
+        if range_param is not None:
+            result['timeline'] = self._jobs_timeline(jobs, range_param)
+
+        return result
+
+    def _get_object_stats(self, objects, range_param):
+        result = {
+            'total': objects.count()
+        }
+
+        if range_param is not None:
+            result['timeline'] = self._created_timeline(objects, range_param)
+
+        return result
+
+    interval_timedelta = {
+        'day': dt.timedelta(days=1),
+        'week': dt.timedelta(weeks=1),
+        'month': dt.timedelta(weeks=4),
+        'year': dt.timedelta(weeks=52)
+    }
+
+    interval_timestep = {
+        'day': dt.timedelta(hours=1),
+        'week': dt.timedelta(days=1),
+        'month': dt.timedelta(days=2),
+        'year': dt.timedelta(weeks=4)
+    }
+
+    def _get_time_intervals(self, range_param):
+        current_date = dt.datetime.now(tz=timezone.utc)
+        time_step = self.interval_timestep.get(range_param)
+        start_date = current_date - self.interval_timedelta.get(range_param)
+
+        intervals = [(current_date - time_step*(i+1), current_date - time_step*i)
+                     for i in range(100) if current_date - time_step*(i+1) > start_date]
+        return intervals[::-1]
+
+    def _get_job_interval(self, jobs, start, end):
+        # All counts below are scoped to jobs created within [start, end].
+        filtered_jobs = jobs.filter(created_at__gte=start, created_at__lte=end)
+        pending = filtered_jobs.filter(start_time__isnull=True)
+        failed = filtered_jobs.filter(success=False)
+        completed = filtered_jobs.filter(success=True)
+        open_jobs = filtered_jobs.filter(success__isnull=True)
+
+        return {
+            'start': start,
+            'end': end,
+            'total': filtered_jobs.count(),
+            'completed': completed.count(),
+            'pending': pending.count(),
+            'failed': failed.count(),
+            'open': open_jobs.count()
+        }
+
+    def _jobs_timeline(self, jobs, range_param):
+        return [self._get_job_interval(jobs, start, end) for (start, end) in self._get_time_intervals(range_param)]
+
+    def _created_timeline(self, objects, range_param):
+        return [({'start': start, 'end': end, 'total': objects.filter(created_at__gte=start, created_at__lte=end).count()}) for (start, end) in self._get_time_intervals(range_param)]
+
 ###
 # Transcriptome Indices
 ###
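
Note: with PATCH 1/4 applied, the reworked endpoint can be exercised as in the
minimal sketch below. The base URL is a placeholder; the `range` query
parameter and the response keys (`total`, `average_time`, `timeline`, ...)
come from the diff above.

    import requests

    # Hypothetical local dev server; adjust the base URL for your deployment.
    response = requests.get("http://localhost:8000/stats", params={"range": "week"})
    data = response.json()

    # Top-level sections added by this patch; every section carries a 'total'.
    for section in ("survey_jobs", "downloader_jobs", "processor_jobs", "samples", "experiments"):
        print(section, data[section]["total"])

    # 'average_time' is reported in seconds (0 when no jobs have finished).
    print(data["survey_jobs"]["average_time"])

    # With ?range=..., each section also carries a 'timeline' of per-interval counts.
    for interval in data["survey_jobs"]["timeline"]:
        print(interval["start"], interval["end"], interval["total"])
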
From cb505ebbd728270ef36990aa6f2ae194bced749a Mon Sep 17 00:00:00 2001
From: Ariel
Date: Thu, 27 Sep 2018 15:27:12 -0400
Subject: [PATCH 2/4] Address PR comments

---
 api/data_refinery_api/views.py | 47 ++++++++++++++++++----------------
 1 file changed, 25 insertions(+), 22 deletions(-)

diff --git a/api/data_refinery_api/views.py b/api/data_refinery_api/views.py
index 0090f105b..6df2e9aa6 100644
--- a/api/data_refinery_api/views.py
+++ b/api/data_refinery_api/views.py
@@ -57,7 +57,7 @@
     APITokenSerializer
 )
 
-import datetime as dt
+from datetime import timedelta, datetime
 from django.utils import timezone
 
 ##
@@ -636,12 +636,12 @@ def _get_job_stats(self, jobs, range_param):
                                 average_time=Avg(F('end_time') - F('start_time')))['average_time']
         }
 
-        if result['average_time'] is None:
+        if not result['average_time']:
             result['average_time'] = 0
         else:
             result['average_time'] = result['average_time'].total_seconds()
 
-        if range_param is not None:
+        if range_param:
             result['timeline'] = self._jobs_timeline(jobs, range_param)
 
         return result
@@ -651,29 +651,28 @@ def _get_object_stats(self, objects, range_param):
             'total': objects.count()
         }
 
-        if range_param is not None:
+        if range_param:
             result['timeline'] = self._created_timeline(objects, range_param)
 
         return result
 
-    interval_timedelta = {
-        'day': dt.timedelta(days=1),
-        'week': dt.timedelta(weeks=1),
-        'month': dt.timedelta(weeks=4),
-        'year': dt.timedelta(weeks=52)
-    }
-
-    interval_timestep = {
-        'day': dt.timedelta(hours=1),
-        'week': dt.timedelta(days=1),
-        'month': dt.timedelta(days=2),
-        'year': dt.timedelta(weeks=4)
-    }
-
     def _get_time_intervals(self, range_param):
-        current_date = dt.datetime.now(tz=timezone.utc)
-        time_step = self.interval_timestep.get(range_param)
-        start_date = current_date - self.interval_timedelta.get(range_param)
+        interval_timedelta = {
+            'day': timedelta(days=1),
+            'week': timedelta(weeks=1),
+            'month': timedelta(weeks=4),
+            'year': timedelta(weeks=52)
+        }
+        interval_timestep = {
+            'day': timedelta(hours=1),
+            'week': timedelta(days=1),
+            'month': timedelta(days=2),
+            'year': timedelta(weeks=4)
+        }
+
+        current_date = datetime.now(tz=timezone.utc)
+        time_step = interval_timestep.get(range_param)
+        start_date = current_date - interval_timedelta.get(range_param)
 
         intervals = [(current_date - time_step*(i+1), current_date - time_step*i)
                      for i in range(100) if current_date - time_step*(i+1) > start_date]
@@ -700,7 +699,11 @@ def _jobs_timeline(self, jobs, range_param):
         return [self._get_job_interval(jobs, start, end) for (start, end) in self._get_time_intervals(range_param)]
 
     def _created_timeline(self, objects, range_param):
-        return [({'start': start, 'end': end, 'total': objects.filter(created_at__gte=start, created_at__lte=end).count()}) for (start, end) in self._get_time_intervals(range_param)]
+        return [({
+            'start': start,
+            'end': end,
+            'total': objects.filter(created_at__gte=start, created_at__lte=end).count()
+        }) for (start, end) in self._get_time_intervals(range_param)]
 
 ###
 # Transcriptome Indices
 ###
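
Note: the interval arithmetic that PATCH 2/4 moves into `_get_time_intervals`
can be reproduced standalone. A sketch for `range=week`, using the stdlib
`timezone.utc` in place of `django.utils.timezone` (both name an equivalent
UTC tzinfo), but otherwise the same computation as the patched method:

    from datetime import datetime, timedelta, timezone

    # range_param='week': one-day steps over the last week, capped at 100 steps.
    current_date = datetime.now(tz=timezone.utc)
    time_step = timedelta(days=1)
    start_date = current_date - timedelta(weeks=1)

    intervals = [(current_date - time_step * (i + 1), current_date - time_step * i)
                 for i in range(100) if current_date - time_step * (i + 1) > start_date]
    intervals = intervals[::-1]  # oldest interval first

    # Six intervals, not seven: the comprehension keeps only windows that start
    # strictly after start_date, so the oldest partial window is dropped.
    for start, end in intervals:
        print(start.isoformat(), "->", end.isoformat())
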
From 756688eb5f1f5093e443a99aee620a358681bfba Mon Sep 17 00:00:00 2001
From: Ariel
Date: Thu, 27 Sep 2018 15:36:03 -0400
Subject: [PATCH 3/4] simplify created_timeline method

---
 api/data_refinery_api/views.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/api/data_refinery_api/views.py b/api/data_refinery_api/views.py
index 6df2e9aa6..d5d6623e4 100644
--- a/api/data_refinery_api/views.py
+++ b/api/data_refinery_api/views.py
@@ -699,11 +699,16 @@ def _jobs_timeline(self, jobs, range_param):
         return [self._get_job_interval(jobs, start, end) for (start, end) in self._get_time_intervals(range_param)]
 
     def _created_timeline(self, objects, range_param):
-        return [({
-            'start': start,
-            'end': end,
-            'total': objects.filter(created_at__gte=start, created_at__lte=end).count()
-        }) for (start, end) in self._get_time_intervals(range_param)]
+        results = []
+        for start, end in self._get_time_intervals(range_param):
+            total = objects.filter(created_at__gte=start, created_at__lte=end).count()
+            stats = {
+                'start': start,
+                'end': end,
+                'total': total
+            }
+            results.append(stats)
+        return results
 
 ###
 # Transcriptome Indices
 ###
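
Note: the simplified `_created_timeline` in PATCH 3/4 returns one dict per
interval. A queryset-free sketch of the same shape, with hypothetical
in-memory timestamps standing in for the Django `objects.filter(...)` call:

    from datetime import datetime, timedelta, timezone

    # Hypothetical creation timestamps standing in for a Django queryset.
    now = datetime.now(tz=timezone.utc)
    created_at = [now - timedelta(days=d, hours=3) for d in (0, 1, 1, 2, 5)]

    # Daily intervals, oldest first (mirrors what _get_time_intervals returns).
    intervals = [(now - timedelta(days=i + 1), now - timedelta(days=i)) for i in range(4)][::-1]

    # Same loop structure as the simplified _created_timeline.
    results = []
    for start, end in intervals:
        total = sum(1 for t in created_at if start <= t <= end)
        results.append({'start': start, 'end': end, 'total': total})

    for entry in results:
        print(entry['start'].date(), entry['end'].date(), entry['total'])
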
From 1ac75d43952b574a49ba5051b3f301db133aaca6 Mon Sep 17 00:00:00 2001
From: Kurt Wheeler
Date: Mon, 1 Oct 2018 11:34:12 -0400
Subject: [PATCH 4/4] Increases nomad_server_instance_type from m5.xlarge to m5.2xlarge.

---
 infrastructure/environments/prod.tfvars | Bin 699 -> 700 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)

diff --git a/infrastructure/environments/prod.tfvars b/infrastructure/environments/prod.tfvars
index 9fd640e60302da15802736dfc3d13bd185e919ac..622f1e2dce5849e111d9889ab999bef53388937a 100644
GIT binary patch
[base85-encoded binary delta omitted: prod.tfvars is committed as a binary blob, so the change is not human-readable in the patch itself]
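
Note: because PATCH 4/4 touches a binary blob, the diff above is opaque. Going
by the commit subject, the underlying change presumably amounts to a single
variable assignment along these lines (variable name taken from the subject;
the surrounding file contents are unknown):

    # infrastructure/environments/prod.tfvars (sketch; the real file is not readable here)
    nomad_server_instance_type = "m5.2xlarge"  # previously "m5.xlarge"
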