diff --git a/functions/crux/.gitkeep b/functions/crux/.gitkeep deleted file mode 100644 index 45adbb2..0000000 --- a/functions/crux/.gitkeep +++ /dev/null @@ -1 +0,0 @@ -.gitkeep \ No newline at end of file diff --git a/functions/crux/main.py b/functions/crux/main.py new file mode 100644 index 0000000..a24302b --- /dev/null +++ b/functions/crux/main.py @@ -0,0 +1,32 @@ +from google.cloud import bigquery +from google.cloud import storage +from google.cloud.exceptions import NotFound +from util_class import Crux +import logging +import json +import os + +logging.basicConfig(level=logging.INFO) + +storage_client = storage.Client() +bigquery_client = bigquery.Client() + +project = os.environ.get('PROJECT_ID') +dataset = os.environ.get('PROJECT_DATASET_BQ') + +crux_table = "{project}.{dataset}.crux_table".format(project=project,dataset=dataset) +crux = Crux(bigquery_client,crux_table,bigquery,storage_client) + +def main(event,context): + + if(crux.check_last_month()): + logging.info("Previously updated CRUX table.") + + else: + + if(crux.check_table_crux()): + response = crux.update_crux_table() + if(response == True): + logging.info("CRUX table updated successfully.") + else: + logging.info("CRUX table not available.") diff --git a/functions/crux/requirements.txt b/functions/crux/requirements.txt new file mode 100644 index 0000000..9577935 --- /dev/null +++ b/functions/crux/requirements.txt @@ -0,0 +1,5 @@ +# Function dependencies, for example: +# package>=version +google-cloud-bigquery>=2.20.0 +google-cloud-storage>=1.40.0 +google-cloud>=0.34.0 diff --git a/functions/crux/schema.json b/functions/crux/schema.json new file mode 100644 index 0000000..34c5c4e --- /dev/null +++ b/functions/crux/schema.json @@ -0,0 +1,232 @@ +[ + { + "field_path": "year_month", + "data_type": "STRING", + "mode": "NULLABLE" + }, + { + "field_path": "start", + "data_type": "NUMERIC", + "mode": "NULLABLE" + }, + { + "field_path": "ended", + "data_type": "NUMERIC", + "mode": "NULLABLE" + }, + { + "field_path": "form", + "data_type": "STRING", + "mode": "NULLABLE" + }, + { + "field_path": "origin", + "data_type": "STRING", + "mode": "NULLABLE" + }, + { + "field_path": "connection", + "data_type": "STRING", + "mode": "NULLABLE" + }, + { + "field_path": "fcp_density", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "lcp_density", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "dcl_density", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "onload_density", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "fp_density", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "fid_density", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "ttfb_density", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "cls_density", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "good_lcp", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "poor_lcp", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "average_lcp", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "good_fp", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "poor_fp", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "average_fp", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "good_onload", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "poor_onload", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "average_onload", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "good_fcp", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "poor_fcp", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "average_fcp", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "good_dcl", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "poor_dcl", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "average_dcl", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "good_fid", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "poor_fid", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "average_fid", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "good_cls", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "poor_cls", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "average_cls", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "good_ttfb", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "poor_ttfb", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "average_ttfb", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "phone", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "desktop", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "tablet", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "fourG", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "threeG", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "twoG", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "slowtwoG", + "data_type": "FLOAT64", + "mode": "NULLABLE" + }, + { + "field_path": "offline", + "data_type": "FLOAT64", + "mode": "NULLABLE" + } +] \ No newline at end of file diff --git a/functions/crux/util_class.py b/functions/crux/util_class.py new file mode 100644 index 0000000..9670659 --- /dev/null +++ b/functions/crux/util_class.py @@ -0,0 +1,98 @@ +from google.cloud.exceptions import NotFound +from util_query import crux_query +import logging +import json +import re +import os +import datetime + +class Crux: + + def __init__(self, bigquery_client,crux_table,bigquery,storage_client): + self.bigquery_client = bigquery_client + self.crux_table = crux_table + self.bigquery = bigquery + self.storage_client = storage_client + + def table_suffix(self): + + today = datetime.date.today() + first = today.replace(day=1) + lastMonth = first - datetime.timedelta(days=1) + return lastMonth.strftime("%Y%m") + + def check_rows(self): + + table = self.bigquery_client.get_table(self.crux_table) + return table.num_rows + + + def check_table_crux(self): + try: + full_table = "chrome-ux-report.all." + self.table_suffix() + query_job = self.bigquery_client.get_table(full_table) + return True + + except NotFound: + return False + + def check_last_month(self): + try: + response = self.bigquery_client.query(""" + SELECT if(count(*) > 0,true,false) as check_rows FROM `{table}` + where year_month = '{year_month}' + + """.format(year_month = self.table_suffix(), table = self.crux_table) + ) + return list(response.result())[0].values()[0] + except NotFound: + schema_json = json.load(open("schema_json.json")) + schema = [] + for obj in schema_json: + schema.append(self.bigquery.SchemaField(obj["field_path"], obj["data_type"], mode=obj["mode"])) + + table = self.bigquery.Table(self.crux_table, schema = schema) + table = self.bigquery_client.create_table(table) + logging.info("CRUX database created successfully.") + return False + + + def get_domains(self): + + bucket_gcs = os.environ.get('PROJECT_BUCKET_GCS') + bucket = self.storage_client.get_bucket(bucket_gcs) + blob = bucket.blob('config/config.json') + domains_json = json.loads(blob.download_as_string()) + domains = [] + for ind,value in enumerate(domains_json["URLS"]): + if value["page"] == "Home": + url = "'" + re.sub(r'(\/)$','',value["URL"]) + "'" + domains.append(url) + return ",".join(domains) + + + + + def update_crux_table(self): + try: + job_config = self.bigquery.QueryJobConfig() + job_config.destination = self.crux_table + job_config.write_disposition = 'WRITE_APPEND' + job_config.allow_large_results = True + + domains = self.get_domains() + table_suffix = self.table_suffix() + sql_query = crux_query(domains,table_suffix) + + rows_before_response = self.check_rows() + response = self.bigquery_client.query(sql_query,job_config = job_config) + rows_after_response = response.result().total_rows + table_loaded = rows_after_response > rows_before_response + if(table_loaded == False): + logging.error("CRUX table loading error.") + + return table_loaded + except Exception as error: + logging.error(error) + + return False diff --git a/functions/crux/util_query.py b/functions/crux/util_query.py new file mode 100644 index 0000000..645699f --- /dev/null +++ b/functions/crux/util_query.py @@ -0,0 +1,237 @@ +## Query responsible to retrieve CRUX data at bigquery public dataset. +def crux_query(domains,table_suffix): + return """ + with + +base_crux as ( + +select _TABLE_SUFFIX as year_month , * from `chrome-ux-report.all.*` +where _TABLE_SUFFIX = '{table_suffix}' +and +origin in ({domains}) + + +), + + +lcp as ( --- LCP +SELECT +year_month, +origin, +bin.start start , +bin.end ended , +effective_connection_type.name AS connection, +form_factor.name AS form, +sum(bin.density) density + + FROM base_crux left join unnest(largest_contentful_paint.histogram.bin) as bin + group by 1,2,3,4,5,6 + order by start +), +fcp as ( --- FCP +SELECT +year_month, +origin, +bin.start start , +bin.end ended , +effective_connection_type.name AS connection, +form_factor.name AS form, +sum(bin.density) density + + FROM base_crux left join unnest(first_contentful_paint.histogram.bin) as bin + group by 1,2,3,4,5,6 + order by start +), +dcl as ( --- DCL +SELECT +year_month, +origin, +bin.start start , +bin.end ended , +effective_connection_type.name AS connection, +form_factor.name AS form, +sum(bin.density) density + + FROM base_crux left join unnest(dom_content_loaded.histogram.bin) as bin + group by 1,2,3,4,5,6 + order by start +), +onload as ( --- ONLOAD +SELECT +year_month, +origin, +bin.start start , +bin.end ended, +effective_connection_type.name AS connection, +form_factor.name AS form, +sum(bin.density) density + + FROM base_crux left join unnest(onload.histogram.bin) as bin + group by 1,2,3,4,5,6 + order by start +), +fp as ( --- FP +SELECT +year_month, +origin, +bin.start start , +bin.end ended , +effective_connection_type.name AS connection, +form_factor.name AS form, +sum(bin.density) density + + FROM base_crux left join unnest(first_paint.histogram.bin) as bin + group by 1,2,3,4,5,6 + order by start +), + +fid as ( --- FID +SELECT +year_month, +origin, +bin.start start , +bin.end ended , +effective_connection_type.name AS connection, +form_factor.name AS form, +sum(bin.density) density + + FROM base_crux left join unnest(first_input.delay.histogram.bin) as bin + group by 1,2,3,4,5,6 + order by start +), + + +ttfb as ( --- TTFB +SELECT +year_month, +origin, +bin.start start , +bin.end ended , +effective_connection_type.name AS connection, +form_factor.name AS form, +sum(bin.density) density + + FROM base_crux left join unnest(experimental.time_to_first_byte.histogram.bin) as bin + group by 1,2,3,4,5,6 + order by start +), + +cls as ( --- CLS +SELECT +year_month, +origin, +bin.start start , +bin.end ended , +effective_connection_type.name AS connection, +form_factor.name AS form, +sum(bin.density) density + + FROM base_crux left join unnest(layout_instability.cumulative_layout_shift.histogram.bin) as bin + group by 1,2,3,4,5,6 + order by start +), base_final as ( + + + +select +coalesce(lcp.year_month,fcp.year_month,dcl.year_month,onload.year_month,fp.year_month,fid.year_month,ttfb.year_month,cls.year_month ) as year_month, +coalesce(lcp.start,fcp.start,dcl.start,onload.start,fp.start,fid.start,ttfb.start,cls.start) as start, +coalesce(lcp.ended,fcp.ended,dcl.ended,onload.ended,fp.ended,fid.ended,ttfb.ended,cls.ended) as ended, +coalesce(lcp.form,fcp.form,dcl.form,onload.form,fp.form,fid.form,ttfb.form,cls.form) as form, +coalesce(lcp.origin,fcp.origin,dcl.origin,onload.origin,fp.origin,fid.origin,ttfb.origin,cls.origin) as origin, +coalesce(lcp.connection,fcp.connection,dcl.connection,onload.connection,fp.connection,fid.connection,ttfb.connection,cls.connection) as connection, + + +coalesce(fcp.density,0) fcp_density, +coalesce(lcp.density,0) lcp_density, +coalesce(dcl.density,0) dcl_density, +coalesce(onload.density,0) onload_density, +coalesce(fp.density,0) fp_density, +coalesce(fid.density,0) fid_density, +coalesce(ttfb.density,0) ttfb_density, +coalesce(cls.density,0) cls_density + +from lcp +full outer join +fcp using(year_month,start,form,origin,connection,ended) +full outer join +dcl using(year_month,start,form,origin,connection,ended) +full outer join +onload using(year_month,start,form,origin,connection,ended) +full outer join +fp using(year_month,start,form,origin,connection,ended) +full outer join +fid using(year_month,start,form,origin,connection,ended) +full outer join +ttfb using(year_month,start,form,origin,connection,ended) +full outer join +cls using(year_month,start,form,origin,connection,ended) +), + thresholds as ( + +select + +[ + struct("LCP" as metric, 2500 as min , 4000 as max ), + struct("FID" as metric, 100 as min , 300 as max ), + struct("CLS" as metric, 0.10 as min , 0.25 as max ), + struct("FCP" as metric, 1500 as min , 2500 as max ), + struct("TTFB" as metric, 500 as min , 1500 as max ), + struct("FP" as metric, 1000 as min , 3000 as max ), + struct("DCL" as metric, 1500 as min , 3500 as max ), + struct("OL" as metric, 2500 as min , 6500 as max ) + +] + as limits + +) +select * except(limits), + + +if(start < (select min from unnest(limits) where metric = "LCP") ,lcp_density,0) as good_lcp, +if(start >= (select max from unnest(limits) where metric = "LCP") ,lcp_density,0) as poor_lcp, +if(start >= (select min from unnest(limits) where metric = "LCP") and start < (select max from unnest(limits) where metric = "LCP") ,lcp_density,0) as average_lcp, + +if(start < (select min from unnest(limits) where metric = "FP") , fp_density,0) as good_fp, +if(start >= (select max from unnest(limits) where metric = "FP") ,fp_density,0) as poor_fp, +if(start >= (select min from unnest(limits) where metric = "FP") and start < (select max from unnest(limits) where metric = "FP") ,fp_density,0) as average_fp, + +if(start < (select min from unnest(limits) where metric = "OL") ,onload_density,0) as good_onload, +if(start >= (select max from unnest(limits) where metric = "OL") ,onload_density,0) as poor_onload, +if(start >= (select min from unnest(limits) where metric = "OL") and start < (select max from unnest(limits) where metric = "OL") ,onload_density,0) as average_onload, + +if(start < (select min from unnest(limits) where metric = "FCP") ,fcp_density,0) as good_fcp, +if(start >= (select max from unnest(limits) where metric = "FCP") ,fcp_density,0) as poor_fcp, +if(start >= (select min from unnest(limits) where metric = "FCP") and start < (select max from unnest(limits) where metric = "FCP") ,fcp_density,0) as average_fcp, + +if(start < (select min from unnest(limits) where metric = "DCL") ,dcl_density,0) as good_dcl, +if(start >= (select max from unnest(limits) where metric = "DCL") ,dcl_density,0) as poor_dcl, +if(start >= (select min from unnest(limits) where metric = "DCL") and start < (select max from unnest(limits) where metric = "DCL") ,dcl_density,0) as average_dcl, + +if(start < (select min from unnest(limits) where metric = "FID") ,fid_density,0) as good_fid, +if(start >= (select max from unnest(limits) where metric = "FID") ,fid_density,0) as poor_fid, +if(start >= (select min from unnest(limits) where metric = "FID") and start < (select max from unnest(limits) where metric = "FID") ,fid_density,0) as average_fid, + +if(start < (select min from unnest(limits) where metric = "CLS") ,cls_density,0) as good_cls, +if(start >= (select max from unnest(limits) where metric = "CLS") ,cls_density,0) as poor_cls, +if(start >= (select min from unnest(limits) where metric = "CLS") and start < (select max from unnest(limits) where metric = "CLS") ,cls_density,0) as average_cls, + +if(start < (select min from unnest(limits) where metric = "TTFB") ,ttfb_density,0) as good_ttfb, +if(start >= (select max from unnest(limits) where metric = "TTFB") ,ttfb_density,0) as poor_ttfb, +if(start >= (select min from unnest(limits) where metric = "TTFB") and start < (select max from unnest(limits) where metric = "TTFB") ,ttfb_density,0) as average_ttfb, + +if(form like "phone",fcp_density,0) as phone, +if(form like "desktop",fcp_density,0) as desktop, +if(form like "tablet",fcp_density,0) as tablet, + +if(connection like "4G",fcp_density,0) as fourG, +if(connection like "3G",fcp_density,0) as threeG, +if(connection like "2G",fcp_density,0) as twoG, +if(connection like "Slow 2G",fcp_density,0) as slowtwoG, +if(connection like "Offline",fcp_density,0) as offline, + + +from base_final ,thresholds as t + + +""".format(domains = domains,table_suffix = table_suffix) \ No newline at end of file