From ceae5a5687564d1b43415a8ddeddd4ce3548885b Mon Sep 17 00:00:00 2001 From: Sam Doran Date: Wed, 8 May 2024 10:16:33 -0400 Subject: [PATCH 01/11] AWS network costs - Add data transfer direction to daily summary - Separate node network costs out into network unattributed --- ...rting_ocpawscostlineitem_daily_summary.sql | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql b/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql index c1edfc07b6..8fa14e97c2 100644 --- a/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql +++ b/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql @@ -174,6 +174,7 @@ INSERT INTO hive.{{schema | sqlsafe}}.aws_openshift_daily_resource_matched_temp region, unit, usage_amount, + data_transfer_direction, currency_code, unblended_cost, blended_cost, @@ -200,6 +201,19 @@ SELECT cast(uuid() as varchar) as uuid, nullif(aws.product_region, '') as region, max(nullif(aws.pricing_unit, '')) as unit, sum(aws.lineitem_usageamount) as usage_amount, + -- Determine network direction + CASE + -- Is this a network record? + WHEN aws.lineitem_productcode = 'AmazonEC2' AND aws.product_productfamily = 'Data Transfer' THEN + -- Yes, it's a network. What's the direction? + CASE + WHEN strpos(aws.lineitem_usagetype, 'In-Bytes') > 0 THEN 'IN' + WHEN strpos(aws.lineitem_usagetype, 'Out-Bytes') > 0 THEN 'OUT' + WHEN (strpos(aws.lineitem_usagetype, 'Regional-Bytes') > 0 AND strpos(lineitem_operation, '-In') > 0) THEN 'IN' + WHEN (strpos(aws.lineitem_usagetype, 'Regional-Bytes') > 0 AND strpos(lineitem_operation, '-Out') > 0)THEN 'OUT' + ELSE NULL + END + END as data_transfer_direction, max(nullif(aws.lineitem_currencycode, '')) as currency_code, sum(aws.lineitem_unblendedcost) as unblended_cost, sum(aws.lineitem_blendedcost) as blended_cost, @@ -448,6 +462,8 @@ SELECT aws.uuid as aws_uuid, AND aws.ocp_source = {{ocp_source_uuid}} AND aws.year = {{year}} AND aws.month = {{month}} + -- Filter out Node Networks Costs since they cannot be attributed to a namespace and are account for later + AND aws.data_transfer_direction IS NULL GROUP BY aws.uuid, ocp.namespace, ocp.pod_labels ; @@ -604,6 +620,7 @@ INSERT INTO hive.{{schema | sqlsafe}}.reporting_ocpawscostlineitem_project_daily region, unit, usage_amount, + data_transfer_direction, currency_code, unblended_cost, markup_cost, @@ -652,6 +669,7 @@ SELECT pds.aws_uuid, region, unit, usage_amount / aws_uuid_count as usage_amount, + NULL AS data_transfer_direction, currency_code, CASE WHEN resource_id_matched = TRUE AND data_source = 'Pod' THEN ({{pod_column | sqlsafe}} / nullif({{node_column | sqlsafe}}, 0)) * unblended_cost @@ -721,6 +739,15 @@ LEFT JOIN postgres.{{schema | sqlsafe}}.reporting_awsaccountalias AS aa WHERE pds.ocp_source = {{ocp_source_uuid}} AND year = {{year}} AND month = {{month}} ; +-- Put Node Network Costs back in +INSERT INTO hive.{{schema | sqlsafe}}.reporting_ocpawscostlineitem_project_daily_summary ( + +) +FROM hive.{{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp +JOIN hive.{{schema | sqlsafe}}.aws_openshift_daily_resource_matched_temp as aws + ON aws.usage_start = ocp.usage_start + AND strpos(aws.resource_id, ocp.resource_id) != 0 + INSERT INTO postgres.{{schema | sqlsafe}}.reporting_ocpawscostlineitem_project_daily_summary_p ( uuid, report_period_id, From 182501f5a15fc9933994ceec5290c28a3edc18cd Mon Sep 17 00:00:00 2001 From: Sam Doran Date: Mon, 13 May 2024 18:31:56 -0400 Subject: [PATCH 02/11] Separate node network costs into a Network unattributed project --- ...rting_ocpawscostlineitem_daily_summary.sql | 123 ++++++++++++++++-- 1 file changed, 110 insertions(+), 13 deletions(-) diff --git a/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql b/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql index 8fa14e97c2..12a7fcf8e3 100644 --- a/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql +++ b/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql @@ -204,16 +204,16 @@ SELECT cast(uuid() as varchar) as uuid, -- Determine network direction CASE -- Is this a network record? - WHEN aws.lineitem_productcode = 'AmazonEC2' AND aws.product_productfamily = 'Data Transfer' THEN - -- Yes, it's a network. What's the direction? + WHEN max(aws.lineitem_productcode) = 'AmazonEC2' AND max(aws.product_productfamily) = 'Data Transfer' THEN + -- Yes, it's a network record. What's the direction? CASE - WHEN strpos(aws.lineitem_usagetype, 'In-Bytes') > 0 THEN 'IN' - WHEN strpos(aws.lineitem_usagetype, 'Out-Bytes') > 0 THEN 'OUT' - WHEN (strpos(aws.lineitem_usagetype, 'Regional-Bytes') > 0 AND strpos(lineitem_operation, '-In') > 0) THEN 'IN' - WHEN (strpos(aws.lineitem_usagetype, 'Regional-Bytes') > 0 AND strpos(lineitem_operation, '-Out') > 0)THEN 'OUT' + WHEN position(max(aws.lineitem_usagetype) IN 'In-Bytes') > 0 THEN 'IN' + WHEN position(max(aws.lineitem_usagetype) IN 'Out-Bytes') > 0 THEN 'OUT' + WHEN (position(max(aws.lineitem_usagetype) IN 'Regional-Bytes') > 0 AND position(max(lineitem_operation) IN '-In') > 0) THEN 'IN' + WHEN (position(max(aws.lineitem_usagetype) IN 'Regional-Bytes') > 0 AND position(max(lineitem_operation) IN '-Out') > 0) THEN 'OUT' ELSE NULL END - END as data_transfer_direction, + END AS data_transfer_direction, max(nullif(aws.lineitem_currencycode, '')) as currency_code, sum(aws.lineitem_unblendedcost) as unblended_cost, sum(aws.lineitem_blendedcost) as blended_cost, @@ -462,7 +462,7 @@ SELECT aws.uuid as aws_uuid, AND aws.ocp_source = {{ocp_source_uuid}} AND aws.year = {{year}} AND aws.month = {{month}} - -- Filter out Node Networks Costs since they cannot be attributed to a namespace and are account for later + -- Filter out Node Network Costs since they cannot be attributed to a namespace and are accounted for later AND aws.data_transfer_direction IS NULL GROUP BY aws.uuid, ocp.namespace, ocp.pod_labels ; @@ -739,14 +739,111 @@ LEFT JOIN postgres.{{schema | sqlsafe}}.reporting_awsaccountalias AS aa WHERE pds.ocp_source = {{ocp_source_uuid}} AND year = {{year}} AND month = {{month}} ; --- Put Node Network Costs back in +-- Put Node Network Costs into the Network unattributed namespace INSERT INTO hive.{{schema | sqlsafe}}.reporting_ocpawscostlineitem_project_daily_summary ( - + aws_uuid, + cluster_id, + cluster_alias, + data_source, + namespace, + node, + persistentvolumeclaim, + persistentvolume, + storageclass, + resource_id, + usage_start, + usage_end, + product_code, + product_family, + instance_type, + usage_account_id, + availability_zone, + region, + unit, + usage_amount, + data_transfer_direction, + currency_code, + unblended_cost, + markup_cost, + blended_cost, + markup_cost_blended, + savingsplan_effective_cost, + markup_cost_savingsplan, + calculated_amortized_cost, + markup_cost_amortized, + pod_cost, + project_markup_cost, + pod_labels, + tags, + aws_cost_category, + cost_category_id, + aws_source, + ocp_source, + year, + month, + day ) -FROM hive.{{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp -JOIN hive.{{schema | sqlsafe}}.aws_openshift_daily_resource_matched_temp as aws +SELECT + aws.uuid AS aws_uuid, + max(cluster_id), + max(cluster_alias), + max(data_source), + 'Network unattributed' AS namespace, + max(node), + max(persistentvolumeclaim), + max(persistentvolume), + max(storageclass), + max(aws.resource_id), + max(aws.usage_start), + max(usage_end), + max(product_code), + max(product_family), + max(instance_type), + max(usage_account_id), + max(availability_zone), + max(region), + max(unit), + max(usage_amount), + data_transfer_direction, + max(currency_code), + max(unblended_cost), + max(unblended_cost) * cast({{markup}} AS decimal(24,9)), + max(blended_cost), + max(blended_cost) * cast({{markup}} AS decimal(24,9)), + max(savingsplan_effective_cost), + max(savingsplan_effective_cost) * cast({{markup}} AS decimal(24,9)), + max(calculated_amortized_cost), + max(calculated_amortized_cost) * cast({{markup}} AS decimal(33,9)), + max(unblended_cost) AS pod_cost, + max(unblended_cost) * cast({{markup}} AS decimal(24,9)) AS project_markup_cost, + max(ocp.pod_labels), + cast(NULL AS varchar) AS tags, + cast(NULL AS varchar) AS aws_cost_category, + max(cost_category_id), + max({{aws_source_uuid}}) AS aws_source, + max({{ocp_source_uuid}}) AS ocp_source, + max(cast(year(aws.usage_start) AS varchar)) AS year, + max(cast(month(aws.usage_start) AS varchar)) AS month, + max(cast(day(aws.usage_start) AS varchar)) AS day +FROM hive.{{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary AS ocp +JOIN hive.{{schema | sqlsafe}}.aws_openshift_daily_resource_matched_temp AS aws ON aws.usage_start = ocp.usage_start - AND strpos(aws.resource_id, ocp.resource_id) != 0 + AND position(ocp.resource_id IN aws.resource_id) != 0 +WHERE ocp.source = {{ocp_source_uuid}} + AND ocp.year = {{year}} + AND lpad(ocp.month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters + AND ocp.day IN {{days | inclause}} + AND (ocp.resource_id IS NOT NULL AND ocp.resource_id != '') + AND aws.ocp_source = {{ocp_source_uuid}} + AND aws.year = {{year}} + AND aws.month = {{month}} + -- Network related costs + AND aws.data_transfer_direction IS NOT NULL +GROUP BY + aws.uuid, + ocp.node, + aws.data_transfer_direction +; INSERT INTO postgres.{{schema | sqlsafe}}.reporting_ocpawscostlineitem_project_daily_summary_p ( uuid, From c3cc6c6d231387bf3f058648717d071760c387ff Mon Sep 17 00:00:00 2001 From: Sam Doran Date: Mon, 13 May 2024 18:49:14 -0400 Subject: [PATCH 03/11] Lower case string for comparison --- .../reporting_ocpawscostlineitem_daily_summary.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql b/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql index 12a7fcf8e3..dde29d36a7 100644 --- a/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql +++ b/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql @@ -207,10 +207,10 @@ SELECT cast(uuid() as varchar) as uuid, WHEN max(aws.lineitem_productcode) = 'AmazonEC2' AND max(aws.product_productfamily) = 'Data Transfer' THEN -- Yes, it's a network record. What's the direction? CASE - WHEN position(max(aws.lineitem_usagetype) IN 'In-Bytes') > 0 THEN 'IN' - WHEN position(max(aws.lineitem_usagetype) IN 'Out-Bytes') > 0 THEN 'OUT' - WHEN (position(max(aws.lineitem_usagetype) IN 'Regional-Bytes') > 0 AND position(max(lineitem_operation) IN '-In') > 0) THEN 'IN' - WHEN (position(max(aws.lineitem_usagetype) IN 'Regional-Bytes') > 0 AND position(max(lineitem_operation) IN '-Out') > 0) THEN 'OUT' + WHEN position(lower(max(aws.lineitem_usagetype)) IN 'in-bytes') > 0 THEN 'IN' + WHEN position(lower(max(aws.lineitem_usagetype)) IN 'out-bytes') > 0 THEN 'OUT' + WHEN (position(lower(max(aws.lineitem_usagetype)) IN 'regional-bytes') > 0 AND position(max(lineitem_operation) IN '-In') > 0) THEN 'IN' + WHEN (position(lower(max(aws.lineitem_usagetype)) IN 'regional-bytes') > 0 AND position(max(lineitem_operation) IN '-Out') > 0) THEN 'OUT' ELSE NULL END END AS data_transfer_direction, From 1cc6a8f92720aa1ec5ded369c19dbbef82e392ef Mon Sep 17 00:00:00 2001 From: Sam Doran Date: Tue, 14 May 2024 12:48:55 -0400 Subject: [PATCH 04/11] Fix comparison and add data_transfer_direction to final table --- .../reporting_ocpawscostlineitem_daily_summary.sql | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql b/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql index dde29d36a7..08b525c79b 100644 --- a/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql +++ b/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql @@ -207,10 +207,10 @@ SELECT cast(uuid() as varchar) as uuid, WHEN max(aws.lineitem_productcode) = 'AmazonEC2' AND max(aws.product_productfamily) = 'Data Transfer' THEN -- Yes, it's a network record. What's the direction? CASE - WHEN position(lower(max(aws.lineitem_usagetype)) IN 'in-bytes') > 0 THEN 'IN' - WHEN position(lower(max(aws.lineitem_usagetype)) IN 'out-bytes') > 0 THEN 'OUT' - WHEN (position(lower(max(aws.lineitem_usagetype)) IN 'regional-bytes') > 0 AND position(max(lineitem_operation) IN '-In') > 0) THEN 'IN' - WHEN (position(lower(max(aws.lineitem_usagetype)) IN 'regional-bytes') > 0 AND position(max(lineitem_operation) IN '-Out') > 0) THEN 'OUT' + WHEN position('in-bytes' IN lower(max(aws.lineitem_usagetype))) > 0 THEN 'IN' + WHEN position('out-bytes' IN lower(max(aws.lineitem_usagetype))) > 0 THEN 'OUT' + WHEN (position('regional-bytes' IN lower(max(aws.lineitem_usagetype))) > 0 AND position('-in' IN lower(max(lineitem_operation))) > 0) THEN 'IN' + WHEN (position('regional-bytes' IN lower(max(aws.lineitem_usagetype))) > 0 AND position('-out' IN lower(max(lineitem_operation))) > 0) THEN 'OUT' ELSE NULL END END AS data_transfer_direction, @@ -869,6 +869,7 @@ INSERT INTO postgres.{{schema | sqlsafe}}.reporting_ocpawscostlineitem_project_d region, unit, usage_amount, + data_transfer_direction, currency_code, unblended_cost, markup_cost, @@ -909,6 +910,7 @@ SELECT uuid(), region, unit, usage_amount, + data_transfer_direction, currency_code, unblended_cost, markup_cost, From a30e075c73bd6fd8abac58f0a5010f5ce9842e99 Mon Sep 17 00:00:00 2001 From: Sam Doran Date: Tue, 14 May 2024 14:53:43 -0400 Subject: [PATCH 05/11] Populate data transfer amounts --- ...ing_ocpaws_ocp_infrastructure_back_populate.sql | 11 +++++++++++ .../reporting_ocpawscostlineitem_daily_summary.sql | 14 ++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/koku/masu/database/sql/reporting_ocpaws_ocp_infrastructure_back_populate.sql b/koku/masu/database/sql/reporting_ocpaws_ocp_infrastructure_back_populate.sql index 49cf56ecb9..10aa5e551e 100644 --- a/koku/masu/database/sql/reporting_ocpaws_ocp_infrastructure_back_populate.sql +++ b/koku/masu/database/sql/reporting_ocpaws_ocp_infrastructure_back_populate.sql @@ -21,6 +21,8 @@ INSERT INTO {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary ( infrastructure_project_raw_cost, infrastructure_usage_cost, supplementary_usage_cost, + infrastructure_data_in_gigabytes, + infrastructure_data_out_gigabytes, pod_usage_cpu_core_hours, pod_request_cpu_core_hours, pod_limit_cpu_core_hours, @@ -67,6 +69,14 @@ INSERT INTO {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary ( sum(coalesce(nullif(ocp_aws.savingsplan_effective_cost, 0), ocp_aws.unblended_cost) + coalesce(nullif(ocp_aws.markup_cost_savingsplan, 0), ocp_aws.markup_cost)) AS infrastructure_project_raw_cost, '{"cpu": 0.000000000, "memory": 0.000000000, "storage": 0.000000000}'::jsonb as infrastructure_usage_cost, '{"cpu": 0.000000000, "memory": 0.000000000, "storage": 0.000000000}'::jsonb as supplementary_usage_cost, + CASE + WHEN upper(data_transfer_direction) = 'IN' THEN sum(infrastructure_data_in_gigabytes) + ELSE 0 + END as infrastructure_data_in_gigabytes, + CASE + WHEN upper(data_transfer_direction) = 'OUT' THEN sum(infrastructure_data_out_gigabytes) + ELSE 0 + END as infrastructure_data_out_gigabytes, 0 as pod_usage_cpu_core_hours, 0 as pod_request_cpu_core_hours, 0 as pod_limit_cpu_core_hours, @@ -102,5 +112,6 @@ INSERT INTO {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary ( ocp_aws.persistentvolumeclaim, ocp_aws.resource_id, ocp_aws.pod_labels, + ocp_aws.data_transfer_direction, rp.provider_id ; diff --git a/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql b/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql index 08b525c79b..fee5d7e705 100644 --- a/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql +++ b/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql @@ -757,6 +757,7 @@ INSERT INTO hive.{{schema | sqlsafe}}.reporting_ocpawscostlineitem_project_daily product_family, instance_type, usage_account_id, + account_alias_id, availability_zone, region, unit, @@ -800,6 +801,7 @@ SELECT max(product_family), max(instance_type), max(usage_account_id), + max(aa.id) AS account_alias_id, max(availability_zone), max(region), max(unit), @@ -829,6 +831,8 @@ FROM hive.{{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary AS ocp JOIN hive.{{schema | sqlsafe}}.aws_openshift_daily_resource_matched_temp AS aws ON aws.usage_start = ocp.usage_start AND position(ocp.resource_id IN aws.resource_id) != 0 +LEFT JOIN postgres.{{schema | sqlsafe}}.reporting_awsaccountalias AS aa + ON aws.usage_account_id = aa.account_id WHERE ocp.source = {{ocp_source_uuid}} AND ocp.year = {{year}} AND lpad(ocp.month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters @@ -869,6 +873,8 @@ INSERT INTO postgres.{{schema | sqlsafe}}.reporting_ocpawscostlineitem_project_d region, unit, usage_amount, + infrastructure_data_in_gigabytes, + infrastructure_data_out_gigabytes, data_transfer_direction, currency_code, unblended_cost, @@ -910,6 +916,14 @@ SELECT uuid(), region, unit, usage_amount, + CASE + WHEN upper(data_transfer_direction) = 'IN' THEN usage_amount + ELSE 0 + END AS infrastructure_data_in_gigabytes, + CASE + WHEN upper(data_transfer_direction) = 'OUT' THEN usage_amount + ELSE 0 + END AS infrastructure_data_out_gigabytes, data_transfer_direction, currency_code, unblended_cost, From 0cd41d62bccf0d279f137c9643342610423d46a5 Mon Sep 17 00:00:00 2001 From: Sam Doran Date: Thu, 6 Jun 2024 17:52:14 -0400 Subject: [PATCH 06/11] No need to aggregate on the field that is being grouped by --- .../trino_sql/reporting_ocpawscostlineitem_daily_summary.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql b/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql index fee5d7e705..3b9fd5a05a 100644 --- a/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql +++ b/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql @@ -790,7 +790,7 @@ SELECT max(cluster_alias), max(data_source), 'Network unattributed' AS namespace, - max(node), + ocp.node AS node, max(persistentvolumeclaim), max(persistentvolume), max(storageclass), From 0f69050e75bb27851e53fc891f51edbe13dad3d3 Mon Sep 17 00:00:00 2001 From: Sam Doran Date: Thu, 6 Jun 2024 17:52:30 -0400 Subject: [PATCH 07/11] Use strpos instead of position --- .../reporting_ocpawscostlineitem_daily_summary.sql | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql b/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql index 3b9fd5a05a..4984ca4c31 100644 --- a/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql +++ b/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql @@ -207,10 +207,10 @@ SELECT cast(uuid() as varchar) as uuid, WHEN max(aws.lineitem_productcode) = 'AmazonEC2' AND max(aws.product_productfamily) = 'Data Transfer' THEN -- Yes, it's a network record. What's the direction? CASE - WHEN position('in-bytes' IN lower(max(aws.lineitem_usagetype))) > 0 THEN 'IN' - WHEN position('out-bytes' IN lower(max(aws.lineitem_usagetype))) > 0 THEN 'OUT' - WHEN (position('regional-bytes' IN lower(max(aws.lineitem_usagetype))) > 0 AND position('-in' IN lower(max(lineitem_operation))) > 0) THEN 'IN' - WHEN (position('regional-bytes' IN lower(max(aws.lineitem_usagetype))) > 0 AND position('-out' IN lower(max(lineitem_operation))) > 0) THEN 'OUT' + WHEN strpos(lower(max(aws.lineitem_usagetype)), 'in-bytes') > 0 THEN 'IN' + WHEN strpos(lower(max(aws.lineitem_usagetype)), 'out-bytes') > 0 THEN 'OUT' + WHEN (strpos(lower(max(aws.lineitem_usagetype)), 'regional-bytes') > 0 AND strpos(lower(max(lineitem_operation)), '-in') > 0) THEN 'IN' + WHEN (strpos(lower(max(aws.lineitem_usagetype)), 'regional-bytes') > 0 AND strpos(lower(max(lineitem_operation)), '-out') > 0) THEN 'OUT' ELSE NULL END END AS data_transfer_direction, @@ -830,7 +830,7 @@ SELECT FROM hive.{{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary AS ocp JOIN hive.{{schema | sqlsafe}}.aws_openshift_daily_resource_matched_temp AS aws ON aws.usage_start = ocp.usage_start - AND position(ocp.resource_id IN aws.resource_id) != 0 + AND strpos(aws.resource_id, ocp.resource_id) != 0 LEFT JOIN postgres.{{schema | sqlsafe}}.reporting_awsaccountalias AS aa ON aws.usage_account_id = aa.account_id WHERE ocp.source = {{ocp_source_uuid}} From 6422e4db80ebb235fb15a53b8d41ffd18031d681 Mon Sep 17 00:00:00 2001 From: Sam Doran Date: Thu, 6 Jun 2024 18:51:38 -0400 Subject: [PATCH 08/11] Group by lineitem_usagetype MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It’s not possible to use an aggregation in a GROUP BY statement, so grouping by the lineitem_usagetype is the closest I could think of. It may result in too many distinct rows, though. --- .../trino_sql/reporting_ocpawscostlineitem_daily_summary.sql | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql b/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql index 4984ca4c31..41bbca484d 100644 --- a/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql +++ b/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql @@ -242,11 +242,12 @@ WHERE aws.source = {{aws_source_uuid}} AND aws.resource_id_matched = TRUE GROUP BY aws.lineitem_usagestartdate, aws.lineitem_resourceid, - 4, -- CASE satement + 4, -- product_code aws.product_productfamily, aws.product_instancetype, aws.lineitem_availabilityzone, aws.product_region, + aws.lineitem_usagetype, aws.resourcetags, aws.costcategory ; @@ -335,7 +336,7 @@ WHERE aws.source = {{aws_source_uuid}} AND (aws.resource_id_matched = FALSE OR aws.resource_id_matched IS NULL) GROUP BY aws.lineitem_usagestartdate, aws.lineitem_resourceid, - 4, -- CASE satement + 4, -- product_code aws.product_productfamily, aws.product_instancetype, aws.lineitem_availabilityzone, From daae1e02917a413c61253d9f2408033919179a23 Mon Sep 17 00:00:00 2001 From: Sam Doran Date: Fri, 12 Jul 2024 14:12:32 -0400 Subject: [PATCH 09/11] Only use Pod data source for Network unattributed costs --- .../trino_sql/reporting_ocpawscostlineitem_daily_summary.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql b/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql index 41bbca484d..9486c96b1a 100644 --- a/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql +++ b/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql @@ -844,6 +844,8 @@ WHERE ocp.source = {{ocp_source_uuid}} AND aws.month = {{month}} -- Network related costs AND aws.data_transfer_direction IS NOT NULL + -- Storage and Pod can have the same resource_id and we want the Pod + AND ocp.data_source = 'Pod' GROUP BY aws.uuid, ocp.node, From c9d314ee06264234c650799beaa10a352a5252f6 Mon Sep 17 00:00:00 2001 From: Sam Doran Date: Fri, 19 Jul 2024 15:42:32 -0400 Subject: [PATCH 10/11] Group by lineitem_operation in order to determine correct direction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without this group by, the data transfer direction would not be accurate since the operation value may sometimes contain ‘-in’ and other times ‘-out’. This ensures we are getting the correct direction consistently. Co-authored-by: Corey Goodfred --- .../trino_sql/reporting_ocpawscostlineitem_daily_summary.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql b/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql index 9486c96b1a..01a05e9b78 100644 --- a/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql +++ b/koku/masu/database/trino_sql/reporting_ocpawscostlineitem_daily_summary.sql @@ -249,7 +249,8 @@ GROUP BY aws.lineitem_usagestartdate, aws.product_region, aws.lineitem_usagetype, aws.resourcetags, - aws.costcategory + aws.costcategory, + lineitem_operation ; INSERT INTO hive.{{schema | sqlsafe}}.aws_openshift_daily_tag_matched_temp ( From ff669625de86c1f5fc8815ed246fbf826e6a8265 Mon Sep 17 00:00:00 2001 From: Sam Doran Date: Fri, 19 Jul 2024 16:15:22 -0400 Subject: [PATCH 11/11] Set data value to NULL for non-applicable direction If set to 0, the records will be inserted twice into the network summary tables because that query is filtering on values where the field is not null, and zero is not null. Co-authored-by: Corey Goodfred --- .../sql/reporting_ocpaws_ocp_infrastructure_back_populate.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/koku/masu/database/sql/reporting_ocpaws_ocp_infrastructure_back_populate.sql b/koku/masu/database/sql/reporting_ocpaws_ocp_infrastructure_back_populate.sql index 10aa5e551e..0f33f37458 100644 --- a/koku/masu/database/sql/reporting_ocpaws_ocp_infrastructure_back_populate.sql +++ b/koku/masu/database/sql/reporting_ocpaws_ocp_infrastructure_back_populate.sql @@ -71,11 +71,11 @@ INSERT INTO {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary ( '{"cpu": 0.000000000, "memory": 0.000000000, "storage": 0.000000000}'::jsonb as supplementary_usage_cost, CASE WHEN upper(data_transfer_direction) = 'IN' THEN sum(infrastructure_data_in_gigabytes) - ELSE 0 + ELSE NULL END as infrastructure_data_in_gigabytes, CASE WHEN upper(data_transfer_direction) = 'OUT' THEN sum(infrastructure_data_out_gigabytes) - ELSE 0 + ELSE NULL END as infrastructure_data_out_gigabytes, 0 as pod_usage_cpu_core_hours, 0 as pod_request_cpu_core_hours,