Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[COST-4745] OCPGCP Network data processing SQL #5058

Merged
merged 17 commits into from
Jul 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions dev/scripts/nise_ymls/ocp_on_gcp/gcp_static_data.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ generators:
start_date: {{start_date}}
end_date: {{end_date}}
price: 2
sku_id: CF4E-A0C7-E3BF
usage.amount_in_pricing_units: 1
usage.pricing_unit: hour
currency: USD
Expand All @@ -12,6 +13,30 @@ generators:
resource.name: projects/nise-populator/instances/gcp_compute1
resource.global_name: //compute.googleapis.com/projects/nise-populator/zones/australia-southeast1-a/instances/3447398860992947181
labels: [{"environment": "clyde", "app":"winter", "version":"green", "kubernetes-io-cluster-c32se93c-73z3-3s3d-cs23-d3245sj45349": "owned"}]
- ComputeEngineGenerator:
start_date: {{start_date}}
end_date: {{end_date}}
price: 2
sku_id: BBF8-C07D-1DF4 #inbound data transfer
usage.amount_in_pricing_units: 50
currency: USD
instance_type: m2-megamem-416
location.region: australia-southeast1-a
resource.name: projects/nise-populator/instances/gcp_compute1
resource.global_name: //compute.googleapis.com/projects/nise-populator/zones/australia-southeast1-a/instances/3447398860992947181
labels: [{"environment": "clyde", "app":"winter", "version":"green", "kubernetes-io-cluster-c32se93c-73z3-3s3d-cs23-d3245sj45349": "owned"}]
- ComputeEngineGenerator:
start_date: {{start_date}}
end_date: {{end_date}}
price: 30
sku_id: 9DE9-9092-B3BC # outbound data transfer
usage.amount_in_pricing_units: 10
currency: USD
instance_type: m2-megamem-416
location.region: australia-southeast1-a
resource.name: projects/nise-populator/instances/gcp_compute1
resource.global_name: //compute.googleapis.com/projects/nise-populator/zones/australia-southeast1-a/instances/3447398860992947181
labels: [{"environment": "clyde", "app":"winter", "version":"green", "kubernetes-io-cluster-c32se93c-73z3-3s3d-cs23-d3245sj45349": "owned"}]
- ComputeEngineGenerator:
start_date: {{start_date}}
end_date: {{end_date}}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ INSERT INTO {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary (
source_uuid,
infrastructure_raw_cost,
infrastructure_project_raw_cost,
infrastructure_data_in_gigabytes,
infrastructure_data_out_gigabytes,
infrastructure_usage_cost,
supplementary_usage_cost,
pod_usage_cpu_core_hours,
Expand Down Expand Up @@ -65,6 +67,14 @@ INSERT INTO {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary (
rp.provider_id as source_uuid,
sum(ocp_gcp.unblended_cost + ocp_gcp.markup_cost + ocp_gcp.credit_amount) AS infrastructure_raw_cost,
sum(ocp_gcp.unblended_cost + ocp_gcp.project_markup_cost + ocp_gcp.pod_credit) AS infrastructure_project_raw_cost,
CASE
WHEN upper(data_transfer_direction) = 'IN' THEN sum(infrastructure_data_in_gigabytes)
ELSE NULL
END as infrastructure_data_in_gigabytes,
CASE
WHEN upper(data_transfer_direction) = 'OUT' THEN sum(infrastructure_data_out_gigabytes)
ELSE NULL
END as infrastructure_data_out_gigabytes,
'{"cpu": 0.000000000, "memory": 0.000000000, "storage": 0.000000000}'::jsonb as infrastructure_usage_cost,
'{"cpu": 0.000000000, "memory": 0.000000000, "storage": 0.000000000}'::jsonb as supplementary_usage_cost,
0 as pod_usage_cpu_core_hours,
Expand Down Expand Up @@ -101,5 +111,6 @@ INSERT INTO {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary (
ocp_gcp.persistentvolumeclaim,
ocp_gcp.resource_id,
ocp_gcp.pod_labels,
ocp_gcp.data_transfer_direction,
rp.provider_id
;
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ INSERT INTO hive.{{schema | sqlsafe}}.gcp_openshift_daily_resource_matched_temp
instance_type,
service_id,
service_alias,
data_transfer_direction,
sku_id,
sku_alias,
region,
Expand All @@ -205,6 +206,11 @@ SELECT cast(uuid() as varchar),
json_extract_scalar(json_parse(gcp.system_labels), '$["compute.googleapis.com/machine_spec"]') as instance_type,
gcp.service_id,
max(nullif(gcp.service_description, '')) as service_alias,
CASE
WHEN service_description = 'Compute Engine' AND STRPOS(lower(sku_description), 'data transfer in') != 0 THEN 'IN'
WHEN service_description = 'Compute Engine' AND STRPOS(lower(sku_description), 'data transfer') != 0 THEN 'OUT'
maskarb marked this conversation as resolved.
Show resolved Hide resolved
ELSE NULL
END as data_transfer_direction,
max(nullif(gcp.sku_id, '')) as sku_id,
max(nullif(gcp.sku_description, '')) as sku_alias,
gcp.location_region as region,
Expand Down Expand Up @@ -233,7 +239,8 @@ GROUP BY gcp.usage_start_time,
gcp.service_id,
gcp.location_region,
gcp.invoice_month,
gcp.labels
gcp.labels,
10 -- data transfer direction
;

INSERT INTO hive.{{schema | sqlsafe}}.gcp_openshift_daily_tag_matched_temp (
Expand Down Expand Up @@ -438,6 +445,8 @@ WHERE ocp.source = {{ocp_source_uuid}}
AND gcp.ocp_source = {{ocp_source_uuid}}
AND gcp.year = {{year}}
AND gcp.month = {{month}}
-- Filter out Node Network Costs because they cannot be tied to namespace level
AND data_transfer_direction IS NULL
GROUP BY gcp.uuid, ocp.namespace, ocp.data_source, ocp.pod_labels, ocp.volume_labels
;

Expand Down Expand Up @@ -590,6 +599,7 @@ INSERT INTO hive.{{schema | sqlsafe}}.reporting_ocpgcpcostlineitem_project_daily
instance_type,
service_id,
service_alias,
data_transfer_direction,
sku_id,
sku_alias,
region,
Expand Down Expand Up @@ -657,6 +667,7 @@ SELECT pds.gcp_uuid,
instance_type,
service_id,
service_alias,
NULL as data_transfer_direction,
sku_id,
sku_alias,
region,
Expand Down Expand Up @@ -711,6 +722,110 @@ JOIN cte_rankings as r
WHERE pds.ocp_source = {{ocp_source_uuid}} AND pds.year = {{year}} AND pds.month = {{month}}
;

-- Network costs are currently not mapped to pod metrics
-- and are filtered out of the above SQL since that is grouped by namespace
-- and costs are split out by pod metrics, this puts all network costs per node
-- into a "Network unattributed" project with no cost split and one record per
-- data direction
INSERT INTO hive.{{schema | sqlsafe}}.reporting_ocpgcpcostlineitem_project_daily_summary (
gcp_uuid,
cluster_id,
cluster_alias,
data_source,
namespace,
node,
persistentvolumeclaim,
persistentvolume,
storageclass,
resource_id,
usage_start,
usage_end,
account_id,
project_id,
project_name,
instance_type,
service_id,
service_alias,
data_transfer_direction,
sku_id,
sku_alias,
region,
unit,
usage_amount,
currency,
invoice_month,
credit_amount,
unblended_cost,
markup_cost,
project_markup_cost,
pod_cost,
pod_credit,
tags,
cost_category_id,
gcp_source,
ocp_source,
year,
month,
day
)
SELECT gcp.uuid as gcp_uuid,
max(ocp.cluster_id) as cluster_id,
max(ocp.cluster_alias) as cluster_alias,
max(ocp.data_source),
'Network unattributed' as namespace,
ocp.node as node,
max(nullif(ocp.persistentvolumeclaim, '')) as persistentvolumeclaim,
max(nullif(ocp.persistentvolume, '')) as persistentvolume,
max(nullif(ocp.storageclass, '')) as storageclass,
max(ocp.resource_id) as resource_id,
max(gcp.usage_start) as usage_start,
max(gcp.usage_start) as usage_end,
max(gcp.account_id) as account_id,
max(gcp.project_id) as project_id,
max(gcp.project_name) as project_name,
max(instance_type) as instance_type,
max(nullif(gcp.service_id, '')) as service_id,
max(gcp.service_alias) as service_alias,
max(data_transfer_direction) as data_transfer_direction,
max(gcp.sku_id) as sku_id,
max(gcp.sku_alias) as sku_alias,
max(nullif(gcp.region, '')) as region,
max(gcp.unit) as unit,
max(gcp.usage_amount) as usage_amount,
max(gcp.currency) as currency,
max(gcp.invoice_month) as invoice_month,
max(gcp.credit_amount) as credit_amount,
max(gcp.unblended_cost) as unblended_cost,
max(gcp.unblended_cost * {{markup | sqlsafe}}) as markup_cost,
max(gcp.unblended_cost * {{markup | sqlsafe}}) AS project_markup_cost,
max(gcp.unblended_cost) AS pod_cost,
cast(NULL AS double) AS pod_credit,
max(gcp.labels) as tags,
max(ocp.cost_category_id) as cost_category_id,
{{gcp_source_uuid}} as gcp_source,
{{ocp_source_uuid}} as ocp_source,
cast(year(max(gcp.usage_start)) as varchar) as year,
cast(month(max(gcp.usage_start)) as varchar) as month,
cast(day(max(gcp.usage_start)) as varchar) as day
FROM hive.{{ schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp
JOIN hive.{{schema | sqlsafe}}.gcp_openshift_daily_resource_matched_temp as gcp
ON gcp.usage_start = ocp.usage_start
AND (
(strpos(gcp.resource_name, ocp.node) != 0 AND ocp.data_source='Pod')
)
WHERE ocp.source = {{ocp_source_uuid}}
AND ocp.year = {{year}}
AND lpad(ocp.month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters
AND ocp.day IN {{days | inclause}}
AND (ocp.resource_id IS NOT NULL AND ocp.resource_id != '')
AND gcp.ocp_source = {{ocp_source_uuid}}
AND gcp.year = {{year}}
AND gcp.month = {{month}}
-- Filter for Node Network Costs to tie them to the Network unattributed project
AND data_transfer_direction IS NOT NULL
GROUP BY gcp.uuid, ocp.node
;

INSERT INTO postgres.{{schema | sqlsafe}}.reporting_ocpgcpcostlineitem_project_daily_summary_p (
uuid,
report_period_id,
Expand All @@ -733,6 +848,9 @@ INSERT INTO postgres.{{schema | sqlsafe}}.reporting_ocpgcpcostlineitem_project_d
instance_type,
service_id,
service_alias,
infrastructure_data_in_gigabytes,
infrastructure_data_out_gigabytes,
data_transfer_direction,
sku_id,
sku_alias,
region,
Expand Down Expand Up @@ -771,6 +889,25 @@ SELECT uuid(),
instance_type,
service_id,
service_alias,
CASE
WHEN upper(data_transfer_direction) = 'IN' THEN
-- GCP uses gibibyte but we are tracking this field in gigabytes
CASE unit
WHEN 'gibibyte' THEN usage_amount * 1.07374
ELSE usage_amount
END
ELSE 0
END as infrastructure_data_in_gigabytes,
CASE
WHEN upper(data_transfer_direction) = 'OUT' THEN
-- GCP uses gibibyte but we are tracking this field in gigabytes
CASE unit
WHEN 'gibibyte' THEN usage_amount * 1.07374
ELSE usage_amount
END
ELSE 0
END as infrastructure_data_out_gigabytes,
data_transfer_direction as data_transfer_direction,
sku_id,
sku_alias,
region,
Expand Down
Loading