From c92e575cf78c153608f0566c7954bd76a94d6638 Mon Sep 17 00:00:00 2001 From: Douglas Curtis Date: Mon, 18 Jan 2021 14:00:18 -0500 Subject: [PATCH 01/17] GCP forecasts (#2590) --- docs/source/specs/openapi.json | 45 ++++++++++++++ koku/api/common/permissions/__init__.py | 3 + koku/api/forecast/serializers.py | 4 ++ koku/api/forecast/views.py | 13 ++++ koku/api/urls.py | 2 + koku/api/views.py | 1 + koku/forecast/__init__.py | 1 + koku/forecast/forecast.py | 8 +++ koku/forecast/test/tests_forecast.py | 83 +++++++++++++++++++++++++ koku/sources/test/api/test_view.py | 8 ++- 10 files changed, 167 insertions(+), 1 deletion(-) diff --git a/docs/source/specs/openapi.json b/docs/source/specs/openapi.json index c9a87cc933..6f7058ae13 100644 --- a/docs/source/specs/openapi.json +++ b/docs/source/specs/openapi.json @@ -486,6 +486,51 @@ }] } }, + "/forecasts/gcp/costs": { + "summary": "GCP Cost Forecasts", + "get": { + "tags":["Forecasts"], + "parameters": [{ + "$ref": "#/components/parameters/QueryFilter", + "name":"QueryFilter" + }], + "responses": { + "200": { + "description": "An object describing the cost forecast.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Forecast" + } + } + } + }, + "400": { + "description": "Request Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + } + } + } + }, + "500": { + "description": "Unexpected Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + } + } + } + } + }, + "security": [{ + "basic_auth": [] + }] + } + }, "/forecasts/openshift/costs": { "summary": "OpenShift Cost Forecasts", "get": { diff --git a/koku/api/common/permissions/__init__.py b/koku/api/common/permissions/__init__.py index c6dfe4b8a1..795fbb280b 100644 --- a/koku/api/common/permissions/__init__.py +++ b/koku/api/common/permissions/__init__.py @@ -16,6 +16,7 @@ # from api.common.permissions.aws_access import AwsAccessPermission from api.common.permissions.azure_access import AzureAccessPermission +from api.common.permissions.gcp_access import GcpAccessPermission from api.common.permissions.openshift_access import OpenShiftAccessPermission from api.provider.models import Provider @@ -23,10 +24,12 @@ AwsAccessPermission.resource_type, AzureAccessPermission.resource_type, OpenShiftAccessPermission.resource_type, + GcpAccessPermission.resource_type, ] RESOURCE_TYPE_MAP = { AwsAccessPermission.resource_type: [Provider.PROVIDER_AWS, Provider.PROVIDER_AWS_LOCAL], AzureAccessPermission.resource_type: [Provider.PROVIDER_AZURE, Provider.PROVIDER_AZURE_LOCAL], OpenShiftAccessPermission.resource_type: [Provider.PROVIDER_OCP], + GcpAccessPermission.resource_type: [Provider.PROVIDER_GCP, Provider.PROVIDER_GCP_LOCAL], } diff --git a/koku/api/forecast/serializers.py b/koku/api/forecast/serializers.py index 698b08d419..58e01c01b4 100644 --- a/koku/api/forecast/serializers.py +++ b/koku/api/forecast/serializers.py @@ -50,6 +50,10 @@ class AWSCostForecastParamSerializer(ForecastParamSerializer): """AWS Cost Forecast Serializer.""" +class GCPCostForecastParamSerializer(ForecastParamSerializer): + """GCP Cost Forecast Serializer.""" + + class AzureCostForecastParamSerializer(ForecastParamSerializer): """Azure Cost Forecast Serializer.""" diff --git a/koku/api/forecast/views.py b/koku/api/forecast/views.py index 94bc014e6c..e0c79910fb 100644 --- a/koku/api/forecast/views.py +++ b/koku/api/forecast/views.py @@ -27,10 +27,12 @@ from api.common.pagination import ForecastListPaginator 
from api.common.permissions import AwsAccessPermission from api.common.permissions import AzureAccessPermission +from api.common.permissions import GcpAccessPermission from api.common.permissions import OpenShiftAccessPermission from api.common.permissions.openshift_all_access import OpenshiftAllAccessPermission from api.forecast.serializers import AWSCostForecastParamSerializer from api.forecast.serializers import AzureCostForecastParamSerializer +from api.forecast.serializers import GCPCostForecastParamSerializer from api.forecast.serializers import OCPAllCostForecastParamSerializer from api.forecast.serializers import OCPAWSCostForecastParamSerializer from api.forecast.serializers import OCPAzureCostForecastParamSerializer @@ -38,11 +40,13 @@ from api.query_params import QueryParameters from forecast import AWSForecast from forecast import AzureForecast +from forecast import GCPForecast from forecast import OCPAllForecast from forecast import OCPAWSForecast from forecast import OCPAzureForecast from forecast import OCPForecast from reporting.models import AzureTagsSummary +from reporting.models import GCPTagsSummary from reporting.models import OCPAWSTagsSummary from reporting.models import OCPAzureTagsSummary from reporting.models import OCPStorageVolumeLabelSummary @@ -128,3 +132,12 @@ class OCPAllCostForecastView(ForecastView): query_handler = OCPAllForecast serializer = OCPAllCostForecastParamSerializer tag_handler = [OCPAWSTagsSummary, OCPAzureTagsSummary] + + +class GCPForecastCostView(ForecastView): + """GCP Cost Forecast View.""" + + permission_classes = (GcpAccessPermission,) + query_handler = GCPForecast + serializer = GCPCostForecastParamSerializer + tag_handler = [GCPTagsSummary] diff --git a/koku/api/urls.py b/koku/api/urls.py index 2510e5784e..dbbb6c15c4 100644 --- a/koku/api/urls.py +++ b/koku/api/urls.py @@ -39,6 +39,7 @@ from api.views import DataExportRequestViewSet from api.views import GCPAccountView from api.views import GCPCostView +from api.views import GCPForecastCostView from api.views import GCPProjectsView from api.views import GCPTagView from api.views import metrics @@ -317,6 +318,7 @@ path("resource-types/openshift-nodes/", OCPNodesView.as_view(), name="openshift-nodes"), path("resource-types/cost-models/", CostModelResourceTypesView.as_view(), name="cost-models"), path("forecasts/aws/costs/", AWSCostForecastView.as_view(), name="aws-cost-forecasts"), + path("forecasts/gcp/costs/", GCPForecastCostView.as_view(), name="gcp-cost-forecasts"), path("forecasts/azure/costs/", AzureCostForecastView.as_view(), name="azure-cost-forecasts"), path("forecasts/openshift/costs/", OCPCostForecastView.as_view(), name="openshift-cost-forecasts"), path( diff --git a/koku/api/views.py b/koku/api/views.py index 1f8096a84c..502b4e11a2 100644 --- a/koku/api/views.py +++ b/koku/api/views.py @@ -20,6 +20,7 @@ from api.dataexport.views import DataExportRequestViewSet from api.forecast.views import AWSCostForecastView from api.forecast.views import AzureCostForecastView +from api.forecast.views import GCPForecastCostView from api.forecast.views import OCPAllCostForecastView from api.forecast.views import OCPAWSCostForecastView from api.forecast.views import OCPAzureCostForecastView diff --git a/koku/forecast/__init__.py b/koku/forecast/__init__.py index adb572e538..5c0e38ba9c 100644 --- a/koku/forecast/__init__.py +++ b/koku/forecast/__init__.py @@ -18,6 +18,7 @@ from .forecast import AWSForecast # noqa: F401 from .forecast import AzureForecast # noqa: F401 from .forecast import 
Forecast # noqa: F401 +from .forecast import GCPForecast # noqa: F401 from .forecast import OCPAllForecast # noqa: F401 from .forecast import OCPAWSForecast # noqa: F401 from .forecast import OCPAzureForecast # noqa: F401 diff --git a/koku/forecast/forecast.py b/koku/forecast/forecast.py index 8e466493fd..3beccada86 100644 --- a/koku/forecast/forecast.py +++ b/koku/forecast/forecast.py @@ -38,6 +38,7 @@ from api.report.aws.provider_map import AWSProviderMap from api.report.azure.openshift.provider_map import OCPAzureProviderMap from api.report.azure.provider_map import AzureProviderMap +from api.report.gcp.provider_map import GCPProviderMap from api.report.ocp.provider_map import OCPProviderMap from api.utils import DateHelper from reporting.provider.aws.models import AWSOrganizationalUnit @@ -557,3 +558,10 @@ class OCPAllForecast(Forecast): provider = Provider.OCP_ALL provider_map_class = OCPAllProviderMap + + +class GCPForecast(Forecast): + """GCP forecasting class.""" + + provider = Provider.PROVIDER_GCP + provider_map_class = GCPProviderMap diff --git a/koku/forecast/test/tests_forecast.py b/koku/forecast/test/tests_forecast.py index 5abb24be83..1872c47b31 100644 --- a/koku/forecast/test/tests_forecast.py +++ b/koku/forecast/test/tests_forecast.py @@ -28,6 +28,7 @@ from api.forecast.views import AWSCostForecastView from api.forecast.views import AzureCostForecastView +from api.forecast.views import GCPForecastCostView from api.forecast.views import OCPAllCostForecastView from api.forecast.views import OCPAWSCostForecastView from api.forecast.views import OCPAzureCostForecastView @@ -39,11 +40,15 @@ from api.utils import DateHelper from forecast import AWSForecast from forecast import AzureForecast +from forecast import GCPForecast from forecast import OCPAllForecast from forecast import OCPAWSForecast from forecast import OCPAzureForecast from forecast import OCPForecast from forecast.forecast import LinearForecastResult +from reporting.provider.gcp.models import GCPCostSummary +from reporting.provider.gcp.models import GCPCostSummaryByAccount +from reporting.provider.gcp.models import GCPCostSummaryByProject from reporting.provider.ocp.models import OCPCostSummary from reporting.provider.ocp.models import OCPCostSummaryByNode from reporting.provider.ocp.models import OCPUsageLineItemDailySummary @@ -452,6 +457,84 @@ def test_predict_flat(self): self.assertGreaterEqual(float(pval), 0) +class GCPForecastTest(IamTestCase): + """Tests the GCPForecast class.""" + + def test_predict_flat(self): + """Test that predict() returns expected values for flat costs.""" + dh = DateHelper() + + expected = [] + for n in range(0, 10): + # the test data needs to include some jitter to avoid + # division-by-zero in the underlying dot-product maths. 
+ expected.append( + { + "usage_start": (dh.this_month_start + timedelta(days=n)).date(), + "total_cost": 5 + random.random(), + "infrastructure_cost": 3 + random.random(), + "supplementary_cost": 2 + random.random(), + } + ) + mock_qset = MockQuerySet(expected) + + mocked_table = Mock() + mocked_table.objects.filter.return_value.order_by.return_value.values.return_value.annotate.return_value = ( # noqa: E501 + mock_qset + ) + mocked_table.len = mock_qset.len + + params = self.mocked_query_params("?", AzureCostForecastView) + instance = GCPForecast(params) + + instance.cost_summary_table = mocked_table + + results = instance.predict() + + for result in results: + for val in result.get("values", []): + self.assertIsInstance(val.get("date"), date) + + for item, cost in [ + (val.get("cost"), 5), + (val.get("infrastructure"), 3), + (val.get("supplementary"), 2), + ]: + self.assertAlmostEqual(float(item.get("total").get("value")), cost, delta=2.2) + self.assertAlmostEqual(float(item.get("confidence_max").get("value")), cost, delta=2.2) + self.assertAlmostEqual(float(item.get("confidence_min").get("value")), cost, delta=2.2) + self.assertAlmostEqual(float(item.get("rsquared").get("value")), 1, delta=2.2) + for pval in item.get("pvalues").get("value"): + self.assertGreaterEqual(float(pval), 0) + + def test_cost_summary_table(self): + """Test that we select a valid table or view.""" + params = self.mocked_query_params("?", GCPForecastCostView) + forecast = GCPForecast(params) + self.assertEqual(forecast.cost_summary_table, GCPCostSummary) + + params = self.mocked_query_params("?", GCPForecastCostView, access={"gcp.account": {"read": ["1"]}}) + forecast = GCPForecast(params) + self.assertEqual(forecast.cost_summary_table, GCPCostSummaryByAccount) + + params = self.mocked_query_params("?", GCPForecastCostView, access={"gcp.project": {"read": ["1"]}}) + forecast = GCPForecast(params) + self.assertEqual(forecast.cost_summary_table, GCPCostSummaryByProject) + + params = self.mocked_query_params( + "?", GCPForecastCostView, access={"gcp.account": {"read": ["1"]}, "gcp.project": {"read": ["1"]}} + ) + forecast = GCPForecast(params) + self.assertEqual(forecast.cost_summary_table, GCPCostSummaryByProject) + + params = self.mocked_query_params( + "?", GCPForecastCostView, access={"gcp.account": {"read": ["1"]}, "gcp.project": {"read": ["1"]}} + ) + + forecast = GCPForecast(params) + self.assertEqual(forecast.cost_summary_table, GCPCostSummaryByProject) + + class OCPForecastTest(IamTestCase): """Tests the OCPForecast class.""" diff --git a/koku/sources/test/api/test_view.py b/koku/sources/test/api/test_view.py index a8df996b9c..559a72f15d 100644 --- a/koku/sources/test/api/test_view.py +++ b/koku/sources/test/api/test_view.py @@ -331,5 +331,11 @@ def test_sources_access(self): mock_user = Mock(admin=False, access=permissions) request = Mock(user=mock_user) excluded = SourcesViewSet.get_excludes(request) - expected = [Provider.PROVIDER_AZURE, Provider.PROVIDER_AZURE_LOCAL, Provider.PROVIDER_OCP] + expected = [ + Provider.PROVIDER_AZURE, + Provider.PROVIDER_AZURE_LOCAL, + Provider.PROVIDER_OCP, + Provider.PROVIDER_GCP, + Provider.PROVIDER_GCP_LOCAL, + ] self.assertEqual(excluded, expected) From 827ad8bec3f67b615b14d6ecfc1fdda834311636 Mon Sep 17 00:00:00 2001 From: Brett Lentz Date: Mon, 18 Jan 2021 14:15:25 -0500 Subject: [PATCH 02/17] COST-854: pass exog vars to wls_prediction_std (#2592) --- koku/forecast/forecast.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git 
a/koku/forecast/forecast.py b/koku/forecast/forecast.py index 3beccada86..495789ef61 100644 --- a/koku/forecast/forecast.py +++ b/koku/forecast/forecast.py @@ -347,7 +347,7 @@ def _run_forecast(self, x, y): x = sm.add_constant(x) model = sm.OLS(y, x) results = model.fit() - return LinearForecastResult(results) + return LinearForecastResult(results, exog=x) def _uniquify_qset(self, qset, field="total_cost"): """Take a QuerySet list, sum costs within the same day, and arrange it into a list of tuples. @@ -395,14 +395,15 @@ class LinearForecastResult: Note: this class should be considered read-only """ - def __init__(self, regression_result): + def __init__(self, regression_result, exog=None): """Class constructor. Args: regression_result (RegressionResult) the results of a statsmodels regression + exog (array-like) exogenous variables for points to predict """ self._regression_result = regression_result - self._std_err, self._conf_lower, self._conf_upper = wls_prediction_std(regression_result) + self._std_err, self._conf_lower, self._conf_upper = wls_prediction_std(regression_result, exog=exog) try: LOG.debug(regression_result.summary()) From a8b04e6dbd30d5a7f598ed66db3552e4b8cc3776 Mon Sep 17 00:00:00 2001 From: Brett Lentz Date: Mon, 18 Jan 2021 15:49:50 -0500 Subject: [PATCH 03/17] COST-848: fix use of DateHandler (#2593) --- koku/api/organizations/queries.py | 3 - koku/api/query_handler.py | 18 +++--- koku/api/tags/queries.py | 3 - koku/forecast/forecast.py | 3 +- koku/forecast/test/tests_forecast.py | 84 +++++++++++++++++----------- 5 files changed, 60 insertions(+), 51 deletions(-) diff --git a/koku/api/organizations/queries.py b/koku/api/organizations/queries.py index 9000aaf660..8a61347d0d 100644 --- a/koku/api/organizations/queries.py +++ b/koku/api/organizations/queries.py @@ -28,7 +28,6 @@ from api.query_filter import QueryFilter from api.query_filter import QueryFilterCollection from api.query_handler import QueryHandler -from api.utils import DateHelper LOG = logging.getLogger(__name__) @@ -71,8 +70,6 @@ class OrgQueryHandler(QueryHandler): SUPPORTED_FILTERS = [] FILTER_MAP = {} - dh = DateHelper() - def __init__(self, parameters): """Establish org query handler. 
diff --git a/koku/api/query_handler.py b/koku/api/query_handler.py index 8b792b22c3..71cc630783 100644 --- a/koku/api/query_handler.py +++ b/koku/api/query_handler.py @@ -59,6 +59,7 @@ def __init__(self, parameters): """ LOG.debug(f"Query Params: {parameters}") + self.dh = DateHelper() parameters = self.filter_to_order_by(parameters) self.tenant = parameters.tenant self.access = parameters.access @@ -215,25 +216,24 @@ def _get_timeframe(self): time_scope_units = self.get_time_scope_units() start = None end = None - dh = DateHelper() if time_scope_units == "month": if time_scope_value == -1: # get current month - start = dh.this_month_start - end = dh.today + start = self.dh.this_month_start + end = self.dh.today else: # get previous month - start = dh.last_month_start - end = dh.last_month_end + start = self.dh.last_month_start + end = self.dh.last_month_end else: if time_scope_value == -10: # get last 10 days - start = dh.n_days_ago(dh.this_hour, 9) - end = dh.this_hour + start = self.dh.n_days_ago(self.dh.this_hour, 9) + end = self.dh.this_hour else: # get last 30 days - start = dh.n_days_ago(dh.this_hour, 29) - end = dh.this_hour + start = self.dh.n_days_ago(self.dh.this_hour, 29) + end = self.dh.this_hour self.start_datetime = start self.end_datetime = end diff --git a/koku/api/tags/queries.py b/koku/api/tags/queries.py index aca0dbea82..732a5bb3fb 100644 --- a/koku/api/tags/queries.py +++ b/koku/api/tags/queries.py @@ -24,7 +24,6 @@ from api.query_filter import QueryFilter from api.query_filter import QueryFilterCollection from api.query_handler import QueryHandler -from api.utils import DateHelper LOG = logging.getLogger(__name__) @@ -69,8 +68,6 @@ class TagQueryHandler(QueryHandler): "value": {"field": "value", "operation": "icontains", "composition_key": "value_filter"}, } - dh = DateHelper() - def __init__(self, parameters): """Establish tag query handler. diff --git a/koku/forecast/forecast.py b/koku/forecast/forecast.py index 495789ef61..225a2be28e 100644 --- a/koku/forecast/forecast.py +++ b/koku/forecast/forecast.py @@ -62,8 +62,6 @@ class Forecast: REPORT_TYPE = "costs" - dh = DateHelper() - def __init__(self, query_params): # noqa: C901 """Class Constructor. 
@@ -73,6 +71,7 @@ def __init__(self, query_params): # noqa: C901 - filters (QueryFilterCollection) - query_range (tuple) """ + self.dh = DateHelper() self.params = query_params # select appropriate model based on access diff --git a/koku/forecast/test/tests_forecast.py b/koku/forecast/test/tests_forecast.py index 1872c47b31..b9f0b5897a 100644 --- a/koku/forecast/test/tests_forecast.py +++ b/koku/forecast/test/tests_forecast.py @@ -85,51 +85,67 @@ def test_constructor(self): def test_forecast_days_required(self): """Test that we accurately select the number of days.""" - dh = DateHelper() params = self.mocked_query_params("?", AWSCostForecastView) - with patch("forecast.forecast.Forecast.dh") as mock_dh: - mock_dh.today = dh.this_month_start - mock_dh.this_month_start = dh.this_month_start - mock_dh.this_month_end = dh.this_month_end - mock_dh.last_month_start = dh.last_month_start - mock_dh.last_month_end = dh.last_month_end + + mock_dh = Mock(spec=DateHelper) + + mock_dh.return_value.today = datetime(2000, 1, 1, 0, 0, 0, 0) + mock_dh.return_value.yesterday = datetime(1999, 12, 31, 0, 0, 0, 0) + mock_dh.return_value.this_month_start = datetime(2000, 1, 1, 0, 0, 0, 0) + mock_dh.return_value.this_month_end = datetime(2000, 1, 31, 0, 0, 0, 0) + mock_dh.return_value.last_month_start = datetime(1999, 12, 1, 0, 0, 0, 0) + mock_dh.return_value.last_month_end = datetime(1999, 12, 31, 0, 0, 0, 0) + + with patch("forecast.forecast.DateHelper", new_callable=lambda: mock_dh) as mock_dh: forecast = AWSForecast(params) - self.assertEqual(forecast.forecast_days_required, dh.this_month_end.day) - - with patch("forecast.forecast.Forecast.dh") as mock_dh: - mock_dh.today = datetime(2000, 1, 13, 0, 0, 0, 0) - mock_dh.yesterday = datetime(2000, 1, 12, 0, 0, 0, 0) - mock_dh.this_month_start = datetime(2000, 1, 1, 0, 0, 0, 0) - mock_dh.this_month_end = datetime(2000, 1, 31, 0, 0, 0, 0) - mock_dh.last_month_start = datetime(1999, 12, 1, 0, 0, 0, 0) - mock_dh.last_month_end = datetime(1999, 12, 31, 0, 0, 0, 0) + self.assertEqual(forecast.forecast_days_required, 31) + + mock_dh.return_value.today = datetime(2000, 1, 13, 0, 0, 0, 0) + mock_dh.return_value.yesterday = datetime(2000, 1, 12, 0, 0, 0, 0) + mock_dh.return_value.this_month_start = datetime(2000, 1, 1, 0, 0, 0, 0) + mock_dh.return_value.this_month_end = datetime(2000, 1, 31, 0, 0, 0, 0) + mock_dh.return_value.last_month_start = datetime(1999, 12, 1, 0, 0, 0, 0) + mock_dh.return_value.last_month_end = datetime(1999, 12, 31, 0, 0, 0, 0) + + with patch("forecast.forecast.DateHelper", new_callable=lambda: mock_dh) as mock_dh: forecast = AWSForecast(params) self.assertEqual(forecast.forecast_days_required, 19) - def test_query_range(self): + def test_query_range_under(self): """Test that we select the correct range based on day of month.""" - dh = DateHelper() params = self.mocked_query_params("?", AWSCostForecastView) - with patch("forecast.forecast.Forecast.dh") as mock_dh: - mock_dh.today = dh.this_month_start + timedelta(days=AWSForecast.MINIMUM - 1) - mock_dh.yesterday = dh.this_month_start + timedelta(days=AWSForecast.MINIMUM - 2) - mock_dh.this_month_start = dh.this_month_start - mock_dh.this_month_end = dh.this_month_end - mock_dh.last_month_start = dh.last_month_start - mock_dh.last_month_end = dh.last_month_end - expected = (dh.last_month_start, mock_dh.yesterday) + dh = DateHelper() + mock_dh = Mock(spec=DateHelper) + + mock_dh.return_value.today = dh.this_month_start + timedelta(days=AWSForecast.MINIMUM - 1) + mock_dh.return_value.yesterday = 
dh.this_month_start + timedelta(days=AWSForecast.MINIMUM - 2) + mock_dh.return_value.this_month_start = dh.this_month_start + mock_dh.return_value.this_month_end = dh.this_month_end + mock_dh.return_value.last_month_start = dh.last_month_start + mock_dh.return_value.last_month_end = dh.last_month_end + + with patch("forecast.forecast.DateHelper", new_callable=lambda: mock_dh) as mock_dh: + expected = (dh.last_month_start, dh.this_month_start + timedelta(days=AWSForecast.MINIMUM - 2)) forecast = AWSForecast(params) self.assertEqual(forecast.query_range, expected) - with patch("forecast.forecast.Forecast.dh") as mock_dh: - mock_dh.today = dh.this_month_start + timedelta(days=(AWSForecast.MINIMUM)) - mock_dh.yesterday = dh.this_month_start + timedelta(days=AWSForecast.MINIMUM - 1) - mock_dh.this_month_start = dh.this_month_start - mock_dh.this_month_end = dh.this_month_end - mock_dh.last_month_start = dh.last_month_start - mock_dh.last_month_end = dh.last_month_end - expected = (dh.this_month_start, dh.this_month_start + timedelta(days=AWSForecast.MINIMUM - 1)) + def test_query_range_over(self): + """Test that we select the correct range based on day of month.""" + params = self.mocked_query_params("?", AWSCostForecastView) + + dh = DateHelper() + mock_dh = Mock(spec=DateHelper) + + mock_dh.return_value.today = dh.this_month_start + timedelta(days=(AWSForecast.MINIMUM + 1)) + mock_dh.return_value.yesterday = dh.this_month_start + timedelta(days=AWSForecast.MINIMUM) + mock_dh.return_value.this_month_start = dh.this_month_start + mock_dh.return_value.this_month_end = dh.this_month_end + mock_dh.return_value.last_month_start = dh.last_month_start + mock_dh.return_value.last_month_end = dh.last_month_end + + with patch("forecast.forecast.DateHelper", new_callable=lambda: mock_dh) as mock_dh: + expected = (dh.this_month_start, dh.this_month_start + timedelta(days=AWSForecast.MINIMUM)) forecast = AWSForecast(params) self.assertEqual(forecast.query_range, expected) From 3a42a67fe60c3b718506107e6c4a4324b7d2cbdb Mon Sep 17 00:00:00 2001 From: Douglas Curtis Date: Tue, 19 Jan 2021 11:37:42 -0500 Subject: [PATCH 04/17] Updating GCP size of usage_amount in daily table (#2595) --- .../migrations/0161_auto_20210118_2113.py | 16 ++++++++++++++++ koku/reporting/provider/gcp/models.py | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 koku/reporting/migrations/0161_auto_20210118_2113.py diff --git a/koku/reporting/migrations/0161_auto_20210118_2113.py b/koku/reporting/migrations/0161_auto_20210118_2113.py new file mode 100644 index 0000000000..5aced67db4 --- /dev/null +++ b/koku/reporting/migrations/0161_auto_20210118_2113.py @@ -0,0 +1,16 @@ +# Generated by Django 3.1.3 on 2021-01-18 21:13 +from django.db import migrations +from django.db import models + + +class Migration(migrations.Migration): + + dependencies = [("reporting", "0160_auto_20210114_1548")] + + operations = [ + migrations.AlterField( + model_name="gcpcostentrylineitemdaily", + name="usage_amount", + field=models.DecimalField(decimal_places=15, max_digits=33, null=True), + ) + ] diff --git a/koku/reporting/provider/gcp/models.py b/koku/reporting/provider/gcp/models.py index b4861ac96a..b3b563014d 100644 --- a/koku/reporting/provider/gcp/models.py +++ b/koku/reporting/provider/gcp/models.py @@ -137,7 +137,7 @@ class Meta: cost = models.DecimalField(max_digits=24, decimal_places=9, null=True, blank=True) currency = models.CharField(max_length=256, null=True, blank=True) conversion_rate = models.CharField(max_length=256, 
null=True, blank=True) - usage_amount = models.DecimalField(max_digits=24, decimal_places=9, null=True) + usage_amount = models.DecimalField(max_digits=33, decimal_places=15, null=True) usage_unit = models.CharField(max_length=256, null=True, blank=True) usage_in_pricing_units = models.DecimalField(max_digits=24, decimal_places=9, null=True) usage_pricing_unit = models.CharField(max_length=256, null=True, blank=True) From 62f715666c06aaf13c9d93b47c5b7cd29ecd9364 Mon Sep 17 00:00:00 2001 From: Andrew Berglund Date: Tue, 19 Jan 2021 14:11:27 -0500 Subject: [PATCH 05/17] COST-891 aws report processor memory bugfix (#2597) * Add string split util * Bugfix for processing memory for aws products --- .../processor/aws/aws_report_processor.py | 38 +++++++++++++------ .../aws/test_aws_report_processor.py | 28 ++++++++++++++ koku/masu/test/util/test_common.py | 8 ++++ koku/masu/util/common.py | 6 +++ 4 files changed, 68 insertions(+), 12 deletions(-) diff --git a/koku/masu/processor/aws/aws_report_processor.py b/koku/masu/processor/aws/aws_report_processor.py index 0567c626fd..5d8c809e1f 100644 --- a/koku/masu/processor/aws/aws_report_processor.py +++ b/koku/masu/processor/aws/aws_report_processor.py @@ -28,6 +28,7 @@ from masu.config import Config from masu.database.aws_report_db_accessor import AWSReportDBAccessor from masu.processor.report_processor_base import ReportProcessorBase +from masu.util.common import split_alphanumeric_string from reporting.provider.aws.models import AWSCostEntry from reporting.provider.aws.models import AWSCostEntryBill from reporting.provider.aws.models import AWSCostEntryLineItem @@ -222,6 +223,30 @@ def _update_mappings(self): self.processed_report.remove_processed_rows() + def _process_memory_value(self, data): + """Parse out value and unit from memory strings.""" + if "memory" in data and data["memory"] is not None: + unit = None + try: + memory = float(data["memory"]) + except ValueError: + memory = None + # Memory can come as a single number or a number with a unit + # e.g. "1", "1GB", "1 Gb" so it gets special cased. + memory_list = list(split_alphanumeric_string(data["memory"])) + if memory_list: + memory = memory_list[0] + if len(memory_list) > 1: + unit = memory_list[1] + try: + memory = float(memory) + except (ValueError, TypeError): + memory = None + unit = None + data["memory"] = memory + data["memory_unit"] = unit + return data + def _get_data_for_table(self, row, table_name): """Extract the data from a row for a specific table. @@ -233,18 +258,6 @@ def _get_data_for_table(self, row, table_name): (dict): The data from the row keyed on the DB table's column names """ - # Memory can come as a single number or a number with a unit - # e.g. "1" vs. "1 Gb" so it gets special cased. 
- if "product/memory" in row and row["product/memory"] is not None: - memory_list = row["product/memory"].split(" ") - if len(memory_list) > 1: - memory, unit = row["product/memory"].split(" ") - else: - memory = memory_list[0] - unit = None - row["product/memory"] = memory - row["product/memory_unit"] = unit - column_map = REPORT_COLUMN_MAP[table_name] return {column_map[key]: value for key, value in row.items() if key in column_map} @@ -436,6 +449,7 @@ def _create_cost_entry_product(self, row, report_db_accessor): return self.existing_product_map[key] data = self._get_data_for_table(row, table_name._meta.db_table) + data = self._process_memory_value(data) value_set = set(data.values()) if value_set == {""}: return diff --git a/koku/masu/test/processor/aws/test_aws_report_processor.py b/koku/masu/test/processor/aws/test_aws_report_processor.py index 8f66c78e8b..44f384bffb 100644 --- a/koku/masu/test/processor/aws/test_aws_report_processor.py +++ b/koku/masu/test/processor/aws/test_aws_report_processor.py @@ -1228,3 +1228,31 @@ def test_get_date_column_filter(self): date_filter = processor.get_date_column_filter() self.assertIn("usage_start__gte", date_filter) + + def test_process_memory_value(self): + """Test that product data has memory properly parsed.""" + + data = {"memory": None} + result = self.processor._process_memory_value(data) + self.assertIsNone(result.get("memory")) + self.assertIsNone(result.get("memory_unit")) + + data = {"memory": "NA"} + result = self.processor._process_memory_value(data) + self.assertIsNone(result.get("memory")) + self.assertIsNone(result.get("memory_unit")) + + data = {"memory": "4GiB"} + result = self.processor._process_memory_value(data) + self.assertEqual(result.get("memory"), 4) + self.assertEqual(result.get("memory_unit"), "GiB") + + data = {"memory": "4 GB"} + result = self.processor._process_memory_value(data) + self.assertEqual(result.get("memory"), 4) + self.assertEqual(result.get("memory_unit"), "GB") + + data = {"memory": "4"} + result = self.processor._process_memory_value(data) + self.assertEqual(result.get("memory"), 4) + self.assertIsNone(result.get("memory_unit")) diff --git a/koku/masu/test/util/test_common.py b/koku/masu/test/util/test_common.py index 286b29ffce..9ddcc28a54 100644 --- a/koku/masu/test/util/test_common.py +++ b/koku/masu/test/util/test_common.py @@ -282,6 +282,14 @@ def test_determine_if_full_summary_update_needed(self): # Current month, has not been summarized before self.assertTrue(common_utils.determine_if_full_summary_update_needed(current_month_bill)) + def test_split_alphanumeric_string(self): + """Test the alpha-numeric split function.""" + s = "4 GiB" + + expected = ["4 ", "GiB"] + result = list(common_utils.split_alphanumeric_string(s)) + self.assertEqual(result, expected) + class NamedTemporaryGZipTests(TestCase): """Tests for NamedTemporaryGZip.""" diff --git a/koku/masu/util/common.py b/koku/masu/util/common.py index c09a8d88c9..35c6fba4af 100644 --- a/koku/masu/util/common.py +++ b/koku/masu/util/common.py @@ -22,6 +22,7 @@ import logging import re from datetime import timedelta +from itertools import groupby from os import remove from tempfile import gettempdir from uuid import uuid4 @@ -362,3 +363,8 @@ def determine_if_full_summary_update_needed(bill): return True return False + + +def split_alphanumeric_string(s): + for k, g in groupby(s, str.isalpha): + yield "".join(g) From b4060d1cdbeb23dd1c75682658316797c5e0c613 Mon Sep 17 00:00:00 2001 From: Andrew Berglund Date: Tue, 19 Jan 2021 15:27:29 -0500 
Subject: [PATCH 06/17] COST-442 ocp on azure presto summary (#2523) * Add processing of OCP on Azure via Presto * Update OCP flow to do tag enablement after upload of data * Use postgres to do OCP tag summary Co-authored-by: Douglas Curtis Co-authored-by: HAP --- .../masu/database/azure_report_db_accessor.py | 23 +- koku/masu/database/ocp_report_db_accessor.py | 27 +- .../reporting_ocp_lineitem_daily_summary.sql | 404 ------ ...rting_ocpawscostlineitem_daily_summary.sql | 12 +- ...ing_ocpazurecostlineitem_daily_summary.sql | 1195 +++++++++++++++++ ...porting_ocpusagelineitem_daily_summary.sql | 387 ++++++ ...ing_ocpazurecostlineitem_daily_summary.sql | 430 +++--- ...rting_ocpstoragelineitem_daily_summary.sql | 34 +- ...eporting_ocpstoragevolumelabel_summary.sql | 7 +- ...porting_ocpusagelineitem_daily_summary.sql | 29 +- ...item_daily_summary_update_enabled_tags.sql | 86 ++ .../reporting_ocpusagepodlabel_summary.sql | 13 +- .../ocp/ocp_cloud_parquet_summary_updater.py | 41 +- .../ocp/ocp_report_parquet_processor.py | 4 + .../ocp/ocp_report_parquet_summary_updater.py | 3 +- .../ocp/ocp_report_summary_updater.py | 1 + .../database/test_azure_report_db_accessor.py | 26 + .../database/test_ocp_report_db_accessor.py | 50 + ...cp_cloud_parquet_report_summary_updater.py | 119 +- .../ocp/test_ocp_report_parquet_processor.py | 6 +- .../migrations/0162_auto_20201120_1901.py | 12 + .../provider/azure/openshift/models.py | 4 - .../nise_ymls/ocp_on_aws/ocp_static_data.yml | 3 + .../ocp_on_azure/ocp_static_data.yml | 4 + 24 files changed, 2101 insertions(+), 819 deletions(-) delete mode 100644 koku/masu/database/presto_sql/reporting_ocp_lineitem_daily_summary.sql create mode 100644 koku/masu/database/presto_sql/reporting_ocpazurecostlineitem_daily_summary.sql create mode 100644 koku/masu/database/presto_sql/reporting_ocpusagelineitem_daily_summary.sql create mode 100644 koku/masu/database/sql/reporting_ocpusagelineitem_daily_summary_update_enabled_tags.sql create mode 100644 koku/reporting/migrations/0162_auto_20201120_1901.py diff --git a/koku/masu/database/azure_report_db_accessor.py b/koku/masu/database/azure_report_db_accessor.py index 3b275ee103..495566cf16 100644 --- a/koku/masu/database/azure_report_db_accessor.py +++ b/koku/masu/database/azure_report_db_accessor.py @@ -224,7 +224,7 @@ def get_summary_query_for_billid(self, bill_id): return summary_item_query def populate_ocp_on_azure_cost_daily_summary(self, start_date, end_date, cluster_id, bill_ids, markup_value): - """Populate the daily cost aggregated summary for OCP on AWS. + """Populate the daily cost aggregated summary for OCP on Azure. Args: start_date (datetime.date) The date to start populating the table. 
@@ -262,6 +262,27 @@ def populate_ocp_on_azure_tags_summary_table(self): agg_sql, agg_sql_params = self.jinja_sql.prepare_query(agg_sql, agg_sql_params) self._execute_raw_sql_query(table_name, agg_sql, bind_params=list(agg_sql_params)) + def populate_ocp_on_azure_cost_daily_summary_presto( + self, start_date, end_date, openshift_provider_uuid, azure_provider_uuid, cluster_id, bill_id, markup_value + ): + """Populate the daily cost aggregated summary for OCP on Azure.""" + summary_sql = pkgutil.get_data("masu.database", "presto_sql/reporting_ocpazurecostlineitem_daily_summary.sql") + summary_sql = summary_sql.decode("utf-8") + summary_sql_params = { + "uuid": str(openshift_provider_uuid).replace("-", "_"), + "schema": self.schema, + "start_date": start_date, + "end_date": end_date, + "year": start_date.strftime("%Y"), + "month": start_date.strftime("%m"), + "azure_source_uuid": azure_provider_uuid, + "ocp_source_uuid": openshift_provider_uuid, + "cluster_id": cluster_id, + "bill_id": bill_id, + "markup": markup_value, + } + self._execute_presto_multipart_sql_query(self.schema, summary_sql, bind_params=summary_sql_params) + def populate_enabled_tag_keys(self, start_date, end_date, bill_ids): """Populate the enabled tag key table. Args: diff --git a/koku/masu/database/ocp_report_db_accessor.py b/koku/masu/database/ocp_report_db_accessor.py index 408a695508..811b438945 100644 --- a/koku/masu/database/ocp_report_db_accessor.py +++ b/koku/masu/database/ocp_report_db_accessor.py @@ -343,6 +343,31 @@ def populate_line_item_daily_table(self, start_date, end_date, cluster_id): daily_sql, daily_sql_params = self.jinja_sql.prepare_query(daily_sql, daily_sql_params) self._execute_raw_sql_query(table_name, daily_sql, start_date, end_date, bind_params=list(daily_sql_params)) + def update_line_item_daily_summary_with_enabled_tags(self, start_date, end_date, report_period_ids): + """Populate the enabled tag key table. + Args: + start_date (datetime.date) The date to start populating the table. + end_date (datetime.date) The date to end on. + bill_ids (list) A list of bill IDs. + Returns + (None) + """ + table_name = OCP_REPORT_TABLE_MAP["line_item_daily_summary"] + summary_sql = pkgutil.get_data( + "masu.database", "sql/reporting_ocpusagelineitem_daily_summary_update_enabled_tags.sql" + ) + summary_sql = summary_sql.decode("utf-8") + summary_sql_params = { + "start_date": start_date, + "end_date": end_date, + "report_period_ids": report_period_ids, + "schema": self.schema, + } + summary_sql, summary_sql_params = self.jinja_sql.prepare_query(summary_sql, summary_sql_params) + self._execute_raw_sql_query( + table_name, summary_sql, start_date, end_date, bind_params=list(summary_sql_params) + ) + def get_ocp_infrastructure_map(self, start_date, end_date, **kwargs): """Get the OCP on infrastructure map. 
@@ -552,7 +577,7 @@ def populate_line_item_daily_summary_table_presto( start_date = start_date.date() end_date = end_date.date() - tmpl_summary_sql = pkgutil.get_data("masu.database", "presto_sql/reporting_ocp_lineitem_daily_summary.sql") + tmpl_summary_sql = pkgutil.get_data("masu.database", "presto_sql/reporting_ocpusagelineitem_daily_summary.sql") tmpl_summary_sql = tmpl_summary_sql.decode("utf-8") summary_sql_params = { "uuid": str(source).replace("-", "_"), diff --git a/koku/masu/database/presto_sql/reporting_ocp_lineitem_daily_summary.sql b/koku/masu/database/presto_sql/reporting_ocp_lineitem_daily_summary.sql deleted file mode 100644 index 6976a84da4..0000000000 --- a/koku/masu/database/presto_sql/reporting_ocp_lineitem_daily_summary.sql +++ /dev/null @@ -1,404 +0,0 @@ -/* - * Process OCP Usage Data Processing SQL - * This SQL will utilize Presto for the raw line-item data aggregating - * and store the results into the koku database summary tables. - */ - --- Using the convention of a double-underscore prefix to denote a temp table. - -/* - * ==================================== - * COMMON - * ==================================== - */ - --- node label line items by day presto sql --- still using a "temp" table here because there is no guarantee how big this might get -DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__ocp_node_label_line_item_daily_{{uuid | sqlsafe}}; -CREATE TABLE hive.{{schema | sqlsafe}}.__ocp_node_label_line_item_daily_{{uuid | sqlsafe}} AS ( - SELECT {{cluster_id}} as "cluster_id", - date(nli.interval_start) as "usage_start", - max(nli.node) as "node", - nli.node_labels, - max(nli.source) as "source", - max(nli.year) as "year", - max(nli.month) as "month" - FROM hive.{{schema | sqlsafe}}.openshift_node_labels_line_items as "nli" - WHERE nli.source = {{source}} - AND nli.year = {{year}} - AND nli.month = {{month}} - AND nli.interval_start >= TIMESTAMP {{start_date}} - AND nli.interval_start < date_add('day', 1, TIMESTAMP {{end_date}}) - GROUP - BY {{cluster_id}}, - date(nli.interval_start), - nli.node_labels -) -; - -/* - * ==================================== - * POD - * ==================================== - */ - --- cluster daily cappacity presto sql --- still using a "temp" table here because there is no guarantee how big this might get -DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__ocp_cluster_capacity_{{uuid | sqlsafe}}; -CREATE TABLE hive.{{schema | sqlsafe}}.__ocp_cluster_capacity_{{uuid | sqlsafe}} as ( - SELECT {{cluster_id}} as "cluster_id", - usage_start, - max(cc.source) as "source", - max(cc.year) as "year", - max(cc.month) as "month", - sum(cc.max_cluster_capacity_cpu_core_seconds) as cluster_capacity_cpu_core_seconds, - sum(cc.max_cluster_capacity_memory_byte_seconds) as cluster_capacity_memory_byte_seconds - FROM ( - SELECT date(li.interval_start) as usage_start, - max(li.source) as "source", - max(li.year) as "year", - max(li.month) as "month", - max(li.node_capacity_cpu_core_seconds) as "max_cluster_capacity_cpu_core_seconds", - max(li.node_capacity_memory_byte_seconds) as "max_cluster_capacity_memory_byte_seconds" - FROM hive.{{schema | sqlsafe}}.openshift_pod_usage_line_items AS li - WHERE li.source = {{source}} - AND li.year = {{year}} - AND li.month = {{month}} - AND li.interval_start >= TIMESTAMP {{start_date}} - AND li.interval_start < date_add('day', 1, TIMESTAMP {{end_date}}) - GROUP - BY date(li.interval_start) - ) as cc - GROUP - BY {{cluster_id}}, - usage_start -) -; - -/* - * Delete the old block of data (if any) based on the 
usage range - * Inserting a record in this log will trigger a delete against the specified table - * in the same schema as the log table with the specified where_clause - * start_date and end_date MUST be strings in order for this to work properly. - */ -INSERT - INTO postgres.{{schema | sqlsafe}}.presto_delete_wrapper_log - ( - id, - action_ts, - table_name, - where_clause, - result_rows - ) -VALUES ( - uuid(), - now(), - 'reporting_ocpusagelineitem_daily_summary', - 'where usage_start >= '{{start_date}}'::date ' || - 'and usage_start <= '{{end_date}}'::date ' || - 'and cluster_id = '{{cluster_id}}' ' || - 'and data_source = ''Pod''', - null -) -; - -/* - * This is the target summarization sql for POD usage - * It combines the prior daily summarization query with the final summarization query - * by use of MAP_FILTER to filter the combined node line item labels as well as - * the line-item pod labels against the postgres enabled keys in the same query - */ -INSERT - INTO postgres.{{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary - ( - uuid, - report_period_id, - cluster_id, - cluster_alias, - data_source, - usage_start, - usage_end, - namespace, - node, - resource_id, - pod_labels, - pod_usage_cpu_core_hours, - pod_request_cpu_core_hours, - pod_limit_cpu_core_hours, - pod_usage_memory_gigabyte_hours, - pod_request_memory_gigabyte_hours, - pod_limit_memory_gigabyte_hours, - node_capacity_cpu_cores, - node_capacity_cpu_core_hours, - node_capacity_memory_gigabytes, - node_capacity_memory_gigabyte_hours, - cluster_capacity_cpu_core_hours, - cluster_capacity_memory_gigabyte_hours, - source_uuid, - infrastructure_usage_cost - ) -SELECT uuid() as "uuid", - {{report_period_id}} as "report_period_id", - {{cluster_id}} as "cluster_id", - {{cluster_alias}} as "cluster_alias", - 'Pod' as "data_source", - pua.usage_start, - pua.usage_start as "usage_end", - pua.namespace, - pua.node, - pua.resource_id, - cast(pua.pod_labels as json) as "pod_labels", - pua.pod_usage_cpu_core_hours, - pua.pod_request_cpu_core_hours, - pua.pod_limit_cpu_core_hours, - pua.pod_usage_memory_gigabyte_hours, - pua.pod_request_memory_gigabyte_hours, - pua.pod_limit_memory_gigabyte_hours, - pua.node_capacity_cpu_cores, - pua.node_capacity_cpu_core_hours, - pua.node_capacity_memory_gigabytes, - pua.node_capacity_memory_gigabyte_hours, - pua.cluster_capacity_cpu_core_hours, - pua.cluster_capacity_memory_gigabyte_hours, - cast(pua.source_uuid as UUID) as "source_uuid", - JSON '{"cpu": 0.000000000, "memory": 0.000000000, "storage": 0.000000000}' as "infrastructure_usage_cost" - FROM ( - SELECT date(li.interval_start) as "usage_start", - li.namespace, - li.node, - li.source as "source_uuid", - map_filter(map_concat(cast(json_parse(coalesce(nli.node_labels, '{}')) as map(varchar, varchar)), - cast(json_parse(li.pod_labels) as map(varchar, varchar))), - (k, v) -> contains(ek.enabled_keys, k)) as "pod_labels", - max(li.resource_id) as "resource_id", - sum(li.pod_usage_cpu_core_seconds) / 3600.0 as "pod_usage_cpu_core_hours", - sum(li.pod_request_cpu_core_seconds) / 3600.0 as "pod_request_cpu_core_hours", - sum(li.pod_limit_cpu_core_seconds) / 3600.0 as "pod_limit_cpu_core_hours", - sum(li.pod_usage_memory_byte_seconds) / 3600.0 * power(2, -30) as "pod_usage_memory_gigabyte_hours", - sum(li.pod_request_memory_byte_seconds) / 3600.0 * power(2, -30) as "pod_request_memory_gigabyte_hours", - sum(li.pod_limit_memory_byte_seconds) / 3600.0 * power(2, -30) as "pod_limit_memory_gigabyte_hours", - max(li.node_capacity_cpu_cores) as 
"node_capacity_cpu_cores", - sum(li.node_capacity_cpu_core_seconds) / 3600.0 as "node_capacity_cpu_core_hours", - max(li.node_capacity_memory_bytes) * power(2, -30) as "node_capacity_memory_gigabytes", - sum(li.node_capacity_memory_byte_seconds) / 3600.0 * power(2, -30) as "node_capacity_memory_gigabyte_hours", - max(cc.cluster_capacity_cpu_core_seconds) / 3600.0 as "cluster_capacity_cpu_core_hours", - max(cc.cluster_capacity_memory_byte_seconds) / 3600.0 * power(2, -30) as "cluster_capacity_memory_gigabyte_hours" - FROM hive.{{schema | sqlsafe}}.openshift_pod_usage_line_items as "li" - LEFT - JOIN hive.{{schema | sqlsafe}}.__ocp_node_label_line_item_daily_{{uuid | sqlsafe}} as "nli" - ON nli.node = li.node - AND nli.usage_start = date(li.interval_start) - AND nli.source = li.source - LEFT - JOIN hive.{{schema | sqlsafe}}.__ocp_cluster_capacity_{{uuid | sqlsafe}} as "cc" - ON cc.source = li.source - AND cc.usage_start = date(li.interval_start) - CROSS - JOIN ( - SELECT array_agg(distinct key) as "enabled_keys" - FROM postgres.{{schema | sqlsafe}}.reporting_ocpenabledtagkeys - ) as "ek" - WHERE li.source = {{source}} - AND li.year = {{year}} - AND li.month = {{month}} - AND li.interval_start >= TIMESTAMP {{start_date}} - AND li.interval_start < date_add('day', 1, TIMESTAMP {{end_date}}) - GROUP - BY date(li.interval_start), - li.namespace, - li.node, - li.source, - 5 /* THIS ORDINAL MUST BE KEPT IN SYNC WITH THE map_filter EXPRESSION */ - /* The map_filter expression was too complex for presto to use */ - ) as "pua" -; - - -/* - * ==================================== - * STORAGE - * ==================================== - */ - --- Storage node label line items -DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__volume_nodes_{{uuid | sqlsafe}}; -CREATE TABLE hive.{{schema | sqlsafe}}.__volume_nodes_{{uuid | sqlsafe}} as ( - SELECT sli.namespace, - sli.pod, - date(sli.interval_start) as "usage_start", - max(uli.node) as "node", - sli.source, - sli.year, - sli.month - FROM hive.{{schema | sqlsafe}}.openshift_storage_usage_line_items as "sli" - JOIN hive.{{schema | sqlsafe}}.openshift_pod_usage_line_items as "uli" - ON uli.source = sli.source - AND uli.year = sli.year - AND uli.month = sli.month - AND uli.namespace = sli.namespace - AND uli.pod = sli.pod - AND date(uli.interval_start) = date(sli.interval_start) - WHERE sli.source = {{source}} - AND sli.year = {{year}} - AND sli.month = {{month}} - AND sli.interval_start >= TIMESTAMP {{start_date}} - AND sli.interval_start < date_add('day', 1, TIMESTAMP {{end_date}}) - GROUP - BY sli.namespace, - sli.pod, - date(sli.interval_start), - sli.source, - sli.year, - sli.month -) -; - -/* - * Delete the old block of data (if any) based on the usage range - * Inserting a record in this log will trigger a delete against the specified table - * in the same schema as the log table with the specified where_clause - * start_date and end_date MUST be strings in order for this to work properly. 
- */ -INSERT - INTO postgres.{{schema | sqlsafe}}.presto_delete_wrapper_log - ( - id, - action_ts, - table_name, - where_clause, - result_rows - ) -VALUES ( - uuid(), - now(), - 'reporting_ocpusagelineitem_daily_summary', - 'where usage_start >= '{{start_date}}'::date ' || - 'and usage_start <= '{{end_date}}'::date ' || - 'and cluster_id = '{{cluster_id}}' ' || - 'and data_source = ''Storage''', - null -) -; - -/* - * This is the target summarization sql for STORAGE usage - * It combines the prior daily summarization query with the final summarization query - * by use of MAP_FILTER to filter the combined node line item labels as well as - * the line-item pod labels against the postgres enabled keys in the same query - */ -INSERT INTO postgres.{{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary ( - uuid, - report_period_id, - cluster_id, - cluster_alias, - data_source, - namespace, - node, - persistentvolumeclaim, - persistentvolume, - storageclass, - usage_start, - usage_end, - volume_labels, - source_uuid, - persistentvolumeclaim_capacity_gigabyte, - persistentvolumeclaim_capacity_gigabyte_months, - volume_request_storage_gigabyte_months, - persistentvolumeclaim_usage_gigabyte_months -) -SELECT uuid() as "uuid", - {{report_period_id}} as "report_period_id", - {{cluster_id}} as "cluster_id", - {{cluster_alias}} as "cluster_alias", - 'Storage' as "data_source", - sua.namespace, - sua.node, - sua.persistentvolumeclaim, - sua.persistentvolume, - sua.storageclass, - sua.usage_start, - sua.usage_start as "usage_end", - cast(sua.volume_labels as json) as "volume_labels", - cast(sua.source_uuid as UUID) as "source_uuid", - (sua.persistentvolumeclaim_capacity_bytes * - power(2, -30)) as "persistentvolumeclaim_capacity_gigibytes", - (sua.persistentvolumeclaim_capacity_byte_seconds / - 86400 * - cast(extract(day from last_day_of_month(date(sua.usage_start))) as integer) * - power(2, -30)) as "persistentvolumeclaim_capacity_gigabyte_months", - (sua.volume_request_storage_byte_seconds / - 86400 * - cast(extract(day from last_day_of_month(date(sua.usage_start))) as integer) * - power(2, -30)) as "volume_request_storage_gigabyte_months", - (sua.persistentvolumeclaim_usage_byte_seconds / - 86400 * - cast(extract(day from last_day_of_month(date(sua.usage_start))) as integer) * - power(2, -30)) as "persistentvolumeclaim_usage_byte_months" - FROM ( - SELECT sli.namespace, - vn.node, - sli.persistentvolumeclaim, - sli.persistentvolume, - sli.storageclass, - date(sli.interval_start) as "usage_start", - map_filter(map_concat(cast(json_parse(coalesce(nli.node_labels, '{}')) as map(varchar, varchar)), - cast(json_parse(sli.persistentvolume_labels) as map(varchar, varchar)), - cast(json_parse(sli.persistentvolumeclaim_labels) as map(varchar, varchar))), - (k, v) -> contains(ek.enabled_keys, k)) as "volume_labels", - sli.source as "source_uuid", - max(sli.persistentvolumeclaim_capacity_bytes) as "persistentvolumeclaim_capacity_bytes", - sum(sli.persistentvolumeclaim_capacity_byte_seconds) as "persistentvolumeclaim_capacity_byte_seconds", - sum(sli.volume_request_storage_byte_seconds) as "volume_request_storage_byte_seconds", - sum(sli.persistentvolumeclaim_usage_byte_seconds) as "persistentvolumeclaim_usage_byte_seconds" - FROM hive.{{schema | sqlsafe}}.openshift_storage_usage_line_items "sli" - LEFT - JOIN hive.{{schema | sqlsafe}}.__volume_nodes_{{uuid | sqlsafe}} as "vn" - ON vn.source = sli.source - AND vn.year = sli.year - AND vn.month = sli.month - AND vn.namespace = sli.namespace - AND vn.pod = 
sli.pod - AND vn.usage_start = date(sli.interval_start) - LEFT - JOIN hive.{{schema | sqlsafe}}.__ocp_node_label_line_item_daily_{{uuid | sqlsafe}} as "nli" - ON nli.source = vn.source - AND nli.year = vn.year - AND nli.month = vn.month - AND nli.node = vn.node - AND date(nli.usage_start) = date(vn.usage_start) - CROSS - JOIN ( - SELECT array_agg(distinct key) as enabled_keys - FROM postgres.{{schema | sqlsafe}}.reporting_ocpenabledtagkeys - ) as "ek" - WHERE sli.source = {{source}} - AND sli.year = {{year}} - AND sli.month = {{month}} - AND sli.interval_start >= TIMESTAMP {{start_date}} - AND sli.interval_start < date_add('day', 1, TIMESTAMP {{end_date}}) - GROUP - BY sli.namespace, - vn.node, - sli.persistentvolumeclaim, - sli.persistentvolume, - sli.storageclass, - date(sli.interval_start), - 7, /* THIS ORDINAL MUST BE KEPT IN SYNC WITH THE map_filter EXPRESSION */ - /* The map_filter expression was too complex for presto to use */ - sli.source - ) as "sua" -; - - -/* - * ==================================== - * CLEANUP - * ==================================== - */ - -DELETE FROM hive.{{schema | sqlsafe}}.__ocp_node_label_line_item_daily_{{uuid | sqlsafe}}; -DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__ocp_node_label_line_item_daily_{{uuid | sqlsafe}}; -DELETE FROM hive.{{schema | sqlsafe}}.__ocp_cluster_capacity_{{uuid | sqlsafe}}; -DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__ocp_cluster_capacity_{{uuid | sqlsafe}}; -DELETE FROM hive.{{schema | sqlsafe}}.__volume_nodes_{{uuid | sqlsafe}}; -DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__volume_nodes_{{uuid | sqlsafe}}; diff --git a/koku/masu/database/presto_sql/reporting_ocpawscostlineitem_daily_summary.sql b/koku/masu/database/presto_sql/reporting_ocpawscostlineitem_daily_summary.sql index ef3a2dbe00..20c990bd28 100644 --- a/koku/masu/database/presto_sql/reporting_ocpawscostlineitem_daily_summary.sql +++ b/koku/masu/database/presto_sql/reporting_ocpawscostlineitem_daily_summary.sql @@ -113,13 +113,13 @@ CREATE TABLE hive.{{schema | sqlsafe}}.__reporting_aws_daily_{{uuid | sqlsafe}} SELECT aws.lineitem_resourceid as resource_id, date(aws.lineitem_usagestartdate) as usage_start, date(aws.lineitem_usagestartdate) as usage_end, - aws.lineitem_productcode as product_code, - aws.product_productfamily as product_family, - aws.product_instancetype as instance_type, + nullif(aws.lineitem_productcode, '') as product_code, + nullif(aws.product_productfamily, '') as product_family, + nullif(aws.product_instancetype, '') as instance_type, aws.lineitem_usageaccountid as usage_account_id, - aws.lineitem_availabilityzone as availability_zone, - aws.product_region as region, - aws.pricing_unit as unit, + nullif(aws.lineitem_availabilityzone, '') as availability_zone, + nullif(aws.product_region, '') as region, + nullif(aws.pricing_unit, '') as unit, aws.lineitem_usageamount as usage_amount, aws.lineitem_normalizedusageamount as normalized_usage_amount, aws.lineitem_currencycode as currency_code, diff --git a/koku/masu/database/presto_sql/reporting_ocpazurecostlineitem_daily_summary.sql b/koku/masu/database/presto_sql/reporting_ocpazurecostlineitem_daily_summary.sql new file mode 100644 index 0000000000..5bcd1ffc62 --- /dev/null +++ b/koku/masu/database/presto_sql/reporting_ocpazurecostlineitem_daily_summary.sql @@ -0,0 +1,1195 @@ +-- The Python Jinja string variable subsitutions azure_where_clause and ocp_where_clause +-- optionally filter azure and OCP data by provider/source +-- Ex azure_where_clause: 'AND cost_entry_bill_id IN (1, 2, 3)' 
+-- Ex ocp_where_clause: "AND cluster_id = 'abcd-1234`" +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__matched_tags_{{uuid | sqlsafe}}; +CREATE TABLE hive.{{schema | sqlsafe}}.__matched_tags_{{uuid | sqlsafe}} AS ( + WITH cte_unnested_azure_tags AS ( + SELECT tags.*, + b.billing_period_start + FROM ( + SELECT key, + value, + cost_entry_bill_id + FROM postgres.{{schema | sqlsafe}}.reporting_azuretags_summary AS ts + CROSS JOIN UNNEST("values") AS v(value) + ) AS tags + JOIN postgres.{{schema | sqlsafe}}.reporting_azurecostentrybill AS b + ON tags.cost_entry_bill_id = b.id + JOIN postgres.{{schema | sqlsafe}}.reporting_azureenabledtagkeys as enabled_tags + ON lower(enabled_tags.key) = lower(tags.key) + WHERE b.id = {{bill_id}} + ), + cte_unnested_ocp_pod_tags AS ( + SELECT tags.*, + rp.report_period_start, + rp.cluster_id, + rp.cluster_alias + FROM ( + SELECT key, + value, + report_period_id + FROM postgres.{{schema | sqlsafe}}.reporting_ocpusagepodlabel_summary AS ts + CROSS JOIN UNNEST("values") AS v(value) + ) AS tags + JOIN postgres.{{schema | sqlsafe}}.reporting_ocpusagereportperiod AS rp + ON tags.report_period_id = rp.id + -- Filter out tags that aren't enabled + JOIN postgres.{{schema | sqlsafe}}.reporting_ocpenabledtagkeys as enabled_tags + ON lower(enabled_tags.key) = lower(tags.key) + WHERE rp.cluster_id = {{cluster_id}} + ), + cte_unnested_ocp_volume_tags AS ( + SELECT tags.*, + rp.report_period_start, + rp.cluster_id, + rp.cluster_alias + FROM ( + SELECT key, + value, + report_period_id + FROM postgres.{{schema | sqlsafe}}.reporting_ocpstoragevolumelabel_summary AS ts + CROSS JOIN UNNEST("values") AS v(value) + ) AS tags + JOIN postgres.{{schema | sqlsafe}}.reporting_ocpusagereportperiod AS rp + ON tags.report_period_id = rp.id + -- Filter out tags that aren't enabled + JOIN postgres.{{schema | sqlsafe}}.reporting_ocpenabledtagkeys as enabled_tags + ON lower(enabled_tags.key) = lower(tags.key) + WHERE rp.cluster_id = {{cluster_id}} + ) + SELECT '{"' || key || '": "' || value || '"}' as tag, + key, + value, + cost_entry_bill_id, + report_period_id + FROM ( + SELECT azure.key, + azure.value, + azure.cost_entry_bill_id, + ocp.report_period_id + FROM cte_unnested_azure_tags AS azure + JOIN cte_unnested_ocp_pod_tags AS ocp + ON lower(azure.key) = lower(ocp.key) + AND lower(azure.value) = lower(ocp.value) + AND azure.billing_period_start = ocp.report_period_start + + UNION + + SELECT azure.key, + azure.value, + azure.cost_entry_bill_id, + ocp.report_period_id + FROM cte_unnested_azure_tags AS azure + JOIN cte_unnested_ocp_volume_tags AS ocp + ON lower(azure.key) = lower(ocp.key) + AND lower(azure.value) = lower(ocp.value) + AND azure.billing_period_start = ocp.report_period_start + ) AS matches +) +; + +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__reporting_azure_daily_{{uuid | sqlsafe}}; +CREATE TABLE hive.{{schema | sqlsafe}}.__reporting_azure_daily_{{uuid | sqlsafe}} AS ( + WITH cte_line_items AS ( + SELECT {{bill_id | sqlsafe}} as cost_entry_bill_id, + cast(uuid() as varchar) as line_item_id, + date(coalesce(date, usagedatetime)) as usage_date, + coalesce(subscriptionid, subscriptionguid) as subscription_guid, + json_extract_scalar(json_parse(azure.additionalinfo), '$.ServiceType') as instance_type, + coalesce(servicename, metercategory) as service_name, + resourcelocation as resource_location, + split_part(coalesce(resourceid, instanceid), '/', 9) as resource_id, + cast(coalesce(quantity, usagequantity) as decimal(24,9)) as usage_quantity, + 
cast(coalesce(costinbillingcurrency, pretaxcost) as decimal(24,9)) as pretax_cost, + coalesce(billingcurrencycode, currency) as currency, + CASE + WHEN split_part(unitofmeasure, ' ', 2) != '' AND NOT (unitofmeasure = '100 Hours' AND metercategory='Virtual Machines') + THEN cast(split_part(unitofmeasure, ' ', 1) as integer) + ELSE 1 + END as multiplier, + CASE + WHEN split_part(unitofmeasure, ' ', 2) = 'Hours' + THEN 'Hrs' + WHEN split_part(unitofmeasure, ' ', 2) = 'GB/Month' + THEN 'GB-Mo' + WHEN split_part(unitofmeasure, ' ', 2) != '' + THEN split_part(unitofmeasure, ' ', 2) + ELSE unitofmeasure + END as unit_of_measure, + tags, + lower(tags) as lower_tags + FROM hive.{{schema | sqlsafe}}.azure_line_items as azure + WHERE azure.source = '{{azure_source_uuid | sqlsafe}}' + AND azure.year = '{{year | sqlsafe}}' + AND azure.month = '{{month | sqlsafe}}' + AND date(coalesce(date, usagedatetime)) >= date('{{start_date | sqlsafe}}') + AND date(coalesce(date, usagedatetime)) <= date('{{end_date | sqlsafe}}') + ) + SELECT azure.cost_entry_bill_id, + azure.line_item_id, + azure.usage_date, + azure.subscription_guid, + azure.instance_type, + azure.service_name, + azure.resource_location, + azure.resource_id, + azure.usage_quantity * azure.multiplier as usage_quantity, + azure.pretax_cost, + azure.currency, + azure.unit_of_measure, + azure.tags, + azure.lower_tags + FROM cte_line_items AS azure +) +; + +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__reporting_azure_tags_{{uuid | sqlsafe}}; +CREATE TABLE hive.{{schema | sqlsafe}}.__reporting_azure_tags_{{uuid | sqlsafe}} AS ( + SELECT azure.* + FROM ( + SELECT azure.*, + row_number() OVER (PARTITION BY azure.line_item_id ORDER BY azure.line_item_id) as row_number + FROM hive.{{schema | sqlsafe}}.__reporting_azure_daily_{{uuid | sqlsafe}} as azure + JOIN hive.{{schema | sqlsafe}}.__matched_tags_{{uuid | sqlsafe}} as tag + ON json_extract_scalar(azure.tags, '$.' || tag.key) = tag.value + ) AS azure + WHERE azure.row_number = 1 +) +; + +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__reporting_azure_special_case_tags_{{uuid | sqlsafe}}; +CREATE TABLE hive.{{schema | sqlsafe}}.__reporting_azure_special_case_tags_{{uuid | sqlsafe}} AS ( + SELECT azure.* + FROM hive.{{schema | sqlsafe}}.__reporting_azure_daily_{{uuid | sqlsafe}} as azure + WHERE ( + strpos(lower_tags, 'openshift_cluster') != 0 + OR strpos(lower_tags, 'openshift_node') != 0 + OR strpos(lower_tags, 'openshift_project') != 0 + ) +) +; + +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__reporting_ocp_storage_tags_{{uuid | sqlsafe}}; +CREATE TABLE hive.{{schema | sqlsafe}}.__reporting_ocp_storage_tags_{{uuid | sqlsafe}} AS ( + SELECT cast(ocp.uuid AS VARCHAR) AS ocp_id, + ocp.usage_start, + ocp.report_period_id, + ocp.cluster_id, + ocp.cluster_alias, + ocp.namespace, + ocp.node, + ocp.persistentvolumeclaim, + ocp.persistentvolume, + ocp.storageclass, + ocp.persistentvolumeclaim_capacity_gigabyte, + ocp.persistentvolumeclaim_capacity_gigabyte_months, + ocp.volume_request_storage_gigabyte_months, + ocp.persistentvolumeclaim_usage_gigabyte_months, + json_format(ocp.volume_labels) as volume_labels, + lower(tag.key) as key, + lower(tag.value) as value, + lower(tag.tag) as tag + FROM postgres.{{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp + JOIN hive.{{schema | sqlsafe}}.__matched_tags_{{uuid | sqlsafe}} AS tag + ON ocp.report_period_id = tag.report_period_id + AND json_extract_scalar(ocp.volume_labels, '$.' 
|| tag.key) = tag.value
+    WHERE ocp.source_uuid = UUID '{{ocp_source_uuid | sqlsafe}}'
+        AND ocp.data_source = 'Storage'
+        AND date(ocp.usage_start) >= date('{{start_date | sqlsafe}}')
+        AND date(ocp.usage_start) <= date('{{end_date | sqlsafe}}')
+)
+;
+
+DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__reporting_ocp_pod_tags_{{uuid | sqlsafe}};
+CREATE TABLE hive.{{schema | sqlsafe}}.__reporting_ocp_pod_tags_{{uuid | sqlsafe}} AS (
+    SELECT cast(ocp.uuid AS VARCHAR) AS ocp_id,
+        ocp.usage_start,
+        ocp.report_period_id,
+        ocp.cluster_id,
+        ocp.cluster_alias,
+        ocp.namespace,
+        ocp.node,
+        json_format(ocp.pod_labels) as pod_labels,
+        ocp.pod_usage_cpu_core_hours,
+        ocp.pod_request_cpu_core_hours,
+        ocp.pod_limit_cpu_core_hours,
+        ocp.pod_usage_memory_gigabyte_hours,
+        ocp.pod_request_memory_gigabyte_hours,
+        ocp.node_capacity_cpu_cores,
+        ocp.node_capacity_cpu_core_hours,
+        ocp.node_capacity_memory_gigabytes,
+        ocp.node_capacity_memory_gigabyte_hours,
+        ocp.cluster_capacity_cpu_core_hours,
+        ocp.cluster_capacity_memory_gigabyte_hours,
+        lower(tag.key) as key,
+        lower(tag.value) as value,
+        lower(tag.tag) as tag
+    FROM postgres.{{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp
+    JOIN hive.{{schema | sqlsafe}}.__matched_tags_{{uuid | sqlsafe}} AS tag
+        ON ocp.report_period_id = tag.report_period_id
+            AND json_extract_scalar(ocp.pod_labels, '$.' || tag.key) = tag.value
+    WHERE ocp.source_uuid = UUID '{{ocp_source_uuid | sqlsafe}}'
+        AND ocp.data_source = 'Pod'
+        AND date(ocp.usage_start) >= date('{{start_date | sqlsafe}}')
+        AND date(ocp.usage_start) <= date('{{end_date | sqlsafe}}')
+)
+;
+
+DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__matched_tags_{{uuid | sqlsafe}};
+
+
+-- First we match OCP pod data to Azure data using a direct
+-- resource id match. This usually means OCP node -> Azure Virtual Machine.
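+-- Illustrative example (assumed resource id shape, placeholder names): a compute line item with
+-- resourceid '/subscriptions/<sub>/resourceGroups/<rg>/providers/Microsoft.Compute/virtualMachines/worker-0'
+-- gets resource_id = 'worker-0' from the split_part(..., '/', 9) above, which is expected to equal the
+-- OCP node name, so the resource id match below joins on azure.resource_id = ocp.node for the same usage date.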
+DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}}; +CREATE TABLE hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} AS ( + WITH cte_resource_id_matched AS ( + SELECT cast(ocp.uuid AS VARCHAR) AS ocp_id, + ocp.report_period_id, + ocp.cluster_id, + ocp.cluster_alias, + ocp.namespace, + ocp.node, + json_format(ocp.pod_labels) as pod_labels, + ocp.pod_usage_cpu_core_hours, + ocp.pod_request_cpu_core_hours, + ocp.pod_limit_cpu_core_hours, + ocp.pod_usage_memory_gigabyte_hours, + ocp.pod_request_memory_gigabyte_hours, + ocp.node_capacity_cpu_cores, + ocp.node_capacity_cpu_core_hours, + ocp.node_capacity_memory_gigabytes, + ocp.node_capacity_memory_gigabyte_hours, + ocp.cluster_capacity_cpu_core_hours, + ocp.cluster_capacity_memory_gigabyte_hours, + azure.cost_entry_bill_id, + azure.line_item_id as azure_id, + azure.usage_date, + azure.subscription_guid, + azure.instance_type, + azure.service_name, + azure.resource_location, + azure.resource_id, + azure.usage_quantity, + azure.pretax_cost, + azure.currency, + azure.unit_of_measure, + azure.tags + FROM hive.{{schema | sqlsafe}}.__reporting_azure_daily_{{uuid | sqlsafe}} as azure + JOIN postgres.{{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp + -- NOTE: We would normally use ocp.resource_id + -- For this JOIN, but it is not guaranteed to be correct + -- in the current Operator Metering version + -- so we are matching only on the node name + -- which should match the split Azure instance ID + ON azure.resource_id = ocp.node + AND azure.usage_date = ocp.usage_start + WHERE ocp.source_uuid = UUID '{{ocp_source_uuid | sqlsafe}}' + AND ocp.usage_start >= date('{{start_date | sqlsafe}}') + AND ocp.usage_start <= date('{{end_date | sqlsafe}}') + AND ocp.data_source = 'Pod' + ), + cte_number_of_shared AS ( + SELECT azure_id, + count(DISTINCT namespace) as shared_projects + FROM cte_resource_id_matched + GROUP BY azure_id + ) + SELECT rm.*, + (rm.pod_usage_cpu_core_hours / rm.node_capacity_cpu_core_hours) * rm.pretax_cost as project_cost, + shared.shared_projects + FROM cte_resource_id_matched AS rm + JOIN cte_number_of_shared AS shared + ON rm.azure_id = shared.azure_id +) +; + +-- Next we match where the azure tag is the special openshift_project key +-- and the value matches an OpenShift project name +INSERT INTO hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} + WITH cte_tag_matched AS ( + SELECT cast(ocp.uuid AS VARCHAR) AS ocp_id, + ocp.report_period_id, + ocp.cluster_id, + ocp.cluster_alias, + ocp.namespace, + ocp.node, + json_format(ocp.pod_labels) as pod_labels, + ocp.pod_usage_cpu_core_hours, + ocp.pod_request_cpu_core_hours, + ocp.pod_limit_cpu_core_hours, + ocp.pod_usage_memory_gigabyte_hours, + ocp.pod_request_memory_gigabyte_hours, + ocp.node_capacity_cpu_cores, + ocp.node_capacity_cpu_core_hours, + ocp.node_capacity_memory_gigabytes, + ocp.node_capacity_memory_gigabyte_hours, + ocp.cluster_capacity_cpu_core_hours, + ocp.cluster_capacity_memory_gigabyte_hours, + azure.cost_entry_bill_id, + azure.line_item_id as azure_id, + azure.usage_date, + azure.subscription_guid, + azure.instance_type, + azure.service_name, + azure.resource_location, + azure.resource_id, + azure.usage_quantity, + azure.pretax_cost, + azure.currency, + azure.unit_of_measure, + azure.tags + FROM hive.{{schema | sqlsafe}}.__reporting_azure_special_case_tags_{{uuid | sqlsafe}} as azure + JOIN postgres.{{schema | 
sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp + ON json_extract_scalar(azure.lower_tags, '$.openshift_project') = lower(ocp.namespace) + AND azure.usage_date = ocp.usage_start + -- ANTI JOIN to remove rows that already matched + LEFT JOIN hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} AS rm + ON rm.azure_id = azure.line_item_id + WHERE ocp.source_uuid = UUID '{{ocp_source_uuid | sqlsafe}}' + AND ocp.data_source = 'Pod' + AND rm.azure_id IS NULL + ), + cte_number_of_shared AS ( + SELECT azure_id, + count(DISTINCT namespace) as shared_projects + FROM cte_tag_matched + GROUP BY azure_id + ) + SELECT tm.*, + tm.pretax_cost / shared.shared_projects as project_cost, + shared.shared_projects + FROM cte_tag_matched AS tm + JOIN cte_number_of_shared AS shared + ON tm.azure_id = shared.azure_id +; + +-- Next we match where the azure tag is the special openshift_node key +-- and the value matches an OpenShift node name +INSERT INTO hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} + WITH cte_tag_matched AS ( + SELECT cast(ocp.uuid AS VARCHAR) AS ocp_id, + ocp.report_period_id, + ocp.cluster_id, + ocp.cluster_alias, + ocp.namespace, + ocp.node, + json_format(ocp.pod_labels) as pod_labels, + ocp.pod_usage_cpu_core_hours, + ocp.pod_request_cpu_core_hours, + ocp.pod_limit_cpu_core_hours, + ocp.pod_usage_memory_gigabyte_hours, + ocp.pod_request_memory_gigabyte_hours, + ocp.node_capacity_cpu_cores, + ocp.node_capacity_cpu_core_hours, + ocp.node_capacity_memory_gigabytes, + ocp.node_capacity_memory_gigabyte_hours, + ocp.cluster_capacity_cpu_core_hours, + ocp.cluster_capacity_memory_gigabyte_hours, + azure.cost_entry_bill_id, + azure.line_item_id as azure_id, + azure.usage_date, + azure.subscription_guid, + azure.instance_type, + azure.service_name, + azure.resource_location, + azure.resource_id, + azure.usage_quantity, + azure.pretax_cost, + azure.currency, + azure.unit_of_measure, + azure.tags + FROM hive.{{schema | sqlsafe}}.__reporting_azure_special_case_tags_{{uuid | sqlsafe}} as azure + JOIN postgres.{{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp + ON json_extract_scalar(azure.lower_tags, '$.openshift_node') = lower(ocp.node) + AND azure.usage_date = ocp.usage_start + -- ANTI JOIN to remove rows that already matched + LEFT JOIN hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} AS rm + ON rm.azure_id = azure.line_item_id + WHERE ocp.source_uuid = UUID '{{ocp_source_uuid | sqlsafe}}' + AND ocp.data_source = 'Pod' + AND rm.azure_id IS NULL + ), + cte_number_of_shared AS ( + SELECT azure_id, + count(DISTINCT namespace) as shared_projects + FROM cte_tag_matched + GROUP BY azure_id + ) + SELECT tm.*, + tm.pretax_cost / shared.shared_projects as project_cost, + shared.shared_projects + FROM cte_tag_matched AS tm + JOIN cte_number_of_shared AS shared + ON tm.azure_id = shared.azure_id +; + +-- Next we match where the azure tag is the special openshift_cluster key +-- and the value matches an OpenShift cluster name + INSERT INTO hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} + WITH cte_tag_matched AS ( + SELECT cast(ocp.uuid AS VARCHAR) AS ocp_id, + ocp.report_period_id, + ocp.cluster_id, + ocp.cluster_alias, + ocp.namespace, + ocp.node, + json_format(ocp.pod_labels) as pod_labels, + ocp.pod_usage_cpu_core_hours, + ocp.pod_request_cpu_core_hours, + ocp.pod_limit_cpu_core_hours, + ocp.pod_usage_memory_gigabyte_hours, + 
ocp.pod_request_memory_gigabyte_hours, + ocp.node_capacity_cpu_cores, + ocp.node_capacity_cpu_core_hours, + ocp.node_capacity_memory_gigabytes, + ocp.node_capacity_memory_gigabyte_hours, + ocp.cluster_capacity_cpu_core_hours, + ocp.cluster_capacity_memory_gigabyte_hours, + azure.cost_entry_bill_id, + azure.line_item_id as azure_id, + azure.usage_date, + azure.subscription_guid, + azure.instance_type, + azure.service_name, + azure.resource_location, + azure.resource_id, + azure.usage_quantity, + azure.pretax_cost, + azure.currency, + azure.unit_of_measure, + azure.tags + FROM hive.{{schema | sqlsafe}}.__reporting_azure_special_case_tags_{{uuid | sqlsafe}} as azure + JOIN postgres.{{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp + ON json_extract_scalar(azure.lower_tags, '$.openshift_cluster') IN (lower(ocp.cluster_id), lower(ocp.cluster_alias)) + AND azure.usage_date = ocp.usage_start + -- ANTI JOIN to remove rows that already matched + LEFT JOIN hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} AS rm + ON rm.azure_id = azure.line_item_id + WHERE ocp.source_uuid = UUID '{{ocp_source_uuid | sqlsafe}}' + AND ocp.data_source = 'Pod' + AND rm.azure_id IS NULL + ), + cte_number_of_shared AS ( + SELECT azure_id, + count(DISTINCT namespace) as shared_projects + FROM cte_tag_matched + GROUP BY azure_id + ) + SELECT tm.*, + tm.pretax_cost / shared.shared_projects as project_cost, + shared.shared_projects + FROM cte_tag_matched AS tm + JOIN cte_number_of_shared AS shared + ON tm.azure_id = shared.azure_id +; + +-- Next we match where the pod label key and value +-- and azure tag key and value match directly + INSERT INTO hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} + WITH cte_tag_matched AS ( + SELECT ocp.ocp_id, + ocp.report_period_id, + ocp.cluster_id, + ocp.cluster_alias, + ocp.namespace, + ocp.node, + ocp.pod_labels, + ocp.pod_usage_cpu_core_hours, + ocp.pod_request_cpu_core_hours, + ocp.pod_limit_cpu_core_hours, + ocp.pod_usage_memory_gigabyte_hours, + ocp.pod_request_memory_gigabyte_hours, + ocp.node_capacity_cpu_cores, + ocp.node_capacity_cpu_core_hours, + ocp.node_capacity_memory_gigabytes, + ocp.node_capacity_memory_gigabyte_hours, + ocp.cluster_capacity_cpu_core_hours, + ocp.cluster_capacity_memory_gigabyte_hours, + azure.cost_entry_bill_id, + azure.line_item_id as azure_id, + azure.usage_date, + azure.subscription_guid, + azure.instance_type, + azure.service_name, + azure.resource_location, + azure.resource_id, + azure.usage_quantity, + azure.pretax_cost, + azure.currency, + azure.unit_of_measure, + azure.tags + FROM hive.{{schema | sqlsafe}}.__reporting_azure_tags_{{uuid | sqlsafe}} as azure + JOIN hive.{{schema | sqlsafe}}.__reporting_ocp_pod_tags_{{uuid | sqlsafe}} as ocp + ON azure.usage_date = ocp.usage_start + AND strpos(azure.lower_tags, ocp.tag) != 0 + -- ANTI JOIN to remove rows that already matched + LEFT JOIN hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} AS rm + ON rm.azure_id = azure.line_item_id + WHERE rm.azure_id IS NULL + ), + cte_number_of_shared AS ( + SELECT azure_id, + count(DISTINCT namespace) as shared_projects + FROM cte_tag_matched + GROUP BY azure_id + ) + SELECT tm.*, + tm.pretax_cost / shared.shared_projects as project_cost, + shared.shared_projects + FROM cte_tag_matched AS tm + JOIN cte_number_of_shared AS shared + ON tm.azure_id = shared.azure_id +; + +DROP TABLE IF EXISTS hive.{{schema | 
sqlsafe}}.__reporting_ocp_pod_tags_{{uuid | sqlsafe}}; + +-- First we match OCP storage data to Azure data using a direct +-- resource id match. OCP PVC name -> Azure instance ID. +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}}; +CREATE TABLE hive.{{schema | sqlsafe}}.__reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} AS ( + WITH cte_tag_matched AS ( + SELECT cast(ocp.uuid AS VARCHAR) AS ocp_id, + ocp.report_period_id, + ocp.cluster_id, + ocp.cluster_alias, + ocp.namespace, + ocp.node, + ocp.persistentvolumeclaim, + ocp.persistentvolume, + ocp.storageclass, + ocp.persistentvolumeclaim_capacity_gigabyte, + ocp.persistentvolumeclaim_capacity_gigabyte_months, + ocp.volume_request_storage_gigabyte_months, + ocp.persistentvolumeclaim_usage_gigabyte_months, + json_format(ocp.volume_labels) as volume_labels, + azure.cost_entry_bill_id, + azure.line_item_id as azure_id, + azure.usage_date, + azure.subscription_guid, + azure.instance_type, + azure.service_name, + azure.resource_location, + azure.resource_id, + azure.usage_quantity, + azure.pretax_cost, + azure.currency, + azure.unit_of_measure, + azure.tags + FROM hive.{{schema | sqlsafe}}.__reporting_azure_daily_{{uuid | sqlsafe}} as azure + JOIN postgres.{{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp + ON azure.resource_id LIKE '%%' || ocp.persistentvolume + AND azure.usage_date = ocp.usage_start + -- ANTI JOIN to remove rows that already matched + LEFT JOIN hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} AS ulid + ON ulid.azure_id = azure.line_item_id + WHERE ocp.source_uuid = UUID '{{ocp_source_uuid | sqlsafe}}' + AND ocp.data_source = 'Storage' + AND ocp.usage_start >= date('{{start_date | sqlsafe}}') + AND ocp.usage_start <= date('{{end_date | sqlsafe}}') + AND ulid.azure_id IS NULL + ), + cte_number_of_shared AS ( + SELECT azure_id, + count(DISTINCT namespace) as shared_projects + FROM cte_tag_matched + GROUP BY azure_id + ) + SELECT tm.*, + tm.pretax_cost / shared.shared_projects as project_cost, + shared.shared_projects + FROM cte_tag_matched AS tm + JOIN cte_number_of_shared AS shared + ON tm.azure_id = shared.azure_id +) +; + + +-- Next we match where the azure tag is the special openshift_project key +-- and the value matches an OpenShift project name +INSERT INTO hive.{{schema | sqlsafe}}.__reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} + WITH cte_tag_matched AS ( + SELECT cast(ocp.uuid AS VARCHAR) AS ocp_id, + ocp.report_period_id, + ocp.cluster_id, + ocp.cluster_alias, + ocp.namespace, + ocp.node, + ocp.persistentvolumeclaim, + ocp.persistentvolume, + ocp.storageclass, + ocp.persistentvolumeclaim_capacity_gigabyte, + ocp.persistentvolumeclaim_capacity_gigabyte_months, + ocp.volume_request_storage_gigabyte_months, + ocp.persistentvolumeclaim_usage_gigabyte_months, + json_format(ocp.volume_labels) as volume_labels, + azure.cost_entry_bill_id, + azure.line_item_id as azure_id, + azure.usage_date, + azure.subscription_guid, + azure.instance_type, + azure.service_name, + azure.resource_location, + azure.resource_id, + azure.usage_quantity, + azure.pretax_cost, + azure.currency, + azure.unit_of_measure, + azure.tags + FROM hive.{{schema | sqlsafe}}.__reporting_azure_special_case_tags_{{uuid | sqlsafe}} as azure + JOIN postgres.{{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp + ON json_extract_scalar(azure.lower_tags, '$.openshift_project') = lower(ocp.namespace) + AND 
azure.usage_date = ocp.usage_start + -- ANTI JOIN to remove rows that already matched + LEFT JOIN hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} AS ulid + ON ulid.azure_id = azure.line_item_id + LEFT JOIN hive.{{schema | sqlsafe}}.__reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} AS rm + ON rm.azure_id = azure.line_item_id + WHERE ocp.source_uuid = UUID '{{ocp_source_uuid | sqlsafe}}' + AND ocp.data_source = 'Storage' + AND ocp.usage_start >= date('{{start_date | sqlsafe}}') + AND ocp.usage_start <= date('{{end_date | sqlsafe}}') + AND ulid.azure_id IS NULL + AND rm.azure_id IS NULL + ), + cte_number_of_shared AS ( + SELECT azure_id, + count(DISTINCT namespace) as shared_projects + FROM cte_tag_matched + GROUP BY azure_id + ) + SELECT tm.*, + tm.pretax_cost / shared.shared_projects as project_cost, + shared.shared_projects + FROM cte_tag_matched AS tm + JOIN cte_number_of_shared AS shared + ON tm.azure_id = shared.azure_id +; + +-- Next we match where the azure tag is the special openshift_node key +-- and the value matches an OpenShift node name +INSERT INTO hive.{{schema | sqlsafe}}.__reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} + WITH cte_tag_matched AS ( + SELECT cast(ocp.uuid AS VARCHAR) AS ocp_id, + ocp.report_period_id, + ocp.cluster_id, + ocp.cluster_alias, + ocp.namespace, + ocp.node, + ocp.persistentvolumeclaim, + ocp.persistentvolume, + ocp.storageclass, + ocp.persistentvolumeclaim_capacity_gigabyte, + ocp.persistentvolumeclaim_capacity_gigabyte_months, + ocp.volume_request_storage_gigabyte_months, + ocp.persistentvolumeclaim_usage_gigabyte_months, + json_format(ocp.volume_labels) as volume_labels, + azure.cost_entry_bill_id, + azure.line_item_id as azure_id, + azure.usage_date, + azure.subscription_guid, + azure.instance_type, + azure.service_name, + azure.resource_location, + azure.resource_id, + azure.usage_quantity, + azure.pretax_cost, + azure.currency, + azure.unit_of_measure, + azure.tags + FROM hive.{{schema | sqlsafe}}.__reporting_azure_special_case_tags_{{uuid | sqlsafe}} as azure + JOIN postgres.{{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp + ON json_extract_scalar(azure.lower_tags, '$.openshift_node') = lower(ocp.node) + AND azure.usage_date = ocp.usage_start + -- ANTI JOIN to remove rows that already matched + LEFT JOIN hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} AS ulid + ON ulid.azure_id = azure.line_item_id + LEFT JOIN hive.{{schema | sqlsafe}}.__reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} AS rm + ON rm.azure_id = azure.line_item_id + WHERE ocp.source_uuid = UUID '{{ocp_source_uuid | sqlsafe}}' + AND ocp.data_source = 'Storage' + AND ulid.azure_id IS NULL + AND rm.azure_id IS NULL + ), + cte_number_of_shared AS ( + SELECT azure_id, + count(DISTINCT namespace) as shared_projects + FROM cte_tag_matched + GROUP BY azure_id + ) + SELECT tm.*, + tm.pretax_cost / shared.shared_projects as project_cost, + shared.shared_projects + FROM cte_tag_matched AS tm + JOIN cte_number_of_shared AS shared + ON tm.azure_id = shared.azure_id +; + +-- Next we match where the azure tag is the special openshift_cluster key +-- and the value matches an OpenShift cluster name + INSERT INTO hive.{{schema | sqlsafe}}.__reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} + WITH cte_tag_matched AS ( + SELECT cast(ocp.uuid AS VARCHAR) AS ocp_id, + ocp.report_period_id, + ocp.cluster_id, + ocp.cluster_alias, + ocp.namespace, + ocp.node, + 
ocp.persistentvolumeclaim, + ocp.persistentvolume, + ocp.storageclass, + ocp.persistentvolumeclaim_capacity_gigabyte, + ocp.persistentvolumeclaim_capacity_gigabyte_months, + ocp.volume_request_storage_gigabyte_months, + ocp.persistentvolumeclaim_usage_gigabyte_months, + json_format(ocp.volume_labels) as volume_labels, + azure.cost_entry_bill_id, + azure.line_item_id as azure_id, + azure.usage_date, + azure.subscription_guid, + azure.instance_type, + azure.service_name, + azure.resource_location, + azure.resource_id, + azure.usage_quantity, + azure.pretax_cost, + azure.currency, + azure.unit_of_measure, + azure.tags + FROM hive.{{schema | sqlsafe}}.__reporting_azure_special_case_tags_{{uuid | sqlsafe}} as azure + JOIN postgres.{{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp + ON json_extract_scalar(azure.lower_tags, '$.openshift_cluster') IN (lower(ocp.cluster_id), lower(ocp.cluster_alias)) + AND azure.usage_date = ocp.usage_start + -- ANTI JOIN to remove rows that already matched + LEFT JOIN hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} AS ulid + ON ulid.azure_id = azure.line_item_id + LEFT JOIN hive.{{schema | sqlsafe}}.__reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} AS rm + ON rm.azure_id = azure.line_item_id + WHERE ocp.source_uuid = UUID '{{ocp_source_uuid | sqlsafe}}' + AND ocp.data_source = 'Storage' + AND ulid.azure_id IS NULL + AND rm.azure_id IS NULL + ), + cte_number_of_shared AS ( + SELECT azure_id, + count(DISTINCT namespace) as shared_projects + FROM cte_tag_matched + GROUP BY azure_id + ) + SELECT tm.*, + tm.pretax_cost / shared.shared_projects as project_cost, + shared.shared_projects + FROM cte_tag_matched AS tm + JOIN cte_number_of_shared AS shared + ON tm.azure_id = shared.azure_id + +; + +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__reporting_azure_daily_{{uuid | sqlsafe}} +; + +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__reporting_azure_special_case_tags_{{uuid | sqlsafe}} +; + + +-- Then we match for OpenShift volume data where the volume label key and value +-- and azure tag key and value match directly + INSERT INTO hive.{{schema | sqlsafe}}.__reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} + WITH cte_tag_matched AS ( + SELECT ocp.ocp_id, + ocp.report_period_id, + ocp.cluster_id, + ocp.cluster_alias, + ocp.namespace, + ocp.node, + ocp.persistentvolumeclaim, + ocp.persistentvolume, + ocp.storageclass, + ocp.persistentvolumeclaim_capacity_gigabyte, + ocp.persistentvolumeclaim_capacity_gigabyte_months, + ocp.volume_request_storage_gigabyte_months, + ocp.persistentvolumeclaim_usage_gigabyte_months, + ocp.volume_labels, + azure.cost_entry_bill_id, + azure.line_item_id as azure_id, + azure.usage_date, + azure.subscription_guid, + azure.instance_type, + azure.service_name, + azure.resource_location, + azure.resource_id, + azure.usage_quantity, + azure.pretax_cost, + azure.currency, + azure.unit_of_measure, + azure.tags + FROM hive.{{schema | sqlsafe}}.__reporting_azure_tags_{{uuid | sqlsafe}} as azure + JOIN hive.{{schema | sqlsafe}}.__reporting_ocp_storage_tags_{{uuid | sqlsafe}} as ocp + ON azure.usage_date = ocp.usage_start + AND strpos(azure.lower_tags, ocp.tag) != 0 + -- ANTI JOIN to remove rows that already matched + LEFT JOIN hive.{{schema | sqlsafe}}.__reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} AS rm + ON rm.azure_id = azure.line_item_id + WHERE rm.azure_id IS NULL + ), + cte_number_of_shared AS ( + SELECT azure_id, + count(DISTINCT namespace) as 
shared_projects + FROM cte_tag_matched + GROUP BY azure_id + ) + SELECT tm.*, + tm.pretax_cost / shared.shared_projects as project_cost, + shared.shared_projects + FROM cte_tag_matched AS tm + JOIN cte_number_of_shared AS shared + ON tm.azure_id = shared.azure_id +; + +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__reporting_ocp_storage_tags_{{uuid | sqlsafe}} +; + +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__reporting_azure_tags_{{uuid | sqlsafe}} +; + + +-- The full summary data for Openshift pod<->azure and +-- Openshift volume<->azure matches are UNIONed together +-- with a GROUP BY using the azure ID to deduplicate +-- the azure data. This should ensure that we never double count +-- azure cost or usage. +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__reporting_ocpazurecostlineitem_daily_summary_{{uuid | sqlsafe}}; +CREATE TABLE hive.{{schema | sqlsafe}}.__reporting_ocpazurecostlineitem_daily_summary_{{uuid | sqlsafe}} AS ( + WITH cte_pod_project_cost AS ( + SELECT pc.azure_id, + map_agg(pc.namespace, pc.project_cost) as project_costs + FROM ( + SELECT li.azure_id, + li.namespace, + sum(project_cost) as project_cost + FROM hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} as li + GROUP BY li.azure_id, li.namespace + ) AS pc + GROUP BY pc.azure_id + ), + cte_storage_project_cost AS ( + SELECT pc.azure_id, + map_agg(pc.namespace, pc.project_cost) as project_costs + FROM ( + SELECT li.azure_id, + li.namespace, + sum(project_cost) as project_cost + FROM hive.{{schema | sqlsafe}}.__reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} as li + GROUP BY li.azure_id, li.namespace + ) AS pc + GROUP BY pc.azure_id + ) + SELECT max(li.report_period_id) as report_period_id, + max(li.cluster_id) as cluster_id, + max(li.cluster_alias) as cluster_alias, + array_agg(DISTINCT li.namespace) as namespace, + max(li.node) as node, + max(li.resource_id) as resource_id, + max(li.usage_date) as usage_start, + max(li.usage_date) as usage_end, + max(li.cost_entry_bill_id) as cost_entry_bill_id, + max(li.subscription_guid) as subscription_guid, + max(li.service_name) as service_name, + max(li.instance_type) as instance_type, + max(li.resource_location) as resource_location, + max(li.currency) as currency, + max(li.unit_of_measure) as unit_of_measure, + li.tags, + max(li.usage_quantity) as usage_quantity, + max(li.pretax_cost) as pretax_cost, + max(li.pretax_cost) * cast({{markup}} as decimal(24,9)) as markup_cost, + max(li.shared_projects) as shared_projects, + pc.project_costs as project_costs, + '{{azure_source_uuid | sqlsafe}}' as source_uuid + FROM hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} as li + JOIN cte_pod_project_cost as pc + ON li.azure_id = pc.azure_id + -- Dedup on azure line item so we never double count usage or cost + GROUP BY li.azure_id, li.tags, pc.project_costs + + UNION + + SELECT max(li.report_period_id) as report_period_id, + max(li.cluster_id) as cluster_id, + max(li.cluster_alias) as cluster_alias, + array_agg(DISTINCT li.namespace) as namespace, + max(li.node) as node, + max(li.resource_id) as resource_id, + max(li.usage_date) as usage_start, + max(li.usage_date) as usage_end, + max(li.cost_entry_bill_id) as cost_entry_bill_id, + max(li.subscription_guid) as subscription_guid, + max(li.service_name) as service_name, + max(li.instance_type) as instance_type, + max(li.resource_location) as resource_location, + max(li.currency) as currency, + max(li.unit_of_measure) as unit_of_measure, + li.tags, + 
max(li.usage_quantity) as usage_quantity, + max(li.pretax_cost) as pretax_cost, + max(li.pretax_cost) * cast({{markup}} as decimal(24,9)) as markup_cost, + max(li.shared_projects) as shared_projects, + pc.project_costs as project_costs, + '{{azure_source_uuid | sqlsafe}}' as source_uuid + FROM hive.{{schema | sqlsafe}}.__reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} AS li + JOIN cte_storage_project_cost AS pc + ON li.azure_id = pc.azure_id + LEFT JOIN hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} AS ulid + ON ulid.azure_id = li.azure_id + AND ulid.azure_id IS NULL + GROUP BY li.azure_id, li.tags, pc.project_costs +) +; + +-- The full summary data for Openshift pod<->azure and +-- Openshift volume<->azure matches are UNIONed together +-- with a GROUP BY using the OCP ID to deduplicate +-- based on OpenShift data. This is effectively the same table +-- as reporting_ocpazurecostlineitem_daily_summary but from the OpenShift +-- point of view. Here usage and cost are divided by the +-- number of pods sharing the cost so the values turn out the +-- same when reported. +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__reporting_ocpazurecostlineitem_project_daily_summary_{{uuid | sqlsafe}}; +CREATE TABLE hive.{{schema | sqlsafe}}.__reporting_ocpazurecostlineitem_project_daily_summary_{{uuid | sqlsafe}} AS ( + SELECT li.report_period_id, + li.cluster_id, + li.cluster_alias, + 'Pod' as data_source, + li.namespace, + li.node, + li.pod_labels, + max(li.resource_id) as resource_id, + max(li.usage_date) as usage_start, + max(li.usage_date) as usage_end, + max(li.cost_entry_bill_id) as cost_entry_bill_id, + max(li.subscription_guid) as subscription_guid, + max(li.service_name) as service_name, + max(li.instance_type) as instance_type, + max(li.resource_location) as resource_location, + max(li.currency) as currency, + max(li.unit_of_measure) as unit_of_measure, + li.tags, + sum(li.usage_quantity / li.shared_projects) as usage_quantity, + sum(li.pretax_cost / li.shared_projects) as pretax_cost, + sum(li.pretax_cost / li.shared_projects) * cast({{markup}} as decimal(24,9)) as markup_cost, + max(li.shared_projects) as shared_projects, + li.project_cost, + li.project_cost * cast({{markup}} as decimal(24,9)) as project_markup_cost, + '{{azure_source_uuid | sqlsafe}}' as source_uuid + FROM hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} as li + -- Grouping by OCP this time for the by project view + GROUP BY li.report_period_id, + li.ocp_id, + li.cluster_id, + li.cluster_alias, + li.namespace, + li.node, + li.pod_labels, + li.project_cost, + li.tags + + UNION + + SELECT li.report_period_id, + li.cluster_id, + li.cluster_alias, + 'Storage' as data_source, + li.namespace, + li.node, + li.volume_labels as pod_labels, + max(li.resource_id) as resource_id, + max(li.usage_date) as usage_start, + max(li.usage_date) as usage_end, + max(li.cost_entry_bill_id) as cost_entry_bill_id, + max(li.subscription_guid) as subscription_guid, + max(li.service_name) as service_name, + max(li.instance_type) as instance_type, + max(li.resource_location) as resource_location, + max(li.currency) as currency, + max(li.unit_of_measure) as unit_of_measure, + li.tags, + sum(li.usage_quantity / li.shared_projects) as usage_quantity, + sum(li.pretax_cost / li.shared_projects) as pretax_cost, + sum(li.pretax_cost / li.shared_projects) * cast({{markup}} as decimal(24,9)) as markup_cost, + max(li.shared_projects) as shared_projects, + li.project_cost, + 
li.project_cost * cast({{markup}} as decimal(24,9)) as project_markup_cost, + '{{azure_source_uuid | sqlsafe}}' as source_uuid + FROM hive.{{schema | sqlsafe}}.__reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} AS li + LEFT JOIN hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} AS ulid + ON ulid.azure_id = li.azure_id + WHERE ulid.azure_id IS NULL + GROUP BY li.ocp_id, + li.report_period_id, + li.cluster_id, + li.cluster_alias, + li.namespace, + li.node, + li.volume_labels, + li.project_cost, + li.tags +) +; + +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}}; + +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}}; + + +-- Clear out old entries first +INSERT + INTO postgres.{{schema | sqlsafe}}.presto_delete_wrapper_log + ( + id, + action_ts, + table_name, + where_clause, + result_rows + ) +VALUES ( + uuid(), + now(), + 'reporting_ocpazurecostlineitem_daily_summary', + 'WHERE usage_start >= '{{start_date}}'::date ' || + 'AND usage_start <= '{{end_date}}'::date ' || + 'AND cluster_id = '{{cluster_id}}' ' || + 'AND cost_entry_bill_id = {{bill_id}} ', + null +) +; + +-- Populate the daily aggregate line item data +INSERT INTO postgres.{{schema | sqlsafe}}.reporting_ocpazurecostlineitem_daily_summary ( + uuid, + report_period_id, + cluster_id, + cluster_alias, + namespace, + node, + resource_id, + usage_start, + usage_end, + cost_entry_bill_id, + subscription_guid, + instance_type, + service_name, + resource_location, + tags, + usage_quantity, + pretax_cost, + markup_cost, + currency, + unit_of_measure, + shared_projects, + project_costs, + source_uuid +) + SELECT uuid(), + report_period_id, + cluster_id, + cluster_alias, + namespace, + node, + resource_id, + usage_start, + usage_end, + cost_entry_bill_id, + subscription_guid, + instance_type, + service_name, + resource_location, + json_parse(tags), + cast(usage_quantity AS decimal(24,9)), + cast(pretax_cost AS decimal(30,15)), + cast(markup_cost AS decimal(30,15)), + currency, + unit_of_measure, + shared_projects, + cast(project_costs AS JSON), + cast(source_uuid AS UUID) + FROM hive.{{schema | sqlsafe}}.__reporting_ocpazurecostlineitem_daily_summary_{{uuid | sqlsafe}} +; + +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__reporting_ocpazurecostlineitem_daily_summary_{{uuid | sqlsafe}}; + +-- Clear out old entries first +INSERT + INTO postgres.{{schema | sqlsafe}}.presto_delete_wrapper_log + ( + id, + action_ts, + table_name, + where_clause, + result_rows + ) +VALUES ( + uuid(), + now(), + 'reporting_ocpazurecostlineitem_project_daily_summary', + 'where usage_start >= '{{start_date}}'::date ' || + 'and usage_start <= '{{end_date}}'::date ' || + 'and cluster_id = '{{cluster_id}}' ' || + 'and cost_entry_bill_id = {{bill_id}} ', + null +) +; + +INSERT INTO postgres.{{schema | sqlsafe}}.reporting_ocpazurecostlineitem_project_daily_summary ( + uuid, + report_period_id, + cluster_id, + cluster_alias, + data_source, + namespace, + node, + pod_labels, + resource_id, + usage_start, + usage_end, + cost_entry_bill_id, + subscription_guid, + instance_type, + service_name, + resource_location, + usage_quantity, + pretax_cost, + markup_cost, + currency, + unit_of_measure, + pod_cost, + project_markup_cost, + source_uuid +) + SELECT uuid(), + report_period_id, + cluster_id, + cluster_alias, + data_source, + namespace, + node, + json_parse(pod_labels), + resource_id, + usage_start, + usage_end, + 
cost_entry_bill_id, + subscription_guid, + instance_type, + service_name, + resource_location, + cast(usage_quantity AS decimal(24,9)), + cast(pretax_cost AS decimal(30,15)), + cast(markup_cost AS decimal(30,15)), + currency, + unit_of_measure, + cast(project_cost AS decimal(30,15)), + cast(project_markup_cost AS decimal(30,15)), + cast(source_uuid as UUID) + FROM hive.{{schema | sqlsafe}}.__reporting_ocpazurecostlineitem_project_daily_summary_{{uuid | sqlsafe}} +; + +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__reporting_ocpazurecostlineitem_project_daily_summary_{{uuid | sqlsafe}}; diff --git a/koku/masu/database/presto_sql/reporting_ocpusagelineitem_daily_summary.sql b/koku/masu/database/presto_sql/reporting_ocpusagelineitem_daily_summary.sql new file mode 100644 index 0000000000..396bc1664b --- /dev/null +++ b/koku/masu/database/presto_sql/reporting_ocpusagelineitem_daily_summary.sql @@ -0,0 +1,387 @@ +/* + * Process OCP Usage Data Processing SQL + * This SQL will utilize Presto for the raw line-item data aggregating + * and store the results into the koku database summary tables. + */ + +-- Using the convention of a double-underscore prefix to denote a temp table. + +/* + * ==================================== + * COMMON + * ==================================== + */ + +-- node label line items by day presto sql +-- still using a temp table here because there is no guarantee how big this might get +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__ocp_node_label_line_item_daily_{{uuid | sqlsafe}}; +CREATE TABLE hive.{{schema | sqlsafe}}.__ocp_node_label_line_item_daily_{{uuid | sqlsafe}} AS ( + SELECT date(nli.interval_start) as usage_start, + nli.node, + nli.node_labels + FROM hive.{{schema | sqlsafe}}.openshift_node_labels_line_items AS nli + WHERE nli.source = {{source}} + AND nli.year = {{year}} + AND nli.month = {{month}} + AND nli.interval_start >= TIMESTAMP {{start_date}} + AND nli.interval_start < date_add('day', 1, TIMESTAMP {{end_date}}) + GROUP BY date(nli.interval_start), + nli.node, + nli.node_labels +) +; + +-- namespace label line items by day presto sql +-- still using a temp table here because there is no guarantee how big this might get +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__ocp_namespace_label_line_item_daily_{{uuid | sqlsafe}}; +CREATE TABLE hive.{{schema | sqlsafe}}.__ocp_namespace_label_line_item_daily_{{uuid | sqlsafe}} AS ( + SELECT date(nli.interval_start) as usage_start, + nli.namespace, + nli.namespace_labels + FROM hive.{{schema | sqlsafe}}.openshift_namespace_labels_line_items AS nli + WHERE nli.source = {{source}} + AND nli.year = {{year}} + AND nli.month = {{month}} + AND nli.interval_start >= TIMESTAMP {{start_date}} + AND nli.interval_start < date_add('day', 1, TIMESTAMP {{end_date}}) + GROUP BY date(nli.interval_start), + nli.namespace, + nli.namespace_labels +) +; + +-- Daily sum of cluster CPU and memory capacity +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__ocp_cluster_capacity_{{uuid | sqlsafe}}; +CREATE TABLE hive.{{schema | sqlsafe}}.__ocp_cluster_capacity_{{uuid | sqlsafe}} as ( + SELECT date(cc.interval_start) as usage_start, + sum(cc.max_cluster_capacity_cpu_core_seconds) as cluster_capacity_cpu_core_seconds, + sum(cc.max_cluster_capacity_memory_byte_seconds) as cluster_capacity_memory_byte_seconds + FROM ( + SELECT li.interval_start, + li.node, + max(li.node_capacity_cpu_core_seconds) as max_cluster_capacity_cpu_core_seconds, + max(li.node_capacity_memory_byte_seconds) as max_cluster_capacity_memory_byte_seconds + FROM 
hive.{{schema | sqlsafe}}.openshift_pod_usage_line_items AS li + WHERE li.source = {{source}} + AND li.year = {{year}} + AND li.month = {{month}} + AND li.interval_start >= TIMESTAMP {{start_date}} + AND li.interval_start < date_add('day', 1, TIMESTAMP {{end_date}}) + GROUP BY li.interval_start, + li.node + ) as cc + GROUP BY date(cc.interval_start) +) +; + +/* + * ==================================== + * POD + * ==================================== + */ + +/* + * Delete the old block of data (if any) based on the usage range + * Inserting a record in this log will trigger a delete against the specified table + * in the same schema as the log table with the specified where_clause + * start_date and end_date MUST be strings in order for this to work properly. + */ +INSERT INTO postgres.{{schema | sqlsafe}}.presto_delete_wrapper_log ( + id, + action_ts, + table_name, + where_clause, + result_rows +) +VALUES ( + uuid(), + now(), + 'reporting_ocpusagelineitem_daily_summary', + 'where usage_start >= '{{start_date}}'::date ' || + 'and usage_start <= '{{end_date}}'::date ' || + 'and cluster_id = '{{cluster_id}}' ' || + 'and data_source = ''Pod''', + null +) +; + +/* + * This is the target summarization sql for POD usage + * It combines the prior daily summarization query with the final summarization query + * by use of MAP_FILTER to filter the combined node line item labels as well as + * the line-item pod labels against the postgres enabled keys in the same query + */ +INSERT INTO postgres.{{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary ( + uuid, + report_period_id, + cluster_id, + cluster_alias, + data_source, + usage_start, + usage_end, + namespace, + node, + resource_id, + pod_labels, + pod_usage_cpu_core_hours, + pod_request_cpu_core_hours, + pod_limit_cpu_core_hours, + pod_usage_memory_gigabyte_hours, + pod_request_memory_gigabyte_hours, + pod_limit_memory_gigabyte_hours, + node_capacity_cpu_cores, + node_capacity_cpu_core_hours, + node_capacity_memory_gigabytes, + node_capacity_memory_gigabyte_hours, + cluster_capacity_cpu_core_hours, + cluster_capacity_memory_gigabyte_hours, + source_uuid, + infrastructure_usage_cost +) +SELECT uuid() as uuid, + {{report_period_id}} as report_period_id, + {{cluster_id}} as cluster_id, + {{cluster_alias}} as cluster_alias, + 'Pod' as data_source, + pua.usage_start, + pua.usage_start as usage_end, + pua.namespace, + pua.node, + pua.resource_id, + cast(pua.pod_labels as json) as pod_labels, + pua.pod_usage_cpu_core_hours, + pua.pod_request_cpu_core_hours, + pua.pod_limit_cpu_core_hours, + pua.pod_usage_memory_gigabyte_hours, + pua.pod_request_memory_gigabyte_hours, + pua.pod_limit_memory_gigabyte_hours, + pua.node_capacity_cpu_cores, + pua.node_capacity_cpu_core_hours, + pua.node_capacity_memory_gigabytes, + pua.node_capacity_memory_gigabyte_hours, + pua.cluster_capacity_cpu_core_hours, + pua.cluster_capacity_memory_gigabyte_hours, + cast(pua.source_uuid as UUID) as source_uuid, + JSON '{"cpu": 0.000000000, "memory": 0.000000000, "storage": 0.000000000}' as infrastructure_usage_cost +FROM ( + SELECT date(li.interval_start) as usage_start, + li.namespace, + li.node, + li.source as source_uuid, + map_concat( + cast(json_parse(coalesce(nli.node_labels, '{}')) as map(varchar, varchar)), + cast(json_parse(coalesce(nsli.namespace_labels, '{}')) as map(varchar, varchar)), + cast(json_parse(li.pod_labels) as map(varchar, varchar)) + ) as pod_labels, + max(li.resource_id) as resource_id, + sum(li.pod_usage_cpu_core_seconds) / 3600.0 as 
pod_usage_cpu_core_hours, + sum(li.pod_request_cpu_core_seconds) / 3600.0 as pod_request_cpu_core_hours, + sum(li.pod_limit_cpu_core_seconds) / 3600.0 as pod_limit_cpu_core_hours, + sum(li.pod_usage_memory_byte_seconds) / 3600.0 * power(2, -30) as pod_usage_memory_gigabyte_hours, + sum(li.pod_request_memory_byte_seconds) / 3600.0 * power(2, -30) as pod_request_memory_gigabyte_hours, + sum(li.pod_limit_memory_byte_seconds) / 3600.0 * power(2, -30) as pod_limit_memory_gigabyte_hours, + max(li.node_capacity_cpu_cores) as node_capacity_cpu_cores, + sum(li.node_capacity_cpu_core_seconds) / 3600.0 as node_capacity_cpu_core_hours, + max(li.node_capacity_memory_bytes) * power(2, -30) as node_capacity_memory_gigabytes, + sum(li.node_capacity_memory_byte_seconds) / 3600.0 * power(2, -30) as node_capacity_memory_gigabyte_hours, + max(cc.cluster_capacity_cpu_core_seconds) / 3600.0 as cluster_capacity_cpu_core_hours, + max(cc.cluster_capacity_memory_byte_seconds) / 3600.0 * power(2, -30) as cluster_capacity_memory_gigabyte_hours + FROM hive.{{schema | sqlsafe}}.openshift_pod_usage_line_items as li + LEFT JOIN hive.{{schema | sqlsafe}}.__ocp_node_label_line_item_daily_{{uuid | sqlsafe}} as nli + ON nli.node = li.node + AND nli.usage_start = date(li.interval_start) + LEFT JOIN hive.{{schema | sqlsafe}}.__ocp_namespace_label_line_item_daily_{{uuid | sqlsafe}} as nsli + ON nsli.namespace = li.namespace + AND nsli.usage_start = date(li.interval_start) + LEFT JOIN hive.{{schema | sqlsafe}}.__ocp_cluster_capacity_{{uuid | sqlsafe}} as cc + ON cc.usage_start = date(li.interval_start) + -- CROSS JOIN ( + -- SELECT array_agg(distinct key) as enabled_keys + -- FROM postgres.{{schema | sqlsafe}}.reporting_ocpenabledtagkeys + -- ) as ek + WHERE li.source = {{source}} + AND li.year = {{year}} + AND li.month = {{month}} + AND li.interval_start >= TIMESTAMP {{start_date}} + AND li.interval_start < date_add('day', 1, TIMESTAMP {{end_date}}) + GROUP BY date(li.interval_start), + li.namespace, + li.node, + li.source, + 5 /* THIS ORDINAL MUST BE KEPT IN SYNC WITH THE map_filter EXPRESSION */ + /* The map_filter expression was too complex for presto to use */ +) as pua +; + + +/* + * ==================================== + * STORAGE + * ==================================== + */ + + +-- Determine which node a PVC is running on +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__volume_nodes_{{uuid | sqlsafe}}; +CREATE TABLE hive.{{schema | sqlsafe}}.__volume_nodes_{{uuid | sqlsafe}} as ( + SELECT date(sli.interval_start) as usage_start, + sli.persistentvolumeclaim, + max(uli.node) as node + FROM hive.{{schema | sqlsafe}}.openshift_storage_usage_line_items as sli + JOIN hive.{{schema | sqlsafe}}.openshift_pod_usage_line_items as uli + ON uli.source = sli.source + AND uli.namespace = sli.namespace + AND uli.pod = sli.pod + AND date(uli.interval_start) = date(sli.interval_start) + WHERE sli.source = {{source}} + AND sli.year = {{year}} + AND sli.month = {{month}} + AND sli.interval_start >= TIMESTAMP {{start_date}} + AND sli.interval_start < date_add('day', 1, TIMESTAMP {{end_date}}) + AND uli.source = {{source}} + AND uli.year = {{year}} + AND uli.month = {{month}} + -- AND uli.interval_start >= TIMESTAMP {{start_date}} + -- AND uli.interval_start < date_add('day', 1, TIMESTAMP {{end_date}}) + GROUP BY date(sli.interval_start), + sli.persistentvolumeclaim +) +; + +/* + * Delete the old block of data (if any) based on the usage range + * Inserting a record in this log will trigger a delete against the specified table + * in the 
same schema as the log table with the specified where_clause + * start_date and end_date MUST be strings in order for this to work properly. + */ +INSERT INTO postgres.{{schema | sqlsafe}}.presto_delete_wrapper_log ( + id, + action_ts, + table_name, + where_clause, + result_rows +) +VALUES ( + uuid(), + now(), + 'reporting_ocpusagelineitem_daily_summary', + 'where usage_start >= '{{start_date}}'::date ' || + 'and usage_start <= '{{end_date}}'::date ' || + 'and cluster_id = '{{cluster_id}}' ' || + 'and data_source = ''Storage''', + null +) +; + +/* + * This is the target summarization sql for STORAGE usage + * It combines the prior daily summarization query with the final summarization query + * by use of MAP_FILTER to filter the combined node line item labels as well as + * the line-item pod labels against the postgres enabled keys in the same query + */ +INSERT INTO postgres.{{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary ( + uuid, + report_period_id, + cluster_id, + cluster_alias, + data_source, + namespace, + node, + persistentvolumeclaim, + persistentvolume, + storageclass, + usage_start, + usage_end, + volume_labels, + source_uuid, + persistentvolumeclaim_capacity_gigabyte, + persistentvolumeclaim_capacity_gigabyte_months, + volume_request_storage_gigabyte_months, + persistentvolumeclaim_usage_gigabyte_months +) +SELECT uuid() as uuid, + {{report_period_id}} as report_period_id, + {{cluster_id}} as cluster_id, + {{cluster_alias}} as cluster_alias, + 'Storage' as data_source, + sua.namespace, + sua.node, + sua.persistentvolumeclaim, + sua.persistentvolume, + sua.storageclass, + sua.usage_start, + sua.usage_start as usage_end, + cast(sua.volume_labels as json) as volume_labels, + cast(sua.source_uuid as UUID) as source_uuid, + (sua.persistentvolumeclaim_capacity_bytes * + power(2, -30)) as persistentvolumeclaim_capacity_gigibytes, + (sua.persistentvolumeclaim_capacity_byte_seconds / + 86400 * + cast(extract(day from last_day_of_month(date(sua.usage_start))) as integer) * + power(2, -30)) as persistentvolumeclaim_capacity_gigabyte_months, + (sua.volume_request_storage_byte_seconds / + 86400 * + cast(extract(day from last_day_of_month(date(sua.usage_start))) as integer) * + power(2, -30)) as volume_request_storage_gigabyte_months, + (sua.persistentvolumeclaim_usage_byte_seconds / + 86400 * + cast(extract(day from last_day_of_month(date(sua.usage_start))) as integer) * + power(2, -30)) as persistentvolumeclaim_usage_byte_months +FROM ( + SELECT sli.namespace, + vn.node, + sli.persistentvolumeclaim, + sli.persistentvolume, + sli.storageclass, + date(sli.interval_start) as usage_start, + map_concat( + cast(json_parse(coalesce(nli.node_labels, '{}')) as map(varchar, varchar)), + cast(json_parse(coalesce(nsli.namespace_labels, '{}')) as map(varchar, varchar)), + cast(json_parse(sli.persistentvolume_labels) as map(varchar, varchar)), + cast(json_parse(sli.persistentvolumeclaim_labels) as map(varchar, varchar)) + ) as volume_labels, + sli.source as source_uuid, + max(sli.persistentvolumeclaim_capacity_bytes) as persistentvolumeclaim_capacity_bytes, + sum(sli.persistentvolumeclaim_capacity_byte_seconds) as persistentvolumeclaim_capacity_byte_seconds, + sum(sli.volume_request_storage_byte_seconds) as volume_request_storage_byte_seconds, + sum(sli.persistentvolumeclaim_usage_byte_seconds) as persistentvolumeclaim_usage_byte_seconds + FROM hive.{{schema | sqlsafe}}.openshift_storage_usage_line_items sli + LEFT JOIN hive.{{schema | sqlsafe}}.__volume_nodes_{{uuid | sqlsafe}} as vn + ON 
vn.usage_start = date(sli.interval_start) + AND vn.persistentvolumeclaim = sli.persistentvolumeclaim + LEFT JOIN hive.{{schema | sqlsafe}}.__ocp_node_label_line_item_daily_{{uuid | sqlsafe}} as nli + ON nli.node = vn.node + AND nli.usage_start = vn.usage_start + LEFT JOIN hive.{{schema | sqlsafe}}.__ocp_namespace_label_line_item_daily_{{uuid | sqlsafe}} as nsli + ON nsli.namespace = sli.namespace + AND nsli.usage_start = date(sli.interval_start) + WHERE sli.source = {{source}} + AND sli.year = {{year}} + AND sli.month = {{month}} + AND sli.interval_start >= TIMESTAMP {{start_date}} + AND sli.interval_start < date_add('day', 1, TIMESTAMP {{end_date}}) + GROUP BY sli.namespace, + vn.node, + sli.persistentvolumeclaim, + sli.persistentvolume, + sli.storageclass, + date(sli.interval_start), + 7, /* THIS ORDINAL MUST BE KEPT IN SYNC WITH THE map_filter EXPRESSION */ + /* The map_filter expression was too complex for presto to use */ + sli.source +) as sua +; + + +/* + * ==================================== + * CLEANUP + * ==================================== + */ + +DELETE FROM hive.{{schema | sqlsafe}}.__ocp_node_label_line_item_daily_{{uuid | sqlsafe}}; +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__ocp_node_label_line_item_daily_{{uuid | sqlsafe}}; +DELETE FROM hive.{{schema | sqlsafe}}.__ocp_cluster_capacity_{{uuid | sqlsafe}}; +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__ocp_cluster_capacity_{{uuid | sqlsafe}}; +DELETE FROM hive.{{schema | sqlsafe}}.__volume_nodes_{{uuid | sqlsafe}}; +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__volume_nodes_{{uuid | sqlsafe}}; diff --git a/koku/masu/database/sql/reporting_ocpazurecostlineitem_daily_summary.sql b/koku/masu/database/sql/reporting_ocpazurecostlineitem_daily_summary.sql index 8f0ec2d6fb..2ec90f4654 100644 --- a/koku/masu/database/sql/reporting_ocpazurecostlineitem_daily_summary.sql +++ b/koku/masu/database/sql/reporting_ocpazurecostlineitem_daily_summary.sql @@ -226,12 +226,13 @@ CREATE TEMPORARY TABLE reporting_azure_special_case_tags_{{uuid | sqlsafe}} AS ( CREATE TEMPORARY TABLE reporting_ocp_storage_tags_{{uuid | sqlsafe}} AS ( SELECT ocp.*, lower(tag.tag::text)::jsonb as tag - FROM {{schema | sqlsafe}}.reporting_ocpstoragelineitem_daily as ocp + FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp JOIN matched_tags_{{uuid | sqlsafe}} AS tag ON ocp.report_period_id = tag.report_period_id - AND ocp.persistentvolumeclaim_labels @> tag.tag + AND ocp.volume_labels @> tag.tag WHERE ocp.usage_start >= {{start_date}}::date AND ocp.usage_start <= {{end_date}}::date + AND ocp.data_source = 'Storage' --ocp_where_clause {% if cluster_id %} AND cluster_id = {{cluster_id}} @@ -242,12 +243,13 @@ CREATE TEMPORARY TABLE reporting_ocp_storage_tags_{{uuid | sqlsafe}} AS ( CREATE TEMPORARY TABLE reporting_ocp_pod_tags_{{uuid | sqlsafe}} AS ( SELECT ocp.*, lower(tag.tag::text)::jsonb as tag - FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily as ocp + FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp JOIN matched_tags_{{uuid | sqlsafe}} AS tag ON ocp.report_period_id = tag.report_period_id AND ocp.pod_labels @> tag.tag WHERE ocp.usage_start >= {{start_date}}::date AND ocp.usage_start <= {{end_date}}::date + AND ocp.data_source = 'Pod' --ocp_where_clause {% if cluster_id %} AND cluster_id = {{cluster_id}} @@ -264,25 +266,24 @@ DROP TABLE matched_tags_{{uuid | sqlsafe}}; -- resource id match. This usually means OCP node -> Azure Virutal Machine. 
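-- Illustrative cost apportionment (made-up numbers): using the formula below,
-- (pod_usage_cpu_core_hours / node_capacity_cpu_core_hours) * pretax_cost,
-- a pod that used 2 CPU core-hours on a node with 16 core-hours of capacity takes
-- 2/16 of an $8.00 Azure line item, i.e. project_cost = $1.00. The tag-matched
-- inserts further down instead split pretax_cost evenly across shared_projects.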
CREATE TEMPORARY TABLE reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} AS ( WITH cte_resource_id_matched AS ( - SELECT ocp.id AS ocp_id, + SELECT ocp.uuid AS ocp_id, ocp.report_period_id, ocp.cluster_id, ocp.cluster_alias, ocp.namespace, - ocp.pod, ocp.node, ocp.pod_labels, - ocp.pod_usage_cpu_core_seconds, - ocp.pod_request_cpu_core_seconds, - ocp.pod_limit_cpu_core_seconds, - ocp.pod_usage_memory_byte_seconds, - ocp.pod_request_memory_byte_seconds, + ocp.pod_usage_cpu_core_hours, + ocp.pod_request_cpu_core_hours, + ocp.pod_limit_cpu_core_hours, + ocp.pod_usage_memory_gigabyte_hours, + ocp.pod_request_memory_gigabyte_hours, ocp.node_capacity_cpu_cores, - ocp.node_capacity_cpu_core_seconds, - ocp.node_capacity_memory_bytes, - ocp.node_capacity_memory_byte_seconds, - ocp.cluster_capacity_cpu_core_seconds, - ocp.cluster_capacity_memory_byte_seconds, + ocp.node_capacity_cpu_core_hours, + ocp.node_capacity_memory_gigabytes, + ocp.node_capacity_memory_gigabyte_hours, + ocp.cluster_capacity_cpu_core_hours, + ocp.cluster_capacity_memory_gigabyte_hours, azure.id AS azure_id, azure.cost_entry_bill_id, azure.cost_entry_product_id, @@ -295,7 +296,7 @@ CREATE TEMPORARY TABLE reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} FROM reporting_azure_with_enabled_tags_{{uuid | sqlsafe}} as azure JOIN {{schema | sqlsafe}}.reporting_azurecostentryproductservice as aps ON azure.cost_entry_product_id = aps.id - JOIN {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily as ocp + JOIN {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp -- NOTE: We would normally use ocp.resource_id -- For this JOIN, but it is not guaranteed to be correct -- in the current Operator Metering version @@ -305,6 +306,7 @@ CREATE TEMPORARY TABLE reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} AND azure.usage_date = ocp.usage_start WHERE azure.usage_date >= {{start_date}}::date AND azure.usage_date <= {{end_date}}::date + AND ocp.data_source = 'Pod' -- azure_where_clause {% if bill_ids %} AND cost_entry_bill_id IN ( @@ -323,22 +325,13 @@ CREATE TEMPORARY TABLE reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} count(DISTINCT namespace) as shared_projects FROM cte_resource_id_matched GROUP BY azure_id - ), - cte_number_of_shared_pods AS ( - SELECT azure_id, - count(DISTINCT pod) as shared_pods - FROM cte_resource_id_matched - GROUP BY azure_id ) SELECT rm.*, - (rm.pod_usage_cpu_core_seconds / rm.node_capacity_cpu_core_seconds) * rm.pretax_cost as pod_cost, - sp.shared_projects, - spod.shared_pods + (rm.pod_usage_cpu_core_hours / rm.node_capacity_cpu_core_hours) * rm.pretax_cost as project_cost, + sp.shared_projects FROM cte_resource_id_matched AS rm JOIN cte_number_of_shared_projects AS sp ON rm.azure_id = sp.azure_id - JOIN cte_number_of_shared_pods AS spod - ON rm.azure_id = spod.azure_id ) ; @@ -346,25 +339,24 @@ CREATE TEMPORARY TABLE reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} -- and the value matches an OpenShift project name INSERT INTO reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} ( WITH cte_tag_matched AS ( - SELECT ocp.id AS ocp_id, + SELECT ocp.uuid AS ocp_id, ocp.report_period_id, ocp.cluster_id, ocp.cluster_alias, ocp.namespace, - ocp.pod, ocp.node, ocp.pod_labels, - ocp.pod_usage_cpu_core_seconds, - ocp.pod_request_cpu_core_seconds, - ocp.pod_limit_cpu_core_seconds, - ocp.pod_usage_memory_byte_seconds, - ocp.pod_request_memory_byte_seconds, + ocp.pod_usage_cpu_core_hours, + ocp.pod_request_cpu_core_hours, + ocp.pod_limit_cpu_core_hours, + 
ocp.pod_usage_memory_gigabyte_hours, + ocp.pod_request_memory_gigabyte_hours, ocp.node_capacity_cpu_cores, - ocp.node_capacity_cpu_core_seconds, - ocp.node_capacity_memory_bytes, - ocp.node_capacity_memory_byte_seconds, - ocp.cluster_capacity_cpu_core_seconds, - ocp.cluster_capacity_memory_byte_seconds, + ocp.node_capacity_cpu_core_hours, + ocp.node_capacity_memory_gigabytes, + ocp.node_capacity_memory_gigabyte_hours, + ocp.cluster_capacity_cpu_core_hours, + ocp.cluster_capacity_memory_gigabyte_hours, azure.id AS azure_id, azure.cost_entry_bill_id, azure.cost_entry_product_id, @@ -375,7 +367,7 @@ INSERT INTO reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} ( azure.pretax_cost, azure.tags FROM reporting_azure_special_case_tags_{{uuid | sqlsafe}} as azure - JOIN {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily as ocp + JOIN {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp ON azure.key = 'openshift_project' AND azure.value = lower(ocp.namespace) AND azure.usage_date = ocp.usage_start -- ANTI JOIN to remove rows that already matched @@ -383,6 +375,7 @@ INSERT INTO reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} ( ON rm.azure_id = azure.id WHERE azure.usage_date >= {{start_date}}::date AND azure.usage_date <= {{end_date}}::date + AND ocp.data_source = 'Pod' AND rm.azure_id IS NULL ), cte_number_of_shared_projects AS ( @@ -390,22 +383,13 @@ INSERT INTO reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} ( count(DISTINCT namespace) as shared_projects FROM cte_tag_matched GROUP BY azure_id - ), - cte_number_of_shared_pods AS ( - SELECT azure_id, - count(DISTINCT pod) as shared_pods - FROM cte_tag_matched - GROUP BY azure_id ) SELECT tm.*, - tm.pretax_cost / spod.shared_pods as pod_cost, - sp.shared_projects, - spod.shared_pods + tm.pretax_cost / sp.shared_projects as project_cost, + sp.shared_projects FROM cte_tag_matched AS tm JOIN cte_number_of_shared_projects AS sp ON tm.azure_id = sp.azure_id - JOIN cte_number_of_shared_pods AS spod - ON tm.azure_id = spod.azure_id ) ; @@ -413,25 +397,24 @@ INSERT INTO reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} ( -- and the value matches an OpenShift node name INSERT INTO reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} ( WITH cte_tag_matched AS ( - SELECT ocp.id AS ocp_id, + SELECT ocp.uuid AS ocp_id, ocp.report_period_id, ocp.cluster_id, ocp.cluster_alias, ocp.namespace, - ocp.pod, ocp.node, ocp.pod_labels, - ocp.pod_usage_cpu_core_seconds, - ocp.pod_request_cpu_core_seconds, - ocp.pod_limit_cpu_core_seconds, - ocp.pod_usage_memory_byte_seconds, - ocp.pod_request_memory_byte_seconds, + ocp.pod_usage_cpu_core_hours, + ocp.pod_request_cpu_core_hours, + ocp.pod_limit_cpu_core_hours, + ocp.pod_usage_memory_gigabyte_hours, + ocp.pod_request_memory_gigabyte_hours, ocp.node_capacity_cpu_cores, - ocp.node_capacity_cpu_core_seconds, - ocp.node_capacity_memory_bytes, - ocp.node_capacity_memory_byte_seconds, - ocp.cluster_capacity_cpu_core_seconds, - ocp.cluster_capacity_memory_byte_seconds, + ocp.node_capacity_cpu_core_hours, + ocp.node_capacity_memory_gigabytes, + ocp.node_capacity_memory_gigabyte_hours, + ocp.cluster_capacity_cpu_core_hours, + ocp.cluster_capacity_memory_gigabyte_hours, azure.id AS azure_id, azure.cost_entry_bill_id, azure.cost_entry_product_id, @@ -442,7 +425,7 @@ INSERT INTO reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} ( azure.pretax_cost, azure.tags FROM reporting_azure_special_case_tags_{{uuid | sqlsafe}} as azure - JOIN {{schema | 
sqlsafe}}.reporting_ocpusagelineitem_daily as ocp + JOIN {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp ON azure.key = 'openshift_node' AND azure.value = lower(ocp.node) AND azure.usage_date = ocp.usage_start -- ANTI JOIN to remove rows that already matched @@ -450,6 +433,7 @@ INSERT INTO reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} ( ON rm.azure_id = azure.id WHERE azure.usage_date >= {{start_date}}::date AND azure.usage_date <= {{end_date}}::date + AND ocp.data_source = 'Pod' AND rm.azure_id IS NULL ), cte_number_of_shared_projects AS ( @@ -457,22 +441,13 @@ INSERT INTO reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} ( count(DISTINCT namespace) as shared_projects FROM cte_tag_matched GROUP BY azure_id - ), - cte_number_of_shared_pods AS ( - SELECT azure_id, - count(DISTINCT pod) as shared_pods - FROM cte_tag_matched - GROUP BY azure_id ) SELECT tm.*, - tm.pretax_cost / spod.shared_pods as pod_cost, - sp.shared_projects, - spod.shared_pods + tm.pretax_cost / sp.shared_projects as project_cost, + sp.shared_projects FROM cte_tag_matched AS tm JOIN cte_number_of_shared_projects AS sp ON tm.azure_id = sp.azure_id - JOIN cte_number_of_shared_pods AS spod - ON tm.azure_id = spod.azure_id ) ; @@ -480,25 +455,24 @@ INSERT INTO reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} ( -- and the value matches an OpenShift cluster name INSERT INTO reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} ( WITH cte_tag_matched AS ( - SELECT ocp.id AS ocp_id, + SELECT ocp.uuid AS ocp_id, ocp.report_period_id, ocp.cluster_id, ocp.cluster_alias, ocp.namespace, - ocp.pod, ocp.node, ocp.pod_labels, - ocp.pod_usage_cpu_core_seconds, - ocp.pod_request_cpu_core_seconds, - ocp.pod_limit_cpu_core_seconds, - ocp.pod_usage_memory_byte_seconds, - ocp.pod_request_memory_byte_seconds, + ocp.pod_usage_cpu_core_hours, + ocp.pod_request_cpu_core_hours, + ocp.pod_limit_cpu_core_hours, + ocp.pod_usage_memory_gigabyte_hours, + ocp.pod_request_memory_gigabyte_hours, ocp.node_capacity_cpu_cores, - ocp.node_capacity_cpu_core_seconds, - ocp.node_capacity_memory_bytes, - ocp.node_capacity_memory_byte_seconds, - ocp.cluster_capacity_cpu_core_seconds, - ocp.cluster_capacity_memory_byte_seconds, + ocp.node_capacity_cpu_core_hours, + ocp.node_capacity_memory_gigabytes, + ocp.node_capacity_memory_gigabyte_hours, + ocp.cluster_capacity_cpu_core_hours, + ocp.cluster_capacity_memory_gigabyte_hours, azure.id AS azure_id, azure.cost_entry_bill_id, azure.cost_entry_product_id, @@ -509,7 +483,7 @@ INSERT INTO reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} ( azure.pretax_cost, azure.tags FROM reporting_azure_special_case_tags_{{uuid | sqlsafe}} as azure - JOIN {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily as ocp + JOIN {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp ON (azure.key = 'openshift_cluster' AND azure.value = lower(ocp.cluster_id) OR azure.key = 'openshift_cluster' AND azure.value = lower(ocp.cluster_alias)) AND azure.usage_date = ocp.usage_start @@ -518,6 +492,7 @@ INSERT INTO reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} ( ON rm.azure_id = azure.id WHERE azure.usage_date >= {{start_date}}::date AND azure.usage_date <= {{end_date}}::date + AND ocp.data_source = 'Pod' AND rm.azure_id IS NULL ), cte_number_of_shared_projects AS ( @@ -525,22 +500,13 @@ INSERT INTO reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} ( count(DISTINCT namespace) as shared_projects FROM cte_tag_matched GROUP BY azure_id - ), - cte_number_of_shared_pods 
AS ( - SELECT azure_id, - count(DISTINCT pod) as shared_pods - FROM cte_tag_matched - GROUP BY azure_id ) SELECT tm.*, - tm.pretax_cost / spod.shared_pods as pod_cost, - sp.shared_projects, - spod.shared_pods + tm.pretax_cost / sp.shared_projects as project_cost, + sp.shared_projects FROM cte_tag_matched AS tm JOIN cte_number_of_shared_projects AS sp ON tm.azure_id = sp.azure_id - JOIN cte_number_of_shared_pods AS spod - ON tm.azure_id = spod.azure_id ) ; @@ -548,25 +514,24 @@ INSERT INTO reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} ( -- and Azure tag key and value match directly INSERT INTO reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} ( WITH cte_tag_matched AS ( - SELECT ocp.id AS ocp_id, + SELECT ocp.uuid AS ocp_id, ocp.report_period_id, ocp.cluster_id, ocp.cluster_alias, ocp.namespace, - ocp.pod, ocp.node, ocp.pod_labels, - ocp.pod_usage_cpu_core_seconds, - ocp.pod_request_cpu_core_seconds, - ocp.pod_limit_cpu_core_seconds, - ocp.pod_usage_memory_byte_seconds, - ocp.pod_request_memory_byte_seconds, + ocp.pod_usage_cpu_core_hours, + ocp.pod_request_cpu_core_hours, + ocp.pod_limit_cpu_core_hours, + ocp.pod_usage_memory_gigabyte_hours, + ocp.pod_request_memory_gigabyte_hours, ocp.node_capacity_cpu_cores, - ocp.node_capacity_cpu_core_seconds, - ocp.node_capacity_memory_bytes, - ocp.node_capacity_memory_byte_seconds, - ocp.cluster_capacity_cpu_core_seconds, - ocp.cluster_capacity_memory_byte_seconds, + ocp.node_capacity_cpu_core_hours, + ocp.node_capacity_memory_gigabytes, + ocp.node_capacity_memory_gigabyte_hours, + ocp.cluster_capacity_cpu_core_hours, + ocp.cluster_capacity_memory_gigabyte_hours, azure.id AS azure_id, azure.cost_entry_bill_id, azure.cost_entry_product_id, @@ -592,22 +557,13 @@ INSERT INTO reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} ( count(DISTINCT namespace) as shared_projects FROM cte_tag_matched GROUP BY azure_id - ), - cte_number_of_shared_pods AS ( - SELECT azure_id, - count(DISTINCT pod) as shared_pods - FROM cte_tag_matched - GROUP BY azure_id ) SELECT tm.*, - tm.pretax_cost / spod.shared_pods as pod_cost, - sp.shared_projects, - spod.shared_pods + tm.pretax_cost / sp.shared_projects as project_cost, + sp.shared_projects FROM cte_tag_matched AS tm JOIN cte_number_of_shared_projects AS sp ON tm.azure_id = sp.azure_id - JOIN cte_number_of_shared_pods AS spod - ON tm.azure_id = spod.azure_id ) ; @@ -620,22 +576,20 @@ DROP TABLE reporting_ocp_pod_tags_{{uuid | sqlsafe}}; -- resource id match. OCP PVC name -> Azure instance ID. 
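The pod-side SELECTs above attribute each matched Azure line item to OpenShift projects in one of two ways: resource-id matches are weighted by the project's share of node CPU capacity, while tag matches are split evenly across the distinct projects that matched. A minimal Python illustration of those two rules, with made-up numbers (the real work happens in the SQL above):

    # Resource-id match: weight the Azure cost by the project's share of
    # the node's CPU capacity for the day.
    def resource_matched_project_cost(pod_usage_cpu_core_hours,
                                      node_capacity_cpu_core_hours,
                                      pretax_cost):
        return (pod_usage_cpu_core_hours / node_capacity_cpu_core_hours) * pretax_cost

    # Tag match: split the Azure cost evenly across the distinct projects
    # that matched the same Azure line item.
    def tag_matched_project_cost(pretax_cost, shared_projects):
        return pretax_cost / shared_projects

    assert resource_matched_project_cost(6.0, 24.0, 100.0) == 25.0
    assert tag_matched_project_cost(30.0, 3) == 10.0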
CREATE TEMPORARY TABLE reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} AS ( WITH cte_resource_id_matched AS ( - SELECT ocp.id AS ocp_id, + SELECT ocp.uuid AS ocp_id, ocp.report_period_id, ocp.cluster_id, ocp.cluster_alias, ocp.namespace, - ocp.pod, ocp.node, ocp.persistentvolumeclaim, ocp.persistentvolume, ocp.storageclass, - ocp.persistentvolumeclaim_capacity_bytes, - ocp.persistentvolumeclaim_capacity_byte_seconds, - ocp.volume_request_storage_byte_seconds, - ocp.persistentvolumeclaim_usage_byte_seconds, - ocp.persistentvolume_labels, - ocp.persistentvolumeclaim_labels, + ocp.persistentvolumeclaim_capacity_gigabyte, + ocp.persistentvolumeclaim_capacity_gigabyte_months, + ocp.volume_request_storage_gigabyte_months, + ocp.persistentvolumeclaim_usage_gigabyte_months, + ocp.volume_labels, azure.id AS azure_id, azure.cost_entry_bill_id, azure.cost_entry_product_id, @@ -648,12 +602,13 @@ CREATE TEMPORARY TABLE reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe} FROM reporting_azure_with_enabled_tags_{{uuid | sqlsafe}} as azure JOIN {{schema | sqlsafe}}.reporting_azurecostentryproductservice as aps ON azure.cost_entry_product_id = aps.id - JOIN {{schema | sqlsafe}}.reporting_ocpstoragelineitem_daily as ocp + JOIN {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp -- Need the doubl percent here for Jinja templating ON split_part(aps.instance_id, '/', 9) LIKE '%%' || ocp.persistentvolume AND azure.usage_date = ocp.usage_start WHERE azure.usage_date >= {{start_date}}::date AND azure.usage_date <= {{end_date}}::date + AND ocp.data_source = 'Storage' -- azure_where_clause {% if bill_ids %} AND cost_entry_bill_id IN ( @@ -672,22 +627,13 @@ CREATE TEMPORARY TABLE reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe} count(DISTINCT namespace) as shared_projects FROM cte_resource_id_matched GROUP BY azure_id - ), - cte_number_of_shared_pods AS ( - SELECT azure_id, - count(DISTINCT pod) as shared_pods - FROM cte_resource_id_matched - GROUP BY azure_id ) SELECT rm.*, - (rm.persistentvolumeclaim_usage_byte_seconds / rm.persistentvolumeclaim_capacity_byte_seconds) * rm.pretax_cost as pod_cost, - sp.shared_projects, - spod.shared_pods + (rm.persistentvolumeclaim_usage_gigabyte_months / rm.persistentvolumeclaim_capacity_gigabyte_months) * rm.pretax_cost as project_cost, + sp.shared_projects FROM cte_resource_id_matched AS rm JOIN cte_number_of_shared_projects AS sp ON rm.azure_id = sp.azure_id - JOIN cte_number_of_shared_pods AS spod - ON rm.azure_id = spod.azure_id ) ; @@ -695,22 +641,20 @@ CREATE TEMPORARY TABLE reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe} -- and the value matches an OpenShift project name INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( WITH cte_tag_matched AS ( - SELECT ocp.id AS ocp_id, + SELECT ocp.uuid AS ocp_id, ocp.report_period_id, ocp.cluster_id, ocp.cluster_alias, ocp.namespace, - ocp.pod, ocp.node, ocp.persistentvolumeclaim, ocp.persistentvolume, ocp.storageclass, - ocp.persistentvolumeclaim_capacity_bytes, - ocp.persistentvolumeclaim_capacity_byte_seconds, - ocp.volume_request_storage_byte_seconds, - ocp.persistentvolumeclaim_usage_byte_seconds, - ocp.persistentvolume_labels, - ocp.persistentvolumeclaim_labels, + ocp.persistentvolumeclaim_capacity_gigabyte, + ocp.persistentvolumeclaim_capacity_gigabyte_months, + ocp.volume_request_storage_gigabyte_months, + ocp.persistentvolumeclaim_usage_gigabyte_months, + ocp.volume_labels, azure.id AS azure_id, azure.cost_entry_bill_id, azure.cost_entry_product_id, @@ 
-721,7 +665,7 @@ INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( azure.pretax_cost, azure.tags FROM reporting_azure_special_case_tags_{{uuid | sqlsafe}} as azure - JOIN {{schema | sqlsafe}}.reporting_ocpstoragelineitem_daily as ocp + JOIN {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp ON azure.key = 'openshift_project' AND azure.value = lower(ocp.namespace) AND azure.usage_date = ocp.usage_start -- ANTI JOIN to remove rows that already matched @@ -729,6 +673,7 @@ INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( ON rm.azure_id = azure.id WHERE azure.usage_date >= {{start_date}}::date AND azure.usage_date <= {{end_date}}::date + AND ocp.data_source = 'Storage' AND rm.azure_id IS NULL ), cte_number_of_shared_projects AS ( @@ -736,22 +681,13 @@ INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( count(DISTINCT namespace) as shared_projects FROM cte_tag_matched GROUP BY azure_id - ), - cte_number_of_shared_pods AS ( - SELECT azure_id, - count(DISTINCT pod) as shared_pods - FROM cte_tag_matched - GROUP BY azure_id ) SELECT tm.*, - tm.pretax_cost / spod.shared_pods as pod_cost, - sp.shared_projects, - spod.shared_pods + tm.pretax_cost / sp.shared_projects as project_cost, + sp.shared_projects FROM cte_tag_matched AS tm JOIN cte_number_of_shared_projects AS sp ON tm.azure_id = sp.azure_id - JOIN cte_number_of_shared_pods AS spod - ON tm.azure_id = spod.azure_id ) ; @@ -759,22 +695,20 @@ INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( -- and the value matches an OpenShift node name INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( WITH cte_tag_matched AS ( - SELECT ocp.id AS ocp_id, + SELECT ocp.uuid AS ocp_id, ocp.report_period_id, ocp.cluster_id, ocp.cluster_alias, ocp.namespace, - ocp.pod, ocp.node, ocp.persistentvolumeclaim, ocp.persistentvolume, ocp.storageclass, - ocp.persistentvolumeclaim_capacity_bytes, - ocp.persistentvolumeclaim_capacity_byte_seconds, - ocp.volume_request_storage_byte_seconds, - ocp.persistentvolumeclaim_usage_byte_seconds, - ocp.persistentvolume_labels, - ocp.persistentvolumeclaim_labels, + ocp.persistentvolumeclaim_capacity_gigabyte, + ocp.persistentvolumeclaim_capacity_gigabyte_months, + ocp.volume_request_storage_gigabyte_months, + ocp.persistentvolumeclaim_usage_gigabyte_months, + ocp.volume_labels, azure.id AS azure_id, azure.cost_entry_bill_id, azure.cost_entry_product_id, @@ -785,7 +719,7 @@ INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( azure.pretax_cost, azure.tags FROM reporting_azure_special_case_tags_{{uuid | sqlsafe}} as azure - JOIN {{schema | sqlsafe}}.reporting_ocpstoragelineitem_daily as ocp + JOIN {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp ON azure.key = 'openshift_node' AND azure.value = lower(ocp.node) AND azure.usage_date = ocp.usage_start -- ANTI JOIN to remove rows that already matched @@ -793,6 +727,7 @@ INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( ON rm.azure_id = azure.id WHERE azure.usage_date >= {{start_date}}::date AND azure.usage_date <= {{end_date}}::date + AND ocp.data_source = 'Storage' AND rm.azure_id IS NULL ), cte_number_of_shared_projects AS ( @@ -800,22 +735,13 @@ INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( count(DISTINCT namespace) as shared_projects FROM cte_tag_matched GROUP BY azure_id - ), - cte_number_of_shared_pods AS ( - SELECT azure_id, - count(DISTINCT pod) as shared_pods - FROM 
cte_tag_matched - GROUP BY azure_id ) SELECT tm.*, - tm.pretax_cost / spod.shared_pods as pod_cost, - sp.shared_projects, - spod.shared_pods + tm.pretax_cost / sp.shared_projects as project_cost, + sp.shared_projects FROM cte_tag_matched AS tm JOIN cte_number_of_shared_projects AS sp ON tm.azure_id = sp.azure_id - JOIN cte_number_of_shared_pods AS spod - ON tm.azure_id = spod.azure_id ) ; @@ -823,22 +749,20 @@ INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( -- and the value matches an OpenShift cluster name INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( WITH cte_tag_matched AS ( - SELECT ocp.id AS ocp_id, + SELECT ocp.uuid AS ocp_id, ocp.report_period_id, ocp.cluster_id, ocp.cluster_alias, ocp.namespace, - ocp.pod, ocp.node, ocp.persistentvolumeclaim, ocp.persistentvolume, ocp.storageclass, - ocp.persistentvolumeclaim_capacity_bytes, - ocp.persistentvolumeclaim_capacity_byte_seconds, - ocp.volume_request_storage_byte_seconds, - ocp.persistentvolumeclaim_usage_byte_seconds, - ocp.persistentvolume_labels, - ocp.persistentvolumeclaim_labels, + ocp.persistentvolumeclaim_capacity_gigabyte, + ocp.persistentvolumeclaim_capacity_gigabyte_months, + ocp.volume_request_storage_gigabyte_months, + ocp.persistentvolumeclaim_usage_gigabyte_months, + ocp.volume_labels, azure.id AS azure_id, azure.cost_entry_bill_id, azure.cost_entry_product_id, @@ -849,7 +773,7 @@ INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( azure.pretax_cost, azure.tags FROM reporting_azure_special_case_tags_{{uuid | sqlsafe}} as azure - JOIN {{schema | sqlsafe}}.reporting_ocpstoragelineitem_daily as ocp + JOIN {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp ON (azure.key = 'openshift_cluster' AND azure.value = lower(ocp.cluster_id) OR azure.key = 'openshift_cluster' AND azure.value = lower(ocp.cluster_alias)) AND azure.usage_date = ocp.usage_start @@ -858,6 +782,7 @@ INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( ON rm.azure_id = azure.id WHERE azure.usage_date >= {{start_date}}::date AND azure.usage_date <= {{end_date}}::date + AND ocp.data_source = 'Storage' AND rm.azure_id IS NULL ), cte_number_of_shared_projects AS ( @@ -865,44 +790,33 @@ INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( count(DISTINCT namespace) as shared_projects FROM cte_tag_matched GROUP BY azure_id - ), - cte_number_of_shared_pods AS ( - SELECT azure_id, - count(DISTINCT pod) as shared_pods - FROM cte_tag_matched - GROUP BY azure_id ) SELECT tm.*, - tm.pretax_cost / spod.shared_pods as pod_cost, - sp.shared_projects, - spod.shared_pods + tm.pretax_cost / sp.shared_projects as project_cost, + sp.shared_projects FROM cte_tag_matched AS tm JOIN cte_number_of_shared_projects AS sp ON tm.azure_id = sp.azure_id - JOIN cte_number_of_shared_pods AS spod - ON tm.azure_id = spod.azure_id ) ; -- Next we match where the azure tag is kubernetes.io-created-for-pv-name INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( WITH cte_tag_matched AS ( - SELECT ocp.id AS ocp_id, + SELECT ocp.uuid AS ocp_id, ocp.report_period_id, ocp.cluster_id, ocp.cluster_alias, ocp.namespace, - ocp.pod, ocp.node, ocp.persistentvolumeclaim, ocp.persistentvolume, ocp.storageclass, - ocp.persistentvolumeclaim_capacity_bytes, - ocp.persistentvolumeclaim_capacity_byte_seconds, - ocp.volume_request_storage_byte_seconds, - ocp.persistentvolumeclaim_usage_byte_seconds, - ocp.persistentvolume_labels, - ocp.persistentvolumeclaim_labels, + 
ocp.persistentvolumeclaim_capacity_gigabyte, + ocp.persistentvolumeclaim_capacity_gigabyte_months, + ocp.volume_request_storage_gigabyte_months, + ocp.persistentvolumeclaim_usage_gigabyte_months, + ocp.volume_labels, azure.id AS azure_id, azure.cost_entry_bill_id, azure.cost_entry_product_id, @@ -913,7 +827,7 @@ INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( azure.pretax_cost, azure.tags FROM reporting_azure_special_case_tags_{{uuid | sqlsafe}} as azure - JOIN {{schema | sqlsafe}}.reporting_ocpstoragelineitem_daily as ocp + JOIN {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp ON azure.key = 'kubernetes.io-created-for-pv-name' AND azure.value = lower(ocp.persistentvolume) -- ANTI JOIN to remove rows that already matched @@ -921,6 +835,7 @@ INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( ON rm.azure_id = azure.id WHERE azure.usage_date >= {{start_date}}::date AND azure.usage_date <= {{end_date}}::date + AND ocp.data_source = 'Storage' AND rm.azure_id IS NULL ), cte_number_of_shared_projects AS ( @@ -928,22 +843,13 @@ INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( count(DISTINCT namespace) as shared_projects FROM cte_tag_matched GROUP BY azure_id - ), - cte_number_of_shared_pods AS ( - SELECT azure_id, - count(DISTINCT pod) as shared_pods - FROM cte_tag_matched - GROUP BY azure_id ) SELECT tm.*, - tm.pretax_cost / spod.shared_pods as pod_cost, - sp.shared_projects, - spod.shared_pods + tm.pretax_cost / sp.shared_projects as project_cost, + sp.shared_projects FROM cte_tag_matched AS tm JOIN cte_number_of_shared_projects AS sp ON tm.azure_id = sp.azure_id - JOIN cte_number_of_shared_pods AS spod - ON tm.azure_id = spod.azure_id ) ; @@ -956,22 +862,20 @@ DROP TABLE reporting_azure_special_case_tags_{{uuid | sqlsafe}}; -- and azure tag key and value match directly INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( WITH cte_tag_matched AS ( - SELECT ocp.id AS ocp_id, + SELECT ocp.uuid AS ocp_id, ocp.report_period_id, ocp.cluster_id, ocp.cluster_alias, ocp.namespace, - ocp.pod, ocp.node, ocp.persistentvolumeclaim, ocp.persistentvolume, ocp.storageclass, - ocp.persistentvolumeclaim_capacity_bytes, - ocp.persistentvolumeclaim_capacity_byte_seconds, - ocp.volume_request_storage_byte_seconds, - ocp.persistentvolumeclaim_usage_byte_seconds, - ocp.persistentvolume_labels, - ocp.persistentvolumeclaim_labels, + ocp.persistentvolumeclaim_capacity_gigabyte, + ocp.persistentvolumeclaim_capacity_gigabyte_months, + ocp.volume_request_storage_gigabyte_months, + ocp.persistentvolumeclaim_usage_gigabyte_months, + ocp.volume_labels, azure.id AS azure_id, azure.cost_entry_bill_id, azure.cost_entry_product_id, @@ -997,22 +901,13 @@ INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( count(DISTINCT namespace) as shared_projects FROM cte_tag_matched GROUP BY azure_id - ), - cte_number_of_shared_pods AS ( - SELECT azure_id, - count(DISTINCT pod) as shared_pods - FROM cte_tag_matched - GROUP BY azure_id ) SELECT tm.*, - tm.pretax_cost / spod.shared_pods as pod_cost, - sp.shared_projects, - spod.shared_pods + tm.pretax_cost / sp.shared_projects as project_cost, + sp.shared_projects FROM cte_tag_matched AS tm JOIN cte_number_of_shared_projects AS sp ON tm.azure_id = sp.azure_id - JOIN cte_number_of_shared_pods AS spod - ON tm.azure_id = spod.azure_id ) ; @@ -1070,11 +965,11 @@ CREATE TEMPORARY TABLE reporting_ocpazurecostlineitem_daily_summary_{{uuid | sql ), cte_pod_project_cost AS ( 
SELECT pc.azure_id, - jsonb_object_agg(pc.namespace, pc.pod_cost) as project_costs + jsonb_object_agg(pc.namespace, pc.project_cost) as project_costs FROM ( SELECT li.azure_id, li.namespace, - sum(pod_cost) as pod_cost + sum(project_cost) as project_cost FROM reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} as li GROUP BY li.azure_id, li.namespace ) AS pc @@ -1082,11 +977,11 @@ CREATE TEMPORARY TABLE reporting_ocpazurecostlineitem_daily_summary_{{uuid | sql ), cte_storage_project_cost AS ( SELECT pc.azure_id, - jsonb_object_agg(pc.namespace, pc.pod_cost) as project_costs + jsonb_object_agg(pc.namespace, pc.project_cost) as project_costs FROM ( SELECT li.azure_id, li.namespace, - sum(pod_cost) as pod_cost + sum(project_cost) as project_cost FROM reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} as li GROUP BY li.azure_id, li.namespace ) AS pc @@ -1096,7 +991,6 @@ CREATE TEMPORARY TABLE reporting_ocpazurecostlineitem_daily_summary_{{uuid | sql max(li.cluster_id) as cluster_id, max(li.cluster_alias) as cluster_alias, array_agg(DISTINCT li.namespace) as namespace, - array_agg(DISTINCT li.pod) as pod, max(li.node) as node, max(li.usage_date) as usage_start, max(li.usage_date) as usage_end, @@ -1137,7 +1031,6 @@ CREATE TEMPORARY TABLE reporting_ocpazurecostlineitem_daily_summary_{{uuid | sql max(li.cluster_id) as cluster_id, max(li.cluster_alias) as cluster_alias, array_agg(DISTINCT li.namespace) as namespace, - array_agg(DISTINCT li.pod) as pod, max(li.node) as node, max(li.usage_date) as usage_start, max(li.usage_date) as usage_end, @@ -1228,7 +1121,6 @@ CREATE TEMPORARY TABLE reporting_ocpazurecostlineitem_project_daily_summary_{{uu li.cluster_alias, 'Pod' as data_source, li.namespace, - li.pod, li.node, li.pod_labels, max(li.usage_date) as usage_start, @@ -1241,12 +1133,12 @@ CREATE TEMPORARY TABLE reporting_ocpazurecostlineitem_project_daily_summary_{{uu max(split_part(p.instance_id, '/', 9)) as resource_id, max(m.currency) as currency, max(suu.unit_of_measure) as unit_of_measure, - max((li.usage_quantity * suu.multiplier) / li.shared_pods) as usage_quantity, - sum(li.pretax_cost / li.shared_pods) as pretax_cost, - sum(li.pretax_cost / li.shared_pods) * {{markup}}::numeric as markup_cost, - max(li.shared_pods) as shared_pods, - li.pod_cost, - li.pod_cost * {{markup}}::numeric as project_markup_cost, + max((li.usage_quantity * suu.multiplier) / li.shared_projects) as usage_quantity, + sum(li.pretax_cost / li.shared_projects) as pretax_cost, + sum(li.pretax_cost / li.shared_projects) * {{markup}}::numeric as markup_cost, + max(li.shared_projects) as shared_projects, + li.project_cost, + li.project_cost * {{markup}}::numeric as project_markup_cost, ab.provider_id as source_uuid FROM reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} as li JOIN {{schema | sqlsafe}}.reporting_azurecostentryproductservice AS p @@ -1265,10 +1157,9 @@ CREATE TEMPORARY TABLE reporting_ocpazurecostlineitem_project_daily_summary_{{uu li.cluster_id, li.cluster_alias, li.namespace, - li.pod, li.node, li.pod_labels, - li.pod_cost, + li.project_cost, ab.provider_id UNION @@ -1278,9 +1169,8 @@ CREATE TEMPORARY TABLE reporting_ocpazurecostlineitem_project_daily_summary_{{uu li.cluster_alias, 'Storage' as data_source, li.namespace, - li.pod, li.node, - li.persistentvolume_labels || li.persistentvolumeclaim_labels as pod_labels, + li.volume_labels as pod_labels, max(li.usage_date) as usage_start, max(li.usage_date) as usage_end, max(li.cost_entry_bill_id) as cost_entry_bill_id, @@ -1291,12 +1181,12 @@ 
CREATE TEMPORARY TABLE reporting_ocpazurecostlineitem_project_daily_summary_{{uu max(split_part(p.instance_id, '/', 9)) as resource_id, max(m.currency) as currency, max(sus.unit_of_measure) as unit_of_measure, - max((li.usage_quantity * sus.multiplier) / li.shared_pods) as usage_quantity, - sum(li.pretax_cost / li.shared_pods) as pretax_cost, - sum(li.pretax_cost / li.shared_pods) * {{markup}}::numeric as markup_cost, - max(li.shared_pods) as shared_pods, - li.pod_cost, - li.pod_cost * {{markup}}::numeric as project_markup_cost, + max((li.usage_quantity * sus.multiplier) / li.shared_projects) as usage_quantity, + sum(li.pretax_cost / li.shared_projects) as pretax_cost, + sum(li.pretax_cost / li.shared_projects) * {{markup}}::numeric as markup_cost, + max(li.shared_projects) as shared_projects, + li.project_cost, + li.project_cost * {{markup}}::numeric as project_markup_cost, ab.provider_id as source_uuid FROM reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} AS li JOIN {{schema | sqlsafe}}.reporting_azurecostentryproductservice AS p @@ -1317,11 +1207,9 @@ CREATE TEMPORARY TABLE reporting_ocpazurecostlineitem_project_daily_summary_{{uu li.cluster_id, li.cluster_alias, li.namespace, - li.pod, li.node, - li.persistentvolume_labels, - li.persistentvolumeclaim_labels, - li.pod_cost, + li.volume_labels, + li.project_cost, ab.provider_id ) ; @@ -1359,7 +1247,6 @@ INSERT INTO {{schema | sqlsafe}}.reporting_ocpazurecostlineitem_daily_summary ( cluster_id, cluster_alias, namespace, - pod, node, resource_id, usage_start, @@ -1384,7 +1271,6 @@ INSERT INTO {{schema | sqlsafe}}.reporting_ocpazurecostlineitem_daily_summary ( cluster_id, cluster_alias, namespace, - pod, node, resource_id, usage_start, @@ -1435,7 +1321,6 @@ INSERT INTO {{schema | sqlsafe}}.reporting_ocpazurecostlineitem_project_daily_su cluster_alias, data_source, namespace, - pod, node, pod_labels, resource_id, @@ -1461,7 +1346,6 @@ INSERT INTO {{schema | sqlsafe}}.reporting_ocpazurecostlineitem_project_daily_su cluster_alias, data_source, namespace, - pod, node, pod_labels, resource_id, @@ -1477,7 +1361,7 @@ INSERT INTO {{schema | sqlsafe}}.reporting_ocpazurecostlineitem_project_daily_su markup_cost, currency, unit_of_measure, - pod_cost, + project_cost, project_markup_cost, source_uuid FROM reporting_ocpazurecostlineitem_project_daily_summary_{{uuid | sqlsafe}} diff --git a/koku/masu/database/sql/reporting_ocpstoragelineitem_daily_summary.sql b/koku/masu/database/sql/reporting_ocpstoragelineitem_daily_summary.sql index 26424c0900..fb171b0f61 100644 --- a/koku/masu/database/sql/reporting_ocpstoragelineitem_daily_summary.sql +++ b/koku/masu/database/sql/reporting_ocpstoragelineitem_daily_summary.sql @@ -1,32 +1,4 @@ CREATE TEMPORARY TABLE reporting_ocpstoragelineitem_daily_summary_{{uuid | sqlsafe}} AS ( - WITH cte_array_agg_keys AS ( - SELECT array_agg(key) as key_array - FROM reporting_ocpenabledtagkeys - ), - cte_filtered_volume_labels AS ( - SELECT id, - jsonb_object_agg(key,value) as volume_labels - FROM ( - SELECT lid.id, - -- persistentvolumeclaim_labels values will win in - -- the volume label merge - lid.persistentvolume_labels || lid.persistentvolumeclaim_labels as volume_labels, - aak.key_array - FROM reporting_ocpstoragelineitem_daily lid - JOIN cte_array_agg_keys aak - ON 1=1 - WHERE lid.usage_start >= {{start_date}} - AND lid.usage_start <= {{end_date}} - AND lid.cluster_id = {{cluster_id}} - AND ( - lid.persistentvolume_labels ?| aak.key_array - OR lid.persistentvolumeclaim_labels ?| aak.key_array - ) - ) AS lid, - 
jsonb_each_text(lid.volume_labels) AS labels - WHERE key = ANY (key_array) - GROUP BY id - ) SELECT uuid_generate_v4() as uuid, li.report_period_id, li.cluster_id, @@ -38,7 +10,7 @@ CREATE TEMPORARY TABLE reporting_ocpstoragelineitem_daily_summary_{{uuid | sqlsa li.persistentvolumeclaim, li.persistentvolume, li.storageclass, - coalesce(fvl.volume_labels, '{}'::jsonb) as volume_labels, + li.persistentvolume_labels || li.persistentvolumeclaim_labels as volume_labels, max(li.persistentvolumeclaim_capacity_bytes) * POWER(2, -30) as persistentvolumeclaim_capacity_gigabyte, sum(li.persistentvolumeclaim_capacity_byte_seconds) / 86400 * @@ -54,8 +26,6 @@ CREATE TEMPORARY TABLE reporting_ocpstoragelineitem_daily_summary_{{uuid | sqlsa * POWER(2, -30) as persistentvolumeclaim_usage_gigabyte_months, ab.provider_id as source_uuid FROM {{schema | sqlsafe}}.reporting_ocpstoragelineitem_daily AS li - LEFT JOIN cte_filtered_volume_labels AS fvl - ON li.id = fvl.id LEFT JOIN {{schema | sqlsafe}}.reporting_ocpusagereportperiod as ab ON li.cluster_id = ab.cluster_id WHERE usage_start >= {{start_date}} @@ -68,7 +38,7 @@ CREATE TEMPORARY TABLE reporting_ocpstoragelineitem_daily_summary_{{uuid | sqlsa li.usage_end, li.namespace, li.node, - fvl.volume_labels, + li.persistentvolume_labels || li.persistentvolumeclaim_labels, li.persistentvolume, li.persistentvolumeclaim, li.storageclass, diff --git a/koku/masu/database/sql/reporting_ocpstoragevolumelabel_summary.sql b/koku/masu/database/sql/reporting_ocpstoragevolumelabel_summary.sql index b788b44750..8cfd30fd9a 100644 --- a/koku/masu/database/sql/reporting_ocpstoragevolumelabel_summary.sql +++ b/koku/masu/database/sql/reporting_ocpstoragevolumelabel_summary.sql @@ -4,10 +4,11 @@ WITH cte_tag_value(key, value, report_period_id, namespace) AS ( li.report_period_id, li.namespace, li.node - FROM {{schema | sqlsafe}}.reporting_ocpstoragelineitem_daily AS li, - jsonb_each_text(li.persistentvolume_labels || li.persistentvolumeclaim_labels) labels + FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary AS li, + jsonb_each_text(li.volume_labels) labels + WHERE li.data_source = 'Storage' {% if report_periods %} - WHERE li.report_period_id IN ( + AND li.report_period_id IN ( {%- for report_period_id in report_period_ids -%} {{report_period_id}}{% if not loop.last %},{% endif %} {%- endfor -%} diff --git a/koku/masu/database/sql/reporting_ocpusagelineitem_daily_summary.sql b/koku/masu/database/sql/reporting_ocpusagelineitem_daily_summary.sql index 6be4913e6e..fb9d9eb53a 100644 --- a/koku/masu/database/sql/reporting_ocpusagelineitem_daily_summary.sql +++ b/koku/masu/database/sql/reporting_ocpusagelineitem_daily_summary.sql @@ -1,28 +1,5 @@ -- Place our query in a temporary table CREATE TEMPORARY TABLE reporting_ocpusagelineitem_daily_summary_{{uuid | sqlsafe}} AS ( - WITH cte_array_agg_keys AS ( - SELECT array_agg(key) as key_array - FROM {{schema | sqlsafe}}.reporting_ocpenabledtagkeys - ), - cte_filtered_pod_labels AS ( - SELECT id, - jsonb_object_agg(key,value) as pod_labels - FROM ( - SELECT lid.id, - lid.pod_labels, - aak.key_array - FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily lid - JOIN cte_array_agg_keys aak - ON 1=1 - WHERE lid.usage_start >= {{start_date}} - AND lid.usage_start <= {{end_date}} - AND lid.cluster_id = {{cluster_id}} - AND lid.pod_labels ?| aak.key_array - ) AS lid, - jsonb_each_text(lid.pod_labels) AS labels - WHERE key = ANY (key_array) - GROUP BY id - ) SELECT uuid_generate_v4() as uuid, li.report_period_id, li.cluster_id, 
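With the enabled-key CTEs removed here, the daily summary rows now carry the merged labels through unfiltered; trimming them down to enabled tag keys is deferred to the new update-enabled-tags SQL that follows. The jsonb_each_text / jsonb_object_agg pattern that SQL uses is equivalent to this small Python filter, shown only to make the intent concrete (names are illustrative):

    # enabled_keys mirrors the reporting_ocpenabledtagkeys table; labels
    # mirrors the pod_labels / volume_labels jsonb column on a summary row.
    def filter_enabled_labels(labels, enabled_keys):
        return {key: value for key, value in labels.items() if key in enabled_keys}

    labels = {"app": "koku", "version": "v1", "scratch": "ignore-me"}
    assert filter_enabled_labels(labels, {"app", "version"}) == {
        "app": "koku",
        "version": "v1",
    }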
@@ -32,7 +9,7 @@ CREATE TEMPORARY TABLE reporting_ocpusagelineitem_daily_summary_{{uuid | sqlsafe max(li.resource_id) as resource_id, li.usage_start, li.usage_end, - coalesce(fpl.pod_labels, '{}'::jsonb) as pod_labels, + li.pod_labels, sum(li.pod_usage_cpu_core_seconds) / 3600 as pod_usage_cpu_core_hours, sum(li.pod_request_cpu_core_seconds) / 3600 as pod_request_cpu_core_hours, sum(li.pod_limit_cpu_core_seconds) / 3600 as pod_limit_cpu_core_hours, @@ -48,8 +25,6 @@ CREATE TEMPORARY TABLE reporting_ocpusagelineitem_daily_summary_{{uuid | sqlsafe ab.provider_id as source_uuid, '{"cpu": 0.000000000, "memory": 0.000000000, "storage": 0.000000000}'::jsonb as infrastructure_usage_cost FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily AS li - LEFT JOIN cte_filtered_pod_labels AS fpl - ON li.id = fpl.id LEFT JOIN {{schema | sqlsafe}}.reporting_ocpusagereportperiod as ab ON li.cluster_id = ab.cluster_id WHERE usage_start >= {{start_date}} @@ -62,7 +37,7 @@ CREATE TEMPORARY TABLE reporting_ocpusagelineitem_daily_summary_{{uuid | sqlsafe li.usage_end, li.namespace, li.node, - fpl.pod_labels, + li.pod_labels, ab.provider_id ) ; diff --git a/koku/masu/database/sql/reporting_ocpusagelineitem_daily_summary_update_enabled_tags.sql b/koku/masu/database/sql/reporting_ocpusagelineitem_daily_summary_update_enabled_tags.sql new file mode 100644 index 0000000000..e20821b56b --- /dev/null +++ b/koku/masu/database/sql/reporting_ocpusagelineitem_daily_summary_update_enabled_tags.sql @@ -0,0 +1,86 @@ +WITH cte_array_agg_keys AS ( + SELECT array_agg(key) as key_array + FROM {{schema | sqlsafe}}.reporting_ocpenabledtagkeys +), +cte_filtered_pod_labels AS ( + SELECT uuid, + jsonb_object_agg(key,value) as pod_labels + FROM ( + SELECT lids.uuid, + lids.pod_labels as ocp_tags, + aak.key_array + FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary lids + JOIN cte_array_agg_keys aak + ON 1=1 + WHERE lids.pod_labels ?| aak.key_array + AND lids.usage_start >= date({{start_date}}) + AND lids.usage_start <= date({{end_date}}) + {% if bill_ids %} + AND lids.cost_entry_bill_id IN ( + {%- for bill_id in bill_ids -%} + {{bill_id}}{% if not loop.last %},{% endif %} + {%- endfor -%}) + {% endif %} + ) AS lids, + jsonb_each_text(lids.ocp_tags) AS labels + WHERE key = ANY (key_array) + GROUP BY lids.uuid +), +cte_filtered_volume_labels AS ( + SELECT uuid, + jsonb_object_agg(key,value) as volume_labels + FROM ( + SELECT lids.uuid, + lids.volume_labels as ocp_tags, + aak.key_array + FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary lids + JOIN cte_array_agg_keys aak + ON 1=1 + WHERE lids.volume_labels ?| aak.key_array + AND lids.usage_start >= date({{start_date}}) + AND lids.usage_start <= date({{end_date}}) + {% if bill_ids %} + AND lids.cost_entry_bill_id IN ( + {%- for bill_id in bill_ids -%} + {{bill_id}}{% if not loop.last %},{% endif %} + {%- endfor -%}) + {% endif %} + ) AS lids, + jsonb_each_text(lids.ocp_tags) AS labels + WHERE key = ANY (key_array) + GROUP BY lids.uuid +), +cte_joined_tags AS ( + SELECT f.uuid, + CASE WHEN f.pod_labels IS NOT NULL + THEN f.pod_labels + ELSE '{}'::jsonb + END AS pod_labels, + CASE WHEN f.volume_labels IS NOT NULL + THEN f.volume_labels + ELSE '{}'::jsonb + END AS volume_labels + FROM ( + SELECT lids.uuid, + fpl.pod_labels, + fvl.volume_labels + FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary AS lids + LEFT JOIN cte_filtered_pod_labels AS fpl + ON lids.uuid = fpl.uuid + LEFT JOIN cte_filtered_volume_labels AS fvl + ON lids.uuid = 
fvl.uuid + WHERE lids.usage_start >= date({{start_date}}) + AND lids.usage_start <= date({{end_date}}) + {% if bill_ids %} + AND lids.cost_entry_bill_id IN ( + {%- for bill_id in bill_ids -%} + {{bill_id}}{% if not loop.last %},{% endif %} + {%- endfor -%}) + {% endif %} + ) AS f +) +UPDATE {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary AS lids + SET pod_labels = jt.pod_labels, + volume_labels = jt.volume_labels +FROM cte_joined_tags AS jt +WHERE lids.uuid = jt.uuid diff --git a/koku/masu/database/sql/reporting_ocpusagepodlabel_summary.sql b/koku/masu/database/sql/reporting_ocpusagepodlabel_summary.sql index 1223cb261c..2556456615 100644 --- a/koku/masu/database/sql/reporting_ocpusagepodlabel_summary.sql +++ b/koku/masu/database/sql/reporting_ocpusagepodlabel_summary.sql @@ -4,14 +4,15 @@ WITH cte_tag_value(key, value, report_period_id, namespace) AS ( li.report_period_id, li.namespace, li.node - FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily AS li, + FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary AS li, jsonb_each_text(li.pod_labels) labels + WHERE li.data_source = 'Pod' {% if report_periods %} - WHERE li.report_period_id IN ( - {%- for report_period_id in report_period_ids -%} - {{report_period_id}}{% if not loop.last %},{% endif %} - {%- endfor -%} - ) + AND li.report_period_id IN ( + {%- for report_period_id in report_period_ids -%} + {{report_period_id}}{% if not loop.last %},{% endif %} + {%- endfor -%} + ) {% endif %} GROUP BY key, value, li.report_period_id, li.namespace, li.node ), diff --git a/koku/masu/processor/ocp/ocp_cloud_parquet_summary_updater.py b/koku/masu/processor/ocp/ocp_cloud_parquet_summary_updater.py index f77df8265a..085a02a172 100644 --- a/koku/masu/processor/ocp/ocp_cloud_parquet_summary_updater.py +++ b/koku/masu/processor/ocp/ocp_cloud_parquet_summary_updater.py @@ -28,7 +28,6 @@ from masu.processor.ocp.ocp_cloud_summary_updater import OCPCloudReportSummaryUpdater from masu.util.aws.common import get_bills_from_provider as aws_get_bills_from_provider from masu.util.azure.common import get_bills_from_provider as azure_get_bills_from_provider -from masu.util.common import date_range_pair from masu.util.ocp.common import get_cluster_id_from_provider LOG = logging.getLogger(__name__) @@ -85,15 +84,14 @@ def update_aws_summary_tables(self, openshift_provider_uuid, aws_provider_uuid, def update_azure_summary_tables(self, openshift_provider_uuid, azure_provider_uuid, start_date, end_date): """Update operations specifically for OpenShift on Azure.""" if isinstance(start_date, str): - start_date = parser.parse(start_date) + start_date = parser.parse(start_date).date() if isinstance(end_date, str): - end_date = parser.parse(end_date) + end_date = parser.parse(end_date).date() cluster_id = get_cluster_id_from_provider(openshift_provider_uuid) azure_bills = azure_get_bills_from_provider(azure_provider_uuid, self._schema, start_date, end_date) - azure_bill_ids = [] with schema_context(self._schema): - azure_bill_ids = [str(bill.id) for bill in azure_bills] + current_azure_bill_id = azure_bills.first().id if azure_bills else None with CostModelDBAccessor(self._schema, azure_provider_uuid) as cost_model_accessor: markup = cost_model_accessor.markup @@ -101,19 +99,26 @@ def update_azure_summary_tables(self, openshift_provider_uuid, azure_provider_uu # OpenShift on Azure with AzureReportDBAccessor(self._schema) as accessor: - for start, end in date_range_pair(start_date, end_date): - LOG.info( - "Updating OpenShift on Azure 
summary table for " - "\n\tSchema: %s \n\tProvider: %s \n\tDates: %s - %s" - "\n\tCluster ID: %s, Azure Bill IDs: %s", - self._schema, - self._provider.uuid, - start, - end, - cluster_id, - str(azure_bill_ids), - ) - accessor.populate_ocp_on_azure_cost_daily_summary(start, end, cluster_id, azure_bill_ids, markup_value) + LOG.info( + "Updating OpenShift on Azure summary table for " + "\n\tSchema: %s \n\tProvider: %s \n\tDates: %s - %s" + "\n\tCluster ID: %s, Azure Bill ID: %s", + self._schema, + self._provider.uuid, + start_date, + end_date, + cluster_id, + current_azure_bill_id, + ) + accessor.populate_ocp_on_azure_cost_daily_summary_presto( + start_date, + end_date, + openshift_provider_uuid, + azure_provider_uuid, + cluster_id, + current_azure_bill_id, + markup_value, + ) accessor.populate_ocp_on_azure_tags_summary_table() with OCPReportDBAccessor(self._schema) as accessor: diff --git a/koku/masu/processor/ocp/ocp_report_parquet_processor.py b/koku/masu/processor/ocp/ocp_report_parquet_processor.py index e4e1caa935..4ef8c1f69b 100644 --- a/koku/masu/processor/ocp/ocp_report_parquet_processor.py +++ b/koku/masu/processor/ocp/ocp_report_parquet_processor.py @@ -15,6 +15,8 @@ # along with this program. If not, see . # """Processor for OCP Parquet files.""" +import datetime + import ciso8601 import pytz from tenant_schemas.utils import schema_context @@ -72,6 +74,8 @@ def create_bill(self, bill_date): report_period_start = ciso8601.parse_datetime(start_date).replace(hour=0, minute=0, tzinfo=pytz.UTC) report_period_end = ciso8601.parse_datetime(end_date).replace(hour=0, minute=0, tzinfo=pytz.UTC) + # Make end date first of next month + report_period_end = report_period_end + datetime.timedelta(days=1) provider = self._get_provider() diff --git a/koku/masu/processor/ocp/ocp_report_parquet_summary_updater.py b/koku/masu/processor/ocp/ocp_report_parquet_summary_updater.py index 675d4a2b28..8a68ae66ae 100644 --- a/koku/masu/processor/ocp/ocp_report_parquet_summary_updater.py +++ b/koku/masu/processor/ocp/ocp_report_parquet_summary_updater.py @@ -129,10 +129,11 @@ def update_summary_tables(self, start_date, end_date): LOG.info( "Updating OpenShift label summary tables for \n\tSchema: %s " "\n\tReport Period IDs: %s", self._schema, - report_period.id, + report_period_ids, ) accessor.populate_pod_label_summary_table(report_period_ids) accessor.populate_volume_label_summary_table(report_period_ids) + accessor.update_line_item_daily_summary_with_enabled_tags(start_date, end_date, report_period_ids) LOG.info("Updating OpenShift report periods") for period in report_periods: diff --git a/koku/masu/processor/ocp/ocp_report_summary_updater.py b/koku/masu/processor/ocp/ocp_report_summary_updater.py index 5f63a41262..cf0c9e84c7 100644 --- a/koku/masu/processor/ocp/ocp_report_summary_updater.py +++ b/koku/masu/processor/ocp/ocp_report_summary_updater.py @@ -106,6 +106,7 @@ def update_summary_tables(self, start_date, end_date): accessor.populate_storage_line_item_daily_summary_table(start, end, self._cluster_id) accessor.populate_pod_label_summary_table(report_period_ids) accessor.populate_volume_label_summary_table(report_period_ids) + accessor.update_line_item_daily_summary_with_enabled_tags(start_date, end_date, report_period_ids) for period in report_periods: if period.summary_data_creation_datetime is None: diff --git a/koku/masu/test/database/test_azure_report_db_accessor.py b/koku/masu/test/database/test_azure_report_db_accessor.py index 4ce906f1e9..30c401b185 100644 --- 
a/koku/masu/test/database/test_azure_report_db_accessor.py +++ b/koku/masu/test/database/test_azure_report_db_accessor.py @@ -315,6 +315,32 @@ def test_populate_line_item_daily_summary_table_presto(self, mock_presto): ) mock_presto.assert_called() + @patch("masu.database.azure_report_db_accessor.AzureReportDBAccessor._execute_presto_multipart_sql_query") + def test_populate_ocp_on_azure_cost_daily_summary_presto(self, mock_presto): + """Test that we construst our SQL and query using Presto.""" + dh = DateHelper() + start_date = dh.this_month_start.date() + end_date = dh.this_month_end.date() + + bills = self.accessor.get_cost_entry_bills_query_by_provider(self.azure_provider.uuid) + with schema_context(self.schema): + current_bill_id = bills.first().id if bills else None + + with CostModelDBAccessor(self.schema, self.aws_provider.uuid) as cost_model_accessor: + markup = cost_model_accessor.markup + markup_value = float(markup.get("value", 0)) / 100 + + self.accessor.populate_ocp_on_azure_cost_daily_summary_presto( + start_date, + end_date, + self.ocp_provider_uuid, + self.azure_provider_uuid, + self.ocp_cluster_id, + current_bill_id, + markup_value, + ) + mock_presto.assert_called() + def test_populate_enabled_tag_keys(self): """Test that enabled tag keys are populated.""" dh = DateHelper() diff --git a/koku/masu/test/database/test_ocp_report_db_accessor.py b/koku/masu/test/database/test_ocp_report_db_accessor.py index 6c0595b0f5..b66e6042b5 100644 --- a/koku/masu/test/database/test_ocp_report_db_accessor.py +++ b/koku/masu/test/database/test_ocp_report_db_accessor.py @@ -37,8 +37,11 @@ from masu.test import MasuTestCase from masu.test.database.helpers import ReportObjectCreator from masu.util.common import month_date_range_tuple +from reporting.models import OCPEnabledTagKeys +from reporting.models import OCPStorageVolumeLabelSummary from reporting.models import OCPUsageLineItem from reporting.models import OCPUsageLineItemDailySummary +from reporting.models import OCPUsagePodLabelSummary from reporting.models import OCPUsageReport from reporting.models import OCPUsageReportPeriod from reporting_common import REPORT_COLUMN_MAP @@ -2100,3 +2103,50 @@ def test_populate_tag_based_default_usage_costs(self): # noqa: C901 cost_fields[0] ) self.assertAlmostEqual(actual_diff, expected_diff) + + def test_update_line_item_daily_summary_with_enabled_tags(self): + """Test that we filter the daily summary table's tags with only enabled tags.""" + dh = DateHelper() + start_date = dh.this_month_start.date() + end_date = dh.this_month_end.date() + + report_periods = self.accessor.report_periods_for_provider_uuid(self.ocp_provider_uuid, start_date) + + with schema_context(self.schema): + OCPUsagePodLabelSummary.objects.all().delete() + OCPStorageVolumeLabelSummary.objects.all().delete() + key_to_keep = OCPEnabledTagKeys.objects.first() + OCPEnabledTagKeys.objects.exclude(key=key_to_keep.key).delete() + report_period_ids = [report_period.id for report_period in report_periods] + self.accessor.update_line_item_daily_summary_with_enabled_tags(start_date, end_date, report_period_ids) + tags = ( + OCPUsageLineItemDailySummary.objects.filter( + usage_start__gte=start_date, report_period_id__in=report_period_ids + ) + .values_list("pod_labels") + .distinct() + ) + + for tag in tags: + tag_dict = tag[0] + tag_keys = list(tag_dict.keys()) + if tag_keys: + self.assertEqual([key_to_keep.key], tag_keys) + else: + self.assertEqual([], tag_keys) + + tags = ( + OCPUsageLineItemDailySummary.objects.filter( + 
usage_start__gte=start_date, report_period_id__in=report_period_ids + ) + .values_list("volume_labels") + .distinct() + ) + + for tag in tags: + tag_dict = tag[0] + tag_keys = list(tag_dict.keys()) + if tag_keys: + self.assertEqual([key_to_keep.key], tag_keys) + else: + self.assertEqual([], tag_keys) diff --git a/koku/masu/test/processor/ocp/test_ocp_cloud_parquet_report_summary_updater.py b/koku/masu/test/processor/ocp/test_ocp_cloud_parquet_report_summary_updater.py index 3d1900ddbb..0aa30c02b1 100644 --- a/koku/masu/test/processor/ocp/test_ocp_cloud_parquet_report_summary_updater.py +++ b/koku/masu/test/processor/ocp/test_ocp_cloud_parquet_report_summary_updater.py @@ -20,14 +20,8 @@ from unittest.mock import Mock from unittest.mock import patch -from django.db.models import Sum - from api.models import Provider from api.utils import DateHelper -from masu.database import AZURE_REPORT_TABLE_MAP -from masu.database import OCP_REPORT_TABLE_MAP -from masu.database.azure_report_db_accessor import AzureReportDBAccessor -from masu.database.ocp_report_db_accessor import OCPReportDBAccessor from masu.database.provider_db_accessor import ProviderDBAccessor from masu.processor.ocp.ocp_cloud_parquet_summary_updater import OCPCloudParquetReportSummaryUpdater from masu.test import MasuTestCase @@ -87,41 +81,84 @@ def test_update_aws_summary_tables(self, mock_utility, mock_ocp, mock_ocp_on_aws decimal.Decimal(0), ) - @patch("masu.database.cost_model_db_accessor.CostModelDBAccessor.cost_model") - def test_update_azure_summary_tables(self, mock_cost_model): - """Test that summary tables are updated correctly.""" - markup = {"value": 10, "unit": "percent"} - mock_cost_model.markup = markup - - start_date = self.dh.this_month_start - end_date = self.dh.this_month_end - - updater = OCPCloudParquetReportSummaryUpdater(schema=self.schema, provider=self.azure_provider, manifest=None) - - updater.update_summary_tables(start_date, end_date) - - summary_table_name = AZURE_REPORT_TABLE_MAP["ocp_on_azure_daily_summary"] - with AzureReportDBAccessor(self.schema) as azure_accessor: - query = azure_accessor._get_db_obj_query(summary_table_name).filter( - cost_entry_bill__billing_period_start=start_date - ) - markup_cost = query.aggregate(Sum("markup_cost"))["markup_cost__sum"] - pretax_cost = query.aggregate(Sum("pretax_cost"))["pretax_cost__sum"] + @patch("masu.processor.ocp.ocp_cloud_updater_base.OCPCloudUpdaterBase.get_infra_map") + @patch( + "masu.processor.ocp.ocp_cloud_parquet_summary_updater.AzureReportDBAccessor.populate_ocp_on_azure_tags_summary_table" # noqa: E501 + ) + @patch( + "masu.processor.ocp.ocp_cloud_parquet_summary_updater.AzureReportDBAccessor.populate_ocp_on_azure_cost_daily_summary_presto" # noqa: E501 + ) + @patch("masu.database.ocp_report_db_accessor.OCPReportDBAccessor.update_summary_infrastructure_cost") + @patch("masu.processor.ocp.ocp_cloud_parquet_summary_updater.azure_get_bills_from_provider") + def test_update_azure_summary_tables(self, mock_utility, mock_ocp, mock_ocp_on_azure, mock_tag_summary, mock_map): + """Test that summary tables are properly run for an OCP provider.""" + fake_bills = Mock() + first = Mock() + bill_id = 1 + first.return_value.id = bill_id + fake_bills.first = first + mock_utility.return_value = fake_bills + start_date = self.dh.today.date() + end_date = start_date + datetime.timedelta(days=1) - self.assertAlmostEqual(markup_cost, pretax_cost * decimal.Decimal(markup.get("value") / 100), places=5) + with ProviderDBAccessor(self.azure_provider_uuid) as 
provider_accessor: + provider = provider_accessor.get_provider() + with ProviderDBAccessor(self.ocp_test_provider_uuid) as provider_accessor: + credentials = provider_accessor.get_credentials() + cluster_id = credentials.get("cluster_id") + mock_map.return_value = {self.ocp_test_provider_uuid: (self.azure_provider_uuid, Provider.PROVIDER_AZURE)} + updater = OCPCloudParquetReportSummaryUpdater(schema="acct10001", provider=provider, manifest=None) + updater.update_azure_summary_tables( + self.ocp_test_provider_uuid, self.azure_test_provider_uuid, start_date, end_date + ) + mock_ocp_on_azure.assert_called_with( + start_date, + end_date, + self.ocp_test_provider_uuid, + self.azure_test_provider_uuid, + cluster_id, + bill_id, + decimal.Decimal(0), + ) - daily_summary_table_name = OCP_REPORT_TABLE_MAP["line_item_daily_summary"] - with OCPReportDBAccessor(self.schema) as ocp_accessor: - query = ocp_accessor._get_db_obj_query(daily_summary_table_name).filter( - report_period__provider=self.ocp_on_azure_ocp_provider, - report_period__report_period_start=self.dh.this_month_start, - ) - infra_cost = query.aggregate(Sum("infrastructure_raw_cost"))["infrastructure_raw_cost__sum"] - project_infra_cost = query.aggregate(Sum("infrastructure_project_raw_cost"))[ - "infrastructure_project_raw_cost__sum" - ] + @patch("masu.processor.ocp.ocp_cloud_updater_base.OCPCloudUpdaterBase.get_infra_map") + @patch( + "masu.processor.ocp.ocp_cloud_parquet_summary_updater.AzureReportDBAccessor.populate_ocp_on_azure_tags_summary_table" # noqa: E501 + ) + @patch( + "masu.processor.ocp.ocp_cloud_parquet_summary_updater.AzureReportDBAccessor.populate_ocp_on_azure_cost_daily_summary_presto" # noqa: E501 + ) + @patch("masu.database.ocp_report_db_accessor.OCPReportDBAccessor.update_summary_infrastructure_cost") + @patch("masu.processor.ocp.ocp_cloud_parquet_summary_updater.azure_get_bills_from_provider") + def test_update_azure_summary_tables_with_string_dates( + self, mock_utility, mock_ocp, mock_ocp_on_azure, mock_tag_summary, mock_map + ): + """Test that summary tables are properly run for an OCP provider.""" + fake_bills = Mock() + first = Mock() + bill_id = 1 + first.return_value.id = bill_id + fake_bills.first = first + mock_utility.return_value = fake_bills + start_date = self.dh.today.date() + end_date = start_date + datetime.timedelta(days=1) - self.assertIsNotNone(infra_cost) - self.assertIsNotNone(project_infra_cost) - self.assertNotEqual(infra_cost, decimal.Decimal(0)) - self.assertNotEqual(project_infra_cost, decimal.Decimal(0)) + with ProviderDBAccessor(self.azure_provider_uuid) as provider_accessor: + provider = provider_accessor.get_provider() + with ProviderDBAccessor(self.ocp_test_provider_uuid) as provider_accessor: + credentials = provider_accessor.get_credentials() + cluster_id = credentials.get("cluster_id") + mock_map.return_value = {self.ocp_test_provider_uuid: (self.azure_provider_uuid, Provider.PROVIDER_AZURE)} + updater = OCPCloudParquetReportSummaryUpdater(schema="acct10001", provider=provider, manifest=None) + updater.update_azure_summary_tables( + self.ocp_test_provider_uuid, self.azure_test_provider_uuid, str(start_date), str(end_date) + ) + mock_ocp_on_azure.assert_called_with( + start_date, + end_date, + self.ocp_test_provider_uuid, + self.azure_test_provider_uuid, + cluster_id, + bill_id, + decimal.Decimal(0), + ) diff --git a/koku/masu/test/processor/ocp/test_ocp_report_parquet_processor.py b/koku/masu/test/processor/ocp/test_ocp_report_parquet_processor.py index a386f98653..436e261bfb 100644 
--- a/koku/masu/test/processor/ocp/test_ocp_report_parquet_processor.py +++ b/koku/masu/test/processor/ocp/test_ocp_report_parquet_processor.py @@ -15,6 +15,8 @@ # along with this program. If not, see . # """Test the OCPReportParquetProcessor.""" +import datetime + from tenant_schemas.utils import schema_context from api.utils import DateHelper @@ -54,7 +56,7 @@ def test_create_bill(self): """Test that a bill is created in the Postgres database.""" bill_date = DateHelper().next_month_start start_date = bill_date - end_date = DateHelper().next_month_end + end_date = DateHelper().next_month_end + datetime.timedelta(days=1) self.processor.create_bill(bill_date.date()) with schema_context(self.schema): @@ -70,7 +72,7 @@ def test_create_bill_with_string_arg(self): """Test that a bill is created in the Postgres database.""" bill_date = DateHelper().next_month_start start_date = bill_date - end_date = DateHelper().next_month_end + end_date = DateHelper().next_month_end + datetime.timedelta(days=1) self.processor.create_bill(str(bill_date.date())) diff --git a/koku/reporting/migrations/0162_auto_20201120_1901.py b/koku/reporting/migrations/0162_auto_20201120_1901.py new file mode 100644 index 0000000000..17742fb67c --- /dev/null +++ b/koku/reporting/migrations/0162_auto_20201120_1901.py @@ -0,0 +1,12 @@ +# Generated by Django 3.1.3 on 2020-11-20 19:01 +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [("reporting", "0161_auto_20210118_2113")] + + operations = [ + migrations.RemoveField(model_name="ocpazurecostlineitemdailysummary", name="pod"), + migrations.RemoveField(model_name="ocpazurecostlineitemprojectdailysummary", name="pod"), + ] diff --git a/koku/reporting/provider/azure/openshift/models.py b/koku/reporting/provider/azure/openshift/models.py index 8d2806e9db..ad0352e26a 100644 --- a/koku/reporting/provider/azure/openshift/models.py +++ b/koku/reporting/provider/azure/openshift/models.py @@ -67,8 +67,6 @@ class Meta: # Kubernetes objects by convention have a max name length of 253 chars namespace = ArrayField(models.CharField(max_length=253, null=False)) - pod = ArrayField(models.CharField(max_length=253, null=False)) - node = models.CharField(max_length=253, null=True) resource_id = models.CharField(max_length=253, null=True) @@ -149,8 +147,6 @@ class Meta: # Kubernetes objects by convention have a max name length of 253 chars namespace = models.CharField(max_length=253, null=False) - pod = models.CharField(max_length=253, null=True) - node = models.CharField(max_length=253, null=True) pod_labels = JSONField(null=True) diff --git a/scripts/nise_ymls/ocp_on_aws/ocp_static_data.yml b/scripts/nise_ymls/ocp_on_aws/ocp_static_data.yml index 400c67492a..7b12c2fe45 100644 --- a/scripts/nise_ymls/ocp_on_aws/ocp_static_data.yml +++ b/scripts/nise_ymls/ocp_on_aws/ocp_static_data.yml @@ -6,6 +6,7 @@ generators: nodes: - node: node_name: aws_compute1 + node_labels: label_nodeclass:compute cpu_cores: 2 memory_gig: 8 resource_id: 55555555 @@ -56,6 +57,7 @@ generators: capacity_gig: 20 - node: node_name: aws_compute2 + node_labels: label_nodeclass:compute cpu_cores: 2 memory_gig: 8 resource_id: 55555556 @@ -112,6 +114,7 @@ generators: capacity_gig: 20 - node: node_name: aws_master + node_labels: label_nodeclass:master cpu_cores: 2 memory_gig: 8 resource_id: 55555558 diff --git a/scripts/nise_ymls/ocp_on_azure/ocp_static_data.yml b/scripts/nise_ymls/ocp_on_azure/ocp_static_data.yml index dd921803c3..569b6fc41c 100644 --- 
a/scripts/nise_ymls/ocp_on_azure/ocp_static_data.yml +++ b/scripts/nise_ymls/ocp_on_azure/ocp_static_data.yml @@ -6,6 +6,7 @@ generators: nodes: - node: node_name: azure_compute1 + node_labels: label_nodeclass:compute cpu_cores: 2 memory_gig: 8 resource_id: 99999995 @@ -56,6 +57,7 @@ generators: capacity_gig: 20 - node: node_name: azure_compute2 + node_labels: label_nodeclass:compute cpu_cores: 2 memory_gig: 8 resource_id: 99999996 @@ -84,6 +86,7 @@ generators: capacity_gig: 20 - node: node_name: azure_compute3 + node_labels: label_nodeclass:compute cpu_cores: 2 memory_gig: 8 resource_id: 99999997 @@ -112,6 +115,7 @@ generators: capacity_gig: 20 - node: node_name: azure_master + node_labels: label_nodeclass:master cpu_cores: 2 memory_gig: 8 resource_id: 99999998 From 54bcc70745ac6250836a74df78dd9b7db5e63d81 Mon Sep 17 00:00:00 2001 From: Cody Myers Date: Tue, 19 Jan 2021 15:54:47 -0500 Subject: [PATCH 07/17] COST-895: GCP costs report generates 500 error when group by tags. (#2598) * COST-895: GCP costs report generates 500 error when group by tags. Co-authored-by: Douglas Curtis --- koku/api/report/gcp/query_handler.py | 5 +++-- koku/api/report/gcp/serializers.py | 1 + .../report/test/gcp/tests_gcp_query_handler.py | 17 ++++++++++++++++- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/koku/api/report/gcp/query_handler.py b/koku/api/report/gcp/query_handler.py index 94fe1d17f3..a80f13e74a 100644 --- a/koku/api/report/gcp/query_handler.py +++ b/koku/api/report/gcp/query_handler.py @@ -83,8 +83,9 @@ def annotations(self): annotations[q_param] = Concat(db_field, Value("")) group_by_fields = self._mapper.provider_map.get("group_by_annotations") for group_key in self._get_group_by(): - for q_param, db_field in group_by_fields[group_key].items(): - annotations[q_param] = Concat(db_field, Value("")) + if group_by_fields.get(group_key): + for q_param, db_field in group_by_fields[group_key].items(): + annotations[q_param] = Concat(db_field, Value("")) return annotations def _format_query_response(self): diff --git a/koku/api/report/gcp/serializers.py b/koku/api/report/gcp/serializers.py index 340e148687..46a111292b 100644 --- a/koku/api/report/gcp/serializers.py +++ b/koku/api/report/gcp/serializers.py @@ -72,6 +72,7 @@ class GCPQueryParamSerializer(ParamSerializer): delta = serializers.ChoiceField(choices=DELTA_CHOICES, required=False) units = serializers.CharField(required=False) + check_tags = serializers.BooleanField(required=False, default=False) def __init__(self, *args, **kwargs): """Initialize the GCP query param serializer.""" diff --git a/koku/api/report/test/gcp/tests_gcp_query_handler.py b/koku/api/report/test/gcp/tests_gcp_query_handler.py index 1b1fb73851..bc1c02eec8 100644 --- a/koku/api/report/test/gcp/tests_gcp_query_handler.py +++ b/koku/api/report/test/gcp/tests_gcp_query_handler.py @@ -38,6 +38,7 @@ from reporting.models import GCPCostSummaryByAccount from reporting.models import GCPCostSummaryByProject from reporting.models import GCPCostSummaryByService +from reporting.models import GCPTagsSummary LOG = logging.getLogger(__name__) @@ -177,7 +178,7 @@ def test_query_group_by_partial_filtered_service(self): self.assertIsNotNone(query_output.get("total")) total = query_output.get("total") aggregates = handler._mapper.report_type_map.get("aggregates") - filters = {**self.this_month_filter, "service_id__icontains": service} + filters = {**self.this_month_filter, "service_alias__icontains": service} for filt in handler._mapper.report_type_map.get("filter"): if filt: 
qf = QueryFilter(**filt) @@ -960,3 +961,17 @@ def test_execute_query_annotate(self): for data_item in data: month_val = data_item.get("date") self.assertEqual(month_val, cmonth_str) + + def test_execute_query_group_by_tag(self): + """Test execute_query for current month on monthly breakdown by service.""" + with tenant_context(self.tenant): + tag_object = GCPTagsSummary.objects.first() + key = tag_object.key + value = tag_object.values[0] + url = f"?filter[time_scope_units]=month&filter[time_scope_value]=-1&filter[resolution]=monthly&group_by[tag:{key}]={value}" # noqa: E501 + query_params = self.mocked_query_params(url, GCPCostView) + handler = GCPReportQueryHandler(query_params) + query_output = handler.execute_query() + data = query_output.get("data") + self.assertIsNotNone(data) + self.assertIsNotNone(query_output.get("total")) From 42dc8157dda3fb1c76188b065a8419be97927592 Mon Sep 17 00:00:00 2001 From: Brett Lentz Date: Tue, 19 Jan 2021 16:09:35 -0500 Subject: [PATCH 08/17] COST-854: ensure constant used for both prediction and confidence intervals. (#2596) * COST-854: ensure constant used for both prediction and confidence intervals. * make forecast unit tests more predictable --- koku/forecast/forecast.py | 18 +- koku/forecast/test/tests_forecast.py | 252 +++++++++++++-------------- 2 files changed, 137 insertions(+), 133 deletions(-) diff --git a/koku/forecast/forecast.py b/koku/forecast/forecast.py index 225a2be28e..7f18cc88d7 100644 --- a/koku/forecast/forecast.py +++ b/koku/forecast/forecast.py @@ -184,10 +184,10 @@ def _predict(self, data): pred_x = [i for i in range(X[-1] + 1, X[-1] + 1 + self.forecast_days_required)] # run the forecast - results = self._run_forecast(X, Y) + results = self._run_forecast(X, Y, to_predict=pred_x) result_dict = {} - for i, value in enumerate(results.prediction(pred_x)): + for i, value in enumerate(results.prediction): if i < len(results.confidence_lower): lower = results.confidence_lower[i] else: @@ -325,12 +325,13 @@ def format_result(self, results): response.append(dikt) return response - def _run_forecast(self, x, y): + def _run_forecast(self, x, y, to_predict=None): """Apply the forecast model. Args: x (list) a list of exogenous variables y (list) a list of endogenous variables + to_predict (list) a list of exogenous variables used in the forecast results Note: both x and y MUST be the same number of elements @@ -344,9 +345,10 @@ def _run_forecast(self, x, y): (list) P-values """ x = sm.add_constant(x) + to_predict = sm.add_constant(to_predict) model = sm.OLS(y, x) results = model.fit() - return LinearForecastResult(results, exog=x) + return LinearForecastResult(results, exog=to_predict) def _uniquify_qset(self, qset, field="total_cost"): """Take a QuerySet list, sum costs within the same day, and arrange it into a list of tuples. @@ -401,6 +403,7 @@ def __init__(self, regression_result, exog=None): regression_result (RegressionResult) the results of a statsmodels regression exog (array-like) exogenous variables for points to predict """ + self._exog = exog self._regression_result = regression_result self._std_err, self._conf_lower, self._conf_upper = wls_prediction_std(regression_result, exog=exog) @@ -412,7 +415,8 @@ def __init__(self, regression_result, exog=None): LOG.debug("Forecast interval lower-bound: %s", self.confidence_lower) LOG.debug("Forecast interval upper-bound: %s", self.confidence_upper) - def prediction(self, to_predict=None): + @property + def prediction(self): """Forecast prediction. 
Args: @@ -424,8 +428,8 @@ def prediction(self, to_predict=None): # predict() returns the same number of elements as the number of input observations prediction = [] try: - if to_predict: - prediction = self._regression_result.predict(sm.add_constant(to_predict)) + if self._exog is not None: + prediction = self._regression_result.predict(sm.add_constant(self._exog)) else: prediction = self._regression_result.predict() except ValueError as exc: diff --git a/koku/forecast/test/tests_forecast.py b/koku/forecast/test/tests_forecast.py index b9f0b5897a..d2188067ed 100644 --- a/koku/forecast/test/tests_forecast.py +++ b/koku/forecast/test/tests_forecast.py @@ -172,14 +172,12 @@ def test_predict_flat(self): expected = [] for n in range(0, 10): - # the test data needs to include some jitter to avoid - # division-by-zero in the underlying dot-product maths. expected.append( { "usage_start": (dh.this_month_start + timedelta(days=n)).date(), - "total_cost": 5 + random.random(), - "infrastructure_cost": 3 + random.random(), - "supplementary_cost": 2 + random.random(), + "total_cost": 5 + (0.01 * n), + "infrastructure_cost": 3 + (0.01 * n), + "supplementary_cost": 2 + (0.01 * n), } ) mock_qset = MockQuerySet(expected) @@ -199,19 +197,21 @@ def test_predict_flat(self): for result in results: for val in result.get("values", []): - self.assertIsInstance(val.get("date"), date) - - for item, cost in [ - (val.get("cost"), 5), - (val.get("infrastructure"), 3), - (val.get("supplementary"), 2), - ]: - self.assertAlmostEqual(float(item.get("total").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("confidence_max").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("confidence_min").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("rsquared").get("value")), 1, delta=2.2) - for pval in item.get("pvalues").get("value"): - self.assertGreaterEqual(float(pval), 0) + with self.subTest(values=val): + self.assertIsInstance(val.get("date"), date) + + for item, cost, delta in [ + (val.get("cost"), 5, 1), + (val.get("infrastructure"), 3, 1), + (val.get("supplementary"), 2, 1), + ]: + with self.subTest(cost=cost, delta=delta, item=item): + self.assertAlmostEqual(float(item.get("total").get("value")), cost, delta=delta) + self.assertAlmostEqual(float(item.get("confidence_max").get("value")), cost, delta=delta) + self.assertAlmostEqual(float(item.get("confidence_min").get("value")), cost, delta=delta) + self.assertGreater(float(item.get("rsquared").get("value")), 0) + for pval in item.get("pvalues").get("value"): + self.assertGreaterEqual(float(pval), 0) def test_predict_increasing(self): """Test that predict() returns expected values for increasing costs.""" @@ -431,14 +431,12 @@ def test_predict_flat(self): expected = [] for n in range(0, 10): - # the test data needs to include some jitter to avoid - # division-by-zero in the underlying dot-product maths. 
expected.append( { "usage_start": (dh.this_month_start + timedelta(days=n)).date(), - "total_cost": 5 + random.random(), - "infrastructure_cost": 3 + random.random(), - "supplementary_cost": 2 + random.random(), + "total_cost": 5 + (0.01 * n), + "infrastructure_cost": 3 + (0.01 * n), + "supplementary_cost": 2 + (0.01 * n), } ) mock_qset = MockQuerySet(expected) @@ -458,19 +456,21 @@ def test_predict_flat(self): for result in results: for val in result.get("values", []): - self.assertIsInstance(val.get("date"), date) - - for item, cost in [ - (val.get("cost"), 5), - (val.get("infrastructure"), 3), - (val.get("supplementary"), 2), - ]: - self.assertAlmostEqual(float(item.get("total").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("confidence_max").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("confidence_min").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("rsquared").get("value")), 1, delta=2.2) - for pval in item.get("pvalues").get("value"): - self.assertGreaterEqual(float(pval), 0) + with self.subTest(values=val): + self.assertIsInstance(val.get("date"), date) + + for item, cost, delta in [ + (val.get("cost"), 5, 1), + (val.get("infrastructure"), 3, 1), + (val.get("supplementary"), 2, 1), + ]: + with self.subTest(cost=cost, delta=delta, item=item): + self.assertAlmostEqual(float(item.get("total").get("value")), cost, delta=delta) + self.assertAlmostEqual(float(item.get("confidence_max").get("value")), cost, delta=delta) + self.assertAlmostEqual(float(item.get("confidence_min").get("value")), cost, delta=delta) + self.assertGreater(float(item.get("rsquared").get("value")), 0) + for pval in item.get("pvalues").get("value"): + self.assertGreaterEqual(float(pval), 0) class GCPForecastTest(IamTestCase): @@ -482,14 +482,12 @@ def test_predict_flat(self): expected = [] for n in range(0, 10): - # the test data needs to include some jitter to avoid - # division-by-zero in the underlying dot-product maths. 
expected.append( { "usage_start": (dh.this_month_start + timedelta(days=n)).date(), - "total_cost": 5 + random.random(), - "infrastructure_cost": 3 + random.random(), - "supplementary_cost": 2 + random.random(), + "total_cost": 5 + (0.01 * n), + "infrastructure_cost": 3 + (0.01 * n), + "supplementary_cost": 2 + (0.01 * n), } ) mock_qset = MockQuerySet(expected) @@ -509,19 +507,21 @@ def test_predict_flat(self): for result in results: for val in result.get("values", []): - self.assertIsInstance(val.get("date"), date) - - for item, cost in [ - (val.get("cost"), 5), - (val.get("infrastructure"), 3), - (val.get("supplementary"), 2), - ]: - self.assertAlmostEqual(float(item.get("total").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("confidence_max").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("confidence_min").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("rsquared").get("value")), 1, delta=2.2) - for pval in item.get("pvalues").get("value"): - self.assertGreaterEqual(float(pval), 0) + with self.subTest(values=val): + self.assertIsInstance(val.get("date"), date) + + for item, cost, delta in [ + (val.get("cost"), 5, 1), + (val.get("infrastructure"), 3, 1), + (val.get("supplementary"), 2, 1), + ]: + with self.subTest(cost=cost, delta=delta, item=item): + self.assertAlmostEqual(float(item.get("total").get("value")), cost, delta=delta) + self.assertAlmostEqual(float(item.get("confidence_max").get("value")), cost, delta=delta) + self.assertAlmostEqual(float(item.get("confidence_min").get("value")), cost, delta=delta) + self.assertGreater(float(item.get("rsquared").get("value")), 0) + for pval in item.get("pvalues").get("value"): + self.assertGreaterEqual(float(pval), 0) def test_cost_summary_table(self): """Test that we select a valid table or view.""" @@ -560,14 +560,12 @@ def test_predict_flat(self): expected = [] for n in range(0, 10): - # the test data needs to include some jitter to avoid - # division-by-zero in the underlying dot-product maths. 
expected.append( { "usage_start": (dh.this_month_start + timedelta(days=n)).date(), - "total_cost": 5 + random.random(), - "infrastructure_cost": 3 + random.random(), - "supplementary_cost": 2 + random.random(), + "total_cost": 5 + (0.01 * n), + "infrastructure_cost": 3 + (0.01 * n), + "supplementary_cost": 2 + (0.01 * n), } ) mock_qset = MockQuerySet(expected) @@ -587,19 +585,21 @@ def test_predict_flat(self): for result in results: for val in result.get("values", []): - self.assertIsInstance(val.get("date"), date) - - for item, cost in [ - (val.get("cost"), 5), - (val.get("infrastructure"), 3), - (val.get("supplementary"), 2), - ]: - self.assertAlmostEqual(float(item.get("total").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("confidence_max").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("confidence_min").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("rsquared").get("value")), 1, delta=2.2) - for pval in item.get("pvalues").get("value"): - self.assertGreaterEqual(float(pval), 0) + with self.subTest(values=val): + self.assertIsInstance(val.get("date"), date) + + for item, cost, delta in [ + (val.get("cost"), 5, 1), + (val.get("infrastructure"), 3, 1), + (val.get("supplementary"), 2, 1), + ]: + with self.subTest(cost=cost, delta=delta, item=item): + self.assertAlmostEqual(float(item.get("total").get("value")), cost, delta=delta) + self.assertAlmostEqual(float(item.get("confidence_max").get("value")), cost, delta=delta) + self.assertAlmostEqual(float(item.get("confidence_min").get("value")), cost, delta=delta) + self.assertGreater(float(item.get("rsquared").get("value")), 0) + for pval in item.get("pvalues").get("value"): + self.assertGreaterEqual(float(pval), 0) def test_cost_summary_table(self): """Test that we select a valid table or view.""" @@ -643,14 +643,12 @@ def test_predict_flat(self): expected = [] for n in range(0, 10): - # the test data needs to include some jitter to avoid - # division-by-zero in the underlying dot-product maths. 
expected.append( { "usage_start": (dh.this_month_start + timedelta(days=n)).date(), - "total_cost": 5 + random.random(), - "infrastructure_cost": 3 + random.random(), - "supplementary_cost": 2 + random.random(), + "total_cost": 5 + (0.01 * n), + "infrastructure_cost": 3 + (0.01 * n), + "supplementary_cost": 2 + (0.01 * n), } ) mock_qset = MockQuerySet(expected) @@ -670,19 +668,21 @@ def test_predict_flat(self): for result in results: for val in result.get("values", []): - self.assertIsInstance(val.get("date"), date) - - for item, cost in [ - (val.get("cost"), 5), - (val.get("infrastructure"), 3), - (val.get("supplementary"), 2), - ]: - self.assertAlmostEqual(float(item.get("total").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("confidence_max").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("confidence_min").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("rsquared").get("value")), 1, delta=2.2) - for pval in item.get("pvalues").get("value"): - self.assertGreaterEqual(float(pval), 0) + with self.subTest(values=val): + self.assertIsInstance(val.get("date"), date) + + for item, cost, delta in [ + (val.get("cost"), 5, 1), + (val.get("infrastructure"), 3, 1), + (val.get("supplementary"), 2, 1), + ]: + with self.subTest(cost=cost, delta=delta, item=item): + self.assertAlmostEqual(float(item.get("total").get("value")), cost, delta=delta) + self.assertAlmostEqual(float(item.get("confidence_max").get("value")), cost, delta=delta) + self.assertAlmostEqual(float(item.get("confidence_min").get("value")), cost, delta=delta) + self.assertGreater(float(item.get("rsquared").get("value")), 0) + for pval in item.get("pvalues").get("value"): + self.assertGreaterEqual(float(pval), 0) class OCPAWSForecastTest(IamTestCase): @@ -694,14 +694,12 @@ def test_predict_flat(self): expected = [] for n in range(0, 10): - # the test data needs to include some jitter to avoid - # division-by-zero in the underlying dot-product maths. 
expected.append( { "usage_start": (dh.this_month_start + timedelta(days=n)).date(), - "total_cost": 5 + random.random(), - "infrastructure_cost": 3 + random.random(), - "supplementary_cost": 2 + random.random(), + "total_cost": 5 + (0.01 * n), + "infrastructure_cost": 3 + (0.01 * n), + "supplementary_cost": 2 + (0.01 * n), } ) mock_qset = MockQuerySet(expected) @@ -721,19 +719,21 @@ def test_predict_flat(self): for result in results: for val in result.get("values", []): - self.assertIsInstance(val.get("date"), date) - - for item, cost in [ - (val.get("cost"), 5), - (val.get("infrastructure"), 3), - (val.get("supplementary"), 2), - ]: - self.assertAlmostEqual(float(item.get("total").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("confidence_max").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("confidence_min").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("rsquared").get("value")), 1, delta=2.2) - for pval in item.get("pvalues").get("value"): - self.assertGreaterEqual(float(pval), 0) + with self.subTest(values=val): + self.assertIsInstance(val.get("date"), date) + + for item, cost, delta in [ + (val.get("cost"), 5, 1), + (val.get("infrastructure"), 3, 1), + (val.get("supplementary"), 2, 1), + ]: + with self.subTest(cost=cost, delta=delta, item=item): + self.assertAlmostEqual(float(item.get("total").get("value")), cost, delta=delta) + self.assertAlmostEqual(float(item.get("confidence_max").get("value")), cost, delta=delta) + self.assertAlmostEqual(float(item.get("confidence_min").get("value")), cost, delta=delta) + self.assertGreater(float(item.get("rsquared").get("value")), 0) + for pval in item.get("pvalues").get("value"): + self.assertGreaterEqual(float(pval), 0) class OCPAzureForecastTest(IamTestCase): @@ -745,14 +745,12 @@ def test_predict_flat(self): expected = [] for n in range(0, 10): - # the test data needs to include some jitter to avoid - # division-by-zero in the underlying dot-product maths. 
expected.append( { "usage_start": (dh.this_month_start + timedelta(days=n)).date(), - "total_cost": 5 + random.random(), - "infrastructure_cost": 3 + random.random(), - "supplementary_cost": 2 + random.random(), + "total_cost": 5 + (0.01 * n), + "infrastructure_cost": 3 + (0.01 * n), + "supplementary_cost": 2 + (0.01 * n), } ) mock_qset = MockQuerySet(expected) @@ -773,19 +771,21 @@ def test_predict_flat(self): for result in results: for val in result.get("values", []): - self.assertIsInstance(val.get("date"), date) - - for item, cost in [ - (val.get("cost"), 5), - (val.get("infrastructure"), 3), - (val.get("supplementary"), 2), - ]: - self.assertAlmostEqual(float(item.get("total").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("confidence_max").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("confidence_min").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("rsquared").get("value")), 1, delta=2.2) - for pval in item.get("pvalues").get("value"): - self.assertGreaterEqual(float(pval), 0) + with self.subTest(values=val): + self.assertIsInstance(val.get("date"), date) + + for item, cost, delta in [ + (val.get("cost"), 5, 1), + (val.get("infrastructure"), 3, 1), + (val.get("supplementary"), 2, 1), + ]: + with self.subTest(cost=cost, delta=delta, item=item): + self.assertAlmostEqual(float(item.get("total").get("value")), cost, delta=delta) + self.assertAlmostEqual(float(item.get("confidence_max").get("value")), cost, delta=delta) + self.assertAlmostEqual(float(item.get("confidence_min").get("value")), cost, delta=delta) + self.assertGreater(float(item.get("rsquared").get("value")), 0) + for pval in item.get("pvalues").get("value"): + self.assertGreaterEqual(float(pval), 0) class LinearForecastResultTest(IamTestCase): From a6b10335c5b13a4e08a623296981042a3ce9b879 Mon Sep 17 00:00:00 2001 From: Douglas Curtis Date: Tue, 19 Jan 2021 21:11:25 -0500 Subject: [PATCH 09/17] tech-debt: Removing sources endpoint references (#2588) --- .../0034_remove_sources_endpoint_id.py | 9 + koku/api/provider/models.py | 3 - koku/sources/kafka_listener.py | 3 - koku/sources/sources_http_client.py | 165 +++++++---------- koku/sources/storage.py | 17 -- koku/sources/test/test_kafka_listener.py | 65 +++---- koku/sources/test/test_sources_http_client.py | 167 +----------------- koku/sources/test/test_storage.py | 34 ---- 8 files changed, 93 insertions(+), 370 deletions(-) create mode 100644 koku/api/migrations/0034_remove_sources_endpoint_id.py diff --git a/koku/api/migrations/0034_remove_sources_endpoint_id.py b/koku/api/migrations/0034_remove_sources_endpoint_id.py new file mode 100644 index 0000000000..62ba8a769a --- /dev/null +++ b/koku/api/migrations/0034_remove_sources_endpoint_id.py @@ -0,0 +1,9 @@ +# Generated by Django 3.1.3 on 2021-01-15 15:32 +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [("api", "0033_sources_name_text")] + + operations = [migrations.RemoveField(model_name="sources", name="endpoint_id")] diff --git a/koku/api/provider/models.py b/koku/api/provider/models.py index 85eed55ec0..ddcb9029c7 100644 --- a/koku/api/provider/models.py +++ b/koku/api/provider/models.py @@ -190,9 +190,6 @@ class Meta: # Kafka message offset for Platform-Sources kafka stream offset = models.IntegerField(null=False) - # Endpoint ID. Identifier to connect source to authentication. - endpoint_id = models.IntegerField(null=True) - # Koku Specific data. 
# Customer Account ID account_id = models.TextField(null=True) diff --git a/koku/sources/kafka_listener.py b/koku/sources/kafka_listener.py index 29af541d00..4626747197 100644 --- a/koku/sources/kafka_listener.py +++ b/koku/sources/kafka_listener.py @@ -105,7 +105,6 @@ def __init__(self, auth_header, source_id): self.source_type_id = int(details.get("source_type_id")) self.source_uuid = details.get("uid") self.source_type_name = sources_network.get_source_type_name(self.source_type_id) - self.endpoint_id = sources_network.get_endpoint_id() self.source_type = SOURCE_PROVIDER_MAP.get(self.source_type_name) @@ -366,8 +365,6 @@ def cost_mgmt_msg_filter(msg_data): if event_type in (KAFKA_AUTHENTICATION_CREATE, KAFKA_AUTHENTICATION_UPDATE): sources_network = SourcesHTTPClient(auth_header) - if msg_data.get("resource_type") == "Endpoint": - source_id = sources_network.get_source_id_from_endpoint_id(msg_data.get("resource_id")) if msg_data.get("resource_type") == "Application": source_id = sources_network.get_source_id_from_applications_id(msg_data.get("resource_id")) msg_data["source_id"] = source_id diff --git a/koku/sources/sources_http_client.py b/koku/sources/sources_http_client.py index c9fd40cc60..d82598019b 100644 --- a/koku/sources/sources_http_client.py +++ b/koku/sources/sources_http_client.py @@ -77,38 +77,6 @@ def get_source_details(self): response = r.json() return response - def get_endpoint_id(self): - """Get Sources Endpoint ID from Source ID.""" - endpoint_url = f"{self._base_url}/endpoints?filter[source_id]={self._source_id}" - r = self._get_network_response(endpoint_url, self._identity_header, "Unable to endpoint ID") - if r.status_code == 404: - raise SourceNotFoundError(f"Status Code: {r.status_code}") - elif r.status_code != 200: - raise SourcesHTTPClientError("Status Code: ", r.status_code) - endpoint_response = r.json() - - endpoint_id = None - if endpoint_response.get("data"): - endpoint_id = endpoint_response.get("data")[0].get("id") - - return endpoint_id - - def get_source_id_from_endpoint_id(self, resource_id): - """Get Source ID from Sources Endpoint ID.""" - endpoint_url = f"{self._base_url}/endpoints?filter[id]={resource_id}" - r = self._get_network_response(endpoint_url, self._identity_header, "Unable to source ID from endpoint ID") - if r.status_code == 404: - raise SourceNotFoundError(f"Status Code: {r.status_code}") - elif r.status_code != 200: - raise SourcesHTTPClientError("Status Code: ", r.status_code) - endpoint_response = r.json() - - source_id = None - if endpoint_response.get("data"): - source_id = endpoint_response.get("data")[0].get("source_id") - - return source_id - def get_source_id_from_applications_id(self, resource_id): """Get Source ID from Sources Authentications ID.""" authentication_url = f"{self._base_url}/applications?filter[id]={resource_id}" @@ -178,37 +146,32 @@ def get_source_type_name(self, type_id): def get_aws_credentials(self): """Get the roleARN from Sources Authentication service.""" - urls = [ - "{}/applications?filter[source_id]={}".format(self._base_url, str(self._source_id)), - "{}/endpoints?filter[source_id]={}".format(self._base_url, str(self._source_id)), - ] - - for url in urls: - r = self._get_network_response(url, self._identity_header, "Unable to AWS RoleARN") - endpoint_response = r.json() - if endpoint_response.get("data"): - resource_id = endpoint_response.get("data")[0].get("id") - else: - continue - - authentications_str = "{}/authentications?[authtype]=arn&[resource_id]={}" - authentications_url = 
authentications_str.format(self._base_url, str(resource_id)) - r = self._get_network_response(authentications_url, self._identity_header, "Unable to AWS RoleARN") - authentications_response = r.json() - if not authentications_response.get("data"): - continue - authentications_id = authentications_response.get("data")[0].get("id") - - authentications_internal_url = "{}/authentications/{}?expose_encrypted_attribute[]=password".format( - self._internal_url, str(authentications_id) - ) - r = self._get_network_response( - authentications_internal_url, self._identity_header, "Unable to AWS RoleARN" - ) - authentications_internal_response = r.json() - password = authentications_internal_response.get("password") - if password: - return {"role_arn": password} + url = "{}/applications?filter[source_id]={}".format(self._base_url, str(self._source_id)) + + r = self._get_network_response(url, self._identity_header, "Unable to AWS RoleARN") + endpoint_response = r.json() + if endpoint_response.get("data"): + resource_id = endpoint_response.get("data")[0].get("id") + else: + raise SourcesHTTPClientError(f"Unable to get AWS roleARN for Source: {self._source_id}") + + authentications_str = "{}/authentications?[authtype]=arn&[resource_id]={}" + authentications_url = authentications_str.format(self._base_url, str(resource_id)) + r = self._get_network_response(authentications_url, self._identity_header, "Unable to AWS RoleARN") + authentications_response = r.json() + if not authentications_response.get("data"): + raise SourcesHTTPClientError(f"Unable to get AWS roleARN for Source: {self._source_id}") + + authentications_id = authentications_response.get("data")[0].get("id") + + authentications_internal_url = "{}/authentications/{}?expose_encrypted_attribute[]=password".format( + self._internal_url, str(authentications_id) + ) + r = self._get_network_response(authentications_internal_url, self._identity_header, "Unable to AWS RoleARN") + authentications_internal_response = r.json() + password = authentications_internal_response.get("password") + if password: + return {"role_arn": password} raise SourcesHTTPClientError(f"Unable to get AWS roleARN for Source: {self._source_id}") @@ -246,47 +209,41 @@ def get_gcp_credentials(self): def get_azure_credentials(self): """Get the Azure Credentials from Sources Authentication service.""" - urls = [ - "{}/applications?filter[source_id]={}".format(self._base_url, str(self._source_id)), - "{}/endpoints?filter[source_id]={}".format(self._base_url, str(self._source_id)), - ] - - for url in urls: - r = self._get_network_response(url, self._identity_header, "Unable to get Azure credentials") - endpoint_response = r.json() - if endpoint_response.get("data"): - resource_id = endpoint_response.get("data")[0].get("id") - else: - continue - - authentications_url = ( - f"{self._base_url}/authentications?" 
- f"[authtype]=tenant_id_client_id_client_secret&[resource_id]={str(resource_id)}" - ) - r = self._get_network_response( - authentications_url, self._identity_header, "Unable to get Azure credentials" - ) - authentications_response = r.json() - if not authentications_response.get("data"): - continue - data_dict = authentications_response.get("data")[0] - authentications_id = data_dict.get("id") - - authentications_internal_url = ( - f"{self._internal_url}/authentications/{str(authentications_id)}?expose_encrypted_attribute[]=password" - ) - r = self._get_network_response( - authentications_internal_url, self._identity_header, "Unable to get Azure credentials" - ) - authentications_internal_response = r.json() - password = authentications_internal_response.get("password") - - if password and data_dict: - return { - "client_id": data_dict.get("username"), - "client_secret": password, - "tenant_id": data_dict.get("extra").get("azure").get("tenant_id"), - } + url = "{}/applications?filter[source_id]={}".format(self._base_url, str(self._source_id)) + + r = self._get_network_response(url, self._identity_header, "Unable to get Azure credentials") + endpoint_response = r.json() + if endpoint_response.get("data"): + resource_id = endpoint_response.get("data")[0].get("id") + else: + raise SourcesHTTPClientError(f"Unable to get Azure credentials for Source: {self._source_id}") + + authentications_url = ( + f"{self._base_url}/authentications?" + f"[authtype]=tenant_id_client_id_client_secret&[resource_id]={str(resource_id)}" + ) + r = self._get_network_response(authentications_url, self._identity_header, "Unable to get Azure credentials") + authentications_response = r.json() + if not authentications_response.get("data"): + raise SourcesHTTPClientError(f"Unable to get Azure credentials for Source: {self._source_id}") + data_dict = authentications_response.get("data")[0] + authentications_id = data_dict.get("id") + + authentications_internal_url = ( + f"{self._internal_url}/authentications/{str(authentications_id)}?expose_encrypted_attribute[]=password" + ) + r = self._get_network_response( + authentications_internal_url, self._identity_header, "Unable to get Azure credentials" + ) + authentications_internal_response = r.json() + password = authentications_internal_response.get("password") + + if password and data_dict: + return { + "client_id": data_dict.get("username"), + "client_secret": password, + "tenant_id": data_dict.get("extra").get("azure").get("tenant_id"), + } raise SourcesHTTPClientError(f"Unable to get Azure credentials for Source: {self._source_id}") diff --git a/koku/sources/storage.py b/koku/sources/storage.py index 22b070c31c..f91dada116 100644 --- a/koku/sources/storage.py +++ b/koku/sources/storage.py @@ -339,20 +339,6 @@ def get_source_type(source_id): return source_type -def get_source_from_endpoint(endpoint_id): - """Get Source ID from Endpoint ID.""" - source_id = None - try: - query = Sources.objects.get(endpoint_id=endpoint_id) - source_id = query.source_id - except Sources.DoesNotExist: - LOG.info(f"Endpoint ID {endpoint_id} not associated with Cost Management") - except (InterfaceError, OperationalError) as error: - LOG.error(f"source.storage.get_source_from_endpoint {type(error).__name__}: {error}") - raise error - return source_id - - def add_provider_sources_auth_info(source_id, authentication): """ Add additional Sources information to a Source database object. 
@@ -407,9 +393,6 @@ def add_provider_sources_network_info(details, source_id): if source.source_type != details.source_type: source.source_type = details.source_type save_needed = True - if str(source.endpoint_id) != details.endpoint_id: - source.endpoint_id = details.endpoint_id - save_needed = True if save_needed: source.save() diff --git a/koku/sources/test/test_kafka_listener.py b/koku/sources/test/test_kafka_listener.py index 1592d4c3b5..927f218c5d 100644 --- a/koku/sources/test/test_kafka_listener.py +++ b/koku/sources/test/test_kafka_listener.py @@ -573,7 +573,7 @@ def test_sources_network_info_sync_aws(self): json={"data": []}, ) m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={test_source_id}", + f"http://www.sources.com/api/v1.0/applications?filter[source_id]={test_source_id}", status_code=200, json={"data": [{"id": resource_id}]}, ) @@ -688,11 +688,6 @@ def test_sources_network_info_sync_aws_local(self): m.get( f"http://www.sources.com/api/v1.0/applications?filter[source_id]={test_source_id}", status_code=200, - json={"data": []}, - ) - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={test_source_id}", - status_code=200, json={"data": [{"id": resource_id}]}, ) m.get( @@ -803,11 +798,6 @@ def test_sources_network_info_sync_azure(self): m.get( f"http://www.sources.com/api/v1.0/applications?filter[source_id]={test_source_id}", status_code=200, - json={"data": []}, - ) - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={test_source_id}", - status_code=200, json={"data": [{"id": resource_id}]}, ) m.get( @@ -875,11 +865,6 @@ def test_sources_network_info_sync_azure_local(self): m.get( f"http://www.sources.com/api/v1.0/applications?filter[source_id]={test_source_id}", status_code=200, - json={"data": []}, - ) - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={test_source_id}", - status_code=200, json={"data": [{"id": resource_id}]}, ) m.get( @@ -947,7 +932,7 @@ def test_sources_network_info_no_endpoint(self): json={"data": [{"name": mock_source_name}]}, ) m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={test_source_id}", + f"http://www.sources.com/api/v1.0/applications?filter[source_id]={test_source_id}", status_code=200, json={"data": []}, ) @@ -1026,8 +1011,7 @@ def test_process_message_application_unsupported_source_type(self): SourcesHTTPClient, "get_source_details", return_value={"name": "my ansible", "source_type_id": 2} ): with patch.object(SourcesHTTPClient, "get_source_type_name", return_value="ansible-tower"): - with patch.object(SourcesHTTPClient, "get_endpoint_id", return_value=1): - self.assertIsNone(process_message(test_application_id, msg_data)) + self.assertIsNone(process_message(test_application_id, msg_data)) @patch.object(Config, "SOURCES_API_URL", "http://www.sources.com") @patch("sources.kafka_listener.sources_network_info", returns=None) @@ -1053,7 +1037,7 @@ def _expected_authentication_create(msg_data, test, save_auth_info_mock): "value": { "id": 1, "source_id": 1, - "resource_type": "Endpoint", + "resource_type": "Application", "resource_id": "1", "application_type_id": test_application_id, }, @@ -1065,7 +1049,7 @@ def _expected_authentication_create(msg_data, test, save_auth_info_mock): "value": { "id": 1, "source_id": 1, - "resource_type": "Endpoint", + "resource_type": "Application", "resource_id": "1", "application_type_id": test_application_id, }, @@ -1089,7 +1073,7 @@ def _expected_authentication_create(msg_data, test, save_auth_info_mock): 
"value": { "id": 1, "source_id": 1, - "resource_type": "Endpoint", + "resource_type": "Application", "resource_id": "1", "application_type_id": test_application_id, }, @@ -1101,24 +1085,19 @@ def _expected_authentication_create(msg_data, test, save_auth_info_mock): for test in test_matrix: msg_data = MsgDataGenerator(event_type=test.get("event"), value=test.get("value")).get_data() with patch.object( - SourcesHTTPClient, "get_source_id_from_endpoint_id", return_value=test.get("value").get("source_id") + SourcesHTTPClient, + "get_application_type_is_cost_management", + return_value=test.get("expected_cost_mgmt_match"), ): with patch.object( SourcesHTTPClient, - "get_application_type_is_cost_management", + "get_source_id_from_applications_id", return_value=test.get("expected_cost_mgmt_match"), ): - with patch.object( - SourcesHTTPClient, - "get_source_id_from_applications_id", - return_value=test.get("expected_cost_mgmt_match"), - ): - with patch.object( - SourcesHTTPClient, "get_source_details", return_value={"source_type_id": "1"} - ): - with patch.object(SourcesHTTPClient, "get_source_type_name", return_value="amazon"): - process_message(test_application_id, msg_data) - test.get("expected_fn")(msg_data, test, mock_save_auth_info) + with patch.object(SourcesHTTPClient, "get_source_details", return_value={"source_type_id": "1"}): + with patch.object(SourcesHTTPClient, "get_source_type_name", return_value="amazon"): + process_message(test_application_id, msg_data) + test.get("expected_fn")(msg_data, test, mock_save_auth_info) @patch.object(Config, "SOURCES_API_URL", "http://www.sources.com") @patch("sources.kafka_listener.sources_network_info", returns=None) @@ -1207,7 +1186,7 @@ def _expected_update(test): "value": { "id": 1, "source_id": 1, - "resource_type": "Endpoint", + "resource_type": "Application", "resource_id": "1", "application_type_id": test_application_id, }, @@ -1231,15 +1210,13 @@ def _expected_update(test): test_source.save() msg_data = MsgDataGenerator(event_type=test.get("event"), value=test.get("value")).get_data() with patch.object( - SourcesHTTPClient, "get_source_id_from_endpoint_id", return_value=test.get("value").get("source_id") + SourcesHTTPClient, + "get_application_type_is_cost_management", + return_value=test.get("expected_cost_mgmt_match"), ): - with patch.object( - SourcesHTTPClient, - "get_application_type_is_cost_management", - return_value=test.get("expected_cost_mgmt_match"), - ): - with patch.object(SourcesHTTPClient, "get_source_details", return_value={"source_type_id": "1"}): - with patch.object(SourcesHTTPClient, "get_source_type_name", return_value="amazon"): + with patch.object(SourcesHTTPClient, "get_source_details", return_value={"source_type_id": "1"}): + with patch.object(SourcesHTTPClient, "get_source_type_name", return_value="amazon"): + with patch.object(SourcesHTTPClient, "get_source_id_from_applications_id", return_value=1): process_message(test_application_id, msg_data) test.get("expected_fn")(test) Sources.objects.all().delete() diff --git a/koku/sources/test/test_sources_http_client.py b/koku/sources/test/test_sources_http_client.py index cb2d128dbb..12e1a2f7f5 100644 --- a/koku/sources/test/test_sources_http_client.py +++ b/koku/sources/test/test_sources_http_client.py @@ -178,11 +178,6 @@ def test_get_aws_credentials(self): m.get( f"http://www.sources.com/api/v1.0/applications?filter[source_id]={self.source_id}", status_code=200, - json={"data": []}, - ) - m.get( - 
f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={self.source_id}", - status_code=200, json={"data": [{"id": resource_id}]}, ) m.get( @@ -239,11 +234,6 @@ def test_get_aws_credentials_no_auth(self): m.get( f"http://www.sources.com/api/v1.0/applications?filter[source_id]={self.source_id}", status_code=200, - json={"data": []}, - ) - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={self.source_id}", - status_code=200, json={"data": [{"id": resource_id}]}, ) m.get( @@ -305,11 +295,6 @@ def test_get_gcp_credentials_no_auth(self): status_code=200, json={"data": []}, ) - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={self.source_id}", - status_code=200, - json={"data": [{"id": resource_id}]}, - ) m.get( (f"http://www.sources.com/api/v1.0/authentications?" f"[authtype]=arn&[resource_id]={resource_id}"), status_code=200, @@ -338,11 +323,6 @@ def test_get_gcp_credentials_no_password(self): status_code=200, json={"data": [{"id": resource_id}]}, ) - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={self.source_id}", - status_code=200, - json={"data": [{"id": resource_id}]}, - ) m.get( ( f"http://www.sources.com/api/v1.0/authentications?" @@ -373,11 +353,6 @@ def test_get_aws_credentials_no_endpoint(self): status_code=200, json={"data": []}, ) - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={self.source_id}", - status_code=200, - json={"data": []}, - ) with self.assertRaises(SourcesHTTPClientError): client.get_aws_credentials() @@ -389,11 +364,7 @@ def test_get_aws_credentials_connection_error(self): with requests_mock.mock() as m: m.get( f"http://www.sources.com/api/v1.0/applications?filter[source_id]={self.source_id}", - status_code=200, - json={"data": []}, - ) - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={self.source_id}", exc=RequestException + exc=RequestException, ) with self.assertRaises(SourcesHTTPClientError): client.get_aws_credentials() @@ -418,11 +389,6 @@ def test_get_azure_credentials(self): m.get( f"http://www.sources.com/api/v1.0/applications?filter[source_id]={self.source_id}", status_code=200, - json={"data": []}, - ) - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={self.source_id}", - status_code=200, json={"data": [{"id": resource_id}]}, ) m.get( @@ -504,11 +470,6 @@ def test_get_azure_credentials_no_auth(self): m.get( f"http://www.sources.com/api/v1.0/applications?filter[source_id]={self.source_id}", status_code=200, - json={"data": []}, - ) - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={self.source_id}", - status_code=200, json={"data": [{"id": resource_id}]}, ) m.get( @@ -537,11 +498,7 @@ def test_get_azure_credentials_connection_error(self): with requests_mock.mock() as m: m.get( f"http://www.sources.com/api/v1.0/applications?filter[source_id]={self.source_id}", - status_code=200, - json={"data": []}, - ) - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={self.source_id}", exc=RequestException + exc=RequestException, ) with self.assertRaises(SourcesHTTPClientError): client.get_azure_credentials() @@ -556,129 +513,9 @@ def test_get_azure_credentials_no_endpoint(self): status_code=200, json={"data": []}, ) - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={self.source_id}", - status_code=200, - json={"data": []}, - ) with self.assertRaises(SourcesHTTPClientError): client.get_azure_credentials() - @patch.object(Config, "SOURCES_API_URL", 
"http://www.sources.com") - def test_get_endpoint_id(self): - """Test to get endpoint_id from Source_id.""" - resource_id = 2 - source_id = 3 - - client = SourcesHTTPClient(auth_header=Config.SOURCES_FAKE_HEADER, source_id=source_id) - with requests_mock.mock() as m: - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={source_id}", - status_code=200, - json={"data": [{"id": resource_id}]}, - ) - response = client.get_endpoint_id() - self.assertEqual(response, resource_id) - - @patch.object(Config, "SOURCES_API_URL", "http://www.sources.com") - def test_get_endpoint_id_no_data(self): - """Test to get endpoint_id from Source_id with no data in response.""" - source_id = 3 - - client = SourcesHTTPClient(auth_header=Config.SOURCES_FAKE_HEADER, source_id=source_id) - with requests_mock.mock() as m: - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={source_id}", - status_code=200, - json={"data": []}, - ) - self.assertIsNone(client.get_endpoint_id()) - - @patch.object(Config, "SOURCES_API_URL", "http://www.sources.com") - def test_get_endpoint_id_misconfigured(self): - """Test to get endpoint_id from Source_id with route not found.""" - resource_id = 2 - source_id = 3 - - client = SourcesHTTPClient(auth_header=Config.SOURCES_FAKE_HEADER, source_id=source_id) - with requests_mock.mock() as m: - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={source_id}", - status_code=404, - json={"data": [{"id": resource_id}]}, - ) - with self.assertRaises(SourceNotFoundError): - client.get_endpoint_id() - - @patch.object(Config, "SOURCES_API_URL", "http://www.sources.com") - def test_get_endpoint_ids_connection_error(self): - """Test to get endpoint id with connection error.""" - client = SourcesHTTPClient(auth_header=Config.SOURCES_FAKE_HEADER, source_id=self.source_id) - with requests_mock.mock() as m: - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={self.source_id}", exc=RequestException - ) - with self.assertRaises(SourcesHTTPClientError): - client.get_endpoint_id() - - @patch.object(Config, "SOURCES_API_URL", "http://www.sources.com") - def test_get_source_id_from_endpoint_id(self): - """Test to get source_id from resource_id.""" - resource_id = 2 - source_id = 3 - - client = SourcesHTTPClient(auth_header=Config.SOURCES_FAKE_HEADER, source_id=source_id) - with requests_mock.mock() as m: - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[id]={resource_id}", - status_code=200, - json={"data": [{"source_id": source_id}]}, - ) - response = client.get_source_id_from_endpoint_id(resource_id) - self.assertEqual(response, source_id) - - @patch.object(Config, "SOURCES_API_URL", "http://www.sources.com") - def test_get_source_id_from_endpoint_id_no_data(self): - """Test to get source_id from resource_id with no data in response.""" - resource_id = 2 - source_id = 3 - - client = SourcesHTTPClient(auth_header=Config.SOURCES_FAKE_HEADER, source_id=source_id) - with requests_mock.mock() as m: - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[id]={resource_id}", - status_code=200, - json={"data": []}, - ) - self.assertIsNone(client.get_source_id_from_endpoint_id(resource_id)) - - @patch.object(Config, "SOURCES_API_URL", "http://www.sources.com") - def test_get_source_id_from_endpoint_id_misconfigured(self): - """Test to get source_id from resource_id with route not found.""" - resource_id = 2 - source_id = 3 - - client = SourcesHTTPClient(auth_header=Config.SOURCES_FAKE_HEADER, source_id=source_id) - with 
requests_mock.mock() as m: - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[id]={resource_id}", - status_code=404, - json={"data": [{"id": resource_id}]}, - ) - with self.assertRaises(SourceNotFoundError): - client.get_source_id_from_endpoint_id(resource_id) - - @patch.object(Config, "SOURCES_API_URL", "http://www.sources.com") - def test_get_source_id_from_endpoint_id_connection_error(self): - """Test to get source ID from endpoint ID with connection error.""" - resource_id = 2 - - client = SourcesHTTPClient(auth_header=Config.SOURCES_FAKE_HEADER, source_id=self.source_id) - with requests_mock.mock() as m: - m.get(f"http://www.sources.com/api/v1.0/endpoints?filter[id]={resource_id}", exc=RequestException) - with self.assertRaises(SourcesHTTPClientError): - client.get_source_id_from_endpoint_id(resource_id) - @patch.object(Config, "SOURCES_API_URL", "http://www.sources.com") def test_get_source_id_from_applications_id(self): """Test to get source_id from application resource_id.""" diff --git a/koku/sources/test/test_storage.py b/koku/sources/test/test_storage.py index 6768d338f9..df85abea8d 100644 --- a/koku/sources/test/test_storage.py +++ b/koku/sources/test/test_storage.py @@ -184,7 +184,6 @@ def test_add_provider_network_info(self): test_source = Sources.objects.get(source_id=self.test_source_id) self.assertEqual(test_source.name, test_name) self.assertEqual(test_source.source_type, source_type) - self.assertEqual(test_source.endpoint_id, endpoint_id) self.assertEqual(str(test_source.source_uuid), source_uuid) def test_add_provider_network_info_not_found(self): @@ -346,40 +345,14 @@ def test_get_source_type(self): self.assertEquals(response, Provider.PROVIDER_OCP) self.assertEquals(storage.get_source_type(test_source_id + 1), None) - def test_get_source_from_endpoint(self): - """Test to source from endpoint id.""" - test_source_id = 3 - test_endpoint_id = 4 - aws_obj = Sources( - source_id=test_source_id, - auth_header=self.test_header, - offset=3, - endpoint_id=test_endpoint_id, - source_type=Provider.PROVIDER_AWS, - name="Test AWS Source", - authentication={"role_arn": "arn:test"}, - billing_source={"bucket": "test-bucket"}, - ) - aws_obj.save() - - response = storage.get_source_from_endpoint(test_endpoint_id) - self.assertEquals(response, test_source_id) - self.assertEquals(storage.get_source_from_endpoint(test_source_id + 10), None) - with patch("sources.storage.Sources.objects") as mock_objects: - mock_objects.get.side_effect = InterfaceError("Test exception") - with self.assertRaises(InterfaceError): - storage.get_source_from_endpoint(test_endpoint_id) - def test_add_provider_sources_auth_info(self): """Test to add authentication to a source.""" test_source_id = 3 - test_endpoint_id = 4 test_authentication = {"role_arn": "arn:test"} aws_obj = Sources( source_id=test_source_id, auth_header=self.test_header, offset=3, - endpoint_id=test_endpoint_id, source_type=Provider.PROVIDER_AWS, name="Test AWS Source", billing_source={"bucket": "test-bucket"}, @@ -393,13 +366,11 @@ def test_add_provider_sources_auth_info(self): def test_add_provider_sources_auth_info_with_sub_id(self): """Test to add authentication to a source with subscription_id.""" test_source_id = 3 - test_endpoint_id = 4 test_authentication = {"credentials": {"client_id": "new-client-id"}} azure_obj = Sources( source_id=test_source_id, auth_header=self.test_header, offset=3, - endpoint_id=test_endpoint_id, source_type=Provider.PROVIDER_AZURE, name="Test AZURE Source", authentication={"credentials": 
{"subscription_id": "orig-sub-id", "client_id": "test-client-id"}}, @@ -419,7 +390,6 @@ def test_enqueue_source_delete(self): source_id=test_source_id, auth_header=self.test_header, offset=test_offset, - endpoint_id=4, source_type=Provider.PROVIDER_AWS, name="Test AWS Source", billing_source={"bucket": "test-bucket"}, @@ -449,7 +419,6 @@ def test_enqueue_source_delete_in_pending(self): source_id=test_source_id, auth_header=self.test_header, offset=test_offset, - endpoint_id=4, source_type=Provider.PROVIDER_AWS, name="Test AWS Source", billing_source={"bucket": "test-bucket"}, @@ -511,7 +480,6 @@ def test_enqueue_source_update(self): pending_delete=test.get("pending_delete"), pending_update=test.get("pending_update"), offset=3, - endpoint_id=4, source_type=Provider.PROVIDER_AWS, name="Test AWS Source", billing_source={"bucket": "test-bucket"}, @@ -544,7 +512,6 @@ def test_clear_update_flag(self): koku_uuid=test.get("koku_uuid"), pending_update=test.get("pending_update"), offset=3, - endpoint_id=4, source_type=Provider.PROVIDER_AWS, name="Test AWS Source", billing_source={"bucket": "test-bucket"}, @@ -580,7 +547,6 @@ def test_load_providers_to_update(self): pending_update=test.get("pending_update"), pending_delete=test.get("pending_delete"), offset=3, - endpoint_id=4, source_type=Provider.PROVIDER_AWS, name="Test AWS Source", billing_source={"bucket": "test-bucket"}, From 8a192b54548fb2735b07c243a232067fd3fd497e Mon Sep 17 00:00:00 2001 From: Brett Lentz Date: Wed, 20 Jan 2021 09:17:08 -0500 Subject: [PATCH 10/17] COST-853 - migrate forecasting to rolling 30-day window; enforce minimum number of data points (#2599) --- koku/api/utils.py | 6 +- koku/forecast/forecast.py | 23 ++-- koku/forecast/test/tests_forecast.py | 169 ++++++++++++++------------- 3 files changed, 102 insertions(+), 96 deletions(-) diff --git a/koku/api/utils.py b/koku/api/utils.py index 374bea35cd..bd2816a823 100644 --- a/koku/api/utils.py +++ b/koku/api/utils.py @@ -208,7 +208,11 @@ def list_days(self, start_date, end_date): end_midnight = end_date.replace(hour=0, minute=0, second=0, microsecond=0) start_midnight = start_date.replace(hour=0, minute=0, second=0, microsecond=0) days = (end_midnight - start_midnight + self.one_day).days - return [start_midnight + datetime.timedelta(i) for i in range(days)] + + # built-in range(start, end, step) requires (start < end) == True + day_range = range(days, 0) if days < 0 else range(0, days) + output = [start_midnight + datetime.timedelta(i) for i in day_range] + return output def list_months(self, start_date, end_date): """Return a list of months from the start date til the end date. 
diff --git a/koku/forecast/forecast.py b/koku/forecast/forecast.py index 7f18cc88d7..7ab527428d 100644 --- a/koku/forecast/forecast.py +++ b/koku/forecast/forecast.py @@ -89,16 +89,10 @@ def __init__(self, query_params): # noqa: C901 # We have access constraints, but no view to accomodate, default to daily summary table self.cost_summary_table = self.provider_map.report_type_map.get("tables", {}).get("query") - # FIXME: replace with rolling 30-day window - if self.dh.today.day == 1: - self.forecast_days_required = self.dh.this_month_end.day - else: - self.forecast_days_required = self.dh.this_month_end.day - self.dh.yesterday.day + self.forecast_days_required = (self.dh.this_month_end - self.dh.yesterday).days - if self.dh.today.day <= self.MINIMUM: - self.query_range = (self.dh.last_month_start, self.dh.yesterday) - else: - self.query_range = (self.dh.this_month_start, self.dh.yesterday) + # forecasts use a rolling window + self.query_range = (self.dh.n_days_ago(self.dh.yesterday, 30), self.dh.yesterday) self.filters = QueryFilterCollection() self.filters.add(field="usage_start", operation="gte", parameter=self.query_range[0]) @@ -168,12 +162,13 @@ def _predict(self, data): """ LOG.debug("Forecast input data: %s", data) - if len(data) < 3: - LOG.warning("Unable to calculate forecast. Insufficient data for %s.", self.params.tenant) - return [] - if len(data) < self.MINIMUM: - LOG.warning("Number of data elements is fewer than the minimum.") + LOG.warning( + "Number of data elements (%s) is fewer than the minimum (%s). Unable to generate forecast.", + len(data), + self.MINIMUM, + ) + return [] dates, costs = zip(*data) diff --git a/koku/forecast/test/tests_forecast.py b/koku/forecast/test/tests_forecast.py index d2188067ed..d02572cba7 100644 --- a/koku/forecast/test/tests_forecast.py +++ b/koku/forecast/test/tests_forecast.py @@ -87,67 +87,87 @@ def test_forecast_days_required(self): """Test that we accurately select the number of days.""" params = self.mocked_query_params("?", AWSCostForecastView) + dh = DateHelper() mock_dh = Mock(spec=DateHelper) - mock_dh.return_value.today = datetime(2000, 1, 1, 0, 0, 0, 0) - mock_dh.return_value.yesterday = datetime(1999, 12, 31, 0, 0, 0, 0) - mock_dh.return_value.this_month_start = datetime(2000, 1, 1, 0, 0, 0, 0) - mock_dh.return_value.this_month_end = datetime(2000, 1, 31, 0, 0, 0, 0) - mock_dh.return_value.last_month_start = datetime(1999, 12, 1, 0, 0, 0, 0) - mock_dh.return_value.last_month_end = datetime(1999, 12, 31, 0, 0, 0, 0) - - with patch("forecast.forecast.DateHelper", new_callable=lambda: mock_dh) as mock_dh: - forecast = AWSForecast(params) - self.assertEqual(forecast.forecast_days_required, 31) - - mock_dh.return_value.today = datetime(2000, 1, 13, 0, 0, 0, 0) - mock_dh.return_value.yesterday = datetime(2000, 1, 12, 0, 0, 0, 0) - mock_dh.return_value.this_month_start = datetime(2000, 1, 1, 0, 0, 0, 0) - mock_dh.return_value.this_month_end = datetime(2000, 1, 31, 0, 0, 0, 0) - mock_dh.return_value.last_month_start = datetime(1999, 12, 1, 0, 0, 0, 0) - mock_dh.return_value.last_month_end = datetime(1999, 12, 31, 0, 0, 0, 0) - - with patch("forecast.forecast.DateHelper", new_callable=lambda: mock_dh) as mock_dh: - forecast = AWSForecast(params) - self.assertEqual(forecast.forecast_days_required, 19) - - def test_query_range_under(self): - """Test that we select the correct range based on day of month.""" - params = self.mocked_query_params("?", AWSCostForecastView) + scenarios = [ + { + "today": dh.today, + "yesterday": dh.yesterday, + 
"this_month_end": dh.this_month_end, + "expected": (dh.this_month_end - dh.yesterday).days, + }, + { + "today": datetime(2000, 1, 1, 0, 0, 0, 0), + "yesterday": datetime(1999, 12, 31, 0, 0, 0, 0), + "this_month_end": datetime(2000, 1, 31, 0, 0, 0, 0), + "expected": 31, + }, + { + "today": datetime(2000, 1, 31, 0, 0, 0, 0), + "yesterday": datetime(2000, 1, 30, 0, 0, 0, 0), + "this_month_end": datetime(2000, 1, 31, 0, 0, 0, 0), + "expected": 1, + }, + ] - dh = DateHelper() - mock_dh = Mock(spec=DateHelper) + mock_dh.return_value.n_days_ago = dh.n_days_ago # pass-thru to real function - mock_dh.return_value.today = dh.this_month_start + timedelta(days=AWSForecast.MINIMUM - 1) - mock_dh.return_value.yesterday = dh.this_month_start + timedelta(days=AWSForecast.MINIMUM - 2) - mock_dh.return_value.this_month_start = dh.this_month_start - mock_dh.return_value.this_month_end = dh.this_month_end - mock_dh.return_value.last_month_start = dh.last_month_start - mock_dh.return_value.last_month_end = dh.last_month_end + for test in scenarios: + with self.subTest(scenario=test): + mock_dh.return_value.today = test["today"] + mock_dh.return_value.yesterday = test["yesterday"] + mock_dh.return_value.this_month_end = test["this_month_end"] - with patch("forecast.forecast.DateHelper", new_callable=lambda: mock_dh) as mock_dh: - expected = (dh.last_month_start, dh.this_month_start + timedelta(days=AWSForecast.MINIMUM - 2)) - forecast = AWSForecast(params) - self.assertEqual(forecast.query_range, expected) + with patch("forecast.forecast.DateHelper", new_callable=lambda: mock_dh) as mock_dh: + forecast = AWSForecast(params) + self.assertEqual(forecast.forecast_days_required, test["expected"]) - def test_query_range_over(self): + def test_query_range(self): """Test that we select the correct range based on day of month.""" params = self.mocked_query_params("?", AWSCostForecastView) dh = DateHelper() mock_dh = Mock(spec=DateHelper) - mock_dh.return_value.today = dh.this_month_start + timedelta(days=(AWSForecast.MINIMUM + 1)) - mock_dh.return_value.yesterday = dh.this_month_start + timedelta(days=AWSForecast.MINIMUM) - mock_dh.return_value.this_month_start = dh.this_month_start - mock_dh.return_value.this_month_end = dh.this_month_end - mock_dh.return_value.last_month_start = dh.last_month_start - mock_dh.return_value.last_month_end = dh.last_month_end + scenarios = [ + { + "today": dh.today, + "yesterday": dh.yesterday, + "this_month_end": dh.this_month_end, + "expected": (dh.yesterday + timedelta(days=-30), dh.yesterday), + }, + { + "today": datetime(2000, 1, 1, 0, 0, 0, 0), + "yesterday": datetime(1999, 12, 31, 0, 0, 0, 0), + "this_month_end": datetime(2000, 1, 31, 0, 0, 0, 0), + "expected": ( + datetime(1999, 12, 31, 0, 0, 0, 0) + timedelta(days=-30), + datetime(1999, 12, 31, 0, 0, 0, 0), + ), + }, + { + "today": datetime(2000, 1, 31, 0, 0, 0, 0), + "yesterday": datetime(2000, 1, 30, 0, 0, 0, 0), + "this_month_end": datetime(2000, 1, 31, 0, 0, 0, 0), + "expected": ( + datetime(2000, 1, 30, 0, 0, 0, 0) + timedelta(days=-30), + datetime(2000, 1, 30, 0, 0, 0, 0), + ), + }, + ] + + mock_dh.return_value.n_days_ago = dh.n_days_ago # pass-thru to real function + + for test in scenarios: + with self.subTest(scenario=test): + mock_dh.return_value.today = test["today"] + mock_dh.return_value.yesterday = test["yesterday"] + mock_dh.return_value.this_month_end = test["this_month_end"] - with patch("forecast.forecast.DateHelper", new_callable=lambda: mock_dh) as mock_dh: - expected = (dh.this_month_start, 
dh.this_month_start + timedelta(days=AWSForecast.MINIMUM)) - forecast = AWSForecast(params) - self.assertEqual(forecast.query_range, expected) + with patch("forecast.forecast.DateHelper", new_callable=lambda: mock_dh) as mock_dh: + forecast = AWSForecast(params) + self.assertEqual(forecast.query_range, test["expected"]) def test_remove_outliers(self): """Test that we remove outliers before predicting.""" @@ -293,7 +313,7 @@ def test_predict_few_values(self): """Test that predict() behaves well with a limited data set.""" dh = DateHelper() - num_elements = [1, 2, 3, 4, 5] + num_elements = [AWSForecast.MINIMUM - 1, AWSForecast.MINIMUM, AWSForecast.MINIMUM + 1] for number in num_elements: with self.subTest(num_elements=number): @@ -304,10 +324,9 @@ def test_predict_few_values(self): expected.append( { "usage_start": dh.n_days_ago(dh.today, 10 - n).date(), - # "usage_start": dh.today.replace(day=n).date(), - "total_cost": 5 + random.random(), - "infrastructure_cost": 3 + random.random(), - "supplementary_cost": 2 + random.random(), + "total_cost": 5 + (0.01 * n), + "infrastructure_cost": 3 + (0.01 * n), + "supplementary_cost": 2 + (0.01 * n), } ) mock_qset = MockQuerySet(expected) @@ -322,41 +341,29 @@ def test_predict_few_values(self): instance = AWSForecast(params) instance.cost_summary_table = mocked_table - if number < 3: - # forecasting isn't possible with less than 3 data points. + if number < AWSForecast.MINIMUM: + # forecasting isn't useful with less than the minimum number of data points. with self.assertLogs(logger="forecast.forecast", level=logging.WARNING): results = instance.predict() self.assertEqual(results, []) else: - with self.assertLogs(logger="forecast.forecast", level=logging.WARNING): - results = instance.predict() + results = instance.predict() - self.assertNotEqual(results, []) + self.assertNotEqual(results, []) - for result in results: - for val in result.get("values", []): - self.assertIsInstance(val.get("date"), date) + for result in results: + for val in result.get("values", []): + self.assertIsInstance(val.get("date"), date) - item = val.get("cost") - self.assertGreaterEqual(float(item.get("total").get("value")), 0) - self.assertGreaterEqual(float(item.get("confidence_max").get("value")), 0) - self.assertGreaterEqual(float(item.get("confidence_min").get("value")), 0) - self.assertGreaterEqual(float(item.get("rsquared").get("value")), 0) - for pval in item.get("pvalues").get("value"): - self.assertGreaterEqual(float(pval), 0) - # test that the results always stop at the end of the month. 
- self.assertEqual(results[-1].get("date"), dh.this_month_end.date()) - - def test_results_never_outside_current_month(self): - """Test that our results stop at the end of the current month.""" - dh = DateHelper() - params = self.mocked_query_params("?", AWSCostForecastView) - forecast = AWSForecast(params) - forecast.forecast_days_required = 100 - results = forecast.predict() - dates = [result.get("date") for result in results] - self.assertNotIn(dh.next_month_start, dates) - self.assertEqual(dh.this_month_end.date(), max(dates)) + item = val.get("cost") + self.assertGreaterEqual(float(item.get("total").get("value")), 0) + self.assertGreaterEqual(float(item.get("confidence_max").get("value")), 0) + self.assertGreaterEqual(float(item.get("confidence_min").get("value")), 0) + self.assertGreaterEqual(float(item.get("rsquared").get("value")), 0) + for pval in item.get("pvalues").get("value"): + self.assertGreaterEqual(float(pval), 0) + # test that the results always stop at the end of the month. + self.assertEqual(results[-1].get("date"), dh.this_month_end.date()) def test_set_access_filter_with_list(self): """ From a63ba21afbc834d44cb1f4e668e5f7299fb5017e Mon Sep 17 00:00:00 2001 From: Douglas Curtis Date: Wed, 20 Jan 2021 13:58:08 -0500 Subject: [PATCH 11/17] updating cache key (#2602) --- .github/workflows/unittests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml index 81d4ea6e0a..2b6cbea20d 100644 --- a/.github/workflows/unittests.yml +++ b/.github/workflows/unittests.yml @@ -54,7 +54,7 @@ jobs: path: | ~/.cache/pipenv ~/.local/share/virtualenvs - key: os-${{ runner.os }}-env-${{ matrix.python-version }}-${{ hashFiles('**/Pipfile.lock') }}-${{ github.ref }} + key: ${{ runner.os }}-env-${{ matrix.python-version }}-${{ hashFiles('**/Pipfile.lock') }}-${{ github.ref }} - name: Install dependencies if: steps.cache-dependencies.outputs.cache-hit != 'true' From 721db82b76626e2a4dbe5b8ca6bacb620a9168d3 Mon Sep 17 00:00:00 2001 From: Andrew Berglund Date: Wed, 20 Jan 2021 16:42:14 -0500 Subject: [PATCH 12/17] Only consider group by for by_project apis (#2531) --- koku/api/report/all/openshift/query_handler.py | 4 ++-- koku/api/report/aws/openshift/query_handler.py | 4 ++-- koku/api/report/azure/openshift/query_handler.py | 4 ++-- koku/api/report/ocp/query_handler.py | 4 ++-- koku/api/report/queries.py | 6 ++---- 5 files changed, 10 insertions(+), 12 deletions(-) diff --git a/koku/api/report/all/openshift/query_handler.py b/koku/api/report/all/openshift/query_handler.py index 99188eeba6..0fa2d25d1f 100644 --- a/koku/api/report/all/openshift/query_handler.py +++ b/koku/api/report/all/openshift/query_handler.py @@ -20,7 +20,7 @@ from api.models import Provider from api.report.all.openshift.provider_map import OCPAllProviderMap from api.report.aws.openshift.query_handler import OCPInfrastructureReportQueryHandlerBase -from api.report.queries import is_grouped_or_filtered_by_project +from api.report.queries import is_grouped_by_project LOG = logging.getLogger(__name__) @@ -61,7 +61,7 @@ def __init__(self, parameters): """ self._mapper = OCPAllProviderMap(provider=self.provider, report_type=parameters.report_type) # Update which field is used to calculate cost by group by param. 
- if is_grouped_or_filtered_by_project(parameters): + if is_grouped_by_project(parameters): self._report_type = parameters.report_type + "_by_project" self._mapper = OCPAllProviderMap(provider=self.provider, report_type=self._report_type) diff --git a/koku/api/report/aws/openshift/query_handler.py b/koku/api/report/aws/openshift/query_handler.py index 0be919c0ab..0061210701 100644 --- a/koku/api/report/aws/openshift/query_handler.py +++ b/koku/api/report/aws/openshift/query_handler.py @@ -27,7 +27,7 @@ from api.models import Provider from api.report.aws.openshift.provider_map import OCPAWSProviderMap from api.report.aws.query_handler import AWSReportQueryHandler -from api.report.queries import is_grouped_or_filtered_by_project +from api.report.queries import is_grouped_by_project LOG = logging.getLogger(__name__) @@ -135,7 +135,7 @@ def __init__(self, parameters): """ self._mapper = OCPAWSProviderMap(provider=self.provider, report_type=parameters.report_type) # Update which field is used to calculate cost by group by param. - if is_grouped_or_filtered_by_project(parameters): + if is_grouped_by_project(parameters): self._report_type = parameters.report_type + "_by_project" self._mapper = OCPAWSProviderMap(provider=self.provider, report_type=self._report_type) self.group_by_options = self._mapper.provider_map.get("group_by_options") diff --git a/koku/api/report/azure/openshift/query_handler.py b/koku/api/report/azure/openshift/query_handler.py index b600c56c1e..459ea5bf3c 100644 --- a/koku/api/report/azure/openshift/query_handler.py +++ b/koku/api/report/azure/openshift/query_handler.py @@ -26,7 +26,7 @@ from api.models import Provider from api.report.azure.openshift.provider_map import OCPAzureProviderMap from api.report.azure.query_handler import AzureReportQueryHandler -from api.report.queries import is_grouped_or_filtered_by_project +from api.report.queries import is_grouped_by_project LOG = logging.getLogger(__name__) @@ -45,7 +45,7 @@ def __init__(self, parameters): """ self._mapper = OCPAzureProviderMap(provider=self.provider, report_type=parameters.report_type) # Update which field is used to calculate cost by group by param. - if is_grouped_or_filtered_by_project(parameters): + if is_grouped_by_project(parameters): self._report_type = parameters.report_type + "_by_project" self._mapper = OCPAzureProviderMap(provider=self.provider, report_type=self._report_type) diff --git a/koku/api/report/ocp/query_handler.py b/koku/api/report/ocp/query_handler.py index 189914ef8b..6f466483a6 100644 --- a/koku/api/report/ocp/query_handler.py +++ b/koku/api/report/ocp/query_handler.py @@ -30,7 +30,7 @@ from api.models import Provider from api.report.ocp.provider_map import OCPProviderMap -from api.report.queries import is_grouped_or_filtered_by_project +from api.report.queries import is_grouped_by_project from api.report.queries import ReportQueryHandler LOG = logging.getLogger(__name__) @@ -58,7 +58,7 @@ def __init__(self, parameters): # super() needs to be called before _get_group_by is called # Update which field is used to calculate cost by group by param. 
- if is_grouped_or_filtered_by_project(parameters) and parameters.report_type == "costs": + if is_grouped_by_project(parameters) and parameters.report_type == "costs": self._report_type = parameters.report_type + "_by_project" self._mapper = OCPProviderMap(provider=self.provider, report_type=self._report_type) diff --git a/koku/api/report/queries.py b/koku/api/report/queries.py index 2705a7041e..5925566420 100644 --- a/koku/api/report/queries.py +++ b/koku/api/report/queries.py @@ -44,12 +44,10 @@ def strip_tag_prefix(tag): return tag.replace("tag:", "").replace("and:", "").replace("or:", "") -def is_grouped_or_filtered_by_project(parameters): +def is_grouped_by_project(parameters): """Determine if grouped or filtered by project.""" group_by = list(parameters.parameters.get("group_by", {}).keys()) - filters = list(parameters.parameters.get("filter", {}).keys()) - effects = group_by + filters - return [key for key in effects if "project" in key] + return [key for key in group_by if "project" in key] def check_view_filter_and_group_by_criteria(filter_set, group_by_set): From 63abc4c198522433839b6f62028f812f3d454708 Mon Sep 17 00:00:00 2001 From: Douglas Curtis Date: Thu, 21 Jan 2021 12:03:11 -0500 Subject: [PATCH 13/17] COST-741: User Access API (#2601) --- docs/source/specs/openapi.json | 63 +++++ koku/api/urls.py | 2 + koku/api/user_access/__init__.py | 1 + koku/api/user_access/test/__init__.py | 1 + koku/api/user_access/test/test_view.py | 327 +++++++++++++++++++++++++ koku/api/user_access/view.py | 155 ++++++++++++ koku/api/views.py | 1 + 7 files changed, 550 insertions(+) create mode 100644 koku/api/user_access/__init__.py create mode 100644 koku/api/user_access/test/__init__.py create mode 100644 koku/api/user_access/test/test_view.py create mode 100644 koku/api/user_access/view.py diff --git a/docs/source/specs/openapi.json b/docs/source/specs/openapi.json index 6f7058ae13..f66a985966 100644 --- a/docs/source/specs/openapi.json +++ b/docs/source/specs/openapi.json @@ -2451,6 +2451,31 @@ } } }, + "/user-access/": { + "get": { + "tags": [ + "UserAccess" + ], + "summary": "Returns user permission status.", + "operationId": "listUserAccess", + "parameters": [{ + "$ref": "#/components/parameters/QueryType" + } + ], + "responses": { + "200": { + "description": "| - 200 response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UserAccessListPagination" + } + } + } + } + } + } + }, "/sources/": { "get": { "tags": [ @@ -3867,6 +3892,14 @@ "type": "boolean" } }, + "QueryType": { + "name": "type", + "in": "query", + "description": "String to identify user access permission type (i.e. 
AWS, cost_model).", + "schema": { + "type": "string" + } + }, "QueryValue": { "in": "query", "name": "value", @@ -4521,6 +4554,26 @@ } ] }, + "UserAccessListPagination": { + "allOf": [{ + "$ref": "#/components/schemas/ListPagination" + }, + { + "type": "object", + "required": [ + "data" + ], + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/UserAccessTypeOut" + } + } + } + } + ] + }, "ResourceTypeListPagination": { "allOf": [{ "$ref": "#/components/schemas/ListPagination" @@ -4568,6 +4621,16 @@ } } }, + "UserAccessTypeOut": { + "properties": { + "type": { + "type": "string" + }, + "access": { + "type": "boolean" + } + } + }, "ResourceTypeListOut": { "properties": { "value": { diff --git a/koku/api/urls.py b/koku/api/urls.py index dbbb6c15c4..d9765de8b0 100644 --- a/koku/api/urls.py +++ b/koku/api/urls.py @@ -71,6 +71,7 @@ from api.views import ResourceTypeView from api.views import SettingsView from api.views import StatusView +from api.views import UserAccessView from koku.cache import AWS_CACHE_PREFIX from koku.cache import AZURE_CACHE_PREFIX from koku.cache import GCP_CACHE_PREFIX @@ -300,6 +301,7 @@ path("settings", RedirectView.as_view(pattern_name="settings"), name="settings-redirect"), path("organizations/aws/", AWSOrgView.as_view(), name="aws-org-unit"), path("resource-types/", ResourceTypeView.as_view(), name="resource-types"), + path("user-access/", UserAccessView.as_view(), name="user-access"), path("resource-types/aws-accounts/", AWSAccountView.as_view(), name="aws-accounts"), path("resource-types/gcp-accounts/", GCPAccountView.as_view(), name="gcp-accounts"), path("resource-types/gcp-projects/", GCPProjectsView.as_view(), name="gcp-projects"), diff --git a/koku/api/user_access/__init__.py b/koku/api/user_access/__init__.py new file mode 100644 index 0000000000..4ede8e6dfa --- /dev/null +++ b/koku/api/user_access/__init__.py @@ -0,0 +1 @@ +# noqa diff --git a/koku/api/user_access/test/__init__.py b/koku/api/user_access/test/__init__.py new file mode 100644 index 0000000000..4ede8e6dfa --- /dev/null +++ b/koku/api/user_access/test/__init__.py @@ -0,0 +1 @@ +# noqa diff --git a/koku/api/user_access/test/test_view.py b/koku/api/user_access/test/test_view.py new file mode 100644 index 0000000000..6bfb0a82ea --- /dev/null +++ b/koku/api/user_access/test/test_view.py @@ -0,0 +1,327 @@ +# +# Copyright 2021 Red Hat, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+# +"""Test the UserAccess view.""" +from django.urls import reverse +from rest_framework import status +from rest_framework.test import APIClient + +from api.iam.test.iam_test_case import IamTestCase +from api.iam.test.iam_test_case import RbacPermissions + + +class UserAccessViewTest(IamTestCase): + """Tests the resource types views.""" + + def setUp(self): + """Set up the UserAccess view tests.""" + super().setUp() + self.client = APIClient() + + @RbacPermissions({"aws.account": {"read": ["*"]}}) + def test_aws_view_read(self): + """Test user-access view with aws read wildcard permission.""" + url = reverse("user-access") + response = self.client.get(url, **self.headers) + + self.assertEqual(len(response.data.get("data")), 5) + self.assertTrue({"type": "aws", "access": True} in response.data.get("data")) + self.assertTrue({"type": "ocp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "gcp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) + self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) + + @RbacPermissions({"aws.account": {"read": ["123"]}}) + def test_aws_view_read_specific_account(self): + """Test user-access view with aws read specific account permission.""" + url = reverse("user-access") + response = self.client.get(url, **self.headers) + + self.assertEqual(len(response.data.get("data")), 5) + self.assertTrue({"type": "aws", "access": True} in response.data.get("data")) + self.assertTrue({"type": "ocp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "gcp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) + self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) + + @RbacPermissions({"aws.account": "*"}) + def test_aws_view_wildcard(self): + """Test user-access view with aws wildcard permission.""" + url = reverse("user-access") + response = self.client.get(url, **self.headers) + + self.assertEqual(len(response.data.get("data")), 5) + self.assertTrue({"type": "aws", "access": True} in response.data.get("data")) + self.assertTrue({"type": "ocp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "gcp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) + self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) + + @RbacPermissions( + { + "openshift.cluster": {"read": ["*"]}, + "openshift.project": {"read": ["myproject"]}, + "openshift.node": {"read": ["mynode"]}, + } + ) + def test_ocp_view_cluster(self): + """Test user-access view with openshift cluster read wildcard permission.""" + url = reverse("user-access") + response = self.client.get(url, **self.headers) + + self.assertEqual(len(response.data.get("data")), 5) + self.assertTrue({"type": "aws", "access": False} in response.data.get("data")) + self.assertTrue({"type": "ocp", "access": True} in response.data.get("data")) + self.assertTrue({"type": "gcp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) + self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) + + @RbacPermissions( + { + "openshift.cluster": {"read": ["mycluster"]}, + "openshift.project": {"read": ["*"]}, + "openshift.node": {"read": ["mynode"]}, 
+ } + ) + def test_ocp_view_project(self): + """Test user-access view with openshift project read wildcard permission.""" + url = reverse("user-access") + response = self.client.get(url, **self.headers) + + self.assertEqual(len(response.data.get("data")), 5) + self.assertTrue({"type": "aws", "access": False} in response.data.get("data")) + self.assertTrue({"type": "ocp", "access": True} in response.data.get("data")) + self.assertTrue({"type": "gcp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) + self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) + + @RbacPermissions( + { + "openshift.cluster": {"read": ["mycluster"]}, + "openshift.project": {"read": ["myproject"]}, + "openshift.node": {"read": ["*"]}, + } + ) + def test_ocp_view_node(self): + """Test user-access view with openshift node read wildcard permission.""" + url = reverse("user-access") + response = self.client.get(url, **self.headers) + + self.assertEqual(len(response.data.get("data")), 5) + self.assertTrue({"type": "aws", "access": False} in response.data.get("data")) + self.assertTrue({"type": "ocp", "access": True} in response.data.get("data")) + self.assertTrue({"type": "gcp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) + self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) + + @RbacPermissions({"openshift.cluster": "*"}) + def test_ocp_view_cluster_wildcard(self): + """Test user-access view with openshift cluster wildcard permission.""" + url = reverse("user-access") + + response = self.client.get(url, **self.headers) + + self.assertEqual(len(response.data.get("data")), 5) + self.assertTrue({"type": "aws", "access": False} in response.data.get("data")) + self.assertTrue({"type": "ocp", "access": True} in response.data.get("data")) + self.assertTrue({"type": "gcp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) + self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) + + @RbacPermissions({"openshift.project": "*"}) + def test_ocp_view_project_wildcard(self): + """Test user-access view with openshift project wildcard permission.""" + url = reverse("user-access") + + response = self.client.get(url, **self.headers) + + self.assertEqual(len(response.data.get("data")), 5) + self.assertTrue({"type": "aws", "access": False} in response.data.get("data")) + self.assertTrue({"type": "ocp", "access": True} in response.data.get("data")) + self.assertTrue({"type": "gcp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) + self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) + + @RbacPermissions({"openshift.node": "*"}) + def test_ocp_view_node_wildcard(self): + """Test user-access view with openshift node wildcard permission.""" + url = reverse("user-access") + + response = self.client.get(url, **self.headers) + + self.assertEqual(len(response.data.get("data")), 5) + self.assertTrue({"type": "aws", "access": False} in response.data.get("data")) + self.assertTrue({"type": "ocp", "access": True} in response.data.get("data")) + self.assertTrue({"type": "gcp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "azure", "access": False} in 
response.data.get("data")) + self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) + + @RbacPermissions({"gcp.account": {"read": ["*"]}, "gcp.project": {"read": ["myproject"]}}) + def test_gcp_view_account(self): + """Test user-access view with gcp account read wildcard permission.""" + url = reverse("user-access") + response = self.client.get(url, **self.headers) + + self.assertEqual(len(response.data.get("data")), 5) + self.assertTrue({"type": "aws", "access": False} in response.data.get("data")) + self.assertTrue({"type": "ocp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "gcp", "access": True} in response.data.get("data")) + self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) + self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) + + @RbacPermissions({"gcp.account": {"read": ["myaccount"]}, "gcp.project": {"read": ["*"]}}) + def test_gcp_view_project(self): + """Test user-access view with gcp project read wildcard permission.""" + url = reverse("user-access") + response = self.client.get(url, **self.headers) + + self.assertEqual(len(response.data.get("data")), 5) + self.assertTrue({"type": "aws", "access": False} in response.data.get("data")) + self.assertTrue({"type": "ocp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "gcp", "access": True} in response.data.get("data")) + self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) + self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) + + @RbacPermissions({"gcp.account": "*"}) + def test_gcp_view_account_wildcard(self): + """Test user-access view with gcp account wildcard permission.""" + url = reverse("user-access") + + response = self.client.get(url, **self.headers) + + self.assertEqual(len(response.data.get("data")), 5) + self.assertTrue({"type": "aws", "access": False} in response.data.get("data")) + self.assertTrue({"type": "ocp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "gcp", "access": True} in response.data.get("data")) + self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) + self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) + + @RbacPermissions({"gcp.project": "*"}) + def test_gcp_view_project_wildcard(self): + """Test user-access view with gcp project wildcard permission.""" + url = reverse("user-access") + + response = self.client.get(url, **self.headers) + + self.assertEqual(len(response.data.get("data")), 5) + self.assertTrue({"type": "aws", "access": False} in response.data.get("data")) + self.assertTrue({"type": "ocp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "gcp", "access": True} in response.data.get("data")) + self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) + self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) + + @RbacPermissions({"azure.subscription_guid": {"read": ["*"]}}) + def test_azure_view_read(self): + """Test user-access view with azure subscription read wildcard permission.""" + url = reverse("user-access") + response = self.client.get(url, **self.headers) + + self.assertEqual(len(response.data.get("data")), 5) + self.assertTrue({"type": "aws", "access": False} in response.data.get("data")) + self.assertTrue({"type": "ocp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "gcp", 
"access": False} in response.data.get("data")) + self.assertTrue({"type": "azure", "access": True} in response.data.get("data")) + self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) + + @RbacPermissions({"azure.subscription_guid": "*"}) + def test_azure_view_wildcard(self): + """Test user-access view with azure subscription wildcard permission.""" + url = reverse("user-access") + response = self.client.get(url, **self.headers) + + self.assertEqual(len(response.data.get("data")), 5) + self.assertTrue({"type": "aws", "access": False} in response.data.get("data")) + self.assertTrue({"type": "ocp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "gcp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "azure", "access": True} in response.data.get("data")) + self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) + + def test_view_as_org_admin(self): + """Test user-access view as an org admin.""" + url = reverse("user-access") + response = self.client.get(url, **self.headers) + + self.assertEqual(len(response.data.get("data")), 5) + self.assertTrue({"type": "aws", "access": True} in response.data.get("data")) + self.assertTrue({"type": "ocp", "access": True} in response.data.get("data")) + self.assertTrue({"type": "gcp", "access": True} in response.data.get("data")) + self.assertTrue({"type": "azure", "access": True} in response.data.get("data")) + self.assertTrue({"type": "cost_model", "access": True} in response.data.get("data")) + + @RbacPermissions({"*": "*"}) + def test_view_as_cost_admin(self): + """Test user-access view as a cost admin.""" + url = reverse("user-access") + response = self.client.get(url, **self.headers) + + self.assertEqual(len(response.data.get("data")), 5) + self.assertTrue({"type": "aws", "access": True} in response.data.get("data")) + self.assertTrue({"type": "ocp", "access": True} in response.data.get("data")) + self.assertTrue({"type": "gcp", "access": True} in response.data.get("data")) + self.assertTrue({"type": "azure", "access": True} in response.data.get("data")) + self.assertTrue({"type": "cost_model", "access": True} in response.data.get("data")) + + def test_aws_view_query_read_org_admin(self): + """Test user-access view query as an org admin.""" + url = reverse("user-access") + query_url = f"{url}?source_type=aws" + response = self.client.get(query_url, **self.headers) + + self.assertTrue(response.data.get("data")) + + @RbacPermissions({"aws.account": "*"}) + def test_aws_view_query_read(self): + """Test user-access view query for aws.""" + url = reverse("user-access") + query_url = f"{url}?source_type=aws" + response = self.client.get(query_url, **self.headers) + + self.assertTrue(response.data.get("data")) + + @RbacPermissions({"openshift.cluster": "*"}) + def test_openshift_view_query_read_for_aws(self): + """Test user-access view query for aws with openshift permissions.""" + url = reverse("user-access") + query_url = f"{url}?type=aws" + response = self.client.get(query_url, **self.headers) + + self.assertFalse(response.data.get("data")) + + @RbacPermissions({"cost_model": "*"}) + def test_cost_model_view_query_read_for_aws(self): + """Test user-access view query for cost_model.""" + url = reverse("user-access") + query_url = f"{url}?type=cost_model" + response = self.client.get(query_url, **self.headers) + + self.assertTrue(response.data.get("data")) + + @RbacPermissions({"cost_model": {"write": ["*"]}}) + def 
test_cost_model_view_query_write_for_aws(self): + """Test user-access view query for cost_model with write access.""" + url = reverse("user-access") + query_url = f"{url}?type=cost_model" + response = self.client.get(query_url, **self.headers) + + self.assertTrue(response.data.get("data")) + + def test_view_query_invalid_source_type(self): + """Test user-access view query for invalid type.""" + url = reverse("user-access") + query_url = f"{url}?type=bad" + response = self.client.get(query_url, **self.headers) + + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) diff --git a/koku/api/user_access/view.py b/koku/api/user_access/view.py new file mode 100644 index 0000000000..89e9367a7b --- /dev/null +++ b/koku/api/user_access/view.py @@ -0,0 +1,155 @@ +# +# Copyright 2021 Red Hat, Inc. +# +# This program is free software: you can redistribute it and/or modify +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# +"""View for UserAccess.""" +import logging + +from django.utils.decorators import method_decorator +from django.views.decorators.vary import vary_on_headers +from rest_framework import status +from rest_framework.permissions import AllowAny +from rest_framework.response import Response +from rest_framework.views import APIView + +from api.common import CACHE_RH_IDENTITY_HEADER +from api.common.pagination import ListPaginator + +LOGGER = logging.getLogger(__name__) + + +class UserAccess: + def check_access(self, access_list): + if access_list: + return True + return False + + +class AWSUserAccess(UserAccess): + def __init__(self, access): + self.account_access = access.get("aws.account") + + @property + def access(self): + if self.check_access(self.account_access): + return True + return False + + +class OCPUserAccess(UserAccess): + def __init__(self, access): + self.cluster_access = access.get("openshift.cluster") + self.node_access = access.get("openshift.node") + self.project_access = access.get("openshift.project") + + @property + def access(self): + if ( + self.check_access(self.cluster_access) + or self.check_access(self.node_access) + or self.check_access(self.project_access) + ): + return True + return False + + +class AzureUserAccess(UserAccess): + def __init__(self, access): + self.subscription_access = access.get("azure.subscription_guid") + + @property + def access(self): + if self.check_access(self.subscription_access): + return True + return False + + +class GCPUserAccess(UserAccess): + def __init__(self, access): + self.account_access = access.get("gcp.account") + self.project_access = access.get("gcp.project") + + @property + def access(self): + if self.check_access(self.account_access) or self.check_access(self.project_access): + return True + return False + + +class CostModelUserAccess(UserAccess): + def __init__(self, access): + self.subscription_access = access.get("cost_model") + + @property + def access(self): + if self.check_access(self.subscription_access): + return True + return False + + +class CostManagementAllAccess(UserAccess): + def __init__(self, access): + self.all_access = access.get("*") + + 
@property + def access(self): + if self.check_access(self.all_access): + return True + return False + + +class UserAccessView(APIView): + """API GET view for User API.""" + + permission_classes = [AllowAny] + + @method_decorator(vary_on_headers(CACHE_RH_IDENTITY_HEADER)) + def get(self, request, **kwargs): + query_params = request.query_params + user_access = request.user.access + admin_user = request.user.admin or CostManagementAllAccess(user_access).access + + source_types = [ + {"type": "aws", "access_class": AWSUserAccess}, + {"type": "ocp", "access_class": OCPUserAccess}, + {"type": "gcp", "access_class": GCPUserAccess}, + {"type": "azure", "access_class": AzureUserAccess}, + {"type": "cost_model", "access_class": CostModelUserAccess}, + ] + + source_type = query_params.get("type") + if source_type: + source_accessor = next((item for item in source_types if item.get("type") == source_type.lower()), False) + if source_accessor: + access_class = source_accessor.get("access_class") + if admin_user: + access_granted = True + else: + access_granted = access_class(user_access).access + return Response({"data": access_granted}) + else: + return Response({f"Unknown source type: {source_type}"}, status=status.HTTP_400_BAD_REQUEST) + + data = [] + for source_type in source_types: + access_granted = False + if admin_user: + access_granted = True + else: + access_granted = source_type.get("access_class")(user_access).access + data.append({"type": source_type.get("type"), "access": access_granted}) + + paginator = ListPaginator(data, request) + + return paginator.get_paginated_response(data) diff --git a/koku/api/views.py b/koku/api/views.py index 502b4e11a2..846a32f2ce 100644 --- a/koku/api/views.py +++ b/koku/api/views.py @@ -67,3 +67,4 @@ from api.tags.azure.view import AzureTagView from api.tags.gcp.view import GCPTagView from api.tags.ocp.view import OCPTagView +from api.user_access.view import UserAccessView From edbdb2ea4ba196620729c634fb7219693c63d439 Mon Sep 17 00:00:00 2001 From: esebesto <73821679+esebesto@users.noreply.github.com> Date: Thu, 21 Jan 2021 19:32:17 +0100 Subject: [PATCH 14/17] Fix forecast paths (#2606) --- docs/source/specs/openapi.json | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/source/specs/openapi.json b/docs/source/specs/openapi.json index f66a985966..8faab173ab 100644 --- a/docs/source/specs/openapi.json +++ b/docs/source/specs/openapi.json @@ -396,7 +396,7 @@ }] } }, - "/forecasts/aws/costs": { + "/forecasts/aws/costs/": { "summary": "AWS Cost Forecasts", "get": { "tags":["Forecasts"], @@ -441,7 +441,7 @@ }] } }, - "/forecasts/azure/costs": { + "/forecasts/azure/costs/": { "summary": "Azure Cost Forecasts", "get": { "tags":["Forecasts"], @@ -486,7 +486,7 @@ }] } }, - "/forecasts/gcp/costs": { + "/forecasts/gcp/costs/": { "summary": "GCP Cost Forecasts", "get": { "tags":["Forecasts"], @@ -531,7 +531,7 @@ }] } }, - "/forecasts/openshift/costs": { + "/forecasts/openshift/costs/": { "summary": "OpenShift Cost Forecasts", "get": { "tags":["Forecasts"], @@ -576,7 +576,7 @@ }] } }, - "/forecasts/openshift/infrastructures/aws/costs": { + "/forecasts/openshift/infrastructures/aws/costs/": { "summary": "OpenShift on AWS Cost Forecasts", "get": { "tags":["Forecasts"], @@ -621,7 +621,7 @@ }] } }, - "/forecasts/openshift/infrastructures/all/costs": { + "/forecasts/openshift/infrastructures/all/costs/": { "summary": "OpenShift on Cloud Cost Forecasts", "get": { "tags":["Forecasts"], @@ -666,7 +666,7 @@ }] } }, - 
"/forecasts/openshift/infrastructures/azure/costs": { + "/forecasts/openshift/infrastructures/azure/costs/": { "summary": "OpenShift on Azure Cost Forecasts", "get": { "tags":["Forecasts"], From c8e5dddafcc07b4b684c2418524421983420ea9b Mon Sep 17 00:00:00 2001 From: Douglas Curtis Date: Thu, 21 Jan 2021 14:15:53 -0500 Subject: [PATCH 15/17] user access logging (#2607) --- koku/api/user_access/view.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/koku/api/user_access/view.py b/koku/api/user_access/view.py index 89e9367a7b..ce87d51d43 100644 --- a/koku/api/user_access/view.py +++ b/koku/api/user_access/view.py @@ -26,7 +26,7 @@ from api.common import CACHE_RH_IDENTITY_HEADER from api.common.pagination import ListPaginator -LOGGER = logging.getLogger(__name__) +LOG = logging.getLogger(__name__) class UserAccess: @@ -118,7 +118,9 @@ class UserAccessView(APIView): def get(self, request, **kwargs): query_params = request.query_params user_access = request.user.access + LOG.info(f"User Access RBAC permissions: {str(user_access)}. Org Admin: {str(request.user.admin)}") admin_user = request.user.admin or CostManagementAllAccess(user_access).access + LOG.info(f"User Access admin user: {str(admin_user)}") source_types = [ {"type": "aws", "access_class": AWSUserAccess}, From 26284ad484366ae340d75d1c618c0efe6f976e49 Mon Sep 17 00:00:00 2001 From: Douglas Curtis Date: Thu, 21 Jan 2021 15:24:54 -0500 Subject: [PATCH 16/17] Fixing user access to expect all rbac types to be present (#2608) --- koku/api/user_access/test/test_view.py | 261 ++++++++++++++++++++++--- koku/api/user_access/view.py | 19 +- 2 files changed, 233 insertions(+), 47 deletions(-) diff --git a/koku/api/user_access/test/test_view.py b/koku/api/user_access/test/test_view.py index 6bfb0a82ea..efedc45540 100644 --- a/koku/api/user_access/test/test_view.py +++ b/koku/api/user_access/test/test_view.py @@ -31,7 +31,19 @@ def setUp(self): super().setUp() self.client = APIClient() - @RbacPermissions({"aws.account": {"read": ["*"]}}) + @RbacPermissions( + { + "aws.account": {"read": ["*"]}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": []}, + "gcp.project": {"read": []}, + "azure.subscription_guid": {"read": []}, + "openshift.cluster": {"read": []}, + "openshift.node": {"read": []}, + "openshift.project": {"read": []}, + "cost_model": {"read": [], "write": []}, + } + ) def test_aws_view_read(self): """Test user-access view with aws read wildcard permission.""" url = reverse("user-access") @@ -44,7 +56,19 @@ def test_aws_view_read(self): self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) - @RbacPermissions({"aws.account": {"read": ["123"]}}) + @RbacPermissions( + { + "aws.account": {"read": ["123"]}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": []}, + "gcp.project": {"read": []}, + "azure.subscription_guid": {"read": []}, + "openshift.cluster": {"read": []}, + "openshift.node": {"read": []}, + "openshift.project": {"read": []}, + "cost_model": {"read": [], "write": []}, + } + ) def test_aws_view_read_specific_account(self): """Test user-access view with aws read specific account permission.""" url = reverse("user-access") @@ -57,7 +81,19 @@ def test_aws_view_read_specific_account(self): self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) - 
@RbacPermissions({"aws.account": "*"}) + @RbacPermissions( + { + "aws.account": {"read": ["*"]}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": []}, + "gcp.project": {"read": []}, + "azure.subscription_guid": {"read": []}, + "openshift.cluster": {"read": []}, + "openshift.node": {"read": []}, + "openshift.project": {"read": []}, + "cost_model": {"read": [], "write": []}, + } + ) def test_aws_view_wildcard(self): """Test user-access view with aws wildcard permission.""" url = reverse("user-access") @@ -72,9 +108,15 @@ def test_aws_view_wildcard(self): @RbacPermissions( { + "aws.account": {"read": []}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": []}, + "gcp.project": {"read": []}, + "azure.subscription_guid": {"read": []}, "openshift.cluster": {"read": ["*"]}, - "openshift.project": {"read": ["myproject"]}, "openshift.node": {"read": ["mynode"]}, + "openshift.project": {"read": ["myproject"]}, + "cost_model": {"read": [], "write": []}, } ) def test_ocp_view_cluster(self): @@ -91,9 +133,15 @@ def test_ocp_view_cluster(self): @RbacPermissions( { + "aws.account": {"read": []}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": []}, + "gcp.project": {"read": []}, + "azure.subscription_guid": {"read": []}, "openshift.cluster": {"read": ["mycluster"]}, - "openshift.project": {"read": ["*"]}, "openshift.node": {"read": ["mynode"]}, + "openshift.project": {"read": ["*"]}, + "cost_model": {"read": [], "write": []}, } ) def test_ocp_view_project(self): @@ -110,9 +158,15 @@ def test_ocp_view_project(self): @RbacPermissions( { + "aws.account": {"read": []}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": []}, + "gcp.project": {"read": []}, + "azure.subscription_guid": {"read": []}, "openshift.cluster": {"read": ["mycluster"]}, - "openshift.project": {"read": ["myproject"]}, "openshift.node": {"read": ["*"]}, + "openshift.project": {"read": ["myproject"]}, + "cost_model": {"read": [], "write": []}, } ) def test_ocp_view_node(self): @@ -127,7 +181,19 @@ def test_ocp_view_node(self): self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) - @RbacPermissions({"openshift.cluster": "*"}) + @RbacPermissions( + { + "aws.account": {"read": []}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": []}, + "gcp.project": {"read": []}, + "azure.subscription_guid": {"read": []}, + "openshift.cluster": {"read": ["*"]}, + "openshift.node": {"read": []}, + "openshift.project": {"read": []}, + "cost_model": {"read": [], "write": []}, + } + ) def test_ocp_view_cluster_wildcard(self): """Test user-access view with openshift cluster wildcard permission.""" url = reverse("user-access") @@ -141,7 +207,19 @@ def test_ocp_view_cluster_wildcard(self): self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) - @RbacPermissions({"openshift.project": "*"}) + @RbacPermissions( + { + "aws.account": {"read": []}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": []}, + "gcp.project": {"read": []}, + "azure.subscription_guid": {"read": []}, + "openshift.cluster": {"read": [""]}, + "openshift.node": {"read": []}, + "openshift.project": {"read": ["*"]}, + "cost_model": {"read": [], "write": []}, + } + ) def test_ocp_view_project_wildcard(self): """Test user-access view with openshift project wildcard 
permission.""" url = reverse("user-access") @@ -155,7 +233,19 @@ def test_ocp_view_project_wildcard(self): self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) - @RbacPermissions({"openshift.node": "*"}) + @RbacPermissions( + { + "aws.account": {"read": []}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": []}, + "gcp.project": {"read": []}, + "azure.subscription_guid": {"read": []}, + "openshift.cluster": {"read": []}, + "openshift.node": {"read": ["*"]}, + "openshift.project": {"read": []}, + "cost_model": {"read": [], "write": []}, + } + ) def test_ocp_view_node_wildcard(self): """Test user-access view with openshift node wildcard permission.""" url = reverse("user-access") @@ -169,7 +259,19 @@ def test_ocp_view_node_wildcard(self): self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) - @RbacPermissions({"gcp.account": {"read": ["*"]}, "gcp.project": {"read": ["myproject"]}}) + @RbacPermissions( + { + "aws.account": {"read": []}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": ["*"]}, + "gcp.project": {"read": ["myproject"]}, + "azure.subscription_guid": {"read": []}, + "openshift.cluster": {"read": []}, + "openshift.node": {"read": []}, + "openshift.project": {"read": []}, + "cost_model": {"read": [], "write": []}, + } + ) def test_gcp_view_account(self): """Test user-access view with gcp account read wildcard permission.""" url = reverse("user-access") @@ -182,7 +284,19 @@ def test_gcp_view_account(self): self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) - @RbacPermissions({"gcp.account": {"read": ["myaccount"]}, "gcp.project": {"read": ["*"]}}) + @RbacPermissions( + { + "aws.account": {"read": []}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": ["myaccount"]}, + "gcp.project": {"read": ["*"]}, + "azure.subscription_guid": {"read": []}, + "openshift.cluster": {"read": []}, + "openshift.node": {"read": []}, + "openshift.project": {"read": []}, + "cost_model": {"read": [], "write": []}, + } + ) def test_gcp_view_project(self): """Test user-access view with gcp project read wildcard permission.""" url = reverse("user-access") @@ -195,7 +309,19 @@ def test_gcp_view_project(self): self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) - @RbacPermissions({"gcp.account": "*"}) + @RbacPermissions( + { + "aws.account": {"read": []}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": ["*"]}, + "gcp.project": {"read": []}, + "azure.subscription_guid": {"read": []}, + "openshift.cluster": {"read": []}, + "openshift.node": {"read": []}, + "openshift.project": {"read": []}, + "cost_model": {"read": [], "write": []}, + } + ) def test_gcp_view_account_wildcard(self): """Test user-access view with gcp account wildcard permission.""" url = reverse("user-access") @@ -209,7 +335,19 @@ def test_gcp_view_account_wildcard(self): self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) - @RbacPermissions({"gcp.project": "*"}) + @RbacPermissions( + { + "aws.account": 
{"read": []}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": []}, + "gcp.project": {"read": ["*"]}, + "azure.subscription_guid": {"read": []}, + "openshift.cluster": {"read": []}, + "openshift.node": {"read": []}, + "openshift.project": {"read": []}, + "cost_model": {"read": [], "write": []}, + } + ) def test_gcp_view_project_wildcard(self): """Test user-access view with gcp project wildcard permission.""" url = reverse("user-access") @@ -223,7 +361,19 @@ def test_gcp_view_project_wildcard(self): self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) - @RbacPermissions({"azure.subscription_guid": {"read": ["*"]}}) + @RbacPermissions( + { + "aws.account": {"read": []}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": []}, + "gcp.project": {"read": []}, + "azure.subscription_guid": {"read": ["*"]}, + "openshift.cluster": {"read": []}, + "openshift.node": {"read": []}, + "openshift.project": {"read": []}, + "cost_model": {"read": [], "write": []}, + } + ) def test_azure_view_read(self): """Test user-access view with azure subscription read wildcard permission.""" url = reverse("user-access") @@ -236,7 +386,19 @@ def test_azure_view_read(self): self.assertTrue({"type": "azure", "access": True} in response.data.get("data")) self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) - @RbacPermissions({"azure.subscription_guid": "*"}) + @RbacPermissions( + { + "aws.account": {"read": []}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": []}, + "gcp.project": {"read": []}, + "azure.subscription_guid": {"read": ["*"]}, + "openshift.cluster": {"read": []}, + "openshift.node": {"read": []}, + "openshift.project": {"read": []}, + "cost_model": {"read": [], "write": []}, + } + ) def test_azure_view_wildcard(self): """Test user-access view with azure subscription wildcard permission.""" url = reverse("user-access") @@ -261,19 +423,6 @@ def test_view_as_org_admin(self): self.assertTrue({"type": "azure", "access": True} in response.data.get("data")) self.assertTrue({"type": "cost_model", "access": True} in response.data.get("data")) - @RbacPermissions({"*": "*"}) - def test_view_as_cost_admin(self): - """Test user-access view as a cost admin.""" - url = reverse("user-access") - response = self.client.get(url, **self.headers) - - self.assertEqual(len(response.data.get("data")), 5) - self.assertTrue({"type": "aws", "access": True} in response.data.get("data")) - self.assertTrue({"type": "ocp", "access": True} in response.data.get("data")) - self.assertTrue({"type": "gcp", "access": True} in response.data.get("data")) - self.assertTrue({"type": "azure", "access": True} in response.data.get("data")) - self.assertTrue({"type": "cost_model", "access": True} in response.data.get("data")) - def test_aws_view_query_read_org_admin(self): """Test user-access view query as an org admin.""" url = reverse("user-access") @@ -282,7 +431,19 @@ def test_aws_view_query_read_org_admin(self): self.assertTrue(response.data.get("data")) - @RbacPermissions({"aws.account": "*"}) + @RbacPermissions( + { + "aws.account": {"read": ["*"]}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": []}, + "gcp.project": {"read": []}, + "azure.subscription_guid": {"read": []}, + "openshift.cluster": {"read": []}, + "openshift.node": {"read": []}, + "openshift.project": {"read": []}, + "cost_model": {"read": [], "write": []}, + 
} + ) def test_aws_view_query_read(self): """Test user-access view query for aws.""" url = reverse("user-access") @@ -291,7 +452,19 @@ def test_aws_view_query_read(self): self.assertTrue(response.data.get("data")) - @RbacPermissions({"openshift.cluster": "*"}) + @RbacPermissions( + { + "aws.account": {"read": []}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": []}, + "gcp.project": {"read": []}, + "azure.subscription_guid": {"read": []}, + "openshift.cluster": {"read": ["*"]}, + "openshift.node": {"read": []}, + "openshift.project": {"read": []}, + "cost_model": {"read": [], "write": []}, + } + ) def test_openshift_view_query_read_for_aws(self): """Test user-access view query for aws with openshift permissions.""" url = reverse("user-access") @@ -300,7 +473,19 @@ def test_openshift_view_query_read_for_aws(self): self.assertFalse(response.data.get("data")) - @RbacPermissions({"cost_model": "*"}) + @RbacPermissions( + { + "aws.account": {"read": []}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": []}, + "gcp.project": {"read": []}, + "azure.subscription_guid": {"read": []}, + "openshift.cluster": {"read": ["*"]}, + "openshift.node": {"read": []}, + "openshift.project": {"read": []}, + "cost_model": {"read": ["*"], "write": []}, + } + ) def test_cost_model_view_query_read_for_aws(self): """Test user-access view query for cost_model.""" url = reverse("user-access") @@ -309,7 +494,19 @@ def test_cost_model_view_query_read_for_aws(self): self.assertTrue(response.data.get("data")) - @RbacPermissions({"cost_model": {"write": ["*"]}}) + @RbacPermissions( + { + "aws.account": {"read": []}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": []}, + "gcp.project": {"read": []}, + "azure.subscription_guid": {"read": []}, + "openshift.cluster": {"read": ["*"]}, + "openshift.node": {"read": []}, + "openshift.project": {"read": []}, + "cost_model": {"read": [], "write": ["*"]}, + } + ) def test_cost_model_view_query_write_for_aws(self): """Test user-access view query for cost_model with write access.""" url = reverse("user-access") diff --git a/koku/api/user_access/view.py b/koku/api/user_access/view.py index ce87d51d43..c6a9b84b18 100644 --- a/koku/api/user_access/view.py +++ b/koku/api/user_access/view.py @@ -31,7 +31,7 @@ class UserAccess: def check_access(self, access_list): - if access_list: + if access_list.get("read") or access_list.get("write"): return True return False @@ -98,17 +98,6 @@ def access(self): return False -class CostManagementAllAccess(UserAccess): - def __init__(self, access): - self.all_access = access.get("*") - - @property - def access(self): - if self.check_access(self.all_access): - return True - return False - - class UserAccessView(APIView): """API GET view for User API.""" @@ -118,9 +107,9 @@ class UserAccessView(APIView): def get(self, request, **kwargs): query_params = request.query_params user_access = request.user.access - LOG.info(f"User Access RBAC permissions: {str(user_access)}. Org Admin: {str(request.user.admin)}") - admin_user = request.user.admin or CostManagementAllAccess(user_access).access - LOG.info(f"User Access admin user: {str(admin_user)}") + LOG.debug(f"User Access RBAC permissions: {str(user_access)}. 
Org Admin: {str(request.user.admin)}") + admin_user = request.user.admin + LOG.debug(f"User Access admin user: {str(admin_user)}") source_types = [ {"type": "aws", "access_class": AWSUserAccess}, From bd2198ef4a2f814a1958387a8f6c33d09e1f6812 Mon Sep 17 00:00:00 2001 From: Andrew Berglund Date: Fri, 22 Jan 2021 08:47:21 -0500 Subject: [PATCH 17/17] COST-881 Use contains instead of in for arrays (#2604) * Use contains instead of in for Array fields when creating filters in query handler --- koku/api/query_handler.py | 16 ++++++++++++ koku/api/report/aws/query_handler.py | 25 ++++++++++--------- .../test/all/openshift/test_query_handler.py | 18 +++++++++++++ 3 files changed, 47 insertions(+), 12 deletions(-) diff --git a/koku/api/query_handler.py b/koku/api/query_handler.py index 71cc630783..163f881744 100644 --- a/koku/api/query_handler.py +++ b/koku/api/query_handler.py @@ -19,6 +19,7 @@ import logging from dateutil import relativedelta +from django.core.exceptions import FieldDoesNotExist from django.db.models.functions import TruncDay from django.db.models.functions import TruncMonth @@ -344,5 +345,20 @@ def set_access_filters(self, access, filt, filters): filters.add(q_filter) else: filt["operation"] = "in" + try: + check_field_type = None + if hasattr(self, "query_table"): + # Reports APIs + check_field_type = self.query_table._meta.get_field(filt.get("field", "")).get_internal_type() + elif hasattr(self, "data_sources"): + # Tags APIs + check_field_type = ( + self.data_sources[0].get("db_table")._meta.get_field(filt.get("field", "")).get_internal_type() + ) + if check_field_type == "ArrayField": + filt["operation"] = "contains" + except FieldDoesNotExist: + pass + q_filter = QueryFilter(parameter=access, **filt) filters.add(q_filter) diff --git a/koku/api/report/aws/query_handler.py b/koku/api/report/aws/query_handler.py index da74ca1478..9c9e657e70 100644 --- a/koku/api/report/aws/query_handler.py +++ b/koku/api/report/aws/query_handler.py @@ -543,19 +543,20 @@ def set_access_filters(self, access, filt, filters): # structure of the tree. Therefore, as long as the user has access to the root nodes # passed in by group_by[org_unit_id] then the user automatically has access to all # the sub orgs. 
- if access and "*" not in access: - allowed_ous = ( - AWSOrganizationalUnit.objects.filter( - reduce(operator.or_, (Q(org_unit_path__icontains=rbac) for rbac in access)) + with tenant_context(self.tenant): + if access and "*" not in access: + allowed_ous = ( + AWSOrganizationalUnit.objects.filter( + reduce(operator.or_, (Q(org_unit_path__icontains=rbac) for rbac in access)) + ) + .filter(account_alias__isnull=True) + .order_by("org_unit_id", "-created_timestamp") + .distinct("org_unit_id") ) - .filter(account_alias__isnull=True) - .order_by("org_unit_id", "-created_timestamp") - .distinct("org_unit_id") - ) - if allowed_ous: - access = list(allowed_ous.values_list("org_unit_id", flat=True)) - if not isinstance(filt, list) and filt["field"] == "organizational_unit__org_unit_path": - filt["field"] = "organizational_unit__org_unit_id" + if allowed_ous: + access = list(allowed_ous.values_list("org_unit_id", flat=True)) + if not isinstance(filt, list) and filt["field"] == "organizational_unit__org_unit_path": + filt["field"] = "organizational_unit__org_unit_id" super().set_access_filters(access, filt, filters) def total_sum(self, sum1, sum2): # noqa: C901 diff --git a/koku/api/report/test/all/openshift/test_query_handler.py b/koku/api/report/test/all/openshift/test_query_handler.py index 3eb4cff285..c5197ed994 100644 --- a/koku/api/report/test/all/openshift/test_query_handler.py +++ b/koku/api/report/test/all/openshift/test_query_handler.py @@ -18,6 +18,9 @@ from tenant_schemas.utils import tenant_context from api.iam.test.iam_test_case import IamTestCase +from api.iam.test.iam_test_case import RbacPermissions +from api.query_filter import QueryFilter +from api.query_filter import QueryFilterCollection from api.report.all.openshift.query_handler import OCPAllReportQueryHandler from api.urls import OCPAllCostView from api.urls import OCPAllInstanceTypeView @@ -187,3 +190,18 @@ def test_query_table(self): query_params = self.mocked_query_params(url, view) handler = OCPAllReportQueryHandler(query_params) self.assertEqual(handler.query_table, table) + + @RbacPermissions({"openshift.project": {"read": ["analytics"]}}) + def test_set_access_filters_with_array_field(self): + """Test that a filter is correctly set for arrays.""" + + query_params = self.mocked_query_params("?filter[project]=analytics", OCPAllCostView) + # the mocked query parameters dont include the key from the url so it needs to be added + handler = OCPAllReportQueryHandler(query_params) + field = "namespace" + access = ["analytics"] + filt = {"field": field} + filters = QueryFilterCollection() + handler.set_access_filters(access, filt, filters) + expected = [QueryFilter(field=field, operation="contains", parameter=access)] + self.assertEqual(filters._filters, expected)