From c92e575cf78c153608f0566c7954bd76a94d6638 Mon Sep 17 00:00:00 2001 From: Douglas Curtis Date: Mon, 18 Jan 2021 14:00:18 -0500 Subject: [PATCH 01/17] GCP forecasts (#2590) --- docs/source/specs/openapi.json | 45 ++++++++++++++ koku/api/common/permissions/__init__.py | 3 + koku/api/forecast/serializers.py | 4 ++ koku/api/forecast/views.py | 13 ++++ koku/api/urls.py | 2 + koku/api/views.py | 1 + koku/forecast/__init__.py | 1 + koku/forecast/forecast.py | 8 +++ koku/forecast/test/tests_forecast.py | 83 +++++++++++++++++++++++++ koku/sources/test/api/test_view.py | 8 ++- 10 files changed, 167 insertions(+), 1 deletion(-) diff --git a/docs/source/specs/openapi.json b/docs/source/specs/openapi.json index c9a87cc933..6f7058ae13 100644 --- a/docs/source/specs/openapi.json +++ b/docs/source/specs/openapi.json @@ -486,6 +486,51 @@ }] } }, + "/forecasts/gcp/costs": { + "summary": "GCP Cost Forecasts", + "get": { + "tags":["Forecasts"], + "parameters": [{ + "$ref": "#/components/parameters/QueryFilter", + "name":"QueryFilter" + }], + "responses": { + "200": { + "description": "An object describing the cost forecast.", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Forecast" + } + } + } + }, + "400": { + "description": "Request Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + } + } + } + }, + "500": { + "description": "Unexpected Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + } + } + } + } + }, + "security": [{ + "basic_auth": [] + }] + } + }, "/forecasts/openshift/costs": { "summary": "OpenShift Cost Forecasts", "get": { diff --git a/koku/api/common/permissions/__init__.py b/koku/api/common/permissions/__init__.py index c6dfe4b8a1..795fbb280b 100644 --- a/koku/api/common/permissions/__init__.py +++ b/koku/api/common/permissions/__init__.py @@ -16,6 +16,7 @@ # from api.common.permissions.aws_access import AwsAccessPermission from api.common.permissions.azure_access import AzureAccessPermission +from api.common.permissions.gcp_access import GcpAccessPermission from api.common.permissions.openshift_access import OpenShiftAccessPermission from api.provider.models import Provider @@ -23,10 +24,12 @@ AwsAccessPermission.resource_type, AzureAccessPermission.resource_type, OpenShiftAccessPermission.resource_type, + GcpAccessPermission.resource_type, ] RESOURCE_TYPE_MAP = { AwsAccessPermission.resource_type: [Provider.PROVIDER_AWS, Provider.PROVIDER_AWS_LOCAL], AzureAccessPermission.resource_type: [Provider.PROVIDER_AZURE, Provider.PROVIDER_AZURE_LOCAL], OpenShiftAccessPermission.resource_type: [Provider.PROVIDER_OCP], + GcpAccessPermission.resource_type: [Provider.PROVIDER_GCP, Provider.PROVIDER_GCP_LOCAL], } diff --git a/koku/api/forecast/serializers.py b/koku/api/forecast/serializers.py index 698b08d419..58e01c01b4 100644 --- a/koku/api/forecast/serializers.py +++ b/koku/api/forecast/serializers.py @@ -50,6 +50,10 @@ class AWSCostForecastParamSerializer(ForecastParamSerializer): """AWS Cost Forecast Serializer.""" +class GCPCostForecastParamSerializer(ForecastParamSerializer): + """GCP Cost Forecast Serializer.""" + + class AzureCostForecastParamSerializer(ForecastParamSerializer): """Azure Cost Forecast Serializer.""" diff --git a/koku/api/forecast/views.py b/koku/api/forecast/views.py index 94bc014e6c..e0c79910fb 100644 --- a/koku/api/forecast/views.py +++ b/koku/api/forecast/views.py @@ -27,10 +27,12 @@ from api.common.pagination import ForecastListPaginator 
from api.common.permissions import AwsAccessPermission from api.common.permissions import AzureAccessPermission +from api.common.permissions import GcpAccessPermission from api.common.permissions import OpenShiftAccessPermission from api.common.permissions.openshift_all_access import OpenshiftAllAccessPermission from api.forecast.serializers import AWSCostForecastParamSerializer from api.forecast.serializers import AzureCostForecastParamSerializer +from api.forecast.serializers import GCPCostForecastParamSerializer from api.forecast.serializers import OCPAllCostForecastParamSerializer from api.forecast.serializers import OCPAWSCostForecastParamSerializer from api.forecast.serializers import OCPAzureCostForecastParamSerializer @@ -38,11 +40,13 @@ from api.query_params import QueryParameters from forecast import AWSForecast from forecast import AzureForecast +from forecast import GCPForecast from forecast import OCPAllForecast from forecast import OCPAWSForecast from forecast import OCPAzureForecast from forecast import OCPForecast from reporting.models import AzureTagsSummary +from reporting.models import GCPTagsSummary from reporting.models import OCPAWSTagsSummary from reporting.models import OCPAzureTagsSummary from reporting.models import OCPStorageVolumeLabelSummary @@ -128,3 +132,12 @@ class OCPAllCostForecastView(ForecastView): query_handler = OCPAllForecast serializer = OCPAllCostForecastParamSerializer tag_handler = [OCPAWSTagsSummary, OCPAzureTagsSummary] + + +class GCPForecastCostView(ForecastView): + """GCP Cost Forecast View.""" + + permission_classes = (GcpAccessPermission,) + query_handler = GCPForecast + serializer = GCPCostForecastParamSerializer + tag_handler = [GCPTagsSummary] diff --git a/koku/api/urls.py b/koku/api/urls.py index 2510e5784e..dbbb6c15c4 100644 --- a/koku/api/urls.py +++ b/koku/api/urls.py @@ -39,6 +39,7 @@ from api.views import DataExportRequestViewSet from api.views import GCPAccountView from api.views import GCPCostView +from api.views import GCPForecastCostView from api.views import GCPProjectsView from api.views import GCPTagView from api.views import metrics @@ -317,6 +318,7 @@ path("resource-types/openshift-nodes/", OCPNodesView.as_view(), name="openshift-nodes"), path("resource-types/cost-models/", CostModelResourceTypesView.as_view(), name="cost-models"), path("forecasts/aws/costs/", AWSCostForecastView.as_view(), name="aws-cost-forecasts"), + path("forecasts/gcp/costs/", GCPForecastCostView.as_view(), name="gcp-cost-forecasts"), path("forecasts/azure/costs/", AzureCostForecastView.as_view(), name="azure-cost-forecasts"), path("forecasts/openshift/costs/", OCPCostForecastView.as_view(), name="openshift-cost-forecasts"), path( diff --git a/koku/api/views.py b/koku/api/views.py index 1f8096a84c..502b4e11a2 100644 --- a/koku/api/views.py +++ b/koku/api/views.py @@ -20,6 +20,7 @@ from api.dataexport.views import DataExportRequestViewSet from api.forecast.views import AWSCostForecastView from api.forecast.views import AzureCostForecastView +from api.forecast.views import GCPForecastCostView from api.forecast.views import OCPAllCostForecastView from api.forecast.views import OCPAWSCostForecastView from api.forecast.views import OCPAzureCostForecastView diff --git a/koku/forecast/__init__.py b/koku/forecast/__init__.py index adb572e538..5c0e38ba9c 100644 --- a/koku/forecast/__init__.py +++ b/koku/forecast/__init__.py @@ -18,6 +18,7 @@ from .forecast import AWSForecast # noqa: F401 from .forecast import AzureForecast # noqa: F401 from .forecast import 
Forecast # noqa: F401 +from .forecast import GCPForecast # noqa: F401 from .forecast import OCPAllForecast # noqa: F401 from .forecast import OCPAWSForecast # noqa: F401 from .forecast import OCPAzureForecast # noqa: F401 diff --git a/koku/forecast/forecast.py b/koku/forecast/forecast.py index 8e466493fd..3beccada86 100644 --- a/koku/forecast/forecast.py +++ b/koku/forecast/forecast.py @@ -38,6 +38,7 @@ from api.report.aws.provider_map import AWSProviderMap from api.report.azure.openshift.provider_map import OCPAzureProviderMap from api.report.azure.provider_map import AzureProviderMap +from api.report.gcp.provider_map import GCPProviderMap from api.report.ocp.provider_map import OCPProviderMap from api.utils import DateHelper from reporting.provider.aws.models import AWSOrganizationalUnit @@ -557,3 +558,10 @@ class OCPAllForecast(Forecast): provider = Provider.OCP_ALL provider_map_class = OCPAllProviderMap + + +class GCPForecast(Forecast): + """GCP forecasting class.""" + + provider = Provider.PROVIDER_GCP + provider_map_class = GCPProviderMap diff --git a/koku/forecast/test/tests_forecast.py b/koku/forecast/test/tests_forecast.py index 5abb24be83..1872c47b31 100644 --- a/koku/forecast/test/tests_forecast.py +++ b/koku/forecast/test/tests_forecast.py @@ -28,6 +28,7 @@ from api.forecast.views import AWSCostForecastView from api.forecast.views import AzureCostForecastView +from api.forecast.views import GCPForecastCostView from api.forecast.views import OCPAllCostForecastView from api.forecast.views import OCPAWSCostForecastView from api.forecast.views import OCPAzureCostForecastView @@ -39,11 +40,15 @@ from api.utils import DateHelper from forecast import AWSForecast from forecast import AzureForecast +from forecast import GCPForecast from forecast import OCPAllForecast from forecast import OCPAWSForecast from forecast import OCPAzureForecast from forecast import OCPForecast from forecast.forecast import LinearForecastResult +from reporting.provider.gcp.models import GCPCostSummary +from reporting.provider.gcp.models import GCPCostSummaryByAccount +from reporting.provider.gcp.models import GCPCostSummaryByProject from reporting.provider.ocp.models import OCPCostSummary from reporting.provider.ocp.models import OCPCostSummaryByNode from reporting.provider.ocp.models import OCPUsageLineItemDailySummary @@ -452,6 +457,84 @@ def test_predict_flat(self): self.assertGreaterEqual(float(pval), 0) +class GCPForecastTest(IamTestCase): + """Tests the GCPForecast class.""" + + def test_predict_flat(self): + """Test that predict() returns expected values for flat costs.""" + dh = DateHelper() + + expected = [] + for n in range(0, 10): + # the test data needs to include some jitter to avoid + # division-by-zero in the underlying dot-product maths. 
+ expected.append( + { + "usage_start": (dh.this_month_start + timedelta(days=n)).date(), + "total_cost": 5 + random.random(), + "infrastructure_cost": 3 + random.random(), + "supplementary_cost": 2 + random.random(), + } + ) + mock_qset = MockQuerySet(expected) + + mocked_table = Mock() + mocked_table.objects.filter.return_value.order_by.return_value.values.return_value.annotate.return_value = ( # noqa: E501 + mock_qset + ) + mocked_table.len = mock_qset.len + + params = self.mocked_query_params("?", AzureCostForecastView) + instance = GCPForecast(params) + + instance.cost_summary_table = mocked_table + + results = instance.predict() + + for result in results: + for val in result.get("values", []): + self.assertIsInstance(val.get("date"), date) + + for item, cost in [ + (val.get("cost"), 5), + (val.get("infrastructure"), 3), + (val.get("supplementary"), 2), + ]: + self.assertAlmostEqual(float(item.get("total").get("value")), cost, delta=2.2) + self.assertAlmostEqual(float(item.get("confidence_max").get("value")), cost, delta=2.2) + self.assertAlmostEqual(float(item.get("confidence_min").get("value")), cost, delta=2.2) + self.assertAlmostEqual(float(item.get("rsquared").get("value")), 1, delta=2.2) + for pval in item.get("pvalues").get("value"): + self.assertGreaterEqual(float(pval), 0) + + def test_cost_summary_table(self): + """Test that we select a valid table or view.""" + params = self.mocked_query_params("?", GCPForecastCostView) + forecast = GCPForecast(params) + self.assertEqual(forecast.cost_summary_table, GCPCostSummary) + + params = self.mocked_query_params("?", GCPForecastCostView, access={"gcp.account": {"read": ["1"]}}) + forecast = GCPForecast(params) + self.assertEqual(forecast.cost_summary_table, GCPCostSummaryByAccount) + + params = self.mocked_query_params("?", GCPForecastCostView, access={"gcp.project": {"read": ["1"]}}) + forecast = GCPForecast(params) + self.assertEqual(forecast.cost_summary_table, GCPCostSummaryByProject) + + params = self.mocked_query_params( + "?", GCPForecastCostView, access={"gcp.account": {"read": ["1"]}, "gcp.project": {"read": ["1"]}} + ) + forecast = GCPForecast(params) + self.assertEqual(forecast.cost_summary_table, GCPCostSummaryByProject) + + params = self.mocked_query_params( + "?", GCPForecastCostView, access={"gcp.account": {"read": ["1"]}, "gcp.project": {"read": ["1"]}} + ) + + forecast = GCPForecast(params) + self.assertEqual(forecast.cost_summary_table, GCPCostSummaryByProject) + + class OCPForecastTest(IamTestCase): """Tests the OCPForecast class.""" diff --git a/koku/sources/test/api/test_view.py b/koku/sources/test/api/test_view.py index a8df996b9c..559a72f15d 100644 --- a/koku/sources/test/api/test_view.py +++ b/koku/sources/test/api/test_view.py @@ -331,5 +331,11 @@ def test_sources_access(self): mock_user = Mock(admin=False, access=permissions) request = Mock(user=mock_user) excluded = SourcesViewSet.get_excludes(request) - expected = [Provider.PROVIDER_AZURE, Provider.PROVIDER_AZURE_LOCAL, Provider.PROVIDER_OCP] + expected = [ + Provider.PROVIDER_AZURE, + Provider.PROVIDER_AZURE_LOCAL, + Provider.PROVIDER_OCP, + Provider.PROVIDER_GCP, + Provider.PROVIDER_GCP_LOCAL, + ] self.assertEqual(excluded, expected) From 827ad8bec3f67b615b14d6ecfc1fdda834311636 Mon Sep 17 00:00:00 2001 From: Brett Lentz Date: Mon, 18 Jan 2021 14:15:25 -0500 Subject: [PATCH 02/17] COST-854: pass exog vars to wls_prediction_std (#2592) --- koku/forecast/forecast.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git 
a/koku/forecast/forecast.py b/koku/forecast/forecast.py index 3beccada86..495789ef61 100644 --- a/koku/forecast/forecast.py +++ b/koku/forecast/forecast.py @@ -347,7 +347,7 @@ def _run_forecast(self, x, y): x = sm.add_constant(x) model = sm.OLS(y, x) results = model.fit() - return LinearForecastResult(results) + return LinearForecastResult(results, exog=x) def _uniquify_qset(self, qset, field="total_cost"): """Take a QuerySet list, sum costs within the same day, and arrange it into a list of tuples. @@ -395,14 +395,15 @@ class LinearForecastResult: Note: this class should be considered read-only """ - def __init__(self, regression_result): + def __init__(self, regression_result, exog=None): """Class constructor. Args: regression_result (RegressionResult) the results of a statsmodels regression + exog (array-like) exogenous variables for points to predict """ self._regression_result = regression_result - self._std_err, self._conf_lower, self._conf_upper = wls_prediction_std(regression_result) + self._std_err, self._conf_lower, self._conf_upper = wls_prediction_std(regression_result, exog=exog) try: LOG.debug(regression_result.summary()) From a8b04e6dbd30d5a7f598ed66db3552e4b8cc3776 Mon Sep 17 00:00:00 2001 From: Brett Lentz Date: Mon, 18 Jan 2021 15:49:50 -0500 Subject: [PATCH 03/17] COST-848: fix use of DateHandler (#2593) --- koku/api/organizations/queries.py | 3 - koku/api/query_handler.py | 18 +++--- koku/api/tags/queries.py | 3 - koku/forecast/forecast.py | 3 +- koku/forecast/test/tests_forecast.py | 84 +++++++++++++++++----------- 5 files changed, 60 insertions(+), 51 deletions(-) diff --git a/koku/api/organizations/queries.py b/koku/api/organizations/queries.py index 9000aaf660..8a61347d0d 100644 --- a/koku/api/organizations/queries.py +++ b/koku/api/organizations/queries.py @@ -28,7 +28,6 @@ from api.query_filter import QueryFilter from api.query_filter import QueryFilterCollection from api.query_handler import QueryHandler -from api.utils import DateHelper LOG = logging.getLogger(__name__) @@ -71,8 +70,6 @@ class OrgQueryHandler(QueryHandler): SUPPORTED_FILTERS = [] FILTER_MAP = {} - dh = DateHelper() - def __init__(self, parameters): """Establish org query handler. 
diff --git a/koku/api/query_handler.py b/koku/api/query_handler.py index 8b792b22c3..71cc630783 100644 --- a/koku/api/query_handler.py +++ b/koku/api/query_handler.py @@ -59,6 +59,7 @@ def __init__(self, parameters): """ LOG.debug(f"Query Params: {parameters}") + self.dh = DateHelper() parameters = self.filter_to_order_by(parameters) self.tenant = parameters.tenant self.access = parameters.access @@ -215,25 +216,24 @@ def _get_timeframe(self): time_scope_units = self.get_time_scope_units() start = None end = None - dh = DateHelper() if time_scope_units == "month": if time_scope_value == -1: # get current month - start = dh.this_month_start - end = dh.today + start = self.dh.this_month_start + end = self.dh.today else: # get previous month - start = dh.last_month_start - end = dh.last_month_end + start = self.dh.last_month_start + end = self.dh.last_month_end else: if time_scope_value == -10: # get last 10 days - start = dh.n_days_ago(dh.this_hour, 9) - end = dh.this_hour + start = self.dh.n_days_ago(self.dh.this_hour, 9) + end = self.dh.this_hour else: # get last 30 days - start = dh.n_days_ago(dh.this_hour, 29) - end = dh.this_hour + start = self.dh.n_days_ago(self.dh.this_hour, 29) + end = self.dh.this_hour self.start_datetime = start self.end_datetime = end diff --git a/koku/api/tags/queries.py b/koku/api/tags/queries.py index aca0dbea82..732a5bb3fb 100644 --- a/koku/api/tags/queries.py +++ b/koku/api/tags/queries.py @@ -24,7 +24,6 @@ from api.query_filter import QueryFilter from api.query_filter import QueryFilterCollection from api.query_handler import QueryHandler -from api.utils import DateHelper LOG = logging.getLogger(__name__) @@ -69,8 +68,6 @@ class TagQueryHandler(QueryHandler): "value": {"field": "value", "operation": "icontains", "composition_key": "value_filter"}, } - dh = DateHelper() - def __init__(self, parameters): """Establish tag query handler. diff --git a/koku/forecast/forecast.py b/koku/forecast/forecast.py index 495789ef61..225a2be28e 100644 --- a/koku/forecast/forecast.py +++ b/koku/forecast/forecast.py @@ -62,8 +62,6 @@ class Forecast: REPORT_TYPE = "costs" - dh = DateHelper() - def __init__(self, query_params): # noqa: C901 """Class Constructor. 
@@ -73,6 +71,7 @@ def __init__(self, query_params): # noqa: C901 - filters (QueryFilterCollection) - query_range (tuple) """ + self.dh = DateHelper() self.params = query_params # select appropriate model based on access diff --git a/koku/forecast/test/tests_forecast.py b/koku/forecast/test/tests_forecast.py index 1872c47b31..b9f0b5897a 100644 --- a/koku/forecast/test/tests_forecast.py +++ b/koku/forecast/test/tests_forecast.py @@ -85,51 +85,67 @@ def test_constructor(self): def test_forecast_days_required(self): """Test that we accurately select the number of days.""" - dh = DateHelper() params = self.mocked_query_params("?", AWSCostForecastView) - with patch("forecast.forecast.Forecast.dh") as mock_dh: - mock_dh.today = dh.this_month_start - mock_dh.this_month_start = dh.this_month_start - mock_dh.this_month_end = dh.this_month_end - mock_dh.last_month_start = dh.last_month_start - mock_dh.last_month_end = dh.last_month_end + + mock_dh = Mock(spec=DateHelper) + + mock_dh.return_value.today = datetime(2000, 1, 1, 0, 0, 0, 0) + mock_dh.return_value.yesterday = datetime(1999, 12, 31, 0, 0, 0, 0) + mock_dh.return_value.this_month_start = datetime(2000, 1, 1, 0, 0, 0, 0) + mock_dh.return_value.this_month_end = datetime(2000, 1, 31, 0, 0, 0, 0) + mock_dh.return_value.last_month_start = datetime(1999, 12, 1, 0, 0, 0, 0) + mock_dh.return_value.last_month_end = datetime(1999, 12, 31, 0, 0, 0, 0) + + with patch("forecast.forecast.DateHelper", new_callable=lambda: mock_dh) as mock_dh: forecast = AWSForecast(params) - self.assertEqual(forecast.forecast_days_required, dh.this_month_end.day) - - with patch("forecast.forecast.Forecast.dh") as mock_dh: - mock_dh.today = datetime(2000, 1, 13, 0, 0, 0, 0) - mock_dh.yesterday = datetime(2000, 1, 12, 0, 0, 0, 0) - mock_dh.this_month_start = datetime(2000, 1, 1, 0, 0, 0, 0) - mock_dh.this_month_end = datetime(2000, 1, 31, 0, 0, 0, 0) - mock_dh.last_month_start = datetime(1999, 12, 1, 0, 0, 0, 0) - mock_dh.last_month_end = datetime(1999, 12, 31, 0, 0, 0, 0) + self.assertEqual(forecast.forecast_days_required, 31) + + mock_dh.return_value.today = datetime(2000, 1, 13, 0, 0, 0, 0) + mock_dh.return_value.yesterday = datetime(2000, 1, 12, 0, 0, 0, 0) + mock_dh.return_value.this_month_start = datetime(2000, 1, 1, 0, 0, 0, 0) + mock_dh.return_value.this_month_end = datetime(2000, 1, 31, 0, 0, 0, 0) + mock_dh.return_value.last_month_start = datetime(1999, 12, 1, 0, 0, 0, 0) + mock_dh.return_value.last_month_end = datetime(1999, 12, 31, 0, 0, 0, 0) + + with patch("forecast.forecast.DateHelper", new_callable=lambda: mock_dh) as mock_dh: forecast = AWSForecast(params) self.assertEqual(forecast.forecast_days_required, 19) - def test_query_range(self): + def test_query_range_under(self): """Test that we select the correct range based on day of month.""" - dh = DateHelper() params = self.mocked_query_params("?", AWSCostForecastView) - with patch("forecast.forecast.Forecast.dh") as mock_dh: - mock_dh.today = dh.this_month_start + timedelta(days=AWSForecast.MINIMUM - 1) - mock_dh.yesterday = dh.this_month_start + timedelta(days=AWSForecast.MINIMUM - 2) - mock_dh.this_month_start = dh.this_month_start - mock_dh.this_month_end = dh.this_month_end - mock_dh.last_month_start = dh.last_month_start - mock_dh.last_month_end = dh.last_month_end - expected = (dh.last_month_start, mock_dh.yesterday) + dh = DateHelper() + mock_dh = Mock(spec=DateHelper) + + mock_dh.return_value.today = dh.this_month_start + timedelta(days=AWSForecast.MINIMUM - 1) + mock_dh.return_value.yesterday = 
dh.this_month_start + timedelta(days=AWSForecast.MINIMUM - 2) + mock_dh.return_value.this_month_start = dh.this_month_start + mock_dh.return_value.this_month_end = dh.this_month_end + mock_dh.return_value.last_month_start = dh.last_month_start + mock_dh.return_value.last_month_end = dh.last_month_end + + with patch("forecast.forecast.DateHelper", new_callable=lambda: mock_dh) as mock_dh: + expected = (dh.last_month_start, dh.this_month_start + timedelta(days=AWSForecast.MINIMUM - 2)) forecast = AWSForecast(params) self.assertEqual(forecast.query_range, expected) - with patch("forecast.forecast.Forecast.dh") as mock_dh: - mock_dh.today = dh.this_month_start + timedelta(days=(AWSForecast.MINIMUM)) - mock_dh.yesterday = dh.this_month_start + timedelta(days=AWSForecast.MINIMUM - 1) - mock_dh.this_month_start = dh.this_month_start - mock_dh.this_month_end = dh.this_month_end - mock_dh.last_month_start = dh.last_month_start - mock_dh.last_month_end = dh.last_month_end - expected = (dh.this_month_start, dh.this_month_start + timedelta(days=AWSForecast.MINIMUM - 1)) + def test_query_range_over(self): + """Test that we select the correct range based on day of month.""" + params = self.mocked_query_params("?", AWSCostForecastView) + + dh = DateHelper() + mock_dh = Mock(spec=DateHelper) + + mock_dh.return_value.today = dh.this_month_start + timedelta(days=(AWSForecast.MINIMUM + 1)) + mock_dh.return_value.yesterday = dh.this_month_start + timedelta(days=AWSForecast.MINIMUM) + mock_dh.return_value.this_month_start = dh.this_month_start + mock_dh.return_value.this_month_end = dh.this_month_end + mock_dh.return_value.last_month_start = dh.last_month_start + mock_dh.return_value.last_month_end = dh.last_month_end + + with patch("forecast.forecast.DateHelper", new_callable=lambda: mock_dh) as mock_dh: + expected = (dh.this_month_start, dh.this_month_start + timedelta(days=AWSForecast.MINIMUM)) forecast = AWSForecast(params) self.assertEqual(forecast.query_range, expected) From 3a42a67fe60c3b718506107e6c4a4324b7d2cbdb Mon Sep 17 00:00:00 2001 From: Douglas Curtis Date: Tue, 19 Jan 2021 11:37:42 -0500 Subject: [PATCH 04/17] Updating GCP size of usage_amount in daily table (#2595) --- .../migrations/0161_auto_20210118_2113.py | 16 ++++++++++++++++ koku/reporting/provider/gcp/models.py | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 koku/reporting/migrations/0161_auto_20210118_2113.py diff --git a/koku/reporting/migrations/0161_auto_20210118_2113.py b/koku/reporting/migrations/0161_auto_20210118_2113.py new file mode 100644 index 0000000000..5aced67db4 --- /dev/null +++ b/koku/reporting/migrations/0161_auto_20210118_2113.py @@ -0,0 +1,16 @@ +# Generated by Django 3.1.3 on 2021-01-18 21:13 +from django.db import migrations +from django.db import models + + +class Migration(migrations.Migration): + + dependencies = [("reporting", "0160_auto_20210114_1548")] + + operations = [ + migrations.AlterField( + model_name="gcpcostentrylineitemdaily", + name="usage_amount", + field=models.DecimalField(decimal_places=15, max_digits=33, null=True), + ) + ] diff --git a/koku/reporting/provider/gcp/models.py b/koku/reporting/provider/gcp/models.py index b4861ac96a..b3b563014d 100644 --- a/koku/reporting/provider/gcp/models.py +++ b/koku/reporting/provider/gcp/models.py @@ -137,7 +137,7 @@ class Meta: cost = models.DecimalField(max_digits=24, decimal_places=9, null=True, blank=True) currency = models.CharField(max_length=256, null=True, blank=True) conversion_rate = models.CharField(max_length=256, 
null=True, blank=True) - usage_amount = models.DecimalField(max_digits=24, decimal_places=9, null=True) + usage_amount = models.DecimalField(max_digits=33, decimal_places=15, null=True) usage_unit = models.CharField(max_length=256, null=True, blank=True) usage_in_pricing_units = models.DecimalField(max_digits=24, decimal_places=9, null=True) usage_pricing_unit = models.CharField(max_length=256, null=True, blank=True) From 62f715666c06aaf13c9d93b47c5b7cd29ecd9364 Mon Sep 17 00:00:00 2001 From: Andrew Berglund Date: Tue, 19 Jan 2021 14:11:27 -0500 Subject: [PATCH 05/17] COST-891 aws report processor memory bugfix (#2597) * Add string split util * Bugfix for processing memory for aws products --- .../processor/aws/aws_report_processor.py | 38 +++++++++++++------ .../aws/test_aws_report_processor.py | 28 ++++++++++++++ koku/masu/test/util/test_common.py | 8 ++++ koku/masu/util/common.py | 6 +++ 4 files changed, 68 insertions(+), 12 deletions(-) diff --git a/koku/masu/processor/aws/aws_report_processor.py b/koku/masu/processor/aws/aws_report_processor.py index 0567c626fd..5d8c809e1f 100644 --- a/koku/masu/processor/aws/aws_report_processor.py +++ b/koku/masu/processor/aws/aws_report_processor.py @@ -28,6 +28,7 @@ from masu.config import Config from masu.database.aws_report_db_accessor import AWSReportDBAccessor from masu.processor.report_processor_base import ReportProcessorBase +from masu.util.common import split_alphanumeric_string from reporting.provider.aws.models import AWSCostEntry from reporting.provider.aws.models import AWSCostEntryBill from reporting.provider.aws.models import AWSCostEntryLineItem @@ -222,6 +223,30 @@ def _update_mappings(self): self.processed_report.remove_processed_rows() + def _process_memory_value(self, data): + """Parse out value and unit from memory strings.""" + if "memory" in data and data["memory"] is not None: + unit = None + try: + memory = float(data["memory"]) + except ValueError: + memory = None + # Memory can come as a single number or a number with a unit + # e.g. "1", "1GB", "1 Gb" so it gets special cased. + memory_list = list(split_alphanumeric_string(data["memory"])) + if memory_list: + memory = memory_list[0] + if len(memory_list) > 1: + unit = memory_list[1] + try: + memory = float(memory) + except (ValueError, TypeError): + memory = None + unit = None + data["memory"] = memory + data["memory_unit"] = unit + return data + def _get_data_for_table(self, row, table_name): """Extract the data from a row for a specific table. @@ -233,18 +258,6 @@ def _get_data_for_table(self, row, table_name): (dict): The data from the row keyed on the DB table's column names """ - # Memory can come as a single number or a number with a unit - # e.g. "1" vs. "1 Gb" so it gets special cased. 
- if "product/memory" in row and row["product/memory"] is not None: - memory_list = row["product/memory"].split(" ") - if len(memory_list) > 1: - memory, unit = row["product/memory"].split(" ") - else: - memory = memory_list[0] - unit = None - row["product/memory"] = memory - row["product/memory_unit"] = unit - column_map = REPORT_COLUMN_MAP[table_name] return {column_map[key]: value for key, value in row.items() if key in column_map} @@ -436,6 +449,7 @@ def _create_cost_entry_product(self, row, report_db_accessor): return self.existing_product_map[key] data = self._get_data_for_table(row, table_name._meta.db_table) + data = self._process_memory_value(data) value_set = set(data.values()) if value_set == {""}: return diff --git a/koku/masu/test/processor/aws/test_aws_report_processor.py b/koku/masu/test/processor/aws/test_aws_report_processor.py index 8f66c78e8b..44f384bffb 100644 --- a/koku/masu/test/processor/aws/test_aws_report_processor.py +++ b/koku/masu/test/processor/aws/test_aws_report_processor.py @@ -1228,3 +1228,31 @@ def test_get_date_column_filter(self): date_filter = processor.get_date_column_filter() self.assertIn("usage_start__gte", date_filter) + + def test_process_memory_value(self): + """Test that product data has memory properly parsed.""" + + data = {"memory": None} + result = self.processor._process_memory_value(data) + self.assertIsNone(result.get("memory")) + self.assertIsNone(result.get("memory_unit")) + + data = {"memory": "NA"} + result = self.processor._process_memory_value(data) + self.assertIsNone(result.get("memory")) + self.assertIsNone(result.get("memory_unit")) + + data = {"memory": "4GiB"} + result = self.processor._process_memory_value(data) + self.assertEqual(result.get("memory"), 4) + self.assertEqual(result.get("memory_unit"), "GiB") + + data = {"memory": "4 GB"} + result = self.processor._process_memory_value(data) + self.assertEqual(result.get("memory"), 4) + self.assertEqual(result.get("memory_unit"), "GB") + + data = {"memory": "4"} + result = self.processor._process_memory_value(data) + self.assertEqual(result.get("memory"), 4) + self.assertIsNone(result.get("memory_unit")) diff --git a/koku/masu/test/util/test_common.py b/koku/masu/test/util/test_common.py index 286b29ffce..9ddcc28a54 100644 --- a/koku/masu/test/util/test_common.py +++ b/koku/masu/test/util/test_common.py @@ -282,6 +282,14 @@ def test_determine_if_full_summary_update_needed(self): # Current month, has not been summarized before self.assertTrue(common_utils.determine_if_full_summary_update_needed(current_month_bill)) + def test_split_alphanumeric_string(self): + """Test the alpha-numeric split function.""" + s = "4 GiB" + + expected = ["4 ", "GiB"] + result = list(common_utils.split_alphanumeric_string(s)) + self.assertEqual(result, expected) + class NamedTemporaryGZipTests(TestCase): """Tests for NamedTemporaryGZip.""" diff --git a/koku/masu/util/common.py b/koku/masu/util/common.py index c09a8d88c9..35c6fba4af 100644 --- a/koku/masu/util/common.py +++ b/koku/masu/util/common.py @@ -22,6 +22,7 @@ import logging import re from datetime import timedelta +from itertools import groupby from os import remove from tempfile import gettempdir from uuid import uuid4 @@ -362,3 +363,8 @@ def determine_if_full_summary_update_needed(bill): return True return False + + +def split_alphanumeric_string(s): + for k, g in groupby(s, str.isalpha): + yield "".join(g) From b4060d1cdbeb23dd1c75682658316797c5e0c613 Mon Sep 17 00:00:00 2001 From: Andrew Berglund Date: Tue, 19 Jan 2021 15:27:29 -0500 
Subject: [PATCH 06/17] COST-442 ocp on azure presto summary (#2523) * Add processing of OCP on Azure via Presto * Update OCP flow to do tag enablement after upload of data * Use postgres to do OCP tag summary Co-authored-by: Douglas Curtis Co-authored-by: HAP --- .../masu/database/azure_report_db_accessor.py | 23 +- koku/masu/database/ocp_report_db_accessor.py | 27 +- .../reporting_ocp_lineitem_daily_summary.sql | 404 ------ ...rting_ocpawscostlineitem_daily_summary.sql | 12 +- ...ing_ocpazurecostlineitem_daily_summary.sql | 1195 +++++++++++++++++ ...porting_ocpusagelineitem_daily_summary.sql | 387 ++++++ ...ing_ocpazurecostlineitem_daily_summary.sql | 430 +++--- ...rting_ocpstoragelineitem_daily_summary.sql | 34 +- ...eporting_ocpstoragevolumelabel_summary.sql | 7 +- ...porting_ocpusagelineitem_daily_summary.sql | 29 +- ...item_daily_summary_update_enabled_tags.sql | 86 ++ .../reporting_ocpusagepodlabel_summary.sql | 13 +- .../ocp/ocp_cloud_parquet_summary_updater.py | 41 +- .../ocp/ocp_report_parquet_processor.py | 4 + .../ocp/ocp_report_parquet_summary_updater.py | 3 +- .../ocp/ocp_report_summary_updater.py | 1 + .../database/test_azure_report_db_accessor.py | 26 + .../database/test_ocp_report_db_accessor.py | 50 + ...cp_cloud_parquet_report_summary_updater.py | 119 +- .../ocp/test_ocp_report_parquet_processor.py | 6 +- .../migrations/0162_auto_20201120_1901.py | 12 + .../provider/azure/openshift/models.py | 4 - .../nise_ymls/ocp_on_aws/ocp_static_data.yml | 3 + .../ocp_on_azure/ocp_static_data.yml | 4 + 24 files changed, 2101 insertions(+), 819 deletions(-) delete mode 100644 koku/masu/database/presto_sql/reporting_ocp_lineitem_daily_summary.sql create mode 100644 koku/masu/database/presto_sql/reporting_ocpazurecostlineitem_daily_summary.sql create mode 100644 koku/masu/database/presto_sql/reporting_ocpusagelineitem_daily_summary.sql create mode 100644 koku/masu/database/sql/reporting_ocpusagelineitem_daily_summary_update_enabled_tags.sql create mode 100644 koku/reporting/migrations/0162_auto_20201120_1901.py diff --git a/koku/masu/database/azure_report_db_accessor.py b/koku/masu/database/azure_report_db_accessor.py index 3b275ee103..495566cf16 100644 --- a/koku/masu/database/azure_report_db_accessor.py +++ b/koku/masu/database/azure_report_db_accessor.py @@ -224,7 +224,7 @@ def get_summary_query_for_billid(self, bill_id): return summary_item_query def populate_ocp_on_azure_cost_daily_summary(self, start_date, end_date, cluster_id, bill_ids, markup_value): - """Populate the daily cost aggregated summary for OCP on AWS. + """Populate the daily cost aggregated summary for OCP on Azure. Args: start_date (datetime.date) The date to start populating the table. 
@@ -262,6 +262,27 @@ def populate_ocp_on_azure_tags_summary_table(self): agg_sql, agg_sql_params = self.jinja_sql.prepare_query(agg_sql, agg_sql_params) self._execute_raw_sql_query(table_name, agg_sql, bind_params=list(agg_sql_params)) + def populate_ocp_on_azure_cost_daily_summary_presto( + self, start_date, end_date, openshift_provider_uuid, azure_provider_uuid, cluster_id, bill_id, markup_value + ): + """Populate the daily cost aggregated summary for OCP on Azure.""" + summary_sql = pkgutil.get_data("masu.database", "presto_sql/reporting_ocpazurecostlineitem_daily_summary.sql") + summary_sql = summary_sql.decode("utf-8") + summary_sql_params = { + "uuid": str(openshift_provider_uuid).replace("-", "_"), + "schema": self.schema, + "start_date": start_date, + "end_date": end_date, + "year": start_date.strftime("%Y"), + "month": start_date.strftime("%m"), + "azure_source_uuid": azure_provider_uuid, + "ocp_source_uuid": openshift_provider_uuid, + "cluster_id": cluster_id, + "bill_id": bill_id, + "markup": markup_value, + } + self._execute_presto_multipart_sql_query(self.schema, summary_sql, bind_params=summary_sql_params) + def populate_enabled_tag_keys(self, start_date, end_date, bill_ids): """Populate the enabled tag key table. Args: diff --git a/koku/masu/database/ocp_report_db_accessor.py b/koku/masu/database/ocp_report_db_accessor.py index 408a695508..811b438945 100644 --- a/koku/masu/database/ocp_report_db_accessor.py +++ b/koku/masu/database/ocp_report_db_accessor.py @@ -343,6 +343,31 @@ def populate_line_item_daily_table(self, start_date, end_date, cluster_id): daily_sql, daily_sql_params = self.jinja_sql.prepare_query(daily_sql, daily_sql_params) self._execute_raw_sql_query(table_name, daily_sql, start_date, end_date, bind_params=list(daily_sql_params)) + def update_line_item_daily_summary_with_enabled_tags(self, start_date, end_date, report_period_ids): + """Populate the enabled tag key table. + Args: + start_date (datetime.date) The date to start populating the table. + end_date (datetime.date) The date to end on. + bill_ids (list) A list of bill IDs. + Returns + (None) + """ + table_name = OCP_REPORT_TABLE_MAP["line_item_daily_summary"] + summary_sql = pkgutil.get_data( + "masu.database", "sql/reporting_ocpusagelineitem_daily_summary_update_enabled_tags.sql" + ) + summary_sql = summary_sql.decode("utf-8") + summary_sql_params = { + "start_date": start_date, + "end_date": end_date, + "report_period_ids": report_period_ids, + "schema": self.schema, + } + summary_sql, summary_sql_params = self.jinja_sql.prepare_query(summary_sql, summary_sql_params) + self._execute_raw_sql_query( + table_name, summary_sql, start_date, end_date, bind_params=list(summary_sql_params) + ) + def get_ocp_infrastructure_map(self, start_date, end_date, **kwargs): """Get the OCP on infrastructure map. 
@@ -552,7 +577,7 @@ def populate_line_item_daily_summary_table_presto( start_date = start_date.date() end_date = end_date.date() - tmpl_summary_sql = pkgutil.get_data("masu.database", "presto_sql/reporting_ocp_lineitem_daily_summary.sql") + tmpl_summary_sql = pkgutil.get_data("masu.database", "presto_sql/reporting_ocpusagelineitem_daily_summary.sql") tmpl_summary_sql = tmpl_summary_sql.decode("utf-8") summary_sql_params = { "uuid": str(source).replace("-", "_"), diff --git a/koku/masu/database/presto_sql/reporting_ocp_lineitem_daily_summary.sql b/koku/masu/database/presto_sql/reporting_ocp_lineitem_daily_summary.sql deleted file mode 100644 index 6976a84da4..0000000000 --- a/koku/masu/database/presto_sql/reporting_ocp_lineitem_daily_summary.sql +++ /dev/null @@ -1,404 +0,0 @@ -/* - * Process OCP Usage Data Processing SQL - * This SQL will utilize Presto for the raw line-item data aggregating - * and store the results into the koku database summary tables. - */ - --- Using the convention of a double-underscore prefix to denote a temp table. - -/* - * ==================================== - * COMMON - * ==================================== - */ - --- node label line items by day presto sql --- still using a "temp" table here because there is no guarantee how big this might get -DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__ocp_node_label_line_item_daily_{{uuid | sqlsafe}}; -CREATE TABLE hive.{{schema | sqlsafe}}.__ocp_node_label_line_item_daily_{{uuid | sqlsafe}} AS ( - SELECT {{cluster_id}} as "cluster_id", - date(nli.interval_start) as "usage_start", - max(nli.node) as "node", - nli.node_labels, - max(nli.source) as "source", - max(nli.year) as "year", - max(nli.month) as "month" - FROM hive.{{schema | sqlsafe}}.openshift_node_labels_line_items as "nli" - WHERE nli.source = {{source}} - AND nli.year = {{year}} - AND nli.month = {{month}} - AND nli.interval_start >= TIMESTAMP {{start_date}} - AND nli.interval_start < date_add('day', 1, TIMESTAMP {{end_date}}) - GROUP - BY {{cluster_id}}, - date(nli.interval_start), - nli.node_labels -) -; - -/* - * ==================================== - * POD - * ==================================== - */ - --- cluster daily cappacity presto sql --- still using a "temp" table here because there is no guarantee how big this might get -DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__ocp_cluster_capacity_{{uuid | sqlsafe}}; -CREATE TABLE hive.{{schema | sqlsafe}}.__ocp_cluster_capacity_{{uuid | sqlsafe}} as ( - SELECT {{cluster_id}} as "cluster_id", - usage_start, - max(cc.source) as "source", - max(cc.year) as "year", - max(cc.month) as "month", - sum(cc.max_cluster_capacity_cpu_core_seconds) as cluster_capacity_cpu_core_seconds, - sum(cc.max_cluster_capacity_memory_byte_seconds) as cluster_capacity_memory_byte_seconds - FROM ( - SELECT date(li.interval_start) as usage_start, - max(li.source) as "source", - max(li.year) as "year", - max(li.month) as "month", - max(li.node_capacity_cpu_core_seconds) as "max_cluster_capacity_cpu_core_seconds", - max(li.node_capacity_memory_byte_seconds) as "max_cluster_capacity_memory_byte_seconds" - FROM hive.{{schema | sqlsafe}}.openshift_pod_usage_line_items AS li - WHERE li.source = {{source}} - AND li.year = {{year}} - AND li.month = {{month}} - AND li.interval_start >= TIMESTAMP {{start_date}} - AND li.interval_start < date_add('day', 1, TIMESTAMP {{end_date}}) - GROUP - BY date(li.interval_start) - ) as cc - GROUP - BY {{cluster_id}}, - usage_start -) -; - -/* - * Delete the old block of data (if any) based on the 
usage range - * Inserting a record in this log will trigger a delete against the specified table - * in the same schema as the log table with the specified where_clause - * start_date and end_date MUST be strings in order for this to work properly. - */ -INSERT - INTO postgres.{{schema | sqlsafe}}.presto_delete_wrapper_log - ( - id, - action_ts, - table_name, - where_clause, - result_rows - ) -VALUES ( - uuid(), - now(), - 'reporting_ocpusagelineitem_daily_summary', - 'where usage_start >= '{{start_date}}'::date ' || - 'and usage_start <= '{{end_date}}'::date ' || - 'and cluster_id = '{{cluster_id}}' ' || - 'and data_source = ''Pod''', - null -) -; - -/* - * This is the target summarization sql for POD usage - * It combines the prior daily summarization query with the final summarization query - * by use of MAP_FILTER to filter the combined node line item labels as well as - * the line-item pod labels against the postgres enabled keys in the same query - */ -INSERT - INTO postgres.{{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary - ( - uuid, - report_period_id, - cluster_id, - cluster_alias, - data_source, - usage_start, - usage_end, - namespace, - node, - resource_id, - pod_labels, - pod_usage_cpu_core_hours, - pod_request_cpu_core_hours, - pod_limit_cpu_core_hours, - pod_usage_memory_gigabyte_hours, - pod_request_memory_gigabyte_hours, - pod_limit_memory_gigabyte_hours, - node_capacity_cpu_cores, - node_capacity_cpu_core_hours, - node_capacity_memory_gigabytes, - node_capacity_memory_gigabyte_hours, - cluster_capacity_cpu_core_hours, - cluster_capacity_memory_gigabyte_hours, - source_uuid, - infrastructure_usage_cost - ) -SELECT uuid() as "uuid", - {{report_period_id}} as "report_period_id", - {{cluster_id}} as "cluster_id", - {{cluster_alias}} as "cluster_alias", - 'Pod' as "data_source", - pua.usage_start, - pua.usage_start as "usage_end", - pua.namespace, - pua.node, - pua.resource_id, - cast(pua.pod_labels as json) as "pod_labels", - pua.pod_usage_cpu_core_hours, - pua.pod_request_cpu_core_hours, - pua.pod_limit_cpu_core_hours, - pua.pod_usage_memory_gigabyte_hours, - pua.pod_request_memory_gigabyte_hours, - pua.pod_limit_memory_gigabyte_hours, - pua.node_capacity_cpu_cores, - pua.node_capacity_cpu_core_hours, - pua.node_capacity_memory_gigabytes, - pua.node_capacity_memory_gigabyte_hours, - pua.cluster_capacity_cpu_core_hours, - pua.cluster_capacity_memory_gigabyte_hours, - cast(pua.source_uuid as UUID) as "source_uuid", - JSON '{"cpu": 0.000000000, "memory": 0.000000000, "storage": 0.000000000}' as "infrastructure_usage_cost" - FROM ( - SELECT date(li.interval_start) as "usage_start", - li.namespace, - li.node, - li.source as "source_uuid", - map_filter(map_concat(cast(json_parse(coalesce(nli.node_labels, '{}')) as map(varchar, varchar)), - cast(json_parse(li.pod_labels) as map(varchar, varchar))), - (k, v) -> contains(ek.enabled_keys, k)) as "pod_labels", - max(li.resource_id) as "resource_id", - sum(li.pod_usage_cpu_core_seconds) / 3600.0 as "pod_usage_cpu_core_hours", - sum(li.pod_request_cpu_core_seconds) / 3600.0 as "pod_request_cpu_core_hours", - sum(li.pod_limit_cpu_core_seconds) / 3600.0 as "pod_limit_cpu_core_hours", - sum(li.pod_usage_memory_byte_seconds) / 3600.0 * power(2, -30) as "pod_usage_memory_gigabyte_hours", - sum(li.pod_request_memory_byte_seconds) / 3600.0 * power(2, -30) as "pod_request_memory_gigabyte_hours", - sum(li.pod_limit_memory_byte_seconds) / 3600.0 * power(2, -30) as "pod_limit_memory_gigabyte_hours", - max(li.node_capacity_cpu_cores) as 
"node_capacity_cpu_cores", - sum(li.node_capacity_cpu_core_seconds) / 3600.0 as "node_capacity_cpu_core_hours", - max(li.node_capacity_memory_bytes) * power(2, -30) as "node_capacity_memory_gigabytes", - sum(li.node_capacity_memory_byte_seconds) / 3600.0 * power(2, -30) as "node_capacity_memory_gigabyte_hours", - max(cc.cluster_capacity_cpu_core_seconds) / 3600.0 as "cluster_capacity_cpu_core_hours", - max(cc.cluster_capacity_memory_byte_seconds) / 3600.0 * power(2, -30) as "cluster_capacity_memory_gigabyte_hours" - FROM hive.{{schema | sqlsafe}}.openshift_pod_usage_line_items as "li" - LEFT - JOIN hive.{{schema | sqlsafe}}.__ocp_node_label_line_item_daily_{{uuid | sqlsafe}} as "nli" - ON nli.node = li.node - AND nli.usage_start = date(li.interval_start) - AND nli.source = li.source - LEFT - JOIN hive.{{schema | sqlsafe}}.__ocp_cluster_capacity_{{uuid | sqlsafe}} as "cc" - ON cc.source = li.source - AND cc.usage_start = date(li.interval_start) - CROSS - JOIN ( - SELECT array_agg(distinct key) as "enabled_keys" - FROM postgres.{{schema | sqlsafe}}.reporting_ocpenabledtagkeys - ) as "ek" - WHERE li.source = {{source}} - AND li.year = {{year}} - AND li.month = {{month}} - AND li.interval_start >= TIMESTAMP {{start_date}} - AND li.interval_start < date_add('day', 1, TIMESTAMP {{end_date}}) - GROUP - BY date(li.interval_start), - li.namespace, - li.node, - li.source, - 5 /* THIS ORDINAL MUST BE KEPT IN SYNC WITH THE map_filter EXPRESSION */ - /* The map_filter expression was too complex for presto to use */ - ) as "pua" -; - - -/* - * ==================================== - * STORAGE - * ==================================== - */ - --- Storage node label line items -DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__volume_nodes_{{uuid | sqlsafe}}; -CREATE TABLE hive.{{schema | sqlsafe}}.__volume_nodes_{{uuid | sqlsafe}} as ( - SELECT sli.namespace, - sli.pod, - date(sli.interval_start) as "usage_start", - max(uli.node) as "node", - sli.source, - sli.year, - sli.month - FROM hive.{{schema | sqlsafe}}.openshift_storage_usage_line_items as "sli" - JOIN hive.{{schema | sqlsafe}}.openshift_pod_usage_line_items as "uli" - ON uli.source = sli.source - AND uli.year = sli.year - AND uli.month = sli.month - AND uli.namespace = sli.namespace - AND uli.pod = sli.pod - AND date(uli.interval_start) = date(sli.interval_start) - WHERE sli.source = {{source}} - AND sli.year = {{year}} - AND sli.month = {{month}} - AND sli.interval_start >= TIMESTAMP {{start_date}} - AND sli.interval_start < date_add('day', 1, TIMESTAMP {{end_date}}) - GROUP - BY sli.namespace, - sli.pod, - date(sli.interval_start), - sli.source, - sli.year, - sli.month -) -; - -/* - * Delete the old block of data (if any) based on the usage range - * Inserting a record in this log will trigger a delete against the specified table - * in the same schema as the log table with the specified where_clause - * start_date and end_date MUST be strings in order for this to work properly. 
- */ -INSERT - INTO postgres.{{schema | sqlsafe}}.presto_delete_wrapper_log - ( - id, - action_ts, - table_name, - where_clause, - result_rows - ) -VALUES ( - uuid(), - now(), - 'reporting_ocpusagelineitem_daily_summary', - 'where usage_start >= '{{start_date}}'::date ' || - 'and usage_start <= '{{end_date}}'::date ' || - 'and cluster_id = '{{cluster_id}}' ' || - 'and data_source = ''Storage''', - null -) -; - -/* - * This is the target summarization sql for STORAGE usage - * It combines the prior daily summarization query with the final summarization query - * by use of MAP_FILTER to filter the combined node line item labels as well as - * the line-item pod labels against the postgres enabled keys in the same query - */ -INSERT INTO postgres.{{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary ( - uuid, - report_period_id, - cluster_id, - cluster_alias, - data_source, - namespace, - node, - persistentvolumeclaim, - persistentvolume, - storageclass, - usage_start, - usage_end, - volume_labels, - source_uuid, - persistentvolumeclaim_capacity_gigabyte, - persistentvolumeclaim_capacity_gigabyte_months, - volume_request_storage_gigabyte_months, - persistentvolumeclaim_usage_gigabyte_months -) -SELECT uuid() as "uuid", - {{report_period_id}} as "report_period_id", - {{cluster_id}} as "cluster_id", - {{cluster_alias}} as "cluster_alias", - 'Storage' as "data_source", - sua.namespace, - sua.node, - sua.persistentvolumeclaim, - sua.persistentvolume, - sua.storageclass, - sua.usage_start, - sua.usage_start as "usage_end", - cast(sua.volume_labels as json) as "volume_labels", - cast(sua.source_uuid as UUID) as "source_uuid", - (sua.persistentvolumeclaim_capacity_bytes * - power(2, -30)) as "persistentvolumeclaim_capacity_gigibytes", - (sua.persistentvolumeclaim_capacity_byte_seconds / - 86400 * - cast(extract(day from last_day_of_month(date(sua.usage_start))) as integer) * - power(2, -30)) as "persistentvolumeclaim_capacity_gigabyte_months", - (sua.volume_request_storage_byte_seconds / - 86400 * - cast(extract(day from last_day_of_month(date(sua.usage_start))) as integer) * - power(2, -30)) as "volume_request_storage_gigabyte_months", - (sua.persistentvolumeclaim_usage_byte_seconds / - 86400 * - cast(extract(day from last_day_of_month(date(sua.usage_start))) as integer) * - power(2, -30)) as "persistentvolumeclaim_usage_byte_months" - FROM ( - SELECT sli.namespace, - vn.node, - sli.persistentvolumeclaim, - sli.persistentvolume, - sli.storageclass, - date(sli.interval_start) as "usage_start", - map_filter(map_concat(cast(json_parse(coalesce(nli.node_labels, '{}')) as map(varchar, varchar)), - cast(json_parse(sli.persistentvolume_labels) as map(varchar, varchar)), - cast(json_parse(sli.persistentvolumeclaim_labels) as map(varchar, varchar))), - (k, v) -> contains(ek.enabled_keys, k)) as "volume_labels", - sli.source as "source_uuid", - max(sli.persistentvolumeclaim_capacity_bytes) as "persistentvolumeclaim_capacity_bytes", - sum(sli.persistentvolumeclaim_capacity_byte_seconds) as "persistentvolumeclaim_capacity_byte_seconds", - sum(sli.volume_request_storage_byte_seconds) as "volume_request_storage_byte_seconds", - sum(sli.persistentvolumeclaim_usage_byte_seconds) as "persistentvolumeclaim_usage_byte_seconds" - FROM hive.{{schema | sqlsafe}}.openshift_storage_usage_line_items "sli" - LEFT - JOIN hive.{{schema | sqlsafe}}.__volume_nodes_{{uuid | sqlsafe}} as "vn" - ON vn.source = sli.source - AND vn.year = sli.year - AND vn.month = sli.month - AND vn.namespace = sli.namespace - AND vn.pod = 
sli.pod - AND vn.usage_start = date(sli.interval_start) - LEFT - JOIN hive.{{schema | sqlsafe}}.__ocp_node_label_line_item_daily_{{uuid | sqlsafe}} as "nli" - ON nli.source = vn.source - AND nli.year = vn.year - AND nli.month = vn.month - AND nli.node = vn.node - AND date(nli.usage_start) = date(vn.usage_start) - CROSS - JOIN ( - SELECT array_agg(distinct key) as enabled_keys - FROM postgres.{{schema | sqlsafe}}.reporting_ocpenabledtagkeys - ) as "ek" - WHERE sli.source = {{source}} - AND sli.year = {{year}} - AND sli.month = {{month}} - AND sli.interval_start >= TIMESTAMP {{start_date}} - AND sli.interval_start < date_add('day', 1, TIMESTAMP {{end_date}}) - GROUP - BY sli.namespace, - vn.node, - sli.persistentvolumeclaim, - sli.persistentvolume, - sli.storageclass, - date(sli.interval_start), - 7, /* THIS ORDINAL MUST BE KEPT IN SYNC WITH THE map_filter EXPRESSION */ - /* The map_filter expression was too complex for presto to use */ - sli.source - ) as "sua" -; - - -/* - * ==================================== - * CLEANUP - * ==================================== - */ - -DELETE FROM hive.{{schema | sqlsafe}}.__ocp_node_label_line_item_daily_{{uuid | sqlsafe}}; -DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__ocp_node_label_line_item_daily_{{uuid | sqlsafe}}; -DELETE FROM hive.{{schema | sqlsafe}}.__ocp_cluster_capacity_{{uuid | sqlsafe}}; -DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__ocp_cluster_capacity_{{uuid | sqlsafe}}; -DELETE FROM hive.{{schema | sqlsafe}}.__volume_nodes_{{uuid | sqlsafe}}; -DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__volume_nodes_{{uuid | sqlsafe}}; diff --git a/koku/masu/database/presto_sql/reporting_ocpawscostlineitem_daily_summary.sql b/koku/masu/database/presto_sql/reporting_ocpawscostlineitem_daily_summary.sql index ef3a2dbe00..20c990bd28 100644 --- a/koku/masu/database/presto_sql/reporting_ocpawscostlineitem_daily_summary.sql +++ b/koku/masu/database/presto_sql/reporting_ocpawscostlineitem_daily_summary.sql @@ -113,13 +113,13 @@ CREATE TABLE hive.{{schema | sqlsafe}}.__reporting_aws_daily_{{uuid | sqlsafe}} SELECT aws.lineitem_resourceid as resource_id, date(aws.lineitem_usagestartdate) as usage_start, date(aws.lineitem_usagestartdate) as usage_end, - aws.lineitem_productcode as product_code, - aws.product_productfamily as product_family, - aws.product_instancetype as instance_type, + nullif(aws.lineitem_productcode, '') as product_code, + nullif(aws.product_productfamily, '') as product_family, + nullif(aws.product_instancetype, '') as instance_type, aws.lineitem_usageaccountid as usage_account_id, - aws.lineitem_availabilityzone as availability_zone, - aws.product_region as region, - aws.pricing_unit as unit, + nullif(aws.lineitem_availabilityzone, '') as availability_zone, + nullif(aws.product_region, '') as region, + nullif(aws.pricing_unit, '') as unit, aws.lineitem_usageamount as usage_amount, aws.lineitem_normalizedusageamount as normalized_usage_amount, aws.lineitem_currencycode as currency_code, diff --git a/koku/masu/database/presto_sql/reporting_ocpazurecostlineitem_daily_summary.sql b/koku/masu/database/presto_sql/reporting_ocpazurecostlineitem_daily_summary.sql new file mode 100644 index 0000000000..5bcd1ffc62 --- /dev/null +++ b/koku/masu/database/presto_sql/reporting_ocpazurecostlineitem_daily_summary.sql @@ -0,0 +1,1195 @@ +-- The Python Jinja string variable subsitutions azure_where_clause and ocp_where_clause +-- optionally filter azure and OCP data by provider/source +-- Ex azure_where_clause: 'AND cost_entry_bill_id IN (1, 2, 3)' 
+-- Ex ocp_where_clause: "AND cluster_id = 'abcd-1234`" +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__matched_tags_{{uuid | sqlsafe}}; +CREATE TABLE hive.{{schema | sqlsafe}}.__matched_tags_{{uuid | sqlsafe}} AS ( + WITH cte_unnested_azure_tags AS ( + SELECT tags.*, + b.billing_period_start + FROM ( + SELECT key, + value, + cost_entry_bill_id + FROM postgres.{{schema | sqlsafe}}.reporting_azuretags_summary AS ts + CROSS JOIN UNNEST("values") AS v(value) + ) AS tags + JOIN postgres.{{schema | sqlsafe}}.reporting_azurecostentrybill AS b + ON tags.cost_entry_bill_id = b.id + JOIN postgres.{{schema | sqlsafe}}.reporting_azureenabledtagkeys as enabled_tags + ON lower(enabled_tags.key) = lower(tags.key) + WHERE b.id = {{bill_id}} + ), + cte_unnested_ocp_pod_tags AS ( + SELECT tags.*, + rp.report_period_start, + rp.cluster_id, + rp.cluster_alias + FROM ( + SELECT key, + value, + report_period_id + FROM postgres.{{schema | sqlsafe}}.reporting_ocpusagepodlabel_summary AS ts + CROSS JOIN UNNEST("values") AS v(value) + ) AS tags + JOIN postgres.{{schema | sqlsafe}}.reporting_ocpusagereportperiod AS rp + ON tags.report_period_id = rp.id + -- Filter out tags that aren't enabled + JOIN postgres.{{schema | sqlsafe}}.reporting_ocpenabledtagkeys as enabled_tags + ON lower(enabled_tags.key) = lower(tags.key) + WHERE rp.cluster_id = {{cluster_id}} + ), + cte_unnested_ocp_volume_tags AS ( + SELECT tags.*, + rp.report_period_start, + rp.cluster_id, + rp.cluster_alias + FROM ( + SELECT key, + value, + report_period_id + FROM postgres.{{schema | sqlsafe}}.reporting_ocpstoragevolumelabel_summary AS ts + CROSS JOIN UNNEST("values") AS v(value) + ) AS tags + JOIN postgres.{{schema | sqlsafe}}.reporting_ocpusagereportperiod AS rp + ON tags.report_period_id = rp.id + -- Filter out tags that aren't enabled + JOIN postgres.{{schema | sqlsafe}}.reporting_ocpenabledtagkeys as enabled_tags + ON lower(enabled_tags.key) = lower(tags.key) + WHERE rp.cluster_id = {{cluster_id}} + ) + SELECT '{"' || key || '": "' || value || '"}' as tag, + key, + value, + cost_entry_bill_id, + report_period_id + FROM ( + SELECT azure.key, + azure.value, + azure.cost_entry_bill_id, + ocp.report_period_id + FROM cte_unnested_azure_tags AS azure + JOIN cte_unnested_ocp_pod_tags AS ocp + ON lower(azure.key) = lower(ocp.key) + AND lower(azure.value) = lower(ocp.value) + AND azure.billing_period_start = ocp.report_period_start + + UNION + + SELECT azure.key, + azure.value, + azure.cost_entry_bill_id, + ocp.report_period_id + FROM cte_unnested_azure_tags AS azure + JOIN cte_unnested_ocp_volume_tags AS ocp + ON lower(azure.key) = lower(ocp.key) + AND lower(azure.value) = lower(ocp.value) + AND azure.billing_period_start = ocp.report_period_start + ) AS matches +) +; + +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__reporting_azure_daily_{{uuid | sqlsafe}}; +CREATE TABLE hive.{{schema | sqlsafe}}.__reporting_azure_daily_{{uuid | sqlsafe}} AS ( + WITH cte_line_items AS ( + SELECT {{bill_id | sqlsafe}} as cost_entry_bill_id, + cast(uuid() as varchar) as line_item_id, + date(coalesce(date, usagedatetime)) as usage_date, + coalesce(subscriptionid, subscriptionguid) as subscription_guid, + json_extract_scalar(json_parse(azure.additionalinfo), '$.ServiceType') as instance_type, + coalesce(servicename, metercategory) as service_name, + resourcelocation as resource_location, + split_part(coalesce(resourceid, instanceid), '/', 9) as resource_id, + cast(coalesce(quantity, usagequantity) as decimal(24,9)) as usage_quantity, + 
cast(coalesce(costinbillingcurrency, pretaxcost) as decimal(24,9)) as pretax_cost, + coalesce(billingcurrencycode, currency) as currency, + CASE + WHEN split_part(unitofmeasure, ' ', 2) != '' AND NOT (unitofmeasure = '100 Hours' AND metercategory='Virtual Machines') + THEN cast(split_part(unitofmeasure, ' ', 1) as integer) + ELSE 1 + END as multiplier, + CASE + WHEN split_part(unitofmeasure, ' ', 2) = 'Hours' + THEN 'Hrs' + WHEN split_part(unitofmeasure, ' ', 2) = 'GB/Month' + THEN 'GB-Mo' + WHEN split_part(unitofmeasure, ' ', 2) != '' + THEN split_part(unitofmeasure, ' ', 2) + ELSE unitofmeasure + END as unit_of_measure, + tags, + lower(tags) as lower_tags + FROM hive.{{schema | sqlsafe}}.azure_line_items as azure + WHERE azure.source = '{{azure_source_uuid | sqlsafe}}' + AND azure.year = '{{year | sqlsafe}}' + AND azure.month = '{{month | sqlsafe}}' + AND date(coalesce(date, usagedatetime)) >= date('{{start_date | sqlsafe}}') + AND date(coalesce(date, usagedatetime)) <= date('{{end_date | sqlsafe}}') + ) + SELECT azure.cost_entry_bill_id, + azure.line_item_id, + azure.usage_date, + azure.subscription_guid, + azure.instance_type, + azure.service_name, + azure.resource_location, + azure.resource_id, + azure.usage_quantity * azure.multiplier as usage_quantity, + azure.pretax_cost, + azure.currency, + azure.unit_of_measure, + azure.tags, + azure.lower_tags + FROM cte_line_items AS azure +) +; + +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__reporting_azure_tags_{{uuid | sqlsafe}}; +CREATE TABLE hive.{{schema | sqlsafe}}.__reporting_azure_tags_{{uuid | sqlsafe}} AS ( + SELECT azure.* + FROM ( + SELECT azure.*, + row_number() OVER (PARTITION BY azure.line_item_id ORDER BY azure.line_item_id) as row_number + FROM hive.{{schema | sqlsafe}}.__reporting_azure_daily_{{uuid | sqlsafe}} as azure + JOIN hive.{{schema | sqlsafe}}.__matched_tags_{{uuid | sqlsafe}} as tag + ON json_extract_scalar(azure.tags, '$.' || tag.key) = tag.value + ) AS azure + WHERE azure.row_number = 1 +) +; + +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__reporting_azure_special_case_tags_{{uuid | sqlsafe}}; +CREATE TABLE hive.{{schema | sqlsafe}}.__reporting_azure_special_case_tags_{{uuid | sqlsafe}} AS ( + SELECT azure.* + FROM hive.{{schema | sqlsafe}}.__reporting_azure_daily_{{uuid | sqlsafe}} as azure + WHERE ( + strpos(lower_tags, 'openshift_cluster') != 0 + OR strpos(lower_tags, 'openshift_node') != 0 + OR strpos(lower_tags, 'openshift_project') != 0 + ) +) +; + +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__reporting_ocp_storage_tags_{{uuid | sqlsafe}}; +CREATE TABLE hive.{{schema | sqlsafe}}.__reporting_ocp_storage_tags_{{uuid | sqlsafe}} AS ( + SELECT cast(ocp.uuid AS VARCHAR) AS ocp_id, + ocp.usage_start, + ocp.report_period_id, + ocp.cluster_id, + ocp.cluster_alias, + ocp.namespace, + ocp.node, + ocp.persistentvolumeclaim, + ocp.persistentvolume, + ocp.storageclass, + ocp.persistentvolumeclaim_capacity_gigabyte, + ocp.persistentvolumeclaim_capacity_gigabyte_months, + ocp.volume_request_storage_gigabyte_months, + ocp.persistentvolumeclaim_usage_gigabyte_months, + json_format(ocp.volume_labels) as volume_labels, + lower(tag.key) as key, + lower(tag.value) as value, + lower(tag.tag) as tag + FROM postgres.{{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp + JOIN hive.{{schema | sqlsafe}}.__matched_tags_{{uuid | sqlsafe}} AS tag + ON ocp.report_period_id = tag.report_period_id + AND json_extract_scalar(ocp.volume_labels, '$.' 
|| tag.key) = tag.value
+    WHERE ocp.source_uuid = UUID '{{ocp_source_uuid | sqlsafe}}'
+        AND ocp.data_source = 'Storage'
+        AND date(ocp.usage_start) >= date('{{start_date | sqlsafe}}')
+        AND date(ocp.usage_start) <= date('{{end_date | sqlsafe}}')
+)
+;
+
+DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__reporting_ocp_pod_tags_{{uuid | sqlsafe}};
+CREATE TABLE hive.{{schema | sqlsafe}}.__reporting_ocp_pod_tags_{{uuid | sqlsafe}} AS (
+    SELECT cast(ocp.uuid AS VARCHAR) AS ocp_id,
+        ocp.usage_start,
+        ocp.report_period_id,
+        ocp.cluster_id,
+        ocp.cluster_alias,
+        ocp.namespace,
+        ocp.node,
+        json_format(ocp.pod_labels) as pod_labels,
+        ocp.pod_usage_cpu_core_hours,
+        ocp.pod_request_cpu_core_hours,
+        ocp.pod_limit_cpu_core_hours,
+        ocp.pod_usage_memory_gigabyte_hours,
+        ocp.pod_request_memory_gigabyte_hours,
+        ocp.node_capacity_cpu_cores,
+        ocp.node_capacity_cpu_core_hours,
+        ocp.node_capacity_memory_gigabytes,
+        ocp.node_capacity_memory_gigabyte_hours,
+        ocp.cluster_capacity_cpu_core_hours,
+        ocp.cluster_capacity_memory_gigabyte_hours,
+        lower(tag.key) as key,
+        lower(tag.value) as value,
+        lower(tag.tag) as tag
+    FROM postgres.{{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp
+    JOIN hive.{{schema | sqlsafe}}.__matched_tags_{{uuid | sqlsafe}} AS tag
+        ON ocp.report_period_id = tag.report_period_id
+            AND json_extract_scalar(ocp.pod_labels, '$.' || tag.key) = tag.value
+    WHERE ocp.source_uuid = UUID '{{ocp_source_uuid | sqlsafe}}'
+        AND ocp.data_source = 'Pod'
+        AND date(ocp.usage_start) >= date('{{start_date | sqlsafe}}')
+        AND date(ocp.usage_start) <= date('{{end_date | sqlsafe}}')
+)
+;
+
+DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__matched_tags_{{uuid | sqlsafe}};
+
+
+-- First we match OCP pod data to Azure data using a direct
+-- resource id match. This usually means OCP node -> Azure Virtual Machine.
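+-- Illustrative example (assumed resource id shape, placeholder names): a compute line item with
+-- resourceid '/subscriptions/<sub>/resourceGroups/<rg>/providers/Microsoft.Compute/virtualMachines/worker-0'
+-- gets resource_id = 'worker-0' from the split_part(..., '/', 9) above, which is expected to equal the
+-- OCP node name, so the resource id match below joins on azure.resource_id = ocp.node for the same usage date.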
+DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}}; +CREATE TABLE hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} AS ( + WITH cte_resource_id_matched AS ( + SELECT cast(ocp.uuid AS VARCHAR) AS ocp_id, + ocp.report_period_id, + ocp.cluster_id, + ocp.cluster_alias, + ocp.namespace, + ocp.node, + json_format(ocp.pod_labels) as pod_labels, + ocp.pod_usage_cpu_core_hours, + ocp.pod_request_cpu_core_hours, + ocp.pod_limit_cpu_core_hours, + ocp.pod_usage_memory_gigabyte_hours, + ocp.pod_request_memory_gigabyte_hours, + ocp.node_capacity_cpu_cores, + ocp.node_capacity_cpu_core_hours, + ocp.node_capacity_memory_gigabytes, + ocp.node_capacity_memory_gigabyte_hours, + ocp.cluster_capacity_cpu_core_hours, + ocp.cluster_capacity_memory_gigabyte_hours, + azure.cost_entry_bill_id, + azure.line_item_id as azure_id, + azure.usage_date, + azure.subscription_guid, + azure.instance_type, + azure.service_name, + azure.resource_location, + azure.resource_id, + azure.usage_quantity, + azure.pretax_cost, + azure.currency, + azure.unit_of_measure, + azure.tags + FROM hive.{{schema | sqlsafe}}.__reporting_azure_daily_{{uuid | sqlsafe}} as azure + JOIN postgres.{{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp + -- NOTE: We would normally use ocp.resource_id + -- For this JOIN, but it is not guaranteed to be correct + -- in the current Operator Metering version + -- so we are matching only on the node name + -- which should match the split Azure instance ID + ON azure.resource_id = ocp.node + AND azure.usage_date = ocp.usage_start + WHERE ocp.source_uuid = UUID '{{ocp_source_uuid | sqlsafe}}' + AND ocp.usage_start >= date('{{start_date | sqlsafe}}') + AND ocp.usage_start <= date('{{end_date | sqlsafe}}') + AND ocp.data_source = 'Pod' + ), + cte_number_of_shared AS ( + SELECT azure_id, + count(DISTINCT namespace) as shared_projects + FROM cte_resource_id_matched + GROUP BY azure_id + ) + SELECT rm.*, + (rm.pod_usage_cpu_core_hours / rm.node_capacity_cpu_core_hours) * rm.pretax_cost as project_cost, + shared.shared_projects + FROM cte_resource_id_matched AS rm + JOIN cte_number_of_shared AS shared + ON rm.azure_id = shared.azure_id +) +; + +-- Next we match where the azure tag is the special openshift_project key +-- and the value matches an OpenShift project name +INSERT INTO hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} + WITH cte_tag_matched AS ( + SELECT cast(ocp.uuid AS VARCHAR) AS ocp_id, + ocp.report_period_id, + ocp.cluster_id, + ocp.cluster_alias, + ocp.namespace, + ocp.node, + json_format(ocp.pod_labels) as pod_labels, + ocp.pod_usage_cpu_core_hours, + ocp.pod_request_cpu_core_hours, + ocp.pod_limit_cpu_core_hours, + ocp.pod_usage_memory_gigabyte_hours, + ocp.pod_request_memory_gigabyte_hours, + ocp.node_capacity_cpu_cores, + ocp.node_capacity_cpu_core_hours, + ocp.node_capacity_memory_gigabytes, + ocp.node_capacity_memory_gigabyte_hours, + ocp.cluster_capacity_cpu_core_hours, + ocp.cluster_capacity_memory_gigabyte_hours, + azure.cost_entry_bill_id, + azure.line_item_id as azure_id, + azure.usage_date, + azure.subscription_guid, + azure.instance_type, + azure.service_name, + azure.resource_location, + azure.resource_id, + azure.usage_quantity, + azure.pretax_cost, + azure.currency, + azure.unit_of_measure, + azure.tags + FROM hive.{{schema | sqlsafe}}.__reporting_azure_special_case_tags_{{uuid | sqlsafe}} as azure + JOIN postgres.{{schema | 
sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp + ON json_extract_scalar(azure.lower_tags, '$.openshift_project') = lower(ocp.namespace) + AND azure.usage_date = ocp.usage_start + -- ANTI JOIN to remove rows that already matched + LEFT JOIN hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} AS rm + ON rm.azure_id = azure.line_item_id + WHERE ocp.source_uuid = UUID '{{ocp_source_uuid | sqlsafe}}' + AND ocp.data_source = 'Pod' + AND rm.azure_id IS NULL + ), + cte_number_of_shared AS ( + SELECT azure_id, + count(DISTINCT namespace) as shared_projects + FROM cte_tag_matched + GROUP BY azure_id + ) + SELECT tm.*, + tm.pretax_cost / shared.shared_projects as project_cost, + shared.shared_projects + FROM cte_tag_matched AS tm + JOIN cte_number_of_shared AS shared + ON tm.azure_id = shared.azure_id +; + +-- Next we match where the azure tag is the special openshift_node key +-- and the value matches an OpenShift node name +INSERT INTO hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} + WITH cte_tag_matched AS ( + SELECT cast(ocp.uuid AS VARCHAR) AS ocp_id, + ocp.report_period_id, + ocp.cluster_id, + ocp.cluster_alias, + ocp.namespace, + ocp.node, + json_format(ocp.pod_labels) as pod_labels, + ocp.pod_usage_cpu_core_hours, + ocp.pod_request_cpu_core_hours, + ocp.pod_limit_cpu_core_hours, + ocp.pod_usage_memory_gigabyte_hours, + ocp.pod_request_memory_gigabyte_hours, + ocp.node_capacity_cpu_cores, + ocp.node_capacity_cpu_core_hours, + ocp.node_capacity_memory_gigabytes, + ocp.node_capacity_memory_gigabyte_hours, + ocp.cluster_capacity_cpu_core_hours, + ocp.cluster_capacity_memory_gigabyte_hours, + azure.cost_entry_bill_id, + azure.line_item_id as azure_id, + azure.usage_date, + azure.subscription_guid, + azure.instance_type, + azure.service_name, + azure.resource_location, + azure.resource_id, + azure.usage_quantity, + azure.pretax_cost, + azure.currency, + azure.unit_of_measure, + azure.tags + FROM hive.{{schema | sqlsafe}}.__reporting_azure_special_case_tags_{{uuid | sqlsafe}} as azure + JOIN postgres.{{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp + ON json_extract_scalar(azure.lower_tags, '$.openshift_node') = lower(ocp.node) + AND azure.usage_date = ocp.usage_start + -- ANTI JOIN to remove rows that already matched + LEFT JOIN hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} AS rm + ON rm.azure_id = azure.line_item_id + WHERE ocp.source_uuid = UUID '{{ocp_source_uuid | sqlsafe}}' + AND ocp.data_source = 'Pod' + AND rm.azure_id IS NULL + ), + cte_number_of_shared AS ( + SELECT azure_id, + count(DISTINCT namespace) as shared_projects + FROM cte_tag_matched + GROUP BY azure_id + ) + SELECT tm.*, + tm.pretax_cost / shared.shared_projects as project_cost, + shared.shared_projects + FROM cte_tag_matched AS tm + JOIN cte_number_of_shared AS shared + ON tm.azure_id = shared.azure_id +; + +-- Next we match where the azure tag is the special openshift_cluster key +-- and the value matches an OpenShift cluster name + INSERT INTO hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} + WITH cte_tag_matched AS ( + SELECT cast(ocp.uuid AS VARCHAR) AS ocp_id, + ocp.report_period_id, + ocp.cluster_id, + ocp.cluster_alias, + ocp.namespace, + ocp.node, + json_format(ocp.pod_labels) as pod_labels, + ocp.pod_usage_cpu_core_hours, + ocp.pod_request_cpu_core_hours, + ocp.pod_limit_cpu_core_hours, + ocp.pod_usage_memory_gigabyte_hours, + 
ocp.pod_request_memory_gigabyte_hours, + ocp.node_capacity_cpu_cores, + ocp.node_capacity_cpu_core_hours, + ocp.node_capacity_memory_gigabytes, + ocp.node_capacity_memory_gigabyte_hours, + ocp.cluster_capacity_cpu_core_hours, + ocp.cluster_capacity_memory_gigabyte_hours, + azure.cost_entry_bill_id, + azure.line_item_id as azure_id, + azure.usage_date, + azure.subscription_guid, + azure.instance_type, + azure.service_name, + azure.resource_location, + azure.resource_id, + azure.usage_quantity, + azure.pretax_cost, + azure.currency, + azure.unit_of_measure, + azure.tags + FROM hive.{{schema | sqlsafe}}.__reporting_azure_special_case_tags_{{uuid | sqlsafe}} as azure + JOIN postgres.{{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp + ON json_extract_scalar(azure.lower_tags, '$.openshift_cluster') IN (lower(ocp.cluster_id), lower(ocp.cluster_alias)) + AND azure.usage_date = ocp.usage_start + -- ANTI JOIN to remove rows that already matched + LEFT JOIN hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} AS rm + ON rm.azure_id = azure.line_item_id + WHERE ocp.source_uuid = UUID '{{ocp_source_uuid | sqlsafe}}' + AND ocp.data_source = 'Pod' + AND rm.azure_id IS NULL + ), + cte_number_of_shared AS ( + SELECT azure_id, + count(DISTINCT namespace) as shared_projects + FROM cte_tag_matched + GROUP BY azure_id + ) + SELECT tm.*, + tm.pretax_cost / shared.shared_projects as project_cost, + shared.shared_projects + FROM cte_tag_matched AS tm + JOIN cte_number_of_shared AS shared + ON tm.azure_id = shared.azure_id +; + +-- Next we match where the pod label key and value +-- and azure tag key and value match directly + INSERT INTO hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} + WITH cte_tag_matched AS ( + SELECT ocp.ocp_id, + ocp.report_period_id, + ocp.cluster_id, + ocp.cluster_alias, + ocp.namespace, + ocp.node, + ocp.pod_labels, + ocp.pod_usage_cpu_core_hours, + ocp.pod_request_cpu_core_hours, + ocp.pod_limit_cpu_core_hours, + ocp.pod_usage_memory_gigabyte_hours, + ocp.pod_request_memory_gigabyte_hours, + ocp.node_capacity_cpu_cores, + ocp.node_capacity_cpu_core_hours, + ocp.node_capacity_memory_gigabytes, + ocp.node_capacity_memory_gigabyte_hours, + ocp.cluster_capacity_cpu_core_hours, + ocp.cluster_capacity_memory_gigabyte_hours, + azure.cost_entry_bill_id, + azure.line_item_id as azure_id, + azure.usage_date, + azure.subscription_guid, + azure.instance_type, + azure.service_name, + azure.resource_location, + azure.resource_id, + azure.usage_quantity, + azure.pretax_cost, + azure.currency, + azure.unit_of_measure, + azure.tags + FROM hive.{{schema | sqlsafe}}.__reporting_azure_tags_{{uuid | sqlsafe}} as azure + JOIN hive.{{schema | sqlsafe}}.__reporting_ocp_pod_tags_{{uuid | sqlsafe}} as ocp + ON azure.usage_date = ocp.usage_start + AND strpos(azure.lower_tags, ocp.tag) != 0 + -- ANTI JOIN to remove rows that already matched + LEFT JOIN hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} AS rm + ON rm.azure_id = azure.line_item_id + WHERE rm.azure_id IS NULL + ), + cte_number_of_shared AS ( + SELECT azure_id, + count(DISTINCT namespace) as shared_projects + FROM cte_tag_matched + GROUP BY azure_id + ) + SELECT tm.*, + tm.pretax_cost / shared.shared_projects as project_cost, + shared.shared_projects + FROM cte_tag_matched AS tm + JOIN cte_number_of_shared AS shared + ON tm.azure_id = shared.azure_id +; + +DROP TABLE IF EXISTS hive.{{schema | 
sqlsafe}}.__reporting_ocp_pod_tags_{{uuid | sqlsafe}}; + +-- First we match OCP storage data to Azure data using a direct +-- resource id match. OCP PVC name -> Azure instance ID. +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}}; +CREATE TABLE hive.{{schema | sqlsafe}}.__reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} AS ( + WITH cte_tag_matched AS ( + SELECT cast(ocp.uuid AS VARCHAR) AS ocp_id, + ocp.report_period_id, + ocp.cluster_id, + ocp.cluster_alias, + ocp.namespace, + ocp.node, + ocp.persistentvolumeclaim, + ocp.persistentvolume, + ocp.storageclass, + ocp.persistentvolumeclaim_capacity_gigabyte, + ocp.persistentvolumeclaim_capacity_gigabyte_months, + ocp.volume_request_storage_gigabyte_months, + ocp.persistentvolumeclaim_usage_gigabyte_months, + json_format(ocp.volume_labels) as volume_labels, + azure.cost_entry_bill_id, + azure.line_item_id as azure_id, + azure.usage_date, + azure.subscription_guid, + azure.instance_type, + azure.service_name, + azure.resource_location, + azure.resource_id, + azure.usage_quantity, + azure.pretax_cost, + azure.currency, + azure.unit_of_measure, + azure.tags + FROM hive.{{schema | sqlsafe}}.__reporting_azure_daily_{{uuid | sqlsafe}} as azure + JOIN postgres.{{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp + ON azure.resource_id LIKE '%%' || ocp.persistentvolume + AND azure.usage_date = ocp.usage_start + -- ANTI JOIN to remove rows that already matched + LEFT JOIN hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} AS ulid + ON ulid.azure_id = azure.line_item_id + WHERE ocp.source_uuid = UUID '{{ocp_source_uuid | sqlsafe}}' + AND ocp.data_source = 'Storage' + AND ocp.usage_start >= date('{{start_date | sqlsafe}}') + AND ocp.usage_start <= date('{{end_date | sqlsafe}}') + AND ulid.azure_id IS NULL + ), + cte_number_of_shared AS ( + SELECT azure_id, + count(DISTINCT namespace) as shared_projects + FROM cte_tag_matched + GROUP BY azure_id + ) + SELECT tm.*, + tm.pretax_cost / shared.shared_projects as project_cost, + shared.shared_projects + FROM cte_tag_matched AS tm + JOIN cte_number_of_shared AS shared + ON tm.azure_id = shared.azure_id +) +; + + +-- Next we match where the azure tag is the special openshift_project key +-- and the value matches an OpenShift project name +INSERT INTO hive.{{schema | sqlsafe}}.__reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} + WITH cte_tag_matched AS ( + SELECT cast(ocp.uuid AS VARCHAR) AS ocp_id, + ocp.report_period_id, + ocp.cluster_id, + ocp.cluster_alias, + ocp.namespace, + ocp.node, + ocp.persistentvolumeclaim, + ocp.persistentvolume, + ocp.storageclass, + ocp.persistentvolumeclaim_capacity_gigabyte, + ocp.persistentvolumeclaim_capacity_gigabyte_months, + ocp.volume_request_storage_gigabyte_months, + ocp.persistentvolumeclaim_usage_gigabyte_months, + json_format(ocp.volume_labels) as volume_labels, + azure.cost_entry_bill_id, + azure.line_item_id as azure_id, + azure.usage_date, + azure.subscription_guid, + azure.instance_type, + azure.service_name, + azure.resource_location, + azure.resource_id, + azure.usage_quantity, + azure.pretax_cost, + azure.currency, + azure.unit_of_measure, + azure.tags + FROM hive.{{schema | sqlsafe}}.__reporting_azure_special_case_tags_{{uuid | sqlsafe}} as azure + JOIN postgres.{{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp + ON json_extract_scalar(azure.lower_tags, '$.openshift_project') = lower(ocp.namespace) + AND 
azure.usage_date = ocp.usage_start + -- ANTI JOIN to remove rows that already matched + LEFT JOIN hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} AS ulid + ON ulid.azure_id = azure.line_item_id + LEFT JOIN hive.{{schema | sqlsafe}}.__reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} AS rm + ON rm.azure_id = azure.line_item_id + WHERE ocp.source_uuid = UUID '{{ocp_source_uuid | sqlsafe}}' + AND ocp.data_source = 'Storage' + AND ocp.usage_start >= date('{{start_date | sqlsafe}}') + AND ocp.usage_start <= date('{{end_date | sqlsafe}}') + AND ulid.azure_id IS NULL + AND rm.azure_id IS NULL + ), + cte_number_of_shared AS ( + SELECT azure_id, + count(DISTINCT namespace) as shared_projects + FROM cte_tag_matched + GROUP BY azure_id + ) + SELECT tm.*, + tm.pretax_cost / shared.shared_projects as project_cost, + shared.shared_projects + FROM cte_tag_matched AS tm + JOIN cte_number_of_shared AS shared + ON tm.azure_id = shared.azure_id +; + +-- Next we match where the azure tag is the special openshift_node key +-- and the value matches an OpenShift node name +INSERT INTO hive.{{schema | sqlsafe}}.__reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} + WITH cte_tag_matched AS ( + SELECT cast(ocp.uuid AS VARCHAR) AS ocp_id, + ocp.report_period_id, + ocp.cluster_id, + ocp.cluster_alias, + ocp.namespace, + ocp.node, + ocp.persistentvolumeclaim, + ocp.persistentvolume, + ocp.storageclass, + ocp.persistentvolumeclaim_capacity_gigabyte, + ocp.persistentvolumeclaim_capacity_gigabyte_months, + ocp.volume_request_storage_gigabyte_months, + ocp.persistentvolumeclaim_usage_gigabyte_months, + json_format(ocp.volume_labels) as volume_labels, + azure.cost_entry_bill_id, + azure.line_item_id as azure_id, + azure.usage_date, + azure.subscription_guid, + azure.instance_type, + azure.service_name, + azure.resource_location, + azure.resource_id, + azure.usage_quantity, + azure.pretax_cost, + azure.currency, + azure.unit_of_measure, + azure.tags + FROM hive.{{schema | sqlsafe}}.__reporting_azure_special_case_tags_{{uuid | sqlsafe}} as azure + JOIN postgres.{{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp + ON json_extract_scalar(azure.lower_tags, '$.openshift_node') = lower(ocp.node) + AND azure.usage_date = ocp.usage_start + -- ANTI JOIN to remove rows that already matched + LEFT JOIN hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} AS ulid + ON ulid.azure_id = azure.line_item_id + LEFT JOIN hive.{{schema | sqlsafe}}.__reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} AS rm + ON rm.azure_id = azure.line_item_id + WHERE ocp.source_uuid = UUID '{{ocp_source_uuid | sqlsafe}}' + AND ocp.data_source = 'Storage' + AND ulid.azure_id IS NULL + AND rm.azure_id IS NULL + ), + cte_number_of_shared AS ( + SELECT azure_id, + count(DISTINCT namespace) as shared_projects + FROM cte_tag_matched + GROUP BY azure_id + ) + SELECT tm.*, + tm.pretax_cost / shared.shared_projects as project_cost, + shared.shared_projects + FROM cte_tag_matched AS tm + JOIN cte_number_of_shared AS shared + ON tm.azure_id = shared.azure_id +; + +-- Next we match where the azure tag is the special openshift_cluster key +-- and the value matches an OpenShift cluster name + INSERT INTO hive.{{schema | sqlsafe}}.__reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} + WITH cte_tag_matched AS ( + SELECT cast(ocp.uuid AS VARCHAR) AS ocp_id, + ocp.report_period_id, + ocp.cluster_id, + ocp.cluster_alias, + ocp.namespace, + ocp.node, + 
ocp.persistentvolumeclaim, + ocp.persistentvolume, + ocp.storageclass, + ocp.persistentvolumeclaim_capacity_gigabyte, + ocp.persistentvolumeclaim_capacity_gigabyte_months, + ocp.volume_request_storage_gigabyte_months, + ocp.persistentvolumeclaim_usage_gigabyte_months, + json_format(ocp.volume_labels) as volume_labels, + azure.cost_entry_bill_id, + azure.line_item_id as azure_id, + azure.usage_date, + azure.subscription_guid, + azure.instance_type, + azure.service_name, + azure.resource_location, + azure.resource_id, + azure.usage_quantity, + azure.pretax_cost, + azure.currency, + azure.unit_of_measure, + azure.tags + FROM hive.{{schema | sqlsafe}}.__reporting_azure_special_case_tags_{{uuid | sqlsafe}} as azure + JOIN postgres.{{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp + ON json_extract_scalar(azure.lower_tags, '$.openshift_cluster') IN (lower(ocp.cluster_id), lower(ocp.cluster_alias)) + AND azure.usage_date = ocp.usage_start + -- ANTI JOIN to remove rows that already matched + LEFT JOIN hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} AS ulid + ON ulid.azure_id = azure.line_item_id + LEFT JOIN hive.{{schema | sqlsafe}}.__reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} AS rm + ON rm.azure_id = azure.line_item_id + WHERE ocp.source_uuid = UUID '{{ocp_source_uuid | sqlsafe}}' + AND ocp.data_source = 'Storage' + AND ulid.azure_id IS NULL + AND rm.azure_id IS NULL + ), + cte_number_of_shared AS ( + SELECT azure_id, + count(DISTINCT namespace) as shared_projects + FROM cte_tag_matched + GROUP BY azure_id + ) + SELECT tm.*, + tm.pretax_cost / shared.shared_projects as project_cost, + shared.shared_projects + FROM cte_tag_matched AS tm + JOIN cte_number_of_shared AS shared + ON tm.azure_id = shared.azure_id + +; + +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__reporting_azure_daily_{{uuid | sqlsafe}} +; + +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__reporting_azure_special_case_tags_{{uuid | sqlsafe}} +; + + +-- Then we match for OpenShift volume data where the volume label key and value +-- and azure tag key and value match directly + INSERT INTO hive.{{schema | sqlsafe}}.__reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} + WITH cte_tag_matched AS ( + SELECT ocp.ocp_id, + ocp.report_period_id, + ocp.cluster_id, + ocp.cluster_alias, + ocp.namespace, + ocp.node, + ocp.persistentvolumeclaim, + ocp.persistentvolume, + ocp.storageclass, + ocp.persistentvolumeclaim_capacity_gigabyte, + ocp.persistentvolumeclaim_capacity_gigabyte_months, + ocp.volume_request_storage_gigabyte_months, + ocp.persistentvolumeclaim_usage_gigabyte_months, + ocp.volume_labels, + azure.cost_entry_bill_id, + azure.line_item_id as azure_id, + azure.usage_date, + azure.subscription_guid, + azure.instance_type, + azure.service_name, + azure.resource_location, + azure.resource_id, + azure.usage_quantity, + azure.pretax_cost, + azure.currency, + azure.unit_of_measure, + azure.tags + FROM hive.{{schema | sqlsafe}}.__reporting_azure_tags_{{uuid | sqlsafe}} as azure + JOIN hive.{{schema | sqlsafe}}.__reporting_ocp_storage_tags_{{uuid | sqlsafe}} as ocp + ON azure.usage_date = ocp.usage_start + AND strpos(azure.lower_tags, ocp.tag) != 0 + -- ANTI JOIN to remove rows that already matched + LEFT JOIN hive.{{schema | sqlsafe}}.__reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} AS rm + ON rm.azure_id = azure.line_item_id + WHERE rm.azure_id IS NULL + ), + cte_number_of_shared AS ( + SELECT azure_id, + count(DISTINCT namespace) as 
shared_projects + FROM cte_tag_matched + GROUP BY azure_id + ) + SELECT tm.*, + tm.pretax_cost / shared.shared_projects as project_cost, + shared.shared_projects + FROM cte_tag_matched AS tm + JOIN cte_number_of_shared AS shared + ON tm.azure_id = shared.azure_id +; + +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__reporting_ocp_storage_tags_{{uuid | sqlsafe}} +; + +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__reporting_azure_tags_{{uuid | sqlsafe}} +; + + +-- The full summary data for Openshift pod<->azure and +-- Openshift volume<->azure matches are UNIONed together +-- with a GROUP BY using the azure ID to deduplicate +-- the azure data. This should ensure that we never double count +-- azure cost or usage. +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__reporting_ocpazurecostlineitem_daily_summary_{{uuid | sqlsafe}}; +CREATE TABLE hive.{{schema | sqlsafe}}.__reporting_ocpazurecostlineitem_daily_summary_{{uuid | sqlsafe}} AS ( + WITH cte_pod_project_cost AS ( + SELECT pc.azure_id, + map_agg(pc.namespace, pc.project_cost) as project_costs + FROM ( + SELECT li.azure_id, + li.namespace, + sum(project_cost) as project_cost + FROM hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} as li + GROUP BY li.azure_id, li.namespace + ) AS pc + GROUP BY pc.azure_id + ), + cte_storage_project_cost AS ( + SELECT pc.azure_id, + map_agg(pc.namespace, pc.project_cost) as project_costs + FROM ( + SELECT li.azure_id, + li.namespace, + sum(project_cost) as project_cost + FROM hive.{{schema | sqlsafe}}.__reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} as li + GROUP BY li.azure_id, li.namespace + ) AS pc + GROUP BY pc.azure_id + ) + SELECT max(li.report_period_id) as report_period_id, + max(li.cluster_id) as cluster_id, + max(li.cluster_alias) as cluster_alias, + array_agg(DISTINCT li.namespace) as namespace, + max(li.node) as node, + max(li.resource_id) as resource_id, + max(li.usage_date) as usage_start, + max(li.usage_date) as usage_end, + max(li.cost_entry_bill_id) as cost_entry_bill_id, + max(li.subscription_guid) as subscription_guid, + max(li.service_name) as service_name, + max(li.instance_type) as instance_type, + max(li.resource_location) as resource_location, + max(li.currency) as currency, + max(li.unit_of_measure) as unit_of_measure, + li.tags, + max(li.usage_quantity) as usage_quantity, + max(li.pretax_cost) as pretax_cost, + max(li.pretax_cost) * cast({{markup}} as decimal(24,9)) as markup_cost, + max(li.shared_projects) as shared_projects, + pc.project_costs as project_costs, + '{{azure_source_uuid | sqlsafe}}' as source_uuid + FROM hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} as li + JOIN cte_pod_project_cost as pc + ON li.azure_id = pc.azure_id + -- Dedup on azure line item so we never double count usage or cost + GROUP BY li.azure_id, li.tags, pc.project_costs + + UNION + + SELECT max(li.report_period_id) as report_period_id, + max(li.cluster_id) as cluster_id, + max(li.cluster_alias) as cluster_alias, + array_agg(DISTINCT li.namespace) as namespace, + max(li.node) as node, + max(li.resource_id) as resource_id, + max(li.usage_date) as usage_start, + max(li.usage_date) as usage_end, + max(li.cost_entry_bill_id) as cost_entry_bill_id, + max(li.subscription_guid) as subscription_guid, + max(li.service_name) as service_name, + max(li.instance_type) as instance_type, + max(li.resource_location) as resource_location, + max(li.currency) as currency, + max(li.unit_of_measure) as unit_of_measure, + li.tags, + 
max(li.usage_quantity) as usage_quantity, + max(li.pretax_cost) as pretax_cost, + max(li.pretax_cost) * cast({{markup}} as decimal(24,9)) as markup_cost, + max(li.shared_projects) as shared_projects, + pc.project_costs as project_costs, + '{{azure_source_uuid | sqlsafe}}' as source_uuid + FROM hive.{{schema | sqlsafe}}.__reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} AS li + JOIN cte_storage_project_cost AS pc + ON li.azure_id = pc.azure_id + LEFT JOIN hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} AS ulid + ON ulid.azure_id = li.azure_id + AND ulid.azure_id IS NULL + GROUP BY li.azure_id, li.tags, pc.project_costs +) +; + +-- The full summary data for Openshift pod<->azure and +-- Openshift volume<->azure matches are UNIONed together +-- with a GROUP BY using the OCP ID to deduplicate +-- based on OpenShift data. This is effectively the same table +-- as reporting_ocpazurecostlineitem_daily_summary but from the OpenShift +-- point of view. Here usage and cost are divided by the +-- number of pods sharing the cost so the values turn out the +-- same when reported. +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__reporting_ocpazurecostlineitem_project_daily_summary_{{uuid | sqlsafe}}; +CREATE TABLE hive.{{schema | sqlsafe}}.__reporting_ocpazurecostlineitem_project_daily_summary_{{uuid | sqlsafe}} AS ( + SELECT li.report_period_id, + li.cluster_id, + li.cluster_alias, + 'Pod' as data_source, + li.namespace, + li.node, + li.pod_labels, + max(li.resource_id) as resource_id, + max(li.usage_date) as usage_start, + max(li.usage_date) as usage_end, + max(li.cost_entry_bill_id) as cost_entry_bill_id, + max(li.subscription_guid) as subscription_guid, + max(li.service_name) as service_name, + max(li.instance_type) as instance_type, + max(li.resource_location) as resource_location, + max(li.currency) as currency, + max(li.unit_of_measure) as unit_of_measure, + li.tags, + sum(li.usage_quantity / li.shared_projects) as usage_quantity, + sum(li.pretax_cost / li.shared_projects) as pretax_cost, + sum(li.pretax_cost / li.shared_projects) * cast({{markup}} as decimal(24,9)) as markup_cost, + max(li.shared_projects) as shared_projects, + li.project_cost, + li.project_cost * cast({{markup}} as decimal(24,9)) as project_markup_cost, + '{{azure_source_uuid | sqlsafe}}' as source_uuid + FROM hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} as li + -- Grouping by OCP this time for the by project view + GROUP BY li.report_period_id, + li.ocp_id, + li.cluster_id, + li.cluster_alias, + li.namespace, + li.node, + li.pod_labels, + li.project_cost, + li.tags + + UNION + + SELECT li.report_period_id, + li.cluster_id, + li.cluster_alias, + 'Storage' as data_source, + li.namespace, + li.node, + li.volume_labels as pod_labels, + max(li.resource_id) as resource_id, + max(li.usage_date) as usage_start, + max(li.usage_date) as usage_end, + max(li.cost_entry_bill_id) as cost_entry_bill_id, + max(li.subscription_guid) as subscription_guid, + max(li.service_name) as service_name, + max(li.instance_type) as instance_type, + max(li.resource_location) as resource_location, + max(li.currency) as currency, + max(li.unit_of_measure) as unit_of_measure, + li.tags, + sum(li.usage_quantity / li.shared_projects) as usage_quantity, + sum(li.pretax_cost / li.shared_projects) as pretax_cost, + sum(li.pretax_cost / li.shared_projects) * cast({{markup}} as decimal(24,9)) as markup_cost, + max(li.shared_projects) as shared_projects, + li.project_cost, + 
li.project_cost * cast({{markup}} as decimal(24,9)) as project_markup_cost, + '{{azure_source_uuid | sqlsafe}}' as source_uuid + FROM hive.{{schema | sqlsafe}}.__reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} AS li + LEFT JOIN hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} AS ulid + ON ulid.azure_id = li.azure_id + WHERE ulid.azure_id IS NULL + GROUP BY li.ocp_id, + li.report_period_id, + li.cluster_id, + li.cluster_alias, + li.namespace, + li.node, + li.volume_labels, + li.project_cost, + li.tags +) +; + +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}}; + +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}}; + + +-- Clear out old entries first +INSERT + INTO postgres.{{schema | sqlsafe}}.presto_delete_wrapper_log + ( + id, + action_ts, + table_name, + where_clause, + result_rows + ) +VALUES ( + uuid(), + now(), + 'reporting_ocpazurecostlineitem_daily_summary', + 'WHERE usage_start >= '{{start_date}}'::date ' || + 'AND usage_start <= '{{end_date}}'::date ' || + 'AND cluster_id = '{{cluster_id}}' ' || + 'AND cost_entry_bill_id = {{bill_id}} ', + null +) +; + +-- Populate the daily aggregate line item data +INSERT INTO postgres.{{schema | sqlsafe}}.reporting_ocpazurecostlineitem_daily_summary ( + uuid, + report_period_id, + cluster_id, + cluster_alias, + namespace, + node, + resource_id, + usage_start, + usage_end, + cost_entry_bill_id, + subscription_guid, + instance_type, + service_name, + resource_location, + tags, + usage_quantity, + pretax_cost, + markup_cost, + currency, + unit_of_measure, + shared_projects, + project_costs, + source_uuid +) + SELECT uuid(), + report_period_id, + cluster_id, + cluster_alias, + namespace, + node, + resource_id, + usage_start, + usage_end, + cost_entry_bill_id, + subscription_guid, + instance_type, + service_name, + resource_location, + json_parse(tags), + cast(usage_quantity AS decimal(24,9)), + cast(pretax_cost AS decimal(30,15)), + cast(markup_cost AS decimal(30,15)), + currency, + unit_of_measure, + shared_projects, + cast(project_costs AS JSON), + cast(source_uuid AS UUID) + FROM hive.{{schema | sqlsafe}}.__reporting_ocpazurecostlineitem_daily_summary_{{uuid | sqlsafe}} +; + +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__reporting_ocpazurecostlineitem_daily_summary_{{uuid | sqlsafe}}; + +-- Clear out old entries first +INSERT + INTO postgres.{{schema | sqlsafe}}.presto_delete_wrapper_log + ( + id, + action_ts, + table_name, + where_clause, + result_rows + ) +VALUES ( + uuid(), + now(), + 'reporting_ocpazurecostlineitem_project_daily_summary', + 'where usage_start >= '{{start_date}}'::date ' || + 'and usage_start <= '{{end_date}}'::date ' || + 'and cluster_id = '{{cluster_id}}' ' || + 'and cost_entry_bill_id = {{bill_id}} ', + null +) +; + +INSERT INTO postgres.{{schema | sqlsafe}}.reporting_ocpazurecostlineitem_project_daily_summary ( + uuid, + report_period_id, + cluster_id, + cluster_alias, + data_source, + namespace, + node, + pod_labels, + resource_id, + usage_start, + usage_end, + cost_entry_bill_id, + subscription_guid, + instance_type, + service_name, + resource_location, + usage_quantity, + pretax_cost, + markup_cost, + currency, + unit_of_measure, + pod_cost, + project_markup_cost, + source_uuid +) + SELECT uuid(), + report_period_id, + cluster_id, + cluster_alias, + data_source, + namespace, + node, + json_parse(pod_labels), + resource_id, + usage_start, + usage_end, + 
cost_entry_bill_id, + subscription_guid, + instance_type, + service_name, + resource_location, + cast(usage_quantity AS decimal(24,9)), + cast(pretax_cost AS decimal(30,15)), + cast(markup_cost AS decimal(30,15)), + currency, + unit_of_measure, + cast(project_cost AS decimal(30,15)), + cast(project_markup_cost AS decimal(30,15)), + cast(source_uuid as UUID) + FROM hive.{{schema | sqlsafe}}.__reporting_ocpazurecostlineitem_project_daily_summary_{{uuid | sqlsafe}} +; + +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__reporting_ocpazurecostlineitem_project_daily_summary_{{uuid | sqlsafe}}; diff --git a/koku/masu/database/presto_sql/reporting_ocpusagelineitem_daily_summary.sql b/koku/masu/database/presto_sql/reporting_ocpusagelineitem_daily_summary.sql new file mode 100644 index 0000000000..396bc1664b --- /dev/null +++ b/koku/masu/database/presto_sql/reporting_ocpusagelineitem_daily_summary.sql @@ -0,0 +1,387 @@ +/* + * Process OCP Usage Data Processing SQL + * This SQL will utilize Presto for the raw line-item data aggregating + * and store the results into the koku database summary tables. + */ + +-- Using the convention of a double-underscore prefix to denote a temp table. + +/* + * ==================================== + * COMMON + * ==================================== + */ + +-- node label line items by day presto sql +-- still using a temp table here because there is no guarantee how big this might get +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__ocp_node_label_line_item_daily_{{uuid | sqlsafe}}; +CREATE TABLE hive.{{schema | sqlsafe}}.__ocp_node_label_line_item_daily_{{uuid | sqlsafe}} AS ( + SELECT date(nli.interval_start) as usage_start, + nli.node, + nli.node_labels + FROM hive.{{schema | sqlsafe}}.openshift_node_labels_line_items AS nli + WHERE nli.source = {{source}} + AND nli.year = {{year}} + AND nli.month = {{month}} + AND nli.interval_start >= TIMESTAMP {{start_date}} + AND nli.interval_start < date_add('day', 1, TIMESTAMP {{end_date}}) + GROUP BY date(nli.interval_start), + nli.node, + nli.node_labels +) +; + +-- namespace label line items by day presto sql +-- still using a temp table here because there is no guarantee how big this might get +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__ocp_namespace_label_line_item_daily_{{uuid | sqlsafe}}; +CREATE TABLE hive.{{schema | sqlsafe}}.__ocp_namespace_label_line_item_daily_{{uuid | sqlsafe}} AS ( + SELECT date(nli.interval_start) as usage_start, + nli.namespace, + nli.namespace_labels + FROM hive.{{schema | sqlsafe}}.openshift_namespace_labels_line_items AS nli + WHERE nli.source = {{source}} + AND nli.year = {{year}} + AND nli.month = {{month}} + AND nli.interval_start >= TIMESTAMP {{start_date}} + AND nli.interval_start < date_add('day', 1, TIMESTAMP {{end_date}}) + GROUP BY date(nli.interval_start), + nli.namespace, + nli.namespace_labels +) +; + +-- Daily sum of cluster CPU and memory capacity +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__ocp_cluster_capacity_{{uuid | sqlsafe}}; +CREATE TABLE hive.{{schema | sqlsafe}}.__ocp_cluster_capacity_{{uuid | sqlsafe}} as ( + SELECT date(cc.interval_start) as usage_start, + sum(cc.max_cluster_capacity_cpu_core_seconds) as cluster_capacity_cpu_core_seconds, + sum(cc.max_cluster_capacity_memory_byte_seconds) as cluster_capacity_memory_byte_seconds + FROM ( + SELECT li.interval_start, + li.node, + max(li.node_capacity_cpu_core_seconds) as max_cluster_capacity_cpu_core_seconds, + max(li.node_capacity_memory_byte_seconds) as max_cluster_capacity_memory_byte_seconds + FROM 
hive.{{schema | sqlsafe}}.openshift_pod_usage_line_items AS li + WHERE li.source = {{source}} + AND li.year = {{year}} + AND li.month = {{month}} + AND li.interval_start >= TIMESTAMP {{start_date}} + AND li.interval_start < date_add('day', 1, TIMESTAMP {{end_date}}) + GROUP BY li.interval_start, + li.node + ) as cc + GROUP BY date(cc.interval_start) +) +; + +/* + * ==================================== + * POD + * ==================================== + */ + +/* + * Delete the old block of data (if any) based on the usage range + * Inserting a record in this log will trigger a delete against the specified table + * in the same schema as the log table with the specified where_clause + * start_date and end_date MUST be strings in order for this to work properly. + */ +INSERT INTO postgres.{{schema | sqlsafe}}.presto_delete_wrapper_log ( + id, + action_ts, + table_name, + where_clause, + result_rows +) +VALUES ( + uuid(), + now(), + 'reporting_ocpusagelineitem_daily_summary', + 'where usage_start >= '{{start_date}}'::date ' || + 'and usage_start <= '{{end_date}}'::date ' || + 'and cluster_id = '{{cluster_id}}' ' || + 'and data_source = ''Pod''', + null +) +; + +/* + * This is the target summarization sql for POD usage + * It combines the prior daily summarization query with the final summarization query + * by use of MAP_FILTER to filter the combined node line item labels as well as + * the line-item pod labels against the postgres enabled keys in the same query + */ +INSERT INTO postgres.{{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary ( + uuid, + report_period_id, + cluster_id, + cluster_alias, + data_source, + usage_start, + usage_end, + namespace, + node, + resource_id, + pod_labels, + pod_usage_cpu_core_hours, + pod_request_cpu_core_hours, + pod_limit_cpu_core_hours, + pod_usage_memory_gigabyte_hours, + pod_request_memory_gigabyte_hours, + pod_limit_memory_gigabyte_hours, + node_capacity_cpu_cores, + node_capacity_cpu_core_hours, + node_capacity_memory_gigabytes, + node_capacity_memory_gigabyte_hours, + cluster_capacity_cpu_core_hours, + cluster_capacity_memory_gigabyte_hours, + source_uuid, + infrastructure_usage_cost +) +SELECT uuid() as uuid, + {{report_period_id}} as report_period_id, + {{cluster_id}} as cluster_id, + {{cluster_alias}} as cluster_alias, + 'Pod' as data_source, + pua.usage_start, + pua.usage_start as usage_end, + pua.namespace, + pua.node, + pua.resource_id, + cast(pua.pod_labels as json) as pod_labels, + pua.pod_usage_cpu_core_hours, + pua.pod_request_cpu_core_hours, + pua.pod_limit_cpu_core_hours, + pua.pod_usage_memory_gigabyte_hours, + pua.pod_request_memory_gigabyte_hours, + pua.pod_limit_memory_gigabyte_hours, + pua.node_capacity_cpu_cores, + pua.node_capacity_cpu_core_hours, + pua.node_capacity_memory_gigabytes, + pua.node_capacity_memory_gigabyte_hours, + pua.cluster_capacity_cpu_core_hours, + pua.cluster_capacity_memory_gigabyte_hours, + cast(pua.source_uuid as UUID) as source_uuid, + JSON '{"cpu": 0.000000000, "memory": 0.000000000, "storage": 0.000000000}' as infrastructure_usage_cost +FROM ( + SELECT date(li.interval_start) as usage_start, + li.namespace, + li.node, + li.source as source_uuid, + map_concat( + cast(json_parse(coalesce(nli.node_labels, '{}')) as map(varchar, varchar)), + cast(json_parse(coalesce(nsli.namespace_labels, '{}')) as map(varchar, varchar)), + cast(json_parse(li.pod_labels) as map(varchar, varchar)) + ) as pod_labels, + max(li.resource_id) as resource_id, + sum(li.pod_usage_cpu_core_seconds) / 3600.0 as 
pod_usage_cpu_core_hours, + sum(li.pod_request_cpu_core_seconds) / 3600.0 as pod_request_cpu_core_hours, + sum(li.pod_limit_cpu_core_seconds) / 3600.0 as pod_limit_cpu_core_hours, + sum(li.pod_usage_memory_byte_seconds) / 3600.0 * power(2, -30) as pod_usage_memory_gigabyte_hours, + sum(li.pod_request_memory_byte_seconds) / 3600.0 * power(2, -30) as pod_request_memory_gigabyte_hours, + sum(li.pod_limit_memory_byte_seconds) / 3600.0 * power(2, -30) as pod_limit_memory_gigabyte_hours, + max(li.node_capacity_cpu_cores) as node_capacity_cpu_cores, + sum(li.node_capacity_cpu_core_seconds) / 3600.0 as node_capacity_cpu_core_hours, + max(li.node_capacity_memory_bytes) * power(2, -30) as node_capacity_memory_gigabytes, + sum(li.node_capacity_memory_byte_seconds) / 3600.0 * power(2, -30) as node_capacity_memory_gigabyte_hours, + max(cc.cluster_capacity_cpu_core_seconds) / 3600.0 as cluster_capacity_cpu_core_hours, + max(cc.cluster_capacity_memory_byte_seconds) / 3600.0 * power(2, -30) as cluster_capacity_memory_gigabyte_hours + FROM hive.{{schema | sqlsafe}}.openshift_pod_usage_line_items as li + LEFT JOIN hive.{{schema | sqlsafe}}.__ocp_node_label_line_item_daily_{{uuid | sqlsafe}} as nli + ON nli.node = li.node + AND nli.usage_start = date(li.interval_start) + LEFT JOIN hive.{{schema | sqlsafe}}.__ocp_namespace_label_line_item_daily_{{uuid | sqlsafe}} as nsli + ON nsli.namespace = li.namespace + AND nsli.usage_start = date(li.interval_start) + LEFT JOIN hive.{{schema | sqlsafe}}.__ocp_cluster_capacity_{{uuid | sqlsafe}} as cc + ON cc.usage_start = date(li.interval_start) + -- CROSS JOIN ( + -- SELECT array_agg(distinct key) as enabled_keys + -- FROM postgres.{{schema | sqlsafe}}.reporting_ocpenabledtagkeys + -- ) as ek + WHERE li.source = {{source}} + AND li.year = {{year}} + AND li.month = {{month}} + AND li.interval_start >= TIMESTAMP {{start_date}} + AND li.interval_start < date_add('day', 1, TIMESTAMP {{end_date}}) + GROUP BY date(li.interval_start), + li.namespace, + li.node, + li.source, + 5 /* THIS ORDINAL MUST BE KEPT IN SYNC WITH THE map_filter EXPRESSION */ + /* The map_filter expression was too complex for presto to use */ +) as pua +; + + +/* + * ==================================== + * STORAGE + * ==================================== + */ + + +-- Determine which node a PVC is running on +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__volume_nodes_{{uuid | sqlsafe}}; +CREATE TABLE hive.{{schema | sqlsafe}}.__volume_nodes_{{uuid | sqlsafe}} as ( + SELECT date(sli.interval_start) as usage_start, + sli.persistentvolumeclaim, + max(uli.node) as node + FROM hive.{{schema | sqlsafe}}.openshift_storage_usage_line_items as sli + JOIN hive.{{schema | sqlsafe}}.openshift_pod_usage_line_items as uli + ON uli.source = sli.source + AND uli.namespace = sli.namespace + AND uli.pod = sli.pod + AND date(uli.interval_start) = date(sli.interval_start) + WHERE sli.source = {{source}} + AND sli.year = {{year}} + AND sli.month = {{month}} + AND sli.interval_start >= TIMESTAMP {{start_date}} + AND sli.interval_start < date_add('day', 1, TIMESTAMP {{end_date}}) + AND uli.source = {{source}} + AND uli.year = {{year}} + AND uli.month = {{month}} + -- AND uli.interval_start >= TIMESTAMP {{start_date}} + -- AND uli.interval_start < date_add('day', 1, TIMESTAMP {{end_date}}) + GROUP BY date(sli.interval_start), + sli.persistentvolumeclaim +) +; + +/* + * Delete the old block of data (if any) based on the usage range + * Inserting a record in this log will trigger a delete against the specified table + * in the 
same schema as the log table with the specified where_clause + * start_date and end_date MUST be strings in order for this to work properly. + */ +INSERT INTO postgres.{{schema | sqlsafe}}.presto_delete_wrapper_log ( + id, + action_ts, + table_name, + where_clause, + result_rows +) +VALUES ( + uuid(), + now(), + 'reporting_ocpusagelineitem_daily_summary', + 'where usage_start >= '{{start_date}}'::date ' || + 'and usage_start <= '{{end_date}}'::date ' || + 'and cluster_id = '{{cluster_id}}' ' || + 'and data_source = ''Storage''', + null +) +; + +/* + * This is the target summarization sql for STORAGE usage + * It combines the prior daily summarization query with the final summarization query + * by use of MAP_FILTER to filter the combined node line item labels as well as + * the line-item pod labels against the postgres enabled keys in the same query + */ +INSERT INTO postgres.{{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary ( + uuid, + report_period_id, + cluster_id, + cluster_alias, + data_source, + namespace, + node, + persistentvolumeclaim, + persistentvolume, + storageclass, + usage_start, + usage_end, + volume_labels, + source_uuid, + persistentvolumeclaim_capacity_gigabyte, + persistentvolumeclaim_capacity_gigabyte_months, + volume_request_storage_gigabyte_months, + persistentvolumeclaim_usage_gigabyte_months +) +SELECT uuid() as uuid, + {{report_period_id}} as report_period_id, + {{cluster_id}} as cluster_id, + {{cluster_alias}} as cluster_alias, + 'Storage' as data_source, + sua.namespace, + sua.node, + sua.persistentvolumeclaim, + sua.persistentvolume, + sua.storageclass, + sua.usage_start, + sua.usage_start as usage_end, + cast(sua.volume_labels as json) as volume_labels, + cast(sua.source_uuid as UUID) as source_uuid, + (sua.persistentvolumeclaim_capacity_bytes * + power(2, -30)) as persistentvolumeclaim_capacity_gigibytes, + (sua.persistentvolumeclaim_capacity_byte_seconds / + 86400 * + cast(extract(day from last_day_of_month(date(sua.usage_start))) as integer) * + power(2, -30)) as persistentvolumeclaim_capacity_gigabyte_months, + (sua.volume_request_storage_byte_seconds / + 86400 * + cast(extract(day from last_day_of_month(date(sua.usage_start))) as integer) * + power(2, -30)) as volume_request_storage_gigabyte_months, + (sua.persistentvolumeclaim_usage_byte_seconds / + 86400 * + cast(extract(day from last_day_of_month(date(sua.usage_start))) as integer) * + power(2, -30)) as persistentvolumeclaim_usage_byte_months +FROM ( + SELECT sli.namespace, + vn.node, + sli.persistentvolumeclaim, + sli.persistentvolume, + sli.storageclass, + date(sli.interval_start) as usage_start, + map_concat( + cast(json_parse(coalesce(nli.node_labels, '{}')) as map(varchar, varchar)), + cast(json_parse(coalesce(nsli.namespace_labels, '{}')) as map(varchar, varchar)), + cast(json_parse(sli.persistentvolume_labels) as map(varchar, varchar)), + cast(json_parse(sli.persistentvolumeclaim_labels) as map(varchar, varchar)) + ) as volume_labels, + sli.source as source_uuid, + max(sli.persistentvolumeclaim_capacity_bytes) as persistentvolumeclaim_capacity_bytes, + sum(sli.persistentvolumeclaim_capacity_byte_seconds) as persistentvolumeclaim_capacity_byte_seconds, + sum(sli.volume_request_storage_byte_seconds) as volume_request_storage_byte_seconds, + sum(sli.persistentvolumeclaim_usage_byte_seconds) as persistentvolumeclaim_usage_byte_seconds + FROM hive.{{schema | sqlsafe}}.openshift_storage_usage_line_items sli + LEFT JOIN hive.{{schema | sqlsafe}}.__volume_nodes_{{uuid | sqlsafe}} as vn + ON 
vn.usage_start = date(sli.interval_start) + AND vn.persistentvolumeclaim = sli.persistentvolumeclaim + LEFT JOIN hive.{{schema | sqlsafe}}.__ocp_node_label_line_item_daily_{{uuid | sqlsafe}} as nli + ON nli.node = vn.node + AND nli.usage_start = vn.usage_start + LEFT JOIN hive.{{schema | sqlsafe}}.__ocp_namespace_label_line_item_daily_{{uuid | sqlsafe}} as nsli + ON nsli.namespace = sli.namespace + AND nsli.usage_start = date(sli.interval_start) + WHERE sli.source = {{source}} + AND sli.year = {{year}} + AND sli.month = {{month}} + AND sli.interval_start >= TIMESTAMP {{start_date}} + AND sli.interval_start < date_add('day', 1, TIMESTAMP {{end_date}}) + GROUP BY sli.namespace, + vn.node, + sli.persistentvolumeclaim, + sli.persistentvolume, + sli.storageclass, + date(sli.interval_start), + 7, /* THIS ORDINAL MUST BE KEPT IN SYNC WITH THE map_filter EXPRESSION */ + /* The map_filter expression was too complex for presto to use */ + sli.source +) as sua +; + + +/* + * ==================================== + * CLEANUP + * ==================================== + */ + +DELETE FROM hive.{{schema | sqlsafe}}.__ocp_node_label_line_item_daily_{{uuid | sqlsafe}}; +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__ocp_node_label_line_item_daily_{{uuid | sqlsafe}}; +DELETE FROM hive.{{schema | sqlsafe}}.__ocp_cluster_capacity_{{uuid | sqlsafe}}; +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__ocp_cluster_capacity_{{uuid | sqlsafe}}; +DELETE FROM hive.{{schema | sqlsafe}}.__volume_nodes_{{uuid | sqlsafe}}; +DROP TABLE IF EXISTS hive.{{schema | sqlsafe}}.__volume_nodes_{{uuid | sqlsafe}}; diff --git a/koku/masu/database/sql/reporting_ocpazurecostlineitem_daily_summary.sql b/koku/masu/database/sql/reporting_ocpazurecostlineitem_daily_summary.sql index 8f0ec2d6fb..2ec90f4654 100644 --- a/koku/masu/database/sql/reporting_ocpazurecostlineitem_daily_summary.sql +++ b/koku/masu/database/sql/reporting_ocpazurecostlineitem_daily_summary.sql @@ -226,12 +226,13 @@ CREATE TEMPORARY TABLE reporting_azure_special_case_tags_{{uuid | sqlsafe}} AS ( CREATE TEMPORARY TABLE reporting_ocp_storage_tags_{{uuid | sqlsafe}} AS ( SELECT ocp.*, lower(tag.tag::text)::jsonb as tag - FROM {{schema | sqlsafe}}.reporting_ocpstoragelineitem_daily as ocp + FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp JOIN matched_tags_{{uuid | sqlsafe}} AS tag ON ocp.report_period_id = tag.report_period_id - AND ocp.persistentvolumeclaim_labels @> tag.tag + AND ocp.volume_labels @> tag.tag WHERE ocp.usage_start >= {{start_date}}::date AND ocp.usage_start <= {{end_date}}::date + AND ocp.data_source = 'Storage' --ocp_where_clause {% if cluster_id %} AND cluster_id = {{cluster_id}} @@ -242,12 +243,13 @@ CREATE TEMPORARY TABLE reporting_ocp_storage_tags_{{uuid | sqlsafe}} AS ( CREATE TEMPORARY TABLE reporting_ocp_pod_tags_{{uuid | sqlsafe}} AS ( SELECT ocp.*, lower(tag.tag::text)::jsonb as tag - FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily as ocp + FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp JOIN matched_tags_{{uuid | sqlsafe}} AS tag ON ocp.report_period_id = tag.report_period_id AND ocp.pod_labels @> tag.tag WHERE ocp.usage_start >= {{start_date}}::date AND ocp.usage_start <= {{end_date}}::date + AND ocp.data_source = 'Pod' --ocp_where_clause {% if cluster_id %} AND cluster_id = {{cluster_id}} @@ -264,25 +266,24 @@ DROP TABLE matched_tags_{{uuid | sqlsafe}}; -- resource id match. This usually means OCP node -> Azure Virutal Machine. 
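-- Illustrative cost apportionment (made-up numbers): using the formula below,
-- (pod_usage_cpu_core_hours / node_capacity_cpu_core_hours) * pretax_cost,
-- a pod that used 2 CPU core-hours on a node with 16 core-hours of capacity takes
-- 2/16 of an $8.00 Azure line item, i.e. project_cost = $1.00. The tag-matched
-- inserts further down instead split pretax_cost evenly across shared_projects.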
CREATE TEMPORARY TABLE reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} AS ( WITH cte_resource_id_matched AS ( - SELECT ocp.id AS ocp_id, + SELECT ocp.uuid AS ocp_id, ocp.report_period_id, ocp.cluster_id, ocp.cluster_alias, ocp.namespace, - ocp.pod, ocp.node, ocp.pod_labels, - ocp.pod_usage_cpu_core_seconds, - ocp.pod_request_cpu_core_seconds, - ocp.pod_limit_cpu_core_seconds, - ocp.pod_usage_memory_byte_seconds, - ocp.pod_request_memory_byte_seconds, + ocp.pod_usage_cpu_core_hours, + ocp.pod_request_cpu_core_hours, + ocp.pod_limit_cpu_core_hours, + ocp.pod_usage_memory_gigabyte_hours, + ocp.pod_request_memory_gigabyte_hours, ocp.node_capacity_cpu_cores, - ocp.node_capacity_cpu_core_seconds, - ocp.node_capacity_memory_bytes, - ocp.node_capacity_memory_byte_seconds, - ocp.cluster_capacity_cpu_core_seconds, - ocp.cluster_capacity_memory_byte_seconds, + ocp.node_capacity_cpu_core_hours, + ocp.node_capacity_memory_gigabytes, + ocp.node_capacity_memory_gigabyte_hours, + ocp.cluster_capacity_cpu_core_hours, + ocp.cluster_capacity_memory_gigabyte_hours, azure.id AS azure_id, azure.cost_entry_bill_id, azure.cost_entry_product_id, @@ -295,7 +296,7 @@ CREATE TEMPORARY TABLE reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} FROM reporting_azure_with_enabled_tags_{{uuid | sqlsafe}} as azure JOIN {{schema | sqlsafe}}.reporting_azurecostentryproductservice as aps ON azure.cost_entry_product_id = aps.id - JOIN {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily as ocp + JOIN {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp -- NOTE: We would normally use ocp.resource_id -- For this JOIN, but it is not guaranteed to be correct -- in the current Operator Metering version @@ -305,6 +306,7 @@ CREATE TEMPORARY TABLE reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} AND azure.usage_date = ocp.usage_start WHERE azure.usage_date >= {{start_date}}::date AND azure.usage_date <= {{end_date}}::date + AND ocp.data_source = 'Pod' -- azure_where_clause {% if bill_ids %} AND cost_entry_bill_id IN ( @@ -323,22 +325,13 @@ CREATE TEMPORARY TABLE reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} count(DISTINCT namespace) as shared_projects FROM cte_resource_id_matched GROUP BY azure_id - ), - cte_number_of_shared_pods AS ( - SELECT azure_id, - count(DISTINCT pod) as shared_pods - FROM cte_resource_id_matched - GROUP BY azure_id ) SELECT rm.*, - (rm.pod_usage_cpu_core_seconds / rm.node_capacity_cpu_core_seconds) * rm.pretax_cost as pod_cost, - sp.shared_projects, - spod.shared_pods + (rm.pod_usage_cpu_core_hours / rm.node_capacity_cpu_core_hours) * rm.pretax_cost as project_cost, + sp.shared_projects FROM cte_resource_id_matched AS rm JOIN cte_number_of_shared_projects AS sp ON rm.azure_id = sp.azure_id - JOIN cte_number_of_shared_pods AS spod - ON rm.azure_id = spod.azure_id ) ; @@ -346,25 +339,24 @@ CREATE TEMPORARY TABLE reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} -- and the value matches an OpenShift project name INSERT INTO reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} ( WITH cte_tag_matched AS ( - SELECT ocp.id AS ocp_id, + SELECT ocp.uuid AS ocp_id, ocp.report_period_id, ocp.cluster_id, ocp.cluster_alias, ocp.namespace, - ocp.pod, ocp.node, ocp.pod_labels, - ocp.pod_usage_cpu_core_seconds, - ocp.pod_request_cpu_core_seconds, - ocp.pod_limit_cpu_core_seconds, - ocp.pod_usage_memory_byte_seconds, - ocp.pod_request_memory_byte_seconds, + ocp.pod_usage_cpu_core_hours, + ocp.pod_request_cpu_core_hours, + ocp.pod_limit_cpu_core_hours, + 
ocp.pod_usage_memory_gigabyte_hours, + ocp.pod_request_memory_gigabyte_hours, ocp.node_capacity_cpu_cores, - ocp.node_capacity_cpu_core_seconds, - ocp.node_capacity_memory_bytes, - ocp.node_capacity_memory_byte_seconds, - ocp.cluster_capacity_cpu_core_seconds, - ocp.cluster_capacity_memory_byte_seconds, + ocp.node_capacity_cpu_core_hours, + ocp.node_capacity_memory_gigabytes, + ocp.node_capacity_memory_gigabyte_hours, + ocp.cluster_capacity_cpu_core_hours, + ocp.cluster_capacity_memory_gigabyte_hours, azure.id AS azure_id, azure.cost_entry_bill_id, azure.cost_entry_product_id, @@ -375,7 +367,7 @@ INSERT INTO reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} ( azure.pretax_cost, azure.tags FROM reporting_azure_special_case_tags_{{uuid | sqlsafe}} as azure - JOIN {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily as ocp + JOIN {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp ON azure.key = 'openshift_project' AND azure.value = lower(ocp.namespace) AND azure.usage_date = ocp.usage_start -- ANTI JOIN to remove rows that already matched @@ -383,6 +375,7 @@ INSERT INTO reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} ( ON rm.azure_id = azure.id WHERE azure.usage_date >= {{start_date}}::date AND azure.usage_date <= {{end_date}}::date + AND ocp.data_source = 'Pod' AND rm.azure_id IS NULL ), cte_number_of_shared_projects AS ( @@ -390,22 +383,13 @@ INSERT INTO reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} ( count(DISTINCT namespace) as shared_projects FROM cte_tag_matched GROUP BY azure_id - ), - cte_number_of_shared_pods AS ( - SELECT azure_id, - count(DISTINCT pod) as shared_pods - FROM cte_tag_matched - GROUP BY azure_id ) SELECT tm.*, - tm.pretax_cost / spod.shared_pods as pod_cost, - sp.shared_projects, - spod.shared_pods + tm.pretax_cost / sp.shared_projects as project_cost, + sp.shared_projects FROM cte_tag_matched AS tm JOIN cte_number_of_shared_projects AS sp ON tm.azure_id = sp.azure_id - JOIN cte_number_of_shared_pods AS spod - ON tm.azure_id = spod.azure_id ) ; @@ -413,25 +397,24 @@ INSERT INTO reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} ( -- and the value matches an OpenShift node name INSERT INTO reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} ( WITH cte_tag_matched AS ( - SELECT ocp.id AS ocp_id, + SELECT ocp.uuid AS ocp_id, ocp.report_period_id, ocp.cluster_id, ocp.cluster_alias, ocp.namespace, - ocp.pod, ocp.node, ocp.pod_labels, - ocp.pod_usage_cpu_core_seconds, - ocp.pod_request_cpu_core_seconds, - ocp.pod_limit_cpu_core_seconds, - ocp.pod_usage_memory_byte_seconds, - ocp.pod_request_memory_byte_seconds, + ocp.pod_usage_cpu_core_hours, + ocp.pod_request_cpu_core_hours, + ocp.pod_limit_cpu_core_hours, + ocp.pod_usage_memory_gigabyte_hours, + ocp.pod_request_memory_gigabyte_hours, ocp.node_capacity_cpu_cores, - ocp.node_capacity_cpu_core_seconds, - ocp.node_capacity_memory_bytes, - ocp.node_capacity_memory_byte_seconds, - ocp.cluster_capacity_cpu_core_seconds, - ocp.cluster_capacity_memory_byte_seconds, + ocp.node_capacity_cpu_core_hours, + ocp.node_capacity_memory_gigabytes, + ocp.node_capacity_memory_gigabyte_hours, + ocp.cluster_capacity_cpu_core_hours, + ocp.cluster_capacity_memory_gigabyte_hours, azure.id AS azure_id, azure.cost_entry_bill_id, azure.cost_entry_product_id, @@ -442,7 +425,7 @@ INSERT INTO reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} ( azure.pretax_cost, azure.tags FROM reporting_azure_special_case_tags_{{uuid | sqlsafe}} as azure - JOIN {{schema | 
sqlsafe}}.reporting_ocpusagelineitem_daily as ocp + JOIN {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp ON azure.key = 'openshift_node' AND azure.value = lower(ocp.node) AND azure.usage_date = ocp.usage_start -- ANTI JOIN to remove rows that already matched @@ -450,6 +433,7 @@ INSERT INTO reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} ( ON rm.azure_id = azure.id WHERE azure.usage_date >= {{start_date}}::date AND azure.usage_date <= {{end_date}}::date + AND ocp.data_source = 'Pod' AND rm.azure_id IS NULL ), cte_number_of_shared_projects AS ( @@ -457,22 +441,13 @@ INSERT INTO reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} ( count(DISTINCT namespace) as shared_projects FROM cte_tag_matched GROUP BY azure_id - ), - cte_number_of_shared_pods AS ( - SELECT azure_id, - count(DISTINCT pod) as shared_pods - FROM cte_tag_matched - GROUP BY azure_id ) SELECT tm.*, - tm.pretax_cost / spod.shared_pods as pod_cost, - sp.shared_projects, - spod.shared_pods + tm.pretax_cost / sp.shared_projects as project_cost, + sp.shared_projects FROM cte_tag_matched AS tm JOIN cte_number_of_shared_projects AS sp ON tm.azure_id = sp.azure_id - JOIN cte_number_of_shared_pods AS spod - ON tm.azure_id = spod.azure_id ) ; @@ -480,25 +455,24 @@ INSERT INTO reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} ( -- and the value matches an OpenShift cluster name INSERT INTO reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} ( WITH cte_tag_matched AS ( - SELECT ocp.id AS ocp_id, + SELECT ocp.uuid AS ocp_id, ocp.report_period_id, ocp.cluster_id, ocp.cluster_alias, ocp.namespace, - ocp.pod, ocp.node, ocp.pod_labels, - ocp.pod_usage_cpu_core_seconds, - ocp.pod_request_cpu_core_seconds, - ocp.pod_limit_cpu_core_seconds, - ocp.pod_usage_memory_byte_seconds, - ocp.pod_request_memory_byte_seconds, + ocp.pod_usage_cpu_core_hours, + ocp.pod_request_cpu_core_hours, + ocp.pod_limit_cpu_core_hours, + ocp.pod_usage_memory_gigabyte_hours, + ocp.pod_request_memory_gigabyte_hours, ocp.node_capacity_cpu_cores, - ocp.node_capacity_cpu_core_seconds, - ocp.node_capacity_memory_bytes, - ocp.node_capacity_memory_byte_seconds, - ocp.cluster_capacity_cpu_core_seconds, - ocp.cluster_capacity_memory_byte_seconds, + ocp.node_capacity_cpu_core_hours, + ocp.node_capacity_memory_gigabytes, + ocp.node_capacity_memory_gigabyte_hours, + ocp.cluster_capacity_cpu_core_hours, + ocp.cluster_capacity_memory_gigabyte_hours, azure.id AS azure_id, azure.cost_entry_bill_id, azure.cost_entry_product_id, @@ -509,7 +483,7 @@ INSERT INTO reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} ( azure.pretax_cost, azure.tags FROM reporting_azure_special_case_tags_{{uuid | sqlsafe}} as azure - JOIN {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily as ocp + JOIN {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp ON (azure.key = 'openshift_cluster' AND azure.value = lower(ocp.cluster_id) OR azure.key = 'openshift_cluster' AND azure.value = lower(ocp.cluster_alias)) AND azure.usage_date = ocp.usage_start @@ -518,6 +492,7 @@ INSERT INTO reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} ( ON rm.azure_id = azure.id WHERE azure.usage_date >= {{start_date}}::date AND azure.usage_date <= {{end_date}}::date + AND ocp.data_source = 'Pod' AND rm.azure_id IS NULL ), cte_number_of_shared_projects AS ( @@ -525,22 +500,13 @@ INSERT INTO reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} ( count(DISTINCT namespace) as shared_projects FROM cte_tag_matched GROUP BY azure_id - ), - cte_number_of_shared_pods 
AS ( - SELECT azure_id, - count(DISTINCT pod) as shared_pods - FROM cte_tag_matched - GROUP BY azure_id ) SELECT tm.*, - tm.pretax_cost / spod.shared_pods as pod_cost, - sp.shared_projects, - spod.shared_pods + tm.pretax_cost / sp.shared_projects as project_cost, + sp.shared_projects FROM cte_tag_matched AS tm JOIN cte_number_of_shared_projects AS sp ON tm.azure_id = sp.azure_id - JOIN cte_number_of_shared_pods AS spod - ON tm.azure_id = spod.azure_id ) ; @@ -548,25 +514,24 @@ INSERT INTO reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} ( -- and Azure tag key and value match directly INSERT INTO reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} ( WITH cte_tag_matched AS ( - SELECT ocp.id AS ocp_id, + SELECT ocp.uuid AS ocp_id, ocp.report_period_id, ocp.cluster_id, ocp.cluster_alias, ocp.namespace, - ocp.pod, ocp.node, ocp.pod_labels, - ocp.pod_usage_cpu_core_seconds, - ocp.pod_request_cpu_core_seconds, - ocp.pod_limit_cpu_core_seconds, - ocp.pod_usage_memory_byte_seconds, - ocp.pod_request_memory_byte_seconds, + ocp.pod_usage_cpu_core_hours, + ocp.pod_request_cpu_core_hours, + ocp.pod_limit_cpu_core_hours, + ocp.pod_usage_memory_gigabyte_hours, + ocp.pod_request_memory_gigabyte_hours, ocp.node_capacity_cpu_cores, - ocp.node_capacity_cpu_core_seconds, - ocp.node_capacity_memory_bytes, - ocp.node_capacity_memory_byte_seconds, - ocp.cluster_capacity_cpu_core_seconds, - ocp.cluster_capacity_memory_byte_seconds, + ocp.node_capacity_cpu_core_hours, + ocp.node_capacity_memory_gigabytes, + ocp.node_capacity_memory_gigabyte_hours, + ocp.cluster_capacity_cpu_core_hours, + ocp.cluster_capacity_memory_gigabyte_hours, azure.id AS azure_id, azure.cost_entry_bill_id, azure.cost_entry_product_id, @@ -592,22 +557,13 @@ INSERT INTO reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} ( count(DISTINCT namespace) as shared_projects FROM cte_tag_matched GROUP BY azure_id - ), - cte_number_of_shared_pods AS ( - SELECT azure_id, - count(DISTINCT pod) as shared_pods - FROM cte_tag_matched - GROUP BY azure_id ) SELECT tm.*, - tm.pretax_cost / spod.shared_pods as pod_cost, - sp.shared_projects, - spod.shared_pods + tm.pretax_cost / sp.shared_projects as project_cost, + sp.shared_projects FROM cte_tag_matched AS tm JOIN cte_number_of_shared_projects AS sp ON tm.azure_id = sp.azure_id - JOIN cte_number_of_shared_pods AS spod - ON tm.azure_id = spod.azure_id ) ; @@ -620,22 +576,20 @@ DROP TABLE reporting_ocp_pod_tags_{{uuid | sqlsafe}}; -- resource id match. OCP PVC name -> Azure instance ID. 
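The pod-side SELECTs above attribute each matched Azure line item to OpenShift projects in one of two ways: resource-id matches are weighted by the project's share of node CPU capacity, while tag matches are split evenly across the distinct projects that matched. A minimal Python illustration of those two rules, with made-up numbers (the real work happens in the SQL above):

    # Resource-id match: weight the Azure cost by the project's share of
    # the node's CPU capacity for the day.
    def resource_matched_project_cost(pod_usage_cpu_core_hours,
                                      node_capacity_cpu_core_hours,
                                      pretax_cost):
        return (pod_usage_cpu_core_hours / node_capacity_cpu_core_hours) * pretax_cost

    # Tag match: split the Azure cost evenly across the distinct projects
    # that matched the same Azure line item.
    def tag_matched_project_cost(pretax_cost, shared_projects):
        return pretax_cost / shared_projects

    assert resource_matched_project_cost(6.0, 24.0, 100.0) == 25.0
    assert tag_matched_project_cost(30.0, 3) == 10.0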
CREATE TEMPORARY TABLE reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} AS ( WITH cte_resource_id_matched AS ( - SELECT ocp.id AS ocp_id, + SELECT ocp.uuid AS ocp_id, ocp.report_period_id, ocp.cluster_id, ocp.cluster_alias, ocp.namespace, - ocp.pod, ocp.node, ocp.persistentvolumeclaim, ocp.persistentvolume, ocp.storageclass, - ocp.persistentvolumeclaim_capacity_bytes, - ocp.persistentvolumeclaim_capacity_byte_seconds, - ocp.volume_request_storage_byte_seconds, - ocp.persistentvolumeclaim_usage_byte_seconds, - ocp.persistentvolume_labels, - ocp.persistentvolumeclaim_labels, + ocp.persistentvolumeclaim_capacity_gigabyte, + ocp.persistentvolumeclaim_capacity_gigabyte_months, + ocp.volume_request_storage_gigabyte_months, + ocp.persistentvolumeclaim_usage_gigabyte_months, + ocp.volume_labels, azure.id AS azure_id, azure.cost_entry_bill_id, azure.cost_entry_product_id, @@ -648,12 +602,13 @@ CREATE TEMPORARY TABLE reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe} FROM reporting_azure_with_enabled_tags_{{uuid | sqlsafe}} as azure JOIN {{schema | sqlsafe}}.reporting_azurecostentryproductservice as aps ON azure.cost_entry_product_id = aps.id - JOIN {{schema | sqlsafe}}.reporting_ocpstoragelineitem_daily as ocp + JOIN {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp -- Need the doubl percent here for Jinja templating ON split_part(aps.instance_id, '/', 9) LIKE '%%' || ocp.persistentvolume AND azure.usage_date = ocp.usage_start WHERE azure.usage_date >= {{start_date}}::date AND azure.usage_date <= {{end_date}}::date + AND ocp.data_source = 'Storage' -- azure_where_clause {% if bill_ids %} AND cost_entry_bill_id IN ( @@ -672,22 +627,13 @@ CREATE TEMPORARY TABLE reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe} count(DISTINCT namespace) as shared_projects FROM cte_resource_id_matched GROUP BY azure_id - ), - cte_number_of_shared_pods AS ( - SELECT azure_id, - count(DISTINCT pod) as shared_pods - FROM cte_resource_id_matched - GROUP BY azure_id ) SELECT rm.*, - (rm.persistentvolumeclaim_usage_byte_seconds / rm.persistentvolumeclaim_capacity_byte_seconds) * rm.pretax_cost as pod_cost, - sp.shared_projects, - spod.shared_pods + (rm.persistentvolumeclaim_usage_gigabyte_months / rm.persistentvolumeclaim_capacity_gigabyte_months) * rm.pretax_cost as project_cost, + sp.shared_projects FROM cte_resource_id_matched AS rm JOIN cte_number_of_shared_projects AS sp ON rm.azure_id = sp.azure_id - JOIN cte_number_of_shared_pods AS spod - ON rm.azure_id = spod.azure_id ) ; @@ -695,22 +641,20 @@ CREATE TEMPORARY TABLE reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe} -- and the value matches an OpenShift project name INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( WITH cte_tag_matched AS ( - SELECT ocp.id AS ocp_id, + SELECT ocp.uuid AS ocp_id, ocp.report_period_id, ocp.cluster_id, ocp.cluster_alias, ocp.namespace, - ocp.pod, ocp.node, ocp.persistentvolumeclaim, ocp.persistentvolume, ocp.storageclass, - ocp.persistentvolumeclaim_capacity_bytes, - ocp.persistentvolumeclaim_capacity_byte_seconds, - ocp.volume_request_storage_byte_seconds, - ocp.persistentvolumeclaim_usage_byte_seconds, - ocp.persistentvolume_labels, - ocp.persistentvolumeclaim_labels, + ocp.persistentvolumeclaim_capacity_gigabyte, + ocp.persistentvolumeclaim_capacity_gigabyte_months, + ocp.volume_request_storage_gigabyte_months, + ocp.persistentvolumeclaim_usage_gigabyte_months, + ocp.volume_labels, azure.id AS azure_id, azure.cost_entry_bill_id, azure.cost_entry_product_id, @@ 
-721,7 +665,7 @@ INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( azure.pretax_cost, azure.tags FROM reporting_azure_special_case_tags_{{uuid | sqlsafe}} as azure - JOIN {{schema | sqlsafe}}.reporting_ocpstoragelineitem_daily as ocp + JOIN {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp ON azure.key = 'openshift_project' AND azure.value = lower(ocp.namespace) AND azure.usage_date = ocp.usage_start -- ANTI JOIN to remove rows that already matched @@ -729,6 +673,7 @@ INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( ON rm.azure_id = azure.id WHERE azure.usage_date >= {{start_date}}::date AND azure.usage_date <= {{end_date}}::date + AND ocp.data_source = 'Storage' AND rm.azure_id IS NULL ), cte_number_of_shared_projects AS ( @@ -736,22 +681,13 @@ INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( count(DISTINCT namespace) as shared_projects FROM cte_tag_matched GROUP BY azure_id - ), - cte_number_of_shared_pods AS ( - SELECT azure_id, - count(DISTINCT pod) as shared_pods - FROM cte_tag_matched - GROUP BY azure_id ) SELECT tm.*, - tm.pretax_cost / spod.shared_pods as pod_cost, - sp.shared_projects, - spod.shared_pods + tm.pretax_cost / sp.shared_projects as project_cost, + sp.shared_projects FROM cte_tag_matched AS tm JOIN cte_number_of_shared_projects AS sp ON tm.azure_id = sp.azure_id - JOIN cte_number_of_shared_pods AS spod - ON tm.azure_id = spod.azure_id ) ; @@ -759,22 +695,20 @@ INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( -- and the value matches an OpenShift node name INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( WITH cte_tag_matched AS ( - SELECT ocp.id AS ocp_id, + SELECT ocp.uuid AS ocp_id, ocp.report_period_id, ocp.cluster_id, ocp.cluster_alias, ocp.namespace, - ocp.pod, ocp.node, ocp.persistentvolumeclaim, ocp.persistentvolume, ocp.storageclass, - ocp.persistentvolumeclaim_capacity_bytes, - ocp.persistentvolumeclaim_capacity_byte_seconds, - ocp.volume_request_storage_byte_seconds, - ocp.persistentvolumeclaim_usage_byte_seconds, - ocp.persistentvolume_labels, - ocp.persistentvolumeclaim_labels, + ocp.persistentvolumeclaim_capacity_gigabyte, + ocp.persistentvolumeclaim_capacity_gigabyte_months, + ocp.volume_request_storage_gigabyte_months, + ocp.persistentvolumeclaim_usage_gigabyte_months, + ocp.volume_labels, azure.id AS azure_id, azure.cost_entry_bill_id, azure.cost_entry_product_id, @@ -785,7 +719,7 @@ INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( azure.pretax_cost, azure.tags FROM reporting_azure_special_case_tags_{{uuid | sqlsafe}} as azure - JOIN {{schema | sqlsafe}}.reporting_ocpstoragelineitem_daily as ocp + JOIN {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp ON azure.key = 'openshift_node' AND azure.value = lower(ocp.node) AND azure.usage_date = ocp.usage_start -- ANTI JOIN to remove rows that already matched @@ -793,6 +727,7 @@ INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( ON rm.azure_id = azure.id WHERE azure.usage_date >= {{start_date}}::date AND azure.usage_date <= {{end_date}}::date + AND ocp.data_source = 'Storage' AND rm.azure_id IS NULL ), cte_number_of_shared_projects AS ( @@ -800,22 +735,13 @@ INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( count(DISTINCT namespace) as shared_projects FROM cte_tag_matched GROUP BY azure_id - ), - cte_number_of_shared_pods AS ( - SELECT azure_id, - count(DISTINCT pod) as shared_pods - FROM 
cte_tag_matched - GROUP BY azure_id ) SELECT tm.*, - tm.pretax_cost / spod.shared_pods as pod_cost, - sp.shared_projects, - spod.shared_pods + tm.pretax_cost / sp.shared_projects as project_cost, + sp.shared_projects FROM cte_tag_matched AS tm JOIN cte_number_of_shared_projects AS sp ON tm.azure_id = sp.azure_id - JOIN cte_number_of_shared_pods AS spod - ON tm.azure_id = spod.azure_id ) ; @@ -823,22 +749,20 @@ INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( -- and the value matches an OpenShift cluster name INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( WITH cte_tag_matched AS ( - SELECT ocp.id AS ocp_id, + SELECT ocp.uuid AS ocp_id, ocp.report_period_id, ocp.cluster_id, ocp.cluster_alias, ocp.namespace, - ocp.pod, ocp.node, ocp.persistentvolumeclaim, ocp.persistentvolume, ocp.storageclass, - ocp.persistentvolumeclaim_capacity_bytes, - ocp.persistentvolumeclaim_capacity_byte_seconds, - ocp.volume_request_storage_byte_seconds, - ocp.persistentvolumeclaim_usage_byte_seconds, - ocp.persistentvolume_labels, - ocp.persistentvolumeclaim_labels, + ocp.persistentvolumeclaim_capacity_gigabyte, + ocp.persistentvolumeclaim_capacity_gigabyte_months, + ocp.volume_request_storage_gigabyte_months, + ocp.persistentvolumeclaim_usage_gigabyte_months, + ocp.volume_labels, azure.id AS azure_id, azure.cost_entry_bill_id, azure.cost_entry_product_id, @@ -849,7 +773,7 @@ INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( azure.pretax_cost, azure.tags FROM reporting_azure_special_case_tags_{{uuid | sqlsafe}} as azure - JOIN {{schema | sqlsafe}}.reporting_ocpstoragelineitem_daily as ocp + JOIN {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp ON (azure.key = 'openshift_cluster' AND azure.value = lower(ocp.cluster_id) OR azure.key = 'openshift_cluster' AND azure.value = lower(ocp.cluster_alias)) AND azure.usage_date = ocp.usage_start @@ -858,6 +782,7 @@ INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( ON rm.azure_id = azure.id WHERE azure.usage_date >= {{start_date}}::date AND azure.usage_date <= {{end_date}}::date + AND ocp.data_source = 'Storage' AND rm.azure_id IS NULL ), cte_number_of_shared_projects AS ( @@ -865,44 +790,33 @@ INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( count(DISTINCT namespace) as shared_projects FROM cte_tag_matched GROUP BY azure_id - ), - cte_number_of_shared_pods AS ( - SELECT azure_id, - count(DISTINCT pod) as shared_pods - FROM cte_tag_matched - GROUP BY azure_id ) SELECT tm.*, - tm.pretax_cost / spod.shared_pods as pod_cost, - sp.shared_projects, - spod.shared_pods + tm.pretax_cost / sp.shared_projects as project_cost, + sp.shared_projects FROM cte_tag_matched AS tm JOIN cte_number_of_shared_projects AS sp ON tm.azure_id = sp.azure_id - JOIN cte_number_of_shared_pods AS spod - ON tm.azure_id = spod.azure_id ) ; -- Next we match where the azure tag is kubernetes.io-created-for-pv-name INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( WITH cte_tag_matched AS ( - SELECT ocp.id AS ocp_id, + SELECT ocp.uuid AS ocp_id, ocp.report_period_id, ocp.cluster_id, ocp.cluster_alias, ocp.namespace, - ocp.pod, ocp.node, ocp.persistentvolumeclaim, ocp.persistentvolume, ocp.storageclass, - ocp.persistentvolumeclaim_capacity_bytes, - ocp.persistentvolumeclaim_capacity_byte_seconds, - ocp.volume_request_storage_byte_seconds, - ocp.persistentvolumeclaim_usage_byte_seconds, - ocp.persistentvolume_labels, - ocp.persistentvolumeclaim_labels, + 
ocp.persistentvolumeclaim_capacity_gigabyte, + ocp.persistentvolumeclaim_capacity_gigabyte_months, + ocp.volume_request_storage_gigabyte_months, + ocp.persistentvolumeclaim_usage_gigabyte_months, + ocp.volume_labels, azure.id AS azure_id, azure.cost_entry_bill_id, azure.cost_entry_product_id, @@ -913,7 +827,7 @@ INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( azure.pretax_cost, azure.tags FROM reporting_azure_special_case_tags_{{uuid | sqlsafe}} as azure - JOIN {{schema | sqlsafe}}.reporting_ocpstoragelineitem_daily as ocp + JOIN {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp ON azure.key = 'kubernetes.io-created-for-pv-name' AND azure.value = lower(ocp.persistentvolume) -- ANTI JOIN to remove rows that already matched @@ -921,6 +835,7 @@ INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( ON rm.azure_id = azure.id WHERE azure.usage_date >= {{start_date}}::date AND azure.usage_date <= {{end_date}}::date + AND ocp.data_source = 'Storage' AND rm.azure_id IS NULL ), cte_number_of_shared_projects AS ( @@ -928,22 +843,13 @@ INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( count(DISTINCT namespace) as shared_projects FROM cte_tag_matched GROUP BY azure_id - ), - cte_number_of_shared_pods AS ( - SELECT azure_id, - count(DISTINCT pod) as shared_pods - FROM cte_tag_matched - GROUP BY azure_id ) SELECT tm.*, - tm.pretax_cost / spod.shared_pods as pod_cost, - sp.shared_projects, - spod.shared_pods + tm.pretax_cost / sp.shared_projects as project_cost, + sp.shared_projects FROM cte_tag_matched AS tm JOIN cte_number_of_shared_projects AS sp ON tm.azure_id = sp.azure_id - JOIN cte_number_of_shared_pods AS spod - ON tm.azure_id = spod.azure_id ) ; @@ -956,22 +862,20 @@ DROP TABLE reporting_azure_special_case_tags_{{uuid | sqlsafe}}; -- and azure tag key and value match directly INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( WITH cte_tag_matched AS ( - SELECT ocp.id AS ocp_id, + SELECT ocp.uuid AS ocp_id, ocp.report_period_id, ocp.cluster_id, ocp.cluster_alias, ocp.namespace, - ocp.pod, ocp.node, ocp.persistentvolumeclaim, ocp.persistentvolume, ocp.storageclass, - ocp.persistentvolumeclaim_capacity_bytes, - ocp.persistentvolumeclaim_capacity_byte_seconds, - ocp.volume_request_storage_byte_seconds, - ocp.persistentvolumeclaim_usage_byte_seconds, - ocp.persistentvolume_labels, - ocp.persistentvolumeclaim_labels, + ocp.persistentvolumeclaim_capacity_gigabyte, + ocp.persistentvolumeclaim_capacity_gigabyte_months, + ocp.volume_request_storage_gigabyte_months, + ocp.persistentvolumeclaim_usage_gigabyte_months, + ocp.volume_labels, azure.id AS azure_id, azure.cost_entry_bill_id, azure.cost_entry_product_id, @@ -997,22 +901,13 @@ INSERT INTO reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} ( count(DISTINCT namespace) as shared_projects FROM cte_tag_matched GROUP BY azure_id - ), - cte_number_of_shared_pods AS ( - SELECT azure_id, - count(DISTINCT pod) as shared_pods - FROM cte_tag_matched - GROUP BY azure_id ) SELECT tm.*, - tm.pretax_cost / spod.shared_pods as pod_cost, - sp.shared_projects, - spod.shared_pods + tm.pretax_cost / sp.shared_projects as project_cost, + sp.shared_projects FROM cte_tag_matched AS tm JOIN cte_number_of_shared_projects AS sp ON tm.azure_id = sp.azure_id - JOIN cte_number_of_shared_pods AS spod - ON tm.azure_id = spod.azure_id ) ; @@ -1070,11 +965,11 @@ CREATE TEMPORARY TABLE reporting_ocpazurecostlineitem_daily_summary_{{uuid | sql ), cte_pod_project_cost AS ( 
SELECT pc.azure_id, - jsonb_object_agg(pc.namespace, pc.pod_cost) as project_costs + jsonb_object_agg(pc.namespace, pc.project_cost) as project_costs FROM ( SELECT li.azure_id, li.namespace, - sum(pod_cost) as pod_cost + sum(project_cost) as project_cost FROM reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} as li GROUP BY li.azure_id, li.namespace ) AS pc @@ -1082,11 +977,11 @@ CREATE TEMPORARY TABLE reporting_ocpazurecostlineitem_daily_summary_{{uuid | sql ), cte_storage_project_cost AS ( SELECT pc.azure_id, - jsonb_object_agg(pc.namespace, pc.pod_cost) as project_costs + jsonb_object_agg(pc.namespace, pc.project_cost) as project_costs FROM ( SELECT li.azure_id, li.namespace, - sum(pod_cost) as pod_cost + sum(project_cost) as project_cost FROM reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} as li GROUP BY li.azure_id, li.namespace ) AS pc @@ -1096,7 +991,6 @@ CREATE TEMPORARY TABLE reporting_ocpazurecostlineitem_daily_summary_{{uuid | sql max(li.cluster_id) as cluster_id, max(li.cluster_alias) as cluster_alias, array_agg(DISTINCT li.namespace) as namespace, - array_agg(DISTINCT li.pod) as pod, max(li.node) as node, max(li.usage_date) as usage_start, max(li.usage_date) as usage_end, @@ -1137,7 +1031,6 @@ CREATE TEMPORARY TABLE reporting_ocpazurecostlineitem_daily_summary_{{uuid | sql max(li.cluster_id) as cluster_id, max(li.cluster_alias) as cluster_alias, array_agg(DISTINCT li.namespace) as namespace, - array_agg(DISTINCT li.pod) as pod, max(li.node) as node, max(li.usage_date) as usage_start, max(li.usage_date) as usage_end, @@ -1228,7 +1121,6 @@ CREATE TEMPORARY TABLE reporting_ocpazurecostlineitem_project_daily_summary_{{uu li.cluster_alias, 'Pod' as data_source, li.namespace, - li.pod, li.node, li.pod_labels, max(li.usage_date) as usage_start, @@ -1241,12 +1133,12 @@ CREATE TEMPORARY TABLE reporting_ocpazurecostlineitem_project_daily_summary_{{uu max(split_part(p.instance_id, '/', 9)) as resource_id, max(m.currency) as currency, max(suu.unit_of_measure) as unit_of_measure, - max((li.usage_quantity * suu.multiplier) / li.shared_pods) as usage_quantity, - sum(li.pretax_cost / li.shared_pods) as pretax_cost, - sum(li.pretax_cost / li.shared_pods) * {{markup}}::numeric as markup_cost, - max(li.shared_pods) as shared_pods, - li.pod_cost, - li.pod_cost * {{markup}}::numeric as project_markup_cost, + max((li.usage_quantity * suu.multiplier) / li.shared_projects) as usage_quantity, + sum(li.pretax_cost / li.shared_projects) as pretax_cost, + sum(li.pretax_cost / li.shared_projects) * {{markup}}::numeric as markup_cost, + max(li.shared_projects) as shared_projects, + li.project_cost, + li.project_cost * {{markup}}::numeric as project_markup_cost, ab.provider_id as source_uuid FROM reporting_ocpazureusagelineitem_daily_{{uuid | sqlsafe}} as li JOIN {{schema | sqlsafe}}.reporting_azurecostentryproductservice AS p @@ -1265,10 +1157,9 @@ CREATE TEMPORARY TABLE reporting_ocpazurecostlineitem_project_daily_summary_{{uu li.cluster_id, li.cluster_alias, li.namespace, - li.pod, li.node, li.pod_labels, - li.pod_cost, + li.project_cost, ab.provider_id UNION @@ -1278,9 +1169,8 @@ CREATE TEMPORARY TABLE reporting_ocpazurecostlineitem_project_daily_summary_{{uu li.cluster_alias, 'Storage' as data_source, li.namespace, - li.pod, li.node, - li.persistentvolume_labels || li.persistentvolumeclaim_labels as pod_labels, + li.volume_labels as pod_labels, max(li.usage_date) as usage_start, max(li.usage_date) as usage_end, max(li.cost_entry_bill_id) as cost_entry_bill_id, @@ -1291,12 +1181,12 @@ 
CREATE TEMPORARY TABLE reporting_ocpazurecostlineitem_project_daily_summary_{{uu max(split_part(p.instance_id, '/', 9)) as resource_id, max(m.currency) as currency, max(sus.unit_of_measure) as unit_of_measure, - max((li.usage_quantity * sus.multiplier) / li.shared_pods) as usage_quantity, - sum(li.pretax_cost / li.shared_pods) as pretax_cost, - sum(li.pretax_cost / li.shared_pods) * {{markup}}::numeric as markup_cost, - max(li.shared_pods) as shared_pods, - li.pod_cost, - li.pod_cost * {{markup}}::numeric as project_markup_cost, + max((li.usage_quantity * sus.multiplier) / li.shared_projects) as usage_quantity, + sum(li.pretax_cost / li.shared_projects) as pretax_cost, + sum(li.pretax_cost / li.shared_projects) * {{markup}}::numeric as markup_cost, + max(li.shared_projects) as shared_projects, + li.project_cost, + li.project_cost * {{markup}}::numeric as project_markup_cost, ab.provider_id as source_uuid FROM reporting_ocpazurestoragelineitem_daily_{{uuid | sqlsafe}} AS li JOIN {{schema | sqlsafe}}.reporting_azurecostentryproductservice AS p @@ -1317,11 +1207,9 @@ CREATE TEMPORARY TABLE reporting_ocpazurecostlineitem_project_daily_summary_{{uu li.cluster_id, li.cluster_alias, li.namespace, - li.pod, li.node, - li.persistentvolume_labels, - li.persistentvolumeclaim_labels, - li.pod_cost, + li.volume_labels, + li.project_cost, ab.provider_id ) ; @@ -1359,7 +1247,6 @@ INSERT INTO {{schema | sqlsafe}}.reporting_ocpazurecostlineitem_daily_summary ( cluster_id, cluster_alias, namespace, - pod, node, resource_id, usage_start, @@ -1384,7 +1271,6 @@ INSERT INTO {{schema | sqlsafe}}.reporting_ocpazurecostlineitem_daily_summary ( cluster_id, cluster_alias, namespace, - pod, node, resource_id, usage_start, @@ -1435,7 +1321,6 @@ INSERT INTO {{schema | sqlsafe}}.reporting_ocpazurecostlineitem_project_daily_su cluster_alias, data_source, namespace, - pod, node, pod_labels, resource_id, @@ -1461,7 +1346,6 @@ INSERT INTO {{schema | sqlsafe}}.reporting_ocpazurecostlineitem_project_daily_su cluster_alias, data_source, namespace, - pod, node, pod_labels, resource_id, @@ -1477,7 +1361,7 @@ INSERT INTO {{schema | sqlsafe}}.reporting_ocpazurecostlineitem_project_daily_su markup_cost, currency, unit_of_measure, - pod_cost, + project_cost, project_markup_cost, source_uuid FROM reporting_ocpazurecostlineitem_project_daily_summary_{{uuid | sqlsafe}} diff --git a/koku/masu/database/sql/reporting_ocpstoragelineitem_daily_summary.sql b/koku/masu/database/sql/reporting_ocpstoragelineitem_daily_summary.sql index 26424c0900..fb171b0f61 100644 --- a/koku/masu/database/sql/reporting_ocpstoragelineitem_daily_summary.sql +++ b/koku/masu/database/sql/reporting_ocpstoragelineitem_daily_summary.sql @@ -1,32 +1,4 @@ CREATE TEMPORARY TABLE reporting_ocpstoragelineitem_daily_summary_{{uuid | sqlsafe}} AS ( - WITH cte_array_agg_keys AS ( - SELECT array_agg(key) as key_array - FROM reporting_ocpenabledtagkeys - ), - cte_filtered_volume_labels AS ( - SELECT id, - jsonb_object_agg(key,value) as volume_labels - FROM ( - SELECT lid.id, - -- persistentvolumeclaim_labels values will win in - -- the volume label merge - lid.persistentvolume_labels || lid.persistentvolumeclaim_labels as volume_labels, - aak.key_array - FROM reporting_ocpstoragelineitem_daily lid - JOIN cte_array_agg_keys aak - ON 1=1 - WHERE lid.usage_start >= {{start_date}} - AND lid.usage_start <= {{end_date}} - AND lid.cluster_id = {{cluster_id}} - AND ( - lid.persistentvolume_labels ?| aak.key_array - OR lid.persistentvolumeclaim_labels ?| aak.key_array - ) - ) AS lid, - 
jsonb_each_text(lid.volume_labels) AS labels - WHERE key = ANY (key_array) - GROUP BY id - ) SELECT uuid_generate_v4() as uuid, li.report_period_id, li.cluster_id, @@ -38,7 +10,7 @@ CREATE TEMPORARY TABLE reporting_ocpstoragelineitem_daily_summary_{{uuid | sqlsa li.persistentvolumeclaim, li.persistentvolume, li.storageclass, - coalesce(fvl.volume_labels, '{}'::jsonb) as volume_labels, + li.persistentvolume_labels || li.persistentvolumeclaim_labels as volume_labels, max(li.persistentvolumeclaim_capacity_bytes) * POWER(2, -30) as persistentvolumeclaim_capacity_gigabyte, sum(li.persistentvolumeclaim_capacity_byte_seconds) / 86400 * @@ -54,8 +26,6 @@ CREATE TEMPORARY TABLE reporting_ocpstoragelineitem_daily_summary_{{uuid | sqlsa * POWER(2, -30) as persistentvolumeclaim_usage_gigabyte_months, ab.provider_id as source_uuid FROM {{schema | sqlsafe}}.reporting_ocpstoragelineitem_daily AS li - LEFT JOIN cte_filtered_volume_labels AS fvl - ON li.id = fvl.id LEFT JOIN {{schema | sqlsafe}}.reporting_ocpusagereportperiod as ab ON li.cluster_id = ab.cluster_id WHERE usage_start >= {{start_date}} @@ -68,7 +38,7 @@ CREATE TEMPORARY TABLE reporting_ocpstoragelineitem_daily_summary_{{uuid | sqlsa li.usage_end, li.namespace, li.node, - fvl.volume_labels, + li.persistentvolume_labels || li.persistentvolumeclaim_labels, li.persistentvolume, li.persistentvolumeclaim, li.storageclass, diff --git a/koku/masu/database/sql/reporting_ocpstoragevolumelabel_summary.sql b/koku/masu/database/sql/reporting_ocpstoragevolumelabel_summary.sql index b788b44750..8cfd30fd9a 100644 --- a/koku/masu/database/sql/reporting_ocpstoragevolumelabel_summary.sql +++ b/koku/masu/database/sql/reporting_ocpstoragevolumelabel_summary.sql @@ -4,10 +4,11 @@ WITH cte_tag_value(key, value, report_period_id, namespace) AS ( li.report_period_id, li.namespace, li.node - FROM {{schema | sqlsafe}}.reporting_ocpstoragelineitem_daily AS li, - jsonb_each_text(li.persistentvolume_labels || li.persistentvolumeclaim_labels) labels + FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary AS li, + jsonb_each_text(li.volume_labels) labels + WHERE li.data_source = 'Storage' {% if report_periods %} - WHERE li.report_period_id IN ( + AND li.report_period_id IN ( {%- for report_period_id in report_period_ids -%} {{report_period_id}}{% if not loop.last %},{% endif %} {%- endfor -%} diff --git a/koku/masu/database/sql/reporting_ocpusagelineitem_daily_summary.sql b/koku/masu/database/sql/reporting_ocpusagelineitem_daily_summary.sql index 6be4913e6e..fb9d9eb53a 100644 --- a/koku/masu/database/sql/reporting_ocpusagelineitem_daily_summary.sql +++ b/koku/masu/database/sql/reporting_ocpusagelineitem_daily_summary.sql @@ -1,28 +1,5 @@ -- Place our query in a temporary table CREATE TEMPORARY TABLE reporting_ocpusagelineitem_daily_summary_{{uuid | sqlsafe}} AS ( - WITH cte_array_agg_keys AS ( - SELECT array_agg(key) as key_array - FROM {{schema | sqlsafe}}.reporting_ocpenabledtagkeys - ), - cte_filtered_pod_labels AS ( - SELECT id, - jsonb_object_agg(key,value) as pod_labels - FROM ( - SELECT lid.id, - lid.pod_labels, - aak.key_array - FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily lid - JOIN cte_array_agg_keys aak - ON 1=1 - WHERE lid.usage_start >= {{start_date}} - AND lid.usage_start <= {{end_date}} - AND lid.cluster_id = {{cluster_id}} - AND lid.pod_labels ?| aak.key_array - ) AS lid, - jsonb_each_text(lid.pod_labels) AS labels - WHERE key = ANY (key_array) - GROUP BY id - ) SELECT uuid_generate_v4() as uuid, li.report_period_id, li.cluster_id, 
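With the enabled-key CTEs removed here, the daily summary rows now carry the merged labels through unfiltered; trimming them down to enabled tag keys is deferred to the new update-enabled-tags SQL that follows. The jsonb_each_text / jsonb_object_agg pattern that SQL uses is equivalent to this small Python filter, shown only to make the intent concrete (names are illustrative):

    # enabled_keys mirrors the reporting_ocpenabledtagkeys table; labels
    # mirrors the pod_labels / volume_labels jsonb column on a summary row.
    def filter_enabled_labels(labels, enabled_keys):
        return {key: value for key, value in labels.items() if key in enabled_keys}

    labels = {"app": "koku", "version": "v1", "scratch": "ignore-me"}
    assert filter_enabled_labels(labels, {"app", "version"}) == {
        "app": "koku",
        "version": "v1",
    }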
@@ -32,7 +9,7 @@ CREATE TEMPORARY TABLE reporting_ocpusagelineitem_daily_summary_{{uuid | sqlsafe max(li.resource_id) as resource_id, li.usage_start, li.usage_end, - coalesce(fpl.pod_labels, '{}'::jsonb) as pod_labels, + li.pod_labels, sum(li.pod_usage_cpu_core_seconds) / 3600 as pod_usage_cpu_core_hours, sum(li.pod_request_cpu_core_seconds) / 3600 as pod_request_cpu_core_hours, sum(li.pod_limit_cpu_core_seconds) / 3600 as pod_limit_cpu_core_hours, @@ -48,8 +25,6 @@ CREATE TEMPORARY TABLE reporting_ocpusagelineitem_daily_summary_{{uuid | sqlsafe ab.provider_id as source_uuid, '{"cpu": 0.000000000, "memory": 0.000000000, "storage": 0.000000000}'::jsonb as infrastructure_usage_cost FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily AS li - LEFT JOIN cte_filtered_pod_labels AS fpl - ON li.id = fpl.id LEFT JOIN {{schema | sqlsafe}}.reporting_ocpusagereportperiod as ab ON li.cluster_id = ab.cluster_id WHERE usage_start >= {{start_date}} @@ -62,7 +37,7 @@ CREATE TEMPORARY TABLE reporting_ocpusagelineitem_daily_summary_{{uuid | sqlsafe li.usage_end, li.namespace, li.node, - fpl.pod_labels, + li.pod_labels, ab.provider_id ) ; diff --git a/koku/masu/database/sql/reporting_ocpusagelineitem_daily_summary_update_enabled_tags.sql b/koku/masu/database/sql/reporting_ocpusagelineitem_daily_summary_update_enabled_tags.sql new file mode 100644 index 0000000000..e20821b56b --- /dev/null +++ b/koku/masu/database/sql/reporting_ocpusagelineitem_daily_summary_update_enabled_tags.sql @@ -0,0 +1,86 @@ +WITH cte_array_agg_keys AS ( + SELECT array_agg(key) as key_array + FROM {{schema | sqlsafe}}.reporting_ocpenabledtagkeys +), +cte_filtered_pod_labels AS ( + SELECT uuid, + jsonb_object_agg(key,value) as pod_labels + FROM ( + SELECT lids.uuid, + lids.pod_labels as ocp_tags, + aak.key_array + FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary lids + JOIN cte_array_agg_keys aak + ON 1=1 + WHERE lids.pod_labels ?| aak.key_array + AND lids.usage_start >= date({{start_date}}) + AND lids.usage_start <= date({{end_date}}) + {% if bill_ids %} + AND lids.cost_entry_bill_id IN ( + {%- for bill_id in bill_ids -%} + {{bill_id}}{% if not loop.last %},{% endif %} + {%- endfor -%}) + {% endif %} + ) AS lids, + jsonb_each_text(lids.ocp_tags) AS labels + WHERE key = ANY (key_array) + GROUP BY lids.uuid +), +cte_filtered_volume_labels AS ( + SELECT uuid, + jsonb_object_agg(key,value) as volume_labels + FROM ( + SELECT lids.uuid, + lids.volume_labels as ocp_tags, + aak.key_array + FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary lids + JOIN cte_array_agg_keys aak + ON 1=1 + WHERE lids.volume_labels ?| aak.key_array + AND lids.usage_start >= date({{start_date}}) + AND lids.usage_start <= date({{end_date}}) + {% if bill_ids %} + AND lids.cost_entry_bill_id IN ( + {%- for bill_id in bill_ids -%} + {{bill_id}}{% if not loop.last %},{% endif %} + {%- endfor -%}) + {% endif %} + ) AS lids, + jsonb_each_text(lids.ocp_tags) AS labels + WHERE key = ANY (key_array) + GROUP BY lids.uuid +), +cte_joined_tags AS ( + SELECT f.uuid, + CASE WHEN f.pod_labels IS NOT NULL + THEN f.pod_labels + ELSE '{}'::jsonb + END AS pod_labels, + CASE WHEN f.volume_labels IS NOT NULL + THEN f.volume_labels + ELSE '{}'::jsonb + END AS volume_labels + FROM ( + SELECT lids.uuid, + fpl.pod_labels, + fvl.volume_labels + FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary AS lids + LEFT JOIN cte_filtered_pod_labels AS fpl + ON lids.uuid = fpl.uuid + LEFT JOIN cte_filtered_volume_labels AS fvl + ON lids.uuid = 
fvl.uuid + WHERE lids.usage_start >= date({{start_date}}) + AND lids.usage_start <= date({{end_date}}) + {% if bill_ids %} + AND lids.cost_entry_bill_id IN ( + {%- for bill_id in bill_ids -%} + {{bill_id}}{% if not loop.last %},{% endif %} + {%- endfor -%}) + {% endif %} + ) AS f +) +UPDATE {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary AS lids + SET pod_labels = jt.pod_labels, + volume_labels = jt.volume_labels +FROM cte_joined_tags AS jt +WHERE lids.uuid = jt.uuid diff --git a/koku/masu/database/sql/reporting_ocpusagepodlabel_summary.sql b/koku/masu/database/sql/reporting_ocpusagepodlabel_summary.sql index 1223cb261c..2556456615 100644 --- a/koku/masu/database/sql/reporting_ocpusagepodlabel_summary.sql +++ b/koku/masu/database/sql/reporting_ocpusagepodlabel_summary.sql @@ -4,14 +4,15 @@ WITH cte_tag_value(key, value, report_period_id, namespace) AS ( li.report_period_id, li.namespace, li.node - FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily AS li, + FROM {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary AS li, jsonb_each_text(li.pod_labels) labels + WHERE li.data_source = 'Pod' {% if report_periods %} - WHERE li.report_period_id IN ( - {%- for report_period_id in report_period_ids -%} - {{report_period_id}}{% if not loop.last %},{% endif %} - {%- endfor -%} - ) + AND li.report_period_id IN ( + {%- for report_period_id in report_period_ids -%} + {{report_period_id}}{% if not loop.last %},{% endif %} + {%- endfor -%} + ) {% endif %} GROUP BY key, value, li.report_period_id, li.namespace, li.node ), diff --git a/koku/masu/processor/ocp/ocp_cloud_parquet_summary_updater.py b/koku/masu/processor/ocp/ocp_cloud_parquet_summary_updater.py index f77df8265a..085a02a172 100644 --- a/koku/masu/processor/ocp/ocp_cloud_parquet_summary_updater.py +++ b/koku/masu/processor/ocp/ocp_cloud_parquet_summary_updater.py @@ -28,7 +28,6 @@ from masu.processor.ocp.ocp_cloud_summary_updater import OCPCloudReportSummaryUpdater from masu.util.aws.common import get_bills_from_provider as aws_get_bills_from_provider from masu.util.azure.common import get_bills_from_provider as azure_get_bills_from_provider -from masu.util.common import date_range_pair from masu.util.ocp.common import get_cluster_id_from_provider LOG = logging.getLogger(__name__) @@ -85,15 +84,14 @@ def update_aws_summary_tables(self, openshift_provider_uuid, aws_provider_uuid, def update_azure_summary_tables(self, openshift_provider_uuid, azure_provider_uuid, start_date, end_date): """Update operations specifically for OpenShift on Azure.""" if isinstance(start_date, str): - start_date = parser.parse(start_date) + start_date = parser.parse(start_date).date() if isinstance(end_date, str): - end_date = parser.parse(end_date) + end_date = parser.parse(end_date).date() cluster_id = get_cluster_id_from_provider(openshift_provider_uuid) azure_bills = azure_get_bills_from_provider(azure_provider_uuid, self._schema, start_date, end_date) - azure_bill_ids = [] with schema_context(self._schema): - azure_bill_ids = [str(bill.id) for bill in azure_bills] + current_azure_bill_id = azure_bills.first().id if azure_bills else None with CostModelDBAccessor(self._schema, azure_provider_uuid) as cost_model_accessor: markup = cost_model_accessor.markup @@ -101,19 +99,26 @@ def update_azure_summary_tables(self, openshift_provider_uuid, azure_provider_uu # OpenShift on Azure with AzureReportDBAccessor(self._schema) as accessor: - for start, end in date_range_pair(start_date, end_date): - LOG.info( - "Updating OpenShift on Azure 
summary table for " - "\n\tSchema: %s \n\tProvider: %s \n\tDates: %s - %s" - "\n\tCluster ID: %s, Azure Bill IDs: %s", - self._schema, - self._provider.uuid, - start, - end, - cluster_id, - str(azure_bill_ids), - ) - accessor.populate_ocp_on_azure_cost_daily_summary(start, end, cluster_id, azure_bill_ids, markup_value) + LOG.info( + "Updating OpenShift on Azure summary table for " + "\n\tSchema: %s \n\tProvider: %s \n\tDates: %s - %s" + "\n\tCluster ID: %s, Azure Bill ID: %s", + self._schema, + self._provider.uuid, + start_date, + end_date, + cluster_id, + current_azure_bill_id, + ) + accessor.populate_ocp_on_azure_cost_daily_summary_presto( + start_date, + end_date, + openshift_provider_uuid, + azure_provider_uuid, + cluster_id, + current_azure_bill_id, + markup_value, + ) accessor.populate_ocp_on_azure_tags_summary_table() with OCPReportDBAccessor(self._schema) as accessor: diff --git a/koku/masu/processor/ocp/ocp_report_parquet_processor.py b/koku/masu/processor/ocp/ocp_report_parquet_processor.py index e4e1caa935..4ef8c1f69b 100644 --- a/koku/masu/processor/ocp/ocp_report_parquet_processor.py +++ b/koku/masu/processor/ocp/ocp_report_parquet_processor.py @@ -15,6 +15,8 @@ # along with this program. If not, see . # """Processor for OCP Parquet files.""" +import datetime + import ciso8601 import pytz from tenant_schemas.utils import schema_context @@ -72,6 +74,8 @@ def create_bill(self, bill_date): report_period_start = ciso8601.parse_datetime(start_date).replace(hour=0, minute=0, tzinfo=pytz.UTC) report_period_end = ciso8601.parse_datetime(end_date).replace(hour=0, minute=0, tzinfo=pytz.UTC) + # Make end date first of next month + report_period_end = report_period_end + datetime.timedelta(days=1) provider = self._get_provider() diff --git a/koku/masu/processor/ocp/ocp_report_parquet_summary_updater.py b/koku/masu/processor/ocp/ocp_report_parquet_summary_updater.py index 675d4a2b28..8a68ae66ae 100644 --- a/koku/masu/processor/ocp/ocp_report_parquet_summary_updater.py +++ b/koku/masu/processor/ocp/ocp_report_parquet_summary_updater.py @@ -129,10 +129,11 @@ def update_summary_tables(self, start_date, end_date): LOG.info( "Updating OpenShift label summary tables for \n\tSchema: %s " "\n\tReport Period IDs: %s", self._schema, - report_period.id, + report_period_ids, ) accessor.populate_pod_label_summary_table(report_period_ids) accessor.populate_volume_label_summary_table(report_period_ids) + accessor.update_line_item_daily_summary_with_enabled_tags(start_date, end_date, report_period_ids) LOG.info("Updating OpenShift report periods") for period in report_periods: diff --git a/koku/masu/processor/ocp/ocp_report_summary_updater.py b/koku/masu/processor/ocp/ocp_report_summary_updater.py index 5f63a41262..cf0c9e84c7 100644 --- a/koku/masu/processor/ocp/ocp_report_summary_updater.py +++ b/koku/masu/processor/ocp/ocp_report_summary_updater.py @@ -106,6 +106,7 @@ def update_summary_tables(self, start_date, end_date): accessor.populate_storage_line_item_daily_summary_table(start, end, self._cluster_id) accessor.populate_pod_label_summary_table(report_period_ids) accessor.populate_volume_label_summary_table(report_period_ids) + accessor.update_line_item_daily_summary_with_enabled_tags(start_date, end_date, report_period_ids) for period in report_periods: if period.summary_data_creation_datetime is None: diff --git a/koku/masu/test/database/test_azure_report_db_accessor.py b/koku/masu/test/database/test_azure_report_db_accessor.py index 4ce906f1e9..30c401b185 100644 --- 
a/koku/masu/test/database/test_azure_report_db_accessor.py +++ b/koku/masu/test/database/test_azure_report_db_accessor.py @@ -315,6 +315,32 @@ def test_populate_line_item_daily_summary_table_presto(self, mock_presto): ) mock_presto.assert_called() + @patch("masu.database.azure_report_db_accessor.AzureReportDBAccessor._execute_presto_multipart_sql_query") + def test_populate_ocp_on_azure_cost_daily_summary_presto(self, mock_presto): + """Test that we construst our SQL and query using Presto.""" + dh = DateHelper() + start_date = dh.this_month_start.date() + end_date = dh.this_month_end.date() + + bills = self.accessor.get_cost_entry_bills_query_by_provider(self.azure_provider.uuid) + with schema_context(self.schema): + current_bill_id = bills.first().id if bills else None + + with CostModelDBAccessor(self.schema, self.aws_provider.uuid) as cost_model_accessor: + markup = cost_model_accessor.markup + markup_value = float(markup.get("value", 0)) / 100 + + self.accessor.populate_ocp_on_azure_cost_daily_summary_presto( + start_date, + end_date, + self.ocp_provider_uuid, + self.azure_provider_uuid, + self.ocp_cluster_id, + current_bill_id, + markup_value, + ) + mock_presto.assert_called() + def test_populate_enabled_tag_keys(self): """Test that enabled tag keys are populated.""" dh = DateHelper() diff --git a/koku/masu/test/database/test_ocp_report_db_accessor.py b/koku/masu/test/database/test_ocp_report_db_accessor.py index 6c0595b0f5..b66e6042b5 100644 --- a/koku/masu/test/database/test_ocp_report_db_accessor.py +++ b/koku/masu/test/database/test_ocp_report_db_accessor.py @@ -37,8 +37,11 @@ from masu.test import MasuTestCase from masu.test.database.helpers import ReportObjectCreator from masu.util.common import month_date_range_tuple +from reporting.models import OCPEnabledTagKeys +from reporting.models import OCPStorageVolumeLabelSummary from reporting.models import OCPUsageLineItem from reporting.models import OCPUsageLineItemDailySummary +from reporting.models import OCPUsagePodLabelSummary from reporting.models import OCPUsageReport from reporting.models import OCPUsageReportPeriod from reporting_common import REPORT_COLUMN_MAP @@ -2100,3 +2103,50 @@ def test_populate_tag_based_default_usage_costs(self): # noqa: C901 cost_fields[0] ) self.assertAlmostEqual(actual_diff, expected_diff) + + def test_update_line_item_daily_summary_with_enabled_tags(self): + """Test that we filter the daily summary table's tags with only enabled tags.""" + dh = DateHelper() + start_date = dh.this_month_start.date() + end_date = dh.this_month_end.date() + + report_periods = self.accessor.report_periods_for_provider_uuid(self.ocp_provider_uuid, start_date) + + with schema_context(self.schema): + OCPUsagePodLabelSummary.objects.all().delete() + OCPStorageVolumeLabelSummary.objects.all().delete() + key_to_keep = OCPEnabledTagKeys.objects.first() + OCPEnabledTagKeys.objects.exclude(key=key_to_keep.key).delete() + report_period_ids = [report_period.id for report_period in report_periods] + self.accessor.update_line_item_daily_summary_with_enabled_tags(start_date, end_date, report_period_ids) + tags = ( + OCPUsageLineItemDailySummary.objects.filter( + usage_start__gte=start_date, report_period_id__in=report_period_ids + ) + .values_list("pod_labels") + .distinct() + ) + + for tag in tags: + tag_dict = tag[0] + tag_keys = list(tag_dict.keys()) + if tag_keys: + self.assertEqual([key_to_keep.key], tag_keys) + else: + self.assertEqual([], tag_keys) + + tags = ( + OCPUsageLineItemDailySummary.objects.filter( + 
usage_start__gte=start_date, report_period_id__in=report_period_ids + ) + .values_list("volume_labels") + .distinct() + ) + + for tag in tags: + tag_dict = tag[0] + tag_keys = list(tag_dict.keys()) + if tag_keys: + self.assertEqual([key_to_keep.key], tag_keys) + else: + self.assertEqual([], tag_keys) diff --git a/koku/masu/test/processor/ocp/test_ocp_cloud_parquet_report_summary_updater.py b/koku/masu/test/processor/ocp/test_ocp_cloud_parquet_report_summary_updater.py index 3d1900ddbb..0aa30c02b1 100644 --- a/koku/masu/test/processor/ocp/test_ocp_cloud_parquet_report_summary_updater.py +++ b/koku/masu/test/processor/ocp/test_ocp_cloud_parquet_report_summary_updater.py @@ -20,14 +20,8 @@ from unittest.mock import Mock from unittest.mock import patch -from django.db.models import Sum - from api.models import Provider from api.utils import DateHelper -from masu.database import AZURE_REPORT_TABLE_MAP -from masu.database import OCP_REPORT_TABLE_MAP -from masu.database.azure_report_db_accessor import AzureReportDBAccessor -from masu.database.ocp_report_db_accessor import OCPReportDBAccessor from masu.database.provider_db_accessor import ProviderDBAccessor from masu.processor.ocp.ocp_cloud_parquet_summary_updater import OCPCloudParquetReportSummaryUpdater from masu.test import MasuTestCase @@ -87,41 +81,84 @@ def test_update_aws_summary_tables(self, mock_utility, mock_ocp, mock_ocp_on_aws decimal.Decimal(0), ) - @patch("masu.database.cost_model_db_accessor.CostModelDBAccessor.cost_model") - def test_update_azure_summary_tables(self, mock_cost_model): - """Test that summary tables are updated correctly.""" - markup = {"value": 10, "unit": "percent"} - mock_cost_model.markup = markup - - start_date = self.dh.this_month_start - end_date = self.dh.this_month_end - - updater = OCPCloudParquetReportSummaryUpdater(schema=self.schema, provider=self.azure_provider, manifest=None) - - updater.update_summary_tables(start_date, end_date) - - summary_table_name = AZURE_REPORT_TABLE_MAP["ocp_on_azure_daily_summary"] - with AzureReportDBAccessor(self.schema) as azure_accessor: - query = azure_accessor._get_db_obj_query(summary_table_name).filter( - cost_entry_bill__billing_period_start=start_date - ) - markup_cost = query.aggregate(Sum("markup_cost"))["markup_cost__sum"] - pretax_cost = query.aggregate(Sum("pretax_cost"))["pretax_cost__sum"] + @patch("masu.processor.ocp.ocp_cloud_updater_base.OCPCloudUpdaterBase.get_infra_map") + @patch( + "masu.processor.ocp.ocp_cloud_parquet_summary_updater.AzureReportDBAccessor.populate_ocp_on_azure_tags_summary_table" # noqa: E501 + ) + @patch( + "masu.processor.ocp.ocp_cloud_parquet_summary_updater.AzureReportDBAccessor.populate_ocp_on_azure_cost_daily_summary_presto" # noqa: E501 + ) + @patch("masu.database.ocp_report_db_accessor.OCPReportDBAccessor.update_summary_infrastructure_cost") + @patch("masu.processor.ocp.ocp_cloud_parquet_summary_updater.azure_get_bills_from_provider") + def test_update_azure_summary_tables(self, mock_utility, mock_ocp, mock_ocp_on_azure, mock_tag_summary, mock_map): + """Test that summary tables are properly run for an OCP provider.""" + fake_bills = Mock() + first = Mock() + bill_id = 1 + first.return_value.id = bill_id + fake_bills.first = first + mock_utility.return_value = fake_bills + start_date = self.dh.today.date() + end_date = start_date + datetime.timedelta(days=1) - self.assertAlmostEqual(markup_cost, pretax_cost * decimal.Decimal(markup.get("value") / 100), places=5) + with ProviderDBAccessor(self.azure_provider_uuid) as 
provider_accessor: + provider = provider_accessor.get_provider() + with ProviderDBAccessor(self.ocp_test_provider_uuid) as provider_accessor: + credentials = provider_accessor.get_credentials() + cluster_id = credentials.get("cluster_id") + mock_map.return_value = {self.ocp_test_provider_uuid: (self.azure_provider_uuid, Provider.PROVIDER_AZURE)} + updater = OCPCloudParquetReportSummaryUpdater(schema="acct10001", provider=provider, manifest=None) + updater.update_azure_summary_tables( + self.ocp_test_provider_uuid, self.azure_test_provider_uuid, start_date, end_date + ) + mock_ocp_on_azure.assert_called_with( + start_date, + end_date, + self.ocp_test_provider_uuid, + self.azure_test_provider_uuid, + cluster_id, + bill_id, + decimal.Decimal(0), + ) - daily_summary_table_name = OCP_REPORT_TABLE_MAP["line_item_daily_summary"] - with OCPReportDBAccessor(self.schema) as ocp_accessor: - query = ocp_accessor._get_db_obj_query(daily_summary_table_name).filter( - report_period__provider=self.ocp_on_azure_ocp_provider, - report_period__report_period_start=self.dh.this_month_start, - ) - infra_cost = query.aggregate(Sum("infrastructure_raw_cost"))["infrastructure_raw_cost__sum"] - project_infra_cost = query.aggregate(Sum("infrastructure_project_raw_cost"))[ - "infrastructure_project_raw_cost__sum" - ] + @patch("masu.processor.ocp.ocp_cloud_updater_base.OCPCloudUpdaterBase.get_infra_map") + @patch( + "masu.processor.ocp.ocp_cloud_parquet_summary_updater.AzureReportDBAccessor.populate_ocp_on_azure_tags_summary_table" # noqa: E501 + ) + @patch( + "masu.processor.ocp.ocp_cloud_parquet_summary_updater.AzureReportDBAccessor.populate_ocp_on_azure_cost_daily_summary_presto" # noqa: E501 + ) + @patch("masu.database.ocp_report_db_accessor.OCPReportDBAccessor.update_summary_infrastructure_cost") + @patch("masu.processor.ocp.ocp_cloud_parquet_summary_updater.azure_get_bills_from_provider") + def test_update_azure_summary_tables_with_string_dates( + self, mock_utility, mock_ocp, mock_ocp_on_azure, mock_tag_summary, mock_map + ): + """Test that summary tables are properly run for an OCP provider.""" + fake_bills = Mock() + first = Mock() + bill_id = 1 + first.return_value.id = bill_id + fake_bills.first = first + mock_utility.return_value = fake_bills + start_date = self.dh.today.date() + end_date = start_date + datetime.timedelta(days=1) - self.assertIsNotNone(infra_cost) - self.assertIsNotNone(project_infra_cost) - self.assertNotEqual(infra_cost, decimal.Decimal(0)) - self.assertNotEqual(project_infra_cost, decimal.Decimal(0)) + with ProviderDBAccessor(self.azure_provider_uuid) as provider_accessor: + provider = provider_accessor.get_provider() + with ProviderDBAccessor(self.ocp_test_provider_uuid) as provider_accessor: + credentials = provider_accessor.get_credentials() + cluster_id = credentials.get("cluster_id") + mock_map.return_value = {self.ocp_test_provider_uuid: (self.azure_provider_uuid, Provider.PROVIDER_AZURE)} + updater = OCPCloudParquetReportSummaryUpdater(schema="acct10001", provider=provider, manifest=None) + updater.update_azure_summary_tables( + self.ocp_test_provider_uuid, self.azure_test_provider_uuid, str(start_date), str(end_date) + ) + mock_ocp_on_azure.assert_called_with( + start_date, + end_date, + self.ocp_test_provider_uuid, + self.azure_test_provider_uuid, + cluster_id, + bill_id, + decimal.Decimal(0), + ) diff --git a/koku/masu/test/processor/ocp/test_ocp_report_parquet_processor.py b/koku/masu/test/processor/ocp/test_ocp_report_parquet_processor.py index a386f98653..436e261bfb 100644 
--- a/koku/masu/test/processor/ocp/test_ocp_report_parquet_processor.py +++ b/koku/masu/test/processor/ocp/test_ocp_report_parquet_processor.py @@ -15,6 +15,8 @@ # along with this program. If not, see . # """Test the OCPReportParquetProcessor.""" +import datetime + from tenant_schemas.utils import schema_context from api.utils import DateHelper @@ -54,7 +56,7 @@ def test_create_bill(self): """Test that a bill is created in the Postgres database.""" bill_date = DateHelper().next_month_start start_date = bill_date - end_date = DateHelper().next_month_end + end_date = DateHelper().next_month_end + datetime.timedelta(days=1) self.processor.create_bill(bill_date.date()) with schema_context(self.schema): @@ -70,7 +72,7 @@ def test_create_bill_with_string_arg(self): """Test that a bill is created in the Postgres database.""" bill_date = DateHelper().next_month_start start_date = bill_date - end_date = DateHelper().next_month_end + end_date = DateHelper().next_month_end + datetime.timedelta(days=1) self.processor.create_bill(str(bill_date.date())) diff --git a/koku/reporting/migrations/0162_auto_20201120_1901.py b/koku/reporting/migrations/0162_auto_20201120_1901.py new file mode 100644 index 0000000000..17742fb67c --- /dev/null +++ b/koku/reporting/migrations/0162_auto_20201120_1901.py @@ -0,0 +1,12 @@ +# Generated by Django 3.1.3 on 2020-11-20 19:01 +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [("reporting", "0161_auto_20210118_2113")] + + operations = [ + migrations.RemoveField(model_name="ocpazurecostlineitemdailysummary", name="pod"), + migrations.RemoveField(model_name="ocpazurecostlineitemprojectdailysummary", name="pod"), + ] diff --git a/koku/reporting/provider/azure/openshift/models.py b/koku/reporting/provider/azure/openshift/models.py index 8d2806e9db..ad0352e26a 100644 --- a/koku/reporting/provider/azure/openshift/models.py +++ b/koku/reporting/provider/azure/openshift/models.py @@ -67,8 +67,6 @@ class Meta: # Kubernetes objects by convention have a max name length of 253 chars namespace = ArrayField(models.CharField(max_length=253, null=False)) - pod = ArrayField(models.CharField(max_length=253, null=False)) - node = models.CharField(max_length=253, null=True) resource_id = models.CharField(max_length=253, null=True) @@ -149,8 +147,6 @@ class Meta: # Kubernetes objects by convention have a max name length of 253 chars namespace = models.CharField(max_length=253, null=False) - pod = models.CharField(max_length=253, null=True) - node = models.CharField(max_length=253, null=True) pod_labels = JSONField(null=True) diff --git a/scripts/nise_ymls/ocp_on_aws/ocp_static_data.yml b/scripts/nise_ymls/ocp_on_aws/ocp_static_data.yml index 400c67492a..7b12c2fe45 100644 --- a/scripts/nise_ymls/ocp_on_aws/ocp_static_data.yml +++ b/scripts/nise_ymls/ocp_on_aws/ocp_static_data.yml @@ -6,6 +6,7 @@ generators: nodes: - node: node_name: aws_compute1 + node_labels: label_nodeclass:compute cpu_cores: 2 memory_gig: 8 resource_id: 55555555 @@ -56,6 +57,7 @@ generators: capacity_gig: 20 - node: node_name: aws_compute2 + node_labels: label_nodeclass:compute cpu_cores: 2 memory_gig: 8 resource_id: 55555556 @@ -112,6 +114,7 @@ generators: capacity_gig: 20 - node: node_name: aws_master + node_labels: label_nodeclass:master cpu_cores: 2 memory_gig: 8 resource_id: 55555558 diff --git a/scripts/nise_ymls/ocp_on_azure/ocp_static_data.yml b/scripts/nise_ymls/ocp_on_azure/ocp_static_data.yml index dd921803c3..569b6fc41c 100644 --- 
a/scripts/nise_ymls/ocp_on_azure/ocp_static_data.yml +++ b/scripts/nise_ymls/ocp_on_azure/ocp_static_data.yml @@ -6,6 +6,7 @@ generators: nodes: - node: node_name: azure_compute1 + node_labels: label_nodeclass:compute cpu_cores: 2 memory_gig: 8 resource_id: 99999995 @@ -56,6 +57,7 @@ generators: capacity_gig: 20 - node: node_name: azure_compute2 + node_labels: label_nodeclass:compute cpu_cores: 2 memory_gig: 8 resource_id: 99999996 @@ -84,6 +86,7 @@ generators: capacity_gig: 20 - node: node_name: azure_compute3 + node_labels: label_nodeclass:compute cpu_cores: 2 memory_gig: 8 resource_id: 99999997 @@ -112,6 +115,7 @@ generators: capacity_gig: 20 - node: node_name: azure_master + node_labels: label_nodeclass:master cpu_cores: 2 memory_gig: 8 resource_id: 99999998 From 54bcc70745ac6250836a74df78dd9b7db5e63d81 Mon Sep 17 00:00:00 2001 From: Cody Myers Date: Tue, 19 Jan 2021 15:54:47 -0500 Subject: [PATCH 07/17] COST-895: GCP costs report generates 500 error when group by tags. (#2598) * COST-895: GCP costs report generates 500 error when group by tags. Co-authored-by: Douglas Curtis --- koku/api/report/gcp/query_handler.py | 5 +++-- koku/api/report/gcp/serializers.py | 1 + .../report/test/gcp/tests_gcp_query_handler.py | 17 ++++++++++++++++- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/koku/api/report/gcp/query_handler.py b/koku/api/report/gcp/query_handler.py index 94fe1d17f3..a80f13e74a 100644 --- a/koku/api/report/gcp/query_handler.py +++ b/koku/api/report/gcp/query_handler.py @@ -83,8 +83,9 @@ def annotations(self): annotations[q_param] = Concat(db_field, Value("")) group_by_fields = self._mapper.provider_map.get("group_by_annotations") for group_key in self._get_group_by(): - for q_param, db_field in group_by_fields[group_key].items(): - annotations[q_param] = Concat(db_field, Value("")) + if group_by_fields.get(group_key): + for q_param, db_field in group_by_fields[group_key].items(): + annotations[q_param] = Concat(db_field, Value("")) return annotations def _format_query_response(self): diff --git a/koku/api/report/gcp/serializers.py b/koku/api/report/gcp/serializers.py index 340e148687..46a111292b 100644 --- a/koku/api/report/gcp/serializers.py +++ b/koku/api/report/gcp/serializers.py @@ -72,6 +72,7 @@ class GCPQueryParamSerializer(ParamSerializer): delta = serializers.ChoiceField(choices=DELTA_CHOICES, required=False) units = serializers.CharField(required=False) + check_tags = serializers.BooleanField(required=False, default=False) def __init__(self, *args, **kwargs): """Initialize the GCP query param serializer.""" diff --git a/koku/api/report/test/gcp/tests_gcp_query_handler.py b/koku/api/report/test/gcp/tests_gcp_query_handler.py index 1b1fb73851..bc1c02eec8 100644 --- a/koku/api/report/test/gcp/tests_gcp_query_handler.py +++ b/koku/api/report/test/gcp/tests_gcp_query_handler.py @@ -38,6 +38,7 @@ from reporting.models import GCPCostSummaryByAccount from reporting.models import GCPCostSummaryByProject from reporting.models import GCPCostSummaryByService +from reporting.models import GCPTagsSummary LOG = logging.getLogger(__name__) @@ -177,7 +178,7 @@ def test_query_group_by_partial_filtered_service(self): self.assertIsNotNone(query_output.get("total")) total = query_output.get("total") aggregates = handler._mapper.report_type_map.get("aggregates") - filters = {**self.this_month_filter, "service_id__icontains": service} + filters = {**self.this_month_filter, "service_alias__icontains": service} for filt in handler._mapper.report_type_map.get("filter"): if filt: 
qf = QueryFilter(**filt) @@ -960,3 +961,17 @@ def test_execute_query_annotate(self): for data_item in data: month_val = data_item.get("date") self.assertEqual(month_val, cmonth_str) + + def test_execute_query_group_by_tag(self): + """Test execute_query for current month on monthly breakdown by service.""" + with tenant_context(self.tenant): + tag_object = GCPTagsSummary.objects.first() + key = tag_object.key + value = tag_object.values[0] + url = f"?filter[time_scope_units]=month&filter[time_scope_value]=-1&filter[resolution]=monthly&group_by[tag:{key}]={value}" # noqa: E501 + query_params = self.mocked_query_params(url, GCPCostView) + handler = GCPReportQueryHandler(query_params) + query_output = handler.execute_query() + data = query_output.get("data") + self.assertIsNotNone(data) + self.assertIsNotNone(query_output.get("total")) From 42dc8157dda3fb1c76188b065a8419be97927592 Mon Sep 17 00:00:00 2001 From: Brett Lentz Date: Tue, 19 Jan 2021 16:09:35 -0500 Subject: [PATCH 08/17] COST-854: ensure constant used for both prediction and confidence intervals. (#2596) * COST-854: ensure constant used for both prediction and confidence intervals. * make forecast unit tests more predictable --- koku/forecast/forecast.py | 18 +- koku/forecast/test/tests_forecast.py | 252 +++++++++++++-------------- 2 files changed, 137 insertions(+), 133 deletions(-) diff --git a/koku/forecast/forecast.py b/koku/forecast/forecast.py index 225a2be28e..7f18cc88d7 100644 --- a/koku/forecast/forecast.py +++ b/koku/forecast/forecast.py @@ -184,10 +184,10 @@ def _predict(self, data): pred_x = [i for i in range(X[-1] + 1, X[-1] + 1 + self.forecast_days_required)] # run the forecast - results = self._run_forecast(X, Y) + results = self._run_forecast(X, Y, to_predict=pred_x) result_dict = {} - for i, value in enumerate(results.prediction(pred_x)): + for i, value in enumerate(results.prediction): if i < len(results.confidence_lower): lower = results.confidence_lower[i] else: @@ -325,12 +325,13 @@ def format_result(self, results): response.append(dikt) return response - def _run_forecast(self, x, y): + def _run_forecast(self, x, y, to_predict=None): """Apply the forecast model. Args: x (list) a list of exogenous variables y (list) a list of endogenous variables + to_predict (list) a list of exogenous variables used in the forecast results Note: both x and y MUST be the same number of elements @@ -344,9 +345,10 @@ def _run_forecast(self, x, y): (list) P-values """ x = sm.add_constant(x) + to_predict = sm.add_constant(to_predict) model = sm.OLS(y, x) results = model.fit() - return LinearForecastResult(results, exog=x) + return LinearForecastResult(results, exog=to_predict) def _uniquify_qset(self, qset, field="total_cost"): """Take a QuerySet list, sum costs within the same day, and arrange it into a list of tuples. @@ -401,6 +403,7 @@ def __init__(self, regression_result, exog=None): regression_result (RegressionResult) the results of a statsmodels regression exog (array-like) exogenous variables for points to predict """ + self._exog = exog self._regression_result = regression_result self._std_err, self._conf_lower, self._conf_upper = wls_prediction_std(regression_result, exog=exog) @@ -412,7 +415,8 @@ def __init__(self, regression_result, exog=None): LOG.debug("Forecast interval lower-bound: %s", self.confidence_lower) LOG.debug("Forecast interval upper-bound: %s", self.confidence_upper) - def prediction(self, to_predict=None): + @property + def prediction(self): """Forecast prediction. 
Args: @@ -424,8 +428,8 @@ def prediction(self, to_predict=None): # predict() returns the same number of elements as the number of input observations prediction = [] try: - if to_predict: - prediction = self._regression_result.predict(sm.add_constant(to_predict)) + if self._exog is not None: + prediction = self._regression_result.predict(sm.add_constant(self._exog)) else: prediction = self._regression_result.predict() except ValueError as exc: diff --git a/koku/forecast/test/tests_forecast.py b/koku/forecast/test/tests_forecast.py index b9f0b5897a..d2188067ed 100644 --- a/koku/forecast/test/tests_forecast.py +++ b/koku/forecast/test/tests_forecast.py @@ -172,14 +172,12 @@ def test_predict_flat(self): expected = [] for n in range(0, 10): - # the test data needs to include some jitter to avoid - # division-by-zero in the underlying dot-product maths. expected.append( { "usage_start": (dh.this_month_start + timedelta(days=n)).date(), - "total_cost": 5 + random.random(), - "infrastructure_cost": 3 + random.random(), - "supplementary_cost": 2 + random.random(), + "total_cost": 5 + (0.01 * n), + "infrastructure_cost": 3 + (0.01 * n), + "supplementary_cost": 2 + (0.01 * n), } ) mock_qset = MockQuerySet(expected) @@ -199,19 +197,21 @@ def test_predict_flat(self): for result in results: for val in result.get("values", []): - self.assertIsInstance(val.get("date"), date) - - for item, cost in [ - (val.get("cost"), 5), - (val.get("infrastructure"), 3), - (val.get("supplementary"), 2), - ]: - self.assertAlmostEqual(float(item.get("total").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("confidence_max").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("confidence_min").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("rsquared").get("value")), 1, delta=2.2) - for pval in item.get("pvalues").get("value"): - self.assertGreaterEqual(float(pval), 0) + with self.subTest(values=val): + self.assertIsInstance(val.get("date"), date) + + for item, cost, delta in [ + (val.get("cost"), 5, 1), + (val.get("infrastructure"), 3, 1), + (val.get("supplementary"), 2, 1), + ]: + with self.subTest(cost=cost, delta=delta, item=item): + self.assertAlmostEqual(float(item.get("total").get("value")), cost, delta=delta) + self.assertAlmostEqual(float(item.get("confidence_max").get("value")), cost, delta=delta) + self.assertAlmostEqual(float(item.get("confidence_min").get("value")), cost, delta=delta) + self.assertGreater(float(item.get("rsquared").get("value")), 0) + for pval in item.get("pvalues").get("value"): + self.assertGreaterEqual(float(pval), 0) def test_predict_increasing(self): """Test that predict() returns expected values for increasing costs.""" @@ -431,14 +431,12 @@ def test_predict_flat(self): expected = [] for n in range(0, 10): - # the test data needs to include some jitter to avoid - # division-by-zero in the underlying dot-product maths. 
expected.append( { "usage_start": (dh.this_month_start + timedelta(days=n)).date(), - "total_cost": 5 + random.random(), - "infrastructure_cost": 3 + random.random(), - "supplementary_cost": 2 + random.random(), + "total_cost": 5 + (0.01 * n), + "infrastructure_cost": 3 + (0.01 * n), + "supplementary_cost": 2 + (0.01 * n), } ) mock_qset = MockQuerySet(expected) @@ -458,19 +456,21 @@ def test_predict_flat(self): for result in results: for val in result.get("values", []): - self.assertIsInstance(val.get("date"), date) - - for item, cost in [ - (val.get("cost"), 5), - (val.get("infrastructure"), 3), - (val.get("supplementary"), 2), - ]: - self.assertAlmostEqual(float(item.get("total").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("confidence_max").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("confidence_min").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("rsquared").get("value")), 1, delta=2.2) - for pval in item.get("pvalues").get("value"): - self.assertGreaterEqual(float(pval), 0) + with self.subTest(values=val): + self.assertIsInstance(val.get("date"), date) + + for item, cost, delta in [ + (val.get("cost"), 5, 1), + (val.get("infrastructure"), 3, 1), + (val.get("supplementary"), 2, 1), + ]: + with self.subTest(cost=cost, delta=delta, item=item): + self.assertAlmostEqual(float(item.get("total").get("value")), cost, delta=delta) + self.assertAlmostEqual(float(item.get("confidence_max").get("value")), cost, delta=delta) + self.assertAlmostEqual(float(item.get("confidence_min").get("value")), cost, delta=delta) + self.assertGreater(float(item.get("rsquared").get("value")), 0) + for pval in item.get("pvalues").get("value"): + self.assertGreaterEqual(float(pval), 0) class GCPForecastTest(IamTestCase): @@ -482,14 +482,12 @@ def test_predict_flat(self): expected = [] for n in range(0, 10): - # the test data needs to include some jitter to avoid - # division-by-zero in the underlying dot-product maths. 
expected.append( { "usage_start": (dh.this_month_start + timedelta(days=n)).date(), - "total_cost": 5 + random.random(), - "infrastructure_cost": 3 + random.random(), - "supplementary_cost": 2 + random.random(), + "total_cost": 5 + (0.01 * n), + "infrastructure_cost": 3 + (0.01 * n), + "supplementary_cost": 2 + (0.01 * n), } ) mock_qset = MockQuerySet(expected) @@ -509,19 +507,21 @@ def test_predict_flat(self): for result in results: for val in result.get("values", []): - self.assertIsInstance(val.get("date"), date) - - for item, cost in [ - (val.get("cost"), 5), - (val.get("infrastructure"), 3), - (val.get("supplementary"), 2), - ]: - self.assertAlmostEqual(float(item.get("total").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("confidence_max").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("confidence_min").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("rsquared").get("value")), 1, delta=2.2) - for pval in item.get("pvalues").get("value"): - self.assertGreaterEqual(float(pval), 0) + with self.subTest(values=val): + self.assertIsInstance(val.get("date"), date) + + for item, cost, delta in [ + (val.get("cost"), 5, 1), + (val.get("infrastructure"), 3, 1), + (val.get("supplementary"), 2, 1), + ]: + with self.subTest(cost=cost, delta=delta, item=item): + self.assertAlmostEqual(float(item.get("total").get("value")), cost, delta=delta) + self.assertAlmostEqual(float(item.get("confidence_max").get("value")), cost, delta=delta) + self.assertAlmostEqual(float(item.get("confidence_min").get("value")), cost, delta=delta) + self.assertGreater(float(item.get("rsquared").get("value")), 0) + for pval in item.get("pvalues").get("value"): + self.assertGreaterEqual(float(pval), 0) def test_cost_summary_table(self): """Test that we select a valid table or view.""" @@ -560,14 +560,12 @@ def test_predict_flat(self): expected = [] for n in range(0, 10): - # the test data needs to include some jitter to avoid - # division-by-zero in the underlying dot-product maths. 
expected.append( { "usage_start": (dh.this_month_start + timedelta(days=n)).date(), - "total_cost": 5 + random.random(), - "infrastructure_cost": 3 + random.random(), - "supplementary_cost": 2 + random.random(), + "total_cost": 5 + (0.01 * n), + "infrastructure_cost": 3 + (0.01 * n), + "supplementary_cost": 2 + (0.01 * n), } ) mock_qset = MockQuerySet(expected) @@ -587,19 +585,21 @@ def test_predict_flat(self): for result in results: for val in result.get("values", []): - self.assertIsInstance(val.get("date"), date) - - for item, cost in [ - (val.get("cost"), 5), - (val.get("infrastructure"), 3), - (val.get("supplementary"), 2), - ]: - self.assertAlmostEqual(float(item.get("total").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("confidence_max").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("confidence_min").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("rsquared").get("value")), 1, delta=2.2) - for pval in item.get("pvalues").get("value"): - self.assertGreaterEqual(float(pval), 0) + with self.subTest(values=val): + self.assertIsInstance(val.get("date"), date) + + for item, cost, delta in [ + (val.get("cost"), 5, 1), + (val.get("infrastructure"), 3, 1), + (val.get("supplementary"), 2, 1), + ]: + with self.subTest(cost=cost, delta=delta, item=item): + self.assertAlmostEqual(float(item.get("total").get("value")), cost, delta=delta) + self.assertAlmostEqual(float(item.get("confidence_max").get("value")), cost, delta=delta) + self.assertAlmostEqual(float(item.get("confidence_min").get("value")), cost, delta=delta) + self.assertGreater(float(item.get("rsquared").get("value")), 0) + for pval in item.get("pvalues").get("value"): + self.assertGreaterEqual(float(pval), 0) def test_cost_summary_table(self): """Test that we select a valid table or view.""" @@ -643,14 +643,12 @@ def test_predict_flat(self): expected = [] for n in range(0, 10): - # the test data needs to include some jitter to avoid - # division-by-zero in the underlying dot-product maths. 
expected.append( { "usage_start": (dh.this_month_start + timedelta(days=n)).date(), - "total_cost": 5 + random.random(), - "infrastructure_cost": 3 + random.random(), - "supplementary_cost": 2 + random.random(), + "total_cost": 5 + (0.01 * n), + "infrastructure_cost": 3 + (0.01 * n), + "supplementary_cost": 2 + (0.01 * n), } ) mock_qset = MockQuerySet(expected) @@ -670,19 +668,21 @@ def test_predict_flat(self): for result in results: for val in result.get("values", []): - self.assertIsInstance(val.get("date"), date) - - for item, cost in [ - (val.get("cost"), 5), - (val.get("infrastructure"), 3), - (val.get("supplementary"), 2), - ]: - self.assertAlmostEqual(float(item.get("total").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("confidence_max").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("confidence_min").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("rsquared").get("value")), 1, delta=2.2) - for pval in item.get("pvalues").get("value"): - self.assertGreaterEqual(float(pval), 0) + with self.subTest(values=val): + self.assertIsInstance(val.get("date"), date) + + for item, cost, delta in [ + (val.get("cost"), 5, 1), + (val.get("infrastructure"), 3, 1), + (val.get("supplementary"), 2, 1), + ]: + with self.subTest(cost=cost, delta=delta, item=item): + self.assertAlmostEqual(float(item.get("total").get("value")), cost, delta=delta) + self.assertAlmostEqual(float(item.get("confidence_max").get("value")), cost, delta=delta) + self.assertAlmostEqual(float(item.get("confidence_min").get("value")), cost, delta=delta) + self.assertGreater(float(item.get("rsquared").get("value")), 0) + for pval in item.get("pvalues").get("value"): + self.assertGreaterEqual(float(pval), 0) class OCPAWSForecastTest(IamTestCase): @@ -694,14 +694,12 @@ def test_predict_flat(self): expected = [] for n in range(0, 10): - # the test data needs to include some jitter to avoid - # division-by-zero in the underlying dot-product maths. 
expected.append( { "usage_start": (dh.this_month_start + timedelta(days=n)).date(), - "total_cost": 5 + random.random(), - "infrastructure_cost": 3 + random.random(), - "supplementary_cost": 2 + random.random(), + "total_cost": 5 + (0.01 * n), + "infrastructure_cost": 3 + (0.01 * n), + "supplementary_cost": 2 + (0.01 * n), } ) mock_qset = MockQuerySet(expected) @@ -721,19 +719,21 @@ def test_predict_flat(self): for result in results: for val in result.get("values", []): - self.assertIsInstance(val.get("date"), date) - - for item, cost in [ - (val.get("cost"), 5), - (val.get("infrastructure"), 3), - (val.get("supplementary"), 2), - ]: - self.assertAlmostEqual(float(item.get("total").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("confidence_max").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("confidence_min").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("rsquared").get("value")), 1, delta=2.2) - for pval in item.get("pvalues").get("value"): - self.assertGreaterEqual(float(pval), 0) + with self.subTest(values=val): + self.assertIsInstance(val.get("date"), date) + + for item, cost, delta in [ + (val.get("cost"), 5, 1), + (val.get("infrastructure"), 3, 1), + (val.get("supplementary"), 2, 1), + ]: + with self.subTest(cost=cost, delta=delta, item=item): + self.assertAlmostEqual(float(item.get("total").get("value")), cost, delta=delta) + self.assertAlmostEqual(float(item.get("confidence_max").get("value")), cost, delta=delta) + self.assertAlmostEqual(float(item.get("confidence_min").get("value")), cost, delta=delta) + self.assertGreater(float(item.get("rsquared").get("value")), 0) + for pval in item.get("pvalues").get("value"): + self.assertGreaterEqual(float(pval), 0) class OCPAzureForecastTest(IamTestCase): @@ -745,14 +745,12 @@ def test_predict_flat(self): expected = [] for n in range(0, 10): - # the test data needs to include some jitter to avoid - # division-by-zero in the underlying dot-product maths. 
expected.append( { "usage_start": (dh.this_month_start + timedelta(days=n)).date(), - "total_cost": 5 + random.random(), - "infrastructure_cost": 3 + random.random(), - "supplementary_cost": 2 + random.random(), + "total_cost": 5 + (0.01 * n), + "infrastructure_cost": 3 + (0.01 * n), + "supplementary_cost": 2 + (0.01 * n), } ) mock_qset = MockQuerySet(expected) @@ -773,19 +771,21 @@ def test_predict_flat(self): for result in results: for val in result.get("values", []): - self.assertIsInstance(val.get("date"), date) - - for item, cost in [ - (val.get("cost"), 5), - (val.get("infrastructure"), 3), - (val.get("supplementary"), 2), - ]: - self.assertAlmostEqual(float(item.get("total").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("confidence_max").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("confidence_min").get("value")), cost, delta=2.2) - self.assertAlmostEqual(float(item.get("rsquared").get("value")), 1, delta=2.2) - for pval in item.get("pvalues").get("value"): - self.assertGreaterEqual(float(pval), 0) + with self.subTest(values=val): + self.assertIsInstance(val.get("date"), date) + + for item, cost, delta in [ + (val.get("cost"), 5, 1), + (val.get("infrastructure"), 3, 1), + (val.get("supplementary"), 2, 1), + ]: + with self.subTest(cost=cost, delta=delta, item=item): + self.assertAlmostEqual(float(item.get("total").get("value")), cost, delta=delta) + self.assertAlmostEqual(float(item.get("confidence_max").get("value")), cost, delta=delta) + self.assertAlmostEqual(float(item.get("confidence_min").get("value")), cost, delta=delta) + self.assertGreater(float(item.get("rsquared").get("value")), 0) + for pval in item.get("pvalues").get("value"): + self.assertGreaterEqual(float(pval), 0) class LinearForecastResultTest(IamTestCase): From a6b10335c5b13a4e08a623296981042a3ce9b879 Mon Sep 17 00:00:00 2001 From: Douglas Curtis Date: Tue, 19 Jan 2021 21:11:25 -0500 Subject: [PATCH 09/17] tech-debt: Removing sources endpoint references (#2588) --- .../0034_remove_sources_endpoint_id.py | 9 + koku/api/provider/models.py | 3 - koku/sources/kafka_listener.py | 3 - koku/sources/sources_http_client.py | 165 +++++++---------- koku/sources/storage.py | 17 -- koku/sources/test/test_kafka_listener.py | 65 +++---- koku/sources/test/test_sources_http_client.py | 167 +----------------- koku/sources/test/test_storage.py | 34 ---- 8 files changed, 93 insertions(+), 370 deletions(-) create mode 100644 koku/api/migrations/0034_remove_sources_endpoint_id.py diff --git a/koku/api/migrations/0034_remove_sources_endpoint_id.py b/koku/api/migrations/0034_remove_sources_endpoint_id.py new file mode 100644 index 0000000000..62ba8a769a --- /dev/null +++ b/koku/api/migrations/0034_remove_sources_endpoint_id.py @@ -0,0 +1,9 @@ +# Generated by Django 3.1.3 on 2021-01-15 15:32 +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [("api", "0033_sources_name_text")] + + operations = [migrations.RemoveField(model_name="sources", name="endpoint_id")] diff --git a/koku/api/provider/models.py b/koku/api/provider/models.py index 85eed55ec0..ddcb9029c7 100644 --- a/koku/api/provider/models.py +++ b/koku/api/provider/models.py @@ -190,9 +190,6 @@ class Meta: # Kafka message offset for Platform-Sources kafka stream offset = models.IntegerField(null=False) - # Endpoint ID. Identifier to connect source to authentication. - endpoint_id = models.IntegerField(null=True) - # Koku Specific data. 
# Customer Account ID account_id = models.TextField(null=True) diff --git a/koku/sources/kafka_listener.py b/koku/sources/kafka_listener.py index 29af541d00..4626747197 100644 --- a/koku/sources/kafka_listener.py +++ b/koku/sources/kafka_listener.py @@ -105,7 +105,6 @@ def __init__(self, auth_header, source_id): self.source_type_id = int(details.get("source_type_id")) self.source_uuid = details.get("uid") self.source_type_name = sources_network.get_source_type_name(self.source_type_id) - self.endpoint_id = sources_network.get_endpoint_id() self.source_type = SOURCE_PROVIDER_MAP.get(self.source_type_name) @@ -366,8 +365,6 @@ def cost_mgmt_msg_filter(msg_data): if event_type in (KAFKA_AUTHENTICATION_CREATE, KAFKA_AUTHENTICATION_UPDATE): sources_network = SourcesHTTPClient(auth_header) - if msg_data.get("resource_type") == "Endpoint": - source_id = sources_network.get_source_id_from_endpoint_id(msg_data.get("resource_id")) if msg_data.get("resource_type") == "Application": source_id = sources_network.get_source_id_from_applications_id(msg_data.get("resource_id")) msg_data["source_id"] = source_id diff --git a/koku/sources/sources_http_client.py b/koku/sources/sources_http_client.py index c9fd40cc60..d82598019b 100644 --- a/koku/sources/sources_http_client.py +++ b/koku/sources/sources_http_client.py @@ -77,38 +77,6 @@ def get_source_details(self): response = r.json() return response - def get_endpoint_id(self): - """Get Sources Endpoint ID from Source ID.""" - endpoint_url = f"{self._base_url}/endpoints?filter[source_id]={self._source_id}" - r = self._get_network_response(endpoint_url, self._identity_header, "Unable to endpoint ID") - if r.status_code == 404: - raise SourceNotFoundError(f"Status Code: {r.status_code}") - elif r.status_code != 200: - raise SourcesHTTPClientError("Status Code: ", r.status_code) - endpoint_response = r.json() - - endpoint_id = None - if endpoint_response.get("data"): - endpoint_id = endpoint_response.get("data")[0].get("id") - - return endpoint_id - - def get_source_id_from_endpoint_id(self, resource_id): - """Get Source ID from Sources Endpoint ID.""" - endpoint_url = f"{self._base_url}/endpoints?filter[id]={resource_id}" - r = self._get_network_response(endpoint_url, self._identity_header, "Unable to source ID from endpoint ID") - if r.status_code == 404: - raise SourceNotFoundError(f"Status Code: {r.status_code}") - elif r.status_code != 200: - raise SourcesHTTPClientError("Status Code: ", r.status_code) - endpoint_response = r.json() - - source_id = None - if endpoint_response.get("data"): - source_id = endpoint_response.get("data")[0].get("source_id") - - return source_id - def get_source_id_from_applications_id(self, resource_id): """Get Source ID from Sources Authentications ID.""" authentication_url = f"{self._base_url}/applications?filter[id]={resource_id}" @@ -178,37 +146,32 @@ def get_source_type_name(self, type_id): def get_aws_credentials(self): """Get the roleARN from Sources Authentication service.""" - urls = [ - "{}/applications?filter[source_id]={}".format(self._base_url, str(self._source_id)), - "{}/endpoints?filter[source_id]={}".format(self._base_url, str(self._source_id)), - ] - - for url in urls: - r = self._get_network_response(url, self._identity_header, "Unable to AWS RoleARN") - endpoint_response = r.json() - if endpoint_response.get("data"): - resource_id = endpoint_response.get("data")[0].get("id") - else: - continue - - authentications_str = "{}/authentications?[authtype]=arn&[resource_id]={}" - authentications_url = 
authentications_str.format(self._base_url, str(resource_id)) - r = self._get_network_response(authentications_url, self._identity_header, "Unable to AWS RoleARN") - authentications_response = r.json() - if not authentications_response.get("data"): - continue - authentications_id = authentications_response.get("data")[0].get("id") - - authentications_internal_url = "{}/authentications/{}?expose_encrypted_attribute[]=password".format( - self._internal_url, str(authentications_id) - ) - r = self._get_network_response( - authentications_internal_url, self._identity_header, "Unable to AWS RoleARN" - ) - authentications_internal_response = r.json() - password = authentications_internal_response.get("password") - if password: - return {"role_arn": password} + url = "{}/applications?filter[source_id]={}".format(self._base_url, str(self._source_id)) + + r = self._get_network_response(url, self._identity_header, "Unable to AWS RoleARN") + endpoint_response = r.json() + if endpoint_response.get("data"): + resource_id = endpoint_response.get("data")[0].get("id") + else: + raise SourcesHTTPClientError(f"Unable to get AWS roleARN for Source: {self._source_id}") + + authentications_str = "{}/authentications?[authtype]=arn&[resource_id]={}" + authentications_url = authentications_str.format(self._base_url, str(resource_id)) + r = self._get_network_response(authentications_url, self._identity_header, "Unable to AWS RoleARN") + authentications_response = r.json() + if not authentications_response.get("data"): + raise SourcesHTTPClientError(f"Unable to get AWS roleARN for Source: {self._source_id}") + + authentications_id = authentications_response.get("data")[0].get("id") + + authentications_internal_url = "{}/authentications/{}?expose_encrypted_attribute[]=password".format( + self._internal_url, str(authentications_id) + ) + r = self._get_network_response(authentications_internal_url, self._identity_header, "Unable to AWS RoleARN") + authentications_internal_response = r.json() + password = authentications_internal_response.get("password") + if password: + return {"role_arn": password} raise SourcesHTTPClientError(f"Unable to get AWS roleARN for Source: {self._source_id}") @@ -246,47 +209,41 @@ def get_gcp_credentials(self): def get_azure_credentials(self): """Get the Azure Credentials from Sources Authentication service.""" - urls = [ - "{}/applications?filter[source_id]={}".format(self._base_url, str(self._source_id)), - "{}/endpoints?filter[source_id]={}".format(self._base_url, str(self._source_id)), - ] - - for url in urls: - r = self._get_network_response(url, self._identity_header, "Unable to get Azure credentials") - endpoint_response = r.json() - if endpoint_response.get("data"): - resource_id = endpoint_response.get("data")[0].get("id") - else: - continue - - authentications_url = ( - f"{self._base_url}/authentications?" 
- f"[authtype]=tenant_id_client_id_client_secret&[resource_id]={str(resource_id)}" - ) - r = self._get_network_response( - authentications_url, self._identity_header, "Unable to get Azure credentials" - ) - authentications_response = r.json() - if not authentications_response.get("data"): - continue - data_dict = authentications_response.get("data")[0] - authentications_id = data_dict.get("id") - - authentications_internal_url = ( - f"{self._internal_url}/authentications/{str(authentications_id)}?expose_encrypted_attribute[]=password" - ) - r = self._get_network_response( - authentications_internal_url, self._identity_header, "Unable to get Azure credentials" - ) - authentications_internal_response = r.json() - password = authentications_internal_response.get("password") - - if password and data_dict: - return { - "client_id": data_dict.get("username"), - "client_secret": password, - "tenant_id": data_dict.get("extra").get("azure").get("tenant_id"), - } + url = "{}/applications?filter[source_id]={}".format(self._base_url, str(self._source_id)) + + r = self._get_network_response(url, self._identity_header, "Unable to get Azure credentials") + endpoint_response = r.json() + if endpoint_response.get("data"): + resource_id = endpoint_response.get("data")[0].get("id") + else: + raise SourcesHTTPClientError(f"Unable to get Azure credentials for Source: {self._source_id}") + + authentications_url = ( + f"{self._base_url}/authentications?" + f"[authtype]=tenant_id_client_id_client_secret&[resource_id]={str(resource_id)}" + ) + r = self._get_network_response(authentications_url, self._identity_header, "Unable to get Azure credentials") + authentications_response = r.json() + if not authentications_response.get("data"): + raise SourcesHTTPClientError(f"Unable to get Azure credentials for Source: {self._source_id}") + data_dict = authentications_response.get("data")[0] + authentications_id = data_dict.get("id") + + authentications_internal_url = ( + f"{self._internal_url}/authentications/{str(authentications_id)}?expose_encrypted_attribute[]=password" + ) + r = self._get_network_response( + authentications_internal_url, self._identity_header, "Unable to get Azure credentials" + ) + authentications_internal_response = r.json() + password = authentications_internal_response.get("password") + + if password and data_dict: + return { + "client_id": data_dict.get("username"), + "client_secret": password, + "tenant_id": data_dict.get("extra").get("azure").get("tenant_id"), + } raise SourcesHTTPClientError(f"Unable to get Azure credentials for Source: {self._source_id}") diff --git a/koku/sources/storage.py b/koku/sources/storage.py index 22b070c31c..f91dada116 100644 --- a/koku/sources/storage.py +++ b/koku/sources/storage.py @@ -339,20 +339,6 @@ def get_source_type(source_id): return source_type -def get_source_from_endpoint(endpoint_id): - """Get Source ID from Endpoint ID.""" - source_id = None - try: - query = Sources.objects.get(endpoint_id=endpoint_id) - source_id = query.source_id - except Sources.DoesNotExist: - LOG.info(f"Endpoint ID {endpoint_id} not associated with Cost Management") - except (InterfaceError, OperationalError) as error: - LOG.error(f"source.storage.get_source_from_endpoint {type(error).__name__}: {error}") - raise error - return source_id - - def add_provider_sources_auth_info(source_id, authentication): """ Add additional Sources information to a Source database object. 
@@ -407,9 +393,6 @@ def add_provider_sources_network_info(details, source_id): if source.source_type != details.source_type: source.source_type = details.source_type save_needed = True - if str(source.endpoint_id) != details.endpoint_id: - source.endpoint_id = details.endpoint_id - save_needed = True if save_needed: source.save() diff --git a/koku/sources/test/test_kafka_listener.py b/koku/sources/test/test_kafka_listener.py index 1592d4c3b5..927f218c5d 100644 --- a/koku/sources/test/test_kafka_listener.py +++ b/koku/sources/test/test_kafka_listener.py @@ -573,7 +573,7 @@ def test_sources_network_info_sync_aws(self): json={"data": []}, ) m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={test_source_id}", + f"http://www.sources.com/api/v1.0/applications?filter[source_id]={test_source_id}", status_code=200, json={"data": [{"id": resource_id}]}, ) @@ -688,11 +688,6 @@ def test_sources_network_info_sync_aws_local(self): m.get( f"http://www.sources.com/api/v1.0/applications?filter[source_id]={test_source_id}", status_code=200, - json={"data": []}, - ) - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={test_source_id}", - status_code=200, json={"data": [{"id": resource_id}]}, ) m.get( @@ -803,11 +798,6 @@ def test_sources_network_info_sync_azure(self): m.get( f"http://www.sources.com/api/v1.0/applications?filter[source_id]={test_source_id}", status_code=200, - json={"data": []}, - ) - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={test_source_id}", - status_code=200, json={"data": [{"id": resource_id}]}, ) m.get( @@ -875,11 +865,6 @@ def test_sources_network_info_sync_azure_local(self): m.get( f"http://www.sources.com/api/v1.0/applications?filter[source_id]={test_source_id}", status_code=200, - json={"data": []}, - ) - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={test_source_id}", - status_code=200, json={"data": [{"id": resource_id}]}, ) m.get( @@ -947,7 +932,7 @@ def test_sources_network_info_no_endpoint(self): json={"data": [{"name": mock_source_name}]}, ) m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={test_source_id}", + f"http://www.sources.com/api/v1.0/applications?filter[source_id]={test_source_id}", status_code=200, json={"data": []}, ) @@ -1026,8 +1011,7 @@ def test_process_message_application_unsupported_source_type(self): SourcesHTTPClient, "get_source_details", return_value={"name": "my ansible", "source_type_id": 2} ): with patch.object(SourcesHTTPClient, "get_source_type_name", return_value="ansible-tower"): - with patch.object(SourcesHTTPClient, "get_endpoint_id", return_value=1): - self.assertIsNone(process_message(test_application_id, msg_data)) + self.assertIsNone(process_message(test_application_id, msg_data)) @patch.object(Config, "SOURCES_API_URL", "http://www.sources.com") @patch("sources.kafka_listener.sources_network_info", returns=None) @@ -1053,7 +1037,7 @@ def _expected_authentication_create(msg_data, test, save_auth_info_mock): "value": { "id": 1, "source_id": 1, - "resource_type": "Endpoint", + "resource_type": "Application", "resource_id": "1", "application_type_id": test_application_id, }, @@ -1065,7 +1049,7 @@ def _expected_authentication_create(msg_data, test, save_auth_info_mock): "value": { "id": 1, "source_id": 1, - "resource_type": "Endpoint", + "resource_type": "Application", "resource_id": "1", "application_type_id": test_application_id, }, @@ -1089,7 +1073,7 @@ def _expected_authentication_create(msg_data, test, save_auth_info_mock): 
"value": { "id": 1, "source_id": 1, - "resource_type": "Endpoint", + "resource_type": "Application", "resource_id": "1", "application_type_id": test_application_id, }, @@ -1101,24 +1085,19 @@ def _expected_authentication_create(msg_data, test, save_auth_info_mock): for test in test_matrix: msg_data = MsgDataGenerator(event_type=test.get("event"), value=test.get("value")).get_data() with patch.object( - SourcesHTTPClient, "get_source_id_from_endpoint_id", return_value=test.get("value").get("source_id") + SourcesHTTPClient, + "get_application_type_is_cost_management", + return_value=test.get("expected_cost_mgmt_match"), ): with patch.object( SourcesHTTPClient, - "get_application_type_is_cost_management", + "get_source_id_from_applications_id", return_value=test.get("expected_cost_mgmt_match"), ): - with patch.object( - SourcesHTTPClient, - "get_source_id_from_applications_id", - return_value=test.get("expected_cost_mgmt_match"), - ): - with patch.object( - SourcesHTTPClient, "get_source_details", return_value={"source_type_id": "1"} - ): - with patch.object(SourcesHTTPClient, "get_source_type_name", return_value="amazon"): - process_message(test_application_id, msg_data) - test.get("expected_fn")(msg_data, test, mock_save_auth_info) + with patch.object(SourcesHTTPClient, "get_source_details", return_value={"source_type_id": "1"}): + with patch.object(SourcesHTTPClient, "get_source_type_name", return_value="amazon"): + process_message(test_application_id, msg_data) + test.get("expected_fn")(msg_data, test, mock_save_auth_info) @patch.object(Config, "SOURCES_API_URL", "http://www.sources.com") @patch("sources.kafka_listener.sources_network_info", returns=None) @@ -1207,7 +1186,7 @@ def _expected_update(test): "value": { "id": 1, "source_id": 1, - "resource_type": "Endpoint", + "resource_type": "Application", "resource_id": "1", "application_type_id": test_application_id, }, @@ -1231,15 +1210,13 @@ def _expected_update(test): test_source.save() msg_data = MsgDataGenerator(event_type=test.get("event"), value=test.get("value")).get_data() with patch.object( - SourcesHTTPClient, "get_source_id_from_endpoint_id", return_value=test.get("value").get("source_id") + SourcesHTTPClient, + "get_application_type_is_cost_management", + return_value=test.get("expected_cost_mgmt_match"), ): - with patch.object( - SourcesHTTPClient, - "get_application_type_is_cost_management", - return_value=test.get("expected_cost_mgmt_match"), - ): - with patch.object(SourcesHTTPClient, "get_source_details", return_value={"source_type_id": "1"}): - with patch.object(SourcesHTTPClient, "get_source_type_name", return_value="amazon"): + with patch.object(SourcesHTTPClient, "get_source_details", return_value={"source_type_id": "1"}): + with patch.object(SourcesHTTPClient, "get_source_type_name", return_value="amazon"): + with patch.object(SourcesHTTPClient, "get_source_id_from_applications_id", return_value=1): process_message(test_application_id, msg_data) test.get("expected_fn")(test) Sources.objects.all().delete() diff --git a/koku/sources/test/test_sources_http_client.py b/koku/sources/test/test_sources_http_client.py index cb2d128dbb..12e1a2f7f5 100644 --- a/koku/sources/test/test_sources_http_client.py +++ b/koku/sources/test/test_sources_http_client.py @@ -178,11 +178,6 @@ def test_get_aws_credentials(self): m.get( f"http://www.sources.com/api/v1.0/applications?filter[source_id]={self.source_id}", status_code=200, - json={"data": []}, - ) - m.get( - 
f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={self.source_id}", - status_code=200, json={"data": [{"id": resource_id}]}, ) m.get( @@ -239,11 +234,6 @@ def test_get_aws_credentials_no_auth(self): m.get( f"http://www.sources.com/api/v1.0/applications?filter[source_id]={self.source_id}", status_code=200, - json={"data": []}, - ) - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={self.source_id}", - status_code=200, json={"data": [{"id": resource_id}]}, ) m.get( @@ -305,11 +295,6 @@ def test_get_gcp_credentials_no_auth(self): status_code=200, json={"data": []}, ) - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={self.source_id}", - status_code=200, - json={"data": [{"id": resource_id}]}, - ) m.get( (f"http://www.sources.com/api/v1.0/authentications?" f"[authtype]=arn&[resource_id]={resource_id}"), status_code=200, @@ -338,11 +323,6 @@ def test_get_gcp_credentials_no_password(self): status_code=200, json={"data": [{"id": resource_id}]}, ) - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={self.source_id}", - status_code=200, - json={"data": [{"id": resource_id}]}, - ) m.get( ( f"http://www.sources.com/api/v1.0/authentications?" @@ -373,11 +353,6 @@ def test_get_aws_credentials_no_endpoint(self): status_code=200, json={"data": []}, ) - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={self.source_id}", - status_code=200, - json={"data": []}, - ) with self.assertRaises(SourcesHTTPClientError): client.get_aws_credentials() @@ -389,11 +364,7 @@ def test_get_aws_credentials_connection_error(self): with requests_mock.mock() as m: m.get( f"http://www.sources.com/api/v1.0/applications?filter[source_id]={self.source_id}", - status_code=200, - json={"data": []}, - ) - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={self.source_id}", exc=RequestException + exc=RequestException, ) with self.assertRaises(SourcesHTTPClientError): client.get_aws_credentials() @@ -418,11 +389,6 @@ def test_get_azure_credentials(self): m.get( f"http://www.sources.com/api/v1.0/applications?filter[source_id]={self.source_id}", status_code=200, - json={"data": []}, - ) - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={self.source_id}", - status_code=200, json={"data": [{"id": resource_id}]}, ) m.get( @@ -504,11 +470,6 @@ def test_get_azure_credentials_no_auth(self): m.get( f"http://www.sources.com/api/v1.0/applications?filter[source_id]={self.source_id}", status_code=200, - json={"data": []}, - ) - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={self.source_id}", - status_code=200, json={"data": [{"id": resource_id}]}, ) m.get( @@ -537,11 +498,7 @@ def test_get_azure_credentials_connection_error(self): with requests_mock.mock() as m: m.get( f"http://www.sources.com/api/v1.0/applications?filter[source_id]={self.source_id}", - status_code=200, - json={"data": []}, - ) - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={self.source_id}", exc=RequestException + exc=RequestException, ) with self.assertRaises(SourcesHTTPClientError): client.get_azure_credentials() @@ -556,129 +513,9 @@ def test_get_azure_credentials_no_endpoint(self): status_code=200, json={"data": []}, ) - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={self.source_id}", - status_code=200, - json={"data": []}, - ) with self.assertRaises(SourcesHTTPClientError): client.get_azure_credentials() - @patch.object(Config, "SOURCES_API_URL", 
"http://www.sources.com") - def test_get_endpoint_id(self): - """Test to get endpoint_id from Source_id.""" - resource_id = 2 - source_id = 3 - - client = SourcesHTTPClient(auth_header=Config.SOURCES_FAKE_HEADER, source_id=source_id) - with requests_mock.mock() as m: - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={source_id}", - status_code=200, - json={"data": [{"id": resource_id}]}, - ) - response = client.get_endpoint_id() - self.assertEqual(response, resource_id) - - @patch.object(Config, "SOURCES_API_URL", "http://www.sources.com") - def test_get_endpoint_id_no_data(self): - """Test to get endpoint_id from Source_id with no data in response.""" - source_id = 3 - - client = SourcesHTTPClient(auth_header=Config.SOURCES_FAKE_HEADER, source_id=source_id) - with requests_mock.mock() as m: - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={source_id}", - status_code=200, - json={"data": []}, - ) - self.assertIsNone(client.get_endpoint_id()) - - @patch.object(Config, "SOURCES_API_URL", "http://www.sources.com") - def test_get_endpoint_id_misconfigured(self): - """Test to get endpoint_id from Source_id with route not found.""" - resource_id = 2 - source_id = 3 - - client = SourcesHTTPClient(auth_header=Config.SOURCES_FAKE_HEADER, source_id=source_id) - with requests_mock.mock() as m: - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={source_id}", - status_code=404, - json={"data": [{"id": resource_id}]}, - ) - with self.assertRaises(SourceNotFoundError): - client.get_endpoint_id() - - @patch.object(Config, "SOURCES_API_URL", "http://www.sources.com") - def test_get_endpoint_ids_connection_error(self): - """Test to get endpoint id with connection error.""" - client = SourcesHTTPClient(auth_header=Config.SOURCES_FAKE_HEADER, source_id=self.source_id) - with requests_mock.mock() as m: - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[source_id]={self.source_id}", exc=RequestException - ) - with self.assertRaises(SourcesHTTPClientError): - client.get_endpoint_id() - - @patch.object(Config, "SOURCES_API_URL", "http://www.sources.com") - def test_get_source_id_from_endpoint_id(self): - """Test to get source_id from resource_id.""" - resource_id = 2 - source_id = 3 - - client = SourcesHTTPClient(auth_header=Config.SOURCES_FAKE_HEADER, source_id=source_id) - with requests_mock.mock() as m: - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[id]={resource_id}", - status_code=200, - json={"data": [{"source_id": source_id}]}, - ) - response = client.get_source_id_from_endpoint_id(resource_id) - self.assertEqual(response, source_id) - - @patch.object(Config, "SOURCES_API_URL", "http://www.sources.com") - def test_get_source_id_from_endpoint_id_no_data(self): - """Test to get source_id from resource_id with no data in response.""" - resource_id = 2 - source_id = 3 - - client = SourcesHTTPClient(auth_header=Config.SOURCES_FAKE_HEADER, source_id=source_id) - with requests_mock.mock() as m: - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[id]={resource_id}", - status_code=200, - json={"data": []}, - ) - self.assertIsNone(client.get_source_id_from_endpoint_id(resource_id)) - - @patch.object(Config, "SOURCES_API_URL", "http://www.sources.com") - def test_get_source_id_from_endpoint_id_misconfigured(self): - """Test to get source_id from resource_id with route not found.""" - resource_id = 2 - source_id = 3 - - client = SourcesHTTPClient(auth_header=Config.SOURCES_FAKE_HEADER, source_id=source_id) - with 
requests_mock.mock() as m: - m.get( - f"http://www.sources.com/api/v1.0/endpoints?filter[id]={resource_id}", - status_code=404, - json={"data": [{"id": resource_id}]}, - ) - with self.assertRaises(SourceNotFoundError): - client.get_source_id_from_endpoint_id(resource_id) - - @patch.object(Config, "SOURCES_API_URL", "http://www.sources.com") - def test_get_source_id_from_endpoint_id_connection_error(self): - """Test to get source ID from endpoint ID with connection error.""" - resource_id = 2 - - client = SourcesHTTPClient(auth_header=Config.SOURCES_FAKE_HEADER, source_id=self.source_id) - with requests_mock.mock() as m: - m.get(f"http://www.sources.com/api/v1.0/endpoints?filter[id]={resource_id}", exc=RequestException) - with self.assertRaises(SourcesHTTPClientError): - client.get_source_id_from_endpoint_id(resource_id) - @patch.object(Config, "SOURCES_API_URL", "http://www.sources.com") def test_get_source_id_from_applications_id(self): """Test to get source_id from application resource_id.""" diff --git a/koku/sources/test/test_storage.py b/koku/sources/test/test_storage.py index 6768d338f9..df85abea8d 100644 --- a/koku/sources/test/test_storage.py +++ b/koku/sources/test/test_storage.py @@ -184,7 +184,6 @@ def test_add_provider_network_info(self): test_source = Sources.objects.get(source_id=self.test_source_id) self.assertEqual(test_source.name, test_name) self.assertEqual(test_source.source_type, source_type) - self.assertEqual(test_source.endpoint_id, endpoint_id) self.assertEqual(str(test_source.source_uuid), source_uuid) def test_add_provider_network_info_not_found(self): @@ -346,40 +345,14 @@ def test_get_source_type(self): self.assertEquals(response, Provider.PROVIDER_OCP) self.assertEquals(storage.get_source_type(test_source_id + 1), None) - def test_get_source_from_endpoint(self): - """Test to source from endpoint id.""" - test_source_id = 3 - test_endpoint_id = 4 - aws_obj = Sources( - source_id=test_source_id, - auth_header=self.test_header, - offset=3, - endpoint_id=test_endpoint_id, - source_type=Provider.PROVIDER_AWS, - name="Test AWS Source", - authentication={"role_arn": "arn:test"}, - billing_source={"bucket": "test-bucket"}, - ) - aws_obj.save() - - response = storage.get_source_from_endpoint(test_endpoint_id) - self.assertEquals(response, test_source_id) - self.assertEquals(storage.get_source_from_endpoint(test_source_id + 10), None) - with patch("sources.storage.Sources.objects") as mock_objects: - mock_objects.get.side_effect = InterfaceError("Test exception") - with self.assertRaises(InterfaceError): - storage.get_source_from_endpoint(test_endpoint_id) - def test_add_provider_sources_auth_info(self): """Test to add authentication to a source.""" test_source_id = 3 - test_endpoint_id = 4 test_authentication = {"role_arn": "arn:test"} aws_obj = Sources( source_id=test_source_id, auth_header=self.test_header, offset=3, - endpoint_id=test_endpoint_id, source_type=Provider.PROVIDER_AWS, name="Test AWS Source", billing_source={"bucket": "test-bucket"}, @@ -393,13 +366,11 @@ def test_add_provider_sources_auth_info(self): def test_add_provider_sources_auth_info_with_sub_id(self): """Test to add authentication to a source with subscription_id.""" test_source_id = 3 - test_endpoint_id = 4 test_authentication = {"credentials": {"client_id": "new-client-id"}} azure_obj = Sources( source_id=test_source_id, auth_header=self.test_header, offset=3, - endpoint_id=test_endpoint_id, source_type=Provider.PROVIDER_AZURE, name="Test AZURE Source", authentication={"credentials": 
{"subscription_id": "orig-sub-id", "client_id": "test-client-id"}}, @@ -419,7 +390,6 @@ def test_enqueue_source_delete(self): source_id=test_source_id, auth_header=self.test_header, offset=test_offset, - endpoint_id=4, source_type=Provider.PROVIDER_AWS, name="Test AWS Source", billing_source={"bucket": "test-bucket"}, @@ -449,7 +419,6 @@ def test_enqueue_source_delete_in_pending(self): source_id=test_source_id, auth_header=self.test_header, offset=test_offset, - endpoint_id=4, source_type=Provider.PROVIDER_AWS, name="Test AWS Source", billing_source={"bucket": "test-bucket"}, @@ -511,7 +480,6 @@ def test_enqueue_source_update(self): pending_delete=test.get("pending_delete"), pending_update=test.get("pending_update"), offset=3, - endpoint_id=4, source_type=Provider.PROVIDER_AWS, name="Test AWS Source", billing_source={"bucket": "test-bucket"}, @@ -544,7 +512,6 @@ def test_clear_update_flag(self): koku_uuid=test.get("koku_uuid"), pending_update=test.get("pending_update"), offset=3, - endpoint_id=4, source_type=Provider.PROVIDER_AWS, name="Test AWS Source", billing_source={"bucket": "test-bucket"}, @@ -580,7 +547,6 @@ def test_load_providers_to_update(self): pending_update=test.get("pending_update"), pending_delete=test.get("pending_delete"), offset=3, - endpoint_id=4, source_type=Provider.PROVIDER_AWS, name="Test AWS Source", billing_source={"bucket": "test-bucket"}, From 8a192b54548fb2735b07c243a232067fd3fd497e Mon Sep 17 00:00:00 2001 From: Brett Lentz Date: Wed, 20 Jan 2021 09:17:08 -0500 Subject: [PATCH 10/17] COST-853 - migrate forecasting to rolling 30-day window; enforce minimum number of data points (#2599) --- koku/api/utils.py | 6 +- koku/forecast/forecast.py | 23 ++-- koku/forecast/test/tests_forecast.py | 169 ++++++++++++++------------- 3 files changed, 102 insertions(+), 96 deletions(-) diff --git a/koku/api/utils.py b/koku/api/utils.py index 374bea35cd..bd2816a823 100644 --- a/koku/api/utils.py +++ b/koku/api/utils.py @@ -208,7 +208,11 @@ def list_days(self, start_date, end_date): end_midnight = end_date.replace(hour=0, minute=0, second=0, microsecond=0) start_midnight = start_date.replace(hour=0, minute=0, second=0, microsecond=0) days = (end_midnight - start_midnight + self.one_day).days - return [start_midnight + datetime.timedelta(i) for i in range(days)] + + # built-in range(start, end, step) requires (start < end) == True + day_range = range(days, 0) if days < 0 else range(0, days) + output = [start_midnight + datetime.timedelta(i) for i in day_range] + return output def list_months(self, start_date, end_date): """Return a list of months from the start date til the end date. 
diff --git a/koku/forecast/forecast.py b/koku/forecast/forecast.py index 7f18cc88d7..7ab527428d 100644 --- a/koku/forecast/forecast.py +++ b/koku/forecast/forecast.py @@ -89,16 +89,10 @@ def __init__(self, query_params): # noqa: C901 # We have access constraints, but no view to accomodate, default to daily summary table self.cost_summary_table = self.provider_map.report_type_map.get("tables", {}).get("query") - # FIXME: replace with rolling 30-day window - if self.dh.today.day == 1: - self.forecast_days_required = self.dh.this_month_end.day - else: - self.forecast_days_required = self.dh.this_month_end.day - self.dh.yesterday.day + self.forecast_days_required = (self.dh.this_month_end - self.dh.yesterday).days - if self.dh.today.day <= self.MINIMUM: - self.query_range = (self.dh.last_month_start, self.dh.yesterday) - else: - self.query_range = (self.dh.this_month_start, self.dh.yesterday) + # forecasts use a rolling window + self.query_range = (self.dh.n_days_ago(self.dh.yesterday, 30), self.dh.yesterday) self.filters = QueryFilterCollection() self.filters.add(field="usage_start", operation="gte", parameter=self.query_range[0]) @@ -168,12 +162,13 @@ def _predict(self, data): """ LOG.debug("Forecast input data: %s", data) - if len(data) < 3: - LOG.warning("Unable to calculate forecast. Insufficient data for %s.", self.params.tenant) - return [] - if len(data) < self.MINIMUM: - LOG.warning("Number of data elements is fewer than the minimum.") + LOG.warning( + "Number of data elements (%s) is fewer than the minimum (%s). Unable to generate forecast.", + len(data), + self.MINIMUM, + ) + return [] dates, costs = zip(*data) diff --git a/koku/forecast/test/tests_forecast.py b/koku/forecast/test/tests_forecast.py index d2188067ed..d02572cba7 100644 --- a/koku/forecast/test/tests_forecast.py +++ b/koku/forecast/test/tests_forecast.py @@ -87,67 +87,87 @@ def test_forecast_days_required(self): """Test that we accurately select the number of days.""" params = self.mocked_query_params("?", AWSCostForecastView) + dh = DateHelper() mock_dh = Mock(spec=DateHelper) - mock_dh.return_value.today = datetime(2000, 1, 1, 0, 0, 0, 0) - mock_dh.return_value.yesterday = datetime(1999, 12, 31, 0, 0, 0, 0) - mock_dh.return_value.this_month_start = datetime(2000, 1, 1, 0, 0, 0, 0) - mock_dh.return_value.this_month_end = datetime(2000, 1, 31, 0, 0, 0, 0) - mock_dh.return_value.last_month_start = datetime(1999, 12, 1, 0, 0, 0, 0) - mock_dh.return_value.last_month_end = datetime(1999, 12, 31, 0, 0, 0, 0) - - with patch("forecast.forecast.DateHelper", new_callable=lambda: mock_dh) as mock_dh: - forecast = AWSForecast(params) - self.assertEqual(forecast.forecast_days_required, 31) - - mock_dh.return_value.today = datetime(2000, 1, 13, 0, 0, 0, 0) - mock_dh.return_value.yesterday = datetime(2000, 1, 12, 0, 0, 0, 0) - mock_dh.return_value.this_month_start = datetime(2000, 1, 1, 0, 0, 0, 0) - mock_dh.return_value.this_month_end = datetime(2000, 1, 31, 0, 0, 0, 0) - mock_dh.return_value.last_month_start = datetime(1999, 12, 1, 0, 0, 0, 0) - mock_dh.return_value.last_month_end = datetime(1999, 12, 31, 0, 0, 0, 0) - - with patch("forecast.forecast.DateHelper", new_callable=lambda: mock_dh) as mock_dh: - forecast = AWSForecast(params) - self.assertEqual(forecast.forecast_days_required, 19) - - def test_query_range_under(self): - """Test that we select the correct range based on day of month.""" - params = self.mocked_query_params("?", AWSCostForecastView) + scenarios = [ + { + "today": dh.today, + "yesterday": dh.yesterday, + 
"this_month_end": dh.this_month_end, + "expected": (dh.this_month_end - dh.yesterday).days, + }, + { + "today": datetime(2000, 1, 1, 0, 0, 0, 0), + "yesterday": datetime(1999, 12, 31, 0, 0, 0, 0), + "this_month_end": datetime(2000, 1, 31, 0, 0, 0, 0), + "expected": 31, + }, + { + "today": datetime(2000, 1, 31, 0, 0, 0, 0), + "yesterday": datetime(2000, 1, 30, 0, 0, 0, 0), + "this_month_end": datetime(2000, 1, 31, 0, 0, 0, 0), + "expected": 1, + }, + ] - dh = DateHelper() - mock_dh = Mock(spec=DateHelper) + mock_dh.return_value.n_days_ago = dh.n_days_ago # pass-thru to real function - mock_dh.return_value.today = dh.this_month_start + timedelta(days=AWSForecast.MINIMUM - 1) - mock_dh.return_value.yesterday = dh.this_month_start + timedelta(days=AWSForecast.MINIMUM - 2) - mock_dh.return_value.this_month_start = dh.this_month_start - mock_dh.return_value.this_month_end = dh.this_month_end - mock_dh.return_value.last_month_start = dh.last_month_start - mock_dh.return_value.last_month_end = dh.last_month_end + for test in scenarios: + with self.subTest(scenario=test): + mock_dh.return_value.today = test["today"] + mock_dh.return_value.yesterday = test["yesterday"] + mock_dh.return_value.this_month_end = test["this_month_end"] - with patch("forecast.forecast.DateHelper", new_callable=lambda: mock_dh) as mock_dh: - expected = (dh.last_month_start, dh.this_month_start + timedelta(days=AWSForecast.MINIMUM - 2)) - forecast = AWSForecast(params) - self.assertEqual(forecast.query_range, expected) + with patch("forecast.forecast.DateHelper", new_callable=lambda: mock_dh) as mock_dh: + forecast = AWSForecast(params) + self.assertEqual(forecast.forecast_days_required, test["expected"]) - def test_query_range_over(self): + def test_query_range(self): """Test that we select the correct range based on day of month.""" params = self.mocked_query_params("?", AWSCostForecastView) dh = DateHelper() mock_dh = Mock(spec=DateHelper) - mock_dh.return_value.today = dh.this_month_start + timedelta(days=(AWSForecast.MINIMUM + 1)) - mock_dh.return_value.yesterday = dh.this_month_start + timedelta(days=AWSForecast.MINIMUM) - mock_dh.return_value.this_month_start = dh.this_month_start - mock_dh.return_value.this_month_end = dh.this_month_end - mock_dh.return_value.last_month_start = dh.last_month_start - mock_dh.return_value.last_month_end = dh.last_month_end + scenarios = [ + { + "today": dh.today, + "yesterday": dh.yesterday, + "this_month_end": dh.this_month_end, + "expected": (dh.yesterday + timedelta(days=-30), dh.yesterday), + }, + { + "today": datetime(2000, 1, 1, 0, 0, 0, 0), + "yesterday": datetime(1999, 12, 31, 0, 0, 0, 0), + "this_month_end": datetime(2000, 1, 31, 0, 0, 0, 0), + "expected": ( + datetime(1999, 12, 31, 0, 0, 0, 0) + timedelta(days=-30), + datetime(1999, 12, 31, 0, 0, 0, 0), + ), + }, + { + "today": datetime(2000, 1, 31, 0, 0, 0, 0), + "yesterday": datetime(2000, 1, 30, 0, 0, 0, 0), + "this_month_end": datetime(2000, 1, 31, 0, 0, 0, 0), + "expected": ( + datetime(2000, 1, 30, 0, 0, 0, 0) + timedelta(days=-30), + datetime(2000, 1, 30, 0, 0, 0, 0), + ), + }, + ] + + mock_dh.return_value.n_days_ago = dh.n_days_ago # pass-thru to real function + + for test in scenarios: + with self.subTest(scenario=test): + mock_dh.return_value.today = test["today"] + mock_dh.return_value.yesterday = test["yesterday"] + mock_dh.return_value.this_month_end = test["this_month_end"] - with patch("forecast.forecast.DateHelper", new_callable=lambda: mock_dh) as mock_dh: - expected = (dh.this_month_start, 
dh.this_month_start + timedelta(days=AWSForecast.MINIMUM)) - forecast = AWSForecast(params) - self.assertEqual(forecast.query_range, expected) + with patch("forecast.forecast.DateHelper", new_callable=lambda: mock_dh) as mock_dh: + forecast = AWSForecast(params) + self.assertEqual(forecast.query_range, test["expected"]) def test_remove_outliers(self): """Test that we remove outliers before predicting.""" @@ -293,7 +313,7 @@ def test_predict_few_values(self): """Test that predict() behaves well with a limited data set.""" dh = DateHelper() - num_elements = [1, 2, 3, 4, 5] + num_elements = [AWSForecast.MINIMUM - 1, AWSForecast.MINIMUM, AWSForecast.MINIMUM + 1] for number in num_elements: with self.subTest(num_elements=number): @@ -304,10 +324,9 @@ def test_predict_few_values(self): expected.append( { "usage_start": dh.n_days_ago(dh.today, 10 - n).date(), - # "usage_start": dh.today.replace(day=n).date(), - "total_cost": 5 + random.random(), - "infrastructure_cost": 3 + random.random(), - "supplementary_cost": 2 + random.random(), + "total_cost": 5 + (0.01 * n), + "infrastructure_cost": 3 + (0.01 * n), + "supplementary_cost": 2 + (0.01 * n), } ) mock_qset = MockQuerySet(expected) @@ -322,41 +341,29 @@ def test_predict_few_values(self): instance = AWSForecast(params) instance.cost_summary_table = mocked_table - if number < 3: - # forecasting isn't possible with less than 3 data points. + if number < AWSForecast.MINIMUM: + # forecasting isn't useful with less than the minimum number of data points. with self.assertLogs(logger="forecast.forecast", level=logging.WARNING): results = instance.predict() self.assertEqual(results, []) else: - with self.assertLogs(logger="forecast.forecast", level=logging.WARNING): - results = instance.predict() + results = instance.predict() - self.assertNotEqual(results, []) + self.assertNotEqual(results, []) - for result in results: - for val in result.get("values", []): - self.assertIsInstance(val.get("date"), date) + for result in results: + for val in result.get("values", []): + self.assertIsInstance(val.get("date"), date) - item = val.get("cost") - self.assertGreaterEqual(float(item.get("total").get("value")), 0) - self.assertGreaterEqual(float(item.get("confidence_max").get("value")), 0) - self.assertGreaterEqual(float(item.get("confidence_min").get("value")), 0) - self.assertGreaterEqual(float(item.get("rsquared").get("value")), 0) - for pval in item.get("pvalues").get("value"): - self.assertGreaterEqual(float(pval), 0) - # test that the results always stop at the end of the month. 
- self.assertEqual(results[-1].get("date"), dh.this_month_end.date()) - - def test_results_never_outside_current_month(self): - """Test that our results stop at the end of the current month.""" - dh = DateHelper() - params = self.mocked_query_params("?", AWSCostForecastView) - forecast = AWSForecast(params) - forecast.forecast_days_required = 100 - results = forecast.predict() - dates = [result.get("date") for result in results] - self.assertNotIn(dh.next_month_start, dates) - self.assertEqual(dh.this_month_end.date(), max(dates)) + item = val.get("cost") + self.assertGreaterEqual(float(item.get("total").get("value")), 0) + self.assertGreaterEqual(float(item.get("confidence_max").get("value")), 0) + self.assertGreaterEqual(float(item.get("confidence_min").get("value")), 0) + self.assertGreaterEqual(float(item.get("rsquared").get("value")), 0) + for pval in item.get("pvalues").get("value"): + self.assertGreaterEqual(float(pval), 0) + # test that the results always stop at the end of the month. + self.assertEqual(results[-1].get("date"), dh.this_month_end.date()) def test_set_access_filter_with_list(self): """ From a63ba21afbc834d44cb1f4e668e5f7299fb5017e Mon Sep 17 00:00:00 2001 From: Douglas Curtis Date: Wed, 20 Jan 2021 13:58:08 -0500 Subject: [PATCH 11/17] updating cache key (#2602) --- .github/workflows/unittests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml index 81d4ea6e0a..2b6cbea20d 100644 --- a/.github/workflows/unittests.yml +++ b/.github/workflows/unittests.yml @@ -54,7 +54,7 @@ jobs: path: | ~/.cache/pipenv ~/.local/share/virtualenvs - key: os-${{ runner.os }}-env-${{ matrix.python-version }}-${{ hashFiles('**/Pipfile.lock') }}-${{ github.ref }} + key: ${{ runner.os }}-env-${{ matrix.python-version }}-${{ hashFiles('**/Pipfile.lock') }}-${{ github.ref }} - name: Install dependencies if: steps.cache-dependencies.outputs.cache-hit != 'true' From 721db82b76626e2a4dbe5b8ca6bacb620a9168d3 Mon Sep 17 00:00:00 2001 From: Andrew Berglund Date: Wed, 20 Jan 2021 16:42:14 -0500 Subject: [PATCH 12/17] Only consider group by for by_project apis (#2531) --- koku/api/report/all/openshift/query_handler.py | 4 ++-- koku/api/report/aws/openshift/query_handler.py | 4 ++-- koku/api/report/azure/openshift/query_handler.py | 4 ++-- koku/api/report/ocp/query_handler.py | 4 ++-- koku/api/report/queries.py | 6 ++---- 5 files changed, 10 insertions(+), 12 deletions(-) diff --git a/koku/api/report/all/openshift/query_handler.py b/koku/api/report/all/openshift/query_handler.py index 99188eeba6..0fa2d25d1f 100644 --- a/koku/api/report/all/openshift/query_handler.py +++ b/koku/api/report/all/openshift/query_handler.py @@ -20,7 +20,7 @@ from api.models import Provider from api.report.all.openshift.provider_map import OCPAllProviderMap from api.report.aws.openshift.query_handler import OCPInfrastructureReportQueryHandlerBase -from api.report.queries import is_grouped_or_filtered_by_project +from api.report.queries import is_grouped_by_project LOG = logging.getLogger(__name__) @@ -61,7 +61,7 @@ def __init__(self, parameters): """ self._mapper = OCPAllProviderMap(provider=self.provider, report_type=parameters.report_type) # Update which field is used to calculate cost by group by param. 
- if is_grouped_or_filtered_by_project(parameters): + if is_grouped_by_project(parameters): self._report_type = parameters.report_type + "_by_project" self._mapper = OCPAllProviderMap(provider=self.provider, report_type=self._report_type) diff --git a/koku/api/report/aws/openshift/query_handler.py b/koku/api/report/aws/openshift/query_handler.py index 0be919c0ab..0061210701 100644 --- a/koku/api/report/aws/openshift/query_handler.py +++ b/koku/api/report/aws/openshift/query_handler.py @@ -27,7 +27,7 @@ from api.models import Provider from api.report.aws.openshift.provider_map import OCPAWSProviderMap from api.report.aws.query_handler import AWSReportQueryHandler -from api.report.queries import is_grouped_or_filtered_by_project +from api.report.queries import is_grouped_by_project LOG = logging.getLogger(__name__) @@ -135,7 +135,7 @@ def __init__(self, parameters): """ self._mapper = OCPAWSProviderMap(provider=self.provider, report_type=parameters.report_type) # Update which field is used to calculate cost by group by param. - if is_grouped_or_filtered_by_project(parameters): + if is_grouped_by_project(parameters): self._report_type = parameters.report_type + "_by_project" self._mapper = OCPAWSProviderMap(provider=self.provider, report_type=self._report_type) self.group_by_options = self._mapper.provider_map.get("group_by_options") diff --git a/koku/api/report/azure/openshift/query_handler.py b/koku/api/report/azure/openshift/query_handler.py index b600c56c1e..459ea5bf3c 100644 --- a/koku/api/report/azure/openshift/query_handler.py +++ b/koku/api/report/azure/openshift/query_handler.py @@ -26,7 +26,7 @@ from api.models import Provider from api.report.azure.openshift.provider_map import OCPAzureProviderMap from api.report.azure.query_handler import AzureReportQueryHandler -from api.report.queries import is_grouped_or_filtered_by_project +from api.report.queries import is_grouped_by_project LOG = logging.getLogger(__name__) @@ -45,7 +45,7 @@ def __init__(self, parameters): """ self._mapper = OCPAzureProviderMap(provider=self.provider, report_type=parameters.report_type) # Update which field is used to calculate cost by group by param. - if is_grouped_or_filtered_by_project(parameters): + if is_grouped_by_project(parameters): self._report_type = parameters.report_type + "_by_project" self._mapper = OCPAzureProviderMap(provider=self.provider, report_type=self._report_type) diff --git a/koku/api/report/ocp/query_handler.py b/koku/api/report/ocp/query_handler.py index 189914ef8b..6f466483a6 100644 --- a/koku/api/report/ocp/query_handler.py +++ b/koku/api/report/ocp/query_handler.py @@ -30,7 +30,7 @@ from api.models import Provider from api.report.ocp.provider_map import OCPProviderMap -from api.report.queries import is_grouped_or_filtered_by_project +from api.report.queries import is_grouped_by_project from api.report.queries import ReportQueryHandler LOG = logging.getLogger(__name__) @@ -58,7 +58,7 @@ def __init__(self, parameters): # super() needs to be called before _get_group_by is called # Update which field is used to calculate cost by group by param. 
- if is_grouped_or_filtered_by_project(parameters) and parameters.report_type == "costs": + if is_grouped_by_project(parameters) and parameters.report_type == "costs": self._report_type = parameters.report_type + "_by_project" self._mapper = OCPProviderMap(provider=self.provider, report_type=self._report_type) diff --git a/koku/api/report/queries.py b/koku/api/report/queries.py index 2705a7041e..5925566420 100644 --- a/koku/api/report/queries.py +++ b/koku/api/report/queries.py @@ -44,12 +44,10 @@ def strip_tag_prefix(tag): return tag.replace("tag:", "").replace("and:", "").replace("or:", "") -def is_grouped_or_filtered_by_project(parameters): +def is_grouped_by_project(parameters): """Determine if grouped or filtered by project.""" group_by = list(parameters.parameters.get("group_by", {}).keys()) - filters = list(parameters.parameters.get("filter", {}).keys()) - effects = group_by + filters - return [key for key in effects if "project" in key] + return [key for key in group_by if "project" in key] def check_view_filter_and_group_by_criteria(filter_set, group_by_set): From 63abc4c198522433839b6f62028f812f3d454708 Mon Sep 17 00:00:00 2001 From: Douglas Curtis Date: Thu, 21 Jan 2021 12:03:11 -0500 Subject: [PATCH 13/17] COST-741: User Access API (#2601) --- docs/source/specs/openapi.json | 63 +++++ koku/api/urls.py | 2 + koku/api/user_access/__init__.py | 1 + koku/api/user_access/test/__init__.py | 1 + koku/api/user_access/test/test_view.py | 327 +++++++++++++++++++++++++ koku/api/user_access/view.py | 155 ++++++++++++ koku/api/views.py | 1 + 7 files changed, 550 insertions(+) create mode 100644 koku/api/user_access/__init__.py create mode 100644 koku/api/user_access/test/__init__.py create mode 100644 koku/api/user_access/test/test_view.py create mode 100644 koku/api/user_access/view.py diff --git a/docs/source/specs/openapi.json b/docs/source/specs/openapi.json index 6f7058ae13..f66a985966 100644 --- a/docs/source/specs/openapi.json +++ b/docs/source/specs/openapi.json @@ -2451,6 +2451,31 @@ } } }, + "/user-access/": { + "get": { + "tags": [ + "UserAccess" + ], + "summary": "Returns user permission status.", + "operationId": "listUserAccess", + "parameters": [{ + "$ref": "#/components/parameters/QueryType" + } + ], + "responses": { + "200": { + "description": "| - 200 response", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UserAccessListPagination" + } + } + } + } + } + } + }, "/sources/": { "get": { "tags": [ @@ -3867,6 +3892,14 @@ "type": "boolean" } }, + "QueryType": { + "name": "type", + "in": "query", + "description": "String to identify user access permission type (i.e. 
AWS, cost_model).", + "schema": { + "type": "string" + } + }, "QueryValue": { "in": "query", "name": "value", @@ -4521,6 +4554,26 @@ } ] }, + "UserAccessListPagination": { + "allOf": [{ + "$ref": "#/components/schemas/ListPagination" + }, + { + "type": "object", + "required": [ + "data" + ], + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/UserAccessTypeOut" + } + } + } + } + ] + }, "ResourceTypeListPagination": { "allOf": [{ "$ref": "#/components/schemas/ListPagination" @@ -4568,6 +4621,16 @@ } } }, + "UserAccessTypeOut": { + "properties": { + "type": { + "type": "string" + }, + "access": { + "type": "boolean" + } + } + }, "ResourceTypeListOut": { "properties": { "value": { diff --git a/koku/api/urls.py b/koku/api/urls.py index dbbb6c15c4..d9765de8b0 100644 --- a/koku/api/urls.py +++ b/koku/api/urls.py @@ -71,6 +71,7 @@ from api.views import ResourceTypeView from api.views import SettingsView from api.views import StatusView +from api.views import UserAccessView from koku.cache import AWS_CACHE_PREFIX from koku.cache import AZURE_CACHE_PREFIX from koku.cache import GCP_CACHE_PREFIX @@ -300,6 +301,7 @@ path("settings", RedirectView.as_view(pattern_name="settings"), name="settings-redirect"), path("organizations/aws/", AWSOrgView.as_view(), name="aws-org-unit"), path("resource-types/", ResourceTypeView.as_view(), name="resource-types"), + path("user-access/", UserAccessView.as_view(), name="user-access"), path("resource-types/aws-accounts/", AWSAccountView.as_view(), name="aws-accounts"), path("resource-types/gcp-accounts/", GCPAccountView.as_view(), name="gcp-accounts"), path("resource-types/gcp-projects/", GCPProjectsView.as_view(), name="gcp-projects"), diff --git a/koku/api/user_access/__init__.py b/koku/api/user_access/__init__.py new file mode 100644 index 0000000000..4ede8e6dfa --- /dev/null +++ b/koku/api/user_access/__init__.py @@ -0,0 +1 @@ +# noqa diff --git a/koku/api/user_access/test/__init__.py b/koku/api/user_access/test/__init__.py new file mode 100644 index 0000000000..4ede8e6dfa --- /dev/null +++ b/koku/api/user_access/test/__init__.py @@ -0,0 +1 @@ +# noqa diff --git a/koku/api/user_access/test/test_view.py b/koku/api/user_access/test/test_view.py new file mode 100644 index 0000000000..6bfb0a82ea --- /dev/null +++ b/koku/api/user_access/test/test_view.py @@ -0,0 +1,327 @@ +# +# Copyright 2021 Red Hat, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+# +"""Test the UserAccess view.""" +from django.urls import reverse +from rest_framework import status +from rest_framework.test import APIClient + +from api.iam.test.iam_test_case import IamTestCase +from api.iam.test.iam_test_case import RbacPermissions + + +class UserAccessViewTest(IamTestCase): + """Tests the resource types views.""" + + def setUp(self): + """Set up the UserAccess view tests.""" + super().setUp() + self.client = APIClient() + + @RbacPermissions({"aws.account": {"read": ["*"]}}) + def test_aws_view_read(self): + """Test user-access view with aws read wildcard permission.""" + url = reverse("user-access") + response = self.client.get(url, **self.headers) + + self.assertEqual(len(response.data.get("data")), 5) + self.assertTrue({"type": "aws", "access": True} in response.data.get("data")) + self.assertTrue({"type": "ocp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "gcp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) + self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) + + @RbacPermissions({"aws.account": {"read": ["123"]}}) + def test_aws_view_read_specific_account(self): + """Test user-access view with aws read specific account permission.""" + url = reverse("user-access") + response = self.client.get(url, **self.headers) + + self.assertEqual(len(response.data.get("data")), 5) + self.assertTrue({"type": "aws", "access": True} in response.data.get("data")) + self.assertTrue({"type": "ocp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "gcp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) + self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) + + @RbacPermissions({"aws.account": "*"}) + def test_aws_view_wildcard(self): + """Test user-access view with aws wildcard permission.""" + url = reverse("user-access") + response = self.client.get(url, **self.headers) + + self.assertEqual(len(response.data.get("data")), 5) + self.assertTrue({"type": "aws", "access": True} in response.data.get("data")) + self.assertTrue({"type": "ocp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "gcp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) + self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) + + @RbacPermissions( + { + "openshift.cluster": {"read": ["*"]}, + "openshift.project": {"read": ["myproject"]}, + "openshift.node": {"read": ["mynode"]}, + } + ) + def test_ocp_view_cluster(self): + """Test user-access view with openshift cluster read wildcard permission.""" + url = reverse("user-access") + response = self.client.get(url, **self.headers) + + self.assertEqual(len(response.data.get("data")), 5) + self.assertTrue({"type": "aws", "access": False} in response.data.get("data")) + self.assertTrue({"type": "ocp", "access": True} in response.data.get("data")) + self.assertTrue({"type": "gcp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) + self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) + + @RbacPermissions( + { + "openshift.cluster": {"read": ["mycluster"]}, + "openshift.project": {"read": ["*"]}, + "openshift.node": {"read": ["mynode"]}, 
+ } + ) + def test_ocp_view_project(self): + """Test user-access view with openshift project read wildcard permission.""" + url = reverse("user-access") + response = self.client.get(url, **self.headers) + + self.assertEqual(len(response.data.get("data")), 5) + self.assertTrue({"type": "aws", "access": False} in response.data.get("data")) + self.assertTrue({"type": "ocp", "access": True} in response.data.get("data")) + self.assertTrue({"type": "gcp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) + self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) + + @RbacPermissions( + { + "openshift.cluster": {"read": ["mycluster"]}, + "openshift.project": {"read": ["myproject"]}, + "openshift.node": {"read": ["*"]}, + } + ) + def test_ocp_view_node(self): + """Test user-access view with openshift node read wildcard permission.""" + url = reverse("user-access") + response = self.client.get(url, **self.headers) + + self.assertEqual(len(response.data.get("data")), 5) + self.assertTrue({"type": "aws", "access": False} in response.data.get("data")) + self.assertTrue({"type": "ocp", "access": True} in response.data.get("data")) + self.assertTrue({"type": "gcp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) + self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) + + @RbacPermissions({"openshift.cluster": "*"}) + def test_ocp_view_cluster_wildcard(self): + """Test user-access view with openshift cluster wildcard permission.""" + url = reverse("user-access") + + response = self.client.get(url, **self.headers) + + self.assertEqual(len(response.data.get("data")), 5) + self.assertTrue({"type": "aws", "access": False} in response.data.get("data")) + self.assertTrue({"type": "ocp", "access": True} in response.data.get("data")) + self.assertTrue({"type": "gcp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) + self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) + + @RbacPermissions({"openshift.project": "*"}) + def test_ocp_view_project_wildcard(self): + """Test user-access view with openshift project wildcard permission.""" + url = reverse("user-access") + + response = self.client.get(url, **self.headers) + + self.assertEqual(len(response.data.get("data")), 5) + self.assertTrue({"type": "aws", "access": False} in response.data.get("data")) + self.assertTrue({"type": "ocp", "access": True} in response.data.get("data")) + self.assertTrue({"type": "gcp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) + self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) + + @RbacPermissions({"openshift.node": "*"}) + def test_ocp_view_node_wildcard(self): + """Test user-access view with openshift node wildcard permission.""" + url = reverse("user-access") + + response = self.client.get(url, **self.headers) + + self.assertEqual(len(response.data.get("data")), 5) + self.assertTrue({"type": "aws", "access": False} in response.data.get("data")) + self.assertTrue({"type": "ocp", "access": True} in response.data.get("data")) + self.assertTrue({"type": "gcp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "azure", "access": False} in 
response.data.get("data")) + self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) + + @RbacPermissions({"gcp.account": {"read": ["*"]}, "gcp.project": {"read": ["myproject"]}}) + def test_gcp_view_account(self): + """Test user-access view with gcp account read wildcard permission.""" + url = reverse("user-access") + response = self.client.get(url, **self.headers) + + self.assertEqual(len(response.data.get("data")), 5) + self.assertTrue({"type": "aws", "access": False} in response.data.get("data")) + self.assertTrue({"type": "ocp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "gcp", "access": True} in response.data.get("data")) + self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) + self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) + + @RbacPermissions({"gcp.account": {"read": ["myaccount"]}, "gcp.project": {"read": ["*"]}}) + def test_gcp_view_project(self): + """Test user-access view with gcp project read wildcard permission.""" + url = reverse("user-access") + response = self.client.get(url, **self.headers) + + self.assertEqual(len(response.data.get("data")), 5) + self.assertTrue({"type": "aws", "access": False} in response.data.get("data")) + self.assertTrue({"type": "ocp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "gcp", "access": True} in response.data.get("data")) + self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) + self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) + + @RbacPermissions({"gcp.account": "*"}) + def test_gcp_view_account_wildcard(self): + """Test user-access view with gcp account wildcard permission.""" + url = reverse("user-access") + + response = self.client.get(url, **self.headers) + + self.assertEqual(len(response.data.get("data")), 5) + self.assertTrue({"type": "aws", "access": False} in response.data.get("data")) + self.assertTrue({"type": "ocp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "gcp", "access": True} in response.data.get("data")) + self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) + self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) + + @RbacPermissions({"gcp.project": "*"}) + def test_gcp_view_project_wildcard(self): + """Test user-access view with gcp project wildcard permission.""" + url = reverse("user-access") + + response = self.client.get(url, **self.headers) + + self.assertEqual(len(response.data.get("data")), 5) + self.assertTrue({"type": "aws", "access": False} in response.data.get("data")) + self.assertTrue({"type": "ocp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "gcp", "access": True} in response.data.get("data")) + self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) + self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) + + @RbacPermissions({"azure.subscription_guid": {"read": ["*"]}}) + def test_azure_view_read(self): + """Test user-access view with azure subscription read wildcard permission.""" + url = reverse("user-access") + response = self.client.get(url, **self.headers) + + self.assertEqual(len(response.data.get("data")), 5) + self.assertTrue({"type": "aws", "access": False} in response.data.get("data")) + self.assertTrue({"type": "ocp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "gcp", 
"access": False} in response.data.get("data")) + self.assertTrue({"type": "azure", "access": True} in response.data.get("data")) + self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) + + @RbacPermissions({"azure.subscription_guid": "*"}) + def test_azure_view_wildcard(self): + """Test user-access view with azure subscription wildcard permission.""" + url = reverse("user-access") + response = self.client.get(url, **self.headers) + + self.assertEqual(len(response.data.get("data")), 5) + self.assertTrue({"type": "aws", "access": False} in response.data.get("data")) + self.assertTrue({"type": "ocp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "gcp", "access": False} in response.data.get("data")) + self.assertTrue({"type": "azure", "access": True} in response.data.get("data")) + self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) + + def test_view_as_org_admin(self): + """Test user-access view as an org admin.""" + url = reverse("user-access") + response = self.client.get(url, **self.headers) + + self.assertEqual(len(response.data.get("data")), 5) + self.assertTrue({"type": "aws", "access": True} in response.data.get("data")) + self.assertTrue({"type": "ocp", "access": True} in response.data.get("data")) + self.assertTrue({"type": "gcp", "access": True} in response.data.get("data")) + self.assertTrue({"type": "azure", "access": True} in response.data.get("data")) + self.assertTrue({"type": "cost_model", "access": True} in response.data.get("data")) + + @RbacPermissions({"*": "*"}) + def test_view_as_cost_admin(self): + """Test user-access view as a cost admin.""" + url = reverse("user-access") + response = self.client.get(url, **self.headers) + + self.assertEqual(len(response.data.get("data")), 5) + self.assertTrue({"type": "aws", "access": True} in response.data.get("data")) + self.assertTrue({"type": "ocp", "access": True} in response.data.get("data")) + self.assertTrue({"type": "gcp", "access": True} in response.data.get("data")) + self.assertTrue({"type": "azure", "access": True} in response.data.get("data")) + self.assertTrue({"type": "cost_model", "access": True} in response.data.get("data")) + + def test_aws_view_query_read_org_admin(self): + """Test user-access view query as an org admin.""" + url = reverse("user-access") + query_url = f"{url}?source_type=aws" + response = self.client.get(query_url, **self.headers) + + self.assertTrue(response.data.get("data")) + + @RbacPermissions({"aws.account": "*"}) + def test_aws_view_query_read(self): + """Test user-access view query for aws.""" + url = reverse("user-access") + query_url = f"{url}?source_type=aws" + response = self.client.get(query_url, **self.headers) + + self.assertTrue(response.data.get("data")) + + @RbacPermissions({"openshift.cluster": "*"}) + def test_openshift_view_query_read_for_aws(self): + """Test user-access view query for aws with openshift permissions.""" + url = reverse("user-access") + query_url = f"{url}?type=aws" + response = self.client.get(query_url, **self.headers) + + self.assertFalse(response.data.get("data")) + + @RbacPermissions({"cost_model": "*"}) + def test_cost_model_view_query_read_for_aws(self): + """Test user-access view query for cost_model.""" + url = reverse("user-access") + query_url = f"{url}?type=cost_model" + response = self.client.get(query_url, **self.headers) + + self.assertTrue(response.data.get("data")) + + @RbacPermissions({"cost_model": {"write": ["*"]}}) + def 
test_cost_model_view_query_write_for_aws(self): + """Test user-access view query for cost_model with write access.""" + url = reverse("user-access") + query_url = f"{url}?type=cost_model" + response = self.client.get(query_url, **self.headers) + + self.assertTrue(response.data.get("data")) + + def test_view_query_invalid_source_type(self): + """Test user-access view query for invalid type.""" + url = reverse("user-access") + query_url = f"{url}?type=bad" + response = self.client.get(query_url, **self.headers) + + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) diff --git a/koku/api/user_access/view.py b/koku/api/user_access/view.py new file mode 100644 index 0000000000..89e9367a7b --- /dev/null +++ b/koku/api/user_access/view.py @@ -0,0 +1,155 @@ +# +# Copyright 2021 Red Hat, Inc. +# +# This program is free software: you can redistribute it and/or modify +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# +"""View for UserAccess.""" +import logging + +from django.utils.decorators import method_decorator +from django.views.decorators.vary import vary_on_headers +from rest_framework import status +from rest_framework.permissions import AllowAny +from rest_framework.response import Response +from rest_framework.views import APIView + +from api.common import CACHE_RH_IDENTITY_HEADER +from api.common.pagination import ListPaginator + +LOGGER = logging.getLogger(__name__) + + +class UserAccess: + def check_access(self, access_list): + if access_list: + return True + return False + + +class AWSUserAccess(UserAccess): + def __init__(self, access): + self.account_access = access.get("aws.account") + + @property + def access(self): + if self.check_access(self.account_access): + return True + return False + + +class OCPUserAccess(UserAccess): + def __init__(self, access): + self.cluster_access = access.get("openshift.cluster") + self.node_access = access.get("openshift.node") + self.project_access = access.get("openshift.project") + + @property + def access(self): + if ( + self.check_access(self.cluster_access) + or self.check_access(self.node_access) + or self.check_access(self.project_access) + ): + return True + return False + + +class AzureUserAccess(UserAccess): + def __init__(self, access): + self.subscription_access = access.get("azure.subscription_guid") + + @property + def access(self): + if self.check_access(self.subscription_access): + return True + return False + + +class GCPUserAccess(UserAccess): + def __init__(self, access): + self.account_access = access.get("gcp.account") + self.project_access = access.get("gcp.project") + + @property + def access(self): + if self.check_access(self.account_access) or self.check_access(self.project_access): + return True + return False + + +class CostModelUserAccess(UserAccess): + def __init__(self, access): + self.subscription_access = access.get("cost_model") + + @property + def access(self): + if self.check_access(self.subscription_access): + return True + return False + + +class CostManagementAllAccess(UserAccess): + def __init__(self, access): + self.all_access = access.get("*") + + 
@property + def access(self): + if self.check_access(self.all_access): + return True + return False + + +class UserAccessView(APIView): + """API GET view for User API.""" + + permission_classes = [AllowAny] + + @method_decorator(vary_on_headers(CACHE_RH_IDENTITY_HEADER)) + def get(self, request, **kwargs): + query_params = request.query_params + user_access = request.user.access + admin_user = request.user.admin or CostManagementAllAccess(user_access).access + + source_types = [ + {"type": "aws", "access_class": AWSUserAccess}, + {"type": "ocp", "access_class": OCPUserAccess}, + {"type": "gcp", "access_class": GCPUserAccess}, + {"type": "azure", "access_class": AzureUserAccess}, + {"type": "cost_model", "access_class": CostModelUserAccess}, + ] + + source_type = query_params.get("type") + if source_type: + source_accessor = next((item for item in source_types if item.get("type") == source_type.lower()), False) + if source_accessor: + access_class = source_accessor.get("access_class") + if admin_user: + access_granted = True + else: + access_granted = access_class(user_access).access + return Response({"data": access_granted}) + else: + return Response({f"Unknown source type: {source_type}"}, status=status.HTTP_400_BAD_REQUEST) + + data = [] + for source_type in source_types: + access_granted = False + if admin_user: + access_granted = True + else: + access_granted = source_type.get("access_class")(user_access).access + data.append({"type": source_type.get("type"), "access": access_granted}) + + paginator = ListPaginator(data, request) + + return paginator.get_paginated_response(data) diff --git a/koku/api/views.py b/koku/api/views.py index 502b4e11a2..846a32f2ce 100644 --- a/koku/api/views.py +++ b/koku/api/views.py @@ -67,3 +67,4 @@ from api.tags.azure.view import AzureTagView from api.tags.gcp.view import GCPTagView from api.tags.ocp.view import OCPTagView +from api.user_access.view import UserAccessView From edbdb2ea4ba196620729c634fb7219693c63d439 Mon Sep 17 00:00:00 2001 From: esebesto <73821679+esebesto@users.noreply.github.com> Date: Thu, 21 Jan 2021 19:32:17 +0100 Subject: [PATCH 14/17] Fix forecast paths (#2606) --- docs/source/specs/openapi.json | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/source/specs/openapi.json b/docs/source/specs/openapi.json index f66a985966..8faab173ab 100644 --- a/docs/source/specs/openapi.json +++ b/docs/source/specs/openapi.json @@ -396,7 +396,7 @@ }] } }, - "/forecasts/aws/costs": { + "/forecasts/aws/costs/": { "summary": "AWS Cost Forecasts", "get": { "tags":["Forecasts"], @@ -441,7 +441,7 @@ }] } }, - "/forecasts/azure/costs": { + "/forecasts/azure/costs/": { "summary": "Azure Cost Forecasts", "get": { "tags":["Forecasts"], @@ -486,7 +486,7 @@ }] } }, - "/forecasts/gcp/costs": { + "/forecasts/gcp/costs/": { "summary": "GCP Cost Forecasts", "get": { "tags":["Forecasts"], @@ -531,7 +531,7 @@ }] } }, - "/forecasts/openshift/costs": { + "/forecasts/openshift/costs/": { "summary": "OpenShift Cost Forecasts", "get": { "tags":["Forecasts"], @@ -576,7 +576,7 @@ }] } }, - "/forecasts/openshift/infrastructures/aws/costs": { + "/forecasts/openshift/infrastructures/aws/costs/": { "summary": "OpenShift on AWS Cost Forecasts", "get": { "tags":["Forecasts"], @@ -621,7 +621,7 @@ }] } }, - "/forecasts/openshift/infrastructures/all/costs": { + "/forecasts/openshift/infrastructures/all/costs/": { "summary": "OpenShift on Cloud Cost Forecasts", "get": { "tags":["Forecasts"], @@ -666,7 +666,7 @@ }] } }, - 
"/forecasts/openshift/infrastructures/azure/costs": { + "/forecasts/openshift/infrastructures/azure/costs/": { "summary": "OpenShift on Azure Cost Forecasts", "get": { "tags":["Forecasts"], From c8e5dddafcc07b4b684c2418524421983420ea9b Mon Sep 17 00:00:00 2001 From: Douglas Curtis Date: Thu, 21 Jan 2021 14:15:53 -0500 Subject: [PATCH 15/17] user access logging (#2607) --- koku/api/user_access/view.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/koku/api/user_access/view.py b/koku/api/user_access/view.py index 89e9367a7b..ce87d51d43 100644 --- a/koku/api/user_access/view.py +++ b/koku/api/user_access/view.py @@ -26,7 +26,7 @@ from api.common import CACHE_RH_IDENTITY_HEADER from api.common.pagination import ListPaginator -LOGGER = logging.getLogger(__name__) +LOG = logging.getLogger(__name__) class UserAccess: @@ -118,7 +118,9 @@ class UserAccessView(APIView): def get(self, request, **kwargs): query_params = request.query_params user_access = request.user.access + LOG.info(f"User Access RBAC permissions: {str(user_access)}. Org Admin: {str(request.user.admin)}") admin_user = request.user.admin or CostManagementAllAccess(user_access).access + LOG.info(f"User Access admin user: {str(admin_user)}") source_types = [ {"type": "aws", "access_class": AWSUserAccess}, From 26284ad484366ae340d75d1c618c0efe6f976e49 Mon Sep 17 00:00:00 2001 From: Douglas Curtis Date: Thu, 21 Jan 2021 15:24:54 -0500 Subject: [PATCH 16/17] Fixing user access to expect all rbac types to be present (#2608) --- koku/api/user_access/test/test_view.py | 261 ++++++++++++++++++++++--- koku/api/user_access/view.py | 19 +- 2 files changed, 233 insertions(+), 47 deletions(-) diff --git a/koku/api/user_access/test/test_view.py b/koku/api/user_access/test/test_view.py index 6bfb0a82ea..efedc45540 100644 --- a/koku/api/user_access/test/test_view.py +++ b/koku/api/user_access/test/test_view.py @@ -31,7 +31,19 @@ def setUp(self): super().setUp() self.client = APIClient() - @RbacPermissions({"aws.account": {"read": ["*"]}}) + @RbacPermissions( + { + "aws.account": {"read": ["*"]}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": []}, + "gcp.project": {"read": []}, + "azure.subscription_guid": {"read": []}, + "openshift.cluster": {"read": []}, + "openshift.node": {"read": []}, + "openshift.project": {"read": []}, + "cost_model": {"read": [], "write": []}, + } + ) def test_aws_view_read(self): """Test user-access view with aws read wildcard permission.""" url = reverse("user-access") @@ -44,7 +56,19 @@ def test_aws_view_read(self): self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) - @RbacPermissions({"aws.account": {"read": ["123"]}}) + @RbacPermissions( + { + "aws.account": {"read": ["123"]}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": []}, + "gcp.project": {"read": []}, + "azure.subscription_guid": {"read": []}, + "openshift.cluster": {"read": []}, + "openshift.node": {"read": []}, + "openshift.project": {"read": []}, + "cost_model": {"read": [], "write": []}, + } + ) def test_aws_view_read_specific_account(self): """Test user-access view with aws read specific account permission.""" url = reverse("user-access") @@ -57,7 +81,19 @@ def test_aws_view_read_specific_account(self): self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) - 
@RbacPermissions({"aws.account": "*"}) + @RbacPermissions( + { + "aws.account": {"read": ["*"]}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": []}, + "gcp.project": {"read": []}, + "azure.subscription_guid": {"read": []}, + "openshift.cluster": {"read": []}, + "openshift.node": {"read": []}, + "openshift.project": {"read": []}, + "cost_model": {"read": [], "write": []}, + } + ) def test_aws_view_wildcard(self): """Test user-access view with aws wildcard permission.""" url = reverse("user-access") @@ -72,9 +108,15 @@ def test_aws_view_wildcard(self): @RbacPermissions( { + "aws.account": {"read": []}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": []}, + "gcp.project": {"read": []}, + "azure.subscription_guid": {"read": []}, "openshift.cluster": {"read": ["*"]}, - "openshift.project": {"read": ["myproject"]}, "openshift.node": {"read": ["mynode"]}, + "openshift.project": {"read": ["myproject"]}, + "cost_model": {"read": [], "write": []}, } ) def test_ocp_view_cluster(self): @@ -91,9 +133,15 @@ def test_ocp_view_cluster(self): @RbacPermissions( { + "aws.account": {"read": []}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": []}, + "gcp.project": {"read": []}, + "azure.subscription_guid": {"read": []}, "openshift.cluster": {"read": ["mycluster"]}, - "openshift.project": {"read": ["*"]}, "openshift.node": {"read": ["mynode"]}, + "openshift.project": {"read": ["*"]}, + "cost_model": {"read": [], "write": []}, } ) def test_ocp_view_project(self): @@ -110,9 +158,15 @@ def test_ocp_view_project(self): @RbacPermissions( { + "aws.account": {"read": []}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": []}, + "gcp.project": {"read": []}, + "azure.subscription_guid": {"read": []}, "openshift.cluster": {"read": ["mycluster"]}, - "openshift.project": {"read": ["myproject"]}, "openshift.node": {"read": ["*"]}, + "openshift.project": {"read": ["myproject"]}, + "cost_model": {"read": [], "write": []}, } ) def test_ocp_view_node(self): @@ -127,7 +181,19 @@ def test_ocp_view_node(self): self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) - @RbacPermissions({"openshift.cluster": "*"}) + @RbacPermissions( + { + "aws.account": {"read": []}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": []}, + "gcp.project": {"read": []}, + "azure.subscription_guid": {"read": []}, + "openshift.cluster": {"read": ["*"]}, + "openshift.node": {"read": []}, + "openshift.project": {"read": []}, + "cost_model": {"read": [], "write": []}, + } + ) def test_ocp_view_cluster_wildcard(self): """Test user-access view with openshift cluster wildcard permission.""" url = reverse("user-access") @@ -141,7 +207,19 @@ def test_ocp_view_cluster_wildcard(self): self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) - @RbacPermissions({"openshift.project": "*"}) + @RbacPermissions( + { + "aws.account": {"read": []}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": []}, + "gcp.project": {"read": []}, + "azure.subscription_guid": {"read": []}, + "openshift.cluster": {"read": [""]}, + "openshift.node": {"read": []}, + "openshift.project": {"read": ["*"]}, + "cost_model": {"read": [], "write": []}, + } + ) def test_ocp_view_project_wildcard(self): """Test user-access view with openshift project wildcard 
permission.""" url = reverse("user-access") @@ -155,7 +233,19 @@ def test_ocp_view_project_wildcard(self): self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) - @RbacPermissions({"openshift.node": "*"}) + @RbacPermissions( + { + "aws.account": {"read": []}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": []}, + "gcp.project": {"read": []}, + "azure.subscription_guid": {"read": []}, + "openshift.cluster": {"read": []}, + "openshift.node": {"read": ["*"]}, + "openshift.project": {"read": []}, + "cost_model": {"read": [], "write": []}, + } + ) def test_ocp_view_node_wildcard(self): """Test user-access view with openshift node wildcard permission.""" url = reverse("user-access") @@ -169,7 +259,19 @@ def test_ocp_view_node_wildcard(self): self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) - @RbacPermissions({"gcp.account": {"read": ["*"]}, "gcp.project": {"read": ["myproject"]}}) + @RbacPermissions( + { + "aws.account": {"read": []}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": ["*"]}, + "gcp.project": {"read": ["myproject"]}, + "azure.subscription_guid": {"read": []}, + "openshift.cluster": {"read": []}, + "openshift.node": {"read": []}, + "openshift.project": {"read": []}, + "cost_model": {"read": [], "write": []}, + } + ) def test_gcp_view_account(self): """Test user-access view with gcp account read wildcard permission.""" url = reverse("user-access") @@ -182,7 +284,19 @@ def test_gcp_view_account(self): self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) - @RbacPermissions({"gcp.account": {"read": ["myaccount"]}, "gcp.project": {"read": ["*"]}}) + @RbacPermissions( + { + "aws.account": {"read": []}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": ["myaccount"]}, + "gcp.project": {"read": ["*"]}, + "azure.subscription_guid": {"read": []}, + "openshift.cluster": {"read": []}, + "openshift.node": {"read": []}, + "openshift.project": {"read": []}, + "cost_model": {"read": [], "write": []}, + } + ) def test_gcp_view_project(self): """Test user-access view with gcp project read wildcard permission.""" url = reverse("user-access") @@ -195,7 +309,19 @@ def test_gcp_view_project(self): self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) - @RbacPermissions({"gcp.account": "*"}) + @RbacPermissions( + { + "aws.account": {"read": []}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": ["*"]}, + "gcp.project": {"read": []}, + "azure.subscription_guid": {"read": []}, + "openshift.cluster": {"read": []}, + "openshift.node": {"read": []}, + "openshift.project": {"read": []}, + "cost_model": {"read": [], "write": []}, + } + ) def test_gcp_view_account_wildcard(self): """Test user-access view with gcp account wildcard permission.""" url = reverse("user-access") @@ -209,7 +335,19 @@ def test_gcp_view_account_wildcard(self): self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) - @RbacPermissions({"gcp.project": "*"}) + @RbacPermissions( + { + "aws.account": 
{"read": []}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": []}, + "gcp.project": {"read": ["*"]}, + "azure.subscription_guid": {"read": []}, + "openshift.cluster": {"read": []}, + "openshift.node": {"read": []}, + "openshift.project": {"read": []}, + "cost_model": {"read": [], "write": []}, + } + ) def test_gcp_view_project_wildcard(self): """Test user-access view with gcp project wildcard permission.""" url = reverse("user-access") @@ -223,7 +361,19 @@ def test_gcp_view_project_wildcard(self): self.assertTrue({"type": "azure", "access": False} in response.data.get("data")) self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) - @RbacPermissions({"azure.subscription_guid": {"read": ["*"]}}) + @RbacPermissions( + { + "aws.account": {"read": []}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": []}, + "gcp.project": {"read": []}, + "azure.subscription_guid": {"read": ["*"]}, + "openshift.cluster": {"read": []}, + "openshift.node": {"read": []}, + "openshift.project": {"read": []}, + "cost_model": {"read": [], "write": []}, + } + ) def test_azure_view_read(self): """Test user-access view with azure subscription read wildcard permission.""" url = reverse("user-access") @@ -236,7 +386,19 @@ def test_azure_view_read(self): self.assertTrue({"type": "azure", "access": True} in response.data.get("data")) self.assertTrue({"type": "cost_model", "access": False} in response.data.get("data")) - @RbacPermissions({"azure.subscription_guid": "*"}) + @RbacPermissions( + { + "aws.account": {"read": []}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": []}, + "gcp.project": {"read": []}, + "azure.subscription_guid": {"read": ["*"]}, + "openshift.cluster": {"read": []}, + "openshift.node": {"read": []}, + "openshift.project": {"read": []}, + "cost_model": {"read": [], "write": []}, + } + ) def test_azure_view_wildcard(self): """Test user-access view with azure subscription wildcard permission.""" url = reverse("user-access") @@ -261,19 +423,6 @@ def test_view_as_org_admin(self): self.assertTrue({"type": "azure", "access": True} in response.data.get("data")) self.assertTrue({"type": "cost_model", "access": True} in response.data.get("data")) - @RbacPermissions({"*": "*"}) - def test_view_as_cost_admin(self): - """Test user-access view as a cost admin.""" - url = reverse("user-access") - response = self.client.get(url, **self.headers) - - self.assertEqual(len(response.data.get("data")), 5) - self.assertTrue({"type": "aws", "access": True} in response.data.get("data")) - self.assertTrue({"type": "ocp", "access": True} in response.data.get("data")) - self.assertTrue({"type": "gcp", "access": True} in response.data.get("data")) - self.assertTrue({"type": "azure", "access": True} in response.data.get("data")) - self.assertTrue({"type": "cost_model", "access": True} in response.data.get("data")) - def test_aws_view_query_read_org_admin(self): """Test user-access view query as an org admin.""" url = reverse("user-access") @@ -282,7 +431,19 @@ def test_aws_view_query_read_org_admin(self): self.assertTrue(response.data.get("data")) - @RbacPermissions({"aws.account": "*"}) + @RbacPermissions( + { + "aws.account": {"read": ["*"]}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": []}, + "gcp.project": {"read": []}, + "azure.subscription_guid": {"read": []}, + "openshift.cluster": {"read": []}, + "openshift.node": {"read": []}, + "openshift.project": {"read": []}, + "cost_model": {"read": [], "write": []}, + 
} + ) def test_aws_view_query_read(self): """Test user-access view query for aws.""" url = reverse("user-access") @@ -291,7 +452,19 @@ def test_aws_view_query_read(self): self.assertTrue(response.data.get("data")) - @RbacPermissions({"openshift.cluster": "*"}) + @RbacPermissions( + { + "aws.account": {"read": []}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": []}, + "gcp.project": {"read": []}, + "azure.subscription_guid": {"read": []}, + "openshift.cluster": {"read": ["*"]}, + "openshift.node": {"read": []}, + "openshift.project": {"read": []}, + "cost_model": {"read": [], "write": []}, + } + ) def test_openshift_view_query_read_for_aws(self): """Test user-access view query for aws with openshift permissions.""" url = reverse("user-access") @@ -300,7 +473,19 @@ def test_openshift_view_query_read_for_aws(self): self.assertFalse(response.data.get("data")) - @RbacPermissions({"cost_model": "*"}) + @RbacPermissions( + { + "aws.account": {"read": []}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": []}, + "gcp.project": {"read": []}, + "azure.subscription_guid": {"read": []}, + "openshift.cluster": {"read": ["*"]}, + "openshift.node": {"read": []}, + "openshift.project": {"read": []}, + "cost_model": {"read": ["*"], "write": []}, + } + ) def test_cost_model_view_query_read_for_aws(self): """Test user-access view query for cost_model.""" url = reverse("user-access") @@ -309,7 +494,19 @@ def test_cost_model_view_query_read_for_aws(self): self.assertTrue(response.data.get("data")) - @RbacPermissions({"cost_model": {"write": ["*"]}}) + @RbacPermissions( + { + "aws.account": {"read": []}, + "aws.organizational_unit": {"read": []}, + "gcp.account": {"read": []}, + "gcp.project": {"read": []}, + "azure.subscription_guid": {"read": []}, + "openshift.cluster": {"read": ["*"]}, + "openshift.node": {"read": []}, + "openshift.project": {"read": []}, + "cost_model": {"read": [], "write": ["*"]}, + } + ) def test_cost_model_view_query_write_for_aws(self): """Test user-access view query for cost_model with write access.""" url = reverse("user-access") diff --git a/koku/api/user_access/view.py b/koku/api/user_access/view.py index ce87d51d43..c6a9b84b18 100644 --- a/koku/api/user_access/view.py +++ b/koku/api/user_access/view.py @@ -31,7 +31,7 @@ class UserAccess: def check_access(self, access_list): - if access_list: + if access_list.get("read") or access_list.get("write"): return True return False @@ -98,17 +98,6 @@ def access(self): return False -class CostManagementAllAccess(UserAccess): - def __init__(self, access): - self.all_access = access.get("*") - - @property - def access(self): - if self.check_access(self.all_access): - return True - return False - - class UserAccessView(APIView): """API GET view for User API.""" @@ -118,9 +107,9 @@ class UserAccessView(APIView): def get(self, request, **kwargs): query_params = request.query_params user_access = request.user.access - LOG.info(f"User Access RBAC permissions: {str(user_access)}. Org Admin: {str(request.user.admin)}") - admin_user = request.user.admin or CostManagementAllAccess(user_access).access - LOG.info(f"User Access admin user: {str(admin_user)}") + LOG.debug(f"User Access RBAC permissions: {str(user_access)}. 
Org Admin: {str(request.user.admin)}") + admin_user = request.user.admin + LOG.debug(f"User Access admin user: {str(admin_user)}") source_types = [ {"type": "aws", "access_class": AWSUserAccess}, From bd2198ef4a2f814a1958387a8f6c33d09e1f6812 Mon Sep 17 00:00:00 2001 From: Andrew Berglund Date: Fri, 22 Jan 2021 08:47:21 -0500 Subject: [PATCH 17/17] COST-881 Use contains instead of in for arrays (#2604) * Use contains instead of in for Array fields when creating filters in query handler --- koku/api/query_handler.py | 16 ++++++++++++ koku/api/report/aws/query_handler.py | 25 ++++++++++--------- .../test/all/openshift/test_query_handler.py | 18 +++++++++++++ 3 files changed, 47 insertions(+), 12 deletions(-) diff --git a/koku/api/query_handler.py b/koku/api/query_handler.py index 71cc630783..163f881744 100644 --- a/koku/api/query_handler.py +++ b/koku/api/query_handler.py @@ -19,6 +19,7 @@ import logging from dateutil import relativedelta +from django.core.exceptions import FieldDoesNotExist from django.db.models.functions import TruncDay from django.db.models.functions import TruncMonth @@ -344,5 +345,20 @@ def set_access_filters(self, access, filt, filters): filters.add(q_filter) else: filt["operation"] = "in" + try: + check_field_type = None + if hasattr(self, "query_table"): + # Reports APIs + check_field_type = self.query_table._meta.get_field(filt.get("field", "")).get_internal_type() + elif hasattr(self, "data_sources"): + # Tags APIs + check_field_type = ( + self.data_sources[0].get("db_table")._meta.get_field(filt.get("field", "")).get_internal_type() + ) + if check_field_type == "ArrayField": + filt["operation"] = "contains" + except FieldDoesNotExist: + pass + q_filter = QueryFilter(parameter=access, **filt) filters.add(q_filter) diff --git a/koku/api/report/aws/query_handler.py b/koku/api/report/aws/query_handler.py index da74ca1478..9c9e657e70 100644 --- a/koku/api/report/aws/query_handler.py +++ b/koku/api/report/aws/query_handler.py @@ -543,19 +543,20 @@ def set_access_filters(self, access, filt, filters): # structure of the tree. Therefore, as long as the user has access to the root nodes # passed in by group_by[org_unit_id] then the user automatically has access to all # the sub orgs. 
- if access and "*" not in access: - allowed_ous = ( - AWSOrganizationalUnit.objects.filter( - reduce(operator.or_, (Q(org_unit_path__icontains=rbac) for rbac in access)) + with tenant_context(self.tenant): + if access and "*" not in access: + allowed_ous = ( + AWSOrganizationalUnit.objects.filter( + reduce(operator.or_, (Q(org_unit_path__icontains=rbac) for rbac in access)) + ) + .filter(account_alias__isnull=True) + .order_by("org_unit_id", "-created_timestamp") + .distinct("org_unit_id") ) - .filter(account_alias__isnull=True) - .order_by("org_unit_id", "-created_timestamp") - .distinct("org_unit_id") - ) - if allowed_ous: - access = list(allowed_ous.values_list("org_unit_id", flat=True)) - if not isinstance(filt, list) and filt["field"] == "organizational_unit__org_unit_path": - filt["field"] = "organizational_unit__org_unit_id" + if allowed_ous: + access = list(allowed_ous.values_list("org_unit_id", flat=True)) + if not isinstance(filt, list) and filt["field"] == "organizational_unit__org_unit_path": + filt["field"] = "organizational_unit__org_unit_id" super().set_access_filters(access, filt, filters) def total_sum(self, sum1, sum2): # noqa: C901 diff --git a/koku/api/report/test/all/openshift/test_query_handler.py b/koku/api/report/test/all/openshift/test_query_handler.py index 3eb4cff285..c5197ed994 100644 --- a/koku/api/report/test/all/openshift/test_query_handler.py +++ b/koku/api/report/test/all/openshift/test_query_handler.py @@ -18,6 +18,9 @@ from tenant_schemas.utils import tenant_context from api.iam.test.iam_test_case import IamTestCase +from api.iam.test.iam_test_case import RbacPermissions +from api.query_filter import QueryFilter +from api.query_filter import QueryFilterCollection from api.report.all.openshift.query_handler import OCPAllReportQueryHandler from api.urls import OCPAllCostView from api.urls import OCPAllInstanceTypeView @@ -187,3 +190,18 @@ def test_query_table(self): query_params = self.mocked_query_params(url, view) handler = OCPAllReportQueryHandler(query_params) self.assertEqual(handler.query_table, table) + + @RbacPermissions({"openshift.project": {"read": ["analytics"]}}) + def test_set_access_filters_with_array_field(self): + """Test that a filter is correctly set for arrays.""" + + query_params = self.mocked_query_params("?filter[project]=analytics", OCPAllCostView) + # the mocked query parameters dont include the key from the url so it needs to be added + handler = OCPAllReportQueryHandler(query_params) + field = "namespace" + access = ["analytics"] + filt = {"field": field} + filters = QueryFilterCollection() + handler.set_access_filters(access, filt, filters) + expected = [QueryFilter(field=field, operation="contains", parameter=access)] + self.assertEqual(filters._filters, expected)