From b278a0465f4278c8010764cd439ef58da41a803c Mon Sep 17 00:00:00 2001 From: Kenneth Lakin Date: Mon, 11 Apr 2022 23:51:53 +0000 Subject: [PATCH] Creates metrics endpoint for use by prom-scraper Notable changes in this commit: * Adds an `/internal/v4/metrics` endpoint which serves Prometheus-style metrics. * Adds function to Prometheus::Client::Registry to re-create the library's metrics registry. This is used by the PrometheusClient class's tests to wipe out the metrics registry, which is global state. Global state is the #1 cause of test pollution and this change seemed to be the most straightforward way to eliminate some order-dependent test failures. NOTE: This function definition is only in the `prometheus_updater_spec.rb` file, which we believe is not shipped in capi-release, so the odds of actually writing production code that depends on this test-only functionality are pretty slim. Co-authored-by: David Alvarado Co-authored-by: Kenneth Lakin Co-authored-by: Merric de Launey Co-authored-by: Michael Oleske --- Gemfile | 1 + Gemfile.lock | 4 +- .../internal/metrics_controller.rb | 25 ++ .../internal/staging_completion_controller.rb | 6 + app/jobs/diego/sync.rb | 17 +- lib/cloud_controller/dependency_locator.rb | 8 + .../deployment_updater/scheduler.rb | 18 +- .../metrics/periodic_updater.rb | 10 +- .../metrics/prometheus_updater.rb | 143 +++++++ .../metrics/request_metrics.rb | 15 +- .../metrics/statsd_updater.rb | 2 +- spec/request/internal/metrics_spec.rb | 142 +++++++ .../internal/metric_controller_spec.rb | 34 ++ .../staging_completion_controller_spec.rb | 12 + spec/unit/jobs/diego/sync_spec.rb | 19 +- .../deployment_updater/scheduler_spec.rb | 20 +- .../metrics/periodic_updater_spec.rb | 26 +- .../metrics/prometheus_updater_spec.rb | 350 ++++++++++++++++++ .../metrics/request_metrics_spec.rb | 19 +- .../metrics/statsd_updater_spec.rb | 4 +- 20 files changed, 820 insertions(+), 55 deletions(-) create mode 100644 
app/controllers/internal/metrics_controller.rb create mode 100644 lib/cloud_controller/metrics/prometheus_updater.rb create mode 100644 spec/request/internal/metrics_spec.rb create mode 100644 spec/unit/controllers/internal/metric_controller_spec.rb create mode 100644 spec/unit/lib/cloud_controller/metrics/prometheus_updater_spec.rb diff --git a/Gemfile b/Gemfile index c4b151cae7d..35cef3fa722 100644 --- a/Gemfile +++ b/Gemfile @@ -40,6 +40,7 @@ gem 'sequel_pg', require: 'sequel' gem 'sinatra', '~> 2.2' gem 'sinatra-contrib' gem 'statsd-ruby', '~> 1.4.0' +gem 'prometheus-client' gem 'steno' gem 'talentbox-delayed_job_sequel', '~> 4.3.0' gem 'thin' diff --git a/Gemfile.lock b/Gemfile.lock index f4f5cacff6a..b1547eed23c 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -352,6 +352,7 @@ GEM ast (~> 2.4.1) pg (1.3.5) posix-spawn (0.3.15) + prometheus-client (3.0.0) protobuf (3.6.12) activesupport (>= 3.2) middleware @@ -600,6 +601,7 @@ DEPENDENCIES parallel_tests pg posix-spawn (~> 0.3.15) + prometheus-client protobuf (= 3.6.12) pry-byebug psych (>= 4.0.4) @@ -640,4 +642,4 @@ DEPENDENCIES yajl-ruby BUNDLED WITH - 2.1.4 + 2.2.26 diff --git a/app/controllers/internal/metrics_controller.rb b/app/controllers/internal/metrics_controller.rb new file mode 100644 index 00000000000..e65e3d31d1b --- /dev/null +++ b/app/controllers/internal/metrics_controller.rb @@ -0,0 +1,25 @@ +require 'prometheus/client' +require 'prometheus/client/formats/text' +require 'cloud_controller/metrics/prometheus_updater' + +module VCAP::CloudController + module Internal + class MetricsController < RestController::BaseController + allow_unauthenticated_access + get '/internal/v4/metrics', :index + + def index + periodic_updater = VCAP::CloudController::Metrics::PeriodicUpdater.new( + Time.now.utc, + Steno::Sink::Counter.new, + Steno.logger('cc.api'), + [ + VCAP::CloudController::Metrics::StatsdUpdater.new, + VCAP::CloudController::Metrics::PrometheusUpdater.new + ]) + periodic_updater.update! 
+ [200, Prometheus::Client::Formats::Text.marshal(Prometheus::Client.registry)] + end + end + end +end diff --git a/app/controllers/internal/staging_completion_controller.rb b/app/controllers/internal/staging_completion_controller.rb index fda00f0743e..54ecef89156 100644 --- a/app/controllers/internal/staging_completion_controller.rb +++ b/app/controllers/internal/staging_completion_controller.rb @@ -117,8 +117,10 @@ def report_metrics(bbs_staging_response) duration = Time.now.utc.to_i * 1e9 - bbs_staging_response[:created_at] if bbs_staging_response[:failed] statsd_updater.report_staging_failure_metrics(duration) + prometheus_updater.report_staging_failure_metrics(duration) else statsd_updater.report_staging_success_metrics(duration) + prometheus_updater.report_staging_success_metrics(duration) end end @@ -126,6 +128,10 @@ def statsd_updater @statsd_updater ||= VCAP::CloudController::Metrics::StatsdUpdater.new end + def prometheus_updater + @prometheus_updater ||= VCAP::CloudController::Metrics::PrometheusUpdater.new # this should be using singleton + end + attr_reader :stagers def read_body diff --git a/app/jobs/diego/sync.rb b/app/jobs/diego/sync.rb index 0ba5d886549..3889b309bb0 100644 --- a/app/jobs/diego/sync.rb +++ b/app/jobs/diego/sync.rb @@ -7,15 +7,28 @@ module VCAP::CloudController module Jobs module Diego class Sync < VCAP::CloudController::Jobs::CCJob - def initialize(statsd=Statsd.new) + def initialize(statsd=Statsd.new, prometheus_updater=VCAP::CloudController::Metrics::PrometheusUpdater.new) @statsd = statsd + @prometheus_updater = prometheus_updater end def perform config = CloudController::DependencyLocator.instance.config - @statsd.time('cc.diego_sync.duration') do + begin + ## TODO: At some point in the future, start using a monotonic time source, rather than wall-clock time! 
+ start = Time.now VCAP::CloudController::Diego::ProcessesSync.new(config: config).sync VCAP::CloudController::Diego::TasksSync.new(config: config).sync + ensure + finish = Time.now + ## NOTE: We're taking time in seconds and multiplying by 1000 because we don't have + ## access to time in milliseconds. If you ever get access to reliable time in + ## milliseconds, then do know that the lack of precision here is not desired + ## so feed in the entire value! + elapsed_ms = ((finish - start) * 1000).round + + @statsd.timing('cc.diego_sync.duration', elapsed_ms) + @prometheus_updater.report_diego_cell_sync_duration(elapsed_ms) end end diff --git a/lib/cloud_controller/dependency_locator.rb b/lib/cloud_controller/dependency_locator.rb index 5c2ec46258b..db2e77cd4e6 100644 --- a/lib/cloud_controller/dependency_locator.rb +++ b/lib/cloud_controller/dependency_locator.rb @@ -34,6 +34,7 @@ require 'cloud_controller/opi/instances_client' require 'cloud_controller/opi/stager_client' require 'cloud_controller/opi/task_client' +require 'cloud_controller/metrics/prometheus_updater' require 'bits_service_client' @@ -70,6 +71,13 @@ def runners @dependencies[:runners] || register(:runners, VCAP::CloudController::Runners.new(config)) end + def prometheus_updater + unless @dependencies[:prometheus_updater] + register(:prometheus_updater, VCAP::CloudController::Metrics::PrometheusUpdater.new) + end + @dependencies[:prometheus_updater] + end + def stagers @dependencies[:stagers] || register(:stagers, VCAP::CloudController::Stagers.new(config)) end diff --git a/lib/cloud_controller/deployment_updater/scheduler.rb b/lib/cloud_controller/deployment_updater/scheduler.rb index c0dc27790f0..82cd1a03bba 100644 --- a/lib/cloud_controller/deployment_updater/scheduler.rb +++ b/lib/cloud_controller/deployment_updater/scheduler.rb @@ -10,10 +10,12 @@ def start with_error_logging('cc.deployment_updater') do config = CloudController::DependencyLocator.instance.config statsd_client = 
CloudController::DependencyLocator.instance.statsd_client + prometheus_updater = CloudController::DependencyLocator.instance.prometheus_updater update_step = proc { update( update_frequency: config.get(:deployment_updater, :update_frequency_in_seconds), - statsd_client: statsd_client + statsd_client: statsd_client, + prometheus_updater: prometheus_updater ) } @@ -39,14 +41,20 @@ def start private - def update(update_frequency:, statsd_client:) + def update(update_frequency:, statsd_client:, prometheus_updater:) logger = Steno.logger('cc.deployment_updater.scheduler') update_start_time = Time.now - statsd_client.time('cc.deployments.update.duration') do - Dispatcher.dispatch - end + Dispatcher.dispatch update_duration = Time.now - update_start_time + ## NOTE: We're taking time in seconds and multiplying by 1000 because we don't have + ## access to time in milliseconds. If you ever get access to reliable time in + ## milliseconds, then do know that the lack of precision here is not desired + ## so feed in the entire value! 
+ update_duration_ms = update_duration * 1000 + statsd_client.timing('cc.deployments.update.duration', update_duration_ms) + prometheus_updater.report_deployment_duration(update_duration_ms) + logger.info("Update loop took #{update_duration}s") sleep_duration = update_frequency - update_duration diff --git a/lib/cloud_controller/metrics/periodic_updater.rb b/lib/cloud_controller/metrics/periodic_updater.rb index 20a04238464..2e5e77d9532 100644 --- a/lib/cloud_controller/metrics/periodic_updater.rb +++ b/lib/cloud_controller/metrics/periodic_updater.rb @@ -3,7 +3,7 @@ module VCAP::CloudController::Metrics class PeriodicUpdater - def initialize(start_time, log_counter, logger=Steno.logger, updaters=[StatsdUpdater.new]) + def initialize(start_time, log_counter, logger=Steno.logger, updaters=[StatsdUpdater.new, PrometheusUpdater.new]) @start_time = start_time @updaters = updaters @log_counter = log_counter @@ -15,7 +15,7 @@ def initialize(start_time, log_counter, logger=Steno.logger, updaters=[StatsdUpd def setup_updates update! - EM.add_periodic_timer(600) { catch_error { record_user_count } } + EM.add_periodic_timer(600) { catch_error { update_user_count } } EM.add_periodic_timer(30) { catch_error { update_job_queue_length } } EM.add_periodic_timer(30) { catch_error { update_thread_info } } EM.add_periodic_timer(30) { catch_error { update_failed_job_count } } @@ -26,7 +26,7 @@ def setup_updates end def update! 
- record_user_count + update_user_count update_job_queue_length update_thread_info update_failed_job_count @@ -67,10 +67,10 @@ def update_deploying_count @updaters.each { |u| u.update_deploying_count(deploying_count) } end - def record_user_count + def update_user_count user_count = VCAP::CloudController::User.count - @updaters.each { |u| u.record_user_count(user_count) } + @updaters.each { |u| u.update_user_count(user_count) } end def update_job_queue_length diff --git a/lib/cloud_controller/metrics/prometheus_updater.rb b/lib/cloud_controller/metrics/prometheus_updater.rb new file mode 100644 index 00000000000..e3c07a4ed53 --- /dev/null +++ b/lib/cloud_controller/metrics/prometheus_updater.rb @@ -0,0 +1,143 @@ +require 'prometheus/client' + +module VCAP::CloudController::Metrics + class PrometheusUpdater + def initialize(registry=Prometheus::Client.registry) + @registry = registry + end + + def update_gauge_metric(metric, value, message) + unless @registry.exist?(metric) + @registry.gauge(metric, docstring: message) + end + @registry.get(metric).set(value) + end + + def increment_gauge_metric(metric, message) + unless @registry.exist?(metric) + @registry.gauge(metric, docstring: message) + end + @registry.get(metric).increment + end + + def decrement_gauge_metric(metric, message) + unless @registry.exist?(metric) + @registry.gauge(metric, docstring: message) + end + @registry.get(metric).decrement + end + + def increment_counter_metric(metric, message) + unless @registry.exist?(metric) + @registry.counter(metric, docstring: message) + end + @registry.get(metric).increment + end + + def update_histogram_metric(metric, value, message, buckets) + unless @registry.exist?(metric) + @registry.histogram(metric, buckets: buckets, docstring: message) + end + @registry.get(metric).observe(value) + end + + def update_summary_metric(metric, value, message) + unless @registry.exist?(metric) + @registry.summary(metric, docstring: message) + end + 
@registry.get(metric).observe(value) + end + + def update_deploying_count(deploying_count) + update_gauge_metric(:cc_deployments_deploying, deploying_count, 'Number of in progress deployments') + end + + def update_user_count(user_count) + update_gauge_metric(:cc_total_users, user_count, 'Number of users') + end + + def update_job_queue_length(pending_job_count_by_queue, total) + pending_job_count_by_queue.each do |key, value| + metric_key = :"cc_job_queue_length_#{key.to_s.underscore}" + update_gauge_metric(metric_key, value, docstring: "Job queue length for worker #{key}") + end + + update_gauge_metric(:cc_job_queue_length_total, total, 'Total job queue length') + end + + def update_thread_info(thread_info) + update_gauge_metric(:cc_thread_info_thread_count, thread_info[:thread_count], 'Thread count') + update_gauge_metric(:cc_thread_info_event_machine_connection_count, thread_info[:event_machine][:connection_count], 'Event Machine connection count') + update_gauge_metric(:cc_thread_info_event_machine_threadqueue_size, thread_info[:event_machine][:threadqueue][:size], 'EventMachine thread queue size') + update_gauge_metric(:cc_thread_info_event_machine_threadqueue_num_waiting, thread_info[:event_machine][:threadqueue][:num_waiting], 'EventMachine num waiting in thread') + update_gauge_metric(:cc_thread_info_event_machine_resultqueue_size, thread_info[:event_machine][:resultqueue][:size], 'EventMachine queue size') + update_gauge_metric(:cc_thread_info_event_machine_resultqueue_num_waiting, thread_info[:event_machine][:resultqueue][:num_waiting], 'EventMachine requests waiting in queue') + end + + def update_failed_job_count(failed_jobs_by_queue, total) + failed_jobs_by_queue.each do |key, value| + metric_key = :"cc_failed_job_count_#{key.to_s.underscore}" + update_gauge_metric(metric_key, value, "Failed jobs for worker #{key}") + end + + update_gauge_metric(:cc_failed_job_count_total, total, 'Total failed jobs') + end + + def update_vitals(vitals) + vitals.each 
do |key, value| + metric_key = :"cc_vitals_#{key.to_s.underscore}" + update_gauge_metric(metric_key, value, "CloudController Vitals: #{key}") + end + end + + def update_log_counts(counts) + counts.each do |key, value| + metric_key = :"cc_log_count_#{key.to_s.underscore}" + update_gauge_metric(metric_key, value, "Log count for log level '#{key}'") + end + end + + def update_task_stats(total_running_tasks, total_memory_in_mb) + update_gauge_metric(:cc_tasks_running_count, total_running_tasks, 'Total running tasks') + update_gauge_metric(:cc_tasks_running_memory_in_mb, total_memory_in_mb, 'Total memory consumed by running tasks') + end + + def update_synced_invalid_lrps(lrp_count) + update_gauge_metric(:cc_diego_sync_invalid_desired_lrps, lrp_count, 'Invalid Desired LRPs') + end + + def start_staging_request_received + increment_counter_metric(:cc_staging_requested, 'Number of staging requests') + end + + def report_staging_success_metrics(duration_ns) + increment_counter_metric(:cc_staging_succeeded, 'Number of successful staging events') + update_histogram_metric(:cc_staging_succeeded_duration, nanoseconds_to_milliseconds(duration_ns), 'Durations of successful staging events', duration_buckets) + end + + def report_staging_failure_metrics(duration_ns) + increment_counter_metric(:cc_staging_failed, 'Number of failed staging events') + update_histogram_metric(:cc_staging_failed_duration, nanoseconds_to_milliseconds(duration_ns), 'Durations of failed staging events', duration_buckets) + end + + def report_diego_cell_sync_duration(duration_ms) + update_summary_metric(:cc_diego_sync_duration, duration_ms, 'Diego cell sync duration') + update_gauge_metric(:cc_diego_sync_duration_gauge, duration_ms, 'Diego cell sync duration (gauge metric)') + end + + def report_deployment_duration(duration_ms) + update_summary_metric(:cc_deployments_update_duration, duration_ms, 'Deployment duration') + update_gauge_metric(:cc_deployments_update_duration_gauge, duration_ms, 'Deployment 
duration (gauge metric)') + end + + private + + def duration_buckets + Prometheus::Client::Histogram.linear_buckets(start: 10000, width: 5000, count: 5) + end + + def nanoseconds_to_milliseconds(time_ns) + (time_ns / 1e6).to_i + end + end +end diff --git a/lib/cloud_controller/metrics/request_metrics.rb b/lib/cloud_controller/metrics/request_metrics.rb index 6847b71733d..83a0122112e 100644 --- a/lib/cloud_controller/metrics/request_metrics.rb +++ b/lib/cloud_controller/metrics/request_metrics.rb @@ -3,25 +3,36 @@ module VCAP::CloudController module Metrics class RequestMetrics - def initialize(statsd=Statsd.new) + def initialize(statsd=Statsd.new, prometheus_updater=PrometheusUpdater.new) @counter = 0 @statsd = statsd + @prometheus_updater = prometheus_updater end def start_request @counter += 1 @statsd.gauge('cc.requests.outstanding.gauge', @counter) @statsd.increment 'cc.requests.outstanding' + + @prometheus_updater.update_gauge_metric(:cc_requests_outstanding_gauge, @counter, 'Requests Outstanding Gauge') + @prometheus_updater.increment_gauge_metric(:cc_requests_outstanding, 'Requests Outstanding') end def complete_request(status) + http_status_code = "#{status.to_s[0]}XX" + http_status_metric = "cc.http_status.#{http_status_code}" @counter -= 1 @statsd.gauge('cc.requests.outstanding.gauge', @counter) @statsd.batch do |batch| batch.decrement 'cc.requests.outstanding' batch.increment 'cc.requests.completed' - batch.increment "cc.http_status.#{status.to_s[0]}XX" + batch.increment http_status_metric end + + @prometheus_updater.update_gauge_metric(:cc_requests_outstanding_gauge, @counter, 'Requests Outstanding Gauge') + @prometheus_updater.decrement_gauge_metric(:cc_requests_outstanding, 'Requests Outstanding') + @prometheus_updater.increment_gauge_metric(:cc_requests_completed, 'Requests Completed') + @prometheus_updater.increment_gauge_metric(http_status_metric.gsub('.', '_').to_sym, "Times HTTP status #{http_status_code} have been received") end end end diff 
--git a/lib/cloud_controller/metrics/statsd_updater.rb b/lib/cloud_controller/metrics/statsd_updater.rb index f45b716b0c4..49ce3d7aacc 100644 --- a/lib/cloud_controller/metrics/statsd_updater.rb +++ b/lib/cloud_controller/metrics/statsd_updater.rb @@ -10,7 +10,7 @@ def update_deploying_count(deploying_count) @statsd.gauge('cc.deployments.deploying', deploying_count) end - def record_user_count(user_count) + def update_user_count(user_count) @statsd.gauge('cc.total_users', user_count) end diff --git a/spec/request/internal/metrics_spec.rb b/spec/request/internal/metrics_spec.rb new file mode 100644 index 00000000000..6e78d91ce05 --- /dev/null +++ b/spec/request/internal/metrics_spec.rb @@ -0,0 +1,142 @@ +require 'spec_helper' + +RSpec.describe 'Metrics' do + let(:user) { VCAP::CloudController::User.make } + let(:user_header) { headers_for(user) } + let(:threadqueue) { double(EventMachine::Queue, size: 20, num_waiting: 0) } + let(:resultqueue) { double(EventMachine::Queue, size: 0, num_waiting: 1) } + + before do + # locator = CloudController::DependencyLocator.instance + # allow(locator).to receive(:prometheus_updater).and_return(VCAP::CloudController::Metrics::PrometheusUpdater.new(Prometheus::Client::Registry.new)) + allow(EventMachine).to receive(:connection_count).and_return(123) + + allow(EventMachine).to receive(:instance_variable_get) do |instance_var| + case instance_var + when :@threadqueue + threadqueue + when :@resultqueue + resultqueue + else + raise "Unexpected call: #{instance_var}" + end + end + end + + it 'succeeds' do + get '/internal/v4/metrics', nil + + expect(last_response.status).to eq 200 + get '/internal/v4/metrics', nil + + expect(last_response.status).to eq 200 + get '/internal/v4/metrics', nil + + expect(last_response.status).to eq 200 + end + + context 'cc_total_users' do + before do + cc_total_users = Prometheus::Client.registry.get(:cc_total_users) + if !cc_total_users.nil? 
+ cc_total_users.set(0) + end + + # Prometheus::Client::Config::data_store = nil + 10.times do + VCAP::CloudController::User.make + end + end + + it 'reports the total number of users' do + get '/internal/v4/metrics', nil + + expect(last_response.status).to eq 200 + + expect(last_response.body).to include('cc_total_users 10.0') + end + end + + context 'cc_vitals' do + it 'reports vitals' do + get '/internal/v4/metrics', nil + + expect(last_response.body).to match(/cc_vitals_num_cores [1-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_vitals_uptime [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_vitals_cpu [1-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_vitals_mem_bytes [1-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_vitals_cpu_load_avg [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_vitals_mem_used_bytes [1-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_vitals_mem_free_bytes [1-9][0-9]*\.\d+/) + end + end + + context 'cc_job_queue_length' do + # NOTE: Because there is no easy way to enqueue a job that will + # stick around for long enough to appear in the metrics, + # we're only testing that the metric is emitted in the output. + # If you can figure out how to actually get a job enqueued that + # will show up in the "job queue length" metric, please do update + # the test! 
+ it 'includes job queue length metric in output' do + get '/internal/v4/metrics', nil + + expect(last_response.body).to match(/cc_job_queue_length_cc_api_0 [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_job_queue_length_total [0-9][0-9]*\.\d+/) + end + end + + context 'cc_thread_info' do + it 'reports thread info' do + get '/internal/v4/metrics', nil + + expect(last_response.body).to match(/cc_thread_info_thread_count [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_thread_info_event_machine_connection_count [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_thread_info_event_machine_threadqueue_size [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_thread_info_event_machine_threadqueue_num_waiting [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_thread_info_event_machine_resultqueue_size [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_thread_info_event_machine_resultqueue_num_waiting [0-9][0-9]*\.\d+/) + end + end + + context 'cc_failed_job_count' do + it 'reports failed job count' do + get '/internal/v4/metrics', nil + + expect(last_response.body).to match(/cc_failed_job_count_cc_api_0 [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_failed_job_count_total [0-9][0-9]*\.\d+/) + end + end + + context 'cc_log_count' do + it 'reports log counts' do + get '/internal/v4/metrics', nil + + expect(last_response.body).to match(/cc_log_count_off [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_log_count_fatal [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_log_count_error [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_log_count_warn [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_log_count_info [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_log_count_debug1 [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_log_count_debug2 [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_log_count_all [0-9][0-9]*\.\d+/) 
+ end + end + + context 'cc_task_stats' do + it 'reports task stats' do + get '/internal/v4/metrics', nil + + expect(last_response.body).to match(/cc_tasks_running_count [0-9][0-9]*\.\d+/) + expect(last_response.body).to match(/cc_tasks_running_memory_in_mb [0-9][0-9]*\.\d+/) + end + end + + context 'cc_deploying_count' do + it 'reports deploying_count' do + get '/internal/v4/metrics', nil + + expect(last_response.body).to match(/cc_deployments_deploying [0-9][0-9]*\.\d+/) + end + end +end diff --git a/spec/unit/controllers/internal/metric_controller_spec.rb b/spec/unit/controllers/internal/metric_controller_spec.rb new file mode 100644 index 00000000000..b31e4db1ddc --- /dev/null +++ b/spec/unit/controllers/internal/metric_controller_spec.rb @@ -0,0 +1,34 @@ +require 'spec_helper' + +module VCAP::CloudController + module Internal + RSpec.describe MetricsController do + let(:threadqueue) { double(EventMachine::Queue, size: 20, num_waiting: 0) } + let(:resultqueue) { double(EventMachine::Queue, size: 0, num_waiting: 1) } + + before do + allow(EventMachine).to receive(:connection_count).and_return(123) + + allow(EventMachine).to receive(:instance_variable_get) do |instance_var| + case instance_var + when :@threadqueue + threadqueue + when :@resultqueue + resultqueue + else + raise "Unexpected call: #{instance_var}" + end + end + end + + describe '#index' do + it 'returns a 200' do + get '/internal/v4/metrics' + + expect(last_response.status).to eq 200 + expect(last_response.body).to match(/cc_vitals_num_cores [1-9][0-9]*.\d+/) + end + end + end + end +end diff --git a/spec/unit/controllers/internal/staging_completion_controller_spec.rb b/spec/unit/controllers/internal/staging_completion_controller_spec.rb index c0713b921ac..608df70341d 100644 --- a/spec/unit/controllers/internal/staging_completion_controller_spec.rb +++ b/spec/unit/controllers/internal/staging_completion_controller_spec.rb @@ -1,6 +1,7 @@ require 'spec_helper' require 'membrane' require 
'cloud_controller/diego/failure_reason_sanitizer' +require 'cloud_controller/metrics/prometheus_updater' ## NOTICE: Prefer request specs over controller specs as per ADR #0003 ## @@ -37,6 +38,12 @@ module VCAP::CloudController report_staging_failure_metrics: nil, ) end + let(:prometheus_updater) do + instance_double(VCAP::CloudController::Metrics::PrometheusUpdater, + report_staging_success_metrics: nil, + report_staging_failure_metrics: nil, + ) + end let(:one_hour) { 1.hour.to_i } let(:one_hour_in_nanoseconds) { (1.hour.to_i * 1e9).to_i } @@ -44,6 +51,7 @@ module VCAP::CloudController TestConfig.override(kubernetes: nil) allow(VCAP::CloudController::Metrics::StatsdUpdater).to receive(:new).and_return(statsd_updater) + allow(VCAP::CloudController::Metrics::PrometheusUpdater).to receive(:new).and_return(prometheus_updater) end context 'staging a package through /droplet_completed (legacy for rolling deploy)' do @@ -143,6 +151,7 @@ module VCAP::CloudController it 'emits metrics for staging success' do one_hour_in_nanoseconds = (1.hour.to_i * 1e9).to_i expect(statsd_updater).to receive(:report_staging_success_metrics).with(one_hour_in_nanoseconds) + expect(prometheus_updater).to receive(:report_staging_success_metrics).with(one_hour_in_nanoseconds) Timecop.freeze(Time.now) do post url, MultiJson.dump(staging_response) end @@ -161,6 +170,7 @@ module VCAP::CloudController it 'emits metrics for staging failure' do one_hour_in_nanoseconds = (1.hour.to_i * 1e9).to_i expect(statsd_updater).to receive(:report_staging_failure_metrics).with(one_hour_in_nanoseconds) + expect(prometheus_updater).to receive(:report_staging_failure_metrics).with(one_hour_in_nanoseconds) Timecop.freeze(Time.now) do post url, MultiJson.dump(staging_response) end @@ -357,6 +367,7 @@ module VCAP::CloudController it 'emits metrics for staging success' do one_hour_in_nanoseconds = (1.hour.to_i * 1e9).to_i expect(statsd_updater).to receive(:report_staging_success_metrics).with(one_hour_in_nanoseconds) + 
expect(prometheus_updater).to receive(:report_staging_success_metrics).with(one_hour_in_nanoseconds) Timecop.freeze(Time.now) do post url, MultiJson.dump(staging_response) end @@ -392,6 +403,7 @@ module VCAP::CloudController it 'emits metrics for staging failure' do one_hour_in_nanoseconds = (1.hour.to_i * 1e9).to_i expect(statsd_updater).to receive(:report_staging_failure_metrics).with(one_hour_in_nanoseconds) + expect(prometheus_updater).to receive(:report_staging_failure_metrics).with(one_hour_in_nanoseconds) Timecop.freeze(Time.now) do post url, MultiJson.dump(staging_response) end diff --git a/spec/unit/jobs/diego/sync_spec.rb b/spec/unit/jobs/diego/sync_spec.rb index 63e732f2a90..944c99a739e 100644 --- a/spec/unit/jobs/diego/sync_spec.rb +++ b/spec/unit/jobs/diego/sync_spec.rb @@ -27,20 +27,17 @@ module Jobs::Diego end it 'records sync duration' do - yielded_block = nil + allow(Time).to receive(:now).and_call_original + allow_any_instance_of(VCAP::CloudController::Diego::ProcessesSync).to receive(:sync) + allow_any_instance_of(VCAP::CloudController::Diego::TasksSync).to receive(:sync) - allow_any_instance_of(Statsd).to receive(:time) do |_, metric_name, &block| - expect(metric_name).to eq 'cc.diego_sync.duration' - yielded_block = block - end + expect(processes_sync).to receive(:sync) + expect(tasks_sync).to receive(:sync) + expect(Time).to receive(:now).twice # Ensure that we get two time measurements. _Hopefully_ they get turned into an elapsed time and passed in where they need to be! 
+ expect_any_instance_of(Statsd).to receive(:timing).with('cc.diego_sync.duration', kind_of(Numeric)) + expect_any_instance_of(VCAP::CloudController::Metrics::PrometheusUpdater).to receive(:report_diego_cell_sync_duration).with(kind_of(Numeric)) job.perform - expect(processes_sync).to_not have_received(:sync) - expect(tasks_sync).to_not have_received(:sync) - - yielded_block.call - expect(processes_sync).to have_received(:sync) - expect(tasks_sync).to have_received(:sync) end end end diff --git a/spec/unit/lib/cloud_controller/deployment_updater/scheduler_spec.rb b/spec/unit/lib/cloud_controller/deployment_updater/scheduler_spec.rb index f946916ebcd..10e4e336f97 100644 --- a/spec/unit/lib/cloud_controller/deployment_updater/scheduler_spec.rb +++ b/spec/unit/lib/cloud_controller/deployment_updater/scheduler_spec.rb @@ -27,6 +27,7 @@ module VCAP::CloudController let(:lock_worker) { instance_double(Locket::LockWorker) } let(:logger) { instance_double(Steno::Logger, info: nil, debug: nil, error: nil) } let(:statsd_client) { instance_double(Statsd) } + let(:prometheus_updater) { instance_double(VCAP::CloudController::Metrics::PrometheusUpdater) } before do allow(Locket::LockRunner).to receive(:new).and_return(lock_runner) @@ -37,7 +38,9 @@ module VCAP::CloudController allow(DeploymentUpdater::Scheduler).to receive(:sleep) allow(DeploymentUpdater::Dispatcher).to receive(:dispatch) allow(CloudController::DependencyLocator.instance).to receive(:statsd_client).and_return(statsd_client) - allow(statsd_client).to receive(:time).and_yield + allow(CloudController::DependencyLocator.instance).to receive(:prometheus_updater).and_return(prometheus_updater) + allow(statsd_client).to receive(:timing) + allow(prometheus_updater).to receive(:report_deployment_duration) end it 'correctly configures a LockRunner and uses it to initialize a LockWorker' do @@ -125,20 +128,13 @@ module VCAP::CloudController end end - describe 'statsd metrics' do + describe 'metrics' do it 'records the 
deployment update duration' do - timed_block = nil - - allow(statsd_client).to receive(:time) do |_, &block| - timed_block = block - end + expect(DeploymentUpdater::Dispatcher).to receive(:dispatch) + expect(statsd_client).to receive(:timing).with('cc.deployments.update.duration', kind_of(Numeric)) + expect(prometheus_updater).to receive(:report_deployment_duration).with(kind_of(Numeric)) DeploymentUpdater::Scheduler.start - expect(statsd_client).to have_received(:time).with('cc.deployments.update.duration') - - expect(DeploymentUpdater::Dispatcher).to_not have_received(:dispatch) - timed_block.call - expect(DeploymentUpdater::Dispatcher).to have_received(:dispatch) end end diff --git a/spec/unit/lib/cloud_controller/metrics/periodic_updater_spec.rb b/spec/unit/lib/cloud_controller/metrics/periodic_updater_spec.rb index e94be504ba9..98051e69180 100644 --- a/spec/unit/lib/cloud_controller/metrics/periodic_updater_spec.rb +++ b/spec/unit/lib/cloud_controller/metrics/periodic_updater_spec.rb @@ -70,7 +70,7 @@ module VCAP::CloudController::Metrics describe '#setup_updates' do before do - allow(updater1).to receive(:record_user_count) + allow(updater1).to receive(:update_user_count) allow(updater1).to receive(:update_job_queue_length) allow(updater1).to receive(:update_thread_info) allow(updater1).to receive(:update_failed_job_count) @@ -79,7 +79,7 @@ module VCAP::CloudController::Metrics allow(updater1).to receive(:update_task_stats) allow(updater1).to receive(:update_deploying_count) - allow(updater2).to receive(:record_user_count) + allow(updater2).to receive(:update_user_count) allow(updater2).to receive(:update_job_queue_length) allow(updater2).to receive(:update_thread_info) allow(updater2).to receive(:update_failed_job_count) @@ -92,7 +92,7 @@ module VCAP::CloudController::Metrics end it 'bumps the number of users and sets periodic timer' do - expect(periodic_updater).to receive(:record_user_count).once + expect(periodic_updater).to 
receive(:update_user_count).once periodic_updater.setup_updates end @@ -147,7 +147,7 @@ module VCAP::CloudController::Metrics it 'bumps the number of users and sets periodic timer' do expect(periodic_updater).to receive(:catch_error).once.and_call_original - expect(periodic_updater).to receive(:record_user_count).once + expect(periodic_updater).to receive(:update_user_count).once expect(@periodic_timers[0][:interval]).to eq(600) @periodic_timers[0][:block].call @@ -219,19 +219,19 @@ module VCAP::CloudController::Metrics end end - describe '#record_user_count' do + describe '#update_user_count' do before do - allow(updater1).to receive(:record_user_count) - allow(updater2).to receive(:record_user_count) + allow(updater1).to receive(:update_user_count) + allow(updater2).to receive(:update_user_count) end it 'should include the number of users' do 4.times { VCAP::CloudController::User.create(guid: SecureRandom.uuid) } - periodic_updater.record_user_count + periodic_updater.update_user_count - expect(updater1).to have_received(:record_user_count).with(VCAP::CloudController::User.count) - expect(updater2).to have_received(:record_user_count).with(VCAP::CloudController::User.count) + expect(updater1).to have_received(:update_user_count).with(VCAP::CloudController::User.count) + expect(updater2).to have_received(:update_user_count).with(VCAP::CloudController::User.count) end end @@ -593,7 +593,7 @@ module VCAP::CloudController::Metrics describe '#update!' 
do before do - allow(updater1).to receive(:record_user_count) + allow(updater1).to receive(:update_user_count) allow(updater1).to receive(:update_job_queue_length) allow(updater1).to receive(:update_thread_info) allow(updater1).to receive(:update_failed_job_count) @@ -602,7 +602,7 @@ module VCAP::CloudController::Metrics allow(updater1).to receive(:update_task_stats) allow(updater1).to receive(:update_deploying_count) - allow(updater2).to receive(:record_user_count) + allow(updater2).to receive(:update_user_count) allow(updater2).to receive(:update_job_queue_length) allow(updater2).to receive(:update_thread_info) allow(updater2).to receive(:update_failed_job_count) @@ -613,7 +613,7 @@ module VCAP::CloudController::Metrics end it 'calls all update methods' do - expect(periodic_updater).to receive(:record_user_count).once + expect(periodic_updater).to receive(:update_user_count).once expect(periodic_updater).to receive(:update_job_queue_length).once expect(periodic_updater).to receive(:update_thread_info).once expect(periodic_updater).to receive(:update_failed_job_count).once diff --git a/spec/unit/lib/cloud_controller/metrics/prometheus_updater_spec.rb b/spec/unit/lib/cloud_controller/metrics/prometheus_updater_spec.rb new file mode 100644 index 00000000000..4192d9c8ea4 --- /dev/null +++ b/spec/unit/lib/cloud_controller/metrics/prometheus_updater_spec.rb @@ -0,0 +1,350 @@ +require 'spec_helper' +require 'cloud_controller/metrics/prometheus_updater' + +# There are a few things to NOTE here: +# 1) We're adding this function because the Prometheus Client Registry +# is interpreter-global state, so changes that happen in one test will +# absolutely affect other tests, unless the test author has taken pains +# to carefully clean up their changes. +# 2) In our CI, tests are run in parallel to speed up execution. At the time +# of this writing (2022-05-03), we use the `grosser/parallel_tests` gem to +# run tests in parallel. 
This gem uses the `grosser/parallel` gem to distribute +# work which runs workloads in separate subprocesses, rather than threads +# in the same interpreter process. +# You will notice that we call this function that we created in a top-level +# `before` block a little bit later in this file, and _also_ that we mention +# above that the Registry is interpreter-global state. Because our +# test-parallelizer uses subprocesses to distribute its work, and also does +# not interleave tests from one file with tests from another, the registry +# recreation that we're doing here will be restricted to the tests in _this_ +# file and will not leak out into other files. +# 3) So, if you see weird or unreliable behavior out of the Registry when running +# specs (this would probably be stuff like "metric values are sometimes not what +# they should be"), check to see if our test parallelizer is still using +# subprocesses, or if it has switched to threads and runs everything in one +# interpreter. +module Prometheus + module Client + def self.recreate_registry + @registry = Prometheus::Client::Registry.new + end + end +end + +module VCAP::CloudController::Metrics + RSpec.describe PrometheusUpdater do + let(:updater) { PrometheusUpdater.new(prom_client) } + let(:prom_client) { Prometheus::Client.registry } + + before do + Prometheus::Client.recreate_registry + end + + describe 'Prometheus creation guards work correctly' do + # This might look to be a duplicate of 'records the current number of deployments that are DEPLOYING' + # below, but it tests that at least one of the metric updating functions can be called multiple times + # without failures. Because we are re-creating the Prometheus Client Registry before every test, we + # need to have at least one test that ensures that operations on a used registry won't fail.
+ # + # Because the PrometheusUpdater is designed so that all of the functionality that calls out to Prometheus is + # contained within a few short functions that follow the same obvious "Create the metric if it doesn't exist + # and then update the metric" pattern, the expectation is that future folks that work on the code will + # be unlikely to unintentionally remove the "create the metric if it doesn't exist" guard, and will also + # be unlikely to leave the guard out if they need to create a function that works with another Prometheus + # metric datatype. + it 'does not explode when the update function is called more than once' do + expected_deploying_count = 7 + + updater.update_deploying_count(expected_deploying_count) + metric = prom_client.metrics.find { |m| m.name == :cc_deployments_deploying } + expect(metric).to be_present + expect(metric.get).to eq 7 + + updater.update_deploying_count(expected_deploying_count) + metric = prom_client.metrics.find { |m| m.name == :cc_deployments_deploying } + expect(metric).to be_present + expect(metric.get).to eq 7 + end + end + + describe '#update_deploying_count' do + it 'records the current number of deployments that are DEPLOYING' do + expected_deploying_count = 7 + + updater.update_deploying_count(expected_deploying_count) + metric = prom_client.metrics.find { |m| m.name == :cc_deployments_deploying } + expect(metric).to be_present + expect(metric.get).to eq 7 + end + end + + describe '#update_user_count' do + it 'records number of users' do + expected_user_count = 5 + + updater.update_user_count(expected_user_count) + + metric = prom_client.metrics.find { |m| m.name == :cc_total_users } + expect(metric).to be_present + expect(metric.get).to eq 5 + end + end + + describe '#update_job_queue_length' do + it 'records the length of the delayed job queues and total' do + expected_local_length = 5 + expected_generic_length = 6 + total = expected_local_length + expected_generic_length + + pending_job_count_by_queue = { + 
cc_local: expected_local_length, + cc_generic: expected_generic_length + } + + updater.update_job_queue_length(pending_job_count_by_queue, total) + + metric = prom_client.metrics.find { |m| m.name == :cc_job_queue_length_cc_local } + expect(metric.get).to eq 5 + + metric = prom_client.metrics.find { |m| m.name == :cc_job_queue_length_cc_generic } + expect(metric.get).to eq 6 + + metric = prom_client.metrics.find { |m| m.name == :cc_job_queue_length_total } + expect(metric.get).to eq 11 + end + end + + describe '#update_failed_job_count' do + it 'records the number of failed jobs in the delayed job queue and the total' do + expected_local_length = 5 + expected_generic_length = 6 + total = expected_local_length + expected_generic_length + + failed_jobs_by_queue = { + cc_local: expected_local_length, + cc_generic: expected_generic_length + } + + updater.update_failed_job_count(failed_jobs_by_queue, total) + + metric = prom_client.metrics.find { |m| m.name == :cc_failed_job_count_cc_local } + expect(metric.get).to eq 5 + + metric = prom_client.metrics.find { |m| m.name == :cc_failed_job_count_cc_generic } + expect(metric.get).to eq 6 + + metric = prom_client.metrics.find { |m| m.name == :cc_failed_job_count_total } + expect(metric.get).to eq 11 + end + end + + describe '#update_thread_info' do + it 'should contain EventMachine data' do + thread_info = { + thread_count: 5, + event_machine: { + connection_count: 10, + threadqueue: { + size: 19, + num_waiting: 2, + }, + resultqueue: { + size: 8, + num_waiting: 1, + }, + }, + } + + updater.update_thread_info(thread_info) + + metric = prom_client.metrics.find { |m| m.name == :cc_thread_info_thread_count } + expect(metric.get).to eq 5 + + metric = prom_client.metrics.find { |m| m.name == :cc_thread_info_event_machine_connection_count } + expect(metric.get).to eq 10 + + metric = prom_client.metrics.find { |m| m.name == :cc_thread_info_event_machine_threadqueue_size } + expect(metric.get).to eq 19 + + metric = 
prom_client.metrics.find { |m| m.name == :cc_thread_info_event_machine_threadqueue_num_waiting } + expect(metric.get).to eq 2 + + metric = prom_client.metrics.find { |m| m.name == :cc_thread_info_event_machine_resultqueue_size } + expect(metric.get).to eq 8 + + metric = prom_client.metrics.find { |m| m.name == :cc_thread_info_event_machine_resultqueue_num_waiting } + expect(metric.get).to eq 1 + end + end + + describe '#update_vitals' do + it 'updates vitals' do + vitals = { + uptime: 33, + cpu_load_avg: 0.5, + mem_used_bytes: 542, + mem_free_bytes: 927, + mem_bytes: 1, + cpu: 2.0, + num_cores: 4, + } + + updater.update_vitals(vitals) + + metric = prom_client.metrics.find { |m| m.name == :cc_vitals_uptime } + expect(metric.get).to eq 33 + + metric = prom_client.metrics.find { |m| m.name == :cc_vitals_cpu_load_avg } + expect(metric.get).to eq 0.5 + + metric = prom_client.metrics.find { |m| m.name == :cc_vitals_mem_used_bytes } + expect(metric.get).to eq 542 + + metric = prom_client.metrics.find { |m| m.name == :cc_vitals_mem_free_bytes } + expect(metric.get).to eq 927 + + metric = prom_client.metrics.find { |m| m.name == :cc_vitals_mem_bytes } + expect(metric.get).to eq 1 + + metric = prom_client.metrics.find { |m| m.name == :cc_vitals_cpu } + expect(metric.get).to eq 2.0 + + metric = prom_client.metrics.find { |m| m.name == :cc_vitals_num_cores } + expect(metric.get).to eq 4 + end + end + + describe '#update_log_counts' do + it 'updates log counts' do + counts = { + off: 1, + fatal: 2, + error: 3, + warn: 4, + info: 5, + debug: 6, + debug1: 7, + debug2: 8, + all: 9 + } + + updater.update_log_counts(counts) + + metric = prom_client.metrics.find { |m| m.name == :cc_log_count_off } + expect(metric.get).to eq 1 + + metric = prom_client.metrics.find { |m| m.name == :cc_log_count_fatal } + expect(metric.get).to eq 2 + + metric = prom_client.metrics.find { |m| m.name == :cc_log_count_error } + expect(metric.get).to eq 3 + + metric = prom_client.metrics.find { |m| m.name 
== :cc_log_count_warn } + expect(metric.get).to eq 4 + + metric = prom_client.metrics.find { |m| m.name == :cc_log_count_info } + expect(metric.get).to eq 5 + + metric = prom_client.metrics.find { |m| m.name == :cc_log_count_debug } + expect(metric.get).to eq 6 + + metric = prom_client.metrics.find { |m| m.name == :cc_log_count_debug1 } + expect(metric.get).to eq 7 + + metric = prom_client.metrics.find { |m| m.name == :cc_log_count_debug2 } + expect(metric.get).to eq 8 + + metric = prom_client.metrics.find { |m| m.name == :cc_log_count_all } + expect(metric.get).to eq 9 + end + end + + describe '#update_task_stats' do + it 'records the number of running tasks and task memory' do + updater.update_task_stats(5, 512) + + metric = prom_client.metrics.find { |m| m.name == :cc_tasks_running_count } + expect(metric.get).to eq 5 + + metric = prom_client.metrics.find { |m| m.name == :cc_tasks_running_memory_in_mb } + expect(metric.get).to eq 512 + end + end + + describe '#update_synced_invalid_lrps' do + it 'records the number of synced invalid desired LRPs' do + updater.update_synced_invalid_lrps(5) + metric = prom_client.metrics.find { |m| m.name == :cc_diego_sync_invalid_desired_lrps } + expect(metric.get).to eq 5 + end + end + + describe '#start_staging_request_received' do + it 'increments "cc_staging_requested"' do + updater.start_staging_request_received + + metric = prom_client.metrics.find { |m| m.name == :cc_staging_requested } + expect(metric.get).to eq 1 + + updater.start_staging_request_received + + metric = prom_client.metrics.find { |m| m.name == :cc_staging_requested } + expect(metric.get).to eq 2 + end + end + + describe '#report_staging_success_metrics' do + it 'records staging success metrics' do + duration_ns = 20 * 1e9 + + updater.report_staging_success_metrics(duration_ns) + metric = prom_client.metrics.find { |m| m.name == :cc_staging_succeeded } + expect(metric.get).to eq 1 + + metric = prom_client.metrics.find { |m| m.name == 
:cc_staging_succeeded_duration } + # expected buckets for duration, in millis : 10000, 15000, 20000, 25000, 30000 + expect(metric.get).to eq({ '10000.0' => 0, '15000.0' => 0, '20000.0' => 1, '25000.0' => 1, '30000.0' => 1, 'sum' => 20000, '+Inf' => 1 }) + end + end + + describe '#report_staging_failure_metrics' do + it 'emits staging failure metrics' do + duration_ns = 20 * 1e9 + + updater.report_staging_failure_metrics(duration_ns) + metric = prom_client.metrics.find { |m| m.name == :cc_staging_failed } + expect(metric.get).to eq 1 + + metric = prom_client.metrics.find { |m| m.name == :cc_staging_failed_duration } + # expected buckets for duration, in millis : 10000, 15000, 20000, 25000, 30000 + expect(metric.get).to eq({ '10000.0' => 0, '15000.0' => 0, '20000.0' => 1, '25000.0' => 1, '30000.0' => 1, 'sum' => 20000, '+Inf' => 1 }) + end + end + + describe '#report_diego_cell_sync_duration' do + it 'reports diego cell sync duration' do + duration_ns = 20 * 1e9 + + updater.report_diego_cell_sync_duration(duration_ns) + metric = prom_client.metrics.find { |m| m.name == :cc_diego_sync_duration } + expect(metric.get).to eq({ 'count' => 1.0, 'sum' => 20000000000.0 }) + + metric = prom_client.metrics.find { |m| m.name == :cc_diego_sync_duration_gauge } + expect(metric.get).to eq duration_ns + end + end + + describe '#report_deployment_duration' do + it 'reports deployments update duration' do + duration_ns = 20 * 1e9 + + updater.report_deployment_duration(duration_ns) + metric = prom_client.metrics.find { |m| m.name == :cc_deployments_update_duration } + expect(metric.get).to eq({ 'count' => 1.0, 'sum' => 20000000000.0 }) + + metric = prom_client.metrics.find { |m| m.name == :cc_deployments_update_duration_gauge } + expect(metric.get).to eq duration_ns + end + end + end +end diff --git a/spec/unit/lib/cloud_controller/metrics/request_metrics_spec.rb b/spec/unit/lib/cloud_controller/metrics/request_metrics_spec.rb index efdf1ec9e8a..b3850633b03 100644 --- 
a/spec/unit/lib/cloud_controller/metrics/request_metrics_spec.rb +++ b/spec/unit/lib/cloud_controller/metrics/request_metrics_spec.rb @@ -4,7 +4,14 @@ module VCAP::CloudController::Metrics RSpec.describe RequestMetrics do let(:statsd_client) { double(:statsd_client) } - let!(:request_metrics) { RequestMetrics.new(statsd_client) } # TODO: probably doesn't need to be a let!, just a let + let(:prometheus_client) { double(:prometheus_client) } + let!(:request_metrics) { RequestMetrics.new(statsd_client, prometheus_client) } # TODO: probably doesn't need to be a let!, just a let + + before do + allow(prometheus_client).to receive(:update_gauge_metric) + allow(prometheus_client).to receive(:decrement_gauge_metric) + allow(prometheus_client).to receive(:increment_gauge_metric) + end describe '#start_request' do before do @@ -17,6 +24,8 @@ module VCAP::CloudController::Metrics expect(statsd_client).to have_received(:gauge).with('cc.requests.outstanding.gauge', 1) expect(statsd_client).to have_received(:increment).with('cc.requests.outstanding') + expect(prometheus_client).to have_received(:update_gauge_metric).with(:cc_requests_outstanding_gauge, 1, kind_of(String)) + expect(prometheus_client).to have_received(:increment_gauge_metric).with(:cc_requests_outstanding, kind_of(String)) end end @@ -38,17 +47,25 @@ module VCAP::CloudController::Metrics expect(batch).to have_received(:decrement).with('cc.requests.outstanding') expect(batch).to have_received(:increment).with('cc.requests.completed') expect(batch).to have_received(:increment).with('cc.http_status.2XX') + + expect(prometheus_client).to have_received(:update_gauge_metric).with(:cc_requests_outstanding_gauge, -1, kind_of(String)) + expect(prometheus_client).to have_received(:decrement_gauge_metric).with(:cc_requests_outstanding, kind_of(String)) + expect(prometheus_client).to have_received(:increment_gauge_metric).with(:cc_requests_completed, kind_of(String)) + expect(prometheus_client).to 
have_received(:increment_gauge_metric).with(:cc_http_status_2XX, kind_of(String)) end it 'normalizes http status codes in statsd' do request_metrics.complete_request(200) expect(batch).to have_received(:increment).with('cc.http_status.2XX') + expect(prometheus_client).to have_received(:increment_gauge_metric).with(:cc_http_status_2XX, kind_of(String)) request_metrics.complete_request(300) expect(batch).to have_received(:increment).with('cc.http_status.3XX') + expect(prometheus_client).to have_received(:increment_gauge_metric).with(:cc_http_status_3XX, kind_of(String)) request_metrics.complete_request(400) expect(batch).to have_received(:increment).with('cc.http_status.4XX') + expect(prometheus_client).to have_received(:increment_gauge_metric).with(:cc_http_status_4XX, kind_of(String)) end end end diff --git a/spec/unit/lib/cloud_controller/metrics/statsd_updater_spec.rb b/spec/unit/lib/cloud_controller/metrics/statsd_updater_spec.rb index 4bf89be77eb..da35b23b575 100644 --- a/spec/unit/lib/cloud_controller/metrics/statsd_updater_spec.rb +++ b/spec/unit/lib/cloud_controller/metrics/statsd_updater_spec.rb @@ -20,7 +20,7 @@ module VCAP::CloudController::Metrics end end - describe '#record_user_count' do + describe '#update_user_count' do before do allow(statsd_client).to receive(:gauge) end @@ -28,7 +28,7 @@ module VCAP::CloudController::Metrics it 'emits number of users to statsd' do expected_user_count = 5 - updater.record_user_count(expected_user_count) + updater.update_user_count(expected_user_count) expect(statsd_client).to have_received(:gauge).with('cc.total_users', expected_user_count) end