Add Stripe client telemetry to request headers #766

Merged · 18 commits · Jan 17, 2019
Changes from 14 commits
96 changes: 96 additions & 0 deletions stripe.go
@@ -67,6 +67,15 @@ var LogLevel = 2
// be overridden if a backend is created with GetBackendWithConfig.
var Logger Printfer

// EnableTelemetry is a global override for enabling client telemetry, which
// sends request performance metrics to Stripe via the `X-Stripe-Client-Telemetry`
// header. If set to true, all clients will send telemetry metrics. Defaults to
// false.
//
// Telemetry can also be enabled on a per-client basis by instead creating a
// `BackendConfig` with `EnableTelemetry: true`.
var EnableTelemetry = false

//
// Public types
//
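
For orientation, here is a minimal usage sketch of the two opt-in paths this diff adds. It is illustrative only: the import path and the mostly-empty BackendConfig are assumptions based on the tests further down in this PR, not part of the change itself.

package main

import (
	stripe "github.com/stripe/stripe-go"
)

func main() {
	// Global opt-in: every backend created afterwards sends request metrics
	// via the X-Stripe-Client-Telemetry header on subsequent requests.
	stripe.EnableTelemetry = true

	// Per-backend opt-in, mirroring the tests in stripe_test.go below.
	backend := stripe.GetBackendWithConfig(
		stripe.APIBackend,
		&stripe.BackendConfig{
			EnableTelemetry: true,
		},
	)
	_ = backend
}
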
@@ -138,6 +147,12 @@ type BackendConfig struct {
//
// If left empty, it'll be set to the default for the SupportedBackend.
URL string

// EnableTelemetry allows request metrics (request id and duration) to be sent
// to Stripe in subsequent requests via the `X-Stripe-Client-Telemetry` header.
//
// Defaults to false.
EnableTelemetry bool
Contributor
Likewise, can you put EnableTelemetry into the right place alphabetically?

}

// BackendImplementation is the internal implementation for making HTTP calls
@@ -158,6 +173,10 @@ type BackendImplementation struct {
//
// See also SetNetworkRetriesSleep.
networkRetriesSleep bool

enableTelemetry bool
requestMetricsBuffer chan requestMetrics
requestMetricsMutex *sync.Mutex
}

// Call is the Backend.Call implementation for invoking Stripe APIs.
@@ -292,8 +311,23 @@ func (s *BackendImplementation) Do(req *http.Request, body *bytes.Buffer, v inte
s.Logger.Printf("Requesting %v %v%v\n", req.Method, req.URL.Host, req.URL.Path)
}

if s.enableTelemetry {
select {
case metrics := <-s.requestMetricsBuffer:
metricsJSON, err := json.Marshal(&requestTelemetry{LastRequestMetrics: metrics})
if err == nil {
req.Header.Set("X-Stripe-Client-Telemetry", string(metricsJSON))
} else if s.LogLevel > 2 {
s.Logger.Printf("Unable to encode client telemetry: %s", err)
}
default:
// no metrics available, ignore.
nit: might be worth amending this comment to point out that this default case needs to be here to enable a non-blocking receive on the channel. Wouldn't want some over-zealous refactorer to remove it, thinking that it was an innocuous block.

}
}
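
As the comment above notes, the empty default case is what makes the receive non-blocking. A tiny standalone sketch of the pattern (not code from this PR):

package main

import "fmt"

func main() {
	metrics := make(chan string, 1)

	// With a default case, select returns immediately when the channel is
	// empty instead of parking the request goroutine until a metric arrives.
	select {
	case m := <-metrics:
		fmt.Println("attach telemetry header:", m)
	default:
		fmt.Println("no buffered metrics; send the request without the header")
	}
}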

var res *http.Response
var err error
var requestDurationMS int
for retry := 0; ; {
start := time.Now()

@@ -338,8 +372,14 @@ func (s *BackendImplementation) Do(req *http.Request, body *bytes.Buffer, v inte
}
}

// `requestStart` is used solely for client telemetry and, unlike `start`,
// does not account for the time spent building the request body.
requestStart := time.Now()
Contributor
Can we just use start? All that's happening between the two is some structs being initialized locally. In practice it's likely to have a negligible effect on timing, and I don't think it's worth complicating the code over.


res, err = s.HTTPClient.Do(req)

requestDurationMS = int(time.Since(requestStart) / time.Millisecond)

if s.LogLevel > 2 {
s.Logger.Printf("Request completed in %v (retry: %v)\n",
time.Since(start), retry)
@@ -387,6 +427,18 @@ func (s *BackendImplementation) Do(req *http.Request, body *bytes.Buffer, v inte
return err
}

if s.enableTelemetry {
reqID := res.Header.Get("Request-Id")
if len(reqID) > 0 {
metrics := requestMetrics{
RequestID: reqID,
RequestDurationMS: requestDurationMS,
}

s.recordRequestMetrics(metrics)
}
}

defer res.Body.Close()

resBody, err := ioutil.ReadAll(res.Body)
@@ -779,6 +831,9 @@ const minNetworkRetriesDelay = 500 * time.Millisecond

const uploadsURL = "https://uploads.stripe.com"

// The number of requestMetric objects to buffer for client telemetry.
const telemetryBufferSize = 16
Contributor @brandur-stripe · Jan 17, 2019
Can you move this up a couple lines? (The constant names are ordered alphabetically so let's try to keep them that way.)

Also, would you mind amending the comment to mention that additional objects will be dropped if the buffer is full? It'll make it extra clear what the lib does when the buffer is full (as opposed to other behaviors like growing the buffer, or sending metrics synchronously, etc.)

(I know you mention that at the site where the channel is actually written to, but I think it'd be helpful to have here as well).


//
// Private types
//
@@ -805,6 +860,18 @@ type stripeClientUserAgent struct {
Uname string `json:"uname"`
}

// requestMetrics contains the id and duration of the last request sent
type requestMetrics struct {
RequestID string `json:"request_id"`
RequestDurationMS int `json:"request_duration_ms"`
}

// requestTelemetry contains the payload sent in the
// `X-Stripe-Client-Telemetry` header when BackendConfig#EnableTelemetry = true.
type requestTelemetry struct {
LastRequestMetrics requestMetrics `json:"last_request_metrics"`
}

//
// Private variables
//
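
To make the wire format concrete, here is a self-contained sketch that mirrors the struct tags above; the request id and duration are invented values.

package main

import (
	"encoding/json"
	"fmt"
)

type requestMetrics struct {
	RequestID         string `json:"request_id"`
	RequestDurationMS int    `json:"request_duration_ms"`
}

type requestTelemetry struct {
	LastRequestMetrics requestMetrics `json:"last_request_metrics"`
}

func main() {
	payload, err := json.Marshal(&requestTelemetry{
		LastRequestMetrics: requestMetrics{RequestID: "req_123", RequestDurationMS: 42},
	})
	if err != nil {
		panic(err)
	}
	// Prints the value carried in the X-Stripe-Client-Telemetry header:
	// {"last_request_metrics":{"request_id":"req_123","request_duration_ms":42}}
	fmt.Println(string(payload))
}
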
@@ -878,6 +945,14 @@ func isHTTPWriteMethod(method string) bool {
// The vast majority of the time you should be calling GetBackendWithConfig
// instead of this function.
func newBackendImplementation(backendType SupportedBackend, config *BackendConfig) Backend {
var requestMetricsBuffer chan requestMetrics
enableTelemetry := config.EnableTelemetry || EnableTelemetry

// only allocate the requestMetrics buffer if client telemetry is enabled.
if enableTelemetry {
requestMetricsBuffer = make(chan requestMetrics, telemetryBufferSize)
}

return &BackendImplementation{
HTTPClient: config.HTTPClient,
LogLevel: config.LogLevel,
@@ -886,6 +961,27 @@ func newBackendImplementation(backendType SupportedBackend, config *BackendConfi
Type: backendType,
URL: config.URL,
networkRetriesSleep: true,
enableTelemetry: enableTelemetry,

// requestMetricsBuffer is a circular buffer of unsent metrics from previous
// requests. You should not write to requestMetricsBuffer without holding the
// requestMetricsMutex lock.
requestMetricsBuffer: requestMetricsBuffer,
requestMetricsMutex: &sync.Mutex{},
}
}

func (s *BackendImplementation) recordRequestMetrics(r requestMetrics) {
s.requestMetricsMutex.Lock()
defer s.requestMetricsMutex.Unlock()

// treat requestMetricsBuffer as a circular buffer: if it is full, pop off the
// oldest requestMetrics struct and insert r.
select {
case s.requestMetricsBuffer <- r:
default:
<-s.requestMetricsBuffer
@bobby-stripe · Jan 16, 2019

@jameshageman-stripe I like how clever this is to avoid taking a lock when reading the buffer, but I think as-is there is the (remote but possible) possibility of a deadlock:

goroutine 1:
locks requestMetricsMutex
attempt to write to requestMetricsBuffer, but it is full, fall through to the default case on line 982
goroutine 1 descheduled

goroutine 2..n:
read from requestMetricsBuffer until it is empty (because the user's application has issued a burst of API requests, or something)

goroutine 1:
rescheduled
attempt to read from <-s.requestMetricsBuffer, which will block

goroutine 1 is now blocked waiting for a metric to appear on the channel, but nobody else will be able to write to the channel as requestMetricsMutex is held (all incoming requests will now also deadlock, waiting on that mutex).

I think this can be addressed by changing:

<-s.requestMetricsBuffer

to

select {
case _ = <-s.requestMetricsBuffer:
default:
}

So that we don't do a blocking channel read with the mutex held (along with a big fat comment), but maybe it's worth considering if this is too clever and we should unconditionally grab the mutex for all reads + writes of s.requestMetricsBuffer

Unless I'm reading this wrong and there is nothing to worry about, which is possible!

s.requestMetricsBuffer <- r
}
}
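
One way @bobby-stripe's suggestion could look, sketched here rather than taken from a later commit: the pop becomes non-blocking so the mutex is never held across a blocking channel read. The trailing send then cannot block, because the mutex guarantees a single writer and concurrent readers only free up capacity.

func (s *BackendImplementation) recordRequestMetrics(r requestMetrics) {
	s.requestMetricsMutex.Lock()
	defer s.requestMetricsMutex.Unlock()

	select {
	case s.requestMetricsBuffer <- r:
	default:
		// Buffer full: drop the oldest entry, but don't block if concurrent
		// readers have already drained the channel.
		select {
		case <-s.requestMetricsBuffer:
		default:
		}
		// Safe to send: we are the only writer (mutex held) and the buffer is
		// no longer full, so this cannot block.
		s.requestMetricsBuffer <- r
	}
}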

202 changes: 202 additions & 0 deletions stripe_test.go
@@ -4,6 +4,7 @@ import (
"bytes"
"context"
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
"regexp"
@@ -165,6 +166,207 @@ func TestDo_RetryOnTimeout(t *testing.T) {
assert.Equal(t, uint32(2), atomic.LoadUint32(&counter))
}

// Test that telemetry metrics are not sent by default
func TestDo_TelemetryDisabled(t *testing.T) {
type testServerResponse struct {
Message string `json:"message"`
}

message := "Hello, client."
requestNum := 0

testServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {

Great test. 👍 I love Go's httptest package!

// none of the requests should include telemetry metrics
assert.Equal(t, r.Header.Get("X-Stripe-Client-Telemetry"), "")

response := testServerResponse{Message: message}

data, err := json.Marshal(response)
assert.NoError(t, err)

_, err = w.Write(data)
assert.NoError(t, err)

requestNum++
}))
defer testServer.Close()

backend := stripe.GetBackendWithConfig(
stripe.APIBackend,
&stripe.BackendConfig{
LogLevel: 3,
MaxNetworkRetries: 0,
URL: testServer.URL,
},
).(*stripe.BackendImplementation)

for i := 0; i < 2; i++ {

It's initially unclear why this loop executes twice. Can you add a quick comment that explains that it's because the telemetry comes from the previous completed request/response?

request, err := backend.NewRequest(
http.MethodGet,
"/hello",
"sk_test_123",
"application/x-www-form-urlencoded",
nil,
)
assert.NoError(t, err)

var response testServerResponse
err = backend.Do(request, nil, &response)

assert.NoError(t, err)
assert.Equal(t, message, response.Message)
}

// We should have seen exactly two requests.
assert.Equal(t, 2, requestNum)
}

// Test that telemetry metrics are sent on subsequent requests when
// stripe.EnableTelemetry = true.
func TestDo_TelemetryEnabled(t *testing.T) {
type testServerResponse struct {
Message string `json:"message"`
}

type requestMetrics struct {
RequestID string `json:"request_id"`
RequestDurationMS int `json:"request_duration_ms"`
}

type requestTelemetry struct {
LastRequestMetrics requestMetrics `json:"last_request_metrics"`
}

message := "Hello, client."
requestNum := 0

testServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
requestNum++

telemetryStr := r.Header.Get("X-Stripe-Client-Telemetry")
switch requestNum {

👍

case 1:
// the first request should not receive any metrics
assert.Equal(t, telemetryStr, "")
case 2:
assert.True(t, len(telemetryStr) > 0, "telemetryStr should not be empty")

// the telemetry should properly unmarshal into stripe.RequestTelemetry
var telemetry requestTelemetry
err := json.Unmarshal([]byte(telemetryStr), &telemetry)
assert.NoError(t, err)

// the second request should include the metrics for the first request
assert.Equal(t, telemetry.LastRequestMetrics.RequestID, "req_1")
default:
assert.Fail(t, "Should not have reached request %v", requestNum)
}

w.Header().Set("Request-Id", fmt.Sprintf("req_%d", requestNum))
response := testServerResponse{Message: message}

data, err := json.Marshal(response)
assert.NoError(t, err)

_, err = w.Write(data)
assert.NoError(t, err)
}))
defer testServer.Close()

backend := stripe.GetBackendWithConfig(
stripe.APIBackend,
&stripe.BackendConfig{
LogLevel: 3,
MaxNetworkRetries: 0,
URL: testServer.URL,
EnableTelemetry: true,
},
).(*stripe.BackendImplementation)

for i := 0; i < 2; i++ {
request, err := backend.NewRequest(
http.MethodGet,
"/hello",
"sk_test_123",
"application/x-www-form-urlencoded",
nil,
)
assert.NoError(t, err)

var response testServerResponse
err = backend.Do(request, nil, &response)

assert.NoError(t, err)
assert.Equal(t, message, response.Message)
}

// We should have seen exactly two requests.
assert.Equal(t, 2, requestNum)
}

func TestDo_TelemetryEnabledNoDataRace(t *testing.T) {
type testServerResponse struct {
Message string `json:"message"`
}

message := "Hello, client."
var requestNum int32

testServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
reqID := atomic.AddInt32(&requestNum, 1)

w.Header().Set("Request-Id", fmt.Sprintf("req_%d", reqID))
response := testServerResponse{Message: message}

data, err := json.Marshal(response)
assert.NoError(t, err)

_, err = w.Write(data)
assert.NoError(t, err)
}))
defer testServer.Close()

backend := stripe.GetBackendWithConfig(
stripe.APIBackend,
&stripe.BackendConfig{
LogLevel: 3,
MaxNetworkRetries: 0,
URL: testServer.URL,
EnableTelemetry: true,
},
).(*stripe.BackendImplementation)

times := 20 // 20 > telemetryBufferSize, so some metrics could be discarded
done := make(chan struct{})

for i := 0; i < times; i++ {
go func() {
request, err := backend.NewRequest(
http.MethodGet,
"/hello",
"sk_test_123",
"application/x-www-form-urlencoded",
nil,
)
assert.NoError(t, err)

var response testServerResponse
err = backend.Do(request, nil, &response)

assert.NoError(t, err)
assert.Equal(t, message, response.Message)

done <- struct{}{}
}()
}

for i := 0; i < times; i++ {
<-done
}

assert.Equal(t, int32(times), requestNum)
}

func TestFormatURLPath(t *testing.T) {
assert.Equal(t, "/v1/resources/1/subresources/2",
stripe.FormatURLPath("/v1/resources/%s/subresources/%s", "1", "2"))