diff --git a/onnxruntime/contrib_ops/cuda/collective/distributed_expand.cc b/onnxruntime/contrib_ops/cuda/collective/distributed_expand.cc
index ec1826d1eabd2..3cfa3ab959343 100644
--- a/onnxruntime/contrib_ops/cuda/collective/distributed_expand.cc
+++ b/onnxruntime/contrib_ops/cuda/collective/distributed_expand.cc
@@ -51,10 +51,11 @@ Status DistributedExpand<T>::ComputeInternal(OpKernelContext* context) const {
   TensorShapeVector original_output_dims{p_shape, p_shape + shape_tensor->Shape().Size()};
   TensorShape original_output_shape(original_output_dims);
   ORT_ENFORCE(
-      onnxruntime::cuda::ComputeOutputShape(
-          Node().Name(),
-          original_input_shape,
-          original_output_dims, original_output_shape).IsOK());
+      onnxruntime::cuda::ComputeOutputShape(
+          Node().Name(),
+          original_input_shape,
+          original_output_dims, original_output_shape)
+          .IsOK());
 
   // Compute local output shape.
   const auto local_output_shape = ComputeShardShape(original_output_shape, output_sharding_spec);
@@ -62,11 +63,11 @@ Status DistributedExpand<T>::ComputeInternal(OpKernelContext* context) const {
   auto output_tensor = context->Output(0, local_output_shape);
 
   return FuncExpand(
-      this,
-      context,
-      input_tensor,
-      shape_tensor,
-      output_tensor);
+      this,
+      context,
+      input_tensor,
+      shape_tensor,
+      output_tensor);
 }
 
 ONNX_OPERATOR_TYPED_KERNEL_EX(
diff --git a/onnxruntime/core/providers/cuda/tensor/expand.cc b/onnxruntime/core/providers/cuda/tensor/expand.cc
index 368c167f58641..806ecfa1aab17 100644
--- a/onnxruntime/core/providers/cuda/tensor/expand.cc
+++ b/onnxruntime/core/providers/cuda/tensor/expand.cc
@@ -148,7 +148,6 @@ Status FuncExpand(
     const Tensor* input_data_tensor,
     const Tensor* /*input_shape_tensor*/,
     Tensor* output_tensor) {
-
   TensorShape output_shape = output_tensor->Shape();
 
 #ifdef ENABLE_STRIDED_TENSORS
@@ -203,10 +202,11 @@ std::unique_ptr<Tensor> FuncExpand(
 
   TensorShape output_shape(output_dims);
   ORT_ENFORCE(
-      ComputeOutputShape(
-          cuda_kernel->Node().Name(),
-          input_data_tensor->Shape(),
-          output_dims, output_shape).IsOK());
+      ComputeOutputShape(
+          cuda_kernel->Node().Name(),
+          input_data_tensor->Shape(),
+          output_dims, output_shape)
+          .IsOK());
 
   // Pre-allocate output.
   AllocatorPtr alloc;