diff --git a/examples/serve/min_replicas_zero.yaml b/examples/serve/min_replicas_zero.yaml
new file mode 100644
index 00000000000..e6e54cd41f2
--- /dev/null
+++ b/examples/serve/min_replicas_zero.yaml
@@ -0,0 +1,27 @@
+# SkyServe YAML to test min_replicas=0 with a simple http server.
+# The service will be initialized with no replicas (min_replicas = 0).
+# Any traffic to the service will trigger an immediate scale-up.
+# The service will be scaled down to 0 replicas when there is no traffic
+# for a long time.
+#
+# Usage:
+#   sky serve up -n min_replicas examples/serve/min_replicas_zero.yaml
+# The endpoint will be printed in the console.
+# Querying the endpoint will trigger a scale up.
+
+service:
+  readiness_probe:
+    path: /health
+    initial_delay_seconds: 20
+  replica_policy:
+    min_replicas: 0  # Start with no replica; scale up on incoming traffic.
+    max_replicas: 2
+    target_qps_per_replica: 1
+
+resources:
+  ports: 8081
+  cpus: 2+
+
+workdir: examples/serve/http_server
+
+run: python3 server.py
diff --git a/sky/cli.py b/sky/cli.py
index e457269e0b4..a66dffbee64 100644
--- a/sky/cli.py
+++ b/sky/cli.py
@@ -4386,6 +4386,9 @@ def serve_status(all: bool, endpoint: bool, service_names: List[str]):
       down. This usually indicates resource leakages. If you see such status,
       please login to the cloud console and double-check
 
+    - ``NO_REPLICA``: The service has no replicas. This usually happens when
+      min_replicas is set to 0 and there is no traffic to the system.
+
     Each replica can have one of the following statuses:
 
     - ``PENDING``: The maximum number of simultaneous launches has been reached
diff --git a/sky/serve/autoscalers.py b/sky/serve/autoscalers.py
index 29f58a21370..fd5dc4b6148 100644
--- a/sky/serve/autoscalers.py
+++ b/sky/serve/autoscalers.py
@@ -154,7 +154,7 @@ def _get_desired_num_replicas(self) -> int:
         logger.info(f'Requests per second: {num_requests_per_second}, '
                     f'Current target number of replicas: {target_num_replicas}')
 
-        if not self.bootstrap_done:
+        if not self.bootstrap_done or self.target_num_replicas == 0:
             self.bootstrap_done = True
             return target_num_replicas
         elif target_num_replicas > self.target_num_replicas:
@@ -173,6 +173,14 @@ def _get_desired_num_replicas(self) -> int:
             self.upscale_counter = self.downscale_counter = 0
         return self.target_num_replicas
 
+    def get_decision_interval(self) -> int:
+        # Reduce autoscaler interval when target_num_replicas = 0.
+        # This will happen when min_replicas = 0 and no traffic.
+        if self.target_num_replicas == 0:
+            return constants.AUTOSCALER_NO_REPLICA_DECISION_INTERVAL_SECONDS
+        else:
+            return constants.AUTOSCALER_DEFAULT_DECISION_INTERVAL_SECONDS
+
     def evaluate_scaling(
         self,
         replica_infos: List['replica_managers.ReplicaInfo'],
diff --git a/sky/serve/constants.py b/sky/serve/constants.py
index 73e2e21c2ca..c292a59e88b 100644
--- a/sky/serve/constants.py
+++ b/sky/serve/constants.py
@@ -36,6 +36,8 @@
 # Autoscaler scale decision interval in seconds.
 # We will try to scale up/down every `decision_interval`.
 AUTOSCALER_DEFAULT_DECISION_INTERVAL_SECONDS = 20
+# Autoscaler decision interval in seconds while the target replica count is 0.
+AUTOSCALER_NO_REPLICA_DECISION_INTERVAL_SECONDS = 5
 # Autoscaler default upscale delays in seconds.
 # We will upscale only if the target number of instances
 # is larger than the current launched instances for delay amount of time.
diff --git a/sky/serve/controller.py b/sky/serve/controller.py
index 8497da07afc..36e5e97c74a 100644
--- a/sky/serve/controller.py
+++ b/sky/serve/controller.py
@@ -88,7 +88,7 @@ def _run_autoscaler(self):
                              f'{common_utils.format_exception(e)}')
                 with ux_utils.enable_traceback():
                     logger.error(f'  Traceback: {traceback.format_exc()}')
-            time.sleep(constants.AUTOSCALER_DEFAULT_DECISION_INTERVAL_SECONDS)
+            time.sleep(self._autoscaler.get_decision_interval())
 
     def run(self) -> None:
 
diff --git a/sky/serve/serve_state.py b/sky/serve/serve_state.py
index b09dd063c90..10c6a505267 100644
--- a/sky/serve/serve_state.py
+++ b/sky/serve/serve_state.py
@@ -153,6 +153,9 @@ class ServiceStatus(enum.Enum):
     # Clean up failed
     FAILED_CLEANUP = 'FAILED_CLEANUP'
 
+    # The service has no replicas (e.g. min_replicas = 0 and no traffic)
+    NO_REPLICA = 'NO_REPLICA'
+
     @classmethod
     def failed_statuses(cls) -> List['ServiceStatus']:
         return [cls.CONTROLLER_FAILED, cls.FAILED_CLEANUP]
@@ -175,6 +178,9 @@ def from_replica_statuses(
         if sum(status2num[status]
                for status in ReplicaStatus.failed_statuses()) > 0:
             return cls.FAILED
+        # When min_replicas = 0, there is no (provisioning) replica.
+        if len(replica_statuses) == 0:
+            return cls.NO_REPLICA
         return cls.REPLICA_INIT
 
 
@@ -186,6 +192,7 @@ def from_replica_statuses(
     ServiceStatus.SHUTTING_DOWN: colorama.Fore.YELLOW,
     ServiceStatus.FAILED: colorama.Fore.RED,
     ServiceStatus.FAILED_CLEANUP: colorama.Fore.RED,
+    ServiceStatus.NO_REPLICA: colorama.Fore.MAGENTA,
 }
 
 
diff --git a/sky/serve/service_spec.py b/sky/serve/service_spec.py
index d73477c4b95..06a7fe8d540 100644
--- a/sky/serve/service_spec.py
+++ b/sky/serve/service_spec.py
@@ -33,9 +33,9 @@ def __init__(
         qps_upper_threshold: Optional[float] = None,
         qps_lower_threshold: Optional[float] = None,
     ) -> None:
-        if min_replicas <= 0:
+        if min_replicas < 0:
             with ux_utils.print_exception_no_traceback():
-                raise ValueError('min_replicas must be greater than 0')
+                raise ValueError('min_replicas must be greater or equal to 0')
         if max_replicas is not None and max_replicas < min_replicas:
             with ux_utils.print_exception_no_traceback():
                 raise ValueError(