diff --git a/apis/go/mlops/agent/agent.pb.go b/apis/go/mlops/agent/agent.pb.go index 10f30ef566..bf8df2fd1f 100644 --- a/apis/go/mlops/agent/agent.pb.go +++ b/apis/go/mlops/agent/agent.pb.go @@ -204,6 +204,7 @@ type ModelEventMessage struct { Event ModelEventMessage_Event `protobuf:"varint,5,opt,name=event,proto3,enum=seldon.mlops.agent.ModelEventMessage_Event" json:"event,omitempty"` Message string `protobuf:"bytes,6,opt,name=message,proto3" json:"message,omitempty"` AvailableMemoryBytes uint64 `protobuf:"varint,7,opt,name=availableMemoryBytes,proto3" json:"availableMemoryBytes,omitempty"` + RuntimeInfo *ModelRuntimeInfo `protobuf:"bytes,8,opt,name=runtimeInfo,proto3" json:"runtimeInfo,omitempty"` } func (x *ModelEventMessage) Reset() { @@ -287,6 +288,13 @@ func (x *ModelEventMessage) GetAvailableMemoryBytes() uint64 { return 0 } +func (x *ModelEventMessage) GetRuntimeInfo() *ModelRuntimeInfo { + if x != nil { + return x.RuntimeInfo + } + return nil +} + type ModelEventResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -802,8 +810,9 @@ type ModelVersion struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Model *scheduler.Model `protobuf:"bytes,1,opt,name=model,proto3" json:"model,omitempty"` - Version uint32 `protobuf:"varint,2,opt,name=version,proto3" json:"version,omitempty"` + Model *scheduler.Model `protobuf:"bytes,1,opt,name=model,proto3" json:"model,omitempty"` + Version uint32 `protobuf:"varint,2,opt,name=version,proto3" json:"version,omitempty"` + RuntimeInfo *ModelRuntimeInfo `protobuf:"bytes,3,opt,name=runtimeInfo,proto3" json:"runtimeInfo,omitempty"` } func (x *ModelVersion) Reset() { @@ -852,6 +861,235 @@ func (x *ModelVersion) GetVersion() uint32 { return 0 } +func (x *ModelVersion) GetRuntimeInfo() *ModelRuntimeInfo { + if x != nil { + return x.RuntimeInfo + } + return nil +} + +type ModelRuntimeInfo struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Types that are assignable to ModelRuntimeInfo: + // + // *ModelRuntimeInfo_Mlserver + // *ModelRuntimeInfo_Triton + ModelRuntimeInfo isModelRuntimeInfo_ModelRuntimeInfo `protobuf_oneof:"modelRuntimeInfo"` +} + +func (x *ModelRuntimeInfo) Reset() { + *x = ModelRuntimeInfo{} + if protoimpl.UnsafeEnabled { + mi := &file_mlops_agent_agent_proto_msgTypes[10] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *ModelRuntimeInfo) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ModelRuntimeInfo) ProtoMessage() {} + +func (x *ModelRuntimeInfo) ProtoReflect() protoreflect.Message { + mi := &file_mlops_agent_agent_proto_msgTypes[10] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ModelRuntimeInfo.ProtoReflect.Descriptor instead. 
+func (*ModelRuntimeInfo) Descriptor() ([]byte, []int) { + return file_mlops_agent_agent_proto_rawDescGZIP(), []int{10} +} + +func (m *ModelRuntimeInfo) GetModelRuntimeInfo() isModelRuntimeInfo_ModelRuntimeInfo { + if m != nil { + return m.ModelRuntimeInfo + } + return nil +} + +func (x *ModelRuntimeInfo) GetMlserver() *MLServerModelSettings { + if x, ok := x.GetModelRuntimeInfo().(*ModelRuntimeInfo_Mlserver); ok { + return x.Mlserver + } + return nil +} + +func (x *ModelRuntimeInfo) GetTriton() *TritonModelConfig { + if x, ok := x.GetModelRuntimeInfo().(*ModelRuntimeInfo_Triton); ok { + return x.Triton + } + return nil +} + +type isModelRuntimeInfo_ModelRuntimeInfo interface { + isModelRuntimeInfo_ModelRuntimeInfo() +} + +type ModelRuntimeInfo_Mlserver struct { + Mlserver *MLServerModelSettings `protobuf:"bytes,1,opt,name=mlserver,proto3,oneof"` +} + +type ModelRuntimeInfo_Triton struct { + Triton *TritonModelConfig `protobuf:"bytes,2,opt,name=triton,proto3,oneof"` +} + +func (*ModelRuntimeInfo_Mlserver) isModelRuntimeInfo_ModelRuntimeInfo() {} + +func (*ModelRuntimeInfo_Triton) isModelRuntimeInfo_ModelRuntimeInfo() {} + +type MLServerModelSettings struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + ParallelWorkers uint32 `protobuf:"varint,1,opt,name=parallelWorkers,proto3" json:"parallelWorkers,omitempty"` +} + +func (x *MLServerModelSettings) Reset() { + *x = MLServerModelSettings{} + if protoimpl.UnsafeEnabled { + mi := &file_mlops_agent_agent_proto_msgTypes[11] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *MLServerModelSettings) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*MLServerModelSettings) ProtoMessage() {} + +func (x *MLServerModelSettings) ProtoReflect() protoreflect.Message { + mi := &file_mlops_agent_agent_proto_msgTypes[11] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use MLServerModelSettings.ProtoReflect.Descriptor instead. +func (*MLServerModelSettings) Descriptor() ([]byte, []int) { + return file_mlops_agent_agent_proto_rawDescGZIP(), []int{11} +} + +func (x *MLServerModelSettings) GetParallelWorkers() uint32 { + if x != nil { + return x.ParallelWorkers + } + return 0 +} + +type TritonModelConfig struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Cpu []*TritonCPU `protobuf:"bytes,1,rep,name=cpu,proto3" json:"cpu,omitempty"` +} + +func (x *TritonModelConfig) Reset() { + *x = TritonModelConfig{} + if protoimpl.UnsafeEnabled { + mi := &file_mlops_agent_agent_proto_msgTypes[12] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *TritonModelConfig) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*TritonModelConfig) ProtoMessage() {} + +func (x *TritonModelConfig) ProtoReflect() protoreflect.Message { + mi := &file_mlops_agent_agent_proto_msgTypes[12] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use TritonModelConfig.ProtoReflect.Descriptor instead. 
+func (*TritonModelConfig) Descriptor() ([]byte, []int) { + return file_mlops_agent_agent_proto_rawDescGZIP(), []int{12} +} + +func (x *TritonModelConfig) GetCpu() []*TritonCPU { + if x != nil { + return x.Cpu + } + return nil +} + +type TritonCPU struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + InstanceCount uint32 `protobuf:"varint,1,opt,name=instanceCount,proto3" json:"instanceCount,omitempty"` +} + +func (x *TritonCPU) Reset() { + *x = TritonCPU{} + if protoimpl.UnsafeEnabled { + mi := &file_mlops_agent_agent_proto_msgTypes[13] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *TritonCPU) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*TritonCPU) ProtoMessage() {} + +func (x *TritonCPU) ProtoReflect() protoreflect.Message { + mi := &file_mlops_agent_agent_proto_msgTypes[13] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use TritonCPU.ProtoReflect.Descriptor instead. +func (*TritonCPU) Descriptor() ([]byte, []int) { + return file_mlops_agent_agent_proto_rawDescGZIP(), []int{13} +} + +func (x *TritonCPU) GetInstanceCount() uint32 { + if x != nil { + return x.InstanceCount + } + return 0 +} + var File_mlops_agent_agent_proto protoreflect.FileDescriptor var file_mlops_agent_agent_proto_rawDesc = []byte{ @@ -859,8 +1097,8 @@ var file_mlops_agent_agent_proto_rawDesc = []byte{ 0x65, 0x6e, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x12, 0x73, 0x65, 0x6c, 0x64, 0x6f, 0x6e, 0x2e, 0x6d, 0x6c, 0x6f, 0x70, 0x73, 0x2e, 0x61, 0x67, 0x65, 0x6e, 0x74, 0x1a, 0x1f, 0x6d, 0x6c, 0x6f, 0x70, 0x73, 0x2f, 0x73, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, 0x65, 0x72, 0x2f, 0x73, - 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, 0x65, 0x72, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0xc2, - 0x03, 0x0a, 0x11, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x4d, 0x65, 0x73, + 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, 0x65, 0x72, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0x8a, + 0x04, 0x0a, 0x11, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x1e, 0x0a, 0x0a, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x4e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x1e, 0x0a, 0x0a, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x49, @@ -878,117 +1116,149 @@ var file_mlops_agent_agent_proto_rawDesc = []byte{ 0x61, 0x67, 0x65, 0x12, 0x32, 0x0a, 0x14, 0x61, 0x76, 0x61, 0x69, 0x6c, 0x61, 0x62, 0x6c, 0x65, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x42, 0x79, 0x74, 0x65, 0x73, 0x18, 0x07, 0x20, 0x01, 0x28, 0x04, 0x52, 0x14, 0x61, 0x76, 0x61, 0x69, 0x6c, 0x61, 0x62, 0x6c, 0x65, 0x4d, 0x65, 0x6d, 0x6f, - 0x72, 0x79, 0x42, 0x79, 0x74, 0x65, 0x73, 0x22, 0x99, 0x01, 0x0a, 0x05, 0x45, 0x76, 0x65, 0x6e, - 0x74, 0x12, 0x11, 0x0a, 0x0d, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x5f, 0x45, 0x56, 0x45, - 0x4e, 0x54, 0x10, 0x00, 0x12, 0x14, 0x0a, 0x10, 0x4c, 0x4f, 0x41, 0x44, 0x5f, 0x46, 0x41, 0x49, - 0x4c, 0x5f, 0x4d, 0x45, 0x4d, 0x4f, 0x52, 0x59, 0x10, 0x01, 0x12, 0x0a, 0x0a, 0x06, 0x4c, 0x4f, - 0x41, 0x44, 0x45, 0x44, 0x10, 0x02, 0x12, 0x0f, 0x0a, 0x0b, 0x4c, 0x4f, 0x41, 0x44, 0x5f, 0x46, - 0x41, 0x49, 0x4c, 0x45, 0x44, 0x10, 0x03, 0x12, 0x0c, 0x0a, 0x08, 0x55, 0x4e, 0x4c, 0x4f, 0x41, - 0x44, 0x45, 
0x44, 0x10, 0x04, 0x12, 0x11, 0x0a, 0x0d, 0x55, 0x4e, 0x4c, 0x4f, 0x41, 0x44, 0x5f, - 0x46, 0x41, 0x49, 0x4c, 0x45, 0x44, 0x10, 0x05, 0x12, 0x0b, 0x0a, 0x07, 0x52, 0x45, 0x4d, 0x4f, - 0x56, 0x45, 0x44, 0x10, 0x06, 0x12, 0x11, 0x0a, 0x0d, 0x52, 0x45, 0x4d, 0x4f, 0x56, 0x45, 0x5f, - 0x46, 0x41, 0x49, 0x4c, 0x45, 0x44, 0x10, 0x07, 0x12, 0x09, 0x0a, 0x05, 0x52, 0x53, 0x59, 0x4e, - 0x43, 0x10, 0x09, 0x22, 0x14, 0x0a, 0x12, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x45, 0x76, 0x65, 0x6e, - 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0xc4, 0x03, 0x0a, 0x1a, 0x4d, 0x6f, - 0x64, 0x65, 0x6c, 0x53, 0x63, 0x61, 0x6c, 0x69, 0x6e, 0x67, 0x54, 0x72, 0x69, 0x67, 0x67, 0x65, - 0x72, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x1e, 0x0a, 0x0a, 0x73, 0x65, 0x72, 0x76, - 0x65, 0x72, 0x4e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x73, 0x65, - 0x72, 0x76, 0x65, 0x72, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x1e, 0x0a, 0x0a, 0x72, 0x65, 0x70, 0x6c, - 0x69, 0x63, 0x61, 0x49, 0x64, 0x78, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x72, 0x65, - 0x70, 0x6c, 0x69, 0x63, 0x61, 0x49, 0x64, 0x78, 0x12, 0x1c, 0x0a, 0x09, 0x6d, 0x6f, 0x64, 0x65, - 0x6c, 0x4e, 0x61, 0x6d, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x6d, 0x6f, 0x64, - 0x65, 0x6c, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x22, 0x0a, 0x0c, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x56, - 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0c, 0x6d, 0x6f, - 0x64, 0x65, 0x6c, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x50, 0x0a, 0x07, 0x74, 0x72, - 0x69, 0x67, 0x67, 0x65, 0x72, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x36, 0x2e, 0x73, 0x65, - 0x6c, 0x64, 0x6f, 0x6e, 0x2e, 0x6d, 0x6c, 0x6f, 0x70, 0x73, 0x2e, 0x61, 0x67, 0x65, 0x6e, 0x74, - 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x53, 0x63, 0x61, 0x6c, 0x69, 0x6e, 0x67, 0x54, 0x72, 0x69, - 0x67, 0x67, 0x65, 0x72, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x2e, 0x54, 0x72, 0x69, 0x67, - 0x67, 0x65, 0x72, 0x52, 0x07, 0x74, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x12, 0x16, 0x0a, 0x06, - 0x61, 0x6d, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x61, 0x6d, - 0x6f, 0x75, 0x6e, 0x74, 0x12, 0x55, 0x0a, 0x07, 0x6d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x73, 0x18, - 0x07, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x3b, 0x2e, 0x73, 0x65, 0x6c, 0x64, 0x6f, 0x6e, 0x2e, 0x6d, - 0x6c, 0x6f, 0x70, 0x73, 0x2e, 0x61, 0x67, 0x65, 0x6e, 0x74, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, - 0x53, 0x63, 0x61, 0x6c, 0x69, 0x6e, 0x67, 0x54, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x4d, 0x65, - 0x73, 0x73, 0x61, 0x67, 0x65, 0x2e, 0x4d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x73, 0x45, 0x6e, 0x74, - 0x72, 0x79, 0x52, 0x07, 0x6d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x73, 0x1a, 0x3a, 0x0a, 0x0c, 0x4d, - 0x65, 0x74, 0x72, 0x69, 0x63, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, - 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, - 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x05, 0x76, 0x61, - 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x27, 0x0a, 0x07, 0x54, 0x72, 0x69, 0x67, 0x67, - 0x65, 0x72, 0x12, 0x0c, 0x0a, 0x08, 0x53, 0x43, 0x41, 0x4c, 0x45, 0x5f, 0x55, 0x50, 0x10, 0x00, - 0x12, 0x0e, 0x0a, 0x0a, 0x53, 0x43, 0x41, 0x4c, 0x45, 0x5f, 0x44, 0x4f, 0x57, 0x4e, 0x10, 0x01, - 0x22, 0x1d, 0x0a, 0x1b, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x53, 0x63, 0x61, 0x6c, 0x69, 0x6e, 0x67, - 0x54, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, - 0x53, 0x0a, 0x11, 0x41, 0x67, 0x65, 
0x6e, 0x74, 0x44, 0x72, 0x61, 0x69, 0x6e, 0x52, 0x65, 0x71, - 0x75, 0x65, 0x73, 0x74, 0x12, 0x1e, 0x0a, 0x0a, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x4e, 0x61, - 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, - 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x1e, 0x0a, 0x0a, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x49, - 0x64, 0x78, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, - 0x61, 0x49, 0x64, 0x78, 0x22, 0x2e, 0x0a, 0x12, 0x41, 0x67, 0x65, 0x6e, 0x74, 0x44, 0x72, 0x61, - 0x69, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x75, - 0x63, 0x63, 0x65, 0x73, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x73, 0x75, 0x63, - 0x63, 0x65, 0x73, 0x73, 0x22, 0xb2, 0x02, 0x0a, 0x15, 0x41, 0x67, 0x65, 0x6e, 0x74, 0x53, 0x75, - 0x62, 0x73, 0x63, 0x72, 0x69, 0x62, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1e, - 0x0a, 0x0a, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x4e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x0a, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x16, - 0x0a, 0x06, 0x73, 0x68, 0x61, 0x72, 0x65, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x06, - 0x73, 0x68, 0x61, 0x72, 0x65, 0x64, 0x12, 0x1e, 0x0a, 0x0a, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, - 0x61, 0x49, 0x64, 0x78, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x72, 0x65, 0x70, 0x6c, - 0x69, 0x63, 0x61, 0x49, 0x64, 0x78, 0x12, 0x47, 0x0a, 0x0d, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, - 0x61, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x21, 0x2e, + 0x72, 0x79, 0x42, 0x79, 0x74, 0x65, 0x73, 0x12, 0x46, 0x0a, 0x0b, 0x72, 0x75, 0x6e, 0x74, 0x69, + 0x6d, 0x65, 0x49, 0x6e, 0x66, 0x6f, 0x18, 0x08, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x24, 0x2e, 0x73, + 0x65, 0x6c, 0x64, 0x6f, 0x6e, 0x2e, 0x6d, 0x6c, 0x6f, 0x70, 0x73, 0x2e, 0x61, 0x67, 0x65, 0x6e, + 0x74, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x52, 0x75, 0x6e, 0x74, 0x69, 0x6d, 0x65, 0x49, 0x6e, + 0x66, 0x6f, 0x52, 0x0b, 0x72, 0x75, 0x6e, 0x74, 0x69, 0x6d, 0x65, 0x49, 0x6e, 0x66, 0x6f, 0x22, + 0x99, 0x01, 0x0a, 0x05, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x12, 0x11, 0x0a, 0x0d, 0x55, 0x4e, 0x4b, + 0x4e, 0x4f, 0x57, 0x4e, 0x5f, 0x45, 0x56, 0x45, 0x4e, 0x54, 0x10, 0x00, 0x12, 0x14, 0x0a, 0x10, + 0x4c, 0x4f, 0x41, 0x44, 0x5f, 0x46, 0x41, 0x49, 0x4c, 0x5f, 0x4d, 0x45, 0x4d, 0x4f, 0x52, 0x59, + 0x10, 0x01, 0x12, 0x0a, 0x0a, 0x06, 0x4c, 0x4f, 0x41, 0x44, 0x45, 0x44, 0x10, 0x02, 0x12, 0x0f, + 0x0a, 0x0b, 0x4c, 0x4f, 0x41, 0x44, 0x5f, 0x46, 0x41, 0x49, 0x4c, 0x45, 0x44, 0x10, 0x03, 0x12, + 0x0c, 0x0a, 0x08, 0x55, 0x4e, 0x4c, 0x4f, 0x41, 0x44, 0x45, 0x44, 0x10, 0x04, 0x12, 0x11, 0x0a, + 0x0d, 0x55, 0x4e, 0x4c, 0x4f, 0x41, 0x44, 0x5f, 0x46, 0x41, 0x49, 0x4c, 0x45, 0x44, 0x10, 0x05, + 0x12, 0x0b, 0x0a, 0x07, 0x52, 0x45, 0x4d, 0x4f, 0x56, 0x45, 0x44, 0x10, 0x06, 0x12, 0x11, 0x0a, + 0x0d, 0x52, 0x45, 0x4d, 0x4f, 0x56, 0x45, 0x5f, 0x46, 0x41, 0x49, 0x4c, 0x45, 0x44, 0x10, 0x07, + 0x12, 0x09, 0x0a, 0x05, 0x52, 0x53, 0x59, 0x4e, 0x43, 0x10, 0x09, 0x22, 0x14, 0x0a, 0x12, 0x4d, + 0x6f, 0x64, 0x65, 0x6c, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, + 0x65, 0x22, 0xc4, 0x03, 0x0a, 0x1a, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x53, 0x63, 0x61, 0x6c, 0x69, + 0x6e, 0x67, 0x54, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, + 0x12, 0x1e, 0x0a, 0x0a, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x4e, 0x61, 0x6d, 0x65, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x73, 0x65, 0x72, 0x76, 
0x65, 0x72, 0x4e, 0x61, 0x6d, 0x65, + 0x12, 0x1e, 0x0a, 0x0a, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x49, 0x64, 0x78, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0a, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x49, 0x64, 0x78, + 0x12, 0x1c, 0x0a, 0x09, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x4e, 0x61, 0x6d, 0x65, 0x18, 0x03, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x09, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x22, + 0x0a, 0x0c, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x04, + 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0c, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x56, 0x65, 0x72, 0x73, 0x69, + 0x6f, 0x6e, 0x12, 0x50, 0x0a, 0x07, 0x74, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x18, 0x05, 0x20, + 0x01, 0x28, 0x0e, 0x32, 0x36, 0x2e, 0x73, 0x65, 0x6c, 0x64, 0x6f, 0x6e, 0x2e, 0x6d, 0x6c, 0x6f, + 0x70, 0x73, 0x2e, 0x61, 0x67, 0x65, 0x6e, 0x74, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x53, 0x63, + 0x61, 0x6c, 0x69, 0x6e, 0x67, 0x54, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x4d, 0x65, 0x73, 0x73, + 0x61, 0x67, 0x65, 0x2e, 0x54, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x52, 0x07, 0x74, 0x72, 0x69, + 0x67, 0x67, 0x65, 0x72, 0x12, 0x16, 0x0a, 0x06, 0x61, 0x6d, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x06, + 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x61, 0x6d, 0x6f, 0x75, 0x6e, 0x74, 0x12, 0x55, 0x0a, 0x07, + 0x6d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x73, 0x18, 0x07, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x3b, 0x2e, 0x73, 0x65, 0x6c, 0x64, 0x6f, 0x6e, 0x2e, 0x6d, 0x6c, 0x6f, 0x70, 0x73, 0x2e, 0x61, 0x67, 0x65, - 0x6e, 0x74, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, - 0x52, 0x0d, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x12, - 0x44, 0x0a, 0x0c, 0x6c, 0x6f, 0x61, 0x64, 0x65, 0x64, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x73, 0x18, - 0x05, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x20, 0x2e, 0x73, 0x65, 0x6c, 0x64, 0x6f, 0x6e, 0x2e, 0x6d, - 0x6c, 0x6f, 0x70, 0x73, 0x2e, 0x61, 0x67, 0x65, 0x6e, 0x74, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, - 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x52, 0x0c, 0x6c, 0x6f, 0x61, 0x64, 0x65, 0x64, 0x4d, - 0x6f, 0x64, 0x65, 0x6c, 0x73, 0x12, 0x32, 0x0a, 0x14, 0x61, 0x76, 0x61, 0x69, 0x6c, 0x61, 0x62, - 0x6c, 0x65, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x42, 0x79, 0x74, 0x65, 0x73, 0x18, 0x06, 0x20, - 0x01, 0x28, 0x04, 0x52, 0x14, 0x61, 0x76, 0x61, 0x69, 0x6c, 0x61, 0x62, 0x6c, 0x65, 0x4d, 0x65, - 0x6d, 0x6f, 0x72, 0x79, 0x42, 0x79, 0x74, 0x65, 0x73, 0x22, 0x89, 0x02, 0x0a, 0x0d, 0x52, 0x65, - 0x70, 0x6c, 0x69, 0x63, 0x61, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x12, 0x22, 0x0a, 0x0c, 0x69, - 0x6e, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65, 0x53, 0x76, 0x63, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x0c, 0x69, 0x6e, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65, 0x53, 0x76, 0x63, 0x12, - 0x2c, 0x0a, 0x11, 0x69, 0x6e, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65, 0x48, 0x74, 0x74, 0x70, - 0x50, 0x6f, 0x72, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x11, 0x69, 0x6e, 0x66, 0x65, - 0x72, 0x65, 0x6e, 0x63, 0x65, 0x48, 0x74, 0x74, 0x70, 0x50, 0x6f, 0x72, 0x74, 0x12, 0x2c, 0x0a, - 0x11, 0x69, 0x6e, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65, 0x47, 0x72, 0x70, 0x63, 0x50, 0x6f, - 0x72, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x11, 0x69, 0x6e, 0x66, 0x65, 0x72, 0x65, - 0x6e, 0x63, 0x65, 0x47, 0x72, 0x70, 0x63, 0x50, 0x6f, 0x72, 0x74, 0x12, 0x20, 0x0a, 0x0b, 0x6d, - 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x42, 0x79, 0x74, 0x65, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x04, - 0x52, 0x0b, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x42, 0x79, 0x74, 0x65, 0x73, 0x12, 
0x22, 0x0a, - 0x0c, 0x63, 0x61, 0x70, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x69, 0x65, 0x73, 0x18, 0x05, 0x20, - 0x03, 0x28, 0x09, 0x52, 0x0c, 0x63, 0x61, 0x70, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x69, 0x65, - 0x73, 0x12, 0x32, 0x0a, 0x14, 0x6f, 0x76, 0x65, 0x72, 0x43, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x50, - 0x65, 0x72, 0x63, 0x65, 0x6e, 0x74, 0x61, 0x67, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, - 0x14, 0x6f, 0x76, 0x65, 0x72, 0x43, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x50, 0x65, 0x72, 0x63, 0x65, - 0x6e, 0x74, 0x61, 0x67, 0x65, 0x22, 0xa2, 0x02, 0x0a, 0x15, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, - 0x70, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, - 0x51, 0x0a, 0x09, 0x6f, 0x70, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x0e, 0x32, 0x33, 0x2e, 0x73, 0x65, 0x6c, 0x64, 0x6f, 0x6e, 0x2e, 0x6d, 0x6c, 0x6f, 0x70, - 0x73, 0x2e, 0x61, 0x67, 0x65, 0x6e, 0x74, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x65, - 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x2e, 0x4f, 0x70, - 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x09, 0x6f, 0x70, 0x65, 0x72, 0x61, 0x74, 0x69, - 0x6f, 0x6e, 0x12, 0x44, 0x0a, 0x0c, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x56, 0x65, 0x72, 0x73, 0x69, - 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x20, 0x2e, 0x73, 0x65, 0x6c, 0x64, 0x6f, - 0x6e, 0x2e, 0x6d, 0x6c, 0x6f, 0x70, 0x73, 0x2e, 0x61, 0x67, 0x65, 0x6e, 0x74, 0x2e, 0x4d, 0x6f, - 0x64, 0x65, 0x6c, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x52, 0x0c, 0x6d, 0x6f, 0x64, 0x65, - 0x6c, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x2e, 0x0a, 0x12, 0x61, 0x75, 0x74, 0x6f, - 0x73, 0x63, 0x61, 0x6c, 0x69, 0x6e, 0x67, 0x45, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x18, 0x03, - 0x20, 0x01, 0x28, 0x08, 0x52, 0x12, 0x61, 0x75, 0x74, 0x6f, 0x73, 0x63, 0x61, 0x6c, 0x69, 0x6e, - 0x67, 0x45, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x22, 0x40, 0x0a, 0x09, 0x4f, 0x70, 0x65, 0x72, - 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x11, 0x0a, 0x0d, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, - 0x5f, 0x45, 0x56, 0x45, 0x4e, 0x54, 0x10, 0x00, 0x12, 0x0e, 0x0a, 0x0a, 0x4c, 0x4f, 0x41, 0x44, - 0x5f, 0x4d, 0x4f, 0x44, 0x45, 0x4c, 0x10, 0x01, 0x12, 0x10, 0x0a, 0x0c, 0x55, 0x4e, 0x4c, 0x4f, - 0x41, 0x44, 0x5f, 0x4d, 0x4f, 0x44, 0x45, 0x4c, 0x10, 0x02, 0x22, 0x5d, 0x0a, 0x0c, 0x4d, 0x6f, - 0x64, 0x65, 0x6c, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x33, 0x0a, 0x05, 0x6d, 0x6f, - 0x64, 0x65, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1d, 0x2e, 0x73, 0x65, 0x6c, 0x64, - 0x6f, 0x6e, 0x2e, 0x6d, 0x6c, 0x6f, 0x70, 0x73, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, - 0x65, 0x72, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x52, 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x12, - 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, - 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x32, 0xaf, 0x03, 0x0a, 0x0c, 0x41, 0x67, + 0x6e, 0x74, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x53, 0x63, 0x61, 0x6c, 0x69, 0x6e, 0x67, 0x54, + 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x2e, 0x4d, 0x65, + 0x74, 0x72, 0x69, 0x63, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x07, 0x6d, 0x65, 0x74, 0x72, + 0x69, 0x63, 0x73, 0x1a, 0x3a, 0x0a, 0x0c, 0x4d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x73, 0x45, 0x6e, + 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, + 0x20, 0x01, 
0x28, 0x0d, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, + 0x27, 0x0a, 0x07, 0x54, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x12, 0x0c, 0x0a, 0x08, 0x53, 0x43, + 0x41, 0x4c, 0x45, 0x5f, 0x55, 0x50, 0x10, 0x00, 0x12, 0x0e, 0x0a, 0x0a, 0x53, 0x43, 0x41, 0x4c, + 0x45, 0x5f, 0x44, 0x4f, 0x57, 0x4e, 0x10, 0x01, 0x22, 0x1d, 0x0a, 0x1b, 0x4d, 0x6f, 0x64, 0x65, + 0x6c, 0x53, 0x63, 0x61, 0x6c, 0x69, 0x6e, 0x67, 0x54, 0x72, 0x69, 0x67, 0x67, 0x65, 0x72, 0x52, + 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x53, 0x0a, 0x11, 0x41, 0x67, 0x65, 0x6e, 0x74, + 0x44, 0x72, 0x61, 0x69, 0x6e, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1e, 0x0a, 0x0a, + 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x4e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x0a, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x1e, 0x0a, 0x0a, + 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x49, 0x64, 0x78, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, + 0x52, 0x0a, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x49, 0x64, 0x78, 0x22, 0x2e, 0x0a, 0x12, + 0x41, 0x67, 0x65, 0x6e, 0x74, 0x44, 0x72, 0x61, 0x69, 0x6e, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, + 0x73, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x08, 0x52, 0x07, 0x73, 0x75, 0x63, 0x63, 0x65, 0x73, 0x73, 0x22, 0xb2, 0x02, 0x0a, + 0x15, 0x41, 0x67, 0x65, 0x6e, 0x74, 0x53, 0x75, 0x62, 0x73, 0x63, 0x72, 0x69, 0x62, 0x65, 0x52, + 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1e, 0x0a, 0x0a, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, + 0x4e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x73, 0x65, 0x72, 0x76, + 0x65, 0x72, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x68, 0x61, 0x72, 0x65, 0x64, + 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x06, 0x73, 0x68, 0x61, 0x72, 0x65, 0x64, 0x12, 0x1e, + 0x0a, 0x0a, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x49, 0x64, 0x78, 0x18, 0x03, 0x20, 0x01, + 0x28, 0x0d, 0x52, 0x0a, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x49, 0x64, 0x78, 0x12, 0x47, + 0x0a, 0x0d, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x18, + 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x21, 0x2e, 0x73, 0x65, 0x6c, 0x64, 0x6f, 0x6e, 0x2e, 0x6d, + 0x6c, 0x6f, 0x70, 0x73, 0x2e, 0x61, 0x67, 0x65, 0x6e, 0x74, 0x2e, 0x52, 0x65, 0x70, 0x6c, 0x69, + 0x63, 0x61, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x52, 0x0d, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, + 0x61, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x12, 0x44, 0x0a, 0x0c, 0x6c, 0x6f, 0x61, 0x64, 0x65, + 0x64, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x20, 0x2e, + 0x73, 0x65, 0x6c, 0x64, 0x6f, 0x6e, 0x2e, 0x6d, 0x6c, 0x6f, 0x70, 0x73, 0x2e, 0x61, 0x67, 0x65, + 0x6e, 0x74, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x52, + 0x0c, 0x6c, 0x6f, 0x61, 0x64, 0x65, 0x64, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x73, 0x12, 0x32, 0x0a, + 0x14, 0x61, 0x76, 0x61, 0x69, 0x6c, 0x61, 0x62, 0x6c, 0x65, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, + 0x42, 0x79, 0x74, 0x65, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x04, 0x52, 0x14, 0x61, 0x76, 0x61, + 0x69, 0x6c, 0x61, 0x62, 0x6c, 0x65, 0x4d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x42, 0x79, 0x74, 0x65, + 0x73, 0x22, 0x89, 0x02, 0x0a, 0x0d, 0x52, 0x65, 0x70, 0x6c, 0x69, 0x63, 0x61, 0x43, 0x6f, 0x6e, + 0x66, 0x69, 0x67, 0x12, 0x22, 0x0a, 0x0c, 0x69, 0x6e, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65, + 0x53, 0x76, 0x63, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x69, 0x6e, 0x66, 0x65, 0x72, + 0x65, 0x6e, 0x63, 0x65, 0x53, 0x76, 
0x63, 0x12, 0x2c, 0x0a, 0x11, 0x69, 0x6e, 0x66, 0x65, 0x72, + 0x65, 0x6e, 0x63, 0x65, 0x48, 0x74, 0x74, 0x70, 0x50, 0x6f, 0x72, 0x74, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x05, 0x52, 0x11, 0x69, 0x6e, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65, 0x48, 0x74, 0x74, + 0x70, 0x50, 0x6f, 0x72, 0x74, 0x12, 0x2c, 0x0a, 0x11, 0x69, 0x6e, 0x66, 0x65, 0x72, 0x65, 0x6e, + 0x63, 0x65, 0x47, 0x72, 0x70, 0x63, 0x50, 0x6f, 0x72, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, + 0x52, 0x11, 0x69, 0x6e, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65, 0x47, 0x72, 0x70, 0x63, 0x50, + 0x6f, 0x72, 0x74, 0x12, 0x20, 0x0a, 0x0b, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x42, 0x79, 0x74, + 0x65, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0b, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, + 0x42, 0x79, 0x74, 0x65, 0x73, 0x12, 0x22, 0x0a, 0x0c, 0x63, 0x61, 0x70, 0x61, 0x62, 0x69, 0x6c, + 0x69, 0x74, 0x69, 0x65, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0c, 0x63, 0x61, 0x70, + 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x69, 0x65, 0x73, 0x12, 0x32, 0x0a, 0x14, 0x6f, 0x76, 0x65, + 0x72, 0x43, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x50, 0x65, 0x72, 0x63, 0x65, 0x6e, 0x74, 0x61, 0x67, + 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x14, 0x6f, 0x76, 0x65, 0x72, 0x43, 0x6f, 0x6d, + 0x6d, 0x69, 0x74, 0x50, 0x65, 0x72, 0x63, 0x65, 0x6e, 0x74, 0x61, 0x67, 0x65, 0x22, 0xa2, 0x02, + 0x0a, 0x15, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, + 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x51, 0x0a, 0x09, 0x6f, 0x70, 0x65, 0x72, 0x61, + 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x33, 0x2e, 0x73, 0x65, 0x6c, + 0x64, 0x6f, 0x6e, 0x2e, 0x6d, 0x6c, 0x6f, 0x70, 0x73, 0x2e, 0x61, 0x67, 0x65, 0x6e, 0x74, 0x2e, + 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x4f, 0x70, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x4d, 0x65, + 0x73, 0x73, 0x61, 0x67, 0x65, 0x2e, 0x4f, 0x70, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, + 0x09, 0x6f, 0x70, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x44, 0x0a, 0x0c, 0x6d, 0x6f, + 0x64, 0x65, 0x6c, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, + 0x32, 0x20, 0x2e, 0x73, 0x65, 0x6c, 0x64, 0x6f, 0x6e, 0x2e, 0x6d, 0x6c, 0x6f, 0x70, 0x73, 0x2e, + 0x61, 0x67, 0x65, 0x6e, 0x74, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x56, 0x65, 0x72, 0x73, 0x69, + 0x6f, 0x6e, 0x52, 0x0c, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, + 0x12, 0x2e, 0x0a, 0x12, 0x61, 0x75, 0x74, 0x6f, 0x73, 0x63, 0x61, 0x6c, 0x69, 0x6e, 0x67, 0x45, + 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, 0x12, 0x61, 0x75, + 0x74, 0x6f, 0x73, 0x63, 0x61, 0x6c, 0x69, 0x6e, 0x67, 0x45, 0x6e, 0x61, 0x62, 0x6c, 0x65, 0x64, + 0x22, 0x40, 0x0a, 0x09, 0x4f, 0x70, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x11, 0x0a, + 0x0d, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x5f, 0x45, 0x56, 0x45, 0x4e, 0x54, 0x10, 0x00, + 0x12, 0x0e, 0x0a, 0x0a, 0x4c, 0x4f, 0x41, 0x44, 0x5f, 0x4d, 0x4f, 0x44, 0x45, 0x4c, 0x10, 0x01, + 0x12, 0x10, 0x0a, 0x0c, 0x55, 0x4e, 0x4c, 0x4f, 0x41, 0x44, 0x5f, 0x4d, 0x4f, 0x44, 0x45, 0x4c, + 0x10, 0x02, 0x22, 0xa5, 0x01, 0x0a, 0x0c, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x56, 0x65, 0x72, 0x73, + 0x69, 0x6f, 0x6e, 0x12, 0x33, 0x0a, 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x1d, 0x2e, 0x73, 0x65, 0x6c, 0x64, 0x6f, 0x6e, 0x2e, 0x6d, 0x6c, 0x6f, 0x70, + 0x73, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, 0x65, 0x72, 0x2e, 0x4d, 0x6f, 0x64, 0x65, + 0x6c, 0x52, 0x05, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x12, 0x18, 
0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, + 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, + 0x6f, 0x6e, 0x12, 0x46, 0x0a, 0x0b, 0x72, 0x75, 0x6e, 0x74, 0x69, 0x6d, 0x65, 0x49, 0x6e, 0x66, + 0x6f, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x24, 0x2e, 0x73, 0x65, 0x6c, 0x64, 0x6f, 0x6e, + 0x2e, 0x6d, 0x6c, 0x6f, 0x70, 0x73, 0x2e, 0x61, 0x67, 0x65, 0x6e, 0x74, 0x2e, 0x4d, 0x6f, 0x64, + 0x65, 0x6c, 0x52, 0x75, 0x6e, 0x74, 0x69, 0x6d, 0x65, 0x49, 0x6e, 0x66, 0x6f, 0x52, 0x0b, 0x72, + 0x75, 0x6e, 0x74, 0x69, 0x6d, 0x65, 0x49, 0x6e, 0x66, 0x6f, 0x22, 0xb0, 0x01, 0x0a, 0x10, 0x4d, + 0x6f, 0x64, 0x65, 0x6c, 0x52, 0x75, 0x6e, 0x74, 0x69, 0x6d, 0x65, 0x49, 0x6e, 0x66, 0x6f, 0x12, + 0x47, 0x0a, 0x08, 0x6d, 0x6c, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x0b, 0x32, 0x29, 0x2e, 0x73, 0x65, 0x6c, 0x64, 0x6f, 0x6e, 0x2e, 0x6d, 0x6c, 0x6f, 0x70, 0x73, + 0x2e, 0x61, 0x67, 0x65, 0x6e, 0x74, 0x2e, 0x4d, 0x4c, 0x53, 0x65, 0x72, 0x76, 0x65, 0x72, 0x4d, + 0x6f, 0x64, 0x65, 0x6c, 0x53, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x48, 0x00, 0x52, 0x08, + 0x6d, 0x6c, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x12, 0x3f, 0x0a, 0x06, 0x74, 0x72, 0x69, 0x74, + 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x25, 0x2e, 0x73, 0x65, 0x6c, 0x64, 0x6f, + 0x6e, 0x2e, 0x6d, 0x6c, 0x6f, 0x70, 0x73, 0x2e, 0x61, 0x67, 0x65, 0x6e, 0x74, 0x2e, 0x54, 0x72, + 0x69, 0x74, 0x6f, 0x6e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x48, + 0x00, 0x52, 0x06, 0x74, 0x72, 0x69, 0x74, 0x6f, 0x6e, 0x42, 0x12, 0x0a, 0x10, 0x6d, 0x6f, 0x64, + 0x65, 0x6c, 0x52, 0x75, 0x6e, 0x74, 0x69, 0x6d, 0x65, 0x49, 0x6e, 0x66, 0x6f, 0x22, 0x41, 0x0a, + 0x15, 0x4d, 0x4c, 0x53, 0x65, 0x72, 0x76, 0x65, 0x72, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x53, 0x65, + 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x12, 0x28, 0x0a, 0x0f, 0x70, 0x61, 0x72, 0x61, 0x6c, 0x6c, + 0x65, 0x6c, 0x57, 0x6f, 0x72, 0x6b, 0x65, 0x72, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, + 0x0f, 0x70, 0x61, 0x72, 0x61, 0x6c, 0x6c, 0x65, 0x6c, 0x57, 0x6f, 0x72, 0x6b, 0x65, 0x72, 0x73, + 0x22, 0x44, 0x0a, 0x11, 0x54, 0x72, 0x69, 0x74, 0x6f, 0x6e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x43, + 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x12, 0x2f, 0x0a, 0x03, 0x63, 0x70, 0x75, 0x18, 0x01, 0x20, 0x03, + 0x28, 0x0b, 0x32, 0x1d, 0x2e, 0x73, 0x65, 0x6c, 0x64, 0x6f, 0x6e, 0x2e, 0x6d, 0x6c, 0x6f, 0x70, + 0x73, 0x2e, 0x61, 0x67, 0x65, 0x6e, 0x74, 0x2e, 0x54, 0x72, 0x69, 0x74, 0x6f, 0x6e, 0x43, 0x50, + 0x55, 0x52, 0x03, 0x63, 0x70, 0x75, 0x22, 0x31, 0x0a, 0x09, 0x54, 0x72, 0x69, 0x74, 0x6f, 0x6e, + 0x43, 0x50, 0x55, 0x12, 0x24, 0x0a, 0x0d, 0x69, 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x43, + 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x0d, 0x69, 0x6e, 0x73, 0x74, + 0x61, 0x6e, 0x63, 0x65, 0x43, 0x6f, 0x75, 0x6e, 0x74, 0x32, 0xaf, 0x03, 0x0a, 0x0c, 0x41, 0x67, 0x65, 0x6e, 0x74, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, 0x5d, 0x0a, 0x0a, 0x41, 0x67, 0x65, 0x6e, 0x74, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x12, 0x25, 0x2e, 0x73, 0x65, 0x6c, 0x64, 0x6f, 0x6e, 0x2e, 0x6d, 0x6c, 0x6f, 0x70, 0x73, 0x2e, 0x61, 0x67, 0x65, 0x6e, 0x74, 0x2e, 0x4d, 0x6f, @@ -1035,7 +1305,7 @@ func file_mlops_agent_agent_proto_rawDescGZIP() []byte { } var file_mlops_agent_agent_proto_enumTypes = make([]protoimpl.EnumInfo, 3) -var file_mlops_agent_agent_proto_msgTypes = make([]protoimpl.MessageInfo, 11) +var file_mlops_agent_agent_proto_msgTypes = make([]protoimpl.MessageInfo, 15) var file_mlops_agent_agent_proto_goTypes = []any{ 
(ModelEventMessage_Event)(0), // 0: seldon.mlops.agent.ModelEventMessage.Event (ModelScalingTriggerMessage_Trigger)(0), // 1: seldon.mlops.agent.ModelScalingTriggerMessage.Trigger @@ -1050,31 +1320,40 @@ var file_mlops_agent_agent_proto_goTypes = []any{ (*ReplicaConfig)(nil), // 10: seldon.mlops.agent.ReplicaConfig (*ModelOperationMessage)(nil), // 11: seldon.mlops.agent.ModelOperationMessage (*ModelVersion)(nil), // 12: seldon.mlops.agent.ModelVersion - nil, // 13: seldon.mlops.agent.ModelScalingTriggerMessage.MetricsEntry - (*scheduler.Model)(nil), // 14: seldon.mlops.scheduler.Model + (*ModelRuntimeInfo)(nil), // 13: seldon.mlops.agent.ModelRuntimeInfo + (*MLServerModelSettings)(nil), // 14: seldon.mlops.agent.MLServerModelSettings + (*TritonModelConfig)(nil), // 15: seldon.mlops.agent.TritonModelConfig + (*TritonCPU)(nil), // 16: seldon.mlops.agent.TritonCPU + nil, // 17: seldon.mlops.agent.ModelScalingTriggerMessage.MetricsEntry + (*scheduler.Model)(nil), // 18: seldon.mlops.scheduler.Model } var file_mlops_agent_agent_proto_depIdxs = []int32{ 0, // 0: seldon.mlops.agent.ModelEventMessage.event:type_name -> seldon.mlops.agent.ModelEventMessage.Event - 1, // 1: seldon.mlops.agent.ModelScalingTriggerMessage.trigger:type_name -> seldon.mlops.agent.ModelScalingTriggerMessage.Trigger - 13, // 2: seldon.mlops.agent.ModelScalingTriggerMessage.metrics:type_name -> seldon.mlops.agent.ModelScalingTriggerMessage.MetricsEntry - 10, // 3: seldon.mlops.agent.AgentSubscribeRequest.replicaConfig:type_name -> seldon.mlops.agent.ReplicaConfig - 12, // 4: seldon.mlops.agent.AgentSubscribeRequest.loadedModels:type_name -> seldon.mlops.agent.ModelVersion - 2, // 5: seldon.mlops.agent.ModelOperationMessage.operation:type_name -> seldon.mlops.agent.ModelOperationMessage.Operation - 12, // 6: seldon.mlops.agent.ModelOperationMessage.modelVersion:type_name -> seldon.mlops.agent.ModelVersion - 14, // 7: seldon.mlops.agent.ModelVersion.model:type_name -> seldon.mlops.scheduler.Model - 3, // 8: seldon.mlops.agent.AgentService.AgentEvent:input_type -> seldon.mlops.agent.ModelEventMessage - 9, // 9: seldon.mlops.agent.AgentService.Subscribe:input_type -> seldon.mlops.agent.AgentSubscribeRequest - 5, // 10: seldon.mlops.agent.AgentService.ModelScalingTrigger:input_type -> seldon.mlops.agent.ModelScalingTriggerMessage - 7, // 11: seldon.mlops.agent.AgentService.AgentDrain:input_type -> seldon.mlops.agent.AgentDrainRequest - 4, // 12: seldon.mlops.agent.AgentService.AgentEvent:output_type -> seldon.mlops.agent.ModelEventResponse - 11, // 13: seldon.mlops.agent.AgentService.Subscribe:output_type -> seldon.mlops.agent.ModelOperationMessage - 6, // 14: seldon.mlops.agent.AgentService.ModelScalingTrigger:output_type -> seldon.mlops.agent.ModelScalingTriggerResponse - 8, // 15: seldon.mlops.agent.AgentService.AgentDrain:output_type -> seldon.mlops.agent.AgentDrainResponse - 12, // [12:16] is the sub-list for method output_type - 8, // [8:12] is the sub-list for method input_type - 8, // [8:8] is the sub-list for extension type_name - 8, // [8:8] is the sub-list for extension extendee - 0, // [0:8] is the sub-list for field type_name + 13, // 1: seldon.mlops.agent.ModelEventMessage.runtimeInfo:type_name -> seldon.mlops.agent.ModelRuntimeInfo + 1, // 2: seldon.mlops.agent.ModelScalingTriggerMessage.trigger:type_name -> seldon.mlops.agent.ModelScalingTriggerMessage.Trigger + 17, // 3: seldon.mlops.agent.ModelScalingTriggerMessage.metrics:type_name -> seldon.mlops.agent.ModelScalingTriggerMessage.MetricsEntry + 10, // 4: 
seldon.mlops.agent.AgentSubscribeRequest.replicaConfig:type_name -> seldon.mlops.agent.ReplicaConfig + 12, // 5: seldon.mlops.agent.AgentSubscribeRequest.loadedModels:type_name -> seldon.mlops.agent.ModelVersion + 2, // 6: seldon.mlops.agent.ModelOperationMessage.operation:type_name -> seldon.mlops.agent.ModelOperationMessage.Operation + 12, // 7: seldon.mlops.agent.ModelOperationMessage.modelVersion:type_name -> seldon.mlops.agent.ModelVersion + 18, // 8: seldon.mlops.agent.ModelVersion.model:type_name -> seldon.mlops.scheduler.Model + 13, // 9: seldon.mlops.agent.ModelVersion.runtimeInfo:type_name -> seldon.mlops.agent.ModelRuntimeInfo + 14, // 10: seldon.mlops.agent.ModelRuntimeInfo.mlserver:type_name -> seldon.mlops.agent.MLServerModelSettings + 15, // 11: seldon.mlops.agent.ModelRuntimeInfo.triton:type_name -> seldon.mlops.agent.TritonModelConfig + 16, // 12: seldon.mlops.agent.TritonModelConfig.cpu:type_name -> seldon.mlops.agent.TritonCPU + 3, // 13: seldon.mlops.agent.AgentService.AgentEvent:input_type -> seldon.mlops.agent.ModelEventMessage + 9, // 14: seldon.mlops.agent.AgentService.Subscribe:input_type -> seldon.mlops.agent.AgentSubscribeRequest + 5, // 15: seldon.mlops.agent.AgentService.ModelScalingTrigger:input_type -> seldon.mlops.agent.ModelScalingTriggerMessage + 7, // 16: seldon.mlops.agent.AgentService.AgentDrain:input_type -> seldon.mlops.agent.AgentDrainRequest + 4, // 17: seldon.mlops.agent.AgentService.AgentEvent:output_type -> seldon.mlops.agent.ModelEventResponse + 11, // 18: seldon.mlops.agent.AgentService.Subscribe:output_type -> seldon.mlops.agent.ModelOperationMessage + 6, // 19: seldon.mlops.agent.AgentService.ModelScalingTrigger:output_type -> seldon.mlops.agent.ModelScalingTriggerResponse + 8, // 20: seldon.mlops.agent.AgentService.AgentDrain:output_type -> seldon.mlops.agent.AgentDrainResponse + 17, // [17:21] is the sub-list for method output_type + 13, // [13:17] is the sub-list for method input_type + 13, // [13:13] is the sub-list for extension type_name + 13, // [13:13] is the sub-list for extension extendee + 0, // [0:13] is the sub-list for field type_name } func init() { file_mlops_agent_agent_proto_init() } @@ -1203,6 +1482,58 @@ func file_mlops_agent_agent_proto_init() { return nil } } + file_mlops_agent_agent_proto_msgTypes[10].Exporter = func(v any, i int) any { + switch v := v.(*ModelRuntimeInfo); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_mlops_agent_agent_proto_msgTypes[11].Exporter = func(v any, i int) any { + switch v := v.(*MLServerModelSettings); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_mlops_agent_agent_proto_msgTypes[12].Exporter = func(v any, i int) any { + switch v := v.(*TritonModelConfig); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_mlops_agent_agent_proto_msgTypes[13].Exporter = func(v any, i int) any { + switch v := v.(*TritonCPU); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + file_mlops_agent_agent_proto_msgTypes[10].OneofWrappers = []any{ + (*ModelRuntimeInfo_Mlserver)(nil), + (*ModelRuntimeInfo_Triton)(nil), } type x struct{} out := protoimpl.TypeBuilder{ @@ -1210,7 +1541,7 @@ func file_mlops_agent_agent_proto_init() { GoPackagePath: 
reflect.TypeOf(x{}).PkgPath(), RawDescriptor: file_mlops_agent_agent_proto_rawDesc, NumEnums: 3, - NumMessages: 11, + NumMessages: 15, NumExtensions: 0, NumServices: 1, }, diff --git a/apis/mlops/agent/agent.proto b/apis/mlops/agent/agent.proto index 7b6fb12d67..088057e150 100644 --- a/apis/mlops/agent/agent.proto +++ b/apis/mlops/agent/agent.proto @@ -27,6 +27,7 @@ message ModelEventMessage { Event event = 5; string message = 6; uint64 availableMemoryBytes = 7; + ModelRuntimeInfo runtimeInfo = 8; } message ModelEventResponse { @@ -92,8 +93,29 @@ message ModelOperationMessage { message ModelVersion { scheduler.Model model = 1; uint32 version = 2; + ModelRuntimeInfo runtimeInfo = 3; } +message ModelRuntimeInfo { + oneof modelRuntimeInfo { + MLServerModelSettings mlserver = 1; + TritonModelConfig triton = 2; + } +} + +message MLServerModelSettings { + uint32 parallelWorkers = 1; +} + +message TritonModelConfig { + repeated TritonCPU cpu = 1; +} + +message TritonCPU { + uint32 instanceCount = 1; + } + + // [END Messages] // [START Services] diff --git a/docs-gb/SUMMARY.md b/docs-gb/SUMMARY.md index 64e4a3b2f4..e5b3ce169e 100644 --- a/docs-gb/SUMMARY.md +++ b/docs-gb/SUMMARY.md @@ -77,6 +77,7 @@ * [rClone](models/rclone.md) * [Parameterized Models](models/parameterized-models/README.md) * [Pandas Query](models/parameterized-models/pandasquery.md) + * [Securing Endpoints](models/securing-endpoints.md) * [Metrics](metrics/README.md) * [Usage](metrics/usage.md) * [Operational](metrics/operational.md) diff --git a/docs-gb/getting-started/kubernetes-installation/README.md b/docs-gb/getting-started/kubernetes-installation/README.md index bdcbaffad9..f21d1df474 100644 --- a/docs-gb/getting-started/kubernetes-installation/README.md +++ b/docs-gb/getting-started/kubernetes-installation/README.md @@ -2,9 +2,29 @@ ## Prerequisites -* Ensure that the version of the Kubernetes cluster is v1.27 or later. Seldon Core 2 supports Kubernetes versions 1.27, 1.28, 1.29, 1.30, and 1.31. You can create a [KinD](https://kind.sigs.k8s.io/docs/user/quick-start/#installation) cluster on your local computer for testing with [Ansible](ansible.md). +* Ensure that the version of the Kubernetes cluster meets the requirement listed below. You can create a [KinD](https://kind.sigs.k8s.io/docs/user/quick-start/#installation) cluster on your local computer for testing with [Ansible](ansible.md). * Install the ecosystem components using [Ansible](ansible.md). +## Core 2 Dependencies + +Here is a list of components that Seldon Core 2 depends on, with minimum and maximum supported versions. + +| Component | Minimum Version | Maximum Version | Notes | +| - | - | - | - | +| Kubernetes | 1.27 | 1.31 | Required | +| Envoy`*` | 1.32.2 | 1.32.2 | Required | +| Rclone`*` | 1.68.2 | 1.68.2 | Required | +| Kafka`**` | 3.4 | 3.8 | Optional | +| Prometheus | 2.0 | 2.x | Optional | +| Grafana | 10.0 | `***` | Optional | +| Prometheus-adapter | 0.12 | 0.12 | Optional | +| Opentelemetry Collector | 0.68 | `***` | Optional | + +`*` These components are shipped as part of Seldon Core 2 docker images set, users should not install them separately but they need to be aware of the configuration options that are supported by these versions. +`**` Kafka is only required to operate Seldon Core 2 dataflow Pipelines. If not required then users should not install seldon-modelgateway, seldon-pipelinegateway, and seldon-dataflow-engine. +`***` Not hard limit on the maximum version to be used. 
+ + ## Install Ecosystem Components You also need to install our ecosystem components. For this we provide directions for [Ansible](ansible.md) to install these. @@ -22,8 +42,8 @@ You also need to install our ecosystem components. For this we provide direction To install Seldon Core 2 from the [source repository](https://github.com/SeldonIO/seldon-core), you can choose one of the following methods: -* [Helm](helm.md)(recommended for production systems) -* [Ansible](ansible.md)(recommended for testing, development, or trial) +* [Helm](helm.md) (recommended for production systems) +* [Ansible](ansible.md) (recommended for testing, development, or trial) The Kubernetes operator that is installed runs in namespaced mode so any resources you create need to be in the same namespace as you installed into. diff --git a/docs-gb/kubernetes/hpa-rps-autoscaling.md b/docs-gb/kubernetes/hpa-rps-autoscaling.md index 2759ac0bad..d5fd373390 100644 --- a/docs-gb/kubernetes/hpa-rps-autoscaling.md +++ b/docs-gb/kubernetes/hpa-rps-autoscaling.md @@ -13,6 +13,19 @@ and servers (single-model serving). This will require: * Configuring HPA manifests to scale Models and the corresponding Server replicas based on the custom metrics +{% hint style="warning" %} +The Core 2 HPA-based autoscaling has the following constraints/limitations: + +- HPA scaling only targets single-model serving, where there is a 1:1 correspondence between models and servers. Autoscaling for multi-model serving (MMS) is supported for specific models and workloads via the Core 2 native features described [here](autoscaling.md). + - Significant improvements to MMS autoscaling are planned for future releases. + +- **Only custom metrics** from Prometheus are supported. Native Kubernetes resource metrics such as CPU or memory are not. This limitation exists because of HPA's design: In order to prevent multiple HPA CRs from issuing conflicting scaling instructions, each HPA CR must exclusively control a set of pods which is disjoint from the pods controlled by other HPA CRs. In Seldon Core 2, CPU/memory metrics can be used to scale the number of Server replicas via HPA. However, this also means that the CPU/memory metrics from the same set of pods can no longer be used to scale the number of model replicas. + - We are working on improvements in Core 2 to allow both servers and models to be scaled based on a single HPA manifest, targeting the Model CR. + +- Each Kubernetes cluster supports only one active custom metrics provider. If your cluster already uses a custom metrics provider different from `prometheus-adapter`, it will need to be removed before being able to scale Core 2 models and servers via HPA. + - The Kubernetes community is actively exploring solutions for allowing multiple custom metrics providers to coexist. +{% endhint %} + ## Installing and configuring the Prometheus Adapter The role of the Prometheus Adapter is to expose queries on metrics in Prometheus as k8s custom @@ -36,6 +49,14 @@ If you are running Prometheus on a different port than the default 9090, you can prometheus.port=[custom_port]` You may inspect all the options available as helm values by running `helm show values prometheus-community/prometheus-adapter` +{% hint style="warning" %} +Please check that the `metricsRelistInterval` helm value (default to 1m) works well in your +setup, and update it otherwise. This value needs to be larger than or equal to your Prometheus +scrape interval. 
The corresponding prometheus adapter command-line argument is +`--metrics-relist-interval`. If the relist interval is set incorrectly, it will lead to some of +the custom metrics being intermittently reported as missing. +{% endhint %} + We now need to configure the adapter to look for the correct prometheus metrics and compute per-model RPS values. On install, the adapter has created a `ConfigMap` in the same namespace as itself, named `[helm_release_name]-prometheus-adapter`. In our case, it will be @@ -60,10 +81,7 @@ data: "rules": - "seriesQuery": | - {__name__=~"^seldon_model.*_total",namespace!=""} - "seriesFilters": - - "isNot": "^seldon_.*_seconds_total" - - "isNot": "^seldon_.*_aggregate_.*" + {__name__="seldon_model_infer_total",namespace!=""} "resources": "overrides": "model": {group: "mlops.seldon.io", resource: "model"} @@ -71,35 +89,82 @@ data: "pod": {resource: "pod"} "namespace": {resource: "namespace"} "name": - "matches": "^seldon_model_(.*)_total" - "as": "${1}_rps" + "matches": "seldon_model_infer_total" + "as": "infer_rps" "metricsQuery": | sum by (<<.GroupBy>>) ( rate ( - <<.Series>>{<<.LabelMatchers>>}[1m] + <<.Series>>{<<.LabelMatchers>>}[2m] ) ) ```` {% endcode %} -In this example, a single rule is defined to fetch the `seldon_model_infer_total` metric -from Prometheus, compute its rate over a 1 minute window, and expose this to k8s as the `infer_rps` -metric, with aggregations at model, server, inference server pod and namespace level. +In this example, a single rule is defined to fetch the `seldon_model_infer_total` metric from +Prometheus, compute its per second change rate based on data within a 2 minute sliding window, +and expose this to Kubernetes as the `infer_rps` metric, with aggregations available at model, +server, inference server pod and namespace level. + +When HPA requests the `infer_rps` metric via the custom metrics API for a specific model, +prometheus-adapter issues a Prometheus query in line with what it is defined in its config. + +For the configuration in our example, the query for a model named `irisa0` in namespace +`seldon-mesh` would be: + +``` +sum by (model) ( + rate ( + seldon_model_infer_total{model="irisa0", namespace="seldon-mesh"}[2m] + ) +) +``` + +You may want to modify the query in the example to match the one that you typically use in your +monitoring setup for RPS metrics. The example calls [`rate()`](https://prometheus.io/docs/prometheus/latest/querying/functions/#rate) +with a 2 minute sliding window. Values scraped at the beginning and end of the 2 minute window +before query time are used to compute the RPS. + +It is important to sanity-check the query by executing it against your Prometheus instance. To +do so, pick an existing model CR in your Seldon Core 2 install, and send some inference requests +towards it. Then, wait for a period equal to at least twice the Prometheus scrape interval +(Prometheus default 1 minute), so that two values from the series are captured and a rate can be +computed. Finally, you can modify the model name and namespace in the query above to match the +model you've picked and execute the query. + +If the query result is empty, please adjust it until it consistently returns the expected metric +values. Pay special attention to the window size (2 minutes in the example): if it is smaller +than twice the Prometheus scrape interval, the query may return no results. 
A compromise needs +to be reached to set the window size large enough to reject noise but also small enough to make +the result responsive to quick changes in load. -A list of all the Prometheus metrics exposed by Seldon Core 2 in relation to Models, Servers and Pipelines is available [here](../metrics/operational.md), -and those may be used when customizing the configuration. +Update the `metricsQuery` in the prometheus-adapter ConfigMap to match any query changes you +have made during tests. -### Understanding prometheus-adapter rule definitions +A list of all the Prometheus metrics exposed by Seldon Core 2 in relation to Models, Servers and +Pipelines is available [here](../metrics/operational.md), and those may be used when customizing +the configuration. + +### Customizing prometheus-adapter rule definitions The rule definition can be broken down in four parts: * _Discovery_ (the `seriesQuery` and `seriesFilters` keys) controls what Prometheus metrics are considered for exposure via the k8s custom metrics API. - In the example, all the Seldon Prometheus metrics of the form `seldon_model_*_total` are - considered, excluding metrics pre-aggregated across all models (`.*_aggregate_.*`) as well as - the cummulative infer time per model (`.*_seconds_total`). For RPS, we are only interested in - the model inference count (`seldon_model_infer_total`) + As an alternative to the example above, all the Seldon Prometheus metrics of the form `seldon_model.*_total` + could be considered, followed by excluding metrics pre-aggregated across all models (`.*_aggregate_.*`) as well as + the cummulative infer time per model (`.*_seconds_total`): + + ```yaml + "seriesQuery": | + {__name__=~"^seldon_model.*_total",namespace!=""} + "seriesFilters": + - "isNot": "^seldon_.*_seconds_total" + - "isNot": "^seldon_.*_aggregate_.*" + ... + ``` + + For RPS, we are only interested in the model inference count (`seldon_model_infer_total`) * _Association_ (the `resources` key) controls the Kubernetes resources that a particular metric can be attached to or aggregated over. @@ -125,8 +190,14 @@ The rule definition can be broken down in four parts: `seldon_model_infer_total` and expose custom metric endpoints named `infer_rps`, which when called return the result of a query over the Prometheus metric. - The matching over the Prometheus metric name uses regex group capture expressions (line 22), - which are then be referenced in the custom metric name (line 23). + Instead of a literal match, one could also use regex group capture expressions, + which can then be referenced in the custom metric name: + + ```yaml + "name": + "matches": "^seldon_model_(.*)_total" + "as": "${1}_rps" + ``` * _Querying_ (the `metricsQuery` key) defines how a request for a specific k8s custom metric gets converted into a Prometheus query. @@ -141,16 +212,11 @@ The rule definition can be broken down in four parts: - .GroupBy is replaced by the resource type of the requested metric (e.g. `model`, `server`, `pod` or `namespace`). - You may want to modify the query in the example to match the one that you typically use in - your monitoring setup for RPS metrics. The example calls [`rate()`](https://prometheus.io/docs/prometheus/latest/querying/functions/#rate) - with a 1 minute window. - For a complete reference for how `prometheus-adapter` can be configured via the `ConfigMap`, please consult the docs [here](https://github.com/kubernetes-sigs/prometheus-adapter/blob/master/docs/config.md). 
- Once you have applied any necessary customizations, replace the default prometheus-adapter config with the new one, and restart the deployment (this restart is required so that prometheus-adapter picks up the new config): @@ -301,8 +367,8 @@ spec: ``` {% endcode %} -In the preceding HPA manifests, the scaling metric is exactly the same, and uses the exact same -parameters. This is to ensure that both the Models and the Servers are scaled up/down at +It is important to keep both the scaling metric and any scaling policies the same across the two +HPA manifests. This is to ensure that both the Models and the Servers are scaled up/down at approximately the same time. Small variations in the scale-up time are expected because each HPA samples the metrics independently, at regular intervals. @@ -317,9 +383,17 @@ In order to ensure similar scaling behaviour between Models and Servers, the num `minReplicas` and `maxReplicas`, as well as any other configured scaling policies should be kept in sync across the HPA for the model and the server. -### Details on custom metrics of type Object +{% hint style="danger" %} +The Object metric allows for two target value types: `AverageValue` and `Value`. Of the two, +only `AverageValue` is supported for the current Seldon Core 2 setup. The `Value` target type is +typically used for metrics describing the utilization of a resource and would not be suitable +for RPS-based scaling. +{% endhint %} + + +### HPA metrics of type Object -The HPA manifests use metrics of type "Object" that fetch the data used in scaling +The example HPA manifests use metrics of type "Object" that fetch the data used in scaling decisions by querying k8s metrics associated with a particular k8s object. The endpoints that HPA uses for fetching those metrics are the same ones that were tested in the previous section using `kubectl get --raw ...`. Because you have configured the Prometheus Adapter to expose those @@ -348,7 +422,7 @@ query template configured in our example would be transformed into: ``` sum by (namespace) ( rate ( - seldon_model_infer_total{namespace="seldon-mesh"}[1m] + seldon_model_infer_total{namespace="seldon-mesh"}[2m] ) ) ``` @@ -360,25 +434,37 @@ identifying the namespace where the HPA manifest resides in.: ``` sum by (pod) ( rate ( - seldon_model_infer_total{pod="mlserver-0", namespace="seldon-mesh"}[1m] + seldon_model_infer_total{pod="mlserver-0", namespace="seldon-mesh"}[2m] ) ) ``` -For the `target` of the Object metric you **must** use a `type` of `AverageValue`. The value -given in `averageValue` represents the per replica RPS scaling threshold of the `scaleTargetRef` -object (either a Model or a Server in our case), with the target number of replicas being -computed by HPA according to the following formula: +The `target` section establishes the thresholds used in scaling decisions. For RPS, the +`AverageValue` target type refers to the threshold per replica RPS above which the number of the +`scaleTargetRef` (Model or Server) replicas should be increased. The target number of replicas +is being computed by HPA according to the following formula: -$$\texttt{targetReplicas} = \frac{\texttt{infer\_rps}}{\texttt{thresholdPerReplicaRPS}}$$ +$$\texttt{targetReplicas} = \frac{\texttt{infer\_rps}}{\texttt{averageValue}}$$ + +As an example, if `averageValue=50` and `infer_rps=150`, the `targetReplicas` would be 3. 
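+
+For reference, a minimal sketch of where this threshold sits in the HPA manifest's metric spec,
+mirroring the Object metric used in the manifests on this page (the model name `irisa0` follows
+the example above, and the `averageValue` of "50" is purely illustrative; replace it with your
+per-replica RPS target):
+
+```yaml
+  metrics:
+    - type: Object
+      object:
+        describedObject:
+          apiVersion: mlops.seldon.io/v1alpha1
+          kind: Model
+          name: irisa0
+        metric:
+          name: infer_rps
+        target:
+          type: AverageValue
+          averageValue: "50"
+```
+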
+
+Importantly, computing the target number of replicas does not require knowing the number of
+active pods currently associated with the Server or Model. This is what allows both the Model
+and the Server to be targeted by two separate HPA manifests. Otherwise, both HPA CRs would
+attempt to take ownership of the same set of pods, and transition into a failure state.
+
+This is also why the `Value` target type is **not currently supported**. In this case, HPA first
+computes a `utilizationRatio`:
+
+$$\texttt{utilizationRatio} = \frac{\texttt{custom\_metric\_value}}{\texttt{threshold\_value}}$$
+
+As an example, if `threshold_value=100` and `custom_metric_value=200`, the `utilizationRatio`
+would be 2. HPA deduces from this that the number of active pods associated with the
+`scaleTargetRef` object should be doubled, and expects that once that target is achieved, the
+`custom_metric_value` will become equal to the `threshold_value` (`utilizationRatio=1`). However,
+by using the number of active pods, the HPA CRs for both the Model and the Server also try to
+take exclusive ownership of the same set of pods, and fail.

-{% hint style="info" %}
-**Note**: Attempting other target types does not work under the current Seldon Core 2 setup, because they
-use the number of active Pods associated with the Model CR (i.e. the associated Server pods) in
-the `targetReplicas` computation. However, this also means that this set of pods becomes "owned"
-by the Model HPA. Once a pod is owned by a given HPA it is not available for other HPAs to use,
-so we would no longer be able to scale the Server CRs using HPA.
-{% endhint %}

### HPA sampling of custom metrics

@@ -421,7 +507,8 @@ inspecting the corresponding Server HPA CR, or by fetching the metric directly v

* Filtering metrics by additional labels on the prometheus metric:
- The prometheus metric from which the model RPS is computed has the following labels:
+ The Prometheus metric from which the model RPS is computed has the following labels managed
+ by Seldon Core 2:

 ```c-like
 seldon_model_infer_total{
@@ -440,9 +527,11 @@ inspecting the corresponding Server HPA CR, or by fetching the metric directly v
 }
 ```

- If you want the scaling metric to be computed based on inferences with a particular value
- for any of those labels, you can add this in the HPA metric config, as in the example
- (targeting `method_type="rest"`):
+ If you want the scaling metric to be computed based on a subset of the Prometheus time
+ series with particular label values (labels either managed by Seldon Core 2 or added
+ automatically within your infrastructure), you can add this as a selector in the HPA metric
+ config. This is shown in the following example, which scales only based on the RPS of REST
+ requests as opposed to REST + gRPC:

 ```yaml
 metrics:
@@ -461,6 +550,7 @@ inspecting the corresponding Server HPA CR, or by fetching the metric directly v
 type: AverageValue
 averageValue: "3"
 ```
+

* Customize scale-up / scale-down rate & properties by using scaling policies as described in the [HPA scaling policies docs](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/#configurable-scaling-behavior)

@@ -592,8 +682,7 @@ into account when setting the HPA policies.

 within the set `periodSeconds`) is not recommended because of this.
- Perhaps more importantly, there is no reason to scale faster than the time it takes for replicas to become available - this is the true maximum rate with which scaling up can
Because the underlying Server replica pods are part of a stateful set, they
- are created sequentially by k8s.
+ happen anyway.

{% code title="hpa-custom-policy.yaml" lineNumbers="true" %}
```yaml
diff --git a/docs-gb/kubernetes/service-meshes/istio.md b/docs-gb/kubernetes/service-meshes/istio.md
index d1fbd42a15..4a16db5b5b 100644
--- a/docs-gb/kubernetes/service-meshes/istio.md
+++ b/docs-gb/kubernetes/service-meshes/istio.md
@@ -49,7 +49,7 @@ spec:
      privateKey: /etc/istio/ingressgateway-certs/tls.key
      serverCertificate: /etc/istio/ingressgateway-certs/tls.crt
---
-apiVersion: networking.istio.io/v1beta1
+apiVersion: networking.istio.io/v1alpha3
kind: VirtualService
metadata:
  name: iris-route
diff --git a/docs-gb/models/securing-endpoints.md b/docs-gb/models/securing-endpoints.md
new file mode 100644
index 0000000000..debce9b854
--- /dev/null
+++ b/docs-gb/models/securing-endpoints.md
@@ -0,0 +1,110 @@
+# Securing model endpoints
+
+In enterprise use cases, you may need to control who can access the endpoints for deployed models or pipelines. You can leverage existing authentication mechanisms in your cluster or environment, such as service mesh-level controls, or use cloud provider solutions like Apigee on GCP, Amazon API Gateway on AWS, or a provider-agnostic gateway like Gravitee. Seldon Core 2 integrates with various [service meshes](../kubernetes/service-meshes/) that support these requirements. Though Seldon Core 2 is service-mesh agnostic, the example on this page demonstrates how to set up authentication and authorization to secure a model endpoint using the Istio service mesh.
+
+## Securing Endpoints with Istio
+
+Service meshes offer a flexible way of defining authentication and authorization rules for your models. With Istio, for example, you can configure multiple layers of security within an Istio Gateway, such as [TLS for HTTPS at the gateway](https://istio.io/latest/docs/tasks/traffic-management/ingress/secure-ingress/#configure-a-tls-ingress-gateway-for-a-single-host) level, [mutual TLS (mTLS) for secure internal communication](https://istio.io/latest/docs/tasks/traffic-management/ingress/secure-ingress/#configure-a-mutual-tls-ingress-gateway), as well as [AuthorizationPolicies](https://istio.io/latest/docs/reference/config/security/authorization-policy/) and [RequestAuthentication](https://istio.io/latest/docs/reference/config/security/request_authentication/) policies to enforce both authentication and authorization controls.
+
+**Prerequisites**
+* [Deploy a model](../kubernetes/service-meshes/istio.md)
+* [Configure a gateway](../kubernetes/service-meshes/istio.md)
+* [Create a virtual service to expose the REST and gRPC endpoints](../kubernetes/service-meshes/istio.md)
+* Configure an OIDC provider for authentication. Obtain the `issuer` URL, `jwksUri`, and an access token from the OIDC provider.
+{% hint style="info" %}
+**Note** There are many types of authorization policies that you can configure to enable access control on workloads in the mesh.
+{% endhint %}
+
+In the following example, you secure the endpoint so that any request without an access token is denied.
+
+To secure the endpoints of a model, you need to:
+1. Create a `RequestAuthentication` resource named `ingress-jwt-auth` in the `istio-system` namespace. Set the `issuer` and `jwksUri` fields to your OIDC provider’s issuer URL and JWKS (JSON Web Key Set) URI.
+ +```yaml +apiVersion: security.istio.io/v1beta1 +kind: RequestAuthentication +metadata: + name: ingress-jwt-auth + namespace: istio-system # This is the namespace where Istio Ingress Gateway usually resides +spec: + selector: + matchLabels: + istio: istio-ingressgateway # Apply to Istio Ingress Gateway pods + jwtRules: + - issuer: + jwksUri: +``` +Create the resource using `kubectl apply -f ingress-jwt-auth.yaml`. + +2. Create an authorization policy `deny-empty-jwt` in the namespace `istio-system`. + +```yaml +apiVersion: security.istio.io/v1beta1 +kind: AuthorizationPolicy +metadata: + name: deny-empty-jwt + namespace: istio-system +spec: + action: DENY + rules: + - from: + - source: + notRequestPrincipals: + - '*' # Denies requests without a valid JWT principal + to: + - operation: + paths: + - /v2/* # Applies to requests with this path pattern + selector: + matchLabels: + app: istio-ingressgateway # Applies to Istio Ingress Gateway pods +``` +Create the resource using `kubectl apply -f deny-empty-jwt.yaml`. + +3. To verify that the requests without an access token are denied send this request: + ```bash + curl -i http://$MESH_IP/v2/models/iris/infer \ + -H "Content-Type: application/json" \ + -H "seldon-model":iris \ + -d '{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}' + ``` + The output is similar to: + ```bash + HTTP/1.1 403 Forbidden + content-length: 19 + content-type: text/plain + date: Fri, 25 Oct 2024 11:14:33 GMT + server: istio-envoy + connection: close + Closing connection 0 + RBAC: access denied + ``` + Now, send the same request with an access token: + ```bash + curl -i http://$MESH_IP/v2/models/iris/infer \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $ACCESS_TOKEN" \ + -H "seldon-model":iris \ + -d '{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}' + ``` + The output is similar to: + ```bash + HTTP/1.1 200 OK + ce-endpoint: iris_1 + ce-id: 2fb8a086-ee22-4285-9826-9d38111cbb9e + ce-inferenceservicename: mlserver + ce-modelid: iris_1 + ce-namespace: seldon-mesh + ce-requestid: 2fb8a086-ee22-4285-9826-9d38111cbb9e + ce-source: io.seldon.serving.deployment.mlserver.seldon-mesh + ce-specversion: 0.3 + ce-type: io.seldon.serving.inference.response + content-length: 213 + content-type: application/json + date: Fri, 25 Oct 2024 11:44:49 GMT + server: envoy + x-request-id: csdo9cbc2nks73dtlk3g + x-envoy-upstream-service-time: 9 + x-seldon-route: :iris_1: + ``` + diff --git a/docs-gb/upgrading.md b/docs-gb/upgrading.md index c8523b1435..55e3872113 100644 --- a/docs-gb/upgrading.md +++ b/docs-gb/upgrading.md @@ -1,5 +1,14 @@ # Upgrading +## Upgrading from 2.7 - 2.8 + +Core 2.8 introduces several new fields in our CRDs: +* `statefulSetPersistentVolumeClaimRetentionPolicy` enables users to configure the cleaning of PVC on their **servers**. This field is set to **retain** as default. +* `Status.selector` was introduced as a mandatory field for **models** in 2.8.4 and made optional in 2.8.5. This field enables autoscaling with HPA. +* `PodSpec` in the `OverrideSpec` for **SeldonRuntimes** enables users to customize how Seldon Core 2 pods are created. In particular, this also allows for setting custom taints/tolerations, adding additional containers to our pods, configuring custom security settings. + +These added fields do not result in breaking changes, apart from 2.8.4 which required the setting of the `Status.selector` upon upgrading. 
This field was however changed to optional in the subsequent 2.8.5 release. Updating the CRDs (e.g. via helm) will enable users to benefit from the associated functionality. + ## Upgrading from 2.6 - 2.7 All pods provisioned through the operator i.e. `SeldonRuntime` and `Server` resources now have the diff --git a/hodometer/go.mod b/hodometer/go.mod index b115beaf40..35cf1066e2 100644 --- a/hodometer/go.mod +++ b/hodometer/go.mod @@ -13,7 +13,7 @@ require ( github.com/seldonio/seldon-core/components/tls/v2 v2.0.0-00010101000000-000000000000 github.com/sirupsen/logrus v1.9.3 github.com/stretchr/testify v1.9.0 - google.golang.org/grpc v1.68.0 + google.golang.org/grpc v1.68.1 k8s.io/apimachinery v0.29.2 k8s.io/client-go v0.29.2 ) diff --git a/hodometer/go.sum b/hodometer/go.sum index 3b6a7d4315..6fe5effc8e 100644 --- a/hodometer/go.sum +++ b/hodometer/go.sum @@ -239,8 +239,8 @@ google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyac google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk= -google.golang.org/grpc v1.68.0 h1:aHQeeJbo8zAkAa3pRzrVjZlbz6uSfeOXlJNQM0RAbz0= -google.golang.org/grpc v1.68.0/go.mod h1:fmSPC5AsjSBCK54MyHRx48kpOti1/jRfOlwEWywNjWA= +google.golang.org/grpc v1.68.1 h1:oI5oTa11+ng8r8XMMN7jAOmWfPZWbYpCFaMUTACxkM0= +google.golang.org/grpc v1.68.1/go.mod h1:+q1XYFJjShcqn0QZHvCyeR4CXPA+llXIeUIfIe00waw= google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/operator/cmd/seldon/cli/pipeline_inspect.go b/operator/cmd/seldon/cli/pipeline_inspect.go index 791e1589f0..f1ec63e9b2 100644 --- a/operator/cmd/seldon/cli/pipeline_inspect.go +++ b/operator/cmd/seldon/cli/pipeline_inspect.go @@ -11,6 +11,7 @@ package cli import ( "fmt" + "time" "github.com/spf13/cobra" "k8s.io/utils/env" @@ -24,7 +25,7 @@ const ( flagOutputFormat = "format" flagTruncate = "truncate" flagNamespace = "namespace" - flagTimeoutDefault = int64(60) + flagTimeoutDefault = int64(5) ) func createPipelineInspect() *cobra.Command { @@ -74,12 +75,16 @@ func createPipelineInspect() *cobra.Command { if err != nil { return err } + timeoutSecs, err := flags.GetInt64(flagTimeout) + if err != nil { + return err + } kc, err := cli.NewKafkaClient(kafkaBroker, kafkaBrokerIsSet, schedulerHost, schedulerHostIsSet, kafkaConfigPath) if err != nil { return err } data := []byte(args[0]) - err = kc.InspectStep(string(data), offset, requestId, format, verbose, truncateData, namespace) + err = kc.InspectStep(string(data), offset, requestId, format, verbose, truncateData, namespace, time.Duration(timeoutSecs)*time.Second) return err }, } @@ -94,5 +99,6 @@ func createPipelineInspect() *cobra.Command { flags.BoolP(flagVerbose, "v", false, "display more details, such as headers") flags.BoolP(flagTruncate, "t", false, "truncate data") flags.String(flagKafkaConfigPath, env.GetString(envKafkaConfigPath, ""), "path to kafka config file") + flags.Int64P(flagTimeout, "d", flagTimeoutDefault, "timeout seconds for kafka operations") return cmd } diff --git a/operator/controllers/mlops/experiment_controller.go b/operator/controllers/mlops/experiment_controller.go 
index 62a2c6c71a..99ef595679 100644 --- a/operator/controllers/mlops/experiment_controller.go +++ b/operator/controllers/mlops/experiment_controller.go @@ -44,7 +44,7 @@ func (r *ExperimentReconciler) handleFinalizer(ctx context.Context, logger logr. // Add our finalizer if !utils.ContainsStr(experiment.ObjectMeta.Finalizers, constants.ExperimentFinalizerName) { experiment.ObjectMeta.Finalizers = append(experiment.ObjectMeta.Finalizers, constants.ExperimentFinalizerName) - if err := r.Update(context.Background(), experiment); err != nil { + if err := r.Update(ctx, experiment); err != nil { return true, err } } @@ -84,6 +84,8 @@ func (r *ExperimentReconciler) handleFinalizer(ctx context.Context, logger logr. // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.10.0/pkg/reconcile func (r *ExperimentReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { logger := log.FromContext(ctx).WithName("Reconcile") + ctx, cancel := context.WithTimeout(ctx, constants.ReconcileTimeout) + defer cancel() experiment := &mlopsv1alpha1.Experiment{} if err := r.Get(ctx, req.NamespacedName, experiment); err != nil { diff --git a/operator/controllers/mlops/model_controller.go b/operator/controllers/mlops/model_controller.go index 4f39838425..1b3245ef0c 100644 --- a/operator/controllers/mlops/model_controller.go +++ b/operator/controllers/mlops/model_controller.go @@ -46,7 +46,7 @@ func (r *ModelReconciler) handleFinalizer(ctx context.Context, logger logr.Logge // Add our finalizer if !utils.ContainsStr(model.ObjectMeta.Finalizers, constants.ModelFinalizerName) { model.ObjectMeta.Finalizers = append(model.ObjectMeta.Finalizers, constants.ModelFinalizerName) - if err := r.Update(context.Background(), model); err != nil { + if err := r.Update(ctx, model); err != nil { return true, err } } @@ -78,6 +78,8 @@ func (r *ModelReconciler) handleFinalizer(ctx context.Context, logger logr.Logge func (r *ModelReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { logger := log.FromContext(ctx).WithName("Reconcile") + ctx, cancel := context.WithTimeout(ctx, constants.ReconcileTimeout) + defer cancel() model := &mlopsv1alpha1.Model{} if err := r.Get(ctx, req.NamespacedName, model); err != nil { diff --git a/operator/controllers/mlops/pipeline_controller.go b/operator/controllers/mlops/pipeline_controller.go index 7e3bcc477d..0fc2fb20d2 100644 --- a/operator/controllers/mlops/pipeline_controller.go +++ b/operator/controllers/mlops/pipeline_controller.go @@ -49,7 +49,7 @@ func (r *PipelineReconciler) handleFinalizer( // Add our finalizer if !utils.ContainsStr(pipeline.ObjectMeta.Finalizers, constants.PipelineFinalizerName) { pipeline.ObjectMeta.Finalizers = append(pipeline.ObjectMeta.Finalizers, constants.PipelineFinalizerName) - if err := r.Update(context.Background(), pipeline); err != nil { + if err := r.Update(ctx, pipeline); err != nil { return true, err } } @@ -94,6 +94,8 @@ func (r *PipelineReconciler) handleFinalizer( // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.10.0/pkg/reconcile func (r *PipelineReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { logger := log.FromContext(ctx).WithName("Reconcile") + ctx, cancel := context.WithTimeout(ctx, constants.ReconcileTimeout) + defer cancel() pipeline := &mlopsv1alpha1.Pipeline{} if err := r.Get(ctx, req.NamespacedName, pipeline); err != nil { diff --git a/operator/controllers/mlops/seldonruntime_controller.go b/operator/controllers/mlops/seldonruntime_controller.go index 
837c55af60..3e39c0874c 100644 --- a/operator/controllers/mlops/seldonruntime_controller.go +++ b/operator/controllers/mlops/seldonruntime_controller.go @@ -65,7 +65,7 @@ func (r *SeldonRuntimeReconciler) handleFinalizer(ctx context.Context, logger lo // Add our finalizer if !utils.ContainsStr(runtime.ObjectMeta.Finalizers, constants.RuntimeFinalizerName) { runtime.ObjectMeta.Finalizers = append(runtime.ObjectMeta.Finalizers, constants.RuntimeFinalizerName) - if err := r.Update(context.Background(), runtime); err != nil { + if err := r.Update(ctx, runtime); err != nil { return true, err } } @@ -120,6 +120,8 @@ func (r *SeldonRuntimeReconciler) handleFinalizer(ctx context.Context, logger lo // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.17.4/pkg/reconcile func (r *SeldonRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { logger := log.FromContext(ctx).WithName("Reconcile") + ctx, cancel := context.WithTimeout(ctx, constants.ReconcileTimeout) + defer cancel() seldonRuntime := &mlopsv1alpha1.SeldonRuntime{} if err := r.Get(ctx, req.NamespacedName, seldonRuntime); err != nil { @@ -214,9 +216,11 @@ func (r *SeldonRuntimeReconciler) updateStatus(seldonRuntime *mlopsv1alpha1.Seld // Find SeldonRuntimes that reference the changes SeldonConfig // TODO: pass an actual context from the caller to be used here func (r *SeldonRuntimeReconciler) mapSeldonRuntimesFromSeldonConfig(_ context.Context, obj client.Object) []reconcile.Request { - logger := log.FromContext(context.Background()).WithName("mapSeldonRuntimesFromSeldonConfig") + ctx, cancel := context.WithTimeout(context.Background(), constants.K8sAPICallsTxTimeout) + defer cancel() + logger := log.FromContext(ctx).WithName("mapSeldonRuntimesFromSeldonConfig") var seldonRuntimes mlopsv1alpha1.SeldonRuntimeList - if err := r.Client.List(context.Background(), &seldonRuntimes); err != nil { + if err := r.Client.List(ctx, &seldonRuntimes); err != nil { logger.Error(err, "error listing seldonRuntimes") return nil } diff --git a/operator/controllers/mlops/server_controller.go b/operator/controllers/mlops/server_controller.go index e7b3d95741..f6b6353bd1 100644 --- a/operator/controllers/mlops/server_controller.go +++ b/operator/controllers/mlops/server_controller.go @@ -33,6 +33,7 @@ import ( mlopsv1alpha1 "github.com/seldonio/seldon-core/operator/v2/apis/mlops/v1alpha1" "github.com/seldonio/seldon-core/operator/v2/controllers/reconcilers/common" serverreconcile "github.com/seldonio/seldon-core/operator/v2/controllers/reconcilers/server" + "github.com/seldonio/seldon-core/operator/v2/pkg/constants" scheduler "github.com/seldonio/seldon-core/operator/v2/scheduler" ) @@ -65,6 +66,8 @@ type ServerReconciler struct { // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.10.0/pkg/reconcile func (r *ServerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { logger := log.FromContext(ctx).WithName("Reconcile") + ctx, cancel := context.WithTimeout(ctx, constants.ReconcileTimeout) + defer cancel() logger.Info("Received reconcile for Server", "name", req.Name, "namespace", req.NamespacedName.Namespace) @@ -186,9 +189,11 @@ func (r *ServerReconciler) updateStatus(server *mlopsv1alpha1.Server) error { // Find Servers that need reconcilliation from a change to a given ServerConfig // TODO: pass an actual context from the caller to be used here func (r *ServerReconciler) mapServerFromServerConfig(_ context.Context, obj client.Object) 
[]reconcile.Request { - logger := log.FromContext(context.Background()).WithName("mapServerFromServerConfig") + ctx, cancel := context.WithTimeout(context.Background(), constants.K8sAPICallsTxTimeout) + defer cancel() + logger := log.FromContext(ctx).WithName("mapServerFromServerConfig") var servers mlopsv1alpha1.ServerList - if err := r.Client.List(context.Background(), &servers); err != nil { + if err := r.Client.List(ctx, &servers); err != nil { logger.Error(err, "error listing servers") return nil } diff --git a/operator/main.go b/operator/main.go index 6e4586b1b8..82963484bd 100644 --- a/operator/main.go +++ b/operator/main.go @@ -12,6 +12,7 @@ package main import ( "flag" "os" + "time" //+kubebuilder:scaffold:imports "go.uber.org/zap/zapcore" @@ -43,6 +44,10 @@ func init() { //+kubebuilder:scaffold:scheme } +const ( + defaultReconcileTimeout = 2 * time.Minute +) + func main() { var metricsAddr string var enableLeaderElection bool diff --git a/operator/pkg/cli/kafka.go b/operator/pkg/cli/kafka.go index 9324711fce..1ae1f9abc1 100644 --- a/operator/pkg/cli/kafka.go +++ b/operator/pkg/cli/kafka.go @@ -30,7 +30,6 @@ const ( OutputsSpecifier = "outputs" PipelineSpecifier = "pipeline" ModelSpecifier = "model" - KafkaTimeoutSeconds = 2 DefaultNamespace = "default" DefaultMaxMessageSize = 1000000000 ) @@ -222,7 +221,9 @@ func getPipelineNameFromHeaders(headers []kafka.Header) (string, error) { return "", fmt.Errorf("No pipeline found in headers.") } -func (kc *KafkaClient) InspectStep(pipelineStep string, offset int64, key string, format string, verbose bool, truncateData bool, namespace string) error { +func (kc *KafkaClient) InspectStep( + pipelineStep string, offset int64, key string, format string, verbose bool, truncateData bool, namespace string, timeout time.Duration, +) error { defer kc.consumer.Close() if namespace == "" { namespace = kc.namespace @@ -238,7 +239,7 @@ func (kc *KafkaClient) InspectStep(pipelineStep string, offset int64, key string ki := KafkaInspect{} for _, topic := range pipelineTopics.topics { - kit, err := kc.createInspectTopic(topic, pipelineTopics.pipeline, pipelineTopics.tensor, offset, key, verbose, truncateData) + kit, err := kc.createInspectTopic(topic, pipelineTopics.pipeline, pipelineTopics.tensor, offset, key, verbose, truncateData, timeout) if err != nil { return err } @@ -253,24 +254,28 @@ func (kc *KafkaClient) InspectStep(pipelineStep string, offset int64, key string fmt.Printf("%s\n", string(b)) } else { for _, topic := range ki.Topics { + fmt.Printf("Topic: %s\n", topic.Name) for _, msg := range topic.Msgs { if verbose { - fmt.Printf("%s\t%s\t%s\t", topic.Name, msg.Key, msg.Value) + fmt.Printf("%s\t%s\t", msg.Key, msg.Value) for k, v := range msg.Headers { fmt.Printf("\t%s=%s", k, v) } fmt.Println("") } else { - fmt.Printf("%s\t%s\t%s\n", topic.Name, msg.Key, msg.Value) + fmt.Printf("%s\t%s\n", msg.Key, msg.Value) } } + fmt.Print("----------------\n") } } return nil } -func (kc *KafkaClient) createInspectTopic(topic string, pipeline string, tensor string, offset int64, key string, verbose bool, truncateData bool) (*KafkaInspectTopic, error) { +func (kc *KafkaClient) createInspectTopic( + topic string, pipeline string, tensor string, offset int64, key string, verbose bool, truncateData bool, timeout time.Duration, +) (*KafkaInspectTopic, error) { kit := KafkaInspectTopic{ Name: topic, } @@ -279,7 +284,7 @@ func (kc *KafkaClient) createInspectTopic(topic string, pipeline string, tensor return nil, err } - ctx, cancel := 
context.WithTimeout(context.Background(), KafkaTimeoutSeconds*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), timeout) defer cancel() run := true diff --git a/operator/pkg/constants/constants.go b/operator/pkg/constants/constants.go index 1c52c078c7..6372b394a1 100644 --- a/operator/pkg/constants/constants.go +++ b/operator/pkg/constants/constants.go @@ -9,7 +9,10 @@ the Change License after the Change Date as each is defined in accordance with t package constants -import "os" +import ( + "os" + "time" +) const ( // note: we do not have a finalizer for servers as we rely on the draining logic to reschedule models @@ -50,3 +53,13 @@ const ( ReconcileUpdateNeeded ReconcileCreateNeeded ) + +// k8s api call timeout +const ( + // this is a constant that can be used to set the timeout for k8s api calls + // currently it can be used for a series of calls in a single logical operation + // which is expected to be completed in this amount of time (as opposed to a single call) + K8sAPICallsTxTimeout = 2 * time.Minute + ControlPlaneExecTimeOut = 5 * time.Minute + ReconcileTimeout = 5 * time.Minute +) diff --git a/operator/scheduler/control_plane.go b/operator/scheduler/control_plane.go index 9f0d1dfd8d..65466c580f 100644 --- a/operator/scheduler/control_plane.go +++ b/operator/scheduler/control_plane.go @@ -19,10 +19,8 @@ import ( "google.golang.org/grpc/status" "github.com/seldonio/seldon-core/apis/go/v2/mlops/scheduler" -) -const ( - execTimeOut = 5 * time.Minute + "github.com/seldonio/seldon-core/operator/v2/pkg/constants" ) func (s *SchedulerClient) SubscribeControlPlaneEvents(ctx context.Context, grpcClient scheduler.SchedulerClient, namespace string) error { @@ -49,10 +47,12 @@ func (s *SchedulerClient) SubscribeControlPlaneEvents(ctx context.Context, grpcC } logger.Info("Received event to handle state", "event", event) - fn := func() error { + fn := func(ctx context.Context) error { return s.handleStateOnReconnect(ctx, grpcClient, namespace, event.GetEvent()) } - _, err = execWithTimeout(fn, execTimeOut) + // in general we could have also handled timeout via a context with timeout + // but we want to handle the timeout in a more controlled way and not depending on the other side + _, err = execWithTimeout(ctx, fn, constants.ControlPlaneExecTimeOut) if err != nil { logger.Error(err, "Failed to handle state on reconnect") return err @@ -64,10 +64,14 @@ func (s *SchedulerClient) SubscribeControlPlaneEvents(ctx context.Context, grpcC return nil } -func execWithTimeout(f func() error, d time.Duration) (bool, error) { +func execWithTimeout(baseContext context.Context, f func(ctx context.Context) error, d time.Duration) (bool, error) { + // cancel the context after the timeout + ctxWithCancel, cancel := context.WithCancel(baseContext) + defer cancel() + errChan := make(chan error, 1) go func() { - errChan <- f() + errChan <- f(ctxWithCancel) close(errChan) }() t := time.NewTimer(d) diff --git a/operator/scheduler/control_plane_test.go b/operator/scheduler/control_plane_test.go index 3bdd535f55..7792ff6997 100644 --- a/operator/scheduler/control_plane_test.go +++ b/operator/scheduler/control_plane_test.go @@ -67,7 +67,7 @@ func TestSendWithTimeout(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - hasExpired, err := execWithTimeout(func() error { + hasExpired, err := execWithTimeout(context.Background(), func(_ context.Context) error { return fn(test.err) }, test.sleepTime) g.Expect(hasExpired).To(Equal(test.isExpired)) diff 
--git a/operator/scheduler/experiment.go b/operator/scheduler/experiment.go index 50be2e6cf8..727794a28d 100644 --- a/operator/scheduler/experiment.go +++ b/operator/scheduler/experiment.go @@ -102,79 +102,79 @@ func (s *SchedulerClient) SubscribeExperimentEvents(ctx context.Context, grpcCli logger.Info("Received experiment event with no k8s metadata so ignoring", "Experiment", event.ExperimentName) continue } - experiment := &v1alpha1.Experiment{} - err = s.Get(ctx, client.ObjectKey{Name: event.ExperimentName, Namespace: event.KubernetesMeta.Namespace}, experiment) - if err != nil { - logger.Error(err, "Failed to get experiment", "name", event.ExperimentName, "namespace", event.KubernetesMeta.Namespace) - continue - } - if !experiment.ObjectMeta.DeletionTimestamp.IsZero() { - logger.Info("Experiment is pending deletion", "experiment", experiment.Name) - if !event.Active { - retryErr := retry.RetryOnConflict(retry.DefaultRetry, func() error { - latestExperiment := &v1alpha1.Experiment{} - err = s.Get(ctx, client.ObjectKey{Name: event.ExperimentName, Namespace: event.KubernetesMeta.Namespace}, latestExperiment) - if err != nil { + // An experiment is not active if it is being deleted or some models are not ready + if !event.Active { + retryErr := retry.RetryOnConflict(retry.DefaultRetry, func() error { + ctxWithTimeout, cancel := context.WithTimeout(ctx, constants.K8sAPICallsTxTimeout) + defer cancel() + + latestExperiment := &v1alpha1.Experiment{} + err = s.Get(ctxWithTimeout, client.ObjectKey{Name: event.ExperimentName, Namespace: event.KubernetesMeta.Namespace}, latestExperiment) + if err != nil { + return err + } + if !latestExperiment.ObjectMeta.DeletionTimestamp.IsZero() { // Experiment is being deleted + // remove finalizer now we have completed successfully + latestExperiment.ObjectMeta.Finalizers = utils.RemoveStr(latestExperiment.ObjectMeta.Finalizers, constants.ExperimentFinalizerName) + if err := s.Update(ctxWithTimeout, latestExperiment); err != nil { + logger.Error(err, "Failed to remove finalizer", "experiment", latestExperiment.GetName()) return err } - if !latestExperiment.ObjectMeta.DeletionTimestamp.IsZero() { // Experiment is being deleted - // remove finalizer now we have completed successfully - latestExperiment.ObjectMeta.Finalizers = utils.RemoveStr(latestExperiment.ObjectMeta.Finalizers, constants.ExperimentFinalizerName) - if err := s.Update(ctx, latestExperiment); err != nil { - logger.Error(err, "Failed to remove finalizer", "experiment", latestExperiment.GetName()) - return err - } - } - return nil - }) - if retryErr != nil { - logger.Error(err, "Failed to remove finalizer after retries") } + return nil + }) + if retryErr != nil { + logger.Error(err, "Failed to remove finalizer after retries") } } // Try to update status - retryErr := retry.RetryOnConflict(retry.DefaultRetry, func() error { - experiment := &v1alpha1.Experiment{} - err = s.Get(ctx, client.ObjectKey{Name: event.ExperimentName, Namespace: event.KubernetesMeta.Namespace}, experiment) - if err != nil { - return err - } - if event.KubernetesMeta.Generation != experiment.Generation { - logger.Info("Ignoring event for old generation", "currentGeneration", experiment.Generation, "eventGeneration", event.KubernetesMeta.Generation, "server", event.ExperimentName) - return nil - } - // Handle status update - if event.Active { - logger.Info("Setting experiment to ready", "experiment", event.ExperimentName) - experiment.Status.CreateAndSetCondition(v1alpha1.ExperimentReady, true, event.StatusDescription) - } 
else { - logger.Info("Setting experiment to not ready", "experiment", event.ExperimentName) - experiment.Status.CreateAndSetCondition(v1alpha1.ExperimentReady, false, event.StatusDescription) - } - if event.CandidatesReady { - experiment.Status.CreateAndSetCondition(v1alpha1.CandidatesReady, true, "Candidates ready") - } else { - experiment.Status.CreateAndSetCondition(v1alpha1.CandidatesReady, false, "Candidates not ready") - } - if event.MirrorReady { - experiment.Status.CreateAndSetCondition(v1alpha1.MirrorReady, true, "Mirror ready") - } else { - experiment.Status.CreateAndSetCondition(v1alpha1.MirrorReady, false, "Mirror not ready") + { + retryErr := retry.RetryOnConflict(retry.DefaultRetry, func() error { + ctxWithTimeout, cancel := context.WithTimeout(ctx, constants.K8sAPICallsTxTimeout) + defer cancel() + + experiment := &v1alpha1.Experiment{} + err = s.Get(ctxWithTimeout, client.ObjectKey{Name: event.ExperimentName, Namespace: event.KubernetesMeta.Namespace}, experiment) + if err != nil { + return err + } + if event.KubernetesMeta.Generation != experiment.Generation { + logger.Info("Ignoring event for old generation", "currentGeneration", experiment.Generation, "eventGeneration", event.KubernetesMeta.Generation, "server", event.ExperimentName) + return nil + } + // Handle status update + if event.Active { + logger.Info("Setting experiment to ready", "experiment", event.ExperimentName) + experiment.Status.CreateAndSetCondition(v1alpha1.ExperimentReady, true, event.StatusDescription) + } else { + logger.Info("Setting experiment to not ready", "experiment", event.ExperimentName) + experiment.Status.CreateAndSetCondition(v1alpha1.ExperimentReady, false, event.StatusDescription) + } + if event.CandidatesReady { + experiment.Status.CreateAndSetCondition(v1alpha1.CandidatesReady, true, "Candidates ready") + } else { + experiment.Status.CreateAndSetCondition(v1alpha1.CandidatesReady, false, "Candidates not ready") + } + if event.MirrorReady { + experiment.Status.CreateAndSetCondition(v1alpha1.MirrorReady, true, "Mirror ready") + } else { + experiment.Status.CreateAndSetCondition(v1alpha1.MirrorReady, false, "Mirror not ready") + } + return s.updateExperimentStatus(ctxWithTimeout, experiment) + }) + if retryErr != nil { + logger.Error(err, "Failed to update status", "experiment", event.ExperimentName) } - return s.updateExperimentStatus(experiment) - }) - if retryErr != nil { - logger.Error(err, "Failed to update status", "experiment", event.ExperimentName) } } return nil } -func (s *SchedulerClient) updateExperimentStatus(experiment *v1alpha1.Experiment) error { - if err := s.Status().Update(context.TODO(), experiment); err != nil { +func (s *SchedulerClient) updateExperimentStatus(ctx context.Context, experiment *v1alpha1.Experiment) error { + if err := s.Status().Update(ctx, experiment); err != nil { s.recorder.Eventf(experiment, v1.EventTypeWarning, "UpdateFailed", "Failed to update status for experiment %q: %v", experiment.Name, err) return err diff --git a/operator/scheduler/model.go b/operator/scheduler/model.go index f49217be38..9c5a18aad5 100644 --- a/operator/scheduler/model.go +++ b/operator/scheduler/model.go @@ -161,10 +161,12 @@ func (s *SchedulerClient) SubscribeModelEvents(ctx context.Context, grpcClient s // Handle terminated event to remove finalizer if canRemoveFinalizer(latestVersionStatus.State.State) { retryErr := retry.RetryOnConflict(retry.DefaultRetry, func() error { - latestModel := &v1alpha1.Model{} + ctxWithTimeout, cancel := context.WithTimeout(ctx, 
constants.K8sAPICallsTxTimeout) + defer cancel() + latestModel := &v1alpha1.Model{} err = s.Get( - ctx, + ctxWithTimeout, client.ObjectKey{ Name: event.ModelName, Namespace: latestVersionStatus.GetKubernetesMeta().Namespace, @@ -181,7 +183,7 @@ func (s *SchedulerClient) SubscribeModelEvents(ctx context.Context, grpcClient s latestModel.ObjectMeta.Finalizers, constants.ModelFinalizerName, ) - if err := s.Update(ctx, latestModel); err != nil { + if err := s.Update(ctxWithTimeout, latestModel); err != nil { logger.Error(err, "Failed to remove finalizer", "model", latestModel.GetName()) return err } @@ -195,70 +197,75 @@ func (s *SchedulerClient) SubscribeModelEvents(ctx context.Context, grpcClient s } // Try to update status - retryErr := retry.RetryOnConflict(retry.DefaultRetry, func() error { - latestModel := &v1alpha1.Model{} - - err = s.Get( - ctx, - client.ObjectKey{ - Name: event.ModelName, - Namespace: latestVersionStatus.GetKubernetesMeta().Namespace, - }, - latestModel, - ) - if err != nil { - return err - } + { + retryErr := retry.RetryOnConflict(retry.DefaultRetry, func() error { + ctxWithTimeout, cancel := context.WithTimeout(ctx, constants.K8sAPICallsTxTimeout) + defer cancel() - if latestVersionStatus.GetKubernetesMeta().Generation != latestModel.Generation { - logger.Info( - "Ignoring event for old generation", - "currentGeneration", latestModel.Generation, - "eventGeneration", latestVersionStatus.GetKubernetesMeta().Generation, - "model", event.ModelName, - ) - return nil - } + latestModel := &v1alpha1.Model{} - // Handle status update - modelStatus := latestVersionStatus.GetState() - switch modelStatus.GetState() { - case scheduler.ModelStatus_ModelAvailable: - logger.Info( - "Setting model to ready", - "name", event.ModelName, - "state", modelStatus.GetState().String(), - ) - latestModel.Status.CreateAndSetCondition( - v1alpha1.ModelReady, - true, - modelStatus.GetState().String(), - modelStatus.GetReason(), - ) - default: - logger.Info( - "Setting model to not ready", - "name", event.ModelName, - "state", modelStatus.GetState().String(), + err = s.Get( + ctxWithTimeout, + client.ObjectKey{ + Name: event.ModelName, + Namespace: latestVersionStatus.GetKubernetesMeta().Namespace, + }, + latestModel, ) - latestModel.Status.CreateAndSetCondition( - v1alpha1.ModelReady, - false, - modelStatus.GetState().String(), - modelStatus.GetReason(), + if err != nil { + return err + } + + if latestVersionStatus.GetKubernetesMeta().Generation != latestModel.Generation { + logger.Info( + "Ignoring event for old generation", + "currentGeneration", latestModel.Generation, + "eventGeneration", latestVersionStatus.GetKubernetesMeta().Generation, + "model", event.ModelName, + ) + return nil + } + + // Handle status update + modelStatus := latestVersionStatus.GetState() + switch modelStatus.GetState() { + case scheduler.ModelStatus_ModelAvailable: + logger.Info( + "Setting model to ready", + "name", event.ModelName, + "state", modelStatus.GetState().String(), + ) + latestModel.Status.CreateAndSetCondition( + v1alpha1.ModelReady, + true, + modelStatus.GetState().String(), + modelStatus.GetReason(), + ) + default: + logger.Info( + "Setting model to not ready", + "name", event.ModelName, + "state", modelStatus.GetState().String(), + ) + latestModel.Status.CreateAndSetCondition( + v1alpha1.ModelReady, + false, + modelStatus.GetState().String(), + modelStatus.GetReason(), + ) + } + + // Set the total number of replicas targeted by this model + latestModel.Status.Replicas = int32( + 
modelStatus.GetAvailableReplicas() + + modelStatus.GetUnavailableReplicas(), ) + latestModel.Status.Selector = "server=" + latestVersionStatus.ServerName + return s.updateModelStatus(ctxWithTimeout, latestModel) + }) + if retryErr != nil { + logger.Error(err, "Failed to update status", "model", event.ModelName) } - - // Set the total number of replicas targeted by this model - latestModel.Status.Replicas = int32( - modelStatus.GetAvailableReplicas() + - modelStatus.GetUnavailableReplicas(), - ) - latestModel.Status.Selector = "server=" + latestVersionStatus.ServerName - return s.updateModelStatus(latestModel) - }) - if retryErr != nil { - logger.Error(err, "Failed to update status", "model", event.ModelName) } } @@ -284,11 +291,11 @@ func modelReady(status v1alpha1.ModelStatus) bool { status.GetCondition(apis.ConditionReady).Status == v1.ConditionTrue } -func (s *SchedulerClient) updateModelStatus(model *v1alpha1.Model) error { +func (s *SchedulerClient) updateModelStatus(ctx context.Context, model *v1alpha1.Model) error { existingModel := &v1alpha1.Model{} namespacedName := types.NamespacedName{Name: model.Name, Namespace: model.Namespace} - if err := s.Get(context.TODO(), namespacedName, existingModel); err != nil { + if err := s.Get(ctx, namespacedName, existingModel); err != nil { if errors.IsNotFound(err) { //Ignore NotFound errors return nil } @@ -299,7 +306,7 @@ func (s *SchedulerClient) updateModelStatus(model *v1alpha1.Model) error { if equality.Semantic.DeepEqual(existingModel.Status, model.Status) { // Not updating as no difference } else { - if err := s.Status().Update(context.TODO(), model); err != nil { + if err := s.Status().Update(ctx, model); err != nil { s.recorder.Eventf( model, v1.EventTypeWarning, diff --git a/operator/scheduler/pipeline.go b/operator/scheduler/pipeline.go index fce4af998c..6c8efdbb06 100644 --- a/operator/scheduler/pipeline.go +++ b/operator/scheduler/pipeline.go @@ -74,7 +74,7 @@ func (s *SchedulerClient) UnloadPipeline(ctx context.Context, pipeline *v1alpha1 scheduler.PipelineVersionState_PipelineTerminating.String(), "Pipeline unload requested", ) - _ = s.updatePipelineStatusImpl(pipeline) + _ = s.updatePipelineStatusImpl(ctx, pipeline) return nil, false } @@ -125,135 +125,116 @@ func (s *SchedulerClient) SubscribePipelineEvents(ctx context.Context, grpcClien "State", pv.GetState().String(), ) - pipeline := &v1alpha1.Pipeline{} - err = s.Get( - ctx, - client.ObjectKey{ - Name: event.PipelineName, - Namespace: pv.GetPipeline().GetKubernetesMeta().GetNamespace(), - }, - pipeline, - ) - if err != nil { - logger.Error( - err, - "Failed to get pipeline", - "name", event.PipelineName, - "namespace", pv.GetPipeline().GetKubernetesMeta().GetNamespace(), - ) - continue - } - - if !pipeline.ObjectMeta.DeletionTimestamp.IsZero() { - logger.Info( - "Pipeline is pending deletion", - "pipeline", pipeline.Name, - "state", pv.State.Status.String(), - ) - if canRemovePipelineFinalizer(pv.State.Status) { - retryErr := retry.RetryOnConflict(retry.DefaultRetry, func() error { - latestPipeline := &v1alpha1.Pipeline{} - err = s.Get( - ctx, - client.ObjectKey{ - Name: event.PipelineName, - Namespace: pv.GetPipeline().GetKubernetesMeta().GetNamespace(), - }, - latestPipeline, + if canRemovePipelineFinalizer(pv.State.Status) { + retryErr := retry.RetryOnConflict(retry.DefaultRetry, func() error { + ctxWithTimeout, cancel := context.WithTimeout(ctx, constants.K8sAPICallsTxTimeout) + defer cancel() + + latestPipeline := &v1alpha1.Pipeline{} + err = s.Get( + ctxWithTimeout, + 
client.ObjectKey{ + Name: event.PipelineName, + Namespace: pv.GetPipeline().GetKubernetesMeta().GetNamespace(), + }, + latestPipeline, + ) + if err != nil { + return err + } + if !latestPipeline.ObjectMeta.DeletionTimestamp.IsZero() { // Pipeline is being deleted + // remove finalizer now we have completed successfully + latestPipeline.ObjectMeta.Finalizers = utils.RemoveStr( + latestPipeline.ObjectMeta.Finalizers, + constants.PipelineFinalizerName, ) - if err != nil { + if err := s.Update(ctxWithTimeout, latestPipeline); err != nil { + logger.Error(err, "Failed to remove finalizer", "pipeline", latestPipeline.GetName()) return err } - if !latestPipeline.ObjectMeta.DeletionTimestamp.IsZero() { // Pipeline is being deleted - // remove finalizer now we have completed successfully - latestPipeline.ObjectMeta.Finalizers = utils.RemoveStr( - latestPipeline.ObjectMeta.Finalizers, - constants.PipelineFinalizerName, - ) - if err := s.Update(ctx, latestPipeline); err != nil { - logger.Error(err, "Failed to remove finalizer", "pipeline", latestPipeline.GetName()) - return err - } - } - return nil - }) - if retryErr != nil { - logger.Error(err, "Failed to remove finalizer after retries") } + return nil + }) + if retryErr != nil { + logger.Error(err, "Failed to remove finalizer after retries") } } // Try to update status - retryErr := retry.RetryOnConflict(retry.DefaultRetry, func() error { - pipeline := &v1alpha1.Pipeline{} - err = s.Get( - ctx, - client.ObjectKey{ - Name: event.PipelineName, - Namespace: pv.GetPipeline().GetKubernetesMeta().GetNamespace(), - }, - pipeline, - ) - if err != nil { - return err - } - - if pv.GetPipeline().GetKubernetesMeta().GetGeneration() != pipeline.Generation { - logger.Info( - "Ignoring event for old generation", - "currentGeneration", pipeline.Generation, - "eventGeneration", pv.GetPipeline().GetKubernetesMeta().GetGeneration(), - "server", event.PipelineName, + { + retryErr := retry.RetryOnConflict(retry.DefaultRetry, func() error { + ctxWithTimeout, cancel := context.WithTimeout(ctx, constants.K8sAPICallsTxTimeout) + defer cancel() + + pipeline := &v1alpha1.Pipeline{} + err = s.Get( + ctxWithTimeout, + client.ObjectKey{ + Name: event.PipelineName, + Namespace: pv.GetPipeline().GetKubernetesMeta().GetNamespace(), + }, + pipeline, ) - return nil - } + if err != nil { + return err + } - // Handle status update - switch pv.State.Status { - case scheduler.PipelineVersionState_PipelineReady: - logger.Info( - "Setting pipeline to ready", - "pipeline", pipeline.Name, - "generation", pipeline.Generation, - ) - pipeline.Status.CreateAndSetCondition( - v1alpha1.PipelineReady, - true, - pv.State.Reason, - pv.State.Status.String(), - ) - default: - logger.Info( - "Setting pipeline to not ready", - "pipeline", pipeline.Name, - "generation", pipeline.Generation, - ) - pipeline.Status.CreateAndSetCondition( - v1alpha1.PipelineReady, - false, - pv.State.Reason, - pv.State.Status.String(), - ) - } - // Set models ready - if pv.State.ModelsReady { - pipeline.Status.CreateAndSetCondition(v1alpha1.ModelsReady, true, "Models all available", "") - } else { - pipeline.Status.CreateAndSetCondition(v1alpha1.ModelsReady, false, "Some models are not available", "") - } + if pv.GetPipeline().GetKubernetesMeta().GetGeneration() != pipeline.Generation { + logger.Info( + "Ignoring event for old generation", + "currentGeneration", pipeline.Generation, + "eventGeneration", pv.GetPipeline().GetKubernetesMeta().GetGeneration(), + "server", event.PipelineName, + ) + return nil + } - return 
s.updatePipelineStatusImpl(pipeline) - }) - if retryErr != nil { - logger.Error(retryErr, "Failed to update status", "pipeline", event.PipelineName) - } + // Handle status update + switch pv.State.Status { + case scheduler.PipelineVersionState_PipelineReady: + logger.Info( + "Setting pipeline to ready", + "pipeline", pipeline.Name, + "generation", pipeline.Generation, + ) + pipeline.Status.CreateAndSetCondition( + v1alpha1.PipelineReady, + true, + pv.State.Reason, + pv.State.Status.String(), + ) + default: + logger.Info( + "Setting pipeline to not ready", + "pipeline", pipeline.Name, + "generation", pipeline.Generation, + ) + pipeline.Status.CreateAndSetCondition( + v1alpha1.PipelineReady, + false, + pv.State.Reason, + pv.State.Status.String(), + ) + } + // Set models ready + if pv.State.ModelsReady { + pipeline.Status.CreateAndSetCondition(v1alpha1.ModelsReady, true, "Models all available", "") + } else { + pipeline.Status.CreateAndSetCondition(v1alpha1.ModelsReady, false, "Some models are not available", "") + } + return s.updatePipelineStatusImpl(ctxWithTimeout, pipeline) + }) + if retryErr != nil { + logger.Error(retryErr, "Failed to update status", "pipeline", event.PipelineName) + } + } } return nil } -func (s *SchedulerClient) updatePipelineStatusImpl(pipeline *v1alpha1.Pipeline) error { - if err := s.Status().Update(context.TODO(), pipeline); err != nil { +func (s *SchedulerClient) updatePipelineStatusImpl(ctx context.Context, pipeline *v1alpha1.Pipeline) error { + if err := s.Status().Update(ctx, pipeline); err != nil { s.recorder.Eventf(pipeline, v1.EventTypeWarning, "UpdateFailed", "Failed to update status for pipeline %q: %v", pipeline.Name, err) return err diff --git a/operator/scheduler/server.go b/operator/scheduler/server.go index 8ecf39bf17..de2ab90756 100644 --- a/operator/scheduler/server.go +++ b/operator/scheduler/server.go @@ -21,6 +21,7 @@ import ( "github.com/seldonio/seldon-core/apis/go/v2/mlops/scheduler" "github.com/seldonio/seldon-core/operator/v2/apis/mlops/v1alpha1" + "github.com/seldonio/seldon-core/operator/v2/pkg/constants" ) func (s *SchedulerClient) ServerNotify(ctx context.Context, grpcClient scheduler.SchedulerClient, servers []v1alpha1.Server, isFirstSync bool) error { @@ -114,8 +115,11 @@ func (s *SchedulerClient) SubscribeServerEvents(ctx context.Context, grpcClient // Try to update status retryErr := retry.RetryOnConflict(retry.DefaultRetry, func() error { + contextWithTimeout, cancel := context.WithTimeout(ctx, constants.K8sAPICallsTxTimeout) + defer cancel() + server := &v1alpha1.Server{} - err = s.Get(ctx, client.ObjectKey{Name: event.ServerName, Namespace: event.GetKubernetesMeta().GetNamespace()}, server) + err = s.Get(contextWithTimeout, client.ObjectKey{Name: event.ServerName, Namespace: event.GetKubernetesMeta().GetNamespace()}, server) if err != nil { return err } @@ -125,7 +129,7 @@ func (s *SchedulerClient) SubscribeServerEvents(ctx context.Context, grpcClient } // Handle status update server.Status.LoadedModelReplicas = event.NumLoadedModelReplicas - return s.updateServerStatus(server) + return s.updateServerStatus(contextWithTimeout, server) }) if retryErr != nil { logger.Error(err, "Failed to update status", "model", event.ServerName) @@ -135,8 +139,8 @@ func (s *SchedulerClient) SubscribeServerEvents(ctx context.Context, grpcClient return nil } -func (s *SchedulerClient) updateServerStatus(server *v1alpha1.Server) error { - if err := s.Status().Update(context.TODO(), server); err != nil { +func (s 
*SchedulerClient) updateServerStatus(ctx context.Context, server *v1alpha1.Server) error { + if err := s.Status().Update(ctx, server); err != nil { s.recorder.Eventf(server, v1.EventTypeWarning, "UpdateFailed", "Failed to update status for Server %q: %v", server.Name, err) return err diff --git a/scheduler/Dockerfile.envoy b/scheduler/Dockerfile.envoy index 5af8628b45..34415c88dc 100644 --- a/scheduler/Dockerfile.envoy +++ b/scheduler/Dockerfile.envoy @@ -1,4 +1,4 @@ -FROM envoyproxy/envoy:v1.32.1 as envoy +FROM envoyproxy/envoy:v1.32.2 as envoy FROM registry.access.redhat.com/ubi9/ubi-micro:9.5 diff --git a/scheduler/Dockerfile.grafana b/scheduler/Dockerfile.grafana index c595539fe9..2c9591e951 100644 --- a/scheduler/Dockerfile.grafana +++ b/scheduler/Dockerfile.grafana @@ -1,4 +1,4 @@ -FROM grafana/grafana:11.3.1 +FROM grafana/grafana:11.4.0 # Disable Login form or not ENV GF_AUTH_DISABLE_LOGIN_FORM "true" diff --git a/scheduler/go.mod b/scheduler/go.mod index 1b9ddc1347..a25f454159 100644 --- a/scheduler/go.mod +++ b/scheduler/go.mod @@ -26,8 +26,8 @@ require ( github.com/prometheus/client_golang v1.19.1 github.com/rs/xid v1.6.0 github.com/seldonio/seldon-core/apis/go/v2 v2.0.0-00010101000000-000000000000 - github.com/seldonio/seldon-core/components/tls/v2 v2.0.0-00010101000000-000000000000 github.com/seldonio/seldon-core/components/kafka/v2 v2.0.0-00010101000000-000000000000 + github.com/seldonio/seldon-core/components/tls/v2 v2.0.0-00010101000000-000000000000 github.com/serialx/hashring v0.0.0-20200727003509-22c0c7ab6b1b github.com/signalfx/splunk-otel-go/instrumentation/github.com/confluentinc/confluent-kafka-go/v2/kafka/splunkkafka v1.19.0 github.com/sirupsen/logrus v1.9.3 diff --git a/scheduler/pkg/agent/agent_debug_test.go b/scheduler/pkg/agent/agent_debug_test.go index d5b844e82c..decfc752df 100644 --- a/scheduler/pkg/agent/agent_debug_test.go +++ b/scheduler/pkg/agent/agent_debug_test.go @@ -35,7 +35,7 @@ func setupService(numModels int, modelPrefix string, capacity int) *agentDebug { } func TestAgentDebugServiceSmoke(t *testing.T) { - //TODO break this down in proper tests + // TODO break this down in proper tests g := NewGomegaWithT(t) service := setupService(10, "dummy", 10) @@ -60,6 +60,7 @@ func TestAgentDebugServiceSmoke(t *testing.T) { MemoryBytes: &mem, }, }, + RuntimeInfo: getModelRuntimeInfo(1), }, ) g.Expect(err).To(BeNil()) @@ -87,7 +88,7 @@ func TestAgentDebugServiceSmoke(t *testing.T) { } func TestAgentDebugEarlyStop(t *testing.T) { - //TODO break this down in proper tests + // TODO break this down in proper tests g := NewGomegaWithT(t) service := setupService(10, "dummy", 10) diff --git a/scheduler/pkg/agent/client.go b/scheduler/pkg/agent/client.go index 79a388266d..b86d49ffb0 100644 --- a/scheduler/pkg/agent/client.go +++ b/scheduler/pkg/agent/client.go @@ -559,7 +559,7 @@ func (c *Client) getArtifactConfig(request *agent.ModelOperationMessage) ([]byte } - config, err := c.secretsHandler.GetSecretConfig(x.StorageSecretName) + config, err := c.secretsHandler.GetSecretConfig(x.StorageSecretName, util.K8sTimeoutDefault) if err != nil { return nil, err } @@ -615,15 +615,23 @@ func (c *Client) LoadModel(request *agent.ModelOperationMessage, timestamp int64 } logger.Infof("Chose path %s for model %s:%d", *chosenVersionPath, modelName, modelVersion) + modelConfig, err := c.ModelRepository.GetModelRuntimeInfo(modelWithVersion) + if err != nil { + logger.Errorf("there was a problem getting 
the config for model: %s", modelName) + } + // TODO: consider whether we need the actual protos being sent to `LoadModelVersion`? modifiedModelVersionRequest := getModifiedModelVersion( modelWithVersion, pinnedModelVersion, request.GetModelVersion(), + modelConfig, ) + loaderFn := func() error { return c.stateManager.LoadModelVersion(modifiedModelVersionRequest) } + if err := backoffWithMaxNumRetry(loaderFn, c.settings.maxLoadRetryCount, c.settings.maxLoadElapsedTime, logger); err != nil { c.sendModelEventError(modelName, modelVersion, agent.ModelEventMessage_LOAD_FAILED, err) c.cleanup(modelWithVersion) @@ -641,7 +649,8 @@ func (c *Client) LoadModel(request *agent.ModelOperationMessage, timestamp int64 } logger.Infof("Load model %s:%d success", modelName, modelVersion) - return c.sendAgentEvent(modelName, modelVersion, agent.ModelEventMessage_LOADED) + + return c.sendAgentEvent(modelName, modelVersion, modelConfig, agent.ModelEventMessage_LOADED) } func (c *Client) UnloadModel(request *agent.ModelOperationMessage, timestamp int64) error { @@ -674,7 +683,7 @@ func (c *Client) UnloadModel(request *agent.ModelOperationMessage, timestamp int defer c.modelTimestamps.Store(modelWithVersion, timestamp) // we do not care about model versions here - modifiedModelVersionRequest := getModifiedModelVersion(modelWithVersion, pinnedModelVersion, request.GetModelVersion()) + modifiedModelVersionRequest := getModifiedModelVersion(modelWithVersion, pinnedModelVersion, request.GetModelVersion(), nil) unloaderFn := func() error { return c.stateManager.UnloadModelVersion(modifiedModelVersionRequest) @@ -702,7 +711,7 @@ func (c *Client) UnloadModel(request *agent.ModelOperationMessage, timestamp int } logger.Infof("Unload model %s:%d success", modelName, modelVersion) - return c.sendAgentEvent(modelName, modelVersion, agent.ModelEventMessage_UNLOADED) + return c.sendAgentEvent(modelName, modelVersion, nil, agent.ModelEventMessage_UNLOADED) } func (c *Client) cleanup(modelWithVersion string) { @@ -742,6 +751,7 @@ func (c *Client) sendModelEventError( func (c *Client) sendAgentEvent( modelName string, modelVersion uint32, + modelRuntimeInfo *agent.ModelRuntimeInfo, event agent.ModelEventMessage_Event, ) error { // if the server is draining and the model load has succeeded, we need to "cancel" @@ -765,6 +775,7 @@ func (c *Client) sendAgentEvent( ModelVersion: modelVersion, Event: event, AvailableMemoryBytes: c.stateManager.GetAvailableMemoryBytesWithOverCommit(), + RuntimeInfo: modelRuntimeInfo, }) return err } diff --git a/scheduler/pkg/agent/client_test.go b/scheduler/pkg/agent/client_test.go index 4042bd277e..194e7a6db1 100644 --- a/scheduler/pkg/agent/client_test.go +++ b/scheduler/pkg/agent/client_test.go @@ -29,6 +29,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes/fake" + "github.com/seldonio/seldon-core/apis/go/v2/mlops/agent" pb "github.com/seldonio/seldon-core/apis/go/v2/mlops/agent" pbs "github.com/seldonio/seldon-core/apis/go/v2/mlops/scheduler" @@ -51,6 +52,7 @@ type mockAgentV2Server struct { unloadFailedEvents int otherEvents int errors int + events []*pb.ModelEventMessage } type FakeModelRepository struct { @@ -64,6 +66,10 @@ func (f *FakeModelRepository) RemoveModelVersion(modelName string) error { return nil } +func (f *FakeModelRepository) GetModelRuntimeInfo(modelName string) (*pb.ModelRuntimeInfo, error) { + return &pb.ModelRuntimeInfo{ModelRuntimeInfo: &pb.ModelRuntimeInfo_Mlserver{Mlserver: 
&agent.MLServerModelSettings{ParallelWorkers: uint32(1)}}}, nil +} + func (f *FakeModelRepository) DownloadModelVersion(modelName string, version uint32, modelSpec *pbs.ModelSpec, config []byte) (*string, error) { f.modelDownloads++ if f.err != nil { @@ -147,6 +153,7 @@ func (m *mockAgentV2Server) AgentEvent(ctx context.Context, message *pb.ModelEve default: m.otherEvents++ } + m.events = append(m.events, message) return &pb.ModelEventResponse{}, nil } @@ -247,6 +254,7 @@ func TestLoadModel(t *testing.T) { models []string replicaConfig *pb.ReplicaConfig op *pb.ModelOperationMessage + modelConfig *pb.ModelRuntimeInfo expectedAvailableMemory uint64 v2Status int modelRepoErr error @@ -270,9 +278,11 @@ func TestLoadModel(t *testing.T) { }, ModelSpec: &pbs.ModelSpec{Uri: "gs://model", MemoryBytes: &smallMemory}, }, + RuntimeInfo: getModelRuntimeInfo(1), }, }, replicaConfig: &pb.ReplicaConfig{MemoryBytes: 1000}, + modelConfig: getModelRuntimeInfo(1), expectedAvailableMemory: 500, v2Status: 200, success: true, @@ -289,10 +299,12 @@ func TestLoadModel(t *testing.T) { }, ModelSpec: &pbs.ModelSpec{Uri: "gs://model", MemoryBytes: &smallMemory}, }, + RuntimeInfo: getModelRuntimeInfo(1), }, AutoscalingEnabled: true, }, replicaConfig: &pb.ReplicaConfig{MemoryBytes: 1000}, + modelConfig: getModelRuntimeInfo(1), expectedAvailableMemory: 500, v2Status: 200, success: true, @@ -310,9 +322,11 @@ func TestLoadModel(t *testing.T) { }, ModelSpec: &pbs.ModelSpec{Uri: "gs://model", MemoryBytes: &smallMemory}, }, + RuntimeInfo: getModelRuntimeInfo(1), }, }, replicaConfig: &pb.ReplicaConfig{MemoryBytes: 1000}, + modelConfig: getModelRuntimeInfo(1), expectedAvailableMemory: 1000, v2Status: 400, success: false, @@ -329,9 +343,11 @@ func TestLoadModel(t *testing.T) { }, ModelSpec: &pbs.ModelSpec{Uri: "gs://model", MemoryBytes: &largeMemory}, }, + RuntimeInfo: getModelRuntimeInfo(1), }, }, replicaConfig: &pb.ReplicaConfig{MemoryBytes: 1000}, + modelConfig: getModelRuntimeInfo(1), expectedAvailableMemory: 1000, v2Status: 200, success: false, @@ -399,6 +415,9 @@ func TestLoadModel(t *testing.T) { g.Expect(err).To(BeNil()) g.Expect(mockAgentV2Server.loadedEvents).To(Equal(1)) g.Expect(mockAgentV2Server.loadFailedEvents).To(Equal(0)) + g.Expect(len(mockAgentV2Server.events)).To(Equal(1)) + g.Expect(mockAgentV2Server.events[0].RuntimeInfo).ToNot(BeNil()) + g.Expect(mockAgentV2Server.events[0].RuntimeInfo.GetMlserver().ParallelWorkers).To(Equal(uint32(1))) g.Expect(client.stateManager.GetAvailableMemoryBytes()).To(Equal(test.expectedAvailableMemory)) g.Expect(modelRepository.modelRemovals).To(Equal(0)) loadedVersions := client.stateManager.modelVersions.getVersionsForAllModels() diff --git a/scheduler/pkg/agent/client_utils.go b/scheduler/pkg/agent/client_utils.go index 3af8233f64..95b6c9e61c 100644 --- a/scheduler/pkg/agent/client_utils.go +++ b/scheduler/pkg/agent/client_utils.go @@ -54,10 +54,11 @@ func isReady(service interfaces.DependencyServiceInterface, logger *log.Entry, m return backoff.RetryNotify(readyToError, backoffWithMax, logFailure) } -func getModifiedModelVersion(modelId string, version uint32, originalModelVersion *agent.ModelVersion) *agent.ModelVersion { +func getModifiedModelVersion(modelId string, version uint32, originalModelVersion *agent.ModelVersion, modelRuntimeInfo *agent.ModelRuntimeInfo) *agent.ModelVersion { mv := proto.Clone(originalModelVersion) mv.(*agent.ModelVersion).Model.Meta.Name = modelId mv.(*agent.ModelVersion).Version = version + mv.(*agent.ModelVersion).RuntimeInfo = 
modelRuntimeInfo return mv.(*agent.ModelVersion) } diff --git a/scheduler/pkg/agent/drainservice/server.go b/scheduler/pkg/agent/drainservice/server.go index 1315dc9658..36e6910cf7 100644 --- a/scheduler/pkg/agent/drainservice/server.go +++ b/scheduler/pkg/agent/drainservice/server.go @@ -19,6 +19,8 @@ import ( "github.com/gorilla/mux" log "github.com/sirupsen/logrus" + + "github.com/seldonio/seldon-core/scheduler/v2/pkg/util" ) const ( @@ -102,7 +104,9 @@ func (drainer *DrainerService) Stop() error { defer drainer.muServerReady.Unlock() var err error if drainer.server != nil { - err = drainer.server.Shutdown(context.Background()) + ctx, cancel := context.WithTimeout(context.Background(), util.ServerControlPlaneTimeout) + defer cancel() + err = drainer.server.Shutdown(ctx) } drainer.serverReady = false drainer.logger.Info("Finished graceful shutdown") diff --git a/scheduler/pkg/agent/k8s/secrets.go b/scheduler/pkg/agent/k8s/secrets.go index c8d50d4522..66dd78f47b 100644 --- a/scheduler/pkg/agent/k8s/secrets.go +++ b/scheduler/pkg/agent/k8s/secrets.go @@ -12,6 +12,7 @@ package k8s import ( "context" "fmt" + "time" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes" @@ -29,8 +30,10 @@ func NewSecretsHandler(clientset kubernetes.Interface, namespace string) *Secret } } -func (s *SecretHandler) GetSecretConfig(secretName string) ([]byte, error) { - secret, err := s.clientset.CoreV1().Secrets(s.namespace).Get(context.Background(), secretName, metav1.GetOptions{}) +func (s *SecretHandler) GetSecretConfig(secretName string, timeout time.Duration) ([]byte, error) { + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + secret, err := s.clientset.CoreV1().Secrets(s.namespace).Get(ctx, secretName, metav1.GetOptions{}) if err != nil { return nil, err } diff --git a/scheduler/pkg/agent/k8s/secrets_test.go b/scheduler/pkg/agent/k8s/secrets_test.go index 6e4ef2a375..9069414593 100644 --- a/scheduler/pkg/agent/k8s/secrets_test.go +++ b/scheduler/pkg/agent/k8s/secrets_test.go @@ -11,6 +11,7 @@ package k8s import ( "testing" + "time" . 
"github.com/onsi/gomega" v1 "k8s.io/api/core/v1" @@ -67,7 +68,7 @@ parameters: t.Run(test.name, func(t *testing.T) { fakeClientset := fake.NewSimpleClientset(test.secret) s := NewSecretsHandler(fakeClientset, test.secret.Namespace) - data, err := s.GetSecretConfig(test.secretName) + data, err := s.GetSecretConfig(test.secretName, 1*time.Millisecond) if test.err { g.Expect(err).ToNot(BeNil()) } else { diff --git a/scheduler/pkg/agent/model_state.go b/scheduler/pkg/agent/model_state.go index 85f1c2ef08..13451ad852 100644 --- a/scheduler/pkg/agent/model_state.go +++ b/scheduler/pkg/agent/model_state.go @@ -59,7 +59,6 @@ func (modelState *ModelState) addModelVersionImpl(modelVersionDetails *agent.Mod modelName, versionId, exsistingVersion.getVersion()) } } - } // Remove model version and return true if no versions left (in which case we remove from map) @@ -70,7 +69,6 @@ func (modelState *ModelState) removeModelVersion(modelVersionDetails *agent.Mode } func (modelState *ModelState) removeModelVersionImpl(modelVersionDetails *agent.ModelVersion) (bool, error) { - modelName := modelVersionDetails.GetModel().GetMeta().GetName() versionId := modelVersionDetails.GetVersion() @@ -143,7 +141,8 @@ func (modelState *ModelState) getVersionsForAllModels() []*agent.ModelVersion { mv := version.get() versionedModelName := mv.Model.GetMeta().Name originalModelName, originalModelVersion, _ := util.GetOrignalModelNameAndVersion(versionedModelName) - loadedModels = append(loadedModels, getModifiedModelVersion(originalModelName, originalModelVersion, mv)) + modelRuntimeInfo := mv.RuntimeInfo + loadedModels = append(loadedModels, getModifiedModelVersion(originalModelName, originalModelVersion, mv, modelRuntimeInfo)) } return loadedModels } @@ -153,7 +152,19 @@ type modelVersion struct { } func (version *modelVersion) getVersionMemory() uint64 { - return version.versionInfo.GetModel().GetModelSpec().GetMemoryBytes() + instanceCount := getInstanceCount(version) + return version.versionInfo.GetModel().GetModelSpec().GetMemoryBytes() * instanceCount +} + +func getInstanceCount(version *modelVersion) uint64 { + switch version.versionInfo.RuntimeInfo.ModelRuntimeInfo.(type) { + case *agent.ModelRuntimeInfo_Mlserver: + return uint64(version.versionInfo.GetRuntimeInfo().GetMlserver().ParallelWorkers) + case *agent.ModelRuntimeInfo_Triton: + return uint64(version.versionInfo.GetRuntimeInfo().GetTriton().Cpu[0].InstanceCount) + default: + return 1 + } } func (version *modelVersion) getVersion() uint32 { diff --git a/scheduler/pkg/agent/model_state_test.go b/scheduler/pkg/agent/model_state_test.go index bcc8fdc91e..58abf0f898 100644 --- a/scheduler/pkg/agent/model_state_test.go +++ b/scheduler/pkg/agent/model_state_test.go @@ -47,11 +47,12 @@ func TestAddModelVersion(t *testing.T) { MemoryBytes: getUint64Ptr(500), }, }, - Version: 1, + RuntimeInfo: getModelRuntimeInfo(2), + Version: 1, }, versionAdded: true, - expectedModelBytes: 500, - expectedTotalBytes: 500, + expectedModelBytes: 1000, + expectedTotalBytes: 1000, }, { name: "NewModel (Another Model Exsits)", @@ -67,7 +68,8 @@ func TestAddModelVersion(t *testing.T) { MemoryBytes: getUint64Ptr(500), }, }, - Version: 1, + RuntimeInfo: getModelRuntimeInfo(1), + Version: 1, }, }, }, @@ -82,7 +84,8 @@ func TestAddModelVersion(t *testing.T) { MemoryBytes: getUint64Ptr(500), }, }, - Version: 1, + RuntimeInfo: getModelRuntimeInfo(1), + Version: 1, }, versionAdded: true, expectedModelBytes: 500, @@ -102,7 +105,8 @@ func TestAddModelVersion(t *testing.T) { MemoryBytes: 
getUint64Ptr(500), }, }, - Version: 1, + RuntimeInfo: getModelRuntimeInfo(1), + Version: 1, }, }, }, @@ -117,7 +121,8 @@ func TestAddModelVersion(t *testing.T) { MemoryBytes: getUint64Ptr(500), }, }, - Version: 2, + RuntimeInfo: getModelRuntimeInfo(1), + Version: 2, }, versionAdded: false, expectedModelBytes: 500, @@ -137,7 +142,8 @@ func TestAddModelVersion(t *testing.T) { MemoryBytes: getUint64Ptr(500), }, }, - Version: 1, + RuntimeInfo: getModelRuntimeInfo(1), + Version: 1, }, }, }, @@ -152,7 +158,8 @@ func TestAddModelVersion(t *testing.T) { MemoryBytes: getUint64Ptr(500), }, }, - Version: 1, + RuntimeInfo: getModelRuntimeInfo(1), + Version: 1, }, versionAdded: false, expectedModelBytes: 500, @@ -164,7 +171,7 @@ func TestAddModelVersion(t *testing.T) { t.Run(test.name, func(t *testing.T) { versionAdded, err := test.state.addModelVersion(test.modelVersion) g.Expect(versionAdded).To(Equal(test.versionAdded)) - //check version exists + // check version exists if versionAdded { g.Expect(test.state.versionExists("iris", test.modelVersion.GetVersion())).To(Equal(true)) } else if err != nil { @@ -204,7 +211,8 @@ func TestRemoveModelVersion(t *testing.T) { MemoryBytes: getUint64Ptr(500), }, }, - Version: 1, + RuntimeInfo: getModelRuntimeInfo(1), + Version: 1, }, }, }, @@ -216,7 +224,8 @@ func TestRemoveModelVersion(t *testing.T) { Name: "iris", }, }, - Version: 1, + RuntimeInfo: getModelRuntimeInfo(1), + Version: 1, }, modelDeleted: true, expectedModelBytes: 0, @@ -237,7 +246,8 @@ func TestRemoveModelVersion(t *testing.T) { MemoryBytes: getUint64Ptr(500), }, }, - Version: 1, + RuntimeInfo: getModelRuntimeInfo(1), + Version: 1, }, }, }, @@ -249,7 +259,8 @@ func TestRemoveModelVersion(t *testing.T) { Name: "iris", }, }, - Version: 2, + RuntimeInfo: getModelRuntimeInfo(1), + Version: 2, }, modelDeleted: false, expectedModelBytes: 500, @@ -262,7 +273,7 @@ func TestRemoveModelVersion(t *testing.T) { t.Run(test.name, func(t *testing.T) { modelDeleted, _ := test.state.removeModelVersion(test.modelVersion) g.Expect(modelDeleted).To(Equal(test.modelDeleted)) - //check version not exists + // check version not exists g.Expect(test.state.versionExists("iris", test.modelVersion.GetVersion())).To(Equal(false)) if !modelDeleted { g.Expect(test.state.getModelMemoryBytes("iris")).To(Equal(test.expectedModelBytes)) diff --git a/scheduler/pkg/agent/rclone/rclone_config.go b/scheduler/pkg/agent/rclone/rclone_config.go index 4900d90926..1c45ac071f 100644 --- a/scheduler/pkg/agent/rclone/rclone_config.go +++ b/scheduler/pkg/agent/rclone/rclone_config.go @@ -12,6 +12,7 @@ package rclone import ( "github.com/seldonio/seldon-core/scheduler/v2/pkg/agent/config" "github.com/seldonio/seldon-core/scheduler/v2/pkg/agent/k8s" + "github.com/seldonio/seldon-core/scheduler/v2/pkg/util" ) func (r *RCloneClient) loadRcloneConfiguration(config *config.AgentConfiguration) error { @@ -119,7 +120,7 @@ func (r *RCloneClient) loadRcloneSecretsConfiguration(config *config.AgentConfig for _, secret := range config.Rclone.ConfigSecrets { logger.WithField("secret_name", secret).Infof("retrieving Rclone secret") - config, err := secretsHandler.GetSecretConfig(secret) + config, err := secretsHandler.GetSecretConfig(secret, util.K8sTimeoutDefault) if err != nil { return nil, err } diff --git a/scheduler/pkg/agent/repository/mlserver/mlserver.go b/scheduler/pkg/agent/repository/mlserver/mlserver.go index 7a086ef7e2..ecd5e4c570 100644 --- a/scheduler/pkg/agent/repository/mlserver/mlserver.go +++ 
b/scheduler/pkg/agent/repository/mlserver/mlserver.go @@ -20,6 +20,7 @@ import ( log "github.com/sirupsen/logrus" + "github.com/seldonio/seldon-core/apis/go/v2/mlops/agent" "github.com/seldonio/seldon-core/apis/go/v2/mlops/scheduler" seldontls "github.com/seldonio/seldon-core/components/tls/v2/pkg/tls" ) @@ -29,6 +30,7 @@ const ( inferUriKey = "infer_uri" explainerTypeKey = "explainer_type" sslVerifyPath = "ssl_verify_path" + parallelWorkersEnvVar = "MLSERVER_PARALLEL_WORKERS" ) type MLServerRepositoryHandler struct { @@ -71,12 +73,12 @@ type ModelMetadataTensors struct { // MLServer model parameters. type ModelParameters struct { - //URI where the model artifacts can be found. - //This path must be either absolute or relative to where MLServer is running. + // URI where the model artifacts can be found. + // This path must be either absolute or relative to where MLServer is running. Uri string `json:"uri,omitempty"` - //Version of the model + // Version of the model Version string `json:"version,omitempty"` - //Format of the model (only available on certain runtimes). + // Format of the model (only available on certain runtimes). Format string `json:"format,omitempty"` ContentType string `json:"content_type,omitempty"` Extra map[string]interface{} `json:"extra,omitempty"` @@ -98,7 +100,7 @@ func (m *MLServerRepositoryHandler) UpdateModelVersion(modelName string, version return err } } - //Modify model-settings + // Modify model-settings err := m.updateNameAndVersion(path, modelName, versionStr) return err } @@ -167,7 +169,7 @@ func (m *MLServerRepositoryHandler) SetExplainer(modelRepoPath string, explainer if err != nil { return err } - //TODO: temporary fix for issue in mlserver with explainers + // TODO: temporary fix for issue in mlserver with explainers ms.ParallelWorkers = &workers if ms.Parameters == nil { ms.Parameters = &ModelParameters{} @@ -262,7 +264,7 @@ func (m *MLServerRepositoryHandler) findModelVersionInPath(modelPath string, ver case 1: return found[0], nil default: - return "", fmt.Errorf("Found multiple folders with version %d %v", version, found) + return "", fmt.Errorf("found multiple folders with version %d %v", version, found) } } @@ -272,7 +274,7 @@ func (m *MLServerRepositoryHandler) getDefaultModelSettingsPath(modelPath string if err != nil { return err } - if info.IsDir() && modelPath != path { //Don't descend into directories + if info.IsDir() && modelPath != path { // Don't descend into directories return filepath.SkipDir } if !info.IsDir() && filepath.Base(path) == mlserverConfigFilename { @@ -321,3 +323,13 @@ func (m *MLServerRepositoryHandler) findHighestVersionInPath(modelPath string) ( } return "", nil } + +func (m *MLServerRepositoryHandler) GetModelRuntimeInfo(_ string) (*agent.ModelRuntimeInfo, error) { + parallelWorkersStr := os.Getenv(parallelWorkersEnvVar) + parallelWorkers, err := strconv.Atoi(parallelWorkersStr) + if err != nil || parallelWorkersStr == "" { + parallelWorkers = 1 + } + + return &agent.ModelRuntimeInfo{ModelRuntimeInfo: &agent.ModelRuntimeInfo_Mlserver{Mlserver: &agent.MLServerModelSettings{ParallelWorkers: uint32(parallelWorkers)}}}, nil +} diff --git a/scheduler/pkg/agent/repository/mlserver/mlserver_test.go b/scheduler/pkg/agent/repository/mlserver/mlserver_test.go index 69f018dcf9..fda8e7444a 100644 --- a/scheduler/pkg/agent/repository/mlserver/mlserver_test.go +++ b/scheduler/pkg/agent/repository/mlserver/mlserver_test.go @@ -14,11 +14,13 @@ import ( "io/fs" "os" "path/filepath" + "strconv" 
"testing" . "github.com/onsi/gomega" log "github.com/sirupsen/logrus" + "github.com/seldonio/seldon-core/apis/go/v2/mlops/agent" "github.com/seldonio/seldon-core/apis/go/v2/mlops/scheduler" ) @@ -776,3 +778,48 @@ func TestDefaultModelSettings(t *testing.T) { }) } } + +func TestGetModelConfig(t *testing.T) { + g := NewGomegaWithT(t) + + tests := []struct { + name string + expected uint32 + err bool + }{ + { + name: "defaults to 1", + expected: 1, + err: false, + }, + { + name: "should pick up env var", + expected: 10, + err: false, + }, + { + name: "returns 1 on err", + expected: 1, + err: true, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + logger := log.New() + m := NewMLServerRepositoryHandler(logger) + + if test.err { + os.Setenv(parallelWorkersEnvVar, "uh-oh") + } else if test.expected > 1 { + os.Setenv(parallelWorkersEnvVar, strconv.FormatInt(int64(test.expected), 10)) + } + + runtimeInfo, err := m.GetModelRuntimeInfo("test-model") + // mlserver should never return an error + g.Expect(err).To(BeNil()) + mlserverRuntimeInfo := runtimeInfo.ModelRuntimeInfo.(*agent.ModelRuntimeInfo_Mlserver) + g.Expect(mlserverRuntimeInfo.Mlserver.ParallelWorkers).To(Equal(test.expected)) + }) + } +} diff --git a/scheduler/pkg/agent/repository/model_repository.go b/scheduler/pkg/agent/repository/model_repository.go index 68e06d24cb..0c0a79e459 100644 --- a/scheduler/pkg/agent/repository/model_repository.go +++ b/scheduler/pkg/agent/repository/model_repository.go @@ -18,6 +18,7 @@ import ( copy2 "github.com/otiai10/copy" log "github.com/sirupsen/logrus" + "github.com/seldonio/seldon-core/apis/go/v2/mlops/agent" "github.com/seldonio/seldon-core/apis/go/v2/mlops/scheduler" "github.com/seldonio/seldon-core/scheduler/v2/pkg/agent/rclone" @@ -29,11 +30,13 @@ type ModelRepositoryHandler interface { UpdateModelRepository(modelName string, path string, isVersionFolder bool, modelRepoPath string) error SetExplainer(modelRepoPath string, explainerSpec *scheduler.ExplainerSpec, envoyHost string, envoyPort int) error SetExtraParameters(modelRepoPath string, parameters []*scheduler.ParameterSpec) error + GetModelRuntimeInfo(path string) (*agent.ModelRuntimeInfo, error) } type ModelRepository interface { DownloadModelVersion(modelName string, version uint32, modelSpec *scheduler.ModelSpec, config []byte) (*string, error) RemoveModelVersion(modelName string) error + GetModelRuntimeInfo(modelName string) (*agent.ModelRuntimeInfo, error) Ready() error } @@ -41,7 +44,7 @@ type V2ModelRepository struct { logger log.FieldLogger rcloneClient *rclone.RCloneClient repoPath string - modelrepositoryHandler ModelRepositoryHandler + modelRepositoryHandler ModelRepositoryHandler envoyHost string envoyPort int } @@ -51,17 +54,23 @@ func NewModelRepository(logger log.FieldLogger, repoPath string, modelRepositoryHandler ModelRepositoryHandler, envoyHost string, - envoyPort int) *V2ModelRepository { + envoyPort int, +) *V2ModelRepository { return &V2ModelRepository{ logger: logger.WithField("Name", "V2ModelRepository"), rcloneClient: rcloneClient, repoPath: repoPath, - modelrepositoryHandler: modelRepositoryHandler, + modelRepositoryHandler: modelRepositoryHandler, envoyHost: envoyHost, envoyPort: envoyPort, } } +func (r *V2ModelRepository) GetModelRuntimeInfo(modelName string) (*agent.ModelRuntimeInfo, error) { + modelPathInRepo := filepath.Join(r.repoPath, modelName) + return 
r.modelRepositoryHandler.GetModelRuntimeInfo(modelPathInRepo) +} + func (r *V2ModelRepository) DownloadModelVersion( modelName string, version uint32, @@ -94,7 +103,7 @@ func (r *V2ModelRepository) DownloadModelVersion( }() // Find the version folder we want - modelVersionFolder, foundVersionFolder, err := r.modelrepositoryHandler.FindModelVersionFolder( + modelVersionFolder, foundVersionFolder, err := r.modelRepositoryHandler.FindModelVersionFolder( modelName, artifactVersion, rclonePath, @@ -134,7 +143,7 @@ func (r *V2ModelRepository) DownloadModelVersion( } // Update model version in repo - err = r.modelrepositoryHandler.UpdateModelVersion( + err = r.modelRepositoryHandler.UpdateModelVersion( modelName, version, modelVersionPathInRepo, @@ -146,7 +155,7 @@ func (r *V2ModelRepository) DownloadModelVersion( // Update details for blackbox explainer if explainerSpec != nil { - err = r.modelrepositoryHandler.SetExplainer( + err = r.modelRepositoryHandler.SetExplainer( modelVersionPathInRepo, explainerSpec, r.envoyHost, @@ -158,13 +167,13 @@ func (r *V2ModelRepository) DownloadModelVersion( } // Set init parameters inside model - err = r.modelrepositoryHandler.SetExtraParameters(modelVersionPathInRepo, parameters) + err = r.modelRepositoryHandler.SetExtraParameters(modelVersionPathInRepo, parameters) if err != nil { return nil, err } // Update global model configuration - err = r.modelrepositoryHandler.UpdateModelRepository( + err = r.modelRepositoryHandler.UpdateModelRepository( modelName, modelVersionFolder, foundVersionFolder, diff --git a/scheduler/pkg/agent/repository/triton/triton.go b/scheduler/pkg/agent/repository/triton/triton.go index 3cd1926868..a411ec0246 100644 --- a/scheduler/pkg/agent/repository/triton/triton.go +++ b/scheduler/pkg/agent/repository/triton/triton.go @@ -15,11 +15,13 @@ import ( "os" "path/filepath" "strconv" + "strings" copy2 "github.com/otiai10/copy" log "github.com/sirupsen/logrus" "google.golang.org/protobuf/encoding/prototext" + "github.com/seldonio/seldon-core/apis/go/v2/mlops/agent" "github.com/seldonio/seldon-core/apis/go/v2/mlops/scheduler" pb "github.com/seldonio/seldon-core/scheduler/v2/pkg/agent/repository/triton/config" @@ -42,7 +44,7 @@ func copyNonConfigFilesToModelRepo(src string, dst string) error { if err != nil { return err } - if info.IsDir() && src != path { //Don't descend into directories + if info.IsDir() && src != path { // Don't descend into directories return filepath.SkipDir } // Copy non- config.pbtxt files to dst folder @@ -217,3 +219,42 @@ func (t *TritonRepositoryHandler) SetExplainer(modelRepoPath string, explainerSp func (t *TritonRepositoryHandler) SetExtraParameters(modelRepoPath string, parameters []*scheduler.ParameterSpec) error { return nil } + +func (t *TritonRepositoryHandler) GetModelRuntimeInfo(path string) (*agent.ModelRuntimeInfo, error) { + configPath := filepath.Join(path, TritonConfigFile) + tritonConfig, err := t.loadConfigFromFile(configPath) + tritonRuntimeInfo := &agent.ModelRuntimeInfo_Triton{ + Triton: &agent.TritonModelConfig{ + Cpu: []*agent.TritonCPU{ + {InstanceCount: 1}, + }, + }, + } + if err == nil { + instanceGroups := tritonConfig.InstanceGroup + if len(instanceGroups) > 0 { + var instanceCount int32 = 0 + backend := tritonConfig.Backend + for _, instanceGroup := range instanceGroups { + // only take the value from the first KIND_CPU that's found + if instanceGroup.Kind == pb.ModelInstanceGroup_KIND_CPU && instanceCount == 0 { + if instanceGroup.Count < 1 { + if 
strings.ToLower(backend) == "tensorflow" || strings.ToLower(backend) == "onnxruntime" { + instanceCount = 2 + } else { + instanceCount = 1 + } + } else { + instanceCount += instanceGroup.Count + } + } + } + // Default to 1 if no KIND_CPU is found, as KIND_GPU is currently not supported + if instanceCount < 1 { + instanceCount = 1 + } + tritonRuntimeInfo.Triton.Cpu = []*agent.TritonCPU{{InstanceCount: uint32(instanceCount)}} + } + } + return &agent.ModelRuntimeInfo{ModelRuntimeInfo: tritonRuntimeInfo}, nil +} diff --git a/scheduler/pkg/agent/repository/triton/triton_test.go b/scheduler/pkg/agent/repository/triton/triton_test.go index addcd3d9ae..f5e4ec2612 100644 --- a/scheduler/pkg/agent/repository/triton/triton_test.go +++ b/scheduler/pkg/agent/repository/triton/triton_test.go @@ -20,6 +20,8 @@ import ( "google.golang.org/protobuf/encoding/prototext" "google.golang.org/protobuf/proto" + "github.com/seldonio/seldon-core/apis/go/v2/mlops/agent" + pb "github.com/seldonio/seldon-core/scheduler/v2/pkg/agent/repository/triton/config" ) @@ -336,3 +338,86 @@ func TestCopyNonConfigFilesToModelRepo(t *testing.T) { }) } } + +func TestGetModelConfig(t *testing.T) { + g := NewGomegaWithT(t) + + type test struct { + name string + modelConfig *pb.ModelConfig + expectedInstanceCount uint32 + } + + tests := []test{ + { + name: "onnxruntime backend without a count defaults to 2", + modelConfig: &pb.ModelConfig{ + Backend: "onnxruntime", + InstanceGroup: []*pb.ModelInstanceGroup{{Kind: pb.ModelInstanceGroup_KIND_CPU}}, + }, + expectedInstanceCount: 2, + }, { + name: "tensorflow backend without a count defaults to 2", + modelConfig: &pb.ModelConfig{ + Backend: "tensorflow", + InstanceGroup: []*pb.ModelInstanceGroup{{Kind: pb.ModelInstanceGroup_KIND_CPU}}, + }, + expectedInstanceCount: 2, + }, { + name: "other backend without a count defaults to 1", + modelConfig: &pb.ModelConfig{ + Backend: "other", + InstanceGroup: []*pb.ModelInstanceGroup{{Kind: pb.ModelInstanceGroup_KIND_CPU}}, + }, + expectedInstanceCount: 1, + }, { + name: "not KIND_CPU defaults to 1", + modelConfig: &pb.ModelConfig{ + Backend: "tensorflow", + InstanceGroup: []*pb.ModelInstanceGroup{{Kind: pb.ModelInstanceGroup_KIND_GPU}}, + }, + expectedInstanceCount: 1, + }, { + name: "onnxruntime backend with count", + modelConfig: &pb.ModelConfig{ + Backend: "onnxruntime", + InstanceGroup: []*pb.ModelInstanceGroup{{Count: 8, Kind: pb.ModelInstanceGroup_KIND_CPU}}, + }, + expectedInstanceCount: 8, + }, { + name: "tensorflow backend with count", + modelConfig: &pb.ModelConfig{ + Backend: "tensorflow", + InstanceGroup: []*pb.ModelInstanceGroup{{Count: 5, Kind: pb.ModelInstanceGroup_KIND_CPU}}, + }, + expectedInstanceCount: 5, + }, { + name: "no instance group defaults to 1", + modelConfig: &pb.ModelConfig{ + Backend: "tensorflow", + }, + expectedInstanceCount: 1, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + rclonePath := t.TempDir() + // Create rclone config.pbtxt + configPathRclone := filepath.Join(rclonePath, TritonConfigFile) + data, err := prototext.Marshal(test.modelConfig) + g.Expect(err).To(BeNil()) + err = os.WriteFile(configPathRclone, data, fs.ModePerm) + g.Expect(err).To(BeNil()) + + logger := log.New() + triton := TritonRepositoryHandler{logger: logger} + modelConfig, err := triton.GetModelRuntimeInfo(rclonePath) + + g.Expect(err).To(BeNil()) + + tritonModelConfig := modelConfig.ModelRuntimeInfo.(*agent.ModelRuntimeInfo_Triton) + 
g.Expect(tritonModelConfig.Triton.Cpu[0].InstanceCount).To(Equal(test.expectedInstanceCount)) + }) + } +} diff --git a/scheduler/pkg/agent/rproxy.go b/scheduler/pkg/agent/rproxy.go index db7a067590..1d44858e7f 100644 --- a/scheduler/pkg/agent/rproxy.go +++ b/scheduler/pkg/agent/rproxy.go @@ -242,7 +242,9 @@ func (rp *reverseHTTPProxy) Stop() error { defer rp.mu.Unlock() var err error if rp.server != nil { - err = rp.server.Shutdown(context.Background()) + ctx, cancel := context.WithTimeout(context.Background(), util.ServerControlPlaneTimeout) + defer cancel() + err = rp.server.Shutdown(ctx) } rp.serverReady = false rp.logger.Info("Finished graceful shutdown") diff --git a/scheduler/pkg/agent/state_manager_test.go b/scheduler/pkg/agent/state_manager_test.go index 2b1b5ff31d..87d6bbd663 100644 --- a/scheduler/pkg/agent/state_manager_test.go +++ b/scheduler/pkg/agent/state_manager_test.go @@ -22,6 +22,7 @@ import ( . "github.com/onsi/gomega" log "github.com/sirupsen/logrus" + "github.com/seldonio/seldon-core/apis/go/v2/mlops/agent" pba "github.com/seldonio/seldon-core/apis/go/v2/mlops/agent" pbs "github.com/seldonio/seldon-core/apis/go/v2/mlops/scheduler" @@ -78,8 +79,9 @@ func getDummyModelDetails(modelId string, memBytes uint64, version uint32) *pba. }, } mv := pba.ModelVersion{ - Model: &model, - Version: version, + Model: &model, + Version: version, + RuntimeInfo: getModelRuntimeInfo(1), } return &mv } @@ -102,12 +104,11 @@ func setupLocalTestManagerWithState( numModels int, modelPrefix string, v2Client interfaces.ModelServerControlPlaneClient, capacity int, numVersions int, overCommitPercentage uint32, ) (*LocalStateManager, *testing_utils.V2State) { - logger := log.New() logger.SetLevel(log.InfoLevel) modelState := NewModelState() - //create mock v2 client + // create mock v2 client var v2ClientState *testing_utils.V2State if v2Client == nil { models := make([]string, numModels*numVersions) @@ -137,13 +138,13 @@ func setupLocalTestManager(numModels int, modelPrefix string, v2Client interface } // this mimics LoadModel in client.go with regards to locking -func (manager *LocalStateManager) loadModelFn(modelVersionDetails *pba.ModelVersion) error { +func (manager *LocalStateManager) loadModelFn(modelVersionDetails *pba.ModelVersion, modelConfig *agent.ModelRuntimeInfo) error { modelName := modelVersionDetails.GetModel().GetMeta().GetName() modelVersion := modelVersionDetails.GetVersion() modelWithVersion := util.GetVersionedModelName(modelName, modelVersion) pinnedModelVersion := util.GetPinnedModelVersion() - modifiedModelVersionRequest := getModifiedModelVersion(modelWithVersion, pinnedModelVersion, modelVersionDetails) + modifiedModelVersionRequest := getModifiedModelVersion(modelWithVersion, pinnedModelVersion, modelVersionDetails, modelConfig) manager.cache.Lock(modelWithVersion) defer manager.cache.Unlock(modelWithVersion) @@ -152,14 +153,14 @@ func (manager *LocalStateManager) loadModelFn(modelVersionDetails *pba.ModelVers } // this mimics UnloadModel in client.go with regards to locking -func (manager *LocalStateManager) unloadModelFn(modelVersionDetails *pba.ModelVersion) error { +func (manager *LocalStateManager) unloadModelFn(modelVersionDetails *pba.ModelVersion, modelConfig *agent.ModelRuntimeInfo) error { modelName := modelVersionDetails.GetModel().GetMeta().GetName() modelVersion := modelVersionDetails.GetVersion() modelWithVersion := util.GetVersionedModelName(modelName, modelVersion) pinnedModelVersion := 
util.GetPinnedModelVersion() // we dont have memory actually requirement in unload - modifiedModelVersionRequest := getModifiedModelVersion(modelWithVersion, pinnedModelVersion, modelVersionDetails) + modifiedModelVersionRequest := getModifiedModelVersion(modelWithVersion, pinnedModelVersion, modelVersionDetails, getModelRuntimeInfo(1)) manager.cache.Lock(modelWithVersion) defer manager.cache.Unlock(modelWithVersion) @@ -175,13 +176,12 @@ func (manager *LocalStateManager) ensureLoadModelFn(modelName string, modelVersi } func TestLocalStateManagerSmoke(t *testing.T) { - numModels := 10 dummyModelPrefix := "dummy_model" manager, v2State := setupLocalTestManagerWithState(numModels, dummyModelPrefix, nil, numModels-2, 1, 100) - //activate mock http server for v2 + // activate mock http server for v2 httpmock.ActivateNonDefault(manager.v2Client.(*testing_utils.V2RestClientForTest).HttpClient) defer httpmock.DeactivateAndReset() @@ -190,7 +190,7 @@ func TestLocalStateManagerSmoke(t *testing.T) { for i := 0; i < numModels; i++ { modelName := getModelId(dummyModelPrefix, i) memBytes := uint64(1) - err := manager.loadModelFn(getDummyModelDetails(modelName, memBytes, uint32(1))) + err := manager.loadModelFn(getDummyModelDetails(modelName, memBytes, uint32(1)), getModelRuntimeInfo(1)) g.Expect(err).To(BeNil()) } @@ -213,7 +213,6 @@ func TestLocalStateManagerSmoke(t *testing.T) { t.Logf("Difference in models %v", modelsDiff) } g.Expect(isMatch).To(Equal(true)) - } // Ensures that we have a lock on model reloading @@ -254,7 +253,7 @@ func TestConcurrentReload(t *testing.T) { t.Log("Setup test") manager, v2State := setupLocalTestManagerWithState(test.numModels, dummyModelPrefix, nil, test.capacity, 1, 100) - //activate mock http server for v2 + // activate mock http server for v2 httpmock.ActivateNonDefault(manager.v2Client.(*testing_utils.V2RestClientForTest).HttpClient) defer httpmock.DeactivateAndReset() @@ -262,7 +261,7 @@ func TestConcurrentReload(t *testing.T) { for i := test.numModels - 1; i >= 0; i-- { modelName := getModelId(dummyModelPrefix, i) memBytes := uint64(1) - _ = manager.loadModelFn(getDummyModelDetails(modelName, memBytes, uint32(1))) + _ = manager.loadModelFn(getDummyModelDetails(modelName, memBytes, uint32(1)), getModelRuntimeInfo(1)) } t.Log("Start test") @@ -303,15 +302,13 @@ func TestConcurrentReload(t *testing.T) { t.Log("Test unload models") for i := 0; i < test.numModels; i++ { modelName := getModelId(dummyModelPrefix, i) - err := manager.unloadModelFn(getDummyModelDetailsUnload(modelName, uint32(1))) + err := manager.unloadModelFn(getDummyModelDetailsUnload(modelName, uint32(1)), getModelRuntimeInfo(1)) g.Expect(err).To(BeNil()) } g.Expect(manager.availableMainMemoryBytes).Should(BeNumerically("==", test.capacity)) g.Expect(manager.modelVersions.numModels()).Should(BeNumerically("==", 0)) - }) } - } // Test concurrent infer requests @@ -351,7 +348,7 @@ func TestConcurrentInfer(t *testing.T) { t.Log("Setup test") manager, v2State := setupLocalTestManagerWithState(test.numModels, dummyModelPrefix, nil, test.capacity, 1, 100) - //activate mock http server for v2 + // activate mock http server for v2 httpmock.ActivateNonDefault(manager.v2Client.(*testing_utils.V2RestClientForTest).HttpClient) defer httpmock.DeactivateAndReset() @@ -359,7 +356,7 @@ func TestConcurrentInfer(t *testing.T) { for i := test.numModels - 1; i >= 0; i-- { modelName := getModelId(dummyModelPrefix, i) memBytes := uint64(1) - _ = manager.loadModelFn(getDummyModelDetails(modelName, memBytes, uint32(1))) + _ 
= manager.loadModelFn(getDummyModelDetails(modelName, memBytes, uint32(1)), getModelRuntimeInfo(1)) } t.Log("Start test") @@ -401,14 +398,13 @@ func TestConcurrentInfer(t *testing.T) { t.Log("Test unload models") for i := 0; i < test.numModels; i++ { modelName := getModelId(dummyModelPrefix, i) - err := manager.unloadModelFn(getDummyModelDetailsUnload(modelName, uint32(1))) + err := manager.unloadModelFn(getDummyModelDetailsUnload(modelName, uint32(1)), getModelRuntimeInfo(1)) g.Expect(err).To(BeNil()) } g.Expect(manager.availableMainMemoryBytes).Should(BeNumerically("==", test.capacity)) g.Expect(manager.modelVersions.numModels()).Should(BeNumerically("==", 0)) }) } - } // We have concurrent load and unload of models from the scheduler. @@ -448,11 +444,10 @@ func TestConcurrentLoad(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - t.Log("Setup test") manager, v2State := setupLocalTestManagerWithState(test.numModels, dummyModelPrefix, nil, test.capacity, 1, 100) - //activate mock http server for v2 + // activate mock http server for v2 httpmock.ActivateNonDefault(manager.v2Client.(*testing_utils.V2RestClientForTest).HttpClient) defer httpmock.DeactivateAndReset() @@ -463,11 +458,11 @@ func TestConcurrentLoad(t *testing.T) { modelName := getModelId(dummyModelPrefix, i) memBytes := uint64(1) checkerFn := func(wg *sync.WaitGroup, modelName string, modelVersion uint32) { - err := manager.loadModelFn(getDummyModelDetails(modelName, memBytes, modelVersion)) + err := manager.loadModelFn(getDummyModelDetails(modelName, memBytes, modelVersion), getModelRuntimeInfo(1)) for err != nil { t.Logf("Error %s", err) time.Sleep(10 * time.Millisecond) - err = manager.loadModelFn(getDummyModelDetails(modelName, memBytes, modelVersion)) + err = manager.loadModelFn(getDummyModelDetails(modelName, memBytes, modelVersion), getModelRuntimeInfo(1)) } g.Expect(manager.availableMainMemoryBytes).Should(BeNumerically(">=", 0)) wg.Done() @@ -492,7 +487,7 @@ func TestConcurrentLoad(t *testing.T) { for i := 0; i < test.numModels; i++ { modelName := getModelId(dummyModelPrefix, i) checkerFn := func(wg *sync.WaitGroup, modelName string, modelVersion uint32) { - err := manager.unloadModelFn(getDummyModelDetailsUnload(modelName, modelVersion)) + err := manager.unloadModelFn(getDummyModelDetailsUnload(modelName, modelVersion), getModelRuntimeInfo(1)) if err != nil { t.Logf("Error %s", err) } @@ -507,7 +502,6 @@ func TestConcurrentLoad(t *testing.T) { // should be an empty server g.Expect(manager.availableMainMemoryBytes).Should(BeNumerically("==", test.capacity)) g.Expect(manager.modelVersions.numModels()).Should(BeNumerically("==", 0)) - }) } } @@ -545,7 +539,7 @@ func TestConcurrentLoadWithVersions(t *testing.T) { t.Log("Setup test") manager, v2State := setupLocalTestManagerWithState(test.numModels, dummyModelPrefix, nil, test.capacity, numberOfVersionsToAdd, 100) - //activate mock http server for v2 + // activate mock http server for v2 httpmock.ActivateNonDefault(manager.v2Client.(*testing_utils.V2RestClientForTest).HttpClient) defer httpmock.DeactivateAndReset() @@ -554,11 +548,11 @@ func TestConcurrentLoadWithVersions(t *testing.T) { wg.Add(test.numModels * numberOfVersionsToAdd) checkerFn := func(wg *sync.WaitGroup, modelName string, memBytes uint64, modelVersion uint32) { - err := manager.loadModelFn(getDummyModelDetails(modelName, memBytes, modelVersion)) + err := manager.loadModelFn(getDummyModelDetails(modelName, memBytes, modelVersion), getModelRuntimeInfo(1)) for err != nil { 
t.Logf("Error %s for model %s version %d", err, modelName, modelVersion) time.Sleep(10 * time.Millisecond) - err = manager.loadModelFn(getDummyModelDetails(modelName, memBytes, modelVersion)) + err = manager.loadModelFn(getDummyModelDetails(modelName, memBytes, modelVersion), getModelRuntimeInfo(1)) } g.Expect(manager.availableMainMemoryBytes).Should(BeNumerically(">=", 0)) wg.Done() @@ -587,7 +581,7 @@ func TestConcurrentLoadWithVersions(t *testing.T) { wg.Add(test.numModels * numberOfVersionsToAdd) checkerFn = func(wg *sync.WaitGroup, modelName string, _ uint64, modelVersion uint32) { - err := manager.unloadModelFn(getDummyModelDetailsUnload(modelName, modelVersion)) + err := manager.unloadModelFn(getDummyModelDetailsUnload(modelName, modelVersion), getModelRuntimeInfo(1)) if err != nil { t.Logf("Error %s", err) } @@ -605,7 +599,6 @@ func TestConcurrentLoadWithVersions(t *testing.T) { // should be an empty server g.Expect(manager.availableMainMemoryBytes).Should(BeNumerically("==", test.capacity)) g.Expect(manager.modelVersions.numModels()).Should(BeNumerically("==", 0)) - }) } } @@ -640,7 +633,7 @@ func TestDataAndControlPlaneInteractionSmoke(t *testing.T) { t.Log("Setup test") manager, v2State := setupLocalTestManagerWithState(test.numModels, dummyModelPrefix, nil, test.capacity, numberOfVersionsToAdd, 100) - //activate mock http server for v2 + // activate mock http server for v2 httpmock.ActivateNonDefault(manager.v2Client.(*testing_utils.V2RestClientForTest).HttpClient) defer httpmock.DeactivateAndReset() @@ -650,7 +643,7 @@ func TestDataAndControlPlaneInteractionSmoke(t *testing.T) { modelName := getModelId(dummyModelPrefix, i) memBytes := uint64(1) modelVersion := uint32(1) - _ = manager.loadModelFn(getDummyModelDetails(modelName, memBytes, modelVersion)) + _ = manager.loadModelFn(getDummyModelDetails(modelName, memBytes, modelVersion), getModelRuntimeInfo(1)) } t.Log("Start test") @@ -664,12 +657,12 @@ func TestDataAndControlPlaneInteractionSmoke(t *testing.T) { switch op { case 0: t.Logf("Load model %s", modelName) - if err := manager.loadModelFn(getDummyModelDetails(modelName, memBytes, modelVersion)); err != nil { + if err := manager.loadModelFn(getDummyModelDetails(modelName, memBytes, modelVersion), getModelRuntimeInfo(1)); err != nil { t.Logf("Load model %s failed (%s)", modelName, err) } case 1: t.Logf("Unload model %s", modelName) - if err := manager.unloadModelFn(getDummyModelDetailsUnload(modelName, modelVersion)); err != nil { + if err := manager.unloadModelFn(getDummyModelDetailsUnload(modelName, modelVersion), getModelRuntimeInfo(1)); err != nil { t.Logf("Unload model %s failed (%s)", modelName, err) } case 2: @@ -706,12 +699,11 @@ func TestDataAndControlPlaneInteractionSmoke(t *testing.T) { for i := 0; i < test.numModels; i++ { modelName := getModelId(dummyModelPrefix, i) for j := 1; j <= numberOfVersionsToAdd; j++ { - _ = manager.unloadModelFn(getDummyModelDetailsUnload(modelName, uint32(j))) + _ = manager.unloadModelFn(getDummyModelDetailsUnload(modelName, uint32(j)), getModelRuntimeInfo(1)) } } g.Expect(manager.availableMainMemoryBytes).Should(BeNumerically("==", test.capacity)) g.Expect(manager.modelVersions.numModels()).Should(BeNumerically("==", 0)) - }) } } @@ -824,7 +816,7 @@ func TestControlAndDataPlaneUseCases(t *testing.T) { t.Run(test.name, func(t *testing.T) { manager, v2State := setupLocalTestManagerWithState(numModels, dummyModelPrefix, nil, capacity, 1, 100) - //activate mock http server for v2 + // activate mock http server for v2 
httpmock.ActivateNonDefault(manager.v2Client.(*testing_utils.V2RestClientForTest).HttpClient) defer httpmock.DeactivateAndReset() @@ -835,7 +827,7 @@ func TestControlAndDataPlaneUseCases(t *testing.T) { for i := 0; i < capacity; i++ { modelName := getModelId(dummyModelPrefix, i) modelVersion := uint32(1) - _ = manager.loadModelFn(getDummyModelDetails(modelName, memBytes, modelVersion)) + _ = manager.loadModelFn(getDummyModelDetails(modelName, memBytes, modelVersion), getModelRuntimeInfo(1)) } var wg sync.WaitGroup @@ -881,9 +873,8 @@ func TestControlAndDataPlaneUseCases(t *testing.T) { } t.Log("Setup step1") - if test.step1.isLoaded { - _ = manager.loadModelFn(getDummyModelDetails(getModelId(dummyModelPrefix, test.step1.modelIdSuffix), memBytes, uint32(test.step1.modelVersion))) + _ = manager.loadModelFn(getDummyModelDetails(getModelId(dummyModelPrefix, test.step1.modelIdSuffix), memBytes, uint32(test.step1.modelVersion)), getModelRuntimeInfo(1)) if !test.step1.inMemory { // ensure load the other model 0, so evicts model_1 if in memory _ = manager.ensureLoadModelFn(getModelId(dummyModelPrefix, 0), 1) @@ -893,7 +884,7 @@ func TestControlAndDataPlaneUseCases(t *testing.T) { t.Log("Setup step2") if test.step2.isLoaded { - _ = manager.loadModelFn(getDummyModelDetails(getModelId(dummyModelPrefix, test.step2.modelIdSuffix), memBytes, uint32(test.step2.modelVersion))) + _ = manager.loadModelFn(getDummyModelDetails(getModelId(dummyModelPrefix, test.step2.modelIdSuffix), memBytes, uint32(test.step2.modelVersion)), getModelRuntimeInfo(1)) if !test.step2.inMemory { // ensure load the other model 0, so evicts model_1 if in memory _ = manager.ensureLoadModelFn(getModelId(dummyModelPrefix, 0), 1) @@ -951,14 +942,16 @@ func TestAvailableMemoryWithOverCommit(t *testing.T) { capacity int overCommitPercentage int expectedAvailableMemoryWithOverCommit uint64 + runtimeInfo *agent.ModelRuntimeInfo } tests := []test{ { name: "extra main capacity", numModels: 10, - capacity: 20, + capacity: 30, overCommitPercentage: 0, expectedAvailableMemoryWithOverCommit: 10, + runtimeInfo: getModelRuntimeInfo(2), }, { name: "extra main capacity with overcommit", @@ -966,6 +959,7 @@ func TestAvailableMemoryWithOverCommit(t *testing.T) { capacity: 20, overCommitPercentage: 10, expectedAvailableMemoryWithOverCommit: 12, + runtimeInfo: getModelRuntimeInfo(1), }, { name: "enough main capacity", @@ -973,6 +967,7 @@ func TestAvailableMemoryWithOverCommit(t *testing.T) { capacity: 10, overCommitPercentage: 0, expectedAvailableMemoryWithOverCommit: 0, + runtimeInfo: getModelRuntimeInfo(1), }, { name: "enough main capacity with overcommit", @@ -980,13 +975,15 @@ func TestAvailableMemoryWithOverCommit(t *testing.T) { capacity: 10, overCommitPercentage: 10, expectedAvailableMemoryWithOverCommit: 1, + runtimeInfo: getModelRuntimeInfo(1), }, { name: "overcommit", numModels: 10, - capacity: 8, + capacity: 16, overCommitPercentage: 50, - expectedAvailableMemoryWithOverCommit: 2, + expectedAvailableMemoryWithOverCommit: 4, + runtimeInfo: getModelRuntimeInfo(2), }, { name: "overflow", @@ -994,6 +991,7 @@ func TestAvailableMemoryWithOverCommit(t *testing.T) { capacity: 8, overCommitPercentage: 0, expectedAvailableMemoryWithOverCommit: 0, + runtimeInfo: getModelRuntimeInfo(1), }, } @@ -1002,14 +1000,14 @@ func TestAvailableMemoryWithOverCommit(t *testing.T) { t.Log("Test load") manager, _ := setupLocalTestManagerWithState(test.numModels, dummyModelPrefix, nil, test.capacity, 1, uint32(test.overCommitPercentage)) - //activate mock http server for 
v2 + // activate mock http server for v2 httpmock.ActivateNonDefault(manager.v2Client.(*testing_utils.V2RestClientForTest).HttpClient) defer httpmock.DeactivateAndReset() for i := 0; i < test.numModels; i++ { modelName := getModelId(dummyModelPrefix, i) modelVersion := uint32(1) - _ = manager.loadModelFn(getDummyModelDetails(modelName, memBytes, modelVersion)) + _ = manager.loadModelFn(getDummyModelDetails(modelName, memBytes, modelVersion), test.runtimeInfo) } g.Expect(manager.GetAvailableMemoryBytesWithOverCommit()).To(Equal(test.expectedAvailableMemoryWithOverCommit)) @@ -1029,14 +1027,14 @@ func TestServerConnectionIssues(t *testing.T) { manager := setupLocalTestManager(numModels, dummyModelPrefix, nil, numModels-1, 1) - //activate httpmock for v2 + // activate httpmock for v2 httpmock.ActivateNonDefault(manager.v2Client.(*testing_utils.V2RestClientForTest).HttpClient) g := NewGomegaWithT(t) for i := 0; i < numModels; i++ { modelName := getModelId(dummyModelPrefix, i) memBytes := uint64(1) - _ = manager.loadModelFn(getDummyModelDetails(modelName, memBytes, uint32(1))) + _ = manager.loadModelFn(getDummyModelDetails(modelName, memBytes, uint32(1)), getModelRuntimeInfo(1)) } // disable httpmock, then we will have a connection issue, check that state is same @@ -1082,14 +1080,14 @@ func TestModelMetricsStats(t *testing.T) { t.Log("load test") manager, _ := setupLocalTestManagerWithState(test.numModels, dummyModelPrefix, nil, test.capacity, 1, uint32(50)) - //activate mock http server for v2 + // activate mock http server for v2 httpmock.ActivateNonDefault(manager.v2Client.(*testing_utils.V2RestClientForTest).HttpClient) defer httpmock.DeactivateAndReset() for i := 0; i < test.numModels; i++ { modelName := getModelId(dummyModelPrefix, i) modelVersion := uint32(1) - _ = manager.loadModelFn(getDummyModelDetails(modelName, memBytes, modelVersion)) + _ = manager.loadModelFn(getDummyModelDetails(modelName, memBytes, modelVersion), getModelRuntimeInfo(1)) time.Sleep(10 * time.Millisecond) model := getVersionedModelId(dummyModelPrefix, i, 1) // model under test real load @@ -1157,7 +1155,7 @@ func TestModelMetricsStats(t *testing.T) { for i := 0; i < test.numModels; i++ { modelName := getModelId(dummyModelPrefix, i) modelVersion := uint32(1) - _ = manager.unloadModelFn(getDummyModelDetails(modelName, memBytes, modelVersion)) + _ = manager.unloadModelFn(getDummyModelDetails(modelName, memBytes, modelVersion), getModelRuntimeInfo(1)) time.Sleep(10 * time.Millisecond) model := getVersionedModelId(dummyModelPrefix, i, 1) if test.capacity < test.numModels { @@ -1177,7 +1175,6 @@ func TestModelMetricsStats(t *testing.T) { isSoft: false, }, )) - } } else { g.Expect(manager.metrics.(fakeMetricsHandler).modelLoadState[model]).To(Equal( @@ -1192,3 +1189,8 @@ func TestModelMetricsStats(t *testing.T) { }) } } + +// TODO: getModelRuntimeInfo method +func getModelRuntimeInfo(instanceCount uint32) *pba.ModelRuntimeInfo { + return &pba.ModelRuntimeInfo{ModelRuntimeInfo: &pba.ModelRuntimeInfo_Mlserver{Mlserver: &agent.MLServerModelSettings{ParallelWorkers: instanceCount}}} +} diff --git a/scheduler/pkg/kafka/gateway/worker.go b/scheduler/pkg/kafka/gateway/worker.go index d126eff71f..728db9ed24 100644 --- a/scheduler/pkg/kafka/gateway/worker.go +++ b/scheduler/pkg/kafka/gateway/worker.go @@ -20,6 +20,7 @@ import ( "net/url" "strconv" "strings" + "time" "github.com/confluentinc/confluent-kafka-go/v2/kafka" grpc_retry "github.com/grpc-ecosystem/go-grpc-middleware/retry" @@ -158,14 +159,6 @@ func 
getProtoInferRequest(job *InferWork) (*v2.ModelInferRequest, error) { return &ireq, nil } -// Extract tracing context from Kafka message -func createContextFromKafkaMsg(job *InferWork) context.Context { - ctx := context.Background() - carrierIn := splunkkafka.NewMessageCarrier(job.msg) - ctx = otel.GetTextMapPropagator().Extract(ctx, carrierIn) - return ctx -} - func (iw *InferWorker) Start(jobChan <-chan *InferWork, cancelChan <-chan struct{}) { for { select { @@ -173,8 +166,8 @@ func (iw *InferWorker) Start(jobChan <-chan *InferWork, cancelChan <-chan struct return case job := <-jobChan: - ctx := createContextFromKafkaMsg(job) - err := iw.processRequest(ctx, job) + ctx := createBaseContextFromKafkaMsg(job.msg) + err := iw.processRequest(ctx, job, util.InferTimeoutDefault) if err != nil { iw.logger.WithError(err).Errorf("Failed to process request for model %s", job.modelName) } @@ -182,35 +175,38 @@ func (iw *InferWorker) Start(jobChan <-chan *InferWork, cancelChan <-chan struct } } -func (iw *InferWorker) processRequest(ctx context.Context, job *InferWork) error { +func (iw *InferWorker) processRequest(ctx context.Context, job *InferWork, timeout time.Duration) error { + ctxWithTimeout, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + // Has Type Header if typeValue, ok := job.headers[HeaderKeyType]; ok { switch typeValue { case HeaderValueJsonReq: - return iw.restRequest(ctx, job, false) + return iw.restRequest(ctxWithTimeout, job, false) case HeaderValueJsonRes: - return iw.restRequest(ctx, job, true) + return iw.restRequest(ctxWithTimeout, job, true) case HeaderValueProtoReq: protoRequest, err := getProtoInferRequest(job) if err != nil { return err } - return iw.grpcRequest(ctx, job, protoRequest) + return iw.grpcRequest(ctxWithTimeout, job, protoRequest) case HeaderValueProtoRes: protoRequest, err := getProtoRequestAssumingResponse(job.msg.Value) if err != nil { return err } - return iw.grpcRequest(ctx, job, protoRequest) + return iw.grpcRequest(ctxWithTimeout, job, protoRequest) default: return fmt.Errorf("Header %s with unknown type %s", HeaderKeyType, typeValue) } } else { // Does not have type header - this is the general case to allow easy use protoRequest, err := getProtoInferRequest(job) if err != nil { - return iw.restRequest(ctx, job, true) + return iw.restRequest(ctxWithTimeout, job, true) } else { - return iw.grpcRequest(ctx, job, protoRequest) + return iw.grpcRequest(ctxWithTimeout, job, protoRequest) } } } @@ -404,3 +400,13 @@ func (iw *InferWorker) grpcRequest(ctx context.Context, job *InferWork, req *v2. } return nil } + +// this is redundant code but is kept there to avoid circular dependencies +// todo: refactor tracing pkg in general and remove this +func createBaseContextFromKafkaMsg(msg *kafka.Message) context.Context { + // these are just a base context for a new span + // callers should add timeout, etc for this context as they see fit. 
+ ctx := context.Background() + carrierIn := splunkkafka.NewMessageCarrier(msg) + return otel.GetTextMapPropagator().Extract(ctx, carrierIn) +} diff --git a/scheduler/pkg/kafka/gateway/worker_test.go b/scheduler/pkg/kafka/gateway/worker_test.go index e4008e1a7c..a21d5356ae 100644 --- a/scheduler/pkg/kafka/gateway/worker_test.go +++ b/scheduler/pkg/kafka/gateway/worker_test.go @@ -15,6 +15,7 @@ import ( "net" "net/http" "testing" + "time" "github.com/confluentinc/confluent-kafka-go/v2/kafka" "github.com/jarcoal/httpmock" @@ -31,6 +32,7 @@ import ( v2 "github.com/seldonio/seldon-core/apis/go/v2/mlops/v2_dataplane" kafka_config "github.com/seldonio/seldon-core/components/kafka/v2/pkg/config" + "github.com/seldonio/seldon-core/scheduler/v2/pkg/internal/testing_utils" kafka2 "github.com/seldonio/seldon-core/scheduler/v2/pkg/kafka" seldontracer "github.com/seldonio/seldon-core/scheduler/v2/pkg/tracing" "github.com/seldonio/seldon-core/scheduler/v2/pkg/util" @@ -66,10 +68,14 @@ func TestRestRequest(t *testing.T) { t.Run(test.name, func(t *testing.T) { httpmock.Activate() defer httpmock.DeactivateAndReset() + + httpPort, _ := testing_utils.GetFreePortForTest() + grpcPort, _ := testing_utils.GetFreePortForTest() + kafkaServerConfig := InferenceServerConfig{ Host: "0.0.0.0", - HttpPort: 1234, - GrpcPort: 1235, + HttpPort: httpPort, + GrpcPort: grpcPort, } kafkaModelConfig := KafkaModelConfig{ ModelName: "foo", @@ -111,10 +117,13 @@ func TestProcessRequestRest(t *testing.T) { } for _, test := range tests { t.Run(test.name, func(t *testing.T) { + httpPort, _ := testing_utils.GetFreePortForTest() + grpcPort, _ := testing_utils.GetFreePortForTest() + kafkaServerConfig := InferenceServerConfig{ Host: "0.0.0.0", - HttpPort: 1234, - GrpcPort: 1235, + HttpPort: httpPort, + GrpcPort: grpcPort, } kafkaModelConfig := KafkaModelConfig{ ModelName: "foo", @@ -134,7 +143,7 @@ func TestProcessRequestRest(t *testing.T) { g.Expect(err).To(BeNil()) iw, err := NewInferWorker(ic, logger, tp, tn) g.Expect(err).To(BeNil()) - err = iw.processRequest(context.Background(), &InferWork{modelName: "foo", msg: &kafka.Message{Value: test.data}}) + err = iw.processRequest(context.Background(), &InferWork{modelName: "foo", msg: &kafka.Message{Value: test.data}}, util.InferTimeoutDefault) g.Expect(err).To(BeNil()) ic.Stop() g.Eventually(httpmock.GetTotalCallCount).Should(Equal(1)) @@ -191,7 +200,8 @@ func createInferWorkerWithMockConn( logger log.FieldLogger, serverConfig *InferenceServerConfig, modelConfig *KafkaModelConfig, - g *WithT) (*InferKafkaHandler, *InferWorker) { + g *WithT, +) (*InferKafkaHandler, *InferWorker) { conn, _ := grpc.NewClient("passthrough://", grpc.WithContextDialer(func(context.Context, string) (net.Conn, error) { return grpcServer.listener.Dial() }), grpc.WithTransportCredentials(insecure.NewCredentials())) @@ -244,10 +254,14 @@ func TestProcessRequestGrpc(t *testing.T) { t.Run(test.name, func(t *testing.T) { logger := log.New() t.Log("Start test", test.name) + + httpPort, _ := testing_utils.GetFreePortForTest() + grpcPort, _ := testing_utils.GetFreePortForTest() + kafkaServerConfig := InferenceServerConfig{ Host: "0.0.0.0", - HttpPort: 1234, - GrpcPort: 1235, + HttpPort: httpPort, + GrpcPort: grpcPort, } kafkaModelConfig := KafkaModelConfig{ ModelName: "foo", @@ -262,7 +276,7 @@ func TestProcessRequestGrpc(t *testing.T) { g.Eventually(check).Should(BeTrue()) b, err := proto.Marshal(test.req) g.Expect(err).To(BeNil()) - err = 
iw.processRequest(context.Background(), &InferWork{modelName: "foo", msg: &kafka.Message{Value: b}}) + err = iw.processRequest(context.Background(), &InferWork{modelName: "foo", msg: &kafka.Message{Value: b}}, util.InferTimeoutDefault) g.Expect(err).To(BeNil()) g.Eventually(func() int { return mockMLGrpcServer.recv }).Should(Equal(1)) g.Eventually(ic.producer.Len).Should(Equal(1)) @@ -280,6 +294,7 @@ func TestProcessRequest(t *testing.T) { restCalls int grpcCalls int error bool + timeout time.Duration } getProtoBytes := func(res proto.Message) []byte { b, _ := proto.Marshal(res) @@ -319,6 +334,7 @@ func TestProcessRequest(t *testing.T) { msg: &kafka.Message{Value: []byte{}, Key: []byte{}}, }, grpcCalls: 1, + timeout: util.InferTimeoutDefault, }, { name: "empty json request", @@ -328,6 +344,7 @@ func TestProcessRequest(t *testing.T) { msg: &kafka.Message{Value: []byte("{}"), Key: []byte{}}, }, restCalls: 1, + timeout: util.InferTimeoutDefault, }, { name: "json request", @@ -337,6 +354,7 @@ func TestProcessRequest(t *testing.T) { msg: &kafka.Message{Value: []byte(`{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}`), Key: []byte{}}, }, restCalls: 1, + timeout: util.InferTimeoutDefault, }, { name: "chain json request", @@ -346,6 +364,7 @@ func TestProcessRequest(t *testing.T) { msg: &kafka.Message{Value: []byte(`{"model_name":"iris_1","model_version":"1","id":"903964e4-2419-41ce-b5d1-3ca0c8df9e0c","parameters":null,"outputs":[{"name":"predict","shape":[1],"datatype":"INT64","parameters":null,"data":[2]}]}`), Key: []byte{}}, }, restCalls: 1, + timeout: util.InferTimeoutDefault, }, { name: "json request with header", @@ -355,6 +374,7 @@ func TestProcessRequest(t *testing.T) { msg: &kafka.Message{Value: []byte(`{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}`), Key: []byte{}}, }, restCalls: 1, + timeout: util.InferTimeoutDefault, }, { name: "chain json request with header", @@ -364,6 +384,7 @@ func TestProcessRequest(t *testing.T) { msg: &kafka.Message{Value: []byte(`{"model_name":"iris_1","model_version":"1","id":"903964e4-2419-41ce-b5d1-3ca0c8df9e0c","parameters":null,"outputs":[{"name":"predict","shape":[1],"datatype":"INT64","parameters":null,"data":[2]}]}`), Key: []byte{}}, }, restCalls: 1, + timeout: util.InferTimeoutDefault, }, { name: "grpc request without header", @@ -373,6 +394,7 @@ func TestProcessRequest(t *testing.T) { msg: &kafka.Message{Value: getProtoBytes(testRequest), Key: []byte{}}, }, grpcCalls: 1, + timeout: util.InferTimeoutDefault, }, { name: "grpc request with header", @@ -382,6 +404,7 @@ func TestProcessRequest(t *testing.T) { msg: &kafka.Message{Value: getProtoBytes(testRequest), Key: []byte{}}, }, grpcCalls: 1, + timeout: util.InferTimeoutDefault, }, { name: "chained grpc request without header", @@ -391,6 +414,7 @@ func TestProcessRequest(t *testing.T) { msg: &kafka.Message{Value: getProtoBytes(testResponse), Key: []byte{}}, }, grpcCalls: 1, + timeout: util.InferTimeoutDefault, }, { name: "chained grpc request with header", @@ -400,6 +424,7 @@ func TestProcessRequest(t *testing.T) { msg: &kafka.Message{Value: getProtoBytes(testResponse), Key: []byte{}}, }, grpcCalls: 1, + timeout: util.InferTimeoutDefault, }, { name: "json request with proto request header", @@ -408,7 +433,8 @@ func TestProcessRequest(t *testing.T) { headers: map[string]string{HeaderKeyType: HeaderValueProtoReq}, msg: &kafka.Message{Value: []byte(`{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": 
"FP32", "data": [[1, 2, 3, 4]]}]}`), Key: []byte{}}, }, - error: true, + error: true, + timeout: util.InferTimeoutDefault, }, { name: "json request with proto response header", @@ -417,16 +443,28 @@ func TestProcessRequest(t *testing.T) { headers: map[string]string{HeaderKeyType: HeaderValueProtoRes}, msg: &kafka.Message{Value: []byte(`{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}`), Key: []byte{}}, }, - error: true, + error: true, + timeout: util.InferTimeoutDefault, }, { - name: "grpc request with json header treated as json", //TODO maybe fail in this case as it will fail at server + name: "grpc request with json header treated as json", // TODO maybe fail in this case as it will fail at server job: &InferWork{ modelName: "foo", headers: map[string]string{HeaderKeyType: HeaderValueJsonReq}, msg: &kafka.Message{Value: getProtoBytes(testRequest), Key: []byte{}}, }, restCalls: 1, + timeout: util.InferTimeoutDefault, + }, + { + name: "grpc request with header - timeout", + job: &InferWork{ + modelName: "foo", + headers: map[string]string{HeaderKeyType: HeaderValueProtoReq}, + msg: &kafka.Message{Value: getProtoBytes(testRequest), Key: []byte{}}, + }, + grpcCalls: 0, // grpc call will not be made as it will timeout + timeout: time.Nanosecond * 1, }, } for _, test := range tests { @@ -434,10 +472,14 @@ func TestProcessRequest(t *testing.T) { logger := log.New() logger.Infof("Start test %s", test.name) t.Log("Start test", test.name) + + httpPort, _ := testing_utils.GetFreePortForTest() + grpcPort, _ := testing_utils.GetFreePortForTest() + kafkaServerConfig := InferenceServerConfig{ Host: "0.0.0.0", - HttpPort: 1234, - GrpcPort: 1235, + HttpPort: httpPort, + GrpcPort: grpcPort, } kafkaModelConfig := KafkaModelConfig{ ModelName: "foo", @@ -453,7 +495,7 @@ func TestProcessRequest(t *testing.T) { defer ic.Stop() check := creatMockServerHealthFunc(mockMLGrpcServer) g.Eventually(check).Should(BeTrue()) - err := iw.processRequest(context.Background(), test.job) + err := iw.processRequest(context.Background(), test.job, test.timeout) if test.error { g.Expect(err).ToNot(BeNil()) } else { diff --git a/scheduler/pkg/kafka/pipeline/httpserver.go b/scheduler/pkg/kafka/pipeline/httpserver.go index b5ba40eae2..c4da136f22 100644 --- a/scheduler/pkg/kafka/pipeline/httpserver.go +++ b/scheduler/pkg/kafka/pipeline/httpserver.go @@ -68,7 +68,7 @@ func NewGatewayHttpServer(port int, logger log.FieldLogger, } func (g *GatewayHttpServer) Stop() error { - ctx, cancel := context.WithTimeout(context.Background(), time.Duration(time.Second*5)) + ctx, cancel := context.WithTimeout(context.Background(), util.ServerControlPlaneTimeout) defer cancel() return g.server.Shutdown(ctx) } diff --git a/scheduler/pkg/kafka/pipeline/multi_topic_consumer.go b/scheduler/pkg/kafka/pipeline/multi_topic_consumer.go index 30af55586f..05c1b0a9e1 100644 --- a/scheduler/pkg/kafka/pipeline/multi_topic_consumer.go +++ b/scheduler/pkg/kafka/pipeline/multi_topic_consumer.go @@ -158,9 +158,7 @@ func (c *MultiTopicsKafkaConsumer) pollAndMatch() error { Debugf("received message") if val, ok := c.requests.Get(string(e.Key)); ok { - ctx := context.Background() - carrierIn := splunkkafka.NewMessageCarrier(e) - ctx = otel.GetTextMapPropagator().Extract(ctx, carrierIn) + ctx := createBaseContextFromKafkaMsg(e) // Add tracing span _, span := c.tracer.Start(ctx, "Consume") @@ -196,3 +194,11 @@ func (c *MultiTopicsKafkaConsumer) pollAndMatch() error { logger.Warning("Ending kafka consumer poll") return nil 
// assumption here is that the connection has already terminated
 }
+
+func createBaseContextFromKafkaMsg(msg *kafka.Message) context.Context {
+	// this is just a base context for a new span
+	// callers should add a timeout, etc. to this context as they see fit.
+	ctx := context.Background()
+	carrierIn := splunkkafka.NewMessageCarrier(msg)
+	return otel.GetTextMapPropagator().Extract(ctx, carrierIn)
+}
diff --git a/scheduler/pkg/metrics/agent.go b/scheduler/pkg/metrics/agent.go
index a3f42b801c..a05768c53b 100644
--- a/scheduler/pkg/metrics/agent.go
+++ b/scheduler/pkg/metrics/agent.go
@@ -21,6 +21,8 @@ import (
 	log "github.com/sirupsen/logrus"
 	"google.golang.org/grpc"
 	"google.golang.org/grpc/status"
+
+	"github.com/seldonio/seldon-core/scheduler/v2/pkg/util"
 )
 
 // Keep next line as used in docs
@@ -521,7 +523,9 @@ func (pm *PrometheusMetrics) Start(port int) error {
 func (pm *PrometheusMetrics) Stop() error {
 	pm.logger.Info("Graceful shutdown")
 	if pm.server != nil {
-		return pm.server.Shutdown(context.Background())
+		ctx, cancel := context.WithTimeout(context.Background(), util.ServerControlPlaneTimeout)
+		defer cancel()
+		return pm.server.Shutdown(ctx)
 	}
 	return nil
 }
diff --git a/scheduler/pkg/util/constants.go b/scheduler/pkg/util/constants.go
index 3f61402f06..3e0755cf3f 100644
--- a/scheduler/pkg/util/constants.go
+++ b/scheduler/pkg/util/constants.go
@@ -46,6 +46,21 @@ const (
 	GRPCControlPlaneTimeout = 1 * time.Minute // For control plane operations except load/unload
 )
 
+// K8s API
+const (
+	K8sTimeoutDefault = 2 * time.Minute
+)
+
+// Servers control plane
+const (
+	ServerControlPlaneTimeout = time.Second * 5
+)
+
+// Inference
+const (
+	InferTimeoutDefault = 10 * time.Minute // TODO: expose this as a config (map)?
+)
+
 const (
 	EnvoyUpdateDefaultBatchWait = 250 * time.Millisecond
 	// note that we keep client and server keepalive times the same
diff --git a/tests/k6/components/settings.js b/tests/k6/components/settings.js
index 503efcc2c1..e9a214cdbb 100644
--- a/tests/k6/components/settings.js
+++ b/tests/k6/components/settings.js
@@ -238,11 +238,34 @@ function doWarmup() {
 
 function requestRate() {
     if (__ENV.REQUEST_RATE) {
-        return __ENV.REQUEST_RATE
+        return parseInt(__ENV.REQUEST_RATE)
     }
     return 10
 }
 
+function requestRates() {
+    if (__ENV.REQUEST_RATES) {
+        return __ENV.REQUEST_RATES.split(",").map( s => parseInt(s))
+    }
+    return [requestRate()]
+}
+
+function rateStages() {
+    if (__ENV.REQUEST_RATES) {
+        var stages = []
+        var durations = constantRateDurationsSeconds()
+        var rates = requestRates()
+        for (var i = 0; i < rates.length; i++) {
+            // ramp up (1/3rd of the duration)
+            stages.push({target: rates[i], duration: Math.ceil(durations[i]/3).toString()+'s'})
+            // hold
+            stages.push({target: rates[i], duration: durations[i].toString()+'s'})
+        }
+        return stages
+    }
+    return [{target: requestRate(), duration: constantRateDurationSeconds().toString()+'s'}]
+}
+
 function constantRateDurationSeconds() {
     if (__ENV.CONSTANT_RATE_DURATION_SECONDS) {
         return __ENV.CONSTANT_RATE_DURATION_SECONDS
@@ -250,6 +273,25 @@ function constantRateDurationSeconds() {
     return 30
 }
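Looping back briefly to the Go changes above: both GatewayHttpServer.Stop and PrometheusMetrics.Stop now cap graceful shutdown with util.ServerControlPlaneTimeout instead of waiting indefinitely on in-flight connections. The sketch below shows the general shape of that pattern; the 5s constant and the bare net/http server are illustrative stand-ins under stated assumptions, not the Seldon types themselves.

    package main

    import (
        "context"
        "log"
        "net/http"
        "time"
    )

    const serverControlPlaneTimeout = 5 * time.Second // mirrors the role of util.ServerControlPlaneTimeout

    // stop gives in-flight requests a bounded window to finish; once the
    // deadline passes, Shutdown returns the context error and the caller can
    // decide to force-close instead of blocking forever.
    func stop(srv *http.Server) error {
        ctx, cancel := context.WithTimeout(context.Background(), serverControlPlaneTimeout)
        defer cancel()
        return srv.Shutdown(ctx)
    }

    func main() {
        srv := &http.Server{Addr: ":0"} // random free port
        go func() {
            if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
                log.Fatal(err)
            }
        }()
        time.Sleep(100 * time.Millisecond) // give the listener a moment to start
        if err := stop(srv); err != nil {
            log.Printf("shutdown not clean: %v", err)
        }
    }

Without the deadline, a wedged connection could block Stop() indefinitely, which is what these changes guard against.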
+function constantRateDurationsSeconds() {
+    if (__ENV.CONSTANT_RATE_DURATIONS_SECONDS) {
+        var durations = __ENV.CONSTANT_RATE_DURATIONS_SECONDS.split(",").map( s => parseInt(s))
+        if (durations.length > requestRates().length) {
+            return durations.slice(0, requestRates().length)
+        } else if (durations.length < requestRates().length) {
+            // pad with the last value
+            const last = durations[durations.length - 1]
+            for (var i = durations.length; i < requestRates().length; i++) {
+                durations.push(last)
+            }
+            return durations
+        } else {
+            return durations
+        }
+    }
+    const reqNumberOfStages = requestRates().length
+    return new Array(reqNumberOfStages).fill(constantRateDurationSeconds()/reqNumberOfStages)
+}
+
 function podNamespace() {
     if (__ENV.NAMESPACE) {
         return __ENV.NAMESPACE
@@ -349,7 +391,10 @@ export function getConfig() {
         "inferType" : inferType(),
         "doWarmup": doWarmup(),
         "requestRate": requestRate(),
+        "requestRates": requestRates(),
         "constantRateDurationSeconds": constantRateDurationSeconds(),
+        "constantRateDurationsSeconds": constantRateDurationsSeconds(),
+        "rateStages": rateStages(),
         "modelReplicas": modelReplicas(),
         "maxModelReplicas": maxModelReplicas(),
         "namespace": podNamespace(),
diff --git a/tests/k6/configs/k8s/base/k6.yaml b/tests/k6/configs/k8s/base/k6.yaml
index 892c4435dd..a60a5a36b2 100644
--- a/tests/k6/configs/k8s/base/k6.yaml
+++ b/tests/k6/configs/k8s/base/k6.yaml
@@ -36,6 +36,15 @@ spec:
         #   "csv=results/base.gz",
         #   "scenarios/infer_constant_rate.js",
         # ]
+        # # infer_multiple_rates
+        # args: [
+        #   "--no-teardown",
+        #   "--summary-export",
+        #   "results/base.json",
+        #   "--out",
+        #   "csv=results/base.gz",
+        #   "scenarios/infer_multiple_rates.js",
+        # ]
         # # k8s-test-script
         # args: [
         #   "--summary-export",
@@ -61,8 +70,30 @@ spec:
         #   "scenarios/core2_qa_control_plane_ops.js",
         # ]
         env:
+          - name: REQUEST_RATE
+            value: "20"
+          # REQUEST_RATES allows you to specify multiple target RPS values, and is
+          # used in the infer_multiple_rates scenario. The duration over which
+          # each rate is maintained is controlled via the value at the same index
+          # in the CONSTANT_RATE_DURATIONS_SECONDS variable. Rather than a sudden
+          # change in RPS, additional stages lasting 1/3rd of the duration over
+          # which the RPS is held constant are automatically introduced, and
+          # perform either a linear ramp-up or ramp-down to the next target RPS.
+          # - name: REQUEST_RATES
+          #   value: "10,70,10,50,20"
+          - name: CONSTANT_RATE_DURATION_SECONDS
+            value: "60"
+          # CONSTANT_RATE_DURATIONS_SECONDS is used in the infer_multiple_rates
+          # scenario. It specifies the duration in seconds for which each target
+          # RPS value is maintained.
If the sum of the values here is S, the total + # duration of the test will be S + S/3 (with the latter due to the added + # ramp-up/ramp-down stages) + # - name: CONSTANT_RATE_DURATIONS_SECONDS + # value: "120,120,400,120,400" - name: USE_KUBE_CONTROL_PLANE value: "true" + # - name: SKIP_UNLOAD_MODEL + # value: "true" - name: SCHEDULER_ENDPOINT value: "${SCHEDULER_ENDPOINT}:9004" - name: INFER_HTTP_ITERATIONS diff --git a/tests/k6/scenarios/infer_multiple_rates.js b/tests/k6/scenarios/infer_multiple_rates.js new file mode 100644 index 0000000000..d72b1e875e --- /dev/null +++ b/tests/k6/scenarios/infer_multiple_rates.js @@ -0,0 +1,79 @@ +import { getConfig } from '../components/settings.js' +import { doInfer, setupBase, teardownBase, getVersionSuffix, applyModelReplicaChange } from '../components/utils.js' +import { vu } from 'k6/execution'; + +export const options = { + thresholds: { + 'http_req_duration{scenario:default}': [`max>=0`], + 'http_reqs{scenario:default}': [], + 'grpc_req_duration{scenario:default}': [`max>=0`], + 'data_received{scenario:default}': [], + 'data_sent{scenario:default}': [], + }, + scenarios: { + ramping_request_rates: { + startTime: '0s', + executor: 'ramping-arrival-rate', + startRate: 5, + timeUnit: '1s', + preAllocatedVUs: 50, // how large the initial pool of VUs would be + maxVUs: 1000, // if the preAllocatedVUs are not enough, we can initialize more + stages: getConfig().rateStages, + }, + }, + setupTimeout: '6000s', + teardownTimeout: '6000s', +}; + +export function setup() { + const config = getConfig() + + setupBase(config) + console.log("rate stages:", getConfig().rateStages) + + return config +} + +export default function (config) { + const numModelTypes = config.modelType.length + + let candidateIdxs = [] + for (let i = 0; i < numModelTypes; i++) { + if (config.maxNumModels[i] !== 0) + candidateIdxs.push(i) + } + const numCandidates = candidateIdxs.length + var idx = candidateIdxs[Math.floor(Math.random() * numCandidates)] + + const modelId = Math.floor(Math.random() * config.maxNumModels[idx]) + const modelName = config.modelNamePrefix[idx] + modelId.toString() + + const modelNameWithVersion = modelName + getVersionSuffix(config.isSchedulerProxy) // first version + + var rest_enabled = Number(config.inferHttpIterations) + var grpc_enabled = Number(config.inferGrpcIterations) + if (rest_enabled && grpc_enabled) { + // if both protocols are enabled, choose one randomly + const rand = Math.random() + if (rand > 0.5) { + doInfer(modelName, modelNameWithVersion, config, true, idx) // rest + } else { + doInfer(modelName, modelNameWithVersion, config, false, idx) // grpc + } + } else if (rest_enabled) { + doInfer(modelName, modelNameWithVersion, config, true, idx) + } else if (grpc_enabled) { + doInfer(modelName, modelNameWithVersion, config, false, idx) + } else { + throw new Error('Both REST and GRPC protocols are disabled!') + } + + // for simplicity we only change model replicas in the first VU + if (vu.idInTest == 1 && config.enableModelReplicaChange) { + applyModelReplicaChange(config) + } +} + +export function teardown(config) { + teardownBase(config) +}
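To make the staging arithmetic concrete, the sketch below mirrors the logic of rateStages() and constantRateDurationsSeconds() from settings.js with purely illustrative inputs (these are not defaults from the diff): for REQUEST_RATES="10,70" and CONSTANT_RATE_DURATIONS_SECONDS="120,120", each target gets a ceil(120/3)=40s ramp followed by a 120s hold, so the ramping-arrival-rate scenario runs for 320s in total, i.e. S + S/3 with S=240, matching the k6.yaml comment above.

    package main

    import "fmt"

    type stage struct {
        targetRPS int
        seconds   int
    }

    // buildStages mirrors rateStages() in tests/k6/components/settings.js:
    // every target rate contributes a ramp stage of ceil(d/3) seconds and a
    // hold stage of d seconds at that rate.
    func buildStages(rates, durations []int) []stage {
        var stages []stage
        for i, r := range rates {
            ramp := (durations[i] + 2) / 3 // integer ceil(d/3)
            stages = append(stages, stage{targetRPS: r, seconds: ramp})
            stages = append(stages, stage{targetRPS: r, seconds: durations[i]})
        }
        return stages
    }

    func main() {
        stages := buildStages([]int{10, 70}, []int{120, 120})
        total := 0
        for _, s := range stages {
            total += s.seconds
        }
        fmt.Println(stages)     // [{10 40} {10 120} {70 40} {70 120}]
        fmt.Println(total, "s") // 320 s
    }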