-
Notifications
You must be signed in to change notification settings - Fork 46
/
Copy pathenvironment.go
194 lines (170 loc) · 8.43 KB
/
environment.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
// Copyright 2020 The Merlin Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package config
import (
"fmt"
"os"
"time"
"github.com/go-playground/validator/v10"
"gopkg.in/yaml.v2"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
sigyaml "sigs.k8s.io/yaml"
mlpcluster "github.com/caraml-dev/mlp/api/pkg/cluster"
)
// EnvironmentConfig describes one deployment environment as declared in the
// environments YAML file read by InitEnvironmentConfigs. Each entry controls
// cluster targeting, resource ceilings, and per-environment defaults.
type EnvironmentConfig struct {
// Name uniquely identifies the environment; validation fails when empty.
Name string `validate:"required" yaml:"name"`
// Cluster is the name of the Kubernetes cluster backing this environment.
Cluster string `yaml:"cluster"`
// IsDefault marks this environment as the default for model deployments.
IsDefault bool `yaml:"is_default"`
// IsPredictionJobEnabled enables batch prediction jobs in this environment.
IsPredictionJobEnabled bool `yaml:"is_prediction_job_enabled"`
// IsDefaultPredictionJob marks this environment as the default for prediction jobs.
IsDefaultPredictionJob bool `yaml:"is_default_prediction_job"`
Region string `yaml:"region"`
GcpProject string `yaml:"gcp_project"`
// DeploymentTimeout bounds how long a model deployment may take.
DeploymentTimeout time.Duration `yaml:"deployment_timeout"`
// NamespaceTimeout bounds namespace creation/readiness waits.
NamespaceTimeout time.Duration `yaml:"namespace_timeout"`
// GPUs lists the GPU offerings users may request in this environment.
GPUs []GPUConfig `yaml:"gpus"`
// MaxCPU / MaxMemory are Kubernetes resource.Quantity strings (e.g. "8", "8Gi")
// parsed with resource.MustParse in ParseDeploymentConfig.
MaxCPU string `yaml:"max_cpu"`
MaxMemory string `yaml:"max_memory"`
MaxAllowedReplica int `yaml:"max_allowed_replica"`
TopologySpreadConstraints TopologySpreadConstraints `yaml:"topology_spread_constraints"`
PodDisruptionBudget PodDisruptionBudgetConfig `yaml:"pod_disruption_budget"`
QueueResourcePercentage string `yaml:"queue_resource_percentage"`
// Default*Config entries are optional (nil when omitted from the YAML).
// NOTE(review): ParseDeploymentConfig dereferences DefaultDeploymentConfig and
// DefaultTransformerConfig without a nil check — confirm callers guarantee them.
DefaultPredictionJobConfig *PredictionJobResourceRequestConfig `yaml:"default_prediction_job_config"`
DefaultDeploymentConfig *ResourceRequestConfig `yaml:"default_deployment_config"`
DefaultTransformerConfig *ResourceRequestConfig `yaml:"default_transformer_config"`
// K8sConfig carries cluster-connection settings; required by Validate and
// additionally nil-checked in InitEnvironmentConfigs.
K8sConfig *mlpcluster.K8sConfig `validate:"required" yaml:"k8s_config"`
}
// Validate checks the environment config against its struct tags and applies
// a struct-level rule for PodDisruptionBudgetConfig: when the PDB is enabled,
// exactly one of max_unavailable_percentage or min_available_percentage must
// be set.
func (e *EnvironmentConfig) Validate() error {
	v := validator.New()
	v.RegisterStructValidation(func(sl validator.StructLevel) {
		field := sl.Current().Interface().(PodDisruptionBudgetConfig)
		// The rule only applies when the PDB is enabled. (The previous
		// expression was mis-parenthesized: `enabled && bothNil || bothSet`
		// reported an error for a *disabled* PDB with both values set.)
		if !field.Enabled {
			return
		}
		bothUnset := field.MaxUnavailablePercentage == nil && field.MinAvailablePercentage == nil
		bothSet := field.MaxUnavailablePercentage != nil && field.MinAvailablePercentage != nil
		if bothUnset || bothSet {
			sl.ReportError(field.MaxUnavailablePercentage, "max_unavailable_percentage", "int", "choose_one[max_unavailable_percentage,min_available_percentage]", "")
			sl.ReportError(field.MinAvailablePercentage, "min_available_percentage", "int", "choose_one[max_unavailable_percentage,min_available_percentage]", "")
		}
	}, PodDisruptionBudgetConfig{})
	return v.Struct(e)
}
// TopologySpreadConstraints is a YAML-decodable list of Kubernetes
// topology spread constraints.
type TopologySpreadConstraints []corev1.TopologySpreadConstraint

// UnmarshalYAML implements the yaml.v2 Unmarshaler interface.
// corev1.TopologySpreadConstraint carries only json struct tags, so plain
// yaml.v2 decoding would miss its fields. The value is therefore read into a
// generic structure first, re-marshalled to bytes, and handed to
// sigs.k8s.io/yaml, which honours json tags.
func (t *TopologySpreadConstraints) UnmarshalYAML(unmarshal func(interface{}) error) error {
	var raw []map[string]interface{}
	if err := unmarshal(&raw); err != nil {
		return err
	}
	data, err := yaml.Marshal(raw)
	if err != nil {
		return err
	}
	return sigyaml.Unmarshal(data, t)
}
// PredictionJobResourceRequestConfig holds the default Spark driver/executor
// resource requests for batch prediction jobs in an environment.
// The *Request fields are Kubernetes resource.Quantity strings (e.g. "2", "2Gi").
type PredictionJobResourceRequestConfig struct {
ExecutorReplica int32 `yaml:"executor_replica"`
DriverCPURequest string `yaml:"driver_cpu_request"`
DriverMemoryRequest string `yaml:"driver_memory_request"`
ExecutorCPURequest string `yaml:"executor_cpu_request"`
ExecutorMemoryRequest string `yaml:"executor_memory_request"`
}
// ResourceRequestConfig holds default replica counts and resource requests for
// a deployed model or transformer. CPURequest and MemoryRequest are Kubernetes
// resource.Quantity strings, parsed with resource.MustParse in
// ParseDeploymentConfig.
type ResourceRequestConfig struct {
MinReplica int `yaml:"min_replica"`
MaxReplica int `yaml:"max_replica"`
CPURequest string `yaml:"cpu_request"`
MemoryRequest string `yaml:"memory_request"`
}
// GPUConfig describes one GPU offering available in an environment: its
// display name, requestable quantities, the Kubernetes resource type, node
// scheduling settings, and cost metadata.
type GPUConfig struct {
// Name is used as the key to identify the GPU configuration.
// It also specifies how the accelerator type will be written in the UI.
// Example: "NVIDIA T4"
Name string `yaml:"name"`
// Values limits how many GPUs can be requested by users.
// Example: "None", "1", "2", "4"
Values []string `yaml:"values"`
// Specifies how the accelerator type will be translated to
// K8s resource type. Example: nvidia.com/gpu
ResourceType string `yaml:"resource_type"`
// To deploy the models on a specific GPU node.
NodeSelector map[string]string `yaml:"node_selector"`
// To deploy the models on a specific GPU node via taints and tolerations.
Tolerations []corev1.Toleration `yaml:"tolerations"`
// MinMonthlyCostPerGPU is the minimum monthly cost per GPU, for example, if you enable time-sharing GPUs with 8 max shared clients,
// the minimum monthly cost per GPU is max_monthly_cost_per_gpu divided by 8.
// MaxMonthlyCostPerGPU is the maximum monthly cost if you use the whole GPU.
// https://cloud.google.com/compute/gpus-pricing#other-gpu-models
MinMonthlyCostPerGPU float64 `yaml:"min_monthly_cost_per_gpu"`
MaxMonthlyCostPerGPU float64 `yaml:"max_monthly_cost_per_gpu"`
}
// InitEnvironmentConfigs reads the YAML file at path, decodes it into a list
// of environment configs, and validates each entry. It returns an error if the
// file cannot be read, the YAML cannot be decoded, or any config is invalid.
func InitEnvironmentConfigs(path string) ([]*EnvironmentConfig, error) {
	cfgFile, err := os.ReadFile(path)
	if err != nil {
		// Wrap the read error so the underlying cause (permission, missing
		// file, ...) is preserved; the original message dropped it entirely.
		return nil, fmt.Errorf("unable to read deployment config file %s: %w", path, err)
	}
	var configs []*EnvironmentConfig
	if err = yaml.Unmarshal(cfgFile, &configs); err != nil {
		return nil, fmt.Errorf("unable to unmarshall deployment config file:\n %s,\ndue to: %w", cfgFile, err)
	}
	for _, env := range configs {
		if err := env.Validate(); err != nil {
			return nil, fmt.Errorf("invalid environment config: %w", err)
		}
		// Defensive double-check: K8sConfig is tagged validate:"required",
		// but guard explicitly so later dereferences can never hit nil.
		if env.K8sConfig == nil {
			return nil, fmt.Errorf("k8sConfig for %s is nil", env.Name)
		}
	}
	return configs, nil
}
// ParseDeploymentConfig builds the DeploymentConfig for one environment by
// combining its per-environment settings (envCfg) with the global application
// config (cfg).
//
// NOTE(review): it dereferences envCfg.DefaultDeploymentConfig and
// envCfg.DefaultTransformerConfig and calls resource.MustParse on their
// quantity strings, so nil defaults or malformed quantities panic — callers
// presumably guarantee these are set; confirm upstream.
func ParseDeploymentConfig(envCfg *EnvironmentConfig, cfg *Config) DeploymentConfig {
	deployDefaults := envCfg.DefaultDeploymentConfig
	transformerDefaults := envCfg.DefaultTransformerConfig
	svcDefaults := cfg.InferenceServiceDefaults

	modelRequests := &ResourceRequests{
		MinReplica:    deployDefaults.MinReplica,
		MaxReplica:    deployDefaults.MaxReplica,
		CPURequest:    resource.MustParse(deployDefaults.CPURequest),
		MemoryRequest: resource.MustParse(deployDefaults.MemoryRequest),
	}
	transformerRequests := &ResourceRequests{
		MinReplica:    transformerDefaults.MinReplica,
		MaxReplica:    transformerDefaults.MaxReplica,
		CPURequest:    resource.MustParse(transformerDefaults.CPURequest),
		MemoryRequest: resource.MustParse(transformerDefaults.MemoryRequest),
	}

	return DeploymentConfig{
		DeploymentTimeout:                     envCfg.DeploymentTimeout,
		NamespaceTimeout:                      envCfg.NamespaceTimeout,
		DefaultModelResourceRequests:          modelRequests,
		DefaultTransformerResourceRequests:    transformerRequests,
		MaxCPU:                                resource.MustParse(envCfg.MaxCPU),
		MaxMemory:                             resource.MustParse(envCfg.MaxMemory),
		MaxAllowedReplica:                     envCfg.MaxAllowedReplica,
		TopologySpreadConstraints:             envCfg.TopologySpreadConstraints,
		QueueResourcePercentage:               envCfg.QueueResourcePercentage,
		PyfuncGRPCOptions:                     cfg.PyfuncGRPCOptions,
		PodDisruptionBudget:                   envCfg.PodDisruptionBudget,
		GPUs:                                  envCfg.GPUs,
		StandardTransformer:                   cfg.StandardTransformerConfig,
		PyFuncPublisher:                       cfg.PyFuncPublisherConfig,
		UserContainerCPUDefaultLimit:          svcDefaults.UserContainerCPUDefaultLimit,
		UserContainerCPULimitRequestFactor:    svcDefaults.UserContainerCPULimitRequestFactor,
		UserContainerMemoryLimitRequestFactor: svcDefaults.UserContainerMemoryLimitRequestFactor,
		DefaultEnvVarsWithoutCPULimits:        svcDefaults.DefaultEnvVarsWithoutCPULimits,
	}
}