package databricks
import "time"

// View is an exported view of a job run, such as a notebook or dashboard.
type View struct {
Content string `json:"content"`
Name string `json:"name"`
Type string `json:"type"`
}

// RunState is a job run state.
type RunState struct {
LifeCycleState string `json:"life_cycle_state"` // TODO(daniel) make this an enum
ResultState string `json:"result_state"` // TODO(daniel) make this an enum
StateMessage string `json:"state_message"`
}

// JobTask is a job task.
type JobTask struct {
NotebookTask *NotebookTask `json:"notebook_task,omitempty"`
SparkJarTask *SparkJarTask `json:"spark_jar_task,omitempty"`
SparkPythonTask *SparkPythonTask `json:"spark_python_task,omitempty"`
SparkSubmitTask *SparkSubmitTask `json:"spark_submit_task,omitempty"`
}

// JobRunGetResponse is the metadata of a run.
type JobRunGetResponse struct {
JobID int64 `json:"job_id"`
RunID int64 `json:"run_id"`
NumberInJob int64 `json:"number_in_job"`
OriginalAttemptRunID int64 `json:"original_attempt_run_id"`
State RunState `json:"state"`
Schedule *CronSchedule `json:"schedule"`
Task JobTask `json:"task"`
ClusterSpec ClusterSpec `json:"cluster_spec"`
ClusterInstance ClusterInstance `json:"cluster_instance"`
OverridingParameters RunParameters `json:"overriding_parameters"`
StartTime int64 `json:"start_time"` // epoch milliseconds
SetupDuration int64 `json:"setup_duration"` // milliseconds
ExecutionDuration int64 `json:"execution_duration"` // milliseconds
CleanupDuration int64 `json:"cleanup_duration"` // milliseconds
Trigger string `json:"trigger"` // TODO(daniel) enum this?
CreatorUserName string `json:"creator_user_name"`
RunPageURL *string `json:"run_page_url"`
}

// JobRunListRequest is used to request Run information.
type JobRunListRequest struct {
ActiveOnly *bool `json:"active_only,omitempty"`
CompleteOnly *bool `json:"complete_only,omitempty"`
JobID int64 `json:"job_id"`
Offset int `json:"offset"`
Limit int `json:"limit"`
}

// JobSubmitSettings is used to configure a job for submission.
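//
// A minimal one-time submission might pair a new cluster with a notebook
// task; the values below are illustrative only:
//
//     workers := int32(2)
//     settings := JobSubmitSettings{
//         NewCluster: &NewCluster{
//             SparkVersion: "5.3.x-scala2.11",
//             NodeTypeID:   "r3.xlarge",
//             NumWorkers:   &workers,
//         },
//         NotebookTask: &NotebookTask{
//             NotebookPath: "/Users/someone@example.com/my-notebook",
//         },
//     }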
type JobSubmitSettings struct {
ExistingClusterID *string `json:"existing_cluster_id"`
NewCluster *NewCluster `json:"new_cluster"`
NotebookTask *NotebookTask `json:"notebook_task"`
SparkJarTask *SparkJarTask `json:"spark_jar_task"`
SparkPythonTask *SparkPythonTask `json:"spark_python_task"`
SparkSubmitTask *SparkSubmitTask `json:"spark_submit_task"`
RunName *string `json:"run_name"`
Libraries []Library `json:"libraries"`
TimeoutSeconds *int32 `json:"timeout_seconds"`
}

// JobRunNowSettings is used to configure a job for the RunNow API.
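//
// Only the parameter list matching the job's task type should be set. For a
// notebook job, for example (illustrative values):
//
//     settings := JobRunNowSettings{
//         JobID: 42,
//         NotebookParams: []ParamPair{
//             {Key: "date", Value: "2020-01-01"},
//         },
//     }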
type JobRunNowSettings struct {
JobID int64 `json:"job_id"`
JarParams []string `json:"jar_params,omitempty"`
NotebookParams []ParamPair `json:"notebook_params,omitempty"`
PythonParams []string `json:"python_params,omitempty"`
SparkSubmitParams []string `json:"spark_submit_params,omitempty"`
}

// JobGetResponse is returned when getting job information.
type JobGetResponse struct {
JobID int64 `json:"job_id"`
CreatorUserName string `json:"creator_user_name"`
Settings JobSettings `json:"settings"`
CreatedTime int64 `json:"created_time"` // epoch milliseconds
}

// JobEmailNotifications is a set of email addresses that will be notified
// when runs of this job begin or complete, as well as when this job is
// deleted.
type JobEmailNotifications struct {
OnStart []string `json:"on_start"`
OnSuccess []string `json:"on_success"`
OnFailure []string `json:"on_failure"`
}

// JobCreateRequest is used for creating new jobs.
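//
// For example, a nightly notebook job on an existing cluster might be created
// with a request like this (illustrative values):
//
//     clusterID := "1234-567890-abcde123"
//     retries := int32(1)
//     req := JobCreateRequest{
//         Name:              "nightly-report",
//         ExistingClusterID: &clusterID,
//         NotebookTask:      &NotebookTask{NotebookPath: "/Reports/nightly"},
//         MaxRetries:        &retries,
//         Schedule: &CronSchedule{
//             QuartzCronExpression: "0 0 2 * * ?",
//             TimezoneID:           "UTC",
//         },
//     }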
type JobCreateRequest struct {
ExistingClusterID *string `json:"existing_cluster_id,omitempty"`
NewCluster *NewCluster `json:"new_cluster,omitempty"`
NotebookTask *NotebookTask `json:"notebook_task,omitempty"`
SparkJarTask *SparkJarTask `json:"spark_jar_task,omitempty"`
SparkPythonTask *SparkPythonTask `json:"spark_python_task,omitempty"`
SparkSubmitTask *SparkSubmitTask `json:"spark_submit_task,omitempty"`
Name string `json:"name"`
Libraries []Library `json:"libraries"`
EmailNotifications *JobEmailNotifications `json:"email_notifications,omitempty"`
TimeoutSeconds *int32 `json:"timeout_seconds,omitempty"`
MaxRetries *int32 `json:"max_retries,omitempty"`
MinRetryIntervalMillis *int32 `json:"min_retry_interval_millis,omitempty"`
RetryOnTimeout *bool `json:"retry_on_timeout,omitempty"`
Schedule *CronSchedule `json:"schedule,omitempty"`
MaxConcurrentRuns *int32 `json:"max_concurrent_runs,omitempty"`
}

// ClusterInstance holds the identifiers for the cluster and Spark context
// used by a run.
// These two values together identify an execution context across all time.
type ClusterInstance struct {
ClusterID string `json:"cluster_id"`
SparkContextID string `json:"spark_context_id"`
}

// CronSchedule is a cron schedule.
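//
// The expression uses Quartz cron syntax with six or seven fields (seconds,
// minutes, hours, day-of-month, month, day-of-week, and an optional year);
// for example, "0 15 10 ? * *" fires at 10:15 AM every day. The timezone is
// a Java timezone ID such as "America/Los_Angeles".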
type CronSchedule struct {
QuartzCronExpression string `json:"quartz_cron_expression"`
TimezoneID string `json:"timezone_id"`
}

// NewCluster describes the settings for a new cluster.
type NewCluster struct {
NumWorkers *int32 `json:"num_workers"`
Autoscale *Autoscale `json:"autoscale"`
ClusterName string `json:"cluster_name"`
SparkVersion string `json:"spark_version"`
SparkConf map[string]string `json:"spark_conf"` // user-specified Spark configuration key-value pairs
AWSAttributes AWSAttributes `json:"aws_attributes"`
NodeTypeID string `json:"node_type_id"`
DriverNodeTypeID string `json:"driver_node_type_id"`
SSHPublicKeys []string `json:"ssh_public_keys"`
CustomTags []ClusterTag `json:"custom_tags"`
ClusterLogConf ClusterLogConf `json:"cluster_log_conf"`
InitScripts []InitScriptInfo `json:"init_scripts"`
SparkEnvVars SparkEnvPair `json:"spark_env_vars"`
AutoterminationMinutes *int32 `json:"autotermination_minutes"`
EnableElasticDisk bool `json:"enable_elastic_disk"`
}

// NotebookOutput is the output of a Notebook.
type NotebookOutput struct {
Result string `json:"result"`
}

// ParamPair is a key-value pair of notebook parameters.
type ParamPair struct {
Key string `json:"key"`
Value string `json:"value"`
}

// NotebookTask is a Notebook task.
type NotebookTask struct {
NotebookPath string `json:"notebook_path"`
BaseParameters []ParamPair `json:"base_parameters"`
}

// SparkJarTask is a Spark JAR task.
type SparkJarTask struct {
JarURI string `json:"jar_uri"`
MainClassName string `json:"main_class_name"`
Parameters []string `json:"parameters"`
}

// SparkPythonTask is a Spark Python task.
type SparkPythonTask struct {
PythonFile string `json:"python_file"`
Parameters []string `json:"parameters"`
}

// SparkSubmitTask is a spark-submit task.
type SparkSubmitTask struct {
Parameters []string `json:"parameters"`
}

// JobSettings are job settings.
type JobSettings struct {
ExistingClusterID *string `json:"existing_cluster_id"`
NewCluster *NewCluster `json:"new_cluster"`
NotebookTask *NotebookTask `json:"notebook_task"`
SparkJarTask *SparkJarTask `json:"spark_jar_task"`
SparkPythonTask *SparkPythonTask `json:"spark_python_task"`
SparkSubmitTask *SparkSubmitTask `json:"spark_submit_task"`
Name *string `json:"name"`
Libraries []Library `json:"libraries"`
EmailNotifications *JobEmailNotifications `json:"email_notifications,omitempty"`
TimeoutSeconds *int32 `json:"timeout_seconds"`
MaxRetries *int32 `json:"max_retries"`
MinRetryIntervalMillis *int32 `json:"min_retry_interval_millis"`
RetryOnTimeout *bool `json:"retry_on_timeout"`
Schedule *CronSchedule `json:"schedule"`
MaxConcurrentRuns *int32 `json:"max_concurrent_runs"`
}

// Job is a job.
type Job struct {
JobID int64 `json:"job_id"`
CreatorUserName string `json:"creator_user_name"`
Settings JobSettings `json:"settings"`
CreatedTime int64 `json:"created_time"` // epoch milliseconds; matches JobGetResponse
}

// ClusterSpec is a cluster specification.
type ClusterSpec struct {
ExistingClusterID *string `json:"existing_cluster_id"`
NewCluster *NewCluster `json:"new_cluster"`
Libraries []Library `json:"libraries"`
}

// RunParameters are parameters for this run. Only one of jar_params,
// python_params or notebook_params should be specified in the run-now request,
// depending on the type of job task. Jobs with jar task or python task take a
// list of position-based parameters, and jobs with notebook tasks take a key
// value map.
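//
// For example, a jar job takes position-based parameters, while a notebook
// job takes key-value NotebookParams instead (illustrative values):
//
//     overrides := RunParameters{
//         JarParams: []string{"2020-01-01", "full"},
//     }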
type RunParameters struct {
JarParams []string `json:"jar_params"`
NotebookParams []ParamPair `json:"notebook_params"`
PythonParams []string `json:"python_params"`
SparkSubmitParams []string `json:"spark_submit_params"`
}

// Run is all the information about a run except for its
// output. The output can be retrieved separately with the
// getRunOutput method.
type Run struct {
JobID int64 `json:"job_id"`
RunID int64 `json:"run_id"`
CreatorUserName string `json:"creator_user_name"`
NumberInJob int64 `json:"number_in_job"`
OriginalAttemptRunID int64 `json:"original_attempt_run_id"`
State RunState `json:"state"`
Schedule CronSchedule `json:"schedule"`
Task JobTask `json:"task"`
ClusterSpec ClusterSpec `json:"cluster_spec"`
ClusterInstance ClusterInstance `json:"cluster_instance"`
OverridingParameters RunParameters `json:"overriding_parameters"`
StartTime int64 `json:"start_time"` // epoch milliseconds; matches JobRunGetResponse
SetupDuration int64 `json:"setup_duration"` // milliseconds
ExecutionDuration int64 `json:"execution_duration"` // milliseconds
CleanupDuration int64 `json:"cleanup_duration"` // milliseconds
Trigger string `json:"trigger"` // TODO(daniel) enum this?
}