From 670873b9c90ae7b950d455ededbdf466b019b101 Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Thu, 1 Aug 2024 13:44:04 +0800 Subject: [PATCH 1/6] FlytePropeller Compiler Avoid Crash when Type not found Signed-off-by: Future-Outlier --- .../pkg/manager/impl/validation/validation.go | 16 ++++++++++ .../impl/validation/validation_test.go | 32 +++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/flyteadmin/pkg/manager/impl/validation/validation.go b/flyteadmin/pkg/manager/impl/validation/validation.go index 1958f25021..0f01882f41 100644 --- a/flyteadmin/pkg/manager/impl/validation/validation.go +++ b/flyteadmin/pkg/manager/impl/validation/validation.go @@ -1,6 +1,7 @@ package validation import ( + "fmt" "net/url" "strconv" "strings" @@ -282,11 +283,26 @@ func validateParameterMap(inputMap *core.ParameterMap, fieldName string) error { defaultValue := defaultInput.GetDefault() if defaultValue != nil { inputType := validators.LiteralTypeForLiteral(defaultValue) + + if inputType == nil { + return errors.NewFlyteAdminErrorf(codes.InvalidArgument, + fmt.Sprintf( + "Flyte Propeller encountered an issue while determining\n"+ + "the type of the default value for Parameter '%s' in '%s'.\n"+ + "Registered type from FlyteKit: [%s].\n"+ + "FlytePropeller needs to support latest FlyteIDL to support this type.\n"+ + "Suggested solution: Please update your Flyte Propeller image to the latest version and try again.", + name, fieldName, defaultInput.GetVar().GetType().String(), + ), + ) + } + if !validators.AreTypesCastable(inputType, defaultInput.GetVar().GetType()) { return errors.NewFlyteAdminErrorf(codes.InvalidArgument, "Type mismatch for Parameter %s in %s has type %s, expected %s", name, fieldName, defaultInput.GetVar().GetType().String(), inputType.String()) } + if defaultInput.GetVar().GetType().GetSimple() == core.SimpleType_DATETIME { // Make datetime specific validations return ValidateDatetime(defaultValue) diff --git a/flyteadmin/pkg/manager/impl/validation/validation_test.go b/flyteadmin/pkg/manager/impl/validation/validation_test.go index a9fed38ee9..f07f38c85f 100644 --- a/flyteadmin/pkg/manager/impl/validation/validation_test.go +++ b/flyteadmin/pkg/manager/impl/validation/validation_test.go @@ -320,6 +320,38 @@ func TestValidateParameterMap(t *testing.T) { err := validateParameterMap(&exampleMap, "some text") assert.NoError(t, err) }) + t.Run("invalid because inputType is nil", func(t *testing.T) { + // Create a literal that will cause LiteralTypeForLiteral to return nil. + // For example, a scalar with no value. + unsupportedLiteral := &core.Literal{ + Value: &core.Literal_Scalar{ + Scalar: &core.Scalar{}, + }, + } + + exampleMap := core.ParameterMap{ + Parameters: map[string]*core.Parameter{ + "foo": { + Var: &core.Variable{ + // 1000 means an unsupported type + Type: &core.LiteralType{Type: &core.LiteralType_Simple{Simple: 1000}}, + }, + Behavior: &core.Parameter_Default{ + Default: unsupportedLiteral, + }, + }, + }, + } + err := validateParameterMap(&exampleMap, "test_field_name") + assert.Error(t, err) + fmt.Println(err.Error()) + expectedErrMsg := "Flyte Propeller encountered an issue while determining\n" + + "the type of the default value for Parameter 'foo' in 'test_field_name'.\n" + + "Registered type from FlyteKit: [simple:1000].\n" + + "FlytePropeller needs to support latest FlyteIDL to support this type.\n" + + "Suggested solution: Please update your Flyte Propeller image to the latest version and try again." + assert.Equal(t, expectedErrMsg, err.Error()) + }) } func TestValidateToken(t *testing.T) { From 70c8ac346657291cea80fb050f1324e524d6cdfa Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Thu, 1 Aug 2024 14:52:56 +0800 Subject: [PATCH 2/6] Update pingsu's error message advices Signed-off-by: Future-Outlier Co-authored-by: pingsutw --- .../pkg/manager/impl/validation/validation.go | 9 +++++---- .../impl/validation/validation_test.go | 20 ++++++++++++------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/flyteadmin/pkg/manager/impl/validation/validation.go b/flyteadmin/pkg/manager/impl/validation/validation.go index 0f01882f41..55c45db9bb 100644 --- a/flyteadmin/pkg/manager/impl/validation/validation.go +++ b/flyteadmin/pkg/manager/impl/validation/validation.go @@ -287,11 +287,12 @@ func validateParameterMap(inputMap *core.ParameterMap, fieldName string) error { if inputType == nil { return errors.NewFlyteAdminErrorf(codes.InvalidArgument, fmt.Sprintf( - "Flyte Propeller encountered an issue while determining\n"+ + "Flyte encountered an issue while determining\n"+ "the type of the default value for Parameter '%s' in '%s'.\n"+ - "Registered type from FlyteKit: [%s].\n"+ - "FlytePropeller needs to support latest FlyteIDL to support this type.\n"+ - "Suggested solution: Please update your Flyte Propeller image to the latest version and try again.", + "Registered type: [%s].\n"+ + "Flyte needs to support the latest FlyteIDL to support this type.\n"+ + "Suggested solution: Please update all of your Flyte images to the latest version and "+ + "try again.", name, fieldName, defaultInput.GetVar().GetType().String(), ), ) diff --git a/flyteadmin/pkg/manager/impl/validation/validation_test.go b/flyteadmin/pkg/manager/impl/validation/validation_test.go index f07f38c85f..1aa0bc7bab 100644 --- a/flyteadmin/pkg/manager/impl/validation/validation_test.go +++ b/flyteadmin/pkg/manager/impl/validation/validation_test.go @@ -329,9 +329,11 @@ func TestValidateParameterMap(t *testing.T) { }, } + name := "foo" + fieldName := "test_field_name" exampleMap := core.ParameterMap{ Parameters: map[string]*core.Parameter{ - "foo": { + name: { Var: &core.Variable{ // 1000 means an unsupported type Type: &core.LiteralType{Type: &core.LiteralType_Simple{Simple: 1000}}, @@ -342,14 +344,18 @@ func TestValidateParameterMap(t *testing.T) { }, }, } - err := validateParameterMap(&exampleMap, "test_field_name") + err := validateParameterMap(&exampleMap, fieldName) assert.Error(t, err) fmt.Println(err.Error()) - expectedErrMsg := "Flyte Propeller encountered an issue while determining\n" + - "the type of the default value for Parameter 'foo' in 'test_field_name'.\n" + - "Registered type from FlyteKit: [simple:1000].\n" + - "FlytePropeller needs to support latest FlyteIDL to support this type.\n" + - "Suggested solution: Please update your Flyte Propeller image to the latest version and try again." + expectedErrMsg := fmt.Sprintf( + "Flyte encountered an issue while determining\n"+ + "the type of the default value for Parameter '%s' in '%s'.\n"+ + "Registered type: [%s].\n"+ + "Flyte needs to support the latest FlyteIDL to support this type.\n"+ + "Suggested solution: Please update all of your Flyte images to the latest version and "+ + "try again.", + name, fieldName, exampleMap.Parameters[name].GetVar().GetType().String(), + ) assert.Equal(t, expectedErrMsg, err.Error()) }) } From 38dfa50ed4a6404a94bad7b58b4aee14846507c6 Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Fri, 2 Aug 2024 07:46:14 +0800 Subject: [PATCH 3/6] fix lint Signed-off-by: Future-Outlier --- .../validation/launch_plan_validator_test.go | 14 +- rfc/system/5606-json-idl.md | 322 ++++++++++++++++++ 2 files changed, 331 insertions(+), 5 deletions(-) create mode 100644 rfc/system/5606-json-idl.md diff --git a/flyteadmin/pkg/manager/impl/validation/launch_plan_validator_test.go b/flyteadmin/pkg/manager/impl/validation/launch_plan_validator_test.go index 86bfc5c6b7..178c2b497b 100644 --- a/flyteadmin/pkg/manager/impl/validation/launch_plan_validator_test.go +++ b/flyteadmin/pkg/manager/impl/validation/launch_plan_validator_test.go @@ -13,6 +13,10 @@ import ( "github.com/flyteorg/flyte/flytestdlib/utils" ) +const ( + foo = "foo" +) + var lpApplicationConfig = testutils.GetApplicationConfigWithDefaultDomains() func getWorkflowInterface() *core.TypedInterface { @@ -344,7 +348,7 @@ func TestValidateSchedule_KickoffTimeArgPointsAtWrongType(t *testing.T) { request := testutils.GetLaunchPlanRequestWithDeprecatedCronSchedule("* * * * * *") inputMap := &core.ParameterMap{ Parameters: map[string]*core.Parameter{ - "foo": { + foo: { Var: &core.Variable{ Type: &core.LiteralType{Type: &core.LiteralType_Simple{Simple: core.SimpleType_STRING}}, }, @@ -354,7 +358,7 @@ func TestValidateSchedule_KickoffTimeArgPointsAtWrongType(t *testing.T) { }, }, } - request.Spec.EntityMetadata.Schedule.KickoffTimeInputArg = "foo" + request.Spec.EntityMetadata.Schedule.KickoffTimeInputArg = foo err := validateSchedule(request, inputMap) assert.NotNil(t, err) @@ -364,7 +368,7 @@ func TestValidateSchedule_NoRequired(t *testing.T) { request := testutils.GetLaunchPlanRequestWithDeprecatedCronSchedule("* * * * * *") inputMap := &core.ParameterMap{ Parameters: map[string]*core.Parameter{ - "foo": { + foo: { Var: &core.Variable{ Type: &core.LiteralType{Type: &core.LiteralType_Simple{Simple: core.SimpleType_STRING}}, }, @@ -383,7 +387,7 @@ func TestValidateSchedule_KickoffTimeBound(t *testing.T) { request := testutils.GetLaunchPlanRequestWithDeprecatedCronSchedule("* * * * * *") inputMap := &core.ParameterMap{ Parameters: map[string]*core.Parameter{ - "foo": { + foo: { Var: &core.Variable{ Type: &core.LiteralType{Type: &core.LiteralType_Simple{Simple: core.SimpleType_DATETIME}}, }, @@ -393,7 +397,7 @@ func TestValidateSchedule_KickoffTimeBound(t *testing.T) { }, }, } - request.Spec.EntityMetadata.Schedule.KickoffTimeInputArg = "foo" + request.Spec.EntityMetadata.Schedule.KickoffTimeInputArg = foo err := validateSchedule(request, inputMap) assert.Nil(t, err) diff --git a/rfc/system/5606-json-idl.md b/rfc/system/5606-json-idl.md new file mode 100644 index 0000000000..53359b9c60 --- /dev/null +++ b/rfc/system/5606-json-idl.md @@ -0,0 +1,322 @@ +# JSON IDL + +**Authors:** + +- [@Han-Ru](https://github.com/future-outlier) +- [@Ping-Su](https://github.com/pingsutw) +- [@Fabio M. Graetz](https://github.com/fg91) +- [@Yee Hing Tong](https://github.com/wild-endeavor) + +## 1 Executive Summary + +Use byte string in protobuf instead of json string to fix int is not supported in protobuf struct. + + +## 2 Motivation + +In Flytekit, when handling dataclasses, Pydantic base models, and dictionaries, we store data using JSON strings within struct protobuf. This approach causes issues with integers, as protobuf does not support int types, leading to their conversion to floats. This results in performance issues since we need to recursively iterate through all attributes/keys in dataclasses and dictionaries to ensure floats types are converted to int. In addition to performance issues, the required code is complicated and error prone. + +Note: We have more than 10 issues about dict, dataclass and Pydantic. + +This feature can solve them all. + +## 3 Proposed Implementation +### Flytekit Example +#### Before +```python +@task +def t1() -> dict: + ... + return {"a": 1} # protobuf Struct {"a": 1.0} + +@task +def t2(a: dict): + print(a["integer"]) # wrong, will be a float +``` +#### After +```python +Json = "json" + +@task +def t1() -> Annotated[dict, Json]: # Json Byte Strings + ... + return {"a": 1} # -> protobuf Json b'{"a": 1}' + +@task +def t2(a: Annotated[dict, Json]): + print(a["integer"]) # correct, it will be a integer +``` + +#### Note +- We use Annotated[dict, Json] instead of dict to ensure backward compatibility. + - This helps us avoid breaking changes. +- It makes it easier for the frontend to support JSON IDL after these features are merged. +- If users only upgrade Flytekit, we can ensure they won’t face error when using dict only. +(Since we have to upgrade both flytepropeller, flyteidl and flytekit to support JSON IDL.) + + +### How to create a byte string? +#### Use MsgPack to convert value to a byte string +##### Python +```python +import msgpack +# Encode +def to_literal(): + json_bytes = msgpack.dumps(v) + return Literal(scalar=Scalar(json=Json(json_bytes)), metadata={"format": "json"}) +# Decode +def to_python_value(): + json_bytes = lv.scalar.json.value + return msgpack.loads(json_bytes) +``` +reference: https://github.com/msgpack/msgpack-python + +##### Golang +```go +package main + +import ( + "fmt" + "github.com/vmihailenco/msgpack/v5" +) + +func main() { + // Example data to encode + data := map[string]int{"a": 1} + + // Encode the data + encodedData, err := msgpack.Marshal(data) + if err != nil { + panic(err) + } + + // Print the encoded data + fmt.Printf("Encoded data: %x\n", encodedData) // Output: 81a16101 + + // Decode the data + var decodedData map[string]int + err = msgpack.Unmarshal(encodedData, &decodedData) + if err != nil { + panic(err) + } + + // Print the decoded data + fmt.Printf("Decoded data: %+v\n", decodedData) // Output: map[a:1] +} +``` + +reference: https://github.com/vmihailenco/msgpack + +##### JavaScript +```javascript +import msgpack5 from 'msgpack5'; + +// Create a MessagePack instance +const msgpack = msgpack5(); + +// Example data to encode +const data = { a: 1 }; + +// Encode the data +const encodedData = msgpack.encode(data); + +// Print the encoded data +console.log(encodedData); // + +// Decode the data +const decodedData = msgpack.decode(encodedData); + +// Print the decoded data +console.log(decodedData); // { a: 1 } +``` +reference: https://github.com/msgpack/msgpack-javascript + + +### FlyteIDL +```proto +message Json { + bytes value = 1; +} + +message Scalar { + oneof value { + Primitive primitive = 1; + Blob blob = 2; + Binary binary = 3; + Schema schema = 4; + Void none_type = 5; + Error error = 6; + google.protobuf.Struct generic = 7; + StructuredDataset structured_dataset = 8; + Union union = 9; + Json json = 10; // New Type + } +} +``` + +### FlytePropeller +1. Attribute Access for dictionary, Datalcass, and Pydantic in workflow. +Dict[type, type] is supported already, we have to support Datalcass and Pydantic now. +```python +from flytekit import task, workflow +from dataclasses import dataclass + +@dataclass +class DC: + a: int + +@task +def t1() -> DC: + return DC(a=1) + +@task +def t2(x: int): + print("x:", x) + return + +@workflow +def wf(): + o = t1() + t2(x=o.a) +``` +2. Create a Literal Type for Scalar when doing type validation. +```go +func literalTypeForScalar(scalar *core.Scalar) *core.LiteralType { + ... + case *core.Scalar_Json: + literalType = &core.LiteralType\ + {Type: &core.LiteralType_Simple{Simple: core.SimpleType_JSON}} + ... + return literalType +} +``` +3. Support input and default input +```go +// Literal Input +func ExtractFromLiteral(literal *core.Literal) (interface{}, error) { + switch literalValue := literal.Value.(type) { + case *core.Literal_Scalar: + ... + case *core.Scalar_Json: + return scalarValue.Json.Value, nil + } +} +// Default Input +func MakeDefaultLiteralForType(typ *core.LiteralType) (*core.Literal, error) { + switch t := typ.GetType().(type) { + case *core.LiteralType_Simple: + ... + case core.SimpleType_JSON: + return &core.Literal{ + Value: &core.Literal_Scalar{ + Scalar: &core.Scalar{ + Value: &core.Scalar_Json{ + Json: &core.Json{ + Value: []byte("{}"), + }, + }, + }, + }, + }, nil + } +} +``` +### FlyteKit +#### pyflyte run +The behavior will remain unchanged. +We will pass the value to our class, which inherits from `click.ParamType`, and use the corresponding type transformer to convert the input to the correct type. + +#### Dict Transformer +##### Before +###### Convert Python Value to Literal +- Dict[type, type] uses type hints to construct a LiteralMap. +- `dict` uses `json.dumps` to turn a `dict` value to a JSON string, and store it to protobuf Struct . +###### Convert Literal to Python Value +- `Dict[type, type]` uses type hints to convert LiteralMap to Python Value. +- `dict` uses `json.loads` to turn a JSON string to a dict value and store it to protobuf Struct . +##### After +###### Convert Python Value to Literal +- `Dict[type, type]` stays the same. +- `dict` uses `msgpack.dumps` to turn a dict value to a JSON byte string, and store is to protobuf Json . + +###### Convert Literal to Python Value + +`Dict[type, type]` uses type hints to convert LiteralMap to Python Value. + +`dict` uses `msgpack.loads` to turn a JSON byte string to a `dict` value and store it to protobuf `Struct` . + +#### Dataclass Transformer +##### Before +###### Convert Python Value to Literal +Uses `mashumaro JSON Encoder` to turn a dataclass value to a JSON string, and store it to protobuf `Struct` . +Note: For `FlyteTypes`, we will inherit mashumaro `SerializableType` to define our own serialization behavior, which includes upload file to remote storage. + +###### Convert Literal to Python Value +Uses `mashumaro JSON Decoder` to turn a JSON string to a python value, and recursively fixed int attributes to int (it will be float because we stored it in to `Struct`). + +Note: For `FlyteTypes`, we will inherit the mashumaro `SerializableType` to define our own serialization behavior, which includes uploading files to remote storage. +##### After +###### Convert Python Value to Literal +Uses `msgpack.dumps()` to turn a Python value into a byte string. +Note: For `FlyteTypes`, we will need to customize serialization behavior by msgpack reference here. + +https://github.com/msgpack/msgpack-python?tab=readme-ov-file#packingunpacking-of-custom-data-type + +###### Convert Literal to Python Value + +Uses `msgpack.loads()` to turn a byte string into a Python value. + +Note: For `FlyteTypes`, we will need to customize deserialization behavior by `msgpack` reference here. + +https://github.com/msgpack/msgpack-python?tab=readme-ov-file#packingunpacking-of-custom-data-type + + +#### Pydantic Transformer +##### Before +###### Convert Python Value to Literal +Convert `BaseModel` to a JSON string, and then convert it to a Protobuf `Struct`. +###### Convert Literal to Python Value +Convert Protobuf `Struct` to a JSON string and then convert it to a `BaseModel`. +##### After +###### Convert Python Value to Literal +Convert the Pydantic `BaseModel` to a JSON string, then convert the JSON string to a `dictionary`, and finally, convert it to a `byte string` using msgpack. +###### Convert Literal to Python Value +Convert `byte string` to a `dictionary` using `msgpack`, then convert dictionary to a JSON string, and finally, convert it to Pydantic `BaseModel`. + +### FlyteCtl +In Flytectl, we can construct input for the execution, so we have to make sure the values we passed to FlyteAdmin can all be constructed to Literal. + +https://github.com/flyteorg/flytectl/blob/131d6a20c7db601ca9156b8d43d243bc88669829/cmd/create/serialization_utils.go#L48 + +### FlyteConsole +#### Show input/output on FlyteConsole +We will get node’s input output literal value by FlyteAdmin’s API, and get the json byte string in the literal value. + +We can use MsgPack dumps the json byte string to a dictionary, and shows it to the flyteconsole. +#### Construct Input +We should use `msgpack.encode` to encode input value and store it to the literal’s json field. + + + +## 4 Metrics & Dashboards + +None + +## 5 Drawbacks +There's no breaking changes if we use `Annotated[dict, Json]`, but we need to be very careful about will there be any breaking changes. + +## 6 Alternatives +Use UTF-8 format to encode and decode, this will be more easier for implementation, but maybe will cause performance issue when using Pydantic Transformer. + +## 7 Potential Impact and Dependencies + +*Here, we aim to be mindful of our environment and generate empathy towards others who may be impacted by our decisions.* + +- *What other systems or teams are affected by this proposal?* +- *How could this be exploited by malicious attackers?* + +## 8 Unresolved questions +I am not sure use UTF-8 format or msgpack to encode and decode is a better option. + +## 9 Conclusion +Whether use UTF-8 format or msgpack to encode and decode, we will definitely do it. From 28cef8c53c86c8fc81918060fc54647af24494ef Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Fri, 2 Aug 2024 07:59:44 +0800 Subject: [PATCH 4/6] Trigger CI Signed-off-by: Future-Outlier From ef97c9aed4dbfe3d81e374ee89f12fb619b42d77 Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Fri, 2 Aug 2024 07:59:47 +0800 Subject: [PATCH 5/6] Trigger CI Signed-off-by: Future-Outlier From 4cdce3e3490675fb195c81d8e4f85d3a09d91d9b Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Fri, 2 Aug 2024 09:20:34 +0800 Subject: [PATCH 6/6] rm rfc/system/5606-json-idl.md Signed-off-by: Future-Outlier --- rfc/system/5606-json-idl.md | 322 ------------------------------------ 1 file changed, 322 deletions(-) delete mode 100644 rfc/system/5606-json-idl.md diff --git a/rfc/system/5606-json-idl.md b/rfc/system/5606-json-idl.md deleted file mode 100644 index 53359b9c60..0000000000 --- a/rfc/system/5606-json-idl.md +++ /dev/null @@ -1,322 +0,0 @@ -# JSON IDL - -**Authors:** - -- [@Han-Ru](https://github.com/future-outlier) -- [@Ping-Su](https://github.com/pingsutw) -- [@Fabio M. Graetz](https://github.com/fg91) -- [@Yee Hing Tong](https://github.com/wild-endeavor) - -## 1 Executive Summary - -Use byte string in protobuf instead of json string to fix int is not supported in protobuf struct. - - -## 2 Motivation - -In Flytekit, when handling dataclasses, Pydantic base models, and dictionaries, we store data using JSON strings within struct protobuf. This approach causes issues with integers, as protobuf does not support int types, leading to their conversion to floats. This results in performance issues since we need to recursively iterate through all attributes/keys in dataclasses and dictionaries to ensure floats types are converted to int. In addition to performance issues, the required code is complicated and error prone. - -Note: We have more than 10 issues about dict, dataclass and Pydantic. - -This feature can solve them all. - -## 3 Proposed Implementation -### Flytekit Example -#### Before -```python -@task -def t1() -> dict: - ... - return {"a": 1} # protobuf Struct {"a": 1.0} - -@task -def t2(a: dict): - print(a["integer"]) # wrong, will be a float -``` -#### After -```python -Json = "json" - -@task -def t1() -> Annotated[dict, Json]: # Json Byte Strings - ... - return {"a": 1} # -> protobuf Json b'{"a": 1}' - -@task -def t2(a: Annotated[dict, Json]): - print(a["integer"]) # correct, it will be a integer -``` - -#### Note -- We use Annotated[dict, Json] instead of dict to ensure backward compatibility. - - This helps us avoid breaking changes. -- It makes it easier for the frontend to support JSON IDL after these features are merged. -- If users only upgrade Flytekit, we can ensure they won’t face error when using dict only. -(Since we have to upgrade both flytepropeller, flyteidl and flytekit to support JSON IDL.) - - -### How to create a byte string? -#### Use MsgPack to convert value to a byte string -##### Python -```python -import msgpack -# Encode -def to_literal(): - json_bytes = msgpack.dumps(v) - return Literal(scalar=Scalar(json=Json(json_bytes)), metadata={"format": "json"}) -# Decode -def to_python_value(): - json_bytes = lv.scalar.json.value - return msgpack.loads(json_bytes) -``` -reference: https://github.com/msgpack/msgpack-python - -##### Golang -```go -package main - -import ( - "fmt" - "github.com/vmihailenco/msgpack/v5" -) - -func main() { - // Example data to encode - data := map[string]int{"a": 1} - - // Encode the data - encodedData, err := msgpack.Marshal(data) - if err != nil { - panic(err) - } - - // Print the encoded data - fmt.Printf("Encoded data: %x\n", encodedData) // Output: 81a16101 - - // Decode the data - var decodedData map[string]int - err = msgpack.Unmarshal(encodedData, &decodedData) - if err != nil { - panic(err) - } - - // Print the decoded data - fmt.Printf("Decoded data: %+v\n", decodedData) // Output: map[a:1] -} -``` - -reference: https://github.com/vmihailenco/msgpack - -##### JavaScript -```javascript -import msgpack5 from 'msgpack5'; - -// Create a MessagePack instance -const msgpack = msgpack5(); - -// Example data to encode -const data = { a: 1 }; - -// Encode the data -const encodedData = msgpack.encode(data); - -// Print the encoded data -console.log(encodedData); // - -// Decode the data -const decodedData = msgpack.decode(encodedData); - -// Print the decoded data -console.log(decodedData); // { a: 1 } -``` -reference: https://github.com/msgpack/msgpack-javascript - - -### FlyteIDL -```proto -message Json { - bytes value = 1; -} - -message Scalar { - oneof value { - Primitive primitive = 1; - Blob blob = 2; - Binary binary = 3; - Schema schema = 4; - Void none_type = 5; - Error error = 6; - google.protobuf.Struct generic = 7; - StructuredDataset structured_dataset = 8; - Union union = 9; - Json json = 10; // New Type - } -} -``` - -### FlytePropeller -1. Attribute Access for dictionary, Datalcass, and Pydantic in workflow. -Dict[type, type] is supported already, we have to support Datalcass and Pydantic now. -```python -from flytekit import task, workflow -from dataclasses import dataclass - -@dataclass -class DC: - a: int - -@task -def t1() -> DC: - return DC(a=1) - -@task -def t2(x: int): - print("x:", x) - return - -@workflow -def wf(): - o = t1() - t2(x=o.a) -``` -2. Create a Literal Type for Scalar when doing type validation. -```go -func literalTypeForScalar(scalar *core.Scalar) *core.LiteralType { - ... - case *core.Scalar_Json: - literalType = &core.LiteralType\ - {Type: &core.LiteralType_Simple{Simple: core.SimpleType_JSON}} - ... - return literalType -} -``` -3. Support input and default input -```go -// Literal Input -func ExtractFromLiteral(literal *core.Literal) (interface{}, error) { - switch literalValue := literal.Value.(type) { - case *core.Literal_Scalar: - ... - case *core.Scalar_Json: - return scalarValue.Json.Value, nil - } -} -// Default Input -func MakeDefaultLiteralForType(typ *core.LiteralType) (*core.Literal, error) { - switch t := typ.GetType().(type) { - case *core.LiteralType_Simple: - ... - case core.SimpleType_JSON: - return &core.Literal{ - Value: &core.Literal_Scalar{ - Scalar: &core.Scalar{ - Value: &core.Scalar_Json{ - Json: &core.Json{ - Value: []byte("{}"), - }, - }, - }, - }, - }, nil - } -} -``` -### FlyteKit -#### pyflyte run -The behavior will remain unchanged. -We will pass the value to our class, which inherits from `click.ParamType`, and use the corresponding type transformer to convert the input to the correct type. - -#### Dict Transformer -##### Before -###### Convert Python Value to Literal -- Dict[type, type] uses type hints to construct a LiteralMap. -- `dict` uses `json.dumps` to turn a `dict` value to a JSON string, and store it to protobuf Struct . -###### Convert Literal to Python Value -- `Dict[type, type]` uses type hints to convert LiteralMap to Python Value. -- `dict` uses `json.loads` to turn a JSON string to a dict value and store it to protobuf Struct . -##### After -###### Convert Python Value to Literal -- `Dict[type, type]` stays the same. -- `dict` uses `msgpack.dumps` to turn a dict value to a JSON byte string, and store is to protobuf Json . - -###### Convert Literal to Python Value - -`Dict[type, type]` uses type hints to convert LiteralMap to Python Value. - -`dict` uses `msgpack.loads` to turn a JSON byte string to a `dict` value and store it to protobuf `Struct` . - -#### Dataclass Transformer -##### Before -###### Convert Python Value to Literal -Uses `mashumaro JSON Encoder` to turn a dataclass value to a JSON string, and store it to protobuf `Struct` . -Note: For `FlyteTypes`, we will inherit mashumaro `SerializableType` to define our own serialization behavior, which includes upload file to remote storage. - -###### Convert Literal to Python Value -Uses `mashumaro JSON Decoder` to turn a JSON string to a python value, and recursively fixed int attributes to int (it will be float because we stored it in to `Struct`). - -Note: For `FlyteTypes`, we will inherit the mashumaro `SerializableType` to define our own serialization behavior, which includes uploading files to remote storage. -##### After -###### Convert Python Value to Literal -Uses `msgpack.dumps()` to turn a Python value into a byte string. -Note: For `FlyteTypes`, we will need to customize serialization behavior by msgpack reference here. - -https://github.com/msgpack/msgpack-python?tab=readme-ov-file#packingunpacking-of-custom-data-type - -###### Convert Literal to Python Value - -Uses `msgpack.loads()` to turn a byte string into a Python value. - -Note: For `FlyteTypes`, we will need to customize deserialization behavior by `msgpack` reference here. - -https://github.com/msgpack/msgpack-python?tab=readme-ov-file#packingunpacking-of-custom-data-type - - -#### Pydantic Transformer -##### Before -###### Convert Python Value to Literal -Convert `BaseModel` to a JSON string, and then convert it to a Protobuf `Struct`. -###### Convert Literal to Python Value -Convert Protobuf `Struct` to a JSON string and then convert it to a `BaseModel`. -##### After -###### Convert Python Value to Literal -Convert the Pydantic `BaseModel` to a JSON string, then convert the JSON string to a `dictionary`, and finally, convert it to a `byte string` using msgpack. -###### Convert Literal to Python Value -Convert `byte string` to a `dictionary` using `msgpack`, then convert dictionary to a JSON string, and finally, convert it to Pydantic `BaseModel`. - -### FlyteCtl -In Flytectl, we can construct input for the execution, so we have to make sure the values we passed to FlyteAdmin can all be constructed to Literal. - -https://github.com/flyteorg/flytectl/blob/131d6a20c7db601ca9156b8d43d243bc88669829/cmd/create/serialization_utils.go#L48 - -### FlyteConsole -#### Show input/output on FlyteConsole -We will get node’s input output literal value by FlyteAdmin’s API, and get the json byte string in the literal value. - -We can use MsgPack dumps the json byte string to a dictionary, and shows it to the flyteconsole. -#### Construct Input -We should use `msgpack.encode` to encode input value and store it to the literal’s json field. - - - -## 4 Metrics & Dashboards - -None - -## 5 Drawbacks -There's no breaking changes if we use `Annotated[dict, Json]`, but we need to be very careful about will there be any breaking changes. - -## 6 Alternatives -Use UTF-8 format to encode and decode, this will be more easier for implementation, but maybe will cause performance issue when using Pydantic Transformer. - -## 7 Potential Impact and Dependencies - -*Here, we aim to be mindful of our environment and generate empathy towards others who may be impacted by our decisions.* - -- *What other systems or teams are affected by this proposal?* -- *How could this be exploited by malicious attackers?* - -## 8 Unresolved questions -I am not sure use UTF-8 format or msgpack to encode and decode is a better option. - -## 9 Conclusion -Whether use UTF-8 format or msgpack to encode and decode, we will definitely do it.