From 21b382e27aeb8b8ea0c91dbfc1c16ddd3f89f400 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=F0=9F=91=A8=F0=9F=8F=BC=E2=80=8D=F0=9F=92=BB=20Romain=20M?= =?UTF-8?q?arcadier-Muller?= Date: Tue, 7 Jul 2020 13:36:34 +0200 Subject: [PATCH] RFC 193: Removal of type unions Tracking issue: #193 --- text/0193-removal-of-type-unions.md | 484 ++++++++++++++++++++++++++++ 1 file changed, 484 insertions(+) create mode 100644 text/0193-removal-of-type-unions.md diff --git a/text/0193-removal-of-type-unions.md b/text/0193-removal-of-type-unions.md new file mode 100644 index 000000000..4a24d4b89 --- /dev/null +++ b/text/0193-removal-of-type-unions.md @@ -0,0 +1,484 @@ +--- +feature name: removal-of-type-unions +start date: 2020-07-06 +rfc pr: 194 +related issue: 193 +--- + +# Summary + +_Type unions_ is a feature of the TypeScript language that allows typing values as "one of several candidate types". +This is achieved by specifying a `|`-delimited list of candidate types (e.g: `Foo | Bar`). While this feature is common +in dynamic languages (**TypeScript**, **Python**, ...), it is almost always absent from statically typed languages +(**Java**, **C#**, ...). In those languages, the impossibility to represent _type unions_ natively results in degraded +developer experience. + +This RFC proposes to remove all usage of _type unions_ from the AWS CDK, and eventually the removal of the feature from +the `jsii` type model. + +# README + +Use of TypeScript _type unions_ is prohibited by `jsii` due to the lack of support of this language feature in several +of the target languages supported (or planned to be supported) by `jsii`: **C#**, **Go**, **Java**, and probably more +languages to come. Proper support of _type unions_ in those languages requires generating code that produces a degraded +developer experience in those languages. + +The recommended substitution pattern for APIs where _type unions_ would be the natural candidate in dynamic languages is +the introduction of a wrapper type with a private constructor and static factories for each of the candidate types: + +```ts +// recommended replacement for `Foo | Bar` (the name is illustrative only): +export class FooOrBar { + /** + * Static factory method for the `Foo` variant of the union. + */ + public static ofFoo(value: Foo): FooOrBar { + return new FooOrBar(value); + } + + /** + * Static factory method for the `Bar` variant of the union. + */ + public static ofBar(value: Bar): FooOrBar { + return new FooOrBar(value); + } + + /** + * Private members can leverage *type unions* since they are not exposed to users. + */ + private constructor(private readonly value: Foo | Bar) {} + + // Introduce inspections methods as needed + public isFoo(): boolean { + /*...*/ + } + public isBar(): boolean { + /*...*/ + } + + // And also accessors + public asFoo(): Foo | undefined { + return this.isFoo() ? (this.value as Foo) : undefined; + } + + public asBar(): Bar | undefined { + return this.isBar() ? (this.value as Bar) : undefined; + } +} +``` + +This patterns offers a better developer experience in statically typed languages, while retaining acceptable (although +not optimal) developer experience in dynamic languages. + +# Motivation + +Type unions create challenges in two layers of the `jsii` stack: + +1. certain combinations of candidate types lead to ambiguity in the `@jsii/kernel` when selecting a serialization type +1. code generated by `jsii-pacmak` in statically typed languages can sometimes not correctly express the full static + type of a value + +## Ambiguity in the `@jsii/kernel` + +Several bug reports (for example: [`aws/aws-cdk#8223`], [`aws/jsii#1025`], [`aws/jsii#1045`]) have been found to be +caused by the `@jsii/kernel` making incorrect serialization decisions when passing values from the _Kernel_ +(**JavaScript**) process to the _Host_ (**Java**, **C#**, ...) process when the value is an object literal (i.e: not an +instance of a class known to jsii, or a subclass thereof), where the passing point is typed as a union of interfaces +and/or structs. + +The `@jsii/kernel` must appropriately tag instances that are serialized out of the _Kernel_ process specifically so that +statically typed languages are able to construct the appropriate _proxy instance_: in those languages, users will not be +able to idiomatically convert an instance to another static type (for example, this would lead to a `ClassCastException` +being thrown in **Java**). + +[`aws/aws-cdk#8223`]: https://github.com/aws/aws-cdk/issues/8223 +[`aws/jsii#1025`]: https://github.com/aws/jsii/issues/1025 +[`aws/jsii#1045`]: https://github.com/aws/jsii/issues/1045 + +## Expressivity in static languages + +Usage of _type unions_ can occur either on inputs (parameters passed into a method or constructor) or outputs +(properties and values returned from method). + +### Output values + +Code generated by `jsii-pacmak` uses some common supertype for all classes (either the languages' built-in superclass, +or some jsii-specific superclass) as the return type of any method or property typed as a union. This does not provide +developers with any information about the types they can expect to receive from such methods. + +This opaque typing prevent compile-time type checking, which is one of the main value propositions of statically typed +languages. Combined with issues caused by [the ambiguity in the `@jsii/kernel`](#ambiguity-in-the-`@jsii/kernel`), this +can lead to crashes that are difficult or impossible to work around (as seen in [`aws/aws-cdk#8223`]). + +### Input values + +Statically typed languages typically support overloading methods and constructors with different signatures. This can be +leveraged to provide very nice APIs around _type union_ parameters. A specific overload could be rendered for each of +the candidate combinations for a particular method (or constructor). + +A drawback of this approach is that calls that accept several _type union_ parameters must have one overload per +possible combination of the various unions' candidates, which can cause proliferation that could be detrimental to +performance. This caveat is however likely acceptable as the accumulation of several _type unions_ on a single call is +uncommon, and most _type unions_ have relatively few candidates. + +The overload approach however does not offer a clean solution for collections of _type unions_. It isn't possible to +express those in any of the statically typed languages supported to this day. Fatal limitations of both **C#** and +**Java** (and possibly other statically typed languages to come) also significantly limit the scope of applicability of +an overload-based solution: + +- **C#** does not support property setter overload: the type accepted by a property setter must be the same as the one + return by the getter +- **Java** generics are _erased_ during compilation (where they are _reified_ in **C#**): overloads that differ only in + generic arguments (e.g: `void call(List)` and `void call(List)`) are invalid and will cause a + compilation failure, since they share the same _erasure_ (e.g: `void call(List)`) and a _Java Virtual Machine_ would + be unable to determine which implementation to dynamically dispatch to. + +Additionally, certain static languages (such as **Go**) do not support overloading at all, rendering this approach +entirely unsuitable. + +# Design Summary + +Removal of _type unions_ from the AWS CDK requires designing new APIs for those places where they are in use. This +includes two kinds of APIs: + +- [Generated as part of the CloudFormation Resources](#generated-code) +- [Manually authored as part of the AWS Construct Library or AWS CDK Core framework](#hand-written-code) + +# Detailed Design + +## Generated code + +As previously discussed, the _CloudFormation Resources_ ubiquitously rely on _type unions_ to allow providing late-bound +values to the various properties accepted by the resources. Late-binding values is necessary to support mutation APIs +which provide enhanced ergonomics for certain constructs, particularly where various components can be expected to +collaborate on the finalized configuration of a resource. + +All _type unions_ where late-bound values can be represented in the literal type of the value (`string`, `number` and +`any`) can be simplified to that literal type, since the required APIs already exist and the migration path for those +use-cases is already very well defined: using `Lazy` factory methods to manually wrap `IResolvable` instances. + +On the other hand, _type unions_ of `IResolvable` and complex types (including collections) cannot be resolved that +easily. In **TypeScript** and other dynamic languages (such as **Python**), the `Lazy.asAny` method can be used to wrap +an `IResolvable` and evade the type checker. This approach is however no workable in **C#**, **Go** and **Java** (and +likely other statically typed languages we may wish to support in the future). + +For those cases, one solution is to have users of statically typed languages use _[escape hatches]_ to forcefully +override properties with the desired late-bound value, since _[escape hatches]_ offer `any`-typed accessors. This +results in a degradation of ergonomics for statically typed languages, but not not imply a redution in available +features offered by the AWS CDK. + +An option is to generate specific wrapper types that effectively encapsulate the _type union_, however such wrapper +types must then be used for _all_ properties in order to avoid incurring an API breaking change if some _CloudFormation +Resource_ schema adds support for alternate value types on a given property (this is possible since CloudFormation +describes resource schemas using [JSON Schema]). This would result in a significant deterioration of the ergonomics of +the _CloudFormation Resource_ classes, but provides an API that is safe to use from any programming language. + +[escape hatches]: https://docs.aws.amazon.com/cdk/latest/guide/cfn_layer.html +[json schema]: https://json-schema.org + +## Hand-written code + +There are not currently many incidences of _type unions_ in the _AWS Construct Library_ or the _AWS CDK Core_ framework. +Per the research done in [Annex 1](#annex-1), only 7 types were observed to expose properties declared as _type unions_. +The small amount of occurrence seems favorable to replacing those APIs with alternate ones that do not use _type unions_ +(for example, introducing new variants and deprecating the current options). + +# Drawbacks + +## Removing usage in CDK + +Removing support for _type unions_ from `jsii` requires removing usage of those throughout the CDK codebase. _Type +unions_ are used pervasively in code generated for the _CloudFormation Resources_ in order to allow late binding of +properties that are fed by mutating APIs (for example, AWS Lambda environment variables). + +Many of those _type unions_ can be removed without consequences: there are many occurrences of `string | IResolvable` +and `number | IResolvable` where the union is not necessary, since the `Lazy` API allows representing late-bound values +directly as a `string` or `number`. + +The rest of uses consists in unions of `IResolvable` and a complex type (collections, structs, ...). In **TypeScript** +those can be addressed by using `Lazy.asAny`, however this approach is not workable in statically typed languages (`any` +disables all type checking, whereas statically typed languages will _require_ a correct _dynamic_ type to be provided). + +# Rationale and Alternatives + +Complete removal of _type unions_ appears to be the safest strategy going foward. It already targets languages without +support of _type unions_ where this has caused problems. Alternatives require addressing the +[ambiguity in the `@jsii/kernel`](#ambiguity-in-the-`@jsii/kernel`) as well as language-specific developer experience +enhancements. + +## Alternatives Considered + +Alternatives were considered on both sides of the problem: + +- [Removing ambiguity in the `@jsii/kernel`](#removing-ambiguity-in-the-`@jsii/kernel`) +- [Language-specific union modeling](#language-specific-union-modeling) + +### Removing ambiguity in the `@jsii/kernel` + +#### Dynamic disambiguation + +Since the `@jsii/kernel` has access to the type model, it has information about the candidate types including what their +members are. Matching the members of the object instance against the candidate types can often determine the correct +dynamic type of the value. + +This kind of matching can however be expensive (especially if _deep_ matching is necessary), and cannot remove all +ambiguity: **JavaScript** provides no runtime-available information about the signature of a method (making it +impossible to distinguish two methods with the same name but different signatures), and types that differ only in +optional properties are impossible to tell apart. This would likely require restricting _type unions_ so that instances +can be matched to exactly one of the candidates conclusively, for example by requiring that each type in a _type union_ +has at least one member that is not present in other candidates. + +#### Explicit disambiguation + +One solution to remove the ambiguity is to require developers manually annotate object literals returned through a _type +union_ with the actual type information for the value they are returning, using a helper library provided by `jsii`: + +```ts +import { typed } from '@jsii/types'; +// ... +export class Example { + // ... + public fooOrBar(): Foo | Bar { + return typed({ foo: true }, 'example.Foo'); + } + // ... +} +``` + +This would require specific machinery to be built into the `jsii` compiler to validate when this annotation is required +and fail compilation otherwise; or accepting that non-**TypeScript** (nor **Javascript**) users would receive a runtime +error if this was not done when it was required. + +The specific syntax for this explicit annotation could also be awkward to come up with: _interfaces_ have no +representation in **Javascript**, and an _interface_ name cannot be passed as an argument like a _class_ name would. +This means users would have to manually type the _jsii fully qualified name_ of the type by hand. Alternatively, a +sophisticated **TypeScript** _transformation_ would need to be added to the `jsii` compiler to transparently process the +annotation call, replacing an interface name with the appropriate _jsii fully qualified name_. + +#### API Extension + +The `@jsii/kernel` API could be extended so that instances can be tagged using unions, instead of necessarily narrowing +down to a single type. This way, the kernel would not have to make any decision beyond the statically available context +of a serialization. + +This requires more extensive work to be performed in each individual language runtime library. + +### Language-specific union modeling + +#### Generic model + +A generic union type can be introduced to be a stand-in for any _type union_ value. It would require an implementation +in each language that does not natively support unions, for example in **Java** it could look like: + +```java +public final class Union { + private final Object value; + + /** + * Checks whether this `Union` can be converted to an instance of `type`. + * + * @param type the dynamic type being tested. + * + * @returns {@code true} if this `Union`'s value is of type `type`. + */ + public boolean isInstanceOf(final Class type) { + return Unsafe.isInstanceOf(this.value, type); + } + + /** + * Converts the value of this `Union` to an instance of `type`. + * + * @param type the desired dynamic type of the return value. + * + * @return an instance of `type`. + * + * @throws ClassCastException if `isInstanceOf(type)` is `false`. + */ + public T as(final Class type) throws ClassCastException { + if (!this.isInstanceOf(type)) { throw new ClassCastException(/*...*/); } + return Unsafe.cast(this.value, type); + } +} +``` + +This solution gives the user the ability to check the dynamic type of a value and convert it to a suitable runtime type, +allowing them to write safer code, however it provides no more static information than the currently generated code +offers. There is still no ability to compile-time check types received from methods returning this. Additionally, this +API is non-idiomatic (users would likely instinctively attempt an `instanceof` guard, which would fail) and forces users +to write additional boilerplate code. + +#### Specific model + +Perhaps a better solution than a generic model, is a specifically crafted model for each location where a _type union_ +is used. The shape of the type would be similar, although it would have per-type inspectors, allowing minimal type +checking: + +```java +public final class FooOrBar { + private final Object value; + + public boolean isFoo() { + return Unsafe.isInstanceOf(this.value, Foo.class); + } + + public boolean isBar() { + return Unsafe.isInstanceOf(this.value, Bar.class); + } + + public Foo asFoo() throws ClassCastException { + if (!this.isFoo()) { throw new ClassCastException(/*...*/); } + return Unsafe.cast(this.value, Foo.class); + } + + public Foo asFoo() throws ClassCastException { + if (!this.isbar()) { throw new ClassCastException(/*...*/); } + return Unsafe.cast(this.value, Bar.class); + } +} +``` + +The challenge with this solution resides in _naming_: + +- _Naming_ of the type itself cannot be determined from the list of candidate types (since adding a new candidate type + would result in breaking the statically typed API for the module) + - This could be solved by _requiring_ developers explicitly name unions by exporting a type alias for those + (`export type FooOrBar = Foo | Bar;`). This would however require fundamentally changing the `jsii` compiler + architecture: the current mode of operation leverages the **TypeScript** type checker API, which does _not_ expose + type aliases. It would need to be re-written to operate as a **TypeScript** _transform_ instead (working at on the + syntactic instead of semantic level). +- _Naming_ of the inspection and accessor methods may be subject to collisions (if two of the candidates have the same + leaf name) + - Names must be stable going forward, so it might be necessary to prohibit _type unions_ from having candidates with + the same unqualified name. This should be acceptable, since unions of homonyms are rather unlikely to occur. + +#### Explicit Dynamic Conversion + +A final solution would be to allow users of statically typed languages to use an _unsafe_ API provided as part of the +language's _jsii runtime library_ to obtain a new _proxy_ of any type they choose. This is the least effort solution +(except maybe from dropping _type union_ support entirely), but it requires the user to have a way to know the actual +dynamic type of the value. While this was true in all the bug reports we have received so far about these issues, this +is a strong assumption that is unlikely to hold in the general case. It also does not provide any improvement with +respects to the developer experience, as it merely allows users to unblock themselves in front of a bug. + +# Adoption Strategy + +This feature is a candidate for rollout as part of [AWS CDK v2]. Since the removal of type unions will incur significant +breaking in the API of the generated CloudFormation Resource classes, it naturally requires a major version bump. + +A migration guide will be provided on release of [AWS CDK v2] that explains how to migrate from the old API patterns +over to the new ones, although the majority of users may not have to change anything in their code for it to continue +working. + +[aws cdk v2]: https://github.com/aws/aws-cdk-rfcs/issues/79 + +# Unresolved questions + +- Should all APIs be useable in the exact same way in all supported languages? In particular, any "type checker escapes" + using `Lazy.asAny` (or **TypeScript**'s `as any`) are only workable in dynamic languages such as **TypeScript** and + **Python**, as statically typed languages have no such capability. + +# Future Possibilities + +Should _type unions_ be the right choice for some future API, support of _type unions_ can be re-introduced at a later +point in time, with careful consideration on what the semantics should be. Adding support for type unions _may_ be +achievable without breaking existing APIs, whereas removing support is definitely a breaking change. Concerns documented +as part of this RFC should be re-visited and appropriately addressed if the feature is to be restored. + +# Implementation Plan + +INTENTIONALLY LEFT BLANK: an implementation plan will be added when the RFC is scheduled for implementation. + +> The implementation plan should analyze all the tasks needed in order to implement this RFC, and the planned order of +> implementation based on dependencies and analysis of the critical path. +> +> Either add the plan here or add link which references a separate document and/or a GitHub Project Board (and reference +> it here) which manages the execution of your plan. + +--- + +# Annex 1 + +Usage of type unions on types outsode of the _CloudFormation Resources_ as of _July 7, 2020_, obtained by scanning +`monocdk-experiment` using `jsii-reflect`: + +``` +monocdk-experiment.aws_apigateway.JsonSchema + -> PROPERTY: contains?: monocdk-experiment.aws_apigateway.JsonSchema | Array + -> PROPERTY: items?: monocdk-experiment.aws_apigateway.JsonSchema | Array + -> PROPERTY: type?: monocdk-experiment.aws_apigateway.JsonSchemaType | Array + +monocdk-experiment.aws_appsync.BaseResolverProps + -> PROPERTY: pipelineConfig?: monocdk-experiment.aws_appsync.CfnResolver.PipelineConfigProperty | monocdk-experiment.IResolvable + +monocdk-experiment.aws_appsync.ExtendedDataSourceProps + -> PROPERTY: dynamoDbConfig?: monocdk-experiment.aws_appsync.CfnDataSource.DynamoDBConfigProperty | monocdk-experiment.IResolvable + -> PROPERTY: elasticsearchConfig?: monocdk-experiment.aws_appsync.CfnDataSource.ElasticsearchConfigProperty | monocdk-experiment.IResolvable + -> PROPERTY: httpConfig?: monocdk-experiment.aws_appsync.CfnDataSource.HttpConfigProperty | monocdk-experiment.IResolvable + -> PROPERTY: lambdaConfig?: monocdk-experiment.aws_appsync.CfnDataSource.LambdaConfigProperty | monocdk-experiment.IResolvable + -> PROPERTY: relationalDatabaseConfig?: monocdk-experiment.aws_appsync.CfnDataSource.RelationalDatabaseConfigProperty | monocdk-experiment.IResolvable + +monocdk-experiment.aws_appsync.LogConfig + -> PROPERTY: excludeVerboseContent?: boolean | monocdk-experiment.IResolvable + +monocdk-experiment.cloud_assembly_schema.ArtifactManifest + -> PROPERTY: properties?: monocdk-experiment.cloud_assembly_schema.AwsCloudFormationStackProperties | monocdk-experiment.cloud_assembly_schema.AssetManifestProperties | monocdk-experiment.cloud_assembly_schema.TreeArtifactProperties | monocdk-experiment.cloud_assembly_schema.NestedCloudAssemblyProperties + +monocdk-experiment.cloud_assembly_schema.MetadataEntry + -> PROPERTY: data?: string | monocdk-experiment.cloud_assembly_schema.FileAssetMetadataEntry | monocdk-experiment.cloud_assembly_schema.ContainerImageAssetMetadataEntry | Array + +monocdk-experiment.cloud_assembly_schema.MissingContext + -> PROPERTY: props: monocdk-experiment.cloud_assembly_schema.AmiContextQuery | monocdk-experiment.cloud_assembly_schema.AvailabilityZonesContextQuery | monocdk-experiment.cloud_assembly_schema.HostedZoneContextQuery | monocdk-experiment.cloud_assembly_schema.SSMParameterContextQuery | monocdk-experiment.cloud_assembly_schema.VpcContextQuery | monocdk-experiment.cloud_assembly_schema.EndpointServiceAvailabilityZonesContextQuery +``` + +The script used to extract this dataset is the following: + +```ts +import * as chalk from 'chalk'; +import * as reflect from 'jsii-reflect'; + +const typeSystem = new reflect.TypeSystem(); +typeSystem.load(process.argv[2]).then((_assembly) => { + const unionUsages = [ + ...typeSystem.methods.filter( + (method) => + method.returns?.type?.unionOfTypes != null || + method.parameters.some((param) => param.type.unionOfTypes != null), + ), + ...typeSystem.properties.filter((prop) => prop.type.unionOfTypes != null), + ].reduce((acc, elt) => { + const key = elt.definingType.fqn; + acc[key] = acc[key] ?? []; + acc[key].push(elt); + return acc; + }, {} as Record>); + + for (const [type, uses] of Object.entries(unionUsages)) { + if (type.includes('.Cfn')) { + continue; + } + + console.log(chalk.blueBright(type)); + for (const use of uses) { + let description: string; + if (reflect.isMethod(use)) { + const params = use.parameters.map((param) => { + let typeIfNeeded: string = param.optional ? '?' : ''; + if (param.type.unionOfTypes != null) { + typeIfNeeded += `: ${chalk.redBright(param.type.toString())}`; + } + return `${chalk.gray(param.name)}${typeIfNeeded}`; + }); + const returnType = + use.returns.type.unionOfTypes != null + ? chalk.redBright(use.returns.type.toString()) + : chalk.greenBright(use.returns.type.toString()); + description = `METHOD: ${chalk.gray(use.name)}(${params.join(', ')}): ${returnType}`; + } else { + description = `PROPERTY: ${chalk.gray(use.name)}${use.optional ? '?' : ''}: ${chalk.redBright(use.type)}`; + } + console.log(` -> ${description}`); + } + console.log(''); + } +}); +```