diff --git a/RELEASE.md b/RELEASE.md index af27c910d5..fe103256cf 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -9,6 +9,7 @@ * Addressed arbitrary file write via archive extraction security vulnerability in micropackaging. * Added the `_EPHEMERAL` attribute to `AbstractDataset` and other Dataset classes that inherit from it. * Enable read-the-docs search when user presses Command/Ctrl + K. +* Added new JSON Schema that works with Kedro versions 0.19.* ## Breaking changes to the API diff --git a/docs/source/development/set_up_pycharm.md b/docs/source/development/set_up_pycharm.md index 00fa84b61f..fd2e8f8a94 100644 --- a/docs/source/development/set_up_pycharm.md +++ b/docs/source/development/set_up_pycharm.md @@ -163,7 +163,7 @@ You can enable the Kedro catalog validation schema in your PyCharm IDE to enable ![](../meta/images/pycharm_edit_schema_mapping.png) -Add a new mapping using the "+" button in the top left of the window and select the name you want for it. Enter this URL `https://raw.githubusercontent.com/kedro-org/kedro/develop/static/jsonschema/kedro-catalog-0.18.json` in the "Schema URL" field and select "JSON Schema Version 7" in the "Schema version" field. +Add a new mapping using the "+" button in the top left of the window and select the name you want for it. Enter this URL `https://raw.githubusercontent.com/kedro-org/kedro/develop/static/jsonschema/kedro-catalog-0.19.json` in the "Schema URL" field and select "JSON Schema Version 7" in the "Schema version" field. Add the following file path pattern to the mapping: `conf/**/*catalog*`. diff --git a/static/jsonschema/kedro-catalog-0.19.json b/static/jsonschema/kedro-catalog-0.19.json new file mode 100644 index 0000000000..d7093004b2 --- /dev/null +++ b/static/jsonschema/kedro-catalog-0.19.json @@ -0,0 +1,1470 @@ +{ + "type": "object", + "patternProperties": { + "^[a-z0-9-_]+$": { + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "CachedDataset", + "IncrementalDataset", + "MemoryDataset", + "LambdaDataset", + "PartitionedDataset", + "api.APIDataset", + "biosequence.BioSequenceDataset", + "dask.ParquetDataset", + "email.EmailMessageDataset", + "geopandas.GeoJSONDataset", + "holoviews.HoloviewsWriter", + "huggingface.HFDataset", + "huggingface.HFTransformerPipelineDataset", + "json.JSONDataset", + "matplotlib.MatplotlibWriter", + "networkx.NetworkXDataset", + "pandas.CSVDataset", + "pandas.ExcelDataset", + "pandas.FeatherDataset", + "pandas.GBQTableDataset", + "pandas.HDFDataset", + "pandas.JSONDataset", + "pandas.ParquetDataset", + "pandas.SQLTableDataset", + "pandas.SQLQueryDataset", + "pandas.XMLDataset", + "pillow.ImageDataset", + "pickle.PickleDataset", + "plotly.PlotlyDataset", + "redis.PickleDataset", + "spark.SparkDataset", + "spark.SparkHiveDataset", + "spark.SparkJDBCDataset", + "tensorflow.TensorFlowModelDataset", + "text.TextDataset", + "tracking.JSONDataset", + "tracking.MetricsDataset", + "yaml.YAMLDataset" + ] + } + }, + "allOf": [ + { + "if": { + "properties": { + "type": { + "const": "CachedDataset" + } + } + }, + "then": { + "required": [ + "dataset" + ], + "properties": { + "dataset": { + "pattern": ".*", + "description": "A Kedro Dataset object or a dictionary to cache." + }, + "copy_mode": { + "type": "string", + "description": "The copy mode used to copy the data. Possible\nvalues are: \"deepcopy\", \"copy\" and \"assign\". If not\nprovided, it is inferred based on the data type." 
+ } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "IncrementalDataset" + } + } + }, + "then": { + "required": [ + "path", + "dataset" + ], + "properties": { + "path": { + "type": "string", + "description": "Path to the folder containing partitioned data.\nIf path starts with the protocol (e.g., ``s3://``) then the\ncorresponding ``fsspec`` concrete filesystem implementation will\nbe used. If protocol is not specified,\n``fsspec.implementations.local.LocalFileSystem`` will be used.\n**Note:** Some concrete implementations are bundled with ``fsspec``,\nwhile others (like ``s3`` or ``gcs``) must be installed separately\nprior to usage of the ``PartitionedDataset``." + }, + "dataset": { + "pattern": ".*", + "description": "Underlying dataset definition. This is used to instantiate\nthe dataset for each file located inside the ``path``.\nAccepted formats are:\na) object of a class that inherits from ``AbstractDataset``\nb) a string representing a fully qualified class name to such class\nc) a dictionary with ``type`` key pointing to a string from b),\nother keys are passed to the Dataset initializer.\nCredentials for the dataset can be explicitly specified in\nthis configuration." + }, + "checkpoint": { + "pattern": "object", + "description": "Optional checkpoint configuration. Accepts a dictionary\nwith the corresponding dataset definition including ``filepath``\n(unlike ``dataset`` argument). Checkpoint configuration is\ndescribed here:\nhttps://kedro.readthedocs.io/en/0.19.0/data/kedro_io.html#checkpoint-configuration\nCredentials for the checkpoint can be explicitly specified\nin this configuration." + }, + "filepath_arg": { + "type": "string", + "description": "Underlying dataset initializer argument that will\ncontain a path to each corresponding partition file.\nIf unspecified, defaults to \"filepath\"." + }, + "filename_suffix": { + "type": "string", + "description": "If specified, only partitions that end with this\nstring will be processed." + }, + "credentials": { + "type": [ + "object", + "string" + ], + "description": "Protocol-specific options that will be passed to\n``fsspec.filesystem``\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.filesystem\nand the dataset initializer. If the dataset config contains\nexplicit credentials spec, then such spec will take precedence.\nAll possible credentials management scenarios are documented here:\nhttps://kedro.readthedocs.io/en/0.19.0/data/kedro_io.html#partitioned-dataset-credentials" + }, + "load_args": { + "type": "object", + "description": "Keyword arguments to be passed into ``find()`` method of\nthe filesystem implementation." + }, + "fs_args": { + "type": "object", + "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``)" + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "MemoryDataset" + } + } + }, + "then": { + "required": [], + "properties": { + "data": { + "pattern": ".*", + "description": "Python object containing the data." + }, + "copy_mode": { + "type": "string", + "description": "The copy mode used to copy the data. Possible\nvalues are: \"deepcopy\", \"copy\" and \"assign\". If not\nprovided, it is inferred based on the data type." 
+ } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "LambdaDataset" + } + } + }, + "then": { + "required": [ + "load", + "save" + ], + "properties": { + "load": { + "pattern": ".*", + "description": "Method to load data from a dataset." + }, + "save": { + "pattern": ".*", + "description": "Method to save data to a dataset." + }, + "exists": { + "pattern": ".*", + "description": "Method to check whether output data already exists." + }, + "release": { + "pattern": ".*", + "description": "Method to release any cached information." + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "PartitionedDataset" + } + } + }, + "then": { + "required": [ + "path", + "dataset" + ], + "properties": { + "path": { + "type": "string", + "description": "Path to the folder containing partitioned data.\nIf path starts with the protocol (e.g., ``s3://``) then the\ncorresponding ``fsspec`` concrete filesystem implementation will\nbe used. If protocol is not specified,\n``fsspec.implementations.local.LocalFileSystem`` will be used.\n**Note:** Some concrete implementations are bundled with ``fsspec``,\nwhile others (like ``s3`` or ``gcs``) must be installed separately\nprior to usage of the ``PartitionedDataset``." + }, + "dataset": { + "pattern": ".*", + "description": "Underlying dataset definition. This is used to instantiate\nthe dataset for each file located inside the ``path``.\nAccepted formats are:\na) object of a class that inherits from ``AbstractDataset``\nb) a string representing a fully qualified class name to such class\nc) a dictionary with ``type`` key pointing to a string from b),\nother keys are passed to the Dataset initializer.\nCredentials for the dataset can be explicitly specified in\nthis configuration." + }, + "filepath_arg": { + "type": "string", + "description": "Underlying dataset initializer argument that will\ncontain a path to each corresponding partition file.\nIf unspecified, defaults to \"filepath\"." + }, + "filename_suffix": { + "type": "string", + "description": "If specified, only partitions that end with this\nstring will be processed." + }, + "credentials": { + "type": [ + "object", + "string" + ], + "description": "Protocol-specific options that will be passed to\n``fsspec.filesystem``\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.filesystem\nand the dataset initializer. If the dataset config contains\nexplicit credentials spec, then such spec will take precedence.\nAll possible credentials management scenarios are documented here:\nhttps://kedro.readthedocs.io/en/0.19.0/data/kedro_io.html#partitioned-dataset-credentials" + }, + "load_args": { + "type": "object", + "description": "Keyword arguments to be passed into ``find()`` method of\nthe filesystem implementation." + }, + "fs_args": { + "type": "object", + "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``)" + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "api.APIDataset" + } + } + }, + "then": { + "required": [ + "url" + ], + "properties": { + "url": { + "type": "string", + "description": "The API URL endpoint." + }, + "method": { + "type": "string", + "description": "The HTTP method of the request: GET, POST, PUT, DELETE, HEAD, etc."
+ }, + "data": { + "pattern": ".*", + "description": "The request payload, used for POST, PUT, etc requests\nhttps://requests.readthedocs.io/en/master/user/quickstart/#more-complicated-post-requests" + }, + "params": { + "type": "object", + "description": "The url parameters of the API.\nhttps://requests.readthedocs.io/en/master/user/quickstart/#passing-parameters-in-urls" + }, + "headers": { + "type": "object", + "description": "The HTTP headers.\nhttps://requests.readthedocs.io/en/master/user/quickstart/#custom-headers" + }, + "auth": { + "pattern": ".*", + "description": "Anything ``requests`` accepts. Normally it's either ``('login', 'password')``,\nor ``AuthBase``, ``HTTPBasicAuth`` instance for more complex cases." + }, + "json": { + "pattern": ".*", + "description": "The request payload, used for POST, PUT, etc requests, passed in\nto the json kwarg in the requests object.\nhttps://requests.readthedocs.io/en/master/user/quickstart/#more-complicated-post-requests" + }, + "timeout": { + "type": "integer", + "description": "The wait time in seconds for a response, defaults to 1 minute.\nhttps://requests.readthedocs.io/en/master/user/quickstart/#timeouts" + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "biosequence.BioSequenceDataset" + } + } + }, + "then": { + "required": [ + "filepath" + ], + "properties": { + "filepath": { + "type": "string", + "description": "Filepath in POSIX format to sequence file prefixed with a protocol like\n`s3://`. If prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``." + }, + "load_args": { + "type": "object", + "description": "Options for parsing sequence files by Biopython ``SeqIO.parse()``." + }, + "save_args": { + "type": "object", + "description": "file format supported by Biopython ``SeqIO.write()``.\nE.g. `{\"format\": \"fasta\"}`." + }, + "credentials": { + "type": [ + "object", + "string" + ], + "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." + }, + "fs_args": { + "type": "object", + "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\n to pass to the filesystem's `open` method through nested keys\n `open_args_load` and `open_args_save`.\n Here you can find all available arguments for `open`:\n https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\n All defaults are preserved, except `mode`, which is set to `r` when loading\n and to `w` when saving.\n\nNote: Here you can find all supported file formats: https://biopython.org/wiki/SeqIO" + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "dask.ParquetDataset" + } + } + }, + "then": { + "required": [ + "filepath" + ], + "properties": { + "filepath": { + "type": "string", + "description": "Filepath in POSIX format to a parquet file\nparquet collection or the directory of a multipart parquet." 
+ }, + "load_args": { + "type": "object", + "description": "Additional loading options `dask.dataframe.read_parquet`:\nhttps://docs.dask.org/en/latest/dataframe-api.html#dask.dataframe.read_parquet" + }, + "save_args": { + "type": "object", + "description": "Additional saving options for `dask.dataframe.to_parquet`:\nhttps://docs.dask.org/en/latest/dataframe-api.html#dask.dataframe.to_parquet" + }, + "credentials": { + "type": [ + "object", + "string" + ], + "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." + }, + "fs_args": { + "type": "object", + "description": "Optional parameters to the backend file system driver:\nhttps://docs.dask.org/en/latest/remote-data-services.html#optional-parameters" + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "email.EmailMessageDataset" + } + } + }, + "then": { + "required": [ + "filepath" + ], + "properties": { + "filepath": { + "type": "string", + "description": "Filepath in POSIX format to a text file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." + }, + "load_args": { + "type": "object", + "description": "``email`` options for parsing email messages (arguments passed\ninto ``email.parser.Parser.parse``). Here you can find all available arguments:\nhttps://docs.python.org/3/library/email.parser.html#email.parser.Parser.parse\nIf you would like to specify options for the `Parser`,\nyou can include them under the \"parser\" key. Here you can\nfind all available arguments:\nhttps://docs.python.org/3/library/email.parser.html#email.parser.Parser\nAll defaults are preserved, but \"policy\", which is set to ``email.policy.default``." + }, + "save_args": { + "type": "object", + "description": "``email`` options for generating MIME documents (arguments passed into\n``email.generator.Generator.flatten``). Here you can find all available arguments:\nhttps://docs.python.org/3/library/email.generator.html#email.generator.Generator.flatten\nIf you would like to specify options for the `Generator`,\nyou can include them under the \"generator\" key. Here you can\nfind all available arguments:\nhttps://docs.python.org/3/library/email.generator.html#email.generator.Generator\nAll defaults are preserved." + }, + "credentials": { + "type": [ + "object", + "string" + ], + "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." + }, + "fs_args": { + "type": "object", + "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "geopandas.GeoJSONDataset" + } + } + }, + "then": { + "required": [ + "filepath" + ], + "properties": { + "filepath": { + "type": "string", + "description": "Filepath in POSIX format to a GeoJSON file prefixed with a protocol like\n`s3://`. 
If prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." + }, + "load_args": { + "type": "object", + "description": "GeoPandas options for loading GeoJSON files.\nHere you can find all available arguments:\nhttps://geopandas.org/reference/geopandas.read_file.html" + }, + "save_args": { + "type": "object", + "description": "GeoPandas options for saving GeoJSON files.\nHere you can find all available arguments:\nhttps://geopandas.org/reference.html#geopandas.GeoDataFrame.to_file\nThe default save arg `driver` is 'GeoJSON'; all others are preserved." + }, + "credentials": { + "type": [ + "object", + "string" + ], + "description": "Credentials required to access the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{'token': None}`." + }, + "fs_args": { + "type": "object", + "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `wb` when saving." + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "holoviews.HoloviewsWriter" + } + } + }, + "then": { + "required": [ + "filepath" + ], + "properties": { + "filepath": { + "type": "string", + "description": "Filepath in POSIX format to a text file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." + }, + "fs_args": { + "type": "object", + "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested key `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `wb` when saving." + }, + "credentials": { + "type": [ + "object", + "string" + ], + "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``S3FileSystem`` it should look like:\n`{'key': '', 'secret': ''}`" + }, + "save_args": { + "type": "object", + "description": "Extra save args passed to `holoviews.save()`.
See\nhttps://holoviews.org/reference_manual/holoviews.util.html#holoviews.util.save" + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "huggingface.HFDataset" + } + } + }, + "then": { + "required": [ + "dataset_name" + ], + "properties": { + "dataset_name": { + "type": "string", + "description": "Huggingface dataset name" + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "huggingface.HFTransformerPipelineDataset" + } + } + }, + "then": { + "properties": { + "task": { + "type": "string", + "description": "Huggingface pipeline task name" + }, + "model_name": { + "type": "string", + "description": "Huggingface model name" + }, + "pipeline_kwargs": { + "type": "object", + "description": "Additional kwargs to be passed into the pipeline" + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "json.JSONDataset" + } + } + }, + "then": { + "required": [ + "filepath" + ], + "properties": { + "filepath": { + "type": "string", + "description": "Filepath in POSIX format to a JSON file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." + }, + "save_args": { + "type": "object", + "description": "json options for saving JSON files (arguments passed\ninto ``json.dump``). Here you can find all available arguments:\nhttps://docs.python.org/3/library/json.html\nAll defaults are preserved." + }, + "credentials": { + "type": [ + "object", + "string" + ], + "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." + }, + "fs_args": { + "type": "object", + "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "matplotlib.MatplotlibWriter" + } + } + }, + "then": { + "required": [ + "filepath" + ], + "properties": { + "filepath": { + "type": "string", + "description": "Filepath in POSIX format to a matplotlib object file(s) prefixed with a protocol\nlike `s3://`. If prefix is not provided, `file` protocol (local filesystem) will be\nused. The prefix should be any protocol supported by ``fsspec``." + }, + "fs_args": { + "type": "object", + "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested key `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `wb` when saving." + }, + "credentials": { + "type": [ + "object", + "string" + ], + "description": "Credentials required to get access to the underlying filesystem.\nE.g.
for ``S3FileSystem`` it should look like:\n`{'key': '', 'secret': ''}`" + }, + "save_args": { + "type": "object", + "description": "Save args passed to `plt.savefig`. See\nhttps://matplotlib.org/api/_as_gen/matplotlib.pyplot.savefig.html" + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "networkx.NetworkXDataset" + } + } + }, + "then": { + "required": [ + "filepath" + ], + "properties": { + "filepath": { + "type": "string", + "description": "Filepath in POSIX format to the NetworkX graph JSON file." + }, + "load_args": { + "type": "object", + "description": "Arguments passed on to ``networkx.node_link_graph``.\nSee the details in\nhttps://networkx.github.io/documentation/networkx-1.9.1/reference/generated/networkx.readwrite.json_graph.node_link_graph.html" + }, + "save_args": { + "type": "object", + "description": "Arguments passed on to ``networkx.node_link_data``.\nSee the details in\nhttps://networkx.github.io/documentation/networkx-1.9.1/reference/generated/networkx.readwrite.json_graph.node_link_data.html" + }, + "credentials": { + "type": [ + "object", + "string" + ], + "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." + }, + "fs_args": { + "type": "object", + "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "pandas.CSVDataset" + } + } + }, + "then": { + "required": [ + "filepath" + ], + "properties": { + "filepath": { + "type": "string", + "description": "Filepath in POSIX format to a CSV file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." + }, + "load_args": { + "type": "object", + "description": "Pandas options for loading CSV files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html\nAll defaults are preserved." + }, + "save_args": { + "type": "object", + "description": "Pandas options for saving CSV files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_csv.html\nAll defaults are preserved, but \"index\", which is set to False." + }, + "credentials": { + "type": [ + "object", + "string" + ], + "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." + }, + "fs_args": { + "type": "object", + "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g.
`{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "pandas.ExcelDataset" + } + } + }, + "then": { + "required": [ + "filepath" + ], + "properties": { + "filepath": { + "type": "string", + "description": "Filepath in POSIX format to a Excel file prefixed with a protocol like\n`s3://`. If prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." + }, + "engine": { + "type": "string", + "description": "The engine used to write to excel files. The default\nengine is 'xlsxwriter'." + }, + "load_args": { + "type": "object", + "description": "Pandas options for loading Excel files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_excel.html\nAll defaults are preserved, but \"engine\", which is set to \"xlrd\"." + }, + "save_args": { + "type": "object", + "description": "Pandas options for saving Excel files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_excel.html\nAll defaults are preserved, but \"index\", which is set to False.\nIf you would like to specify options for the `ExcelWriter`,\nyou can include them under the \"writer\" key. Here you can\nfind all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.ExcelWriter.html" + }, + "credentials": { + "type": [ + "object", + "string" + ], + "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." + }, + "fs_args": { + "type": "object", + "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `wb` when saving." + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "pandas.FeatherDataset" + } + } + }, + "then": { + "required": [ + "filepath" + ], + "properties": { + "filepath": { + "type": "string", + "description": "Filepath in POSIX format to a feather file prefixed with a protocol like\n`s3://`. If prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." + }, + "load_args": { + "type": "object", + "description": "Pandas options for loading feather files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_feather.html\nAll defaults are preserved." + }, + "credentials": { + "type": [ + "object", + "string" + ], + "description": "Credentials required to get access to the underlying filesystem.\nE.g. 
for ``GCSFileSystem`` it should look like `{\"token\": None}`." + }, + "fs_args": { + "type": "object", + "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `wb` when saving." + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "pandas.GBQTableDataset" + } + } + }, + "then": { + "required": [ + "dataset", + "table_name" + ], + "properties": { + "dataset": { + "type": "string", + "description": "Google BigQuery dataset." + }, + "table_name": { + "type": "string", + "description": "Google BigQuery table name." + }, + "project": { + "type": "string", + "description": "Google BigQuery Account project ID.\nOptional when available from the environment.\nhttps://cloud.google.com/resource-manager/docs/creating-managing-projects" + }, + "credentials": { + "pattern": ".*", + "description": "Credentials for accessing Google APIs.\nEither ``google.auth.credentials.Credentials`` object or dictionary with\nparameters required to instantiate ``google.oauth2.credentials.Credentials``.\nHere you can find all the arguments:\nhttps://google-auth.readthedocs.io/en/latest/reference/google.oauth2.credentials.html" + }, + "load_args": { + "type": "object", + "description": "Pandas options for loading BigQuery table into DataFrame.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_gbq.html\nAll defaults are preserved." + }, + "save_args": { + "type": "object", + "description": "Pandas options for saving DataFrame to BigQuery table.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_gbq.html\nAll defaults are preserved, but \"progress_bar\", which is set to False." + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "pandas.HDFDataset" + } + } + }, + "then": { + "required": [ + "filepath", + "key" + ], + "properties": { + "filepath": { + "type": "string", + "description": "Filepath in POSIX format to a hdf file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." + }, + "key": { + "type": "string", + "description": "Identifier to the group in the HDF store." + }, + "load_args": { + "type": "object", + "description": "PyTables options for loading hdf files.\nYou can find all available arguments at:\nhttps://www.pytables.org/usersguide/libref/top_level.html#tables.open_file\nAll defaults are preserved." + }, + "save_args": { + "type": "object", + "description": "PyTables options for saving hdf files.\nYou can find all available arguments at:\nhttps://www.pytables.org/usersguide/libref/top_level.html#tables.open_file\nAll defaults are preserved." + }, + "credentials": { + "type": [ + "object", + "string" + ], + "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." 
+ }, + "fs_args": { + "type": "object", + "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set `wb` when saving." + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "pandas.JSONDataset" + } + } + }, + "then": { + "required": [ + "filepath" + ], + "properties": { + "filepath": { + "type": "string", + "description": "Filepath in POSIX format to a JSON file prefixed with a protocol like `s3://`.\nIf prefix is not provided `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." + }, + "load_args": { + "type": "object", + "description": "Pandas options for loading JSON files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_json.html\nAll defaults are preserved." + }, + "save_args": { + "type": "object", + "description": "Pandas options for saving JSON files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_json.html\nAll defaults are preserved, but \"index\", which is set to False." + }, + "credentials": { + "type": [ + "object", + "string" + ], + "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{'token': None}`." + }, + "fs_args": { + "type": "object", + "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "pandas.ParquetDataset" + } + } + }, + "then": { + "required": [ + "filepath" + ], + "properties": { + "filepath": { + "type": "string", + "description": "Filepath in POSIX format to a Parquet file prefixed with a protocol like\n`s3://`. If prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nIt can also be a path to a directory. If the directory is\nprovided then it can be used for reading partitioned parquet files.\nNote: `http(s)` doesn't support versioning." + }, + "load_args": { + "type": "object", + "description": "Additional options for loading Parquet file(s).\nHere you can find all available arguments when reading single file:\nhttps://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_parquet.html\nHere you can find all available arguments when reading partitioned datasets:\nhttps://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html#pyarrow.parquet.ParquetDataset.read\nAll defaults are preserved." 
+ }, + "save_args": { + "type": "object", + "description": "Additional saving options for `pyarrow.parquet.write_table` and\n`pyarrow.Table.from_pandas`.\nHere you can find all available arguments for `write_table()`:\nhttps://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_table.html?highlight=write_table#pyarrow.parquet.write_table\nThe arguments for `from_pandas()` should be passed through a nested\nkey: `from_pandas`. E.g.: `save_args = {\"from_pandas\": {\"preserve_index\": False}}`\nHere you can find all available arguments for `from_pandas()`:\nhttps://arrow.apache.org/docs/python/generated/pyarrow.Table.html#pyarrow.Table.from_pandas" + }, + "credentials": { + "type": [ + "object", + "string" + ], + "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." + }, + "fs_args": { + "type": "object", + "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved." + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "pandas.SQLTableDataset" + } + } + }, + "then": { + "required": [ + "table_name", + "credentials" + ], + "properties": { + "table_name": { + "type": "string", + "description": "The table name to load or save data to. It\noverwrites name in ``save_args`` and ``table_name``\nparameters in ``load_args``." + }, + "credentials": { + "type": [ + "object", + "string" + ], + "description": "A dictionary with a ``SQLAlchemy`` connection string.\nUsers are supposed to provide the connection string 'con'\nthrough credentials. It overwrites `con` parameter in\n``load_args`` and ``save_args`` in case it is provided. To find\nall supported connection string formats, see here:\nhttps://docs.sqlalchemy.org/en/13/core/engines.html#database-urls" + }, + "load_args": { + "type": "object", + "description": "Provided to underlying pandas ``read_sql_table``\nfunction along with the connection string.\nTo find all supported arguments, see here:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_sql_table.html\nTo find all supported connection string formats, see here:\nhttps://docs.sqlalchemy.org/en/13/core/engines.html#database-urls" + }, + "save_args": { + "type": "object", + "description": "Provided to underlying pandas ``to_sql`` function along\nwith the connection string.\nTo find all supported arguments, see here:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_sql.html\nTo find all supported connection string formats, see here:\nhttps://docs.sqlalchemy.org/en/13/core/engines.html#database-urls\nIt has ``index=False`` in the default parameters." + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "pandas.SQLQueryDataset" + } + } + }, + "then": { + "required": [ + "sql", + "credentials" + ], + "properties": { + "sql": { + "type": "string", + "description": "The sql query statement." + }, + "credentials": { + "type": [ + "object", + "string" + ], + "description": "A dictionary with a ``SQLAlchemy`` connection string.\nUsers are supposed to provide the connection string 'con'\nthrough credentials. 
It overwrites `con` parameter in\n``load_args`` and ``save_args`` in case it is provided. To find\nall supported connection string formats, see here:\nhttps://docs.sqlalchemy.org/en/13/core/engines.html#database-urls" + }, + "load_args": { + "type": "object", + "description": "Provided to underlying pandas ``read_sql_query``\nfunction along with the connection string.\nTo find all supported arguments, see here:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_sql_query.html\nTo find all supported connection string formats, see here:\nhttps://docs.sqlalchemy.org/en/13/core/engines.html#database-urls" + }, + "execution_options": { + "type": "object", + "description": "A dictionary with non-SQL options for the connection\nto be applied to the underlying engine.\nTo find all supported execution options, see here:\nhttps://docs.sqlalchemy.org/en/12/core/connections.html#sqlalchemy.engine.Connection.execution_options\nNote that this is not a standard argument supported by pandas API, but could be useful for handling large datasets." + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "pandas.XMLDataset" + } + } + }, + "then": { + "required": [ + "filepath" + ], + "properties": { + "filepath": { + "type": "string", + "description": "Filepath in POSIX format to an XML file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." + }, + "load_args": { + "type": "object", + "description": "Pandas options for loading XML files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_xml.html\nAll defaults are preserved." + }, + "save_args": { + "type": "object", + "description": "Pandas options for saving XML files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_xml.html\nAll defaults are preserved, but \"index\", which is set to False." + }, + "credentials": { + "type": [ + "object", + "string" + ], + "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." + }, + "fs_args": { + "type": "object", + "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "pickle.PickleDataset" + } + } + }, + "then": { + "required": [ + "filepath" + ], + "properties": { + "filepath": { + "type": "string", + "description": "Filepath in POSIX format to a Pickle file prefixed with a protocol like\n`s3://`. If prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." + }, + "backend": { + "type": "string", + "description": "Backend to use, must be one of ['pickle', 'joblib']. Defaults to 'pickle'."
+ }, + "load_args": { + "type": "object", + "description": "Pickle options for loading pickle files.\nHere you can find all available arguments for different backends:\npickle.load: https://docs.python.org/3/library/pickle.html#pickle.load\njoblib.load: https://joblib.readthedocs.io/en/latest/generated/joblib.load.html\nAll defaults are preserved." + }, + "save_args": { + "type": "object", + "description": "Pickle options for saving pickle files.\nHere you can find all available arguments for different backends:\npickle.dump: https://docs.python.org/3/library/pickle.html#pickle.dump\njoblib.dump: https://joblib.readthedocs.io/en/latest/generated/joblib.dump.html\nAll defaults are preserved." + }, + "credentials": { + "type": [ + "object", + "string" + ], + "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." + }, + "fs_args": { + "type": "object", + "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `wb` when saving." + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "pillow.ImageDataset" + } + } + }, + "then": { + "required": [ + "filepath" + ], + "properties": { + "filepath": { + "type": "string", + "description": "Filepath in POSIX format to an image file prefixed with a protocol like\n`s3://`. If prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." + }, + "save_args": { + "type": "object", + "description": "Pillow options for saving image files.\nHere you can find all available arguments:\nhttps://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.save\nAll defaults are preserved." + }, + "credentials": { + "type": [ + "object", + "string" + ], + "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." + }, + "fs_args": { + "type": "object", + "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "plotly.PlotlyDataset" + } + } + }, + "then": { + "required": [ + "filepath", + "plotly_args" + ], + "properties": { + "filepath": { + "type": "string", + "description": "Filepath in POSIX format to a JSON file prefixed with a protocol like `s3://`.\nIf prefix is not provided `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." 
+ }, + "plotly_args": { + "type": "object", + "description": "Plotly configuration for generating a plotly graph object Figure\nrepresenting the plotted data." + }, + "load_args": { + "type": "object", + "description": "Plotly options for loading JSON files.\nHere you can find all available arguments:\nhttps://plotly.com/python-api-reference/generated/plotly.io.from_json.html#plotly.io.from_json\nAll defaults are preserved." + }, + "save_args": { + "type": "object", + "description": "Plotly options for saving JSON files.\nHere you can find all available arguments:\nhttps://plotly.com/python-api-reference/generated/plotly.io.write_json.html\nAll defaults are preserved, but \"index\", which is set to False." + }, + "credentials": { + "type": [ + "object", + "string" + ], + "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{'token': None}`." + }, + "fs_args": { + "type": "object", + "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested key `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `wb` when saving." + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "redis.PickleDataset" + } + } + }, + "then": { + "required": [ + "key" + ], + "properties": { + "key": { + "type": "string", + "description": "The key to use for saving/loading object to Redis." + }, + "backend": { + "type": "string", + "description": "Backend to use, must be an import path to a module which satisfies the ``pickle`` interface.\nThat is, contains a `loads` and `dumps` function. Defaults to 'pickle'." + }, + "load_args": { + "type": "object", + "description": "Pickle options for loading pickle files.\nHere you can find all available arguments:\nhttps://docs.python.org/3/library/pickle.html#pickle.loads\nAll defaults are preserved." + }, + "save_args": { + "type": "object", + "description": "Pickle options for saving pickle files.\nHere you can find all available arguments:\nhttps://docs.python.org/3/library/pickle.html#pickle.dumps\nAll defaults are preserved." + }, + "credentials": { + "type": "object", + "description": "Credentials required to get access to the redis server." + }, + "redis_args": { + "type": "object", + "description": "Extra arguments to pass into the redis client constructor ``redis.StrictRedis.from_url``, as well as to pass to the ``redis.StrictRedis.set``" + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "spark.SparkDataset" + } + } + }, + "then": { + "required": [ + "filepath" + ], + "properties": { + "filepath": { + "type": "string", + "description": "Filepath in POSIX format to a Spark dataframe. When using Databricks\nand working with data written to mount path points,\nspecify ``filepath``s for (versioned) ``SparkDataset``s\nstarting with ``/dbfs/mnt``." + }, + "file_format": { + "type": "string", + "description": "File format used during load and save\noperations. These are formats supported by the running\nSparkContext include parquet, csv. 
For a list of supported\nformats please refer to the Apache Spark documentation at\nhttps://spark.apache.org/docs/latest/sql-programming-guide.html" + }, + "load_args": { + "type": "object", + "description": "Load args passed to Spark DataFrameReader load method.\nIt is dependent on the selected file format. You can find\na list of read options for each supported format\nin Spark DataFrame read documentation:\nhttps://spark.apache.org/docs/latest/api/python/reference/api/pyspark.sql.DataFrame.html" + }, + "save_args": { + "type": "object", + "description": "Save args passed to Spark DataFrame write options.\nSimilar to load_args this is dependent on the selected file\nformat. You can pass ``mode`` and ``partitionBy`` to specify\nyour overwrite mode and partitioning respectively. You can find\na list of options for each format in Spark DataFrame\nwrite documentation:\nhttps://spark.apache.org/docs/latest/api/python/reference/api/pyspark.sql.DataFrame.html" + }, + "credentials": { + "type": [ + "object", + "string" + ], + "description": "Credentials to access the S3 bucket, such as\n``key``, ``secret``, if ``filepath`` prefix is ``s3a://`` or ``s3n://``.\nOptional keyword arguments passed to ``hdfs.client.InsecureClient``\nif ``filepath`` prefix is ``hdfs://``. Ignored otherwise." + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "spark.SparkHiveDataset" + } + } + }, + "then": { + "required": [ + "database", + "table", + "write_mode" + ], + "properties": { + "database": { + "type": "string", + "description": "The name of the Hive database." + }, + "table": { + "type": "string", + "description": "The name of the table within the database." + }, + "write_mode": { + "type": "string", + "description": "``insert``, ``upsert`` or ``overwrite`` are supported." + }, + "table_pk": { + "type": "array", + "description": "If performing an upsert, this identifies the primary key columns used to\nresolve preexisting data. It is required for ``write_mode=\"upsert\"``." + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "spark.SparkJDBCDataset" + } + } + }, + "then": { + "required": [ + "url", + "table" + ], + "properties": { + "url": { + "type": "string", + "description": "A JDBC URL of the form ``jdbc:subprotocol:subname``." + }, + "table": { + "type": "string", + "description": "The name of the table to load or save data to." + }, + "credentials": { + "type": [ + "object", + "string" + ], + "description": "A dictionary of JDBC database connection arguments.\nNormally at least properties ``user`` and ``password`` with\ntheir corresponding values. It updates ``properties``\nparameter in ``load_args`` and ``save_args`` in case it is\nprovided." + }, + "load_args": { + "type": "object", + "description": "Provided to underlying PySpark ``jdbc`` function along\nwith the JDBC URL and the name of the table. To find all\nsupported arguments, see here:\nhttps://spark.apache.org/docs/latest/api/python/reference/api/pyspark.sql.DataFrameReader.jdbc.html" + }, + "save_args": { + "type": "object", + "description": "Provided to underlying PySpark ``jdbc`` function along\nwith the JDBC URL and the name of the table.
To find all\nsupported arguments, see here:\nhttps://spark.apache.org/docs/latest/api/python/reference/api/pyspark.sql.DataFrameWriter.jdbc.html" + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "tensorflow.TensorFlowModelDataset" + } + } + }, + "then": { + "required": [ + "filepath" + ], + "properties": { + "filepath": { + "type": "string", + "description": "Filepath in POSIX format to a TensorFlow model directory prefixed with a\nprotocol like `s3://`. If prefix is not provided, `file` protocol (local filesystem)\nwill be used. The prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." + }, + "load_args": { + "type": "object", + "description": "TensorFlow options for loading models.\nHere you can find all available arguments:\nhttps://www.tensorflow.org/api_docs/python/tf/keras/models/load_model\nAll defaults are preserved." + }, + "save_args": { + "type": "object", + "description": "TensorFlow options for saving models.\nHere you can find all available arguments:\nhttps://www.tensorflow.org/api_docs/python/tf/keras/models/save_model\nAll defaults are preserved, except for \"save_format\", which is set to \"tf\"." + }, + "credentials": { + "type": [ + "object", + "string" + ], + "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{'token': None}`." + }, + "fs_args": { + "type": "object", + "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``)." + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "text.TextDataset" + } + } + }, + "then": { + "required": [ + "filepath" + ], + "properties": { + "filepath": { + "type": "string", + "description": "Filepath in POSIX format to a text file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." + }, + "credentials": { + "type": [ + "object", + "string" + ], + "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." + }, + "fs_args": { + "type": "object", + "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "tracking.JSONDataset" + } + } + }, + "then": { + "required": [ + "filepath" + ], + "properties": { + "filepath": { + "type": "string", + "description": "Filepath in POSIX format to a text file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." + }, + "save_args": { + "type": "object", + "description": "json options for saving JSON files (arguments passed\ninto ``json.dump``).
Here you can find all available arguments:\nhttps://docs.python.org/3/library/json.html\nAll defaults are preserved." + }, + "credentials": { + "type": [ + "object", + "string" + ], + "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." + }, + "fs_args": { + "type": "object", + "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "tracking.MetricsDataset" + } + } + }, + "then": { + "required": [ + "filepath" + ], + "properties": { + "filepath": { + "type": "string", + "description": "Filepath in POSIX format to a text file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." + }, + "save_args": { + "type": "object", + "description": "json options for saving JSON files (arguments passed\ninto ``json.dump``). Here you can find all available arguments:\nhttps://docs.python.org/3/library/json.html\nAll defaults are preserved." + }, + "credentials": { + "type": [ + "object", + "string" + ], + "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." + }, + "fs_args": { + "type": "object", + "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "yaml.YAMLDataset" + } + } + }, + "then": { + "required": [ + "filepath" + ], + "properties": { + "filepath": { + "type": "string", + "description": "Filepath in POSIX format to a YAML file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." + }, + "save_args": { + "type": "object", + "description": "PyYAML options for saving YAML files (arguments passed\ninto ``yaml.dump``). Here you can find all available arguments:\nhttps://pyyaml.org/wiki/PyYAMLDocumentation\nAll defaults are preserved, but \"default_flow_style\", which is set to False." + }, + "credentials": { + "type": [ + "object", + "string" + ], + "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`."
+ }, + "fs_args": { + "type": "object", + "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." + } + } + } + } + ] + } + } +}