forked from flyteorg/flyte
Move deprecated integrations docs to flyte (flyteorg#5283)

* Move deprecated integrations docs to flyte
* Add redirect, exclude deprecated_integrations file

Signed-off-by: nikki everett <[email protected]>
1 parent db3bcd2 · commit 9cd6f55 · 11 changed files with 493 additions and 14 deletions.
44 changes: 44 additions & 0 deletions
docs/deprecated_integrations/bigquery_plugin/bigquery_plugin_example.md
(bigquery_plugin_example)=
# BigQuery example query

This example shows how to use a Flyte BigQueryTask to execute a query.

```{note}
To clone and run the example code on this page, see the [Flytesnacks repo][flytesnacks].
```

```{rli} https://raw.githubusercontent.com/flyteorg/flytesnacks/master/examples/bigquery_plugin/bigquery_plugin/bigquery_plugin_example.py
:caption: bigquery_plugin/bigquery_plugin_example.py
:lines: 1-8
```
This is the simplest possible query. Note that for registration to work properly, you must give your BigQuery task a name that is unique across the project and domain of your Flyte installation.

```{rli} https://raw.githubusercontent.com/flyteorg/flytesnacks/master/examples/bigquery_plugin/bigquery_plugin/bigquery_plugin_example.py
:caption: bigquery_plugin/bigquery_plugin_example.py
:lines: 12-22
```
In real-world applications, we are usually more interested in using BigQuery to query a dataset. In this case, we use [crypto_dogecoin](https://console.cloud.google.com/bigquery?project=bigquery-public-data&page=table&d=crypto_dogecoin&p=bigquery-public-data&t=transactions), a public dataset in BigQuery.

Let's look at how to parameterize the query to filter results for a specific transaction version, provided as a user input.

```{rli} https://raw.githubusercontent.com/flyteorg/flytesnacks/master/examples/bigquery_plugin/bigquery_plugin/bigquery_plugin_example.py
:caption: bigquery_plugin/bigquery_plugin_example.py
:lines: 25-34
```
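The query template refers to the task input with a BigQuery-style named parameter (`@version`). To make that relationship concrete, here is a purely illustrative, self-contained sketch; the `bind_named_parameters` helper below is hypothetical and is not part of flytekit or the BigQuery client:

```python
# Hypothetical illustration of BigQuery-style named parameters (@name).
# This is not flytekit or google-cloud-bigquery code; it only shows how a
# templated query and its inputs relate.
import re

QUERY_TEMPLATE = (
    "SELECT * FROM `bigquery-public-data.crypto_dogecoin.transactions` "
    "WHERE version = @version LIMIT 10"
)

def bind_named_parameters(template: str, params: dict) -> str:
    """Substitute @name placeholders with literal values (ints only, for safety)."""
    def repl(match):
        value = params[match.group(1)]
        if not isinstance(value, int):
            raise TypeError(f"only int parameters supported here, got {type(value)}")
        return str(value)
    return re.sub(r"@(\w+)", repl, template)

print(bind_named_parameters(QUERY_TEMPLATE, {"version": 1}))
```

In the real plugin, the substitution is performed by BigQuery itself from the task's typed inputs; this sketch only shows the shape of the template.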
The StructuredDataset transformer can convert the query result to a pandas DataFrame. We can also change `pandas.DataFrame` to `pyarrow.Table` to convert the result to an Arrow table.

```{rli} https://raw.githubusercontent.com/flyteorg/flytesnacks/master/examples/bigquery_plugin/bigquery_plugin/bigquery_plugin_example.py
:caption: bigquery_plugin/bigquery_plugin_example.py
:lines: 37-45
```
Check the query results in the [BigQuery console](https://console.cloud.google.com/bigquery).

[flytesnacks]: https://github.com/flyteorg/flytesnacks/tree/master/examples/bigquery_plugin
39 changes: 39 additions & 0 deletions
(bigquery_plugin)=

# BigQuery plugin

```{note}
This is a legacy implementation of the BigQuery integration. We recommend using the {ref}`BigQuery agent <bigquery_agent>` instead.
```

## Installation

To install the BigQuery plugin, run the following command:

```{eval-rst}
.. prompt:: bash

   pip install flytekitplugins-bigquery
```

This plugin is purely a spec. Since SQL is completely portable, there is no need to build a Docker container.

## Example usage

For a usage example, see the {ref}`BigQuery example query <bigquery_plugin_example>` page.

## Flyte deployment configuration

BigQuery plugins are [enabled in FlytePropeller's config](https://docs.flyte.org/en/latest/deployment/plugin_setup/gcp/bigquery.html#deployment-plugin-setup-gcp-bigquery).

To run the BigQuery plugin on a Flyte cluster, you must configure it in your Flyte deployment. For more information, see the {ref}`BigQuery plugin setup guide <deployment-plugin-setup-gcp-bigquery>`.

```{toctree}
:maxdepth: -1
:hidden:

bigquery_plugin_example
```
55 changes: 55 additions & 0 deletions
docs/deprecated_integrations/databricks_plugin/databricks_plugin_example.md
(spark_on_databricks_plugin)=

# Running Spark on Databricks

```{note}
To clone and run the example code on this page, see the [Flytesnacks repo][flytesnacks].
```

To begin, import the required dependencies:

```{rli} https://raw.githubusercontent.com/flyteorg/flytesnacks/master/examples/databricks_plugin/databricks_plugin/databricks_plugin_example.py
:caption: databricks_plugin/databricks_plugin_example.py
:lines: 1-7
```
To run a Spark job on the Databricks platform, include the Databricks configuration in the task config. The Databricks config is the same as the Databricks job request. For more details, see the [Databricks job request](https://docs.databricks.com/dev-tools/api/2.0/jobs.html#request-structure) documentation.

```{rli} https://raw.githubusercontent.com/flyteorg/flytesnacks/master/examples/databricks_plugin/databricks_plugin/databricks_plugin_example.py
:caption: databricks_plugin/databricks_plugin_example.py
:pyobject: hello_spark
```
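Since the Databricks config mirrors the job request structure, a `databricks_conf` dictionary might look like the following sketch. All field values here are placeholders chosen for illustration, not recommendations:

```python
# Hypothetical databricks_conf mirroring the Databricks job request
# structure (run_name, new_cluster, etc.); values are placeholders.
databricks_conf = {
    "run_name": "flytekit databricks plugin example",
    "new_cluster": {
        "spark_version": "11.0.x-scala2.12",
        "node_type_id": "r3.xlarge",
        "num_workers": 4,
    },
    "timeout_seconds": 3600,
    "max_retries": 1,
}

# In a task, this dict would be passed through the task config, e.g.
# @task(task_config=Databricks(spark_conf={...}, databricks_conf=databricks_conf))
print(databricks_conf["run_name"])
```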
For this example, we define a function that executes a map-reduce operation within the Spark cluster.

```{rli} https://raw.githubusercontent.com/flyteorg/flytesnacks/master/examples/databricks_plugin/databricks_plugin/databricks_plugin_example.py
:caption: databricks_plugin/databricks_plugin_example.py
:pyobject: f
```

Additionally, we define a standard Flyte task that won't be executed on the Spark cluster.

```{rli} https://raw.githubusercontent.com/flyteorg/flytesnacks/master/examples/databricks_plugin/databricks_plugin/databricks_plugin_example.py
:caption: databricks_plugin/databricks_plugin_example.py
:pyobject: print_every_time
```
Finally, define a workflow that connects the tasks in a sequence. Spark and non-Spark tasks can be chained together as long as their parameter specifications match.

```{rli} https://raw.githubusercontent.com/flyteorg/flytesnacks/master/examples/databricks_plugin/databricks_plugin/databricks_plugin_example.py
:caption: databricks_plugin/databricks_plugin_example.py
:pyobject: my_databricks_job
```
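To see why matching parameter specifications matter, here is an illustrative sketch with plain Python functions standing in for the tasks (no flytekit involved); the float produced by the Spark stand-in is exactly what the downstream task consumes:

```python
# Plain functions standing in for Flyte tasks, illustrating that tasks chain
# cleanly when the output type of one matches the input type of the next.
def hello_spark(partitions: int) -> float:
    # Stand-in for the Spark task: returns a float (e.g., an estimate of pi).
    return 3.14

def print_every_time(value_to_print: float) -> None:
    # Stand-in for the non-Spark task: consumes the float produced upstream.
    print(f"My printed value: {value_to_print}")

def my_databricks_job(partitions: int) -> float:
    # Workflow stand-in: hello_spark's float output feeds print_every_time's
    # float input, so the two tasks can be chained.
    pi = hello_spark(partitions=partitions)
    print_every_time(value_to_print=pi)
    return pi

my_databricks_job(partitions=10)
```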
You can execute the workflow locally.

```{rli} https://raw.githubusercontent.com/flyteorg/flytesnacks/master/examples/databricks_plugin/databricks_plugin/databricks_plugin_example.py
:caption: databricks_plugin/databricks_plugin_example.py
:lines: 79-83
```

[flytesnacks]: https://github.com/flyteorg/flytesnacks/tree/master/examples/databricks_plugin
62 changes: 62 additions & 0 deletions
# Databricks plugin

```{eval-rst}
.. tags:: Spark, Integration, DistributedComputing, Data, Advanced
```

```{note}
This is a legacy implementation of the Databricks integration. We recommend using the {ref}`Databricks agent <databricks_agent>` instead.
```

Flyte can be integrated with the [Databricks](https://www.databricks.com/) service, enabling you to submit Spark jobs to the Databricks platform.

## Installation

The Databricks plugin comes bundled with the Spark plugin. To install the Spark plugin, run the following command:

```shell
pip install flytekitplugins-spark
```
## Flyte deployment configuration

To run the Databricks plugin on a Flyte cluster, you must configure it in your Flyte deployment. For more information, see the {std:ref}`Databricks plugin setup guide <flyte:deployment-plugin-setup-webapi-databricks>`.

## Example usage

For a usage example, see the {doc}`Databricks plugin example <databricks_plugin_example>` page.

### Run the example on the Flyte cluster

To run the provided example on a Flyte cluster, use the following command:

```shell
pyflyte run --remote \
  --image ghcr.io/flyteorg/flytecookbook:databricks_plugin-latest \
  https://raw.githubusercontent.com/flyteorg/flytesnacks/master/examples/databricks_plugin/databricks_plugin/databricks_job.py \
  my_databricks_job
```

Using Spark on Databricks allows comprehensive versioning through a custom-built Spark container. This container also facilitates the execution of standard Spark tasks.

To use Spark, the image should employ a base image provided by Databricks, and the workflow code must be copied to `/databricks/driver`.

```{rli} https://raw.githubusercontent.com/flyteorg/flytesnacks/master/examples/databricks_plugin/Dockerfile
:language: docker
:emphasize-lines: 1,7-8,20
```

```{toctree}
:maxdepth: -1
:hidden:

databricks_plugin_example
```
25 changes: 25 additions & 0 deletions
# Deprecated integrations

```{list-table}
:header-rows: 0
:widths: 20 30

* - {doc}`BigQuery plugin <bigquery_plugin/index>`
  - Deprecated BigQuery plugin.
* - {doc}`Databricks <databricks_plugin/index>`
  - Deprecated Databricks plugin.
* - {doc}`Memory Machine Cloud <mmcloud_plugin/index>`
  - Deprecated MemVerge Memory Machine Cloud plugin.
* - {doc}`Snowflake <snowflake_plugin/index>`
  - Deprecated Snowflake plugin.
```

```{toctree}
:maxdepth: 1
:hidden:

bigquery_plugin/index
databricks_plugin/index
mmcloud_plugin/index
snowflake_plugin/index
```
98 changes: 98 additions & 0 deletions
```{eval-rst}
.. tags:: AWS, GCP, AliCloud, Integration, Advanced
```

(mmcloud_plugin)=

# Memory Machine Cloud plugin

```{note}
This is a legacy implementation of the Memory Machine Cloud integration. We recommend using the {ref}`Memory Machine Cloud agent <mmcloud_agent>` instead.
```

[MemVerge](https://memverge.com/) [Memory Machine Cloud](https://www.mmcloud.io/) (MMCloud), available on AWS, GCP, and AliCloud, empowers users to continuously optimize cloud resources during runtime, safely execute stateful tasks on spot instances, and monitor resource usage in real time. These capabilities make it an excellent fit for long-running batch workloads. Flyte can be integrated with MMCloud, allowing you to execute Flyte tasks using MMCloud.

## Installation

To install the plugin, run the following command:

```{eval-rst}
.. prompt:: bash

   pip install flytekitplugins-mmcloud
```

To get started with MMCloud, see the [MMCloud user guide](https://docs.memverge.com/mmce/current/userguide/olh/index.html).

## Flyte deployment configuration

The MMCloud plugin is [enabled in FlytePropeller's configuration](https://docs.flyte.org/en/latest/deployment/plugins/memverge/mmcloud.html).
||
## Getting Started | ||
|
||
This plugin allows executing `PythonFunctionTask` using MMCloud without changing any function code. | ||
|
||
```{eval-rst} | ||
.. testcode:: awsbatch-quickstart | ||
from flytekitplugins.mmcloud import MMCloudConfig | ||
@task(task_config=MMCloudConfig()) | ||
def to_str(i: int) -> str: | ||
return str(i) | ||
``` | ||
[Resource](https://docs.flyte.org/en/latest/user_guide/productionizing/customizing_task_resources.html) (cpu and mem) requests and limits, [container](https://docs.flyte.org/en/latest/user_guide/customizing_dependencies/multiple_images_in_a_workflow.html) images, and environment variable specifications are supported.

[ImageSpec](https://docs.flyte.org/en/latest/user_guide/customizing_dependencies/imagespec.html#image-spec-example) may be used to define the images used to run tasks.

### Credentials

The following [secrets](https://docs.flyte.org/en/latest/user_guide/productionizing/secrets.html) must be defined for the agent server:

* `mmc_address`: MMCloud OpCenter address
* `mmc_username`: MMCloud OpCenter username
* `mmc_password`: MMCloud OpCenter password
### Defaults

Compute resources:

* If only requests are specified, there are no limits.
* If only limits are specified, the requests are equal to the limits.
* If neither resource requests nor limits are specified, the default requests used for job submission are `cpu="1"` and `mem="1Gi"`, and there are no limits.
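These rules can be restated as a small resolution function; this is a sketch of the documented behavior, not the plugin's actual implementation:

```python
# Sketch of the documented request/limit resolution rules; not the actual
# flytekitplugins-mmcloud code.
from typing import Optional, Tuple

DEFAULT_REQUESTS = {"cpu": "1", "mem": "1Gi"}

def resolve_resources(
    requests: Optional[dict] = None,
    limits: Optional[dict] = None,
) -> Tuple[dict, Optional[dict]]:
    """Return (requests, limits) following the documented defaults."""
    if requests and not limits:
        # Only requests specified: no limits.
        return requests, None
    if limits and not requests:
        # Only limits specified: requests equal the limits.
        return dict(limits), limits
    if not requests and not limits:
        # Neither specified: default requests, no limits.
        return dict(DEFAULT_REQUESTS), None
    return requests, limits

print(resolve_resources())
```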
### Agent image

Install `flytekitplugins-mmcloud` in the agent image.

A `float` binary (obtainable via the OpCenter) is required. Copy it to the agent image `PATH`.

Sample `Dockerfile` for building an agent image:

```dockerfile
FROM python:3.11-slim-bookworm

WORKDIR /root
ENV PYTHONPATH /root

# flytekit will autoload the agent if the package is installed.
RUN pip install flytekitplugins-mmcloud
COPY float /usr/local/bin/float

# For flytekit versions <= v1.10.2, use pyflyte serve.
# CMD pyflyte serve --port 8000
# For flytekit versions > v1.10.2, use pyflyte serve agent.
CMD pyflyte serve agent --port 8000
```
## Example usage

For a usage example, see the {doc}`Memory Machine Cloud example <mmcloud_plugin_example>` page.

```{toctree}
:maxdepth: -1
:hidden:

mmcloud_plugin_example
```