From 251a91bbfb2f08ee4d20a9f1b71421cf6bc086fa Mon Sep 17 00:00:00 2001 From: kathweinschenkprophecy Date: Wed, 11 Dec 2024 21:21:42 +0000 Subject: [PATCH] deploy: ffae0c7372531bdc20fd552ed332db7afda7e98a --- 404.html | 11 +++++------ Orchestration/airflow/index.html | 11 +++++------ Orchestration/airflow/setup/MWAA_fabric/index.html | 11 +++++------ .../airflow/setup/composer_fabric/index.html | 11 +++++------ Orchestration/airflow/setup/index.html | 11 +++++------ .../setup/prophecy-managed/connections/index.html | 11 +++++------ .../index.html | 11 +++++------ .../index.html | 11 +++++------ .../index.html | 11 +++++------ .../index.html | 11 +++++------ .../index.html | 11 +++++------ .../airflow/setup/prophecy-managed/index.html | 11 +++++------ .../index.html | 11 +++++------ Orchestration/alternative-schedulers/index.html | 11 +++++------ Orchestration/databricks-jobs/index.html | 11 +++++------ Orchestration/index.html | 11 +++++------ SQL/data-tests/index.html | 11 +++++------ SQL/data-tests/use-model-tests/index.html | 11 +++++------ SQL/data-tests/use-project-tests/index.html | 11 +++++------ SQL/development/code-editor/index.html | 11 +++++------ SQL/development/index.html | 11 +++++------ SQL/development/target-models/index.html | 11 +++++------ SQL/development/target-models/location/index.html | 11 +++++------ SQL/development/target-models/schema/index.html | 11 +++++------ SQL/development/target-models/sql-query/index.html | 11 +++++------ .../target-models/type-and-format/index.html | 11 +++++------ .../target-models/write-options/index.html | 11 +++++------ SQL/development/visual-editor/index.html | 11 +++++------ .../visual-editor/variant-schema/index.html | 11 +++++------ .../visual-expression-builder/index.html | 11 +++++------ .../use-the-expression-builder/index.html | 11 +++++------ .../visual-expression-builder-reference/index.html | 11 +++++------ SQL/execution/data-explorer/index.html | 11 +++++------ SQL/execution/index.html | 11 
+++++------ SQL/extensibility/dependencies/index.html | 11 +++++------ SQL/extensibility/gem-builder/index.html | 11 +++++------ SQL/extensibility/index.html | 11 +++++------ SQL/fabrics/databricks/index.html | 11 +++++------ SQL/fabrics/index.html | 11 +++++------ SQL/fabrics/snowflake/index.html | 11 +++++------ SQL/gems/custom/index.html | 13 ++++++------- SQL/gems/data-joins/index.html | 13 ++++++------- SQL/gems/datasources/index.html | 11 +++++------ SQL/gems/datasources/upload-files/index.html | 11 +++++------ SQL/gems/index.html | 11 +++++------ SQL/gems/subgraph/index.html | 13 ++++++------- SQL/gems/transform/deduplicate/index.html | 13 ++++++------- SQL/gems/transform/flattenschema/index.html | 13 ++++++------- SQL/gems/transform/index.html | 11 +++++------ SQL/gems/transform/sql-aggregate/index.html | 13 ++++++------- SQL/index.html | 11 +++++------ Spark/best-practices/index.html | 11 +++++------ Spark/best-practices/use-dbx-secrets/index.html | 11 +++++------ .../configuration/conditional-execution/index.html | 11 +++++------ Spark/configuration/index.html | 11 +++++------ Spark/execution/data-explorer/index.html | 11 +++++------ Spark/execution/execution-metrics/index.html | 11 +++++------ .../executions_on_databricks_clusters/index.html | 11 +++++------ .../executions_on_livy_clusters/index.html | 11 +++++------ Spark/execution/index.html | 11 +++++------ Spark/execution/interactive-execution/index.html | 11 +++++------ Spark/expression-builder/index.html | 11 +++++------ Spark/extensibility/dependencies/index.html | 11 +++++------ Spark/extensibility/gem-builder/index.html | 11 +++++------ .../gem-builder/optimization-functions/index.html | 11 +++++------ Spark/extensibility/index.html | 11 +++++------ Spark/extensibility/udfs/index.html | 11 +++++------ Spark/fabrics/azure-synapse-fabric-guide/index.html | 11 +++++------ Spark/fabrics/databricks-fabric/index.html | 11 +++++------ .../dataproc/gcp-dataproc-fabric-tips/index.html | 11 +++++------ 
Spark/fabrics/dataproc/index.html | 11 +++++------ Spark/fabrics/emr/index.html | 11 +++++------ Spark/fabrics/fabric-diagnostics/index.html | 11 +++++------ Spark/fabrics/index.html | 11 +++++------ Spark/fabrics/livy/index.html | 11 +++++------ .../fabrics/prophecy-managed-databricks/index.html | 11 +++++------ Spark/gems/custom/delta-ops/index.html | 13 ++++++------- Spark/gems/custom/file-operations/index.html | 13 ++++++------- Spark/gems/custom/index.html | 11 +++++------ Spark/gems/custom/rest-api-enrich/index.html | 13 ++++++------- Spark/gems/custom/script/index.html | 13 ++++++------- Spark/gems/custom/sql-statement/index.html | 13 ++++++------- Spark/gems/index.html | 11 +++++------ Spark/gems/join-split/Repartition/index.html | 13 ++++++------- Spark/gems/join-split/compare-columns/index.html | 13 ++++++------- Spark/gems/join-split/index.html | 11 +++++------ Spark/gems/join-split/join/index.html | 13 ++++++------- Spark/gems/join-split/row-distributor/index.html | 13 ++++++------- Spark/gems/machine-learning/index.html | 11 +++++------ Spark/gems/machine-learning/ml-openai/index.html | 13 ++++++------- .../machine-learning/ml-pinecone-lookup/index.html | 13 ++++++------- .../machine-learning/ml-text-processing/index.html | 13 ++++++------- Spark/gems/source-target/advanced/lookup/index.html | 13 ++++++------- .../advanced/synthetic-data-generator/index.html | 13 ++++++------- .../synthetic-data-generator/providers/index.html | 11 +++++------ .../source-target/catalog-table/delta/index.html | 11 +++++------ .../source-target/catalog-table/hive/index.html | 11 +++++------ Spark/gems/source-target/catalog-table/index.html | 11 +++++------ Spark/gems/source-target/file/avro/index.html | 11 +++++------ Spark/gems/source-target/file/csv/index.html | 11 +++++------ Spark/gems/source-target/file/delta/index.html | 11 +++++------ .../gems/source-target/file/fixed-format/index.html | 11 +++++------ Spark/gems/source-target/file/iceberg/index.html | 11 
+++++------ Spark/gems/source-target/file/index.html | 11 +++++------ Spark/gems/source-target/file/json/index.html | 11 +++++------ Spark/gems/source-target/file/kafka/index.html | 11 +++++------ Spark/gems/source-target/file/orc/index.html | 11 +++++------ Spark/gems/source-target/file/parquet/index.html | 11 +++++------ Spark/gems/source-target/file/text/index.html | 11 +++++------ Spark/gems/source-target/file/xlsx/index.html | 11 +++++------ Spark/gems/source-target/index.html | 11 +++++------ .../source-target/warehouse/bigquery/index.html | 11 +++++------ .../gems/source-target/warehouse/cosmos/index.html | 11 +++++------ Spark/gems/source-target/warehouse/db2/index.html | 11 +++++------ Spark/gems/source-target/warehouse/index.html | 11 +++++------ Spark/gems/source-target/warehouse/jdbc/index.html | 11 +++++------ .../gems/source-target/warehouse/mongodb/index.html | 11 +++++------ .../gems/source-target/warehouse/oracle/index.html | 11 +++++------ .../source-target/warehouse/redshift/index.html | 11 +++++------ .../source-target/warehouse/salesforce/index.html | 11 +++++------ .../source-target/warehouse/snowflake/index.html | 11 +++++------ .../source-target/warehouse/teradata/index.html | 11 +++++------ Spark/gems/subgraph/basic-subgraph/index.html | 13 ++++++------- Spark/gems/subgraph/index.html | 11 +++++------ Spark/gems/subgraph/table-iterator/index.html | 13 ++++++------- Spark/gems/transform/aggregate/index.html | 13 ++++++------- .../transform/bulk-column-expressions/index.html | 13 ++++++------- Spark/gems/transform/bulk-column-rename/index.html | 13 ++++++------- Spark/gems/transform/data-cleansing/index.html | 13 ++++++------- Spark/gems/transform/deduplicate/index.html | 13 ++++++------- Spark/gems/transform/dynamic-select/index.html | 13 ++++++------- Spark/gems/transform/filter/index.html | 13 ++++++------- Spark/gems/transform/flatten-schema/index.html | 13 ++++++------- Spark/gems/transform/index.html | 11 +++++------ 
Spark/gems/transform/limit/index.html | 13 ++++++------- Spark/gems/transform/order-by/index.html | 13 ++++++------- Spark/gems/transform/reformat/index.html | 13 ++++++------- Spark/gems/transform/schema-transform/index.html | 13 ++++++------- Spark/gems/transform/set-operation/index.html | 13 ++++++------- Spark/gems/transform/unpivot/index.html | 13 ++++++------- Spark/gems/transform/window-function/index.html | 13 ++++++------- Spark/index.html | 11 +++++------ .../enable-pipeline-monitoring/index.html | 11 +++++------ Spark/pipeline-monitoring/index.html | 11 +++++------ .../use-pipeline-monitoring/index.html | 11 +++++------ .../secret-management/databricks-secrets/index.html | 11 +++++------ Spark/secret-management/env-variable/index.html | 11 +++++------ Spark/secret-management/hashicorp-vault/index.html | 11 +++++------ Spark/secret-management/index.html | 11 +++++------ Spark/secret-management/using-secrets/index.html | 11 +++++------ Spark/spark-streaming/index.html | 11 +++++------ .../streaming-sources-and-targets/index.html | 11 +++++------ .../streaming-event-apps/index.html | 11 +++++------ .../streaming-file-apps/index.html | 11 +++++------ .../streaming-warehouse-apps/index.html | 11 +++++------ .../transformations-streaming/index.html | 11 +++++------ Spark/tests/index.html | 11 +++++------ architecture/deployment/index.html | 11 +++++------ architecture/index.html | 11 +++++------ .../authentication/active_directory/index.html | 11 +++++------ .../self-hosted/authentication/azure-ad/index.html | 11 +++++------ .../authentication/azuread-scim/index.html | 11 +++++------ architecture/self-hosted/authentication/index.html | 11 +++++------ .../self-hosted/authentication/saml-okta/index.html | 11 +++++------ .../authentication/security-settings/index.html | 11 +++++------ .../configurations/configure-alerts/index.html | 11 +++++------ .../configurations/configure-audit-logs/index.html | 11 +++++------ .../configure-object-store/index.html | 11 
+++++------ architecture/self-hosted/configurations/index.html | 11 +++++------ .../configurations/sandbox-configuration/index.html | 11 +++++------ architecture/self-hosted/download-logs/index.html | 11 +++++------ .../self-hosted/generate-api-key/index.html | 11 +++++------ architecture/self-hosted/index.html | 11 +++++------ .../self-hosted/installation-helm/index.html | 11 +++++------ .../installation-helm/install-on-aws/index.html | 11 +++++------ .../self-hosted/upgrade-backup-restore/index.html | 11 +++++------ assets/css/styles.072e67d9.css | 1 - assets/css/styles.281c4cdc.css | 1 + .../{0207d280.8628b197.js => 0207d280.5cc53b80.js} | 2 +- assets/js/07e49c2d.2fda557a.js | 1 - assets/js/07e49c2d.642c6d62.js | 1 + .../{13b3561e.091b2efb.js => 13b3561e.79dc4954.js} | 2 +- assets/js/1d7b424d.a5664aae.js | 1 + assets/js/1d7b424d.b6e07850.js | 1 - assets/js/23d439be.5f3a2fdb.js | 1 - assets/js/23d439be.715e3e9d.js | 1 + assets/js/326b65c5.b5317393.js | 1 - assets/js/326b65c5.ed0f6532.js | 1 + .../{332c99fa.cbba2096.js => 332c99fa.be6a2311.js} | 2 +- .../{47cf1bcd.3f1ec781.js => 47cf1bcd.71e9a197.js} | 2 +- assets/js/486ce9f7.1b8dcbcc.js | 1 - assets/js/486ce9f7.c508bfda.js | 1 + .../{4ce6e96f.18e4256a.js => 4ce6e96f.e771c6f2.js} | 2 +- assets/js/50ddc816.41a1dded.js | 1 - assets/js/50ddc816.96a6828a.js | 1 + .../{5714fd1a.3da2ac7e.js => 5714fd1a.ca967822.js} | 2 +- .../{5b2eddc9.673748c2.js => 5b2eddc9.43f64355.js} | 2 +- assets/js/5beb85dd.5ace51e4.js | 1 - assets/js/5beb85dd.bb97cd1a.js | 1 + assets/js/61a8b9e9.799b17eb.js | 1 + assets/js/61a8b9e9.df6b2808.js | 1 - .../{6e9ec4f2.50888d14.js => 6e9ec4f2.466f344b.js} | 2 +- assets/js/77c4a354.5bb6b04f.js | 1 + assets/js/77c4a354.7dbd59b6.js | 1 - assets/js/809b845a.5682d8ab.js | 1 - assets/js/809b845a.697bc25e.js | 1 + assets/js/8ddf4ff6.80419e01.js | 1 + assets/js/8ddf4ff6.a1ff605e.js | 1 - assets/js/92b0f648.268d49ab.js | 1 - assets/js/92b0f648.8a4e717e.js | 1 + assets/js/9a47c610.b199ea30.js | 1 + 
assets/js/9a47c610.d2681540.js | 1 - assets/js/a135f75b.2c24fc77.js | 1 - assets/js/a135f75b.54f84a18.js | 1 + .../{a5d53aff.43621ef8.js => a5d53aff.a04c4099.js} | 2 +- .../{c327a517.6c3b4d95.js => c327a517.4088179e.js} | 2 +- assets/js/d1e881cf.7130d71d.js | 1 - assets/js/d1e881cf.a8034068.js | 1 + .../{d7087486.38b0dc27.js => d7087486.e025e5a9.js} | 2 +- .../{db3522d0.6c7d2099.js => db3522d0.4b7a81c5.js} | 2 +- .../{dca7a11b.199de914.js => dca7a11b.687f5c42.js} | 2 +- assets/js/dda96e46.272e2c52.js | 1 + assets/js/dda96e46.c89988f8.js | 1 - assets/js/e5297273.32262110.js | 1 - assets/js/e5297273.920b27aa.js | 1 + assets/js/ec996830.339685b4.js | 1 + assets/js/ec996830.a0758903.js | 1 - assets/js/f11ee91b.07a32cba.js | 1 + assets/js/f11ee91b.be73946a.js | 1 - assets/js/f29eab11.27d94399.js | 1 - assets/js/f29eab11.5672147f.js | 1 + assets/js/f779aca8.5d6c571b.js | 1 + assets/js/f779aca8.8fd6a6d1.js | 1 - assets/js/fe598bea.3b03a0a7.js | 1 + assets/js/fe598bea.8d795564.js | 1 - .../{fe6a71b8.dd8a21f3.js => fe6a71b8.d1c4ae35.js} | 2 +- .../{fec9a08a.1405b60a.js => fec9a08a.f73987e7.js} | 2 +- assets/js/{main.d1d4b5f9.js => main.5871d47d.js} | 4 ++-- ....js.LICENSE.txt => main.5871d47d.js.LICENSE.txt} | 0 ...me~main.40bd2e59.js => runtime~main.3a9e292f.js} | 2 +- concepts/copilot/copilot-ai-capabilities/index.html | 11 +++++------ concepts/copilot/copilot-data-privacy/index.html | 11 +++++------ concepts/copilot/enable-data-copilot/index.html | 11 +++++------ concepts/copilot/index.html | 11 +++++------ concepts/dataset/index.html | 11 +++++------ concepts/fabrics/index.html | 11 +++++------ concepts/fabrics/prophecy-libraries/index.html | 11 +++++------ concepts/index.html | 11 +++++------ concepts/project/Model/index.html | 11 +++++------ concepts/project/gems/index.html | 11 +++++------ concepts/project/index.html | 11 +++++------ concepts/project/pipeline/index.html | 11 +++++------ concepts/teamuser/index.html | 11 +++++------ deployment/index.html | 11 
+++++------ deployment/prophecy-build-tool/index.html | 11 +++++------ .../prophecy-build-tool-github-actions/index.html | 11 +++++------ .../prophecy-build-tool-jenkins/index.html | 11 +++++------ deployment/use-external-release-tags/index.html | 11 +++++------ feature-matrix/index.html | 11 +++++------ getting-started/airflow/index.html | 11 +++++------ getting-started/gen-ai-chatbot/index.html | 11 +++++------ getting-started/getting-help/index.html | 11 +++++------ getting-started/index.html | 11 +++++------ getting-started/spark-with-databricks/index.html | 11 +++++------ getting-started/sql-with-databricks/index.html | 11 +++++------ getting-started/sql-with-snowflake/index.html | 11 +++++------ index.html | 11 +++++------ mdapi/enums/index.html | 11 +++++------ mdapi/index.html | 11 +++++------ mdapi/inputs/index.html | 11 +++++------ mdapi/types/index.html | 11 +++++------ metadata/Project Metadata/index.html | 11 +++++------ metadata/audit-logging/index.html | 11 +++++------ metadata/git/git-commit/index.html | 11 +++++------ metadata/git/git-fork/index.html | 11 +++++------ metadata/git/git-merge/index.html | 11 +++++------ metadata/git/git-resolve/index.html | 11 +++++------ metadata/git/index.html | 11 +++++------ metadata/index.html | 11 +++++------ metadata/lineage/index.html | 11 +++++------ .../lineage/lineage-run-and-diagnose/index.html | 11 +++++------ metadata/lineage/lineage-view-and-search/index.html | 11 +++++------ metadata/metadata-connections/index.html | 11 +++++------ metadata/pr-templates/index.html | 11 +++++------ metadata/prophecyAPI/index.html | 11 +++++------ package-hub/index.html | 11 +++++------ package-hub/package-builder/Gem-builder/index.html | 11 +++++------ package-hub/package-builder/index.html | 11 +++++------ .../package-builder/sharable-udfs/index.html | 11 +++++------ .../package-builder/shareable-datasets/index.html | 11 +++++------ .../package-builder/shareable-pipelines/index.html | 11 +++++------ 
.../package-builder/shareable-subgraphs/index.html | 11 +++++------ prophecy-ir/index.html | 11 +++++------ release_notes/2023/Apr_2023/index.html | 11 +++++------ release_notes/2023/August_2023/index.html | 11 +++++------ release_notes/2023/December_2023/index.html | 11 +++++------ release_notes/2023/Feb_2023/index.html | 11 +++++------ release_notes/2023/July_2023/index.html | 11 +++++------ release_notes/2023/June_2023/index.html | 11 +++++------ release_notes/2023/Mar_2023/index.html | 11 +++++------ release_notes/2023/May_2023/index.html | 11 +++++------ release_notes/2023/November_2023/index.html | 11 +++++------ release_notes/2023/October_2023/index.html | 11 +++++------ release_notes/2023/September_2023/index.html | 11 +++++------ release_notes/2024/April_2024/index.html | 11 +++++------ release_notes/2024/August_2024/index.html | 11 +++++------ .../August_2024/new-ui-sql-onboarding/index.html | 11 +++++------ release_notes/2024/Feb_2024/index.html | 11 +++++------ release_notes/2024/Jan_2024/index.html | 11 +++++------ release_notes/2024/July_2024/index.html | 11 +++++------ release_notes/2024/June_2024/index.html | 11 +++++------ release_notes/2024/March_2024/index.html | 11 +++++------ release_notes/2024/May_2024/index.html | 11 +++++------ release_notes/2024/November_2024/index.html | 11 +++++------ release_notes/2024/October_2024/index.html | 11 +++++------ .../webinar_new_features/ai_capabilities/index.html | 11 +++++------ .../development_highlights/index.html | 11 +++++------ .../October_2024/webinar_new_features/index.html | 11 +++++------ .../webinar_new_features/observability/index.html | 11 +++++------ release_notes/2024/September_2024/index.html | 11 +++++------ release_notes/index.html | 11 +++++------ release_notes/version_chart/index.html | 11 +++++------ .../version_chart/versions_support/index.html | 11 +++++------ search/index.html | 11 +++++------ settings/index.html | 11 +++++------ tags/active-directory/index.html | 11 +++++------ 
tags/ad-hoc/index.html | 11 +++++------ tags/ad/index.html | 11 +++++------ tags/admin/index.html | 11 +++++------ tags/aggregate/index.html | 11 +++++------ tags/airflow/index.html | 11 +++++------ tags/alerting/index.html | 11 +++++------ tags/alerts/index.html | 11 +++++------ tags/answer/index.html | 11 +++++------ tags/api/index.html | 11 +++++------ tags/april/index.html | 11 +++++------ tags/ascending/index.html | 11 +++++------ tags/audit-events/index.html | 11 +++++------ tags/audit-logs/index.html | 11 +++++------ tags/august/index.html | 11 +++++------ tags/authentication/index.html | 11 +++++------ tags/avro/index.html | 11 +++++------ tags/aws/index.html | 11 +++++------ tags/azure-blob-storage/index.html | 11 +++++------ tags/azure-blob/index.html | 11 +++++------ tags/azure/index.html | 11 +++++------ tags/azuread/index.html | 11 +++++------ tags/backup/index.html | 11 +++++------ tags/best-practices/index.html | 11 +++++------ tags/bigquery/index.html | 11 +++++------ tags/build/index.html | 11 +++++------ tags/capabilities/index.html | 11 +++++------ tags/catalog/index.html | 11 +++++------ tags/cdp/index.html | 11 +++++------ tags/changelog/index.html | 11 +++++------ tags/chart/index.html | 11 +++++------ tags/chatbot/index.html | 11 +++++------ tags/chunk/index.html | 11 +++++------ tags/cicd/index.html | 11 +++++------ tags/clean/index.html | 11 +++++------ tags/cli/index.html | 11 +++++------ tags/cloudera/index.html | 11 +++++------ tags/coalesce/index.html | 11 +++++------ tags/code/index.html | 11 +++++------ tags/columns/index.html | 11 +++++------ tags/commit/index.html | 11 +++++------ tags/compare-columns/index.html | 11 +++++------ tags/compare/index.html | 11 +++++------ tags/compatibility/index.html | 11 +++++------ tags/composer/index.html | 11 +++++------ tags/concept/index.html | 11 +++++------ tags/concepts/index.html | 11 +++++------ tags/conditional/index.html | 11 +++++------ tags/config/index.html | 11 +++++------ 
tags/configuration/index.html | 11 +++++------ tags/configurations/index.html | 11 +++++------ tags/connect/index.html | 11 +++++------ tags/connections/index.html | 11 +++++------ tags/continuous-deployment/index.html | 11 +++++------ tags/continuous-integration/index.html | 11 +++++------ tags/copilot/index.html | 11 +++++------ tags/cosmos/index.html | 11 +++++------ tags/count/index.html | 11 +++++------ tags/csv/index.html | 11 +++++------ tags/cte/index.html | 11 +++++------ tags/custom/index.html | 11 +++++------ tags/data-privacy/index.html | 11 +++++------ tags/data/index.html | 11 +++++------ tags/databricks-secrets/index.html | 11 +++++------ tags/databricks/index.html | 11 +++++------ tags/databricksworkflow/index.html | 11 +++++------ tags/dataproc/index.html | 11 +++++------ tags/datasets/index.html | 11 +++++------ tags/db-2/index.html | 11 +++++------ tags/dbfs/index.html | 11 +++++------ tags/dbt/index.html | 11 +++++------ tags/dedupe/index.html | 11 +++++------ tags/delta/index.html | 11 +++++------ tags/dependencies/index.html | 11 +++++------ tags/dependency/index.html | 11 +++++------ tags/deploy/index.html | 11 +++++------ tags/deployment/index.html | 11 +++++------ tags/descending/index.html | 11 +++++------ tags/development/index.html | 11 +++++------ tags/devops/index.html | 11 +++++------ tags/diagnose/index.html | 11 +++++------ tags/diagnostics/index.html | 11 +++++------ tags/diff/index.html | 11 +++++------ tags/difference/index.html | 11 +++++------ tags/disaster-recovery/index.html | 11 +++++------ tags/distinct/index.html | 11 +++++------ tags/download/index.html | 11 +++++------ tags/dynamic/index.html | 11 +++++------ tags/email/index.html | 11 +++++------ tags/embedding/index.html | 11 +++++------ tags/emr/index.html | 11 +++++------ tags/enterprise/index.html | 11 +++++------ tags/env-var/index.html | 11 +++++------ tags/environment/index.html | 11 +++++------ tags/execution/index.html | 11 +++++------ tags/explode/index.html | 
11 +++++------ tags/explorer/index.html | 11 +++++------ tags/expression-builder/index.html | 11 +++++------ tags/expression/index.html | 11 +++++------ tags/expressions/index.html | 11 +++++------ tags/extensibility/index.html | 11 +++++------ tags/external/index.html | 11 +++++------ tags/extract/index.html | 11 +++++------ tags/fabric/index.html | 11 +++++------ tags/fabrics/index.html | 11 +++++------ tags/fake/index.html | 11 +++++------ tags/file-based/index.html | 11 +++++------ tags/file/index.html | 11 +++++------ tags/filter/index.html | 11 +++++------ tags/fixed-format/index.html | 11 +++++------ tags/flatten/index.html | 11 +++++------ tags/fork/index.html | 11 +++++------ tags/format/index.html | 11 +++++------ tags/functionality/index.html | 11 +++++------ tags/functions/index.html | 11 +++++------ tags/gcp/index.html | 11 +++++------ tags/gcs/index.html | 11 +++++------ tags/gem-builder/index.html | 11 +++++------ tags/gem/index.html | 11 +++++------ tags/gems/index.html | 11 +++++------ tags/generate/index.html | 11 +++++------ tags/generative-ai/index.html | 11 +++++------ tags/generativeai/index.html | 11 +++++------ tags/generator/index.html | 11 +++++------ tags/git/index.html | 11 +++++------ tags/github-actions/index.html | 11 +++++------ tags/google/index.html | 11 +++++------ tags/group-by/index.html | 11 +++++------ tags/group/index.html | 11 +++++------ tags/groupby/index.html | 11 +++++------ tags/guide/index.html | 11 +++++------ tags/hashicorp/index.html | 11 +++++------ tags/hdfs/index.html | 11 +++++------ tags/helm/index.html | 11 +++++------ tags/hints/index.html | 11 +++++------ tags/historical-runs/index.html | 11 +++++------ tags/hive/index.html | 11 +++++------ tags/how-to/index.html | 11 +++++------ tags/iceberg/index.html | 11 +++++------ tags/index.html | 11 +++++------ tags/infer/index.html | 11 +++++------ tags/inner/index.html | 11 +++++------ tags/installation/index.html | 11 +++++------ tags/instructions/index.html | 11 
+++++------ tags/interactive/index.html | 11 +++++------ tags/interim/index.html | 11 +++++------ tags/intersect/index.html | 11 +++++------ tags/iterator/index.html | 11 +++++------ tags/jdbc/index.html | 11 +++++------ tags/jenkins/index.html | 11 +++++------ tags/job/index.html | 11 +++++------ tags/jobs/index.html | 11 +++++------ tags/join-split/index.html | 11 +++++------ tags/join/index.html | 11 +++++------ tags/json/index.html | 11 +++++------ tags/july/index.html | 11 +++++------ tags/june/index.html | 11 +++++------ tags/kafka/index.html | 11 +++++------ tags/key/index.html | 11 +++++------ tags/keytab/index.html | 11 +++++------ tags/left-join/index.html | 11 +++++------ tags/library/index.html | 11 +++++------ tags/license/index.html | 11 +++++------ tags/limit/index.html | 11 +++++------ tags/lineage/index.html | 11 +++++------ tags/livy/index.html | 11 +++++------ tags/llm/index.html | 11 +++++------ tags/location/index.html | 11 +++++------ tags/longformat/index.html | 11 +++++------ tags/lookup/index.html | 11 +++++------ tags/loop/index.html | 11 +++++------ tags/machine-learning/index.html | 11 +++++------ tags/march/index.html | 11 +++++------ tags/matrix/index.html | 11 +++++------ tags/maven/index.html | 11 +++++------ tags/may/index.html | 11 +++++------ tags/merge/index.html | 11 +++++------ tags/metadata/index.html | 11 +++++------ tags/metrics/index.html | 11 +++++------ tags/mock/index.html | 11 +++++------ tags/model/index.html | 11 +++++------ tags/models/index.html | 11 +++++------ tags/mongodb/index.html | 11 +++++------ tags/monitoring/index.html | 11 +++++------ tags/mwaa/index.html | 11 +++++------ tags/nfs/index.html | 11 +++++------ tags/november/index.html | 11 +++++------ tags/object-store/index.html | 11 +++++------ tags/october/index.html | 11 +++++------ tags/okta/index.html | 11 +++++------ tags/open-source-spark/index.html | 11 +++++------ tags/openai/index.html | 11 +++++------ tags/oracle/index.html | 11 +++++------ 
tags/orc/index.html | 11 +++++------ tags/orchestration/index.html | 11 +++++------ tags/order-by/index.html | 11 +++++------ tags/outer/index.html | 11 +++++------ tags/package-hub/index.html | 11 +++++------ tags/package/index.html | 11 +++++------ tags/parquet/index.html | 11 +++++------ tags/partition/index.html | 11 +++++------ tags/passwords/index.html | 11 +++++------ tags/pinecone/index.html | 11 +++++------ tags/pipelines/index.html | 11 +++++------ tags/plib/index.html | 11 +++++------ tags/plibs/index.html | 11 +++++------ tags/pr/index.html | 11 +++++------ tags/project/index.html | 11 +++++------ tags/prophecy-managed/index.html | 11 +++++------ tags/provider/index.html | 11 +++++------ tags/pull-requests/index.html | 11 +++++------ tags/pullrequest/index.html | 11 +++++------ tags/python/index.html | 11 +++++------ tags/qa/index.html | 11 +++++------ tags/query/index.html | 11 +++++------ tags/question/index.html | 11 +++++------ tags/random/index.html | 11 +++++------ tags/recommendations/index.html | 11 +++++------ tags/redshift/index.html | 11 +++++------ tags/reference/index.html | 11 +++++------ tags/reformat/index.html | 11 +++++------ tags/release-notes/index.html | 11 +++++------ tags/release/index.html | 11 +++++------ tags/rename/index.html | 11 +++++------ tags/repartition/index.html | 11 +++++------ tags/reserve-pods/index.html | 11 +++++------ tags/resolve/index.html | 11 +++++------ tags/rest/index.html | 11 +++++------ tags/restore/index.html | 11 +++++------ tags/reusable/index.html | 11 +++++------ tags/right-join/index.html | 11 +++++------ tags/row-distributor/index.html | 11 +++++------ tags/run/index.html | 11 +++++------ tags/runs/index.html | 11 +++++------ tags/runtime-config/index.html | 11 +++++------ tags/s-3/index.html | 11 +++++------ tags/salesforce/index.html | 11 +++++------ tags/saml/index.html | 11 +++++------ tags/sandbox/index.html | 11 +++++------ tags/scala/index.html | 11 +++++------ tags/scd-2/index.html | 11 
+++++------ tags/schedule/index.html | 11 +++++------ tags/scheduling/index.html | 11 +++++------ tags/schema/index.html | 11 +++++------ tags/scim/index.html | 11 +++++------ tags/search/index.html | 11 +++++------ tags/secret-provider/index.html | 11 +++++------ tags/secrets/index.html | 11 +++++------ tags/security/index.html | 11 +++++------ tags/seeds/index.html | 11 +++++------ tags/select/index.html | 11 +++++------ tags/self-hosted/index.html | 11 +++++------ tags/self-managed/index.html | 11 +++++------ tags/september/index.html | 11 +++++------ tags/serverless/index.html | 11 +++++------ tags/set/index.html | 11 +++++------ tags/settings/index.html | 11 +++++------ tags/setup/index.html | 11 +++++------ tags/shared/index.html | 11 +++++------ tags/smtp/index.html | 11 +++++------ tags/snowflake/index.html | 11 +++++------ tags/sort/index.html | 11 +++++------ tags/source/index.html | 11 +++++------ tags/sources/index.html | 11 +++++------ tags/spark-submit/index.html | 11 +++++------ tags/spark/index.html | 11 +++++------ tags/split/index.html | 11 +++++------ tags/splunk/index.html | 11 +++++------ tags/sql/index.html | 11 +++++------ tags/streaming/index.html | 11 +++++------ tags/subgraph/index.html | 11 +++++------ tags/sum/index.html | 11 +++++------ tags/support-logs/index.html | 11 +++++------ tags/synase/index.html | 11 +++++------ tags/synthetic/index.html | 11 +++++------ tags/table/index.html | 11 +++++------ tags/tables/index.html | 11 +++++------ tags/tags/index.html | 11 +++++------ tags/target/index.html | 11 +++++------ tags/teams/index.html | 11 +++++------ tags/teradata/index.html | 11 +++++------ tags/test/index.html | 11 +++++------ tags/testing/index.html | 11 +++++------ tags/text-processing/index.html | 11 +++++------ tags/text/index.html | 11 +++++------ tags/transform/index.html | 11 +++++------ tags/transformation/index.html | 11 +++++------ tags/transformations/index.html | 11 +++++------ tags/trigger/index.html | 11 +++++------ 
tags/tutorial/index.html | 11 +++++------ tags/type/index.html | 11 +++++------ tags/udafs/index.html | 11 +++++------ tags/udfs/index.html | 11 +++++------ tags/union/index.html | 11 +++++------ tags/unique/index.html | 11 +++++------ tags/unit-tests/index.html | 11 +++++------ tags/unit/index.html | 11 +++++------ tags/unity-catalog/index.html | 11 +++++------ tags/unpivot/index.html | 11 +++++------ tags/upgrade/index.html | 11 +++++------ tags/upload/index.html | 11 +++++------ tags/user/index.html | 11 +++++------ tags/username/index.html | 11 +++++------ tags/users/index.html | 11 +++++------ tags/variable/index.html | 11 +++++------ tags/variant/index.html | 11 +++++------ tags/vault/index.html | 11 +++++------ tags/vector/index.html | 11 +++++------ tags/version/index.html | 11 +++++------ tags/view/index.html | 11 +++++------ tags/visual/index.html | 11 +++++------ tags/warehouse-based/index.html | 11 +++++------ tags/warehouse/index.html | 11 +++++------ tags/web-scraping/index.html | 11 +++++------ tags/webinar/index.html | 11 +++++------ tags/where/index.html | 11 +++++------ tags/wideformat/index.html | 11 +++++------ tags/window/index.html | 11 +++++------ tags/with-column/index.html | 11 +++++------ tags/write-options/index.html | 11 +++++------ tags/xlsx/index.html | 11 +++++------ tutorials/Orchestration/index.html | 11 +++++------ .../Orchestration/multi-jobs-trigger/index.html | 11 +++++------ tutorials/Orchestration/reliable-ci-cd/index.html | 11 +++++------ tutorials/Spark/index.html | 11 +++++------ tutorials/Spark/working-with-excel/index.html | 11 +++++------ tutorials/index.html | 11 +++++------ tutorials/videos/design-Pipeline/index.html | 11 +++++------ tutorials/videos/index.html | 11 +++++------ tutorials/videos/schedule-Pipeline/index.html | 11 +++++------ tutorials/videos/test-Pipeline/index.html | 11 +++++------ 656 files changed, 3038 insertions(+), 3630 deletions(-) delete mode 100644 assets/css/styles.072e67d9.css create mode 
100644 assets/css/styles.281c4cdc.css rename assets/js/{0207d280.8628b197.js => 0207d280.5cc53b80.js} (51%) delete mode 100644 assets/js/07e49c2d.2fda557a.js create mode 100644 assets/js/07e49c2d.642c6d62.js rename assets/js/{13b3561e.091b2efb.js => 13b3561e.79dc4954.js} (66%) create mode 100644 assets/js/1d7b424d.a5664aae.js delete mode 100644 assets/js/1d7b424d.b6e07850.js delete mode 100644 assets/js/23d439be.5f3a2fdb.js create mode 100644 assets/js/23d439be.715e3e9d.js delete mode 100644 assets/js/326b65c5.b5317393.js create mode 100644 assets/js/326b65c5.ed0f6532.js rename assets/js/{332c99fa.cbba2096.js => 332c99fa.be6a2311.js} (81%) rename assets/js/{47cf1bcd.3f1ec781.js => 47cf1bcd.71e9a197.js} (74%) delete mode 100644 assets/js/486ce9f7.1b8dcbcc.js create mode 100644 assets/js/486ce9f7.c508bfda.js rename assets/js/{4ce6e96f.18e4256a.js => 4ce6e96f.e771c6f2.js} (79%) delete mode 100644 assets/js/50ddc816.41a1dded.js create mode 100644 assets/js/50ddc816.96a6828a.js rename assets/js/{5714fd1a.3da2ac7e.js => 5714fd1a.ca967822.js} (60%) rename assets/js/{5b2eddc9.673748c2.js => 5b2eddc9.43f64355.js} (77%) delete mode 100644 assets/js/5beb85dd.5ace51e4.js create mode 100644 assets/js/5beb85dd.bb97cd1a.js create mode 100644 assets/js/61a8b9e9.799b17eb.js delete mode 100644 assets/js/61a8b9e9.df6b2808.js rename assets/js/{6e9ec4f2.50888d14.js => 6e9ec4f2.466f344b.js} (64%) create mode 100644 assets/js/77c4a354.5bb6b04f.js delete mode 100644 assets/js/77c4a354.7dbd59b6.js delete mode 100644 assets/js/809b845a.5682d8ab.js create mode 100644 assets/js/809b845a.697bc25e.js create mode 100644 assets/js/8ddf4ff6.80419e01.js delete mode 100644 assets/js/8ddf4ff6.a1ff605e.js delete mode 100644 assets/js/92b0f648.268d49ab.js create mode 100644 assets/js/92b0f648.8a4e717e.js create mode 100644 assets/js/9a47c610.b199ea30.js delete mode 100644 assets/js/9a47c610.d2681540.js delete mode 100644 assets/js/a135f75b.2c24fc77.js create mode 100644 assets/js/a135f75b.54f84a18.js 
rename assets/js/{a5d53aff.43621ef8.js => a5d53aff.a04c4099.js} (68%) rename assets/js/{c327a517.6c3b4d95.js => c327a517.4088179e.js} (60%) delete mode 100644 assets/js/d1e881cf.7130d71d.js create mode 100644 assets/js/d1e881cf.a8034068.js rename assets/js/{d7087486.38b0dc27.js => d7087486.e025e5a9.js} (51%) rename assets/js/{db3522d0.6c7d2099.js => db3522d0.4b7a81c5.js} (62%) rename assets/js/{dca7a11b.199de914.js => dca7a11b.687f5c42.js} (67%) create mode 100644 assets/js/dda96e46.272e2c52.js delete mode 100644 assets/js/dda96e46.c89988f8.js delete mode 100644 assets/js/e5297273.32262110.js create mode 100644 assets/js/e5297273.920b27aa.js create mode 100644 assets/js/ec996830.339685b4.js delete mode 100644 assets/js/ec996830.a0758903.js create mode 100644 assets/js/f11ee91b.07a32cba.js delete mode 100644 assets/js/f11ee91b.be73946a.js delete mode 100644 assets/js/f29eab11.27d94399.js create mode 100644 assets/js/f29eab11.5672147f.js create mode 100644 assets/js/f779aca8.5d6c571b.js delete mode 100644 assets/js/f779aca8.8fd6a6d1.js create mode 100644 assets/js/fe598bea.3b03a0a7.js delete mode 100644 assets/js/fe598bea.8d795564.js rename assets/js/{fe6a71b8.dd8a21f3.js => fe6a71b8.d1c4ae35.js} (50%) rename assets/js/{fec9a08a.1405b60a.js => fec9a08a.f73987e7.js} (61%) rename assets/js/{main.d1d4b5f9.js => main.5871d47d.js} (88%) rename assets/js/{main.d1d4b5f9.js.LICENSE.txt => main.5871d47d.js.LICENSE.txt} (100%) rename assets/js/{runtime~main.40bd2e59.js => runtime~main.3a9e292f.js} (92%) diff --git a/404.html b/404.html index e824a994e1..9dc4e84376 100644 --- a/404.html +++ b/404.html @@ -6,15 +6,14 @@ Page Not Found | Prophecy - - - - + + +
Skip to main content

Page Not Found

We could not find what you were looking for.

Please contact the owner of the site that linked you to the original URL and let them know their link is broken.

- - + + \ No newline at end of file diff --git a/Orchestration/airflow/index.html b/Orchestration/airflow/index.html index a11d7ee203..a069366184 100644 --- a/Orchestration/airflow/index.html +++ b/Orchestration/airflow/index.html @@ -6,17 +6,16 @@ Airflow | Prophecy - - - - + + +
Skip to main content

Airflow

Apache Airflow is an open-source workflow automation and orchestration platform that helps organizations schedule, monitor, and manage complex data workflows. It is a popular choice for organizations of all sizes to handle data pipelines, ETL processes, and task automation.

Key Features of Apache Airflow

DAGs (Directed Acyclic Graphs)

Airflow allows you to define workflows using Directed Acyclic Graphs, where each node represents a task, and the edges define the sequence and dependencies between tasks. In Prophecy, you define these DAGs as Airflow Jobs. Each Airflow Job represents a DAG in Airflow.

Dynamic Workflow Scheduling

Airflow provides a flexible scheduling system that allows you to set up complex dependencies, retries, and time-based triggers for your tasks. All these can be set in the settings page for a Job.

Extensibility

Airflow is highly extensible, allowing you to define custom operators and sensors to suit your specific needs. It supports a wide range of plugins and integrations. Prophecy maintains this extensibility via Package Hub. Users can create Gems to represent any custom Operator or Sensor.

Monitoring and Logging

It offers built-in tools for monitoring task progress, logging, and alerting. You can easily track the status of your workflows through a web-based UI. Users can also monitor and debug these Jobs directly via the Prophecy UI.

Parallel Execution and Scalability

Airflow can execute multiple tasks in parallel. It can also scale horizontally to accommodate high workloads by adding more worker nodes.

Low Code Airflow in Prophecy

Prophecy simplifies orchestration with a visual layer on top of Airflow, offering ease and efficiency. What the user builds visually in Prophecy turns into high-quality and open-source Airflow code on Git. You have the flexibility to integrate and utilize Prophecy with your managed Airflow. If you are new to Airflow, we also offer a Prophecy-managed Airflow, which expedites your setup without having to manage your own Airflow instance.

Key features and components of Airflow in Prophecy

  • Setup: Seamlessly connect to your Airflow Instance, or use Prophecy hosted Airflow for getting started.

  • Development: A visual drag-and-drop interface that allows users to design Jobs by connecting Gems and defining dependencies effortlessly.

  • Scheduling and Triggers: Easily set up schedules for your Jobs and define triggers to automate the execution.

  • Monitoring and Reporting: Monitor the progress of your Jobs, receive notifications on status, and access logs and reporting features for real-time insights into Jobs execution.

  • Extensibility: For advanced users, Prophecy's Airflow interface offers scripting capabilities and Gem builder to accommodate custom requirements.

This combination of visual development and the power of Airflow enables organizations to streamline data pipelines, reduce development time, and improve operational efficiency.

Benefits of Using Prophecy's Airflow Interface

Visual development for Airflow in Prophecy brings numerous advantages to data engineers, analysts, and organizations:

  • Simplified Workflow Development: With a user-friendly visual interface, even users with limited programming experience can create and manage complex data workflows, reducing the learning curve.

  • Accelerated Development: Rapidly build, test, and deploy Jobs, reducing the time and effort needed to set up and execute data processes.

  • Reduced Maintenance: Visually designed Jobs are easier to maintain and update, as they require less coding, resulting in fewer errors and quicker adjustments.

  • Centralized Management: Streamline the management of workflows by having them all in one platform, providing a unified view of your data Pipeline.

  • Automation and Scheduling: Automate data processes and set schedules for tasks, ensuring that critical data operations occur at the right time.

  • Error Handling and Monitoring: Easily track task execution, set up error handling, and receive alerts when issues arise, improving data Pipeline reliability.

  • Cost Savings: Visually designed Airflow can lead to cost savings through increased operational efficiency and decreased development time.

  • Scalability: As your data processing needs grow, Prophecy's Airflow interface can scale with your organization, handling more extensive and complex Jobs.

What's next

Let's delve deeper into the specifics of utilizing Airflow in Prophecy.

- - + + \ No newline at end of file diff --git a/Orchestration/airflow/setup/MWAA_fabric/index.html b/Orchestration/airflow/setup/MWAA_fabric/index.html index 49147a7549..8b39d6725b 100644 --- a/Orchestration/airflow/setup/MWAA_fabric/index.html +++ b/Orchestration/airflow/setup/MWAA_fabric/index.html @@ -6,10 +6,9 @@ MWAA | Prophecy - - - - + + +
@@ -30,7 +29,7 @@ Prophecy will use these connections to fetch the connection-id to generate the correct Airflow Code when you use these in your Airflow Gems.

To add a connection, click the (1) Add Connection button. This opens the Connection form as shown.

Setting up Snowflake connection

snowflake To be able to schedule your Snowflake SQL Models via Airflow, you need to have a Snowflake Connection over DBT from Airflow to your Snowflake environment. You need to create the connection in Airflow and provide the mapping to Prophecy in this form.

Select (2) Connection Type as Snowflake(DBT), and select the (3) Fabric you have in Prophecy for your desired Snowflake environment. Provide the (4) Profile Directory and (5) Profile Name used while setting up the connection in Airflow.

Make sure you select the Fabric for the same Snowflake environment you have already created the connection for in your Airflow. Once done, hit (4) Save.

Create an Airflow Job

Once the MWAA Airflow Fabric is set up with the relevant connections, Airflow Job scheduling is done with an easy-to-use interface. Follow this guide to Create an Airflow Job.

- - + + \ No newline at end of file diff --git a/Orchestration/airflow/setup/composer_fabric/index.html b/Orchestration/airflow/setup/composer_fabric/index.html index 83e30610da..85a6b111a7 100644 --- a/Orchestration/airflow/setup/composer_fabric/index.html +++ b/Orchestration/airflow/setup/composer_fabric/index.html @@ -6,10 +6,9 @@ Composer | Prophecy - - - - + + +
@@ -23,7 +22,7 @@ Prophecy will use these connections to fetch the connection-id to generate the correct Airflow Code when you use these in your Airflow Gems.

To add a connection, click the (1) Add Connection button. This opens the Connection form as shown.

Setting up Databricks Spark connection

To be able to schedule your Databricks Spark pipelines via Airflow, you need to have a Databricks Spark connection from Airflow to your Databricks Workspace. You need to create the connection in Airflow and provide the mapping to Prophecy in this form.

Select (2) Connection Type as Databricks(Spark), and select the (3) Fabric you have in Prophecy for the desired Databricks Workspace. Select the (4) Connection id you have created for this Databricks workspace in your Airflow. Make sure you select the Fabric for the same Databricks workspace you have already created the connection for in your Airflow. Once done, hit (4) Save.

Composer_connection

Setting up Snowflake SQL Connection

Similarly, set up a connection to a Snowflake Fabric following these steps.

Create an Airflow Job

Once the Airflow Composer Fabric is set up with the relevant connections, Airflow Job scheduling is done with an easy-to-use interface. Follow this guide to Create an Airflow Job.

- - + + \ No newline at end of file diff --git a/Orchestration/airflow/setup/index.html b/Orchestration/airflow/setup/index.html index 038145f3b3..9b320cbdc6 100644 --- a/Orchestration/airflow/setup/index.html +++ b/Orchestration/airflow/setup/index.html @@ -6,16 +6,15 @@ Setup | Prophecy - - - - + + +
Skip to main content

Setup

To connect to a running Airflow Instance, you would need to create a Fabric of type Airflow. Prophecy provides you with three different types of Fabrics depending upon where your Airflow Instance is running.

  1. Prophecy Managed Airflow - for those who are new to Airflow and do not have an Airflow instance, we provide a Prophecy Managed Airflow to expedite your trial and POC.

  2. MWAA - for those who are using Amazon Web Services and have an Amazon Managed Workflows for Apache Airflow instance running.

  3. Composer - for those who are using Google Cloud Platform and have a GCP Cloud Composer Airflow instance running.

Create an Airflow Job

Once the Airflow Fabric is set up, Airflow Job scheduling is done with an easy-to-use interface. Follow this guide to Create an Airflow Job.

What's next

To continue with Airflow setup, see the following pages:

- - + + \ No newline at end of file diff --git a/Orchestration/airflow/setup/prophecy-managed/connections/index.html b/Orchestration/airflow/setup/prophecy-managed/connections/index.html index cba0f86fb1..9e73d61386 100644 --- a/Orchestration/airflow/setup/prophecy-managed/connections/index.html +++ b/Orchestration/airflow/setup/prophecy-managed/connections/index.html @@ -6,17 +6,16 @@ Connections | Prophecy - - - - + + +
Skip to main content

Connections

Airflow needs to talk to various other systems in your Data Platform to perform tasks such as sending email or triggering Spark pipelines and SQL models. For these, we create connections in Airflow.

For Prophecy Managed Airflow, you can provide all the details required to connect to your external systems on the Connections page, and Prophecy will set up the Airflow connection for you. The credentials for your connections are stored securely in Google Cloud Secret Manager.

- - + + \ No newline at end of file diff --git a/Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_aws_connections/index.html b/Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_aws_connections/index.html index 4d527482a1..620bf6e314 100644 --- a/Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_aws_connections/index.html +++ b/Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_aws_connections/index.html @@ -6,15 +6,14 @@ AWS Connection | Prophecy - - - - + + +
Skip to main content

AWS Connection

To be able to trigger your Airflow Job using an S3 File Sensor, you need to have a connection from Prophecy Managed Airflow to your S3 account. For this, we need to add an AWS Connection. You would need this AWS connection for connecting to any AWS services.

Add an AWS connection

To create an AWS connection for Prophecy Managed Airflow, you need to provide the AWS access key id and associated Secret Key. Check here to know how you can get it for your AWS account.

To add an AWS connection, click on + Add Connection. This opens up the Connection form as shown.

Add_connection

Select AWS in (1) Connection Type. Provide a (2) Connection Name to identify your connection, add a (3) Description of your choice, and provide the (4) AWS Access Key ID and (5) AWS Secret Access Key. Once done, hit (6) Save.

AWS_connection

- - + + \ No newline at end of file diff --git a/Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_dbx_spark_connections/index.html b/Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_dbx_spark_connections/index.html index 535569b3b8..f8185f1f83 100644 --- a/Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_dbx_spark_connections/index.html +++ b/Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_dbx_spark_connections/index.html @@ -6,16 +6,15 @@ Databricks Spark Connection | Prophecy - - - - + + +
Skip to main content

Databricks Spark Connection

To be able to schedule your Databricks Spark pipelines via Airflow, you need to have a Databricks Spark connection from Prophecy Managed Airflow to your Databricks Workspace.

Add a Databricks Spark connection

To add a Databricks Spark connection, click on + Add Connection.

Add_connection

Select Databricks Spark in (1) Connection Type. Since you have already provided the details for your Databricks Workspace when creating a Databricks Fabric, you need not provide the details here again. Under the (2) Fabric, select the Fabric you created for Databricks Spark and Prophecy would set up the connection. You can provide a description in the (3) Description. Once done, click (4) Save.

DB_Spark_connection

- - + + \ No newline at end of file diff --git a/Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_dbx_sql_connections/index.html b/Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_dbx_sql_connections/index.html index 4d0ff88589..ef89a30607 100644 --- a/Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_dbx_sql_connections/index.html +++ b/Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_dbx_sql_connections/index.html @@ -6,15 +6,14 @@ Databricks SQL Connection | Prophecy - - - - + + +
Skip to main content

Databricks SQL Connection

To be able to run your Databricks SQL Models, you need to have a connection from Prophecy Managed Airflow to your Databricks SQL Environment.

Add a Databricks SQL connection (DBT)

To add a Databricks SQL connection, click on + Add Connection.

Add_connection

Select Databricks SQL in (1) Connection Type. Now under the (2) Fabric, you would select the already created Fabric for Databricks SQL and Prophecy would set up the connection. You can provide a description in the (3) Description. Once done, click (4) Save.

DB_SQL_connection

- - + + \ No newline at end of file diff --git a/Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_email_connections/index.html b/Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_email_connections/index.html index 0bfcb116ea..215f39fdc1 100644 --- a/Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_email_connections/index.html +++ b/Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_email_connections/index.html @@ -6,15 +6,14 @@ Email Connection | Prophecy - - - - + + +
Skip to main content

Email Connection

To be able to send email via Airflow using an Email Gem, you need to have an Email connection in Prophecy Managed Airflow.

info

If your email account has two-factor authentication (2FA) turned on, you may run into an authentication error even if you provide the correct login credentials. To work around this, you can generate an App password or provide additional parameters to add your Email connection.

Add an Email connection

To add an Email connection, click on + Add Connection and select Email in (1) Connection Type.

Add_connection

Provide a (2) Connection Name to identify your connection, add a (3) Description of your choice, and provide the (4) Host as your SMTP host, for example smtp.gmail.com. Provide the login credentials for this server in (5) Login and (6) Password and provide your SMTP port in (7) Port. Once done, hit (8) Save.

Email_connection

- - + + \ No newline at end of file diff --git a/Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_snowflake_connections/index.html b/Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_snowflake_connections/index.html index 3a5e035ef0..39525e80f5 100644 --- a/Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_snowflake_connections/index.html +++ b/Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_snowflake_connections/index.html @@ -6,15 +6,14 @@ Snowflake Connection | Prophecy - - - - + + +
Skip to main content

Snowflake Connection

To be able to run your Snowflake SQL Models, you need to have a connection from Prophecy Managed Airflow to your Snowflake Environment.

Add Snowflake connection (DBT)

To add a Snowflake connection, click on + Add Connection.

Add_connection

Select Snowflake in (1) Connection Type. Now under the (2) Fabric, you would select the already created Fabric for Snowflake and Prophecy would set up the connection. You can provide a description in the (3) Description. Once done, click (4) Save.

DB_SQL_connection

- - + + \ No newline at end of file diff --git a/Orchestration/airflow/setup/prophecy-managed/index.html b/Orchestration/airflow/setup/prophecy-managed/index.html index bbfc172807..6a3419cdea 100644 --- a/Orchestration/airflow/setup/prophecy-managed/index.html +++ b/Orchestration/airflow/setup/prophecy-managed/index.html @@ -6,10 +6,9 @@ Prophecy Managed | Prophecy - - - - + + +
@@ -18,7 +17,7 @@ Let's see how to set up this Fabric.

How to create Prophecy Managed Airflow Fabric

Setting up a Fabric is very straightforward. Click the (1) Create Entity button, and choose (2) Create Fabric option. The Fabric creation is composed of two steps: Basic Info and Providers setup. On the Basic Info screen, enter a (1) Fabric Name, (2) Fabric Description, and choose the (3) Team that’s going to own the Fabric.

Once ready, click (4) Continue.

CreateFabric

Since we’re setting up a Fabric connected to Airflow, choose Airflow as the (1) Provider Type and Prophecy Managed as the (2) Provider. For connecting to Prophecy Managed Airflow, you don't need to provide any other details, so go ahead and click on (3) Continue.

CreatePMFabric

This completes the Fabric creation for you. Now you can start setting up optional connections for Prophecy Managed Airflow to your AWS/Databricks etc. Also keep in mind there are some Limitations that larger teams may experience when using Prophecy Managed Airflow. Please reach out to support@Prophecy.io if these limitations block your use cases.

Create an Airflow Job

Once the Prophecy Managed Airflow Fabric is set up with the relevant connections, Airflow Job scheduling is done with an easy-to-use interface. Follow this guide to Create an Airflow Job.

- - + + \ No newline at end of file diff --git a/Orchestration/airflow/setup/prophecy-managed/prophecy_managed_airflow_fabric_limits/index.html b/Orchestration/airflow/setup/prophecy-managed/prophecy_managed_airflow_fabric_limits/index.html index c16f38359d..63ba950d95 100644 --- a/Orchestration/airflow/setup/prophecy-managed/prophecy_managed_airflow_fabric_limits/index.html +++ b/Orchestration/airflow/setup/prophecy-managed/prophecy_managed_airflow_fabric_limits/index.html @@ -6,10 +6,9 @@ Limits and Restrictions | Prophecy - - - - + + +
@@ -17,7 +16,7 @@ This should now allow Prophecy Managed Airflow to connect, and run any tasks when required.

Limits

We run Prophecy Managed Airflow on a Composer instance of size Large. This instance has a limit of 1000 DAGs. We have also restricted the total number of concurrent tasks to 10.

info

Please reach out to the Prophecy support team if you run into any of the above limits.

Restrictions

For security purposes, we have blocked certain types of tasks and operations on Prophecy Managed Airflow. For example, running Python or Script tasks is restricted. This is to prevent any misuse by malicious code in these tasks.

info

Please reach out to the Prophecy support team if any of these restrictions or limits are blocking your use cases.

- - + + \ No newline at end of file diff --git a/Orchestration/alternative-schedulers/index.html b/Orchestration/alternative-schedulers/index.html index bf952e15a5..706e0679f9 100644 --- a/Orchestration/alternative-schedulers/index.html +++ b/Orchestration/alternative-schedulers/index.html @@ -6,10 +6,9 @@ Alternative Schedulers | Prophecy - - - - + + +
@@ -25,7 +24,7 @@ specify the name and value of each variable that you want to override.

-0 examples

-C override individual parameters

This may be used in conjunction with -i and it will only override parameters which are given. This option may be used more than once.

-C examples

-f set configuration using a file

This option will set all parameters for a Pipeline by using a json file which can be reached locally by the spark-submit command.

caution

All Configuration Schema fields must be provided in this file.

-f examples

Example json file:

{
"str_var": "vendor1",
"bool_var": true,
"float_var": 0.5
}
- - + + \ No newline at end of file diff --git a/Orchestration/databricks-jobs/index.html b/Orchestration/databricks-jobs/index.html index bd55120abb..202bc534be 100644 --- a/Orchestration/databricks-jobs/index.html +++ b/Orchestration/databricks-jobs/index.html @@ -6,10 +6,9 @@ Databricks Jobs | Prophecy - - - - + + +
@@ -27,7 +26,7 @@ This is done to ensure that the folder structure for one Pipeline does not overwrite another. Please refer to the steps below in continuation to our earlier Example on how to configure package name in Pipeline.


Here's how the Databricks UI looks for Prophecy's Single Cluster Mode.

Single Job Cluster

Job Monitoring

Prophecy provides a monitoring page that shows the status (enable/disable) of all the Jobs deployed via Prophecy and the status of historic/current runs (success/failure/in-progress) for quick reference.

Guides

  1. How to trigger a job from another job?
  2. How to design a reliable CI/CD process?
- - + + \ No newline at end of file diff --git a/Orchestration/index.html b/Orchestration/index.html index 6a394f7009..729567973a 100644 --- a/Orchestration/index.html +++ b/Orchestration/index.html @@ -6,10 +6,9 @@ Orchestration | Prophecy - - - - + + +
@@ -19,7 +18,7 @@ orchestrate multiple data-Pipelines to run together. Databricks Jobs is a recommended scheduler, if you're Databricks Native.

  • Airflow - for more complex use-cases, where you have to use various operators, or need any additional data pre-and-post-processing, you can interface from Prophecy with your production-ready Airflow deployment. To get started with your first Airflow jobs, try Prophecy Managed Airflow using this guide.

  • Custom - Alternatively, since Prophecy provides you native Spark code on Git, you can easily integrate with any other scheduler or custom solution.

  • What's next

    To continue using orchestration solutions, see the following pages:

    - - + + \ No newline at end of file diff --git a/SQL/data-tests/index.html b/SQL/data-tests/index.html index a3db647ebf..c1a0814bc7 100644 --- a/SQL/data-tests/index.html +++ b/SQL/data-tests/index.html @@ -6,15 +6,14 @@ Data tests | Prophecy - - - - + + +
    Skip to main content

    Data tests

    You can use data tests to ensure that your business data is generated reliably over time. As a data engineer, data analyst, or business user, you can run data tests so that you don’t have to manually check every Dataset every time you run a Job or model. The data test checks the validity of the SQL in your project.

    A data test is an assertion you make about a Dataset in your project. The Dataset can be the output from a series of transformations, or the Dataset can be a particular data source, seed, or model.

    For example, the following test named ref_int_orders_customers checks the validity of the SQL in the HelloWorld_SQL Project. In particular, the referential integrity check for orders and customers asserts that every customer_id entry in the orders table is present in the customers table.

    Project test canvas

    This test starts with several models from the HelloWorld_SQL Project, combines their data with a series of transformation steps, and feeds the resulting table into the Data Test Gem.

    If there are customer_id entries in the orders table that are not present in the customers table, then the ref_int_orders_customers test fails.

    You can test any series of transformations with a Data Test Gem. The following sections include more details about data tests.

    What you'll need to know

    Data tests use dbt for the underlying test execution, but you don’t need to know dbt or how to write your own tests. Prophecy simplifies the test definitions that are normally defined in .sql and .yaml files.

    You can create data tests in Prophecy using the visual canvas.

    Supported database objects

    Supported database objects include:

    • Models
    • Snapshots
    • Seeds
    • Sources
    note

    Data tests can accept input data from any table, no matter if the table is defined by a model, snapshot, seed, or source.

    Supported test types

    Supported test types include:

    • Project tests: Singular use tests that depend on the Model that they were created for
    • Generic tests: Generic-use tests that are not tied to a specific Model, and can be reused repeatedly
      • Model tests
      • Column tests

    When to use each test type

    See a few recommendations in the following table to get an idea of when to use each test type.

    General situations                                                                                                    Project testModel testColumn test
    Test a single ModelTickCrossCross
    Test multiple ModelsCrossTickTick
    Specific situationsProject testModel testColumn test
    Test for referential integrityTickCrossCross
    Test for late arriving dataTickCrossCross
    Test for data consistency verificationTickCrossCross
    Test for model size and aggregationsCrossTickCross
    Test for column data format and data presence (nulls, empty strings, etc.)CrossCrossTick

    What's next

    To set up a project test, see Use project tests.

    If you need to reuse a test that is defined by a parametrized query, see Use model tests.

    - - + + \ No newline at end of file diff --git a/SQL/data-tests/use-model-tests/index.html b/SQL/data-tests/use-model-tests/index.html index 24c880b820..c5d6366e5a 100644 --- a/SQL/data-tests/use-model-tests/index.html +++ b/SQL/data-tests/use-model-tests/index.html @@ -6,16 +6,15 @@ Use model and column tests | Prophecy - - - - + + +
    Skip to main content

    Use model and column tests

    Model and column data tests are dbt macro generated tests that can be parametrized and applied to a given model or any number of columns. These tests are called generic data tests, and are based on the following dbt generic test types.

    • Model-level tests: Can span across many columns for a given model, or even multiple models, and are defined at a model level
    • Column-level tests: Are defined on a column level of each model
    note

    There could be many tests within a project. Each test is checking a table created by your choice of input models, data sources, and transformation Gems.

    For each model, you can create a macro-based test definition to use as a model test.

    For each column within a model, you can define out-of-the-box supported dbt Simple data tests.

    • Unique: Asserts that each value within a column is unique
    • Not null: Asserts that each value within a column is not null
    • Accepted values: Asserts that column contains values present within a pre-defined set of values
    • Relationships: Asserts that column contains values present within another column

    You can also use generic tests defined within the project or those defined within dependency packages.

    note

    Prophecy doesn't differentiate between model and column tests, and refers to these generic tests as simply data tests.

    Set up a test

    Depending on the type of test, you can set up a new model or column test from either the Tests section of the project tab or the Data Tests tab of the Target Model.

    Develop a test

    You can create a new data test definition to use in your model or column test. You can also skip creating a data test definition, and use one of the Simple data tests previously mentioned.

    To develop a model or column test, start by opening a project:

    1. Under the Project Browser, click Add Test definition. You can also click + New Data Test Type from the Target Model Data Tests tab.

      Add a new model test definition

    2. Enter your Test Definition Name, and then click Create. The test definition page opens. Data tests are saved to a tests > generic SQL file in the Git directory by default.

      Create a new model test definition

    3. On the test definition page, enter the description, parameters, and definition. In the previous image example, we created a test definition that checks that the column value doesn't exceed a certain threshold value.

    4. To create a new data test, whether it's one that uses a test definition or one that uses an out-of-the-box Simple data test, navigate to the Data Tests tab in the Target Model, and then click + New Test.

      Create a new model test

    5. Choose your Data Test Type from the drop down menu, and then click Create Test. You can find the Simple data tests, the test definition you created earlier under the current project name, and any data tests from dependencies connected to your project listed here.

      Create a new model test type

    6. After selecting your Data Test Type, fill out the column fields and any other required fields for your data test. The following image example on the left uses the Simple data test unique, while the example on the right uses the created test definition.

      Model test types examples

    caution

    If changes are made to the columns or schemas used in your data test, then Prophecy will delete the data test. For example, if you run into a data mismatch error on the Schema tab of your Target Model or update the schema, then your data test will be affected.

    Run a test

    After you’ve developed your model or column test, you can run it.

    1. From the Data Tests tab of your Target Model, select the data tests that you'd like to run, and then click Run tests to execute your tests. The table input to the Target Model Gem is what’s tested.

      Run model tests

    2. You can click to view the Test Summary, Copy the logs, or Download them. Depending on the outcome of the test, different colored icons are presented.

    Fix a failed test

    If your model or column test fails, you can check the stored failed records using the Store Failures advanced setting. See Configure a test to learn how to set up this setting.

    • Check the test output tables to see your failed rows.
    note

    Make sure you have write permission to create a new table in your data warehouse, otherwise you may run into errors while trying to run your own tests.

    Configure a test

    You can configure your model or column test to help decide which cases to focus on.

    To configure a test, follow these steps:

    1. From the Data Tests tab of your Target Model, hover over the data test that you'd like to configure, and then click the edit icon.

      Edit a test

    2. Click Advanced to open the advanced settings, and then enter the conditional values for the following options:

      Advanced settings

      Continued Advanced settings

      • (A) Filter Condition: Sets a filter for what you want your test to run on.
      • (B) Severity: Determines whether the failure of the test returns an error or warning. The severity operates from the highest priority selection, error, to the lowest, warning. So if you select error, then the test first checks for errors. If it doesn’t find any, then it checks for warnings. If you select warning, then the test only checks for warnings. If you don’t select a severity, then error is chosen by default. You can set conditions for when to return an error or warning using Error If and Warn If respectively. You can set the number of failed rows to determine an error, or otherwise just return a warning.
      • (C) Store Failures: Stores all records that failed the test. The records are saved in a new table with schema dbt_test__audit in your database. The table is named after the name of the model and data test.
      • (D) Set max no of failures: Sets the maximum number of failures returned by a test query. You can set the limit to save resources and time by having the test stop its query as soon as it encounters a certain number of failed rows.
    3. Click Save.

    Schedule a test

    When scheduling your project, you can opt in to run a test along with the project or model. Scheduling a test allows you to ensure on a daily basis that your data is correct. You can only schedule a test with a project or model. You cannot schedule one individually.

    To Schedule your project to run with tests, follow these steps:

    1. Under the Project Browser, click Add Job. Enter a name for your Job and click Create New.

      Create a job

    2. Drag a Model Gem to your visual canvas.

    3. Click the Model to open the Model Properties.

    4. Select the database object you want to run the test on. You can schedule the entire project or just a single Model:

      • Run entire Project
      • Run a SQL Model

      Schedule a job

    5. Select Run tests.

    6. Check that your Project, model, and Fabric are correct.

    7. Click Save. The Job runs automatically. You can see the Job status by clicking Detail.

    - - + + \ No newline at end of file diff --git a/SQL/data-tests/use-project-tests/index.html b/SQL/data-tests/use-project-tests/index.html index e2eaf095cd..01602901c4 100644 --- a/SQL/data-tests/use-project-tests/index.html +++ b/SQL/data-tests/use-project-tests/index.html @@ -6,15 +6,14 @@ Use project tests | Prophecy - - - - + + +
    Skip to main content

    Use project tests

    Project tests are custom data tests that check for a passing condition, such as checking that the given query doesn’t return any unexpected rows. They are based on dbt singular tests, but, as a data user, you don’t need to know dbt to use project tests in Prophecy. Prophecy makes it easy to set up, configure, and schedule project tests.

    Project tests perform a SQL query that checks the executed result of your project against a passing condition to see if it's successful. This is done by checking whether a particular table, generated by combining one or more database objects from your project, meets a certain condition.

    note

    There could be many tests within a project. Each test is checking a table created by your choice of input models, data sources, and transformation Gems.

    By default, and most often, the condition for a project test is simply a check that the given query does not return any failing rows.

    Set up a test

    Since project tests share the same canvas as models, you can set them up the same way you would develop and run a model.

    Develop a test

    To develop a project test, start by opening a project:

    1. Under the Project Browser, click Add Test.

      Add a new test

    2. Enter your Test Name. Project tests are saved to a tests SQL file in the Git directory by default.

    3. Click OK to create your new test.

    4. Create your test model by dragging and dropping Gems to your visual canvas. Connect them to your Data Test. You can also write your test on the Code view. You can use as many models and other database objects as you’d like in your test.

      Project test canvas

    Run a test

    After you’ve developed your project test and executed all of your models, you can run the test.

    1. Click the Play button on either the canvas or an individual Gem to execute your test. The table input to the data test Gem is what’s tested.

      1. Run a whole test. Clicking the canvas Play button executes the complete test and shows the interim data after the Data Test Gem. Additionally, it displays the final test status, succeeded, warning, or failed, in a summary.

      2. Run a partial test. Clicking the Gem Play button executes the test SQL up to a particular Gem, and displays that Gem’s data on the output. This option doesn’t execute the data test.

    2. Click See Run Details to view the test Summary. Depending on the outcome of the test, the icon displays a different color.

      See Run Details

      You can click to expand the test logs in the Summary to view the dbt logs.

      View test summary

    In addition to the previous relational integrity test, you can create tests to check that the total payment amount from customers is positive or simply check that all of your tables are still accessible. You can test any series of transformation Gems, because a project test simply asserts that the resulting table meets a certain criteria.

    Fix a failed test

    If your project test fails, check your test model for any changes.

    • Click the test output to see your failed rows.

      View test summary

    Some examples of test failure causes include:

    • Happy path: There is now a row in the table → you have detected the thing you were trying to detect!
    • The table no longer exists since one or more input data sources were deleted or inaccessible
    • The Failure Calculation function is invalid
    • The Error If and Warn If conditions are invalid
    note

    Make sure you have write permission to create a new table in your data warehouse, otherwise you may run into errors while trying to run your own tests.

    By default, project tests are configured to fail if the table has one or more rows. You can also modify the passing condition of your test through advanced options.

    Configure a test

    You can configure your own passing condition of your project test to help decide which cases to focus on.

    You might need to pay more attention as the number of rows in a table increases. For example, you might have a customer churn use case where many customers have a few comments (represented by rows in a table), but you need to change course when the number of customer comments exceeds a threshold of five comments. Configuring your test can help you identify and take action, such as assigning more resources to assist that customer.

    To configure a test, follow these steps:

    1. Click on the test to open the test details.

    2. Enter conditional values for the following options:

      Configure a test

      • (A) Failure Calculation: Sets the failure condition used to run against the test result. You can use the count() function on a column or multiple columns.
      • (B) Limit: Sets the maximum number of failures returned by a test query. You can set the limit to save resources and time by having the test stop its query as soon as it encounters a certain number of failed rows.
      • (C) Severity: Determines whether the failure of the test returns an error or warning. The severity operates from the highest priority selection, error, to the lowest, warning. So if you select error, then the test first checks for errors. If it doesn’t find any, then it checks for warnings. If you select warning, then the test only checks for warnings. If you don’t select a severity, then error is chosen by default.
      • (D) Error If and (E) Warning If: Sets the number of failed rows to determine a failed test. Depending on the selected severity, your test only returns a failed test for error checks. Warning won’t return a failed test.
    3. Click Save.

    Schedule a test

    When scheduling your project, you can opt in to run a test along with the project or model. Scheduling a test allows you to ensure on a daily basis that your data is correct. You can only schedule a test with a project or model. You cannot schedule one individually.

    To Schedule your project to run with tests, follow these steps:

    1. Under the Project Browser, click Add Job.

    2. Drag a Model Gem to your visual canvas.

    3. Click the Model to open the Model Properties.

    4. Select the database object you want to run the test on. You can schedule the entire project or just a single Model:

      • Run entire Project
      • Run a SQL Model

      Add a schedule job

    5. Select Run tests.

    6. Check that your Project, model, and Fabric are correct.

    7. Click Save. The Job runs automatically. You can see the Job status by clicking Detail.

    - - + + \ No newline at end of file diff --git a/SQL/development/code-editor/index.html b/SQL/development/code-editor/index.html index 05e7057ecf..97dae3a04b 100644 --- a/SQL/development/code-editor/index.html +++ b/SQL/development/code-editor/index.html @@ -6,15 +6,14 @@ Code editor | Prophecy - - - - + + +
    Skip to main content

    Code editor

    You can use SQL and advanced macros through a code-based editor. Prophecy parses your code and visualizes it on an editable canvas and ensures both views remain in sync at all times.

    Code view

    As a visual developer, you'll appreciate the drag-n-drop canvas, but sometimes it's also nice to view the code. Prophecy creates highly performant code behind the scenes. Click the Code View to reveal the SQL queries we've generated using our visual design editor. Each Gem is represented by a CTE or subquery.

    Code View

    You may wish to edit the code view. Add a SQL statement in the code view and notice the visual editor displays the updated code. For example, we've added a limit statement in the code view, and a new limit Gem appears in the visual view.

    - - + + \ No newline at end of file diff --git a/SQL/development/index.html b/SQL/development/index.html index b1314ca559..96383907a8 100644 --- a/SQL/development/index.html +++ b/SQL/development/index.html @@ -6,15 +6,14 @@ Development | Prophecy - - - - + + +
    Skip to main content

    Development

    Prophecy Data Transformation Copilot for SQL combines the best of both worlds; high-quality code based on software engineering best practices with a complete, easy-to-use visual environment.

    Visual = Code for easy collaboration

    Visual = Code allows both SQL coders and business users to easily collaborate on the same project.

    Code = Visual

    Prophecy Copilot for SQL features two editors:

    CodeEqualsVisual

    1. Visual Editor - enables data practitioners to easily see and modify the data model definitions in a graphical way. The visual graph is perfect to quickly grasp the purpose and inner workings of the existing models. It’s also a great way to develop brand new models with step-by-step interactive execution. All models developed through the visual interface will be written in a highly standardized format.

    2. Code Editor - enables the users already familiar with SQL to optimize and understand their queries in-depth. New data practitioners can also learn best practices as they develop their models by sneak-peeking into the code editor in real-time.

    No matter which editor you prefer, Prophecy features a Code = Visual interface that allows teams to collaborate on both interfaces at the same time. Any changes made in the Visual Editor generate high-quality code on Git. And, any changes in the Code Editor can be visualized back as a visual graph.

    Do I have to save my work progress?

    No! Prophecy automatically saves your work as you develop in a working directory securely stored on the cloud. Just make sure to commit your changes every once in a while, to see your code reflected on your Git and to collaborate easier with your team.

    Are all SQL and dbt constructs supported?

    The vast majority of dbt and SQL constructs are supported within the Prophecy Visual editor. However, if something is not yet supported, don’t worry!

    • For SQL - Prophecy automatically maps unsupported SQL statements to a generic SQL Gem, allowing you to still freely edit even unsupported code.
    • For dbt - Features in dbt-core that may lack their visual-editor alternatives still work as expected from the standard dbt cli. Prophecy will never modify your existing codebase in unsupported ways.

    Interactive development

    At any step of the process, data users can interactively run their models to make sure they're going in the right direction. Models can be additionally tested to ensure robustness over time. Power users can also extend the visual canvas through custom Gems; making even the most complex logic easily accessible in the visual view.

    Deployment from code on Git

    Projects built through Prophecy are stored in the dbt Core format as repositories on Git, which allows data teams to follow best software engineering practices like CI/CD.

    Maintenance is simple since Prophecy gems turn into code on Git that’s always up-to-date with the latest version of the warehouse or lakehouse used. And, to ensure the best performance at all times, Prophecy is smart about which code construct (subquery or CTE) to use.

    Sharing of Projects as Packages

    Data users can import an existing dbt Core project or start from scratch. They can publish those projects to other teams or subscribe to already existing ones. Projects published as packages contain models, functions and Gems allowing for code reusability at every level.

    Learn more

    A word from Prophecy's co-Founder, Maciej! See how Prophecy allows every team, whether visual or code developers, to use the same software development best practices.

    Hands-on

    To get started developing SQL models, check out the pages below or try this guide for Databricks or Snowflake.

    As you're building your Model by adding individual Gems, see how the process is going by interactively running the model. To learn more, see Data Explorer.

    - - + + \ No newline at end of file diff --git a/SQL/development/target-models/index.html b/SQL/development/target-models/index.html index fa4a036551..34b7a3b3c3 100644 --- a/SQL/development/target-models/index.html +++ b/SQL/development/target-models/index.html @@ -6,15 +6,14 @@ Target Models | Prophecy - - - - + + +
    Skip to main content

    Target Models

    The Target Model Gem has different tabs that help you set advanced options and make it easy for you to define how you want to materialize your data using write formats. You can see the UI and capabilities by opening the Target Model in your SQL projects.

    These simplified options allow you to use dbt Core™ properties without the need to have any dbt knowledge.

    Target Model tabs

    The tabs within the Target Model include the following:

    • Type & Format: Update the format of the model between different types of materializations
    • Location: Update the location by overwriting the Database, Schema, or Alias
    • Schema: Make schema changes
    • SQL Query: View and enable your custom SQL query
    • Write Options: Use Write Modes such as Overwrite, Append, and Merge

    Target Model tabs

    dbt advance settings

    Advance settings are available for defining model write formats and other dbt options. These include all dbt native settings, traditionally accessible from the yaml file.

    If you're familiar with dbt, then you can navigate to and update the advance settings.

    • Click ... > Advance Settings. The advance setting dialog opens.

    Advance Settings

    The object properties describe everything from physical locations, materialization logic, business metadata, and access control definitions.

    dbt properties mapping

    The following table maps all of the dbt properties for Databricks and Snowflake to the tabs of Prophecy’s Target Model:

    dbt property name | Provider | Target Model tab
    file_format | Databricks | Type & Format
    alias | All | Location
    database | All | Location
    location_root | Databricks | Location
    contract | All | Schema
    enabled | All | Schema
    schema | All | Schema
    tags | All | Schema
    cluster_by | Snowflake | Schema
    partition_by | Snowflake | Schema
    clustered_by | Databricks | Schema
    buckets | Databricks | Schema
    transient | Snowflake | Schema
    query_tag | Snowflake | Schema
    automatic_clustering | Snowflake | Schema
    snowflake_warehouse | Snowflake | Schema
    copy_grants | Snowflake | Schema
    secure | Snowflake | Schema
    target_lag | Snowflake | Schema
    docs | All | Schema
    group | All | Schema
    meta | All | Schema
    persist_docs | All | Schema
    full_refresh | All | Write Options
    incremental_predicates | All | Write Options
    incremental_strategy | All | Write Options
    materialized | All | Write Options
    on_schema_change | All | Write Options
    merge_exclude_columns | Snowflake, Spark | Write Options
    merge_update_columns | Snowflake, Spark | Write Options

    What's next

    To continue configuring your Target Model, see the following pages:

    - - + + \ No newline at end of file diff --git a/SQL/development/target-models/location/index.html b/SQL/development/target-models/location/index.html index 0392ca01b3..23b9651a27 100644 --- a/SQL/development/target-models/location/index.html +++ b/SQL/development/target-models/location/index.html @@ -6,15 +6,14 @@ Location | Prophecy - - - - + + +
    Skip to main content

    Location

    You can use the Location tab to overwrite the Database, Schema, or Alias of your Target Model.

    The full location of a table, view, and other objects consists of a database, schema, and alias. dbt allows its users to overwrite any of those parts of the location, with a combination of macros and additional project, folder, and model-level configurations. This tab simplifies those dbt object location parts.

    Overwrite location

    Prophecy allows you to overwrite the schema behavior, and if you wish, define it from scratch.

    You can select how you want to store the table, choosing the location by overwriting the properties. The defaults are automatically provided for you.

    • Location: The final location may vary depending on the model's execution environment.

    For the following locations, toggle Overwrite to add your own overwrite macro:

    • (A) Database: Optional, if Unity Database is enabled. By default, it's determined by the Fabric connection. You can overwrite it.
    • (B) Schema: The default schema is determined by the Fabric connection. You can overwrite it.
    • (C) Alias: Alias is the name of the created table or view. By default, it's equivalent to model name. You can overwrite it.

    Location

    - - + + \ No newline at end of file diff --git a/SQL/development/target-models/schema/index.html b/SQL/development/target-models/schema/index.html index 144e3e736c..e2d40bbd1d 100644 --- a/SQL/development/target-models/schema/index.html +++ b/SQL/development/target-models/schema/index.html @@ -6,15 +6,14 @@ Schema | Prophecy - - - - + + +
    Skip to main content

    Schema

    You can use the Schema tab to view the schema, and add any additional properties.

    Schema

    Generic Properties

    You can use the Generic Properties to do the following:

    • add a description
    • apply Dataset tags to the resource

    Schema

    You can use Schema to do the following:

    • reorder the rows
    • change the Type
    • expand the row to
      • add a description
      • apply column tags
      • enable or disable quoting for column names
    - - + + \ No newline at end of file diff --git a/SQL/development/target-models/sql-query/index.html b/SQL/development/target-models/sql-query/index.html index 4b156af517..084929c583 100644 --- a/SQL/development/target-models/sql-query/index.html +++ b/SQL/development/target-models/sql-query/index.html @@ -6,16 +6,15 @@ SQL Query | Prophecy - - - - + + +
    Skip to main content

    SQL Query

    You can use the SQL Query tab to view and enable your custom SQL query at the end of your Target Model. A custom SQL query enables you to perform one last data transformation step as a SQL query, before creating the model.

    This is useful if you import your own codebase and your final query has additional conditions. Your query is saved here for you to view and edit. Low-code users can use a Filter Gem to achieve the same results.

    Enable custom SQL query

    You can add a custom SQL query if you're doing data processing with advanced Jinja or dbt templating. This gives you flexibility when doing last mile operations for your SQL models. To that end, we support the use of declared variables directly in the SQL queries for those last mile operations on each model write.

    Ports

    Ports represent the tables or Gems that you want to use in the SQL query. You can access them using the table aliases, or port names. The visual order of the ports defines the order of the variables.

    You can edit or add Ports for Input and Output.

    SQL Query

    You can enable a custom SQL query on the SQL Query tab.

    • Toggle Enable custom SQL query to enable your custom SQL query at the end of your model.

    SQL Query

    Your SQL query will appear as a normal string in the Code view.

    SELECT *

    FROM customers_raw

    WHERE customer_id > {{ id_threshold }}

    You can use your declared dbt variables in the SQL query.

    Declare variables

    The variable declaration interface allows you to configure variables directly in the SQL query for your Target Model.

    Declared variables are accessible by clicking configuration to add the variable of interest.

    • You can declare the variables under ... > Configuration using key-value pairs. Variables can be defined at the model or project level.

    Configuration

    You can then use the variable, along with standard dbt functions, in the Target Model SQL Query tab.

    - - + + \ No newline at end of file diff --git a/SQL/development/target-models/type-and-format/index.html b/SQL/development/target-models/type-and-format/index.html index f86460843a..ac63765b83 100644 --- a/SQL/development/target-models/type-and-format/index.html +++ b/SQL/development/target-models/type-and-format/index.html @@ -6,15 +6,14 @@ Type and Format | Prophecy - - - - + + +
    Skip to main content

    Type and Format

    You can use the Type & Format tab to update the format of the model between different types of materializations. This determines the underlying physical format of your Target Model.

    You can select the type of data you want to write from. The Warehouse represents all native warehouse and optimized data formats.

    Materialization types

    The model materialization types include the following:

    • View: View models are rebuilt as a view on each run. Views built on top of source data always have the latest records, but they don't store any additional data. So they're simply queries based on other tables. Use the view materialization type for models that don't do significant transformations. This is the default type.

    • Table: Table models are rebuilt as a table on each run. Tables are fast to query, but they can take a long time to rebuild. Use the table materialization type for any models that are queried by BI tools. To apply a write mode to your Target Model, you must use the Table type.

    • Ephemeral: Ephemeral models aren't built into the database. Instead, the model code is taken from an ephemeral model and inserted into its dependent models using a common table expression (CTE). Use the ephemeral materialization type for undemanding transformations that appear at the start of your DAG.

    • Materialized View: Materialized View models are a combination of a view and a table. They are similar to incremental models in that they serve similar use cases. A materialized view model allows you to create and maintain a materialized view in the target database Warehouse type. This is not supported in Snowflake.

      Type & Format

    - - + + \ No newline at end of file diff --git a/SQL/development/target-models/write-options/index.html b/SQL/development/target-models/write-options/index.html index 055367b567..a36ca297b0 100644 --- a/SQL/development/target-models/write-options/index.html +++ b/SQL/development/target-models/write-options/index.html @@ -6,10 +6,9 @@ Write Options | Prophecy - - - - + + +
    @@ -18,7 +17,7 @@ SCD 2
    1. Under Updated at, click Add Column and select UPDATED_AT.
  • If you select Determine new records by looking for differences in column values: SCD 2
    1. Click + and select SHIPPING_STATUS.
  • Click Save.
  • Run the Target Model and check that the merge approach returns data properly.
  • Use delete and insert

    Use delete and insert to replace outdated data efficiently. It deletes existing records and inserts new ones in a single operation, ensuring your Dataset remains up-to-date.

    Use delete and insert

    For example, imagine you have an ORDERS table where you want to replace outdated SHIPPING_STATUS data.

    To use delete and insert, follow these steps:

    1. Open your Target Model and navigate to the Write Options tab.
    2. For the Write Mode, select Merge.
    3. For the Merge Approach, select Use delete and insert.
    4. Under Merge Condition, set the Unique Key to SHIPPING_STATUS.
    5. Optional: Toggle to Use Predicate.
    6. Optional: Toggle to Use a condition to filter data or incremental runs.
    7. Under Merge Columns, there's no need to set the merge columns for incremental strategy delete and insert.
    8. Under Advanced, select an option for On Schema Change.
    9. Click Save.
    10. Run the Target Model and check that the merge approach returns data properly.

    Insert and overwrite

    Use insert and overwrite to overwrite existing records and insert new ones in a single operation, ensuring your Dataset remains accurate.

    Insert and overwrite

    For example, imagine you have a CUSTOMERS table where you want to replace all partitions by CUSTOMER_ID.

    To use insert and overwrite, follow these steps:

    1. Open your Target Model and navigate to the Write Options tab.
    2. For the Write Mode, select Merge.
    3. For the Merge Approach, select Insert and overwrite.
    4. Under Merge Condition, set Partition By by selecting CUSTOMER_ID.
      info

      dbt will run an atomic insert overwrite statement that dynamically replaces all partitions included in your query. If no partition_by is specified, then the insert and overwrite strategy will atomically replace all contents of the table, overriding all existing data with only the new records. The column schema of the table remains the same.

    5. Under Merge Columns, there's no need to set the merge columns for incremental strategy insert and overwrite.
    6. Under Advanced, select an option for On Schema Change.
    7. Click Save.
    8. Run the Target Model and check that the merge approach returns data properly.
    - - + + \ No newline at end of file diff --git a/SQL/development/visual-editor/index.html b/SQL/development/visual-editor/index.html index 8062cc8ab5..1a5272a125 100644 --- a/SQL/development/visual-editor/index.html +++ b/SQL/development/visual-editor/index.html @@ -6,17 +6,16 @@ Visual editor | Prophecy - - - - + + +
    Skip to main content

    Visual editor

    Prophecy’s visual interface is where you can build data pipelines using a drag and drop interface or with SQL code. As a business user, you can visually create your SQL data models with Prophecy's visual canvas, with all of your work automatically turning into high-quality SQL code on Git.

    Visual Canvas

    The visual canvas is the main place to develop data models. Open any SQL Project in Prophecy to find the complete end-to-end Project Lineage. The Project Lineage provides a quick understanding of how Models refer to other Models, Seeds, or Sources. This quick, high-level project overview is explainable without having to read code.

    ProjectLineage

    The Project Browser displays entities available or used within this Project. Just click the Environment tab to browse through available databases and tables. Each Project connects to a Fabric, or execution environment. This Fabric defines the SQL Warehouse where each Model will materialize a single table or view. There are lots of additional features to learn more about, including configurations and committing code to Git. A single Project page contains lots of capabilities!

    From here you can easily create new models by clicking on + next to the Models pane, or you can edit existing Models. AddModel

    Once a Model is open, the model-editing canvas appears.

    Canvas

    Here we can see the customers Model starts with three existing Models. The data is transformed according to Aggregate, SQLStatement, and Join Gems. The Transformation Gems are available by clicking the dropdown menu. As you develop the Model, you can iteratively run and see sample data as well as the relevant logs.

    What's next

    To continue developing with the visual editor, see the following pages:

    - - + + \ No newline at end of file diff --git a/SQL/development/visual-editor/variant-schema/index.html b/SQL/development/visual-editor/variant-schema/index.html index b1b470d955..d7a6bc423a 100644 --- a/SQL/development/visual-editor/variant-schema/index.html +++ b/SQL/development/visual-editor/variant-schema/index.html @@ -6,15 +6,14 @@ Variant schema | Prophecy - - - - + + +
    Skip to main content

    Variant schema

    You can use Prophecy to convert your variant schemas into flat, structured formats to make them easier to understand and use for analytics. This is available for when you want to determine the variant schema of your Snowflake array or object.

    Using the variant schema functionality, you can do the following:

    • Infer the variant schema
    • Configure the parsing limit for inferring the column structure
    • Use a nested column inside of the Visual Expression Builder

    Inferring the variant schema

    Variant schemas are not stored within the table definition and can vary for each row, making them difficult to infer and use. Fortunately, you don't have to infer the schema yourself. You can use the column selector inside of your Gems to automatically infer the variant schema, explore the multi-type variant structure, and later select a nested column to use in your transformations.

    To automatically infer the variant schema, start by opening a Gem that uses a variant column input.

    • Select the variant column, and click Infer Schema.

    Prophecy will automatically detect and identify the variant types in your input data.

    Schema and column selector

    note

    The inferred schema is cached so that you can use it again in the future whenever you reopen the Model, Gem, or another Gem connected to the same input port. Check the bottom of the column selector to see the last time the variant schema was inferred.

    To refresh the schema, simply click Infer Schema again.

    Editing the variant schema

    After you infer the schema, you can click Edit Schema to view the variant schema and make edits to it. Use the Type dropdowns to manually choose the data type of each nested schema.

    Edit schema view

    Editing the variant schema is useful in cases where not all of the schema cases were covered while sampling the records.

    Variant sampling setting

    When Prophecy infers the variant schema, it samples the records to identify all potential iterations of keys and values within the schema.

    The default number of records that Prophecy parses to understand the nested data schema is 100. You can update this limit under the Development Settings, which you can navigate to by clicking ... > Development Settings.

    Variant sampling setting

    We recommend that you increase the limit for small structures, or decrease it for larger ones.

    note

    This setting does not rely on the ratio of the data since that would require a complete count of the data records.

    Adding a nested column

    Within the column selector, you can add a nested column by clicking Add Column next to the input field name.

    Add column

    When adding a column nested within a variant, the output column name, expression, and data type are automatically generated according to the following rules:

    • Column name: The column name matches the input field name, and is prefixed with the parent field path. If there's a conflict, Prophecy appends numbers starting with _0 until it becomes unique.

      For example, if the column name customers_name already exists, the new field might be named customers_name_0.

    • Expression: The expression represents the full path to the selected field, and uses existing flattened subpaths.

    • Data type: The data type is automatically CAST to the closest inferred type.

    Default casting

    Prophecy automatically adds a CAST to any column you add from a nested type. By default, the column is cast using the standard CAST(x AS y) syntax.

    In some cases, a path within a variant may hold different value types across rows. For instance, consider a Dataset where each row’s value key contains different data types, such as integer, object, and boolean.

    Prophecy supports this scenario by presenting each detected data type for a given key, array, or object as a separate item in the column selector. When you add one of those columns to the expression, we use explicit casting, which may error out if the cast is not possible. You can change this behavior by using TRY_CAST, which returns null if the cast is not possible.

    - - + + \ No newline at end of file diff --git a/SQL/development/visual-editor/visual-expression-builder/index.html b/SQL/development/visual-editor/visual-expression-builder/index.html index 76035c3979..d535de4070 100644 --- a/SQL/development/visual-editor/visual-expression-builder/index.html +++ b/SQL/development/visual-editor/visual-expression-builder/index.html @@ -6,15 +6,14 @@ Visual Expression Builder | Prophecy - - - - + + +
    Skip to main content

    Visual Expression Builder

    In order to perform data analysis tasks, it's important to be able to construct expressions that combine the SQL functions in various ways. Prophecy makes this easy with the support of the Visual Expression Builder. The Visual Expression Builder takes you through building your expressions, following a step-by-step visual guide.

    Visual Expression Builder

    Using the SQL Visual Expression Builder can save you time and effort when constructing complex expressions, and can help you to better understand the relationships between different functions and their arguments.

    Supported Gems and features

    You can use the simplified Visual Expression Builder within the following data transformation Gems:

    • Aggregate
    • Filter
    • Join
    • Reformat

    And you can use it within the following Prophecy features:

    • Data Explorer
      • Filter and Sort Options
    • Data Tests

    Expression types

    You can use the Visual Expression Builder to build expressions in accordance with the following expression types:

    • Static (native to SQL):
      • Column selection - e.g. customer_id, amounts
      • Hardcoded value (based on the listed types) - e.g. 15, Poland
      • Function call - e.g. concat(amount, " ", currency)
      • Case statement - e.g. WHEN active_flag = True THEN first_name OTHERWISE last_name
    • Dynamic (native to Prophecy):
      • Configuration value - e.g. $currency
      • Secret value - e.g. $jdbc_url

    Expression syntax

    As a data user, you'll never again need to remember the right syntax for your expressions. The Visual Expression Builder takes care of the semantics and syntax for you as you build your expressions.

    If you're interested, you can check the syntax of your expressions by viewing the Code view of your expressions.

    Code Expression Builder

    Converting from code

    If you prefer, you can always choose to continue to write your expressions in the Code view.

    All of the expressions you write in the Code view are converted to visual expressions by the Visual Expression Builder when you view the Visual view.

    Suggesting expressions and functions

    As you build your expressions, Data Copilot can suggest expressions and functions to you, including nested and conditional functions.

    While viewing the Code view of your expressions, you can click Ask AI to generate your expressions using an English text prompt. You can then review the code expressions, view them on the Visual view, and test them by running the Model up to and including the Gem with the expressions.

    Ask AI to generate

    While you're viewing the Code view, you can also take advantage of the Expression Builder by clicking Expand Editor next to any of your expressions.

    What's next

    To continue developing with the Visual Expression Builder, see the following pages:

    - - + + \ No newline at end of file diff --git a/SQL/development/visual-editor/visual-expression-builder/use-the-expression-builder/index.html b/SQL/development/visual-editor/visual-expression-builder/use-the-expression-builder/index.html index e557a1834a..1b2df893f6 100644 --- a/SQL/development/visual-editor/visual-expression-builder/use-the-expression-builder/index.html +++ b/SQL/development/visual-editor/visual-expression-builder/use-the-expression-builder/index.html @@ -6,15 +6,14 @@ Use the Visual Expression Builder | Prophecy - - - - + + +
    Skip to main content

    Use the Visual Expression Builder

    Develop your SQL expressions by using the Visual Expression Builder, which shows you available columns and functions that you can use to build your expressions.

    The Visual Expression Builder is supported wherever you see Visual and Code views within your transformation Gems.

    • Create or open an existing transformation Gem, and select the Visual view.
    note

    The view you select persists across your Gems.

    All of the expressions you build using the Visual Expression Builder are converted to code in the Gem and Model Code views. Similarly, edits you make in the Code view, whether it's in an expression or condition editor, will be converted into the Visual view.

    This allows you to take advantage of both Visual and Code views when building your expressions.

    Build an expression

    Using the Visual Expression Builder, you can build an expression using the following modes:

    • Simple Expression: This mode allows you to choose an expression, like a column, function, or Case statement.
    • Comparison: This mode allows you to compare two simple expressions. You have the option of using conditions, such as IF and ELSEIF statements.
    • Grouping: This mode allows you to build complex conditions by combining comparison expressions using the logical operators AND or OR.

    See which Gems support which modes in the following table:

    Simple Expression modeComparison modeGrouping mode
    AggregateTickTickTick
    FilterTickTickTick
    JoinTickTickTick
    ReformatTickTickTick

    Build using the Simple Expression mode

    Let's use a Join Gem to build simple expressions.

    Join example

    Join Gem using Expression mode

    In our Join example, we want to join the in0 account table with the in1 expected revenue table matching the account IDs.

    To set up the join condition, follow these steps:

    1. After creating the Join Gem, in the Join section, click +Add Condition. An option to Select expression appears.
    2. Click Select expression and select Column. Search for or click to select in0.id from the populated list. An option to Select operator appears.
    3. Click Select operator and select the Comparison operator equals. An option to Select expression appears.
    4. Click Select expression and select Column. Search for or click to select in1.ACCOUNT_ID from the populated list.

    To set up the simple expressions, follow these steps:

    1. In the Expressions section, click Add Column +. A new expression row appears.
    2. Click target_column, and then search for or click to select ACCOUNT_ID from the populated list.
    3. Click Select expression and select Column. Search for or click to select in0.ID from the populated list.
    4. Repeat steps 1 to 3 to set up the rest of the matching columns.

    Build using the Comparison mode

    Let's use a Reformat Gem and an Aggregate Gem to build comparison expressions.

    Reformat example

    Reformat Gem using Comparison mode

    In our Reformat example, we want to stratify the accounts based on their annual revenues. Each condition we set up is limited to one comparison.

    To set up the comparison expressions, follow these steps:

    1. After creating the Reformat Gem, click target_column, and then search for or click to select ANNUALREVENUE from the populated list.
    2. Click Select expression and select Conditional. A WHEN clause appears.
    3. For WHEN, click Select expression and select Function. Search for and click to select TRY_CAST, which converts a value of one data type into another data type. An option to select source_string_expr appears.
    4. Click source_string_expr and select Column. Search for or click to select ANNUALREVENUE AS FLOAT from the populated list.
    5. Click Select operator and select the Comparison operator less than.
    6. Click Select expression and select Value. Enter 1000000 as the value.
      tip

      Whenever you enter a numerical or boolean value, a checkbox appears on the value dialog giving you the option to Check to read value as string.

    7. For THEN, click Select expression and select Value. Enter Low Revenue as the value.
    8. Click + on the next line and select Add CASE to add another WHEN clause.
    9. Repeat steps 3 to 8 to set up the rest of the comparison expressions.
    10. Click + on the next line and select Add ELSE to add an ELSE statement.
      note

      You can add multiple CASES of the WHEN clause, but you can only have one ELSE statement.

    11. Click Select expression and select Value. Enter Unknown as the value.

    Aggregate example

    Aggregate Gem using Comparison mode

    In our Aggregate example, we want to use other conditional expressions, such as IF, to set a threshold limit for ACCOUNT_ID using a configuration variable.

    To set up additional comparison expressions, follow these steps:

    1. After creating and setting up the initial Aggregate Gem, hover your pointer between two expression rows and click + to add a condition. You're given the option to insert another column or an IF or FOR condition.
    2. Select IF. An IF condition appears.
    3. Click Select expression and select Configuration Variable. Search for or click to select id_threshold from the populated list.
    4. Click Select operator and select the Comparison operator greater than.
    5. Click Select expression and select Value. Enter 50 as the value.
    6. Optional: You can hover your pointer below the expression row you just created and click + to add another condition. You're now given additional options to insert an ELSEIF or ELSE condition.

    Build using the Grouping mode

    Let's use a Filter Gem to build grouping expressions.

    Filter example

    Filter Gem using Grouping mode

    In our Filter example, we want to filter for the following:

    • Total expected revenue that is not null
    • Total amounts that are greater than 100000
    • Latest closed quarters that equal 2023Q2 or 2024Q2

    To set up the grouping expressions, follow these steps:

    1. After creating the Filter Gem, click Add condition. An option to Select expression appears.
    2. Click Select expression and select Column. Search for or click to select TOTAL_EXPECTED_REVENUE from the populated list. An option to Select operator appears.
    3. Click Select operator and select the Existence check is not null.
    4. Click + Add Condition. A new expression row appears.
    5. Click Select expression and select Column. Search for or click to select TOTAL_AMOUNT from the populated list. An option to Select operator appears.
    6. Click Select operator and select the Comparison operator greater than. An option to Select expression appears.
    7. Click Select expression and select Value. Enter 100000 as the value.
    8. Click Add Group. A grouped expression row appears.
    9. Click Select expression and select Column. Search for or click to select LATEST_CLOSED_QTR from the populated list. An option to Select operator appears.
    10. Click Select operator and select the Comparison operator equals. An option to Select expression appears.
    11. Click Select expression and select Value. Enter 2023Q2 as the value.
    12. Click + Add Condition and repeat steps 9 to 11 to set up the other OR condition.
      tip

      You can have any number of groups and nestings (a group within a group). And you can change the grouping conditions between AND and OR by clicking on the labels.

    Tips

    Here are some additional tips to keep in mind when using the Visual Expression Builder:

    • The expression dropdowns support search.
    • Each argument of your function is itself an expression, so you have the same expression options to choose from.
      • You can add optional arguments to your functions.
    • You can drag and drop your comparison expressions.
    • Just as with conditions, you can also drag and drop your grouping expressions.
    • You can delete individual expressions, conditions, and groupings by clicking the trash icon at the end of the rows.

    Run and Verify the output

    Run the Pipeline up to and including the Gem with your expression, and observe the resulting data sample.

    • Click the Play button on either the canvas or the Gem.

    Once the code has finished running, you can verify the results to make sure they match your expectations. This data is the same as what you see in the interims view. By testing and verifying your expressions, you can ensure that your data analysis tasks are accurate and reliable.

    - - + + \ No newline at end of file diff --git a/SQL/development/visual-editor/visual-expression-builder/visual-expression-builder-reference/index.html b/SQL/development/visual-editor/visual-expression-builder/visual-expression-builder-reference/index.html index 2f610a14bc..985fd43937 100644 --- a/SQL/development/visual-editor/visual-expression-builder/visual-expression-builder-reference/index.html +++ b/SQL/development/visual-editor/visual-expression-builder/visual-expression-builder-reference/index.html @@ -6,15 +6,14 @@ Visual Expression Builder reference | Prophecy - - - - + + +
    Skip to main content

    Visual Expression Builder reference

    This page contains a reference of the different Visual Expression Builder components, which include the expression options, operator options, and data types.

    Expression options

    The Visual Expression Builder supports the following expression options:

    • Column: Allows you to select an input column from your source tables. You can view all of the available input columns from under the dropdown menu or under Input on the left-hand side of the Gem dialog.
    • Value: Allows you to enter any kind of value.
      • If you enter a string value, it'll be considered as a string within quotes.
      • If you enter a number, it'll be considered as a numerical value, but you have the option to click to Check to read value as string.
      • The same applies to a boolean value. For example, if you enter True, then it'll be considered a boolean value unless you Check to read value as string.
    • Function: Includes a list of all of the function category groups and functions that are supported. The list displays each function description, including mandatory arguments.
    • Data type cast: Allows you to cast a variant column into its appropriate data type. Instead of explicit casting, you can use TRY_CAST to avoid errors; it returns null when the cast fails.
      note

      For Snowflake, TRY_CAST is only supported on string-type data.

    • Conditional: Allows you to use a conditional WHEN clause.
      • Within WHEN, you use a comparison expression.
      • Within THEN you use a simple expression.
      • You can add multiple CASES of the WHEN clause, but you can only have one ELSE statement.
        • ELSE also uses a simple expression.
      • You can also add IF, ELSEIF, or FOR conditions between each of your expressions.
        • FOR conditions take a variable name and an expression value.
        • IF and ELSEIF conditions are considered comparisons.
        • These are available only in expressions tables in Aggregate, Join, and Reformat Gems.
    • Configuration Variable: Consists of Model Variables and Project Variables. You can see and edit your variables from the canvas settings by navigating to ... > Configuration. When you select a Project Variable, you can add a default value if no value is set in the Configuration setting.
    • Incremental: Allows you to use advanced dbt configurations.
    • Custom Code: Allows you to write your own custom code to create expressions that are not yet supported by the Visual Expression Builder. For example, you can use custom code to perform mathematical operations, such as addition and subtraction. As you type, you'll be given suggestions.

    Operator options

    The Visual Expression Builder supports the following operator options.

    Comparison operators

    Expressions can use the following comparison operators:

    • equals
    • not equals
    • less than
    • less than or equal
    • greater than
    • greater than or equal
    • between

    Existence checks

    Expressions support the following existence checks:

    • is null
    • is not null
    • in
    • not in

    Data types

    The Visual Expression Builder supports the following data types:

    • Basic:
      • Boolean
      • String - String / Varchar
      • Date & time - Date / Datetime / Timestamp / Timestamp NTZ
      • Number - Integer / Long / Short
      • Decimal number - Decimal / Double / Float
    • Other:
      • Binary
      • Byte
      • Char
      • Calendar interval / Day time interval / Year month interval
      • Null
      • Variant

    Boolean predicates

    Expressions support the following boolean predicates:

    • Unary:
      • Exists (in subquery)
      • In
      • Is null
    • Binary:
      • Between
      • Equality
      • Less than
      • Less than or equal
      • Greater than
      • Greater than or equal
    • Groups:
      • Not
      • And
      • Or
    - - + + \ No newline at end of file diff --git a/SQL/execution/data-explorer/index.html b/SQL/execution/data-explorer/index.html index 6d04bf486d..a2e94166e2 100644 --- a/SQL/execution/data-explorer/index.html +++ b/SQL/execution/data-explorer/index.html @@ -6,15 +6,14 @@ Data Explorer | Prophecy - - - - + + +
    Skip to main content

    Data Explorer

    The Data Explorer feature empowers users to explore and analyze their data samples directly within the user interface (UI). With this feature, users can gain insights, verify data accuracy, and make informed decisions.

    Interims are Data Samples

    Within any (1) SQL Model, interactively (2) Run up to one of the Gems. Notice the (3) Data Interim appears. Open the interim (data sample) to inspect the data.

    DataExplorationSQL

    The interim reveals the sample data after the Join in this example. This is a great way to confirm the structure for each column, review data entries in the first few rows, and make sure the datatypes are correct.

    Filter and Sort Options

    Users can now apply filters and sort rows based on any column, providing enhanced visibility and improved data analysis.

    info

    After applying or modifying any filter or sort criteria, you need to click the Run button to update the displayed data.

    Column Visibility Filtering

    By clicking on the ellipsis ... icon, users can conveniently filter the columns visible in the UI. No need to rerun the process to reflect these changes in the UI.

    Download Data

    Users can now download the data visible in the UI in CSV format using the dedicated download button.

    Experience seamless data exploration and analysis with the Data Explorer feature, enabling users to effortlessly navigate and understand their data directly from the user interface.

    - - + + \ No newline at end of file diff --git a/SQL/execution/index.html b/SQL/execution/index.html index 0ebbc23025..e9975fd7b9 100644 --- a/SQL/execution/index.html +++ b/SQL/execution/index.html @@ -6,15 +6,14 @@ Execution | Prophecy - - - - + + +
    Skip to main content
    - - + + \ No newline at end of file diff --git a/SQL/extensibility/dependencies/index.html b/SQL/extensibility/dependencies/index.html index d9e5e784ee..d5a5fb8126 100644 --- a/SQL/extensibility/dependencies/index.html +++ b/SQL/extensibility/dependencies/index.html @@ -6,16 +6,15 @@ Dependencies | Prophecy - - - - + + +
    Skip to main content

    Dependencies

    Dependencies allow you to make use of third-party or custom code in your Models and Jobs. You can connect dependencies to your SQL projects, and create them from the DBT Hub, GitHub, or another Prophecy Project.

    View dependencies

    To see all of your Project and Model dependencies, simply open the Manage Dependencies screen by clicking ... > Dependencies.

    View your dependencies

    You can also view your dependencies and get to the same Manage Dependencies screen from the bottom left-hand side of the canvas, under DEPENDENCIES.

    View your dependencies

    Add dependency

    To add a dependency, first navigate to Manage Dependencies. There you will see a complete list of your dependencies. Click on + Add Dependency to add a new dependency.

    You will be taken to the Create Dependency screen, where you can choose to create a dependency from the DBT Hub, GitHub, or another Prophecy Project.

    Add a Dependency

    After you've defined the Dependency, click Create. Prophecy will validate the dependency and add it to the Project, making it enabled in the current Model, and adding it as a dependency within all of the other Models in the Project.

    When adding dependencies, Prophecy validates that the dependency coordinates are valid and accessible. If that fails, you will see an invalid coordinates error.

    Invalid coordinates

    In rare cases, your dependency might be only accessible to the cluster or the build system but not Prophecy itself. If you're confident that the dependency is correct, but the validation error shows up, it's safe to press Save Anyways to ignore that warning.

    Add a DBT dependency

    To add a DBT dependency, you must select DBT Hub and provide the dbt package and version number.

    Add a DBT Dependency

    You can find the DBT package and version number for DBT dependencies from the dbt Package hub. For example, to use the latest dbt_utils package, see the dbt_utils package page.

    dbt_utils dependency

    You can copy and paste the information into the Create Dependency fields.

    Add a GitHub dependency

    To add a GitHub dependency, you must select GitHub and provide the Git Repository, Revision, and Sub Directory.

    Add a GitHub Dependency

    The Revision must be either a Git tag, commit hash, or branch name.

    caution

    Enable Warn unpinned only if you want to point to your GitHub Repository without specifying any version, commit, or branch. Doing so may result in unexpected behavior if there are changes to your latest default branch.

    Add a Prophecy Project dependency

    To add another Prophecy Project as a dependency, you must select Prophecy Project and select a project from the Project Dependencies dropdown.

    note

    The Prophecy Project must already be released for it to show up in the dropdown.

    Add a Prophecy Project Dependency

    Use a dependency

    Once you've added a dependency, you can use the following entities from them:

    • Models
    • Seeds
    • Sources
    • Functions
    • Gems
    • Data Tests

    Use a dependency

    Simply drag and drop the entity that you'd like to use from the dependencies section onto your Visual canvas.

    Delete dependency

    To delete a dependency, first navigate to the dependencies list on the Manage Dependencies screen. There you can click on a trash icon next to the dependency that you'd like to delete.

    Deleting a dependency

    Deleting a dependency deletes it within the whole Project, and all inheriting Models. If you're not confident about the functionality of the other Models, it's usually better to disable a dependency, instead of deleting it.

    Storage

    All of your dependencies are stored at the Project-level. When adding a dependency to a single Model, by default it becomes available to all the other Models within the same Project.

    Prophecy takes care of pulling the dependencies automatically when a Model is run.

    Dependencies are saved within your packages.yml file.

    - - + + \ No newline at end of file diff --git a/SQL/extensibility/gem-builder/index.html b/SQL/extensibility/gem-builder/index.html index 063ffdd903..c82cca15bc 100644 --- a/SQL/extensibility/gem-builder/index.html +++ b/SQL/extensibility/gem-builder/index.html @@ -6,17 +6,16 @@ Gem builder | Prophecy - - - - + + +
    Skip to main content

    Gem builder

    Enterprise Only

    Please contact us to learn more about the Enterprise offering.

    Each Prophecy Model is composed of individual operations, or Gems, that perform actions on data. While Prophecy offers dozens of Gems out-of-the-box, some data practitioners want to extend this idea and create their own Gems. Gem builder allows enterprise users to add custom Gems. You can create custom source, target, and transformation Gems, and then publish them for your team to use.

    Our SQL Gem builder supports Databricks and Snowflake SQL. It's built on dbt Core™, allowing you to build upon existing dbt libraries to define new macros to use in your custom Gem.

    You can create a Gem that writes a reference to either of the following options:

    • a new user-defined macro
    • an existing macro present in a dependency (such as dbt-utils)

    Getting Started

    You can get started with creating your own Gem by completing the following steps:

    1. Open a SQL project, and then click Add Gem.

      Gem builder new Gem

    2. Enter a Gem Name, choose a Category, and verify the Directory Path. Then click Create. The Gem is automatically set to save in macros/ as gem_name.py and gem_name.sql files.

    Now you can customize the Gem using the split-screen code editor. See the following Creating a Gem section to learn how to define your Gem.

    Creating a Gem

    A Gem is made up of multiple components that determine the UI and logic of the Gem. The Gem builder breaks up these components into steps for you while you create your Gem.

    Gem components

    There are two types of Gems that you can create while using the Gem builder:

    • DataSource Gems: These Gems enable the reading and writing of data from or to various data sources.
    • Transform Gems: These Gems apply transformations/joins/any other custom logic onto any source that is passed into them.

    Programmatically, a Gem is a component with the following parts:

    • The Gem UI Component defines the user experience of using the Gem on the visual canvas. This code is rendered on the Prophecy UI.
    • The Gem Code Logic defines how the Gem acts within the context of a Model.

    Gem code can be written using either Python or Scala.

    Steps to follow

    There are three parts to creating a Gem:

    1. Create SQL Query
    2. Customize Interface
    3. Preview

    In the first part, you'll define the SQL query using a new or existing macro. You'll then need to customize the UI and logic of your Gem. Finally, you can preview your Gem.

    1. Create SQL Query

    Prophecy Gems are powered by macros. Therefore, you can either define a new macro or leverage an existing one for your custom Gem.

    Gem builder create SQL query

    Existing dbt macros can help define table-to-table transformations. Consider using them to complete your SQL Query. See the dbt utils source code for macro definitions.

    2. Customize Interface

    Customizing your Gem involves editing the code for specific classes, functions, and methods.

    Gem builder customize interface

    The code starts with a list of imports from the Prophecy codebase to help get you started.


    from dataclasses import dataclass


    from collections import defaultdict
    from prophecy.cb.sql.Component import *
    from prophecy.cb.sql.MacroBuilderBase import *
    from prophecy.cb.ui.uispec import *

    The following sections describe how to make edits to your Gem's interface.

    Parent Class

    Every Gem class needs to extend a parent class from which it inherits the representation of the overall Gem. This includes the UI and the logic.

    You can determine the name and category of your Gem, which are "macro_gem" and "Custom" in this template.


    class macro_gem(MacroSpec):
    name: str = "macro_gem"
    projectName: str = "snowflake_docs"
    category: str = "Custom"

    Properties Classes

    There is one class that contains a list of the properties to be made available to the user for this particular Gem. Think of these as all the values a user fills out within the template of this Gem, or any other UI state that you need to maintain.

    • A collection of input tables, represented as input ports (optional).
    • A configurable set of additional parameters through the dialog (optional).
    caution

    The content of these Properties classes is persisted in JSON and stored in Git.

    These properties can be set in the dialog function by taking input from user-controlled UI elements. The properties are then available for reading in the following functions: validate, onChange, and apply.


    @dataclass(frozen=True)
    class macro_gemProperties(MacroProperties):
        # properties for the component with default values
        # parameter1 is bound to the "Table Name" text box in dialog() and is
        # passed verbatim as the macro argument in apply(); note that the
        # default value itself contains the single quotes.
        parameter1: str = "'default_value_of_parameter1'"

    Additional information on these functions is available in the following sections.

    Dialog (UI)

    The dialog function contains code specific to how the Gem UI should look to the user.

    • Automatically generated based on parameters (default).
    • Custom dialogs using Python or visual configurations.

    def dialog(self) -> Dialog:
        """Build the dialog shown when the Gem is opened.

        The dialog holds a two-column layout: a Ports panel (created with
        allowInputAddOrDelete=True) in the "content" column, and a stack with
        a single "Table Name" text box bound to the ``parameter1`` property.
        """
        macro_dialog = Dialog("Macro")

        # First column: the ports panel.
        columns = ColumnsLayout(gap="1rem", height="100%")
        ports_panel = Ports(allowInputAddOrDelete=True)
        columns = columns.addColumn(ports_panel, "content")

        # Second column: the form with the text box for parameter1.
        form = StackLayout()
        table_name_box = TextBox("Table Name")
        table_name_box = table_name_box.bindPlaceholder("Configure table name")
        table_name_box = table_name_box.bindProperty("parameter1")
        form = form.addElement(table_name_box)
        columns = columns.addColumn(form)

        return macro_dialog.addElement(columns)

    After defining a Gem in the code editor, you can preview and test it. See Preview. This feature directly renders the interface for the selected Gem using a dummy schema, enabling you to configure and experiment with the Gem’s UI components. You can then finalize them by previewing the generated SQL code.

    Gem builder preview

    There are various UI components that can be defined for custom Gems such as scroll boxes, tabs, and buttons. These UI components can be grouped together in various types of panels to create a custom user experience when using the Gem.

    After the Dialog object is defined, it's serialized as JSON, sent to the UI, and rendered there.

    Depending on what kind of Gem is being created, a Dialog needs to be defined.

    Column selector

    You can use the column selector property if you want to select columns from the UI and then highlight the used columns using the onChange function. The onChange function defines the changes that you want to apply to the Gem properties once changes have been made in the UI. For example, in the Reformat component provided by Prophecy, onChange highlights the columns in the input schema that are used in the expression table.

    We recommend trying out this dialog code in the Gem builder UI to see how each of these elements looks.

    Validation

    The validate method performs validation checks so that, if there is an issue with any input provided by the user, an error can be displayed. You can add any validation on your properties.

    • Optional functions such as onChange or validate, which are executed on user actions. They can dynamically alter the state of how the Gem works based on the user input.

    def validate(self, context: SqlContext, component: Component) -> List[Diagnostic]:
        """Return the diagnostics for the component's current state.

        This template adds no checks of its own: it returns whatever the
        parent class's ``validate`` produces for the same arguments.
        """
        inherited_diagnostics = super().validate(context, component)
        return inherited_diagnostics

    State Changes

    The onChange method is given for the UI State transformations. You are given both the previous and the new incoming state and can merge or modify the state as needed. The properties of the Gem are also accessible to this function, so functions like selecting columns, etc. are possible to add from here.


    def onChange(self, context: SqlContext, oldState: Component, newState: Component) -> Component:
        """React to a change of the component's state.

        This template accepts the incoming state unchanged; ``context`` and
        ``oldState`` are not used.
        """
        accepted_state = newState
        return accepted_state

    Apply

    The code for invoking the macro with the Gem logic is defined in the apply function. Here, the user-defined properties above are available through the props argument, and the macro to call is resolved as {self.projectName}.{self.name}.


    def apply(self, props: macro_gemProperties) -> str:
        """Generate the macro call for the component's current state.

        Args:
            props: The Gem properties; only ``parameter1`` is read.

        Returns:
            The macro invocation wrapped in double braces, for example
            ``{{ snowflake_docs.macro_gem('x') }}``.
        """
        resolved_macro_name = f"{self.projectName}.{self.name}"
        # Skip unset parameters. None is skipped in addition to '' because
        # loadProperties fills parameter1 from parametersMap.get(...), which
        # presumably yields None for a missing key (verify against
        # convertToParameterMap); a None would make str.join raise TypeError.
        arguments = [param for param in [props.parameter1] if param not in (None, '')]
        non_empty_param = ",".join(arguments)
        # The doubled braces are f-string escapes for literal "{{" and "}}".
        return f'{{{{ {resolved_macro_name}({non_empty_param}) }}}}'

    Macro Properties

    When Prophecy parses a macro invocation, it represents a macro definition in a default state. MacroProperties consists of the following:

    • macro name
    • project name
    • parameters used

    For example, if macro invocation is

    dbt_utils.deduplicate(relation, partition_by, order_by)

    then Prophecy parses it into an object such as the following:

    MacroParameter(value="relation"),
    MacroParameter(value="partition_by"),
    MacroParameter(value="order_by")

    This object now has to be converted into the Gem state defined by the user. This logic is defined in loadProperties.


    def loadProperties(self, properties: MacroProperties) -> PropertiesType:
        """Convert the default macro property representation into the Gem's properties.

        The macro parameters are turned into a map with
        ``convertToParameterMap`` and the entry for ``parameter1`` is copied
        into a new ``macro_gemProperties`` instance.
        """
        params_by_name = self.convertToParameterMap(properties.parameters)
        parameter1_value = params_by_name.get('parameter1')
        return macro_gem.macro_gemProperties(parameter1=parameter1_value)

    def unloadProperties(self, properties: PropertiesType) -> MacroProperties:
        """Convert the Gem's properties back into the default macro property representation.

        The result carries this Gem's macro name and project name plus a
        single ``parameter1`` macro parameter taken from ``properties``.
        """
        macro_name = self.name
        project_name = self.projectName
        macro_parameters = [MacroParameter("parameter1", properties.parameter1)]
        return BasicMacroProperties(
            macroName=macro_name,
            projectName=project_name,
            parameters=macro_parameters,
        )

    Similarly, in the opposite case, where this enhanced UX is not available for some reason, Prophecy needs to be able to render the default macro UI. For this purpose, you must define the logic to convert the Gem properties back to the default macro properties object, which Prophecy understands.

    3. Preview

    You can preview the component in the Gem builder to see how it looks. You can modify the properties and then save it to preview the generated code which will eventually run on your cluster.

    Gem builder preview

    Certain Gems may generate SQL code that isn’t compatible with a specific Fabric provider, rendering the Gem unusable and guaranteeing failure if attempted. This issue arises because some dbt macros are designed to support only specific warehouse types.

    note

    Custom Gem logic can be shared with other users within the Team and Organization. Navigate to the Gem listing to review Prophecy-defined and User-defined Gems. When your Gem is ready, publish it so that it is available to use in other Models.

    Example code

    This is an example specification of a Gem for an existing deduplicate macro from dbt utils.

    from dataclasses import dataclass

    from collections import defaultdict
    from prophecy.cb.sql.MacroBuilderBase import *
    from prophecy.cb.ui.uispec import *


    class Deduplicate(MacroSpec):
        """Gem specification for the ``deduplicate`` macro of the ``dbt_utils`` project."""

        name: str = "deduplicate"
        projectName: str = "dbt_utils"
        category: str = "Custom"

        @dataclass(frozen=True)
        class DeduplicateProperties(MacroProperties):
            # Values the user fills in through the dialog; each defaults to ''.
            tableName: str = ''
            partitionBy: str = ''
            orderBy: str = ''

        def dialog(self) -> Dialog:
            """Build the dialog: a Ports panel plus one text box per property."""
            return Dialog("Macro") \
                .addElement(
                    ColumnsLayout(gap="1rem", height="100%")
                    .addColumn(
                        Ports(allowInputAddOrDelete=True),
                        "content"
                    )
                    .addColumn(
                        StackLayout()
                        .addElement(
                            TextBox("Table Name")
                            .bindPlaceholder("Configure table name")
                            .bindProperty("tableName")
                        )
                        .addElement(
                            TextBox("Deduplicate Columns")
                            .bindPlaceholder("Select a column to deduplicate on")
                            .bindProperty("partitionBy")
                        )
                        .addElement(
                            TextBox("Rows to keep logic")
                            .bindPlaceholder("Select row on the basis of ordering a particular column")
                            .bindProperty("orderBy")
                        )
                    )
                )

        def validate(self, context: SqlContext, component: Component) -> List[Diagnostic]:
            """Check that the macro can be resolved and that every property is filled in.

            Returns:
                A list of Error diagnostics. A missing project or a missing
                macro is reported on its own; the per-property checks only
                run once the macro has been found.
            """
            diagnostics = []
            macroProjectMap = self.getMacroMap(context)
            # An empty projectName falls back to the project of the current context.
            projectName = self.projectName if self.projectName != "" else context.projectName

            if projectName not in macroProjectMap:
                diagnostics.append(Diagnostic(
                    "properties.projectName",
                    f"Project name {projectName} doesn't exist. Current Project is {context.projectName}",
                    SeverityLevelEnum.Error
                ))
                return diagnostics

            macroDef: Optional[MacroDefFromSqlSource] = self.getMacro(self.name, projectName, context)
            if macroDef is None:
                diagnostics.append(Diagnostic(
                    "properties.macroName",
                    f"Macro {self.name} doesn't exist",
                    SeverityLevelEnum.Error
                ))
                return diagnostics

            # Every property is required; report each empty one separately.
            required_properties = [
                ("tableName", "Please define table name"),
                ("partitionBy", "Please define partition by column"),
                ("orderBy", "Please define order by column"),
            ]
            for property_name, message in required_properties:
                if getattr(component.properties, property_name) == '':
                    diagnostics.append(
                        Diagnostic(
                            f"properties.{property_name}",
                            message,
                            SeverityLevelEnum.Error
                        )
                    )
            return diagnostics

        def onChange(self, context: SqlContext, oldState: Component, newState: Component) -> Component:
            """Accept the incoming state unchanged."""
            return newState

        def apply(self, props: DeduplicateProperties) -> str:
            """Generate the macro call, e.g. ``{{ dbt_utils.deduplicate(t,p,o) }}``.

            The macro name is prefixed with the project name unless the
            project name is empty. Empty properties are left out of the
            argument list.
            """
            if self.projectName != "":
                resolved_macro_name = f"{self.projectName}.{self.name}"
            else:
                resolved_macro_name = self.name
            non_empty_param = ",".join([param for param in [props.tableName, props.partitionBy, props.orderBy] if param != ''])
            # The doubled braces are f-string escapes for literal "{{" and "}}".
            return f'{{{{ {resolved_macro_name}({non_empty_param}) }}}}'

        def loadProperties(self, properties: MacroProperties) -> PropertiesType:
            """Convert the default macro property representation into DeduplicateProperties.

            The macro parameters ``relation``, ``partition_by`` and
            ``order_by`` map to ``tableName``, ``partitionBy`` and ``orderBy``.
            """
            parametersMap = self.convertToParameterMap(properties.parameters)
            # Fall back to '' for a parameter that is absent (presumably .get
            # returns None then - verify against convertToParameterMap) so the
            # str fields never hold None and validate() reports the gap.
            return Deduplicate.DeduplicateProperties(
                tableName=parametersMap.get('relation') or '',
                orderBy=parametersMap.get('order_by') or '',
                partitionBy=parametersMap.get('partition_by') or ''
            )
    - - + + \ No newline at end of file diff --git a/SQL/extensibility/index.html b/SQL/extensibility/index.html index 4c26e0b712..4557ea7418 100644 --- a/SQL/extensibility/index.html +++ b/SQL/extensibility/index.html @@ -6,15 +6,14 @@ Extensibility | Prophecy - - - - + + +
    Skip to main content
    - - + + \ No newline at end of file diff --git a/SQL/fabrics/databricks/index.html b/SQL/fabrics/databricks/index.html index 50e9a69f4d..6655b20c96 100644 --- a/SQL/fabrics/databricks/index.html +++ b/SQL/fabrics/databricks/index.html @@ -6,10 +6,9 @@ Databricks SQL | Prophecy - - - - + + +
    @@ -17,7 +16,7 @@ There are three steps to creating a Fabric:

    1. Basic info
    2. Providers
    3. Connections (optional)

    Basic Info

    Each Fabric requires some Basic information

    DBInfo

    Basic Info
    1 - Title - Specify a title, like devDatabricks, for your Fabric. “dev” or “prod” are helpful descriptors for this environment setup. Also specify a description (optional).
    2 - Team - Select a team to own this Fabric. Click the dropdown to list the teams your user is a member of. If you don’t see the desired team, ask a Prophecy Administrator to add you to a team.
    3 - Continue to the Provider step.

    Provider

    The SQL provider is both the storage warehouse and the execution environment where your SQL code will run. Be sure to Start the Databricks Warehouse before trying to set up the Fabric.

    SFProvider

    Provider details
    1 - Provider Type - Select SQL as the Provider type. (Alternatively, create a Spark type Fabric using instructions here or an Airflow type Fabric following these instructions.)
    2 - Provider - Click the dropdown menu for the list of supported Provider types. Select Databricks.
    3 - JDBC URL - Copy the JDBC URL from the Databricks UI as shown. This is the URL that Prophecy will connect to for SQL Warehouse data storage and execution.
    4 - Personal Databricks Access Token - This is the token Prophecy will use to connect to Databricks. Each user will need to apply their own token. To generate a Databricks PAT follow these instructions.
    5 - Catalog - (Optional) Enter the Catalog name if you are using Unity Catalog
    6 - Continue to the optional Connections step.

    Prophecy supports Databricks Volumes. When you run a Python or Scala Pipeline via a Job, you must bundle them as whl/jar artifacts. These artifacts must then be made accessible to the Databricks Job in order to use them as a library installed on the cluster. You can designate a path to a Volume for uploading the whl/jar files under Artifacts.

    Connections

    (Optional) Browsing data catalogs, tables, and schemas can be a time-intensive operation especially for Warehouses with hundreds or thousands of tables. To address this challenge, Prophecy offers a Metadata Connection to sync metadata from the data provider at regular intervals. These steps describe how to setup a metadata connection once a Databricks SQL Fabric is created.

    Using Fabrics

    Completed Fabrics will appear on the Metadata page and can be managed by Team admins. FabricMetadata

    Each team member can attach completed Fabrics to their Projects and Models. SFAttachCluster

    Attach a Fabric to a Model
    1 Metadata - Click the Prophecy Metadata and search for a SQL Project or Model of interest. Open the Model.
    2 Model - Here we have opened a Model called "Customers."
    3 Attach Cluster Menu - This dropdown menu lists the Fabrics and execution clusters available to this Project, according to the Team.
    4 Databricks Fabric - The available Fabrics appear here. Only SQL Fabrics are available to attach to SQL Projects.
    5 Attach Cluster - The Databricks Warehouse can be attached to the Model for execution.
    6 Run Model - Once a Fabric and Cluster are attached to the Project, the Model can be run interactively using the play button.
    info

    Remember, each user will be prompted to update the Fabric with their own credentials. Prophecy respects these credentials when accessing Databricks catalogs, tables, databases, etc.

    Once a Project is attached to a Fabric, and the Project is released, the Project can be scheduled to run on a regular frequency using Databricks Jobs

    - - + + \ No newline at end of file diff --git a/SQL/fabrics/index.html b/SQL/fabrics/index.html index 143b0150df..584811ea54 100644 --- a/SQL/fabrics/index.html +++ b/SQL/fabrics/index.html @@ -6,16 +6,15 @@ Fabrics | Prophecy - - - - + + +
    Skip to main content

    Fabrics

    A Fabric is a logical execution environment. Teams can organize their data engineering into multiple environments such as development, staging, and production. SQL Fabrics define the credentials for Prophecy to connect to the SQL Warehouse or Lakehouse. Importantly, each user must update the relevant Fabric with their own credentials. Once a SQL Fabric is created, Prophecy can access data, execute data models and generate target tables.

    Prophecy supports Databricks and Snowflake SQL Warehouses. Providers

    1. Snowflake SQL Fabrics - Provide Prophecy access to use the Snowflake warehouse data storage and compute engine capabilities.

    2. Databricks SQL Fabrics - Provide Prophecy access to use the Databricks' Warehouse storage and compute engine capabilities.

    Job Scheduling

    In addition to secure, ad-hoc model runs on Snowflake and Databricks, Job scheduling is done with a visual, easy-to-use interface.

    1. Snowflake users schedule Jobs with Airflow. Prophecy's Airflow Fabric facilitates a Connection to the Snowflake Fabric.

    2. Databricks users schedule Jobs with Databricks Jobs in Prophecy.

    Once a Snowflake or Databricks SQL Fabric is set up, Prophecy's interface makes it easy to run Models on a daily, weekly, or monthly basis using Airflow or Databricks Jobs.

    What's next

    To continue creating Fabrics, see the following pages:

    - - + + \ No newline at end of file diff --git a/SQL/fabrics/snowflake/index.html b/SQL/fabrics/snowflake/index.html index d5616e3a50..8a2b79cf7f 100644 --- a/SQL/fabrics/snowflake/index.html +++ b/SQL/fabrics/snowflake/index.html @@ -6,10 +6,9 @@ Snowflake SQL | Prophecy - - - - + + +
    @@ -19,7 +18,7 @@ SFProvider

    Provider details
    1 Provider Type - Select SQL as the Provider type. (Alternatively, create a Spark type Fabric using instructions here or an Airflow type Fabric following these instructions.)
    2 Provider - Click the dropdown menu for the list of supported Provider types. Select Snowflake.
    3 URL - Add the Snowflake Account URL, which looks like this: https://<org>-<account>.snowflakecomputing.com
    4 Username - Add the username that Prophecy will use to connect to the Snowflake Warehouse.
    5 Password - Add the password that Prophecy will use to connect to the Snowflake Warehouse. These username/password credentials are encrypted for secure storage. Also, each Prophecy user will provide their own username/password credential upon login. Be sure these credentials are scoped appropriately; Prophecy respects the authorization granted to this Snowflake user.
    6 Role - Add the Snowflake role that Prophecy will use to read data and execute queries on the Snowflake Warehouse. The role must be already granted to the username/password provided above and should be scoped according to the permission set desired for Prophecy.
    7 Warehouse - Specify the Snowflake warehouse for default writes for this execution environment.
    8 Database and Schema - Specify the desired Snowflake database (and below, the schema) for default writes for this execution environment.
    9 Continue to complete the Fabric creation.
    info

    Each user can read tables from each database and schema for which they have access. The default write database and schema is set here in the Fabric.

    Completed Fabrics will appear on the Metadata page and can be managed by Team admins. FabricMetadata

    Each team member can attach completed Fabrics to their Projects and Models. SFAttachCluster

    Attach a Fabric to a Model
    1 Metadata - Click the Prophecy Metadata and search for a Project or Model of interest. Open the Model.
    2 Model - Here we have opened a SQL model called "MyModel"
    3 Attach Cluster - This dropdown menu lists the Fabrics and execution clusters available to this Project, according to the Team.
    4 Snowflake Fabric - The available Fabrics appear here.
    5 Attach Cluster - The Snowflake Warehouse can be attached to the Model for execution.
    6 Run Model - Once a Fabric and Cluster are attached to the Project, the Model can be run interactively using the play button.
    info

    Remember, each user will be prompted to update the Fabric with their own credentials. Prophecy respects these credentials when reading Snowflake tables, databases, etc. User writes default to the database and schema defined in the Fabric.

    Schedule Jobs

    SQL models can be scheduled using Airflow to run on Snowflake. Create an Airflow Fabric, and set up a Snowflake Connection that references the Snowflake Fabric created above. Prophecy supports Snowflake Connections from these three Airflow flavors: Composer, MWAA, and Prophecy Managed Airflow.

    - - + + \ No newline at end of file diff --git a/SQL/gems/custom/index.html b/SQL/gems/custom/index.html index 77fde677cc..9e32c21e4b 100644 --- a/SQL/gems/custom/index.html +++ b/SQL/gems/custom/index.html @@ -6,15 +6,14 @@ Custom | Prophecy - - - - + + +
    -
    Skip to main content

    Custom

    SQL Gem

    caution

    This page about Custom SQL Gems is under construction. Please pardon our dust.

    Prophecy allows you to define new functions and gems by leveraging dbt macros as the underlying format. Both functions and gems can be easily defined visually and in code.

    - - +
    Skip to main content

    Custom

    SQL Gem

    caution

    This page about Custom SQL Gems is under construction. Please pardon our dust.

    Prophecy allows you to define new functions and gems by leveraging dbt macros as the underlying format. Both functions and gems can be easily defined visually and in code.

    + + \ No newline at end of file diff --git a/SQL/gems/data-joins/index.html b/SQL/gems/data-joins/index.html index 6a8bea2973..ec15030691 100644 --- a/SQL/gems/data-joins/index.html +++ b/SQL/gems/data-joins/index.html @@ -6,15 +6,14 @@ Join | Prophecy - - - - + + +
    -
    Skip to main content

    Join

    SQL Gem

    Upon opening the Join Gem, you can see a pop-up which provides several helpful features.

    Join definition

    For transparency, you can always see the (1) Input schema on the left hand-side, (2) Errors in the footer, and have the ability to (3) Run the Gem on the top right.

    To fill in our (5) Join condition within the (4) Conditions section, start typing the input table name and key. For example, if we have two input tables, nation and customer, type nation.nationkey = customers.nationkey. This condition finds a nation based on the nationkey field for every single customer.

    When you’re writing your join conditions, you’ll see available functions and columns to speed up your development. When the autocomplete appears, press ↑, ↓ to navigate between the suggestions and press tab to accept the suggestion.

    Select the (6)Join Type according to the provider, e.g. Databricks or Snowflake.

    The (7) Expressions tab allows you to define the set of output columns that are going to be returned from the Gem. Here we leave it empty, which by default passes through all the input columns, from both of the joined sources, without any modifications.

    To rename our Gem to describe its functionality, click on its (8) Name or try the Auto-label option. Gem names are going to be used as query names, which means that they should be concise and composed of alphanumeric characters with no spaces.

    Once done, press (9) Save.

    info

    To learn more about the Join Gem UI, see this page which illustrates features common to all Gems.

    Add a port

    It's easy to add an extra source to a Join Gem. Just connect and configure.

    JoinPort

    Once the source is (1) connected, click to (2) edit the ports.

    Update the (3) port name from the default input in2 to a more descriptive name such as the table name, in this case NATIONS.

    Fill in the (4) Join condition for the new table and specify the (5) Join type.

    Click (6) Save.

    Run

    When your Join Gem has the desired inputs, conditions and expressions, (7) run interactively to view (8)sample data.

    - - +
    Skip to main content

    Join

    SQL Gem

    Upon opening the Join Gem, you can see a pop-up which provides several helpful features.

    Join definition

    For transparency, you can always see the (1) Input schema on the left hand-side, (2) Errors in the footer, and have the ability to (3) Run the Gem on the top right.

    To fill in our (5) Join condition within the (4) Conditions section, start typing the input table name and key. For example, if we have two input tables, nation and customer, type nation.nationkey = customers.nationkey. This condition finds a nation based on the nationkey field for every single customer.

    When you’re writing your join conditions, you’ll see available functions and columns to speed up your development. When the autocomplete appears, press ↑, ↓ to navigate between the suggestions and press tab to accept the suggestion.

    Select the (6)Join Type according to the provider, e.g. Databricks or Snowflake.

    The (7) Expressions tab allows you to define the set of output columns that are going to be returned from the Gem. Here we leave it empty, which by default passes through all the input columns, from both of the joined sources, without any modifications.

    To rename our Gem to describe its functionality, click on its (8) Name or try the Auto-label option. Gem names are going to be used as query names, which means that they should be concise and composed of alphanumeric characters with no spaces.

    Once done, press (9) Save.

    info

    To learn more about the Join Gem UI, see this page which illustrates features common to all Gems.

    Add a port

    It's easy to add an extra source to a Join Gem. Just connect and configure.

    JoinPort

    Once the source is (1) connected, click to (2) edit the ports.

    Update the (3) port name from the default input in2 to a more descriptive name such as the table name, in this case NATIONS.

    Fill in the (4) Join condition for the new table and specify the (5) Join type.

    Click (6) Save.

    Run

    When your Join Gem has the desired inputs, conditions and expressions, (7) run interactively to view (8)sample data.

    + + \ No newline at end of file diff --git a/SQL/gems/datasources/index.html b/SQL/gems/datasources/index.html index bcc9d8b411..4e28dba689 100644 --- a/SQL/gems/datasources/index.html +++ b/SQL/gems/datasources/index.html @@ -6,15 +6,14 @@ Data Sources | Prophecy - - - - + + +
    Skip to main content

    Data Sources

    Loading data into Prophecy is done via the Seed, Source, or Model Gems - all based on dbt concepts.

    Seed

    A Seed is an excellent way to load small CSV files into Prophecy. This is super useful for small test datasets or lookup mappings, like a list of cities or countries. Seeds are saved as .sql files on Git when Projects are committed and released. Follow the steps below to Create a Seed and write to the SQL Warehouse defined in the Fabric.

    Seed1

    Seed Creation
    1 Add Seed - Inside the Model canvas, from the Project tab, select + Add Seed and provide a name. Here the Seed was named ORDERDETAILS.
    2 Toggle to Code - The code view displays a text editor.
    3 Paste data - Paste content in CSV format with header and separated by commas.
    4 Seed file - The Seed file is now listed in the files that will be committed to Git when the Project is committed and released.

    Seed2

    Seed - use in a Model and write to the Warehouse
    5 Open Model - Click to open the desired Model where you'd like to use the Seed.
    6 Toggle to Visual - From this visual view we can see the Model canvas.
    7 Add Seed to Model - Add the Seed as a datasource for this Model.
    8 Seed appears in the Model Canvas - Now the content of the Seed is part of the Model and can be transformed, joined, etc.
    9 Interactive Run - Click the Play button to interactively run the Model, including the Seed datasource.
    10 Click the Environment tab - Click to browse the SQL Warehouse.
    11 Refetch Sources - Crawls the SQL Warehouse to list tables in each of the databases and schemas accessible to your user. Seeds are by default materialized as Tables.
    12 Seed appears as a Table in the Warehouse - Once Prophecy has refetched the sources, Prophecy lists the Seed in the Project Browser. So the Seed exists as both a Table in the Warehouse and is also saved as a versioned CSV file on Git when the Project is committed and released.

    Source

    Each Source points to a table in the SQL Warehouse(s) specified in the Fabric. Prophecy does not store the contents of the Source Table. Inside a Project, the Env tab allows for browsing the database and schema. Tables in the Env tab can be drag-n-dropped to the canvas without manually specifying any metadata. You can also upload a source table directly to your environment. For more information about how to upload a file, see Upload files. Follow the steps below to create and use a Source.

    Source1

    Source creation and usage
    1 Cluster and Fabric - The Cluster and Fabric define the SQL Warehouse execution environment. Attach the desired Fabric which contains the Table of interest.
    2 Click the Environment tab - Browse the SQL Warehouse specified in the selected Fabric.
    3 Add a table - Prophecy respects the user's permissions. Any table for which the user has read permission can be added to the Model canvas.
    4 Source is added to the Model Canvas - The Source named ORDERSHIPMENTS is now part of the Model, and is ready for transformation, joins, etc.
    5 Click the Project tab - Lists all the Sources that are now accessible to any Model in the Project.

    Model

    A Model is similar to a Pipeline, as both contain data transformation steps. Unlike a Pipeline, however, each Model defines a single materialized view or table.

    Models are represented as visual format or as a single .sql file in code format. Because Models define a single materialized view or table, Models can serve as inputs to other models.

    Model1

    Model Creation
    1 Open existing Model - Within a Project, open an existing Model, e.g. MyNextModel.
    2 Add Model - Click to add a new Model to the existing Model.
    3 Model Name and File Path - Provide a name for the new Model, e.g. MyModel. Define the desired storage path in Git to store the new Model as a .sql file.

    Now the new Model has been created and the canvas is displayed. Optionally, click Config. Prophecy makes it easy to decide whether a model should be materialized as a view, table, ephemeral, incremental, or dynamic table. For more information on how to configure a table's materialization, read this interactive development blog post.

    Model2

    Model as a Datasource
    4 Add a Source - From the Environment tab, select a source for the new Model.
    5 Configure - Configure the new source, do any transformation steps or simply connect to the new Model Gem.
    6 Interactive Run - Click the "Play" button to execute the new Model and create a materialized view of the new Model.
    7 Project Tab - Click the Project tab and notice MyModel is now available in the Project Browser.
    8 Existing Model - Open the existing Model of interest. In this example, MyNextModel is opened.
    9 Add Model as a Source - Add the new Model (e.g. MyModel) as a Source.

    Model3

    Model as a Datasource (cont'd)
    10 Model as a Source - The new Model (MyModel) appears on the canvas of the existing Model (MyNextModel). Connect the new Model as an input to the Join Gem or any desired transformation Gem. The new Model acts as a Datasource.
    11 Toggle to code - The SQL code view for the open Model, MyNextModel, is shown. Each code fragment represents one Gem in MyNextModel.

    As a result, MyNextModel contains three sources: a Seed (ORDERDETAILS), a Source (ORDERSHIPMENTS), and a Model (MyModel). These three sources are joined together and materialized as a table or view in the database.schema defined in the Fabric.

    MyNextModel can be viewed visually (left) or as a SQL file (right). The Project browser (left) lists the Sources, Seeds, and Models available to drag-n-drop into the selected Model's canvas.

    note

    You can't use statements, such as CALL and EXECUTE IMMEDIATE, directly inside of Models. Instead of trying to use these statements in SQL statements or macros, you must use them in pre-hooks.

    For more details on dbt concepts including Sources, Seeds, and Models, please explore the dbt documentation or check out Prophecy's SQL with Databricks getting-started guide.

    - - + + \ No newline at end of file diff --git a/SQL/gems/datasources/upload-files/index.html b/SQL/gems/datasources/upload-files/index.html index 2d51b57a27..ec84075e0d 100644 --- a/SQL/gems/datasources/upload-files/index.html +++ b/SQL/gems/datasources/upload-files/index.html @@ -6,15 +6,14 @@ Upload files | Prophecy - - - - + + +
    Skip to main content

    Upload files

    You can add a source table to your SQL Environment by uploading a file directly onto the Visual canvas. This effectively uploads your file as a table. As a business user, this gives you more control over your data and how you choose to incorporate them into your Model transformation.

    The file can be one of the following file types:

    • CSV - The default format is comma-separated. You have the option to change this during upload.
    • Excel - The upload supports both XLS and XLSX.
    • JSON - The upload supports single JSON per line with a consistent set of keys.
    • Parquet - The upload supports single file upload.

    When you upload your file through Prophecy, it's added directly to your Databricks or Snowflake SQL warehouse as a table. This eliminates the need to use other tools to upload your files. Once you've uploaded your file, you can join the data in the file's table with existing big data tables that already exist in your SQL warehouse.

    Upload a file

    To upload your file, start by opening a Model within a project and following one of these steps:

    note

    The recommended maximum file size is 100 MB.

    • Under the Environment Browser, click Upload File, and then select your file.

      Upload file from your folders

    • Drag and drop your file onto the Visual canvas.

      Upload file by dragging and dropping

    Select the file type

    Both Databricks and Snowflake SQL warehouses support CSV, Excel, JSON, and Parquet file types.

    Select the file type:

    • Select your file type and format, and then click Next.

      Select your file type and format

    You can also replace your uploaded file or delete it.

    Select the table location

    You can select the database and schema for your table. For the table location, you can either create a new table or select a table you want to write your uploaded file to.

    Warning

    Selecting a table to write your uploaded file to deletes and re-creates the table.

    Select the table location:

    • Select the following table locations in order using the dropdown menus:

      • Database
      • Schema
      • Table
        • Choose a table to overwrite or click Create New to create a new table.

      Select the table location

      The dropdown menu choices are taken from the underlying warehouse, which already has these databases, schemas, and tables. You cannot create a new database or schema.

    Configure the table properties

    You can configure the table properties before completing the file upload.

    To configure the table properties, follow these steps:

    1. Review the file's Options. Depending on the file type and format, common defaults are already chosen for you.

    Configure the table properties

    1. Optional: Modify the Options. For example, you can update the schema or change the header row by selecting First row is header.

    2. Optional: If you made any changes to the Options, click Infer Schema. Infer schema runs automatically with the default options when you first get to the Properties step.

    Preview the table

    The preview shows your table data and gives you the option to download it.

    Preview the table:

    • Check that your preview looks correct, and then click Done.

      Preview the table

      If you selected a table to write your uploaded file to, you'll need to confirm the upload in the pop-up window by clicking Proceed.

    The uploaded file's table is now available in the Source/Target node. The table is created in your environment and a source is created within your Model. You can upload another file or start working with your new source Gem.

    - - + + \ No newline at end of file diff --git a/SQL/gems/index.html b/SQL/gems/index.html index d58c9b4595..e62362fba3 100644 --- a/SQL/gems/index.html +++ b/SQL/gems/index.html @@ -6,15 +6,14 @@ SQL Gems | Prophecy - - - - + + +
    Skip to main content

    SQL Gems

    In Prophecy and dbt, data models are groups of SQL statements used to create a single table or view. Prophecy simplifies data modeling by visualizing the data model as a series of steps, each represented by a Gem. Gems are functional units that perform tasks such as reading, transforming, writing, or handling other data operations.

    Each Gem corresponds to a SQL statement, which users can construct through an intuitive visual interface. Prophecy handles the underlying complexity by deciding whether each Gem should generate a CTE or a subquery. Users simply configure the Gem's interface, and Prophecy integrates the resulting SQL into the larger data model.

    The table below outlines the different SQL Gem categories.

    Gem
    CategoryDescription
    ModelModelEach model is a DAG that defines a single table or view. A model can also serve as an input for other Models.
    SourceDatasourceGems related to loading data: Seeds, Sources, or Models can be used as datasources.
    TransformTransformGems related to the transformation of data.
    Join and SplitJoinGems related to splitting or joining tables together.
    CustomCustomThe set of Gems built to extend Prophecy's capabilities.
    - - + + \ No newline at end of file diff --git a/SQL/gems/subgraph/index.html b/SQL/gems/subgraph/index.html index 184d33f8b2..a56277b82e 100644 --- a/SQL/gems/subgraph/index.html +++ b/SQL/gems/subgraph/index.html @@ -6,15 +6,14 @@ Subgraph | Prophecy - - - - + + +
    -
    Skip to main content

    Subgraph

    SQL Gem

    Subgraph Gems let you take multiple different Gems and wrap them under a single reusable parent Gem. In other words, they allow you to decompose complex logic into reusable components and simplify the visual view of your data model.

    Basic Subgraph

    Basic Subgraphs are single-use containers that capture one or more Gems within a model. They are the equivalent of a nested CTE.

    If you want to create a complex model with large sets of Transform and Join Gems, you can use a Basic Subgraph to group them together. This organizational approach enhances the visual clarity of your model by grouping various sections together under a common parent Gem. Additionally, it empowers you to break down intricate logic into modular components, thereby streamlining your data transformation processes.

    Create a Basic Subgraph

    You can create a Basic Subgraph the same way you create other Gems.

    To create a Basic Subgraph, follow these steps:

    1. Drag and drop the Subgraph Gem from the Subgraph menu, and connect it to any previously created Gem on your canvas.

    create_basic_subgraph

    1. Once you've added the Gem, click on it to open the subgraph canvas.

    2. On the subgraph canvas, add Gems to your Basic Subgraph by dragging and dropping from the Gems menu. You can even add a subgraph within the subgraph to create a nested subgraph.

    Run a Basic Subgraph

    A Basic Subgraph is functionally equivalent to the sequence of Gems that it contains. You can run a Basic Subgraph to see the output.

    To run a Basic Subgraph, follow this step:

    • On the Basic Subgraph Gem, click on the play button.

    run_basic_subgraph

    Add/Remove Port

    Gems and subgraphs are operations or transformations that take one or more tables as inputs. Therefore, Input ports signify the number of tables that a Basic Subgraph is taking in as inputs. There is no limit to the number of Input ports you can add.

    While using a Subgraph, you can configure the number of Input ports as per the requirements. However, as with all SQL Gems, there can only be one Output port.

    To add an Input port, follow these steps:

    1. On the subgraph canvas, click on the + button to add a new port.
    2. Optional: You can click the Delete icon next to the input port you want to remove.

    add_remove_port

    Code view

    Normally from the Code view, we create one Gem per CTE. However, since subgraphs are represented as nested CTEs in code, one subgraph can represent multiple nested SQL statements.

    subgraph_code_view

    If you'd like, you can create a subgraph from the Code view by writing multiple nested statements. Then toggle back to the Visual view to see an auto-generated Subgraph Gem based on your defined transformations.

    Subgraph Configurations

    You can configure your subgraphs by using either:

    • Model-level configurations
    • Project-level configurations
    - - +
    Skip to main content

    Subgraph

    SQL Gem

    Subgraph Gems let you take multiple different Gems and wrap them under a single reusable parent Gem. In other words, they allow you to decompose complex logic into reusable components and simplify the visual view of your data model.

    Basic Subgraph

    Basic Subgraphs are single-use containers that capture one or more Gems within a model. They are the equivalent of a nested CTE.

    If you want to create a complex model with large sets of Transform and Join Gems, you can use a Basic Subgraph to group them together. This organizational approach enhances the visual clarity of your model by grouping various sections together under a common parent Gem. Additionally, it empowers you to break down intricate logic into modular components, thereby streamlining your data transformation processes.

    Create a Basic Subgraph

    You can create a Basic Subgraph the same way you create other Gems.

    To create a Basic Subgraph, follow these steps:

    1. Drag and drop the Subgraph Gem from the Subgraph menu, and connect it to any previously created Gem on your canvas.

    create_basic_subgraph

    1. Once you've added the Gem, click on it to open the subgraph canvas.

    2. On the subgraph canvas, add Gems to your Basic Subgraph by dragging and dropping from the Gems menu. You can even add a subgraph within the subgraph to create a nested subgraph.

    Run a Basic Subgraph

    A Basic Subgraph is functionally equivalent to the sequence of Gems that it contains. You can run a Basic Subgraph to see the output.

    To run a Basic Subgraph, follow this step:

    • On the Basic Subgraph Gem, click on the play button.

    run_basic_subgraph

    Add/Remove Port

    Gems and subgraphs are operations or transformations that take one or more tables as inputs. Therefore, Input ports signify the number of tables that a Basic Subgraph is taking in as inputs. There is no limit to the number of Input ports you can add.

    While using a Subgraph, you can configure the number of Input ports as per the requirements. However, as with all SQL Gems, there can only be one Output port.

    To add an Input port, follow these steps:

    1. On the subgraph canvas, click on the + button to add a new port.
    2. Optional: You can click the Delete icon next to the input port you want to remove.

    add_remove_port

    Code view

    Normally from the Code view, we create one Gem per CTE. However, since subgraphs are represented as nested CTEs in code, one subgraph can represent multiple nested SQL statements.

    subgraph_code_view

    If you'd like, you can create a subgraph from the Code view by writing multiple nested statements. Then toggle back to the Visual view to see an auto-generated Subgraph Gem based on your defined transformations.

    Subgraph Configurations

    You can configure your subgraphs by using either:

    • Model-level configurations
    • Project-level configurations
    + + \ No newline at end of file diff --git a/SQL/gems/transform/deduplicate/index.html b/SQL/gems/transform/deduplicate/index.html index f6db7bf449..97e23c3bc8 100644 --- a/SQL/gems/transform/deduplicate/index.html +++ b/SQL/gems/transform/deduplicate/index.html @@ -6,15 +6,14 @@ Deduplicate | Prophecy - - - - + + +
    -
    Skip to main content

    Deduplicate

    SQL Gem

    Removes rows with duplicate values of specified columns.

    Parameters

    ParameterDescriptionRequired
    SourceInput sourceTrue
    Row to keep- Distinct Rows: Keeps all distinct rows. This is equivalent to performing a select distinct operation
    - Unique Only: Keeps rows that don't have duplicates
    - First: Keeps first occurrence of the duplicate row
    - Last: Keeps last occurrence of the duplicate row
    Default is Distinct Rows
    True
    Deduplicate On ColumnsColumns to consider while removing duplicate rows (not required for Distinct Rows)True

    Row to keep options

    As mentioned in the previous parameters, there are four Row to keep options that you can use in your deduplicate Gem.

    Deduplicate row to keep

    In the Code view, you can see that the Deduplicate Gem contains SELECT DISTINCT * when using the Distinct Rows option.

    Deduplicate code view

    Example

    Suppose you're deduplicating the following table.

    First_NameLast_NameTypeContact
    JohnDoephone123-456-7890
    JohnDoephone123-456-7890
    JohnDoephone123-456-7890
    AliceJohnsonphone246-135-0987
    AliceJohnsonphone246-135-0987
    AliceJohnsonemailalice@johnson.com
    AliceJohnsonemailalice@johnson.com
    BobSmithemailbob@smith.com

    For Distinct Rows, the interim data will show the following:

    First_NameLast_NameTypeContact
    JohnDoephone123-456-7890
    AliceJohnsonphone246-135-0987
    AliceJohnsonemailalice@johnson.com
    BobSmithemailbob@smith.com

    The First and Last options work similarly to Distinct Rows, but they keep the first and last occurrence of the duplicate rows respectively.

    For Unique Only, the interim data will look like the following:

    First_NameLast_NameTypeContact
    BobSmithemailbob@smith.com

    You'll be left with only one unique row since the rest were all duplicates.


    You can add First_Name and Last_Name to Deduplicate On Columns if you want to further deduplicate the table.

    For Distinct Rows, the interim data will show the following:

    First_NameLast_Name
    JohnDoe
    AliceJohnson
    BobSmith
    note

    For First, Last, and Unique Only, the interim data will contain all columns, irrespective of the columns that were added.

    For First and Last, the interim data will look like the following:

    First_NameLast_NameTypeContact
    JohnDoephone123-456-7890
    AliceJohnsonphone246-135-0987
    AliceJohnsonemailalice@johnson.com
    BobSmithemailbob@smith.com

    For Unique Only, the interim data will look like the following:

    First_NameLast_NameTypeContact
    BobSmithemailbob@smith.com
    - - +
    Skip to main content

    Deduplicate

    SQL Gem

    Removes rows with duplicate values of specified columns.

    Parameters

    ParameterDescriptionRequired
    SourceInput sourceTrue
    Row to keep- Distinct Rows: Keeps all distinct rows. This is equivalent to performing a select distinct operation
    - Unique Only: Keeps rows that don't have duplicates
    - First: Keeps first occurrence of the duplicate row
    - Last: Keeps last occurrence of the duplicate row
    Default is Distinct Rows
    True
    Deduplicate On ColumnsColumns to consider while removing duplicate rows (not required for Distinct Rows)True

    Row to keep options

    As mentioned in the previous parameters, there are four Row to keep options that you can use in your deduplicate Gem.

    Deduplicate row to keep

    In the Code view, you can see that the Deduplicate Gem contains SELECT DISTINCT * when using the Distinct Rows option.

    Deduplicate code view

    Example

    Suppose you're deduplicating the following table.

    First_NameLast_NameTypeContact
    JohnDoephone123-456-7890
    JohnDoephone123-456-7890
    JohnDoephone123-456-7890
    AliceJohnsonphone246-135-0987
    AliceJohnsonphone246-135-0987
    AliceJohnsonemailalice@johnson.com
    AliceJohnsonemailalice@johnson.com
    BobSmithemailbob@smith.com

    For Distinct Rows, the interim data will show the following:

    First_NameLast_NameTypeContact
    JohnDoephone123-456-7890
    AliceJohnsonphone246-135-0987
    AliceJohnsonemailalice@johnson.com
    BobSmithemailbob@smith.com

    The First and Last options work similarly to Distinct Rows, but they keep the first and last occurrence of the duplicate rows respectively.

    For Unique Only, the interim data will look like the following:

    First_NameLast_NameTypeContact
    BobSmithemailbob@smith.com

    You'll be left with only one unique row since the rest were all duplicates.


    You can add First_Name and Last_Name to Deduplicate On Columns if you want to further deduplicate the table.

    For Distinct Rows, the interim data will show the following:

    First_NameLast_Name
    JohnDoe
    AliceJohnson
    BobSmith
    note

    For First, Last, and Unique Only, the interim data will contain all columns, irrespective of the columns that were added.

    For First and Last, the interim data will look like the following:

    First_NameLast_NameTypeContact
    JohnDoephone123-456-7890
    AliceJohnsonphone246-135-0987
    AliceJohnsonemailalice@johnson.com
    BobSmithemailbob@smith.com

    For Unique Only, the interim data will look like the following:

    First_NameLast_NameTypeContact
    BobSmithemailbob@smith.com
    + + \ No newline at end of file diff --git a/SQL/gems/transform/flattenschema/index.html b/SQL/gems/transform/flattenschema/index.html index e5685ca767..e1589d7dfe 100644 --- a/SQL/gems/transform/flattenschema/index.html +++ b/SQL/gems/transform/flattenschema/index.html @@ -6,15 +6,14 @@ Flatten Schema | Prophecy - - - - + + +
    -
    Skip to main content

    Flatten Schema

    SQL Gem

    When processing raw data it can be useful to flatten complex data types like Structs and Arrays into simpler, flatter schemas. This allows you to preserve all schemas, and not just the first one. You can use FlattenSchema with Snowflake Models.

    The FlattenSchema gem

    The Input

    FlattenSchema works on Snowflake sources that have nested columns that you'd like to extract into a flat schema.

    For example, with an input schema like so:

    Input schema

    And the data looks like so:

    Input data

    We want to extract the contact, and all of the columns from the structs in content into a flattened schema.

    The Expressions

    Having added a FlattenSchema Gem to your Model, all you need to do is click the column names you wish to extract and they'll be added to the Expressions section.

    tip

    You can click to add all columns, which would make all nested leaf level values of an object visible as columns.

    Once added you can change the Output Column for a given row to change the name of the Column in the output.

    Adding expressions

    The Output

    If we check the Output tab in the Gem, you'll see the schema that we've created using the selected columns.

    And here's what the output data looks like:

    Output interim

    The nested contact information has been flattened so that you have individual rows for each content type.

    Advanced settings

    If you're familiar with Snowflake's FLATTEN table function, you can use the advanced settings to customize the optional column arguments.

    To use the advanced settings, hover over a column, and click the dropdown arrow.

    Advanced settings

    You can customize the following options:

    • Path to the element: The path to the element within the variant data structure that you want to flatten.
    • Flatten all elements recursively: If set to false, only the element mentioned in the path is expanded. If set to true, all sub-elements are expanded recursively. This is set to false by default.
    • Preserve rows with missing fields: If set to false, rows with missing fields are omitted from the output. If set to true, rows with missing fields are generated with null in the key, index, and value columns. This is set to false by default.
    • Datatype that needs to be flattened: The data type that you want to flatten. You can choose Object, Array, or Both. This is set to Both by default.
    - - +
    Skip to main content

    Flatten Schema

    SQL Gem

    When processing raw data it can be useful to flatten complex data types like Structs and Arrays into simpler, flatter schemas. This allows you to preserve all schemas, and not just the first one. You can use FlattenSchema with Snowflake Models.

    The FlattenSchema gem

    The Input

    FlattenSchema works on Snowflake sources that have nested columns that you'd like to extract into a flat schema.

    For example, with an input schema like so:

    Input schema

    And the data looks like so:

    Input data

    We want to extract the contact, and all of the columns from the structs in content into a flattened schema.

    The Expressions

    Having added a FlattenSchema Gem to your Model, all you need to do is click the column names you wish to extract and they'll be added to the Expressions section.

    tip

    You can click to add all columns, which would make all nested leaf level values of an object visible as columns.

    Once added you can change the Output Column for a given row to change the name of the Column in the output.

    Adding expressions

    The Output

    If we check the Output tab in the Gem, you'll see the schema that we've created using the selected columns.

    And here's what the output data looks like:

    Output interim

    The nested contact information has been flattened so that you have individual rows for each content type.

    Advanced settings

    If you're familiar with Snowflake's FLATTEN table function, you can use the advanced settings to customize the optional column arguments.

    To use the advanced settings, hover over a column, and click the dropdown arrow.

    Advanced settings

    You can customize the following options:

    • Path to the element: The path to the element within the variant data structure that you want to flatten.
    • Flatten all elements recursively: If set to false, only the element mentioned in the path is expanded. If set to true, all sub-elements are expanded recursively. This is set to false by default.
    • Preserve rows with missing fields: If set to false, rows with missing fields are omitted from the output. If set to true, rows with missing fields are generated with null in the key, index, and value columns. This is set to false by default.
    • Datatype that needs to be flattened: The data type that you want to flatten. You can choose Object, Array, or Both. This is set to Both by default.
    + + \ No newline at end of file diff --git a/SQL/gems/transform/index.html b/SQL/gems/transform/index.html index b1124fa97d..4e31b12606 100644 --- a/SQL/gems/transform/index.html +++ b/SQL/gems/transform/index.html @@ -6,15 +6,14 @@ Transform | Prophecy - - - - + + +
    Skip to main content

    Transform

    Constitutes the set of Gems that help with transforming data.

    Transforms
    Description
    AggregateGroup data and apply aggregation functions such as avg or sum.
    DeduplicateRemoves rows with duplicate values of specified columns.
    FilterSelect rows of data that meet a condition.
    FlattenSchemaFlattenSchema works on Snowflake sources that have nested columns that you'd like to extract into a flat schema.
    LimitLimit the number of rows in a table or view.
    OrderByArrange the rows in a table or view in alphanumeric order based on the values of the specified data columns.
    ReformatSelect columns, rename columns, or redefine existing columns from a table or view.
    info

    To learn more about the UI for Transformation Gems, see this page which illustrates features common to all Gems, or the Aggregate Gem page, which provides a thorough description for using SQL Gems.

    Run

    When your Transformation Gem has the desired condition and expression, run interactively to view sample data.

    - - + + \ No newline at end of file diff --git a/SQL/gems/transform/sql-aggregate/index.html b/SQL/gems/transform/sql-aggregate/index.html index 032c6baa9e..dc4b846dad 100644 --- a/SQL/gems/transform/sql-aggregate/index.html +++ b/SQL/gems/transform/sql-aggregate/index.html @@ -6,16 +6,15 @@ Aggregate | Prophecy - - - - + + +
    -
    Skip to main content

    Aggregate

    SQL Gem

    Together let's deconstruct a commonly used Transformation, the Aggregate Gem. Follow along in the HelloWorld_SQL Project.

    Using the Gem

    1

    1. Open the HelloWorld_SQL Project.
    2. From the list of Models, select the Orders Model. A Model is a series of transformation steps (Gems) that describe how to create a single table or view. The Orders Model defines the steps to create the Orders table.
    3. Open the Transformation dropdown to see the available Transformation Gems. The Aggregate Gem has already been dragged to the canvas and configured in this HelloWorld_SQL example.
    4. Click the arrow to Run up to the Aggregate Gem.
    5. Preview a data sample before the Aggregate Gem.
    6. This is the payments data sample before the Aggregate Gem. There is one row per payment.
    7. Click to Run up to the Join Gem.
    8. Preview a data sample after the Aggregate Gem.
    9. This is the order_payments data sample after the Aggregate Gem. The individual rows of payments have been grouped according to ORDER_ID, and the amounts have been summed according to the payment type.
    10. Let's see how to use expressions. Click to Open.

    Using Expressions

    GroupBy expression

    2

    1. There is one Input Dataset, payments, and we can see the columns and datatypes below.
    2. Open the GroupBy tab.
    3. We can see the Gem is configured to group according to the order_id column. Just click any column name listed in (1) Input to add a column to the GroupBy expressions.
    4. Syntax errors are surfaced here as you're designing your Gem (and Model) on the canvas. That's handy so you don't have to run a Job to discover a typo.
    5. The Run button is available here to test and view data samples. This way you can make sure your Aggregate Gem is configured as desired.

    Aggregate expressions

    Next we'll walk through the Aggregate tab, where we have a lot more bells and knobs to turn. +

    Aggregate

    SQL Gem

    Together let's deconstruct a commonly used Transformation, the Aggregate Gem. Follow along in the HelloWorld_SQL Project.

    Using the Gem

    1

    1. Open the HelloWorld_SQL Project.
    2. From the list of Models, select the Orders Model. A Model is a series of transformation steps (Gems) that describe how to create a single table or view. The Orders Model defines the steps to create the Orders table.
    3. Open the Transformation dropdown to see the available Transformation Gems. The Aggregate Gem has already been dragged to the canvas and configured in this HelloWorld_SQL example.
    4. Click the arrow to Run up to the Aggregate Gem.
    5. Preview a data sample before the Aggregate Gem.
    6. This is the payments data sample before the Aggregate Gem. There is one row per payment.
    7. Click to Run up to the Join Gem.
    8. Preview a data sample after the Aggregate Gem.
    9. This is the order_payments data sample after the Aggregate Gem. The individual rows of payments have been grouped according to ORDER_ID, and the amounts have been summed according to the payment type.
    10. Let's see how to use expressions. Click to Open.

    Using Expressions

    GroupBy expression

    2

    1. There is one Input Dataset, payments, and we can see the columns and datatypes below.
    2. Open the GroupBy tab.
    3. We can see the Gem is configured to group according to the order_id column. Just click any column name listed in (1) Input to add a column to the GroupBy expressions.
    4. Syntax errors are surfaced here as you're designing your Gem (and Model) on the canvas. That's handy so you don't have to run a Job to discover a typo.
    5. The Run button is available here to test and view data samples. This way you can make sure your Aggregate Gem is configured as desired.

    Aggregate expressions

    Next we'll walk through the Aggregate tab, where we have a lot more bells and knobs to turn. 3

    1. Click the Aggregate tab, where we'll define our new column names and data manipulation expressions.
    2. The list of Expressions describes how to manipulate a particular column from the input Dataset.
    3. The list of Target Columns is the list of column names to be manipulated by the Aggregate Gem and included in the Gem's output.
    4. Click Output to see the schema of the Dataset resulting from the Aggregate Gem.
    5. The order_id column is getting passed through the Aggregate step without being changed. Recall this is the column that will be used to group the data. To add any column, just click the column name from the Input list, or start typing the column name and Prophecy Copilot will provide suggestions.
    6. Since the order_id column was selected in (5), this column appears in the output Dataset. It has a number datatype.
    7. Here is an expression that includes some data manipulation logic. The amount is summed according to the payment method. payment_method is being passed as a configurable variable surrounded by curly braces {{ }}. We'll see how to configure the variables credit_card, coupon, bank_transfer, gift_card in the next section.
    8. These are the output columns according to the (3)Target Column. {{ payment_method }} is a configurable parameter, and each of the payment methods (e.g. GIFT_CARD) has been appended with the string amount. Now we are starting to see how the data sample output from the Aggregate Gem will be constructed.
    9. Let's AskAI to help write a new expression. Type "Calculate customer size based on the amount purchased." Copilot AI generates a SQL expression and we can keep or reject the suggestion.
    10. The new expression will be reflected in the Aggregate Gem output, CUSTOMER_SIZE column.

    Using Variables

    Now let's see how to configure the payment_methods variable.

    4

    1. Click Config to open the configuration screen.
    2. We see the option to apply a configuration at several different levels: apply to the entire Model, all the Models in the GitHub folder, or all the Models in the Project. Here we can see there are Configurations that apply to this particular Orders Model.
    3. See the list of DBT Defined Configs. These are configs every user could employ with their DBT Projects, such as whether to materialize the model as table, view, ephemeral, or incremental. Click the dropdown to select the config of interest, then enter the appropriate value. Hover over the "i" icon for a short description of each DBT Config.
    4. See the list of user-defined Variables. In our HelloWorld_SQL project, the payment_methods variable has been defined with the four values shown.
    5. Click Save after editing the Config for the Model, Folder, or Project.

    Click the code view to see the Config encoded in the dbt_project.yml file or the schema.yml/properties.yml file. Further information can be found in DBT documentation, as Prophecy's Model Config is based on DBT's Model Configurations.

    Using Config variables (and DBT Defined Configs) within a Gem is easy. Just wrap the variable name (e.g. payment_method) in curly braces {{ }} like this: {{ payment_method }}.

    info

    To learn more about the Aggregate Gem UI, see this page which illustrates features common to all Gems.

    Here we used the Aggregate Gem from the HelloWorld_SQL Project as a learning guide. What types of Aggregations will you build? Reach out with questions and to let us know how you're using Prophecy.

    - - + + \ No newline at end of file diff --git a/SQL/index.html b/SQL/index.html index 06ff25a433..3c01d7b0b3 100644 --- a/SQL/index.html +++ b/SQL/index.html @@ -6,15 +6,14 @@ Copilot for SQL users | Prophecy - - - - + + + - - + + \ No newline at end of file diff --git a/Spark/best-practices/index.html b/Spark/best-practices/index.html index 65cde92516..702185c9f8 100644 --- a/Spark/best-practices/index.html +++ b/Spark/best-practices/index.html @@ -6,15 +6,14 @@ Best Practices for Spark | Prophecy - - - - + + + - - + + \ No newline at end of file diff --git a/Spark/best-practices/use-dbx-secrets/index.html b/Spark/best-practices/use-dbx-secrets/index.html index eb08f2a1c9..5c996b38d6 100644 --- a/Spark/best-practices/use-dbx-secrets/index.html +++ b/Spark/best-practices/use-dbx-secrets/index.html @@ -6,10 +6,9 @@ Use Databricks Secrets for Username Password fields in Gems | Prophecy - - - - + + +
    @@ -17,7 +16,7 @@ For value, add the scope and key you created for your secret in the first step and save it. Please refer below image

    img2.png

    It's now ready to be used in your Gems.

    Step 4: Add a Snowflake Gem to your Pipeline and refer to the above-created Configs in the username and password fields

    Now that we have a Pipeline config that securely refers to our password stored in Databricks secrets, we can go ahead and add a Snowflake Gem. Use the Config with the syntax ${snowflake_user} and ${snowflake_pass} in the username and password fields respectively, and define all other required fields in the Gem as is. Your Gem is now ready to be used and tested.

    img3.png

    If users still enter plain-text credentials, they will also see a Warning diagnostic in their Gems.

    - - + + \ No newline at end of file diff --git a/Spark/configuration/conditional-execution/index.html b/Spark/configuration/conditional-execution/index.html index e4d282e033..86668b08a8 100644 --- a/Spark/configuration/conditional-execution/index.html +++ b/Spark/configuration/conditional-execution/index.html @@ -6,10 +6,9 @@ Conditional Execution in Spark | Prophecy - - - - + + +
    @@ -19,7 +18,7 @@ This opens a configuration panel where users can define the condition for the Gem. The conditions can be expressed in Scala or Python, depending on the language used in the project.

    When a condition is set on a Gem, it is indicated by the (C) symbol appearing before the Gem name, providing a visual cue that a condition has been configured.

    It's important to note that the conditions cannot access the data within the Pipeline. They are designed to evaluate based on Pipeline configurations and any other relevant factors that can be determined at runtime.

    Additionally, when a Gem is set as a pass-through or removed due to a condition evaluation, the interims will not be displayed on the edges associated with that Gem.

    Pass-through Condition

    Pass-through conditions provide a convenient way to skip the transformation of a Gem or subgraph and maintain the input data as the output data. This ensures that the data remains unchanged and passes through the Gem or subgraph without any modification.

    To support pass-through functionality, the following conditions must be met:

    1. Gem Connection: The Gem must be connected in the Pipeline, meaning it should have both an input port and an output port. This allows the data to flow through the Gem.

    2. Port Configuration: The Gem can have either an equal input and output port configuration, where the same data is passed through, or a single input port and multiple output ports configuration. In both cases, the input data is preserved as the output data, maintaining the pass-through behavior.

    3. Source and Targets: Pass-through conditions are not applicable to source and target elements within the Pipeline. These elements represent the data source and destination and do not involve any transformation logic. The same conditions mentioned above also apply to subgraphs. Subgraphs can have pass-through behavior if they meet the requirements of having connected input and output ports, as well as appropriate port configurations.

    Removal Condition

    In addition to pass-through conditions, we have introduced removal conditions that allow users to skip writing data to a target and remove all the Gems/transformations in the Pipeline after the current Gem. This feature provides users with greater control over the Pipeline flow and allows them to conditionally exclude specific portions of the Pipeline execution. Unlike pass-through conditions, removal conditions can be applied to any Gem in the Pipeline. However, it's important to note that when a removal condition is set on a Gem, users need to handle the missing data in the Gem logic, as the inputs may be unavailable.

    Currently, SetOperations and Script Gems in the Pipeline are capable of handling missing inputs. For other Gems, if any input is missing, the Gem will be removed along with all downstream transformations. The same behavior applies to subgraphs as well.

    To modify the handling of missing inputs in other Gems, users can update the Gem specs in the Gem Builder by adding a boolean parameter called allInputsRequired. By setting this parameter to true or false, users can determine whether a Gem should be removed if any of its inputs are missing.

    This customization option allows users to tailor the behavior of the Pipeline and the handling of missing data based on their specific requirements.

    - - + + \ No newline at end of file diff --git a/Spark/configuration/index.html b/Spark/configuration/index.html index 26f59c1ac0..79b923c44a 100644 --- a/Spark/configuration/index.html +++ b/Spark/configuration/index.html @@ -6,10 +6,9 @@ Configuration | Prophecy - - - - + + +
    @@ -22,7 +21,7 @@ configurations in different environments or different users.

    New instances can be configured to override default values as shown in image below:

    Create config instance

    Create pipeline override

    Using a particular configuration instance for interactive runs

    For interactive runs, configuration can be selected as shown in image below. Config interactive run

    Using configuration instances in Jobs

    Particular instances can also be configured in Databricks Jobs.

    Config inside job

    Overriding configuration values in Jobs

    Specific values from configuration instance can be overridden as shown in images below:

    Config job override

    Code

    All configuration instances and values are automatically converted to code as well. Default configurations are stored as code and specific instance overrides are stored as JSON files as shown in image below.

    Scala Config code

    Config scala code

    Python Config code

    Config python code

    Component code

    def Reformat(spark: SparkSession, in0: DataFrame) -> DataFrame:
    return in0.select(
    col("customer_id"),
    col("orders"),
    col("account_length_days"),
    expr(Config.test_expression).as("amounts"),
    lit(Config.report_name).as("report_name")
    )
    - - + + \ No newline at end of file diff --git a/Spark/execution/data-explorer/index.html b/Spark/execution/data-explorer/index.html index e7625bb511..2a8325e011 100644 --- a/Spark/execution/data-explorer/index.html +++ b/Spark/execution/data-explorer/index.html @@ -6,15 +6,14 @@ Data Explorer | Prophecy - - - - + + +
    Skip to main content

    Data Explorer

    The Data Explorer feature empowers users to seamlessly explore and analyze their data samples directly within the user interface (UI). This feature provides a range of capabilities to help users gain insights, verify data accuracy, and make informed decisions.

    Data_explorer

    Filter and Sort Options

    Users can apply filters and sort rows based on any column, enhancing visibility and improving data analysis.

    info

    After applying or modifying any filter or sort criteria, you need to click the Run button to update the displayed data.

    Reset Applied Filters and Sort

    Easily reset any applied filters and sorting settings by clicking on the reset button.

    Column Visibility Filtering

    Conveniently filter columns visible in the UI by clicking on the ellipsis ... icon. No need to rerun the process to reflect these changes in the UI.

    Load More Rows and Total Count

    View and assess a larger Dataset by loading more rows. The total count of rows and columns is visible, providing a comprehensive overview of the data.

    Download Data

    Download the visible data in the UI in CSV and JSON format using the dedicated download button.

    Create Gems

    After analyzing the data, users can retain the filter and sort options in the Pipeline by clicking on the Create Gems option. This action saves the applied filter and sort as a Filter and OrderBy Gem in the Pipeline.

    - - + + \ No newline at end of file diff --git a/Spark/execution/execution-metrics/index.html b/Spark/execution/execution-metrics/index.html index 1273168a13..0d8b88bf9b 100644 --- a/Spark/execution/execution-metrics/index.html +++ b/Spark/execution/execution-metrics/index.html @@ -6,10 +6,9 @@ Execution Metrics | Prophecy - - - - + + +
    @@ -20,7 +19,7 @@ the tables from Team view in the Prophecy UI.

    There are three execution metrics tables that store data for Pipelines, individual components, and the generated data samples, also known as interims. You have the option to choose the following at the time of team creation:

    ExecutionMetricsConfig.png

    Pre-requisite

    As a Workspace / Catalog Admin, you must create the tables and grant appropriate permissions to your users so that they can specify the tables of their choice. It's recommended to do this at the time of team creation to ensure the best experience for your users.

    You can store these tables using any format like Avro, Parquet, ORC, or Delta.

    DDLs and Grant accesses are in the following sections.

    Create tables using Delta (for Databricks)

    The following are sample Create table commands for tables using Delta. These are suitable for Databricks or if your metastore supports Delta tables.

      CREATE TABLE IF NOT EXISTS <database>.<pipeline_runs_table_name>
    (
    uid STRING NOT NULL COMMENT 'Unique identifier for the pipeline run',
    pipeline_uri STRING NOT NULL COMMENT 'URI of the pipeline',
    job_uri STRING COMMENT 'URI of the job associated with the pipeline run',
    job_run_uid STRING COMMENT 'Unique identifier for the job run',
    task_run_uid STRING COMMENT 'Unique identifier for the task run',
    status STRING COMMENT 'Status of the pipeline run',
    fabric_uid STRING NOT NULL COMMENT 'Unique identifier for the fabric',
    time_taken LONG COMMENT 'Time taken for the pipeline run',
    rows_read LONG COMMENT 'Number of rows read during the pipeline run',
    rows_written LONG COMMENT 'Number of rows written during the pipeline run',
    created_at TIMESTAMP COMMENT 'Timestamp when the pipeline run was created',
    created_by STRING NOT NULL COMMENT 'Prophecy user ID who created the pipeline run',
    run_type STRING COMMENT 'Type of the run - Interactive, Adhoc, or Scheduled',
    input_datasets ARRAY<STRING> COMMENT 'List of input datasets',
    output_datasets ARRAY<STRING> COMMENT 'List of output datasets',
    workflow_code MAP<STRING, STRING> COMMENT 'Workflow code associated with the pipeline run',
    expired BOOLEAN COMMENT 'Indicates if the pipeline run has expired',
    branch STRING COMMENT 'Git entity information for the pipeline run. Can be branch name or release tag',
    pipeline_config STRING COMMENT 'Pipeline configuration details',
    user_config STRING COMMENT 'User configuration details',
    expected_interims INT COMMENT 'Expected number of interims',
    actual_interims INT COMMENT 'Actual number of interims',
    logs STRING COMMENT 'Logs for the pipeline run'
    )
    USING DELTA
    PARTITIONED BY (fabric_uid, pipeline_uri, created_by)
    LOCATION '<table_path>'
    TBLPROPERTIES (delta.autoOptimize.optimizeWrite = true, delta.autoOptimize.autoCompact = true)
      CREATE TABLE IF NOT EXISTS <database>.<component_runs_table_name>
    (
    uid STRING NOT NULL COMMENT 'Unique identifier for the component run',
    component_uri STRING NOT NULL COMMENT 'URI of the component',
    pipeline_uri STRING COMMENT 'URI of the pipeline associated with the component run',
    pipeline_run_uid STRING NOT NULL COMMENT 'Unique identifier for the pipeline run',
    fabric_uid STRING NOT NULL COMMENT 'Unique identifier for the fabric',
    component_name STRING COMMENT 'Name of the component',
    interim_component_name STRING COMMENT 'Name of the component holding interim for this component. Targets are supplied interims by the upstream component',
    component_type STRING COMMENT 'Type of the component - Source, Reformat, Target, etc.',
    interim_subgraph_name STRING COMMENT 'Name of the graph where interim component resides',
    interim_process_id STRING COMMENT 'Identifier for the interim process',
    interim_out_port STRING COMMENT 'Output port for the interim component',
    created_at TIMESTAMP COMMENT 'Timestamp when the component run was created',
    created_by STRING NOT NULL COMMENT 'Identifier of the user who created the component run',
    records LONG COMMENT 'Number of records processed by the component. Represents the aggregated amount of all rows read through this component. Rows can be re-read if there were multiple downstream components or if Spark ran the same plan multiple times',
    bytes LONG COMMENT 'Number of bytes processed by the component',
    partitions LONG COMMENT 'Number of partitions of the data processed by the component',
    expired BOOLEAN COMMENT 'Indicates if the component run has expired',
    run_type STRING COMMENT 'Type of the run',
    job_uri STRING COMMENT 'URI of the job associated with the component run',
    branch STRING COMMENT 'Branch information for the component run',
    gem_name STRING COMMENT 'Hierarchical component name',
    process_id STRING COMMENT 'Process identifier of the gem',
    gem_type STRING COMMENT 'Type of the gem',
    input_gems ARRAY<STRUCT<gem_name: STRING, from_port: STRING, to_port: STRING, num_rows: LONG>> COMMENT 'List of input gems with details',
    output_gems ARRAY<STRUCT<gem_name: STRING, from_port: STRING, to_port: STRING, num_rows: LONG>> COMMENT 'List of output gems with details',
    in_ports ARRAY<STRING> COMMENT 'List of input ports',
    out_ports ARRAY<STRING> COMMENT 'List of output ports',
    num_rows_output LONG COMMENT 'Number of rows output by the component. Represents the maximum number of rows read through this component across Spark plans and possible re-runs',
    stdout ARRAY<STRUCT<content: STRING, time: LONG>> COMMENT 'Standard output logs',
    stderr ARRAY<STRUCT<content: STRING, time: LONG>> COMMENT 'Standard error logs',
    start_time LONG COMMENT 'Start time of the component run',
    end_time LONG COMMENT 'End time of the component run',
    state STRING COMMENT 'State of the component run',
    exception STRUCT<exception_type: STRING, msg: STRING, cause_msg: STRING, stack_trace: STRING, time: LONG> COMMENT 'Exception details if any occurred during the component run'
    )
    USING DELTA
    PARTITIONED BY (fabric_uid, component_uri, created_by)
    LOCATION '<table_path>'
    TBLPROPERTIES (delta.autoOptimize.optimizeWrite = true, delta.autoOptimize.autoCompact = true)
      CREATE TABLE IF NOT EXISTS <database>.<interims_table_name>
    (
    uid STRING NOT NULL COMMENT 'Unique identifier for the interim',
    interim STRING COMMENT 'Interim data or information',
    created_by STRING COMMENT 'Identifier of the user who created the interim',
    created_at TIMESTAMP COMMENT 'Timestamp when the interim was created',
    fabric_uid STRING COMMENT 'Unique identifier for the fabric'
    )
    USING DELTA
    PARTITIONED BY (created_by, fabric_uid)
    LOCATION '<table_path>'
    TBLPROPERTIES (delta.autoOptimize.optimizeWrite = true, delta.autoOptimize.autoCompact = true)

    Grant permissions

      GRANT USAGE ON SCHEMA <database> TO group1;
    GRANT USAGE ON SCHEMA <database> TO group2;

    GRANT SELECT ON <database.component-runs-table> TO group1;
    GRANT SELECT ON <database.component-runs-table> TO group2;
    GRANT MODIFY ON <database.component-runs-table> TO group1;
    GRANT MODIFY ON <database.component-runs-table> TO group2;

    GRANT SELECT ON <database.pipeline-runs-table> TO group1;
    GRANT SELECT ON <database.pipeline-runs-table> TO group2;
    GRANT MODIFY ON <database.pipeline-runs-table> TO group1;
    GRANT MODIFY ON <database.pipeline-runs-table> TO group2;

    GRANT SELECT ON <database.interims-table> TO group1;
    GRANT SELECT ON <database.interims-table> TO group2;
    GRANT MODIFY ON <database.interims-table> TO group1;
    GRANT MODIFY ON <database.interims-table> TO group2;

    Restrictions

    Creating Tables using Parquet (for Livy)

    The following are sample Create table commands for tables using Parquet. These are suitable for Livy or for your Hive metastore in Hadoop setups.

      CREATE TABLE IF NOT EXISTS <database>.<pipeline_runs_table_name>
    (
    uid STRING NOT NULL COMMENT 'Unique identifier for the pipeline run',
    pipeline_uri STRING NOT NULL COMMENT 'URI of the pipeline',
    job_uri STRING COMMENT 'URI of the job associated with the pipeline run',
    job_run_uid STRING COMMENT 'Unique identifier for the job run',
    task_run_uid STRING COMMENT 'Unique identifier for the task run',
    status STRING COMMENT 'Status of the pipeline run',
    fabric_uid STRING NOT NULL COMMENT 'Unique identifier for the fabric',
    time_taken LONG COMMENT 'Time taken for the pipeline run',
    rows_read LONG COMMENT 'Number of rows read during the pipeline run',
    rows_written LONG COMMENT 'Number of rows written during the pipeline run',
    created_at TIMESTAMP COMMENT 'Timestamp when the pipeline run was created',
    created_by STRING NOT NULL COMMENT 'Prophecy user ID who created the pipeline run',
    run_type STRING COMMENT 'Type of the run - Interactive, Adhoc, or Scheduled',
    input_datasets ARRAY<STRING> COMMENT 'List of input datasets',
    output_datasets ARRAY<STRING> COMMENT 'List of output datasets',
    workflow_code MAP<STRING, STRING> COMMENT 'Workflow code associated with the pipeline run',
    expired BOOLEAN COMMENT 'Indicates if the pipeline run has expired',
    branch STRING COMMENT 'Git entity information for the pipeline run. Can be branch name or release tag',
    pipeline_config STRING COMMENT 'Pipeline configuration details',
    user_config STRING COMMENT 'User configuration details',
    expected_interims INT COMMENT 'Expected number of interims',
    actual_interims INT COMMENT 'Actual number of interims',
    logs STRING COMMENT 'Logs for the pipeline run'
    ) stored as parquet
    PARTITIONED BY (fabric_uid, pipeline_uri, created_by)
      CREATE TABLE IF NOT EXISTS <database>.<component_runs_table_name>
    (
    uid STRING NOT NULL COMMENT 'Unique identifier for the component run',
    component_uri STRING NOT NULL COMMENT 'URI of the component',
    pipeline_uri STRING COMMENT 'URI of the pipeline associated with the component run',
    pipeline_run_uid STRING NOT NULL COMMENT 'Unique identifier for the pipeline run',
    fabric_uid STRING NOT NULL COMMENT 'Unique identifier for the fabric',
    component_name STRING COMMENT 'Name of the component',
    interim_component_name STRING COMMENT 'Name of the component holding interim for this component. Targets are supplied interims by the upstream component',
    component_type STRING COMMENT 'Type of the component - Source, Reformat, Target, etc.',
    interim_subgraph_name STRING COMMENT 'Name of the graph where interim component resides',
    interim_process_id STRING COMMENT 'Identifier for the interim process',
    interim_out_port STRING COMMENT 'Output port for the interim component',
    created_at TIMESTAMP COMMENT 'Timestamp when the component run was created',
    created_by STRING NOT NULL COMMENT 'Identifier of the user who created the component run',
    records LONG COMMENT 'Number of records processed by the component. Represents the aggregated amount of all rows read through this component. Rows can be re-read if there were multiple downstream components or if Spark ran the same plan multiple times',
    bytes LONG COMMENT 'Number of bytes processed by the component',
    partitions LONG COMMENT 'Number of partitions of the data processed by the component',
    expired BOOLEAN COMMENT 'Indicates if the component run has expired',
    run_type STRING COMMENT 'Type of the run',
    job_uri STRING COMMENT 'URI of the job associated with the component run',
    branch STRING COMMENT 'Branch information for the component run',
    gem_name STRING COMMENT 'Hierarchical component name',
    process_id STRING COMMENT 'Process identifier of the gem',
    gem_type STRING COMMENT 'Type of the gem',
    input_gems ARRAY<STRUCT<gem_name: STRING, from_port: STRING, to_port: STRING, num_rows: LONG>> COMMENT 'List of input gems with details',
    output_gems ARRAY<STRUCT<gem_name: STRING, from_port: STRING, to_port: STRING, num_rows: LONG>> COMMENT 'List of output gems with details',
    in_ports ARRAY<STRING> COMMENT 'List of input ports',
    out_ports ARRAY<STRING> COMMENT 'List of output ports',
    num_rows_output LONG COMMENT 'Number of rows output by the component. Represents the maximum number of rows read through this component across Spark plans and possible re-runs',
    stdout ARRAY<STRUCT<content: STRING, time: LONG>> COMMENT 'Standard output logs',
    stderr ARRAY<STRUCT<content: STRING, time: LONG>> COMMENT 'Standard error logs',
    start_time LONG COMMENT 'Start time of the component run',
    end_time LONG COMMENT 'End time of the component run',
    state STRING COMMENT 'State of the component run',
    exception STRUCT<exception_type: STRING, msg: STRING, cause_msg: STRING, stack_trace: STRING, time: LONG> COMMENT 'Exception details if any occurred during the component run'
    ) stored as parquet
    PARTITIONED BY (fabric_uid, component_uri, created_by)
      CREATE TABLE IF NOT EXISTS <database>.<interims_table_name>
    (
    uid STRING NOT NULL COMMENT 'Unique identifier for the interim',
    interim STRING COMMENT 'Interim data or information',
    created_by STRING COMMENT 'Identifier of the user who created the interim',
    created_at TIMESTAMP COMMENT 'Timestamp when the interim was created',
    fabric_uid STRING COMMENT 'Unique identifier for the fabric'
    ) stored as parquet
    PARTITIONED BY (created_by, fabric_uid)
    - - + + \ No newline at end of file diff --git a/Spark/execution/executions_on_databricks_clusters/index.html b/Spark/execution/executions_on_databricks_clusters/index.html index c1069ba890..d87b5f7485 100644 --- a/Spark/execution/executions_on_databricks_clusters/index.html +++ b/Spark/execution/executions_on_databricks_clusters/index.html @@ -6,10 +6,9 @@ Execution on Databricks | Prophecy - - - - + + +
    @@ -19,7 +18,7 @@ These interims will come Just before Target Gems, and if there is no Target Gem, then as a dangling edge after last Gem. See below images for the same.

    Vanilla Interims

    Vanilla Interims

    Execution Metrics

    When running Pipelines and Jobs, you may be interested in a few execution-related metrics, such as records read/written, bytes read/written, total time taken, and data samples between components. These Dataset, Pipeline-run, and Job-run related metrics are accumulated and stored on your data plane and can be viewed later from the Prophecy UI. For more details, refer here.

    caution

    These metrics are not available for Shared mode clusters (both normal workspaces and Unity catalog workspaces). You should see a proper error when trying to get historical runs of Pipelines/Jobs executed on Shared mode clusters.

    Refer to the images below for Execution Metrics on the Pipelines page.

    Pipeline_Execution_Metrics

    Each row here is one run of the Pipeline. You can click and go to a particular run and see the interims for that run or metrics like Rows read/written, time taken, etc.

    Execution_Metrics

    You can also see Execution Metrics for each Dataset in the Pipeline.

    Dataset_metrcis

    Each row here is one run where this Dataset was used. You can click and go to a particular run and see more detailed insights on your data along with preview.

    Dataset_stats

    info

    When using High Concurrency or Shared Mode Databricks Clusters you may notice a delay when running the first command, or when your cluster is scaling up to meet demand. This delay is due to Prophecy and Pipeline dependencies (Maven or Python packages) being installed. For the best performance, it is recommended that you cache packages in an Artifactory or on DBFS. Please contact us to learn more about this.

    - - + + \ No newline at end of file diff --git a/Spark/execution/executions_on_livy_clusters/index.html b/Spark/execution/executions_on_livy_clusters/index.html index 8961d0ba12..4011bb8f41 100644 --- a/Spark/execution/executions_on_livy_clusters/index.html +++ b/Spark/execution/executions_on_livy_clusters/index.html @@ -6,10 +6,9 @@ Execution on Livy | Prophecy - - - - + + +
    @@ -19,7 +18,7 @@ To check more about interims, please refer here.

    We have interims available after each Gem of Pipeline.

    Regular Interims

    Execution Metrics on Livy

    When running Pipelines and Jobs, you may be interested in a few execution-related metrics, such as records read/written, bytes read/written, total time taken, and data samples between components. These Dataset, Pipeline-run, and Job-run related metrics are accumulated and stored on your data plane and can be viewed later from the Prophecy UI. For more details, please refer here.

    Please refer to the images below for Execution Metrics on the Pipelines page.

    Pipeline_Execution_Metrics

    Each row here is one run of the Pipeline. You can click and go to a particular run and see the interims for that run or metrics like Rows read/written, time taken, etc.

    Execution_Metrics

    You can also see Execution Metrics for each Dataset in the Pipeline.

    Dataset_metrcis

    Each row here is one run where this Dataset was used. You can click and go to a particular run and see more detailed insights on your data along with preview.

    Dataset_stats

    - - + + \ No newline at end of file diff --git a/Spark/execution/index.html b/Spark/execution/index.html index ac2aa13389..725709a824 100644 --- a/Spark/execution/index.html +++ b/Spark/execution/index.html @@ -6,15 +6,14 @@ Execution | Prophecy - - - - + + +
    Skip to main content
    - - + + \ No newline at end of file diff --git a/Spark/execution/interactive-execution/index.html b/Spark/execution/interactive-execution/index.html index f7adfed4fe..147a74f8c0 100644 --- a/Spark/execution/interactive-execution/index.html +++ b/Spark/execution/interactive-execution/index.html @@ -6,10 +6,9 @@ Interactive Execution | Prophecy - - - - + + +
    @@ -22,7 +21,7 @@ Interactive execution error logs

    Runtime Logs

    Overall progress with associated timestamps can be monitored from the Runtime Logs as shown here:

    Runtime Logs

    Runtime Metrics

    Various Spark metrics collected during runtime can be monitored as shown here:

    Runtime Metrics

    Execution Metrics

    For interactive runs execution metrics are collected to make the development easier and performance tuning more intuitive. These can be accessed from the Metadata Page inside the run tab of the Pipeline.

    Execution Metrics

    Shell

    The Prophecy IDE comes with a built-in interactive Spark shell that supports both Python and Scala. The shell is an easy way to quickly analyze data or test Spark commands. Interactive execution

    info

    Spark context and session are available within the shell as variables sc and spark respectively


    Examples

    note

    You need to be connected to a cluster to access the interactive shell

    Python

    Python interactive execution

    Scala

    Scala interactive execution

    - - + + \ No newline at end of file diff --git a/Spark/expression-builder/index.html b/Spark/expression-builder/index.html index b5a810d646..75f2e1bdb0 100644 --- a/Spark/expression-builder/index.html +++ b/Spark/expression-builder/index.html @@ -6,10 +6,9 @@ Expression Builder | Prophecy - - - - + + +
    @@ -20,7 +19,7 @@ Simply type in the name of the function you are looking for, and a list of matching functions will appear. To insert a function into your expression, click on the insert button. You can then specify the arguments for the function by clicking on it and filling in the required fields. You can also insert configs and input columns directly.

    Run and Verify the output

    You can now attach to a cluster and run your Pipeline up to the current Gem from the same screen. Once the code has finished running, you can verify the results to make sure they match your expectations. This data is the same as what you see in the interims view. By testing and verifying your expressions, you can ensure that your data analysis tasks are accurate and reliable.

    - - + + \ No newline at end of file diff --git a/Spark/extensibility/dependencies/index.html b/Spark/extensibility/dependencies/index.html index da4c8d49ff..26c68a9b92 100644 --- a/Spark/extensibility/dependencies/index.html +++ b/Spark/extensibility/dependencies/index.html @@ -6,10 +6,9 @@ Dependencies | Prophecy - - - - + + +
    @@ -42,7 +41,7 @@ changes to take effect in the Job.

    For any help required in enabling the Project template for older Projects, please reach out to the Prophecy support team.

    Jobs Support

    For Projects in which templating is enabled, dependencies are added automatically to pom.xml/build.py files as soon as they are added on the ...> Options > Manage Dependencies screen.

    Scala

    Python


    info

    For older Projects in which templating is disabled, the dependencies added to the Pipelines are not propagated to the scheduled Jobs automatically. (We highly recommend enabling templating for all Projects rather than editing the pom.xml files manually.)

    However, if you want to keep templating disabled at the Project level and still want to add a dependency to a Scala Project so that it is visible to your Pipeline when it's scheduled, you must add it manually to the pom.xml file. Below is an example of how this can be achieved:

    The dependency should be added anywhere between the <dependencies></dependencies> tags. For instance, to add the io.github.etspaceman:scalacheck-faker_2.12:7.0.0 dependency, add it to the pom.xml like so:

    Dependencies List

    ...
    <dependencies>
    <dependency>
    <groupId>io.github.etspaceman</groupId>
    <artifactId>scalacheck-faker_2.12</artifactId>
    <version>7.0.0</version>
    </dependency>

    ...
    </dependencies>
    ...
    - - + + \ No newline at end of file diff --git a/Spark/extensibility/gem-builder/index.html b/Spark/extensibility/gem-builder/index.html index 5976ff4dde..2ae41af398 100644 --- a/Spark/extensibility/gem-builder/index.html +++ b/Spark/extensibility/gem-builder/index.html @@ -6,10 +6,9 @@ Gem builder | Prophecy - - - - + + +
    @@ -21,7 +20,7 @@ It is recommended to try out this dialogue code in Gem builder UI and see how each of these elements looks in UI.

    Validation

    The validate method performs validation checks so that, if there is an issue with any of the inputs provided by the user, an error can be displayed. In our example, this would happen if the Filter condition is empty. Similarly, you can add any validation on your properties.

    def validate(self, component: Component[FilterProperties]) -> List[Diagnostic]:
    return validateSColumn(component.properties.condition, "condition", component)

    State Changes

    The onChange method is given for the UI State transformations. You are given both the previous and the new incoming state and can merge or modify the state as needed. The properties of the Gem are also accessible to this function, so functions like selecting columns, etc. are possible to add from here.

    def onChange(self, oldState: Component[FilterProperties], newState: Component[FilterProperties]) -> Component[
    FilterProperties]:
    newProps = newState.properties
    usedColExps = getColumnsToHighlight2([newProps.condition], newState)
    return newState.bindProperties(replace(newProps, columnsSelector=usedColExps))

    Component Code

    The last class used here is FilterCode which is inherited from ComponentCode class. This class contains the actual Spark code that needs to run on your Spark cluster. Here the above User Defined properties are accessible using self.props.{property}. The Spark code for the Gem logic is defined in the apply function. Input/Output of apply method can only be DataFrame or list of DataFrames or empty. For example, we are calling the .filter() method in this example in the apply function.

    class FilterCode(ComponentCode):
    def __init__(self, newProps):
    self.props: Filter.FilterProperties = newProps

    def apply(self, spark: SparkSession, in0: DataFrame) -> DataFrame:
    return in0.filter(self.props.condition.column())

    You can preview the component in the Gem Builder to see how it looks. You can modify the properties and then save it to preview the generated Spark code which will eventually run on your cluster.

    To assist the Spark Catalyst Optimizer to build scalable code, Prophecy performs some minor optimizations to the code generated by the apply() method.

    info

    For details on our optimization functions, see Optimization functions.

    Source/Target Gems

    Source/Target Gems are Gems that you use to read/write your Datasets into DataFrames. There are certain differences between how you define a Source/Target Gem and a Transformation Gem. For example, a Source/Target Gem has two dialog functions and two apply functions, one each for Source and Target. Let's look at them with an example.

    from pyspark.sql import SparkSession, DataFrame
    from pyspark.sql.types import StructType

    from prophecy.cb.server.base.ComponentBuilderBase import ComponentCode, Diagnostic, SeverityLevelEnum
    from prophecy.cb.server.base.DatasetBuilderBase import DatasetSpec, DatasetProperties, Component
    from prophecy.cb.ui.uispec import *


    class ParquetFormat(DatasetSpec):
    name: str = "parquet"
    datasetType: str = "File"

    def optimizeCode(self) -> bool:
    return True

    @dataclass(frozen=True)
    class ParquetProperties(DatasetProperties):
    schema: Optional[StructType] = None
    description: Optional[str] = ""
    useSchema: Optional[bool] = False
    path: str = ""
    mergeSchema: Optional[bool] = None
    datetimeRebaseMode: Optional[str] = None
    int96RebaseMode: Optional[str] = None
    compression: Optional[str] = None
    partitionColumns: Optional[List[str]] = None
    writeMode: Optional[str] = None
    pathGlobFilter: Optional[str] = None
    modifiedBefore: Optional[str] = None
    modifiedAfter: Optional[str] = None
    recursiveFileLookup: Optional[bool] = None

    def sourceDialog(self) -> DatasetDialog:
    return DatasetDialog("parquet") \
    .addSection("LOCATION", TargetLocation("path")) \
    .addSection(
    "PROPERTIES",
    ColumnsLayout(gap=("1rem"), height=("100%"))
    .addColumn(
    ScrollBox().addElement(
    StackLayout(height=("100%"))
    .addElement(
    StackItem(grow=(1)).addElement(
    FieldPicker(height=("100%"))
    .addField(
    TextArea("Description", 2, placeholder="Dataset description..."),
    "description",
    True
    )
    .addField(Checkbox("Use user-defined schema"), "useSchema", True)
    .addField(Checkbox("Merge schema"), "mergeSchema")
    .addField(
    SelectBox("Datetime Rebase Mode")
    .addOption("EXCEPTION", "EXCEPTION")
    .addOption("CORRECTED", "CORRECTED")
    .addOption("LEGACY", "LEGACY"),
    "datetimeRebaseMode"
    )
    .addField(
    SelectBox("Int96 Rebase Mode")
    .addOption("EXCEPTION", "EXCEPTION")
    .addOption("CORRECTED", "CORRECTED")
    .addOption("LEGACY", "LEGACY"),
    "int96RebaseMode"
    )
    .addField(Checkbox("Recursive File Lookup"), "recursiveFileLookup")
    .addField(TextBox("Path Global Filter").bindPlaceholder(""), "pathGlobFilter")
    .addField(TextBox("Modified Before").bindPlaceholder(""), "modifiedBefore")
    .addField(TextBox("Modified After").bindPlaceholder(""), "modifiedAfter")
    )
    )
    ),
    "auto"
    )
    .addColumn(SchemaTable("").bindProperty("schema"), "5fr")
    ) \
    .addSection(
    "PREVIEW",
    PreviewTable("").bindProperty("schema")
    )

    def targetDialog(self) -> DatasetDialog:
    return DatasetDialog("parquet") \
    .addSection("LOCATION", TargetLocation("path")) \
    .addSection(
    "PROPERTIES",
    ColumnsLayout(gap=("1rem"), height=("100%"))
    .addColumn(
    ScrollBox().addElement(
    StackLayout(height=("100%")).addElement(
    StackItem(grow=(1)).addElement(
    FieldPicker(height=("100%"))
    .addField(
    TextArea("Description", 2, placeholder="Dataset description..."),
    "description",
    True
    )
    .addField(
    SelectBox("Write Mode")
    .addOption("error", "error")
    .addOption("overwrite", "overwrite")
    .addOption("append", "append")
    .addOption("ignore", "ignore"),
    "writeMode"
    )
    .addField(
    SchemaColumnsDropdown("Partition Columns")
    .withMultipleSelection()
    .bindSchema("schema")
    .showErrorsFor("partitionColumns"),
    "partitionColumns"
    )
    .addField(
    SelectBox("Compression Codec")
    .addOption("none", "none")
    .addOption("uncompressed", "uncompressed")
    .addOption("gzip", "gzip")
    .addOption("lz4", "lz4")
    .addOption("snappy", "snappy")
    .addOption("lzo", "lzo")
    .addOption("brotli", "brotli")
    .addOption("zstd", "zstd"),
    "compression"
    )
    )
    )
    ),
    "auto"
    )
    .addColumn(SchemaTable("").isReadOnly().withoutInferSchema().bindProperty("schema"), "5fr")
    )

    def validate(self, component: Component) -> list:
    diagnostics = super(ParquetFormat, self).validate(component)
    if len(component.properties.path) == 0:
    diagnostics.append(
    Diagnostic("properties.path", "path variable cannot be empty [Location]", SeverityLevelEnum.Error))
    return diagnostics

    def onChange(self, oldState: Component, newState: Component) -> Component:
    return newState

    class ParquetFormatCode(ComponentCode):
    def __init__(self, props):
    self.props: ParquetFormat.ParquetProperties = props

    def sourceApply(self, spark: SparkSession) -> DataFrame:
    reader = spark.read.format("parquet")
    if self.props.mergeSchema is not None:
    reader = reader.option("mergeSchema", self.props.mergeSchema)
    if self.props.datetimeRebaseMode is not None:
    reader = reader.option("datetimeRebaseMode", self.props.datetimeRebaseMode)
    if self.props.int96RebaseMode is not None:
    reader = reader.option("int96RebaseMode", self.props.int96RebaseMode)
    if self.props.modifiedBefore is not None:
    reader = reader.option("modifiedBefore", self.props.modifiedBefore)
    if self.props.modifiedAfter is not None:
    reader = reader.option("modifiedAfter", self.props.modifiedAfter)
    if self.props.recursiveFileLookup is not None:
    reader = reader.option("recursiveFileLookup", self.props.recursiveFileLookup)
    if self.props.pathGlobFilter is not None:
    reader = reader.option("pathGlobFilter", self.props.pathGlobFilter)

    if self.props.schema is not None and self.props.useSchema:
    reader = reader.schema(self.props.schema)

    return reader.load(self.props.path)

    def targetApply(self, spark: SparkSession, in0: DataFrame):
    writer = in0.write.format("parquet")
    if self.props.compression is not None:
    writer = writer.option("compression", self.props.compression)

    if self.props.writeMode is not None:
    writer = writer.mode(self.props.writeMode)
    if self.props.partitionColumns is not None and len(self.props.partitionColumns) > 0:
    writer = writer.partitionBy(*self.props.partitionColumns)

    writer.save(self.props.path)

    Here you can see the differences between a Transform Gem and a DataSource Gem.

    1. The Source/Target Gem extends DatasetSpec.
    2. It has two Dialog functions: sourceDialog and targetDialog. Both return a DatasetDialog object, whereas for a Transform Gem, the dialog function returns a Dialog object.
    3. The ComponentCode class has two apply functions: sourceApply and targetApply for Source and Target modes respectively.

    There is no change in onChange and validate functions.

    What's next

    To learn more about the Gem builder and additional optimization options, see the following page:

    - - + + \ No newline at end of file diff --git a/Spark/extensibility/gem-builder/optimization-functions/index.html b/Spark/extensibility/gem-builder/optimization-functions/index.html index c0546189ef..fb34464607 100644 --- a/Spark/extensibility/gem-builder/optimization-functions/index.html +++ b/Spark/extensibility/gem-builder/optimization-functions/index.html @@ -6,10 +6,9 @@ Optimization functions | Prophecy - - - - + + +
    @@ -18,7 +17,7 @@ Catalyst optimization engine when it creates the Spark Plan. The optimizations make replacements using functionally equivalent code, but in some corner cases this may cause unwanted side effects.

    In certain corner cases you may want to disable some or all optimizations.

    note

    These functions are Python specific.

    Turn off loop unrolling

    By default Prophecy will unroll small static loops.

    You can turn off loop unrolling by adding # skipLoopUnRolling as a comment on the same line as the for loop.

    Turn off loop unrolling example

    Replace variables and optimize objects

    You can use two functions to disable substitution of variables during the optimization step.

    Example:

    def testLoopUnRoll():
    myCols: SubstituteDisabled = ['a']
    cond = None
    for scdCol in myCols:
    if cond is None:
    cond = (existingDF[scdCol] != updatesDF[scdCol])
    else:
    cond = (cond | (existingDF[scdCol] != updatesDF[scdCol]))
    stagedUpdatesDF = updatesDF.where((existingDF["current"] == lit("true")) & (cond))

    cols: PostSubstituteDisabled = ['a']
    updateCond = None
    for scdCol1 in cols:
    if updateCond is None:
    updateCond = (existingDF[scdCol1] != updatesDF[scdCol1])
    else:
    updateCond = (updateCond | (existingDF[scdCol1] != updatesDF[scdCol1]))
    updatedDF = updatesDF.where((existingDF["current"] == lit("true")) & (updateCond))

    cols1: PostSubstituteDisabled = ['a']
    updateCond1 = None
    for scdCol2 in cols1:#skipLoopUnRolling
    if updateCond is None:
    updateCond1 = (existingDF[scdCol2] != updatesDF[scdCol2])
    else:
    updateCond1 = (updateCond | (existingDF[scdCol2] != updatesDF[scdCol2]))
    updatedDF1 = updatesDF.where((existingDF["current"] == lit("true")) & (updateCond1))

    The previous code sample becomes the following:

    def testLoopUnRoll():
    myCols = ['a']
    cond = None

    for scdCol in myCols:
    if cond is None:
    cond = (existingDF[scdCol] != updatesDF[scdCol])
    else:
    cond = (cond | (existingDF[scdCol] != updatesDF[scdCol]))

    updateCond = (existingDF['a'] != updatesDF['a'])
    cols1 = ['a']

    for scdCol2 in cols1:
    if updateCond is None:
    updateCond1 = (existingDF[scdCol2] != updatesDF[scdCol2])
    else:
    updateCond1 = (updateCond | (existingDF[scdCol2] != updatesDF[scdCol2]))

    Disable all optimizations

    You can turn off all optimizations by setting the optimize function stub to False.

    def optimizeCode(self) -> bool:
    return False
    - - + + \ No newline at end of file diff --git a/Spark/extensibility/index.html b/Spark/extensibility/index.html index 221d8c1571..bbfee4673e 100644 --- a/Spark/extensibility/index.html +++ b/Spark/extensibility/index.html @@ -6,15 +6,14 @@ Extensibility | Prophecy - - - - + + +
    Skip to main content
    - - + + \ No newline at end of file diff --git a/Spark/extensibility/udfs/index.html b/Spark/extensibility/udfs/index.html index 343a475d09..139cb85003 100644 --- a/Spark/extensibility/udfs/index.html +++ b/Spark/extensibility/udfs/index.html @@ -6,15 +6,14 @@ User-defined functions | Prophecy - - - - + + +
    Skip to main content

    User-defined functions

    Prophecy lets you create user-defined functions (UDFs) which can be used anywhere in the Pipeline.

    Parameters

    ParameterDescriptionRequired
    Function nameThe name of the function as it appears in your project.True
    UDF NameThe name of the UDF that will register it. All calls to the UDF will use this name.True
    DefinitionDefinition of the UDF function.
    For example, udf((value:Int)=>value*value)
    True
    UDF initialization codeCode block that contains initialization of entities used by UDFs. This could, for example, contain any static mapping that a UDF might use.False

    Steps

    There are a few steps to take to create and use a new UDF.

    1. Create a new function. You can find the Functions section in the left sidebar of a project page.

    Add a function to the pipeline

    1. Define the function.

    Define the function

    1. Call the function.

    Call the function

    country_code_map = {"Mexico" : "MX", "USA" : "US", "India" : "IN"}

    def registerUDFs(spark: SparkSession):
    spark.udf.register("get_country_code", get_country_code)

    @udf(returnType = StringType())
    def get_country_code(country: str):
    return country_code_map.get(country, "Not Found")
    - - + + \ No newline at end of file diff --git a/Spark/fabrics/azure-synapse-fabric-guide/index.html b/Spark/fabrics/azure-synapse-fabric-guide/index.html index 5976def370..a6f5605ec0 100644 --- a/Spark/fabrics/azure-synapse-fabric-guide/index.html +++ b/Spark/fabrics/azure-synapse-fabric-guide/index.html @@ -6,15 +6,14 @@ Azure Synapse Analytics | Prophecy - - - - + + +
    Skip to main content

    Azure Synapse Analytics

    In the context of Spark execution engines, users have the flexibility to opt for Azure Synapse Analytics while utilizing Prophecy. This comprehensive documentation aims to provide users with a clear understanding of the configuration process for Azure Synapse Analytics. Additionally, it offers step-by-step guidance on creating a Fabric that enables seamless connectivity to the Azure Synapse Workspace via Livy.

    Feel free to explore the following sections to gain insights into the integration of Azure Synapse Analytics with Prophecy.

    An existing Azure Synapse Analytics environment

    A properly configured Azure Synapse Analytics environment is required before configuring a Synapse Fabric on Prophecy. Prophecy configurations include the following:

    1. A Synapse workspace with proper security configured. If you don't have an existing workspace, you may deploy one from the Azure Marketplace.

      Synapse security
    2. Deploy a Synapse Spark Pool if one doesn't already exist.

      Synapse pool
    3. Make sure the Application is registered.

      Synapse security
    4. Create a secret for your Application.

      Synapse security
    5. Make a note of your secret. We need this information for your Fabric.

      Synapse security
    6. Configure Application, Workspace, Storage roles to ensure proper access.

    Find more information on assigning roles from Azure's documentation.

    Configure connectivity between Synapse and Prophecy

    Note that the Prophecy public IP is 3.133.35.237.

    Navigate to Prophecy's UI and click on Create Fabric. The Fabric will establish a connection with your Synapse environment and utilize it as the execution engine for your Pipelines.



    Choose Synapse as your Provider.



    Synapse connect

    Copy and paste your Application Client ID, Secret Value, and Tenant ID from the App Registration page.



    Synapse connect

    Copy and paste your Synapse Resource Group Name and Subscription ID from your Synapse workspace.



    Synapse connect

    Click on Fetch environments.



    Select your Spark pool from the Spark environment dropdown. All other fields should be automatically populated after selecting your Spark Pool.



    Synapse connect

    Click on Add Job Size and configure the Job size that you would like to use for processing.



    Synapse connect

    Now we configure the dependencies.

    Under Scala enter the following Path:

    https://prophecypublicazure.blob.core.windows.net/prophecy-public-libs/prophecy-scala-libs/

    Under Python enter the following Path:

    https://prophecypublicazure.blob.core.windows.net/prophecy-public-libs/prophecy-python-libs/



    Click on Complete.



    Your Azure Synapse Fabric is configured! Try creating a cluster using the Fabric that you've just created and attaching to it.

    Run a simple Pipeline and make sure that the interim returns data properly.

    - - + + \ No newline at end of file diff --git a/Spark/fabrics/databricks-fabric/index.html b/Spark/fabrics/databricks-fabric/index.html index a54d70c71d..1c9e1b9469 100644 --- a/Spark/fabrics/databricks-fabric/index.html +++ b/Spark/fabrics/databricks-fabric/index.html @@ -6,16 +6,15 @@ Databricks | Prophecy - - - - + + +
    Skip to main content

    Databricks

    Create a Databricks Fabric to connect Prophecy to your existing Databricks environment. Think of a Fabric as a connection to your Databricks workspace. This Fabric enables Prophecy to connect to existing Spark clusters (or create new ones), execute Spark pipelines, read and write data, etc - all according to each user's permissions defined by their personal access token.

    Please refer to the video below for a step-by-step example.

    • Databricks Credentials - Here you will provide your Databricks Workspace URL and Personal Access Token (PAT). The PAT must have permission to attach clusters. If you'd like to create clusters or read/write data from Prophecy, then these permissions should be enabled for the PAT as well. Keep in mind each user will need to use their own PAT in the Fabric. Prophecy respects the permissions scoped to each user.
    • Cluster Details - Here you would need to provide the Databricks Runtime version, Executor and Driver Machine Types and Termination Timeout if any. These cluster details will be used when creating a cluster via Prophecy during Interactive development and for job clusters during Scheduled Databricks Job runs.
    • Job sizes - Here you can create Job sizes, which are used to spawn clusters while testing through the Prophecy IDE. You can provide Cluster mode, Databricks Runtime version, total number of the Executors, Core and Memory for them, etc. This provides all the options which are available on Databricks while spawning clusters through Databricks. We recommend using the smallest machines and smallest number of nodes appropriate for your use case.

    Editing a Job

    In Json you can just copy-paste your compute Json from Databricks.

    • Prophecy Library - These are some Scala and Python libraries written by Prophecy to provide additional functionalities on top of Spark. These would get automatically installed in your Spark execution environment when you attach to a cluster/create new cluster. These libraries are also publicly available on Maven central and Pypi respectively.
    • Metadata Connection - Optionally, enhance your Fabric by creating a Metadata Connection, recommended for users with hundreds or thousands of tables housed in their data provider(s).
    • Artifacts - Prophecy supports Databricks Volumes. When you run a Python or Scala Pipeline via a Job, you must bundle them as whl/jar artifacts. These artifacts must then be made accessible to the Databricks Job in order to use them as a library installed on the cluster. You can designate a path to a Volume for uploading the whl/jar files under Artifacts.

    Databricks Execution

    Execution on Databricks

    Interactive Execution

    Execution Metrics

    - - + + \ No newline at end of file diff --git a/Spark/fabrics/dataproc/gcp-dataproc-fabric-tips/index.html b/Spark/fabrics/dataproc/gcp-dataproc-fabric-tips/index.html index b518e60c1c..73abf1b161 100644 --- a/Spark/fabrics/dataproc/gcp-dataproc-fabric-tips/index.html +++ b/Spark/fabrics/dataproc/gcp-dataproc-fabric-tips/index.html @@ -6,10 +6,9 @@ Connectivity Tips | Prophecy - - - - + + +
    @@ -23,7 +22,7 @@ Configure Python Library Path. gs://prophecy-public-gcp/prophecy-python-libs/.

    Option 3:
    Set up a GCS bucket internally. Create two folders as in the previous option, and add prophecy-scala-libs and prophecy-python-libs in those folders.

    - - + + \ No newline at end of file diff --git a/Spark/fabrics/dataproc/index.html b/Spark/fabrics/dataproc/index.html index e8d351fc75..8c95b6beb5 100644 --- a/Spark/fabrics/dataproc/index.html +++ b/Spark/fabrics/dataproc/index.html @@ -6,17 +6,16 @@ Google Cloud Dataproc | Prophecy - - - - + + +
    Skip to main content

    Google Cloud Dataproc

    In the context of Spark execution engines, users have the flexibility to opt for Google Cloud Dataproc. This guide offers step-by-step instructions on creating a Fabric that enables seamless connectivity to the Dataproc environment.

    Create a Dataproc Cluster



    caution

    Livy is required for the Fabric. Prophecy provides a script required to deploy a Dataproc Cluster.



    1. If you don't already have a private key, create a private key for the service account that you're using.

      dataproc security

    2. Ensure you have the following permissions configured.
      gcloud projects add-iam-policy-binding <project-name> \
    --member "<service-account-name>" \
    --role "roles/serviceusage.serviceUsageViewer"

    gcloud projects add-iam-policy-binding <project-name> \
    --member "<service-account-name>" \
    --role "roles/dataproc.worker"

    gcloud projects add-iam-policy-binding core-shard-398601 \
    --member "<service-account-name>" \
    --role "storage.objects.get"

    gcloud storage buckets add-iam-policy-binding gs://<bucket-name> \
    --member="<service-account-name>" --role=roles/storage.admin --project <project-name>
    1. Associate secret key to service account.
    gcloud config set account meitestserviceaccount@core-shard-398601.iam.gserviceaccount.com \
    --key-file=<local-private-key-location>
    1. Start a Dataproc cluster using install-livy.sh.

       gcloud dataproc clusters create <cluster-name> \
      --scopes <permission-scope> \
      --region <cluster-region> \
      --initialization-actions 'gs://prophecy-public-gcp/livy-installer-script/install-livy.sh' \
      --properties "hive:hive.metastore.warehouse.dir=<gs://bucket/datasets>" \
      --metadata "enable-cloud-sql-proxy-on-workers=false" \
      --bucket <cluster-data-bucket-name> \
      --region <region> \
      --zone <zone> \
      --single-node \
      --master-machine-type n2-standard-4 \
      --master-boot-disk-size 500 \
      --image-version 2.0-debian10 \
      --project <project-name> \
      --service-account="<service-account-name>" \
      --dataproc-metastore=projects/<project-name>/locations/<region>/services/<metastore-service-name>

    Create a Dataproc Fabric

    1. Create a Fabric and select Dataproc.

      select dataproc

    2. Fill out your Project Name and Region, and upload the Private Key.

      configure dataproc

    3. Click on Fetch environments and select the Dataproc cluster that you created earlier.

      select cluster

    4. Leave everything as default and provide the Livy URL. Locate the External IP of your cluster instance. Optionally, you may configure the DNS instead of using the IP. The URL is http://<external-ip>:8998.

      livy ip

    5. Configure the bucket associated with your cluster.

      bucket location

    6. Add the Job Size.

      Job Size

    7. Configure Scala Library Path. gs://prophecy-public-gcp/prophecy-scala-libs/.
    8. Configure Python Library Path. gs://prophecy-public-gcp/prophecy-python-libs/.

      dependences

    9. Click on Complete.

      Run a simple Pipeline and make sure that the interim returns data properly.
    - - + + \ No newline at end of file diff --git a/Spark/fabrics/emr/index.html b/Spark/fabrics/emr/index.html index b42025309e..3702ea93b3 100644 --- a/Spark/fabrics/emr/index.html +++ b/Spark/fabrics/emr/index.html @@ -6,15 +6,14 @@ Amazon EMR | Prophecy - - - - + + +
    Skip to main content

    Amazon EMR

    This page outlines how to use Amazon EMR via Livy as your Spark execution engine in Prophecy.

    These instructions work for both Amazon EMR and Amazon EMR Serverless.

    Create Amazon EMR cluster with Apache Livy

    In your Amazon EMR service, create a cluster. When doing so:

    1. Under Application bundle select Custom.
    2. When selecting applications, make sure Livy and Spark are included in the install.
    EMR create cluster

    Configure network settings

    To make sure that EMR can communicate with Prophecy, you need to configure specific network settings. Specifically, you need to modify the security groups of your EMR cluster.

    1. Modify the Primary Node security group to allow incoming connections to port 8998 from the Prophecy IP. You can do so by adding an inbound rule to the Master security group that permits incoming traffic on port 8998 from the Prophecy IP address.
    2. Modify the Core Node security group to allow outgoing connections to the Prophecy public IP 3.133.35.237 over HTTPS. Do this by adding an outbound rule to the Core security group that allows outgoing traffic over HTTPS protocol to the Prophecy public IP.

    Create a Fabric

    To connect EMR and Prophecy, you must create a Fabric. You can either create an EMR Fabric (suggested), or a Livy Fabric.

    To create an EMR Fabric:

    1. Open Prophecy and click Create Entity from the left navigation menu. Then, click on the Fabric tile.
    2. Name your Fabric and click Continue.
    3. Keep the Provider Type as Spark, and choose EMR as the Provider.

    4. EMR Provider
    5. Enter your AWS credentials under Access Key and Secret Key. Then, enter the region that your EMR cluster is running in.
    6. Click on Fetch environments.
    7. Under Spark Environment, select the EMR cluster that you would like to connect to.
    8. Enter the S3 path that points to the location where you would like your logs to persist.

    9. EMR dependencies
    10. Add the Job size to your environment by clicking on Add Job Size. Configure your Job size and click on Add.
    11. Select File System under Scala Resolution mode and input s3://prophecy-public-bucket/prophecy-libs
    12. Select File System under Python Resolution mode and input s3://prophecy-public-bucket/python-prophecy-libs

    Click Complete to save your new EMR Fabric.

    At this point, you can test your Fabric. Open a project, connect to a cluster, and try to run a pipeline!

    - - + + \ No newline at end of file diff --git a/Spark/fabrics/fabric-diagnostics/index.html b/Spark/fabrics/fabric-diagnostics/index.html index 8b7979b96c..f4651bd547 100644 --- a/Spark/fabrics/fabric-diagnostics/index.html +++ b/Spark/fabrics/fabric-diagnostics/index.html @@ -6,15 +6,14 @@ Diagnostics | Prophecy - - - - + + +
    Skip to main content

    Diagnostics

    Troubleshooting Prophecy Fabrics is much easier with built-in diagnostics. The descriptions are designed to help users to independently identify and resolve issues. When creating or connecting to a Fabric, Prophecy automatically tests for connectivity. This feature helps users to determine whether the issue lies within Prophecy itself or in other components of the data ecosystem.

    Diagnostics error codes

    Error CodeSymptomProviderCauseResolution
    10000Is missing from the classpathDatabricksProphecy Library(Scala) is incorrect. You're probably using thin jar.Use assembly jar(${scalaFatJarName}) in the library section of the Fabric settings
    10001DRIVER_LIBRARY_INSTALLATION_FAILUREDatabricksProphecy Library(Scala/Python) is incorrect. Databricks could not install itPlease provide the valid library path in the Fabric
    10002object prophecy is not a member of packageLivyProphecy Library(Scala) is incorrectPlease ensure that the library path exists and you’re using the assembly jar(${scalaFatJarName})
    10003cannot be added to user sessions and prophecy_libsLivyProphecy Library(Python) is incorrectPlease ensure that the library path exists and you’re using correct file(${pythonPLibName})
    10004for method and too many argumentsLivyProphecy Library(Scala) is incompatiblePlease use the correct version(${Globals.prophecyLibsVersion}) in the library section of Fabric settings
    10005No module named and prophecyLivyProphecy Library(Python) is incorrectPlease provide the valid library path in the Fabric
    10006illegal start of simple expressionLivyPython version in livy/hadoop is incorrectPlease make sure you have python3 there
    10007IncompatibleClassChangeErrorLivyProphecy Library(Scala) is incompatible with your Spark versionPlease use the correct assembly jar(${scalaFatJarName}) in the library section of the Fabric settings.
    10008"FileNotFoundException and prophecy_libs"LivyProphecy Library(Python) path does not existPlease ensure that the file exists as per the path in the library section of the Fabric settings
    10009503 Service Temporarily Unavailable and LivyRestClientLivyLivy service is downPlease make sure the livy service is up before executing this command
    10010SQLNonTransientConnectionException, rds.amazonaws.com or Unable to instantiate, HiveMetaStoreClientUnity CatalogDatabricks cluster can't access RDS servicePlease ensure that the cluster can access the same region's RDS endpoint as documented here
    10011UnauthorizedCommandException and This execution contained at leas and disallowed languageUnity CatalogShared cluster in unity catalog does not allow Scala commandsPlease use this cluster with Python Pipeline
    10012UnauthorizedCommandException and This execution contained at leas and disallowed languageDatabricksThis cluster does not allow ${pipeline's language} commandPlease check with the Databricks workspace administrator to provide the execution access to ${pipeline's language} language
    10013javax.net.ssl.SSLHandshakeException and PKIX path building failedLivy / EMRCertificates provided in EMR cluster's security configuration are wrongPlease ensure that EMR cluster's security configuration is using correct certificates
    - - + + \ No newline at end of file diff --git a/Spark/fabrics/index.html b/Spark/fabrics/index.html index f5b5800184..b3f9576293 100644 --- a/Spark/fabrics/index.html +++ b/Spark/fabrics/index.html @@ -6,15 +6,14 @@ Prophecy Fabrics | Prophecy - - - - + + +
    Skip to main content
    - - + + \ No newline at end of file diff --git a/Spark/fabrics/livy/index.html b/Spark/fabrics/livy/index.html index 6adb80c5ac..24241eaa70 100644 --- a/Spark/fabrics/livy/index.html +++ b/Spark/fabrics/livy/index.html @@ -6,10 +6,9 @@ Livy | Prophecy - - - - + + +
    @@ -28,7 +27,7 @@ Here, you will provide the Spark and Scala versions. The Spark and Scala versions are used when a user tries to attach a cluster using this Fabric. You can also provide details about Prophecy Libraries. These are Scala and Python libraries written by Prophecy to offer additional functionalities on top of Spark. These would get automatically installed in your Spark execution environment when you attach to a cluster/create a new cluster. These libraries are also publicly available on Maven Central and Pypi, respectively.

    job-sizes

    advance-settings

    Once ready, click Continue.

    Connections

    info

    Connections are not yet supported for Livy Fabrics.

    Click Continue.

    Secrets

    Here, you can configure any Secret Provider and secrets for your Livy Fabric. Read more about Secret Management here

    Click on Complete to complete the Fabric setup.

    Execution on Livy

    Execution on Livy

    - - + + \ No newline at end of file diff --git a/Spark/fabrics/prophecy-managed-databricks/index.html b/Spark/fabrics/prophecy-managed-databricks/index.html index 8a0326afba..df6cbe49a4 100644 --- a/Spark/fabrics/prophecy-managed-databricks/index.html +++ b/Spark/fabrics/prophecy-managed-databricks/index.html @@ -6,17 +6,16 @@ Prophecy Managed | Prophecy - - - - + + +
    Skip to main content

    Prophecy Managed

    If you don't have a Databricks environment, use the Prophecy Managed Databricks Fabric to get started. Using this option, you can create a 14-Day Free Trial Fabric using Prophecy Managed Databricks. You can use this when trying out Prophecy and when you don't want to connect your own Spark Execution Environment to Prophecy. We already have some sample data and tables created to try out the different functionalities. Please refer to the video below for a step-by-step example.

    In this Fabric you can only change the Databricks Runtime version. The auto-termination timeout, Executor and Driver Machine Type and Job sizes are uneditable.

    - - + + \ No newline at end of file diff --git a/Spark/gems/custom/delta-ops/index.html b/Spark/gems/custom/delta-ops/index.html index ead592b8b1..236f7fd542 100644 --- a/Spark/gems/custom/delta-ops/index.html +++ b/Spark/gems/custom/delta-ops/index.html @@ -6,15 +6,14 @@ DeltaTableOperations | Prophecy - - - - + + +
    -
    Skip to main content

    DeltaTableOperations

    Spark Gem

    Helps perform the following operations on Delta tables.

    1. Register table in catalog
    2. Vacuum table
    3. Optimize table
    4. Restore table
    5. Delete from table
    6. Drop table
    7. FSCK Repair table

    Parameters

    ParameterDescriptionRequired
    Database nameDatabase nameFalse
    Table nameTable nameFalse
    File pathFile path for delta tableFalse
    ActionAction to perform on the tableTrue
    note

    At least one value from table name or file path needs to be provided.

    Example

    Example usage of Delta Table Operations Gem

    Register table in catalog

    This will register the data at the mentioned file path as a table in whichever Metadata catalog is available in your execution environment.

    Vacuum table

    Recursively vacuum directories associated with the Delta table. VACUUM removes all files from the table directory that are not managed by Delta, as well as data files that are no longer in the latest state of the transaction log for the table and are older than a retention threshold. The default threshold is 7 days.

    To learn more about vacuum click here.

    Parameters

    ParameterDescriptionRequired
    Retention hoursRetention thresholdFalse

    Optimize table

    Optimizes the layout of Delta Table data. Optionally optimize a subset of data or colocate data by column. If colocation is not specified, bin-packing optimization is performed by default.

    To learn more about optimize click here.

    Parameters

    ParameterDescriptionRequired
    Where clauseOptimize the subset of rows matching the given partition predicate. Only filters involving partition key attributes are supported.False
    ZOrder ByList of columns to perform ZOrder onFalse

    Restore table

    Restores a Delta table to an earlier state. Restoring to an earlier version number or a timestamp is supported.

    Parameters

    ParameterDescriptionRequired
    Restore viaRestore the table via timestamp or versionFalse
    ValueValue to restore onFalse

    Delete from table

    Delete removes the data from the latest version of the Delta table that matches the specified condition. Please note that delete does not remove it from the physical storage until the older versions are explicitly vacuumed.

    Parameters

    ParameterDescriptionRequired
    Where clauseCondition which needs to be satisfied to delete a rowTrue

    Drop table

    This will drop the table from catalog and remove the files.

    FSCK Repair table

    Removes the file entries from the transaction log of a Delta table that can no longer be found in the underlying file system. This can happen when these files have been manually deleted.

    To learn more about fsck repair click here.

    - - +
    Skip to main content

    DeltaTableOperations

    Spark Gem

    Helps perform the following operations on Delta tables.

    1. Register table in catalog
    2. Vacuum table
    3. Optimize table
    4. Restore table
    5. Delete from table
    6. Drop table
    7. FSCK Repair table

    Parameters

    ParameterDescriptionRequired
    Database nameDatabase nameFalse
    Table nameTable nameFalse
    File pathFile path for delta tableFalse
    ActionAction to perform on the tableTrue
    note

    At least one value from table name or file path needs to be provided.

    Example

    Example usage of Delta Table Operations Gem

    Register table in catalog

    This will register the data at the mentioned file path as a table in whichever Metadata catalog is available in your execution environment.

    Vacuum table

    Recursively vacuum directories associated with the Delta table. VACUUM removes all files from the table directory that are not managed by Delta, as well as data files that are no longer in the latest state of the transaction log for the table and are older than a retention threshold. The default threshold is 7 days.

    To learn more about vacuum click here.

    Parameters

    ParameterDescriptionRequired
    Retention hoursRetention thresholdFalse

    Optimize table

    Optimizes the layout of Delta Table data. Optionally optimize a subset of data or colocate data by column. If colocation is not specified, bin-packing optimization is performed by default.

    To learn more about optimize click here.

    Parameters

    ParameterDescriptionRequired
    Where clauseOptimize the subset of rows matching the given partition predicate. Only filters involving partition key attributes are supported.False
    ZOrder ByList of columns to perform ZOrder onFalse

    Restore table

    Restores a Delta table to an earlier state. Restoring to an earlier version number or a timestamp is supported.

    Parameters

    ParameterDescriptionRequired
    Restore viaRestore the table via timestamp or versionFalse
    ValueValue to restore onFalse

    Delete from table

    Delete removes the data from the latest version of the Delta table that matches the specified condition. Please note that delete does not remove it from the physical storage until the older versions are explicitly vacuumed.

    Parameters

    ParameterDescriptionRequired
    Where clauseCondition which needs to be satisfied to delete a rowTrue

    Drop table

    This will drop the table from catalog and remove the files.

    FSCK Repair table

    Removes the file entries from the transaction log of a Delta table that can no longer be found in the underlying file system. This can happen when these files have been manually deleted.

    To learn more about fsck repair click here.

    + + \ No newline at end of file diff --git a/Spark/gems/custom/file-operations/index.html b/Spark/gems/custom/file-operations/index.html index 87d6892af2..927386e0ff 100644 --- a/Spark/gems/custom/file-operations/index.html +++ b/Spark/gems/custom/file-operations/index.html @@ -6,15 +6,14 @@ FileOperation | Prophecy - - - - + + +
    -
    Skip to main content

    FileOperation

    Spark Gem

    Helps perform file operations like copy and move on different file systems.

    Parameters

    ParameterDescriptionRequired
    File SystemLocal - for operations on driver node file system
    DBFS - for operations on Databricks file system
    S3 - for operations on S3 object store
    True
    OperationOperation to perform, Copy, Move or SyncTrue
    Filename RegexRegex to Filter File Names Eg: stdlog.*.txtFalse
    Ignore empty filesIgnore if file size is empty (Size of file is 0 bytes)False
    RecurseBoolean for performing Operation recursively. Default is FalseFalse
    Source PathPath of source file/directory.
    Eg: /dbfs/source_file.txt, dbfs:/source_file.txt, s3://source_bucket/source_prefix/filename.txt
    True
    Destination PathPath of destination file/directory.
    Eg: /dbfs/target_file.txt, dbfs:/target_file.txt, s3://target_bucket/target_prefix/filename.txt
    True
    info

    You can perform operations on DBFS files using Local file system too by providing path under /dbfs!
    This is because Databricks uses a FUSE mount to provide local access to the files stored in the cloud. A FUSE mount is a secure, virtual filesystem.

    Examples


    Copy Single File

    def copy_file(spark: SparkSession):
    from pyspark.dbutils import DBUtils
    DBUtils(spark).fs.cp(
    "dbfs:/Prophecy/example/source/person.json",
    "dbfs:/Prophecy/example/target/person.json",
    recurse = False
    )

    Copy All Files From A Directory

    def copy_file(spark: SparkSession):
    from pyspark.dbutils import DBUtils
    DBUtils(spark).fs.cp(
    "dbfs:/Prophecy/example/source/",
    "dbfs:/Prophecy/example/target/",
    recurse = True
    )

    Move Files

    def move_file(spark: SparkSession):
    from pyspark.dbutils import DBUtils
    DBUtils(spark).fs.mv("dbfs:/Prophecy/example/source/", "dbfs:/Prophecy/example/target/", recurse = False)


    S3 - Sync Entire Directory

    def sync_file(spark: SparkSession):
    dest_files = set(
    [
    f_object['Key'].lstrip('/')
    for f_object in boto3.client("s3").list_objects_v2(Bucket = dest_bucket, Prefix = dest_url.path.lstrip('/'))['Contents']
    if not f_object['Key'].endswith("/")
    ]
    )

    for obj in boto3.client("s3").list_objects_v2(Bucket = src_bucket, Prefix = src_url.path.lstrip('/'))['Contents']:
    new_dest_prefix = re.sub(src_prefix, dest_prefix, obj['Key'], 1)

    if (
    (
    mode in ["copy", "move"]
    and not obj['Key'].endswith("/")
    )
    or (
    not obj['Key'].endswith("/")
    and mode == "sync"
    and re.sub(src_prefix, dest_prefix, obj['Key'], 1) not in dest_files
    )
    ):

    if (
    (
    bool(ignoreEmptyFiles) == True
    and (
    s3.head_object(Bucket=src_bucket, Key=obj['Key'])['ContentLength']
    == 0
    )
    )
    or (
    bool(fileRegex)
    and fileRegex != ""
    and not bool(re.compile(fileRegex).match(obj['Key'].split('/')[- 1]))
    )
    ):
    continue

    s3.copy(
    {'Bucket' : src_bucket, 'Key' : obj['Key']},
    dest_bucket,
    re.sub(src_prefix, dest_prefix, obj['Key'], 1)
    )

    if mode == "move":
    s3.delete_object(Bucket = src_bucket, Key = obj['Key'])


    - - +
    Skip to main content

    FileOperation

    Spark Gem

    Helps perform file operations like copy and move on different file systems.

    Parameters

    ParameterDescriptionRequired
    File SystemLocal - for operations on driver node file system
    DBFS - for operations on Databricks file system
    S3 - for operations on S3 object store
    True
    OperationOperation to perform, Copy, Move or SyncTrue
    Filename RegexRegex to Filter File Names Eg: stdlog.*.txtFalse
    Ignore empty filesIgnore if file size is empty (Size of file is 0 bytes)False
    RecurseBoolean for performing Operation recursively. Default is FalseFalse
    Source PathPath of source file/directory.
    Eg: /dbfs/source_file.txt, dbfs:/source_file.txt, s3://source_bucket/source_prefix/filename.txt
    True
    Destination PathPath of destination file/directory.
    Eg: /dbfs/target_file.txt, dbfs:/target_file.txt, s3://target_bucket/target_prefix/filename.txt
    True
    info

    You can perform operations on DBFS files using Local file system too by providing path under /dbfs!
    This is because Databricks uses a FUSE mount to provide local access to the files stored in the cloud. A FUSE mount is a secure, virtual filesystem.

    Examples


    Copy Single File

    def copy_file(spark: SparkSession):
    from pyspark.dbutils import DBUtils
    DBUtils(spark).fs.cp(
    "dbfs:/Prophecy/example/source/person.json",
    "dbfs:/Prophecy/example/target/person.json",
    recurse = False
    )

    Copy All Files From A Directory

    def copy_file(spark: SparkSession):
    from pyspark.dbutils import DBUtils
    DBUtils(spark).fs.cp(
    "dbfs:/Prophecy/example/source/",
    "dbfs:/Prophecy/example/target/",
    recurse = True
    )

    Move Files

    def move_file(spark: SparkSession):
    from pyspark.dbutils import DBUtils
    DBUtils(spark).fs.mv("dbfs:/Prophecy/example/source/", "dbfs:/Prophecy/example/target/", recurse = False)


    S3 - Sync Entire Directory

    def sync_file(spark: SparkSession):
    dest_files = set(
    [
    f_object['Key'].lstrip('/')
    for f_object in boto3.client("s3").list_objects_v2(Bucket = dest_bucket, Prefix = dest_url.path.lstrip('/'))['Contents']
    if not f_object['Key'].endswith("/")
    ]
    )

    for obj in boto3.client("s3").list_objects_v2(Bucket = src_bucket, Prefix = src_url.path.lstrip('/'))['Contents']:
    new_dest_prefix = re.sub(src_prefix, dest_prefix, obj['Key'], 1)

    if (
    (
    mode in ["copy", "move"]
    and not obj['Key'].endswith("/")
    )
    or (
    not obj['Key'].endswith("/")
    and mode == "sync"
    and re.sub(src_prefix, dest_prefix, obj['Key'], 1) not in dest_files
    )
    ):

    if (
    (
    bool(ignoreEmptyFiles) == True
    and (
    s3.head_object(Bucket=src_bucket, Key=obj['Key'])['ContentLength']
    == 0
    )
    )
    or (
    bool(fileRegex)
    and fileRegex != ""
    and not bool(re.compile(fileRegex).match(obj['Key'].split('/')[- 1]))
    )
    ):
    continue

    s3.copy(
    {'Bucket' : src_bucket, 'Key' : obj['Key']},
    dest_bucket,
    re.sub(src_prefix, dest_prefix, obj['Key'], 1)
    )

    if mode == "move":
    s3.delete_object(Bucket = src_bucket, Key = obj['Key'])


    + + \ No newline at end of file diff --git a/Spark/gems/custom/index.html b/Spark/gems/custom/index.html index 925e02cfda..284a9e5c5b 100644 --- a/Spark/gems/custom/index.html +++ b/Spark/gems/custom/index.html @@ -6,15 +6,14 @@ Custom | Prophecy - - - - + + +
    Skip to main content
    - - + + \ No newline at end of file diff --git a/Spark/gems/custom/rest-api-enrich/index.html b/Spark/gems/custom/rest-api-enrich/index.html index 59933c3973..a2a1c89bde 100644 --- a/Spark/gems/custom/rest-api-enrich/index.html +++ b/Spark/gems/custom/rest-api-enrich/index.html @@ -6,21 +6,20 @@ RestAPIEnrich | Prophecy - - - - + + +
    -
    Skip to main content

    RestAPIEnrich

    Spark Gem

    Enriches the DataFrame by adding column(s) with content from REST API output based on the given configuration.

    Parameters

    Each property can either be set as a static value or a value from an existing column of the input DataFrame. Please refer +

    RestAPIEnrich

    Spark Gem

    Enriches the DataFrame by adding column(s) with content from REST API output based on the given configuration.

    Parameters

    Each property can either be set as a static value or a value from an existing column of the input DataFrame. Please refer to the examples in the description column of each parameter for reference on how the string value should be formed.

    ParameterDescriptionRequiredDefault
    methodmethod for the new Request object: GET, OPTIONS, HEAD, POST, PUT, PATCH, or DELETE.true
    urlURL for the REST API.true
    paramsDictionary, list of tuples or bytes to send in the query string for the Request. eg: {"key1":"value1", "key2": value2, "key3": ["value1", "value2"]}false
    dataDictionary to send in the body of the Request. eg: {"key1":"value1", "key2": value2}false
    JSONA JSON serializable Python object to send in the body of the Request. eg: {"key1":"value1", "key2": value2}false
    headersDictionary of HTTP Headers to send with the Request. eg: {"key1":"value1", "key2": "value2"}false
    cookiesDictionary to send with the Request. eg: {"key1":"value1", "key2": "value2"}false
    authAuth tuple to enable Basic/Digest/Custom HTTP Auth. eg: user:passfalse
    timeoutHow many seconds to wait for the server to send data before giving up, as a float, eg: 0.5 or a (connect timeout, read timeout) tuple. eg: 0.5:0.25false
    allow redirectsEnable/disable GET/OPTIONS/POST/PUT/PATCH/DELETE/HEAD redirection. eg: true or falsefalsetrue
    proxiesDictionary mapping protocol to the URL of the proxy. eg: {"https" : "https://1.1.0.1:80"}false
    verifyEither a boolean, in which case it controls whether we verify the server’s TLS certificate eg: true or false or a string, in which case it must be a path to a CA bundle to use. Defaults to True. eg: dbfs:/path-to-filefalsetrue
    streamif False, the response content will be immediately downloaded. eg: true or falsefalse
    certif String, path to SSL client cert file (.pem). eg. dbfs:/path-to-file. If Tuple, (‘cert’, ‘key’) pair. eg: cert:key.false
    parse contentParse content as JSON (to make the schema available, enable custom schema, and click infer from cluster at the bottom left in the output tab)falsefalse
    info
    1. To store sensitive information like API key (headers), auth etc., Databricks secrets can be used as shown in Example below.
    2. If the expected number of rows is very large, it's better to provide an await time in the advanced tab so you don't overwhelm the source server or exceed any request limits.
    3. For APIs that take a list of parameters as input, window functions like collect_list can be used before the RestAPIEnrich Gem to reduce the number of API calls.

    Please make sure that cluster is connected while using the parse content option to infer the schema from cluster for the first time.

    note

    All input parameters are expected to be in string format. Other column types such as array/JSON/struct can be created using a combination of aggregate/window Gems along with the reformat component, and can then be cast to string before passing the column to the RestAPIEnrich Gem as needed.

    Example 1

    Let's try to fetch prices for a few cryptocurrencies from Coin-API.

    We would be taking cryptocurrency and currency as input from DataFrame and pass url, headers as static values. Note that URL in this example is created using static base url and adding cryptocurrency and currency as inputs from DataFrame.

    Also, we would be using Databricks-secrets to pass headers as it requires API-key.

    Example 2

    Let's take a more complex example, where all method, url, headers, params etc are passed as values from DataFrame columns.

    Generated Code

    def get_data_from_api(spark: SparkSession, in0: DataFrame) -> DataFrame:
    requestDF = in0.withColumn(
    "api_output",
    get_rest_api(
    to_json(struct(lit("GET").alias("method"), col("url"), lit(Config.coin_api_key).alias("headers"))),
    lit("")
    )
    )

    return requestDF.withColumn(
    "content_parsed",
    from_json(col("api_output.content"), schema_of_json(requestDF.select("api_output.content").take(1)[0][0]))
    )

    - - + + \ No newline at end of file diff --git a/Spark/gems/custom/script/index.html b/Spark/gems/custom/script/index.html index b74e9557cc..9bac2b76a2 100644 --- a/Spark/gems/custom/script/index.html +++ b/Spark/gems/custom/script/index.html @@ -6,15 +6,14 @@ Script | Prophecy - - - - + + +
    -

    Script

    Spark Gem

    Provides a SparkSession and allows you to run custom code.

    Parameters

    ParameterMeaningRequired
    Input DataFrame(s)Input DataFrame(s)False
    Output DataFrame(s)Output DataFrame(s)False
    CodeCustom code to be executedTrue
    info

    To remove input/output DataFrame(s), simply click icon on the left sidebar

    Script - Remove inputs

    Schema

    When executing a custom script Gem, the output schema is not known by Prophecy so it must be inferred from a sample computation result. Click the Custom Schema button and Infer from cluster as shown in the Gems --> Outputs description here. The schema will be inferred according to the script and the Spark version running on the connected cluster.

    Examples


    Script Gem with Input and Output: Un-pivoting a DataFrame

    We'll perform the unpivot operation using our custom code

    Script - Unpivot


    Script Gem with only Output: Generating a DataFrame

    We'll use the provided SparkSession to create and return a DataFrame

    note

    Since we removed the input port, we don't see input DataFrame in the method signature

    Script - Unpivot

    - - +

    Script

    Spark Gem

    Provides a SparkSession and allows you to run custom code.

    Parameters

    ParameterMeaningRequired
    Input DataFrame(s)Input DataFrame(s)False
    Output DataFrame(s)Output DataFrame(s)False
    CodeCustom code to be executedTrue
    info

    To remove input/output DataFrame(s), simply click icon on the left sidebar

    Script - Remove inputs

    Schema

    When executing a custom script Gem, the output schema is not known by Prophecy so it must be inferred from a sample computation result. Click the Custom Schema button and Infer from cluster as shown in the Gems --> Outputs description here. The schema will be inferred according to the script and the Spark version running on the connected cluster.

    Examples


    Script Gem with Input and Output: Un-pivoting a DataFrame

    We'll perform the unpivot operation using our custom code

    Script - Unpivot


    Script Gem with only Output: Generating a DataFrame

    We'll use the provided SparkSession to create and return a DataFrame

    note

    Since we removed the input port, we don't see input DataFrame in the method signature

    Script - Unpivot

    + + \ No newline at end of file diff --git a/Spark/gems/custom/sql-statement/index.html b/Spark/gems/custom/sql-statement/index.html index b9c4d99a26..f868b12ba5 100644 --- a/Spark/gems/custom/sql-statement/index.html +++ b/Spark/gems/custom/sql-statement/index.html @@ -6,15 +6,14 @@ SQLStatement | Prophecy - - - - + + +
    -

    SQLStatement

    Spark Gem

    Create one or more DataFrame(s) based on provided SQL queries to run against one or more input DataFrames.

    Parameters

    ParameterMeaningRequired
    DataFrame(s)Input DataFrame(s)True
    SQL QueriesSQL Query for each output tabTrue

    Example

    SQL example 1

    info

    Number of inputs and outputs can be changed as needed by clicking the + button on the respective tab.

    Generated Code

    def SQLStatement(spark: SparkSession, orders: DataFrame, customers: DataFrame) -> (DataFrame, DataFrame):
    orders.createOrReplaceTempView("orders")
    customers.createOrReplaceTempView("customers")
    df1 = spark.sql("select * from orders inner join customers on orders.customer_id = customers.customer_id")
    df2 = spark.sql("select distinct customer_id from orders")

    return df1, df2

    - - +

    SQLStatement

    Spark Gem

    Create one or more DataFrame(s) based on provided SQL queries to run against one or more input DataFrames.

    Parameters

    ParameterMeaningRequired
    DataFrame(s)Input DataFrame(s)True
    SQL QueriesSQL Query for each output tabTrue

    Example

    SQL example 1

    info

    Number of inputs and outputs can be changed as needed by clicking the + button on the respective tab.

    Generated Code

    def SQLStatement(spark: SparkSession, orders: DataFrame, customers: DataFrame) -> (DataFrame, DataFrame):
    orders.createOrReplaceTempView("orders")
    customers.createOrReplaceTempView("customers")
    df1 = spark.sql("select * from orders inner join customers on orders.customer_id = customers.customer_id")
    df2 = spark.sql("select distinct customer_id from orders")

    return df1, df2

    + + \ No newline at end of file diff --git a/Spark/gems/index.html b/Spark/gems/index.html index 27cd1ef7e0..6d727748f7 100644 --- a/Spark/gems/index.html +++ b/Spark/gems/index.html @@ -6,15 +6,14 @@ Spark Gems | Prophecy - - - - + + +

    Spark Gems

    Gems are functional units in a Pipeline that perform tasks such as reading, transforming, writing, or handling other data operations.

    The table below outlines the different Spark Gem categories.

    Gem
    CategoryDescription
    Source and TargetSource and TargetThe set of Gems that help with loading and saving data.
    TransformTransformThe set of Gems that help with transforming data.
    Join and splitJoin and SplitThe set of Gems that help with the process of merging or splitting DataFrame(s) to create new DataFrame(s).
    CustomCustomThe set of Gems that our creative teams build using Expression Builder to extend the Prophecy capabilities.
    Machine LearningMachine LearningThe set of Gems that prepare data or use data for Machine Learning.
    SubgraphSubgraphA Gem that can contain many other Gems within it.

    What's next

    To learn more about Spark Gems, see the following pages:

    - - + + \ No newline at end of file diff --git a/Spark/gems/join-split/Repartition/index.html b/Spark/gems/join-split/Repartition/index.html index 66efce91fb..2e74893f7c 100644 --- a/Spark/gems/join-split/Repartition/index.html +++ b/Spark/gems/join-split/Repartition/index.html @@ -6,15 +6,14 @@ Repartition | Prophecy - - - - + + +
    -

    Repartition

    Spark Gem

    This will repartition or coalesce the input DataFrame based on the specified configuration. There are four different repartitioning options:

    Hash Repartitioning

    Repartitions the data evenly across various partitions based on the hash value of the specified key.

    Parameters

    ParameterDescriptionRequired
    DataFrameInput DataFrameTrue
    Overwrite default partitionsFlag to overwrite default partitionsFalse
    Number of partitionsInteger value specifying number of partitionsFalse
    Repartition expression(s)List of expressions to repartition byTrue

    Generated Code

    def hashRepartition(spark: SparkSession, in0: DataFrame) -> DataFrame:
    return in0.repartition(5, col("customer_id"))

    Random Repartitioning

    Repartitions without data distribution defined.

    Parameters

    ParameterDescriptionRequired
    DataFrameInput DataFrameTrue
    Number of partitionsInteger value specifying number of partitionsTrue

    Generated Code

    def randomRepartition(spark: SparkSession, in0: DataFrame) -> DataFrame:
    return in0.repartition(5)

    Range Repartitioning

    Repartitions the data with tuples having keys within the same range on the same worker.

    Parameters

    ParameterDescriptionRequired
    DataFrameInput DataFrameTrue
    Overwrite default partitionsFlag to overwrite default partitionsFalse
    Number of partitionsInteger value specifying number of partitionsFalse
    Repartition expression(s) with sortingList of expressions to repartition by with corresponding sorting orderTrue

    Generated Code

    def RepartitionByRange(spark: SparkSession, in0: DataFrame) -> DataFrame:
    return in0.repartitionByRange(5, col("customer_id").asc())

    Coalesce

    Reduces the number of partitions without shuffling the dataset.

    Parameters

    ParameterDescriptionRequired
    DataFrameInput DataFrameTrue
    Number of partitionsInteger value specifying number of partitionsTrue

    Generated Code

    def Coalesce(spark: SparkSession, in0: DataFrame) -> DataFrame:
    return in0.coalesce(5)

    Video demo

    - - +

    Repartition

    Spark Gem

    This will repartition or coalesce the input DataFrame based on the specified configuration. There are four different repartitioning options:

    Hash Repartitioning

    Repartitions the data evenly across various partitions based on the hash value of the specified key.

    Parameters

    ParameterDescriptionRequired
    DataFrameInput DataFrameTrue
    Overwrite default partitionsFlag to overwrite default partitionsFalse
    Number of partitionsInteger value specifying number of partitionsFalse
    Repartition expression(s)List of expressions to repartition byTrue

    Generated Code

    def hashRepartition(spark: SparkSession, in0: DataFrame) -> DataFrame:
    return in0.repartition(5, col("customer_id"))

    Random Repartitioning

    Repartitions without data distribution defined.

    Parameters

    ParameterDescriptionRequired
    DataFrameInput DataFrameTrue
    Number of partitionsInteger value specifying number of partitionsTrue

    Generated Code

    def randomRepartition(spark: SparkSession, in0: DataFrame) -> DataFrame:
    return in0.repartition(5)

    Range Repartitioning

    Repartitions the data with tuples having keys within the same range on the same worker.

    Parameters

    ParameterDescriptionRequired
    DataFrameInput DataFrameTrue
    Overwrite default partitionsFlag to overwrite default partitionsFalse
    Number of partitionsInteger value specifying number of partitionsFalse
    Repartition expression(s) with sortingList of expressions to repartition by with corresponding sorting orderTrue

    Generated Code

    def RepartitionByRange(spark: SparkSession, in0: DataFrame) -> DataFrame:
    return in0.repartitionByRange(5, col("customer_id").asc())

    Coalesce

    Reduces the number of partitions without shuffling the dataset.

    Parameters

    ParameterDescriptionRequired
    DataFrameInput DataFrameTrue
    Number of partitionsInteger value specifying number of partitionsTrue

    Generated Code

    def Coalesce(spark: SparkSession, in0: DataFrame) -> DataFrame:
    return in0.coalesce(5)

    Video demo

    + + \ No newline at end of file diff --git a/Spark/gems/join-split/compare-columns/index.html b/Spark/gems/join-split/compare-columns/index.html index a7d91aea7c..906325d5b4 100644 --- a/Spark/gems/join-split/compare-columns/index.html +++ b/Spark/gems/join-split/compare-columns/index.html @@ -6,15 +6,14 @@ CompareColumns | Prophecy - - - - + + +
    -

    CompareColumns

    Spark Gem

    The CompareColumns Gem lets you compare columns between two DataFrames based on the key id columns defined.

    Parameters

    ParameterDescriptionRequired
    DataFrame 1First input DataFrameTrue
    DataFrame 2Second input DataFrameTrue
    ID columns to retain(Select Id Columns)List of columns that are used for joining the two dataframesTrue
    Output Column Name(Select Output Columns)In the output, alias name of the column name that was compared among dataframesTrue
    Match Count Column Name(Select Output Columns)In the output, alias name of the column that shows the count of rows that matched between two dataframesTrue
    Mismatch Count Column Name(Select Output Columns)In the output, alias name of the column that shows the count of rows that mismatched between two dataframesTrue
    Mismatch Example Left Column Name(Select Output Columns)In the output, alias name of the column displaying an incorrect left column valueTrue
    Mismatch Example Right Column Name(Select Output Columns)In the output, alias name of the column displaying an incorrect right column valueTrue
    Mismatch Example ID Column Prefix(Select Output Columns)In the output, alias name of the ID column value that mismatched between two dataframesTrue

    Example - Compare columns of two DataFrames

    Generated code

    def CompareColumns_1(spark: SparkSession, in0: DataFrame, in1: DataFrame) -> DataFrame:
    joined = exploded1\
    .join(
    exploded2,
    reduce(
    lambda a, c: a & c,
    [col(f"exploded1.column_name") == col(f"exploded2.column_name"), col(f"exploded1.customer_id") == col(f"exploded2.customer_id")],
    lit(True)
    ),
    "full_outer"
    )\
    .select(
    coalesce(col(f"exploded1.column_name"), col(f"exploded2.column_name")).alias("column_name"),
    coalesce(col(f"exploded1.customer_id"), col(f"exploded2.customer_id")).alias("customer_id"),
    col(
    f"exploded1.##value##"
    )\
    .alias(
    "##left_value##"
    ),
    col(
    f"exploded2.##value##"
    )\
    .alias(
    "##right_value##"
    )
    )\
    .withColumn(
    "match_count",
    when(
    coalesce(
    (
    col("##left_value##")
    == col(
    "##right_value##"
    )
    ),
    (
    col(
    "##left_value##"
    )\
    .isNull()
    & col(
    "##right_value##"
    )\
    .isNull()
    )
    ),
    lit(1)
    )\
    .otherwise(lit(0))
    )\
    .withColumn(
    "mismatch_count",
    when(
    coalesce(
    (
    col("##left_value##")
    != col(
    "##right_value##"
    )
    ),
    ~ (
    col(
    "##left_value##"
    )\
    .isNull()
    & col(
    "##right_value##"
    )\
    .isNull()
    )
    ),
    lit(1)
    )\
    .otherwise(lit(0))
    )
    mismatchExamples = joined\
    .select(
    col("column_name"),
    col("customer_id"),
    lit(0).alias("match_count"),
    lit(0).alias("mismatch_count"),
    col(
    "##left_value##"
    )\
    .alias("mismatch_example_left"),
    col(
    "##right_value##"
    )\
    .alias("mismatch_example_right")
    )\
    .dropDuplicates(["column_name"])

    return joined\
    .union(mismatchExamples)\
    .groupBy("column_name")\
    .agg(
    sum("match_count").alias("match_count"),
    sum("mismatch_count").alias("mismatch_count"),
    first(col("mismatch_example_left"), ignorenulls = True).alias("mismatch_example_left"),
    first(col("mismatch_example_right"), ignorenulls = True).alias("mismatch_example_right"),
    first(
    when(coalesce(col("mismatch_example_left"), col("mismatch_example_right")).isNotNull(), col("customer_id"))\
    .otherwise(lit(None)),
    ignorenulls = True
    )\
    .alias("mismatch_example_customer_id")
    )\
    .orderBy(col("mismatch_count").desc(), col("column_name"))

    Below are the steps that are performed to compare two DataFrames in compare column Gem:

    • Pivot the DataFrame to get the key column's, compare column name and value
    • Join the pivoted DataFrames and compare the column value using key column's
    • Calculate the match and mismatch record counts
    note

    Repartition the DataFrames as they will be exploded and joined with each other

    - - +

    CompareColumns

    Spark Gem

    The CompareColumns Gem lets you compare columns between two DataFrames based on the key id columns defined.

    Parameters

    ParameterDescriptionRequired
    DataFrame 1First input DataFrameTrue
    DataFrame 2Second input DataFrameTrue
    ID columns to retain(Select Id Columns)List of columns that are used for joining the two DataFramesTrue
    Output Column Name(Select Output Columns)In the output, alias name of the column name that was compared among dataframesTrue
    Match Count Column Name(Select Output Columns)In the output, alias name of the column that shows the count of rows that matched between two dataframesTrue
    Mismatch Count Column Name(Select Output Columns)In the output, alias name of the column that shows the count of rows that mismatched between two dataframesTrue
    Mismatch Example Left Column Name(Select Output Columns)In the output, alias name of the column displaying an incorrect left column valueTrue
    Mismatch Example Right Column Name(Select Output Columns)In the output, alias name of the column displaying an incorrect right column valueTrue
    Mismatch Example ID Column Prefix(Select Output Columns)In the output, alias name of the ID column value that mismatched between two dataframesTrue

    Example - Compare columns of two DataFrames

    Generated code

    def CompareColumns_1(spark: SparkSession, in0: DataFrame, in1: DataFrame) -> DataFrame:
    joined = exploded1\
    .join(
    exploded2,
    reduce(
    lambda a, c: a & c,
    [col(f"exploded1.column_name") == col(f"exploded2.column_name"), col(f"exploded1.customer_id") == col(f"exploded2.customer_id")],
    lit(True)
    ),
    "full_outer"
    )\
    .select(
    coalesce(col(f"exploded1.column_name"), col(f"exploded2.column_name")).alias("column_name"),
    coalesce(col(f"exploded1.customer_id"), col(f"exploded2.customer_id")).alias("customer_id"),
    col(
    f"exploded1.##value##"
    )\
    .alias(
    "##left_value##"
    ),
    col(
    f"exploded2.##value##"
    )\
    .alias(
    "##right_value##"
    )
    )\
    .withColumn(
    "match_count",
    when(
    coalesce(
    (
    col("##left_value##")
    == col(
    "##right_value##"
    )
    ),
    (
    col(
    "##left_value##"
    )\
    .isNull()
    & col(
    "##right_value##"
    )\
    .isNull()
    )
    ),
    lit(1)
    )\
    .otherwise(lit(0))
    )\
    .withColumn(
    "mismatch_count",
    when(
    coalesce(
    (
    col("##left_value##")
    != col(
    "##right_value##"
    )
    ),
    ~ (
    col(
    "##left_value##"
    )\
    .isNull()
    & col(
    "##right_value##"
    )\
    .isNull()
    )
    ),
    lit(1)
    )\
    .otherwise(lit(0))
    )
    mismatchExamples = joined\
    .select(
    col("column_name"),
    col("customer_id"),
    lit(0).alias("match_count"),
    lit(0).alias("mismatch_count"),
    col(
    "##left_value##"
    )\
    .alias("mismatch_example_left"),
    col(
    "##right_value##"
    )\
    .alias("mismatch_example_right")
    )\
    .dropDuplicates(["column_name"])

    return joined\
    .union(mismatchExamples)\
    .groupBy("column_name")\
    .agg(
    sum("match_count").alias("match_count"),
    sum("mismatch_count").alias("mismatch_count"),
    first(col("mismatch_example_left"), ignorenulls = True).alias("mismatch_example_left"),
    first(col("mismatch_example_right"), ignorenulls = True).alias("mismatch_example_right"),
    first(
    when(coalesce(col("mismatch_example_left"), col("mismatch_example_right")).isNotNull(), col("customer_id"))\
    .otherwise(lit(None)),
    ignorenulls = True
    )\
    .alias("mismatch_example_customer_id")
    )\
    .orderBy(col("mismatch_count").desc(), col("column_name"))

    Below are the steps that are performed to compare two DataFrames in the CompareColumns Gem:

    • Pivot each DataFrame to get the key columns, the compared column name, and its value
    • Join the pivoted DataFrames on the key columns and compare the column values
    • Calculate the match and mismatch record counts
    note

    Repartition the DataFrames as they will be exploded and joined with each other

    + + \ No newline at end of file diff --git a/Spark/gems/join-split/index.html b/Spark/gems/join-split/index.html index 9c3f00ce96..a0aca65dab 100644 --- a/Spark/gems/join-split/index.html +++ b/Spark/gems/join-split/index.html @@ -6,15 +6,14 @@ Join & Split | Prophecy - - - - + + + - - + + \ No newline at end of file diff --git a/Spark/gems/join-split/join/index.html b/Spark/gems/join-split/join/index.html index d49ae38801..0af60c24cc 100644 --- a/Spark/gems/join-split/join/index.html +++ b/Spark/gems/join-split/join/index.html @@ -6,16 +6,15 @@ Join | Prophecy - - - - + + +
    -

    Join

    Spark Gem

    Joins 2 or more DataFrames based on the given configuration.

    Parameters

    ParameterDescriptionRequired
    DataFrame 1First input DataFrameTrue
    DataFrame 2Second input DataFrameTrue
    DataFrame NNth input DataFrameFalse
    Join Condition (Conditions tab)The join condition specifies how the rows will be combined.True
    Type (Conditions tab)The type of JOIN (Inner, Full Outer, Left , Right , Left Semi, Left Anti)True
    Where Clause (Conditions tab)Filter applied after the Join operationFalse
    Target column (Expressions)Output column nameFalse
    Expression (Expressions)Expression to compute target column. If no expression is given, then all columns from all DataFrames would reflect in output.False
    Hint Type (Advanced)The type of Join Hint (Broadcast, Merge, Shuffle Hash, Shuffle Replicate NL or None). To read more about join hints click hereFalse
    Propagate All Columns (Advanced)If true, all columns from that DataFrame would be propagated to output DataFrame. Equivalent to selecting df.* for the selected DataFrame.False

    Adding a new input

    1. Click on the plus icon to add a new input.
    2. Then add your condition expression for the newly added input. +

      Join

      Spark Gem

      Joins 2 or more DataFrames based on the given configuration.

      Parameters

      ParameterDescriptionRequired
      DataFrame 1First input DataFrameTrue
      DataFrame 2Second input DataFrameTrue
      DataFrame NNth input DataFrameFalse
      Join Condition (Conditions tab)The join condition specifies how the rows will be combined.True
      Type (Conditions tab)The type of JOIN (Inner, Full Outer, Left, Right, Left Semi, Left Anti)True
      Where Clause (Conditions tab)Filter applied after the Join operationFalse
      Target column (Expressions)Output column nameFalse
      Expression (Expressions)Expression to compute target column. If no expression is given, then all columns from all DataFrames would reflect in output.False
      Hint Type (Advanced)The type of Join Hint (Broadcast, Merge, Shuffle Hash, Shuffle Replicate NL or None). To read more about join hints click hereFalse
      Propagate All Columns (Advanced)If true, all columns from that DataFrame would be propagated to output DataFrame. Equivalent to selecting df.* for the selected DataFrame.False

      Adding a new input

      1. Click on the plus icon to add a new input.
      2. Then add your condition expression for the newly added input. Example usage of Join - Add new input to join gem

      Examples

      Example 1 - Join with three DataFrame inputs

      Example usage of Join - Join three DataFrame inputs

      def Join_1(spark: SparkSession, in0: DataFrame, in1: DataFrame, in2: DataFrame) -> DataFrame:
      return in0\
      .alias("in0")\
      .join(in1.alias("in1"), (col("in0.customer_id") == col("in1.customer_id")), "inner")\
      .join(in2.alias("in2"), (col("in1.customer_id") == col("in2.customer_id")), "inner")

      Example 2 - Join with Hints

      Join hints allow users to suggest the join strategy that Spark should use. For a quick overview, see Spark's Join Hints documentation.

      Example usage of Join - Join with hints

      def Join_1(spark: SparkSession, in0: DataFrame, in1: DataFrame, in2: DataFrame) -> DataFrame:
      df1 = in1.hint("merge")

      return in0\
      .alias("in0")\
      .hint("broadcast")\
      .join(df1.alias("in1"), col("in0.customer_id") == col("in1.customer_id"), "inner")\
      .join(in2.alias("in2"), col("in1.customer_id") == col("in2.customer_id"), "inner")
      object Join_1 {
      def apply(spark: SparkSession, in0: DataFrame, in1: DataFrame, in2: DataFrame): DataFrame =
      in0
      .as("in0")
      .hint("broadcast")
      .join(in1.as("in1").hint("merge"), col("in0.customer_id") === col("in1.customer_id"), "inner")
      .join(in2.as("in2"), col("in1.customer_id") === col("in2.customer_id"), "inner")
      }

      Example 3 - Join with Propagate Columns

      Step 1 - Specify join condition

      def Join_1(spark: SparkSession, in0: DataFrame, in1: DataFrame, ) -> DataFrame:
      return in0\
      .alias("in0")\
      .join(in1.alias("in1"), (col("in0.customer_id") == col("in1.customer_id")), "inner")\
      .select(*[col("in1.email").alias("email"), col("in1.phone").alias("phone")], col("in0.*"))
      object Join_1 {

      def apply(spark: SparkSession, in0: DataFrame, in1: DataFrame): DataFrame =
      in0
      .as("in0")
      .join(in1.as("in1"), col("in0.customer_id") === col("in1.customer_id"), "inner")
      .select(col("in1.phone").as("phone"), col("in1.email").as("email"), col("in0.*"))

      }

      Types of Join

      Suppose there are 2 tables TableA and TableB with only 2 columns (Ref, Data) and following contents:

      Table A

      RefData
      1Data_A11
      1Data_A12
      1Data_A13
      2Data_A21
      3Data_A31

      Table B

      RefData
      1Data_B11
      2Data_B21
      2Data_B22
      2Data_B23
      4Data_B41

      INNER JOIN

      Inner Join on column Ref will return columns from both the tables and only the matching records as long as the condition is satisfied:

      RefDataRefData
      1Data_A111Data_B11
      1Data_A121Data_B11
      1Data_A131Data_B11
      2Data_A212Data_B21
      2Data_A212Data_B22
      2Data_A212Data_B23

      LEFT JOIN

      Left Join (or Left Outer join) on column Ref will return columns from both the tables and match records with records from the left table. The result-set will contain null for the rows for which there is no matching row on the right side.

      RefDataRefData
      1Data_A111Data_B11
      1Data_A121Data_B11
      1Data_A131Data_B11
      2Data_A212Data_B21
      2Data_A212Data_B22
      2Data_A212Data_B23
      3Data_A31NULLNULL

      RIGHT JOIN

      Right Join (or Right Outer join) on column Ref will return columns from both the tables and match records with records from the right table. The result-set will contain null for the rows for which there is no matching row on the left side.

      RefDataRefData
      1Data_A111Data_B11
      1Data_A121Data_B11
      1Data_A131Data_B11
      2Data_A212Data_B21
      2Data_A212Data_B22
      2Data_A212Data_B23
      NULLNULL4Data_B41

      FULL OUTER JOIN

      Full Outer Join on column Ref will return columns from both tables, matching records from the left table with records from the right table. The result set will contain NULL values wherever a row has no match on the other side.

      RefDataRefData
      1Data_A111Data_B11
      1Data_A121Data_B11
      1Data_A131Data_B11
      2Data_A212Data_B21
      2Data_A212Data_B22
      2Data_A212Data_B23
      3Data_A31NULLNULL
      NULLNULL4Data_B41

      LEFT SEMI JOIN

      Left Semi Join on column Ref will return columns only from left table and matching records only from left table.

      RefData
      1Data_A11
      1Data_A12
      1Data_A13
      2Data_A21

      LEFT ANTI JOIN

      Left Anti Join on column Ref will return columns only from the left table, and only the records that have no match in the right table:

      RefData
      3Data_A31
    - - + + \ No newline at end of file diff --git a/Spark/gems/join-split/row-distributor/index.html b/Spark/gems/join-split/row-distributor/index.html index 62c4be4265..1bf065048b 100644 --- a/Spark/gems/join-split/row-distributor/index.html +++ b/Spark/gems/join-split/row-distributor/index.html @@ -6,15 +6,14 @@ RowDistributor | Prophecy - - - - + + +
    -

    RowDistributor

    Spark Gem

    Use the RowDistributor Gem to create multiple DataFrames based on provided filter conditions from an input DataFrame.

    This is useful for cases where rows from the input DataFrame needs to be distributed into multiple DataFrames in different ways for downstream Gems.

    Parameters

    ParameterDescriptionRequired
    DataFrameInput DataFrame for which rows needs to be distributed into multiple DataFramesTrue
    Filter ConditionsBoolean Type column or boolean expression for each output tab. Supports SQL, Python and Scala expressionsTrue

    Example

    Row distributor 1

    info

    Number of outputs can be changed as needed by clicking the + button.

    Generated Code

    def RowDistributor(spark: SparkSession, in0: DataFrame) -> (DataFrame, DataFrame, DataFrame):
    df1 = in0.filter((col("order_status") == lit("Started")))
    df2 = in0.filter((col("order_status") == lit("Approved")))
    df3 = in0.filter((col("order_status") == lit("Finished")))

    return df1, df2, df3
    - - +

    RowDistributor

    Spark Gem

    Use the RowDistributor Gem to create multiple DataFrames based on provided filter conditions from an input DataFrame.

    This is useful for cases where rows from the input DataFrame need to be distributed into multiple DataFrames in different ways for downstream Gems.

    Parameters

    ParameterDescriptionRequired
    DataFrameInput DataFrame for which rows need to be distributed into multiple DataFramesTrue
    Filter ConditionsBoolean Type column or boolean expression for each output tab. Supports SQL, Python and Scala expressionsTrue

    Example

    Row distributor 1

    info

    Number of outputs can be changed as needed by clicking the + button.

    Generated Code

    def RowDistributor(spark: SparkSession, in0: DataFrame) -> (DataFrame, DataFrame, DataFrame):
    df1 = in0.filter((col("order_status") == lit("Started")))
    df2 = in0.filter((col("order_status") == lit("Approved")))
    df3 = in0.filter((col("order_status") == lit("Finished")))

    return df1, df2, df3
    + + \ No newline at end of file diff --git a/Spark/gems/machine-learning/index.html b/Spark/gems/machine-learning/index.html index b342d82078..4b3341bfd4 100644 --- a/Spark/gems/machine-learning/index.html +++ b/Spark/gems/machine-learning/index.html @@ -6,15 +6,14 @@ Machine Learning | Prophecy - - - - + + +

    Machine Learning

    Each page below describes one of the set of Gems that prepare or use data for Machine Learning. These Gems have a required Cluster library dependency. For an example set of Pipelines that use these Gems to create a Generative AI Chatbot, see this guide.

    Cluster library dependencies

    Spark-AI - Toolbox for building Generative AI applications on top of Apache Spark. This library dependency is required for the Machine Learning Gem functionality. Set up this dependency in one of two ways:

    • Option a. Import a project with the spark-ai dependency preconfigured, as in this guide.

    • Option b. Alternatively, add prophecy-spark-ai==0.1.8 as a Python library, and io.prophecy:spark-ai_2.12:0.1.8 as a Maven library to the Pipeline dependencies.

    Click here to see the roadmap for what's coming in the future with the Spark-AI toolbox.

    - - + + \ No newline at end of file diff --git a/Spark/gems/machine-learning/ml-openai/index.html b/Spark/gems/machine-learning/ml-openai/index.html index fa1ced0148..3830ec4373 100644 --- a/Spark/gems/machine-learning/ml-openai/index.html +++ b/Spark/gems/machine-learning/ml-openai/index.html @@ -6,15 +6,14 @@ OpenAI | Prophecy - - - - + + +
    -

    OpenAI

    Spark Gem

    The OpenAI Gem allows the Prophecy user to interact with the OpenAI API using two different requests:

    1. Compute text embeddings
    2. Answer a question, where the user has the option to provide context

    Follow along to learn how to interact with the OpenAI API using Prophecy's easy-to-use interface. For an example set of Pipelines that use these Gems to create a Generative AI Chatbot, see this guide.

    caution

    As with all applications that interface with Large Language Models (LLMs), the OpenAI Gem can generate results that are incorrect and/or misleading. The OpenAI Gem is subject to the same limitations and risks as those posed by OpenAI itself.



    1. Compute text embeddings

    Given a question input, the OpenAI Gem will return a text embedding by calling the OpenAI ada-002 model. View the input and output from this Gem to understand the data formats and sample.

    Overview of the Gem showing the input and output for computing a text embedding

    1a. Configure

    Follow the steps below to configure the OpenAI Gem to compute text embeddings.

    Configure the Gem to compute a text embedding

    Storing the OpenAI API token as a (1) Databricks Secret is highly recommended. For instructions click here. Be sure to use the (2) Fabric connection to the Databricks workspace which contains the Databricks scope and secrets configured in this Gem. Contact us to understand the integrations with other secret managers.

    Select the Operation type from the dropdown menu. (3) Compute text embeddings operation will send the selected (4) Texts column to the OpenAI API. For each entry in the Texts column, OpenAI's ada-002 model will return a text embedding.

    Instead of sending a single row to OpenAI's API, select the (5) Group data option. Group data is a window function, using a window of size 20, (6) ordered by the selected column. Using the Group data option influences model performance based on the column selected.

    1b. Input

    ColumnDescriptionRequired
    Question/Textstring - a question or text string of interestTrue

    1c. Output

    ColumnDescription
    openai_embeddingarray(float) - The vector embedding returned from OpenAI corresponding to the input question/text. Each record is an array of 1536 floating point numbers, such as [-0.0018493991, -0.0059955865, ... -0.02498541].
    openai_errorstring - this column is provided to display any error message returned from the OpenAI API; helpful for troubleshooting.

    1d. Generated code

    All the visual designs are converted to code and committed to the Prophecy user's Git repository. See below for a sample of the code which calls the OpenAI API to compute text embeddings.

    def vectorize(spark: SparkSession, question_seed: DataFrame) -> DataFrame:
    from spark_ai.llms.openai import OpenAiLLM
    from pyspark.dbutils import DBUtils
    OpenAiLLM(api_key = DBUtils(spark).secrets.get(scope = "<redacted>", key = "<redacted>"))\
    .register_udfs(spark = spark)

    return question_seed\
    .withColumn("_row_num", row_number().over(Window.partitionBy().orderBy(col("input"))))\
    .withColumn("_group_num", ceil(col("_row_num") / 20))\
    .withColumn("_data", struct(col("*")))\
    .groupBy(col("_group_num"))\
    .agg(collect_list(col("_data")).alias("_data"), collect_list(col("input")).alias("_texts"))\
    .withColumn("_embedded", expr(f"openai_embed_texts(_texts)"))\
    .select(
    col("_texts"),
    col("_embedded.embeddings").alias("_embeddings"),
    col("_embedded.error").alias("openai_error"),
    col("_data")
    )\
    .select(expr("explode_outer(arrays_zip(_embeddings, _data))").alias("_content"), col("openai_error"))\
    .select(col("_content._embeddings").alias("openai_embedding"), col("openai_error"), col("_content._data.*"))\
    .drop("_row_num")\
    .drop("_group_num")

    2. Answer a question with a given context

    In addition to computing text embeddings, OpenAI's ada-002 model is also very good at answering questions. The Prophecy interface allows users to input a question (and optionally provide a context) as components of the prompt sent to OpenAI. In response, OpenAI's ada-002 model returns an answer(s) to the question. See the input and output data previews before and after the OpenAI Gem to understand the operation.

    Overview of the Gem showing the input and output for answering a question

    2a. Configure

    Follow the steps below to configure the OpenAI Gem to answer a question, and to understand how to provide a context if desired.

    Configure the gem to answer a question with a given context

    Storing the OpenAI API token as a (1) Databricks Secret is highly recommended. For instructions click here. Be sure to use the (2) Fabric connection to the Databricks workspace which contains the Databricks scope and secrets configured in this Gem.

    Hardcoding the OpenAI credential is not recommended. Selecting this option could send credentials to be stored hardcoded in Git; use only for credentials that should be shared with the world. Contact us to understand the integrations with other secret managers. (contact.us@Prophecy.io)

    Now it's time to craft a prompt to send to the OpenAI ada-002 model. Select the Operation type from the dropdown menu. The operation Answer questions will prompt OpenAI's ada-002 model to answer the provided question using the datasets the model was trained on, which have some blindness. For many users, you'll want to provide some context as part of your prompt. The operation (3) Answer questions for given context will likely generate answers more related to the context. Select the input column which has the question of interest as the (4) Question text column. To provide context in addition to the question, select (5) Context text column. For example, if the question is Does Prophecy support on-premise environments?, an appropriate context would be some section of Prophecy's documentation. The (6) context and (7) question (query) comprise the prompt sent to OpenAI.

    2b. Input

    ColumnDescriptionRequired
    Questionstring - a question of interest to include in the prompt sent to OpenAI. Example: What is Prophecy's AI Assistant feature?True
    Contextstring - a text corpus related to the question of interest, also included in the prompt sent to OpenAI. Frequently the context column should undergo data transformations in the Gems preceding the OpenAI Gem. See this guide for a great example of preparing the text corpus and transforming sufficiently to include in a useful prompt.False

    2c. Output

    Since OpenAI's models are probabalistic, they return at least one, and frequently more than one, answer. These responses are formatted as a json array of answer choices. The user would usually select the best answer from the choices; we recommend selecting the first answer if you wish to select one by default. This can be done in the Gem following the OpenAI Gem as in this example.

    ColumnDescription
    openai_answerstruct - this column contains the response from OpenAI in as a json array. Example: {"choices":["Prophecy's AI Assistant feature is called Data Copilot."]} Select/filter from multiple answer choices in a Gem following the OpenAI Gem.
    openai_errorstring - this column is provided to display any error message returned from the OpenAI API; helpful for troubleshooting.

    2d. Generated code

    See below for a sample of the code which calls the OpenAI API to answer a question provided some context.

    def OpenAI_1(spark: SparkSession, collect_context: DataFrame) -> DataFrame:
    from spark_ai.llms.openai import OpenAiLLM
    from pyspark.dbutils import DBUtils
    OpenAiLLM(api_key = DBUtils(spark).secrets.get(scope = "[redacted]", key = "[redacted]"))\
    .register_udfs(spark = spark)

    return collect_context\
    .withColumn("_context", col("context"))\
    .withColumn("_query", col("input"))\
    .withColumn(
    "openai_answer",
    expr(
    "openai_answer_question(_context, _query, \" Answer the question based on the context below.\nContext:\n```\n{context}\n```\nQuestion: \n```\n{query}\n```\nAnswer:\n \")"
    )
    )\
    .drop("_context", "_query")

    FAQ

    Troubleshooting

    The output data sample following the OpenAI Gem also contains a column for any error message(s) returned from OpenAI. This handy column surfaces errors including invalid OpenAI credentials, invalid input questions, or problems with data formatting.

    Can I choose other OpenAI models?

    Currently we use ChatGPT 3.5 Turbo. Contact us for additional options: contact.us@Prophecy.io

    - - +

    OpenAI

    Spark Gem

    The OpenAI Gem allows the Prophecy user to interact with the OpenAI API using two different requests:

    1. Compute text embeddings
    2. Answer a question, where the user has the option to provide context

    Follow along to learn how to interact with the OpenAI API using Prophecy's easy-to-use interface. For an example set of Pipelines that use these Gems to create a Generative AI Chatbot, see this guide.

    caution

    As with all applications that interface with Large Language Models (LLMs), the OpenAI Gem can generate results that are incorrect and/or misleading. The OpenAI Gem is subject to the same limitations and risks as those posed by OpenAI itself.



    1. Compute text embeddings

    Given a question input, the OpenAI Gem will return a text embedding by calling the OpenAI ada-002 model. View the input and output from this Gem to understand the data formats and sample.

    Overview of the Gem showing the input and output for computing a text embedding

    1a. Configure

    Follow the steps below to configure the OpenAI Gem to compute text embeddings.

    Configure the Gem to compute a text embedding

    Storing the OpenAI API token as a (1) Databricks Secret is highly recommended. For instructions click here. Be sure to use the (2) Fabric connection to the Databricks workspace which contains the Databricks scope and secrets configured in this Gem. Contact us to understand the integrations with other secret managers.

    Select the Operation type from the dropdown menu. (3) Compute text embeddings operation will send the selected (4) Texts column to the OpenAI API. For each entry in the Texts column, OpenAI's ada-002 model will return a text embedding.

    Instead of sending a single row to OpenAI's API, select the (5) Group data option. Group data is a window function, using a window of size 20, (6) ordered by the selected column. Using the Group data option influences model performance based on the column selected.

    1b. Input

    ColumnDescriptionRequired
    Question/Textstring - a question or text string of interestTrue

    1c. Output

    ColumnDescription
    openai_embeddingarray(float) - The vector embedding returned from OpenAI corresponding to the input question/text. Each record is an array of 1536 floating point numbers, such as [-0.0018493991, -0.0059955865, ... -0.02498541].
    openai_errorstring - this column is provided to display any error message returned from the OpenAI API; helpful for troubleshooting.

    1d. Generated code

    All the visual designs are converted to code and committed to the Prophecy user's Git repository. See below for a sample of the code which calls the OpenAI API to compute text embeddings.

    def vectorize(spark: SparkSession, question_seed: DataFrame) -> DataFrame:
    from spark_ai.llms.openai import OpenAiLLM
    from pyspark.dbutils import DBUtils
    OpenAiLLM(api_key = DBUtils(spark).secrets.get(scope = "<redacted>", key = "<redacted>"))\
    .register_udfs(spark = spark)

    return question_seed\
    .withColumn("_row_num", row_number().over(Window.partitionBy().orderBy(col("input"))))\
    .withColumn("_group_num", ceil(col("_row_num") / 20))\
    .withColumn("_data", struct(col("*")))\
    .groupBy(col("_group_num"))\
    .agg(collect_list(col("_data")).alias("_data"), collect_list(col("input")).alias("_texts"))\
    .withColumn("_embedded", expr(f"openai_embed_texts(_texts)"))\
    .select(
    col("_texts"),
    col("_embedded.embeddings").alias("_embeddings"),
    col("_embedded.error").alias("openai_error"),
    col("_data")
    )\
    .select(expr("explode_outer(arrays_zip(_embeddings, _data))").alias("_content"), col("openai_error"))\
    .select(col("_content._embeddings").alias("openai_embedding"), col("openai_error"), col("_content._data.*"))\
    .drop("_row_num")\
    .drop("_group_num")

    2. Answer a question with a given context

    In addition to computing text embeddings, OpenAI's ada-002 model is also very good at answering questions. The Prophecy interface allows users to input a question (and optionally provide a context) as components of the prompt sent to OpenAI. In response, OpenAI's ada-002 model returns an answer(s) to the question. See the input and output data previews before and after the OpenAI Gem to understand the operation.

    Overview of the Gem showing the input and output for answering a question

    2a. Configure

    Follow the steps below to configure the OpenAI Gem to answer a question, and to understand how to provide a context if desired.

    Configure the gem to answer a question with a given context

    Storing the OpenAI API token as a (1) Databricks Secret is highly recommended. For instructions click here. Be sure to use the (2) Fabric connection to the Databricks workspace which contains the Databricks scope and secrets configured in this Gem.

    Hardcoding the OpenAI credential is not recommended. Selecting this option could send credentials to be stored hardcoded in Git; use only for credentials that should be shared with the world. Contact us to understand the integrations with other secret managers. (contact.us@Prophecy.io)

    Now it's time to craft a prompt to send to the OpenAI ada-002 model. Select the Operation type from the dropdown menu. The operation Answer questions will prompt OpenAI's ada-002 model to answer the provided question using the datasets the model was trained on, which have some blindness. For many users, you'll want to provide some context as part of your prompt. The operation (3) Answer questions for given context will likely generate answers more related to the context. Select the input column which has the question of interest as the (4) Question text column. To provide context in addition to the question, select (5) Context text column. For example, if the question is Does Prophecy support on-premise environments?, an appropriate context would be some section of Prophecy's documentation. The (6) context and (7) question (query) comprise the prompt sent to OpenAI.

    2b. Input

    ColumnDescriptionRequired
    Questionstring - a question of interest to include in the prompt sent to OpenAI. Example: What is Prophecy's AI Assistant feature?True
    Contextstring - a text corpus related to the question of interest, also included in the prompt sent to OpenAI. Frequently the context column should undergo data transformations in the Gems preceding the OpenAI Gem. See this guide for a great example of preparing the text corpus and transforming sufficiently to include in a useful prompt.False

    2c. Output

    Since OpenAI's models are probabilistic, they return at least one, and frequently more than one, answer. These responses are formatted as a JSON array of answer choices. The user would usually select the best answer from the choices; we recommend selecting the first answer if you wish to select one by default. This can be done in the Gem following the OpenAI Gem as in this example.

    ColumnDescription
    openai_answerstruct - this column contains the response from OpenAI as a JSON array. Example: {"choices":["Prophecy's AI Assistant feature is called Data Copilot."]} Select/filter from multiple answer choices in a Gem following the OpenAI Gem.
    openai_errorstring - this column is provided to display any error message returned from the OpenAI API; helpful for troubleshooting.

    2d. Generated code

    See below for a sample of the code which calls the OpenAI API to answer a question provided some context.

    def OpenAI_1(spark: SparkSession, collect_context: DataFrame) -> DataFrame:
        """Add an ``openai_answer`` column answering each row's question.

        The ``context`` and ``input`` columns of ``collect_context`` are copied
        into temporary ``_context`` / ``_query`` columns, passed to the
        ``openai_answer_question`` SQL function together with the prompt
        template below, and then dropped again before the result is returned.
        """
        from spark_ai.llms.openai import OpenAiLLM
        from pyspark.dbutils import DBUtils

        # The API key is read from a secret scope instead of being hardcoded.
        api_key = DBUtils(spark).secrets.get(scope = "[redacted]", key = "[redacted]")
        # Presumably this registration is what makes openai_answer_question
        # callable from expr() below -- confirm against spark_ai.
        OpenAiLLM(api_key = api_key).register_udfs(spark = spark)

        staged = collect_context.withColumn("_context", col("context"))
        staged = staged.withColumn("_query", col("input"))
        answered = staged.withColumn(
            "openai_answer",
            expr(
                "openai_answer_question(_context, _query, \" Answer the question based on the context below.\nContext:\n```\n{context}\n```\nQuestion: \n```\n{query}\n```\nAnswer:\n \")"
            ),
        )
        return answered.drop("_context", "_query")

    FAQ

    Troubleshooting

    The output data sample following the OpenAI Gem also contains a column for any error message(s) returned from OpenAI. This handy column surfaces errors including invalid OpenAI credentials, invalid input questions, or problems with data formatting.

    Can I choose other OpenAI models?

    Currently we use ChatGPT 3.5 Turbo. Contact us for additional options: contact.us@Prophecy.io

    + + \ No newline at end of file diff --git a/Spark/gems/machine-learning/ml-pinecone-lookup/index.html b/Spark/gems/machine-learning/ml-pinecone-lookup/index.html index 2c4faefe08..6ed2602315 100644 --- a/Spark/gems/machine-learning/ml-pinecone-lookup/index.html +++ b/Spark/gems/machine-learning/ml-pinecone-lookup/index.html @@ -6,15 +6,14 @@ PineconeLookup | Prophecy - - - - + + +
    -

    PineconeLookup

    Spark Gem

    The PineconeLookup Gem identifies content that is similar to a provided vector embedding. The Gem calls the Pinecone API and returns a set of IDs with highest similarity to the provided embedding.

    • Parameters: Configure the parameters needed to call the Pinecone API.

    • Input: This Gem requires an embedding as input. The embedding is provided by a foundational model like OpenAI.

    • Output: This Gem outputs an array of IDs with corresponding similarity scores.

    Input and Output

    Now let’s understand the Gem Parameters, Input, and Output in detail.


    Gem Parameters

    Parameters

    Verify the (1) input columns contain a column with the embeddings. The structure of this column's entries must be compatible with the structure of the Pinecone index.

    Credentials

    Configure the Pinecone API credentials here. Storing the Pinecone API token as a (2) Databricks Secret is highly recommended. For instructions click here. Be sure to use the (3) Fabric connection to the Databricks workspace which contains the Databricks scope and secrets configured in this Gem.

    Hardcoding the Pinecone credential is not recommended. Selecting this option could send credentials to be stored hardcoded in Git; reach out to understand the integrations with other secret managers.

    Properties

    Pinecone DB uses indexing to map the vectors to a data structure that will enable faster searching. The PineconeLookup Gem searches through a Pinecone index to identify embeddings with similarity to the input embedding. Enter the Pinecone (4) Index name which you’d like to use for looking up embeddings.

    Select one of the Gem’s input columns with vector embeddings as the (5) Vector column to send to Pinecone’s API. The column must be compatible with the Pinecone Index. To change the column’s datatype and properties, configure the Gem(s) preceding the PineconeLookup Gem.

    Pinecone’s API can return multiple results. Depending on the use case, select the desired (6) Number of results sorted by similarity score. The result with highest similarity to the user’s text question will be listed first.

    Input

    PineconeLookup requires a model_embedding column as input. Use one of Prophecy's Machine Learning Gems to provide the model_embedding. For example, the OpenAI Gem can precede the PineconeLookup Gem in the Pipeline. The OpenAI Gem, configured to Compute a text embedding, will output an openai_embedding column. This is a suitable input for the PineconeLookup Gem.

    ColumnDescriptionRequired
    model_embeddingarray(float) - The format of this embedding is important. It must be an array of floating point numbers that matches the requirements of the Pinecone index. For example, we used a Pinecone index with 1536 dimensions, Cosine metric, and an s1 pod type. So each record in the model_embedding column must be an array of 1536 floating point numbers, such as [-0.0018493991, -0.0059955865, ... -0.02498541].True

    Output

    The output Dataset contains the pinecone_matches and pinecone_error columns. For each input content entry, this Gem adds an array to the pinecone_matches column. The output array will have Number of Results entries.

    ColumnDescription
    pinecone_matchesarray - an array of several content IDs and their scores. Example: [{"id":"web-223","score":0.8437653},{"id":"web-224","score":0.8403446}, ...{"id":"web-237","score":0.82916564}]
    pinecone_errorstring - this column is provided to show any error message returned from Pinecone’s API; helpful for troubleshooting errors related to the PineconeLookup Gem.

    Prophecy converts the visual design into Spark code available on the Prophecy user's Git repository. Find the Spark code for the PineconeLookup Gem below.

    def vector_lookup(Spark: SparkSession, in0: DataFrame) -> DataFrame:
        """Query a Pinecone index with each row's embedding.

        Copies the ``<model>_embedding`` column of ``in0`` into a temporary
        ``_vector`` column, evaluates the ``pinecone_query`` SQL function on it,
        and returns ``in0`` with two added columns taken from the response:
        ``pinecone_matches`` (``_response.matches``) and ``pinecone_error``
        (``_response.error``). The temporary columns are dropped.

        Placeholders such as ``< my_scope >``, ``< my_key >``, ``<index name>``
        and ``<model>_embedding`` must be replaced with real values.
        """
        # Module names are lowercase; `pySpark` / `Spark_ai` fail to import on
        # case-sensitive filesystems. `col` is imported here because it is used below.
        from pyspark.sql.functions import col, expr, array, struct
        from spark_ai.dbs.pinecone import PineconeDB, IdVector
        from pyspark.dbutils import DBUtils

        # The Pinecone API key is read from a secret scope; the second argument
        # is presumably the Pinecone environment -- confirm against spark_ai.
        PineconeDB(DBUtils(Spark).secrets.get(scope = "< my_scope >", key = "< my_key >"), "us-east-1-aws")\
            .register_udfs(Spark)

        # The index name must be a quoted SQL string literal, so both the opening
        # and closing quotes are escaped. {3} is the number of results requested.
        return in0\
            .withColumn("_vector", col("<model>_embedding"))\
            .withColumn("_response", expr(f"pinecone_query(\"<index name>\", _vector, {3})"))\
            .withColumn("pinecone_matches", col("_response.matches"))\
            .withColumn("pinecone_error", col("_response.error"))\
            .drop("_vector", "_response")

    FAQ

    Troubleshooting

    To troubleshoot the Gem preceding PineconeLookup, open the data preview output from the previous Gem. For example, if the embedding structure is incorrect, adjust the previous Gem, run it, and view that Gem’s output data preview.

    Creating a Pinecone Index

    If you don’t have one yet, create a Pinecone index. Click here for pointers on choosing an index type and size. How to populate the index? For example, this guide shows how to ingest and vectorize web content data to store in a Pinecone Database index.

    - - +

    PineconeLookup

    Spark Gem

    The PineconeLookup Gem identifies content that is similar to a provided vector embedding. The Gem calls the Pinecone API and returns a set of IDs with highest similarity to the provided embedding.

    • Parameters: Configure the parameters needed to call the Pinecone API.

    • Input: This Gem requires an embedding as input. The embedding is provided by a foundational model like OpenAI.

    • Output: This Gem outputs an array of IDs with corresponding similarity scores.

    Input and Output

    Now let’s understand the Gem Parameters, Input, and Output in detail.


    Gem Parameters

    Parameters

    Verify the (1) input columns contain a column with the embeddings. The structure of this column's entries must be compatible with the structure of the Pinecone index.

    Credentials

    Configure the Pinecone API credentials here. Storing the Pinecone API token as a (2) Databricks Secret is highly recommended. For instructions click here. Be sure to use the (3) Fabric connection to the Databricks workspace which contains the Databricks scope and secrets configured in this Gem.

    Hardcoding the Pinecone credential is not recommended. Selecting this option could send credentials to be stored hardcoded in Git; reach out to understand the integrations with other secret managers.

    Properties

    Pinecone DB uses indexing to map the vectors to a data structure that will enable faster searching. The PineconeLookup Gem searches through a Pinecone index to identify embeddings with similarity to the input embedding. Enter the Pinecone (4) Index name which you’d like to use for looking up embeddings.

    Select one of the Gem’s input columns with vector embeddings as the (5) Vector column to send to Pinecone’s API. The column must be compatible with the Pinecone Index. To change the column’s datatype and properties, configure the Gem(s) preceding the PineconeLookup Gem.

    Pinecone’s API can return multiple results. Depending on the use case, select the desired (6) Number of results sorted by similarity score. The result with highest similarity to the user’s text question will be listed first.

    Input

    PineconeLookup requires a model_embedding column as input. Use one of Prophecy's Machine Learning Gems to provide the model_embedding. For example, the OpenAI Gem can precede the PineconeLookup Gem in the Pipeline. The OpenAI Gem, configured to Compute a text embedding, will output an openai_embedding column. This is a suitable input for the PineconeLookup Gem.

    ColumnDescriptionRequired
    model_embeddingarray(float) - The format of this embedding is important. It must be an array of floating point numbers that matches the requirements of the Pinecone index. For example, we used a Pinecone index with 1536 dimensions, Cosine metric, and an s1 pod type. So each record in the model_embedding column must be an array of 1536 floating point numbers, such as [-0.0018493991, -0.0059955865, ... -0.02498541].True

    Output

    The output Dataset contains the pinecone_matches and pinecone_error columns. For each input content entry, this Gem adds an array to the pinecone_matches column. The output array will have Number of Results entries.

    ColumnDescription
    pinecone_matchesarray - an array of several content IDs and their scores. Example: [{"id":"web-223","score":0.8437653},{"id":"web-224","score":0.8403446}, ...{"id":"web-237","score":0.82916564}]
    pinecone_errorstring - this column is provided to show any error message returned from Pinecone’s API; helpful for troubleshooting errors related to the PineconeLookup Gem.

    Prophecy converts the visual design into Spark code available on the Prophecy user's Git repository. Find the Spark code for the PineconeLookup Gem below.

    def vector_lookup(Spark: SparkSession, in0: DataFrame) -> DataFrame:
        """Query a Pinecone index with each row's embedding.

        Copies the ``<model>_embedding`` column of ``in0`` into a temporary
        ``_vector`` column, evaluates the ``pinecone_query`` SQL function on it,
        and returns ``in0`` with two added columns taken from the response:
        ``pinecone_matches`` (``_response.matches``) and ``pinecone_error``
        (``_response.error``). The temporary columns are dropped.

        Placeholders such as ``< my_scope >``, ``< my_key >``, ``<index name>``
        and ``<model>_embedding`` must be replaced with real values.
        """
        # Module names are lowercase; `pySpark` / `Spark_ai` fail to import on
        # case-sensitive filesystems. `col` is imported here because it is used below.
        from pyspark.sql.functions import col, expr, array, struct
        from spark_ai.dbs.pinecone import PineconeDB, IdVector
        from pyspark.dbutils import DBUtils

        # The Pinecone API key is read from a secret scope; the second argument
        # is presumably the Pinecone environment -- confirm against spark_ai.
        PineconeDB(DBUtils(Spark).secrets.get(scope = "< my_scope >", key = "< my_key >"), "us-east-1-aws")\
            .register_udfs(Spark)

        # The index name must be a quoted SQL string literal, so both the opening
        # and closing quotes are escaped. {3} is the number of results requested.
        return in0\
            .withColumn("_vector", col("<model>_embedding"))\
            .withColumn("_response", expr(f"pinecone_query(\"<index name>\", _vector, {3})"))\
            .withColumn("pinecone_matches", col("_response.matches"))\
            .withColumn("pinecone_error", col("_response.error"))\
            .drop("_vector", "_response")

    FAQ

    Troubleshooting

    To troubleshoot the Gem preceding PineconeLookup, open the data preview output from the previous Gem. For example, if the embedding structure is incorrect, adjust the previous Gem, run it, and view that Gem’s output data preview.

    Creating a Pinecone Index

    If you don’t have one yet, create a Pinecone index. Click here for pointers on choosing an index type and size. How to populate the index? For example, this guide shows how to ingest and vectorize web content data to store in a Pinecone Database index.

    + + \ No newline at end of file diff --git a/Spark/gems/machine-learning/ml-text-processing/index.html b/Spark/gems/machine-learning/ml-text-processing/index.html index 84b4064af0..2cff282b9e 100644 --- a/Spark/gems/machine-learning/ml-text-processing/index.html +++ b/Spark/gems/machine-learning/ml-text-processing/index.html @@ -6,15 +6,14 @@ TextProcessing | Prophecy - - - - + + +
    -

    TextProcessing

    Spark Gem

    The TextProcessing Gem enables text data preparation for machine learning in two different ways:

    1. Load web URLs and extract text.
    2. Split text data into equal chunks.

    Follow along to see how to use the TextProcessing Gem. For an example set of Pipelines that uses this Gem to create a Generative AI Chatbot, see this guide.


    1. Load web URLs and Extract Text

    Given a column with web URLs, the Load web URLs operation will scrape the content from each URL, and output the content as a binary format or as a human readable text format, depending on the operation type selected. The figure below shows the Load web URL and Extract Text operation.

    Overview web scrape and extract text

    1a. Configure web scrape

    Configure to web scrape

    Configure the (1) Operation Type to Load url (web scrape), and optionally extract the text. Specify which input (2) Column name contains the web urls. If the extract text operation is selected, the text will be converted from binary to human readable format. When would you want to use the binary format? Binary web scraping is useful for downloading content including images or archived documents.

    1b. Input

    ParameterDescriptionRequired
    Column name (string with urls)string - the input column which contains the strings of web URLsTrue

    1c. Output

    ParameterDescription
    Result content Load url (web scrape)binary - the contents of each web page
    Result content Load url (web scrape) and extract textstring - the contents of each web page, converted from binary to human readable text

    1d. Generated Code

    def scrape_pages(spark: SparkSession, in0: DataFrame) -> DataFrame:
        """Return ``in0`` with a ``result_content`` column from ``web_scrape(loc)``.

        ``loc`` is the column of ``in0`` handed to the ``web_scrape`` SQL
        function; in this sample it is presumably the column holding the URLs.
        """
        from pyspark.sql.functions import expr, array, struct
        from spark_ai.webapps import WebUtils

        # Registration happens before the expression is built; presumably it is
        # what exposes web_scrape to Spark SQL -- confirm against spark_ai.
        web_utils = WebUtils()
        web_utils.register_udfs(spark)

        scraped = expr(f"web_scrape(loc)")
        return in0.withColumn("result_content", scraped)

    2. Split text data into equal chunks

    Sometimes you'd like to send text data to a foundational model or store in a vector database, but the text is too long. For this case, just split the text into "chunks" of characters.

    Overview Chunkify

    2a. Configure text splitting

    Given a text input, the Split data operation will separate the input column entries into chunks of specified size.

    Configure to Chunkify

    Select the (1) Operation type to split text into equal chunks. Specify which input (2) Column name contains the relevant content. Specify an integer chunk (3) Size relevant for your generative AI use case.

    2b. Input

    ParameterDescriptionRequired
    Column namestring - the text content which should be split into equal chunksTrue
    Sizeinteger - the size of each chunk, number of characters. Example: 1000True

    2c. Output

    ParameterDescription
    result_chunksarray(string) - an array of text strings, each string representing one chunk of the larger text content

    2d. Generated code

    def Chunkify(spark: SparkSession, web_bronze_content: DataFrame) -> DataFrame:
        """Return the input with a ``result_chunks`` column of text chunks.

        The new column is the result of the SQL expression
        ``text_split_into_chunks(content, 1000)``: the ``content`` column is
        the text passed in and 1000 is the chunk size used in this sample.
        """
        from pyspark.sql.functions import expr, array, struct
        from spark_ai.files.text import FileTextUtils

        # Registration happens before the expression is built; presumably it is
        # what exposes text_split_into_chunks to Spark SQL -- confirm against spark_ai.
        text_utils = FileTextUtils()
        text_utils.register_udfs(spark)

        chunks = expr(f"text_split_into_chunks(content, 1000)")
        return web_bronze_content.withColumn("result_chunks", chunks)

    FAQ

    How does this Gem fit into the bigger picture of building a generative AI application?

    For an example set of Pipelines that uses this Gem to create a Generative AI Chatbot, see this guide. Feel free to reach out and explore your use case with us.

    Troubleshooting

    Select a chunk size according to the limitations of your vector database index.

    - - +

    TextProcessing

    Spark Gem

    The TextProcessing Gem enables text data preparation for machine learning in two different ways:

    1. Load web URLs and extract text.
    2. Split text data into equal chunks.

    Follow along to see how to use the TextProcessing Gem. For an example set of Pipelines that uses this Gem to create a Generative AI Chatbot, see this guide.


    1. Load web URLs and Extract Text

    Given a column with web URLs, the Load web URLs operation will scrape the content from each URL, and output the content as a binary format or as a human readable text format, depending on the operation type selected. The figure below shows the Load web URL and Extract Text operation.

    Overview web scrape and extract text

    1a. Configure web scrape

    Configure to web scrape

    Configure the (1) Operation Type to Load url (web scrape), and optionally extract the text. Specify which input (2) Column name contains the web urls. If the extract text operation is selected, the text will be converted from binary to human readable format. When would you want to use the binary format? Binary web scraping is useful for downloading content including images or archived documents.

    1b. Input

    ParameterDescriptionRequired
    Column name (string with urls)string - the input column which contains the strings of web URLsTrue

    1c. Output

    ParameterDescription
    Result content Load url (web scrape)binary - the contents of each web page
    Result content Load url (web scrape) and extract textstring - the contents of each web page, converted from binary to human readable text

    1d. Generated Code

    def scrape_pages(spark: SparkSession, in0: DataFrame) -> DataFrame:
        """Return ``in0`` with a ``result_content`` column from ``web_scrape(loc)``.

        ``loc`` is the column of ``in0`` handed to the ``web_scrape`` SQL
        function; in this sample it is presumably the column holding the URLs.
        """
        from pyspark.sql.functions import expr, array, struct
        from spark_ai.webapps import WebUtils

        # Registration happens before the expression is built; presumably it is
        # what exposes web_scrape to Spark SQL -- confirm against spark_ai.
        web_utils = WebUtils()
        web_utils.register_udfs(spark)

        scraped = expr(f"web_scrape(loc)")
        return in0.withColumn("result_content", scraped)

    2. Split text data into equal chunks

    Sometimes you'd like to send text data to a foundational model or store in a vector database, but the text is too long. For this case, just split the text into "chunks" of characters.

    Overview Chunkify

    2a. Configure text splitting

    Given a text input, the Split data operation will separate the input column entries into chunks of specified size.

    Configure to Chunkify

    Select the (1) Operation type to split text into equal chunks. Specify which input (2) Column name contains the relevant content. Specify an integer chunk (3) Size relevant for your generative AI use case.

    2b. Input

    ParameterDescriptionRequired
    Column namestring - the text content which should be split into equal chunksTrue
    Sizeinteger - the size of each chunk, number of characters. Example: 1000True

    2c. Output

    ParameterDescription
    result_chunksarray(string) - an array of text strings, each string representing one chunk of the larger text content

    2d. Generated code

    def Chunkify(spark: SparkSession, web_bronze_content: DataFrame) -> DataFrame:
        """Return the input with a ``result_chunks`` column of text chunks.

        The new column is the result of the SQL expression
        ``text_split_into_chunks(content, 1000)``: the ``content`` column is
        the text passed in and 1000 is the chunk size used in this sample.
        """
        from pyspark.sql.functions import expr, array, struct
        from spark_ai.files.text import FileTextUtils

        # Registration happens before the expression is built; presumably it is
        # what exposes text_split_into_chunks to Spark SQL -- confirm against spark_ai.
        text_utils = FileTextUtils()
        text_utils.register_udfs(spark)

        chunks = expr(f"text_split_into_chunks(content, 1000)")
        return web_bronze_content.withColumn("result_chunks", chunks)

    FAQ

    How does this Gem fit into the bigger picture of building a generative AI application?

    For an example set of Pipelines that uses this Gem to create a Generative AI Chatbot, see this guide. Feel free to reach out and explore your use case with us.

    Troubleshooting

    Select a chunk size according to the limitations of your vector database index.

    + + \ No newline at end of file diff --git a/Spark/gems/source-target/advanced/lookup/index.html b/Spark/gems/source-target/advanced/lookup/index.html index 01cfd50965..46e4dd0d2e 100644 --- a/Spark/gems/source-target/advanced/lookup/index.html +++ b/Spark/gems/source-target/advanced/lookup/index.html @@ -6,15 +6,14 @@ Lookup | Prophecy - - - - + + +
    -

    Lookup

    Spark Gem

    Lookups are a special kind of Gem that allow you to mark a particular DataFrame as a Broadcast DataFrame. Spark will ensure that this data is available on every computation node so that these lookups can be done without shuffling data. This is useful for looking up values in tables, hence the name.

    Lookup Gem

    Lookup UI

    NameDescription
    1Key ColumnsSpecify one or more columns to use as the lookup key in the source DataFrame
    2Value ColumnsPick which columns can be referenced wherever this Lookup is used

    Using Lookups

    Lookups can be used wherever any other Expression can be used, but usage depends on your Expression language of choice. Lookup references follow a certain pattern:

    lookup("<LOOKUP NAME>", <KEY COLUMN>).getField(<VALUE COLUMN>)

    Column-based lookups

    So, based on our above MyLookup example we'd use:

    lookup("MyLookup", col("customer_id")).getField("order_category")

    For example, let's look at a Reformat component:

    Reformat example

    Here we have a column named category that is set to the value of MyLookup(customer_id)['order_category'] in SQL Expression mode. Whatever the value of order_category is for the key found in the c_id column (compared to the source customer_id key column) will be used for the new column.

    Literal lookups

    Since any column reference can be used in Lookup expressions, you can use Lookups with static keys:

    lookup("MyLookup", lit("0000")).getField("order_category")

    In this case, the expression evaluates to the value of order_category where customer_id is 0000. This can be useful in situations when you want to have a table of predefined keys and their values available in Expressions.

    - - +

    Lookup

    Spark Gem

    Lookups are a special kind of Gem that allow you to mark a particular DataFrame as a Broadcast DataFrame. Spark will ensure that this data is available on every computation node so that these lookups can be done without shuffling data. This is useful for looking up values in tables, hence the name.

    Lookup Gem

    Lookup UI

    NameDescription
    1Key ColumnsSpecify one or more columns to use as the lookup key in the source DataFrame
    2Value ColumnsPick which columns can be referenced wherever this Lookup is used

    Using Lookups

    Lookups can be used wherever any other Expression can be used, but usage depends on your Expression language of choice. Lookup references follow a certain pattern:

    lookup("<LOOKUP NAME>", <KEY COLUMN>).getField(<VALUE COLUMN>)

    Column-based lookups

    So, based on our above MyLookup example we'd use:

    lookup("MyLookup", col("customer_id")).getField("order_category")

    For example, let's look at a Reformat component:

    Reformat example

    Here we have a column named category that is set to the value of MyLookup(customer_id)['order_category'] in SQL Expression mode. Whatever the value of order_category is for the key found in the c_id column (compared to the source customer_id key column) will be used for the new column.

    Literal lookups

    Since any column reference can be used in Lookup expressions, you can use Lookups with static keys:

    lookup("MyLookup", lit("0000")).getField("order_category")

    In this case, the expression evaluates to the value of order_category where customer_id is 0000. This can be useful in situations when you want to have a table of predefined keys and their values available in Expressions.

    + + \ No newline at end of file diff --git a/Spark/gems/source-target/advanced/synthetic-data-generator/index.html b/Spark/gems/source-target/advanced/synthetic-data-generator/index.html index c2280fe236..ad6c1dfafa 100644 --- a/Spark/gems/source-target/advanced/synthetic-data-generator/index.html +++ b/Spark/gems/source-target/advanced/synthetic-data-generator/index.html @@ -6,14 +6,13 @@ Data Generator | Prophecy - - - - + + +
    -

    Data Generator

    Spark Gem

    Generate synthetic data with this special kind of Source Gem.

    Generating mock data is crucial when building data Pipelines to simulate real-world scenarios for testing, validating, and optimizing Pipeline performance before using actual production data. It helps ensure the Pipeline handles various data formats, structures, and edge cases effectively, minimizing potential issues in a live environment.

    A wide range of synthetic data can be created using any column name and an array of data types. For example, generate browser history data as shown below.

    img

    Follow the steps below to generate your own mock data using the Data Generator Gem.

    Cluster requirements

    Create a Fabric and configure the Job Size as below, or log into an existing Spark cluster UI. Here we use Databricks as an example.

    1. Verify the Databricks Runtime uses Python version >= 3.8. +

      Data Generator

      Spark Gem

      Generate synthetic data with this special kind of Source Gem.

      Generating mock data is crucial when building data Pipelines to simulate real-world scenarios for testing, validating, and optimizing Pipeline performance before using actual production data. It helps ensure the Pipeline handles various data formats, structures, and edge cases effectively, minimizing potential issues in a live environment.

      A wide range of synthetic data can be created using any column name and an array of data types. For example, generate browser history data as shown below.

      img

      Follow the steps below to generate your own mock data using the Data Generator Gem.

      Cluster requirements

      Create a Fabric and configure the Job Size as below, or log into an existing Spark cluster UI. Here we use Databricks as an example.

      1. Verify the Databricks Runtime uses Python version >= 3.8. For example, Databricks Runtime 12.2 LTS uses Python 3.9.19. If you are using Databricks Runtime 12.2+, the Python version meets this requirement.
      2. Create a new Environment variable called "SPARK_VERSION" with value 3.3
      3. Confirm and restart the Spark cluster. requirements

      Prophecy requirements

      Open a Prophecy Project and upgrade the ProphecySparkBasicsPython Dependency to 0.2.34 or later. Connecting a Prophecy project to a Spark cluster with a different dependency version will prompt a cluster restart. Ideally this is a one-time restart, and you're ready to proceed!

      img

      Caution

      Using two Prophecy projects with the same Spark cluster will cause cluster restarts (when each project attaches to the cluster) unless the ProphecySparkBasicsPython versions match across both Projects. The same caution applies to ProphecyLibsPython versions.

      The Fix: Do yourself a favor and upgrade all your Prophecy projects to the same ProphecySparkBasicsPython and ProphecyLibsPython versions or use separate Spark clusters.

      Create the Gem

      Create a new Dataset and select the Type as Data Generator. Note we are not specifying a storage location yet; we will store the data in a separate Gem.
      img

      img

      Properties: Specify Data Structure

      What type of data do you need to generate? Specify the data structure using Random Data Providers. Prophecy offers a selection of Random Data Providers including integers, booleans, and elements from a list.

      img

      Generate column using a sequence of integers (left). Generate another column by referencing an existing catalog table (right). Randomly select elements of the foreign key from that table.
      @@ -21,7 +20,7 @@ img

      Connect the Data Generator SOURCE Gem to the Target Gem. img

      Be sure to configure the write mode for the target Gem. This is very important because the Data Generator Gem is not idempotent. There is a new random seed each time the Gem is run. img

      caution

      The Data Generator only generates the data. If you want to store the data just connect the output to a target Gem and configure the location, write properties etc. The data generated is new for each run (execution). The target write mode can be error, overwrite, append, or ignore as desired.

    - - + + \ No newline at end of file diff --git a/Spark/gems/source-target/advanced/synthetic-data-generator/providers/index.html b/Spark/gems/source-target/advanced/synthetic-data-generator/providers/index.html index c8e8b75847..fe8a99cecb 100644 --- a/Spark/gems/source-target/advanced/synthetic-data-generator/providers/index.html +++ b/Spark/gems/source-target/advanced/synthetic-data-generator/providers/index.html @@ -6,15 +6,14 @@ Providers | Prophecy - - - - + + +

    Providers

    To generate a new column of random data, select one of the providers below. There are some properties common to all providers. If you prefer, provide the same information as a JSON schema.

    Providers

    Data ProviderDescription
    Random NameGenerates random names. Select Full Name, First Name, or Last Name as the sub-types.
    Random AddressGenerates random addresses.
    Random EmailGenerates random emails.
    Random Phone NumberGenerates random phone numbers based on specified or default pattern. Example: specify the pattern for a phone number as (###) ###-####.
    Random String UUIDGenerates random UUIDs in string form.
    Random Boolean ValuesGenerates random boolean values (True/False).
    Random Integer NumbersGenerates random integers within the range from Start Value to End Value.
    Random Elements From ListGenerates random values from the list of values. Just type into the List Of Values field.
    Random DateGenerates random dates within the given range.
    Random DateTimeGenerates random datetime values within the given range.
    Random Foreign Key ValuesPicks values randomly from specified foreign key column. Select another table to act as the reference table and provide the location, e.g., catalog.database.table. Select any column from the reference table to designate as Reference Column Name.

    Common properties

    Properties common to all providers

    NameDescription
    Column NameCustom name for the output column.
    Data TypeData type of output column.
    Null Percentage (Optional)X percent of values will be populated as null in generated column based on Row Count.
    - - + + \ No newline at end of file diff --git a/Spark/gems/source-target/catalog-table/delta/index.html b/Spark/gems/source-target/catalog-table/delta/index.html index b752a31062..bc5c2ee959 100644 --- a/Spark/gems/source-target/catalog-table/delta/index.html +++ b/Spark/gems/source-target/catalog-table/delta/index.html @@ -6,15 +6,14 @@ Delta Table | Prophecy - - - - + + +

    Delta Table

    Reads and writes Delta tables that are managed by the execution environment's Metadata catalog (Metastore).

    note

    Set the property provider to Delta on the properties page.

    Source

    Source Parameters

    ParameterDescriptionRequired
    Database nameName of the databaseTrue
    Table nameName of the tableTrue
    ProviderMust be set to DeltaTrue
    Filter PredicateWhere clause to filter the tableFalse
    Read TimestampTime travel to a specific timestampFalse
    Read VersionTime travel to a specific version of the tableFalse
    note

    For time travel on Delta tables:

    1. Only Read Timestamp OR Read Version can be selected, not both.
    2. Timestamp should be between the first commit timestamp and the latest commit timestamp in the table.
    3. Version needs to be an integer with value between min and max version of table.

    By default, the most recent version of each row is fetched if no time travel option is used.

    info

    To read more about Delta time travel and its use cases click here.

    Source Example

    Generated Code

    Without filter predicate

    def Source(spark: SparkSession) -> DataFrame:
    return spark.read.table(f"test_db.test_table")

    With filter predicate

    def Source(spark: SparkSession) -> DataFrame:
    return spark.sql("SELECT * FROM test_db.test_table WHERE col > 10")

    Target

    Target Parameters

    ParameterDescriptionRequired
    Database nameName of the databaseTrue
    Table nameName of the tableTrue
    Custom file pathUse custom file path to store underlying files.False
    ProviderMust be set to DeltaTrue
    Write ModeHow to handle existing data. See this table for a list of available options. (Default is set to error.)True
    Use insert intoFlag to use insertInto method to write instead of saveFalse
    Optimize writeIf true, it optimizes Spark partition sizes based on the actual data.False
    Overwrite table schemaIf true, overwrites the schema of the Delta table.False
    Merge schemaIf true, then any columns that are present in the DataFrame but not in the target table are automatically added on to the end of the schema as part of a write transaction.False
    Partition ColumnsList of columns to partition the Delta table byFalse
    Overwrite partition predicateIf specified, then it selectively overwrites only the data that satisfies the given where clause expression.False
    note

    Among these write modes overwrite, append, ignore, and error work the same way as with other native Spark-supported formats such as Parquet.

    To read more about using merge write mode click here.

    To read more about using SCD2 merge write mode click here.

    Target Example

    Generated Code

    def Target(spark: SparkSession, in0: DataFrame):
    in0.write\
    .format("delta")\
    .mode("overwrite")\
    .saveAsTable("test_db.test_table")

    - - + + \ No newline at end of file diff --git a/Spark/gems/source-target/catalog-table/hive/index.html b/Spark/gems/source-target/catalog-table/hive/index.html index 7e3c398a66..d88ffa05a4 100644 --- a/Spark/gems/source-target/catalog-table/hive/index.html +++ b/Spark/gems/source-target/catalog-table/hive/index.html @@ -6,15 +6,14 @@ Hive Table | Prophecy - - - - + + +

    Hive Table

    Reads and writes data in Hive tables that are managed by the execution environment's Metadata catalog (Metastore).

    note

    Set the provider to Hive on the properties page.

    Source

    Source Parameters

    ParameterDescriptionRequiredDefault
    Database nameName of the databaseTrue
    Table nameName of the tableTrue
    ProviderMust be set to hiveTrue
    Filter PredicateWhere clause to filter the tableFalse(all records)

    Source Example

    Generated Code

    Without filter predicate

    def Source(spark: SparkSession) -> DataFrame:
    return spark.read.table(f"test_db.test_table")

    With filter predicate

    def Source(spark: SparkSession) -> DataFrame:
    return spark.sql("SELECT * FROM test_db.test_table WHERE col > 10")

    Target

    Target Parameters

    ParameterDescriptionRequiredDefault
    Database nameName of the databaseTrue
    Table nameName of the tableTrue
    Custom file pathUse custom file path to store underlying files.False
    ProviderMust be set to hiveTrue
    Write ModeHow to handle existing data. See this table for a list of available options.Trueerror
    File FormatFile format to use when saving data. See this table for supported formats.Trueparquet
    Partition ColumnsColumns to partition byFalse(empty)
    Use insert intoIf true, use .insertInto instead of .save when generating code.Falsefalse

    Supported Write Modes

    Write ModeDescription
    overwriteIf data already exists, overwrite with the contents of the DataFrame.
    appendIf data already exists, append the contents of the DataFrame.
    ignoreIf data already exists, do nothing with the contents of the DataFrame. This is similar to a CREATE TABLE IF NOT EXISTS in SQL.
    errorIf data already exists, throw an exception.

    Supported File formats

    1. Parquet
    2. Text file
    3. Avro
    4. ORC
    5. RC file
    6. Sequence file

    Target Example

    Generated Code

    def Target(spark: SparkSession, in0: DataFrame):
    in0.write\
    .format("hive")\
    .option("fileFormat", "parquet")\
    .mode("overwrite")\
    .saveAsTable("test_db.test_table")

    - - + + \ No newline at end of file diff --git a/Spark/gems/source-target/catalog-table/index.html b/Spark/gems/source-target/catalog-table/index.html index 8f9bec2af5..f426edb9df 100644 --- a/Spark/gems/source-target/catalog-table/index.html +++ b/Spark/gems/source-target/catalog-table/index.html @@ -6,15 +6,14 @@ Catalog Table | Prophecy - - - - + + + - - + + \ No newline at end of file diff --git a/Spark/gems/source-target/file/avro/index.html b/Spark/gems/source-target/file/avro/index.html index 0834f1b841..9f4a23d2e3 100644 --- a/Spark/gems/source-target/file/avro/index.html +++ b/Spark/gems/source-target/file/avro/index.html @@ -6,17 +6,16 @@ Avro | Prophecy - - - - + + +

    Avro

    Avro format is a row-based storage format for Hadoop, which is widely used as a serialization platform. Avro format stores the schema in JSON format, making it easy to read and interpret by any program. The data itself is stored in a binary format making it compact and efficient.

    This Gem allows you to read from or write to an Avro file.

    Source

    Source Parameters

    ParameterDescriptionRequiredDefault
    LocationFile path where avro files are presentTrueNone
    SchemaSchema to be applied on the loaded data. Can be defined/edited as JSON or inferred using Infer Schema button.TrueNone
    Recursive File LookupThis is used to recursively load files and it disables partition inferring. If data source explicitly specifies the partitionSpec when recursiveFileLookup is true, an exception will be thrown.FalseFalse
    Path Global FilterAn optional glob pattern to only include files with paths matching the pattern. The syntax follows GlobFilter. It does not change the behavior of partition discovery.FalseNone
    Modified BeforeAn optional timestamp to only include files with modification times occurring before the specified Time. The provided timestamp must be in the following form: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00)FalseNone
    Modified AfterAn optional timestamp to only include files with modification times occurring after the specified Time. The provided timestamp must be in the following form: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00)FalseNone
    Avro SchemaOptional schema in JSON format. See here for more details.FalseNone
    ignoreExtensionDEPRECATED. Enable to load files without the .avro extension. See caveats here.FalseTrue

    Schema Evolution

    When reading Avro, the Avro Schema option can be set to a newer evolved schema which is compatible but different from the schema written to storage. The resulting DataFrame will follow the newer, evolved schema. For example, if we set an evolved schema containing one additional column with a default value, the resulting DataFrame will contain the new column too.

    Ignoring the File Extension

    If the ignoreExtension option is enabled, all files (with and without the .avro extension) are loaded. The option has been deprecated, and it will be removed in a future release. Please use pathGlobFilter for filtering file names.

    Example

    Schema used in example above

    Avro schema used

    Generated Code

    def read_avro(spark: SparkSession) -> DataFrame:
    return spark.read\
    .format("avro")\
    .option("ignoreExtension", True)\
    .option(
    "avroSchema",
    "{\"type\":\"record\",\"name\":\"Person\",\"fields\":[{\"name\":\"firstname\",\"type\":\"string\"},{\"name\":\"middlename\",\"type\":\"string\"},{\"name\":\"lastname\",\"type\":\"string\"},{\"name\":\"dob_year\",\"type\":\"int\"},{\"name\":\"dob_month\",\"type\":\"int\"},{\"name\":\"gender\",\"type\":\"string\"},{\"name\":\"salary\",\"type\":\"int\"}]}"
    )\
    .load("dbfs:/FileStore/Users/abhinav/avro/test.avro")


    Target

    Target Parameters

    Write data as avro files at the specified path.

    ParameterDescriptionRequiredDefault
    LocationLocation to write the Avro files toTrueNone
    Avro SchemaOptional schema provided by a user in JSON format. This option can be set if the expected output Avro schema doesn't match the schema converted by Spark. For example, the expected schema of one column is of enum type, instead of string type in the default converted schema.FalseNone
    Record NameTop level record name in write result, which is required in Avro spec.FalsetopLevelRecord
    Record NamespaceRecord namespace in write result.False""
    CompressionCompression codec used when writing.
    Currently supported codecs are uncompressed, snappy, deflate, bzip2, xz and zstandard. Defaults to whatever spark.sql.avro.compression.codec is set to.
    Falsesnappy
    Write ModeHow to handle existing data. See this table for a list of available options.Trueerror
    Partition ColumnsList of columns to partition the avro files byFalseNone

    Supported Write Modes

    Write ModeDescription
    overwriteIf data already exists, overwrite with the contents of the DataFrame.
    appendIf data already exists, append the contents of the DataFrame.
    ignoreIf data already exists, do nothing with the contents of the DataFrame. This is similar to a CREATE TABLE IF NOT EXISTS in SQL.
    errorIf data already exists, throw an exception.

    Example

    Generated Code

    def write_avro(spark: SparkSession, in0: DataFrame):
    in0.write\
    .format("avro")\
    .mode("overwrite")\
    .partitionBy("dob_year","dob_month")\
    .save("dbfs:/data/test_output.avro")
    info

    To know more about tweaking Avro related properties in Spark config click here.

    - - + + \ No newline at end of file diff --git a/Spark/gems/source-target/file/csv/index.html b/Spark/gems/source-target/file/csv/index.html index fced8d88d4..b301faff63 100644 --- a/Spark/gems/source-target/file/csv/index.html +++ b/Spark/gems/source-target/file/csv/index.html @@ -6,15 +6,14 @@ CSV | Prophecy - - - - + + +

    CSV

    Allows you to read or write delimited files such as CSV (Comma-separated Values) or TSV (Tab-separated Values).

    Source

    Source Parameters

    CSV Source supports all the available Spark read options for CSV.

    The below list contains the additional parameters to read a CSV file:

    ParameterDescriptionRequired
    Dataset NameName of the DatasetTrue
    LocationLocation of the file(s) to be loaded
    E.g.: dbfs:/data/test.csv
    True
    SchemaSchema to be applied on the loaded data. Can be defined/edited as JSON or inferred using Infer Schema button.True

    Example

    Step 1 - Create Source Component

    Generated Code

    def load_csv(spark: SparkSession) -> DataFrame:
    return spark.read\
    .schema(
    StructType([
    StructField("order_id", IntegerType(), True),
    StructField("customer_id", IntegerType(), True),
    StructField("order_status", StringType(), True),
    StructField("order_category", StringType(), True),
    StructField("order_date", DateType(), True),
    StructField("amount", DoubleType(), True)
    ])
    )\
    .option("header", True)\
    .option("quote", "\"")\
    .option("sep", ",")\
    .csv("dbfs:/Prophecy/anshuman@simpledatalabs.com/OrdersDatasetInput.csv")


    Target

    Target Parameters

    CSV Target supports all the available Spark write options for CSV.

    The below list contains the additional parameters to write a CSV file:

    ParameterDescriptionRequired
    Dataset NameName of the DatasetTrue
    LocationLocation where the file(s) will be written
    E.g.: dbfs:/data/output.csv
    True
    Write ModeHow to handle existing data. See this table for a list of available options.False

    Supported Write Modes

    Write ModeDescription
    overwriteIf data already exists, overwrite with the contents of the DataFrame.
    appendIf data already exists, append the contents of the DataFrame.
    ignoreIf data already exists, do nothing with the contents of the DataFrame. This is similar to a CREATE TABLE IF NOT EXISTS in SQL.
    errorIf data already exists, throw an exception.

    Example

    Step 1 - Create Target Component

    Generated Code

    def write_as_csv(spark: SparkSession, in0: DataFrame):
    in0.write\
    .option("header", True)\
    .option("sep", ",")\
    .mode("error")\
    .option("separator", ",")\
    .option("header", True)\
    .csv("dbfs:/Prophecy/anshuman@simpledatalabs.com/output.csv")

    Producing a single output file

    Because of Spark's distributed nature, output files are written as multiple separate partition files. If you need a single output file for some reason (such as reporting or exporting to an external system), use a Repartition Gem in Coalesce mode with 1 output partition:

    Coalesce example

    caution

    Note: This is not recommended for extremely large data sets as it may overwhelm the worker node writing the file.

    - - + + \ No newline at end of file diff --git a/Spark/gems/source-target/file/delta/index.html b/Spark/gems/source-target/file/delta/index.html index 33e11f7547..145ccab90b 100644 --- a/Spark/gems/source-target/file/delta/index.html +++ b/Spark/gems/source-target/file/delta/index.html @@ -6,16 +6,15 @@ Delta | Prophecy - - - - + + +

    Delta

    Reads and writes Delta tables, including Delta Merge operations and Time travel.

    Source

    Source Parameters

    ParameterDescriptionRequired
    LocationFile path for the Delta tableTrue
    Read TimestampTime travel to a specific timestampFalse
    Read VersionTime travel to a specific version of tableFalse
    note

    For time travel on Delta tables:

    1. Only Read Timestamp OR Read Version can be selected, not both.
    2. Timestamp should be between the first commit timestamp and the latest commit timestamp in the table.
    3. Version needs to be an integer. Its value has to be between min and max version of table.

    By default most recent version of each row is fetched if no time travel option is used.

    info

    To read more about Delta time travel and its use cases click here.

    Example

    Delta source example

    Generated Code

    Without time travel

    def ReadDelta(spark: SparkSession) -> DataFrame:
    return spark.read.format("delta").load("dbfs:/FileStore/data_engg/delta_demo/silver/orders")

    Timestamp-based time travel

    def ReadDelta(spark: SparkSession) -> DataFrame:
    return spark.read.format("delta").option("timestampAsOf", "2022-05-05")\
    .load("dbfs:/FileStore/data_engg/delta_demo/silver/orders")

    Version-based time travel

    def readDelta(spark: SparkSession) -> DataFrame:
    return spark.read.format("delta").option("versionAsOf", "0")\
    .load("dbfs:/FileStore/data_engg/delta_demo/silver/orders")


    Target

    Target Parameters

    ParameterDescriptionRequired
    LocationFile path to write the Delta table toTrue
    Write modeWrite mode for DataFrameTrue
    Optimise writeIf true, it optimizes Spark partition sizes based on the actual dataFalse
    Overwrite table schemaIf true, overwrites the schema of the Delta table with the schema of the incoming DataFrameFalse
    Merge schemaIf true, then any columns that are present in the DataFrame but not in the target table are automatically added on to the end of the schema as part of a write transactionFalse
    Partition ColumnsList of columns to partition the Delta table byFalse
    Overwrite partition predicateIf specified, then it selectively overwrites only the data that satisfies the given where clause expression.False

    Supported Write Modes

    Write ModeDescription
    overwriteIf data already exists, overwrite with the contents of the DataFrame
    appendIf data already exists, append the contents of the DataFrame
    ignoreIf data already exists, do nothing with the contents of the DataFrame. This is similar to a CREATE TABLE IF NOT EXISTS in SQL.
    errorIf data already exists, throw an exception.
    mergeInsert, delete and update data using the Delta merge command.
    SCD2 mergeIt is a Delta merge operation that stores and manages both current and historical data over time.

    Among these write modes, overwrite, append, ignore, and error work the same way as they do for Parquet file writes. Merge is explained with several examples in the following sections.

    Target Example

    Delta Target Example

    Generated Code

    def writeDelta(spark: SparkSession, in0: DataFrame):
    return in0.write\
    .format("delta")\
    .option("optimizeWrite", True)\
    .option("mergeSchema", True)\
    .option("replaceWhere", "order_dt > '2022-01-01'")\
    .option("overwriteSchema", True)\
    .mode("overwrite")\
    .partitionBy("order_dt")\
    .save("dbfs:/FileStore/data_engg/delta_demo/silver/orders")

    Delta MERGE

    You can upsert data from a source DataFrame into a target Delta table by using the MERGE operation. Delta MERGE supports Inserts, Updates, and Deletes in a variety of use cases, and Delta is particularly suited to examine data with individual records that slowly change over time. Here we consider the most common types of slowly changing dimension (SCD) cases: SCD1, SCD2, and SCD3. Records are modified in one of the following ways: history is not retained (SCD1), history is retained at the row level (SCD2), or history is retained at the column level (SCD3).

    SCD1

    Let's take the simplest case to illustrate a MERGE condition.

    Parameters

    ParameterDescriptionRequired
    Source aliasAlias to use for the source DataFrameTrue
    Target aliasAlias to use for existing target Delta tableTrue
    Merge ConditionCondition to merge data from source DataFrame to target table, which would be used to perform update, delete, or insert actions as specified.True
    When Matched Update ActionUpdate the row from Source that already exists in Target (based on Merge Condition)False
    When Matched Update ConditionOptional additional condition for updating row. If specified then it must evaluate to true for the row to be updated.False
    When Matched Update ExpressionsOptional expressions for setting the values of columns that need to be updated.False
    When Matched Delete ActionDelete rows if Merge Condition (and the optional additional condition) evaluates to trueFalse
    When Matched Delete ConditionOptional additional condition for deleting row. If a condition is specified then it must evaluate to true for the row to be deleted.False
    When Not Matched ActionThe action to perform if the row from Source is not present in Target (based on Merge Condition)False
    When Not Matched ConditionOptional condition for inserting row. If a condition is specified then it must evaluate to true for the row to be inserted.False
    When Not Matched ExpressionsOptional expressions for setting the values of columns that need to be inserted.False
    note
    1. At least one action out of update, delete or insert needs to be set.
    2. Delete removes the data from the latest version of the Delta table but does not remove it from the physical storage until the old versions are explicitly vacuumed. See vacuum for details.
    3. A merge operation can fail if multiple rows of the source DataFrame match and the merge attempts to update the same rows of the target Delta table. A Deduplicate gem can be placed before the target if duplicate rows in the source are expected.

    When possible, provide predicates on the partition columns for a partitioned Delta table as such predicates can significantly speed up the operations.

    Example

    Let's assume our initial customers table is as below:

    Initial customer table

    And we have the below updates coming into customers table:

    Customer table updates

    Our output and configurations for SCD1 merge will look like below:

    Generated Code

    def writeDeltaMerge(spark: SparkSession, in0: DataFrame):
    from delta.tables import DeltaTable, DeltaMergeBuilder

    if DeltaTable.isDeltaTable(spark, "dbfs:/FileStore/data_engg/delta_demo/silver/customers_scd1"):
    DeltaTable\
    .forPath(spark, "dbfs:/FileStore/data_engg/delta_demo/silver/customers_scd1")\
    .alias("target")\
    .merge(in0.alias("source"), (col("source.customer_id") == col("target.customer_id")))\
    .whenMatchedUpdateAll()\
    .whenNotMatchedInsertAll()\
    .execute()
    else:
    in0.write\
    .format("delta")\
    .mode("overwrite")\
    .save("dbfs:/FileStore/data_engg/delta_demo/silver/customers_scd1")

    SCD2

    Let's use the Delta log to capture the historical customer_zip_code at the row-level.

    Parameters

    ParameterDescriptionRequired
    Key columnsList of key columns which would remain constantTrue
    Historic columnsList of columns which would change over time for which history needs to be maintainedTrue
    From time columnTime from which a particular row became validTrue
    To time columnTime till which a particular row was validTrue
    Min/old-value flagColumn placeholder to store the flag as true for the first entry of a particular keyTrue
    Max/latest flagColumn placeholder to store the flag as true for the last entry of a particular keyTrue
    Flag valuesOption to choose the min/max flag to be true/false or 0/1True

    Example

    Using the same customer tables as in our merge example above, output and configurations for SCD2 merge will look like below:

    Generated Code

    def writeDeltaSCD2(spark: SparkSession, in0: DataFrame):
    from delta.tables import DeltaTable, DeltaMergeBuilder

    if DeltaTable.isDeltaTable(spark, "dbfs:/FileStore/data_engg/delta_demo/silver/customers_scd2"):
    existingTable = DeltaTable.forPath(
    spark, "dbfs:/FileStore/data_engg/delta_demo/silver/customers_scd2"
    )
    updatesDF = in0.withColumn("minFlag", lit("true")).withColumn(
    "maxFlag", lit("true")
    )
    existingDF = existingTable.toDF()
    updateColumns = updatesDF.columns
    stagedUpdatesDF = (
    updatesDF.join(existingDF, ["customer_id"])
    .where(
    (
    (existingDF["maxFlag"] == lit("true"))
    & (
    (
    (
    existingDF["customer_zip_code"]
    != updatesDF["customer_zip_code"]
    )
    | (
    existingDF["customer_city"]
    != updatesDF["customer_city"]
    )
    )
    | (existingDF["customer_state"] != updatesDF["customer_state"])
    )
    )
    )
    .select(*[updatesDF[val] for val in updateColumns])
    .withColumn("minFlag", lit("false"))
    .withColumn("mergeKey", lit(None))
    .union(updatesDF.withColumn("mergeKey", concat("customer_id")))
    )
    existingTable.alias("existingTable").merge(
    stagedUpdatesDF.alias("staged_updates"),
    concat(existingDF["customer_id"]) == stagedUpdatesDF["mergeKey"],
    ).whenMatchedUpdate(
    condition=(
    (existingDF["maxFlag"] == lit("true"))
    & (
    (
    (
    existingDF["customer_zip_code"]
    != stagedUpdatesDF["customer_zip_code"]
    )
    | (
    existingDF["customer_city"]
    != stagedUpdatesDF["customer_city"]
    )
    )
    | (
    existingDF["customer_state"]
    != stagedUpdatesDF["customer_state"]
    )
    )
    ),
    set={"maxFlag": "false", "end_date": "staged_updates.updated_dt"},
    )\
    .whenNotMatchedInsertAll()\
    .execute()
    else:
    in0.write\
    .format("delta")\
    .mode("overwrite")\
    .save("dbfs:/FileStore/data_engg/delta_demo/silver/customers_scd2")


    SCD3

    Using the same customer tables as in our merge example above, output and configurations for SCD3 merge will look like below. Let's track change for customer_zip_code by adding a column to show the previous value.


    info

    To check out our blogpost on making data lakehouse easier using Delta with Prophecy click here.

    - - + + \ No newline at end of file diff --git a/Spark/gems/source-target/file/fixed-format/index.html b/Spark/gems/source-target/file/fixed-format/index.html index 372fc433e2..b830d6f051 100644 --- a/Spark/gems/source-target/file/fixed-format/index.html +++ b/Spark/gems/source-target/file/fixed-format/index.html @@ -6,15 +6,14 @@ Fixed Format | Prophecy - - - - + + +

    Fixed Format

    Enterprise Only

    Please contact us to learn more about the Enterprise offering.

    Read and write fixed format files with an expected schema.

    Source

    Reads data from fixed format files.

    Source Parameters

    ParameterDescriptionRequired
    LocationFile path where fixed format files are presentTrue
    Fixed Format SchemaSchema string for the fixed format file, supports either EBCDIC or ASCII formatsTrue

    Example

    Delta source example

    Generated Code


    object ReadEbcdic {

    def apply(spark: SparkSession): DataFrame = {
    import _root_.io.prophecy.abinitio.dml.DMLSchema.parse
    import _root_.io.prophecy.libs.{FFSchemaRecord, _}
    import play.api.libs.json.Json
    import _root_.io.prophecy.libs.FixedFormatSchemaImplicits._
    spark.read
    .option(
    "schema",
    Some("""ebcdic record
    string(6) service ;
    string(2) person ;
    decimal(2, 0) data ;
    string(1) format ;
    string(1) working ;
    end""").map(s => parse(s).asInstanceOf[FFSchemaRecord])
    .map(s => Json.stringify(Json.toJson(s)))
    .getOrElse("")
    )
    .format("io.prophecy.libs.FixedFileFormat")
    .load("/FileStore/tables/fixed_format/test/write_ebcdic")
    .cache()
    }

    }

    Target

    Writes data in fixed file format according to the specified schema string.

    Target Parameters

    ParameterDescriptionRequired
    LocationFile path where fixed format files will be writtenTrue
    Write modeHow to handle existing data. See this table for a list of available options.False
    Fixed Format SchemaSchema string for the fixed format file, supports either EBCDIC or ASCII formatsTrue

    Supported Write Modes

    Write ModeDescription
    overwriteIf data already exists, overwrite with the contents of the DataFrame
    appendIf data already exists, append the contents of the DataFrame
    ignoreIf data already exists, do nothing with the contents of the DataFrame. This is similar to a CREATE TABLE IF NOT EXISTS in SQL.
    errorIf data already exists, throw an exception.

    Example

    Delta Target Example

    Generated Code

    object write_ebcdic {

    def apply(spark: SparkSession, in: DataFrame): Unit = {
    import _root_.io.prophecy.abinitio.dml.DMLSchema.parse
    import _root_.io.prophecy.libs.{FFSchemaRecord, _}
    import play.api.libs.json.Json
    import _root_.io.prophecy.libs.FixedFormatSchemaImplicits._
    val schema = Some("""ebcdic record
    string(6) service ;
    string(2) person ;
    decimal(2, 0) data ;
    string(1) format ;
    string(1) working ;
    end""").map(s => parse(s).asInstanceOf[FFSchemaRecord])
    var writer = in.write.format("io.prophecy.libs.FixedFileFormat")
    writer = writer.mode("overwrite")
    schema
    .map(s => Json.stringify(Json.toJson(s)))
    .foreach(schema => writer = writer.option("schema", schema))
    writer.save("/FileStore/tables/fixed_format/test/write_ebcdic_alt")
    }

    }
    - - + + \ No newline at end of file diff --git a/Spark/gems/source-target/file/iceberg/index.html b/Spark/gems/source-target/file/iceberg/index.html index e95073ada7..ea005f3137 100644 --- a/Spark/gems/source-target/file/iceberg/index.html +++ b/Spark/gems/source-target/file/iceberg/index.html @@ -6,15 +6,14 @@ Iceberg | Prophecy - - - - + + +

    Iceberg

    Reads and writes Iceberg tables, including Iceberg Merge operations and Time travel.

    Required Settings

    Before you can use Iceberg source Gems, you must configure some required settings at the environment, initialization, and runtime stages.

    Environment Setting

    You must configure a required Spark JAR dependency in your Fabric environment.

    • JAR dependency

      • Package: https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-3.3_2.12/1.5.0/iceberg-spark-runtime-3.3_2.12-1.5.0.jar
      note

      The JAR dependency is available on your compute platform wherever Spark is installed, such as on your Databricks cluster, EMR, or Dataproc.

    Initialization Settings

    You must configure the following Spark session property during the Spark session initialization.

    • Spark session property:

      • Key - spark.sql.extensions
      • Value - org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions
      note

      This can be done during cluster bootstrap. For example, you can set --properties "spark:spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions" \ with your create clusters command.

    Runtime Settings

    You must configure the following Spark conf properties, which can be done during the Spark session runtime.

    These properties allow you to configure multiple catalogs and your respective metastores for Iceberg tables and data management. You can configure Hadoop and Hive as catalogs.

    • Spark conf properties

      • Configure Hadoop as catalog

        • spark.sql.catalog.<catalog_name>=org.apache.iceberg.spark.SparkCatalog
        • spark.sql.catalog.<catalog_name>.type=hadoop
        • spark.sql.catalog.<catalog_name>.warehouse=gs://<bucket>/<folder_1>/<folder_1>/
      • Configure Hive as catalog

        • spark.sql.catalog.<catalog_name>=org.apache.iceberg.spark.SparkCatalog
        • spark.sql.catalog.<catalog_name>.type=hive
        • spark.sql.catalog.<catalog_name>.warehouse=gs://<bucket>/<folder_1>/<folder_1>/
        • spark.sql.catalog.<catalog_name>.uri=thrift://10.91.64.30:9083
      tip

      You can set the default catalog by using spark.sql.defaultCatalog=<catalog_name>.

    To configure the Spark conf properties, follow these steps:

    1. Click ... at the top of the Prophecy canvas, and then click Pipeline Settings under Manage.

      Open Pipeline Settings

    2. On the Spark dialog, under Spark Configuration, add the Spark conf properties.

      Spark Pipeline Settings

    Source

    Source Parameters

    ParameterDescriptionRequired
    Catalog NameAny configured Hadoop/Hive catalog nameTrue (If any default catalog is not configured in Spark runtime properties.)
    Schema Name (Database Name)Name of the databaseTrue
    Table NameName of the tableTrue
    Read TimestampTime travel to a specific timestamp (value should be in milliseconds)False
    Read SnapshotTime travel to a specific version of the table (value should be a snapshot ID)False
    note

    For time travel on Iceberg tables:

    1. Only Read Timestamp OR Read Snapshot can be selected, not both.
    2. Timestamp should be between the first commit timestamp and the latest commit timestamp in the table.
    3. Snapshot needs to be a snapshot ID.

    By default most recent version of each row is fetched if no time travel option is used.

    info

    To read more about Iceberg time travel and its use cases, see the Apache Iceberg docs.

    Example

    Generated Code

    def iceberg_read(spark: SparkSession) -> DataFrame:
    return spark.read.format("iceberg").load("`hadoop_catalog_1`.`prophecy_doc_demo`.`employees_test`")

    Target

    Target Parameters

    ParameterDescriptionRequired
    Catalog TypeType of the configured catalog (Hadoop or Hive)True
    Catalog NameAny configured Hadoop/Hive catalog nameTrue (If any default catalog is not configured in Spark runtime properties.)
    Schema Name (Database Name)Name of the databaseTrue
    Table NameName of the tableTrue
    File LocationExternal file path to store data (Only applicable if Catalog type is Hive.)False
    Partition ColumnsList of columns to partition the Iceberg table by (Provide it during createOrReplace write mode to leverage overwritePartitions write mode in future.)False
    Merge schemaIf true, then any columns that are present in the DataFrame but not in the target table are automatically added on to the end of the schema as part of a write transaction.False

    Supported Write Modes

    Write ModeDescription
    overwriteIf data already exists, overwrite with the contents of the DataFrame
    appendIf data already exists, append the contents of the DataFrame
    ignoreIf data already exists, do nothing with the contents of the DataFrame. This is similar to a CREATE TABLE IF NOT EXISTS in SQL.
    errorIf data already exists, throw an exception.

    Among these write modes, overwrite and append work the same way as they do for Parquet file writes.

    Target Example

    Generated Code

    def iceberg_write(spark: SparkSession, in0: DataFrame):
    df1 = in0.writeTo("`hadoop_catalog_1`.`prophecy_doc_demo`.`employees_test`")
    df2 = df1.using("iceberg")
    df3 = df2.partitionedBy("Department")
    df4 = df3.tableProperty("write.spark.accept-any-schema", "true")
    df4.createOrReplace()
    - - + + \ No newline at end of file diff --git a/Spark/gems/source-target/file/index.html b/Spark/gems/source-target/file/index.html index 919c6f3a09..0acf7a2b90 100644 --- a/Spark/gems/source-target/file/index.html +++ b/Spark/gems/source-target/file/index.html @@ -6,15 +6,14 @@ File | Prophecy - - - - + + + - - + + \ No newline at end of file diff --git a/Spark/gems/source-target/file/json/index.html b/Spark/gems/source-target/file/json/index.html index 91372c645a..109316560e 100644 --- a/Spark/gems/source-target/file/json/index.html +++ b/Spark/gems/source-target/file/json/index.html @@ -6,15 +6,14 @@ JSON | Prophecy - - - - + + +

    JSON

    Read and write JSON formatted files

    Source

    Source Parameters

    JSON Source supports all the available Spark read options for JSON.

    The below list contains the additional parameters to read a JSON file:

    ParameterDescriptionRequired
    Dataset NameName of the DatasetTrue
    LocationLocation of the file(s) to be loaded
    E.g.: dbfs:/data/test.json
    True
    SchemaSchema to be applied on the loaded data. Can be defined/edited as JSON or inferred using Infer Schema button.True

    Example

    Generated Code

    def ReadDelta(spark: SparkSession) -> DataFrame:
    return spark.read.format("json").load("dbfs:/FileStore/data/example.json")
    object ReadJson {

    def apply(spark: SparkSession): DataFrame =
    spark.read
    .format("json")
    .load("dbfs:/FileStore/data/example.json")

    }

    Target

    Target Parameters

    JSON Target supports all the available Spark write options for JSON.

    The below list contains the additional parameters to write a JSON file:

    ParameterDescriptionRequired
    Dataset NameName of the DatasetTrue
    LocationLocation where the file(s) will be written
    E.g.: dbfs:/data/output.json
    True

    Example

    Generated Code

    def write_json(spark: SparkSession, in0: DataFrame):
    in0.write\
    .format("json")\
    .mode("overwrite")\
    .save("dbfs:/data/test_output.json")
    object write_json {
    def apply(spark: SparkSession, in: DataFrame): Unit =
    in.write
    .format("json")
    .mode("overwrite")
    .save("dbfs:/data/test_output.json")
    }

    Producing a single output file

    Because of Spark's distributed nature, output files are written as multiple separate partition files. If you need a single output file for some reason (such as reporting or exporting to an external system), use a Repartition Gem in Coalesce mode with 1 output partition:


    caution

    Note: This is not recommended for extremely large data sets as it may overwhelm the worker node writing the file.

    - - + + \ No newline at end of file diff --git a/Spark/gems/source-target/file/kafka/index.html b/Spark/gems/source-target/file/kafka/index.html index 3def4efe97..7833e91500 100644 --- a/Spark/gems/source-target/file/kafka/index.html +++ b/Spark/gems/source-target/file/kafka/index.html @@ -6,16 +6,15 @@ Kafka | Prophecy - - - - + + +

    Kafka

    Apache Kafka is an open-source distributed event streaming platform. Supporting a number of streaming paradigms it's used by thousands of companies and organizations in scenarios including Data Ingestion, Analytics and more.

    This source currently connects with Kafka Brokers in Batch mode.

    Source

    Reads data from Kafka stream in batch mode. Data is read only incrementally from the last offset stored in the specified Metadata table. If the Metadata table is not present, then data will be read from the earliest offset.

    Source Parameters

    ParameterDescriptionRequired
    Broker ListComma separated list of Kafka brokersTrue
    Group IdKafka consumer group IDTrue
    Session TimeoutSession timeout for Kafka. (Default value set to 6000 ms)False
    Security ProtocolSecurity protocol for Kafka (Default value set to SASL_SSL)True
    SASL MechanismDefault SASL Mechanism for SASL_SSL (Default value set to SCRAM-SHA-256)True
    Credential TypeCredential Type provider (Databricks Secrets or Username/Password)True
    Credential ScopeScope to use for Databricks secretsTrue
    Kafka TopicComma separated list of Kafka topicsTrue
    Metadata TableTable name which would be used to store offsets for each topic, partitionTrue

    Example

    Example usage of Filter

    Generated Code

    def KafkaSource(spark: SparkSession) -> DataFrame:
    from delta.tables import DeltaTable
    import json
    from pyspark.dbutils import DBUtils

    if spark.catalog._jcatalog.tableExists(f"metadata.kafka_offsets"):
    offset_dict = {}

    for row in DeltaTable.forName(spark, f"metadata.kafka_offsets").toDF().collect():
    if row["topic"] in offset_dict.keys():
    offset_dict[row["topic"]].update({row["partition"] : row["max_offset"] + 1})
    else:
    offset_dict[row["topic"]] = {row["partition"] : row["max_offset"] + 1}

    return (spark.read\
    .format("kafka")\
    .options(
    **{
    "kafka.sasl.jaas.config": (
    f"kafkashaded.org.apache.kafka.common.security.scram.ScramLoginModule"
    + f' required username="{DBUtils(spark).secrets.get(scope = "test", key = "username")}" password="{DBUtils(spark).secrets.get(scope = "test", key = "password")}";'
    ),
    "kafka.sasl.mechanism": "SCRAM-SHA-256",
    "kafka.security.protocol": "SASL_SSL",
    "kafka.bootstrap.servers": "broker1.aws.com:9094,broker2.aws.com:9094",
    "kafka.session.timeout.ms": "6000",
    "group.id": "group_id_1",
    "subscribe": "my_first_topic,my_second_topic",
    "startingOffsets": json.dumps(offset_dict),
    }
    )\
    .load()\
    .withColumn("value", col("value").cast("string"))\
    .withColumn("key", col("key").cast("string")))
    else:
    return (spark.read\
    .format("kafka")\
    .options(
    **{
    "kafka.sasl.jaas.config": (
    f"kafkashaded.org.apache.kafka.common.security.scram.ScramLoginModule"
    + f' required username="{DBUtils(spark).secrets.get(scope = "test", key = "username")}" password="{DBUtils(spark).secrets.get(scope = "test", key = "password")}";'
    ),
    "kafka.sasl.mechanism": "SCRAM-SHA-256",
    "kafka.security.protocol": "SASL_SSL",
    "kafka.bootstrap.servers": "broker1.aws.com:9094,broker2.aws.com:9094",
    "kafka.session.timeout.ms": "6000",
    "group.id": "group_id_1",
    "subscribe": "my_first_topic,my_second_topic"
    }
    )\
    .load()\
    .withColumn("value", col("value").cast("string"))\
    .withColumn("key", col("key").cast("string")))


    Target

    Writes each row from the Dataframe to Kafka topic(s) as JSON messages.

    Target Parameters

    ParameterDescriptionRequired
    Broker ListComma separated list of Kafka brokersTrue
    Security ProtocolSecurity protocol for Kafka (Default value set to SASL_SSL)True
    SASL MechanismDefault SASL Mechanism for SASL_SSL (Default value set to SCRAM-SHA-256)True
    Credential TypeCredential Type provider (Databricks Secrets or Username/Password)True
    Credential ScopeScope to use for Databricks secretsTrue
    Kafka TopicComma separated list of Kafka topicsTrue

    Example

    Example usage of Filter

    Generated Code

    def KafkaTarget(spark: SparkSession, in0: DataFrame):
    df1 = in0.select(to_json(struct("*")).alias("value"))
    df2 = df1.selectExpr("CAST(value AS STRING)")
    df2.write\
    .format("kafka")\
    .options(
    **{
    "kafka.sasl.jaas.config": (
    f"kafkashaded.org.apache.kafka.common.security.scram.ScramLoginModule"
    + f' required username="{DBUtils(spark).secrets.get(scope = "test", key = "username")}" password="{DBUtils(spark).secrets.get(scope = "test", key = "password")}";'
    ),
    "kafka.sasl.mechanism": "SCRAM-SHA-256",
    "kafka.security.protocol": "SASL_SSL",
    "kafka.bootstrap.servers": "broker1.aws.com:9094,broker2.aws.com:9094",
    "topic": "my_first_topic,my_second_topic",
    }
    )\
    .save()

    Example Pipelines

    Source Pipeline Example

    In this example we'll read JSON messages from Kafka, parse them, remove any null messages, and then finally persist them to a Delta table.

    Example usage of Filter

    Metadata Table

    In order to avoid reprocessing messages on subsequent Pipeline runs, we're going to update a certain table with the last processed offsets for each Kafka partition and topic. The next time the Pipeline runs this table will be used to only get a batch of messages that have arrived since the previously-processed offset.

    For this example, we're going to update metadata.kafka_offsets, which has the following structure:

    topicpartitionmax_offset
    my_first_topic010
    my_first_topic15
    my_second_topic010
    my_second_topic15

    Taking this approach gives us the following benefits:

    1. Build the Pipeline interactively without committing any offsets
    2. Production workflows will only consume messages that have arrived since the previously-processed offset
    3. We can replay old messages by modifying the Metadata table
    note

    For production workflows the Phase for the Script Gem that updates the offsets should be greater than the Phase of the Target Gem. This is to ensure that offsets are only updated in the table after data is safely persisted to the Target.

    Spark Code used for script component

    def UpdateOffsets(spark: SparkSession, in0: DataFrame):

    if not ("SColumnExpression" in locals()):
    from delta.tables import DeltaTable
    import pyspark.sql.functions as f
    metadataTable = "metadata.kafka_offsets"
    metaDataDf = in0.groupBy("partition", "topic").agg(f.max(f.col("`offset`").cast("int")).alias("max_offset"))

    if not spark.catalog._jcatalog.tableExists(metadataTable):
    metaDataDf.write.format("delta").mode("overwrite").saveAsTable(metadataTable)
    else:
    DeltaTable\
    .forName(spark, metadataTable)\
    .alias("target")\
    .merge(
    metaDataDf.alias("source"),
    (
    (col("source.`partition`") == col("target.`partition`"))
    & (col("source.`topic`") == col("target.`topic`"))
    )
    )\
    .whenMatchedUpdateAll()\
    .whenNotMatchedInsertAll()\
    .execute()

    - - + + \ No newline at end of file diff --git a/Spark/gems/source-target/file/orc/index.html b/Spark/gems/source-target/file/orc/index.html index 3a95bd44e4..cf561f9bdb 100644 --- a/Spark/gems/source-target/file/orc/index.html +++ b/Spark/gems/source-target/file/orc/index.html @@ -6,15 +6,14 @@ ORC | Prophecy - - - - + + +

    ORC

    ORC (Optimized Row Columnar) is a columnar file format designed for Spark/Hadoop workloads. It is optimized for large streaming reads, but with integrated support for finding required rows quickly. Because ORC files are type-aware, the writer chooses the most appropriate encoding for the type and builds an internal index as the file is written.

    This Gem allows you to read from or write to ORC files.

    Source

    Reads data from ORC files present at a path.

    Source Parameters

    ParameterDescriptionRequiredDefault
    LocationFile path where ORC files are presentTrueNone
    SchemaSchema to be applied on the loaded data. Can be defined/edited as JSON or inferred using Infer Schema button.TrueNone
    Recursive File LookupThis is used to recursively load files and it disables partition inferring. Its default value is false. If data source explicitly specifies the partitionSpec when recursiveFileLookup is true, an exception will be thrown.FalseFalse

    Example

    ORC source example

    Generated Code

    def read_orc(spark: SparkSession) -> DataFrame:
    return spark.read\
    .format("orc")\
    .load("dbfs:/FileStore/Users/orc/test.orc")


    Target

    Target Parameters

    Write data as ORC files at the specified path.

    ParameterDescriptionRequiredDefault
    LocationFile path where ORC files will be writtenTrueNone
    CompressionCompression codec to use when saving to file. This can be one of the known case-insensitive shortened names (none, uncompressed, snappy, gzip, lzo, brotli, lz4, and zstd). This will override orc.compress.Falsesnappy
    Write ModeWrite mode for DataFrameTrueerror
    Partition ColumnsList of columns to partition the ORC files byFalseNone

    Example

    ORC target example

    Generated Code

    def write_orc(spark: SparkSession, in0: DataFrame):
    in0.write\
    .format("orc")\
    .mode("overwrite")\
    .save("dbfs:/data/test_output.orc")
    info

    To know more about tweaking orc related properties in Spark config click here.

    - - + + \ No newline at end of file diff --git a/Spark/gems/source-target/file/parquet/index.html b/Spark/gems/source-target/file/parquet/index.html index fc0ef118cb..2fdb77e839 100644 --- a/Spark/gems/source-target/file/parquet/index.html +++ b/Spark/gems/source-target/file/parquet/index.html @@ -6,15 +6,14 @@ Parquet | Prophecy - - - - + + +

    Parquet

    Parquet is an open-source Columnar storage data format. It handles large volumes of data by supporting complex pushdown predicates, nested schemas and a wide variety of column encoding types.

    This Gem allows you to read from or write to Parquet files.

    Source

    Reads data from Parquet files at the given path.

    Source Parameters

    ParameterDescriptionRequiredDefault
    LocationFile path where parquet files are presentTrueNone
    SchemaSchema to be applied on the loaded data. Can be defined/edited as json or inferred using Infer Schema button.TrueNone
    Recursive File LookupThis is used to recursively load files from the given Location. Disables partition discovery. An exception will be thrown if this option and a partitionSpec are specified.FalseFalse
    Path Global FilterAn optional glob pattern to only include files with paths matching the pattern. The syntax follows GlobFilter. It does not change the behavior of partition discovery.FalseNone
    Modified BeforeAn optional Timestamp to only include files with modification times occurring before the specified Time. The provided timestamp must be in YYYY-MM-DDTHH:mm:ss form (e.g. 2020-06-01T13:00:00)FalseNone
    Modified AfterAn optional timestamp to only include files with modification times occurring after the specified Time. The provided timestamp must be in YYYY-MM-DDTHH:mm:ss form (e.g. 2020-06-01T13:00:00)FalseNone
    Merge SchemaSets whether schemas should be merged from all collected Parquet part-files. This will override spark.sql.parquet.mergeSchema.False(value of spark.sql.parquet.
    mergeSchema)
    Int96 Rebase modeThe int96RebaseMode option allows to specify the rebasing mode for INT96 timestamps from the Julian to Proleptic Gregorian calendar.

    Currently supported modes are:

    EXCEPTION: fails in reads of ancient INT96 timestamps that are ambiguous between the two calendars.

    CORRECTED: loads INT96 timestamps without rebasing.

    LEGACY: performs rebasing of ancient timestamps from the Julian to Proleptic Gregorian calendar.
    False(value of spark.sql.parquet
    .int96RebaseModeInRead)
    Datetime Rebase modeThe datetimeRebaseMode option allows to specify the rebasing mode for the values of the DATE, TIMESTAMP_MILLIS, TIMESTAMP_MICROS logical types from the Julian to Proleptic Gregorian calendar.
    Currently supported modes are:

    EXCEPTION: fails in reads of ancient dates/timestamps that are ambiguous between the two calendars.

    CORRECTED: loads dates/timestamps without rebasing.

    LEGACY: performs rebasing of ancient dates/timestamps from the Julian to Proleptic Gregorian calendar.
    False(value of spark.sql.parquet
    .datetimeRebaseModeInRead)

    Example

    Generated Code

    def read_parquet(spark: SparkSession) -> DataFrame:
    return spark.read\
    .format("parquet")\
    .option("mergeSchema", True)\
    .load("dbfs:/FileStore/Users/parquet/test.parquet")


    Target

    Target Parameters

    Write data as Parquet files at the specified path.

    ParameterDescriptionRequiredDefault
    LocationFile path where the Parquet files will be writtenTrueNone
    CompressionCompression codec to use when saving to file. This can be one of the known case-insensitive shortened names (none, uncompressed, snappy, gzip, lzo, brotli, lz4, and zstd). This will override spark.sql.parquet.compression.codec.Falsesnappy
    Write ModeHow to handle existing data. See this table for a list of available options.Trueerror
    Partition ColumnsList of columns to partition the Parquet files byFalseNone

    Supported Write Modes

    Write ModeDescription
    overwriteIf data already exists, overwrite with the contents of the Dataframe.
    appendIf data already exists, append the contents of the Dataframe.
    ignoreIf data already exists, do nothing with the contents of the Dataframe. This is similar to a CREATE TABLE IF NOT EXISTS in SQL.
    errorIf data already exists, throw an exception.

    Example

    Generated Code

    def write_parquet(spark: SparkSession, in0: DataFrame):
    in0.write\
    .format("parquet")\
    .mode("overwrite")\
    .save("dbfs:/data/test_output.parquet")
    info

    To know more about tweaking Parquet related properties in Spark config click here.

    - - + + \ No newline at end of file diff --git a/Spark/gems/source-target/file/text/index.html b/Spark/gems/source-target/file/text/index.html index 21eeb4fd02..3847f5da77 100644 --- a/Spark/gems/source-target/file/text/index.html +++ b/Spark/gems/source-target/file/text/index.html @@ -6,16 +6,15 @@ Text | Prophecy - - - - + + +

    Text

    Allows you to read or write plain Text files.

    Source

    Reads data from Text files at the given Location.

    Source Parameters

    ParameterDescriptionRequiredDefault
    LocationFile path where the Text files are locatedTrueNone
    SchemaSchema to be applied on the loaded data. Can be defined/edited as JSON or inferred using Infer Schema button.TrueNone
    Recursive File LookupThis is used to recursively load files from the given Location. Disables partition discovery. An exception will be thrown if this option and a partitionSpec are specified.FalseFalse
    Line SeparatorDefines the line separator that should be used for reading or writing.False\r, \r\n, \n
    Read as a single rowIf true, read each file from input path(s) as a single row.FalseFalse

    Example

    Generated Code

    def read_avro(spark: SparkSession) -> DataFrame:
    return spark.read\
    .format("text")\
    .text("dbfs:/FileStore/customers.txt", wholetext = False, lineSep = "\n")


    Target

    Target Parameters

    Write data as text files at the specified path.

    ParameterDescriptionRequiredDefault
    LocationFile path where text files will be written toTrueNone
    CompressionCompression codec to use when saving to file. This can be one of the known case-insensitive shortened names (none, bzip2, gzip, lz4, snappy and deflate).FalseNone
    Write ModeHow to handle existing data. See this table for a list of available options.Trueerror
    Partition ColumnsList of columns to partition the Text files byFalseNone
    Line SeparatorDefines the line separator that should be used for writingFalse\n
    info

    The Text data source supports only a single column apart from the partition columns. An AnalysisException will be thrown if the input DataFrame to the Target Gem has more than one column apart from the partition columns.

    Supported Write Modes

    Write ModeDescription
    overwriteIf data already exists, overwrite with the contents of the DataFrame.
    appendIf data already exists, append the contents of the DataFrame.
    ignoreIf data already exists, do nothing with the contents of the DataFrame. This is similar to a CREATE TABLE IF NOT EXISTS in SQL.
    errorIf data already exists, throw an exception.

    Example

    Generated Code

    def write_text(spark: SparkSession, in0: DataFrame):
    in0.write\
    .format("text")\
    .mode("overwrite")\
    .text("dbfs:/FileStore/customers.txt", compression = "gzip", lineSep = "\n")
    info

    To know more about tweaking Text file related properties in Spark config click here.

    - - + + \ No newline at end of file diff --git a/Spark/gems/source-target/file/xlsx/index.html b/Spark/gems/source-target/file/xlsx/index.html index 422b9033dc..d3ebf1b0cd 100644 --- a/Spark/gems/source-target/file/xlsx/index.html +++ b/Spark/gems/source-target/file/xlsx/index.html @@ -6,15 +6,14 @@ XLSX (Excel) | Prophecy - - - - + + +

    XLSX (Excel)

    If you've ever done anything with numbers in your line of work odds are you've worked with Excel at one point or another. Prophecy supports the format as both a data source and data target, so if you're migrating from a legacy system or you need to produce an Excel-compatible file for a report, we've got you covered.

    Prerequisites

    caution

    If you receive an error about the excel format not being available you must add spark-excel library as a dependency.

    Follow the instructions on this page to add the Maven coordinate com.crealytics:spark-excel_2.12:3.5.1_0.20.4 to your Pipeline.

    Parameters

    Source Parameters

    The following is a list of options that are available while using XLSX as a Source:

    ParameterDescriptionRequiredDefault
    Column Name of Corrupt RecordName of the column to create for corrupt recordsFalseNone
    Column Name of Row NumberName of the column to create using the original row numberFalseNone
    Data AddressData address to read (see here) for more informationTrueA1 (Everything)
    Date FormatDate format to useFalseSpark default
    Excerpt SizeExcerpt SizeFalse
    File ExtensionInput file extensionFalsexlsx
    HeaderFirst line in input is a headerTrueTrue
    Ignore After HeaderNumber of rows to ignore after headerFalse0
    Ignore Leading WhitespaceFalseFalse
    Ignore Trailing WhitespaceFalseFalse
    Infer SchemaInfer the schema of the input.

    Note: This setting is provided directly by the spark-excel library and is different than the Infer Schema button in the Prophecy UI. Both should provide the same results.
    FalseFalse
    Keep Undefined RowsIf true, keeps undefined Excel rowsFalseFalse
    LocaleA language tag in the IETF BCP 47 formatFalse"US"
    NaN ValueValue to use in the case of NaNFalse"NaN"
    Negative InfinityValue to use in the case of negative infinityFalse"Inf"
    Null ValueValue to use for NullFalse(empty)
    Parse ModeParsing mode. Supports Permissive, Drop Malformed and Fail Fast.FalsePermissive
    Positive InfinityValue to use in case of positive infinityFalse"Inf"
    Sampling RatioDefines how much of the input to sample from when inferring the schema.False1.0
    Timestamp FormatFormat to parse timestamps from text cellsFalseSpark default
    Use Null for Error CellsUse null value for error cellsFalseTrue
    Workbook PasswordPassword to secure workbookFalse(empty)
    Timezone IDTimezone ID for Dates/Timestamps taken from the IANA Time Zone Database.

    Note: See here for valid values.
    False(empty)

    Target Parameters

    The following is a list of options that are available while using XLSX as a Target:

    ParameterDescriptionRequiredDefault
    Data AddressData address to write output toFalseA1
    File ExtensionFile extension used when writingFalse"xlsx"
    HeaderWrite header to fileFalseTrue
    LocaleA language tag in the IETF BCP 47 formatFalse"US"
    Date FormatFormat to use for Date columnsFalseSpark default
    Use Plain Number FormatIf true, format the cells without rounding and scientific notationsFalseFalse
    Workbook PasswordPassword to secure workbookFalse(empty)
    Write ModeWrite mode, same as underlying Spark write modeFalse"append"
    Partition ColumnsColumns to partition output files byFalse(empty)

    Example output

    Below is a snippet of the optimized code that is generated when using the XLSX source.

    def Demo_XLSX_Source(spark: SparkSession) -> DataFrame:
    if Config.fabricName == "dev":
    return spark.read\
    .format("excel")\
    .option("header", True)\
    .option("dataAddress", "A1")\
    .option("inferSchema", True)\
    .load("dbfs:/FileStore/Users/scott/plain_number.xlsx")
    else:
    raise Exception("No valid dataset present to read fabric")
    - - + + \ No newline at end of file diff --git a/Spark/gems/source-target/index.html b/Spark/gems/source-target/index.html index 51f60a7976..25db98745c 100644 --- a/Spark/gems/source-target/index.html +++ b/Spark/gems/source-target/index.html @@ -6,15 +6,14 @@ Source & Target | Prophecy - - - - + + +

    Source & Target

    Constitutes the set of Gems that help with loading and saving data.

    File

    A collection of Gems related to working with various file-based formats.

    NameDescription
    AvroAvro format is a row-based storage format for Hadoop, which is widely used as a serialization platform.
    CSVAllows you to read or write a delimited file (often called Comma Separated File, CSV).
    DeltaReads data from Delta files present at a path and writes Delta files to a path based on configuration.
    Fixed FormatRead data from fixed format files with expected schema, or write data to fixed format files with expected schema.
    IcebergReads data from Iceberg files present at a path and writes Iceberg files to a path based on configuration.
    JSONAllows you to read or write JSON formatted files.
    KafkaThis source currently connects with Kafka Brokers in Batch mode.
    ORCORC (Optimized Row Columnar) is a columnar file format designed for Spark/Hadoop workloads.
    ParquetParquet is an open source file format built to handle flat columnar storage data formats.
    TextThis Gem allows you to read from or write to text files.
    XLSX (Excel)Allows you to read or write Excel-compatible files.

    Warehouse

    A collection of Gems specializing in connecting to warehouse-style data sources.

    NameDescription
    BigQueryAllows you to read or write data to the BigQuery warehouse, using a high-performance connector. Enterprise only.
    CosmosDBAllows you to read or write data to the CosmosDB database.
    DB2Allows you to read or write data to the DB2 warehouse, using a high-performance connector. Enterprise only.
    JDBCAllows you to read or write data to the JDBC database.
    MongoDBAllows you to read or write data to the MongoDB database.
    OracleAllows you to read or write data to the Oracle warehouse, using a high-performance connector. Enterprise only.
    RedshiftAllows you to read or write data to the Redshift warehouse, using a high-performance connector. Enterprise only.
    SalesforceAllows you to read or write data to the Salesforce warehouse.
    SnowflakeAllows you to read or write data to the Snowflake warehouse, using a high-performance connector. Enterprise only.
    TeradataAllows you to read or write data to the Teradata warehouse, using a high-performance connector. Enterprise only.

    Catalog

    A collection of Gems related to working with various table-based formats.

    NameDescription
    DeltaReads data from Delta tables saved in data catalog and writes data into Delta table in a managed Metastore.
    HiveRead from or write to Tables managed by a Hive metastore.

    Lookup

    Lookup is a special component that allows you to broadcast any data, to later be used anywhere in your Pipeline.

    Synthetic Data Generator

    If you don't have the data you need, try generating fake data. Using the Synthetic Data Generator Gem, you can specify columns with various datatypes and populate fields with randomly generated data. Specify the boundaries for each row, the percentage of rows which should have null values, etc. It's not real data but it's the next best thing!

    - - + + \ No newline at end of file diff --git a/Spark/gems/source-target/warehouse/bigquery/index.html b/Spark/gems/source-target/warehouse/bigquery/index.html index d9151cee09..cc70a0011d 100644 --- a/Spark/gems/source-target/warehouse/bigquery/index.html +++ b/Spark/gems/source-target/warehouse/bigquery/index.html @@ -6,17 +6,16 @@ BigQuery | Prophecy - - - - + + +

    BigQuery

    Built on

    This connector is built on top of the already available spark-bigquery-connector connector.
    For non-Databricks clusters, you need to install the corresponding library. Please refer to the library compatibility matrix in the Spark BigQuery documentation.

    Allows read and write operations on BigQuery.

    Source

    Reads data from BigQuery tables.

    To establish the connection to BigQuery, we have the following three options:

    • None: Users are not required to set any credentials if the BigQuery configurations are set at the cluster level.
    • JSON Credentials Filepath: BigQuery JSON key configuration can be passed to BigQuery.
    • Databricks secrets: If the JSON configuration is stored directly in Pipeline configs as Databricks secrets, then refer to the config variable as ${config_variable}.
    How to get the JSON Credentials from BigQuery?
    Steps to download BigQuery JSON Credentials
    • Go to https://console.cloud.google.com/apis/credentials
    • Click the "+ CREATE CREDENTIALS" button at the top and select "Service account"
    • Fill in the credential creation form; the Service account is created after you submit it
    • Skip the above steps if the Service account is already created, and go to the "KEYS" section of the service account
    • Click on "ADD KEY" -> Create new Key -> Select "JSON" key type -> CREATE will download the json configuration file

    Source Parameters

    ParameterDescriptionRequired
    Parent Project NameGoogle Cloud Project ID of the table to bill for the exportTrue
    Table NameName of the tableTrue
    Credentials file pathPath to BigQuery credentials file. If the credentials are set at the Spark conf level, then select none in credentials options.True
    Configuration variable nameChoose the Databricks secrets and specify the variable in ${<variable>} format if the secrets are defined in Pipeline configs.True
    Is secret Base64 encodedEnable if the configuration variable above is Base64 encoded.True
    Project Name(Optional) Google Cloud Project ID of the table. Defaults to the project of the Service Account being used.
    Dataset Name(Optional) Dataset containing the table. Required unless it's mentioned in the Table Name.
    Maximum partitions(Optional) Maximum number of partitions to split the data into. Actual number may be less if BigQuery deems the data small enough.
    Minimum partitions(Optional) Minimal number of partitions to split the data into. Actual number may be less if BigQuery deems the data small enough.
    Enables read views(Optional) Enables the connector to read from views and not only tables. Please read the relevant section before activating this option.
    MaterializedView projectID(Optional) Project id where the materialized view is going to be created
    MaterializedView dataset(Optional) Dataset where the materialized view is going to be created. This dataset should be in same location as the view or the queried tables.
    Materialized expiration time in min's(Optional) Expiration time of the temporary table holding the materialized data of a view or a query, in minutes. Notice that the connector may re-use the temporary table due to the use of local cache and in order to reduce BigQuery computation, so very low values may cause errors.
    Read dataformat(Optional) Data Format for reading from BigQuery. Options : ARROW, AVRO Unsupported Arrow filters are not pushed down and results are filtered later by Spark. (Currently Arrow does not support disjunction across columns.)
    Enable optimize-empty-projection(Optional) Connector uses an optimized empty projection (select without any columns) logic, used for count() execution
    Enable push-all-filters(Optional) Pushes all the filters Spark can delegate to BigQuery Storage API. This reduces amount of data that needs to be sent from BigQuery Storage API servers to Spark clients. Default: true
    Additional Job Labels(Optional) Can be used to add labels to the connector initiated query and load BigQuery Jobs. Multiple labels can be set.
    Traceability Application Name(Optional) Application name used to trace BigQuery Storage read and write sessions. Setting the application name is required to set the trace ID on the sessions.
    Traceability Job ID(Optional) Job ID used to trace BigQuery Storage read and write sessions.
    Proxy URL(Optional) Address of the proxy server. The proxy must be a HTTP proxy and address should be in the host:port format. Can be alternatively set in the Spark configuration (spark.conf.set(...)) or in Hadoop Configuration (fs.gs.proxy.address).
    Proxy username(Optional) UserName used to connect to the proxy. Can be alternatively set in the Spark configuration (spark.conf.set(...)) or in Hadoop Configuration (fs.gs.proxy.username).
    Proxy password(Optional) Password used to connect to the proxy. Can be alternatively set in the Spark configuration (spark.conf.set(...)) or in Hadoop Configuration (fs.gs.proxy.password).
    Maximum HTTP retries(Optional) Maximum number of retries for the low-level HTTP requests to BigQuery. Can be alternatively set in the Spark configuration (spark.conf.set("httpMaxRetry", ...)) or in Hadoop Configuration (fs.gs.http.max.retry). Default is 10.
    HTTP Connection timeout in MSec's(Optional) Timeout in milliseconds to establish a connection with BigQuery. Can be alternatively set in the Spark configuration (spark.conf.set("httpConnectTimeout", ...)) or in Hadoop Configuration (fs.gs.http.connect-timeout). Default is 60000.
    HTTP Read timeout in MSec's(Optional) Timeout in milliseconds to read data from an established connection. Can be alternatively set in the Spark configuration (spark.conf.set("httpReadTimeout", ...)) or in Hadoop Configuration (fs.gs.http.read-timeout). Default is 60000.
    Arrow Compression Codec(Optional) Compression codec while reading from a BigQuery table when using Arrow format. Options : ZSTD (Zstandard compression), LZ4_FRAME (https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md), COMPRESSION_UNSPECIFIED. Default is COMPRESSION_UNSPECIFIED.
    Cache expiration time in min's(Optional) Expiration time of the in-memory cache storing query information. To disable caching, set the value to 0. Default is 15 min's.
    Cache read session timeout in sec's(Optional) Timeout in seconds to create a ReadSession when reading a table. For Extremely large table this value should be increased. Default is 600 sec's.
    Conversation datetime zone ID(Optional) Time zone ID used to convert BigQuery's DATETIME into Spark's Timestamp, and vice versa. The full list can be seen by running java.time.ZoneId.getAvailableZoneIds() in Java/Scala, or sc._jvm.java.time.ZoneId.getAvailableZoneIds() in pyspark. Default is UTC.
    Job query priority(Optional) Priority levels set for the Job while reading data from BigQuery query. The permitted values are: BATCH - Query is queued and started as soon as idle resources are available, usually within a few minutes. If the query hasn't started within 3 hours, its priority is changed to INTERACTIVE.

    Example

    Below is an example of fetching all customer data from BigQuery using Prophecy IDE. We will be using BigQuery table to fetch the customer data.

    Generated Code

    def read_bigquery(spark: SparkSession) -> DataFrame:
    return spark.read\
    .format("bigquery")\
    .option("credentials", "dbfs:/bucket/prefix/file.json")\
    .option("table", "tablename")\
    .load()


    Target

    Create/update BigQuery Tables

    Target Parameters

    ParameterDescriptionRequired
    Parent Project NameGoogle Cloud Project ID of the table to bill for the exportTrue
    Table NameName of the tableTrue
    Credentials file pathPath to BigQuery credentials file. If the credentials are set at Spark conf level then select none in credentials options.True
    Configuration variable nameChoose the Databricks secrets and specify the variable in ${<variable>} format if the secrets are defined in Pipeline configs.True
    Is secret Base64 encodedEnable if the configuration variable above is Base64 encodedTrue
    Project Name(Optional) Google Cloud Project ID of the table. Defaults to the project of the Service Account being used.
    Dataset Name(Optional) Dataset containing the table. Required unless it's mentioned in the Table Name.
    Table labels(Optional) Can be used to add labels to the table while writing to a table. Multiple labels can be set.
    Disposition creation(Optional) Specifies whether the Job is allowed to create new tables. The permitted values are: CREATE_IF_NEEDED - Configures the Job to create the table if it does not exist, CREATE_NEVER - Configures the Job to fail if the table does not exist.
    Write Method(Optional) Controls the method in which the data is written to BigQuery. Available values are direct to use the BigQuery Storage Write API and indirect which writes the data first to GCS and then triggers a BigQuery load operation.
    Temporary GCS Bucket(Optional) GCS bucket that temporarily holds the data before it is loaded to BigQuery. Required unless set in the Spark configuration (spark.conf.set(...)). Not supported by the DIRECT write method.
    Persistent GCS Bucket(Optional) GCS bucket that holds the data before it is loaded to BigQuery. If informed, the data won't be deleted after write data into BigQuery. Not supported by the DIRECT write method.
    Persistent GCS Path(Optional) GCS path that holds the data before it is loaded to BigQuery. Used only with persistentGcsBucket. Not supported by the DIRECT write method.
    Intermediate dataformat(Optional) Format of the data before it is loaded to BigQuery, values can be either "parquet","orc" or "avro". In order to use the Avro format, the spark-avro package must be added in runtime. Default is parquet.
    Date partition(Optional) date partition the data is going to be written to. Should be a date string given in the format YYYYMMDD. Can be used to overwrite the data of a single partition, like this: df.write.format("bigquery").option("datePartition", "20220331").mode("overwrite").save("table"). Can also be used with different partition types like: HOUR: YYYYMMDDHH, MONTH: YYYYMM, YEAR: YYYY
    Partition field(Optional) If this field is specified together with partitionType, the table is partitioned by this field. The field must be a top-level TIMESTAMP or DATE field. Its mode must be NULLABLE or REQUIRED. If the option is not set for a partitioned table, then the table will be partitioned by pseudo column, referenced via either '_PARTITIONTIME' as TIMESTAMP type, or '_PARTITIONDATE' as DATE type. Not supported by the DIRECT write method.
    Partition expiration MSec's(Optional) Number of milliseconds for which to keep the storage for partitions in the table. The storage in a partition will have an expiration time of its partition time plus this value. Not supported by the DIRECT write method.
    Partition type of the field(Optional) Supported types are: HOUR, DAY, MONTH, YEAR. This option is mandatory for a target table to be partitioned. Default is DAY. Not supported by the DIRECT write method.
    Cluster field(Optional) A string of non-repeated, top level columns.
    Enable allow-field-addition(Optional) Adds the ALLOW_FIELD_ADDITION SchemaUpdateOption to the BigQuery LoadJob. Allowed values are true and false. Default is false.
    Enable allow-field-relaxation(Optional) Adds the ALLOW_FIELD_RELAXATION SchemaUpdateOption to the BigQuery LoadJob. Allowed values are true and false. Default is false.
    Proxy URL(Optional) Address of the proxy server. The proxy must be a HTTP proxy and address should be in the host:port format. Can be alternatively set in the Spark configuration (spark.conf.set(...)) or in Hadoop Configuration (fs.gs.proxy.address).
    Proxy username(Optional) UserName used to connect to the proxy. Can be alternatively set in the Spark configuration (spark.conf.set(...)) or in Hadoop Configuration (fs.gs.proxy.username).
    Proxy password(Optional) Password used to connect to the proxy. Can be alternatively set in the Spark configuration (spark.conf.set(...)) or in Hadoop Configuration (fs.gs.proxy.password).
    Maximum HTTP retries(Optional) Maximum number of retries for the low-level HTTP requests to BigQuery. Can be alternatively set in the Spark configuration (spark.conf.set("httpMaxRetry", ...)) or in Hadoop Configuration (fs.gs.http.max.retry). Default is 10.
    HTTP Connection timeout in MSec's(Optional) Timeout in milliseconds to establish a connection with BigQuery. Can be alternatively set in the Spark configuration (spark.conf.set("httpConnectTimeout", ...)) or in Hadoop Configuration (fs.gs.http.connect-timeout). Default is 60000.
    Enable mode-check-for-schema-fields(Optional) Checks the mode of every field in destination schema to be equal to the mode in corresponding source field schema, during DIRECT write. Default is true.
    Enable list-interface(Optional) Indicates whether to use schema inference specifically when the mode is Parquet (https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#parquetoptions). Default is true.
    Conversation datetime zone ID(Optional) Time zone ID used to convert BigQuery's DATETIME into Spark's Timestamp, and vice versa. The full list can be seen by running java.time.ZoneId.getAvailableZoneIds() in Java/Scala, or sc._jvm.java.time.ZoneId.getAvailableZoneIds() in pyspark. Default is UTC.
    Job query priority(Optional) Option will be effective when DIRECT write is used with OVERWRITE mode, where the connector overwrites the destination table using MERGE statement.

    Generated Code

    Direct write using the BigQuery Storage Write API

    def write_bigquery(spark: SparkSession, in0: DataFrame):
    in0.write \
    .format("bigquery") \
    .option("writeMethod", "direct") \
    .save("dataset.table")

    Indirect write using a temporary GCS bucket (data is staged in GCS, then loaded into BigQuery)

    def write_bigquery(spark: SparkSession, in0: DataFrame):
    in0.write \
    .format("bigquery") \
    .option("temporaryGcsBucket","some-bucket") \
    .save("dataset.table")
    - - + + \ No newline at end of file diff --git a/Spark/gems/source-target/warehouse/cosmos/index.html b/Spark/gems/source-target/warehouse/cosmos/index.html index 234254ef88..3c33874f54 100644 --- a/Spark/gems/source-target/warehouse/cosmos/index.html +++ b/Spark/gems/source-target/warehouse/cosmos/index.html @@ -6,16 +6,15 @@ CosmosDB | Prophecy - - - - + + +

    CosmosDB

    The azure-cosmos-spark connector is an integration between Azure CosmosDB and Apache Spark, enabling you to read and write data from and to CosmosDB using Spark. This document will cover the key properties available for configuration when using this connector.

    Installation

    On your Execution Cluster (Databricks or OnPrem), connect a dependency that has the following Maven coordinates (for Spark 3.3; for other versions, please check this link):

    com.azure.cosmos.spark:azure-cosmos-spark_3-3_2-12:4.18.1

    Usage

    Common Auth Properties

    These properties are shared between the read and write operations.

    PropertyRequiredDefault ValueDescription
    Authentication TypeTrueMasterKeyTwo auth types are currently supported: MasterKey (PrimaryReadWriteKeys, SecondReadWriteKeys, PrimaryReadOnlyKeys, SecondReadOnlyKeys) and ServicePrinciple.
    Azure EnvironmentTrueAzureThe azure environment of the CosmosDB account: Azure, AzureChina, AzureUsGovernment, AzureGermany.
    Account EndpointTrueNoneThe CosmosDB account URI.
    Account KeyTrueNoneThe CosmosDB account key.
    Subscription IDFalseNoneThe subscriptionId of the CosmosDB account. Required for ServicePrinciple authentication.
    TenantFalseNoneThe tenantId of the CosmosDB account. Required for ServicePrinciple authentication.
    Resource Group NameFalseNoneThe resource group of the CosmosDB account. Required for ServicePrinciple authentication.
    Client IdFalseNoneThe clientId/ApplicationId of the service principal. Required for ServicePrinciple authentication.
    Client SecretFalseNoneThe client secret/password of the service principal. Required for ServicePrinciple authentication.
    DatabaseTrueNoneThe CosmosDB database name.

    Source Properties

    Important Source Properties

    These properties are specific to read operations.

    PropertyRequiredDefault ValueDescription
    Data SourceTrueDB TableWhether all data in the container should be loaded or data should be returned using a custom query.
    ContainerFalseNoneThe CosmosDB container name.
    Custom QueryFalseSELECT 1When provided the custom query will be processed against the CosmosDB endpoint instead of dynamically generating the query via predicate push down. Usually it is recommended to rely on Spark's predicate push down because that will allow to generate the most efficient set of filters based on the query plan. But there are a couple of predicates like aggregates (count, group by, avg, sum, etc.) that cannot be pushed down yet (at least in Spark 3.1) - so the custom query is a fallback to allow them to be pushed into the query sent to Cosmos. If specified, with schema inference enabled, the custom query will also be used to infer the schema.

    Other Source Properties

    PropertyRequiredDefault ValueDescription
    Use Gateway ModeFalsefalseUse gateway mode for the client operations.
    Force Eventual ConsistencyFalsetrueMakes the client use Eventual consistency for read operations instead of using the default account level consistency.
    Application nameFalseNoneApplication name
    Preferred Regions ListFalseNonePreferred regions list to be used for a multi region CosmosDB account. This is a comma separated value (e.g., [East US, West US] or East US, West US) provided preferred regions will be used as hint. You should use a collocated Spark cluster with your CosmosDB account and pass the Spark cluster region as preferred region. See list of azure regions here. Note that you can also use spark.cosmos.preferredRegions as alias.
    Disable Tcp Connection Endpoint RediscoveryFalsefalseCan be used to disable TCP connection endpoint rediscovery. TCP connection endpoint rediscovery should only be disabled when using custom domain names with private endpoints when using a custom Spark environment. When using Azure Databricks or Azure Synapse as Spark runtime it should never be required to disable endpoint rediscovery.
    Allow Invalid JSON With Duplicate JSON PropertiesFalsefalseBy default (when set to false) the CosmosDB Java SDK and Spark connector will raise a hard failure when JSON documents are read that contain JSON object with multiple properties of the same name. This config option can be used to override the behavior and silently ignore the invalid JSON and instead use the last occurrence of the property when parsing the JSON. NOTE: This is only meant to be used as a temporary workaround. We strongly recommend fixing the invalid JSON from even being ingested into the data and only use this workaround while cleaning up the documents with invalid JSON.
    Max Item CountFalse1000Overrides the maximum number of documents that can be returned for a single query or change feed request. The default value is 1000. Consider increasing this only for average document sizes significantly smaller than 1KB or when projection reduces the number of properties selected in queries significantly (like when only selecting "id" of documents, etc.).
    Max Integrated Staleness in millisecondsFalseNoneSets the max staleness window in milliseconds for the point read or query request results in the integrated cache when using the dedicated gateway. Learn more about max integrated cache staleness here.
    Inclusion ModeFalseAlwaysDetermines whether null/default values will be serialized to JSON or whether properties with null/default value will be skipped. The behavior follows the same ideas as Jackson's JsonInclude.Include. Always means JSON properties are created even for null and default values. NonNull means no JSON properties will be created for explicit null values. NonEmpty means JSON properties will not be created for empty string values or empty arrays/maps. NonDefault means JSON properties will be skipped not just for null/empty but also when the value is identical to the default value 0 for numeric properties for example.
    DateTime Conversion ModeFalseDefaultThe date/time conversion mode options are Default, AlwaysEpochMilliseconds, AlwaysEpochMillisecondsWithSystemDefaultTimezone. With Default, the standard Spark 3.* behavior is used: java.sql.Date/java.time.LocalDate are converted to EpochDay, java.sql.Timestamp/java.time.Instant are converted to MicrosecondsFromEpoch. With AlwaysEpochMilliseconds, the same behavior as the CosmosDB connector for Spark 2.4 is used: java.sql.Date, java.time.LocalDate, java.sql.Timestamp and java.time.Instant are converted to MillisecondsFromEpoch. The behavior for AlwaysEpochMillisecondsWithSystemDefaultTimezone is identical with AlwaysEpochMilliseconds except that the connector will assume System default time zone / Spark session time zone (specified via spark.sql.session.timezone) instead of UTC when the date/time to be parsed has no explicit time zone.
    Schema Conversion ModeFalseRelaxedThe schema conversion behavior (Relaxed, Strict). When reading JSON documents, if a document contains an attribute that does not map to the schema type, the user can decide whether to use a null value (Relaxed) or an exception (Strict).
    Partitioning StrategyFalseDefaultThe partitioning strategy used (Default, Custom, Restrictive or Aggressive)
    Partitioning Targeted CountFalseNoneAn Integer value representing the targeted Partition Count. This parameter is optional and ignored unless strategy is Custom. In this case the Spark Connector won't dynamically calculate the number of partitions but stick with this value.
    Partitioning Feed Range FilterFalseNoneCan be used to scope the query to a single logical CosmosDB partition (or a subset of logical partitions). If this parameter is optionally provided, the partitioning strategy will be modified - only partitions for the scoped logical partitions will be created. So, the main benefit of this config option is to reduce the necessary SparkTasks/Partitions.
    PropertyRequiredDefault ValueDescription
    Enable Infer SchemaTruetrueWhether all data in the container should be loaded or should data be returned using a custom query
    Enable Custom Query for Inferring SchemaFalseNoneCheckbox, which if marked, will open up an Editor for writing Custom SQL Query
    Make all Columns NullableFalsetrueWhen schema inference is enabled, whether the resulting schema will make all columns nullable. By default, all columns (except cosmos system properties) will be treated as nullable even if all rows within the sample set have non-null values. When disabled, the inferred columns are treated as nullable or not depending on whether any record in the sample set has null-values within a column.
    Record Sample Size for Schema InferenceFalse1000Sampling size to use when inferring schema and not using a query.
    Include all System PropertiesFalsefalseWhen schema inference is enabled, whether the resulting schema will include all CosmosDB system properties.
    Include Document Timestamp fieldFalsefalseWhen schema inference is enabled, whether the resulting schema will include the document Timestamp (_ts). Not required if spark.cosmos.read.inferSchema.includeSystemProperties is enabled, as it will already include all system properties.
    Infer Schema QueryFalseSELECT * FROM rWhen schema inference is enabled, used as custom query to infer it. For example, if you store multiple entities with different schemas within a container and you want to ensure inference only looks at certain document types or you want to project only particular columns.

    Throughput Control Configs

    Having throughput control helps to isolate the performance needs of applications running against a container, by limiting the amount of request units that can be consumed by a given Spark client. There are several advanced scenarios that benefit from client-side throughput control:

    • Different operations and tasks have different priorities - there can be a need to prevent normal transactions from being throttled due to data ingestion or copy activities. Some operations and/or tasks aren't sensitive to latency, and are more tolerant to being throttled than others.
    • Provide fairness/isolation to different end users/tenants - An application will usually have many end users. Some users may send too many requests, which consume all available throughput, causing others to get throttled.
    • Load balancing of throughput between different Azure Cosmos DB clients - in some use cases, it's important to make sure all the clients get a fair (equal) share of the throughput.
    PropertyRequiredDefault ValueDescription
    Throughput Control EnabledFalsefalseWhether throughput control is enabled
    Throughput Control: Account EndpointFalseNoneCosmosDB Account Endpoint Uri for throughput control. If not defined, then spark.cosmos.accountEndpoint will be used.
    Throughput Control Account KeyFalseNoneCosmosDB Account Key for throughput control.
    Throughput Control Preferred Regions ListFalseNonePreferred regions list to be used for a multi region CosmosDB account. This is a comma separated value (e.g., [East US, West US] or East US, West US) provided preferred regions will be used as hint. You should use a collocated Spark cluster with your CosmosDB account and pass the Spark cluster region as preferred region. See list of azure regions here.
    Disable TCP connection endpoint RediscoveryFalsefalseCan be used to disable TCP connection endpoint rediscovery. TCP connection endpoint rediscovery should only be disabled when using custom domain names with private endpoints when using a custom Spark environment. When using Azure Databricks or Azure Synapse as Spark runtime it should never be required to disable endpoint rediscovery.
    Use Gateway ModeFalsefalseUse gateway mode for the client operations
    Use Dedicated ContainerFalsetrueFlag to indicate when configured with throughput control, whether dedicated throughput control container will be provided.
    Throughput control group nameFalseNoneThroughput control group name
    Throughput control group target throughputFalseNoneThroughput control group target throughput in number of request units
    Throughput control group target throughput thresholdFalseNoneThroughput control group target throughput threshold in request units
    Database which will be used for throughput global controlFalseNoneDatabase which will be used for throughput global control
    Container which will be used for throughput global controlFalseNoneContainer which will be used for throughput global control
    Renew Interval in millisecondsFalse5sHow often the client is going to update the throughput usage of itself
    Expire Interval in millisecondsFalse11sHow quickly an offline client will be detected

    More information about Throughput control is available here.

    Target Properties

    These properties are specific to write operations.

    Write Configurations

    PropertyRequiredDefault ValueDescription
    Write ModeTrueappendSpark Write Mode. The possible values are error, append, overwrite and ignore. More information on Write Modes is available here.
    Date FormatFalseyyyy-MM-ddFormat for Date columns. More information on the possible formats is given here.
    Timestamp FormatFalseyyyy-MM-dd'T'HH:mm:ss.SSSXXXFormat for Timestamp columns. More information on the possible formats is given here.
    Write StrategyTrueItemOverwriteCosmosDB Item Write Strategy: ItemOverwrite (using upsert); ItemOverwriteIfNotModified (if etag property of the row is empty/null it will just do an insert and ignore if the document already exists - same as ItemAppend, if an etag value exists the connector will attempt to replace the document with etag pre-condition. If the document changed - identified by precondition failure - the update is skipped and the document is not updated with the content of the data frame row), ItemAppend (using create, ignore pre-existing items i.e., Conflicts), ItemDelete (delete all documents), ItemDeleteIfNotModified (delete all documents for which the etag hasn't changed), ItemPatch (Partial update all documents based on the patch config).
    Max Retry AttemptsFalse10CosmosDB Write Max Retry Attempts on retriable failures (e.g., connection error, write error, etc.)
    Max ConcurrencyFalseNoneCosmosDB Item Write Max concurrency. If not specified it will be determined based on the Spark executor VM Size.
    Max No. of Pending Bulk OperationsFalseNoneCosmosDB Item Write bulk mode maximum pending operations. Defines a limit of bulk operations being processed concurrently. If not specified it will be determined based on the Spark executor VM Size. If the volume of data is large for the provisioned throughput on the destination container, this setting can be adjusted by following the estimation of 1000 x Cores.
    Enable Write BulkFalsetrueCosmosDB Item Write bulk enabled

    Patch Specific Configurations

    These Configurations are valid only when Write Strategy is given as ItemPatch

    PropertyRequiredDefault ValueDescription
    Default Patch Operation TypeTrueReplaceDefault CosmosDB patch operation type. Supported types include none, add, set, replace, remove, increment. Choose none for no-op, for others please reference here for full context.
    Patch Column ConfigsFalseNoneCosmosDB patch column configs. It can contain multiple definitions that match the following patterns (separated by comma) => col(column).op(operationType) or col(column).path(patchInCosmosdb).op(operationType) - The difference of the second pattern is that it also allows you to define a different CosmosDB path.
    Patch FilterFalseNoneUsed for Conditional patch
    - - + + \ No newline at end of file diff --git a/Spark/gems/source-target/warehouse/db2/index.html b/Spark/gems/source-target/warehouse/db2/index.html index a470197324..2fa78f7077 100644 --- a/Spark/gems/source-target/warehouse/db2/index.html +++ b/Spark/gems/source-target/warehouse/db2/index.html @@ -6,15 +6,14 @@ DB2 | Prophecy - - - - + + + - - + + \ No newline at end of file diff --git a/Spark/gems/source-target/warehouse/index.html b/Spark/gems/source-target/warehouse/index.html index 2ca80a914e..fb30afc6a9 100644 --- a/Spark/gems/source-target/warehouse/index.html +++ b/Spark/gems/source-target/warehouse/index.html @@ -6,15 +6,14 @@ Warehouse | Prophecy - - - - + + + - - + + \ No newline at end of file diff --git a/Spark/gems/source-target/warehouse/jdbc/index.html b/Spark/gems/source-target/warehouse/jdbc/index.html index 6ac39aa1c6..1f9e7ae819 100644 --- a/Spark/gems/source-target/warehouse/jdbc/index.html +++ b/Spark/gems/source-target/warehouse/jdbc/index.html @@ -6,16 +6,15 @@ JDBC | Prophecy - - - - + + +

    JDBC

    Reads and writes data from other databases using JDBC.

    Source

    Source Parameters

    ParameterDescriptionRequiredDefault
    Credential TypeCredential Type provider (Databricks Secrets or Username/Password or Environment variables for Username and Password). These can be set in the config options/via Databricks secrets or Environment variables for security purposes, so that they are not visible in code.True(none)
    Credential ScopeScope to use for Databricks secretsFalse(none)
    urlThe JDBC URL in the form jdbc:subprotocol:subname to connect to. The source-specific connection properties may be specified in the URL. e.g.,
    jdbc:postgresql://test.us-east-1.rds.amazonaws.com:5432/postgres,
    jdbc:mysql://database-mysql.test.us-east-1.rds.amazonaws.com:3306/mysql.
    True(none)
    dbtableThe JDBC table that should be read from. See here for more details.False(none)
    queryThe query that will be used to read data into Spark. See here for more details.False(none)
    driverThe class name of the JDBC driver to use to connect to this URL. e.g.,
    For postgres : org.postgresql.Driver
    For mysql: com.mysql.cj.jdbc.Driver
    True(none)
    Partition Column, Lower Bound, Upper BoundThese options must all be specified if any of them is specified. In addition, numPartitions must be specified. They describe how to partition the table when reading in parallel from multiple workers. partitionColumn must be a numeric, date, or timestamp column from the table in question. Notice that lowerBound and upperBound are just used to decide the partition stride, not for filtering the rows in table. So all rows in the table will be partitioned and returned. This option applies only to reading.
    Note: Dropdown to choose column in partition column will only be populated once schema is inferred.
    False(none)
    Number of partitionsThe maximum number of partitions that can be used for parallelism in table reading. This also determines the maximum number of concurrent JDBC connections.False(none)
    Query TimeoutThe number of seconds the driver will wait for a Statement object to execute. Zero means there is no limit. As a Target, this option depends on how JDBC drivers implement the API setQueryTimeout, e.g., the h2 JDBC driver checks the timeout of each query instead of an entire JDBC batch.False0
    Fetch sizeThe JDBC fetch size, which determines how many rows to fetch per round trip. This can help performance on JDBC drivers which default to low fetch size (e.g. Oracle with 10 rows).False0
    Session Init StatementAfter each database session is opened to the remote DB and before starting to read data, this parameter executes a custom SQL statement (or a PL/SQL block). Use this to implement session initialization code. Example: option("sessionInitStatement", """BEGIN execute immediate 'alter session set "_serial_direct_read"=true'; END;""").False(none)
    Push-Down PredicateEnable or disable predicate push-down into the JDBC data source. The default value is true, in which case Spark will push down filters to the JDBC data source as much as possible. Otherwise, if set to false, no filter will be pushed down to the JDBC data source and thus all filters will be handled by Spark. Predicate push-down is usually turned off when the predicate filtering is performed faster by Spark than by the JDBC data source.FalseTrue
    Push-Down AggregateEnable or disable aggregate push-down in V2 JDBC data source. The default value is false, in which case Spark will not push down aggregates to the JDBC data source. Aggregate push-down is usually turned off when the aggregate is performed faster by Spark than by the JDBC data source. Please note that aggregates can be pushed down if and only if all the aggregate functions and the related filters can be pushed down. Spark assumes that the data source can't fully complete the aggregate and does a final aggregate over the data source output.FalseFalse
    caution

    If you get a class-not-found error while running the Pipeline, your dependency might be missing from the cluster. To read more about how to add dependencies for a specific JDBC jar, click here.

    Source Table

    The dbtable parameter dictates which Table will be used as the source to read from. Anything that is valid in a FROM clause of a SQL query can also be used. For example, instead of a table name you could use a subquery in parentheses.

    danger

    The dbtable parameter and the query parameter are mutually exclusive; they cannot both be specified at the same time.

    Source Query

    The specified query will be used as a subquery in the FROM clause. Spark will also assign an alias to the subquery clause. For example, Spark will issue a query of the following form to the JDBC Source. SELECT columns FROM (<user_specified_query>) spark_gen_alias.

    There are a couple of restrictions while using this option:

    1. query and partitionColumn options cannot be used at the same time.
    2. When specifying the partitionColumn option is required, the subquery can be specified using dbtable option instead and partition columns can be qualified using the subquery alias provided as part of dbtable.

    Example

    Generated Code

    def Source(spark: SparkSession) -> DataFrame:
    return spark.read\
    .format("jdbc")\
    .option("url", f"{Config.jdbc_url}")\
    .option("user", f"{Config.jdbc_username}")\
    .option("password", f"{Config.jdbc_password}")\
    .option("dbtable", "public.demo_customers_raw")\
    .option("pushDownPredicate", True)\
    .option("driver", "org.postgresql.Driver")\
    .load()

    Target

    Target Parameters

    ParameterDescriptionRequiredDefault
    Credential TypeCredential Type provider (Databricks Secrets or Username/Password or Environment variables for Username and Password). These can be set in the config options/via Databricks secrets or Environment variables for security purposes, so that they are not visible in code.True(none)
    Credential ScopeScope to use for Databricks secretsFalse(none)
    urlThe JDBC URL in the form jdbc:subprotocol:subname to connect to. The source-specific connection properties may be specified in the URL. e.g.,
    jdbc:postgresql://test.us-east-1.rds.amazonaws.com:5432/postgres,
    jdbc:mysql://database-mysql.test.us-east-1.rds.amazonaws.com:3306/mysql.
    True(none)
    tableThe JDBC table that should be written into.True(none)
    driverThe class name of the JDBC driver to use to connect to this URL. e.g.,
    For Postgres : org.postgresql.Driver
    For MySQL: com.mysql.cj.jdbc.Driver
    True(none)
    Number of PartitionsThe maximum number of partitions that can be used for parallelism in table writing. This also determines the maximum number of concurrent JDBC connections. If the number of partitions to write exceeds this limit, we decrease it to this limit by calling coalesce(numPartitions) before writing.False(none)
    Query TimeoutThe number of seconds the driver will wait for a Statement object to execute. Zero means there is no limit. This option depends on how JDBC drivers implement the API setQueryTimeout, e.g., the h2 JDBC driver checks the timeout of each query instead of an entire JDBC batch.False0
    Batch SizeThe JDBC batch size, which determines how many rows to insert per round trip. This can help performance on JDBC drivers. This option applies only to writing.False1000
    Isolation LevelThe transaction isolation level, which applies to current connection. It can be one of NONE, READ_COMMITTED, READ_UNCOMMITTED, REPEATABLE_READ, or SERIALIZABLE, corresponding to standard transaction isolation levels defined by JDBC's Connection object. Please refer to the documentation for more information.FalseREAD_UNCOMMITTED
    TruncateWhen SaveMode is set to Overwrite, this option causes Spark to truncate an existing table instead of dropping and recreating it. This can be more efficient, and prevents the table metadata (e.g., indices) from being removed. However, it will not work in some cases, such as when the new data has a different schema. In case of failures, users should turn off truncate option to use DROP TABLE again. Also, due to the different behavior of TRUNCATE TABLE among DBMS, it's not always safe to use this. MySQLDialect, DB2Dialect, MsSqlServerDialect, DerbyDialect, and OracleDialect supports this while PostgresDialect and the default JDBCDialect doesn't. For unknown and unsupported JDBCDialect, the user option truncate is ignored.FalseFalse
    Cascade TruncateIf enabled and supported by the JDBC database (PostgreSQL and Oracle at the moment), this option allows execution of a TRUNCATE TABLE t CASCADE (in the case of PostgreSQL a TRUNCATE TABLE ONLY t CASCADE is executed to prevent inadvertently truncating descendant tables). This will affect other tables, and thus should be used with care.FalseDefault according to the JDBCDialect in use
    Create Table OptionsIf specified, this option allows setting of database-specific table and partition options when creating a table (e.g., CREATE TABLE t (name string) ENGINE=InnoDB.).False
    Create Table Column TypesThe database column data types to use instead of the defaults when creating the table. Data type information should be specified in the same format as CREATE TABLE columns syntax (e.g: "name CHAR(64), comments VARCHAR(1024)"). The specified types should be valid Spark SQL data types.False(none)

    Supported Write Modes

    Write ModeDescription
    overwriteIf data already exists, overwrite with the contents of the DataFrame.
    appendIf data already exists, append the contents of the DataFrame.
    ignoreIf data already exists, do nothing with the contents of the DataFrame. This is similar to a CREATE TABLE IF NOT EXISTS in SQL.
    errorIf data already exists, throw an exception.

    Target Example

    Generated Code

    def Target(spark: SparkSession, in0: DataFrame):
    in0.write\
    .format("jdbc")\
    .option("url", f"{Config.jdbc_url}")\
    .option("dbtable", "public.demo_customers_raw_output")\
    .option("user", f"{Config.jdbc_username}")\
    .option("password", f"{Config.jdbc_password}")\
    .option("driver", "org.postgresql.Driver")\
    .save()

    - - + + \ No newline at end of file diff --git a/Spark/gems/source-target/warehouse/mongodb/index.html b/Spark/gems/source-target/warehouse/mongodb/index.html index 133357af74..9d4255ed38 100644 --- a/Spark/gems/source-target/warehouse/mongodb/index.html +++ b/Spark/gems/source-target/warehouse/mongodb/index.html @@ -6,10 +6,9 @@ MongoDB | Prophecy - - - - + + +
    @@ -18,7 +17,7 @@ We will be reading Airbnb public listingReviews dataset using in-built MongoDB Source Gem.
    After configuration you can view schema by clicking Infer Schema in properties tab and also view data by clicking Load inside Preview tab.

    Generated Code

    object input_mongodb {

    def apply(context: Context): DataFrame = {
    context.spark.read
    .format("mongodb")
    .option(
    "connection.uri",
    f"${"mongodb+srv"}://${"ashish_mongotrial"}:${"password"}@${"cluster0.zveltwx.mongodb.net/?retryWrites=true&w=majority"}".trim
    )
    .option("database", "test_input")
    .option("collection", "listAndReviews")
    .load()
    }
    }

    Target

    Official documentation

    Target Parameters

    ParameterDescriptionRequired
    UsernameUsername for MongoDB instanceTrue
    PasswordPassword for MongoDB instanceTrue
    DriverDriver string for mongodb connection, e.g. mongodb or mongodb+srvTrue
    Cluster IP Address and OptionsCluster IP and options (if required) for the MongoDB connection,
    e.g. cluster0.prophecy.mongodb.xyz/?retryWrites=true&w=majority
    True
    DatabaseDatabase to which we want to write the data.True
    CollectionCollection to which we want to write the data.True
    mongoClientFactoryMongoClientFactory configuration key.
    You can specify a custom implementation which must implement the com.mongodb.spark.sql.connector.connection.MongoClientFactory interface.
    Default: com.mongodb.spark.sql.connector.connection.DefaultMongoClientFactory
    False
    maxBatchSizeSpecifies the maximum number of operations to batch in bulk operations.
    Default: 512
    False
    orderedSpecifies whether to perform ordered bulk operations.
    Default: true
    False
    operationTypeSpecifies the type of write operation to perform. You can set this to one of the following values: insert, replace or update
    Default: replace
    False
    idFieldListField or list of fields by which to split the collection data. To specify more than one field, separate them using a comma as shown in the following example:"fieldName1,fieldName2"
    Default: _id
    False
    writeConcern.wSpecifies w, a write concern option to acknowledge the level to which the change propagated in the MongoDB replica set. You can specify one of the following values: MAJORITY, W1, W2, W3, ACKNOWLEDGED or UNACKNOWLEDGED
    Default: ACKNOWLEDGED
    False
    writeConcern.journalSpecifies j, a write concern option to enable request for acknowledgment that the data is confirmed on on-disk journal for the criteria specified in the w option.
    You can specify either true or false.
    False
    writeConcern.wTimeoutMSSpecifies wTimeoutMS, a write concern option to return an error when a write operation exceeds the number of milliseconds. If you use this optional setting, you must specify a non-negative integer.False

    Supported Write Modes

    Write ModeDescription
    overwriteIf data already exists, overwrite the contents of the Collection with data.
    appendIf data already exists, append the data on to the contents of the Collection.

    Example

    Below is an example of configuring MongoDB Target using Prophecy IDE. We will be writing back Airbnb public listingReviews data into a collection in MongoDB using our in-built Target Gem.

    Generated Code

    object output_mongodb {
    def apply(context: Context, df: DataFrame): Unit = {
    df.write
    .format("mongodb")
    .mode("overwrite")
    .option(
    "connection.uri",
    f"${"mongodb+srv"}://${"ashish_mongotrial"}:${"password"}@${"cluster0.zveltwx.mongodb.net/?retryWrites=true&w=majority"}".trim
    )
    .option("database", "test")
    .option("collection", "test_output")
    .option("ordered", "true")
    .option("operationType", "replace")
    .save()
    }
    }
    - - + + \ No newline at end of file diff --git a/Spark/gems/source-target/warehouse/oracle/index.html b/Spark/gems/source-target/warehouse/oracle/index.html index b4b44c03de..962c2da73a 100644 --- a/Spark/gems/source-target/warehouse/oracle/index.html +++ b/Spark/gems/source-target/warehouse/oracle/index.html @@ -6,15 +6,14 @@ Oracle | Prophecy - - - - + + + - - + + \ No newline at end of file diff --git a/Spark/gems/source-target/warehouse/redshift/index.html b/Spark/gems/source-target/warehouse/redshift/index.html index af6722b9b3..e3b0832741 100644 --- a/Spark/gems/source-target/warehouse/redshift/index.html +++ b/Spark/gems/source-target/warehouse/redshift/index.html @@ -6,15 +6,14 @@ Redshift | Prophecy - - - - + + + - - + + \ No newline at end of file diff --git a/Spark/gems/source-target/warehouse/salesforce/index.html b/Spark/gems/source-target/warehouse/salesforce/index.html index ba38d7e978..ee6c1d136b 100644 --- a/Spark/gems/source-target/warehouse/salesforce/index.html +++ b/Spark/gems/source-target/warehouse/salesforce/index.html @@ -6,17 +6,16 @@ Salesforce | Prophecy - - - - + + +

    Salesforce

    This Gem has below features:

    1. Dataset Creation - Create Dataset in Salesforce Wave from Spark DataFrame.
    2. Read Salesforce Wave Dataset - User has to provide SAQL to read data from Salesforce Wave. The query result will be constructed as DataFrame.
    3. Read Salesforce Object - User has to provide SOQL to read data from Salesforce object. The query result will be constructed as DataFrame.
    4. Update Salesforce Object - Salesforce object will be updated with the details present in DataFrame.
    note

    This connector is built on top of the already available spark-salesforce connector.

    To use this Gem in Prophecy, the com.springml:spark-salesforce_2.12:1.1.4 Maven external dependency needs to be installed on the cluster. For installing dependencies from the Prophecy UI, please check the dependency management docs.

    Source

    Reads data from Salesforce object and wave Datasets.

    Source Parameters

    ParameterDescriptionRequired
    Dataset NameName of the DatasetTrue
    Credential TypeCredential Type: Databricks Secrets or Username & PasswordTrue
    CredentialsDatabricks credential name, else username and password for the Salesforce accountRequired if Credential Type is Databricks Secrets
    UsernameSalesforce Wave Username. This user should have privilege to upload Datasets or execute SAQL or execute SOQL.Required if Credential Type is Username & Password
    PasswordSalesforce Wave Password. Please append security token along with password. For example, if a user’s password is mypassword, and the security token is XXXXXXXXXX, the user must provide mypasswordXXXXXXXXXXRequired if Credential Type is Username & Password
    Login Url(Optional) Salesforce Login URL. Default value https://login.salesforce.com.True
    Read from sourceStrategy to read data: SAQL or SOQL.True
    SAQL Query(Optional) SAQL query used to query Salesforce Wave. Mandatory for reading Salesforce Wave Dataset
    SOQL Query(Optional) SOQL query used to query Salesforce Object. Mandatory for reading Salesforce Object like Opportunity
    Version(Optional) Salesforce API Version. Default 35.0
    Infer Schema(Optional) Infer schema from the query results. Sample rows will be taken to find the datatype.
    Date Format(Optional) A string that indicates the format that follows java.text.SimpleDateFormat to use when reading timestamps.
    This applies to TimestampType. By default, it is null which means trying to parse timestamp by java.sql.Timestamp.valueOf().
    Result Variable(Optional) result variable used in SAQL query. To paginate SAQL queries this package will add the required offset and limit.
    For example, in this SAQL query q = load \"<Dataset_id>/<Dataset_version_id>\"; q = foreach q generate 'Name' as 'Name', 'Email' as 'Email'; q is the result variable.
    Page Size(Optional) Page size for each query to be executed against Salesforce Wave. Default value is 2000.
    This option can only be used if resultVariable is set.
    Bulk(Optional) Flag to enable bulk query. This is the preferred method when loading large sets of data. Salesforce will process batches in the background. Default value is false.
    PK Chunking(Optional) Flag to enable automatic primary key chunking for bulk query Job. This splits bulk queries into separate batches of the size defined by the chunkSize option. Default value is false, and the default chunk size is 100,000.
    Chunk size(Optional) The size of the number of records to include in each batch. Default value is 100,000. This option can only be used when pkChunking is true. Maximum size is 250,000.
    Timeout(Optional) The maximum time spent polling for the completion of bulk query Job.
    This option can only be used when bulk is true.
    Max chars per column(Optional) The maximum length of a column. This option can only be used when bulk is true. Default value is 4096.
    Query All(Optional) Toggle to retrieve deleted and archived records for SOQL queries. Default value is false.
    info

    Steps to reset your Salesforce security token can be found at this link.

    Example

    Below is an example of fetching all leads from sales cloud using Prophecy IDE. We will be using SOQL query to query our leads Dataset on sales cloud.

    Generated Code

    def read_salesforce(spark: SparkSession) -> DataFrame:
    return spark.read\
    .format("com.springml.spark.salesforce")\
    .option("username", "your_salesforce_username")\
    .option("password", "your_salesforce_password_with_secutiry_token")\
    .option("soql", "select id, name, email from lead")\
    .load()


    Target

    Create/update Datasets and Salesforce objects.

    Target Parameters

    ParameterDescriptionRequired
    Dataset NameName of the DatasetTrue
    Credential TypeCredential Type: Databricks Secrets or Username & PasswordTrue
    CredentialsDatabricks credential name, else username and password for the Salesforce accountRequired if Credential Type is Databricks Secrets
    UsernameSalesforce Wave Username. This user should have privilege to upload Datasets or execute SAQL or execute SOQL.Required if Credential Type is Username & Password
    PasswordSalesforce Wave Password. Please append security token along with password. For example, if a user’s password is mypassword, and the security token is XXXXXXXXXX, the user must provide mypasswordXXXXXXXXXXRequired if Credential Type is Username & Password
    Login Url(Optional) Salesforce Login URL. Default value https://login.salesforce.com.True
    Salesforce Dataset name(Optional) Name of the Dataset to be created in Salesforce Wave. Required for Dataset Creation.
    Salesforce object name(Optional) Salesforce Object to be updated. (e.g.) Contact. Mandatory if bulk is true.
    Metadata Config in JSON(Optional) Metadata configuration which will be used to construct Salesforce Wave Dataset Metadata (https://resources.docs.salesforce.com/sfdc/pdf/bi_dev_guide_ext_data_format.pdf). Metadata configuration has to be provided in JSON format.
    Upsert(Optional) Flag to upsert data to Salesforce. This performs an insert or update operation using the "externalIdFieldName" as the primary ID. Existing fields that are not in the DataFrame being pushed will not be updated. Default "false".
    External Id Field Name(Optional) The name of the field used as the external ID for Salesforce Object. This value is only used when doing an update or upsert. Default "Id".

    Generated Code

    def write_salesforce(spark: SparkSession, in0: DataFrame):
    in0.write.format("com.springml.spark.salesforce")\
    .option("username", "your_salesforce_username")\
    .option("password", "your_salesforce_password_with_secutiry_token")\
    .option("DatasetName", "your_Dataset_name")
    .save()
    - - + + \ No newline at end of file diff --git a/Spark/gems/source-target/warehouse/snowflake/index.html b/Spark/gems/source-target/warehouse/snowflake/index.html index 6bee403573..98e858524f 100644 --- a/Spark/gems/source-target/warehouse/snowflake/index.html +++ b/Spark/gems/source-target/warehouse/snowflake/index.html @@ -6,15 +6,14 @@ Snowflake | Prophecy - - - - + + +

    Snowflake

    Allows read and write operations on Snowflake.

    How to configure Key Pair Authentication on Snowflake?

    Please refer to the official Snowflake documentation to attach a private key to a user account: Configuring Key Pair Authentication.

    Source

    Source Parameters

    ParameterDescriptionRequired
    Dataset NameName of the DatasetTrue
    Credential TypeCredential Type: Databricks Secrets or Username & PasswordTrue
    CredentialsDatabricks credential name, else username and password for the snowflake accountRequired if Credential Type is Databricks Secrets
    UsernameLogin name for the Snowflake userRequired if Credential Type is Username & Password
    PasswordPassword for the Snowflake userRequired if Credential Type is Username & Password
    Private key filepathLocation of Private key filepath in PKCS8 formatRequired if Private Key Details is enabled
    Private key passphrasePassphrase of Private key fileRequired if private key file is passphrase enabled
    UrlHostname for your account in the format: <account_identifier>.snowflakecomputing.com.
    E.g.: https://DJ07623.ap-south-1.aws.snowflakecomputing.com
    True
    DatabaseDatabase to use for the session after connectingTrue
    SchemaSchema to use for the session after connectingTrue
    WarehouseDefault virtual warehouse to use for the session after connectingFalse
    RoleDefault security role to use for the session after connectingFalse
    Data SourceStrategy to read data: DB Table or SQL Query.True
    TableName of the table to be read. All columns and records are retrieved (i.e. it is equivalent to SELECT * FROM table).Required if Data Source is DB Table
    SQL QueryExact query (SELECT statement) to runRequired if Data Source is SQL Query

    Example

    Generated Code

    def sf_customer(spark: SparkSession) -> DataFrame:
    from pyspark.dbutils import DBUtils

    return spark.read\
    .format("snowflake")\
    .options(
    **{
    "sfUrl": "https://DJ07623.ap-south-1.aws.snowflakecomputing.com",
    "sfUser": "anshuman",
    "sfPassword": "*******",
    "sfDatabase": "SNOWFLAKE_SAMPLE_DATA",
    "sfSchema": "TPCDS_SF100TCL",
    "sfWarehouse": "COMPUTE_WH"
    }
    )\
    .option("dbtable", "CUSTOMER")\
    .load()

    Target

    Target Parameters

    ParameterDescriptionRequired
    Dataset NameName of the DatasetTrue
    Credential TypeCredential Type: Databricks Secrets or Username & PasswordTrue
    CredentialsDatabricks credential name, else username and password for the snowflake accountRequired if Credential Type is Databricks Secrets
    UsernameLogin name for the Snowflake userRequired if Credential Type is Username & Password
    PasswordPassword for the Snowflake userRequired if Credential Type is Username & Password
    Private key filepathLocation of Private key filepath in PKCS8 formatRequired if Private Key Details is enabled
    Private key passphrasePassphrase of Private key fileRequired if private key file is passphrase enabled
    UrlHostname for your account in the format: <account_identifier>.snowflakecomputing.com.
    E.g.: https://DJ07623.ap-south-1.aws.snowflakecomputing.com
    True
    DatabaseDatabase to use for the session after connectingTrue
    SchemaSchema to use for the session after connectingTrue
    WarehouseDefault virtual warehouse to use for the session after connectingFalse
    RoleDefault security role to use for the session after connectingFalse
    TableName of the table to which data is to be writtenTrue
    Write ModeHow to handle existing data. See this table for a list of available options.True
    Post-Script SQLDDL/DML SQL statements to execute after writing data.
    It is intended for statements that do not return a result set, for example DDL statements like CREATE TABLE and DML statements like INSERT, UPDATE, and DELETE.
    It is not useful for statements that return a result set, such as SELECT or SHOW.
    False

    Supported Write Modes

    Write ModeDescription
    overwriteIf data already exists, overwrite with the contents of the DataFrame.
    appendIf data already exists, append the contents of the DataFrame.
    ignoreIf data already exists, do nothing with the contents of the DataFrame. This is similar to a CREATE TABLE IF NOT EXISTS in SQL.
    errorIf data already exists, throw an exception.

    Example

    Generated Code

    def customer_snow_tg(spark: SparkSession, in0: DataFrame):
    from pyspark.dbutils import DBUtils
    options = {
    "sfUrl": "https://DJ07623.ap-south-1.aws.snowflakecomputing.com",
    "sfUser": "anshuman",
    "sfPassword": "******",
    "sfDatabase": "SNOWFLAKE_SAMPLE_DATA",
    "sfSchema": "TPCDS_SF100TCL",
    "sfWarehouse": "COMPUTE_WH"
    }
    writer = in0.write.format("snowflake").options(**options)
    writer = writer.option("dbtable", "CUSTOMERS")
    writer = writer.mode("overwrite")
    writer.save()
    - - + + \ No newline at end of file diff --git a/Spark/gems/source-target/warehouse/teradata/index.html b/Spark/gems/source-target/warehouse/teradata/index.html index 3c348b8462..9b032980bf 100644 --- a/Spark/gems/source-target/warehouse/teradata/index.html +++ b/Spark/gems/source-target/warehouse/teradata/index.html @@ -6,15 +6,14 @@ Teradata | Prophecy - - - - + + + - - + + \ No newline at end of file diff --git a/Spark/gems/subgraph/basic-subgraph/index.html b/Spark/gems/subgraph/basic-subgraph/index.html index 49519db4fc..9c9b051d3a 100644 --- a/Spark/gems/subgraph/basic-subgraph/index.html +++ b/Spark/gems/subgraph/basic-subgraph/index.html @@ -6,18 +6,17 @@ Basic subgraph | Prophecy - - - - + + +
    -

    Basic subgraph

    Spark Gem

    In a Basic Subgraph, you have the capability to encapsulate multiple distinct Gems within a single, reusable parent Gem. This organizational approach enhances the visual clarity of your Pipeline by grouping various sections together under a common parent Gem. Additionally, it empowers you to break down intricate logic into modular components, thereby streamlining the Data Engineering processes.

    Furthermore, the ability to Publish a Subgraph extends the utility beyond a singular Pipeline. By doing so, you can reuse the encapsulated logic in other Pipelines or Projects, promoting code re-usability and simplifying the overall development workflow.

    Create a Basic Subgraph

    To create a Basic Subgraph, drag and drop the (1) Basic Subgraph Gem from the Subgraph menu, and connect it to any previously created Gem on your canvas.

    Once you've added the Gem, click (2) Open, to open the subgraph canvas.

    create_basic_subgraph

    Once you are on the canvas, you can start adding Gems to your subgraph by dragging and dropping from the Gems menu.

    Add/Remove Port

    While using a Subgraph, you can control the Input and Output ports as per the requirements. Click on the (1) Add/Remove Part Button to open the port settings dialog as shown below.

    add_remove_port

    In this dialog, you can add or remove the Input or Output ports. +

    Basic subgraph

    Spark Gem

    In a Basic Subgraph, you have the capability to encapsulate multiple distinct Gems within a single, reusable parent Gem. This organizational approach enhances the visual clarity of your Pipeline by grouping various sections together under a common parent Gem. Additionally, it empowers you to break down intricate logic into modular components, thereby streamlining the Data Engineering processes.

    Furthermore, the ability to Publish a Subgraph extends the utility beyond a singular Pipeline. By doing so, you can reuse the encapsulated logic in other Pipelines or Projects, promoting code re-usability and simplifying the overall development workflow.

    Create a Basic Subgraph

    To create a Basic Subgraph, drag and drop the (1) Basic Subgraph Gem from the Subgraph menu, and connect it to any previously created Gem on your canvas.

    Once you've added the Gem, click (2) Open, to open the subgraph canvas.

    create_basic_subgraph

    Once you are on the canvas, you can start adding Gems to your subgraph by dragging and dropping from the Gems menu.

    Add/Remove Port

    While using a Subgraph, you can control the Input and Output ports as per the requirements. Click on the (1) Add/Remove Port button to open the port settings dialog as shown below.

    add_remove_port

    In this dialog, you can add or remove the Input or Output ports. To add an Input port, click on the (2) + button. This will add an Input port. To remove an Input port, click the (3) Delete icon next to the Input port you want to delete.

    Similarly, you can control the Output ports by switching to the (4) Output tab. As you connect the Input and Output ports to Gems outside the Subgraph, you will be able to see the schema for each port reflected here.

    Subgraph Configurations

    Subgraphs are configurable just like Pipelines are configurable. For Subgraphs, the configurations can apply in one of two ways: (1) Pipeline level Configurations apply to the Subgraph or (2) Specify Configurations only for the Subgraph

    Pipeline Level Configurations can apply to the Subgraph

    Pipeline configs are accessible inside Subgraphs by copying the config to the Subgraph. Check out the video below to see how this works.

    Specify configurations only for the Subgraph

    In Subgraph Configs, you can define values that can be set at the Subgraph level and then be accessed inside any component in the Subgraph. These will also reflect under Configurations of Pipelines using these Subgraphs, but they can only be edited from Subgraph configs.

    - - + + \ No newline at end of file diff --git a/Spark/gems/subgraph/index.html b/Spark/gems/subgraph/index.html index 16307ee4b6..47e243a678 100644 --- a/Spark/gems/subgraph/index.html +++ b/Spark/gems/subgraph/index.html @@ -6,10 +6,9 @@ Subgraph | Prophecy - - - - + + +
    @@ -18,7 +17,7 @@ Read here for more details of the Gem code. More detailed docs on writing Subgraph Gems to follow soon.

    The newly constructed Subgraph Gem can be utilized within any Pipeline of this Project, accessible through the Subgraph menu as demonstrated below. Furthermore, you have the option to Release this project, enabling its use as a dependency in other projects, thus incorporating the created Gem into various projects. Read here for more details on project as a dependency.

    Use_subgraph

    What's next

    To learn more about Spark Subgraphs, see the following pages:

    - - + + \ No newline at end of file diff --git a/Spark/gems/subgraph/table-iterator/index.html b/Spark/gems/subgraph/table-iterator/index.html index cbc8ca0c09..e1bb9dcae2 100644 --- a/Spark/gems/subgraph/table-iterator/index.html +++ b/Spark/gems/subgraph/table-iterator/index.html @@ -6,14 +6,13 @@ TableIterator | Prophecy - - - - + + +
    -

    TableIterator

    Spark Gem

    TableIterator allows you to iterate over one or more Gems for each row of the first input DataFrame. +

    TableIterator

    Spark Gem

    TableIterator allows you to iterate over one or more Gems for each row of the first input DataFrame. Let's see how to create a Basic Loop which loops over a Metadata Table, and for each row of the table will run the Gems inside the Subgraph.

    Creating a TableIterator Gem

    First, add the Input Gem that you want to iterate over. For this, simply use an existing Dataset or create a new Source Gem pointing to your Metadata table. You can run this Source Gem to see the data your loop would be running for.

    Now, Drag and Drop the (1) TableIterator Gem from the Subgraph menu, and connect it to the above created Source Gem.

    Create_table_iterator

    Configure the TableIterator

    Open the TableIterator Gem, and click on (1) Configure to open the Settings dialog. Here, on the left side panel you can edit the (2) Name of your Gem, check the (3) Input Schema for your DataFrame on which the loop will iterate.

    On the right side, you can define your Iterator Settings, and any other Subgraph Configs you want to use in the Subgraph. @@ -25,7 +24,7 @@ You can click on the (3) Interim to check values for that Iteration.

    run_loop

    Click on the (2) Iteration button, and it will open up the Iterations table as shown below. Here you can see all iterations and config values for each of them.

    iterations

    Adding Inputs and Outputs to TableIterator

    For a TableIterator Gem, the first input port is for the DataFrame that you want to iterate over. You can (1) Add more Inputs or switch to the (2) Output tab to add more Outputs as needed. These extra inputs would not change for every iteration. Also, the output will be a Union of the outputs of all Iterations. You can (3) Delete any port by hovering over it.

    table_iterator_ports

    - - + + \ No newline at end of file diff --git a/Spark/gems/transform/aggregate/index.html b/Spark/gems/transform/aggregate/index.html index 1813dc1bac..44c7bb16ab 100644 --- a/Spark/gems/transform/aggregate/index.html +++ b/Spark/gems/transform/aggregate/index.html @@ -6,16 +6,15 @@ Aggregate | Prophecy - - - - + + +
    -

    Aggregate

    Spark Gem

    Allows you to group the data and apply aggregation methods and pivot operation.

    Parameters

    ParameterDescriptionRequired
    DataFrameInput DataFrameTrue
    Target column (Aggregate Tab)Output column name of aggregated columnTrue
    Expression (Aggregate Tab)Aggregate function expression
    Eg: sum("amount"), count(*), avg("amount")
    True
    Target column (Group By Tab)Output column name of grouped columnRequired if Pivot Column is present
    Expression (Group By Tab)Column expression to group on
    Eg: col("id"), month(col("order_date"))
    Required if a Target Column(Group By) is present
    Pivot columnColumn name to pivotFalse
    Unique valuesList of values in Pivot Column that will be translated to columns in the output DataFrameFalse
    Propagate All Input ColumnsIf true, all columns from the DataFrame would be propagated to output DataFrame. By default all columns apart from ones specified in group by, pivot, aggregate expressions are propagated as first(col_name) in the output DataFrameFalse
    info

    Providing Unique values while performing pivot operation improves the performance of the operation since Spark does not have to first compute the list of distinct values of Pivot Column internally.

    Examples

    Aggregation without Grouping

    Example usage of Aggregate - Aggregation without Grouping

    def total_orders(spark: SparkSession, in0: DataFrame) -> DataFrame:
    return in0.agg(count(lit(1)).alias("number_of_orders"))

    Aggregation with Grouping

    Example usage of Aggregate - Aggregation with Grouping

    def orders_by_date(spark: SparkSession, in0: DataFrame) -> DataFrame:
    df1 = in0.groupBy(concat(month(col("order_date")), lit("/"), year(col("order_date")))
    .alias("order_month(MM/YYYY)"))
    return df1.agg(count(lit(1)).alias("number_of_orders"))

    Pivot Columns

    Example usage of Aggregate - Pivoting

    def orders_by_date_N_status(spark: SparkSession, in0: DataFrame) -> DataFrame:
    df1 = in0.groupBy(concat(month(col("order_date")), lit("/"), year(col("order_date"))).alias("order_month(MM/YYYY)"))
    df2 = df1.pivot("order_status", ["Approved", "Finished", "Pending", "Started"])
    return df2.agg(count(lit(1)).alias("number_of_orders"))

    Propagate all input Columns

    This option is used to propagate all columns from input DataFrame to output DataFrame. +

    Aggregate

    Spark Gem

    Allows you to group the data and apply aggregation methods and pivot operation.

    Parameters

    ParameterDescriptionRequired
    DataFrameInput DataFrameTrue
    Target column (Aggregate Tab)Output column name of aggregated columnTrue
    Expression (Aggregate Tab)Aggregate function expression
    Eg: sum("amount"), count(*), avg("amount")
    True
    Target column (Group By Tab)Output column name of grouped columnRequired if Pivot Column is present
    Expression (Group By Tab)Column expression to group on
    Eg: col("id"), month(col("order_date"))
    Required if a Target Column(Group By) is present
    Pivot columnColumn name to pivotFalse
    Unique valuesList of values in Pivot Column that will be translated to columns in the output DataFrameFalse
    Propagate All Input ColumnsIf true, all columns from the DataFrame would be propagated to output DataFrame. By default all columns apart from ones specified in group by, pivot, aggregate expressions are propagated as first(col_name) in the output DataFrameFalse
    info

    Providing Unique values while performing pivot operation improves the performance of the operation since Spark does not have to first compute the list of distinct values of Pivot Column internally.

    Examples

    Aggregation without Grouping

    Example usage of Aggregate - Aggregation without Grouping

    def total_orders(spark: SparkSession, in0: DataFrame) -> DataFrame:
    return in0.agg(count(lit(1)).alias("number_of_orders"))

    Aggregation with Grouping

    Example usage of Aggregate - Aggregation with Grouping

    def orders_by_date(spark: SparkSession, in0: DataFrame) -> DataFrame:
    df1 = in0.groupBy(concat(month(col("order_date")), lit("/"), year(col("order_date")))
    .alias("order_month(MM/YYYY)"))
    return df1.agg(count(lit(1)).alias("number_of_orders"))

    Pivot Columns

    Example usage of Aggregate - Pivoting

    def orders_by_date_N_status(spark: SparkSession, in0: DataFrame) -> DataFrame:
    df1 = in0.groupBy(concat(month(col("order_date")), lit("/"), year(col("order_date"))).alias("order_month(MM/YYYY)"))
    df2 = df1.pivot("order_status", ["Approved", "Finished", "Pending", "Started"])
    return df2.agg(count(lit(1)).alias("number_of_orders"))

    Propagate all input Columns

    This option is used to propagate all columns from the input DataFrame to the output DataFrame. By default, first(col_name) is used as the aggregate function for columns not specified in the group by, pivot, or aggregate expressions.

    def Aggregate_1(spark: SparkSession, in0: DataFrame) -> DataFrame:
    df1 = in0.groupBy(col("customer_id"))

    return df1.agg(
    *[first(col("order_date")).alias("order_date")],
    *[
    first(col(x)).alias(x)
    for x in in0.columns
    if x not in ["order_date", "customer_id"]
    ]
    )
    - - + + \ No newline at end of file diff --git a/Spark/gems/transform/bulk-column-expressions/index.html b/Spark/gems/transform/bulk-column-expressions/index.html index 5c06b6d7fe..7dbb67590b 100644 --- a/Spark/gems/transform/bulk-column-expressions/index.html +++ b/Spark/gems/transform/bulk-column-expressions/index.html @@ -6,15 +6,14 @@ BulkColumnExpressions | Prophecy - - - - + + +
    -

    BulkColumnExpressions

    Spark Gem

    The BulkColumnExpressions Gem primarily lets you cast or change the data type of multiple columns at once. It provides additional functionality, including:

    • Adding a prefix or suffix to selected columns.
    • Applying a custom expression to selected columns.

    Parameters

    ParameterDescription
    Data Type of the columns to do operations onThe data type of columns to select.
    Selected ColumnsThe columns on which to apply transformations.
    Change output column nameAn option to add a prefix or suffix to the selected column names.
    Change output column typeThe data type that the columns will be transformed into.
    Output ExpressionA Spark SQL expression that can be applied to the selected columns. This field is required. If you only want to select the column, use column_value as the expression.

    Example

    Assume you have some columns in a table that represent zero-based indices and are stored as long data types. You want them to represent one-based indices and be stored as integers to optimize memory use.

    Using the BulkColumnExpressions Gem, you can:

    • Filter your columns by long data types.
    • Select the columns you wish to transform.
    • Cast the output column(s) to be integers.
    • Include column_value + 1 in the expression field to shift the indices.
    - - +

    BulkColumnExpressions

    Spark Gem

    The BulkColumnExpressions Gem primarily lets you cast or change the data type of multiple columns at once. It provides additional functionality, including:

    • Adding a prefix or suffix to selected columns.
    • Applying a custom expression to selected columns.

    Parameters

    ParameterDescription
    Data Type of the columns to do operations onThe data type of columns to select.
    Selected ColumnsThe columns on which to apply transformations.
    Change output column nameAn option to add a prefix or suffix to the selected column names.
    Change output column typeThe data type that the columns will be transformed into.
    Output ExpressionA Spark SQL expression that can be applied to the selected columns. This field is required. If you only want to select the column, use column_value as the expression.

    Example

    Assume you have some columns in a table that represent zero-based indices and are stored as long data types. You want them to represent one-based indices and be stored as integers to optimize memory use.

    Using the BulkColumnExpressions Gem, you can:

    • Filter your columns by long data types.
    • Select the columns you wish to transform.
    • Cast the output column(s) to be integers.
    • Include column_value + 1 in the expression field to shift the indices.
    + + \ No newline at end of file diff --git a/Spark/gems/transform/bulk-column-rename/index.html b/Spark/gems/transform/bulk-column-rename/index.html index 198d8fb562..3a7e9a6996 100644 --- a/Spark/gems/transform/bulk-column-rename/index.html +++ b/Spark/gems/transform/bulk-column-rename/index.html @@ -6,15 +6,14 @@ BulkColumnRename | Prophecy - - - - + + +
    -

    BulkColumnRename

    Spark Gem

    Use the BulkColumnRename Gem to rename multiple columns in your Dataset in a systematic way.

    Parameters

    ParameterDescription
    Columns to renameSelect one or more columns to rename from the dropdown.
    MethodChoose to add a prefix, add a suffix, or use a custom expression to change column names.

    Based on the method you select, you will see an option to enter the prefix, suffix, or expression of your choice.

    Examples

    Add a prefix

    One example is to add the prefix meta_ to tag columns that contain metadata.

    Add prefix to multiple columns

    Use a custom expression

    You can accomplish the same or more complex changes using a custom expression like concat('meta_', column_name).

    - - +

    BulkColumnRename

    Spark Gem

    Use the BulkColumnRename Gem to rename multiple columns in your Dataset in a systematic way.

    Parameters

    ParameterDescription
    Columns to renameSelect one or more columns to rename from the dropdown.
    MethodChoose to add a prefix, add a suffix, or use a custom expression to change column names.

    Based on the method you select, you will see an option to enter the prefix, suffix, or expression of your choice.

    Examples

    Add a prefix

    One example is to add the prefix meta_ to tag columns that contain metadata.

    Add prefix to multiple columns

    Use a custom expression

    You can accomplish the same or more complex changes using a custom expression like concat('meta_', column_name).

    + + \ No newline at end of file diff --git a/Spark/gems/transform/data-cleansing/index.html b/Spark/gems/transform/data-cleansing/index.html index 991e44948d..6ffa860617 100644 --- a/Spark/gems/transform/data-cleansing/index.html +++ b/Spark/gems/transform/data-cleansing/index.html @@ -6,15 +6,14 @@ DataCleansing | Prophecy - - - - + + +
    -

    DataCleansing

    Spark Gem

    Use the DataCleansing Gem to standardize data formats and address missing or null values in the data.

    Parameters

    ParameterDescription
    Select columns you want to cleanThe set of columns on which to perform cleaning transformations
    Remove null dataThe method used to remove null data
    Replace null values in columnThe method used to replace null values
    Clean dataDifferent ways to standardize the format of data in columns

    Example

    Assume you have a table that includes customer feedback on individual orders. In this scenario, some customers may not provide feedback, resulting in null values in the data. You can use the DataCleansing Gem to replace null values with the string NA.

    Replace null with string

    - - +

    DataCleansing

    Spark Gem

    Use the DataCleansing Gem to standardize data formats and address missing or null values in the data.

    Parameters

    ParameterDescription
    Select columns you want to cleanThe set of columns on which to perform cleaning transformations
    Remove null dataThe method used to remove null data
    Replace null values in columnThe method used to replace null values
    Clean dataDifferent ways to standardize the format of data in columns

    Example

    Assume you have a table that includes customer feedback on individual orders. In this scenario, some customers may not provide feedback, resulting in null values in the data. You can use the DataCleansing Gem to replace null values with the string NA.

    Replace null with string

    + + \ No newline at end of file diff --git a/Spark/gems/transform/deduplicate/index.html b/Spark/gems/transform/deduplicate/index.html index e7aab6742f..52151f7f00 100644 --- a/Spark/gems/transform/deduplicate/index.html +++ b/Spark/gems/transform/deduplicate/index.html @@ -6,15 +6,14 @@ Deduplicate | Prophecy - - - - + + +
    -

    Deduplicate

    Spark Gem

    Removes rows with duplicate values of specified columns.

    Parameters

    ParameterDescriptionRequired
    DataframeInput dataframeTrue
    Row to keep- Any: Keeps any one row among duplicates. Uses underlying dropDuplicates construct
    - First: Keeps first occurrence of the duplicate row
    - Last: Keeps last occurrence of the duplicate row
    - Unique Only: Keeps rows that don't have duplicates
    - Distinct Rows: Keeps all distinct rows. This is equivalent to performing a df.distinct() operation
    Default is Any
    True
    Deduplicate columnsColumns to consider while removing duplicate rows (not required for Distinct Rows)True
    Order columnsColumns to sort Dataframe on before de-duping in case of First and Last rows to keepFalse

    Examples


    Rows to keep - Any

    Example usage of Deduplicate

    def dedup(spark: SparkSession, in0: DataFrame) -> DataFrame:
    return in0.dropDuplicates(["tran_id"])

    Rows to keep - First

    Example usage of Deduplicate - First

    def earliest_cust_order(spark: SparkSession, in0: DataFrame) -> DataFrame:
    return in0\
    .withColumn(
    "row_number",
    row_number()\
    .over(Window\
    .partitionBy("customer_id")\
    .orderBy(col("order_dt").asc())
    )\
    .filter(col("row_number") == lit(1))\
    .drop("row_number")

    Rows to keep - Last

    Example usage of Deduplicate - Last

    def latest_cust_order(spark: SparkSession, in0: DataFrame) -> DataFrame:
    return in0\
    .withColumn(
    "row_number",
    row_number()\
    .over(Window\
    .partitionBy("customer_id")\
    .orderBy(col("order_dt").asc())
    )\
    .withColumn(
    "count",
    count("*")\
    .over(Window\
    .partitionBy("customer_id")
    )\
    .filter(col("row_number") == col("count"))\
    .drop("row_number")\
    .drop("count")

    Rows to keep - Unique Only

    Example usage of Deduplicate - Unique

    def single_order_customers(spark: SparkSession, in0: DataFrame) -> DataFrame:
    return in0\
    .withColumn(
    "count",
    count("*")\
    .over(Window\
    .partitionBy("customer_id")
    )\
    .filter(col("count") == lit(1))\
    .drop("count")

    Rows to keep - Distinct Rows

    Example usage of Deduplicate - Distinct

    def single_order_customers(spark: SparkSession, in0: DataFrame) -> DataFrame:
    return in0.distinct()
    - - +

    Deduplicate

    Spark Gem

    Removes rows with duplicate values of specified columns.

    Parameters

    ParameterDescriptionRequired
    DataframeInput dataframeTrue
    Row to keep- Any: Keeps any one row among duplicates. Uses underlying dropDuplicates construct
    - First: Keeps first occurrence of the duplicate row
    - Last: Keeps last occurrence of the duplicate row
    - Unique Only: Keeps rows that don't have duplicates
    - Distinct Rows: Keeps all distinct rows. This is equivalent to performing a df.distinct() operation
    Default is Any
    True
    Deduplicate columnsColumns to consider while removing duplicate rows (not required for Distinct Rows)True
    Order columnsColumns to sort Dataframe on before de-duping in case of First and Last rows to keepFalse

    Examples


    Rows to keep - Any

    Example usage of Deduplicate

    def dedup(spark: SparkSession, in0: DataFrame) -> DataFrame:
    return in0.dropDuplicates(["tran_id"])

    Rows to keep - First

    Example usage of Deduplicate - First

    def earliest_cust_order(spark: SparkSession, in0: DataFrame) -> DataFrame:
    return in0\
    .withColumn(
    "row_number",
    row_number()\
    .over(Window\
    .partitionBy("customer_id")\
    .orderBy(col("order_dt").asc())
    )\
    .filter(col("row_number") == lit(1))\
    .drop("row_number")

    Rows to keep - Last

    Example usage of Deduplicate - Last

    def latest_cust_order(spark: SparkSession, in0: DataFrame) -> DataFrame:
    return in0\
    .withColumn(
    "row_number",
    row_number()\
    .over(Window\
    .partitionBy("customer_id")\
    .orderBy(col("order_dt").asc())
    )\
    .withColumn(
    "count",
    count("*")\
    .over(Window\
    .partitionBy("customer_id")
    )\
    .filter(col("row_number") == col("count"))\
    .drop("row_number")\
    .drop("count")

    Rows to keep - Unique Only

    Example usage of Deduplicate - Unique

    def single_order_customers(spark: SparkSession, in0: DataFrame) -> DataFrame:
    return in0\
    .withColumn(
    "count",
    count("*")\
    .over(Window\
    .partitionBy("customer_id")
    )\
    .filter(col("count") == lit(1))\
    .drop("count")

    Rows to keep - Distinct Rows

    Example usage of Deduplicate - Distinct

    def single_order_customers(spark: SparkSession, in0: DataFrame) -> DataFrame:
    return in0.distinct()
    + + \ No newline at end of file diff --git a/Spark/gems/transform/dynamic-select/index.html b/Spark/gems/transform/dynamic-select/index.html index a77dc89042..db83f9029a 100644 --- a/Spark/gems/transform/dynamic-select/index.html +++ b/Spark/gems/transform/dynamic-select/index.html @@ -6,15 +6,14 @@ DynamicSelect | Prophecy - - - - + + +
    -

    DynamicSelect

    Spark Gem

    Use the DynamicSelect Gem to dynamically filter columns of your Dataset based on a set of conditions.

    Configuration

    There are two ways to configure the DynamicSelect.

    ConfigurationDescription
    Select field typesChoose one or more types of columns to keep in the Dataset, such as string, decimal, or date.
    Select via expressionCreate an expression that limits the type of columns to keep in the Dataset.

    Examples

    You’ll use DynamicSelect when you want to avoid hard-coding your choice of columns. In other words, rather than define each column to keep in your Pipeline, you let the system automatically choose the columns based on certain conditions or rules.

    Remove date columns using field type

    Assume you would like to remove irrelevant date and timestamp columns from your Dataset. You can do so with the Select field types method by selecting all field types to maintain, except for date and timestamp.

    Keep all columns except Date and Timestamp column using the visual interface

    Remove date columns with an expression

    Using the same example, you can accomplish the same task with the Select via expression method by inputting the expression column_type NOT IN ('date', 'timestamp').

    - - +

    DynamicSelect

    Spark Gem

    Use the DynamicSelect Gem to dynamically filter columns of your Dataset based on a set of conditions.

    Configuration

    There are two ways to configure the DynamicSelect.

    ConfigurationDescription
    Select field typesChoose one or more types of columns to keep in the Dataset, such as string, decimal, or date.
    Select via expressionCreate an expression that limits the type of columns to keep in the Dataset.

    Examples

    You’ll use DynamicSelect when you want to avoid hard-coding your choice of columns. In other words, rather than define each column to keep in your Pipeline, you let the system automatically choose the columns based on certain conditions or rules.

    Remove date columns using field type

    Assume you would like to remove irrelevant date and timestamp columns from your Dataset. You can do so with the Select field types method by selecting all field types to maintain, except for date and timestamp.

    Keep all columns except Date and Timestamp column using the visual interface

    Remove date columns with an expression

    Using the same example, you can accomplish the same task with the Select via expression method by inputting the expression column_type NOT IN ('date', 'timestamp').

    + + \ No newline at end of file diff --git a/Spark/gems/transform/filter/index.html b/Spark/gems/transform/filter/index.html index 364f6563a6..1a71fda65f 100644 --- a/Spark/gems/transform/filter/index.html +++ b/Spark/gems/transform/filter/index.html @@ -6,15 +6,14 @@ Filter | Prophecy - - - - + + +
    -

    Filter

    Spark Gem

    Filters DataFrame based on the provided filter condition

    Parameters

    ParameterDescriptionRequired
    DataFrameInput DataFrame on which the filter condition will be applied.True
    Filter ConditionBooleanType column or boolean expression. Supports SQL, Python and Scala expressions.True

    Example

    Example usage of Filter

    Spark Code

    def Filter_Orders(spark: SparkSession, in0: DataFrame) -> DataFrame:
    return in0.filter(
    (
    ((col("order_category") == lit("Marketing"))
    & ((col("order_status") == lit("Finished")) | (col("order_status") == lit("Approved"))))
    & ~ col("is_discounted")
    )
    )
    - - +

    Filter

    Spark Gem

    Filters DataFrame based on the provided filter condition

    Parameters

    ParameterDescriptionRequired
    DataFrameInput DataFrame on which the filter condition will be applied.True
    Filter ConditionBooleanType column or boolean expression. Supports SQL, Python and Scala expressions.True

    Example

    Example usage of Filter

    Spark Code

    def Filter_Orders(spark: SparkSession, in0: DataFrame) -> DataFrame:
    return in0.filter(
    (
    ((col("order_category") == lit("Marketing"))
    & ((col("order_status") == lit("Finished")) | (col("order_status") == lit("Approved"))))
    & ~ col("is_discounted")
    )
    )
    + + \ No newline at end of file diff --git a/Spark/gems/transform/flatten-schema/index.html b/Spark/gems/transform/flatten-schema/index.html index 7fae4fe94e..1efeb3e962 100644 --- a/Spark/gems/transform/flatten-schema/index.html +++ b/Spark/gems/transform/flatten-schema/index.html @@ -6,15 +6,14 @@ FlattenSchema | Prophecy - - - - + + +
    -

    FlattenSchema

    Spark Gem

    When processing raw data it can be useful to flatten complex data types like structures and arrays into simpler, flatter schemas.

    The FlattenSchema gem

    The Input

    FlattenSchema works on DataFrames that have nested columns that you'd like to extract into a flat schema.

    For example, with an input schema like so:

    Input schema

    And the data looks like so:

    Input data

    We want to extract count from result and all of the columns from events into a flattened schema.

    The Expressions

    Having added a FlattenSchema Gem to your Pipeline, all you need to do is click the column names you wish to extract and they'll be added to the Expressions section. Then, you can change the values in the Target Column to change the name of output columns.

    Adding Expressions

    The Columns Delimiter dropdown allows you to control how the names of the new columns are derived. Currently dashes and underscores are supported.

    The Output

    If we check the Output tab in the Gem, you'll see the schema that we've created using the selected columns.

    Output schema

    And here's what the output data looks like:

    Output interim

    No more nested structures!

    info

    For more advanced use cases, the Spark explode function is available to use in the Reformat Gem, Custom SQL Gem, or anywhere else that accepts Spark expressions.

    - - +

    FlattenSchema

    Spark Gem

    When processing raw data it can be useful to flatten complex data types like structures and arrays into simpler, flatter schemas.

    The FlattenSchema gem

    The Input

    FlattenSchema works on DataFrames that have nested columns that you'd like to extract into a flat schema.

    For example, with an input schema like so:

    Input schema

    And the data looks like so:

    Input data

    We want to extract count from result and all of the columns from events into a flattened schema.

    The Expressions

    Having added a FlattenSchema Gem to your Pipeline, all you need to do is click the column names you wish to extract and they'll be added to the Expressions section. Then, you can change the values in the Target Column to change the name of output columns.

    Adding Expressions

    The Columns Delimiter dropdown allows you to control how the names of the new columns are derived. Currently dashes and underscores are supported.

    The Output

    If we check the Output tab in the Gem, you'll see the schema that we've created using the selected columns.

    Output schema

    And here's what the output data looks like:

    Output interim

    No more nested structures!

    info

    For more advanced use cases, the Spark explode function is available to use in the Reformat Gem, Custom SQL Gem, or anywhere else that accepts Spark expressions.

    + + \ No newline at end of file diff --git a/Spark/gems/transform/index.html b/Spark/gems/transform/index.html index 738906145d..d2ccf4946d 100644 --- a/Spark/gems/transform/index.html +++ b/Spark/gems/transform/index.html @@ -6,15 +6,14 @@ Transform | Prophecy - - - - + + +
    - - + + \ No newline at end of file diff --git a/Spark/gems/transform/limit/index.html b/Spark/gems/transform/limit/index.html index 69d0614536..f903cedb1c 100644 --- a/Spark/gems/transform/limit/index.html +++ b/Spark/gems/transform/limit/index.html @@ -6,15 +6,14 @@ Limit | Prophecy - - - - + + + - - +
    + + \ No newline at end of file diff --git a/Spark/gems/transform/order-by/index.html b/Spark/gems/transform/order-by/index.html index 996a0e9b55..c209824530 100644 --- a/Spark/gems/transform/order-by/index.html +++ b/Spark/gems/transform/order-by/index.html @@ -6,15 +6,14 @@ OrderBy | Prophecy - - - - + + +
    -

    OrderBy

    Spark Gem

    Sorts a DataFrame on one or more columns in ascending or descending order.

    Parameters

    ParameterDescriptionRequired
    DataFrameInput DataFrame to be sortedTrue
    Order columnsColumns to sort DataFrame byTrue
    SortOrder of sorting - ascending or descendingTrue

    Example

    Example usage of OrderBy

    Spark Code

    def Sort(spark: SparkSession, in0: DataFrame) -> DataFrame:
    return in0.orderBy(col("name").asc(), col("updated_at").desc())
    - - +

    OrderBy

    Spark Gem

    Sorts a DataFrame on one or more columns in ascending or descending order.

    Parameters

    ParameterDescriptionRequired
    DataFrameInput DataFrame to be sortedTrue
    Order columnsColumns to sort DataFrame byTrue
    SortOrder of sorting - ascending or descendingTrue

    Example

    Example usage of OrderBy

    Spark Code

    def Sort(spark: SparkSession, in0: DataFrame) -> DataFrame:
    return in0.orderBy(col("name").asc(), col("updated_at").desc())
    + + \ No newline at end of file diff --git a/Spark/gems/transform/reformat/index.html b/Spark/gems/transform/reformat/index.html index 7cb27bb1f9..e61765e95a 100644 --- a/Spark/gems/transform/reformat/index.html +++ b/Spark/gems/transform/reformat/index.html @@ -6,15 +6,14 @@ Reformat | Prophecy - - - - + + +
    -

    Reformat

    Spark Gem

    Transforms one or more column names or values by using expressions and/or functions. It's useful when we need to extract only the required columns or make changes column-wise.

    Parameters

    ParameterDescriptionRequired
    DataFrameInput DataFrame on which changes are requiredTrue
    Target columnOutput column nameFalse
    ExpressionExpression to compute target columnRequired if a Target column is present
    info

    If no columns are selected, then all columns are passed through to the output

    Example

    Example usage of Reformat

    Spark Code

    Reformat converts to a SQL SELECT (in relational terms, a projection), unlike the SchemaTransform Gem, which uses the underlying withColumn construct.

    def Reformat(spark: SparkSession, in0: DataFrame) -> DataFrame:
    return in0.select(
    col("id"),
    col("email").alias("email_address"),
    col("name"),
    col("updated_at"),
    concat_ws("$$$", col("address_line1"), col("address_line2"), col("postal_code"))
    .alias("address_string")
    )

    Advanced Import

    The Advanced Import feature allows you to bulk import statements that are structured similarly to CSV/TSV files. This can be useful if you have your expressions/transformation logic in another format and just want to quickly configure a Reformat Gem based on this logic.

    Using Advanced Import

    1. Click the Advanced button in the Reformat UI.

    Advanced import toggle

    1. Enter the expressions into the text area using the format as described below:

    Advanced import mode

    1. Use the button at the top (labeled Expressions) to switch back to the expressions view. This will translate the expressions from the CSV format to the table format and will show any errors detected.

    Format

    The format of these expressions is target_name,target_expr, where target_name is the desired new column name and target_expr is the Spark expression that will be used to generate the new column.

    caution

    For target_expr values that contain a comma , or span multiple lines, you must surround them by `` on either side. For example:

    customer_id,customer_id
    full_name,``concat(first_name, ' ', last_name)``
    - - +

    Reformat

    Spark Gem

    Transforms one or more column names or values by using expressions and/or functions. It's useful when we need to extract only the required columns or make changes column-wise.

    Parameters

    ParameterDescriptionRequired
    DataFrameInput DataFrame on which changes are requiredTrue
    Target columnOutput column nameFalse
    ExpressionExpression to compute target columnRequired if a Target column is present
    info

    If no columns are selected, then all columns are passed through to the output

    Example

    Example usage of Reformat

    Spark Code

    Reformat converts to a SQL SELECT (in relational terms, a projection), unlike the SchemaTransform Gem, which uses the underlying withColumn construct.

    def Reformat(spark: SparkSession, in0: DataFrame) -> DataFrame:
    return in0.select(
    col("id"),
    col("email").alias("email_address"),
    col("name"),
    col("updated_at"),
    concat_ws("$$$", col("address_line1"), col("address_line2"), col("postal_code"))
    .alias("address_string")
    )

    Advanced Import

    The Advanced Import feature allows you to bulk import statements that are structured similarly to CSV/TSV files. This can be useful if you have your expressions/transformation logic in another format and just want to quickly configure a Reformat Gem based on this logic.

    Using Advanced Import

    1. Click the Advanced button in the Reformat UI.

    Advanced import toggle

    1. Enter the expressions into the text area using the format as described below:

    Advanced import mode

    1. Use the button at the top (labeled Expressions) to switch back to the expressions view. This will translate the expressions from the CSV format to the table format and will show any errors detected.

    Format

    The format of these expressions is target_name,target_expr, where target_name is the desired new column name and target_expr is the Spark expression that will be used to generate the new column.

    caution

    For target_expr values that contain a comma , or span multiple lines, you must surround them by `` on either side. For example:

    customer_id,customer_id
    full_name,``concat(first_name, ' ', last_name)``
    + + \ No newline at end of file diff --git a/Spark/gems/transform/schema-transform/index.html b/Spark/gems/transform/schema-transform/index.html index 5bb204ba61..d1c5501348 100644 --- a/Spark/gems/transform/schema-transform/index.html +++ b/Spark/gems/transform/schema-transform/index.html @@ -6,16 +6,15 @@ SchemaTransform | Prophecy - - - - + + +
    -

    SchemaTransform

    Spark Gem

    SchemaTransform is used to add, edit, rename or drop columns from the incoming DataFrame.

    info

    Unlike Reformat which is a set operation where all the transforms are applied in parallel, transformations here are applied in order. +

    SchemaTransform

    Spark Gem

    SchemaTransform is used to add, edit, rename or drop columns from the incoming DataFrame.

    info

    Unlike Reformat which is a set operation where all the transforms are applied in parallel, transformations here are applied in order. Reformat is a SQL select and is preferable when making many changes.

    Parameters

    ParameterDescriptionRequired
    DataFrameInput DataFrameTrue
    OperationAdd/Replace Column, Rename Column and Drop ColumnRequired if a transformation is added
    New ColumnOutput column name (when Add/Replace operation is selected)Required if Add/Replace Column is selected
    ExpressionExpression to generate new column (when Add/Replace operation is selected)Required if Add/Replace Column is selected
    Old Column NameColumn to be renamed (when Rename operation is selected)Required if Rename Column is selected
    New Column NameOutput column name (when Rename operation is selected)Required if Rename Column is selected
    Column to dropColumn to be dropped (when Drop operation is selected)Required if Drop Column is selected

    Operation types

    Operation TypeDescription
    Add/ReplaceAdd a new column or replace an existing one based on an expression
    DropRemoves a single column from the next stages of the pipeline. This is useful if you need 9 out of 10 columns, for example.
    RenameRenames an existing column
    Add if MissingProvide a default value for a column if it's missing from the source. For example, if you read from a CSV file daily and want to ensure a column has a value even when it's not in the source files, use this option.

    Example

    Example usage of SchemaTransform

    Spark Code

    def transform(spark: SparkSession, in0: DataFrame) -> DataFrame:
    return in0\
    .withColumn("business_date", to_date(lit("2022-05-05"), "yyyy-MM-dd"))\
    .withColumnRenamed("bonus_rate", "bonus")\
    .drop("slug")

    Advanced Import

    The Advanced Import feature allows you to bulk import statements that are structured similarly to CSV/TSV files. This can be useful if you have your expressions/transformation logic in another format and just want to quickly configure a SchemaTransform Gem based on existing logic.

    Using Advanced Import

    1. Click the Advanced button in the SchemaTransform Gem UI

    Advanced import toggle

    1. Enter the expressions into the text area using the format as described below:

    Advanced import mode

    1. Use the button at the top (labeled Expressions) to switch back to the expressions view. This will translate the expressions from the CSV format to the table format and will show any errors detected.

    Format

    The format of these expressions is op_type,target_name,target_expr, where op_type is the type of operation (see below); target_name is the desired new column name and target_expr is the Spark expression that will be used to generate the new column. Each op_type has a different number of extra columns that have to be provided, see below for more details.

    caution

    For target_expr values that contain a comma , or span multiple lines, you must surround them by `` on either side. For example:

    addrep,customer_id,customer_id
    addrep,full_name,``concat(first_name, ' ', last_name)``

    Advanced Import Operation types

    Operation TypeAdvanced Import name:Arguments:Example
    Add/Replaceaddrep2addrep,foo,CAST(NULL as int)
    Dropdrop1drop bar
    Renamerename2rename,foo,bar
    Add if missingmissing2missing,foo,current_timestamp()
    - - + + \ No newline at end of file diff --git a/Spark/gems/transform/set-operation/index.html b/Spark/gems/transform/set-operation/index.html index 838aa32b5c..d9dde68150 100644 --- a/Spark/gems/transform/set-operation/index.html +++ b/Spark/gems/transform/set-operation/index.html @@ -6,16 +6,15 @@ SetOperation | Prophecy - - - - + + +
    -

    SetOperation

    Spark Gem

    Use the SetOperation Gem to perform addition or subtraction of rows from DataFrames with identical schemas and different data.

    Parameters

    ParameterDescriptionRequired
    DataFrame 1First input DataFrameTrue
    DataFrame 2Second input DataFrameTrue
    DataFrame NNth input DataFrameFalse
    Operation typeOperation to perform
    - Union: Returns a DataFrame containing rows in any one of the input DataFrames, while preserving duplicates.
    - Intersect All: Returns a DataFrame containing rows in all of the input DataFrames, while preserving duplicates.
    - Except All: Returns a DataFrame containing rows in the first DataFrame, but not in the other DataFrames, while preserving duplicates.
    True
    info

    To add more input DataFrames, simply click the + icon on the left sidebar +

    SetOperation

    Spark Gem

    Use the SetOperation Gem to perform addition or subtraction of rows from DataFrames with identical schemas and different data.

    Parameters

    ParameterDescriptionRequired
    DataFrame 1First input DataFrameTrue
    DataFrame 2Second input DataFrameTrue
    DataFrame NNth input DataFrameFalse
    Operation typeOperation to perform
    - Union: Returns a DataFrame containing rows in any one of the input DataFrames, while preserving duplicates.
    - Intersect All: Returns a DataFrame containing rows in all of the input DataFrames, while preserving duplicates.
    - Except All: Returns a DataFrame containing rows in the first DataFrame, but not in the other DataFrames, while preserving duplicates.
    True
    info

    To add more input DataFrames, simply click the + icon on the left sidebar Set Operation - Add input dataframe

    Examples


    Operation Type - Union

    Example usage of Set Operation - Union

    def union(spark: SparkSession, in0: DataFrame, in1: DataFrame, ) -> DataFrame:
    return in0.unionAll(in1)

    Operation Type - Intersect All

    Example usage of Set Operation - Intersect All

    def intersectAll(spark: SparkSession, in0: DataFrame, in1: DataFrame, ) -> DataFrame:
    return in0.intersectAll(in1)

    Operation Type - Except All

    Example usage of Set Operation - Except All

    def exceptAll(spark: SparkSession, in0: DataFrame, in1: DataFrame, ) -> DataFrame:
    return in0.exceptAll(in1)
    - - + + \ No newline at end of file diff --git a/Spark/gems/transform/unpivot/index.html b/Spark/gems/transform/unpivot/index.html index c7c5cf6bd2..d6c05c588a 100644 --- a/Spark/gems/transform/unpivot/index.html +++ b/Spark/gems/transform/unpivot/index.html @@ -6,15 +6,14 @@ Unpivot | Prophecy - - - - + + +
    -

    Unpivot

    Spark Gem

    Use the Unpivot Gem to transform your data from a wide format to a long format.

    note

    If you want to pivot the data, rather than unpivot, use the Aggregate Gem.

    Parameters

    ParameterDescription
    Column(s) to use as identifiersThe column(s) that identify which group or entity the observation corresponds to.
    Columns to unpivotThe columns (wide format) that you would like to transform into a single column (long format).
    Variable column nameThe name of the column that contains the names of the unpivoted columns. This helps describe the values in the value column.
    Value column nameThe name of the column that will contain the values from the unpivoted columns.

    Example

    Transforming your data into a long format can be beneficial when creating visualizations, comparing variables, handling dynamic data, and more.

    Let's think about a time series example. If you have product sales data in a wide format, you may want to transform it into a long format before modeling the time series and analyzing the seasonal patterns in sales.

    The image below shows sample input and output tables for this scenario.

    Wide and long formats of time series data

    This table describes how this transformation was achieved:

    ParameterInput
    Column(s) to use as identifiersThe Product column is the identifier because it defines which product the sales correspond to.
    Columns to unpivotAll of the quarterly sales columns will be unpivoted.
    Variable column nameThe variable column is named Quarter because it identifies the sales period.
    Value column nameThe value column is named UnitsSold because it contains information about the number of units sold.
    - - +

    Unpivot

    Spark Gem

    Use the Unpivot Gem to transform your data from a wide format to a long format.

    note

    If you want to pivot the data, rather than unpivot, use the Aggregate Gem.

    Parameters

    ParameterDescription
    Column(s) to use as identifiersThe column(s) that identify which group or entity the observation corresponds to.
    Columns to unpivotThe columns (wide format) that you would like to transform into a single column (long format).
    Variable column nameThe name of the column that contains the names of the unpivoted columns. This helps describe the values in the value column.
    Value column nameThe name of the column that will contain the values from the unpivoted columns.

    Example

    Transforming your data into a long format can be beneficial when creating visualizations, comparing variables, handling dynamic data, and more.

    Let's think about a time series example. If you have product sales data in a wide format, you may want to transform it into a long format before modeling the time series and analyzing the seasonal patterns in sales.

    The image below shows sample input and output tables for this scenario.

    Wide and long formats of time series data

    This table describes how this transformation was achieved:

    ParameterInput
    Column(s) to use as identifiersThe Product column is the identifier because it defines which product the sales correspond to.
    Columns to unpivotAll of the quarterly sales columns will be unpivoted.
    Variable column nameThe variable column is named Quarter because it identifies the sales period.
    Value column nameThe value column is named UnitsSold because it contains information about the number of units sold.
    + + \ No newline at end of file diff --git a/Spark/gems/transform/window-function/index.html b/Spark/gems/transform/window-function/index.html index 80602e103a..0c55711aa9 100644 --- a/Spark/gems/transform/window-function/index.html +++ b/Spark/gems/transform/window-function/index.html @@ -6,16 +6,15 @@ WindowFunction | Prophecy - - - - + + +
    -

    WindowFunction

    Spark Gem

    The WindowFunction lets you define a WindowSpec and apply window functions on a DataFrame.

    Parameters

    ParameterDescriptionRequired
    DataFrameInput DataFrameTrue
    Target columnOutput Column nameTrue
    Source expressionWindow function expression to perform over the created WindowTrue
    Order columnsColumns to order by in Window. Must be a numeric type column if a Range Frame is selectedRequired when Source expression has a Ranking/Analytical function OR when Range Frame is selected
    Partition columnColumn to partition by in WindowFalse
    Row frameRow based frame boundary to apply on WindowFalse
    Range frameRange based frame boundary to apply on WindowFalse
    info

    When Order Columns are not defined, an unbounded window frame (rowFrame, unboundedPreceding, unboundedFollowing) is used by default.

    info

    When Order Columns are defined, a growing window frame (rangeFrame, unboundedPreceding, currentRow) is used by default.

    Examples


    Ranking Functions with Window

    Examples of ranking functions are: row_number(), rank(), dense_rank() and ntile()

    info

    Only the default window frame (rowFrame, unboundedPreceding, currentRow) can be used with Ranking functions

    Example usage of Window - Ranking

    def rank_cust_orders(spark: SparkSession, in0: DataFrame) -> DataFrame:
    return in0\
    .withColumn(
    "order_number",
    row_number().over(
    Window.partitionBy(col("customer_id")).orderBy(col("order_date").asc())
    )
    )\
    .withColumn(
    "order_recency",
    ntile(2).over(
    Window.partitionBy(col("customer_id")).orderBy(col("order_date").asc())
    )
    )

    Analytical Functions with Window

    Examples of analytical functions are: lead(), lag(), cume_dist(), etc.

    info

    Window frame for lead() and lag() can not be specified.

    info

    Only the default window frame (rangeFrame, unboundedPreceding, currentRow) can be used with cume_dist()

    Example usage of Window - Analytical

    def analyse_orders(spark: SparkSession, in0: DataFrame) -> DataFrame:
    return in0\
    .withColumn(
    "previous_order_date",
    lag(col("order_date")).over(
    Window.partitionBy(col("customer_id")).orderBy(col("order_id").asc())
    )
    )\
    .withColumn(
    "next_order_date",
    lead(col("order_date")).over(
    Window.partitionBy(col("customer_id")).orderBy(col("order_id").asc())
    )
    )

    Aggregate Functions with Window

    Examples of aggregate functions are: min(), max(), avg(), etc. +

    WindowFunction

    Spark Gem

    The WindowFunction lets you define a WindowSpec and apply window functions on a DataFrame.

    Parameters

    ParameterDescriptionRequired
    DataFrameInput DataFrameTrue
    Target columnOutput Column nameTrue
    Source expressionWindow function expression to perform over the created WindowTrue
    Order columnsColumns to order by in Window. Must be a numeric type column if a Range Frame is selectedRequired when Source expression has a Ranking/Analytical function OR when Range Frame is selected
    Partition columnColumn to partition by in WindowFalse
    Row frameRow based frame boundary to apply on WindowFalse
    Range frameRange based frame boundary to apply on WindowFalse
    info

    When Order Columns are not defined, an unbounded window frame (rowFrame, unboundedPreceding, unboundedFollowing) is used by default.

    info

    When Order Columns are defined, a growing window frame (rangeFrame, unboundedPreceding, currentRow) is used by default.

    Examples


    Ranking Functions with Window

    Examples of ranking functions are: row_number(), rank(), dense_rank() and ntile()

    info

    Only the default window frame (rowFrame, unboundedPreceding, currentRow) can be used with Ranking functions

    Example usage of Window - Ranking

    def rank_cust_orders(spark: SparkSession, in0: DataFrame) -> DataFrame:
    return in0\
    .withColumn(
    "order_number",
    row_number().over(
    Window.partitionBy(col("customer_id")).orderBy(col("order_date").asc())
    )
    )\
    .withColumn(
    "order_recency",
    ntile(2).over(
    Window.partitionBy(col("customer_id")).orderBy(col("order_date").asc())
    )
    )

    Analytical Functions with Window

    Examples of analytical functions are: lead(), lag(), cume_dist(), etc.

    info

    Window frame for lead() and lag() can not be specified.

    info

    Only the default window frame (rangeFrame, unboundedPreceding, currentRow) can be used with cume_dist()

    Example usage of Window - Analytical

    def analyse_orders(spark: SparkSession, in0: DataFrame) -> DataFrame:
    return in0\
    .withColumn(
    "previous_order_date",
    lag(col("order_date")).over(
    Window.partitionBy(col("customer_id")).orderBy(col("order_id").asc())
    )
    )\
    .withColumn(
    "next_order_date",
    lead(col("order_date")).over(
    Window.partitionBy(col("customer_id")).orderBy(col("order_id").asc())
    )
    )

    Aggregate Functions with Window

    Examples of aggregate functions are: min(), max(), avg(), etc. Example usage of Window - Aggregate

    def agg_orders(spark: SparkSession, in0: DataFrame) -> DataFrame:
    return in0\
    .withColumn(
    "running_avg_spend",
    avg(col("amount"))\
    .over(Window.partitionBy(col("customer_id"))\
    .rowsBetween(Window.unboundedPreceding, Window.currentRow))
    )\
    .withColumn("running_max_spend", max(col("amount"))\
    .over(Window.partitionBy(col("customer_id"))\
    .rowsBetween(Window.unboundedPreceding, Window.currentRow)))
    - - + + \ No newline at end of file diff --git a/Spark/index.html b/Spark/index.html index 95cc19dfb9..855ba7f6c9 100644 --- a/Spark/index.html +++ b/Spark/index.html @@ -6,15 +6,14 @@ Copilot for Spark users | Prophecy - - - - + + + - - + + \ No newline at end of file diff --git a/Spark/pipeline-monitoring/enable-pipeline-monitoring/index.html b/Spark/pipeline-monitoring/enable-pipeline-monitoring/index.html index c33876ce93..129ca62600 100644 --- a/Spark/pipeline-monitoring/enable-pipeline-monitoring/index.html +++ b/Spark/pipeline-monitoring/enable-pipeline-monitoring/index.html @@ -6,15 +6,14 @@ Enable Pipeline Monitoring | Prophecy - - - - + + +

    Enable Pipeline Monitoring

    To use Pipeline Monitoring, you must enable it by updating the Prophecy Library version in your Project dependencies, and turn on the Pipeline Monitoring flag in Pipeline Settings.

    Update Prophecy Library version

    Pipeline Monitoring requires your Project to be on the following versions:

    • Prophecy Scala libs version 8.0.23 or above.
    • Prophecy Python libs version 1.9.9 or above.

    You can check your ProphecyLibsPython version under Dependencies.

    • If your Project's Prophecy Scala and Python libs versions are out of date, Update them.

    ProphecyLibsPython

    If you have uncommitted changes in your Pipelines, you may be prompted to either Commit & Save or Save Without Committing. The update will affect all Pipelines in your Project.

    For an up-to-date list of Prophecy versions and libraries, see Version Chart.

    Turn on the Pipeline Monitoring flag

    By default, Pipeline Monitoring is enabled for all new Pipelines. For existing Pipelines, the feature is turned off by default in order to prevent unexpected changes in your generated Pipeline code.

    • To turn on Pipeline Monitoring, you must toggle on Enable Pipeline monitoring in Pipeline Settings.

    Turn on Pipeline Monitoring

    You can check the code view to see the added instrument annotations that add all of the details for monitoring your Pipeline.

    - - + + \ No newline at end of file diff --git a/Spark/pipeline-monitoring/index.html b/Spark/pipeline-monitoring/index.html index b9d879c7dd..44cb87852a 100644 --- a/Spark/pipeline-monitoring/index.html +++ b/Spark/pipeline-monitoring/index.html @@ -6,15 +6,14 @@ Pipeline Monitoring | Prophecy - - - - + + +

    Pipeline Monitoring

    Pipeline Monitoring enables you as a Spark Copilot user to monitor and debug your scheduled and interactive Pipeline runs through Prophecy. The primary goal of Pipeline, Model, and Job observability is to assist you in quickly pinpointing errors, minimize the cost of fixes, and reduce downtime.

    When enabled, Prophecy highlights the Gem that causes the failure at runtime. You can view log stack traces in the Prophecy UI in order to debug runtime issues. Pipeline Monitoring is available for any scheduled Job runs and all interactive runs through the Prophecy UI.

    Pipeline Monitoring features

    You can seamlessly address data health issues and monitor scheduled or ad-hoc runs without the need to switch to Databricks or Snowflake by using the following features:

    • Detect and monitor: Identify errors at runtime, and monitor scheduled production runs.
    • Alert: Get prompt alerts in case of failures according to severity.
    • Troubleshoot and fix with recommended solutions: Identify the cause of failures, fix them with AI-recommended solutions, and rerun failed or skipped tasks. Prophecy's Pipeline Monitoring encompasses all functionalities equivalent to those found in Databricks Workflows and Airflow Jobs.

    Possible Pipeline errors and failures

    During runtime, a Pipeline can fail due to different kinds of errors or failures such as the following:

    • Failure before plan execution started by Spark
    • Failure when the Gem has diagnostics or compilation issues because of a change in some common component
    • Runtime error due to unexpected data, such as data type mismatch
    • Error during write, such as write mode error or target data type mismatch
    • Driver/Executor errors like exceeding memory limits (OOMs)

    What's next

    To enable and use Pipeline Monitoring, see the following pages:

    For information on the stored execution metrics, see Execution Metrics.

    - - + + \ No newline at end of file diff --git a/Spark/pipeline-monitoring/use-pipeline-monitoring/index.html b/Spark/pipeline-monitoring/use-pipeline-monitoring/index.html index 6cf77dd316..44af42afe5 100644 --- a/Spark/pipeline-monitoring/use-pipeline-monitoring/index.html +++ b/Spark/pipeline-monitoring/use-pipeline-monitoring/index.html @@ -6,15 +6,14 @@ Use Pipeline Monitoring | Prophecy - - - - + + +

    Use Pipeline Monitoring

    You can use Pipeline Monitoring to help identify errors and failures at runtime. You can identify the cause of the failures, fix them with recommended solutions, and rerun failed or skipped tasks.

    Detect and monitor failures

    After your scheduled or ad-hoc run, you can view detected failures on your Pipeline. Gems with an error or warning are highlighted on the Visual canvas.

    • Hover over the Gem to see failure details, such as run duration and the exception.

    Gem failure-details

    • You can click Open Logs to open the Runtime Logs of that Gem.

    From the Runtime Logs panel, you can see other logs of the current run, including those that have succeeded. You can filter or sort the logs, and adjust the view.

    • Click More details to see the entire exception details.

    View runtime logs

    Troubleshoot and fix problems

    From the Runtime Logs panel, you can swap the tabs by clicking Problems. The Problems tab shows you a compilation of Warnings and Errors, where they can be found, and brief descriptions of each.

    • Click the name to view the problem. A side panel will open, taking you to the error location.
    • Or you can click Fix with AI to use a recommended solution made by Spark Copilot. And then try rerunning your task.

    View and fix problems with AI

    View historical runs

    You can see a complete history with logs and other debugging details for all of your scheduled and ad-hoc runs.

    • To view historical runs, select Run History.

    This will take you to the Historical View of your Pipeline in your particular Fabric.

    View run history

    note

    It may take a moment to load the Historical View since Prophecy is using your cluster to fetch the historical run information from your table.

    • You can view different versions of your Pipeline from the dropdown, or go back to view the Run Details.

    Successful runs are labeled with a ✔️, while failed runs are labeled with a ⚠️.

    • You can see the number of rows read and written at the top right.

    Historical monitoring options and details

    - - + + \ No newline at end of file diff --git a/Spark/secret-management/databricks-secrets/index.html b/Spark/secret-management/databricks-secrets/index.html index 006b74094d..0a5eabc7cf 100644 --- a/Spark/secret-management/databricks-secrets/index.html +++ b/Spark/secret-management/databricks-secrets/index.html @@ -6,17 +6,16 @@ Databricks Secrets | Prophecy - - - - + + +

    Databricks Secrets

    For users leveraging Databricks for Spark Execution, Databricks Secrets stand out as the most popular Secret Provider within Prophecy.

    Manage Databricks Secret Provider

    By default, a Databricks secret provider is added to all Databricks Fabrics. Users can choose to delete it if they prefer to restrict using Databricks secrets and opt for another Secret Provider, such as Hashicorp Vault.

    To add a Databricks Secret Provider, click on the (1) Add Provider button, opening the Secret Provider form.

    Add_Databricks_secret_provider

    Provide a (2) Name, select (3) Provider type as Databricks, and add a (4) Description of your choice. A Databricks Secret Provider can only be created in a Databricks Fabric, and you do not need to provide any other Authentication details here. Once done, click (5) Save.

    Manage Secrets and Scopes

    After adding your Secret provider, click on the (1) Refresh button to fetch secrets already created in Databricks. Ensure you have attached a Fabric from the top right corner for this.

    To create a new Secret, click on the (2) Add Secret button, opening the Add secret form.

    Add_databricks_secrets

    Provide a (3) Secret Name, and select a (4) Secret Scope from the dropdown. Note that if the scope doesn't exist, it will be auto-created. Add a (5) Description, and the (6) value for your secret. Once done, click (7) Save. This will create the Secret with the given key and value in the mentioned scope.

    You can also Edit/Delete an existing secret from the button next to the Secret in the table.

    info

    Users can only access the secrets that they have permission to access, according to the token provided in the Fabric. Every User provides their own PAT in the Databricks Fabric.

    Managing secrets for Prophecy Managed Databricks

    For POC and trial users exploring the product with Prophecy Managed Fabrics, we support creating and managing Databricks Secrets in our Databricks Workspace. Each Prophecy Managed Trial Fabric has a pre-created scope of its own. You can use/add/manage secrets in this pre-created Scope only.

    info

    Secrets created in one Prophecy Managed Databricks Fabric can't be accessed from another. They're also deleted when the trial expires. Your POC secrets are safe, but it's best not to use production systems for POC while using Prophecy Managed Databricks Fabrics.

    To create a new Secret, click on the Add Secret button. In the form, you will notice a Secret-scope already selected. Go ahead and add your Secret name and value.

    Once you have created a Secret Provider and Secrets, you can start to use them in your Pipelines.

    - - + + \ No newline at end of file diff --git a/Spark/secret-management/env-variable/index.html b/Spark/secret-management/env-variable/index.html index 1e53411a8e..a0217fc4cc 100644 --- a/Spark/secret-management/env-variable/index.html +++ b/Spark/secret-management/env-variable/index.html @@ -6,10 +6,9 @@ Environment Variables | Prophecy - - - - + + +
    @@ -18,7 +17,7 @@ There is no authentication required for Environment Variables secret provider. Once done, click (5) Save.

    Managing Environment Variables Secrets

    You can now register in Prophecy the environment variables that are present in your Spark Cluster/Fabric. We don't currently support fetching the environment variables that are already set in the cluster. To create a new Secret, click on the (1) Add Secret button, opening the Add secret form.

    Provide a (2) Secret Name, and add a (3) Description. Once done, click (4) Save. This will store the mapping of secret with the given key in Prophecy.

    info

    Please be aware that these environment variables cannot be created, edited, or deleted through Prophecy in your Spark clusters. When you create a secret in Prophecy, you're essentially setting up a mapping for environment variables. Ensure that the same environment variable exists on your Spark cluster with the correct value.

    You can delete an existing env variable from the button next to the Secret in the table.

    Using Environment Variables Secrets in Pipelines

    Once you have added an Environment Variables Secret Provider and stored secrets, you can effortlessly use them in your Pipelines.

    - - + + \ No newline at end of file diff --git a/Spark/secret-management/hashicorp-vault/index.html b/Spark/secret-management/hashicorp-vault/index.html index 446dfa6a87..7992e43c4a 100644 --- a/Spark/secret-management/hashicorp-vault/index.html +++ b/Spark/secret-management/hashicorp-vault/index.html @@ -6,10 +6,9 @@ HashiCorp Vault | Prophecy - - - - + + +
    @@ -17,7 +16,7 @@ For authentication, Prophecy currently supports fixed Environment variables with names (5) VAULT_ADDR and (6) VAULT_TOKEN. Ensure your Spark cluster has these environment variables set correctly, pointing to the respective Vault URL and token. These Environment variables must be present on the Spark cluster, not on the Prophecy side. Attach the cluster in order to verify the configuration by clicking the (7) Test connection button.

    Once done, click (8) Save.

    Managing Vault Secrets

    After adding your Vault provider, click on the (1) Refresh button to fetch secrets already stored in HashiCorp Vault. Make sure you select the Secret provider created above in the Secret Provider dropdown while listing the secrets.

    info

    You can only fetch the secrets accessible to the Token provided in your Spark cluster

    To create a new Secret, click on the (2) Add Secret button, opening the Add secret form.

    Add_hashicorp_vault_secrets

    Provide a (3) Secret Name, and select a (4) Secret Scope from the dropdown. A Scope in HashiCorp Vault is simply the path of that secret key. Note that if the scope/path doesn't exist, it will be auto-created.
    Add a (5) Description, and the (6) value for your secret. Once done, click (7) Save. This will create the Secret with the given key and value, at the specified path.

    You can also Edit/Delete an existing secret from the button next to the Secret in the table.

    Using HashiCorp Vault Secrets in Pipelines

    Once you have added a HashiCorp Vault Secret Provider and stored secrets, you can seamlessly use them in your Pipelines.

    - - + + \ No newline at end of file diff --git a/Spark/secret-management/index.html b/Spark/secret-management/index.html index b3626f2c68..95b7869f32 100644 --- a/Spark/secret-management/index.html +++ b/Spark/secret-management/index.html @@ -6,17 +6,16 @@ Secret Management For Spark Fabrics | Prophecy - - - - + + +

    Secret Management For Spark Fabrics

    Prophecy seamlessly integrates with various data tools, supporting data ingestion from sources like Salesforce, data enrichment through REST APIs for data masking, and data egress to platforms like Snowflake.

    To ensure robust security for enterprise customers, Prophecy provides secure authentication for all data tool connections. Users can securely store credentials using centralized secret providers like Databricks Secrets or HashiCorp Vault, following best practices such as encryption.

    Users can connect these centralized secret providers to their Spark Fabrics and access the secrets via Prophecy.

    info

    Prophecy never accesses the value for any secrets, only scopes and keys to generate and execute correct code, keeping your secrets safe and out of your Pipeline code committed to Git.

    Secret Provider

    Users can create a Secret Provider in their Spark Fabrics. The secrets are accessed when a Pipeline is run on the cluster. Users must ensure that their Spark clusters have proper access to the Secret Providers they are using.

    You can access the Secret Providers and Secrets connected to the Fabric from the (1) Secrets tab in your Fabric. This page shows the list of all Secret Providers and the Secrets for each provider. You can directly (2) Refresh Secrets to fetch all accessible secrets or (3) Add any secret from here. There are three types of Secret Providers supported:

    secret_screen

    Secret ProviderDetails
    Databricks SecretsAvailable for Databricks Fabrics Only
    HashiCorp VaultAvailable for all Spark Fabrics
    Environment VariablesAvailable for all Spark Fabrics

    Once you have secrets created in Fabrics, you can Use a secret in your Source and Target gems in your Pipelines directly.

    Using Secrets in Prophecy Managed Spark Fabrics

    For POC and trial users exploring the product with Prophecy Managed Fabrics, Databricks Secrets are supported. Users can create secrets in Prophecy Managed Databricks Fabrics and use them for POC and trial purposes.

    Any secrets created in Prophecy Managed Databricks Workspace will be automatically cleaned up after the POC expires.

    caution

    Prophecy ensures a separate scope for each Prophecy Managed Fabric, preventing access to your secrets by others during POC. However, it is not recommended to use your Production data tools for trials/POC and connect them to Prophecy Managed Fabric.

    Read here for managing secrets in Prophecy Managed Databricks Fabric.

    What's next

    To learn more about secret management for Spark Fabrics, see the following pages:

    - - + + \ No newline at end of file diff --git a/Spark/secret-management/using-secrets/index.html b/Spark/secret-management/using-secrets/index.html index f92f762314..b344d6824f 100644 --- a/Spark/secret-management/using-secrets/index.html +++ b/Spark/secret-management/using-secrets/index.html @@ -6,10 +6,9 @@ Using Secrets in Pipelines | Prophecy - - - - + + +
    @@ -17,7 +16,7 @@ For any Gem which requires an Authentication Field like Username or Password, you will have an option to Insert Secret as shown below.

    use_secret

    Click on (1) Insert Secret; this will open the dropdown for all secrets and secret providers available in your Fabric. If you don't see your secrets, confirm you have the correct Fabric selected in the top right corner. Attach a Cluster from the top right corner to be able to (2) Refresh Secrets for any Provider.

    - - + + \ No newline at end of file diff --git a/Spark/spark-streaming/index.html b/Spark/spark-streaming/index.html index f787064a13..31e2bdc0df 100644 --- a/Spark/spark-streaming/index.html +++ b/Spark/spark-streaming/index.html @@ -6,16 +6,15 @@ Spark Structured Streaming | Prophecy - - - - + + +

    Spark Structured Streaming

    Prophecy 2.7 introduces native support for streaming data running on Spark Structured Streaming. The streaming capability is available for Python projects. Support for Scala will be added in the future.

    Streaming pipelines work differently from batch pipelines:

    1. Streaming applications are always running, continuously processing incoming data.
    2. Data is processed in micro-batches, with the notable exception of Continuous Triggers (an experimental feature available in Spark 3.3). Continuous triggers are not supported by Prophecy.
    3. Streaming applications handle transient data rather than maintain the entire data. Aggregations and joins require watermarking for maintaining a limited state.
    4. All Streaming Datasets can behave similarly to Batch datasets using the Spark ForEachBatch. More on ForEachBatch here. Note that forEachBatch is not supported by Prophecy.

    This documentation assumes you are already familiar with how Structured Streaming works. For more information, you can consult the Structured Streaming documentation here.

    Spark Structured Streaming using Prophecy IDE

    How to Create a Streaming Pipeline Within a Prophecy Python Project, a user can create a Structured Streaming Pipeline using the Streaming(beta) mode.

    Working with a Streaming Pipeline

    To create a Streaming Pipeline, users can follow a process similar to creating a Batch Pipeline in a Python project. For more on Pipeline creation and understanding Prophecy pipelines, please check this link. Streaming Pipelines work differently from Batch Pipelines in the following ways:

    1. Partial runs are not supported for streaming applications. A partial run is only allowed on a Streaming Target Gem.
    2. Streaming pipelines are long-running tasks and process data at intervals. Currently, they do not capture cumulative statistics.
    3. Streaming Pipelines are continuous and do not stop running. To terminate a Streaming Pipeline, users need to click the "X" button. A Streaming Pipeline is an ongoing process and will not terminate itself.
    4. To deploy the Pipeline on Databricks, users can follow the same process described here. A scheduled Job will check if the Streaming Pipeline is running every X minutes. If the Pipeline is not running, the Job will attempt to start it.

    Streaming Sources and Targets

    Spark Structured Streaming applications have a variety of source and target components available to construct Pipelines.

    Streaming source gems render to spark.readStream() on the Spark side. Currently, we support file stream-based sources and targets, warehouse-based targets, and event stream-based sources and targets. For more information on Streaming Source and Target Gems, click here.

    Additionally, any batch data sources can be used in a streaming application. Batch data sources are read using the spark.read() function at every processing trigger (due to Spark evaluating lazily). More on triggers here. For more information on Batch Source and Target Gems, click here.

    Streaming Transformations

    For more information on Streaming Transformations, click here.

    - - + + \ No newline at end of file diff --git a/Spark/spark-streaming/streaming-sources-and-targets/index.html b/Spark/spark-streaming/streaming-sources-and-targets/index.html index f4707af16f..e06f7052ed 100644 --- a/Spark/spark-streaming/streaming-sources-and-targets/index.html +++ b/Spark/spark-streaming/streaming-sources-and-targets/index.html @@ -6,15 +6,14 @@ Streaming Sources and Targets | Prophecy - - - - + + + - - + + \ No newline at end of file diff --git a/Spark/spark-streaming/streaming-sources-and-targets/streaming-event-apps/index.html b/Spark/spark-streaming/streaming-sources-and-targets/streaming-event-apps/index.html index e0483cd062..a2ed09deb5 100644 --- a/Spark/spark-streaming/streaming-sources-and-targets/streaming-event-apps/index.html +++ b/Spark/spark-streaming/streaming-sources-and-targets/streaming-event-apps/index.html @@ -6,15 +6,14 @@ Event-based | Prophecy - - - - + + +

    Event-based

    Event-based Sources and Targets

    Prophecy supports Kafka Streaming Source and Target. More information on supported Kafka Source and Target options is available here.

    The Kafka Gem allows inferring the schema of the events by automatically populating the value column. Schema inference works with both JSON and AVRO file formats. A user is required to provide an example event for schema inference.

    Create a Kafka Source Gem

    A Kafka Source Gem allows the Streaming Pipeline to continuously pull data from a Kafka topic. The following options are supported:

    PropertyOptionalDefault ValueComment
    Broker ListFalseN/AList of Kafka brokers separated by commas. For eg. kdj-ibg1.us-east-2.aws.cloud:9092, kdj-ibg2.us-east-2.aws.cloud:9092,kdj-ibg3.us-east-2.aws.cloud:9092
    Group IDTrueNoneConsumer group ID.
    Session TimeoutFalse6000Corresponds to the session.timeout.ms field
    Security ProtocolFalseSASL_SSLSupported values are SASL_SSL, PLAINTEXT, SSL, SSL_PLAINTEXT
    SASL MechanismsFalseSCRAM-SHA-256SASL mechanism to handle username/password authentication. Supported values are PLAIN, SCRAM-SHA-256 and SCRAM-SHA-512, GSSAPI, OAUTHBEARER
    Kafka TopicFalseN/AName of Kafka Topic to Consume

    Entering Authentication Credentials

    • Databricks Secrets (recommended): Use Databricks to manage your credentials
    • UserName, Password: Use ONLY for test deployments and during development. This writes credentials to Git repository, which isn't good practice.
    - - + + \ No newline at end of file diff --git a/Spark/spark-streaming/streaming-sources-and-targets/streaming-file-apps/index.html b/Spark/spark-streaming/streaming-sources-and-targets/streaming-file-apps/index.html index d557d7b2d4..bad8b25c68 100644 --- a/Spark/spark-streaming/streaming-sources-and-targets/streaming-file-apps/index.html +++ b/Spark/spark-streaming/streaming-sources-and-targets/streaming-file-apps/index.html @@ -6,10 +6,9 @@ File-based | Prophecy - - - - + + +
    @@ -17,7 +16,7 @@ File Streaming

    Databricks Auto Loader

    Databricks Fabrics can utilize Auto Loader.

    Auto Loader supports loading data using directory listing as well as using AWS's Simple Queue Service (SQS) file notifications. More on this here. Stream sources using Auto Loader have properties that can be configured using the Field Picker on the Gem: Autoloader Directory Listing Mode Autoloader File Notification Mode

    Formats Supported

    The following file formats are supported. The Gem properties are accessible under the Properties Tab by clicking on + :

    1. JSON: Native Connector Docs for Source here. Additional Autoloader Options here.
    2. CSV: Native Connector Docs for Source here. Additional Autoloader Options here.
    3. Parquet: Native Connector Docs for Source here. Additional Autoloader Options here.
    4. ORC: Native Connector Docs for Source here. Additional Autoloader Options here.
    5. Delta: A quickstart on Delta Lake Stream Reading and Writing is available here. Connector Docs are available here. Note that this would require installing the Spark Delta Lake Connector if the user has an on-prem deployment. We have additionally provided support for Merge in the Delta Lake Write Connector (uses forEachBatch behind the scenes).

    File-based Streaming Tutorial

    - - + + \ No newline at end of file diff --git a/Spark/spark-streaming/streaming-sources-and-targets/streaming-warehouse-apps/index.html b/Spark/spark-streaming/streaming-sources-and-targets/streaming-warehouse-apps/index.html index 7c3bba9a24..96e9625ddc 100644 --- a/Spark/spark-streaming/streaming-sources-and-targets/streaming-warehouse-apps/index.html +++ b/Spark/spark-streaming/streaming-sources-and-targets/streaming-warehouse-apps/index.html @@ -6,15 +6,14 @@ Warehouse-based | Prophecy - - - - + + +
    Skip to main content
    - - + + \ No newline at end of file diff --git a/Spark/spark-streaming/transformations-streaming/index.html b/Spark/spark-streaming/transformations-streaming/index.html index 89039d47be..da5a6ef179 100644 --- a/Spark/spark-streaming/transformations-streaming/index.html +++ b/Spark/spark-streaming/transformations-streaming/index.html @@ -6,10 +6,9 @@ Streaming Transformations | Prophecy - - - - + + +
    @@ -17,7 +16,7 @@ Although Window would work with a watermarked column (explained below) as part of the partitioning, it is advised to use window() or session_window() from the pyspark.sql.functions package (link).

    Watermarking

    Watermarking is a technique that enables aggregations on streaming data by limiting the state over which the aggregation is performed. In order to prevent out-of-memory errors, we have introduced support for watermarking. More information on watermarking is available in the Spark documentation here

    We have added a Watermarking Gem in the Transform Section that allows a user to add a Watermark to a DataFrame. Example usage of Watermark - Watermark Table

    In this example, we add Watermarking to the timestamp column. A user may enter the column name or select one from the Schema Table on the left. The text box is editable. Finally, define the Watermark Duration. It is recommended to use Watermarking on a Streaming DataFrame in case you're planning to use any of the following operations on it:

    - - + + \ No newline at end of file diff --git a/Spark/tests/index.html b/Spark/tests/index.html index 4841f6a82a..2821cf17f6 100644 --- a/Spark/tests/index.html +++ b/Spark/tests/index.html @@ -6,16 +6,15 @@ Unit Testing | Prophecy - - - - + + +
    Skip to main content

    Unit Testing

    Writing good unit tests is one of the key stages of the CI/CD process. It ensures that the changes made by developers to projects will be verified and all the functionality will work correctly after deployment.

    Prophecy makes the process of writing unit cases easier by giving an interactive environment via which unit test cases can be configured across each component.

    There are two types of unit test cases which can be configured through Prophecy UI:

    1. Output rows equality
    2. Output predicates

    Let us understand both types in detail:

    Output rows equality

    Automatically takes a snapshot of the data for the component and allows you to continuously test that the logic performs as intended. This would simply check the equality of the output rows.

    Example

    In the example below, we will create the following unit tests:

    1. To check the join condition correctly for one-to-one mappings.
    2. To check the join condition correctly for one-to-many mappings.

    Output predicates

    These are more advanced unit tests where multiple rules need to pass in order for the test as a whole to pass. Requires Spark expression to be used as predicates.

    Example

    In the example below, we will create the following unit tests:

    1. Check that the value of amount column is >0.
    2. Check whether first name is not equal to last name.

    Generating sample data for test cases automatically

    To generate sample input data automatically from the source DataFrame, this option can be enabled while creating a unit test.

    note

    The Pipeline needs to run once to generate unit tests based on auto-generated sample data.

    Let's generate sample data automatically for the unit test case we created in the above example.

    Generated code

    Behind the scenes, the code for unit tests is automatically generated in our repository. Let's have a look at the generated code for our unit test above.

    Renaming the name of unit test

    - - + + \ No newline at end of file diff --git a/architecture/deployment/index.html b/architecture/deployment/index.html index 244d008dcc..225f409e43 100644 --- a/architecture/deployment/index.html +++ b/architecture/deployment/index.html @@ -6,16 +6,15 @@ Prophecy deployment | Prophecy - - - - + + +
    Skip to main content

    Prophecy deployment

    Prophecy deployment is simple and flexible. Prophecy is written as a set of microservices that run on Kubernetes and is built to be multi-tenant. There are three primary options within cloud deployment.

    Cloud Deployment

    Prophecy in the cloud connects to your existing Spark and Scheduler/Orchestrator. Prophecy does not store any data, however, it does store metadata about your Pipelines, Datasets and schedules.

    General Architecture

    Public SaaS

    Public SaaS (Prophecy managed SaaS) is the default option when you connect from Databricks Partner Connect and is free for one user. This option is heavily used by customers to try Prophecy. Our startup and midsize customers who like the convenience of a managed service prefer this option. You can also use this by directly going to the Prophecy Application.

    VPC Architecture

    Private SaaS (Customer VPC)

    Customers in segments that deal with very sensitive data primarily use this option. Here, Prophecy runs within the Customer VPC and connects to the identity, Spark clusters and the scheduler within the VPC. For more information read the private SAAS installation documentation or reach out to our team by using request a demo.

    Customer VPC Architecture

    Ready to set up Prophecy to run in your VPC? Log in to your preferred cloud marketplace: AWS, Azure, or GCP, and search for "Prophecy." The installation billing starts after 30 days.

    Marketplaces

    On-Premise Deployment

    On rare occasions, Prophecy will deploy on-premise for large customers who are moving to the cloud. Often the order is that the organizations will move Pipelines from on-premise legacy ETL tools to Spark, then move them to Spark on the cloud.

    What's next

    To continue with your Prophecy deployment, see the following pages:

    - - + + \ No newline at end of file diff --git a/architecture/index.html b/architecture/index.html index 6f2f37b95e..a3f7625db1 100644 --- a/architecture/index.html +++ b/architecture/index.html @@ -6,16 +6,15 @@ Architecture | Prophecy - - - - + + +
    Skip to main content

    Architecture

    Prophecy deployment is simple and flexible. Prophecy is written as a set of Microservices that run on Kubernetes and is built to be multi-tenant.

    Deployment ModelCustomers Who Prefer it
    Prophecy Managed SaaSMidsize Companies and Startups
    Private SaaS (Customer VPC)Enterprises in the Cloud
    On-PremiseLarge Enterprises in the middle of cloud migration (rare cases)

    High-Level Architecture

    There are four components of a successful Prophecy deployment:

    ComponentDescription
    Prophecy IDEThe development environment, including Prophecy microservices and cloud infrastructure, that is deployed.
    Data engineThe SQL or Spark execution environment, like Snowflake or Databricks. This is set up by a customer and connected to Prophecy through a secure and performant interface. No customer data is stored on Prophecy’s environment.
    Source controlProphecy works similarly to code-first IDEs by natively integrating with Git and platforms like Bitbucket. An encrypted copy of the customer’s code is stored within Prophecy’s environment for fast access, while the source-of-truth code is stored on Git.
    Identity management (optional)For simple user authentication and permission control, Prophecy can connect to your identity provider of choice.

    Prophecy IDE

    A user who logs into Prophecy has access to the integrated development environment (IDE). This includes everything needed to enable all data users to transform raw data into reliable, analytics-ready data using visual data pipelines.

    Prophecy IDE

    Teams are the primary mechanism of ownership. Teams own Projects where Pipelines, Datasets, and Jobs live. Teams also own execution fabrics that provide the execution and storage resources for execution including on SQL Warehouses and Spark clusters.

    SQL

    To allow for SQL query execution Prophecy can connect to Snowflake and Databricks warehouses. Connectors for additional SQL warehouses will be announced as they are added.

    Snowflake

    To connect with data stored in a SQL Warehouse, or to allow for interactive SQL execution, Prophecy can connect to an existing Snowflake execution environment through secure and performant Snowpark or Snowflake APIs.

    Each Fabric defined in Prophecy connects to a single Snowflake Warehouse and each user is required to provide credentials to authenticate to it.

    Arch_Diagram

    Notice the data provider (e.g. Snowflake) matches up to a Fabric. For another scenario, consider the same architecture diagram where the Fabric connects to a Databricks SQL warehouse instead of Snowflake.

    Spark

    To allow for interactive code execution Prophecy can connect to either Databricks or any other Spark through Apache Livy (e.g. MapR, CDP, HDP, Spark on Kubernetes).

    Databricks

    Prophecy to Databricks Connectivity

    Prophecy connects to Databricks using Rest API. Each Fabric defined in Prophecy connects to a single Databricks workspace and each user is required to provide a personal access token to authenticate to it.

    Security-conscious enterprises that use Databricks with limited network access have to additionally add the Prophecy Data Plane IP address (3.133.35.237) to the Databricks allowed access list.

    Primarily Prophecy uses Databricks for the following functionalities:

    • Interactive Execution - Prophecy allows its users to spin up new clusters or connect to existing clusters. When a cluster connection exists, Prophecy allows the user to run their code in the interactive mode. Interactive code queries are sent to Databricks using the Databricks Command API 1.2.
    • Scheduling - Prophecy allows the user to build and orchestrate Databricks Jobs. This works through the Databricks Jobs API 2.1.

    By default, Prophecy does not store any data samples when executing code using Databricks. Data samples can be optionally stored for observability purposes (execution metrics).

    note

    When using Active Directory, Prophecy takes care of auto-generation and refreshing of the Databricks personal access tokens. Read more about it here.

    Git

    While all code generated by Prophecy is stored in a User’s Git repository, we temporarily store some of the generated code used during Interactive development in an encrypted cache.

    Supported Git providers:

    • Prophecy Managed - Prophecy automatically sets up the connectivity between itself and the repositories. Prophecy Managed is based on open-source Gitea.
    • GitHub (including GitHub Enterprise) - authenticates using per-user personal access tokens. How to generate PAT?
    • Bitbucket (including Bitbucket self-hosted) - authenticates using per-user personal access tokens. How to generate PAT?
    • GitLab (including GitLab self-hosted) - authenticates using per-user personal access tokens. How to generate PAT?
    • Azure DevOps - authenticates using per-user personal access tokens. How to generate PAT?

    Security-conscious enterprises that use Git Providers within private networks behind firewalls have to add the Prophecy Control Plane IP address (3.133.35.237) to the private network allow-list or to the Git provider allow-list.

    Security and Privacy Practices

    The Prophecy team employs top-notch industry practices to safeguard the security of their application and maintain the privacy of customer data. Below are just a few components of our comprehensive security strategy and system structure:

    • General - An annual penetration test is performed to validate Prophecy’s posture and identify vulnerabilities. Our latest penetration test report was issued in November 2022. Prophecy maintains SOC-2 compliance as audited by PrescientAssurance.

    • Public SaaS - Prophecy IDE is hosted on secure servers on AWS. All storage systems are encrypted, and all servers are tightly access controlled and audited. Data is encrypted in transit at all times.

    • Private SaaS - Alternatively, Prophecy’s IDE can be installed within an Enterprise network as desired. Prophecy’s IDE accesses your environment through a single IP address dedicated to you, allowing you to protect access to your data resources at the network level. The credentials are stored per user, and only a fully authenticated user can access their environment.

    • On-Premise - Prophecy complies with your security requirements on-premise; reach out to start the discussion.

    Read more details on Prophecy’s security and compliance posture at our Security Portal here.

    What's next

    To learn more about Prophecy's architecture, see the following pages:

    - - + + \ No newline at end of file diff --git a/architecture/self-hosted/authentication/active_directory/index.html b/architecture/self-hosted/authentication/active_directory/index.html index 8ec64b9ad2..aa77c61330 100644 --- a/architecture/self-hosted/authentication/active_directory/index.html +++ b/architecture/self-hosted/authentication/active_directory/index.html @@ -6,10 +6,9 @@ Active-Directory | Prophecy - - - - + + +
    @@ -24,7 +23,7 @@ Name Attribute: Maps to display name of users.

    Base Distinguished Name: BaseDN to start the search from. Filter: Optional filter to apply when searching the directory. Name Attribute: Maps to display name of users.

    User Matchers

    Add the list of field pairs that are used to match a user to a group. Each pair adds an additional requirement to the filter: an attribute in the group must match the user's attribute value.

    - - + + \ No newline at end of file diff --git a/architecture/self-hosted/authentication/azure-ad/index.html b/architecture/self-hosted/authentication/azure-ad/index.html index 5ae84240f0..8712332162 100644 --- a/architecture/self-hosted/authentication/azure-ad/index.html +++ b/architecture/self-hosted/authentication/azure-ad/index.html @@ -6,15 +6,14 @@ Azure Active Directory | Prophecy - - - - + + +
    Skip to main content

    Azure Active Directory

    This document describes how to configure Azure Active Directory as the identity provider for Prophecy.

    Register a new Azure App

    • In a new browser tab, log into Azure Portal as an administrator and register a new app "ProphecyAzureADApp"

    • In the home page search bar, search for "App Registrations"

    • Click "New Registration"

    • Give name as "ProphecyAzureADApp"

    • Supported account type as "Accounts in this organizational directory only (xxxxx only - Single tenant)"

    • Redirect URI : Choose "Web" in drop down

    • Redirect URI : https://your-prophecy-ide-url.domain/api/oauth/azureadCallback

    • Click Register

    API Permission

    • Go to "API permissions" on the left-hand side and add this set of API permissions: Screenshot 2022-06-13 at 9 57 16 PM

    Certificates and Secrets

    • Go to "Certificates and Secrets", add a new secret, and note down the "value" of this secret.

    Note down Azure AD params for Prophecy IDE configuration

    Client ID

    • Click on "Overview" on the left-hand side and note down the Application(client) ID. This will be used as the client ID in Prophecy IDE

    Client Secret

    • You have already noted down the "value" of the secret you created earlier.

    Configure Prophecy to connect with Azure Active Directory

    • Log in to Prophecy IDE as an admin user
    • Go to settings and Admin tab and choose "Authentication Provider" as Azure Active Directory and fill in the information you noted down earlier. Save it.
    • Log out and you will be able to see the "Login with Azure Active Directory" option. Now your Azure AD users will be able to log in to Prophecy IDE using the "Login with Azure Active Directory" option
    - - + + \ No newline at end of file diff --git a/architecture/self-hosted/authentication/azuread-scim/index.html b/architecture/self-hosted/authentication/azuread-scim/index.html index f3ac206cc6..600236cadb 100644 --- a/architecture/self-hosted/authentication/azuread-scim/index.html +++ b/architecture/self-hosted/authentication/azuread-scim/index.html @@ -6,10 +6,9 @@ SAML Using AzureAD with SCIM | Prophecy - - - - + + +
    @@ -22,7 +21,7 @@ Prophecy IDE will not be synced back to AzureAD and will get overwritten whenever any update to user is synced from AzureAD.
  • Any changes to user/groups in AzureAD are not immediately synced from AzureAD to Prophecy and get reflected in next synchronization cycle. If you want the changes to immediately reflect in Prophecy, you need to go to Provisioning section of Prophecy Enterprise App and click Provision on Demand.
  • Updates to primary email are not supported in Prophecy via SCIM.
  • Login via secondary emails registered with AzureAD is not supported in Prophecy.
  • De-provisioning of a user from Azure deletes that user from Prophecy rather than deactivating them. As a result, a de-provisioned user will lose their personal projects in Prophecy.
  • Requirements

    To provision users/groups to your Prophecy account using SCIM,

    Enable SCIM Provisioning for Prophecy Enterprise App in AzureAD

    - - + + \ No newline at end of file diff --git a/architecture/self-hosted/authentication/index.html b/architecture/self-hosted/authentication/index.html index 66fa794fc5..aa48aafc30 100644 --- a/architecture/self-hosted/authentication/index.html +++ b/architecture/self-hosted/authentication/index.html @@ -8,10 +8,9 @@ "> - - - - + + +
    @@ -19,7 +18,7 @@ provider. For authorization, when you access the execution infrastructure (that includes Spark, scheduler, storage and other cloud resources), your identity is passed through by Prophecy, ensuring that your existing authorization mechanisms are respected.

    We currently support the following:

    If you require some other authentication mechanism, please reach out to our team.

    What's next

    To learn more about authentication with Prophecy, see the following pages:

    - - + + \ No newline at end of file diff --git a/architecture/self-hosted/authentication/saml-okta/index.html b/architecture/self-hosted/authentication/saml-okta/index.html index d54a8cf87a..0b8c67cefe 100644 --- a/architecture/self-hosted/authentication/saml-okta/index.html +++ b/architecture/self-hosted/authentication/saml-okta/index.html @@ -6,10 +6,9 @@ SAML Using Okta | Prophecy - - - - + + +
    @@ -23,7 +22,7 @@ to the group

  • Importing user/groups from Okta to Prophecy is supported but not vice-versa i.e. any changes made to a synced user in Prophecy IDE will not be synced back to Okta and will get overwritten whenever any update to user is synced from Okta.

  • Updates to primary email are not supported in Prophecy via SCIM.

  • Login via secondary emails registered with Okta is not supported in Prophecy.

  • De-provisioning of a user from Okta deletes that user from Prophecy rather than deactivating them. As a result, a de-provisioned user will lose their personal projects in Prophecy.

  • Requirements

    To provision users/groups to your Prophecy account using SCIM,

    Enable SCIM Provisioning for Prophecy SAML App in Okta

    Choose provisioning options

    1. From the app integration's settings page, choose the Provisioning tab. The SCIM connection settings appear under Settings > Integration.
    2. Click Edit.
    3. Specify the SCIM connector base URL as https://your-prophecy-ide-url.domain/proscim
    4. Specify the field name of the Unique identifier for users as userName.
    5. Under Supported provisioning actions, choose the following provisioning actions:
    1. For Authentication Mode, choose HTTP Header from the dropdown box and in Authorization, provide the SCIM token as generated in Prophecy IDE above.
    2. Click on Test Connector Configuration to check the connectivity to the SCIM server.
    3. If the connection test succeeds, click Save. A new tab will appear on app integration's settings page named Push Groups.

    Scim Provisioning

    User/Group Assignment to Prophecy SAML App in Okta

    1. Go to the Assignment tab of Prophecy SAML App in Okta
    2. To assign to individual people, click Assign -> Assign to People. Search your users and assign them to the Prophecy app.
    3. To assign to groups, click Assign -> Assign to Groups. Search your groups and assign them to the Prophecy app.

    Assign App

    As mentioned earlier, assigning app to Group only creates new users in Prophecy IDE belonging to this group but doesn't create a group in Prophecy. To create a group:

    1. Go to the Push Groups tab of the Prophecy SAML App in Okta
    2. Click Push Groups -> Find groups by name/rule, enter the name/rule.

    Find Groups

    1. Select the checkbox to Push group memberships immediately.
    2. In dropdown of Create/Link Group, select Create Group (leave as is if already selected)
    3. Click Save.

    Push Groups by Name

    - - + + \ No newline at end of file diff --git a/architecture/self-hosted/authentication/security-settings/index.html b/architecture/self-hosted/authentication/security-settings/index.html index 83d20fe93d..60d8316901 100644 --- a/architecture/self-hosted/authentication/security-settings/index.html +++ b/architecture/self-hosted/authentication/security-settings/index.html @@ -6,10 +6,9 @@ Security | Prophecy - - - - + + +
    @@ -18,7 +17,7 @@ Simply click on the Add Keytab button and provide the Livy URL, Kerberos Principal, and Keytab File for the given Livy URL.

    info

    Any changes in the Kerberos Authentication section would require a restart of the execution service for the Prophecy installation.

    keytab

    Proxy-user Settings (Per user)

    If you want to use impersonation-enabled authentication to the Livy server, you can set how to obtain the proxy-user value for each user here. Currently, Prophecy supports two ways to sync this proxy-user value from AAD or LDAP. Note that these values will sync to Prophecy every time the user logs in.

    proxy-user

    - - + + \ No newline at end of file diff --git a/architecture/self-hosted/configurations/configure-alerts/index.html b/architecture/self-hosted/configurations/configure-alerts/index.html index 71ded9b440..2ab3b46407 100644 --- a/architecture/self-hosted/configurations/configure-alerts/index.html +++ b/architecture/self-hosted/configurations/configure-alerts/index.html @@ -6,15 +6,14 @@ Alerts Configuration | Prophecy - - - - + + +
    Skip to main content

    Alerts Configuration

    Prophecy offers a reliable solution for configuring alerts to monitor resource usage in Prophecy-managed microservices. It enables proactive alerting when resource utilization approaches defined thresholds, ensuring timely intervention before limits are reached. Additionally, it supports the suspension of critical services in the event of resource overflows.

    Alerting summary

    tip

    Note this doc is constantly updated with new features/options and hence it is better to always go with the latest version of Prophecy. Alerting is supported from Prophecy version 3.4.1 and above.

    Resource monitoring is enabled by default for cluster administrators, providing insights into CPU, memory, and disk usage for critical services. This feature reports current usage levels alongside defined limits, ensuring admins have a clear view of resource consumption.

    Alerting is also enabled by default for all customers, allowing proactive monitoring of resource usage and facilitating timely resolution as limits approach. However, this feature can be disabled if not necessary.

    Alerting features

    Currently, the following alerts are tracked, with plans to expand this list in the future to include more in-depth application-level monitoring and alerting.

    • CPU_USAGE
    • DISK_USAGE
    • FILE_COUNT
    • MEMORY_USAGE

    Alerts are of two levels:

    • WARNING: Nearing configured limits
    • CRITICAL: At or above configured limits

    And they are generated when they reach CRITICAL for MEMORY_USAGE / CPU_USAGE and WARNING / CRITICAL for DISK_USAGE / FILE_COUNT.

    To prevent data corruption, certain critical services, such as Metagraph and Gitserver, are automatically suspended when disk usage limits are reached. This feature is enabled by default but can be disabled if needed.

    By default, alerts are displayed as notification banners in the Prophecy UI, which direct users to a comprehensive view on the monitoring page for detailed insights.

    We also support email-based alerts, which can be configured by providing the necessary SMTP details. On resolution of alerts, email notifications are sent.

    Configuration

    There are certain environment variables that need to be configured in the Prophecy admin UI.

    To configure alert settings in the Prophecy UI, follow these steps:

    1. Log in to the Prophecy UI as an admin user.
    2. Click on the three dots at the bottom left corner and select the Settings icon from the submenu.
    3. Navigate to the Admin main tab.
    4. Within the Admin main tab, select the Config sub tab.
    5. Click on the Alert Config sub tab to configure the alert settings.

    JSON format

    Below are JSON configurations within the Prophecy UI that need to be enabled to support this functionality. You will have to configure only the options which you require. Make sure to maintain a JSON format mentioned below while configuring the different options. Most of the values below are defaults or sample values.

    {
    "alertConfigs": [
    {
    "maxAllowedOccurrenceCount": 20,
    "metricsType": "CPU_USAGE",
    "thresholdValue": 0.95
    },
    {
    "maxAllowedOccurrenceCount": 3,
    "metricsType": "DISK_USAGE",
    "thresholdValue": 0.95
    },
    {
    "maxAllowedOccurrenceCount": 3,
    "metricsType": "FILE_COUNT",
    "thresholdValue": 0.95
    },
    {
    "maxAllowedOccurrenceCount": 20,
    "metricsType": "MEMORY_USAGE",
    "thresholdValue": 0.8
    }
    ],
    "enableAlerts": true,
    "enableServiceSuspend": true,
    "notificationEmailIDs": [
    "customer1@test.com",
    "customer2@test.com"
    ],
    "reAlertIntervalinMinutes": 120,
    "relativeWarningThreshold": 0.05,
    "smtp": {
    "password": "********",
    "senderEmailID": "sender@test.com",
    "serverHostname": "smtp.test.com",
    "serverPort": 587
    },
    "suspensionWindowinMinutes": 60
    }

    Supported configuration variables

    Configuration variable nameDescriptionDefault value
    alertConfigs.maxAllowedOccurrenceCountNumber of consecutive occurrences to hit before triggering an alert (warning/critical). Every interval defaults to 30s.20 (or) 10mins for CPU/Memory, 3 (or) 1.5mins for Disk Usage/File Count
    alertConfigs.metricsTypeThe metric type trackedCPU_USAGE, DISK_USAGE, FILE_COUNT, MEMORY_USAGE
    alertConfigs.thresholdValueThe threshold value beyond which alert is triggered. This is a fractional value between 0 and 1. Default: 0.8 for MEMORY_USAGE and 0.95 for others
    enableAlertsEnabling of alerting systemtrue
    notificationEmailIDsList of emails to notify in case of hitting alerts, this list is comma separated[]
    reAlertIntervalinMinutesHow often should triggered alerts resend the email in minutes120
    relativeWarningThresholdIt is the (configured threshold - relativeWarningThreshold) value after which WARNING is generated before hitting CRITICAL state. This is a fractional value between 0 and 1. Defaulted to 5% less than threshold value.0.05
    smtp.passwordThe password of the SMTP server credential. This is an encrypted value. Setting this to NULL will disable email alerting.NULL
    smtp.senderEmailIDThe sender email ID at the SMTP server. Setting this to NULL will disable SMTP email alerting.NULL
    smtp.serverHostnameThe SMTP hostname of the server.smtp.gmail.com
    smtp.serverPortThe SMTP port of the server credential. Setting this to 0 will disable SMTP email alerting.587
    suspensionWindowinMinutesTime after which suspension of critical services is to be done in minutes60

    Alerting guidelines

    • The default configured values satisfy most use cases.
    • If you set enableAlerts to false, this will disable the alerting system. Monitoring will work and be visible from within the Admin Monitoring sub tab.
    - - + + \ No newline at end of file diff --git a/architecture/self-hosted/configurations/configure-audit-logs/index.html b/architecture/self-hosted/configurations/configure-audit-logs/index.html index 5a28c8ad0a..42af5cd2c4 100644 --- a/architecture/self-hosted/configurations/configure-audit-logs/index.html +++ b/architecture/self-hosted/configurations/configure-audit-logs/index.html @@ -6,16 +6,15 @@ Audit Events Configuration | Prophecy - - - - + + +
    Skip to main content

    Audit Events Configuration

    Prophecy offers robust support for storing audit events (logs) on the industry's leading cloud object stores: AWS S3, Azure Blob Storage, and GCP Cloud Storage, or even on a local persistent volume (PV). Leveraging the capabilities of these object stores, Prophecy seamlessly synchronizes and persistently stores audit events. This not only ensures the secure retention of crucial data but also facilitates streamlined tracking and in-depth analysis of user interactions and activities for enhanced operational insights.

    note

    Certain object store level configurations are shared with backup restore configurations. Make sure to configure the object store level configurations before proceeding below.

    Use case

    • The overarching objective is to comprehensively track and log every user-level action.
    • All user actions will be meticulously recorded and stored, enabling easy retrieval from a persistent storage or object store, particularly when an audit is required.
    • To achieve this, each user action will be meticulously tracked and written into designated database tables.
    • The data accumulated in these database tables will be periodically transferred and pushed to an object store for efficient storage and management.

    Configuration

    There are certain environment variables that need to be configured in Athena based on the kind of user events audit logs required.

    To configure object store settings in the Prophecy UI, follow these steps:

    1. Log in to the Prophecy UI as an admin user.
    2. Click on the three dots at the bottom left corner and select the settings icon from the submenu.
    3. Navigate to the Admin main tab.
    4. Within the Admin main tab, access the Config sub tab.
    5. Finally, click on the auditConfig sub tab to configure the audit settings.

    JSON format

    Below are JSON configurations within the Prophecy UI that need to be enabled to support this functionality. You will have to configure only the options which you require. Make sure to maintain a JSON format mentioned below while configuring the different options.

    {
    "disableUeventsGC": false,
    "enableUserEvents": false,
    "ueventsGCSchedule": "0 0 1 * * *",
    "ueventsSchedule": "0 0 */1 * * *"
    }

    Supported Configuration Variables

    Configuration variable nameDescriptionDefault value
    disableUeventsGCGarbage collection of user events from local DB is enabled by default once the events have been pushed to upstream object store. Set this to true to disable this garbage collection to retain this data locally as well. Note that setting this to true could potentially result in a very large DB size.false
    enableUserEventsSet to true to enable user event audit logsfalse
    ueventsScheduleHow frequently to push user events to object store. Defaults to every one hour. Uses 6-digit CRON0 0 */1 * * *
    ueventsGCScheduleHow frequently to purge old user events from the internal database. Defaults to daily 1 am. Uses 6-digit CRON0 0 1 * * *

    To enable user events audit logs

    • Set enableUserEvents to true
    • Set ueventsSchedule to the CRON string for how frequently you'd like user events pushed to the object store.
    • Set ueventsGCSchedule to the CRON string for how frequently you'd like old user events removed from the database. PVC Mount point with storage | /backup |

    User events audit logs output

    By default, events are pushed to the /namespace/audit/ directory in the configured S3 bucket.

    Sample output of user audit logs

    {"opType": "Query", "createdAt": 1690610557857, "nameHints": ["User"], "userEmail": "prophecy-system@prophecy.io", "customerName": "prophecy.io", "responseCode": 200, "requestParams": {"email": "\"adminuser@prophecy.io\""}, "requestPayload": "[\"_id\",\"name\",\"firstName\",\"lastName\",\"email\",\"created\",\"createdBy\",\"aspects(aspect:[Info]){AspectName AspectValue}\"]", "latencyInMillis": 11}
    {"opType": "Query", "createdAt": 1690552930350, "nameHints": ["ProjectReleaseByProjectIdAndStatus"], "userEmail": "prophecy-system@prophecy.io", "customerName": "prophecy.io", "responseCode": 200, "requestParams": {"statuses": "[Pending,Running,Retrying]"}, "requestPayload": "[\"_id\",\"releaseTag\",\"status\",\"attempt\",\"createdBy\",\"project{_id name}\"]", "latencyInMillis": 5}
    {"opType": "Query", "createdAt": 1690610557967, "nameHints": ["User"], "userEmail": "prophecy-system@prophecy.io", "customerName": "prophecy.io", "responseCode": 200, "requestParams": {"email": "\"adminuser@prophecy.io\""}, "requestPayload": "[\"_id\",\"name\",\"firstName\",\"lastName\",\"email\",\"created\",\"createdBy\",\"aspects(aspect:[Info]){AspectName AspectValue}\"]", "latencyInMillis": 10}
    - - + + \ No newline at end of file diff --git a/architecture/self-hosted/configurations/configure-object-store/index.html b/architecture/self-hosted/configurations/configure-object-store/index.html index 76d749a7eb..427992c4d3 100644 --- a/architecture/self-hosted/configurations/configure-object-store/index.html +++ b/architecture/self-hosted/configurations/configure-object-store/index.html @@ -6,15 +6,14 @@ Object Store Configuration | Prophecy - - - - + + +
    Skip to main content

    Object Store Configuration

    Prophecy provides reliable support for storing essential data such as backups and audit logs. However, to enable this functionality, a storage location is required. Prophecy seamlessly integrates with the industry's leading cloud (provider) object stores, including AWS S3, Azure Blob Storage, and GCP Cloud Storage, as well as local persistent volumes (which could be backed by a NFS). This section outlines how to configure these storage options effectively.

    To configure object store settings in the Prophecy UI, follow these steps:

    1. Log in to the Prophecy UI as an admin user.
    2. Click on the three dots at the bottom left corner and select the settings icon from the submenu.
    3. Navigate to the Admin main tab.
    4. Within the Admin main tab, access the Config sub tab.
    5. Finally, click on the objectStoreConfig sub tab to configure the object store settings.

    Configuration options

    Below are JSON configurations within the Prophecy UI that need to be enabled to support this functionality. You will have to configure only the options which you require. Make sure to maintain a JSON format mentioned below while configuring the different options.

    caution

    All sensitive keys are displayed in ******** format. However, you may supply the new values in normal text and save the JSON to update the keys.

    {
    "aws": {
    "accessKey": "********",
    "s3": {
    "bucketName": "athena-ondemand-backup",
    "endpoint": "https://s3.us-west-2.amazonaws.com",
    "forcePathStyle": true,
    "region": "us-west-2"
    },
    "secretKey": "********"
    },
    "azure": {
    "accessKey": "********",
    "blobStorage": {
    "accountName": "prophecyathenabackup",
    "containerName": "athena-ondemand-backup",
    "serviceURL": "https://prophecyathenabackup.blob.core.windows.net/"
    },
    "useManagedIdentityToAuthenticate": false
    },
    "cloudProvider": "gcp",
    "gcp": {
    "cloudStorage": {
    "bucketName": "athena-ondemand-backup"
    },
    "serviceAccount": "********"
    },
    "localLocation": "/backup",
    "locationType": "gcp-cloud-storage"
    }

    Generic Configuration Variables

    These are the generic configurations which are required to be set irrespective of the provider.

    Configuration variable nameDescriptionDefault value
    locationTypeWhich provider to use for the object store. Supports local, s3, azure-blob-storage, gcp-cloud-storagelocal
    localLocationAny PVC mount point where the local backup is stored. It is required even for provider-based object stores, as a temporary location./backup

    Provider specific configuration

    This section outlines the various configurations at each provider level.

    AWS S3

    For this provider, set locationType to s3 and configure using the following variables:

    Environment variable nameDescriptionDefault value
    aws.s3.bucketNameS3 Bucket nameathena-ondemand-backup
    aws.s3.endpointS3 Endpoint used to communicate withhttps://s3.us-west-2.amazonaws.com
    aws.s3.forcePathStyleIf S3 should use path style for bucket nametrue
    aws.s3.regionS3 Regionus-west-2
    aws.accessKeyAWS Access key with the required privileges
    aws.secretKeyAWS Secret key with the required privileges
    localLocationAny PVC Mount point with storage/backup
    locationTypeSet to AWS S3s3

    Azure Blob Storage

    For this provider, set locationType to azure-blob-storage and configure using the following variables:

    Environment variable nameDescriptionDefault value
    azure.blobStorage.accountNameStorage Account nameprophecyathenabackup
    azure.blobStorage.serviceURLStorage Account Service URLhttps://prophecyathenabackup.blob.core.windows.net/
    azure.blobStorage.containerNameContainer name within the Storage Accountathena-ondemand-backup
    azure.useManagedIdentityToAuthenticateWhether to use system managed identity (role) to authenticatefalse
    azure.accessKeyStorage Access key
    localLocationAny PVC Mount point with storage/backup
    locationTypeSet to Azure Blob Storageazure-blob-storage

    GCP Cloud Storage

    For this provider, set locationType to gcp-cloud-storage and configure using the following variables:

    Environment variable nameDescriptionDefault value
    gcp.serviceAccountIs the GCP Service Account in Base64 Encoded format with the required privileges
    gcp.cloudStorage.bucketNameCloud Storage bucket nameathena-ondemand-backup
    localLocationAny PVC Mount point with storage/backup
    locationTypeSet to GCP Cloud Storagegcp-cloud-storage

    Local PV (NFS)

    As this utilizes the local persistent volumes (PVs) offered by Kubernetes, no extra configuration is needed. All backups are stored directly on the disk, which can be supported by protocols such as NFS.

    Environment variable nameDescriptionDefault value
    localLocationAny PVC Mount point with storage/backup
    locationTypeSet to locallocal
    - - + + \ No newline at end of file diff --git a/architecture/self-hosted/configurations/index.html b/architecture/self-hosted/configurations/index.html index 07e863138b..4f4c00bfda 100644 --- a/architecture/self-hosted/configurations/index.html +++ b/architecture/self-hosted/configurations/index.html @@ -6,15 +6,14 @@ Configurations | Prophecy - - - - + + +
    Skip to main content
    - - + + \ No newline at end of file diff --git a/architecture/self-hosted/configurations/sandbox-configuration/index.html b/architecture/self-hosted/configurations/sandbox-configuration/index.html index 57423e99d5..a6290ffddb 100644 --- a/architecture/self-hosted/configurations/sandbox-configuration/index.html +++ b/architecture/self-hosted/configurations/sandbox-configuration/index.html @@ -6,15 +6,14 @@ Sandbox Configuration | Prophecy - - - - + + +
    Skip to main content

    Sandbox Configuration

    In the traditional Prophecy deployment model, a single microservice known as the editor web was tasked with managing all user requests using a threading approach. Each user request corresponds to a browser tab (session) utilized by individuals to execute operations on their pipelines within our integrated development environment (IDE). However, this model encountered constraints in isolation and vertically scaling the micro-service to accommodate the growing volume of user requests. As a result, this engendered significant resource consumption within the editor web microservice, ultimately impairing its ability to efficiently handle requests.

    What is sandboxing

    To tackle the aforementioned challenge, in Prophecy version 3.2, we introduced a novel approach to isolation and load management known as sandboxing. This feature enables the provisioning of a pair of microservices for each user request: the Gem Plugin and Schema Analysis together termed as a single sandbox. This empowers users to execute their pipelines independently within dedicated environments. For instance, in a scenario where there are three users each with two browser tabs open in the IDE, this results in the provisioning of six pods each for the Gem Plugin and Schema Analysis. Consequently, users can seamlessly run their pipelines without interference in complete isolation.

    How to configure sandboxes

    Newer versions of Prophecy use sandboxing (ENABLE_SANDBOXING: true) as the default way to deploy Prophecy services. To configure sandbox settings in the Prophecy UI, follow these steps:

    1. Log in to the Prophecy UI as an admin user.
    2. Click on the three dots at the bottom left corner and select the settings icon from the submenu.
    3. Navigate to the Admin main tab.
    4. Within the Admin main tab, access the Config sub tab.
    5. Finally, click on the sandboxConfig sub tab to configure the settings.

    Configuration options

    Below are JSON configurations within the Prophecy UI that need to be enabled to support this functionality. You will have to configure only the options which you require. Make sure to maintain a JSON format mentioned below while configuring the different options.

    {
    "enableSandboxSharing": false,
    "PythonSandbox": {
    "GemPluginPod": {
    "cpu": {
    "limit": "2",
    "request": "0.5"
    },
    "memory": {
    "limit": "2.5Gi",
    "request": "2.5Gi"
    }
    },
    "schemaAnalysisPod": {
    "cpu": {
    "limit": "2",
    "request": "0.5"
    },
    "memory": {
    "limit": "2.5Gi",
    "request": "2.5Gi"
    }
    }
    },
    "PythonSandboxPoolSize": 2,
    "sandboxImageRegistry": "gcr.io/prophecy-share",
    "sandboxImageTag": "<current-prophecy-version>",
    "sandboxMaxTotalPods": 100,
    "sandboxMonitoringInterval": 2,
    "sandboxPoolHealthCheckInterval": 100,
    "sandboxStalePodsCleanupInterval": 4,
    "ScalaSandbox": {
    "GemPluginPod": {
    "cpu": {
    "limit": "2",
    "request": "0.5"
    },
    "memory": {
    "limit": "2.5Gi",
    "request": "2.5Gi"
    }
    },
    "schemaAnalysisPod": {
    "cpu": {
    "limit": "2",
    "request": "0.5"
    },
    "memory": {
    "limit": "2.5Gi",
    "request": "2.5Gi"
    }
    }
    },
    "ScalaSandboxPoolSize": 3
    }

    Configuration Variables

    These are the generic configurations which are required to be set irrespective of the provider.

    Configuration variable nameDescriptionDefault value
    enableSandboxSharingis an advanced feature that lets users share a sandbox between multiple sessionsfalse
    PythonSandbox.GemPluginPod.cpu.limitConfigures the CPU limit of the Python Gem plugin pod2
    PythonSandbox.GemPluginPod.cpu.requestConfigures the CPU request of the Python Gem plugin pod0.5
    PythonSandbox.GemPluginPod.memory.limitConfigures the Memory limit of the Python Gem plugin pod2.5Gi
    PythonSandbox.GemPluginPod.memory.requestConfigures the Memory request of the Python Gem plugin pod2.5Gi
    PythonSandbox.schemaAnalysisPod.cpu.limitConfigures the CPU limit of the Python schema analysis pod2
    PythonSandbox.schemaAnalysisPod.cpu.requestConfigures the CPU request of the Python schema analysis pod0.5
    PythonSandbox.schemaAnalysisPod.memory.limitConfigures the Memory limit of the Python schema analysis pod2.5Gi
    PythonSandbox.schemaAnalysisPod.memory.requestConfigures the Memory request of the Python schema analysis pod2.5Gi
    PythonSandboxPoolSizenumber of concurrent Python sessions/tabs startup (reserved) allowed2
    sandboxImageRegistryimage registry to be used for pulling sandbox images fromgcr.io/prophecy-share
    sandboxImageTagimage tag to be used for pulling sandbox images. Defaulted to current Prophecy version<current-prophecy-version-running>
    sandboxMaxTotalPodsmaximum number of Scala + Python (Gem plugin + schema analysis) pods allowed. This is used to restrict the number of pods spun up in case of surge of users.100
    sandboxMonitoringIntervalMonitoring interval used to spin up new sandbox pods as per session requests in seconds (s).2
    sandboxPoolHealthCheckIntervalPool health check interval used to check the health of each pod in seconds (s).100
    sandboxStalePodsCleanupIntervalClean up period used to clean up unused pods in seconds (s).4
    ScalaSandbox.GemPluginPod.cpu.limitConfigures the CPU limit of the Scala Gem plugin pod2
    ScalaSandbox.GemPluginPod.cpu.requestConfigures the CPU request of the Scala Gem plugin pod0.5
    ScalaSandbox.GemPluginPod.memory.limitConfigures the Memory limit of the Scala Gem plugin pod2.5Gi
    ScalaSandbox.GemPluginPod.memory.requestConfigures the Memory request of the Scala Gem plugin pod2.5Gi
    ScalaSandbox.schemaAnalysisPod.cpu.limitConfigures the CPU limit of the Scala schema analysis pod2
    ScalaSandbox.schemaAnalysisPod.cpu.requestConfigures the CPU request of the Scala schema analysis pod0.5
    ScalaSandbox.schemaAnalysisPod.memory.limitConfigures the Memory limit of the Scala schema analysis pod2.5Gi
    ScalaSandbox.schemaAnalysisPod.memory.requestConfigures the Memory request of the Scala schema analysis pod2.5Gi
    ScalaSandboxPoolSizenumber of concurrent Scala sessions/tabs startup (reserved) allowed3
    - - + + \ No newline at end of file diff --git a/architecture/self-hosted/download-logs/index.html b/architecture/self-hosted/download-logs/index.html index 447485847f..1b8fc01a9d 100644 --- a/architecture/self-hosted/download-logs/index.html +++ b/architecture/self-hosted/download-logs/index.html @@ -6,15 +6,14 @@ Download logs | Prophecy - - - - + + +
    Skip to main content

    Download logs

    As an admin user, you can download your environment logs from Prophecy without needing access to your Prophecy cluster or the assistance of Prophecy Support. This reduces the delay in debugging any issues with your Prophecy services.

    Use case

    • The overarching objective is to debug what is going on with your Prophecy services.
    • To achieve this, we've enabled admins to be able to download Prophecy logs and environment information so that they can upload them to Zendesk.

    Download logs features

    You can use the Download Logs feature to capture logs using the Services and time selectors.

    • All services are selected by default.
    • The download supports one hour of logs from the Start Time.

    The captured logs include all relevant Prophecy configurations, such as the following items:

    • Kubernetes cluster configuration
      • Resource quotas
      • Node configuration
    • Cluster custom resources
    • Config maps and files
    • Resource consumption logs
    note

    Sensitive information, such as customer preview data, credentials, tokens, or passwords, is scrubbed or redacted from the download bundle.

    Enable Prophecy Downloads logs

    Before you can download logs, you must enable it in your private SAAS deployment.

    See the following requirements for enabling the Prophecy logs:

    • Prophecy collects the logs of all pods and stores them in the Athena Pod.
    • Each pod uses an additional 500 MB ephemeral storage for temporary storage.
    • Athena requires additional storage of around 100 GB to store one week of logs.
    • A new container image fluentbit (gcr.io/prophecy-share/fluent-bit:2.2.3) is required for this feature.

    To enable Minio in Athena and provide it a volume, follow these steps:

    1. Add env variables to Athena STS:
      - name: MINIO_ENDPOINT
        value: athena:9000
      - name: ENABLE_FLUENTBIT_SIDECARS
        value: "true"
      - name: RUN_ATHENA_MINIO
        value: "true"
    1. Add volume to Athena STS:
      volumeClaimTemplates:
        ...
        - apiVersion: v1
          kind: PersistentVolumeClaim
          metadata:
            creationTimestamp: null
            name: minio-storage
          spec:
            accessModes:
              - ReadWriteOnce
            resources:
              requests:
                storage: 10Gi
            volumeMode: Filesystem
    1. Add volume mount to Athena:
      volumeMounts:
        ...
        - mountPath: /minio/data
          name: minio-storage
    1. Add the Minio port to Athena SVC:
      ports:
        ...
        - name: minio
          port: 9000
          protocol: TCP
          targetPort: 9000

    To download logs in the Prophecy UI, follow these steps:

    1. Log in to the Prophecy UI as an admin user.
    2. Click on the three dots at the bottom left corner and select the Settings icon from the submenu.
    3. Navigate to the Admin main tab.
    4. Within the Admin main tab, access the Logs sub tab.
    5. Set the Services and Start Date/Time, and then click Download

    After several seconds, the file will download via your browser. The download generates a compressed file containing logs, suitable for sharing with a Prophecy support engineer.

    - - + + \ No newline at end of file diff --git a/architecture/self-hosted/generate-api-key/index.html b/architecture/self-hosted/generate-api-key/index.html index b00cacc9be..637fec2c6b 100644 --- a/architecture/self-hosted/generate-api-key/index.html +++ b/architecture/self-hosted/generate-api-key/index.html @@ -6,15 +6,14 @@ Generate API Key | Prophecy - - - - + + +
    Skip to main content

    Generate API Key

    Prophecy provides a secure way to generate an API Key per Prophecy deployment, which can be used to trigger various secure operations like backup/restore. This API support will be enhanced in the future to cover other such use cases. Follow the steps below to generate a new key.

    Generate a fresh API Key

    To generate an API key in the Prophecy UI, follow these steps:

    1. Log in to the Prophecy UI as an admin user.
    2. Click on the three dots at the bottom left corner and select the settings icon from the submenu.
    3. Navigate to the Access Tokens main tab.
    4. Click on Generate Token and a window will pop up.
    5. Fill in the details like Token Name and Expiration.
    6. You will be provided with a new token. You may use the copy button to copy it to use it for various API activities.

    Delete an existing API Key

    1. Go to the Access Tokens main tab described above.
    2. Click on the delete icon against the key which you wish to delete.
    3. After confirmation is done, the token will be deleted.
    - - + + \ No newline at end of file diff --git a/architecture/self-hosted/index.html b/architecture/self-hosted/index.html index a1f8980b24..91c760753a 100644 --- a/architecture/self-hosted/index.html +++ b/architecture/self-hosted/index.html @@ -6,16 +6,15 @@ Self Hosted | Prophecy - - - - + + +
    Skip to main content

    Self Hosted

    Prophecy is written as a set of microservices that run on a Kubernetes cluster. Our recommended platforms to run Kubernetes are:

    • Amazon Web Services managed EKS
    • Microsoft Azure managed AKS
    • Google Cloud Platform managed GKE

    There are a couple of ways to install the self-managed version of Prophecy:

    • Installation via Helm
    • Installation via Marketplaces

    Once Prophecy is installed, you'll have to manually perform upgrades, backups, restores, etc.

    Spark

    After installation, Prophecy requires the following for interactive development:

    • Databricks API 1.2 for Databricks-based Spark deployments, or
    • Livy 0.7.x for any other Spark deployment support (like CDP, HDP, MapR, Spark on Kubernetes).

    Airflow

    For interactive and jobs deployment to Airflow, Prophecy requires a customer-managed Airflow deployment version 2.x.x (latest recommended). Astronomer's managed Airflow offering is supported.

    Logging / Metrics

    • Prophecy comes with a built-in lightweight infrastructure for monitoring (based on Prometheus, Grafana, alert-manager, etc.) and logging (based on Loki & Grafana).
    • You can optionally redirect the logs and metrics to your own logging services.

    To learn more about installation methods and management, see the following pages:

    - - + + \ No newline at end of file diff --git a/architecture/self-hosted/installation-helm/index.html b/architecture/self-hosted/installation-helm/index.html index 34fdbe920c..24098d207b 100644 --- a/architecture/self-hosted/installation-helm/index.html +++ b/architecture/self-hosted/installation-helm/index.html @@ -6,16 +6,15 @@ Installation via Helm | Prophecy - - - - + + +
    Skip to main content

    Installation via Helm

    One way to install Prophecy is to use Helm. Installation requirements and Helm chart values are outlined in this page.

    For an example step-by-step installation, visit Installation on AWS.

    Requirements

    To install Prophecy, you need:

    • Kubernetes version 1.21 or later.
    • A namespace in Kubernetes cluster where you will install Prophecy.
    • Permission to install custom resources, or CRDs. Alternatively, you can deploy a single Helm chart that can be shared on request.
    • Block storage in the Kubernetes cluster.
    • Kubernetes cluster configured in either multi-AZ or single-AZ mode. For multi-AZ mode, the block storage has to have the volume binding mode set to WaitForFirstConsumer.
    • Persistent storage with dynamic provisioning (like gp2).
    • Helm for Prophecy installation.

    Minimum service requirements:

    NamespaceDescription# Cores# RAM# Block Storage
    Control PlaneMain services (front-end, code editor, metadata, lineage, etc.)40 Cores78GB160GB
    PlatformBackup (twice a day, configurable), monitoring, logging services (optional)4 Cores8GB200GB

    Install Prophecy

    The helm install command installs Prophecy via the provided Helm chart.

    1. To retrieve the Prophecy Helm chart, run the command:

      helm repo add prophecy https://prophecy-chart.s3.us-west-2.amazonaws.com
    2. If you want to install Prophecy using all of the provided values, the simplest command to run is:

      helm -n <namespace> install prophecy prophecy/prophecy-installer --version <prophecy-chart-version> --set global.customer.name=<customer-name> --set global.prophecy.rootUrl=<IDE-URL>

    Read through the rest of this page to understand how to customize the installation command.

    Helm chart values

    Prophecy is installed via a Helm chart called prophecy-installer. The following are the commonly used values that can be configured for this Helm chart. The Helm chart values can be broadly classified into 5 different sections:

    • Athena: Configurations related to the management plane.
    • Global: Configurations for all common values between components like Athena, Postgres, and Platform.
    • Postgres: Configurations related to Postgres or at the database level.
    • Platform: Configurations regarding various platform components like elastic search, Prometheus, Grafana, etc.
    • Version: The Prophecy version you want to deploy.
    Complete table of Helm values

    Parameters marked with an asterisk (*) are mandatory.

    ParameterTypeDescriptionDefault value
    ATHENA
    athena.config-
    athena.config.fromFileboolSet this to false if you want to pass different environment and Prophecy application level configurations values inline here. If you keep it to true, the values are taken from the ../configs directory of the helm charttrue
    athena.config.athenaEnvsmap[string]stringProvide the values you wish to override in athena ENV variables. Passed in the format of KEY: "value". Value is type sensitive. This field is only respected when fromFile is set to false../configs/athena-env-cluster.yaml
    athena.config.envsmap[string]stringProvide the values you wish to override in Prophecy application ENV variables. Passed in the format of KEY: "value". Value is type sensitive. This field is only respected when fromFile is set to false../configs/env-cluster.yaml
    athena.config.prophecyClusterprophecyCluster (custom)Provide the values you wish to override in Prophecy cluster spec. This field is only respected when fromFile is set to false../configs/prophecy-cluster.yaml
    athena.enableSandboxingboolSet this to true to enable sandboxing feature.false
    athena.enableSignupboolSet this to true if you want to allow signups using a verified email-id.false
    athena.enableSlimImagesboolSet this to true to deploy Prophecy slim images (images without Spark).false
    athena.isDarkClusterboolSet this to true if the cluster doesn't have internet access.false
    athena.resources-
    athena.resources.requests-
    athena.resources.requests.cpustringGuaranteed value of CPU required to deploy Athena200m
    athena.resources.requests.memorystringGuaranteed value of memory required to deploy Athena512Mi
    athena.resources.limits-
    athena.resources.limits.cpustringMaximum usage of CPU for Athena200m
    athena.resources.limits.memorystringMaximum usage of memory for Athena512Mi
    athena.trustCAboolSet this variable if you want metagraph and execution to trust local self-signed certificates.true
    GLOBAL
    global.blockStorage-Kubernetes PVC block storage options for Athena, Postgres and Platform components. Prophecy services take storage values from the Prophecy cluster in the configs dir.
    global.blockStorage.storageClassNamestringKubernetes storage class (SC) name to be used by PVCsdefault
    global.blockStorage.pvcAnnotationsmap[string]stringAny additional kubernetes annotations to be set on the PVCs""
    global.customer-
    global.customer.name*stringName of the customer to uniquely identify""
    global.customer.cluster*stringName of the cluster to uniquely identify the cluster for a customerprophecy
    global.disableIngressCreationboolSet this to true, if the customer wishes to manually manage their ingress entriesfalse
    global.imagePullPolicystringImage pull policy for all the containers deployedAlways
    global.ingressController-
    global.ingressController.typestringType of ingress controller being used. Currently we support nginx and istionginx
    global.ingressController.classstringName of the controller class which is to be used for ingress resourcesprophecy-nginx
    global.istio-
    global.istio.enabledboolSet this to true to allow usage of istio as an ingress controller/gateway. We don't support sidecars yet. Istio isn't installed or managed by Prophecy.false
    global.istio.gateway-
    global.istio.gateway.namestringName of the Istio gatewayistio-gateways/istio-gateway
    global.istio.gateway.createboolSet this to true, when you wish to create the Istio gatewayfalse
    global.prophecy-
    global.prophecy.domainValuestringdomain value used for cookies. Used to configure multiple URLs.""
    global.prophecy.envSecret-Is configured when Prophecy credentials are fetched as a kubernetes secret
    global.prophecy.envSecret.createboolSet to true if the secret is to be created. With false the secret is expected to be created by the customer.false
    global.prophecy.envSecret.datamap[string]stringProvide the values you wish to override in Prophecy secret ENV variables. Passed in the format of METADATA_DB_PASSWORD: dummyPassword123.
    global.prophecy.envSecret.namestringName of the Kubernetes secret to be used.env-secrets
    global.prophecy.imagePullSecretstringName of the Kubernetes secret which contains the image pull credentials to the container registry.""
    global.prophecy.rootUrl*stringRoot URL where the Prophecy deployment will be hosted. You may provide a list of comma-separated URLs if you wish to work with multiple URLs. Note that the first URL in the provided list will be considered the primary URL.""
    global.prophecy.wildcardCert-
    global.prophecy.wildcardCert.namestringThe number of secrets passed should either be one or equal to the number of rootUrls provided which should be generated for each of these URLs in respective order.""
    global.prophecy.wildcardCert.useExternalboolSetting this to true will require the customer to provide a secret name here.false
    global.repositorystringContainer registry prefix to be used.gcr.io/prophecy-share
    global.tls-
    global.tls.enabledboolSpecifies if TLS is to be enabledtrue
    global.tls.certOnLBboolSpecifies whether TLS termination is to be done at the loadbalancerfalse
    POSTGRES
    postgres.isExternalPostgresboolSetting this to true will allow Prophecy services to use an externally managed Postgres instance and will not use a Prophecy-managed instance.false
    postgres.hoststringSpecifies the host name for the postgres service. If isExternalPostgres, pass the DNS host name (Not IP) for the external postgres here.postgres
    postgres.portintSpecifies the port used to communicate with postgres.5432
    postgres.resources-
    postgres.resources.requests-
    postgres.resources.requests.cpustringGuaranteed value of CPU required to deploy Postgres1
    postgres.resources.requests.memorystringGuaranteed value of memory required to deploy Postgres2000Mi
    postgres.resources.limits-
    postgres.resources.limits.cpustringMaximum usage of CPU for Postgres2
    postgres.resources.limits.memorystringMaximum usage of memory for Postgres4000Mi
    postgres.secretNamestringSpecifies the postgres AWS secret manager name from which postgres credentials are to be fetched.
    postgres.secretLocationstringSpecifies the postgres AWS secret manager region from which postgres credentials are to be fetched.
    postgres.volume-
    postgres.volume.requestsstringSpecifies the volume size of the PVC used by postgres25Gi
    PLATFORM
    platform.enabledboolSpecifies if one/more of the platform components are to be enabledtrue
    platform.elasticsearch.enabledboolSpecifies if elasticsearch is to be enabled and deployed for supporting Prophecy search featuretrue
    platform.ingressNginx.enabledboolSpecifies if nginx controller is to be deployed for ingress routingtrue
    platform.namespacestringSpecifies the namespace used to deploy the platform componentsprophecy
    platform.tracing.enabledboolSpecifies if Jaeger based tracing is to be enabled and deployedtrue
    VERSION
    versionstringSpecifies the Prophecy version to be deployed""

    Common configurations

    Here are some configurations that are commonly changed from the default.

    Internal repository

    During the Helm installation, docker images are pulled from Prophecy's public Google Container Registry gcr.io/prophecy-share. If you want to use your own internal container registry, you can specify it by setting the global.repository value during installation. Make sure to download the images from the public GCR first.

    Nginx ingress controller

    By default, Prophecy will install its own managed Nginx ingress controller during the Helm installation. If you want to use your own Nginx ingress controller already installed on your Kubernetes cluster, you can do so if it supports external service exposure via a LoadBalancer. You'll have to set global.disableIngressCreation to true, and set global.ingressController.class to the appropriate name.

    Custom domain names

    If you are using Prophecy's domain, you don't have to worry about hostname resolution. However, if you want to use your own domain, you need to create SSL certificates, set up the correct DNS configurations, and include that information during the Helm installation.

    Additional example Helm install commands

    • Using your own SSL certificates without internet connectivity:

      helm -n <namespace> install prophecy prophecy/prophecy-installer --version <prophecy-chart-version> --set global.customer.name=<customer name> --set global.prophecy.rootUrl=<Base URL> --set global.prophecy.wildcardCert.useExternal=true --set global.prophecy.wildcardCert.name=<wildcard cert secret name> --set athena.controlcenter.disabled=true --set global.repository=<Image repository> --set global.prophecy.imagePullSecret=<Image pull secret name> --set athena.isDarkCluster=true
    • Using an external SQL database:

      helm -n <namespace> upgrade -i prophecy-installer prophecy/prophecy-installer --version 3.3.1-1 --set version=3.3.1.1 --set global.customer.cluster={cluster-name} --set global.prophecy.rootUrl={cluster-name}-{customer-name}.dev.cloud.prophecy.io --set global.customer.name={customer-name} --set postgres.isExternalPostgres=true --set postgres.host={googlesql-dns-name} --set postgres.user={google-sql-user-name} --set postgres.password={google-sql-user-password} --debug
    - - + + \ No newline at end of file diff --git a/architecture/self-hosted/installation-helm/install-on-aws/index.html b/architecture/self-hosted/installation-helm/install-on-aws/index.html index 3ede173dd6..bad3b88df1 100644 --- a/architecture/self-hosted/installation-helm/install-on-aws/index.html +++ b/architecture/self-hosted/installation-helm/install-on-aws/index.html @@ -6,15 +6,14 @@ Installation on AWS | Prophecy - - - - + + +
    Skip to main content

    Installation on AWS

    If you are deploying Prophecy with the private SaaS option, you need to install Prophecy in your Virtual Private Cloud (VPC). Use this guide to help when installing Prophecy on AWS.

    Get started

    In this guide, we'll walk you through how to install Prophecy on an EKS cluster in AWS. Note that this is just one recommended way to perform the installation.

    Connect to your Kubernetes cluster

    Let's start by connecting to the Kubernetes cluster where you will install Prophecy.

    1. Throughout this guide, you'll need to interact with the command line interface. Make sure you have these command line tools:

    2. Configure your AWS CLI if you have not already.

    3. Configure kubectl to connect to your cluster using the following command.

      aws eks --region <aws-region> update-kubeconfig --name <cluster-name> --alias <alias>

    Now, your EKS cluster should be included in your local Kubernetes configuration.

    Validate your Kubernetes configurations

    The Kubernetes cluster on which you will install Prophecy must meet a set of requirements.

    1. Kubernetes must be version 1.21 or later. Check your Kubernetes server version using the following command.

      kubectl version
    2. Your Kubernetes cluster must have persistent storage with dynamic provisioning. To make sure you have at least one StorageClass that has a provisioner and reclaimPolicy set, run:

      kubectl get storageclass
    3. You must have permission to install custom resources, or CRDs, to your Kubernetes cluster. CRDs will be installed during installation with the Helm command. You can check if you have this permission with the following command.

      kubectl auth can-i create customresourcedefinitions --all-namespaces

      If you do not have this permission, contact your Kubernetes admin or EKS owner to grant you cluster-admin or equivalent permissions. Alternatively, you can deploy a single Helm chart instead that can be shared on request.

    4. You need a namespace in your Kubernetes cluster where you will install Prophecy. Create a namespace using the command:

      kubectl create namespace <namespace>

      A common namespace to use is prophecy. To check if your namespace was created, run:

      kubectl get namespaces

    Define network configurations

    There are certain configurations that control how Prophecy's services are exposed, secured, and accessed in a networked environment. Network configurations can be managed by Prophecy, or they can be self-managed.

    Ingress controller

    Prophecy can install its own Nginx ingress controller to expose services externally. However, you can also use an Nginx ingress controller already installed on your Kubernetes cluster if it supports external service exposure via a LoadBalancer. You'll have to specify an ingress controller during the Helm installation if you are not using the Prophecy ingress controller.

    Hostname resolution

    If you use a Prophecy domain in the format *.cloud.prophecy.io, Prophecy handles the hostname resolution. SSL certificates are managed using Let's Encrypt, and the DNS configuration is done for you.

    If you want to use your own domain:

    1. Obtain SSL certificates for your domain.
    2. Install the certificates in your Kubernetes cluster.
    3. Add DNS entries for the services in your domain's DNS zone.
    4. Ensure the entries point to the external IP address of the Nginx ingress controller LoadBalancer.

    Install Prophecy with Helm

    At this point, you should be ready to install Prophecy using Helm.

    1. Add the Prophecy Helm chart repository:

      helm repo add prophecy https://prophecy-chart.s3.us-west-2.amazonaws.com
    2. Run the Helm install command. Below is the most basic configuration command that only includes required Helm chart values:

      helm -n <namespace> install prophecy prophecy/prophecy-installer --version <version-number> \
      --set global.customer.name=<customer-name> --set global.prophecy.rootUrl=<base-url>

    For a full list of Helm chart values that you can set during the installation, visit Helm chart values.

    - - + + \ No newline at end of file diff --git a/architecture/self-hosted/upgrade-backup-restore/index.html b/architecture/self-hosted/upgrade-backup-restore/index.html index b272c6a903..674a0a6574 100644 --- a/architecture/self-hosted/upgrade-backup-restore/index.html +++ b/architecture/self-hosted/upgrade-backup-restore/index.html @@ -6,16 +6,15 @@ Upgrades and backups | Prophecy - - - - + + +
    Skip to main content

    Upgrades and backups

    This page outlines different actions you may perform to maintain your Prophecy installation.

    Upgrade

    note

    You may want to back up Prophecy before you upgrade.

    You can upgrade Prophecy in Settings > Admin.

    1. Navigate to the Admin tab of the Prophecy Settings page.
    2. Click on Upgrade Version.
    3. Choose the version you would like to upgrade to in the Version dropdown.
    4. Make sure that the Disable Rollback toggle is on.

    Upgrade version in Admin settings

    Backup

    Backups can be triggered manually via the API, or they can be configured to run automatically.

    On-demand backups

    You can use the Backup API to start a backup. See Generate API Key if you need an API key.

    Example:

    curl --location --request POST 'https://{prophecy-url}/api/backup' \
    --header 'Cookie: prophecy-token={api-key}' \
    --header 'Content-Type: application/json' \
    --data-raw '{}'

    Response:

    {
    "code": 202,
    "message": "Request Accepted with timestamp 2023-02-02t16-00-00"
    }

    Regular automatic backups

    You can configure regular backups in Prophecy in settings.

    1. Log in to Prophecy as an admin user.
    2. Click on the three dots at the bottom left corner.
    3. Navigate to Settings > Admin > Config > backupConfig.
    4. Make sure enableRegularBackups is set to true.
    5. Edit other variables to fit your requirements.
    6. Save your changes.

    Below is a list of supported variables that you can change.

    Configuration variable nameDescriptionDefault value
    backupFrequencyHow frequently to run automated backups. Defaults to daily at 00:00. Uses 6-field CRON0 0 0 * * *
    backupRetentionCountNumber of last N backups to retain.30
    enableRegularBackupsState of automated backup creation.false

    Additional backup APIs

    Here is a list of additional APIs for backups. One sample call may look like:

     curl --location --request POST 'https://{prophecy-url}/api/backup' \
    --header 'Cookie: prophecy-token={api-key}' \
    --header 'Content-Type: application/json' \
    --data-raw '{}'
    APIDescriptionParameters
    GET https://{prophecy-url}/api/backup/latestThis API returns the status of the current/last backup operation triggered.None expected
    GET https://{prophecy-url}/api/backup/statusThis API returns the status of the backup with a certain timestamp. If there is no timestamp passed and there is an ongoing backup, the status for ongoing backup is returned.timestamp
    GET https://{prophecy-url}/api/backup/listThis API returns the list of available backups.None expected
    GET https://{prophecy-url}/api/backup/deleteThis API attempts to delete the backup data (local and upstream) and also the metadata (database entries) associated with it. Note that when enableRegularBackups is set to true, backups older than backupRetentionCount in reverse order are garbage collected automatically.timestamp

    Restore

    Restore is an on-demand based overwrite of the whole Prophecy configuration to reflect the state of a particular backup. The restore operation always assumes a running destination Prophecy cluster where the data and the configuration of source cluster will be restored.

    note

    If the backup was taken in Athena's local Persistent Volume, it needs to be copied to Athena's Persistent Volume in the destination cluster before the restore operation can be performed.

    On-demand restore

    You can restore using the Restore API. See Generate API Key if you need an API key.

    danger

    This API should be used with extreme caution as triggering this will lead to loss of current state/data.

    The below API is used to trigger a restore operation. It expects one parameter which is the timestamp of a successful backup.

    curl --location --request POST 'https://{prophecy-url}/api/restore' \
    --header 'Cookie: prophecy-token={api-key}' \
    --header 'Content-Type: application/json' \
    --data-raw '{
    "timestamp": "2022-11-22t10-00-00",
    "sourceNamespace": "{Source cluster controlplane namespace}"
    }'

    Sample API call that disables the restore of gitserver. You may use similar options for artifactory / edweb / metagraph / openidfederator.

    curl --location --request POST 'https://{prophecy-url}/api/restore' \
    --header 'Cookie: prophecy-token={api-key}' \
    --header 'Content-Type: application/json' \
    --data-raw '{
    "timestamp": "2022-11-22t10-00-00",
    "sourceNamespace": "{Source cluster controlplane namespace}",
    "svcs": {
    "gitserver": {
    "disable": true
    }
    }
    }'

    In the above API:

    • timestamp is the timestamp of the backup to use to perform the restore
    • sourceNamespace is the namespace in which source cluster's control plane was installed.
    • svcs: This JSON object needs to be set only when you wish to skip restore of any particular service by setting its disable field to true. Otherwise, you can omit the svcs field entirely.

    Sample response

    {
    "code": 202,
    "message": "Request Accepted"
    }

    Additional restore APIs

    Here is a list of additional APIs for restore. One sample call may look like:

    curl --location --request POST 'https://{prophecy-url}/api/backup' \
    --header 'Cookie: prophecy-token={api-key}' \
    --header 'Content-Type: application/json' \
    --data-raw '{}'
    APIDescriptionParameters
    GET https://{prophecy-url}/api/restore/statusThis API returns the status of the restore operation with a certain timestamp. If there is no timestamp passed and there is an ongoing restore, the status for the ongoing restore is returned.timestamp

    Backup and restore guidelines

    1. Take backups regularly, preferably to cloud storage.
    2. Create a Disaster Recovery Kubernetes cluster in a different region.
    3. Install Prophecy in the remote Kubernetes cluster and keep it in standby. In other words, scale down all pods.
    4. Disaster Recovery restore can be initiated from the remote region when the primary goes down.
    5. Once the restore is done, the Disaster Recovery site is available for work to continue.

    Migrate to different cluster

    If there is a requirement to migrate to a different Kubernetes cluster, you can leverage the backups for that:

    1. Create a new Kubernetes cluster and install Prophecy based on Prophecy installation requirements.
    2. Back up the source cluster.
    3. Restore the backup into the new cluster.
    4. Check that everything works as expected.
    5. Plan a downtime for the source cluster, initiate a fresh backup and restore it to the new cluster.
    6. If the new cluster needs to use the old DNS, the DNS entry of the old cluster should point to the new cluster's LoadBalancer, and the Ingress of the new cluster needs to be changed to use the old name. Contact support if you require assistance with these steps.
    - - + + \ No newline at end of file diff --git a/assets/css/styles.072e67d9.css b/assets/css/styles.072e67d9.css deleted file mode 100644 index cdc6debae1..0000000000 --- a/assets/css/styles.072e67d9.css +++ /dev/null @@ -1 +0,0 @@ -.col,.container{padding:0 var(--ifm-spacing-horizontal);width:100%}.markdown>h2,.markdown>h3,.markdown>h4,.markdown>h5,.markdown>h6{margin-bottom:calc(var(--ifm-heading-vertical-rhythm-bottom)*var(--ifm-leading))}.markdown li,body{word-wrap:break-word}body,ol ol,ol ul,ul ol,ul ul{margin:0}pre,table{overflow:auto}blockquote,pre{margin:0 0 var(--ifm-spacing-vertical)}.breadcrumbs__link,.button{transition-timing-function:var(--ifm-transition-timing-default)}.button,code{vertical-align:middle}.button--outline.button--active,.button--outline:active,.button--outline:hover,:root{--ifm-button-color:var(--ifm-font-color-base-inverse)}.menu__link:hover,a{transition:color var(--ifm-transition-fast) var(--ifm-transition-timing-default)}.navbar--dark,:root{--ifm-navbar-link-hover-color:var(--ifm-color-primary)}.menu,.navbar-sidebar{overflow-x:hidden}:root,html[data-theme=dark]{--ifm-color-emphasis-500:var(--ifm-color-gray-500)}:root,[data-theme=dark]{--ifm-table-border-width:1px}*,.DocSearch-Container,.DocSearch-Container 
*{box-sizing:border-box}.toggleButton_gllP,html{-webkit-tap-highlight-color:transparent}:root{--ifm-color-scheme:light;--ifm-dark-value:10%;--ifm-darker-value:15%;--ifm-darkest-value:30%;--ifm-light-value:15%;--ifm-lighter-value:30%;--ifm-lightest-value:50%;--ifm-contrast-background-value:90%;--ifm-contrast-foreground-value:70%;--ifm-contrast-background-dark-value:70%;--ifm-contrast-foreground-dark-value:90%;--ifm-color-primary:#3578e5;--ifm-color-secondary:#ebedf0;--ifm-color-success:#00a400;--ifm-color-info:#54c7ec;--ifm-color-warning:#ffba00;--ifm-color-danger:#fa383e;--ifm-color-primary-dark:#306cce;--ifm-color-primary-darker:#2d66c3;--ifm-color-primary-darkest:#2554a0;--ifm-color-primary-light:#538ce9;--ifm-color-primary-lighter:#72a1ed;--ifm-color-primary-lightest:#9abcf2;--ifm-color-primary-contrast-background:#ebf2fc;--ifm-color-primary-contrast-foreground:#102445;--ifm-color-secondary-dark:#d4d5d8;--ifm-color-secondary-darker:#c8c9cc;--ifm-color-secondary-darkest:#a4a6a8;--ifm-color-secondary-light:#eef0f2;--ifm-color-secondary-lighter:#f1f2f5;--ifm-color-secondary-lightest:#f5f6f8;--ifm-color-secondary-contrast-background:#fdfdfe;--ifm-color-secondary-contrast-foreground:#474748;--ifm-color-success-dark:#009400;--ifm-color-success-darker:#008b00;--ifm-color-success-darkest:#007300;--ifm-color-success-light:#26b226;--ifm-color-success-lighter:#4dbf4d;--ifm-color-success-lightest:#80d280;--ifm-color-success-contrast-background:#e6f6e6;--ifm-color-success-contrast-foreground:#003100;--ifm-color-info-dark:#4cb3d4;--ifm-color-info-darker:#47a9c9;--ifm-color-info-darkest:#3b8ba5;--ifm-color-info-light:#6ecfef;--ifm-color-info-lighter:#87d8f2;--ifm-color-info-lightest:#aae3f6;--ifm-color-info-contrast-background:#eef9fd;--ifm-color-info-contrast-foreground:#193c47;--ifm-color-warning-dark:#e6a700;--ifm-color-warning-darker:#d99e00;--ifm-color-warning-darkest:#b38200;--ifm-color-warning-light:#ffc426;--ifm-color-warning-lighter:#ffcf4d;--ifm-color-warning-lightest
:#ffdd80;--ifm-color-warning-contrast-background:#fff8e6;--ifm-color-warning-contrast-foreground:#4d3800;--ifm-color-danger-dark:#e13238;--ifm-color-danger-darker:#d53035;--ifm-color-danger-darkest:#af272b;--ifm-color-danger-light:#fb565b;--ifm-color-danger-lighter:#fb7478;--ifm-color-danger-lightest:#fd9c9f;--ifm-color-danger-contrast-background:#ffebec;--ifm-color-danger-contrast-foreground:#4b1113;--ifm-color-white:#fff;--ifm-color-black:#000;--ifm-color-gray-0:var(--ifm-color-white);--ifm-color-gray-100:#f5f6f7;--ifm-color-gray-200:#ebedf0;--ifm-color-gray-300:#dadde1;--ifm-color-gray-400:#ccd0d5;--ifm-color-gray-500:#bec3c9;--ifm-color-gray-600:#8d949e;--ifm-color-gray-700:#606770;--ifm-color-gray-800:#444950;--ifm-color-gray-900:#1c1e21;--ifm-color-gray-1000:var(--ifm-color-black);--ifm-color-emphasis-0:var(--ifm-color-gray-0);--ifm-color-emphasis-100:var(--ifm-color-gray-100);--ifm-color-emphasis-200:var(--ifm-color-gray-200);--ifm-color-emphasis-300:var(--ifm-color-gray-300);--ifm-color-emphasis-400:var(--ifm-color-gray-400);--ifm-color-emphasis-600:var(--ifm-color-gray-600);--ifm-color-emphasis-700:var(--ifm-color-gray-700);--ifm-color-emphasis-800:var(--ifm-color-gray-800);--ifm-color-emphasis-900:var(--ifm-color-gray-900);--ifm-color-emphasis-1000:var(--ifm-color-gray-1000);--ifm-color-content:var(--ifm-color-emphasis-900);--ifm-color-content-inverse:var(--ifm-color-emphasis-0);--ifm-color-content-secondary:#525860;--ifm-background-color:#0000;--ifm-background-surface-color:var(--ifm-color-content-inverse);--ifm-global-border-width:1px;--ifm-global-radius:0.4rem;--ifm-hover-overlay:#0000000d;--ifm-font-color-base:var(--ifm-color-content);--ifm-font-color-base-inverse:var(--ifm-color-content-inverse);--ifm-font-color-secondary:var(--ifm-color-content-secondary);--ifm-font-family-base:system-ui,-apple-system,Segoe UI,Roboto,Ubuntu,Cantarell,Noto Sans,sans-serif,BlinkMacSystemFont,"Segoe UI",Helvetica,Arial,sans-serif,"Apple Color Emoji","Segoe UI 
Emoji","Segoe UI Symbol";--ifm-font-family-monospace:SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace;--ifm-font-size-base:100%;--ifm-font-weight-light:300;--ifm-font-weight-normal:400;--ifm-font-weight-semibold:500;--ifm-font-weight-bold:700;--ifm-font-weight-base:var(--ifm-font-weight-normal);--ifm-line-height-base:1.65;--ifm-global-spacing:1rem;--ifm-spacing-vertical:var(--ifm-global-spacing);--ifm-spacing-horizontal:var(--ifm-global-spacing);--ifm-transition-fast:200ms;--ifm-transition-slow:400ms;--ifm-transition-timing-default:cubic-bezier(0.08,0.52,0.52,1);--ifm-global-shadow-lw:0 1px 2px 0 #0000001a;--ifm-global-shadow-md:0 5px 40px #0003;--ifm-global-shadow-tl:0 12px 28px 0 #0003,0 2px 4px 0 #0000001a;--ifm-z-index-dropdown:100;--ifm-z-index-fixed:200;--ifm-z-index-overlay:400;--ifm-container-width:1140px;--ifm-container-width-xl:1320px;--ifm-code-background:#f6f7f8;--ifm-code-border-radius:var(--ifm-global-radius);--ifm-code-font-size:90%;--ifm-code-padding-horizontal:0.1rem;--ifm-code-padding-vertical:0.1rem;--ifm-pre-background:var(--ifm-code-background);--ifm-pre-border-radius:var(--ifm-code-border-radius);--ifm-pre-color:inherit;--ifm-pre-line-height:1.45;--ifm-pre-padding:1rem;--ifm-heading-color:inherit;--ifm-heading-margin-top:0;--ifm-heading-margin-bottom:var(--ifm-spacing-vertical);--ifm-heading-font-family:var(--ifm-font-family-base);--ifm-heading-font-weight:var(--ifm-font-weight-bold);--ifm-heading-line-height:1.25;--ifm-h1-font-size:2rem;--ifm-h2-font-size:1.5rem;--ifm-h3-font-size:1.25rem;--ifm-h4-font-size:1rem;--ifm-h5-font-size:0.875rem;--ifm-h6-font-size:0.85rem;--ifm-image-alignment-padding:1.25rem;--ifm-leading-desktop:1.25;--ifm-leading:calc(var(--ifm-leading-desktop)*1rem);--ifm-list-left-padding:2rem;--ifm-list-margin:1rem;--ifm-list-item-margin:0.25rem;--ifm-list-paragraph-margin:1rem;--ifm-table-cell-padding:0.75rem;--ifm-table-background:#0000;--ifm-table-stripe-background:#00000008;--ifm-table-border-
color:var(--ifm-color-emphasis-300);--ifm-table-head-background:inherit;--ifm-table-head-color:inherit;--ifm-table-head-font-weight:var(--ifm-font-weight-bold);--ifm-table-cell-color:inherit;--ifm-link-color:var(--ifm-color-primary);--ifm-link-decoration:none;--ifm-link-hover-color:var(--ifm-link-color);--ifm-link-hover-decoration:underline;--ifm-paragraph-margin-bottom:var(--ifm-leading);--ifm-blockquote-font-size:var(--ifm-font-size-base);--ifm-blockquote-border-left-width:2px;--ifm-blockquote-padding-horizontal:var(--ifm-spacing-horizontal);--ifm-blockquote-padding-vertical:0;--ifm-blockquote-shadow:none;--ifm-blockquote-color:var(--ifm-color-emphasis-800);--ifm-blockquote-border-color:var(--ifm-color-emphasis-300);--ifm-hr-background-color:var(--ifm-color-emphasis-500);--ifm-hr-height:1px;--ifm-hr-margin-vertical:1.5rem;--ifm-scrollbar-size:7px;--ifm-scrollbar-track-background-color:#f1f1f1;--ifm-scrollbar-thumb-background-color:silver;--ifm-scrollbar-thumb-hover-background-color:#a7a7a7;--ifm-alert-background-color:inherit;--ifm-alert-border-color:inherit;--ifm-alert-border-radius:var(--ifm-global-radius);--ifm-alert-border-width:0px;--ifm-alert-border-left-width:5px;--ifm-alert-color:var(--ifm-font-color-base);--ifm-alert-padding-horizontal:var(--ifm-spacing-horizontal);--ifm-alert-padding-vertical:var(--ifm-spacing-vertical);--ifm-alert-shadow:var(--ifm-global-shadow-lw);--ifm-avatar-intro-margin:1rem;--ifm-avatar-intro-alignment:inherit;--ifm-avatar-photo-size:3rem;--ifm-badge-background-color:inherit;--ifm-badge-border-color:inherit;--ifm-badge-border-radius:var(--ifm-global-radius);--ifm-badge-border-width:var(--ifm-global-border-width);--ifm-badge-color:var(--ifm-color-white);--ifm-badge-padding-horizontal:calc(var(--ifm-spacing-horizontal)*0.5);--ifm-badge-padding-vertical:calc(var(--ifm-spacing-vertical)*0.25);--ifm-breadcrumb-border-radius:1.5rem;--ifm-breadcrumb-spacing:0.5rem;--ifm-breadcrumb-color-active:var(--ifm-color-primary);--ifm-breadcrumb-ite
m-background-active:var(--ifm-hover-overlay);--ifm-breadcrumb-padding-horizontal:0.8rem;--ifm-breadcrumb-padding-vertical:0.4rem;--ifm-breadcrumb-size-multiplier:1;--ifm-breadcrumb-separator:url('data:image/svg+xml;utf8,');--ifm-breadcrumb-separator-filter:none;--ifm-breadcrumb-separator-size:0.5rem;--ifm-breadcrumb-separator-size-multiplier:1.25;--ifm-button-background-color:inherit;--ifm-button-border-color:var(--ifm-button-background-color);--ifm-button-border-width:var(--ifm-global-border-width);--ifm-button-font-weight:var(--ifm-font-weight-bold);--ifm-button-padding-horizontal:1.5rem;--ifm-button-padding-vertical:0.375rem;--ifm-button-size-multiplier:1;--ifm-button-transition-duration:var(--ifm-transition-fast);--ifm-button-border-radius:calc(var(--ifm-global-radius)*var(--ifm-button-size-multiplier));--ifm-button-group-spacing:2px;--ifm-card-background-color:var(--ifm-background-surface-color);--ifm-card-border-radius:calc(var(--ifm-global-radius)*2);--ifm-card-horizontal-spacing:var(--ifm-global-spacing);--ifm-card-vertical-spacing:var(--ifm-global-spacing);--ifm-toc-border-color:var(--ifm-color-emphasis-300);--ifm-toc-link-color:var(--ifm-color-content-secondary);--ifm-toc-padding-vertical:0.5rem;--ifm-toc-padding-horizontal:0.5rem;--ifm-dropdown-background-color:var(--ifm-background-surface-color);--ifm-dropdown-font-weight:var(--ifm-font-weight-semibold);--ifm-dropdown-link-color:var(--ifm-font-color-base);--ifm-dropdown-hover-background-color:var(--ifm-hover-overlay);--ifm-footer-background-color:var(--ifm-color-emphasis-100);--ifm-footer-color:inherit;--ifm-footer-link-color:var(--ifm-color-emphasis-700);--ifm-footer-link-hover-color:var(--ifm-color-primary);--ifm-footer-link-horizontal-spacing:0.5rem;--ifm-footer-padding-horizontal:calc(var(--ifm-spacing-horizontal)*2);--ifm-footer-padding-vertical:calc(var(--ifm-spacing-vertical)*2);--ifm-footer-title-color:inherit;--ifm-footer-logo-max-width:min(30rem,90vw);--ifm-hero-background-color:var(--ifm-backg
round-surface-color);--ifm-hero-text-color:var(--ifm-color-emphasis-800);--ifm-menu-color:var(--ifm-color-emphasis-700);--ifm-menu-color-active:var(--ifm-color-primary);--ifm-menu-color-background-active:var(--ifm-hover-overlay);--ifm-menu-color-background-hover:var(--ifm-hover-overlay);--ifm-menu-link-padding-horizontal:0.75rem;--ifm-menu-link-padding-vertical:0.375rem;--ifm-menu-link-sublist-icon:url('data:image/svg+xml;utf8,');--ifm-menu-link-sublist-icon-filter:none;--ifm-navbar-background-color:var(--ifm-background-surface-color);--ifm-navbar-height:3.75rem;--ifm-navbar-item-padding-horizontal:0.75rem;--ifm-navbar-item-padding-vertical:0.25rem;--ifm-navbar-link-color:var(--ifm-font-color-base);--ifm-navbar-link-active-color:var(--ifm-link-color);--ifm-navbar-padding-horizontal:var(--ifm-spacing-horizontal);--ifm-navbar-padding-vertical:calc(var(--ifm-spacing-vertical)*0.5);--ifm-navbar-shadow:var(--ifm-global-shadow-lw);--ifm-navbar-search-input-background-color:var(--ifm-color-emphasis-200);--ifm-navbar-search-input-color:var(--ifm-color-emphasis-800);--ifm-navbar-search-input-placeholder-color:var(--ifm-color-emphasis-500);--ifm-navbar-search-input-icon:url('data:image/svg+xml;utf8,');--ifm-navbar-sidebar-width:83vw;--ifm-pagination-border-radius:var(--ifm-global-radius);--ifm-pagination-color-active:var(--ifm-color-primary);--ifm-pagination-font-size:1rem;--ifm-pagination-item-active-background:var(--ifm-hover-overlay);--ifm-pagination-page-spacing:0.2em;--ifm-pagination-padding-horizontal:calc(var(--ifm-spacing-horizontal)*1);--ifm-pagination-padding-vertical:calc(var(--ifm-spacing-vertical)*0.25);--ifm-pagination-nav-border-radius:var(--ifm-global-radius);--ifm-pagination-nav-color-hover:var(--ifm-color-primary);--ifm-pills-color-active:var(--ifm-color-primary);--ifm-pills-color-background-active:var(--ifm-hover-overlay);--ifm-pills-spacing:0.125rem;--ifm-tabs-color:var(--ifm-font-color-secondary);--ifm-tabs-color-active:var(--ifm-color-primary);--ifm-tabs
-color-active-border:var(--ifm-tabs-color-active);--ifm-tabs-padding-horizontal:1rem;--ifm-tabs-padding-vertical:1rem;--docusaurus-progress-bar-color:var(--ifm-color-primary);--ifm-color-primary:#4c4ddc;--ifm-color-primary-dark:#403fc2;--ifm-color-primary-darker:#35359d;--ifm-color-primary-darkest:#30317c;--ifm-color-primary-light:#626ae9;--ifm-color-primary-lighter:#828ef1;--ifm-color-primary-lightest:#a7b5f6;--ifm-code-font-size:95%;--docusaurus-highlighted-code-line-bg:#0000001a;--ifm-h1-font-size:40px;--ifm-table-cell-padding:6px 12px;--ifm-table-head-background:#f7fafc;--ifm-table-border-color:#d8dbe6;--ifm-table-stripe-background:#fff;--docsearch-primary-color:#5468ff;--docsearch-text-color:#1c1e21;--docsearch-spacing:12px;--docsearch-icon-stroke-width:1.4;--docsearch-highlight-color:var(--docsearch-primary-color);--docsearch-muted-color:#969faf;--docsearch-container-background:#656c85cc;--docsearch-logo-color:#5468ff;--docsearch-modal-width:560px;--docsearch-modal-height:600px;--docsearch-modal-background:#f5f6f7;--docsearch-modal-shadow:inset 1px 1px 0 0 #ffffff80,0 3px 8px 0 #555a64;--docsearch-searchbox-height:56px;--docsearch-searchbox-background:#ebedf0;--docsearch-searchbox-focus-background:#fff;--docsearch-searchbox-shadow:inset 0 0 0 2px var(--docsearch-primary-color);--docsearch-hit-height:56px;--docsearch-hit-color:#444950;--docsearch-hit-active-color:#fff;--docsearch-hit-background:#fff;--docsearch-hit-shadow:0 1px 3px 0 #d4d9e1;--docsearch-key-gradient:linear-gradient(-225deg,#d5dbe4,#f8f8f8);--docsearch-key-shadow:inset 0 -2px 0 0 #cdcde6,inset 0 0 1px 1px #fff,0 1px 2px 1px #1e235a66;--docsearch-key-pressed-shadow:inset 0 -2px 0 0 #cdcde6,inset 0 0 1px 1px #fff,0 1px 1px 0 #1e235a66;--docsearch-footer-height:44px;--docsearch-footer-background:#fff;--docsearch-footer-shadow:0 -1px 0 0 #e0e3e8,0 -3px 6px 0 
#45629b1f;--docsearch-primary-color:var(--ifm-color-primary);--docsearch-text-color:var(--ifm-font-color-base);--docusaurus-announcement-bar-height:auto;--docusaurus-tag-list-border:var(--ifm-color-emphasis-300);--docusaurus-collapse-button-bg:#0000;--docusaurus-collapse-button-bg-hover:#0000001a;--doc-sidebar-width:300px;--doc-sidebar-hidden-width:30px}.badge--danger,.badge--info,.badge--primary,.badge--secondary,.badge--success,.badge--warning{--ifm-badge-border-color:var(--ifm-badge-background-color)}.button--link,.button--outline{--ifm-button-background-color:#0000}html{-webkit-font-smoothing:antialiased;-webkit-text-size-adjust:100%;text-size-adjust:100%;background-color:var(--ifm-background-color);color:var(--ifm-font-color-base);color-scheme:var(--ifm-color-scheme);font:var(--ifm-font-size-base)/var(--ifm-line-height-base) var(--ifm-font-family-base);text-rendering:optimizelegibility}iframe{border:0;color-scheme:auto}.container{margin:0 auto;max-width:var(--ifm-container-width)}.container--fluid{max-width:inherit}.row{display:flex;flex-wrap:wrap;margin:0 calc(var(--ifm-spacing-horizontal)*-1)}.margin-bottom--none,.margin-vert--none,.markdown>:last-child{margin-bottom:0!important}.margin-top--none,.margin-vert--none,.tabItem_LNqP{margin-top:0!important}.row--no-gutters{margin-left:0;margin-right:0}.margin-horiz--none,.margin-right--none{margin-right:0!important}.row--no-gutters>.col{padding-left:0;padding-right:0}.row--align-top{align-items:flex-start}.row--align-bottom{align-items:flex-end}.menuExternalLink_NmtK,.row--align-center{align-items:center}.row--align-stretch{align-items:stretch}.row--align-baseline{align-items:baseline}.col{--ifm-col-width:100%;flex:1 
0;margin-left:0;max-width:var(--ifm-col-width)}.padding-bottom--none,.padding-vert--none{padding-bottom:0!important}.padding-top--none,.padding-vert--none{padding-top:0!important}.padding-horiz--none,.padding-left--none{padding-left:0!important}.padding-horiz--none,.padding-right--none{padding-right:0!important}.col[class*=col--]{flex:0 0 var(--ifm-col-width)}.col--1{--ifm-col-width:8.33333%}.col--offset-1{margin-left:8.33333%}.col--2{--ifm-col-width:16.66667%}.col--offset-2{margin-left:16.66667%}.col--3{--ifm-col-width:25%}.col--offset-3{margin-left:25%}.col--4{--ifm-col-width:33.33333%}.col--offset-4{margin-left:33.33333%}.col--5{--ifm-col-width:41.66667%}.col--offset-5{margin-left:41.66667%}.col--6{--ifm-col-width:50%}.col--offset-6{margin-left:50%}.col--7{--ifm-col-width:58.33333%}.col--offset-7{margin-left:58.33333%}.col--8{--ifm-col-width:66.66667%}.col--offset-8{margin-left:66.66667%}.col--9{--ifm-col-width:75%}.col--offset-9{margin-left:75%}.col--10{--ifm-col-width:83.33333%}.col--offset-10{margin-left:83.33333%}.col--11{--ifm-col-width:91.66667%}.col--offset-11{margin-left:91.66667%}.col--12{--ifm-col-width:100%}.col--offset-12{margin-left:100%}.margin-horiz--none,.margin-left--none{margin-left:0!important}.margin--none{margin:0!important}.margin-bottom--xs,.margin-vert--xs{margin-bottom:.25rem!important}.margin-top--xs,.margin-vert--xs{margin-top:.25rem!important}.margin-horiz--xs,.margin-left--xs{margin-left:.25rem!important}.margin-horiz--xs,.margin-right--xs{margin-right:.25rem!important}.margin--xs{margin:.25rem!important}.margin-bottom--sm,.margin-vert--sm{margin-bottom:.5rem!important}.margin-top--sm,.margin-vert--sm{margin-top:.5rem!important}.margin-horiz--sm,.margin-left--sm{margin-left:.5rem!important}.margin-horiz--sm,.margin-right--sm{margin-right:.5rem!important}.margin--sm{margin:.5rem!important}.margin-bottom--md,.margin-vert--md{margin-bottom:1rem!important}.margin-top--md,.margin-vert--md{margin-top:1rem!important}.margin-horiz--md,.margin
-left--md{margin-left:1rem!important}.margin-horiz--md,.margin-right--md{margin-right:1rem!important}.margin--md{margin:1rem!important}.margin-bottom--lg,.margin-vert--lg{margin-bottom:2rem!important}.margin-top--lg,.margin-vert--lg{margin-top:2rem!important}.margin-horiz--lg,.margin-left--lg{margin-left:2rem!important}.margin-horiz--lg,.margin-right--lg{margin-right:2rem!important}.margin--lg{margin:2rem!important}.margin-bottom--xl,.margin-vert--xl{margin-bottom:5rem!important}.margin-top--xl,.margin-vert--xl{margin-top:5rem!important}.margin-horiz--xl,.margin-left--xl{margin-left:5rem!important}.margin-horiz--xl,.margin-right--xl{margin-right:5rem!important}.margin--xl{margin:5rem!important}.padding--none{padding:0!important}.padding-bottom--xs,.padding-vert--xs{padding-bottom:.25rem!important}.padding-top--xs,.padding-vert--xs{padding-top:.25rem!important}.padding-horiz--xs,.padding-left--xs{padding-left:.25rem!important}.padding-horiz--xs,.padding-right--xs{padding-right:.25rem!important}.padding--xs{padding:.25rem!important}.padding-bottom--sm,.padding-vert--sm{padding-bottom:.5rem!important}.padding-top--sm,.padding-vert--sm{padding-top:.5rem!important}.padding-horiz--sm,.padding-left--sm{padding-left:.5rem!important}.padding-horiz--sm,.padding-right--sm{padding-right:.5rem!important}.padding--sm{padding:.5rem!important}.padding-bottom--md,.padding-vert--md{padding-bottom:1rem!important}.padding-top--md,.padding-vert--md{padding-top:1rem!important}.padding-horiz--md,.padding-left--md{padding-left:1rem!important}.padding-horiz--md,.padding-right--md{padding-right:1rem!important}.padding--md{padding:1rem!important}.padding-bottom--lg,.padding-vert--lg{padding-bottom:2rem!important}.padding-top--lg,.padding-vert--lg{padding-top:2rem!important}.padding-horiz--lg,.padding-left--lg{padding-left:2rem!important}.padding-horiz--lg,.padding-right--lg{padding-right:2rem!important}.padding--lg{padding:2rem!important}.padding-bottom--xl,.padding-vert--xl{padding-bottom:5r
em!important}.padding-top--xl,.padding-vert--xl{padding-top:5rem!important}.padding-horiz--xl,.padding-left--xl{padding-left:5rem!important}.padding-horiz--xl,.padding-right--xl{padding-right:5rem!important}.padding--xl{padding:5rem!important}code{background-color:var(--ifm-code-background);border:.1rem solid #0000001a;border-radius:var(--ifm-code-border-radius);font-family:var(--ifm-font-family-monospace);font-size:var(--ifm-code-font-size);padding:var(--ifm-code-padding-vertical) var(--ifm-code-padding-horizontal)}a code{color:inherit}pre{background-color:var(--ifm-pre-background);border-radius:var(--ifm-pre-border-radius);color:var(--ifm-pre-color);font:var(--ifm-code-font-size)/var(--ifm-pre-line-height) var(--ifm-font-family-monospace);padding:var(--ifm-pre-padding)}pre code{background-color:initial;border:none;font-size:100%;line-height:inherit;padding:0}kbd{background-color:var(--ifm-color-emphasis-0);border:1px solid var(--ifm-color-emphasis-400);border-radius:.2rem;box-shadow:inset 0 -1px 0 var(--ifm-color-emphasis-400);color:var(--ifm-color-emphasis-800);font:80% var(--ifm-font-family-monospace);padding:.15rem .3rem}h1,h2,h3,h4,h5,h6{color:var(--ifm-heading-color);font-family:var(--ifm-heading-font-family);font-weight:var(--ifm-heading-font-weight);line-height:var(--ifm-heading-line-height);margin:var(--ifm-heading-margin-top) 0 var(--ifm-heading-margin-bottom) 
0}h1{font-size:var(--ifm-h1-font-size)}h2{font-size:var(--ifm-h2-font-size)}h3{font-size:var(--ifm-h3-font-size)}h4{font-size:var(--ifm-h4-font-size)}h5{font-size:var(--ifm-h5-font-size)}h6{font-size:var(--ifm-h6-font-size)}img{max-width:100%;display:block;margin-left:auto;margin-right:auto}img[align=right]{padding-left:var(--image-alignment-padding)}img[align=left]{padding-right:var(--image-alignment-padding)}.markdown{--ifm-h1-vertical-rhythm-top:3;--ifm-h2-vertical-rhythm-top:2;--ifm-h3-vertical-rhythm-top:1.5;--ifm-heading-vertical-rhythm-top:1.25;--ifm-h1-vertical-rhythm-bottom:1.25;--ifm-heading-vertical-rhythm-bottom:1}.markdown:after,.markdown:before{content:"";display:table}.markdown:after{clear:both}.markdown h1:first-child{--ifm-h1-font-size:3rem;margin-bottom:calc(var(--ifm-h1-vertical-rhythm-bottom)*var(--ifm-leading));--ifm-h1-font-size:40px}.markdown>h2{--ifm-h2-font-size:2rem;margin-top:calc(var(--ifm-h2-vertical-rhythm-top)*var(--ifm-leading))}.markdown>h3{--ifm-h3-font-size:1.5rem;margin-top:calc(var(--ifm-h3-vertical-rhythm-top)*var(--ifm-leading))}.markdown>h4,.markdown>h5,.markdown>h6{margin-top:calc(var(--ifm-heading-vertical-rhythm-top)*var(--ifm-leading))}.markdown>p,.markdown>pre,.markdown>ul,.tabList__CuJ{margin-bottom:var(--ifm-leading)}.markdown li>p{margin-top:var(--ifm-list-paragraph-margin)}.markdown li+li{margin-top:var(--ifm-list-item-margin)}ol,ul{margin:0 0 var(--ifm-list-margin);padding-left:var(--ifm-list-left-padding)}ol ol,ul ol{list-style-type:lower-roman}ol ol ol,ol ul ol,ul ol ol,ul ul ol{list-style-type:lower-alpha}table{border-collapse:collapse;margin-bottom:var(--ifm-spacing-vertical);border-collapse:initial;border-spacing:0;display:table}table thead tr{border-bottom:2px solid var(--ifm-table-border-color)}table thead,table tr:nth-child(2n){background-color:var(--ifm-table-stripe-background)}table tr{background-color:var(--ifm-table-background);border-top:var(--ifm-table-border-width) solid 
var(--ifm-table-border-color)}table td,table th{border:var(--ifm-table-border-width) solid var(--ifm-table-border-color);padding:var(--ifm-table-cell-padding)}table th{background-color:var(--ifm-table-head-background);color:var(--ifm-table-head-color);font-weight:var(--ifm-table-head-font-weight)}table td{color:var(--ifm-table-cell-color)}strong{font-weight:var(--ifm-font-weight-bold)}a{color:var(--ifm-link-color);text-decoration:var(--ifm-link-decoration)}a:hover{color:var(--ifm-link-hover-color);text-decoration:var(--ifm-link-hover-decoration)}.button:hover,.text--no-decoration,.text--no-decoration:hover,a:not([href]){text-decoration:none}p{margin:0 0 var(--ifm-paragraph-margin-bottom)}blockquote{border-left:var(--ifm-blockquote-border-left-width) solid var(--ifm-blockquote-border-color);box-shadow:var(--ifm-blockquote-shadow);color:var(--ifm-blockquote-color);font-size:var(--ifm-blockquote-font-size);padding:var(--ifm-blockquote-padding-vertical) var(--ifm-blockquote-padding-horizontal)}blockquote>:first-child{margin-top:0}blockquote>:last-child{margin-bottom:0}hr{background-color:var(--ifm-hr-background-color);border:0;height:var(--ifm-hr-height);margin:var(--ifm-hr-margin-vertical) 0}.shadow--lw{box-shadow:var(--ifm-global-shadow-lw)!important}.shadow--md{box-shadow:var(--ifm-global-shadow-md)!important}.shadow--tl{box-shadow:var(--ifm-global-shadow-tl)!important}.text--primary,.wordWrapButtonEnabled_EoeP 
.wordWrapButtonIcon_Bwma{color:var(--ifm-color-primary)}.text--secondary{color:var(--ifm-color-secondary)}.text--success{color:var(--ifm-color-success)}.text--info{color:var(--ifm-color-info)}.text--warning{color:var(--ifm-color-warning)}.text--danger{color:var(--ifm-color-danger)}.text--center{text-align:center}.text--left{text-align:left}.text--justify{text-align:justify}.text--right{text-align:right}.text--capitalize{text-transform:capitalize}.text--lowercase{text-transform:lowercase}.admonitionHeading_tbUL,.alert__heading,.text--uppercase{text-transform:uppercase}.text--light{font-weight:var(--ifm-font-weight-light)}.text--normal{font-weight:var(--ifm-font-weight-normal)}.text--semibold{font-weight:var(--ifm-font-weight-semibold)}.text--bold{font-weight:var(--ifm-font-weight-bold)}.text--italic{font-style:italic}.text--truncate{overflow:hidden;text-overflow:ellipsis;white-space:nowrap}.text--break{word-wrap:break-word!important;word-break:break-word!important}.clean-btn{background:none;border:none;color:inherit;cursor:pointer;font-family:inherit;padding:0}.alert,.alert 
.close{color:var(--ifm-alert-foreground-color)}.clean-list{list-style:none;padding-left:0}.alert--primary{--ifm-alert-background-color:var(--ifm-color-primary-contrast-background);--ifm-alert-background-color-highlight:#3578e526;--ifm-alert-foreground-color:var(--ifm-color-primary-contrast-foreground);--ifm-alert-border-color:var(--ifm-color-primary-dark)}.alert--secondary{--ifm-alert-background-color:var(--ifm-color-secondary-contrast-background);--ifm-alert-background-color-highlight:#ebedf026;--ifm-alert-foreground-color:var(--ifm-color-secondary-contrast-foreground);--ifm-alert-border-color:var(--ifm-color-secondary-dark)}.alert--success{--ifm-alert-background-color:var(--ifm-color-success-contrast-background);--ifm-alert-background-color-highlight:#00a40026;--ifm-alert-foreground-color:var(--ifm-color-success-contrast-foreground);--ifm-alert-border-color:var(--ifm-color-success-dark)}.alert--info{--ifm-alert-background-color:var(--ifm-color-info-contrast-background);--ifm-alert-background-color-highlight:#54c7ec26;--ifm-alert-foreground-color:var(--ifm-color-info-contrast-foreground);--ifm-alert-border-color:var(--ifm-color-info-dark)}.alert--warning{--ifm-alert-background-color:var(--ifm-color-warning-contrast-background);--ifm-alert-background-color-highlight:#ffba0026;--ifm-alert-foreground-color:var(--ifm-color-warning-contrast-foreground);--ifm-alert-border-color:var(--ifm-color-warning-dark)}.alert--danger{--ifm-alert-background-color:var(--ifm-color-danger-contrast-background);--ifm-alert-background-color-highlight:#fa383e26;--ifm-alert-foreground-color:var(--ifm-color-danger-contrast-foreground);--ifm-alert-border-color:var(--ifm-color-danger-dark)}.alert{--ifm-code-background:var(--ifm-alert-background-color-highlight);--ifm-link-color:var(--ifm-alert-foreground-color);--ifm-link-hover-color:var(--ifm-alert-foreground-color);--ifm-link-decoration:underline;--ifm-tabs-color:var(--ifm-alert-foreground-color);--ifm-tabs-color-active:var(--ifm-alert-foregr
ound-color);--ifm-tabs-color-active-border:var(--ifm-alert-border-color);background-color:var(--ifm-alert-background-color);border:var(--ifm-alert-border-width) solid var(--ifm-alert-border-color);border-left-width:var(--ifm-alert-border-left-width);border-radius:var(--ifm-alert-border-radius);box-shadow:var(--ifm-alert-shadow);padding:var(--ifm-alert-padding-vertical) var(--ifm-alert-padding-horizontal)}.alert__heading{align-items:center;display:flex;font:700 var(--ifm-h5-font-size)/var(--ifm-heading-line-height) var(--ifm-heading-font-family);margin-bottom:.5rem}.alert__icon{display:inline-flex;margin-right:.4em}.alert__icon svg{fill:var(--ifm-alert-foreground-color);stroke:var(--ifm-alert-foreground-color);stroke-width:0}.alert .close{margin:calc(var(--ifm-alert-padding-vertical)*-1) calc(var(--ifm-alert-padding-horizontal)*-1) 0 0;opacity:.75}.alert .close:focus,.alert .close:hover{opacity:1}.alert a{text-decoration-color:var(--ifm-alert-border-color)}.alert a:hover{text-decoration-thickness:2px}.avatar{column-gap:var(--ifm-avatar-intro-margin);display:flex}.avatar__photo{border-radius:50%;display:block;height:var(--ifm-avatar-photo-size);overflow:hidden;width:var(--ifm-avatar-photo-size)}.card--full-height,.navbar__logo img,body,html{height:100%}.avatar__photo--sm{--ifm-avatar-photo-size:2rem}.avatar__photo--lg{--ifm-avatar-photo-size:4rem}.avatar__photo--xl{--ifm-avatar-photo-size:6rem}.avatar__intro{display:flex;flex:1 1;flex-direction:column;justify-content:center;text-align:var(--ifm-avatar-intro-alignment)}.badge,.breadcrumbs__item,.breadcrumbs__link,.button,.dropdown>.navbar__link:after{display:inline-block}.avatar__name{font:700 var(--ifm-h4-font-size)/var(--ifm-heading-line-height) 
var(--ifm-font-family-base)}.avatar__subtitle{margin-top:.25rem}.avatar--vertical{--ifm-avatar-intro-alignment:center;--ifm-avatar-intro-margin:0.5rem;align-items:center;flex-direction:column}.badge{background-color:var(--ifm-badge-background-color);border:var(--ifm-badge-border-width) solid var(--ifm-badge-border-color);border-radius:var(--ifm-badge-border-radius);color:var(--ifm-badge-color);font-size:75%;font-weight:var(--ifm-font-weight-bold);line-height:1;padding:var(--ifm-badge-padding-vertical) var(--ifm-badge-padding-horizontal)}.badge--primary{--ifm-badge-background-color:var(--ifm-color-primary)}.badge--secondary{--ifm-badge-background-color:var(--ifm-color-secondary);color:var(--ifm-color-black)}.breadcrumbs__link,.button.button--secondary.button--outline:not(.button--active):not(:hover){color:var(--ifm-font-color-base)}.badge--success{--ifm-badge-background-color:var(--ifm-color-success)}.badge--info{--ifm-badge-background-color:var(--ifm-color-info)}.badge--warning{--ifm-badge-background-color:var(--ifm-color-warning)}.badge--danger{--ifm-badge-background-color:var(--ifm-color-danger)}.breadcrumbs{margin-bottom:0;padding-left:0}.breadcrumbs__item:not(:last-child):after{background:var(--ifm-breadcrumb-separator) center;content:" ";display:inline-block;filter:var(--ifm-breadcrumb-separator-filter);height:calc(var(--ifm-breadcrumb-separator-size)*var(--ifm-breadcrumb-size-multiplier)*var(--ifm-breadcrumb-separator-size-multiplier));margin:0 var(--ifm-breadcrumb-spacing);opacity:.5;width:calc(var(--ifm-breadcrumb-separator-size)*var(--ifm-breadcrumb-size-multiplier)*var(--ifm-breadcrumb-separator-size-multiplier))}.breadcrumbs__item--active 
.breadcrumbs__link{background:var(--ifm-breadcrumb-item-background-active);color:var(--ifm-breadcrumb-color-active)}.breadcrumbs__link{border-radius:var(--ifm-breadcrumb-border-radius);font-size:calc(1rem*var(--ifm-breadcrumb-size-multiplier));padding:calc(var(--ifm-breadcrumb-padding-vertical)*var(--ifm-breadcrumb-size-multiplier)) calc(var(--ifm-breadcrumb-padding-horizontal)*var(--ifm-breadcrumb-size-multiplier));transition-duration:var(--ifm-transition-fast);transition-property:background,color}.breadcrumbs__link:any-link:hover,.breadcrumbs__link:link:hover,.breadcrumbs__link:visited:hover,area[href].breadcrumbs__link:hover{background:var(--ifm-breadcrumb-item-background-active);text-decoration:none}.breadcrumbs--sm{--ifm-breadcrumb-size-multiplier:0.8}.breadcrumbs--lg{--ifm-breadcrumb-size-multiplier:1.2}.button{background-color:var(--ifm-button-background-color);border:var(--ifm-button-border-width) solid var(--ifm-button-border-color);border-radius:var(--ifm-button-border-radius);cursor:pointer;font-size:calc(.875rem*var(--ifm-button-size-multiplier));font-weight:var(--ifm-button-font-weight);line-height:1.5;padding:calc(var(--ifm-button-padding-vertical)*var(--ifm-button-size-multiplier)) 
calc(var(--ifm-button-padding-horizontal)*var(--ifm-button-size-multiplier));text-align:center;transition-duration:var(--ifm-button-transition-duration);transition-property:color,background,border-color;-webkit-user-select:none;user-select:none;white-space:nowrap}.button,.button:hover{color:var(--ifm-button-color)}.button--outline{--ifm-button-color:var(--ifm-button-border-color)}.button--outline:hover{--ifm-button-background-color:var(--ifm-button-border-color)}.button--link{--ifm-button-border-color:#0000;color:var(--ifm-link-color);text-decoration:var(--ifm-link-decoration)}.button--link.button--active,.button--link:active,.button--link:hover{color:var(--ifm-link-hover-color);text-decoration:var(--ifm-link-hover-decoration)}.button.disabled,.button:disabled,.button[disabled]{opacity:.65;pointer-events:none}.button--sm{--ifm-button-size-multiplier:0.8}.button--lg{--ifm-button-size-multiplier:1.35}.button--block{display:block;width:100%}.button.button--secondary{color:var(--ifm-color-gray-900)}:where(.button--primary){--ifm-button-background-color:var(--ifm-color-primary);--ifm-button-border-color:var(--ifm-color-primary)}:where(.button--primary):not(.button--outline):hover{--ifm-button-background-color:var(--ifm-color-primary-dark);--ifm-button-border-color:var(--ifm-color-primary-dark)}.button--primary.button--active,.button--primary:active{--ifm-button-background-color:var(--ifm-color-primary-darker);--ifm-button-border-color:var(--ifm-color-primary-darker)}:where(.button--secondary){--ifm-button-background-color:var(--ifm-color-secondary);--ifm-button-border-color:var(--ifm-color-secondary)}:where(.button--secondary):not(.button--outline):hover{--ifm-button-background-color:var(--ifm-color-secondary-dark);--ifm-button-border-color:var(--ifm-color-secondary-dark)}.button--secondary.button--active,.button--secondary:active{--ifm-button-background-color:var(--ifm-color-secondary-darker);--ifm-button-border-color:var(--ifm-color-secondary-darker)}:where(.button--su
ccess){--ifm-button-background-color:var(--ifm-color-success);--ifm-button-border-color:var(--ifm-color-success)}:where(.button--success):not(.button--outline):hover{--ifm-button-background-color:var(--ifm-color-success-dark);--ifm-button-border-color:var(--ifm-color-success-dark)}.button--success.button--active,.button--success:active{--ifm-button-background-color:var(--ifm-color-success-darker);--ifm-button-border-color:var(--ifm-color-success-darker)}:where(.button--info){--ifm-button-background-color:var(--ifm-color-info);--ifm-button-border-color:var(--ifm-color-info)}:where(.button--info):not(.button--outline):hover{--ifm-button-background-color:var(--ifm-color-info-dark);--ifm-button-border-color:var(--ifm-color-info-dark)}.button--info.button--active,.button--info:active{--ifm-button-background-color:var(--ifm-color-info-darker);--ifm-button-border-color:var(--ifm-color-info-darker)}:where(.button--warning){--ifm-button-background-color:var(--ifm-color-warning);--ifm-button-border-color:var(--ifm-color-warning)}:where(.button--warning):not(.button--outline):hover{--ifm-button-background-color:var(--ifm-color-warning-dark);--ifm-button-border-color:var(--ifm-color-warning-dark)}.button--warning.button--active,.button--warning:active{--ifm-button-background-color:var(--ifm-color-warning-darker);--ifm-button-border-color:var(--ifm-color-warning-darker)}:where(.button--danger){--ifm-button-background-color:var(--ifm-color-danger);--ifm-button-border-color:var(--ifm-color-danger)}:where(.button--danger):not(.button--outline):hover{--ifm-button-background-color:var(--ifm-color-danger-dark);--ifm-button-border-color:var(--ifm-color-danger-dark)}.button--danger.button--active,.button--danger:active{--ifm-button-background-color:var(--ifm-color-danger-darker);--ifm-button-border-color:var(--ifm-color-danger-darker)}.button-group{display:inline-flex;gap:var(--ifm-button-group-spacing)}.button-group>.button:not(:first-child){border-bottom-left-radius:0;border-top-left-
radius:0}.button-group>.button:not(:last-child){border-bottom-right-radius:0;border-top-right-radius:0}.button-group--block{display:flex;justify-content:stretch}.button-group--block>.button{flex-grow:1}.card{background-color:var(--ifm-card-background-color);border-radius:var(--ifm-card-border-radius);box-shadow:var(--ifm-global-shadow-lw);display:flex;flex-direction:column;overflow:hidden}.card__image{padding-top:var(--ifm-card-vertical-spacing)}.card__image:first-child{padding-top:0}.card__body,.card__footer,.card__header{padding:var(--ifm-card-vertical-spacing) var(--ifm-card-horizontal-spacing)}.card__body:not(:last-child),.card__footer:not(:last-child),.card__header:not(:last-child){padding-bottom:0}.card__body>:last-child,.card__footer>:last-child,.card__header>:last-child{margin-bottom:0}.card__footer{margin-top:auto}.table-of-contents{font-size:.8rem;margin-bottom:0;padding:var(--ifm-toc-padding-vertical) 0}.table-of-contents,.table-of-contents ul{list-style:none;padding-left:var(--ifm-toc-padding-horizontal)}.table-of-contents li{margin:var(--ifm-toc-padding-vertical) var(--ifm-toc-padding-horizontal)}.table-of-contents__left-border{border-left:1px solid var(--ifm-toc-border-color)}.table-of-contents__link{color:var(--ifm-toc-link-color);display:block}.table-of-contents__link--active,.table-of-contents__link--active code,.table-of-contents__link:hover,.table-of-contents__link:hover code{color:var(--ifm-color-primary);text-decoration:none}.close{color:var(--ifm-color-black);float:right;font-size:1.5rem;font-weight:var(--ifm-font-weight-bold);line-height:1;opacity:.5;padding:1rem;transition:opacity var(--ifm-transition-fast) var(--ifm-transition-timing-default)}.close:hover{opacity:.7}.close:focus,.theme-code-block-highlighted-line .codeLineNumber_Tfdd:before{opacity:.8}.dropdown{display:inline-flex;font-weight:var(--ifm-dropdown-font-weight);position:relative;vertical-align:top}.dropdown--hoverable:hover .dropdown__menu,.dropdown--show 
.dropdown__menu{opacity:1;pointer-events:all;transform:translateY(-1px);visibility:visible}#nprogress,.dropdown__menu,.navbar__item.dropdown .navbar__link:not([href]){pointer-events:none}.dropdown--right .dropdown__menu{left:inherit;right:0}.dropdown--nocaret .navbar__link:after{content:none!important}.dropdown__menu{background-color:var(--ifm-dropdown-background-color);border-radius:var(--ifm-global-radius);box-shadow:var(--ifm-global-shadow-md);left:0;list-style:none;max-height:80vh;min-width:10rem;opacity:0;overflow-y:auto;padding:.5rem;position:absolute;top:calc(100% - var(--ifm-navbar-item-padding-vertical) + .3rem);transform:translateY(-.625rem);transition-duration:var(--ifm-transition-fast);transition-property:opacity,transform,visibility;transition-timing-function:var(--ifm-transition-timing-default);visibility:hidden;z-index:var(--ifm-z-index-dropdown)}.menu__caret,.menu__link,.menu__list-item-collapsible{border-radius:.25rem;transition:background var(--ifm-transition-fast) var(--ifm-transition-timing-default)}.dropdown__link{border-radius:.25rem;color:var(--ifm-dropdown-link-color);display:block;font-size:.875rem;margin-top:.2rem;padding:.25rem .5rem;white-space:nowrap}.dropdown__link--active,.dropdown__link:hover{background-color:var(--ifm-dropdown-hover-background-color);color:var(--ifm-dropdown-link-color);text-decoration:none}.dropdown__link--active,.dropdown__link--active:hover{--ifm-dropdown-link-color:var(--ifm-link-color)}.dropdown>.navbar__link:after{border-color:currentcolor #0000;border-style:solid;border-width:.4em .4em 0;content:"";margin-left:.3em;position:relative;top:2px;transform:translateY(-50%)}.footer{background-color:var(--ifm-footer-background-color);color:var(--ifm-footer-color);padding:var(--ifm-footer-padding-vertical) 
var(--ifm-footer-padding-horizontal)}.footer--dark{--ifm-footer-background-color:#303846;--ifm-footer-color:var(--ifm-footer-link-color);--ifm-footer-link-color:var(--ifm-color-secondary);--ifm-footer-title-color:var(--ifm-color-white)}.footer__links{margin-bottom:1rem}.footer__link-item{color:var(--ifm-footer-link-color);line-height:2}.footer__link-item:hover{color:var(--ifm-footer-link-hover-color)}.footer__link-separator{margin:0 var(--ifm-footer-link-horizontal-spacing)}.footer__logo{margin-top:1rem;max-width:var(--ifm-footer-logo-max-width)}.footer__title{color:var(--ifm-footer-title-color);font:700 var(--ifm-h4-font-size)/var(--ifm-heading-line-height) var(--ifm-font-family-base);margin-bottom:var(--ifm-heading-margin-bottom)}.menu,.navbar__link{font-weight:var(--ifm-font-weight-semibold)}.docItemContainer_Djhp article>:first-child,.docItemContainer_Djhp header+*,.footer__item{margin-top:0}.admonitionContent_S0QG>:last-child,.cardContainer_fWXF :last-child,.collapsibleContent_i85q>:last-child,.footer__items,.tabItem_Ymn6>:last-child{margin-bottom:0}.codeBlockStandalone_MEMb,[type=checkbox]{padding:0}.hero{align-items:center;background-color:var(--ifm-hero-background-color);color:var(--ifm-hero-text-color);display:flex;padding:4rem 2rem}.hero--primary{--ifm-hero-background-color:var(--ifm-color-primary);--ifm-hero-text-color:var(--ifm-font-color-base-inverse)}.hero--dark{--ifm-hero-background-color:#303846;--ifm-hero-text-color:var(--ifm-color-white)}.hero__title{font-size:3rem}.hero__subtitle{font-size:1.5rem}.menu__list{list-style:none;margin:0;padding-left:0}.menu__caret,.menu__link{padding:var(--ifm-menu-link-padding-vertical) var(--ifm-menu-link-padding-horizontal)}.menu__list .menu__list{flex:0 0 100%;margin-top:.25rem;padding-left:var(--ifm-menu-link-padding-horizontal)}.menu__list-item:not(:first-child){margin-top:.25rem}.menu__list-item--collapsed 
.menu__list{height:0;overflow:hidden}.details_lb9f[data-collapsed=false].isBrowser_bmU9>summary:before,.details_lb9f[open]:not(.isBrowser_bmU9)>summary:before,.menu__list-item--collapsed .menu__caret:before,.menu__list-item--collapsed .menu__link--sublist:after{transform:rotate(90deg)}.menu__list-item-collapsible{display:flex;flex-wrap:wrap;position:relative}.menu__caret:hover,.menu__link:hover,.menu__list-item-collapsible--active,.menu__list-item-collapsible:hover{background:var(--ifm-menu-color-background-hover)}.menu__list-item-collapsible .menu__link--active,.menu__list-item-collapsible .menu__link:hover{background:none!important}.menu__caret,.menu__link{align-items:center;display:flex}.navbar-sidebar,.navbar-sidebar__backdrop{bottom:0;opacity:0;transition-duration:var(--ifm-transition-fast);transition-timing-function:ease-in-out;top:0;left:0;visibility:hidden}.menu__link{color:var(--ifm-menu-color);flex:1;line-height:1.25}.menu__link:hover{color:var(--ifm-menu-color);text-decoration:none}.menu__caret:before,.menu__link--sublist-caret:after{height:1.25rem;transform:rotate(180deg);transition:transform var(--ifm-transition-fast) linear;width:1.25rem;filter:var(--ifm-menu-link-sublist-icon-filter);content:""}.menu__link--sublist-caret:after{background:var(--ifm-menu-link-sublist-icon) 50%/2rem 2rem;margin-left:auto;min-width:1.25rem}.menu__link--active,.menu__link--active:hover{color:var(--ifm-menu-color-active)}.navbar__brand,.navbar__link{color:var(--ifm-navbar-link-color)}.menu__link--active:not(.menu__link--sublist){background-color:var(--ifm-menu-color-background-active)}.menu__caret:before{background:var(--ifm-menu-link-sublist-icon) 50%/2rem 2rem}.navbar--dark,html[data-theme=dark]{--ifm-menu-link-sublist-icon-filter:invert(100%) sepia(94%) saturate(17%) hue-rotate(223deg) brightness(104%) 
contrast(98%)}.navbar{background-color:var(--ifm-navbar-background-color);box-shadow:var(--ifm-navbar-shadow);height:var(--ifm-navbar-height);padding:var(--ifm-navbar-padding-vertical) var(--ifm-navbar-padding-horizontal)}.navbar,.navbar>.container,.navbar>.container-fluid{display:flex}.navbar--fixed-top{position:sticky;top:0;z-index:var(--ifm-z-index-fixed)}.navbar__inner{display:flex;flex-wrap:wrap;justify-content:space-between;width:100%}.navbar__brand{align-items:center;display:flex;margin-right:1rem;min-width:0}.navbar__brand:hover{color:var(--ifm-navbar-link-hover-color);text-decoration:none}.announcementBarContent_xLdY,.navbar__title{flex:1 1 auto}.navbar__toggle{display:none;margin-right:.5rem}.navbar__logo{flex:0 0 auto;height:2rem;margin-right:.5rem}.navbar__items{align-items:center;display:flex;flex:1;min-width:0}.navbar__items--center{flex:0 0 auto}.navbar__items--center .navbar__brand{margin:0}.navbar__items--center+.navbar__items--right{flex:1}.navbar__items--right{flex:0 0 auto;justify-content:flex-end}.navbar__items--right>:last-child{padding-right:0}.navbar__item{display:inline-block;padding:var(--ifm-navbar-item-padding-vertical) 
var(--ifm-navbar-item-padding-horizontal)}.navbar__link--active,.navbar__link:hover{color:var(--ifm-navbar-link-hover-color);text-decoration:none}.navbar--dark,.navbar--primary{--ifm-menu-color:var(--ifm-color-gray-300);--ifm-navbar-link-color:var(--ifm-color-gray-100);--ifm-navbar-search-input-background-color:#ffffff1a;--ifm-navbar-search-input-placeholder-color:#ffffff80;color:var(--ifm-color-white)}.navbar--dark{--ifm-navbar-background-color:#242526;--ifm-menu-color-background-active:#ffffff0d;--ifm-navbar-search-input-color:var(--ifm-color-white)}.navbar--primary{--ifm-navbar-background-color:var(--ifm-color-primary);--ifm-navbar-link-hover-color:var(--ifm-color-white);--ifm-menu-color-active:var(--ifm-color-white);--ifm-navbar-search-input-color:var(--ifm-color-emphasis-500)}.navbar__search-input{appearance:none;background:var(--ifm-navbar-search-input-background-color) var(--ifm-navbar-search-input-icon) no-repeat .75rem center/1rem 1rem;border:none;border-radius:2rem;color:var(--ifm-navbar-search-input-color);cursor:text;display:inline-block;font-size:.9rem;height:2rem;padding:0 .5rem 0 2.25rem;width:12.5rem}.navbar__search-input::placeholder{color:var(--ifm-navbar-search-input-placeholder-color)}.navbar-sidebar{background-color:var(--ifm-navbar-background-color);box-shadow:var(--ifm-global-shadow-md);position:fixed;transform:translate3d(-100%,0,0);transition-property:opacity,visibility,transform;width:var(--ifm-navbar-sidebar-width)}.navbar-sidebar--show .navbar-sidebar,.navbar-sidebar__items{transform:translateZ(0)}.navbar-sidebar--show .navbar-sidebar,.navbar-sidebar--show .navbar-sidebar__backdrop{opacity:1;visibility:visible}.navbar-sidebar__backdrop{background-color:#0009;position:fixed;right:0;transition-property:opacity,visibility}.navbar-sidebar__brand{align-items:center;box-shadow:var(--ifm-navbar-shadow);display:flex;flex:1;height:var(--ifm-navbar-height);padding:var(--ifm-navbar-padding-vertical) 
var(--ifm-navbar-padding-horizontal)}.navbar-sidebar__items{display:flex;height:calc(100% - var(--ifm-navbar-height));transition:transform var(--ifm-transition-fast) ease-in-out}.navbar-sidebar__items--show-secondary{transform:translate3d(calc((var(--ifm-navbar-sidebar-width))*-1),0,0)}.navbar-sidebar__item{flex-shrink:0;padding:.5rem;width:calc(var(--ifm-navbar-sidebar-width))}.navbar-sidebar__back{background:var(--ifm-menu-color-background-active);font-size:15px;font-weight:var(--ifm-button-font-weight);margin:0 0 .2rem -.5rem;padding:.6rem 1.5rem;position:relative;text-align:left;top:-.5rem;width:calc(100% + 1rem)}.navbar-sidebar__close{display:flex;margin-left:auto}.pagination{column-gap:var(--ifm-pagination-page-spacing);display:flex;font-size:var(--ifm-pagination-font-size);padding-left:0}.pagination--sm{--ifm-pagination-font-size:0.8rem;--ifm-pagination-padding-horizontal:0.8rem;--ifm-pagination-padding-vertical:0.2rem}.pagination--lg{--ifm-pagination-font-size:1.2rem;--ifm-pagination-padding-horizontal:1.2rem;--ifm-pagination-padding-vertical:0.3rem}.pagination__item{display:inline-flex}.pagination__item>span{padding:var(--ifm-pagination-padding-vertical)}.pagination__item--active .pagination__link{color:var(--ifm-pagination-color-active)}.pagination__item--active .pagination__link,.pagination__item:not(.pagination__item--active):hover .pagination__link{background:var(--ifm-pagination-item-active-background)}.pagination__item--disabled,.pagination__item[disabled]{opacity:.25;pointer-events:none}.pagination__link{border-radius:var(--ifm-pagination-border-radius);color:var(--ifm-font-color-base);display:inline-block;padding:var(--ifm-pagination-padding-vertical) var(--ifm-pagination-padding-horizontal);transition:background var(--ifm-transition-fast) 
var(--ifm-transition-timing-default)}.pagination__link:hover{text-decoration:none}.pagination-nav{grid-gap:var(--ifm-spacing-horizontal);display:grid;gap:var(--ifm-spacing-horizontal);grid-template-columns:repeat(2,1fr)}.pagination-nav__link{border:1px solid var(--ifm-color-emphasis-300);border-radius:var(--ifm-pagination-nav-border-radius);display:block;height:100%;line-height:var(--ifm-heading-line-height);padding:var(--ifm-global-spacing);transition:border-color var(--ifm-transition-fast) var(--ifm-transition-timing-default)}.pagination-nav__link:hover{border-color:var(--ifm-pagination-nav-color-hover);text-decoration:none}.pagination-nav__link--next{grid-column:2/3;text-align:right}.pagination-nav__label{font-size:var(--ifm-h4-font-size);font-weight:var(--ifm-heading-font-weight);word-break:break-word}.pagination-nav__link--prev .pagination-nav__label:before{content:"« "}.pagination-nav__link--next .pagination-nav__label:after{content:" »"}.pagination-nav__sublabel{color:var(--ifm-color-content-secondary);font-size:var(--ifm-h5-font-size);font-weight:var(--ifm-font-weight-semibold);margin-bottom:.25rem}.pills__item,.tabs{font-weight:var(--ifm-font-weight-bold)}.pills{display:flex;gap:var(--ifm-pills-spacing);padding-left:0}.pills__item{border-radius:.5rem;cursor:pointer;display:inline-block;padding:.25rem 1rem;transition:background var(--ifm-transition-fast) var(--ifm-transition-timing-default)}.tabs,:not(.containsTaskList_mC6p>li)>.containsTaskList_mC6p{padding-left:0}.pills__item--active{color:var(--ifm-pills-color-active)}.pills__item--active,.pills__item:not(.pills__item--active):hover{background:var(--ifm-pills-color-background-active)}.pills--block{justify-content:stretch}.pills--block .pills__item{flex-grow:1;text-align:center}.tabs{color:var(--ifm-tabs-color);display:flex;margin-bottom:0;overflow-x:auto}.tabs__item{border-bottom:3px solid 
#0000;border-radius:var(--ifm-global-radius);cursor:pointer;display:inline-flex;padding:var(--ifm-tabs-padding-vertical) var(--ifm-tabs-padding-horizontal);transition:background-color var(--ifm-transition-fast) var(--ifm-transition-timing-default)}.tabs__item--active{border-bottom-color:var(--ifm-tabs-color-active-border);border-bottom-left-radius:0;border-bottom-right-radius:0;color:var(--ifm-tabs-color-active)}.tabs__item:hover{background-color:var(--ifm-hover-overlay)}.tabs--block{justify-content:stretch}.tabs--block .tabs__item{flex-grow:1;justify-content:center}.DocSearch-Button,.DocSearch-Button-Container{align-items:center;display:flex}html[data-theme=dark]{--ifm-color-scheme:dark;--ifm-color-emphasis-0:var(--ifm-color-gray-1000);--ifm-color-emphasis-100:var(--ifm-color-gray-900);--ifm-color-emphasis-200:var(--ifm-color-gray-800);--ifm-color-emphasis-300:var(--ifm-color-gray-700);--ifm-color-emphasis-400:var(--ifm-color-gray-600);--ifm-color-emphasis-600:var(--ifm-color-gray-400);--ifm-color-emphasis-700:var(--ifm-color-gray-300);--ifm-color-emphasis-800:var(--ifm-color-gray-200);--ifm-color-emphasis-900:var(--ifm-color-gray-100);--ifm-color-emphasis-1000:var(--ifm-color-gray-0);--ifm-background-color:#1b1b1d;--ifm-background-surface-color:#242526;--ifm-hover-overlay:#ffffff0d;--ifm-color-content:#e3e3e3;--ifm-color-content-secondary:#fff;--ifm-breadcrumb-separator-filter:invert(64%) sepia(11%) saturate(0%) hue-rotate(149deg) brightness(99%) 
contrast(95%);--ifm-code-background:#ffffff1a;--ifm-scrollbar-track-background-color:#444;--ifm-scrollbar-thumb-background-color:#686868;--ifm-scrollbar-thumb-hover-background-color:#7a7a7a;--ifm-table-stripe-background:#ffffff12;--ifm-toc-border-color:var(--ifm-color-emphasis-200);--ifm-color-primary-contrast-background:#102445;--ifm-color-primary-contrast-foreground:#ebf2fc;--ifm-color-secondary-contrast-background:#474748;--ifm-color-secondary-contrast-foreground:#fdfdfe;--ifm-color-success-contrast-background:#003100;--ifm-color-success-contrast-foreground:#e6f6e6;--ifm-color-info-contrast-background:#193c47;--ifm-color-info-contrast-foreground:#eef9fd;--ifm-color-warning-contrast-background:#4d3800;--ifm-color-warning-contrast-foreground:#fff8e6;--ifm-color-danger-contrast-background:#4b1113;--ifm-color-danger-contrast-foreground:#ffebec;--docsearch-text-color:#f5f6f7;--docsearch-container-background:#090a11cc;--docsearch-modal-background:#15172a;--docsearch-modal-shadow:inset 1px 1px 0 0 #2c2e40,0 3px 8px 0 #000309;--docsearch-searchbox-background:#090a11;--docsearch-searchbox-focus-background:#000;--docsearch-hit-color:#bec3c9;--docsearch-hit-shadow:none;--docsearch-hit-background:#090a11;--docsearch-key-gradient:linear-gradient(-26.5deg,#565872,#31355b);--docsearch-key-shadow:inset 0 -2px 0 0 #282d55,inset 0 0 1px 1px #51577d,0 2px 2px 0 #0304094d;--docsearch-key-pressed-shadow:inset 0 -2px 0 0 #282d55,inset 0 0 1px 1px #51577d,0 1px 1px 0 rgba(3,4,9,.302);--docsearch-footer-background:#1e2136;--docsearch-footer-shadow:inset 0 1px 0 0 #494c6a80,0 -4px 8px 0 #0003;--docsearch-logo-color:#fff;--docsearch-muted-color:#7f8497}#nprogress .bar{background:var(--docusaurus-progress-bar-color);height:2px;left:0;position:fixed;top:0;width:100%;z-index:1031}#nprogress .peg{box-shadow:0 0 10px var(--docusaurus-progress-bar-color),0 0 5px var(--docusaurus-progress-bar-color);height:100%;opacity:1;position:absolute;right:0;transform:rotate(3deg) 
translateY(-4px);width:100px}[data-theme=dark]{--ifm-color-primary:#a7b5f6;--ifm-color-primary-dark:#828ef1;--ifm-color-primary-darker:#626ae9;--ifm-color-primary-darkest:#4c4ddc;--ifm-color-primary-light:#c9d2fa;--ifm-color-primary-lighter:#e1e7fd;--ifm-color-primary-lightest:#eff2fe;--ifm-table-head-background:var(--ifm-table-stripe-background);--ifm-table-border-color:#d8dbe6}table,table thead tr{width:100%}table thead tr th{font-size:14px;font-weight:600;text-align:left}table thead tr{border-bottom:0}table tr td,table tr th{border:0;border-bottom:var(--ifm-table-border-width) solid var(--ifm-table-border-color)}table tr th{border-top:var(--ifm-table-border-width) solid var(--ifm-table-border-color)}table tr td:first-child,table tr th:first-child{border-left:var(--ifm-table-border-width) solid var(--ifm-table-border-color)}table tr th:first-child{border-top-left-radius:8px}table tr th:last-child{border-top-right-radius:8px}table tbody tr:last-child td:first-child{border-bottom-left-radius:8px}table tbody tr:last-child td:last-child{border-bottom-right-radius:8px}table tr td:last-child,table tr th:last-child{border-right:var(--ifm-table-border-width) solid var(--ifm-table-border-color)}table tr td{font-size:14px}table tbody tr:hover{background:var(--ifm-table-head-background)}table img{margin:0 0 -5px 18px}.video-container{overflow:hidden;position:relative;width:100%}.video-container:after{content:"";display:block;padding-top:56.25%}.video-container iframe{height:100%;left:0;position:absolute;top:0;width:100%}.gems-table table{display:table;width:100%}.gems-table table th:first-of-type,.gems-table table th:nth-of-type(2){width:150px}.alert--info{--ifm-alert-background-color:var(--ifm-color-secondary-contrast-background);--ifm-alert-border-color:#d8dbe6;--ifm-code-background:inherit}.cardContainer_fWXF{--ifm-link-color:var(--ifm-color-emphasis-800);--ifm-link-hover-color:var(--ifm-color-emphasis-700);--ifm-link-hover-decoration:none;border:1px solid 
var(--ifm-color-emphasis-200);box-shadow:0 1.5px 3px 0 #00000026;transition:all var(--ifm-transition-fast) ease;transition-property:border,box-shadow}.cardContainer_fWXF:hover{border-color:var(--ifm-color-primary);box-shadow:0 3px 6px 0 #0003}.cardTitle_rnsV{font-size:1.2rem}.cardDescription_PWke{font-size:.8rem}.iconEdit_Z9Sw{margin-right:.3em;vertical-align:sub}.tableOfContentsInline_prmo ul{font-size:medium;list-style-type:disc;padding-top:0}.DocSearch-Button{background:var(--docsearch-searchbox-background);border:0;border-radius:40px;color:var(--docsearch-muted-color);cursor:pointer;font-weight:500;height:36px;justify-content:space-between;padding:0 8px;-webkit-user-select:none;user-select:none}.DocSearch-Button:active,.DocSearch-Button:focus,.DocSearch-Button:hover{background:var(--docsearch-searchbox-focus-background);box-shadow:var(--docsearch-searchbox-shadow);color:var(--docsearch-text-color);outline:0}.DocSearch-Search-Icon{stroke-width:1.6}.DocSearch-Hit-Tree,.DocSearch-Hit-action,.DocSearch-Hit-icon,.DocSearch-Reset{stroke-width:var(--docsearch-icon-stroke-width)}.DocSearch-Button .DocSearch-Search-Icon{color:var(--docsearch-text-color)}.DocSearch-Button-Placeholder{font-size:1rem;padding:0 12px 0 6px}.DocSearch-Button-Keys{display:flex;min-width:calc(40px + .8em)}.DocSearch-Button-Key{align-items:center;background:var(--docsearch-key-gradient);border:0;border-radius:3px;box-shadow:var(--docsearch-key-shadow);color:var(--docsearch-muted-color);display:flex;height:18px;justify-content:center;margin-right:.4em;padding:0 0 2px;position:relative;top:-1px;width:20px}.DocSearch-Button-Key--pressed{box-shadow:var(--docsearch-key-pressed-shadow);transform:translate3d(0,1px,0)}.DocSearch--active{overflow:hidden!important}.DocSearch-Container{background-color:var(--docsearch-container-background);height:100vh;left:0;position:fixed;top:0;width:100vw;z-index:200}.DocSearch-Container a{text-decoration:none}.DocSearch-Hit[aria-selected=true] mark,.content_knG7 
a{text-decoration:underline}.DocSearch-Link{appearance:none;background:none;border:0;color:var(--docsearch-highlight-color);cursor:pointer;font:inherit;margin:0;padding:0}.DocSearch-Modal{background:var(--docsearch-modal-background);border-radius:6px;box-shadow:var(--docsearch-modal-shadow);flex-direction:column;margin:60px auto auto;max-width:var(--docsearch-modal-width);position:relative}.DocSearch-SearchBar{display:flex;padding:var(--docsearch-spacing) var(--docsearch-spacing) 0}.DocSearch-Form{align-items:center;background:var(--docsearch-searchbox-focus-background);border-radius:4px;box-shadow:var(--docsearch-searchbox-shadow);display:flex;height:var(--docsearch-searchbox-height);margin:0;padding:0 var(--docsearch-spacing);position:relative;width:100%}.DocSearch-Input{appearance:none;background:#0000;border:0;color:var(--docsearch-text-color);flex:1;font:inherit;font-size:1.2em;height:100%;outline:0;padding:0 0 0 8px;width:80%}.DocSearch-Input::placeholder{color:var(--docsearch-muted-color);opacity:1}.DocSearch-Input::-webkit-search-cancel-button,.DocSearch-Input::-webkit-search-decoration,.DocSearch-Input::-webkit-search-results-button,.DocSearch-Input::-webkit-search-results-decoration{display:none}.DocSearch-LoadingIndicator,.DocSearch-MagnifierLabel,.DocSearch-Reset{margin:0;padding:0}.DocSearch-Container--Stalled .DocSearch-LoadingIndicator,.DocSearch-MagnifierLabel,.DocSearch-Reset{align-items:center;color:var(--docsearch-highlight-color);display:flex;justify-content:center}.DocSearch-Cancel,.DocSearch-Container--Stalled .DocSearch-MagnifierLabel,.DocSearch-LoadingIndicator,.DocSearch-Reset[hidden]{display:none}.DocSearch-Reset{animation:.1s ease-in forwards 
a;appearance:none;background:none;border:0;border-radius:50%;color:var(--docsearch-icon-color);cursor:pointer;padding:2px;right:0}.DocSearch-Help,.DocSearch-HitsFooter,.DocSearch-Label{color:var(--docsearch-muted-color)}.DocSearch-Reset:hover{color:var(--docsearch-highlight-color)}.DocSearch-LoadingIndicator svg,.DocSearch-MagnifierLabel svg{height:24px;width:24px}.DocSearch-Dropdown{max-height:calc(var(--docsearch-modal-height) - var(--docsearch-searchbox-height) - var(--docsearch-spacing) - var(--docsearch-footer-height));min-height:var(--docsearch-spacing);overflow-y:auto;overflow-y:overlay;padding:0 var(--docsearch-spacing);scrollbar-color:var(--docsearch-muted-color) var(--docsearch-modal-background);scrollbar-width:thin}.DocSearch-Dropdown::-webkit-scrollbar{width:12px}.DocSearch-Dropdown::-webkit-scrollbar-track{background:#0000}.DocSearch-Dropdown::-webkit-scrollbar-thumb{background-color:var(--docsearch-muted-color);border:3px solid var(--docsearch-modal-background);border-radius:20px}.DocSearch-Dropdown ul{list-style:none;margin:0;padding:0}.DocSearch-Label{font-size:.75em;line-height:1.6em}.DocSearch-Help{font-size:.9em;margin:0;-webkit-user-select:none;user-select:none}.DocSearch-Title{font-size:1.2em}.DocSearch-Logo a{display:flex}.DocSearch-Logo svg{color:var(--docsearch-logo-color);margin-left:8px}.DocSearch-Hits:last-of-type{margin-bottom:24px}.DocSearch-Hits mark{background:none;color:var(--docsearch-highlight-color)}.DocSearch-HitsFooter{display:flex;font-size:.85em;justify-content:center;margin-bottom:var(--docsearch-spacing);padding:var(--docsearch-spacing)}.DocSearch-HitsFooter a{border-bottom:1px solid;color:inherit}.DocSearch-Hit{border-radius:4px;display:flex;padding-bottom:4px;position:relative}.DocSearch-Hit--deleting{opacity:0;transition:.25s linear}.DocSearch-Hit--favoriting{transform:scale(0);transform-origin:top center;transition:.25s linear .25s}.DocSearch-Hit 
a{background:var(--docsearch-hit-background);border-radius:4px;box-shadow:var(--docsearch-hit-shadow);display:block;padding-left:var(--docsearch-spacing);width:100%}.DocSearch-Hit-source{background:var(--docsearch-modal-background);color:var(--docsearch-highlight-color);font-size:.85em;font-weight:600;line-height:32px;margin:0 -4px;padding:8px 4px 0;position:sticky;top:0;z-index:10}.DocSearch-Hit-Tree{color:var(--docsearch-muted-color);height:var(--docsearch-hit-height);opacity:.5;width:24px}.DocSearch-Hit[aria-selected=true] a{background-color:var(--docsearch-highlight-color)}.DocSearch-Hit-Container{align-items:center;color:var(--docsearch-hit-color);display:flex;flex-direction:row;height:var(--docsearch-hit-height);padding:0 var(--docsearch-spacing) 0 0}.DocSearch-Hit-icon{height:20px;width:20px}.DocSearch-Hit-action,.DocSearch-Hit-icon{color:var(--docsearch-muted-color)}.DocSearch-Hit-action{align-items:center;display:flex;height:22px;width:22px}.DocSearch-Hit-action svg{display:block;height:18px;width:18px}.DocSearch-Hit-action+.DocSearch-Hit-action{margin-left:6px}.DocSearch-Hit-action-button{appearance:none;background:none;border:0;border-radius:50%;color:inherit;cursor:pointer;padding:2px}#__docusaurus-base-url-issue-banner-container,.docSidebarContainer_b6E3,.sidebarLogo_isFc,.themedImage_ToTc,[data-theme=dark] .lightToggleIcon_pyhR,[data-theme=light] .darkToggleIcon_wfgR,html[data-announcement-bar-initially-dismissed=true] .announcementBar_mb4j,svg.DocSearch-Hit-Select-Icon{display:none}.DocSearch-Hit[aria-selected=true] .DocSearch-Hit-Select-Icon,.tocCollapsibleContent_vkbj a{display:block}.DocSearch-Hit-action-button:focus,.DocSearch-Hit-action-button:hover{background:#0003;transition:background-color .1s ease-in}.DocSearch-Hit-action-button:focus path,.DocSearch-Hit-action-button:hover path{fill:#fff}.DocSearch-Hit-content-wrapper{display:flex;flex:1 1 auto;flex-direction:column;font-weight:500;justify-content:center;line-height:1.2em;margin:0 
8px;overflow-x:hidden;position:relative;text-overflow:ellipsis;white-space:nowrap;width:80%}.DocSearch-Hit-title{font-size:.9em}.DocSearch-Hit-path{color:var(--docsearch-muted-color);font-size:.75em}.DocSearch-Hit[aria-selected=true] .DocSearch-Hit-Tree,.DocSearch-Hit[aria-selected=true] .DocSearch-Hit-action,.DocSearch-Hit[aria-selected=true] .DocSearch-Hit-icon,.DocSearch-Hit[aria-selected=true] .DocSearch-Hit-path,.DocSearch-Hit[aria-selected=true] .DocSearch-Hit-text,.DocSearch-Hit[aria-selected=true] .DocSearch-Hit-title,.DocSearch-Hit[aria-selected=true] mark{color:var(--docsearch-hit-active-color)!important}.DocSearch-ErrorScreen,.DocSearch-NoResults,.DocSearch-StartScreen{font-size:.9em;margin:0 auto;padding:36px 0;text-align:center;width:80%}.DocSearch-Screen-Icon{color:var(--docsearch-muted-color);padding-bottom:12px}.DocSearch-NoResults-Prefill-List{display:inline-block;padding-bottom:24px;text-align:left}.DocSearch-NoResults-Prefill-List ul{display:inline-block;padding:8px 0 0}.DocSearch-NoResults-Prefill-List li{list-style-position:inside;list-style-type:"» "}.DocSearch-Prefill{appearance:none;background:none;border:0;border-radius:1em;color:var(--docsearch-highlight-color);cursor:pointer;display:inline-block;font-size:1em;font-weight:700;padding:0}.DocSearch-Prefill:focus,.DocSearch-Prefill:hover{outline:0;text-decoration:underline}.DocSearch-Footer{align-items:center;background:var(--docsearch-footer-background);border-radius:0 0 8px 8px;box-shadow:var(--docsearch-footer-shadow);display:flex;flex-direction:row-reverse;flex-shrink:0;height:var(--docsearch-footer-height);justify-content:space-between;padding:0 var(--docsearch-spacing);position:relative;-webkit-user-select:none;user-select:none;width:100%;z-index:300}.DocSearch-Commands li,.DocSearch-Commands-Key{align-items:center;display:flex}.DocSearch-Commands{color:var(--docsearch-muted-color);display:flex;list-style:none;margin:0;padding:0}.DocSearch-Commands 
li:not(:last-of-type){margin-right:.8em}.DocSearch-Commands-Key{background:var(--docsearch-key-gradient);border:0;border-radius:2px;box-shadow:var(--docsearch-key-shadow);color:var(--docsearch-muted-color);height:18px;justify-content:center;margin-right:.4em;padding:0 0 1px;width:20px}.DocSearch-VisuallyHiddenForAccessibility{clip:rect(0 0 0 0);clip-path:inset(50%);height:1px;overflow:hidden;position:absolute;white-space:nowrap;width:1px}@keyframes a{0%{opacity:0}to{opacity:1}}.DocSearch-Button{margin:0;transition:all var(--ifm-transition-fast) var(--ifm-transition-timing-default)}.DocSearch-Container,.skipToContent_fXgn{z-index:calc(var(--ifm-z-index-fixed) + 1)}.skipToContent_fXgn{background-color:var(--ifm-background-surface-color);color:var(--ifm-color-emphasis-900);left:100%;padding:calc(var(--ifm-global-spacing)/2) var(--ifm-global-spacing);position:fixed;top:1rem}.skipToContent_fXgn:focus{box-shadow:var(--ifm-global-shadow-md);left:1rem}.closeButton_CVFx{line-height:0;padding:0}.content_knG7{font-size:85%;padding:5px 0;text-align:center}.content_knG7 a{color:inherit}.announcementBar_mb4j{align-items:center;background-color:var(--ifm-color-white);border-bottom:1px solid var(--ifm-color-emphasis-100);color:var(--ifm-color-black);display:flex;height:var(--docusaurus-announcement-bar-height)}.announcementBarPlaceholder_vyr4{flex:0 0 10px}.announcementBarClose_gvF7{align-self:stretch;flex:0 0 30px}.toggle_vylO{height:2rem;width:2rem}.toggleButton_gllP{align-items:center;border-radius:50%;display:flex;height:100%;justify-content:center;transition:background var(--ifm-transition-fast);width:100%}.toggleButton_gllP:hover{background:var(--ifm-color-emphasis-200)}.toggleButtonDisabled_aARS{cursor:not-allowed}.darkNavbarColorModeToggle_X3D1:hover{background:var(--ifm-color-gray-800)}[data-theme=dark] .themedImage--dark_i4oU,[data-theme=light] .themedImage--light_HNdA,html:not([data-theme]) 
.themedComponent--light_NU7w{display:initial}.iconExternalLink_nPIU{margin-left:.3rem}.iconLanguage_nlXk{margin-right:5px;vertical-align:text-bottom}.navbarHideable_m1mJ{transition:transform var(--ifm-transition-fast) ease}.navbarHidden_jGov{transform:translate3d(0,calc(-100% - 2px),0)}.errorBoundaryError_a6uf{color:red;white-space:pre-wrap}body:not(.navigation-with-keyboard) :not(input):focus{outline:0}.footerLogoLink_BH7S{opacity:.5;transition:opacity var(--ifm-transition-fast) var(--ifm-transition-timing-default)}.footerLogoLink_BH7S:hover,.hash-link:focus,:hover>.hash-link{opacity:1}.mainWrapper_z2l0{display:flex;flex:1 0 auto;flex-direction:column}.docusaurus-mt-lg{margin-top:3rem}#__docusaurus{display:flex;flex-direction:column;min-height:100%}.tag_zVej{border:1px solid var(--docusaurus-tag-list-border);transition:border var(--ifm-transition-fast)}.tag_zVej:hover{--docusaurus-tag-list-border:var(--ifm-link-color);text-decoration:none}.tagRegular_sFm0{border-radius:var(--ifm-global-radius);font-size:90%;padding:.2rem .5rem .3rem}.tagWithCount_h2kH{align-items:center;border-left:0;display:flex;padding:0 .5rem 0 1rem;position:relative}.tagWithCount_h2kH:after,.tagWithCount_h2kH:before{border:1px solid var(--docusaurus-tag-list-border);content:"";position:absolute;top:50%;transition:inherit}.tagWithCount_h2kH:before{border-bottom:0;border-right:0;height:1.18rem;right:100%;transform:translate(50%,-50%) rotate(-45deg);width:1.18rem}.tagWithCount_h2kH:after{border-radius:50%;height:.5rem;left:0;transform:translateY(-50%);width:.5rem}.tagWithCount_h2kH span{background:var(--ifm-color-secondary);border-radius:var(--ifm-global-radius);color:var(--ifm-color-black);font-size:.7rem;line-height:1.2;margin-left:.3rem;padding:.1rem .4rem}.tags_jXut{display:inline}.tag_QGVx{display:inline-block;margin:0 .4rem .5rem 
0}.lastUpdated_vwxv{font-size:smaller;font-style:italic;margin-top:.2rem}.tocCollapsibleButton_TO0P{align-items:center;display:flex;font-size:inherit;justify-content:space-between;padding:.4rem .8rem;width:100%}.tocCollapsibleButton_TO0P:after{background:var(--ifm-menu-link-sublist-icon) 50% 50%/2rem 2rem no-repeat;content:"";filter:var(--ifm-menu-link-sublist-icon-filter);height:1.25rem;transform:rotate(180deg);transition:transform var(--ifm-transition-fast);width:1.25rem}.tocCollapsibleButtonExpanded_MG3E:after,.tocCollapsibleExpanded_sAul{transform:none}.tocCollapsible_ETCw{background-color:var(--ifm-menu-color-background-active);border-radius:var(--ifm-global-radius);margin:1rem 0}.tocCollapsibleContent_vkbj>ul{border-left:none;border-top:1px solid var(--ifm-color-emphasis-300);font-size:15px;padding:.2rem 0}.tocCollapsibleContent_vkbj ul li{margin:.4rem .8rem}.searchQueryInput_u2C7,.searchVersionInput_m0Ui{background:var(--docsearch-searchbox-focus-background);border:2px solid var(--ifm-toc-border-color);border-radius:var(--ifm-global-radius);color:var(--docsearch-text-color);font:var(--ifm-font-size-base) var(--ifm-font-family-base);margin-bottom:.5rem;padding:.8rem;transition:border var(--ifm-transition-fast) ease;width:100%}.searchQueryInput_u2C7:focus,.searchVersionInput_m0Ui:focus{border-color:var(--docsearch-primary-color);outline:0}.searchQueryInput_u2C7::placeholder{color:var(--docsearch-muted-color)}.searchResultsColumn_JPFH{font-size:.9rem;font-weight:700}.algoliaLogo_rT1R{max-width:150px}.algoliaLogoPathFill_WdUC{fill:var(--ifm-font-color-base)}.searchResultItem_Tv2o{border-bottom:1px solid var(--ifm-toc-border-color);padding:1rem 0}.searchResultItemHeading_KbCB{font-weight:400;margin-bottom:0}.searchResultItemPath_lhe1{--ifm-breadcrumb-separator-size-multiplier:1;color:var(--ifm-color-content-secondary);font-size:.8rem}.searchResultItemSummary_AEaO{font-style:italic;margin:.5rem 0 0}.loadingSpinner_XVxU{animation:1s linear infinite b;border:.4em 
solid #eee;border-radius:50%;border-top:.4em solid var(--ifm-color-primary);height:3rem;margin:0 auto;width:3rem}@keyframes b{to{transform:rotate(1turn)}}.loader_vvXV{margin-top:2rem}.search-result-match{background:#ffd78e40;color:var(--docsearch-hit-color);padding:.09em 0}.backToTopButton_sjWU{background-color:var(--ifm-color-emphasis-200);border-radius:50%;bottom:1.3rem;box-shadow:var(--ifm-global-shadow-lw);height:3rem;opacity:0;position:fixed;right:1.3rem;transform:scale(0);transition:all var(--ifm-transition-fast) var(--ifm-transition-timing-default);visibility:hidden;width:3rem;z-index:calc(var(--ifm-z-index-fixed) - 1)}.buttonGroup__atx button,.codeBlockContainer_Ckt0{background:var(--prism-background-color);color:var(--prism-color)}.backToTopButton_sjWU:after{background-color:var(--ifm-color-emphasis-1000);content:" ";display:inline-block;height:100%;-webkit-mask:var(--ifm-menu-link-sublist-icon) 50%/2rem 2rem no-repeat;mask:var(--ifm-menu-link-sublist-icon) 50%/2rem 2rem no-repeat;width:100%}.backToTopButtonShow_xfvO{opacity:1;transform:scale(1);visibility:visible}[data-theme=dark]:root{--docusaurus-collapse-button-bg:#ffffff0d;--docusaurus-collapse-button-bg-hover:#ffffff1a}.collapseSidebarButton_PEFL{display:none;margin:0}.docMainContainer_gTbr,.docPage__5DB{display:flex;width:100%}.docPage__5DB{flex:1 0}.docsWrapper_BCFX{display:flex;flex:1 0 auto}.tag_Nnez{display:inline-block;margin:.5rem .5rem 0 1rem}.codeBlockContainer_Ckt0{border-radius:var(--ifm-code-border-radius);box-shadow:var(--ifm-global-shadow-lw);margin-bottom:var(--ifm-leading)}.codeBlockContent_biex{border-radius:inherit;direction:ltr;position:relative}.codeBlockTitle_Ktv7{border-bottom:1px solid var(--ifm-color-emphasis-300);border-top-left-radius:inherit;border-top-right-radius:inherit;font-size:var(--ifm-code-font-size);font-weight:500;padding:.75rem 
var(--ifm-pre-padding)}.codeBlock_bY9V{--ifm-pre-background:var(--prism-background-color);margin:0;padding:0}.codeBlockTitle_Ktv7+.codeBlockContent_biex .codeBlock_bY9V{border-top-left-radius:0;border-top-right-radius:0}.codeBlockLines_e6Vv{float:left;font:inherit;min-width:100%;padding:var(--ifm-pre-padding)}.codeBlockLinesWithNumbering_o6Pm{display:table;padding:var(--ifm-pre-padding) 0}.buttonGroup__atx{column-gap:.2rem;display:flex;position:absolute;right:calc(var(--ifm-pre-padding)/2);top:calc(var(--ifm-pre-padding)/2)}.buttonGroup__atx button{align-items:center;border:1px solid var(--ifm-color-emphasis-300);border-radius:var(--ifm-global-radius);display:flex;line-height:0;opacity:0;padding:.4rem;transition:opacity var(--ifm-transition-fast) ease-in-out}.buttonGroup__atx button:focus-visible,.buttonGroup__atx button:hover{opacity:1!important}.theme-code-block:hover .buttonGroup__atx button{opacity:.4}:where(:root){--docusaurus-highlighted-code-line-bg:#484d5b}:where([data-theme=dark]){--docusaurus-highlighted-code-line-bg:#646464}.theme-code-block-highlighted-line{background-color:var(--docusaurus-highlighted-code-line-bg);display:block;margin:0 calc(var(--ifm-pre-padding)*-1);padding:0 var(--ifm-pre-padding)}.codeLine_lJS_{counter-increment:a;display:table-row}.codeLineNumber_Tfdd{background:var(--ifm-pre-background);display:table-cell;left:0;overflow-wrap:normal;padding:0 var(--ifm-pre-padding);position:sticky;text-align:right;width:1%}.codeLineNumber_Tfdd:before{content:counter(a);opacity:.4}.codeLineContent_feaV{padding-right:var(--ifm-pre-padding)}.theme-code-block:hover .copyButtonCopied_obH4{opacity:1!important}.copyButtonIcons_eSgA{height:1.125rem;position:relative;width:1.125rem}.copyButtonIcon_y97N,.copyButtonSuccessIcon_LjdS{fill:currentColor;height:inherit;left:0;opacity:inherit;position:absolute;top:0;transition:all var(--ifm-transition-fast) 
ease;width:inherit}.copyButtonSuccessIcon_LjdS{color:#00d600;left:50%;opacity:0;top:50%;transform:translate(-50%,-50%) scale(.33)}.copyButtonCopied_obH4 .copyButtonIcon_y97N{opacity:0;transform:scale(.33)}.copyButtonCopied_obH4 .copyButtonSuccessIcon_LjdS{opacity:1;transform:translate(-50%,-50%) scale(1);transition-delay:75ms}.wordWrapButtonIcon_Bwma{height:1.2rem;width:1.2rem}.details_lb9f{--docusaurus-details-summary-arrow-size:0.38rem;--docusaurus-details-transition:transform 200ms ease;--docusaurus-details-decoration-color:grey}.details_lb9f>summary{cursor:pointer;list-style:none;padding-left:1rem;position:relative}.details_lb9f>summary::-webkit-details-marker{display:none}.details_lb9f>summary:before{border-color:#0000 #0000 #0000 var(--docusaurus-details-decoration-color);border-style:solid;border-width:var(--docusaurus-details-summary-arrow-size);content:"";left:0;position:absolute;top:.45rem;transform:rotate(0);transform-origin:calc(var(--docusaurus-details-summary-arrow-size)/2) 50%;transition:var(--docusaurus-details-transition)}.collapsibleContent_i85q{border-top:1px solid var(--docusaurus-details-decoration-color);margin-top:1rem;padding-top:1rem}.details_b_Ee{--docusaurus-details-decoration-color:var(--ifm-alert-border-color);--docusaurus-details-transition:transform var(--ifm-transition-fast) ease;border:1px solid var(--ifm-alert-border-color);margin:0 0 var(--ifm-spacing-vertical)}.anchorWithStickyNavbar_LWe7{scroll-margin-top:calc(var(--ifm-navbar-height) + .5rem)}.anchorWithHideOnScrollNavbar_WYt5{scroll-margin-top:.5rem}.hash-link{opacity:0;padding-left:.5rem;transition:opacity var(--ifm-transition-fast);-webkit-user-select:none;user-select:none}.hash-link:before{content:"#"}.containsTaskList_mC6p{list-style:none}.img_ev3q{height:auto}.tableOfContents_bqdL{max-height:calc(100vh - var(--ifm-navbar-height) - 2rem);overflow-y:auto;position:sticky;top:calc(var(--ifm-navbar-height) + 
1rem)}.admonition_LlT9{margin-bottom:1em}.admonitionHeading_tbUL{font:var(--ifm-heading-font-weight) var(--ifm-h5-font-size)/var(--ifm-heading-line-height) var(--ifm-heading-font-family);margin-bottom:.3rem}.admonitionHeading_tbUL code{text-transform:none}.admonitionIcon_kALy{display:inline-block;margin-right:.4em;vertical-align:middle}.admonitionIcon_kALy svg{fill:var(--ifm-alert-foreground-color);display:inline-block;height:1.6em;width:1.6em}.breadcrumbHomeIcon_YNFT{height:1.1rem;position:relative;top:1px;vertical-align:top;width:1.1rem}.breadcrumbsContainer_Z_bl{--ifm-breadcrumb-size-multiplier:0.8;margin-bottom:.8rem}.mdxPageWrapper_j9I6{justify-content:center}@media (min-width:997px){.collapseSidebarButton_PEFL,.expandButton_m80_{background-color:var(--docusaurus-collapse-button-bg)}:root{--docusaurus-announcement-bar-height:30px}.announcementBarClose_gvF7,.announcementBarPlaceholder_vyr4{flex-basis:50px}.searchBox_ZlJk{padding:var(--ifm-navbar-item-padding-vertical) var(--ifm-navbar-item-padding-horizontal)}.lastUpdated_vwxv{text-align:right}.tocMobile_ITEo{display:none}.collapseSidebarButton_PEFL{border:1px solid var(--ifm-toc-border-color);border-radius:0;bottom:0;display:block!important;height:40px;position:sticky}.collapseSidebarButtonIcon_kv0_{margin-top:4px;transform:rotate(180deg)}.expandButtonIcon_BlDH,[dir=rtl] .collapseSidebarButtonIcon_kv0_{transform:rotate(0)}.collapseSidebarButton_PEFL:focus,.collapseSidebarButton_PEFL:hover,.expandButton_m80_:focus,.expandButton_m80_:hover{background-color:var(--docusaurus-collapse-button-bg-hover)}.menuHtmlItem_M9Kj{padding:var(--ifm-menu-link-padding-vertical) var(--ifm-menu-link-padding-horizontal)}.menu_SIkG{flex-grow:1;padding:.5rem}@supports (scrollbar-gutter:stable){.menu_SIkG{padding:.5rem 0 .5rem 
.5rem;scrollbar-gutter:stable}}.menuWithAnnouncementBar_GW3s{margin-bottom:var(--docusaurus-announcement-bar-height)}.sidebar_njMd{display:flex;flex-direction:column;height:100%;padding-top:var(--ifm-navbar-height);width:var(--doc-sidebar-width)}.sidebarWithHideableNavbar_wUlq{padding-top:0}.sidebarHidden_VK0M{opacity:0;visibility:hidden}.sidebarLogo_isFc{align-items:center;color:inherit!important;display:flex!important;margin:0 var(--ifm-navbar-padding-horizontal);max-height:var(--ifm-navbar-height);min-height:var(--ifm-navbar-height);text-decoration:none!important}.sidebarLogo_isFc img{height:2rem;margin-right:.5rem}.expandButton_m80_{align-items:center;display:flex;height:100%;justify-content:center;position:absolute;right:0;top:0;transition:background-color var(--ifm-transition-fast) ease;width:100%}[dir=rtl] .expandButtonIcon_BlDH{transform:rotate(180deg)}.docSidebarContainer_b6E3{border-right:1px solid var(--ifm-toc-border-color);clip-path:inset(0);display:block;margin-top:calc(var(--ifm-navbar-height)*-1);transition:width var(--ifm-transition-fast) ease;width:var(--doc-sidebar-width);will-change:width}.docSidebarContainerHidden_b3ry{cursor:pointer;width:var(--doc-sidebar-hidden-width)}.sidebarViewport_Xe31{height:100%;max-height:100vh;position:sticky;top:0}.docMainContainer_gTbr{flex-grow:1;max-width:calc(100% - var(--doc-sidebar-width))}.docMainContainerEnhanced_Uz_u{max-width:calc(100% - var(--doc-sidebar-hidden-width))}.docItemWrapperEnhanced_czyv{max-width:calc(var(--ifm-container-width) + var(--doc-sidebar-width))!important}.docItemCol_VOVn{max-width:75%!important}}@media (min-width:1440px){.container{max-width:var(--ifm-container-width-xl)}}@media 
(max-width:996px){.col{--ifm-col-width:100%;flex-basis:var(--ifm-col-width);margin-left:0}.footer{--ifm-footer-padding-horizontal:0}.colorModeToggle_DEke,.footer__link-separator,.navbar__item,.tableOfContents_bqdL{display:none}.footer__col{margin-bottom:calc(var(--ifm-spacing-vertical)*3)}.footer__link-item{display:block}.hero{padding-left:0;padding-right:0}.navbar>.container,.navbar>.container-fluid{padding:0}.navbar__toggle{display:inherit}.navbar__search-input{width:9rem}.pills--block,.tabs--block{flex-direction:column}.searchBox_ZlJk{position:absolute;right:var(--ifm-navbar-padding-horizontal)}.docItemContainer_F8PC{padding:0 .3rem}}@media only screen and (max-width:996px){.searchQueryColumn_RTkw,.searchResultsColumn_JPFH{max-width:60%!important}.searchLogoColumn_rJIA,.searchVersionColumn_ypXd{max-width:40%!important}.searchLogoColumn_rJIA{padding-left:0!important}}@media (max-width:768px){.DocSearch-Button-Keys,.DocSearch-Button-Placeholder,.DocSearch-Commands,.DocSearch-Hit-Tree{display:none}:root{--docsearch-spacing:10px;--docsearch-footer-height:40px}.DocSearch-Dropdown{height:100%;max-height:calc(var(--docsearch-vh,1vh)*100 - var(--docsearch-searchbox-height) - var(--docsearch-spacing) - 
var(--docsearch-footer-height))}.DocSearch-Container{height:100vh;height:-webkit-fill-available;height:calc(var(--docsearch-vh,1vh)*100);position:absolute}.DocSearch-Footer{border-radius:0;bottom:0;position:absolute}.DocSearch-Hit-content-wrapper{display:flex;position:relative;width:80%}.DocSearch-Modal{border-radius:0;box-shadow:none;height:100vh;height:-webkit-fill-available;height:calc(var(--docsearch-vh,1vh)*100);margin:0;max-width:100%;width:100%}.DocSearch-Cancel{appearance:none;background:none;border:0;color:var(--docsearch-highlight-color);cursor:pointer;display:inline-block;flex:none;font:inherit;font-size:1em;font-weight:500;margin-left:var(--docsearch-spacing);outline:0;overflow:hidden;padding:0;-webkit-user-select:none;user-select:none;white-space:nowrap}}@media (max-width:576px){.markdown h1:first-child{--ifm-h1-font-size:2rem}.markdown>h2{--ifm-h2-font-size:1.5rem}.markdown>h3{--ifm-h3-font-size:1.25rem}}@media screen and (max-width:576px){.searchQueryColumn_RTkw{max-width:100%!important}.searchVersionColumn_ypXd{max-width:100%!important;padding-left:var(--ifm-spacing-horizontal)!important}}@media (hover:hover){.backToTopButton_sjWU:hover{background-color:var(--ifm-color-emphasis-300)}}@media (pointer:fine){.thin-scrollbar{scrollbar-width:thin}.thin-scrollbar::-webkit-scrollbar{height:var(--ifm-scrollbar-size);width:var(--ifm-scrollbar-size)}.thin-scrollbar::-webkit-scrollbar-track{background:var(--ifm-scrollbar-track-background-color);border-radius:10px}.thin-scrollbar::-webkit-scrollbar-thumb{background:var(--ifm-scrollbar-thumb-background-color);border-radius:10px}.thin-scrollbar::-webkit-scrollbar-thumb:hover{background:var(--ifm-scrollbar-thumb-hover-background-color)}}@media (prefers-reduced-motion:reduce){:root{--ifm-transition-fast:0ms;--ifm-transition-slow:0ms}}@media screen and 
(prefers-reduced-motion:reduce){.DocSearch-Reset{stroke-width:var(--docsearch-icon-stroke-width);animation:none;appearance:none;background:none;border:0;border-radius:50%;color:var(--docsearch-icon-color);cursor:pointer;right:0}.DocSearch-Hit--deleting,.DocSearch-Hit--favoriting{transition:none}.DocSearch-Hit-action-button:focus,.DocSearch-Hit-action-button:hover{background:#0003;transition:none}}@media print{.announcementBar_mb4j,.footer,.menu,.navbar,.pagination-nav,.table-of-contents,.tocMobile_ITEo{display:none}.tabs{page-break-inside:avoid}.codeBlockLines_e6Vv{white-space:pre-wrap}} \ No newline at end of file diff --git a/assets/css/styles.281c4cdc.css b/assets/css/styles.281c4cdc.css new file mode 100644 index 0000000000..b896d34253 --- /dev/null +++ b/assets/css/styles.281c4cdc.css @@ -0,0 +1 @@ +.col,.container{padding:0 var(--ifm-spacing-horizontal);width:100%}.markdown>h2,.markdown>h3,.markdown>h4,.markdown>h5,.markdown>h6{margin-bottom:calc(var(--ifm-heading-vertical-rhythm-bottom)*var(--ifm-leading))}.markdown li,body{word-wrap:break-word}body,ol ol,ol ul,ul ol,ul ul{margin:0}pre,table{overflow:auto}blockquote,pre{margin:0 0 var(--ifm-spacing-vertical)}.breadcrumbs__link,.button{transition-timing-function:var(--ifm-transition-timing-default)}.button,code{vertical-align:middle}.button--outline.button--active,.button--outline:active,.button--outline:hover,:root{--ifm-button-color:var(--ifm-font-color-base-inverse)}.menu__link:hover,a{transition:color var(--ifm-transition-fast) var(--ifm-transition-timing-default)}.navbar--dark,:root{--ifm-navbar-link-hover-color:var(--ifm-color-primary)}.menu,.navbar-sidebar{overflow-x:hidden}:root,html[data-theme=dark]{--ifm-color-emphasis-500:var(--ifm-color-gray-500)}:root,[data-theme=dark]{--ifm-table-border-width:1px}*,.DocSearch-Container,.DocSearch-Container 
*{box-sizing:border-box}.toggleButton_gllP,html{-webkit-tap-highlight-color:transparent}:root{--ifm-color-scheme:light;--ifm-dark-value:10%;--ifm-darker-value:15%;--ifm-darkest-value:30%;--ifm-light-value:15%;--ifm-lighter-value:30%;--ifm-lightest-value:50%;--ifm-contrast-background-value:90%;--ifm-contrast-foreground-value:70%;--ifm-contrast-background-dark-value:70%;--ifm-contrast-foreground-dark-value:90%;--ifm-color-primary:#3578e5;--ifm-color-secondary:#ebedf0;--ifm-color-success:#00a400;--ifm-color-info:#54c7ec;--ifm-color-warning:#ffba00;--ifm-color-danger:#fa383e;--ifm-color-primary-dark:#306cce;--ifm-color-primary-darker:#2d66c3;--ifm-color-primary-darkest:#2554a0;--ifm-color-primary-light:#538ce9;--ifm-color-primary-lighter:#72a1ed;--ifm-color-primary-lightest:#9abcf2;--ifm-color-primary-contrast-background:#ebf2fc;--ifm-color-primary-contrast-foreground:#102445;--ifm-color-secondary-dark:#d4d5d8;--ifm-color-secondary-darker:#c8c9cc;--ifm-color-secondary-darkest:#a4a6a8;--ifm-color-secondary-light:#eef0f2;--ifm-color-secondary-lighter:#f1f2f5;--ifm-color-secondary-lightest:#f5f6f8;--ifm-color-secondary-contrast-background:#fdfdfe;--ifm-color-secondary-contrast-foreground:#474748;--ifm-color-success-dark:#009400;--ifm-color-success-darker:#008b00;--ifm-color-success-darkest:#007300;--ifm-color-success-light:#26b226;--ifm-color-success-lighter:#4dbf4d;--ifm-color-success-lightest:#80d280;--ifm-color-success-contrast-background:#e6f6e6;--ifm-color-success-contrast-foreground:#003100;--ifm-color-info-dark:#4cb3d4;--ifm-color-info-darker:#47a9c9;--ifm-color-info-darkest:#3b8ba5;--ifm-color-info-light:#6ecfef;--ifm-color-info-lighter:#87d8f2;--ifm-color-info-lightest:#aae3f6;--ifm-color-info-contrast-background:#eef9fd;--ifm-color-info-contrast-foreground:#193c47;--ifm-color-warning-dark:#e6a700;--ifm-color-warning-darker:#d99e00;--ifm-color-warning-darkest:#b38200;--ifm-color-warning-light:#ffc426;--ifm-color-warning-lighter:#ffcf4d;--ifm-color-warning-lightest
:#ffdd80;--ifm-color-warning-contrast-background:#fff8e6;--ifm-color-warning-contrast-foreground:#4d3800;--ifm-color-danger-dark:#e13238;--ifm-color-danger-darker:#d53035;--ifm-color-danger-darkest:#af272b;--ifm-color-danger-light:#fb565b;--ifm-color-danger-lighter:#fb7478;--ifm-color-danger-lightest:#fd9c9f;--ifm-color-danger-contrast-background:#ffebec;--ifm-color-danger-contrast-foreground:#4b1113;--ifm-color-white:#fff;--ifm-color-black:#000;--ifm-color-gray-0:var(--ifm-color-white);--ifm-color-gray-100:#f5f6f7;--ifm-color-gray-200:#ebedf0;--ifm-color-gray-300:#dadde1;--ifm-color-gray-400:#ccd0d5;--ifm-color-gray-500:#bec3c9;--ifm-color-gray-600:#8d949e;--ifm-color-gray-700:#606770;--ifm-color-gray-800:#444950;--ifm-color-gray-900:#1c1e21;--ifm-color-gray-1000:var(--ifm-color-black);--ifm-color-emphasis-0:var(--ifm-color-gray-0);--ifm-color-emphasis-100:var(--ifm-color-gray-100);--ifm-color-emphasis-200:var(--ifm-color-gray-200);--ifm-color-emphasis-300:var(--ifm-color-gray-300);--ifm-color-emphasis-400:var(--ifm-color-gray-400);--ifm-color-emphasis-600:var(--ifm-color-gray-600);--ifm-color-emphasis-700:var(--ifm-color-gray-700);--ifm-color-emphasis-800:var(--ifm-color-gray-800);--ifm-color-emphasis-900:var(--ifm-color-gray-900);--ifm-color-emphasis-1000:var(--ifm-color-gray-1000);--ifm-color-content:var(--ifm-color-emphasis-900);--ifm-color-content-inverse:var(--ifm-color-emphasis-0);--ifm-color-content-secondary:#525860;--ifm-background-color:#0000;--ifm-background-surface-color:var(--ifm-color-content-inverse);--ifm-global-border-width:1px;--ifm-global-radius:0.4rem;--ifm-hover-overlay:#0000000d;--ifm-font-color-base:var(--ifm-color-content);--ifm-font-color-base-inverse:var(--ifm-color-content-inverse);--ifm-font-color-secondary:var(--ifm-color-content-secondary);--ifm-font-family-base:system-ui,-apple-system,Segoe UI,Roboto,Ubuntu,Cantarell,Noto Sans,sans-serif,BlinkMacSystemFont,"Segoe UI",Helvetica,Arial,sans-serif,"Apple Color Emoji","Segoe UI 
Emoji","Segoe UI Symbol";--ifm-font-family-monospace:SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace;--ifm-font-size-base:100%;--ifm-font-weight-light:300;--ifm-font-weight-normal:400;--ifm-font-weight-semibold:500;--ifm-font-weight-bold:700;--ifm-font-weight-base:var(--ifm-font-weight-normal);--ifm-line-height-base:1.65;--ifm-global-spacing:1rem;--ifm-spacing-vertical:var(--ifm-global-spacing);--ifm-spacing-horizontal:var(--ifm-global-spacing);--ifm-transition-fast:200ms;--ifm-transition-slow:400ms;--ifm-transition-timing-default:cubic-bezier(0.08,0.52,0.52,1);--ifm-global-shadow-lw:0 1px 2px 0 #0000001a;--ifm-global-shadow-md:0 5px 40px #0003;--ifm-global-shadow-tl:0 12px 28px 0 #0003,0 2px 4px 0 #0000001a;--ifm-z-index-dropdown:100;--ifm-z-index-fixed:200;--ifm-z-index-overlay:400;--ifm-container-width:1140px;--ifm-container-width-xl:1320px;--ifm-code-background:#f6f7f8;--ifm-code-border-radius:var(--ifm-global-radius);--ifm-code-font-size:90%;--ifm-code-padding-horizontal:0.1rem;--ifm-code-padding-vertical:0.1rem;--ifm-pre-background:var(--ifm-code-background);--ifm-pre-border-radius:var(--ifm-code-border-radius);--ifm-pre-color:inherit;--ifm-pre-line-height:1.45;--ifm-pre-padding:1rem;--ifm-heading-color:inherit;--ifm-heading-margin-top:0;--ifm-heading-margin-bottom:var(--ifm-spacing-vertical);--ifm-heading-font-family:var(--ifm-font-family-base);--ifm-heading-font-weight:var(--ifm-font-weight-bold);--ifm-heading-line-height:1.25;--ifm-h1-font-size:2rem;--ifm-h2-font-size:1.5rem;--ifm-h3-font-size:1.25rem;--ifm-h4-font-size:1rem;--ifm-h5-font-size:0.875rem;--ifm-h6-font-size:0.85rem;--ifm-image-alignment-padding:1.25rem;--ifm-leading-desktop:1.25;--ifm-leading:calc(var(--ifm-leading-desktop)*1rem);--ifm-list-left-padding:2rem;--ifm-list-margin:1rem;--ifm-list-item-margin:0.25rem;--ifm-list-paragraph-margin:1rem;--ifm-table-cell-padding:0.75rem;--ifm-table-background:#0000;--ifm-table-stripe-background:#00000008;--ifm-table-border-
color:var(--ifm-color-emphasis-300);--ifm-table-head-background:inherit;--ifm-table-head-color:inherit;--ifm-table-head-font-weight:var(--ifm-font-weight-bold);--ifm-table-cell-color:inherit;--ifm-link-color:var(--ifm-color-primary);--ifm-link-decoration:none;--ifm-link-hover-color:var(--ifm-link-color);--ifm-link-hover-decoration:underline;--ifm-paragraph-margin-bottom:var(--ifm-leading);--ifm-blockquote-font-size:var(--ifm-font-size-base);--ifm-blockquote-border-left-width:2px;--ifm-blockquote-padding-horizontal:var(--ifm-spacing-horizontal);--ifm-blockquote-padding-vertical:0;--ifm-blockquote-shadow:none;--ifm-blockquote-color:var(--ifm-color-emphasis-800);--ifm-blockquote-border-color:var(--ifm-color-emphasis-300);--ifm-hr-background-color:var(--ifm-color-emphasis-500);--ifm-hr-height:1px;--ifm-hr-margin-vertical:1.5rem;--ifm-scrollbar-size:7px;--ifm-scrollbar-track-background-color:#f1f1f1;--ifm-scrollbar-thumb-background-color:silver;--ifm-scrollbar-thumb-hover-background-color:#a7a7a7;--ifm-alert-background-color:inherit;--ifm-alert-border-color:inherit;--ifm-alert-border-radius:var(--ifm-global-radius);--ifm-alert-border-width:0px;--ifm-alert-border-left-width:5px;--ifm-alert-color:var(--ifm-font-color-base);--ifm-alert-padding-horizontal:var(--ifm-spacing-horizontal);--ifm-alert-padding-vertical:var(--ifm-spacing-vertical);--ifm-alert-shadow:var(--ifm-global-shadow-lw);--ifm-avatar-intro-margin:1rem;--ifm-avatar-intro-alignment:inherit;--ifm-avatar-photo-size:3rem;--ifm-badge-background-color:inherit;--ifm-badge-border-color:inherit;--ifm-badge-border-radius:var(--ifm-global-radius);--ifm-badge-border-width:var(--ifm-global-border-width);--ifm-badge-color:var(--ifm-color-white);--ifm-badge-padding-horizontal:calc(var(--ifm-spacing-horizontal)*0.5);--ifm-badge-padding-vertical:calc(var(--ifm-spacing-vertical)*0.25);--ifm-breadcrumb-border-radius:1.5rem;--ifm-breadcrumb-spacing:0.5rem;--ifm-breadcrumb-color-active:var(--ifm-color-primary);--ifm-breadcrumb-ite
m-background-active:var(--ifm-hover-overlay);--ifm-breadcrumb-padding-horizontal:0.8rem;--ifm-breadcrumb-padding-vertical:0.4rem;--ifm-breadcrumb-size-multiplier:1;--ifm-breadcrumb-separator:url('data:image/svg+xml;utf8,');--ifm-breadcrumb-separator-filter:none;--ifm-breadcrumb-separator-size:0.5rem;--ifm-breadcrumb-separator-size-multiplier:1.25;--ifm-button-background-color:inherit;--ifm-button-border-color:var(--ifm-button-background-color);--ifm-button-border-width:var(--ifm-global-border-width);--ifm-button-font-weight:var(--ifm-font-weight-bold);--ifm-button-padding-horizontal:1.5rem;--ifm-button-padding-vertical:0.375rem;--ifm-button-size-multiplier:1;--ifm-button-transition-duration:var(--ifm-transition-fast);--ifm-button-border-radius:calc(var(--ifm-global-radius)*var(--ifm-button-size-multiplier));--ifm-button-group-spacing:2px;--ifm-card-background-color:var(--ifm-background-surface-color);--ifm-card-border-radius:calc(var(--ifm-global-radius)*2);--ifm-card-horizontal-spacing:var(--ifm-global-spacing);--ifm-card-vertical-spacing:var(--ifm-global-spacing);--ifm-toc-border-color:var(--ifm-color-emphasis-300);--ifm-toc-link-color:var(--ifm-color-content-secondary);--ifm-toc-padding-vertical:0.5rem;--ifm-toc-padding-horizontal:0.5rem;--ifm-dropdown-background-color:var(--ifm-background-surface-color);--ifm-dropdown-font-weight:var(--ifm-font-weight-semibold);--ifm-dropdown-link-color:var(--ifm-font-color-base);--ifm-dropdown-hover-background-color:var(--ifm-hover-overlay);--ifm-footer-background-color:var(--ifm-color-emphasis-100);--ifm-footer-color:inherit;--ifm-footer-link-color:var(--ifm-color-emphasis-700);--ifm-footer-link-hover-color:var(--ifm-color-primary);--ifm-footer-link-horizontal-spacing:0.5rem;--ifm-footer-padding-horizontal:calc(var(--ifm-spacing-horizontal)*2);--ifm-footer-padding-vertical:calc(var(--ifm-spacing-vertical)*2);--ifm-footer-title-color:inherit;--ifm-footer-logo-max-width:min(30rem,90vw);--ifm-hero-background-color:var(--ifm-backg
round-surface-color);--ifm-hero-text-color:var(--ifm-color-emphasis-800);--ifm-menu-color:var(--ifm-color-emphasis-700);--ifm-menu-color-active:var(--ifm-color-primary);--ifm-menu-color-background-active:var(--ifm-hover-overlay);--ifm-menu-color-background-hover:var(--ifm-hover-overlay);--ifm-menu-link-padding-horizontal:0.75rem;--ifm-menu-link-padding-vertical:0.375rem;--ifm-menu-link-sublist-icon:url('data:image/svg+xml;utf8,');--ifm-menu-link-sublist-icon-filter:none;--ifm-navbar-background-color:var(--ifm-background-surface-color);--ifm-navbar-height:3.75rem;--ifm-navbar-item-padding-horizontal:0.75rem;--ifm-navbar-item-padding-vertical:0.25rem;--ifm-navbar-link-color:var(--ifm-font-color-base);--ifm-navbar-link-active-color:var(--ifm-link-color);--ifm-navbar-padding-horizontal:var(--ifm-spacing-horizontal);--ifm-navbar-padding-vertical:calc(var(--ifm-spacing-vertical)*0.5);--ifm-navbar-shadow:var(--ifm-global-shadow-lw);--ifm-navbar-search-input-background-color:var(--ifm-color-emphasis-200);--ifm-navbar-search-input-color:var(--ifm-color-emphasis-800);--ifm-navbar-search-input-placeholder-color:var(--ifm-color-emphasis-500);--ifm-navbar-search-input-icon:url('data:image/svg+xml;utf8,');--ifm-navbar-sidebar-width:83vw;--ifm-pagination-border-radius:var(--ifm-global-radius);--ifm-pagination-color-active:var(--ifm-color-primary);--ifm-pagination-font-size:1rem;--ifm-pagination-item-active-background:var(--ifm-hover-overlay);--ifm-pagination-page-spacing:0.2em;--ifm-pagination-padding-horizontal:calc(var(--ifm-spacing-horizontal)*1);--ifm-pagination-padding-vertical:calc(var(--ifm-spacing-vertical)*0.25);--ifm-pagination-nav-border-radius:var(--ifm-global-radius);--ifm-pagination-nav-color-hover:var(--ifm-color-primary);--ifm-pills-color-active:var(--ifm-color-primary);--ifm-pills-color-background-active:var(--ifm-hover-overlay);--ifm-pills-spacing:0.125rem;--ifm-tabs-color:var(--ifm-font-color-secondary);--ifm-tabs-color-active:var(--ifm-color-primary);--ifm-tabs
-color-active-border:var(--ifm-tabs-color-active);--ifm-tabs-padding-horizontal:1rem;--ifm-tabs-padding-vertical:1rem;--docusaurus-progress-bar-color:var(--ifm-color-primary);--ifm-color-primary:#4c4ddc;--ifm-color-primary-dark:#403fc2;--ifm-color-primary-darker:#35359d;--ifm-color-primary-darkest:#30317c;--ifm-color-primary-light:#626ae9;--ifm-color-primary-lighter:#828ef1;--ifm-color-primary-lightest:#a7b5f6;--ifm-code-font-size:95%;--docusaurus-highlighted-code-line-bg:#0000001a;--ifm-h1-font-size:40px;--ifm-table-cell-padding:6px 12px;--ifm-table-head-background:#f7fafc;--ifm-table-border-color:#d8dbe6;--ifm-table-stripe-background:#fff;--docsearch-primary-color:#5468ff;--docsearch-text-color:#1c1e21;--docsearch-spacing:12px;--docsearch-icon-stroke-width:1.4;--docsearch-highlight-color:var(--docsearch-primary-color);--docsearch-muted-color:#969faf;--docsearch-container-background:#656c85cc;--docsearch-logo-color:#5468ff;--docsearch-modal-width:560px;--docsearch-modal-height:600px;--docsearch-modal-background:#f5f6f7;--docsearch-modal-shadow:inset 1px 1px 0 0 #ffffff80,0 3px 8px 0 #555a64;--docsearch-searchbox-height:56px;--docsearch-searchbox-background:#ebedf0;--docsearch-searchbox-focus-background:#fff;--docsearch-searchbox-shadow:inset 0 0 0 2px var(--docsearch-primary-color);--docsearch-hit-height:56px;--docsearch-hit-color:#444950;--docsearch-hit-active-color:#fff;--docsearch-hit-background:#fff;--docsearch-hit-shadow:0 1px 3px 0 #d4d9e1;--docsearch-key-gradient:linear-gradient(-225deg,#d5dbe4,#f8f8f8);--docsearch-key-shadow:inset 0 -2px 0 0 #cdcde6,inset 0 0 1px 1px #fff,0 1px 2px 1px #1e235a66;--docsearch-key-pressed-shadow:inset 0 -2px 0 0 #cdcde6,inset 0 0 1px 1px #fff,0 1px 1px 0 #1e235a66;--docsearch-footer-height:44px;--docsearch-footer-background:#fff;--docsearch-footer-shadow:0 -1px 0 0 #e0e3e8,0 -3px 6px 0 
#45629b1f;--docsearch-primary-color:var(--ifm-color-primary);--docsearch-text-color:var(--ifm-font-color-base);--docusaurus-announcement-bar-height:auto;--docusaurus-tag-list-border:var(--ifm-color-emphasis-300);--docusaurus-collapse-button-bg:#0000;--docusaurus-collapse-button-bg-hover:#0000001a;--doc-sidebar-width:300px;--doc-sidebar-hidden-width:30px}.badge--danger,.badge--info,.badge--primary,.badge--secondary,.badge--success,.badge--warning{--ifm-badge-border-color:var(--ifm-badge-background-color)}.button--link,.button--outline{--ifm-button-background-color:#0000}html{-webkit-font-smoothing:antialiased;-webkit-text-size-adjust:100%;text-size-adjust:100%;background-color:var(--ifm-background-color);color:var(--ifm-font-color-base);color-scheme:var(--ifm-color-scheme);font:var(--ifm-font-size-base)/var(--ifm-line-height-base) var(--ifm-font-family-base);text-rendering:optimizelegibility}iframe{border:0;color-scheme:auto}.container{margin:0 auto;max-width:var(--ifm-container-width)}.container--fluid{max-width:inherit}.row{display:flex;flex-wrap:wrap;margin:0 calc(var(--ifm-spacing-horizontal)*-1)}.margin-bottom--none,.margin-vert--none,.markdown>:last-child{margin-bottom:0!important}.margin-top--none,.margin-vert--none,.tabItem_LNqP{margin-top:0!important}.row--no-gutters{margin-left:0;margin-right:0}.margin-horiz--none,.margin-right--none{margin-right:0!important}.row--no-gutters>.col{padding-left:0;padding-right:0}.row--align-top{align-items:flex-start}.row--align-bottom{align-items:flex-end}.menuExternalLink_NmtK,.row--align-center{align-items:center}.row--align-stretch{align-items:stretch}.row--align-baseline{align-items:baseline}.col{--ifm-col-width:100%;flex:1 
0;margin-left:0;max-width:var(--ifm-col-width)}.padding-bottom--none,.padding-vert--none{padding-bottom:0!important}.padding-top--none,.padding-vert--none{padding-top:0!important}.padding-horiz--none,.padding-left--none{padding-left:0!important}.padding-horiz--none,.padding-right--none{padding-right:0!important}.col[class*=col--]{flex:0 0 var(--ifm-col-width)}.col--1{--ifm-col-width:8.33333%}.col--offset-1{margin-left:8.33333%}.col--2{--ifm-col-width:16.66667%}.col--offset-2{margin-left:16.66667%}.col--3{--ifm-col-width:25%}.col--offset-3{margin-left:25%}.col--4{--ifm-col-width:33.33333%}.col--offset-4{margin-left:33.33333%}.col--5{--ifm-col-width:41.66667%}.col--offset-5{margin-left:41.66667%}.col--6{--ifm-col-width:50%}.col--offset-6{margin-left:50%}.col--7{--ifm-col-width:58.33333%}.col--offset-7{margin-left:58.33333%}.col--8{--ifm-col-width:66.66667%}.col--offset-8{margin-left:66.66667%}.col--9{--ifm-col-width:75%}.col--offset-9{margin-left:75%}.col--10{--ifm-col-width:83.33333%}.col--offset-10{margin-left:83.33333%}.col--11{--ifm-col-width:91.66667%}.col--offset-11{margin-left:91.66667%}.col--12{--ifm-col-width:100%}.col--offset-12{margin-left:100%}.margin-horiz--none,.margin-left--none{margin-left:0!important}.margin--none{margin:0!important}.margin-bottom--xs,.margin-vert--xs{margin-bottom:.25rem!important}.margin-top--xs,.margin-vert--xs{margin-top:.25rem!important}.margin-horiz--xs,.margin-left--xs{margin-left:.25rem!important}.margin-horiz--xs,.margin-right--xs{margin-right:.25rem!important}.margin--xs{margin:.25rem!important}.margin-bottom--sm,.margin-vert--sm{margin-bottom:.5rem!important}.margin-top--sm,.margin-vert--sm{margin-top:.5rem!important}.margin-horiz--sm,.margin-left--sm{margin-left:.5rem!important}.margin-horiz--sm,.margin-right--sm{margin-right:.5rem!important}.margin--sm{margin:.5rem!important}.margin-bottom--md,.margin-vert--md{margin-bottom:1rem!important}.margin-top--md,.margin-vert--md{margin-top:1rem!important}.margin-horiz--md,.margin
-left--md{margin-left:1rem!important}.margin-horiz--md,.margin-right--md{margin-right:1rem!important}.margin--md{margin:1rem!important}.margin-bottom--lg,.margin-vert--lg{margin-bottom:2rem!important}.margin-top--lg,.margin-vert--lg{margin-top:2rem!important}.margin-horiz--lg,.margin-left--lg{margin-left:2rem!important}.margin-horiz--lg,.margin-right--lg{margin-right:2rem!important}.margin--lg{margin:2rem!important}.margin-bottom--xl,.margin-vert--xl{margin-bottom:5rem!important}.margin-top--xl,.margin-vert--xl{margin-top:5rem!important}.margin-horiz--xl,.margin-left--xl{margin-left:5rem!important}.margin-horiz--xl,.margin-right--xl{margin-right:5rem!important}.margin--xl{margin:5rem!important}.padding--none{padding:0!important}.padding-bottom--xs,.padding-vert--xs{padding-bottom:.25rem!important}.padding-top--xs,.padding-vert--xs{padding-top:.25rem!important}.padding-horiz--xs,.padding-left--xs{padding-left:.25rem!important}.padding-horiz--xs,.padding-right--xs{padding-right:.25rem!important}.padding--xs{padding:.25rem!important}.padding-bottom--sm,.padding-vert--sm{padding-bottom:.5rem!important}.padding-top--sm,.padding-vert--sm{padding-top:.5rem!important}.padding-horiz--sm,.padding-left--sm{padding-left:.5rem!important}.padding-horiz--sm,.padding-right--sm{padding-right:.5rem!important}.padding--sm{padding:.5rem!important}.padding-bottom--md,.padding-vert--md{padding-bottom:1rem!important}.padding-top--md,.padding-vert--md{padding-top:1rem!important}.padding-horiz--md,.padding-left--md{padding-left:1rem!important}.padding-horiz--md,.padding-right--md{padding-right:1rem!important}.padding--md{padding:1rem!important}.padding-bottom--lg,.padding-vert--lg{padding-bottom:2rem!important}.padding-top--lg,.padding-vert--lg{padding-top:2rem!important}.padding-horiz--lg,.padding-left--lg{padding-left:2rem!important}.padding-horiz--lg,.padding-right--lg{padding-right:2rem!important}.padding--lg{padding:2rem!important}.padding-bottom--xl,.padding-vert--xl{padding-bottom:5r
em!important}.padding-top--xl,.padding-vert--xl{padding-top:5rem!important}.padding-horiz--xl,.padding-left--xl{padding-left:5rem!important}.padding-horiz--xl,.padding-right--xl{padding-right:5rem!important}.padding--xl{padding:5rem!important}code{background-color:var(--ifm-code-background);border:.1rem solid #0000001a;border-radius:var(--ifm-code-border-radius);font-family:var(--ifm-font-family-monospace);font-size:var(--ifm-code-font-size);padding:var(--ifm-code-padding-vertical) var(--ifm-code-padding-horizontal)}a code{color:inherit}pre{background-color:var(--ifm-pre-background);border-radius:var(--ifm-pre-border-radius);color:var(--ifm-pre-color);font:var(--ifm-code-font-size)/var(--ifm-pre-line-height) var(--ifm-font-family-monospace);padding:var(--ifm-pre-padding)}pre code{background-color:initial;border:none;font-size:100%;line-height:inherit;padding:0}kbd{background-color:var(--ifm-color-emphasis-0);border:1px solid var(--ifm-color-emphasis-400);border-radius:.2rem;box-shadow:inset 0 -1px 0 var(--ifm-color-emphasis-400);color:var(--ifm-color-emphasis-800);font:80% var(--ifm-font-family-monospace);padding:.15rem .3rem}h1,h2,h3,h4,h5,h6{color:var(--ifm-heading-color);font-family:var(--ifm-heading-font-family);font-weight:var(--ifm-heading-font-weight);line-height:var(--ifm-heading-line-height);margin:var(--ifm-heading-margin-top) 0 var(--ifm-heading-margin-bottom) 
0}h1{font-size:var(--ifm-h1-font-size)}h2{font-size:var(--ifm-h2-font-size)}h3{font-size:var(--ifm-h3-font-size)}h4{font-size:var(--ifm-h4-font-size)}h5{font-size:var(--ifm-h5-font-size)}h6{font-size:var(--ifm-h6-font-size)}img{max-width:100%;display:block;margin-left:auto;margin-right:auto}img[align=right]{padding-left:var(--image-alignment-padding)}img[align=left]{padding-right:var(--image-alignment-padding)}.markdown{--ifm-h1-vertical-rhythm-top:3;--ifm-h2-vertical-rhythm-top:2;--ifm-h3-vertical-rhythm-top:1.5;--ifm-heading-vertical-rhythm-top:1.25;--ifm-h1-vertical-rhythm-bottom:1.25;--ifm-heading-vertical-rhythm-bottom:1}.markdown:after,.markdown:before{content:"";display:table}.markdown:after{clear:both}.markdown h1:first-child{--ifm-h1-font-size:3rem;margin-bottom:calc(var(--ifm-h1-vertical-rhythm-bottom)*var(--ifm-leading));--ifm-h1-font-size:40px}.markdown>h2{--ifm-h2-font-size:2rem;margin-top:calc(var(--ifm-h2-vertical-rhythm-top)*var(--ifm-leading))}.markdown>h3{--ifm-h3-font-size:1.5rem;margin-top:calc(var(--ifm-h3-vertical-rhythm-top)*var(--ifm-leading))}.markdown>h4,.markdown>h5,.markdown>h6{margin-top:calc(var(--ifm-heading-vertical-rhythm-top)*var(--ifm-leading))}.markdown>p,.markdown>pre,.markdown>ul,.tabList__CuJ{margin-bottom:var(--ifm-leading)}.markdown li>p{margin-top:var(--ifm-list-paragraph-margin)}.markdown li+li{margin-top:var(--ifm-list-item-margin)}ol,ul{margin:0 0 var(--ifm-list-margin);padding-left:var(--ifm-list-left-padding)}ol ol,ul ol{list-style-type:lower-roman}ol ol ol,ol ul ol,ul ol ol,ul ul ol{list-style-type:lower-alpha}table{border-collapse:collapse;margin-bottom:var(--ifm-spacing-vertical);border-collapse:initial;border-spacing:0;display:table}table thead tr{border-bottom:2px solid var(--ifm-table-border-color)}table thead,table tr:nth-child(2n){background-color:var(--ifm-table-stripe-background)}table tr{background-color:var(--ifm-table-background);border-top:var(--ifm-table-border-width) solid 
var(--ifm-table-border-color)}table td,table th{border:var(--ifm-table-border-width) solid var(--ifm-table-border-color);padding:var(--ifm-table-cell-padding)}table th{background-color:var(--ifm-table-head-background);color:var(--ifm-table-head-color);font-weight:var(--ifm-table-head-font-weight)}table td{color:var(--ifm-table-cell-color)}strong{font-weight:var(--ifm-font-weight-bold)}a{color:var(--ifm-link-color);text-decoration:var(--ifm-link-decoration)}a:hover{color:var(--ifm-link-hover-color);text-decoration:var(--ifm-link-hover-decoration)}.button:hover,.text--no-decoration,.text--no-decoration:hover,a:not([href]){text-decoration:none}p{margin:0 0 var(--ifm-paragraph-margin-bottom)}blockquote{border-left:var(--ifm-blockquote-border-left-width) solid var(--ifm-blockquote-border-color);box-shadow:var(--ifm-blockquote-shadow);color:var(--ifm-blockquote-color);font-size:var(--ifm-blockquote-font-size);padding:var(--ifm-blockquote-padding-vertical) var(--ifm-blockquote-padding-horizontal)}blockquote>:first-child{margin-top:0}blockquote>:last-child{margin-bottom:0}hr{background-color:var(--ifm-hr-background-color);border:0;height:var(--ifm-hr-height);margin:var(--ifm-hr-margin-vertical) 0}.shadow--lw{box-shadow:var(--ifm-global-shadow-lw)!important}.shadow--md{box-shadow:var(--ifm-global-shadow-md)!important}.shadow--tl{box-shadow:var(--ifm-global-shadow-tl)!important}.text--primary,.wordWrapButtonEnabled_EoeP 
.wordWrapButtonIcon_Bwma{color:var(--ifm-color-primary)}.text--secondary{color:var(--ifm-color-secondary)}.text--success{color:var(--ifm-color-success)}.text--info{color:var(--ifm-color-info)}.text--warning{color:var(--ifm-color-warning)}.text--danger{color:var(--ifm-color-danger)}.text--center{text-align:center}.text--left{text-align:left}.text--justify{text-align:justify}.text--right{text-align:right}.text--capitalize{text-transform:capitalize}.text--lowercase{text-transform:lowercase}.admonitionHeading_tbUL,.alert__heading,.text--uppercase{text-transform:uppercase}.text--light{font-weight:var(--ifm-font-weight-light)}.text--normal{font-weight:var(--ifm-font-weight-normal)}.text--semibold{font-weight:var(--ifm-font-weight-semibold)}.text--bold{font-weight:var(--ifm-font-weight-bold)}.text--italic{font-style:italic}.text--truncate{overflow:hidden;text-overflow:ellipsis;white-space:nowrap}.text--break{word-wrap:break-word!important;word-break:break-word!important}.clean-btn{background:none;border:none;color:inherit;cursor:pointer;font-family:inherit;padding:0}.alert,.alert 
.close{color:var(--ifm-alert-foreground-color)}.clean-list{list-style:none;padding-left:0}.alert--primary{--ifm-alert-background-color:var(--ifm-color-primary-contrast-background);--ifm-alert-background-color-highlight:#3578e526;--ifm-alert-foreground-color:var(--ifm-color-primary-contrast-foreground);--ifm-alert-border-color:var(--ifm-color-primary-dark)}.alert--secondary{--ifm-alert-background-color:var(--ifm-color-secondary-contrast-background);--ifm-alert-background-color-highlight:#ebedf026;--ifm-alert-foreground-color:var(--ifm-color-secondary-contrast-foreground);--ifm-alert-border-color:var(--ifm-color-secondary-dark)}.alert--success{--ifm-alert-background-color:var(--ifm-color-success-contrast-background);--ifm-alert-background-color-highlight:#00a40026;--ifm-alert-foreground-color:var(--ifm-color-success-contrast-foreground);--ifm-alert-border-color:var(--ifm-color-success-dark)}.alert--info{--ifm-alert-background-color:var(--ifm-color-info-contrast-background);--ifm-alert-background-color-highlight:#54c7ec26;--ifm-alert-foreground-color:var(--ifm-color-info-contrast-foreground);--ifm-alert-border-color:var(--ifm-color-info-dark)}.alert--warning{--ifm-alert-background-color:var(--ifm-color-warning-contrast-background);--ifm-alert-background-color-highlight:#ffba0026;--ifm-alert-foreground-color:var(--ifm-color-warning-contrast-foreground);--ifm-alert-border-color:var(--ifm-color-warning-dark)}.alert--danger{--ifm-alert-background-color:var(--ifm-color-danger-contrast-background);--ifm-alert-background-color-highlight:#fa383e26;--ifm-alert-foreground-color:var(--ifm-color-danger-contrast-foreground);--ifm-alert-border-color:var(--ifm-color-danger-dark)}.alert{--ifm-code-background:var(--ifm-alert-background-color-highlight);--ifm-link-color:var(--ifm-alert-foreground-color);--ifm-link-hover-color:var(--ifm-alert-foreground-color);--ifm-link-decoration:underline;--ifm-tabs-color:var(--ifm-alert-foreground-color);--ifm-tabs-color-active:var(--ifm-alert-foregr
ound-color);--ifm-tabs-color-active-border:var(--ifm-alert-border-color);background-color:var(--ifm-alert-background-color);border:var(--ifm-alert-border-width) solid var(--ifm-alert-border-color);border-left-width:var(--ifm-alert-border-left-width);border-radius:var(--ifm-alert-border-radius);box-shadow:var(--ifm-alert-shadow);padding:var(--ifm-alert-padding-vertical) var(--ifm-alert-padding-horizontal)}.alert__heading{align-items:center;display:flex;font:700 var(--ifm-h5-font-size)/var(--ifm-heading-line-height) var(--ifm-heading-font-family);margin-bottom:.5rem}.alert__icon{display:inline-flex;margin-right:.4em}.alert__icon svg{fill:var(--ifm-alert-foreground-color);stroke:var(--ifm-alert-foreground-color);stroke-width:0}.alert .close{margin:calc(var(--ifm-alert-padding-vertical)*-1) calc(var(--ifm-alert-padding-horizontal)*-1) 0 0;opacity:.75}.alert .close:focus,.alert .close:hover{opacity:1}.alert a{text-decoration-color:var(--ifm-alert-border-color)}.alert a:hover{text-decoration-thickness:2px}.avatar{column-gap:var(--ifm-avatar-intro-margin);display:flex}.avatar__photo{border-radius:50%;display:block;height:var(--ifm-avatar-photo-size);overflow:hidden;width:var(--ifm-avatar-photo-size)}.card--full-height,.navbar__logo img,body,html{height:100%}.avatar__photo--sm{--ifm-avatar-photo-size:2rem}.avatar__photo--lg{--ifm-avatar-photo-size:4rem}.avatar__photo--xl{--ifm-avatar-photo-size:6rem}.avatar__intro{display:flex;flex:1 1;flex-direction:column;justify-content:center;text-align:var(--ifm-avatar-intro-alignment)}.badge,.breadcrumbs__item,.breadcrumbs__link,.button,.dropdown>.navbar__link:after{display:inline-block}.avatar__name{font:700 var(--ifm-h4-font-size)/var(--ifm-heading-line-height) 
var(--ifm-font-family-base)}.avatar__subtitle{margin-top:.25rem}.avatar--vertical{--ifm-avatar-intro-alignment:center;--ifm-avatar-intro-margin:0.5rem;align-items:center;flex-direction:column}.badge{background-color:var(--ifm-badge-background-color);border-radius:var(--ifm-badge-border-radius);color:var(--ifm-badge-color)}.badge--primary{--ifm-badge-background-color:var(--ifm-color-primary)}.badge--secondary{--ifm-badge-background-color:var(--ifm-color-secondary);color:var(--ifm-color-black)}.breadcrumbs__link,.button.button--secondary.button--outline:not(.button--active):not(:hover){color:var(--ifm-font-color-base)}.badge--success{--ifm-badge-background-color:var(--ifm-color-success)}.badge--info{--ifm-badge-background-color:var(--ifm-color-info)}.badge--warning{--ifm-badge-background-color:var(--ifm-color-warning)}.badge--danger{--ifm-badge-background-color:var(--ifm-color-danger)}.breadcrumbs{margin-bottom:0;padding-left:0}.breadcrumbs__item:not(:last-child):after{background:var(--ifm-breadcrumb-separator) center;content:" ";display:inline-block;filter:var(--ifm-breadcrumb-separator-filter);height:calc(var(--ifm-breadcrumb-separator-size)*var(--ifm-breadcrumb-size-multiplier)*var(--ifm-breadcrumb-separator-size-multiplier));margin:0 var(--ifm-breadcrumb-spacing);opacity:.5;width:calc(var(--ifm-breadcrumb-separator-size)*var(--ifm-breadcrumb-size-multiplier)*var(--ifm-breadcrumb-separator-size-multiplier))}.breadcrumbs__item--active .breadcrumbs__link{background:var(--ifm-breadcrumb-item-background-active);color:var(--ifm-breadcrumb-color-active)}.breadcrumbs__link{border-radius:var(--ifm-breadcrumb-border-radius);font-size:calc(1rem*var(--ifm-breadcrumb-size-multiplier));padding:calc(var(--ifm-breadcrumb-padding-vertical)*var(--ifm-breadcrumb-size-multiplier)) 
calc(var(--ifm-breadcrumb-padding-horizontal)*var(--ifm-breadcrumb-size-multiplier));transition-duration:var(--ifm-transition-fast);transition-property:background,color}.breadcrumbs__link:any-link:hover,.breadcrumbs__link:link:hover,.breadcrumbs__link:visited:hover,area[href].breadcrumbs__link:hover{background:var(--ifm-breadcrumb-item-background-active);text-decoration:none}.breadcrumbs--sm{--ifm-breadcrumb-size-multiplier:0.8}.breadcrumbs--lg{--ifm-breadcrumb-size-multiplier:1.2}.button{background-color:var(--ifm-button-background-color);border:var(--ifm-button-border-width) solid var(--ifm-button-border-color);border-radius:var(--ifm-button-border-radius);cursor:pointer;font-size:calc(.875rem*var(--ifm-button-size-multiplier));font-weight:var(--ifm-button-font-weight);line-height:1.5;padding:calc(var(--ifm-button-padding-vertical)*var(--ifm-button-size-multiplier)) calc(var(--ifm-button-padding-horizontal)*var(--ifm-button-size-multiplier));text-align:center;transition-duration:var(--ifm-button-transition-duration);transition-property:color,background,border-color;-webkit-user-select:none;user-select:none;white-space:nowrap}.button,.button:hover{color:var(--ifm-button-color)}.button--outline{--ifm-button-color:var(--ifm-button-border-color)}.button--outline:hover{--ifm-button-background-color:var(--ifm-button-border-color)}.button--link{--ifm-button-border-color:#0000;color:var(--ifm-link-color);text-decoration:var(--ifm-link-decoration)}.button--link.button--active,.button--link:active,.button--link:hover{color:var(--ifm-link-hover-color);text-decoration:var(--ifm-link-hover-decoration)}.button.disabled,.button:disabled,.button[disabled]{opacity:.65;pointer-events:none}.button--sm{--ifm-button-size-multiplier:0.8}.button--lg{--ifm-button-size-multiplier:1.35}.button--block{display:block;width:100%}.button.button--secondary{color:var(--ifm-color-gray-900)}:where(.button--primary){--ifm-button-background-color:var(--ifm-color-primary);--ifm-button-border-color:var
(--ifm-color-primary)}:where(.button--primary):not(.button--outline):hover{--ifm-button-background-color:var(--ifm-color-primary-dark);--ifm-button-border-color:var(--ifm-color-primary-dark)}.button--primary.button--active,.button--primary:active{--ifm-button-background-color:var(--ifm-color-primary-darker);--ifm-button-border-color:var(--ifm-color-primary-darker)}:where(.button--secondary){--ifm-button-background-color:var(--ifm-color-secondary);--ifm-button-border-color:var(--ifm-color-secondary)}:where(.button--secondary):not(.button--outline):hover{--ifm-button-background-color:var(--ifm-color-secondary-dark);--ifm-button-border-color:var(--ifm-color-secondary-dark)}.button--secondary.button--active,.button--secondary:active{--ifm-button-background-color:var(--ifm-color-secondary-darker);--ifm-button-border-color:var(--ifm-color-secondary-darker)}:where(.button--success){--ifm-button-background-color:var(--ifm-color-success);--ifm-button-border-color:var(--ifm-color-success)}:where(.button--success):not(.button--outline):hover{--ifm-button-background-color:var(--ifm-color-success-dark);--ifm-button-border-color:var(--ifm-color-success-dark)}.button--success.button--active,.button--success:active{--ifm-button-background-color:var(--ifm-color-success-darker);--ifm-button-border-color:var(--ifm-color-success-darker)}:where(.button--info){--ifm-button-background-color:var(--ifm-color-info);--ifm-button-border-color:var(--ifm-color-info)}:where(.button--info):not(.button--outline):hover{--ifm-button-background-color:var(--ifm-color-info-dark);--ifm-button-border-color:var(--ifm-color-info-dark)}.button--info.button--active,.button--info:active{--ifm-button-background-color:var(--ifm-color-info-darker);--ifm-button-border-color:var(--ifm-color-info-darker)}:where(.button--warning){--ifm-button-background-color:var(--ifm-color-warning);--ifm-button-border-color:var(--ifm-color-warning)}:where(.button--warning):not(.button--outline):hover{--ifm-button-background-color:v
ar(--ifm-color-warning-dark);--ifm-button-border-color:var(--ifm-color-warning-dark)}.button--warning.button--active,.button--warning:active{--ifm-button-background-color:var(--ifm-color-warning-darker);--ifm-button-border-color:var(--ifm-color-warning-darker)}:where(.button--danger){--ifm-button-background-color:var(--ifm-color-danger);--ifm-button-border-color:var(--ifm-color-danger)}:where(.button--danger):not(.button--outline):hover{--ifm-button-background-color:var(--ifm-color-danger-dark);--ifm-button-border-color:var(--ifm-color-danger-dark)}.button--danger.button--active,.button--danger:active{--ifm-button-background-color:var(--ifm-color-danger-darker);--ifm-button-border-color:var(--ifm-color-danger-darker)}.button-group{display:inline-flex;gap:var(--ifm-button-group-spacing)}.button-group>.button:not(:first-child){border-bottom-left-radius:0;border-top-left-radius:0}.button-group>.button:not(:last-child){border-bottom-right-radius:0;border-top-right-radius:0}.button-group--block{display:flex;justify-content:stretch}.button-group--block>.button{flex-grow:1}.card{background-color:var(--ifm-card-background-color);border-radius:var(--ifm-card-border-radius);box-shadow:var(--ifm-global-shadow-lw);display:flex;flex-direction:column;overflow:hidden}.card__image{padding-top:var(--ifm-card-vertical-spacing)}.card__image:first-child{padding-top:0}.card__body,.card__footer,.card__header{padding:var(--ifm-card-vertical-spacing) var(--ifm-card-horizontal-spacing)}.card__body:not(:last-child),.card__footer:not(:last-child),.card__header:not(:last-child){padding-bottom:0}.card__body>:last-child,.card__footer>:last-child,.card__header>:last-child{margin-bottom:0}.card__footer{margin-top:auto}.table-of-contents{font-size:.8rem;margin-bottom:0;padding:var(--ifm-toc-padding-vertical) 0}.table-of-contents,.table-of-contents ul{list-style:none;padding-left:var(--ifm-toc-padding-horizontal)}.table-of-contents li{margin:var(--ifm-toc-padding-vertical) 
var(--ifm-toc-padding-horizontal)}.table-of-contents__left-border{border-left:1px solid var(--ifm-toc-border-color)}.table-of-contents__link{color:var(--ifm-toc-link-color);display:block}.table-of-contents__link--active,.table-of-contents__link--active code,.table-of-contents__link:hover,.table-of-contents__link:hover code{color:var(--ifm-color-primary);text-decoration:none}.close{color:var(--ifm-color-black);float:right;font-size:1.5rem;font-weight:var(--ifm-font-weight-bold);line-height:1;opacity:.5;padding:1rem;transition:opacity var(--ifm-transition-fast) var(--ifm-transition-timing-default)}.close:hover{opacity:.7}.close:focus,.theme-code-block-highlighted-line .codeLineNumber_Tfdd:before{opacity:.8}.dropdown{display:inline-flex;font-weight:var(--ifm-dropdown-font-weight);position:relative;vertical-align:top}.dropdown--hoverable:hover .dropdown__menu,.dropdown--show .dropdown__menu{opacity:1;pointer-events:all;transform:translateY(-1px);visibility:visible}#nprogress,.dropdown__menu,.navbar__item.dropdown .navbar__link:not([href]){pointer-events:none}.dropdown--right .dropdown__menu{left:inherit;right:0}.dropdown--nocaret .navbar__link:after{content:none!important}.dropdown__menu{background-color:var(--ifm-dropdown-background-color);border-radius:var(--ifm-global-radius);box-shadow:var(--ifm-global-shadow-md);left:0;list-style:none;max-height:80vh;min-width:10rem;opacity:0;overflow-y:auto;padding:.5rem;position:absolute;top:calc(100% - var(--ifm-navbar-item-padding-vertical) + .3rem);transform:translateY(-.625rem);transition-duration:var(--ifm-transition-fast);transition-property:opacity,transform,visibility;transition-timing-function:var(--ifm-transition-timing-default);visibility:hidden;z-index:var(--ifm-z-index-dropdown)}.menu__caret,.menu__link,.menu__list-item-collapsible{border-radius:.25rem;transition:background var(--ifm-transition-fast) 
var(--ifm-transition-timing-default)}.dropdown__link{border-radius:.25rem;color:var(--ifm-dropdown-link-color);display:block;font-size:.875rem;margin-top:.2rem;padding:.25rem .5rem;white-space:nowrap}.dropdown__link--active,.dropdown__link:hover{background-color:var(--ifm-dropdown-hover-background-color);color:var(--ifm-dropdown-link-color);text-decoration:none}.dropdown__link--active,.dropdown__link--active:hover{--ifm-dropdown-link-color:var(--ifm-link-color)}.dropdown>.navbar__link:after{border-color:currentcolor #0000;border-style:solid;border-width:.4em .4em 0;content:"";margin-left:.3em;position:relative;top:2px;transform:translateY(-50%)}.footer{background-color:var(--ifm-footer-background-color);color:var(--ifm-footer-color);padding:var(--ifm-footer-padding-vertical) var(--ifm-footer-padding-horizontal)}.footer--dark{--ifm-footer-background-color:#303846;--ifm-footer-color:var(--ifm-footer-link-color);--ifm-footer-link-color:var(--ifm-color-secondary);--ifm-footer-title-color:var(--ifm-color-white)}.footer__links{margin-bottom:1rem}.footer__link-item{color:var(--ifm-footer-link-color);line-height:2}.footer__link-item:hover{color:var(--ifm-footer-link-hover-color)}.footer__link-separator{margin:0 var(--ifm-footer-link-horizontal-spacing)}.footer__logo{margin-top:1rem;max-width:var(--ifm-footer-logo-max-width)}.footer__title{color:var(--ifm-footer-title-color);font:700 var(--ifm-h4-font-size)/var(--ifm-heading-line-height) var(--ifm-font-family-base);margin-bottom:var(--ifm-heading-margin-bottom)}.menu,.navbar__link{font-weight:var(--ifm-font-weight-semibold)}.docItemContainer_Djhp article>:first-child,.docItemContainer_Djhp header+*,.footer__item{margin-top:0}.admonitionContent_S0QG>:last-child,.cardContainer_fWXF 
:last-child,.collapsibleContent_i85q>:last-child,.footer__items,.tabItem_Ymn6>:last-child{margin-bottom:0}.codeBlockStandalone_MEMb,[type=checkbox]{padding:0}.hero{align-items:center;background-color:var(--ifm-hero-background-color);color:var(--ifm-hero-text-color);display:flex;padding:4rem 2rem}.hero--primary{--ifm-hero-background-color:var(--ifm-color-primary);--ifm-hero-text-color:var(--ifm-font-color-base-inverse)}.hero--dark{--ifm-hero-background-color:#303846;--ifm-hero-text-color:var(--ifm-color-white)}.hero__title{font-size:3rem}.hero__subtitle{font-size:1.5rem}.menu__list{list-style:none;margin:0;padding-left:0}.menu__caret,.menu__link{padding:var(--ifm-menu-link-padding-vertical) var(--ifm-menu-link-padding-horizontal)}.menu__list .menu__list{flex:0 0 100%;margin-top:.25rem;padding-left:var(--ifm-menu-link-padding-horizontal)}.menu__list-item:not(:first-child){margin-top:.25rem}.menu__list-item--collapsed .menu__list{height:0;overflow:hidden}.details_lb9f[data-collapsed=false].isBrowser_bmU9>summary:before,.details_lb9f[open]:not(.isBrowser_bmU9)>summary:before,.menu__list-item--collapsed .menu__caret:before,.menu__list-item--collapsed .menu__link--sublist:after{transform:rotate(90deg)}.menu__list-item-collapsible{display:flex;flex-wrap:wrap;position:relative}.menu__caret:hover,.menu__link:hover,.menu__list-item-collapsible--active,.menu__list-item-collapsible:hover{background:var(--ifm-menu-color-background-hover)}.menu__list-item-collapsible .menu__link--active,.menu__list-item-collapsible 
.menu__link:hover{background:none!important}.menu__caret,.menu__link{align-items:center;display:flex}.navbar-sidebar,.navbar-sidebar__backdrop{bottom:0;opacity:0;transition-duration:var(--ifm-transition-fast);transition-timing-function:ease-in-out;top:0;left:0;visibility:hidden}.menu__link{color:var(--ifm-menu-color);flex:1;line-height:1.25}.menu__link:hover{color:var(--ifm-menu-color);text-decoration:none}.menu__caret:before,.menu__link--sublist-caret:after{height:1.25rem;transform:rotate(180deg);transition:transform var(--ifm-transition-fast) linear;width:1.25rem;filter:var(--ifm-menu-link-sublist-icon-filter);content:""}.menu__link--sublist-caret:after{background:var(--ifm-menu-link-sublist-icon) 50%/2rem 2rem;margin-left:auto;min-width:1.25rem}.menu__link--active,.menu__link--active:hover{color:var(--ifm-menu-color-active)}.navbar__brand,.navbar__link{color:var(--ifm-navbar-link-color)}.menu__link--active:not(.menu__link--sublist){background-color:var(--ifm-menu-color-background-active)}.menu__caret:before{background:var(--ifm-menu-link-sublist-icon) 50%/2rem 2rem}.navbar--dark,html[data-theme=dark]{--ifm-menu-link-sublist-icon-filter:invert(100%) sepia(94%) saturate(17%) hue-rotate(223deg) brightness(104%) contrast(98%)}.navbar{background-color:var(--ifm-navbar-background-color);box-shadow:var(--ifm-navbar-shadow);height:var(--ifm-navbar-height);padding:var(--ifm-navbar-padding-vertical) var(--ifm-navbar-padding-horizontal)}.navbar,.navbar>.container,.navbar>.container-fluid{display:flex}.navbar--fixed-top{position:sticky;top:0;z-index:var(--ifm-z-index-fixed)}.navbar__inner{display:flex;flex-wrap:wrap;justify-content:space-between;width:100%}.navbar__brand{align-items:center;display:flex;margin-right:1rem;min-width:0}.navbar__brand:hover{color:var(--ifm-navbar-link-hover-color);text-decoration:none}.announcementBarContent_xLdY,.navbar__title{flex:1 1 auto}.navbar__toggle{display:none;margin-right:.5rem}.navbar__logo{flex:0 0 
auto;height:2rem;margin-right:.5rem}.navbar__items{align-items:center;display:flex;flex:1;min-width:0}.navbar__items--center{flex:0 0 auto}.navbar__items--center .navbar__brand{margin:0}.navbar__items--center+.navbar__items--right{flex:1}.navbar__items--right{flex:0 0 auto;justify-content:flex-end}.navbar__items--right>:last-child{padding-right:0}.navbar__item{display:inline-block;padding:var(--ifm-navbar-item-padding-vertical) var(--ifm-navbar-item-padding-horizontal)}.navbar__link--active,.navbar__link:hover{color:var(--ifm-navbar-link-hover-color);text-decoration:none}.navbar--dark,.navbar--primary{--ifm-menu-color:var(--ifm-color-gray-300);--ifm-navbar-link-color:var(--ifm-color-gray-100);--ifm-navbar-search-input-background-color:#ffffff1a;--ifm-navbar-search-input-placeholder-color:#ffffff80;color:var(--ifm-color-white)}.navbar--dark{--ifm-navbar-background-color:#242526;--ifm-menu-color-background-active:#ffffff0d;--ifm-navbar-search-input-color:var(--ifm-color-white)}.navbar--primary{--ifm-navbar-background-color:var(--ifm-color-primary);--ifm-navbar-link-hover-color:var(--ifm-color-white);--ifm-menu-color-active:var(--ifm-color-white);--ifm-navbar-search-input-color:var(--ifm-color-emphasis-500)}.navbar__search-input{appearance:none;background:var(--ifm-navbar-search-input-background-color) var(--ifm-navbar-search-input-icon) no-repeat .75rem center/1rem 1rem;border:none;border-radius:2rem;color:var(--ifm-navbar-search-input-color);cursor:text;display:inline-block;font-size:.9rem;height:2rem;padding:0 .5rem 0 2.25rem;width:12.5rem}.navbar__search-input::placeholder{color:var(--ifm-navbar-search-input-placeholder-color)}.navbar-sidebar{background-color:var(--ifm-navbar-background-color);box-shadow:var(--ifm-global-shadow-md);position:fixed;transform:translate3d(-100%,0,0);transition-property:opacity,visibility,transform;width:var(--ifm-navbar-sidebar-width)}.navbar-sidebar--show 
.navbar-sidebar,.navbar-sidebar__items{transform:translateZ(0)}.navbar-sidebar--show .navbar-sidebar,.navbar-sidebar--show .navbar-sidebar__backdrop{opacity:1;visibility:visible}.navbar-sidebar__backdrop{background-color:#0009;position:fixed;right:0;transition-property:opacity,visibility}.navbar-sidebar__brand{align-items:center;box-shadow:var(--ifm-navbar-shadow);display:flex;flex:1;height:var(--ifm-navbar-height);padding:var(--ifm-navbar-padding-vertical) var(--ifm-navbar-padding-horizontal)}.navbar-sidebar__items{display:flex;height:calc(100% - var(--ifm-navbar-height));transition:transform var(--ifm-transition-fast) ease-in-out}.navbar-sidebar__items--show-secondary{transform:translate3d(calc((var(--ifm-navbar-sidebar-width))*-1),0,0)}.navbar-sidebar__item{flex-shrink:0;padding:.5rem;width:calc(var(--ifm-navbar-sidebar-width))}.navbar-sidebar__back{background:var(--ifm-menu-color-background-active);font-size:15px;font-weight:var(--ifm-button-font-weight);margin:0 0 .2rem -.5rem;padding:.6rem 1.5rem;position:relative;text-align:left;top:-.5rem;width:calc(100% + 1rem)}.navbar-sidebar__close{display:flex;margin-left:auto}.pagination{column-gap:var(--ifm-pagination-page-spacing);display:flex;font-size:var(--ifm-pagination-font-size);padding-left:0}.pagination--sm{--ifm-pagination-font-size:0.8rem;--ifm-pagination-padding-horizontal:0.8rem;--ifm-pagination-padding-vertical:0.2rem}.pagination--lg{--ifm-pagination-font-size:1.2rem;--ifm-pagination-padding-horizontal:1.2rem;--ifm-pagination-padding-vertical:0.3rem}.pagination__item{display:inline-flex}.pagination__item>span{padding:var(--ifm-pagination-padding-vertical)}.pagination__item--active .pagination__link{color:var(--ifm-pagination-color-active)}.pagination__item--active .pagination__link,.pagination__item:not(.pagination__item--active):hover 
.pagination__link{background:var(--ifm-pagination-item-active-background)}.pagination__item--disabled,.pagination__item[disabled]{opacity:.25;pointer-events:none}.pagination__link{border-radius:var(--ifm-pagination-border-radius);color:var(--ifm-font-color-base);display:inline-block;padding:var(--ifm-pagination-padding-vertical) var(--ifm-pagination-padding-horizontal);transition:background var(--ifm-transition-fast) var(--ifm-transition-timing-default)}.pagination__link:hover{text-decoration:none}.pagination-nav{grid-gap:var(--ifm-spacing-horizontal);display:grid;gap:var(--ifm-spacing-horizontal);grid-template-columns:repeat(2,1fr)}.pagination-nav__link{border:1px solid var(--ifm-color-emphasis-300);border-radius:var(--ifm-pagination-nav-border-radius);display:block;height:100%;line-height:var(--ifm-heading-line-height);padding:var(--ifm-global-spacing);transition:border-color var(--ifm-transition-fast) var(--ifm-transition-timing-default)}.pagination-nav__link:hover{border-color:var(--ifm-pagination-nav-color-hover);text-decoration:none}.pagination-nav__link--next{grid-column:2/3;text-align:right}.pagination-nav__label{font-size:var(--ifm-h4-font-size);font-weight:var(--ifm-heading-font-weight);word-break:break-word}.pagination-nav__link--prev .pagination-nav__label:before{content:"« "}.pagination-nav__link--next .pagination-nav__label:after{content:" »"}.pagination-nav__sublabel{color:var(--ifm-color-content-secondary);font-size:var(--ifm-h5-font-size);font-weight:var(--ifm-font-weight-semibold);margin-bottom:.25rem}.badge,.pills__item,.tabs{font-weight:var(--ifm-font-weight-bold)}.pills{display:flex;gap:var(--ifm-pills-spacing);padding-left:0}.pills__item{border-radius:.5rem;cursor:pointer;display:inline-block;padding:.25rem 1rem;transition:background var(--ifm-transition-fast) 
var(--ifm-transition-timing-default)}.tabs,:not(.containsTaskList_mC6p>li)>.containsTaskList_mC6p{padding-left:0}.pills__item--active{color:var(--ifm-pills-color-active)}.pills__item--active,.pills__item:not(.pills__item--active):hover{background:var(--ifm-pills-color-background-active)}.pills--block{justify-content:stretch}.pills--block .pills__item{flex-grow:1;text-align:center}.tabs{color:var(--ifm-tabs-color);display:flex;margin-bottom:0;overflow-x:auto}.tabs__item{border-bottom:3px solid #0000;border-radius:var(--ifm-global-radius);cursor:pointer;display:inline-flex;padding:var(--ifm-tabs-padding-vertical) var(--ifm-tabs-padding-horizontal);transition:background-color var(--ifm-transition-fast) var(--ifm-transition-timing-default)}.tabs__item--active{border-bottom-color:var(--ifm-tabs-color-active-border);border-bottom-left-radius:0;border-bottom-right-radius:0;color:var(--ifm-tabs-color-active)}.tabs__item:hover{background-color:var(--ifm-hover-overlay)}.tabs--block{justify-content:stretch}.tabs--block .tabs__item{flex-grow:1;justify-content:center}.DocSearch-Button,.DocSearch-Button-Container{align-items:center;display:flex}html[data-theme=dark]{--ifm-color-scheme:dark;--ifm-color-emphasis-0:var(--ifm-color-gray-1000);--ifm-color-emphasis-100:var(--ifm-color-gray-900);--ifm-color-emphasis-200:var(--ifm-color-gray-800);--ifm-color-emphasis-300:var(--ifm-color-gray-700);--ifm-color-emphasis-400:var(--ifm-color-gray-600);--ifm-color-emphasis-600:var(--ifm-color-gray-400);--ifm-color-emphasis-700:var(--ifm-color-gray-300);--ifm-color-emphasis-800:var(--ifm-color-gray-200);--ifm-color-emphasis-900:var(--ifm-color-gray-100);--ifm-color-emphasis-1000:var(--ifm-color-gray-0);--ifm-background-color:#1b1b1d;--ifm-background-surface-color:#242526;--ifm-hover-overlay:#ffffff0d;--ifm-color-content:#e3e3e3;--ifm-color-content-secondary:#fff;--ifm-breadcrumb-separator-filter:invert(64%) sepia(11%) saturate(0%) hue-rotate(149deg) brightness(99%) 
contrast(95%);--ifm-code-background:#ffffff1a;--ifm-scrollbar-track-background-color:#444;--ifm-scrollbar-thumb-background-color:#686868;--ifm-scrollbar-thumb-hover-background-color:#7a7a7a;--ifm-table-stripe-background:#ffffff12;--ifm-toc-border-color:var(--ifm-color-emphasis-200);--ifm-color-primary-contrast-background:#102445;--ifm-color-primary-contrast-foreground:#ebf2fc;--ifm-color-secondary-contrast-background:#474748;--ifm-color-secondary-contrast-foreground:#fdfdfe;--ifm-color-success-contrast-background:#003100;--ifm-color-success-contrast-foreground:#e6f6e6;--ifm-color-info-contrast-background:#193c47;--ifm-color-info-contrast-foreground:#eef9fd;--ifm-color-warning-contrast-background:#4d3800;--ifm-color-warning-contrast-foreground:#fff8e6;--ifm-color-danger-contrast-background:#4b1113;--ifm-color-danger-contrast-foreground:#ffebec;--docsearch-text-color:#f5f6f7;--docsearch-container-background:#090a11cc;--docsearch-modal-background:#15172a;--docsearch-modal-shadow:inset 1px 1px 0 0 #2c2e40,0 3px 8px 0 #000309;--docsearch-searchbox-background:#090a11;--docsearch-searchbox-focus-background:#000;--docsearch-hit-color:#bec3c9;--docsearch-hit-shadow:none;--docsearch-hit-background:#090a11;--docsearch-key-gradient:linear-gradient(-26.5deg,#565872,#31355b);--docsearch-key-shadow:inset 0 -2px 0 0 #282d55,inset 0 0 1px 1px #51577d,0 2px 2px 0 #0304094d;--docsearch-key-pressed-shadow:inset 0 -2px 0 0 #282d55,inset 0 0 1px 1px #51577d,0 1px 1px 0 rgba(3,4,9,.302);--docsearch-footer-background:#1e2136;--docsearch-footer-shadow:inset 0 1px 0 0 #494c6a80,0 -4px 8px 0 #0003;--docsearch-logo-color:#fff;--docsearch-muted-color:#7f8497}#nprogress .bar{background:var(--docusaurus-progress-bar-color);height:2px;left:0;position:fixed;top:0;width:100%;z-index:1031}#nprogress .peg{box-shadow:0 0 10px var(--docusaurus-progress-bar-color),0 0 5px var(--docusaurus-progress-bar-color);height:100%;opacity:1;position:absolute;right:0;transform:rotate(3deg) 
translateY(-4px);width:100px}[data-theme=dark]{--ifm-color-primary:#a7b5f6;--ifm-color-primary-dark:#828ef1;--ifm-color-primary-darker:#626ae9;--ifm-color-primary-darkest:#4c4ddc;--ifm-color-primary-light:#c9d2fa;--ifm-color-primary-lighter:#e1e7fd;--ifm-color-primary-lightest:#eff2fe;--ifm-table-head-background:var(--ifm-table-stripe-background);--ifm-table-border-color:#d8dbe6}table,table thead tr{width:100%}table thead tr th{font-size:14px;font-weight:600;text-align:left}table thead tr{border-bottom:0}table tr td,table tr th{border:0;border-bottom:var(--ifm-table-border-width) solid var(--ifm-table-border-color)}table tr th{border-top:var(--ifm-table-border-width) solid var(--ifm-table-border-color)}table tr td:first-child,table tr th:first-child{border-left:var(--ifm-table-border-width) solid var(--ifm-table-border-color)}table tr th:first-child{border-top-left-radius:8px}table tr th:last-child{border-top-right-radius:8px}table tbody tr:last-child td:first-child{border-bottom-left-radius:8px}table tbody tr:last-child td:last-child{border-bottom-right-radius:8px}table tr td:last-child,table tr th:last-child{border-right:var(--ifm-table-border-width) solid var(--ifm-table-border-color)}table tr td{font-size:14px}table tbody tr:hover{background:var(--ifm-table-head-background)}table img{margin:0 0 -5px 18px}.video-container{overflow:hidden;position:relative;width:100%}.video-container:after{content:"";display:block;padding-top:56.25%}.video-container iframe{height:100%;left:0;position:absolute;top:0;width:100%}.gems-table table{display:table;width:100%}.gems-table table th:first-of-type,.gems-table table th:nth-of-type(2){width:150px}.alert--info{--ifm-alert-background-color:var(--ifm-color-secondary-contrast-background);--ifm-alert-border-color:var(--ifm-color-primary-lightest);--ifm-code-background:inherit}.badge{background-color:#f8f9fa;border:var(--ifm-badge-border-width) solid 
var(--ifm-badge-border-color);border-radius:50rem;color:#000;font-size:75%;line-height:1;padding:var(--ifm-badge-padding-vertical) var(--ifm-badge-padding-horizontal)}.cardContainer_fWXF{--ifm-link-color:var(--ifm-color-emphasis-800);--ifm-link-hover-color:var(--ifm-color-emphasis-700);--ifm-link-hover-decoration:none;border:1px solid var(--ifm-color-emphasis-200);box-shadow:0 1.5px 3px 0 #00000026;transition:all var(--ifm-transition-fast) ease;transition-property:border,box-shadow}.cardContainer_fWXF:hover{border-color:var(--ifm-color-primary);box-shadow:0 3px 6px 0 #0003}.cardTitle_rnsV{font-size:1.2rem}.cardDescription_PWke{font-size:.8rem}.iconEdit_Z9Sw{margin-right:.3em;vertical-align:sub}.tableOfContentsInline_prmo ul{font-size:medium;list-style-type:disc;padding-top:0}.DocSearch-Button{background:var(--docsearch-searchbox-background);border:0;border-radius:40px;color:var(--docsearch-muted-color);cursor:pointer;font-weight:500;height:36px;justify-content:space-between;padding:0 8px;-webkit-user-select:none;user-select:none}.DocSearch-Button:active,.DocSearch-Button:focus,.DocSearch-Button:hover{background:var(--docsearch-searchbox-focus-background);box-shadow:var(--docsearch-searchbox-shadow);color:var(--docsearch-text-color);outline:0}.DocSearch-Search-Icon{stroke-width:1.6}.DocSearch-Hit-Tree,.DocSearch-Hit-action,.DocSearch-Hit-icon,.DocSearch-Reset{stroke-width:var(--docsearch-icon-stroke-width)}.DocSearch-Button .DocSearch-Search-Icon{color:var(--docsearch-text-color)}.DocSearch-Button-Placeholder{font-size:1rem;padding:0 12px 0 6px}.DocSearch-Button-Keys{display:flex;min-width:calc(40px + .8em)}.DocSearch-Button-Key{align-items:center;background:var(--docsearch-key-gradient);border:0;border-radius:3px;box-shadow:var(--docsearch-key-shadow);color:var(--docsearch-muted-color);display:flex;height:18px;justify-content:center;margin-right:.4em;padding:0 0 
2px;position:relative;top:-1px;width:20px}.DocSearch-Button-Key--pressed{box-shadow:var(--docsearch-key-pressed-shadow);transform:translate3d(0,1px,0)}.DocSearch--active{overflow:hidden!important}.DocSearch-Container{background-color:var(--docsearch-container-background);height:100vh;left:0;position:fixed;top:0;width:100vw;z-index:200}.DocSearch-Container a{text-decoration:none}.DocSearch-Hit[aria-selected=true] mark,.content_knG7 a{text-decoration:underline}.DocSearch-Link{appearance:none;background:none;border:0;color:var(--docsearch-highlight-color);cursor:pointer;font:inherit;margin:0;padding:0}.DocSearch-Modal{background:var(--docsearch-modal-background);border-radius:6px;box-shadow:var(--docsearch-modal-shadow);flex-direction:column;margin:60px auto auto;max-width:var(--docsearch-modal-width);position:relative}.DocSearch-SearchBar{display:flex;padding:var(--docsearch-spacing) var(--docsearch-spacing) 0}.DocSearch-Form{align-items:center;background:var(--docsearch-searchbox-focus-background);border-radius:4px;box-shadow:var(--docsearch-searchbox-shadow);display:flex;height:var(--docsearch-searchbox-height);margin:0;padding:0 var(--docsearch-spacing);position:relative;width:100%}.DocSearch-Input{appearance:none;background:#0000;border:0;color:var(--docsearch-text-color);flex:1;font:inherit;font-size:1.2em;height:100%;outline:0;padding:0 0 0 8px;width:80%}.DocSearch-Input::placeholder{color:var(--docsearch-muted-color);opacity:1}.DocSearch-Input::-webkit-search-cancel-button,.DocSearch-Input::-webkit-search-decoration,.DocSearch-Input::-webkit-search-results-button,.DocSearch-Input::-webkit-search-results-decoration{display:none}.DocSearch-LoadingIndicator,.DocSearch-MagnifierLabel,.DocSearch-Reset{margin:0;padding:0}.DocSearch-Container--Stalled .DocSearch-LoadingIndicator,.DocSearch-MagnifierLabel,.DocSearch-Reset{align-items:center;color:var(--docsearch-highlight-color);display:flex;justify-content:center}.DocSearch-Cancel,.DocSearch-Container--Stalled 
.DocSearch-MagnifierLabel,.DocSearch-LoadingIndicator,.DocSearch-Reset[hidden]{display:none}.DocSearch-Reset{animation:.1s ease-in forwards a;appearance:none;background:none;border:0;border-radius:50%;color:var(--docsearch-icon-color);cursor:pointer;padding:2px;right:0}.DocSearch-Help,.DocSearch-HitsFooter,.DocSearch-Label{color:var(--docsearch-muted-color)}.DocSearch-Reset:hover{color:var(--docsearch-highlight-color)}.DocSearch-LoadingIndicator svg,.DocSearch-MagnifierLabel svg{height:24px;width:24px}.DocSearch-Dropdown{max-height:calc(var(--docsearch-modal-height) - var(--docsearch-searchbox-height) - var(--docsearch-spacing) - var(--docsearch-footer-height));min-height:var(--docsearch-spacing);overflow-y:auto;overflow-y:overlay;padding:0 var(--docsearch-spacing);scrollbar-color:var(--docsearch-muted-color) var(--docsearch-modal-background);scrollbar-width:thin}.DocSearch-Dropdown::-webkit-scrollbar{width:12px}.DocSearch-Dropdown::-webkit-scrollbar-track{background:#0000}.DocSearch-Dropdown::-webkit-scrollbar-thumb{background-color:var(--docsearch-muted-color);border:3px solid var(--docsearch-modal-background);border-radius:20px}.DocSearch-Dropdown ul{list-style:none;margin:0;padding:0}.DocSearch-Label{font-size:.75em;line-height:1.6em}.DocSearch-Help{font-size:.9em;margin:0;-webkit-user-select:none;user-select:none}.DocSearch-Title{font-size:1.2em}.DocSearch-Logo a{display:flex}.DocSearch-Logo svg{color:var(--docsearch-logo-color);margin-left:8px}.DocSearch-Hits:last-of-type{margin-bottom:24px}.DocSearch-Hits mark{background:none;color:var(--docsearch-highlight-color)}.DocSearch-HitsFooter{display:flex;font-size:.85em;justify-content:center;margin-bottom:var(--docsearch-spacing);padding:var(--docsearch-spacing)}.DocSearch-HitsFooter a{border-bottom:1px solid;color:inherit}.DocSearch-Hit{border-radius:4px;display:flex;padding-bottom:4px;position:relative}.DocSearch-Hit--deleting{opacity:0;transition:.25s 
linear}.DocSearch-Hit--favoriting{transform:scale(0);transform-origin:top center;transition:.25s linear .25s}.DocSearch-Hit a{background:var(--docsearch-hit-background);border-radius:4px;box-shadow:var(--docsearch-hit-shadow);display:block;padding-left:var(--docsearch-spacing);width:100%}.DocSearch-Hit-source{background:var(--docsearch-modal-background);color:var(--docsearch-highlight-color);font-size:.85em;font-weight:600;line-height:32px;margin:0 -4px;padding:8px 4px 0;position:sticky;top:0;z-index:10}.DocSearch-Hit-Tree{color:var(--docsearch-muted-color);height:var(--docsearch-hit-height);opacity:.5;width:24px}.DocSearch-Hit[aria-selected=true] a{background-color:var(--docsearch-highlight-color)}.DocSearch-Hit-Container{align-items:center;color:var(--docsearch-hit-color);display:flex;flex-direction:row;height:var(--docsearch-hit-height);padding:0 var(--docsearch-spacing) 0 0}.DocSearch-Hit-icon{height:20px;width:20px}.DocSearch-Hit-action,.DocSearch-Hit-icon{color:var(--docsearch-muted-color)}.DocSearch-Hit-action{align-items:center;display:flex;height:22px;width:22px}.DocSearch-Hit-action svg{display:block;height:18px;width:18px}.DocSearch-Hit-action+.DocSearch-Hit-action{margin-left:6px}.DocSearch-Hit-action-button{appearance:none;background:none;border:0;border-radius:50%;color:inherit;cursor:pointer;padding:2px}#__docusaurus-base-url-issue-banner-container,.docSidebarContainer_b6E3,.sidebarLogo_isFc,.themedImage_ToTc,[data-theme=dark] .lightToggleIcon_pyhR,[data-theme=light] .darkToggleIcon_wfgR,html[data-announcement-bar-initially-dismissed=true] .announcementBar_mb4j,svg.DocSearch-Hit-Select-Icon{display:none}.DocSearch-Hit[aria-selected=true] .DocSearch-Hit-Select-Icon,.tocCollapsibleContent_vkbj a{display:block}.DocSearch-Hit-action-button:focus,.DocSearch-Hit-action-button:hover{background:#0003;transition:background-color .1s ease-in}.DocSearch-Hit-action-button:focus path,.DocSearch-Hit-action-button:hover 
path{fill:#fff}.DocSearch-Hit-content-wrapper{display:flex;flex:1 1 auto;flex-direction:column;font-weight:500;justify-content:center;line-height:1.2em;margin:0 8px;overflow-x:hidden;position:relative;text-overflow:ellipsis;white-space:nowrap;width:80%}.DocSearch-Hit-title{font-size:.9em}.DocSearch-Hit-path{color:var(--docsearch-muted-color);font-size:.75em}.DocSearch-Hit[aria-selected=true] .DocSearch-Hit-Tree,.DocSearch-Hit[aria-selected=true] .DocSearch-Hit-action,.DocSearch-Hit[aria-selected=true] .DocSearch-Hit-icon,.DocSearch-Hit[aria-selected=true] .DocSearch-Hit-path,.DocSearch-Hit[aria-selected=true] .DocSearch-Hit-text,.DocSearch-Hit[aria-selected=true] .DocSearch-Hit-title,.DocSearch-Hit[aria-selected=true] mark{color:var(--docsearch-hit-active-color)!important}.DocSearch-ErrorScreen,.DocSearch-NoResults,.DocSearch-StartScreen{font-size:.9em;margin:0 auto;padding:36px 0;text-align:center;width:80%}.DocSearch-Screen-Icon{color:var(--docsearch-muted-color);padding-bottom:12px}.DocSearch-NoResults-Prefill-List{display:inline-block;padding-bottom:24px;text-align:left}.DocSearch-NoResults-Prefill-List ul{display:inline-block;padding:8px 0 0}.DocSearch-NoResults-Prefill-List li{list-style-position:inside;list-style-type:"» "}.DocSearch-Prefill{appearance:none;background:none;border:0;border-radius:1em;color:var(--docsearch-highlight-color);cursor:pointer;display:inline-block;font-size:1em;font-weight:700;padding:0}.DocSearch-Prefill:focus,.DocSearch-Prefill:hover{outline:0;text-decoration:underline}.DocSearch-Footer{align-items:center;background:var(--docsearch-footer-background);border-radius:0 0 8px 8px;box-shadow:var(--docsearch-footer-shadow);display:flex;flex-direction:row-reverse;flex-shrink:0;height:var(--docsearch-footer-height);justify-content:space-between;padding:0 var(--docsearch-spacing);position:relative;-webkit-user-select:none;user-select:none;width:100%;z-index:300}.DocSearch-Commands 
li,.DocSearch-Commands-Key{align-items:center;display:flex}.DocSearch-Commands{color:var(--docsearch-muted-color);display:flex;list-style:none;margin:0;padding:0}.DocSearch-Commands li:not(:last-of-type){margin-right:.8em}.DocSearch-Commands-Key{background:var(--docsearch-key-gradient);border:0;border-radius:2px;box-shadow:var(--docsearch-key-shadow);color:var(--docsearch-muted-color);height:18px;justify-content:center;margin-right:.4em;padding:0 0 1px;width:20px}.DocSearch-VisuallyHiddenForAccessibility{clip:rect(0 0 0 0);clip-path:inset(50%);height:1px;overflow:hidden;position:absolute;white-space:nowrap;width:1px}@keyframes a{0%{opacity:0}to{opacity:1}}.DocSearch-Button{margin:0;transition:all var(--ifm-transition-fast) var(--ifm-transition-timing-default)}.DocSearch-Container,.skipToContent_fXgn{z-index:calc(var(--ifm-z-index-fixed) + 1)}.skipToContent_fXgn{background-color:var(--ifm-background-surface-color);color:var(--ifm-color-emphasis-900);left:100%;padding:calc(var(--ifm-global-spacing)/2) var(--ifm-global-spacing);position:fixed;top:1rem}.skipToContent_fXgn:focus{box-shadow:var(--ifm-global-shadow-md);left:1rem}.closeButton_CVFx{line-height:0;padding:0}.content_knG7{font-size:85%;padding:5px 0;text-align:center}.content_knG7 a{color:inherit}.announcementBar_mb4j{align-items:center;background-color:var(--ifm-color-white);border-bottom:1px solid var(--ifm-color-emphasis-100);color:var(--ifm-color-black);display:flex;height:var(--docusaurus-announcement-bar-height)}.announcementBarPlaceholder_vyr4{flex:0 0 10px}.announcementBarClose_gvF7{align-self:stretch;flex:0 0 30px}.toggle_vylO{height:2rem;width:2rem}.toggleButton_gllP{align-items:center;border-radius:50%;display:flex;height:100%;justify-content:center;transition:background 
var(--ifm-transition-fast);width:100%}.toggleButton_gllP:hover{background:var(--ifm-color-emphasis-200)}.toggleButtonDisabled_aARS{cursor:not-allowed}.darkNavbarColorModeToggle_X3D1:hover{background:var(--ifm-color-gray-800)}[data-theme=dark] .themedImage--dark_i4oU,[data-theme=light] .themedImage--light_HNdA,html:not([data-theme]) .themedComponent--light_NU7w{display:initial}.iconExternalLink_nPIU{margin-left:.3rem}.iconLanguage_nlXk{margin-right:5px;vertical-align:text-bottom}.navbarHideable_m1mJ{transition:transform var(--ifm-transition-fast) ease}.navbarHidden_jGov{transform:translate3d(0,calc(-100% - 2px),0)}.errorBoundaryError_a6uf{color:red;white-space:pre-wrap}body:not(.navigation-with-keyboard) :not(input):focus{outline:0}.footerLogoLink_BH7S{opacity:.5;transition:opacity var(--ifm-transition-fast) var(--ifm-transition-timing-default)}.footerLogoLink_BH7S:hover,.hash-link:focus,:hover>.hash-link{opacity:1}.mainWrapper_z2l0{display:flex;flex:1 0 auto;flex-direction:column}.docusaurus-mt-lg{margin-top:3rem}#__docusaurus{display:flex;flex-direction:column;min-height:100%}.tag_zVej{border:1px solid var(--docusaurus-tag-list-border);transition:border var(--ifm-transition-fast)}.tag_zVej:hover{--docusaurus-tag-list-border:var(--ifm-link-color);text-decoration:none}.tagRegular_sFm0{border-radius:var(--ifm-global-radius);font-size:90%;padding:.2rem .5rem .3rem}.tagWithCount_h2kH{align-items:center;border-left:0;display:flex;padding:0 .5rem 0 1rem;position:relative}.tagWithCount_h2kH:after,.tagWithCount_h2kH:before{border:1px solid var(--docusaurus-tag-list-border);content:"";position:absolute;top:50%;transition:inherit}.tagWithCount_h2kH:before{border-bottom:0;border-right:0;height:1.18rem;right:100%;transform:translate(50%,-50%) rotate(-45deg);width:1.18rem}.tagWithCount_h2kH:after{border-radius:50%;height:.5rem;left:0;transform:translateY(-50%);width:.5rem}.tagWithCount_h2kH 
span{background:var(--ifm-color-secondary);border-radius:var(--ifm-global-radius);color:var(--ifm-color-black);font-size:.7rem;line-height:1.2;margin-left:.3rem;padding:.1rem .4rem}.tags_jXut{display:inline}.tag_QGVx{display:inline-block;margin:0 .4rem .5rem 0}.lastUpdated_vwxv{font-size:smaller;font-style:italic;margin-top:.2rem}.tocCollapsibleButton_TO0P{align-items:center;display:flex;font-size:inherit;justify-content:space-between;padding:.4rem .8rem;width:100%}.tocCollapsibleButton_TO0P:after{background:var(--ifm-menu-link-sublist-icon) 50% 50%/2rem 2rem no-repeat;content:"";filter:var(--ifm-menu-link-sublist-icon-filter);height:1.25rem;transform:rotate(180deg);transition:transform var(--ifm-transition-fast);width:1.25rem}.tocCollapsibleButtonExpanded_MG3E:after,.tocCollapsibleExpanded_sAul{transform:none}.tocCollapsible_ETCw{background-color:var(--ifm-menu-color-background-active);border-radius:var(--ifm-global-radius);margin:1rem 0}.tocCollapsibleContent_vkbj>ul{border-left:none;border-top:1px solid var(--ifm-color-emphasis-300);font-size:15px;padding:.2rem 0}.tocCollapsibleContent_vkbj ul li{margin:.4rem .8rem}.searchQueryInput_u2C7,.searchVersionInput_m0Ui{background:var(--docsearch-searchbox-focus-background);border:2px solid var(--ifm-toc-border-color);border-radius:var(--ifm-global-radius);color:var(--docsearch-text-color);font:var(--ifm-font-size-base) var(--ifm-font-family-base);margin-bottom:.5rem;padding:.8rem;transition:border var(--ifm-transition-fast) ease;width:100%}.searchQueryInput_u2C7:focus,.searchVersionInput_m0Ui:focus{border-color:var(--docsearch-primary-color);outline:0}.searchQueryInput_u2C7::placeholder{color:var(--docsearch-muted-color)}.searchResultsColumn_JPFH{font-size:.9rem;font-weight:700}.algoliaLogo_rT1R{max-width:150px}.algoliaLogoPathFill_WdUC{fill:var(--ifm-font-color-base)}.searchResultItem_Tv2o{border-bottom:1px solid var(--ifm-toc-border-color);padding:1rem 
0}.searchResultItemHeading_KbCB{font-weight:400;margin-bottom:0}.searchResultItemPath_lhe1{--ifm-breadcrumb-separator-size-multiplier:1;color:var(--ifm-color-content-secondary);font-size:.8rem}.searchResultItemSummary_AEaO{font-style:italic;margin:.5rem 0 0}.loadingSpinner_XVxU{animation:1s linear infinite b;border:.4em solid #eee;border-radius:50%;border-top:.4em solid var(--ifm-color-primary);height:3rem;margin:0 auto;width:3rem}@keyframes b{to{transform:rotate(1turn)}}.loader_vvXV{margin-top:2rem}.search-result-match{background:#ffd78e40;color:var(--docsearch-hit-color);padding:.09em 0}.backToTopButton_sjWU{background-color:var(--ifm-color-emphasis-200);border-radius:50%;bottom:1.3rem;box-shadow:var(--ifm-global-shadow-lw);height:3rem;opacity:0;position:fixed;right:1.3rem;transform:scale(0);transition:all var(--ifm-transition-fast) var(--ifm-transition-timing-default);visibility:hidden;width:3rem;z-index:calc(var(--ifm-z-index-fixed) - 1)}.buttonGroup__atx button,.codeBlockContainer_Ckt0{background:var(--prism-background-color);color:var(--prism-color)}.backToTopButton_sjWU:after{background-color:var(--ifm-color-emphasis-1000);content:" ";display:inline-block;height:100%;-webkit-mask:var(--ifm-menu-link-sublist-icon) 50%/2rem 2rem no-repeat;mask:var(--ifm-menu-link-sublist-icon) 50%/2rem 2rem no-repeat;width:100%}.backToTopButtonShow_xfvO{opacity:1;transform:scale(1);visibility:visible}[data-theme=dark]:root{--docusaurus-collapse-button-bg:#ffffff0d;--docusaurus-collapse-button-bg-hover:#ffffff1a}.collapseSidebarButton_PEFL{display:none;margin:0}.docMainContainer_gTbr,.docPage__5DB{display:flex;width:100%}.docPage__5DB{flex:1 0}.docsWrapper_BCFX{display:flex;flex:1 0 auto}.tag_Nnez{display:inline-block;margin:.5rem .5rem 0 
1rem}.codeBlockContainer_Ckt0{border-radius:var(--ifm-code-border-radius);box-shadow:var(--ifm-global-shadow-lw);margin-bottom:var(--ifm-leading)}.codeBlockContent_biex{border-radius:inherit;direction:ltr;position:relative}.codeBlockTitle_Ktv7{border-bottom:1px solid var(--ifm-color-emphasis-300);border-top-left-radius:inherit;border-top-right-radius:inherit;font-size:var(--ifm-code-font-size);font-weight:500;padding:.75rem var(--ifm-pre-padding)}.codeBlock_bY9V{--ifm-pre-background:var(--prism-background-color);margin:0;padding:0}.codeBlockTitle_Ktv7+.codeBlockContent_biex .codeBlock_bY9V{border-top-left-radius:0;border-top-right-radius:0}.codeBlockLines_e6Vv{float:left;font:inherit;min-width:100%;padding:var(--ifm-pre-padding)}.codeBlockLinesWithNumbering_o6Pm{display:table;padding:var(--ifm-pre-padding) 0}.buttonGroup__atx{column-gap:.2rem;display:flex;position:absolute;right:calc(var(--ifm-pre-padding)/2);top:calc(var(--ifm-pre-padding)/2)}.buttonGroup__atx button{align-items:center;border:1px solid var(--ifm-color-emphasis-300);border-radius:var(--ifm-global-radius);display:flex;line-height:0;opacity:0;padding:.4rem;transition:opacity var(--ifm-transition-fast) ease-in-out}.buttonGroup__atx button:focus-visible,.buttonGroup__atx button:hover{opacity:1!important}.theme-code-block:hover .buttonGroup__atx button{opacity:.4}:where(:root){--docusaurus-highlighted-code-line-bg:#484d5b}:where([data-theme=dark]){--docusaurus-highlighted-code-line-bg:#646464}.theme-code-block-highlighted-line{background-color:var(--docusaurus-highlighted-code-line-bg);display:block;margin:0 calc(var(--ifm-pre-padding)*-1);padding:0 var(--ifm-pre-padding)}.codeLine_lJS_{counter-increment:a;display:table-row}.codeLineNumber_Tfdd{background:var(--ifm-pre-background);display:table-cell;left:0;overflow-wrap:normal;padding:0 
var(--ifm-pre-padding);position:sticky;text-align:right;width:1%}.codeLineNumber_Tfdd:before{content:counter(a);opacity:.4}.codeLineContent_feaV{padding-right:var(--ifm-pre-padding)}.theme-code-block:hover .copyButtonCopied_obH4{opacity:1!important}.copyButtonIcons_eSgA{height:1.125rem;position:relative;width:1.125rem}.copyButtonIcon_y97N,.copyButtonSuccessIcon_LjdS{fill:currentColor;height:inherit;left:0;opacity:inherit;position:absolute;top:0;transition:all var(--ifm-transition-fast) ease;width:inherit}.copyButtonSuccessIcon_LjdS{color:#00d600;left:50%;opacity:0;top:50%;transform:translate(-50%,-50%) scale(.33)}.copyButtonCopied_obH4 .copyButtonIcon_y97N{opacity:0;transform:scale(.33)}.copyButtonCopied_obH4 .copyButtonSuccessIcon_LjdS{opacity:1;transform:translate(-50%,-50%) scale(1);transition-delay:75ms}.wordWrapButtonIcon_Bwma{height:1.2rem;width:1.2rem}.details_lb9f{--docusaurus-details-summary-arrow-size:0.38rem;--docusaurus-details-transition:transform 200ms ease;--docusaurus-details-decoration-color:grey}.details_lb9f>summary{cursor:pointer;list-style:none;padding-left:1rem;position:relative}.details_lb9f>summary::-webkit-details-marker{display:none}.details_lb9f>summary:before{border-color:#0000 #0000 #0000 var(--docusaurus-details-decoration-color);border-style:solid;border-width:var(--docusaurus-details-summary-arrow-size);content:"";left:0;position:absolute;top:.45rem;transform:rotate(0);transform-origin:calc(var(--docusaurus-details-summary-arrow-size)/2) 50%;transition:var(--docusaurus-details-transition)}.collapsibleContent_i85q{border-top:1px solid var(--docusaurus-details-decoration-color);margin-top:1rem;padding-top:1rem}.details_b_Ee{--docusaurus-details-decoration-color:var(--ifm-alert-border-color);--docusaurus-details-transition:transform var(--ifm-transition-fast) ease;border:1px solid var(--ifm-alert-border-color);margin:0 0 var(--ifm-spacing-vertical)}.anchorWithStickyNavbar_LWe7{scroll-margin-top:calc(var(--ifm-navbar-height) + 
.5rem)}.anchorWithHideOnScrollNavbar_WYt5{scroll-margin-top:.5rem}.hash-link{opacity:0;padding-left:.5rem;transition:opacity var(--ifm-transition-fast);-webkit-user-select:none;user-select:none}.hash-link:before{content:"#"}.containsTaskList_mC6p{list-style:none}.img_ev3q{height:auto}.tableOfContents_bqdL{max-height:calc(100vh - var(--ifm-navbar-height) - 2rem);overflow-y:auto;position:sticky;top:calc(var(--ifm-navbar-height) + 1rem)}.admonition_LlT9{margin-bottom:1em}.admonitionHeading_tbUL{font:var(--ifm-heading-font-weight) var(--ifm-h5-font-size)/var(--ifm-heading-line-height) var(--ifm-heading-font-family);margin-bottom:.3rem}.admonitionHeading_tbUL code{text-transform:none}.admonitionIcon_kALy{display:inline-block;margin-right:.4em;vertical-align:middle}.admonitionIcon_kALy svg{fill:var(--ifm-alert-foreground-color);display:inline-block;height:1.6em;width:1.6em}.breadcrumbHomeIcon_YNFT{height:1.1rem;position:relative;top:1px;vertical-align:top;width:1.1rem}.breadcrumbsContainer_Z_bl{--ifm-breadcrumb-size-multiplier:0.8;margin-bottom:.8rem}.mdxPageWrapper_j9I6{justify-content:center}@media (min-width:997px){.collapseSidebarButton_PEFL,.expandButton_m80_{background-color:var(--docusaurus-collapse-button-bg)}:root{--docusaurus-announcement-bar-height:30px}.announcementBarClose_gvF7,.announcementBarPlaceholder_vyr4{flex-basis:50px}.searchBox_ZlJk{padding:var(--ifm-navbar-item-padding-vertical) var(--ifm-navbar-item-padding-horizontal)}.lastUpdated_vwxv{text-align:right}.tocMobile_ITEo{display:none}.collapseSidebarButton_PEFL{border:1px solid var(--ifm-toc-border-color);border-radius:0;bottom:0;display:block!important;height:40px;position:sticky}.collapseSidebarButtonIcon_kv0_{margin-top:4px;transform:rotate(180deg)}.expandButtonIcon_BlDH,[dir=rtl] 
.collapseSidebarButtonIcon_kv0_{transform:rotate(0)}.collapseSidebarButton_PEFL:focus,.collapseSidebarButton_PEFL:hover,.expandButton_m80_:focus,.expandButton_m80_:hover{background-color:var(--docusaurus-collapse-button-bg-hover)}.menuHtmlItem_M9Kj{padding:var(--ifm-menu-link-padding-vertical) var(--ifm-menu-link-padding-horizontal)}.menu_SIkG{flex-grow:1;padding:.5rem}@supports (scrollbar-gutter:stable){.menu_SIkG{padding:.5rem 0 .5rem .5rem;scrollbar-gutter:stable}}.menuWithAnnouncementBar_GW3s{margin-bottom:var(--docusaurus-announcement-bar-height)}.sidebar_njMd{display:flex;flex-direction:column;height:100%;padding-top:var(--ifm-navbar-height);width:var(--doc-sidebar-width)}.sidebarWithHideableNavbar_wUlq{padding-top:0}.sidebarHidden_VK0M{opacity:0;visibility:hidden}.sidebarLogo_isFc{align-items:center;color:inherit!important;display:flex!important;margin:0 var(--ifm-navbar-padding-horizontal);max-height:var(--ifm-navbar-height);min-height:var(--ifm-navbar-height);text-decoration:none!important}.sidebarLogo_isFc img{height:2rem;margin-right:.5rem}.expandButton_m80_{align-items:center;display:flex;height:100%;justify-content:center;position:absolute;right:0;top:0;transition:background-color var(--ifm-transition-fast) ease;width:100%}[dir=rtl] .expandButtonIcon_BlDH{transform:rotate(180deg)}.docSidebarContainer_b6E3{border-right:1px solid var(--ifm-toc-border-color);clip-path:inset(0);display:block;margin-top:calc(var(--ifm-navbar-height)*-1);transition:width var(--ifm-transition-fast) ease;width:var(--doc-sidebar-width);will-change:width}.docSidebarContainerHidden_b3ry{cursor:pointer;width:var(--doc-sidebar-hidden-width)}.sidebarViewport_Xe31{height:100%;max-height:100vh;position:sticky;top:0}.docMainContainer_gTbr{flex-grow:1;max-width:calc(100% - var(--doc-sidebar-width))}.docMainContainerEnhanced_Uz_u{max-width:calc(100% - var(--doc-sidebar-hidden-width))}.docItemWrapperEnhanced_czyv{max-width:calc(var(--ifm-container-width) + 
var(--doc-sidebar-width))!important}.docItemCol_VOVn{max-width:75%!important}}@media (min-width:1440px){.container{max-width:var(--ifm-container-width-xl)}}@media (max-width:996px){.col{--ifm-col-width:100%;flex-basis:var(--ifm-col-width);margin-left:0}.footer{--ifm-footer-padding-horizontal:0}.colorModeToggle_DEke,.footer__link-separator,.navbar__item,.tableOfContents_bqdL{display:none}.footer__col{margin-bottom:calc(var(--ifm-spacing-vertical)*3)}.footer__link-item{display:block}.hero{padding-left:0;padding-right:0}.navbar>.container,.navbar>.container-fluid{padding:0}.navbar__toggle{display:inherit}.navbar__search-input{width:9rem}.pills--block,.tabs--block{flex-direction:column}.searchBox_ZlJk{position:absolute;right:var(--ifm-navbar-padding-horizontal)}.docItemContainer_F8PC{padding:0 .3rem}}@media only screen and (max-width:996px){.searchQueryColumn_RTkw,.searchResultsColumn_JPFH{max-width:60%!important}.searchLogoColumn_rJIA,.searchVersionColumn_ypXd{max-width:40%!important}.searchLogoColumn_rJIA{padding-left:0!important}}@media (max-width:768px){.DocSearch-Button-Keys,.DocSearch-Button-Placeholder,.DocSearch-Commands,.DocSearch-Hit-Tree{display:none}:root{--docsearch-spacing:10px;--docsearch-footer-height:40px}.DocSearch-Dropdown{height:100%;max-height:calc(var(--docsearch-vh,1vh)*100 - var(--docsearch-searchbox-height) - var(--docsearch-spacing) - 
var(--docsearch-footer-height))}.DocSearch-Container{height:100vh;height:-webkit-fill-available;height:calc(var(--docsearch-vh,1vh)*100);position:absolute}.DocSearch-Footer{border-radius:0;bottom:0;position:absolute}.DocSearch-Hit-content-wrapper{display:flex;position:relative;width:80%}.DocSearch-Modal{border-radius:0;box-shadow:none;height:100vh;height:-webkit-fill-available;height:calc(var(--docsearch-vh,1vh)*100);margin:0;max-width:100%;width:100%}.DocSearch-Cancel{appearance:none;background:none;border:0;color:var(--docsearch-highlight-color);cursor:pointer;display:inline-block;flex:none;font:inherit;font-size:1em;font-weight:500;margin-left:var(--docsearch-spacing);outline:0;overflow:hidden;padding:0;-webkit-user-select:none;user-select:none;white-space:nowrap}}@media (max-width:576px){.markdown h1:first-child{--ifm-h1-font-size:2rem}.markdown>h2{--ifm-h2-font-size:1.5rem}.markdown>h3{--ifm-h3-font-size:1.25rem}}@media screen and (max-width:576px){.searchQueryColumn_RTkw{max-width:100%!important}.searchVersionColumn_ypXd{max-width:100%!important;padding-left:var(--ifm-spacing-horizontal)!important}}@media (hover:hover){.backToTopButton_sjWU:hover{background-color:var(--ifm-color-emphasis-300)}}@media (pointer:fine){.thin-scrollbar{scrollbar-width:thin}.thin-scrollbar::-webkit-scrollbar{height:var(--ifm-scrollbar-size);width:var(--ifm-scrollbar-size)}.thin-scrollbar::-webkit-scrollbar-track{background:var(--ifm-scrollbar-track-background-color);border-radius:10px}.thin-scrollbar::-webkit-scrollbar-thumb{background:var(--ifm-scrollbar-thumb-background-color);border-radius:10px}.thin-scrollbar::-webkit-scrollbar-thumb:hover{background:var(--ifm-scrollbar-thumb-hover-background-color)}}@media (prefers-reduced-motion:reduce){:root{--ifm-transition-fast:0ms;--ifm-transition-slow:0ms}}@media screen and 
(prefers-reduced-motion:reduce){.DocSearch-Reset{stroke-width:var(--docsearch-icon-stroke-width);animation:none;appearance:none;background:none;border:0;border-radius:50%;color:var(--docsearch-icon-color);cursor:pointer;right:0}.DocSearch-Hit--deleting,.DocSearch-Hit--favoriting{transition:none}.DocSearch-Hit-action-button:focus,.DocSearch-Hit-action-button:hover{background:#0003;transition:none}}@media print{.announcementBar_mb4j,.footer,.menu,.navbar,.pagination-nav,.table-of-contents,.tocMobile_ITEo{display:none}.tabs{page-break-inside:avoid}.codeBlockLines_e6Vv{white-space:pre-wrap}} \ No newline at end of file diff --git a/assets/js/0207d280.8628b197.js b/assets/js/0207d280.5cc53b80.js similarity index 51% rename from assets/js/0207d280.8628b197.js rename to assets/js/0207d280.5cc53b80.js index 0efb12eeea..330838b8fd 100644 --- a/assets/js/0207d280.8628b197.js +++ b/assets/js/0207d280.5cc53b80.js @@ -1 +1 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[29324],{15680:(e,t,a)=>{a.d(t,{xA:()=>c,yg:()=>d});var n=a(96540);function r(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function s(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function o(e){for(var t=1;t=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var s=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var i=n.createContext({}),p=function(e){var t=n.useContext(i),a=t;return e&&(a="function"==typeof e?e(t):o(o({},t),e)),a},c=function(e){var t=p(e.components);return n.createElement(i.Provider,{value:t},e.children)},m="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},g=n.forwardRef((function(e,t){var 
a=e.components,r=e.mdxType,s=e.originalType,i=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),m=p(a),g=r,d=m["".concat(i,".").concat(g)]||m[g]||u[g]||s;return a?n.createElement(d,o(o({ref:t},c),{},{components:a})):n.createElement(d,o({ref:t},c))}));function d(e,t){var a=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var s=a.length,o=new Array(s);o[0]=g;var l={};for(var i in t)hasOwnProperty.call(t,i)&&(l[i]=t[i]);l.originalType=e,l[m]="string"==typeof e?e:r,o[1]=l;for(var p=2;p{a.r(t),a.d(t,{assets:()=>i,contentTitle:()=>o,default:()=>u,frontMatter:()=>s,metadata:()=>l,toc:()=>p});var n=a(58168),r=(a(96540),a(15680));const s={sidebar_position:5,title:"FlattenSchema",id:"flatten-schema",description:"Flatten nested data",tags:["gems","schema","explode","flatten"]},o=void 0,l={unversionedId:"Spark/gems/transform/flatten-schema",id:"Spark/gems/transform/flatten-schema",title:"FlattenSchema",description:"Flatten nested data",source:"@site/docs/Spark/gems/transform/flattenschema.md",sourceDirName:"Spark/gems/transform",slug:"/Spark/gems/transform/flatten-schema",permalink:"/Spark/gems/transform/flatten-schema",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"schema",permalink:"/tags/schema"},{label:"explode",permalink:"/tags/explode"},{label:"flatten",permalink:"/tags/flatten"}],version:"current",sidebarPosition:5,frontMatter:{sidebar_position:5,title:"FlattenSchema",id:"flatten-schema",description:"Flatten nested data",tags:["gems","schema","explode","flatten"]},sidebar:"defaultSidebar",previous:{title:"Aggregate",permalink:"/Spark/gems/transform/aggregate"},next:{title:"SchemaTransform",permalink:"/Spark/gems/transform/schema-transform"}},i={},p=[{value:"The Input",id:"the-input",level:2},{value:"The Expressions",id:"the-expressions",level:2},{value:"The Output",id:"the-output",level:2}],c={toc:p},m="wrapper";function 
u(e){let{components:t,...s}=e;return(0,r.yg)(m,(0,n.A)({},c,s,{components:t,mdxType:"MDXLayout"}),(0,r.yg)("h3",null,(0,r.yg)("span",{class:"badge rounded-pill text-bg-light"},"Spark Gem")),(0,r.yg)("p",null,"When processing raw data it can be useful to flatten complex data types like structures and arrays into simpler, flatter schemas."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"The FlattenSchema gem",src:a(16464).A,width:"290",height:"305"})),(0,r.yg)("h2",{id:"the-input"},"The Input"),(0,r.yg)("p",null,"FlattenSchema works on DataFrames that have nested columns that you'd like to extract into a flat schema."),(0,r.yg)("p",null,"For example, with an input schema like so:"),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Input schema",src:a(29081).A,width:"376",height:"434"})),(0,r.yg)("p",null,"And the data looks like so:"),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Input data",src:a(30856).A,width:"2764",height:"416"})),(0,r.yg)("p",null,"We want to extract ",(0,r.yg)("inlineCode",{parentName:"p"},"count")," from ",(0,r.yg)("em",{parentName:"p"},"result")," and all of the columns from ",(0,r.yg)("em",{parentName:"p"},"events")," into a flattened schema."),(0,r.yg)("h2",{id:"the-expressions"},"The Expressions"),(0,r.yg)("p",null,"Having added a FlattenSchema Gem to your Pipeline, all you need to do is click the column names you wish to extract and they'll be added to the ",(0,r.yg)("strong",{parentName:"p"},"Expressions")," section. Then, you can change the values in the ",(0,r.yg)("strong",{parentName:"p"},"Target Column")," to change the name of output columns."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Adding Expressions",src:a(6211).A,width:"630",height:"432"})),(0,r.yg)("p",null,"The ",(0,r.yg)("strong",{parentName:"p"},"Columns Delimiter")," dropdown allows you to control how the names of the new columns are derived. 
Currently dashes and underscores are supported."),(0,r.yg)("h2",{id:"the-output"},"The Output"),(0,r.yg)("p",null,"If we check the ",(0,r.yg)("strong",{parentName:"p"},"Output")," tab in the Gem, you'll see the schema that we've created using the selected columns."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Output schema",src:a(68954).A,width:"404",height:"531"})),(0,r.yg)("p",null,"And here's what the output data looks like:"),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Output interim",src:a(85507).A,width:"2766",height:"1436"})),(0,r.yg)("p",null,"No more nested structures!"),(0,r.yg)("admonition",{type:"info"},(0,r.yg)("p",{parentName:"admonition"},"For more advanced use cases, the Spark ",(0,r.yg)("inlineCode",{parentName:"p"},"explode")," function is available to use in the ",(0,r.yg)("a",{parentName:"p",href:"/Spark/gems/transform/reformat"},"Reformat")," Gem, ",(0,r.yg)("a",{parentName:"p",href:"/Spark/gems/custom/sql-statement"},"Custom SQL")," Gem, or anywhere else that accepts Spark expressions.")))}u.isMDXComponent=!0},6211:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/flatten_add_exp-9b144921f044f3a1ad68091d823283a1.gif"},16464:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/flatten_gem-088dcb90a9e1679a18b6f2497692a93b.png"},29081:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/flatten_input-252ad05824ac1130f88196d0d1132dde.png"},30856:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/flatten_input_interim-895ec66b9df67847cfb7df93737c7236.png"},68954:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/flatten_output-2377e8e555ac97a7dcd5c9faf7a32045.png"},85507:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/flatten_output_interim-66b78f2b754bce19f56ff25afa3c2037.png"}}]); \ No newline at end of file +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[29324],{15680:(e,t,a)=>{a.d(t,{xA:()=>c,yg:()=>d});var n=a(96540);function r(e,t,a){return t in 
e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function s(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function o(e){for(var t=1;t=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var s=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var i=n.createContext({}),p=function(e){var t=n.useContext(i),a=t;return e&&(a="function"==typeof e?e(t):o(o({},t),e)),a},c=function(e){var t=p(e.components);return n.createElement(i.Provider,{value:t},e.children)},m="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},g=n.forwardRef((function(e,t){var a=e.components,r=e.mdxType,s=e.originalType,i=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),m=p(a),g=r,d=m["".concat(i,".").concat(g)]||m[g]||u[g]||s;return a?n.createElement(d,o(o({ref:t},c),{},{components:a})):n.createElement(d,o({ref:t},c))}));function d(e,t){var a=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var s=a.length,o=new Array(s);o[0]=g;var l={};for(var i in t)hasOwnProperty.call(t,i)&&(l[i]=t[i]);l.originalType=e,l[m]="string"==typeof e?e:r,o[1]=l;for(var p=2;p{a.r(t),a.d(t,{assets:()=>i,contentTitle:()=>o,default:()=>u,frontMatter:()=>s,metadata:()=>l,toc:()=>p});var n=a(58168),r=(a(96540),a(15680));const s={sidebar_position:5,title:"FlattenSchema",id:"flatten-schema",description:"Flatten nested data",tags:["gems","schema","explode","flatten"]},o=void 0,l={unversionedId:"Spark/gems/transform/flatten-schema",id:"Spark/gems/transform/flatten-schema",title:"FlattenSchema",description:"Flatten nested 
data",source:"@site/docs/Spark/gems/transform/flattenschema.md",sourceDirName:"Spark/gems/transform",slug:"/Spark/gems/transform/flatten-schema",permalink:"/Spark/gems/transform/flatten-schema",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"schema",permalink:"/tags/schema"},{label:"explode",permalink:"/tags/explode"},{label:"flatten",permalink:"/tags/flatten"}],version:"current",sidebarPosition:5,frontMatter:{sidebar_position:5,title:"FlattenSchema",id:"flatten-schema",description:"Flatten nested data",tags:["gems","schema","explode","flatten"]},sidebar:"defaultSidebar",previous:{title:"Aggregate",permalink:"/Spark/gems/transform/aggregate"},next:{title:"SchemaTransform",permalink:"/Spark/gems/transform/schema-transform"}},i={},p=[{value:"The Input",id:"the-input",level:2},{value:"The Expressions",id:"the-expressions",level:2},{value:"The Output",id:"the-output",level:2}],c={toc:p},m="wrapper";function u(e){let{components:t,...s}=e;return(0,r.yg)(m,(0,n.A)({},c,s,{components:t,mdxType:"MDXLayout"}),(0,r.yg)("h3",null,(0,r.yg)("span",{class:"badge"},"Spark Gem")),(0,r.yg)("p",null,"When processing raw data it can be useful to flatten complex data types like structures and arrays into simpler, flatter schemas."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"The FlattenSchema gem",src:a(16464).A,width:"290",height:"305"})),(0,r.yg)("h2",{id:"the-input"},"The Input"),(0,r.yg)("p",null,"FlattenSchema works on DataFrames that have nested columns that you'd like to extract into a flat schema."),(0,r.yg)("p",null,"For example, with an input schema like so:"),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Input schema",src:a(29081).A,width:"376",height:"434"})),(0,r.yg)("p",null,"And the data looks like so:"),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Input data",src:a(30856).A,width:"2764",height:"416"})),(0,r.yg)("p",null,"We want to extract ",(0,r.yg)("inlineCode",{parentName:"p"},"count")," from ",(0,r.yg)("em",{parentName:"p"},"result")," and all of the columns from 
",(0,r.yg)("em",{parentName:"p"},"events")," into a flattened schema."),(0,r.yg)("h2",{id:"the-expressions"},"The Expressions"),(0,r.yg)("p",null,"Having added a FlattenSchema Gem to your Pipeline, all you need to do is click the column names you wish to extract and they'll be added to the ",(0,r.yg)("strong",{parentName:"p"},"Expressions")," section. Then, you can change the values in the ",(0,r.yg)("strong",{parentName:"p"},"Target Column")," to change the name of output columns."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Adding Expressions",src:a(6211).A,width:"630",height:"432"})),(0,r.yg)("p",null,"The ",(0,r.yg)("strong",{parentName:"p"},"Columns Delimiter")," dropdown allows you to control how the names of the new columns are derived. Currently dashes and underscores are supported."),(0,r.yg)("h2",{id:"the-output"},"The Output"),(0,r.yg)("p",null,"If we check the ",(0,r.yg)("strong",{parentName:"p"},"Output")," tab in the Gem, you'll see the schema that we've created using the selected columns."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Output schema",src:a(68954).A,width:"404",height:"531"})),(0,r.yg)("p",null,"And here's what the output data looks like:"),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Output interim",src:a(85507).A,width:"2766",height:"1436"})),(0,r.yg)("p",null,"No more nested structures!"),(0,r.yg)("admonition",{type:"info"},(0,r.yg)("p",{parentName:"admonition"},"For more advanced use cases, the Spark ",(0,r.yg)("inlineCode",{parentName:"p"},"explode")," function is available to use in the ",(0,r.yg)("a",{parentName:"p",href:"/Spark/gems/transform/reformat"},"Reformat")," Gem, ",(0,r.yg)("a",{parentName:"p",href:"/Spark/gems/custom/sql-statement"},"Custom SQL")," Gem, or anywhere else that accepts Spark expressions.")))}u.isMDXComponent=!0},6211:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/flatten_add_exp-9b144921f044f3a1ad68091d823283a1.gif"},16464:(e,t,a)=>{a.d(t,{A:()=>n});const 
n=a.p+"assets/images/flatten_gem-088dcb90a9e1679a18b6f2497692a93b.png"},29081:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/flatten_input-252ad05824ac1130f88196d0d1132dde.png"},30856:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/flatten_input_interim-895ec66b9df67847cfb7df93737c7236.png"},68954:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/flatten_output-2377e8e555ac97a7dcd5c9faf7a32045.png"},85507:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/flatten_output_interim-66b78f2b754bce19f56ff25afa3c2037.png"}}]); \ No newline at end of file diff --git a/assets/js/07e49c2d.2fda557a.js b/assets/js/07e49c2d.2fda557a.js deleted file mode 100644 index 2d5d07d477..0000000000 --- a/assets/js/07e49c2d.2fda557a.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[48045],{15680:(e,a,t)=>{t.d(a,{xA:()=>l,yg:()=>d});var r=t(96540);function n(e,a,t){return a in e?Object.defineProperty(e,a,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[a]=t,e}function o(e,a){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);a&&(r=r.filter((function(a){return Object.getOwnPropertyDescriptor(e,a).enumerable}))),t.push.apply(t,r)}return t}function s(e){for(var a=1;a=0||(n[t]=e[t]);return n}(e,a);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(n[t]=e[t])}return n}var u=r.createContext({}),p=function(e){var a=r.useContext(u),t=a;return e&&(t="function"==typeof e?e(a):s(s({},a),e)),t},l=function(e){var a=p(e.components);return r.createElement(u.Provider,{value:a},e.children)},g="mdxType",c={inlineCode:"code",wrapper:function(e){var a=e.children;return r.createElement(r.Fragment,{},a)}},h=r.forwardRef((function(e,a){var 
t=e.components,n=e.mdxType,o=e.originalType,u=e.parentName,l=i(e,["components","mdxType","originalType","parentName"]),g=p(t),h=n,d=g["".concat(u,".").concat(h)]||g[h]||c[h]||o;return t?r.createElement(d,s(s({ref:a},l),{},{components:t})):r.createElement(d,s({ref:a},l))}));function d(e,a){var t=arguments,n=a&&a.mdxType;if("string"==typeof e||n){var o=t.length,s=new Array(o);s[0]=h;var i={};for(var u in a)hasOwnProperty.call(a,u)&&(i[u]=a[u]);i.originalType=e,i[g]="string"==typeof e?e:n,s[1]=i;for(var p=2;p{t.r(a),t.d(a,{assets:()=>u,contentTitle:()=>s,default:()=>c,frontMatter:()=>o,metadata:()=>i,toc:()=>p});var r=t(58168),n=(t(96540),t(15680));const o={title:"Subgraph",id:"subgraph",description:"Work with Subgraphs by grouping your Gems in Parent Gems",tags:["subgraph","group","SQL"]},s=void 0,i={unversionedId:"SQL/gems/subgraph/subgraph",id:"SQL/gems/subgraph/subgraph",title:"Subgraph",description:"Work with Subgraphs by grouping your Gems in Parent Gems",source:"@site/docs/SQL/gems/subgraph/subgraph.md",sourceDirName:"SQL/gems/subgraph",slug:"/SQL/gems/subgraph/",permalink:"/SQL/gems/subgraph/",draft:!1,tags:[{label:"subgraph",permalink:"/tags/subgraph"},{label:"group",permalink:"/tags/group"},{label:"SQL",permalink:"/tags/sql"}],version:"current",frontMatter:{title:"Subgraph",id:"subgraph",description:"Work with Subgraphs by grouping your Gems in Parent Gems",tags:["subgraph","group","SQL"]},sidebar:"defaultSidebar",previous:{title:"Custom",permalink:"/SQL/gems/custom/"},next:{title:"Execution",permalink:"/SQL/execution/"}},u={},p=[{value:"Basic Subgraph",id:"basic-subgraph",level:2},{value:"Create a Basic Subgraph",id:"create-a-basic-subgraph",level:3},{value:"Run a Basic Subgraph",id:"run-a-basic-subgraph",level:3},{value:"Add/Remove Port",id:"addremove-port",level:3},{value:"Code view",id:"code-view",level:2},{value:"Subgraph Configurations",id:"subgraph-configurations",level:2}],l={toc:p},g="wrapper";function 
c(e){let{components:a,...o}=e;return(0,n.yg)(g,(0,r.A)({},l,o,{components:a,mdxType:"MDXLayout"}),(0,n.yg)("h3",null,(0,n.yg)("span",{class:"badge rounded-pill text-bg-light"},"SQL Gem")),(0,n.yg)("p",null,"Subgraph Gems let you take multiple different Gems and wrap them under a single reusable parent Gem. In other words, they allow you to decompose complex logic into reusable components and simplify the visual view of your data model."),(0,n.yg)("h2",{id:"basic-subgraph"},"Basic Subgraph"),(0,n.yg)("p",null,"Basic Subgraphs are single-use containers that capture one or more Gems within a model. They are the equivalent of a nested CTE."),(0,n.yg)("p",null,"If you want to create a complex model with large sets of Transform and Join Gems, you can use a Basic Subgraph to group them together. This organizational approach enhances the visual clarity of your model by grouping various sections together under a common parent Gem. Additionally, it empowers you to break down intricate logic into modular components, thereby streamlining your data transformation processes."),(0,n.yg)("h3",{id:"create-a-basic-subgraph"},"Create a Basic Subgraph"),(0,n.yg)("p",null,"You can create a Basic Subgraph the same way you create other Gems."),(0,n.yg)("p",null,"To create a Basic Subgraph, follow these steps:"),(0,n.yg)("ol",null,(0,n.yg)("li",{parentName:"ol"},"Drag and drop the ",(0,n.yg)("strong",{parentName:"li"},"Subgraph")," Gem from the Subgraph menu, and connect it to any previously created Gem on your canvas.")),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"create_basic_subgraph",src:t(47784).A,width:"2620",height:"1507"})),(0,n.yg)("ol",{start:2},(0,n.yg)("li",{parentName:"ol"},(0,n.yg)("p",{parentName:"li"},"Once you've added the Gem, click on it to open the subgraph canvas.")),(0,n.yg)("li",{parentName:"ol"},(0,n.yg)("p",{parentName:"li"},"On the subgraph canvas, add Gems to your Basic Subgraph by dragging and dropping from the Gems menu. 
You can even add a subgraph within the subgraph to create a nested subgraph."))),(0,n.yg)("h3",{id:"run-a-basic-subgraph"},"Run a Basic Subgraph"),(0,n.yg)("p",null,"A Basic Subgraph is functionally equivalent to the sequence of Gems that it contains. You can run a Basic Subgraph to see the output."),(0,n.yg)("p",null,"To run a Basic Subgraph, follow this step:"),(0,n.yg)("ul",null,(0,n.yg)("li",{parentName:"ul"},"On the Basic Subgraph Gem, click on the play button,")),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"run_basic_subgraph",src:t(98713).A,width:"2620",height:"1536"})),(0,n.yg)("h3",{id:"addremove-port"},"Add/Remove Port"),(0,n.yg)("p",null,"Gems and subgraphs are operations or transformations that takes one or more tables as inputs. Therefore, Input ports signify the number of tables that a Basic Subgraph is taking in as inputs. There is no limit to the number of Input ports you can add."),(0,n.yg)("p",null,"While using a Subgraph, you can configure the number of Input ports as per the requirements. However, as with all SQL Gems, there can only be one Output port."),(0,n.yg)("p",null,"To add an Input port, follow these steps:"),(0,n.yg)("ol",null,(0,n.yg)("li",{parentName:"ol"},"On the subgraph canvas, click on the ",(0,n.yg)("strong",{parentName:"li"},"+")," button to add a new port."),(0,n.yg)("li",{parentName:"ol"},"Optional: You can click the ",(0,n.yg)("strong",{parentName:"li"},"Delete")," icon next to the input port you want to remove.")),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"add_remove_port",src:t(18008).A,width:"2620",height:"1507"})),(0,n.yg)("h2",{id:"code-view"},"Code view"),(0,n.yg)("p",null,"Normally from the Code view, we create one Gem per CTE. 
However, since subgraphs are represented as nested CTEs in code, one subgraph can represent multiple nested SQL statements."),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"subgraph_code_view",src:t(47013).A,width:"2620",height:"1507"})),(0,n.yg)("p",null,"If you'd like, you can create a subgraph from the Code view by writing multiple nested statements. Then toggle back to the Visual view to see an auto-generated Subgraph Gem based on your defined transformations."),(0,n.yg)("h2",{id:"subgraph-configurations"},"Subgraph Configurations"),(0,n.yg)("p",null,"You can configure your subgraphs by using either:"),(0,n.yg)("ul",null,(0,n.yg)("li",{parentName:"ul"},"Model-level configurations"),(0,n.yg)("li",{parentName:"ul"},"Project-level configurations")))}c.isMDXComponent=!0},18008:(e,a,t)=>{t.d(a,{A:()=>r});const r=t.p+"assets/images/add-remove-subgraph-port-c5ddff6e74896bc46009b676a569e5d8.png"},47784:(e,a,t)=>{t.d(a,{A:()=>r});const r=t.p+"assets/images/create-subgraph-95d23622a42f7cec13d7ec7253ef0f0b.png"},98713:(e,a,t)=>{t.d(a,{A:()=>r});const r=t.p+"assets/images/run-subgraph-44ab1c625c5fe4d7af88d34f2452f051.png"},47013:(e,a,t)=>{t.d(a,{A:()=>r});const r=t.p+"assets/images/subgraph-code-view-76a8d39380eae1e54bad5a9aea6b58de.png"}}]); \ No newline at end of file diff --git a/assets/js/07e49c2d.642c6d62.js b/assets/js/07e49c2d.642c6d62.js new file mode 100644 index 0000000000..b8317b9913 --- /dev/null +++ b/assets/js/07e49c2d.642c6d62.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[48045],{15680:(e,a,t)=>{t.d(a,{xA:()=>l,yg:()=>m});var r=t(96540);function n(e,a,t){return a in e?Object.defineProperty(e,a,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[a]=t,e}function o(e,a){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);a&&(r=r.filter((function(a){return Object.getOwnPropertyDescriptor(e,a).enumerable}))),t.push.apply(t,r)}return t}function s(e){for(var a=1;a=0||(n[t]=e[t]);return 
n}(e,a);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(n[t]=e[t])}return n}var u=r.createContext({}),p=function(e){var a=r.useContext(u),t=a;return e&&(t="function"==typeof e?e(a):s(s({},a),e)),t},l=function(e){var a=p(e.components);return r.createElement(u.Provider,{value:a},e.children)},g="mdxType",c={inlineCode:"code",wrapper:function(e){var a=e.children;return r.createElement(r.Fragment,{},a)}},h=r.forwardRef((function(e,a){var t=e.components,n=e.mdxType,o=e.originalType,u=e.parentName,l=i(e,["components","mdxType","originalType","parentName"]),g=p(t),h=n,m=g["".concat(u,".").concat(h)]||g[h]||c[h]||o;return t?r.createElement(m,s(s({ref:a},l),{},{components:t})):r.createElement(m,s({ref:a},l))}));function m(e,a){var t=arguments,n=a&&a.mdxType;if("string"==typeof e||n){var o=t.length,s=new Array(o);s[0]=h;var i={};for(var u in a)hasOwnProperty.call(a,u)&&(i[u]=a[u]);i.originalType=e,i[g]="string"==typeof e?e:n,s[1]=i;for(var p=2;p{t.r(a),t.d(a,{assets:()=>u,contentTitle:()=>s,default:()=>c,frontMatter:()=>o,metadata:()=>i,toc:()=>p});var r=t(58168),n=(t(96540),t(15680));const o={title:"Subgraph",id:"subgraph",description:"Work with Subgraphs by grouping your Gems in Parent Gems",tags:["subgraph","group","SQL"]},s=void 0,i={unversionedId:"SQL/gems/subgraph/subgraph",id:"SQL/gems/subgraph/subgraph",title:"Subgraph",description:"Work with Subgraphs by grouping your Gems in Parent Gems",source:"@site/docs/SQL/gems/subgraph/subgraph.md",sourceDirName:"SQL/gems/subgraph",slug:"/SQL/gems/subgraph/",permalink:"/SQL/gems/subgraph/",draft:!1,tags:[{label:"subgraph",permalink:"/tags/subgraph"},{label:"group",permalink:"/tags/group"},{label:"SQL",permalink:"/tags/sql"}],version:"current",frontMatter:{title:"Subgraph",id:"subgraph",description:"Work with Subgraphs by grouping your Gems in Parent 
Gems",tags:["subgraph","group","SQL"]},sidebar:"defaultSidebar",previous:{title:"Custom",permalink:"/SQL/gems/custom/"},next:{title:"Execution",permalink:"/SQL/execution/"}},u={},p=[{value:"Basic Subgraph",id:"basic-subgraph",level:2},{value:"Create a Basic Subgraph",id:"create-a-basic-subgraph",level:3},{value:"Run a Basic Subgraph",id:"run-a-basic-subgraph",level:3},{value:"Add/Remove Port",id:"addremove-port",level:3},{value:"Code view",id:"code-view",level:2},{value:"Subgraph Configurations",id:"subgraph-configurations",level:2}],l={toc:p},g="wrapper";function c(e){let{components:a,...o}=e;return(0,n.yg)(g,(0,r.A)({},l,o,{components:a,mdxType:"MDXLayout"}),(0,n.yg)("h3",null,(0,n.yg)("span",{class:"badge"},"SQL Gem")),(0,n.yg)("p",null,"Subgraph Gems let you take multiple different Gems and wrap them under a single reusable parent Gem. In other words, they allow you to decompose complex logic into reusable components and simplify the visual view of your data model."),(0,n.yg)("h2",{id:"basic-subgraph"},"Basic Subgraph"),(0,n.yg)("p",null,"Basic Subgraphs are single-use containers that capture one or more Gems within a model. They are the equivalent of a nested CTE."),(0,n.yg)("p",null,"If you want to create a complex model with large sets of Transform and Join Gems, you can use a Basic Subgraph to group them together. This organizational approach enhances the visual clarity of your model by grouping various sections together under a common parent Gem. 
Additionally, it empowers you to break down intricate logic into modular components, thereby streamlining your data transformation processes."),(0,n.yg)("h3",{id:"create-a-basic-subgraph"},"Create a Basic Subgraph"),(0,n.yg)("p",null,"You can create a Basic Subgraph the same way you create other Gems."),(0,n.yg)("p",null,"To create a Basic Subgraph, follow these steps:"),(0,n.yg)("ol",null,(0,n.yg)("li",{parentName:"ol"},"Drag and drop the ",(0,n.yg)("strong",{parentName:"li"},"Subgraph")," Gem from the Subgraph menu, and connect it to any previously created Gem on your canvas.")),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"create_basic_subgraph",src:t(47784).A,width:"2620",height:"1507"})),(0,n.yg)("ol",{start:2},(0,n.yg)("li",{parentName:"ol"},(0,n.yg)("p",{parentName:"li"},"Once you've added the Gem, click on it to open the subgraph canvas.")),(0,n.yg)("li",{parentName:"ol"},(0,n.yg)("p",{parentName:"li"},"On the subgraph canvas, add Gems to your Basic Subgraph by dragging and dropping from the Gems menu. You can even add a subgraph within the subgraph to create a nested subgraph."))),(0,n.yg)("h3",{id:"run-a-basic-subgraph"},"Run a Basic Subgraph"),(0,n.yg)("p",null,"A Basic Subgraph is functionally equivalent to the sequence of Gems that it contains. You can run a Basic Subgraph to see the output."),(0,n.yg)("p",null,"To run a Basic Subgraph, follow this step:"),(0,n.yg)("ul",null,(0,n.yg)("li",{parentName:"ul"},"On the Basic Subgraph Gem, click on the play button,")),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"run_basic_subgraph",src:t(98713).A,width:"2620",height:"1536"})),(0,n.yg)("h3",{id:"addremove-port"},"Add/Remove Port"),(0,n.yg)("p",null,"Gems and subgraphs are operations or transformations that takes one or more tables as inputs. Therefore, Input ports signify the number of tables that a Basic Subgraph is taking in as inputs. 
There is no limit to the number of Input ports you can add."),(0,n.yg)("p",null,"While using a Subgraph, you can configure the number of Input ports as per the requirements. However, as with all SQL Gems, there can only be one Output port."),(0,n.yg)("p",null,"To add an Input port, follow these steps:"),(0,n.yg)("ol",null,(0,n.yg)("li",{parentName:"ol"},"On the subgraph canvas, click on the ",(0,n.yg)("strong",{parentName:"li"},"+")," button to add a new port."),(0,n.yg)("li",{parentName:"ol"},"Optional: You can click the ",(0,n.yg)("strong",{parentName:"li"},"Delete")," icon next to the input port you want to remove.")),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"add_remove_port",src:t(18008).A,width:"2620",height:"1507"})),(0,n.yg)("h2",{id:"code-view"},"Code view"),(0,n.yg)("p",null,"Normally from the Code view, we create one Gem per CTE. However, since subgraphs are represented as nested CTEs in code, one subgraph can represent multiple nested SQL statements."),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"subgraph_code_view",src:t(47013).A,width:"2620",height:"1507"})),(0,n.yg)("p",null,"If you'd like, you can create a subgraph from the Code view by writing multiple nested statements. 
Then toggle back to the Visual view to see an auto-generated Subgraph Gem based on your defined transformations."),(0,n.yg)("h2",{id:"subgraph-configurations"},"Subgraph Configurations"),(0,n.yg)("p",null,"You can configure your subgraphs by using either:"),(0,n.yg)("ul",null,(0,n.yg)("li",{parentName:"ul"},"Model-level configurations"),(0,n.yg)("li",{parentName:"ul"},"Project-level configurations")))}c.isMDXComponent=!0},18008:(e,a,t)=>{t.d(a,{A:()=>r});const r=t.p+"assets/images/add-remove-subgraph-port-c5ddff6e74896bc46009b676a569e5d8.png"},47784:(e,a,t)=>{t.d(a,{A:()=>r});const r=t.p+"assets/images/create-subgraph-95d23622a42f7cec13d7ec7253ef0f0b.png"},98713:(e,a,t)=>{t.d(a,{A:()=>r});const r=t.p+"assets/images/run-subgraph-44ab1c625c5fe4d7af88d34f2452f051.png"},47013:(e,a,t)=>{t.d(a,{A:()=>r});const r=t.p+"assets/images/subgraph-code-view-76a8d39380eae1e54bad5a9aea6b58de.png"}}]); \ No newline at end of file diff --git a/assets/js/13b3561e.091b2efb.js b/assets/js/13b3561e.79dc4954.js similarity index 66% rename from assets/js/13b3561e.091b2efb.js rename to assets/js/13b3561e.79dc4954.js index 265b61a4d8..5618100d10 100644 --- a/assets/js/13b3561e.091b2efb.js +++ b/assets/js/13b3561e.79dc4954.js @@ -1 +1 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[23581],{15680:(e,t,a)=>{a.d(t,{xA:()=>d,yg:()=>g});var n=a(96540);function r(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function l(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function s(e){for(var t=1;t=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var o=n.createContext({}),m=function(e){var t=n.useContext(o),a=t;return 
e&&(a="function"==typeof e?e(t):s(s({},t),e)),a},d=function(e){var t=m(e.components);return n.createElement(o.Provider,{value:t},e.children)},c="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},u=n.forwardRef((function(e,t){var a=e.components,r=e.mdxType,l=e.originalType,o=e.parentName,d=i(e,["components","mdxType","originalType","parentName"]),c=m(a),u=r,g=c["".concat(o,".").concat(u)]||c[u]||p[u]||l;return a?n.createElement(g,s(s({ref:t},d),{},{components:a})):n.createElement(g,s({ref:t},d))}));function g(e,t){var a=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var l=a.length,s=new Array(l);s[0]=u;var i={};for(var o in t)hasOwnProperty.call(t,o)&&(i[o]=t[o]);i.originalType=e,i[c]="string"==typeof e?e:r,s[1]=i;for(var m=2;m{a.r(t),a.d(t,{assets:()=>o,contentTitle:()=>s,default:()=>p,frontMatter:()=>l,metadata:()=>i,toc:()=>m});var n=a(58168),r=(a(96540),a(15680));const l={sidebar_position:12,title:"DataCleansing",id:"data-cleansing",description:"Standardize data formats and address missing or null values in the data.",tags:["gems","clean","format"]},s=void 0,i={unversionedId:"Spark/gems/transform/data-cleansing",id:"Spark/gems/transform/data-cleansing",title:"DataCleansing",description:"Standardize data formats and address missing or null values in the data.",source:"@site/docs/Spark/gems/transform/data-cleansing.md",sourceDirName:"Spark/gems/transform",slug:"/Spark/gems/transform/data-cleansing",permalink:"/Spark/gems/transform/data-cleansing",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"clean",permalink:"/tags/clean"},{label:"format",permalink:"/tags/format"}],version:"current",sidebarPosition:12,frontMatter:{sidebar_position:12,title:"DataCleansing",id:"data-cleansing",description:"Standardize data formats and address missing or null values in the 
data.",tags:["gems","clean","format"]},sidebar:"defaultSidebar",previous:{title:"BulkColumnExpressions",permalink:"/Spark/gems/transform/bulk-column-expressions"},next:{title:"DynamicSelect",permalink:"/Spark/gems/transform/dynamic-select"}},o={},m=[{value:"Parameters",id:"parameters",level:2},{value:"Example",id:"example",level:2}],d={toc:m},c="wrapper";function p(e){let{components:t,...l}=e;return(0,r.yg)(c,(0,n.A)({},d,l,{components:t,mdxType:"MDXLayout"}),(0,r.yg)("h3",null,(0,r.yg)("span",{class:"badge rounded-pill text-bg-light"},"Spark Gem")),(0,r.yg)("p",null,"Use the DataCleansing Gem to standardize data formats and address missing or null values in the data."),(0,r.yg)("h2",{id:"parameters"},"Parameters"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Parameter"),(0,r.yg)("th",{parentName:"tr",align:null},"Description"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Select columns you want to clean"),(0,r.yg)("td",{parentName:"tr",align:null},"The set of columns on which to perform cleaning transformations")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Remove null data"),(0,r.yg)("td",{parentName:"tr",align:null},"The method used to remove null data")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Replace null values in column"),(0,r.yg)("td",{parentName:"tr",align:null},"The method used to replace null values")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Clean data"),(0,r.yg)("td",{parentName:"tr",align:null},"Different ways to standardize the format of data in columns")))),(0,r.yg)("h2",{id:"example"},"Example"),(0,r.yg)("p",null,"Assume you have a table that includes customer feedback on individual orders. 
In this scenario, some customers may not provide feedback, resulting in null values in the data. You can use the DataCleansing Gem to replace null values with the string ",(0,r.yg)("inlineCode",{parentName:"p"},"NA"),"."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Replace null with string",src:a(82591).A,width:"2620",height:"1508"})))}p.isMDXComponent=!0},82591:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/replace-null-with-string-d6f58e1cf09526c92a2b5f45e327d7e7.png"}}]); \ No newline at end of file +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[23581],{15680:(e,t,a)=>{a.d(t,{xA:()=>d,yg:()=>g});var n=a(96540);function r(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function l(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function s(e){for(var t=1;t=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var o=n.createContext({}),m=function(e){var t=n.useContext(o),a=t;return e&&(a="function"==typeof e?e(t):s(s({},t),e)),a},d=function(e){var t=m(e.components);return n.createElement(o.Provider,{value:t},e.children)},c="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},u=n.forwardRef((function(e,t){var a=e.components,r=e.mdxType,l=e.originalType,o=e.parentName,d=i(e,["components","mdxType","originalType","parentName"]),c=m(a),u=r,g=c["".concat(o,".").concat(u)]||c[u]||p[u]||l;return a?n.createElement(g,s(s({ref:t},d),{},{components:a})):n.createElement(g,s({ref:t},d))}));function g(e,t){var a=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var l=a.length,s=new Array(l);s[0]=u;var i={};for(var o in 
t)hasOwnProperty.call(t,o)&&(i[o]=t[o]);i.originalType=e,i[c]="string"==typeof e?e:r,s[1]=i;for(var m=2;m{a.r(t),a.d(t,{assets:()=>o,contentTitle:()=>s,default:()=>p,frontMatter:()=>l,metadata:()=>i,toc:()=>m});var n=a(58168),r=(a(96540),a(15680));const l={sidebar_position:12,title:"DataCleansing",id:"data-cleansing",description:"Standardize data formats and address missing or null values in the data.",tags:["gems","clean","format"]},s=void 0,i={unversionedId:"Spark/gems/transform/data-cleansing",id:"Spark/gems/transform/data-cleansing",title:"DataCleansing",description:"Standardize data formats and address missing or null values in the data.",source:"@site/docs/Spark/gems/transform/data-cleansing.md",sourceDirName:"Spark/gems/transform",slug:"/Spark/gems/transform/data-cleansing",permalink:"/Spark/gems/transform/data-cleansing",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"clean",permalink:"/tags/clean"},{label:"format",permalink:"/tags/format"}],version:"current",sidebarPosition:12,frontMatter:{sidebar_position:12,title:"DataCleansing",id:"data-cleansing",description:"Standardize data formats and address missing or null values in the data.",tags:["gems","clean","format"]},sidebar:"defaultSidebar",previous:{title:"BulkColumnExpressions",permalink:"/Spark/gems/transform/bulk-column-expressions"},next:{title:"DynamicSelect",permalink:"/Spark/gems/transform/dynamic-select"}},o={},m=[{value:"Parameters",id:"parameters",level:2},{value:"Example",id:"example",level:2}],d={toc:m},c="wrapper";function p(e){let{components:t,...l}=e;return(0,r.yg)(c,(0,n.A)({},d,l,{components:t,mdxType:"MDXLayout"}),(0,r.yg)("h3",null,(0,r.yg)("span",{class:"badge"},"Spark Gem")),(0,r.yg)("p",null,"Use the DataCleansing Gem to standardize data formats and address missing or null values in the 
data."),(0,r.yg)("h2",{id:"parameters"},"Parameters"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Parameter"),(0,r.yg)("th",{parentName:"tr",align:null},"Description"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Select columns you want to clean"),(0,r.yg)("td",{parentName:"tr",align:null},"The set of columns on which to perform cleaning transformations")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Remove null data"),(0,r.yg)("td",{parentName:"tr",align:null},"The method used to remove null data")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Replace null values in column"),(0,r.yg)("td",{parentName:"tr",align:null},"The method used to replace null values")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Clean data"),(0,r.yg)("td",{parentName:"tr",align:null},"Different ways to standardize the format of data in columns")))),(0,r.yg)("h2",{id:"example"},"Example"),(0,r.yg)("p",null,"Assume you have a table that includes customer feedback on individual orders. In this scenario, some customers may not provide feedback, resulting in null values in the data. 
You can use the DataCleansing Gem to replace null values with the string ",(0,r.yg)("inlineCode",{parentName:"p"},"NA"),"."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Replace null with string",src:a(82591).A,width:"2620",height:"1508"})))}p.isMDXComponent=!0},82591:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/replace-null-with-string-d6f58e1cf09526c92a2b5f45e327d7e7.png"}}]); \ No newline at end of file diff --git a/assets/js/1d7b424d.a5664aae.js b/assets/js/1d7b424d.a5664aae.js new file mode 100644 index 0000000000..4c2a5678f8 --- /dev/null +++ b/assets/js/1d7b424d.a5664aae.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[73041],{15680:(e,t,n)=>{n.d(t,{xA:()=>c,yg:()=>d});var a=n(96540);function o(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(o[n]=e[n]);return o}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(o[n]=e[n])}return o}var p=a.createContext({}),l=function(e){var t=a.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},c=function(e){var t=l(e.components);return a.createElement(p.Provider,{value:t},e.children)},m="mdxType",g={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,o=e.mdxType,r=e.originalType,p=e.parentName,c=s(e,["components","mdxType","originalType","parentName"]),m=l(n),u=o,d=m["".concat(p,".").concat(u)]||m[u]||g[u]||r;return n?a.createElement(d,i(i({ref:t},c),{},{components:n})):a.createElement(d,i({ref:t},c))}));function d(e,t){var 
n=arguments,o=t&&t.mdxType;if("string"==typeof e||o){var r=n.length,i=new Array(r);i[0]=u;var s={};for(var p in t)hasOwnProperty.call(t,p)&&(s[p]=t[p]);s.originalType=e,s[m]="string"==typeof e?e:o,i[1]=s;for(var l=2;l{n.r(t),n.d(t,{assets:()=>p,contentTitle:()=>i,default:()=>g,frontMatter:()=>r,metadata:()=>s,toc:()=>l});var a=n(58168),o=(n(96540),n(15680));const r={title:"Join",id:"data-joins",description:"Join data from multiple tables",sidebar_position:3,tags:["join","data","gem","transformation"]},i=void 0,s={unversionedId:"SQL/gems/data-joins",id:"SQL/gems/data-joins",title:"Join",description:"Join data from multiple tables",source:"@site/docs/SQL/gems/joins.md",sourceDirName:"SQL/gems",slug:"/SQL/gems/data-joins",permalink:"/SQL/gems/data-joins",draft:!1,tags:[{label:"join",permalink:"/tags/join"},{label:"data",permalink:"/tags/data"},{label:"gem",permalink:"/tags/gem"},{label:"transformation",permalink:"/tags/transformation"}],version:"current",sidebarPosition:3,frontMatter:{title:"Join",id:"data-joins",description:"Join data from multiple tables",sidebar_position:3,tags:["join","data","gem","transformation"]},sidebar:"defaultSidebar",previous:{title:"Flatten Schema",permalink:"/SQL/gems/transform/flattenschema"},next:{title:"Custom",permalink:"/SQL/gems/custom/"}},p={},l=[{value:"Add a port",id:"add-a-port",level:2},{value:"Run",id:"run",level:2}],c={toc:l},m="wrapper";function g(e){let{components:t,...r}=e;return(0,o.yg)(m,(0,a.A)({},c,r,{components:t,mdxType:"MDXLayout"}),(0,o.yg)("h3",null,(0,o.yg)("span",{class:"badge"},"SQL Gem")),(0,o.yg)("p",null,"Upon opening the Join Gem, you can see a pop-up which provides several helpful features."),(0,o.yg)("p",null,(0,o.yg)("img",{alt:"Join definition",src:n(89133).A,width:"2880",height:"1084"})),(0,o.yg)("p",null,"For transparency, you can always see the ",(0,o.yg)("strong",{parentName:"p"},"(1) Input schema")," on the left hand-side, ",(0,o.yg)("strong",{parentName:"p"},"(2) Errors")," in the footer, and have 
the ability to ",(0,o.yg)("strong",{parentName:"p"},"(3) Run")," the Gem on the top right."),(0,o.yg)("p",null,"To fill-in our ",(0,o.yg)("strong",{parentName:"p"},"(5) Join condition")," within the ",(0,o.yg)("strong",{parentName:"p"},"(4) Conditions")," section, start typing the input table name and key. For example, if we have two input tables, ",(0,o.yg)("inlineCode",{parentName:"p"},"nation")," and ",(0,o.yg)("inlineCode",{parentName:"p"},"customer"),", type ",(0,o.yg)("inlineCode",{parentName:"p"},"nation.nationkey = customers.nationkey"),". This condition finds a nation based on the nationkey feild for every single customer."),(0,o.yg)("p",null,"When you\u2019re writing your join conditions, you\u2019ll see available functions and columns to speed up your development. When the autocomplete appears, press \u2191, \u2193 to navigate between the suggestions and press tab to accept the suggestion."),(0,o.yg)("p",null,"Select the ",(0,o.yg)("strong",{parentName:"p"},"(6)Join Type")," according to the provider, e.g. ",(0,o.yg)("a",{parentName:"p",href:"https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-join.html"},"Databricks")," or ",(0,o.yg)("a",{parentName:"p",href:"https://docs.snowflake.com/en/user-guide/querying-joins"},"Snowflake.")),(0,o.yg)("p",null,"The ",(0,o.yg)("strong",{parentName:"p"},"(7) Expressions")," tab allows you to define the set of output columns that are going to be returned from the Gem. Here we leave it empty, which by default passes through all the input columns, from both of the joined sources, without any modifications."),(0,o.yg)("p",null,"To rename our Gem to describe its functionality, click on it\u2019s ",(0,o.yg)("strong",{parentName:"p"},"(8) Name")," or try the ",(0,o.yg)("strong",{parentName:"p"},"Auto-label")," option. 
Gem names are going to be used as query names, which means that they should be concise and composed of alphanumeric characters with no spaces."),(0,o.yg)("p",null,"Once done, press ",(0,o.yg)("strong",{parentName:"p"},"(9) Save.")),(0,o.yg)("admonition",{type:"info"},(0,o.yg)("p",{parentName:"admonition"},"To learn more about the Join Gem UI, see ",(0,o.yg)("a",{parentName:"p",href:"/concepts/project/gems"},"this page")," which illustrates features common to all Gems.")),(0,o.yg)("h2",{id:"add-a-port"},"Add a port"),(0,o.yg)("p",null,"It's easy to add an extra source to a Join Gem. Just connect and configure."),(0,o.yg)("p",null,(0,o.yg)("img",{alt:"JoinPort",src:n(45978).A,width:"2880",height:"1084"})),(0,o.yg)("p",null,"Once the source is ",(0,o.yg)("strong",{parentName:"p"},"(1) connected"),", click to ",(0,o.yg)("strong",{parentName:"p"},"(2) edit")," the ports."),(0,o.yg)("p",null,"Update the ",(0,o.yg)("strong",{parentName:"p"},"(3) port name")," from the default input ",(0,o.yg)("inlineCode",{parentName:"p"},"in2")," to a more descriptive name such as the table name, in this case ",(0,o.yg)("inlineCode",{parentName:"p"},"NATIONS"),"."),(0,o.yg)("p",null,"Fill in the ",(0,o.yg)("strong",{parentName:"p"},"(4) Join condition")," for the new table and specify the ",(0,o.yg)("strong",{parentName:"p"},"(5) Join type"),"."),(0,o.yg)("p",null,"Click ",(0,o.yg)("strong",{parentName:"p"},"(6) Save"),"."),(0,o.yg)("h2",{id:"run"},"Run"),(0,o.yg)("p",null,"When your Join Gem has the desired inputs, conditions and expressions, ",(0,o.yg)("strong",{parentName:"p"},"(7) run")," interactively to view ",(0,o.yg)("strong",{parentName:"p"},"(8)",(0,o.yg)("a",{parentName:"strong",href:"/SQL/execution/data-explorer"},"sample data"),".")))}g.isMDXComponent=!0},45978:(e,t,n)=>{n.d(t,{A:()=>a});const a=n.p+"assets/images/JoinAddPort-6d18181c8f0af98b1988159c70c72cd0.png"},89133:(e,t,n)=>{n.d(t,{A:()=>a});const 
a=n.p+"assets/images/JoinCondition-37f13df39da82e29e11bea4af6e65b53.png"}}]); \ No newline at end of file diff --git a/assets/js/1d7b424d.b6e07850.js b/assets/js/1d7b424d.b6e07850.js deleted file mode 100644 index 57cb63f9a5..0000000000 --- a/assets/js/1d7b424d.b6e07850.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[73041],{15680:(e,t,n)=>{n.d(t,{xA:()=>c,yg:()=>d});var a=n(96540);function o(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(o[n]=e[n]);return o}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(o[n]=e[n])}return o}var p=a.createContext({}),l=function(e){var t=a.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},c=function(e){var t=l(e.components);return a.createElement(p.Provider,{value:t},e.children)},m="mdxType",g={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,o=e.mdxType,r=e.originalType,p=e.parentName,c=s(e,["components","mdxType","originalType","parentName"]),m=l(n),u=o,d=m["".concat(p,".").concat(u)]||m[u]||g[u]||r;return n?a.createElement(d,i(i({ref:t},c),{},{components:n})):a.createElement(d,i({ref:t},c))}));function d(e,t){var n=arguments,o=t&&t.mdxType;if("string"==typeof e||o){var r=n.length,i=new Array(r);i[0]=u;var s={};for(var p in t)hasOwnProperty.call(t,p)&&(s[p]=t[p]);s.originalType=e,s[m]="string"==typeof e?e:o,i[1]=s;for(var l=2;l{n.r(t),n.d(t,{assets:()=>p,contentTitle:()=>i,default:()=>g,frontMatter:()=>r,metadata:()=>s,toc:()=>l});var 
a=n(58168),o=(n(96540),n(15680));const r={title:"Join",id:"data-joins",description:"Join data from multiple tables",sidebar_position:3,tags:["join","data","gem","transformation"]},i=void 0,s={unversionedId:"SQL/gems/data-joins",id:"SQL/gems/data-joins",title:"Join",description:"Join data from multiple tables",source:"@site/docs/SQL/gems/joins.md",sourceDirName:"SQL/gems",slug:"/SQL/gems/data-joins",permalink:"/SQL/gems/data-joins",draft:!1,tags:[{label:"join",permalink:"/tags/join"},{label:"data",permalink:"/tags/data"},{label:"gem",permalink:"/tags/gem"},{label:"transformation",permalink:"/tags/transformation"}],version:"current",sidebarPosition:3,frontMatter:{title:"Join",id:"data-joins",description:"Join data from multiple tables",sidebar_position:3,tags:["join","data","gem","transformation"]},sidebar:"defaultSidebar",previous:{title:"Flatten Schema",permalink:"/SQL/gems/transform/flattenschema"},next:{title:"Custom",permalink:"/SQL/gems/custom/"}},p={},l=[{value:"Add a port",id:"add-a-port",level:2},{value:"Run",id:"run",level:2}],c={toc:l},m="wrapper";function g(e){let{components:t,...r}=e;return(0,o.yg)(m,(0,a.A)({},c,r,{components:t,mdxType:"MDXLayout"}),(0,o.yg)("h3",null,(0,o.yg)("span",{class:"badge rounded-pill text-bg-light"},"SQL Gem")),(0,o.yg)("p",null,"Upon opening the Join Gem, you can see a pop-up which provides several helpful features."),(0,o.yg)("p",null,(0,o.yg)("img",{alt:"Join definition",src:n(89133).A,width:"2880",height:"1084"})),(0,o.yg)("p",null,"For transparency, you can always see the ",(0,o.yg)("strong",{parentName:"p"},"(1) Input schema")," on the left hand-side, ",(0,o.yg)("strong",{parentName:"p"},"(2) Errors")," in the footer, and have the ability to ",(0,o.yg)("strong",{parentName:"p"},"(3) Run")," the Gem on the top right."),(0,o.yg)("p",null,"To fill-in our ",(0,o.yg)("strong",{parentName:"p"},"(5) Join condition")," within the ",(0,o.yg)("strong",{parentName:"p"},"(4) Conditions")," section, start typing the input table name 
and key. For example, if we have two input tables, ",(0,o.yg)("inlineCode",{parentName:"p"},"nation")," and ",(0,o.yg)("inlineCode",{parentName:"p"},"customer"),", type ",(0,o.yg)("inlineCode",{parentName:"p"},"nation.nationkey = customers.nationkey"),". This condition finds a nation based on the nationkey feild for every single customer."),(0,o.yg)("p",null,"When you\u2019re writing your join conditions, you\u2019ll see available functions and columns to speed up your development. When the autocomplete appears, press \u2191, \u2193 to navigate between the suggestions and press tab to accept the suggestion."),(0,o.yg)("p",null,"Select the ",(0,o.yg)("strong",{parentName:"p"},"(6)Join Type")," according to the provider, e.g. ",(0,o.yg)("a",{parentName:"p",href:"https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-join.html"},"Databricks")," or ",(0,o.yg)("a",{parentName:"p",href:"https://docs.snowflake.com/en/user-guide/querying-joins"},"Snowflake.")),(0,o.yg)("p",null,"The ",(0,o.yg)("strong",{parentName:"p"},"(7) Expressions")," tab allows you to define the set of output columns that are going to be returned from the Gem. Here we leave it empty, which by default passes through all the input columns, from both of the joined sources, without any modifications."),(0,o.yg)("p",null,"To rename our Gem to describe its functionality, click on it\u2019s ",(0,o.yg)("strong",{parentName:"p"},"(8) Name")," or try the ",(0,o.yg)("strong",{parentName:"p"},"Auto-label")," option. 
Gem names are going to be used as query names, which means that they should be concise and composed of alphanumeric characters with no spaces."),(0,o.yg)("p",null,"Once done, press ",(0,o.yg)("strong",{parentName:"p"},"(9) Save.")),(0,o.yg)("admonition",{type:"info"},(0,o.yg)("p",{parentName:"admonition"},"To learn more about the Join Gem UI, see ",(0,o.yg)("a",{parentName:"p",href:"/concepts/project/gems"},"this page")," which illustrates features common to all Gems.")),(0,o.yg)("h2",{id:"add-a-port"},"Add a port"),(0,o.yg)("p",null,"It's easy to add an extra source to a Join Gem. Just connect and configure."),(0,o.yg)("p",null,(0,o.yg)("img",{alt:"JoinPort",src:n(45978).A,width:"2880",height:"1084"})),(0,o.yg)("p",null,"Once the source is ",(0,o.yg)("strong",{parentName:"p"},"(1) connected"),", click to ",(0,o.yg)("strong",{parentName:"p"},"(2) edit")," the ports."),(0,o.yg)("p",null,"Update the ",(0,o.yg)("strong",{parentName:"p"},"(3) port name")," from the default input ",(0,o.yg)("inlineCode",{parentName:"p"},"in2")," to a more descriptive name such as the table name, in this case ",(0,o.yg)("inlineCode",{parentName:"p"},"NATIONS"),"."),(0,o.yg)("p",null,"Fill in the ",(0,o.yg)("strong",{parentName:"p"},"(4) Join condition")," for the new table and specify the ",(0,o.yg)("strong",{parentName:"p"},"(5) Join type"),"."),(0,o.yg)("p",null,"Click ",(0,o.yg)("strong",{parentName:"p"},"(6) Save"),"."),(0,o.yg)("h2",{id:"run"},"Run"),(0,o.yg)("p",null,"When your Join Gem has the desired inputs, conditions and expressions, ",(0,o.yg)("strong",{parentName:"p"},"(7) run")," interactively to view ",(0,o.yg)("strong",{parentName:"p"},"(8)",(0,o.yg)("a",{parentName:"strong",href:"/SQL/execution/data-explorer"},"sample data"),".")))}g.isMDXComponent=!0},45978:(e,t,n)=>{n.d(t,{A:()=>a});const a=n.p+"assets/images/JoinAddPort-6d18181c8f0af98b1988159c70c72cd0.png"},89133:(e,t,n)=>{n.d(t,{A:()=>a});const 
a=n.p+"assets/images/JoinCondition-37f13df39da82e29e11bea4af6e65b53.png"}}]); \ No newline at end of file diff --git a/assets/js/23d439be.5f3a2fdb.js b/assets/js/23d439be.5f3a2fdb.js deleted file mode 100644 index 9e79373819..0000000000 --- a/assets/js/23d439be.5f3a2fdb.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[62745],{15680:(e,n,a)=>{a.d(n,{xA:()=>s,yg:()=>g});var t=a(96540);function r(e,n,a){return n in e?Object.defineProperty(e,n,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[n]=a,e}function o(e,n){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var t=Object.getOwnPropertySymbols(e);n&&(t=t.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),a.push.apply(a,t)}return a}function i(e){for(var n=1;n=0||(r[a]=e[a]);return r}(e,n);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(t=0;t=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var d=t.createContext({}),u=function(e){var n=t.useContext(d),a=n;return e&&(a="function"==typeof e?e(n):i(i({},n),e)),a},s=function(e){var n=u(e.components);return t.createElement(d.Provider,{value:n},e.children)},c="mdxType",p={inlineCode:"code",wrapper:function(e){var n=e.children;return t.createElement(t.Fragment,{},n)}},m=t.forwardRef((function(e,n){var a=e.components,r=e.mdxType,o=e.originalType,d=e.parentName,s=l(e,["components","mdxType","originalType","parentName"]),c=u(a),m=r,g=c["".concat(d,".").concat(m)]||c[m]||p[m]||o;return a?t.createElement(g,i(i({ref:n},s),{},{components:a})):t.createElement(g,i({ref:n},s))}));function g(e,n){var a=arguments,r=n&&n.mdxType;if("string"==typeof e||r){var o=a.length,i=new Array(o);i[0]=m;var l={};for(var d in n)hasOwnProperty.call(n,d)&&(l[d]=n[d]);l.originalType=e,l[c]="string"==typeof e?e:r,i[1]=l;for(var u=2;u{a.d(n,{A:()=>i});var t=a(96540),r=a(20053);const o={tabItem:"tabItem_Ymn6"};function 
i(e){let{children:n,hidden:a,className:i}=e;return t.createElement("div",{role:"tabpanel",className:(0,r.A)(o.tabItem,i),hidden:a},n)}},11470:(e,n,a)=>{a.d(n,{A:()=>N});var t=a(58168),r=a(96540),o=a(20053),i=a(23104),l=a(56347),d=a(57485),u=a(31682),s=a(89466);function c(e){return function(e){return r.Children.map(e,(e=>{if(!e||(0,r.isValidElement)(e)&&function(e){const{props:n}=e;return!!n&&"object"==typeof n&&"value"in n}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:n,label:a,attributes:t,default:r}}=e;return{value:n,label:a,attributes:t,default:r}}))}function p(e){const{values:n,children:a}=e;return(0,r.useMemo)((()=>{const e=n??c(a);return function(e){const n=(0,u.X)(e,((e,n)=>e.value===n.value));if(n.length>0)throw new Error(`Docusaurus error: Duplicate values "${n.map((e=>e.value)).join(", ")}" found in . Every value needs to be unique.`)}(e),e}),[n,a])}function m(e){let{value:n,tabValues:a}=e;return a.some((e=>e.value===n))}function g(e){let{queryString:n=!1,groupId:a}=e;const t=(0,l.W6)(),o=function(e){let{queryString:n=!1,groupId:a}=e;if("string"==typeof n)return n;if(!1===n)return null;if(!0===n&&!a)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. 
You can also provide an explicit value such as queryString="my-search-param".');return a??null}({queryString:n,groupId:a});return[(0,d.aZ)(o),(0,r.useCallback)((e=>{if(!o)return;const n=new URLSearchParams(t.location.search);n.set(o,e),t.replace({...t.location,search:n.toString()})}),[o,t])]}function y(e){const{defaultValue:n,queryString:a=!1,groupId:t}=e,o=p(e),[i,l]=(0,r.useState)((()=>function(e){let{defaultValue:n,tabValues:a}=e;if(0===a.length)throw new Error("Docusaurus error: the component requires at least one children component");if(n){if(!m({value:n,tabValues:a}))throw new Error(`Docusaurus error: The has a defaultValue "${n}" but none of its children has the corresponding value. Available values are: ${a.map((e=>e.value)).join(", ")}. If you intend to show no default tab, use defaultValue={null} instead.`);return n}const t=a.find((e=>e.default))??a[0];if(!t)throw new Error("Unexpected error: 0 tabValues");return t.value}({defaultValue:n,tabValues:o}))),[d,u]=g({queryString:a,groupId:t}),[c,y]=function(e){let{groupId:n}=e;const a=function(e){return e?`docusaurus.tab.${e}`:null}(n),[t,o]=(0,s.Dv)(a);return[t,(0,r.useCallback)((e=>{a&&o.set(e)}),[a,o])]}({groupId:t}),w=(()=>{const e=d??c;return m({value:e,tabValues:o})?e:null})();(0,r.useLayoutEffect)((()=>{w&&l(w)}),[w]);return{selectedValue:i,selectValue:(0,r.useCallback)((e=>{if(!m({value:e,tabValues:o}))throw new Error(`Can't select invalid tab value=${e}`);l(e),u(e),y(e)}),[u,y,o]),tabValues:o}}var w=a(92303);const f={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function b(e){let{className:n,block:a,selectedValue:l,selectValue:d,tabValues:u}=e;const s=[],{blockElementScrollPositionUntilNextRender:c}=(0,i.a_)(),p=e=>{const n=e.currentTarget,a=s.indexOf(n),t=u[a].value;t!==l&&(c(n),d(t))},m=e=>{let n=null;switch(e.key){case"Enter":p(e);break;case"ArrowRight":{const a=s.indexOf(e.currentTarget)+1;n=s[a]??s[0];break}case"ArrowLeft":{const 
a=s.indexOf(e.currentTarget)-1;n=s[a]??s[s.length-1];break}}n?.focus()};return r.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,o.A)("tabs",{"tabs--block":a},n)},u.map((e=>{let{value:n,label:a,attributes:i}=e;return r.createElement("li",(0,t.A)({role:"tab",tabIndex:l===n?0:-1,"aria-selected":l===n,key:n,ref:e=>s.push(e),onKeyDown:m,onClick:p},i,{className:(0,o.A)("tabs__item",f.tabItem,i?.className,{"tabs__item--active":l===n})}),a??n)})))}function h(e){let{lazy:n,children:a,selectedValue:t}=e;const o=(Array.isArray(a)?a:[a]).filter(Boolean);if(n){const e=o.find((e=>e.props.value===t));return e?(0,r.cloneElement)(e,{className:"margin-top--md"}):null}return r.createElement("div",{className:"margin-top--md"},o.map(((e,n)=>(0,r.cloneElement)(e,{key:n,hidden:e.props.value!==t}))))}function v(e){const n=y(e);return r.createElement("div",{className:(0,o.A)("tabs-container",f.tabList)},r.createElement(b,(0,t.A)({},e,n)),r.createElement(h,(0,t.A)({},e,n)))}function N(e){const n=(0,w.A)();return r.createElement(v,(0,t.A)({key:String(n)},e))}},42933:(e,n,a)=>{a.r(n),a.d(n,{assets:()=>s,contentTitle:()=>d,default:()=>g,frontMatter:()=>l,metadata:()=>u,toc:()=>c});var t=a(58168),r=(a(96540),a(15680)),o=a(11470),i=a(19365);const l={sidebar_position:9,title:"WindowFunction",id:"window-function",description:"Aggregate and transform Windowed data",tags:["gems","window","aggregate"]},d=void 0,u={unversionedId:"Spark/gems/transform/window-function",id:"Spark/gems/transform/window-function",title:"WindowFunction",description:"Aggregate and transform Windowed 
data",source:"@site/docs/Spark/gems/transform/window-function.md",sourceDirName:"Spark/gems/transform",slug:"/Spark/gems/transform/window-function",permalink:"/Spark/gems/transform/window-function",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"window",permalink:"/tags/window"},{label:"aggregate",permalink:"/tags/aggregate"}],version:"current",sidebarPosition:9,frontMatter:{sidebar_position:9,title:"WindowFunction",id:"window-function",description:"Aggregate and transform Windowed data",tags:["gems","window","aggregate"]},sidebar:"defaultSidebar",previous:{title:"SetOperation",permalink:"/Spark/gems/transform/set-operation"},next:{title:"BulkColumnRename",permalink:"/Spark/gems/transform/bulk-column-rename"}},s={},c=[{value:"Parameters",id:"parameters",level:3},{value:"Examples",id:"examples",level:3},{value:"Ranking Functions with Window",id:"ranking-functions-with-window",level:4},{value:"Analytical Functions with Window",id:"analytical-functions-with-window",level:4},{value:"Aggregate Functions with Window",id:"aggregate-functions-with-window",level:4}],p={toc:c},m="wrapper";function g(e){let{components:n,...l}=e;return(0,r.yg)(m,(0,t.A)({},p,l,{components:n,mdxType:"MDXLayout"}),(0,r.yg)("h3",null,(0,r.yg)("span",{class:"badge rounded-pill text-bg-light"},"Spark Gem")),(0,r.yg)("p",null,"The WindowFunction lets you define a ",(0,r.yg)("strong",{parentName:"p"},"WindowSpec")," and apply window functions on a DataFrame."),(0,r.yg)("h3",{id:"parameters"},"Parameters"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Parameter"),(0,r.yg)("th",{parentName:"tr",align:null},"Description"),(0,r.yg)("th",{parentName:"tr",align:null},"Required"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"DataFrame"),(0,r.yg)("td",{parentName:"tr",align:null},"Input 
DataFrame"),(0,r.yg)("td",{parentName:"tr",align:null},"True")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Target column"),(0,r.yg)("td",{parentName:"tr",align:null},"Output Column name"),(0,r.yg)("td",{parentName:"tr",align:null},"True")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Source expression"),(0,r.yg)("td",{parentName:"tr",align:null},"Window function expression to perform over the created Window"),(0,r.yg)("td",{parentName:"tr",align:null},"True")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Order columns"),(0,r.yg)("td",{parentName:"tr",align:null},"Columns to order by in Window. Must be a numeric type column if a ",(0,r.yg)("inlineCode",{parentName:"td"},"Range Frame")," is selected"),(0,r.yg)("td",{parentName:"tr",align:null},"Required when ",(0,r.yg)("inlineCode",{parentName:"td"},"Source expression")," has a Ranking/Analytical function ",(0,r.yg)("strong",{parentName:"td"},"OR")," when ",(0,r.yg)("inlineCode",{parentName:"td"},"Range Frame")," is selected")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Partition column"),(0,r.yg)("td",{parentName:"tr",align:null},"Column to partition by in Window"),(0,r.yg)("td",{parentName:"tr",align:null},"False")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Row frame"),(0,r.yg)("td",{parentName:"tr",align:null},"Row based frame boundary to apply on Window"),(0,r.yg)("td",{parentName:"tr",align:null},"False")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Range frame"),(0,r.yg)("td",{parentName:"tr",align:null},"Range based frame boundary to apply on Window"),(0,r.yg)("td",{parentName:"tr",align:null},"False")))),(0,r.yg)("admonition",{type:"info"},(0,r.yg)("p",{parentName:"admonition"},"When ",(0,r.yg)("inlineCode",{parentName:"p"},"Order Columns")," are not defined, an unbounded window frame 
",(0,r.yg)("inlineCode",{parentName:"p"},"(rowFrame, unboundedPreceding, unboundedFollowing)")," is used by default.")),(0,r.yg)("admonition",{type:"info"},(0,r.yg)("p",{parentName:"admonition"},"When ",(0,r.yg)("inlineCode",{parentName:"p"},"Order Columns")," are defined, a growing window frame ",(0,r.yg)("inlineCode",{parentName:"p"},"(rangeFrame, unboundedPreceding, currentRow)")," is used by default.")),(0,r.yg)("h3",{id:"examples"},"Examples"),(0,r.yg)("hr",null),(0,r.yg)("h4",{id:"ranking-functions-with-window"},"Ranking Functions with Window"),(0,r.yg)("p",null,"Examples of ranking functions are: ",(0,r.yg)("inlineCode",{parentName:"p"},"row_number()"),", ",(0,r.yg)("inlineCode",{parentName:"p"},"rank()"),", ",(0,r.yg)("inlineCode",{parentName:"p"},"dense_rank()")," and ",(0,r.yg)("inlineCode",{parentName:"p"},"ntile()")),(0,r.yg)("admonition",{type:"info"},(0,r.yg)("p",{parentName:"admonition"},"Only the default window frame ",(0,r.yg)("inlineCode",{parentName:"p"},"(rowFrame, unboundedPreceding, currentRow)")," can be used with Ranking functions")),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Example usage of Window - Ranking",src:a(39810).A,width:"1920",height:"1080"})),(0,r.yg)(o.A,{mdxType:"Tabs"},(0,r.yg)(i.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},'def rank_cust_orders(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return in0\\\n .withColumn(\n "order_number",\n row_number().over(\n Window.partitionBy(col("customer_id")).orderBy(col("order_date").asc())\n )\n )\\\n .withColumn(\n "order_recency",\n ntile(2).over(\n Window.partitionBy(col("customer_id")).orderBy(col("order_date").asc())\n )\n )\n'))),(0,r.yg)(i.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-scala"},'object rank_cust_orders {\n\n def apply(spark: SparkSession, in1: DataFrame): DataFrame = {\n import 
org.apache.spark.sql.expressions.{Window, WindowSpec}\n in1\n .withColumn(\n "order_number",\n row_number().over(\n Window.partitionBy(col("customer_id")).orderBy(col("order_date").asc)\n )\n )\n .withColumn(\n "order_recency",\n ntile(2).over(\n Window.partitionBy(col("customer_id")).orderBy(col("order_date").asc)\n )\n )\n }\n}\n')))),(0,r.yg)("hr",null),(0,r.yg)("h4",{id:"analytical-functions-with-window"},"Analytical Functions with Window"),(0,r.yg)("p",null,"Examples of analytical functions are: ",(0,r.yg)("inlineCode",{parentName:"p"},"lead()"),", ",(0,r.yg)("inlineCode",{parentName:"p"},"lag()"),", ",(0,r.yg)("inlineCode",{parentName:"p"},"cume_dist()"),", etc."),(0,r.yg)("admonition",{type:"info"},(0,r.yg)("p",{parentName:"admonition"},"Window frame for ",(0,r.yg)("inlineCode",{parentName:"p"},"lead()")," and ",(0,r.yg)("inlineCode",{parentName:"p"},"lag()")," can not be specified.")),(0,r.yg)("admonition",{type:"info"},(0,r.yg)("p",{parentName:"admonition"},"Only the default window frame ",(0,r.yg)("inlineCode",{parentName:"p"},"(rangeFrame, unboundedPreceding, currentRow)")," can be used with ",(0,r.yg)("inlineCode",{parentName:"p"},"cume_dist()"))),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Example usage of Window - Analytical",src:a(80232).A,width:"1920",height:"1080"})),(0,r.yg)(o.A,{mdxType:"Tabs"},(0,r.yg)(i.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},'def analyse_orders(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return in0\\\n .withColumn(\n "previous_order_date",\n lag(col("order_date")).over(\n Window.partitionBy(col("customer_id")).orderBy(col("order_id").asc())\n )\n )\\\n .withColumn(\n "next_order_date",\n lead(col("order_date")).over(\n Window.partitionBy(col("customer_id")).orderBy(col("order_id").asc())\n )\n 
)\n'))),(0,r.yg)(i.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-scala"},'object analyse_orders {\n def apply(spark: SparkSession, in1: DataFrame): DataFrame = {\n import org.apache.spark.sql.expressions.{Window, WindowSpec}\n in1\n .withColumn(\n "previous_order_date",\n lag(col("order_date")).over(\n Window.partitionBy(col("customer_id")).orderBy(col("order_date").asc)\n )\n )\n .withColumn(\n "next_order_date",\n lead(col("order_date")).over(\n Window.partitionBy(col("customer_id")).orderBy(col("order_date").asc)\n )\n )\n }\n}\n')))),(0,r.yg)("h4",{id:"aggregate-functions-with-window"},"Aggregate Functions with Window"),(0,r.yg)("p",null,"Examples of analytical functions are: ",(0,r.yg)("inlineCode",{parentName:"p"},"min()"),", ",(0,r.yg)("inlineCode",{parentName:"p"},"max()"),", ",(0,r.yg)("inlineCode",{parentName:"p"},"avg()"),", etc.\n",(0,r.yg)("img",{alt:"Example usage of Window - Aggregate",src:a(45843).A,width:"1847",height:"1016"})),(0,r.yg)(o.A,{mdxType:"Tabs"},(0,r.yg)(i.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},'def agg_orders(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return in0\\\n .withColumn(\n "running_avg_spend",\n avg(col("amount"))\\\n .over(Window.partitionBy(col("customer_id"))\\\n .rowsBetween(Window.unboundedPreceding, Window.currentRow))\n )\\\n .withColumn("running_max_spend", max(col("amount"))\\\n .over(Window.partitionBy(col("customer_id"))\\\n .rowsBetween(Window.unboundedPreceding, Window.currentRow)))\n'))),(0,r.yg)(i.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-scala"},'object agg_orders {\n def apply(spark: SparkSession, in1: DataFrame): DataFrame = {\n import org.apache.spark.sql.expressions.{Window, WindowSpec}\n in1\n .withColumn("running_avg_spend",\n avg(col("amount")).over(\n 
Window\n .partitionBy(col("customer_id"))\n .rowsBetween(Window.unboundedPreceding, Window.currentRow)\n )\n )\n .withColumn("running_max_spend",\n max(col("amount")).over(\n Window\n .partitionBy(col("customer_id"))\n .rowsBetween(Window.unboundedPreceding, Window.currentRow)\n )\n )\n }\n}\n')))))}g.isMDXComponent=!0},45843:(e,n,a)=>{a.d(n,{A:()=>t});const t=a.p+"assets/images/window_eg_agg-8a34b6829ab9ac882ccba44db524a903.png"},80232:(e,n,a)=>{a.d(n,{A:()=>t});const t=a.p+"assets/images/window_eg_analytical-f61582f4e18249a8c33cf5a3210326e4.png"},39810:(e,n,a)=>{a.d(n,{A:()=>t});const t=a.p+"assets/images/window_eg_ranking-50daead83be3eaf26cb68c15845a722b.png"}}]); \ No newline at end of file diff --git a/assets/js/23d439be.715e3e9d.js b/assets/js/23d439be.715e3e9d.js new file mode 100644 index 0000000000..d062cb7bab --- /dev/null +++ b/assets/js/23d439be.715e3e9d.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[62745],{15680:(e,n,a)=>{a.d(n,{xA:()=>s,yg:()=>g});var t=a(96540);function r(e,n,a){return n in e?Object.defineProperty(e,n,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[n]=a,e}function o(e,n){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var t=Object.getOwnPropertySymbols(e);n&&(t=t.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),a.push.apply(a,t)}return a}function i(e){for(var n=1;n=0||(r[a]=e[a]);return r}(e,n);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(t=0;t=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var d=t.createContext({}),u=function(e){var n=t.useContext(d),a=n;return e&&(a="function"==typeof e?e(n):i(i({},n),e)),a},s=function(e){var n=u(e.components);return t.createElement(d.Provider,{value:n},e.children)},c="mdxType",p={inlineCode:"code",wrapper:function(e){var n=e.children;return t.createElement(t.Fragment,{},n)}},m=t.forwardRef((function(e,n){var 
a=e.components,r=e.mdxType,o=e.originalType,d=e.parentName,s=l(e,["components","mdxType","originalType","parentName"]),c=u(a),m=r,g=c["".concat(d,".").concat(m)]||c[m]||p[m]||o;return a?t.createElement(g,i(i({ref:n},s),{},{components:a})):t.createElement(g,i({ref:n},s))}));function g(e,n){var a=arguments,r=n&&n.mdxType;if("string"==typeof e||r){var o=a.length,i=new Array(o);i[0]=m;var l={};for(var d in n)hasOwnProperty.call(n,d)&&(l[d]=n[d]);l.originalType=e,l[c]="string"==typeof e?e:r,i[1]=l;for(var u=2;u{a.d(n,{A:()=>i});var t=a(96540),r=a(20053);const o={tabItem:"tabItem_Ymn6"};function i(e){let{children:n,hidden:a,className:i}=e;return t.createElement("div",{role:"tabpanel",className:(0,r.A)(o.tabItem,i),hidden:a},n)}},11470:(e,n,a)=>{a.d(n,{A:()=>N});var t=a(58168),r=a(96540),o=a(20053),i=a(23104),l=a(56347),d=a(57485),u=a(31682),s=a(89466);function c(e){return function(e){return r.Children.map(e,(e=>{if(!e||(0,r.isValidElement)(e)&&function(e){const{props:n}=e;return!!n&&"object"==typeof n&&"value"in n}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:n,label:a,attributes:t,default:r}}=e;return{value:n,label:a,attributes:t,default:r}}))}function p(e){const{values:n,children:a}=e;return(0,r.useMemo)((()=>{const e=n??c(a);return function(e){const n=(0,u.X)(e,((e,n)=>e.value===n.value));if(n.length>0)throw new Error(`Docusaurus error: Duplicate values "${n.map((e=>e.value)).join(", ")}" found in . 
Every value needs to be unique.`)}(e),e}),[n,a])}function m(e){let{value:n,tabValues:a}=e;return a.some((e=>e.value===n))}function g(e){let{queryString:n=!1,groupId:a}=e;const t=(0,l.W6)(),o=function(e){let{queryString:n=!1,groupId:a}=e;if("string"==typeof n)return n;if(!1===n)return null;if(!0===n&&!a)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. You can also provide an explicit value such as queryString="my-search-param".');return a??null}({queryString:n,groupId:a});return[(0,d.aZ)(o),(0,r.useCallback)((e=>{if(!o)return;const n=new URLSearchParams(t.location.search);n.set(o,e),t.replace({...t.location,search:n.toString()})}),[o,t])]}function y(e){const{defaultValue:n,queryString:a=!1,groupId:t}=e,o=p(e),[i,l]=(0,r.useState)((()=>function(e){let{defaultValue:n,tabValues:a}=e;if(0===a.length)throw new Error("Docusaurus error: the component requires at least one children component");if(n){if(!m({value:n,tabValues:a}))throw new Error(`Docusaurus error: The has a defaultValue "${n}" but none of its children has the corresponding value. Available values are: ${a.map((e=>e.value)).join(", ")}. 
If you intend to show no default tab, use defaultValue={null} instead.`);return n}const t=a.find((e=>e.default))??a[0];if(!t)throw new Error("Unexpected error: 0 tabValues");return t.value}({defaultValue:n,tabValues:o}))),[d,u]=g({queryString:a,groupId:t}),[c,y]=function(e){let{groupId:n}=e;const a=function(e){return e?`docusaurus.tab.${e}`:null}(n),[t,o]=(0,s.Dv)(a);return[t,(0,r.useCallback)((e=>{a&&o.set(e)}),[a,o])]}({groupId:t}),w=(()=>{const e=d??c;return m({value:e,tabValues:o})?e:null})();(0,r.useLayoutEffect)((()=>{w&&l(w)}),[w]);return{selectedValue:i,selectValue:(0,r.useCallback)((e=>{if(!m({value:e,tabValues:o}))throw new Error(`Can't select invalid tab value=${e}`);l(e),u(e),y(e)}),[u,y,o]),tabValues:o}}var w=a(92303);const f={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function b(e){let{className:n,block:a,selectedValue:l,selectValue:d,tabValues:u}=e;const s=[],{blockElementScrollPositionUntilNextRender:c}=(0,i.a_)(),p=e=>{const n=e.currentTarget,a=s.indexOf(n),t=u[a].value;t!==l&&(c(n),d(t))},m=e=>{let n=null;switch(e.key){case"Enter":p(e);break;case"ArrowRight":{const a=s.indexOf(e.currentTarget)+1;n=s[a]??s[0];break}case"ArrowLeft":{const a=s.indexOf(e.currentTarget)-1;n=s[a]??s[s.length-1];break}}n?.focus()};return r.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,o.A)("tabs",{"tabs--block":a},n)},u.map((e=>{let{value:n,label:a,attributes:i}=e;return r.createElement("li",(0,t.A)({role:"tab",tabIndex:l===n?0:-1,"aria-selected":l===n,key:n,ref:e=>s.push(e),onKeyDown:m,onClick:p},i,{className:(0,o.A)("tabs__item",f.tabItem,i?.className,{"tabs__item--active":l===n})}),a??n)})))}function h(e){let{lazy:n,children:a,selectedValue:t}=e;const o=(Array.isArray(a)?a:[a]).filter(Boolean);if(n){const e=o.find((e=>e.props.value===t));return e?(0,r.cloneElement)(e,{className:"margin-top--md"}):null}return 
r.createElement("div",{className:"margin-top--md"},o.map(((e,n)=>(0,r.cloneElement)(e,{key:n,hidden:e.props.value!==t}))))}function v(e){const n=y(e);return r.createElement("div",{className:(0,o.A)("tabs-container",f.tabList)},r.createElement(b,(0,t.A)({},e,n)),r.createElement(h,(0,t.A)({},e,n)))}function N(e){const n=(0,w.A)();return r.createElement(v,(0,t.A)({key:String(n)},e))}},42933:(e,n,a)=>{a.r(n),a.d(n,{assets:()=>s,contentTitle:()=>d,default:()=>g,frontMatter:()=>l,metadata:()=>u,toc:()=>c});var t=a(58168),r=(a(96540),a(15680)),o=a(11470),i=a(19365);const l={sidebar_position:9,title:"WindowFunction",id:"window-function",description:"Aggregate and transform Windowed data",tags:["gems","window","aggregate"]},d=void 0,u={unversionedId:"Spark/gems/transform/window-function",id:"Spark/gems/transform/window-function",title:"WindowFunction",description:"Aggregate and transform Windowed data",source:"@site/docs/Spark/gems/transform/window-function.md",sourceDirName:"Spark/gems/transform",slug:"/Spark/gems/transform/window-function",permalink:"/Spark/gems/transform/window-function",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"window",permalink:"/tags/window"},{label:"aggregate",permalink:"/tags/aggregate"}],version:"current",sidebarPosition:9,frontMatter:{sidebar_position:9,title:"WindowFunction",id:"window-function",description:"Aggregate and transform Windowed data",tags:["gems","window","aggregate"]},sidebar:"defaultSidebar",previous:{title:"SetOperation",permalink:"/Spark/gems/transform/set-operation"},next:{title:"BulkColumnRename",permalink:"/Spark/gems/transform/bulk-column-rename"}},s={},c=[{value:"Parameters",id:"parameters",level:3},{value:"Examples",id:"examples",level:3},{value:"Ranking Functions with Window",id:"ranking-functions-with-window",level:4},{value:"Analytical Functions with Window",id:"analytical-functions-with-window",level:4},{value:"Aggregate Functions with 
Window",id:"aggregate-functions-with-window",level:4}],p={toc:c},m="wrapper";function g(e){let{components:n,...l}=e;return(0,r.yg)(m,(0,t.A)({},p,l,{components:n,mdxType:"MDXLayout"}),(0,r.yg)("h3",null,(0,r.yg)("span",{class:"badge"},"Spark Gem")),(0,r.yg)("p",null,"The WindowFunction lets you define a ",(0,r.yg)("strong",{parentName:"p"},"WindowSpec")," and apply window functions on a DataFrame."),(0,r.yg)("h3",{id:"parameters"},"Parameters"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Parameter"),(0,r.yg)("th",{parentName:"tr",align:null},"Description"),(0,r.yg)("th",{parentName:"tr",align:null},"Required"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"DataFrame"),(0,r.yg)("td",{parentName:"tr",align:null},"Input DataFrame"),(0,r.yg)("td",{parentName:"tr",align:null},"True")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Target column"),(0,r.yg)("td",{parentName:"tr",align:null},"Output Column name"),(0,r.yg)("td",{parentName:"tr",align:null},"True")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Source expression"),(0,r.yg)("td",{parentName:"tr",align:null},"Window function expression to perform over the created Window"),(0,r.yg)("td",{parentName:"tr",align:null},"True")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Order columns"),(0,r.yg)("td",{parentName:"tr",align:null},"Columns to order by in Window. 
Must be a numeric type column if a ",(0,r.yg)("inlineCode",{parentName:"td"},"Range Frame")," is selected"),(0,r.yg)("td",{parentName:"tr",align:null},"Required when ",(0,r.yg)("inlineCode",{parentName:"td"},"Source expression")," has a Ranking/Analytical function ",(0,r.yg)("strong",{parentName:"td"},"OR")," when ",(0,r.yg)("inlineCode",{parentName:"td"},"Range Frame")," is selected")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Partition column"),(0,r.yg)("td",{parentName:"tr",align:null},"Column to partition by in Window"),(0,r.yg)("td",{parentName:"tr",align:null},"False")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Row frame"),(0,r.yg)("td",{parentName:"tr",align:null},"Row based frame boundary to apply on Window"),(0,r.yg)("td",{parentName:"tr",align:null},"False")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Range frame"),(0,r.yg)("td",{parentName:"tr",align:null},"Range based frame boundary to apply on Window"),(0,r.yg)("td",{parentName:"tr",align:null},"False")))),(0,r.yg)("admonition",{type:"info"},(0,r.yg)("p",{parentName:"admonition"},"When ",(0,r.yg)("inlineCode",{parentName:"p"},"Order Columns")," are not defined, an unbounded window frame ",(0,r.yg)("inlineCode",{parentName:"p"},"(rowFrame, unboundedPreceding, unboundedFollowing)")," is used by default.")),(0,r.yg)("admonition",{type:"info"},(0,r.yg)("p",{parentName:"admonition"},"When ",(0,r.yg)("inlineCode",{parentName:"p"},"Order Columns")," are defined, a growing window frame ",(0,r.yg)("inlineCode",{parentName:"p"},"(rangeFrame, unboundedPreceding, currentRow)")," is used by default.")),(0,r.yg)("h3",{id:"examples"},"Examples"),(0,r.yg)("hr",null),(0,r.yg)("h4",{id:"ranking-functions-with-window"},"Ranking Functions with Window"),(0,r.yg)("p",null,"Examples of ranking functions are: ",(0,r.yg)("inlineCode",{parentName:"p"},"row_number()"),", 
",(0,r.yg)("inlineCode",{parentName:"p"},"rank()"),", ",(0,r.yg)("inlineCode",{parentName:"p"},"dense_rank()")," and ",(0,r.yg)("inlineCode",{parentName:"p"},"ntile()")),(0,r.yg)("admonition",{type:"info"},(0,r.yg)("p",{parentName:"admonition"},"Only the default window frame ",(0,r.yg)("inlineCode",{parentName:"p"},"(rowFrame, unboundedPreceding, currentRow)")," can be used with Ranking functions")),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Example usage of Window - Ranking",src:a(39810).A,width:"1920",height:"1080"})),(0,r.yg)(o.A,{mdxType:"Tabs"},(0,r.yg)(i.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},'def rank_cust_orders(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return in0\\\n .withColumn(\n "order_number",\n row_number().over(\n Window.partitionBy(col("customer_id")).orderBy(col("order_date").asc())\n )\n )\\\n .withColumn(\n "order_recency",\n ntile(2).over(\n Window.partitionBy(col("customer_id")).orderBy(col("order_date").asc())\n )\n )\n'))),(0,r.yg)(i.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-scala"},'object rank_cust_orders {\n\n def apply(spark: SparkSession, in1: DataFrame): DataFrame = {\n import org.apache.spark.sql.expressions.{Window, WindowSpec}\n in1\n .withColumn(\n "order_number",\n row_number().over(\n Window.partitionBy(col("customer_id")).orderBy(col("order_date").asc)\n )\n )\n .withColumn(\n "order_recency",\n ntile(2).over(\n Window.partitionBy(col("customer_id")).orderBy(col("order_date").asc)\n )\n )\n }\n}\n')))),(0,r.yg)("hr",null),(0,r.yg)("h4",{id:"analytical-functions-with-window"},"Analytical Functions with Window"),(0,r.yg)("p",null,"Examples of analytical functions are: ",(0,r.yg)("inlineCode",{parentName:"p"},"lead()"),", ",(0,r.yg)("inlineCode",{parentName:"p"},"lag()"),", ",(0,r.yg)("inlineCode",{parentName:"p"},"cume_dist()"),", 
etc."),(0,r.yg)("admonition",{type:"info"},(0,r.yg)("p",{parentName:"admonition"},"Window frame for ",(0,r.yg)("inlineCode",{parentName:"p"},"lead()")," and ",(0,r.yg)("inlineCode",{parentName:"p"},"lag()")," can not be specified.")),(0,r.yg)("admonition",{type:"info"},(0,r.yg)("p",{parentName:"admonition"},"Only the default window frame ",(0,r.yg)("inlineCode",{parentName:"p"},"(rangeFrame, unboundedPreceding, currentRow)")," can be used with ",(0,r.yg)("inlineCode",{parentName:"p"},"cume_dist()"))),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Example usage of Window - Analytical",src:a(80232).A,width:"1920",height:"1080"})),(0,r.yg)(o.A,{mdxType:"Tabs"},(0,r.yg)(i.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},'def analyse_orders(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return in0\\\n .withColumn(\n "previous_order_date",\n lag(col("order_date")).over(\n Window.partitionBy(col("customer_id")).orderBy(col("order_id").asc())\n )\n )\\\n .withColumn(\n "next_order_date",\n lead(col("order_date")).over(\n Window.partitionBy(col("customer_id")).orderBy(col("order_id").asc())\n )\n )\n'))),(0,r.yg)(i.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-scala"},'object analyse_orders {\n def apply(spark: SparkSession, in1: DataFrame): DataFrame = {\n import org.apache.spark.sql.expressions.{Window, WindowSpec}\n in1\n .withColumn(\n "previous_order_date",\n lag(col("order_date")).over(\n Window.partitionBy(col("customer_id")).orderBy(col("order_date").asc)\n )\n )\n .withColumn(\n "next_order_date",\n lead(col("order_date")).over(\n Window.partitionBy(col("customer_id")).orderBy(col("order_date").asc)\n )\n )\n }\n}\n')))),(0,r.yg)("h4",{id:"aggregate-functions-with-window"},"Aggregate Functions with Window"),(0,r.yg)("p",null,"Examples of analytical functions are: 
",(0,r.yg)("inlineCode",{parentName:"p"},"min()"),", ",(0,r.yg)("inlineCode",{parentName:"p"},"max()"),", ",(0,r.yg)("inlineCode",{parentName:"p"},"avg()"),", etc.\n",(0,r.yg)("img",{alt:"Example usage of Window - Aggregate",src:a(45843).A,width:"1847",height:"1016"})),(0,r.yg)(o.A,{mdxType:"Tabs"},(0,r.yg)(i.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},'def agg_orders(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return in0\\\n .withColumn(\n "running_avg_spend",\n avg(col("amount"))\\\n .over(Window.partitionBy(col("customer_id"))\\\n .rowsBetween(Window.unboundedPreceding, Window.currentRow))\n )\\\n .withColumn("running_max_spend", max(col("amount"))\\\n .over(Window.partitionBy(col("customer_id"))\\\n .rowsBetween(Window.unboundedPreceding, Window.currentRow)))\n'))),(0,r.yg)(i.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-scala"},'object agg_orders {\n def apply(spark: SparkSession, in1: DataFrame): DataFrame = {\n import org.apache.spark.sql.expressions.{Window, WindowSpec}\n in1\n .withColumn("running_avg_spend",\n avg(col("amount")).over(\n Window\n .partitionBy(col("customer_id"))\n .rowsBetween(Window.unboundedPreceding, Window.currentRow)\n )\n )\n .withColumn("running_max_spend",\n max(col("amount")).over(\n Window\n .partitionBy(col("customer_id"))\n .rowsBetween(Window.unboundedPreceding, Window.currentRow)\n )\n )\n }\n}\n')))))}g.isMDXComponent=!0},45843:(e,n,a)=>{a.d(n,{A:()=>t});const t=a.p+"assets/images/window_eg_agg-8a34b6829ab9ac882ccba44db524a903.png"},80232:(e,n,a)=>{a.d(n,{A:()=>t});const t=a.p+"assets/images/window_eg_analytical-f61582f4e18249a8c33cf5a3210326e4.png"},39810:(e,n,a)=>{a.d(n,{A:()=>t});const t=a.p+"assets/images/window_eg_ranking-50daead83be3eaf26cb68c15845a722b.png"}}]); \ No newline at end of file diff --git a/assets/js/326b65c5.b5317393.js 
b/assets/js/326b65c5.b5317393.js deleted file mode 100644 index f16ac40e61..0000000000 --- a/assets/js/326b65c5.b5317393.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[10735],{15680:(e,t,a)=>{a.d(t,{xA:()=>m,yg:()=>f});var n=a(96540);function r(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function l(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function i(e){for(var t=1;t=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var g=n.createContext({}),p=function(e){var t=n.useContext(g),a=t;return e&&(a="function"==typeof e?e(t):i(i({},t),e)),a},m=function(e){var t=p(e.components);return n.createElement(g.Provider,{value:t},e.children)},d="mdxType",y={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},s=n.forwardRef((function(e,t){var a=e.components,r=e.mdxType,l=e.originalType,g=e.parentName,m=o(e,["components","mdxType","originalType","parentName"]),d=p(a),s=r,f=d["".concat(g,".").concat(s)]||d[s]||y[s]||l;return a?n.createElement(f,i(i({ref:t},m),{},{components:a})):n.createElement(f,i({ref:t},m))}));function f(e,t){var a=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var l=a.length,i=new Array(l);i[0]=s;var o={};for(var g in t)hasOwnProperty.call(t,g)&&(o[g]=t[g]);o.originalType=e,o[d]="string"==typeof e?e:r,i[1]=o;for(var p=2;p{a.d(t,{A:()=>i});var n=a(96540),r=a(20053);const l={tabItem:"tabItem_Ymn6"};function i(e){let{children:t,hidden:a,className:i}=e;return n.createElement("div",{role:"tabpanel",className:(0,r.A)(l.tabItem,i),hidden:a},t)}},11470:(e,t,a)=>{a.d(t,{A:()=>v});var 
n=a(58168),r=a(96540),l=a(20053),i=a(23104),o=a(56347),g=a(57485),p=a(31682),m=a(89466);function d(e){return function(e){return r.Children.map(e,(e=>{if(!e||(0,r.isValidElement)(e)&&function(e){const{props:t}=e;return!!t&&"object"==typeof t&&"value"in t}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:t,label:a,attributes:n,default:r}}=e;return{value:t,label:a,attributes:n,default:r}}))}function y(e){const{values:t,children:a}=e;return(0,r.useMemo)((()=>{const e=t??d(a);return function(e){const t=(0,p.X)(e,((e,t)=>e.value===t.value));if(t.length>0)throw new Error(`Docusaurus error: Duplicate values "${t.map((e=>e.value)).join(", ")}" found in . Every value needs to be unique.`)}(e),e}),[t,a])}function s(e){let{value:t,tabValues:a}=e;return a.some((e=>e.value===t))}function f(e){let{queryString:t=!1,groupId:a}=e;const n=(0,o.W6)(),l=function(e){let{queryString:t=!1,groupId:a}=e;if("string"==typeof t)return t;if(!1===t)return null;if(!0===t&&!a)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. 
You can also provide an explicit value such as queryString="my-search-param".');return a??null}({queryString:t,groupId:a});return[(0,g.aZ)(l),(0,r.useCallback)((e=>{if(!l)return;const t=new URLSearchParams(n.location.search);t.set(l,e),n.replace({...n.location,search:t.toString()})}),[l,n])]}function u(e){const{defaultValue:t,queryString:a=!1,groupId:n}=e,l=y(e),[i,o]=(0,r.useState)((()=>function(e){let{defaultValue:t,tabValues:a}=e;if(0===a.length)throw new Error("Docusaurus error: the component requires at least one children component");if(t){if(!s({value:t,tabValues:a}))throw new Error(`Docusaurus error: The has a defaultValue "${t}" but none of its children has the corresponding value. Available values are: ${a.map((e=>e.value)).join(", ")}. If you intend to show no default tab, use defaultValue={null} instead.`);return t}const n=a.find((e=>e.default))??a[0];if(!n)throw new Error("Unexpected error: 0 tabValues");return n.value}({defaultValue:t,tabValues:l}))),[g,p]=f({queryString:a,groupId:n}),[d,u]=function(e){let{groupId:t}=e;const a=function(e){return e?`docusaurus.tab.${e}`:null}(t),[n,l]=(0,m.Dv)(a);return[n,(0,r.useCallback)((e=>{a&&l.set(e)}),[a,l])]}({groupId:n}),c=(()=>{const e=g??d;return s({value:e,tabValues:l})?e:null})();(0,r.useLayoutEffect)((()=>{c&&o(c)}),[c]);return{selectedValue:i,selectValue:(0,r.useCallback)((e=>{if(!s({value:e,tabValues:l}))throw new Error(`Can't select invalid tab value=${e}`);o(e),p(e),u(e)}),[p,u,l]),tabValues:l}}var c=a(92303);const N={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function h(e){let{className:t,block:a,selectedValue:o,selectValue:g,tabValues:p}=e;const m=[],{blockElementScrollPositionUntilNextRender:d}=(0,i.a_)(),y=e=>{const t=e.currentTarget,a=m.indexOf(t),n=p[a].value;n!==o&&(d(t),g(n))},s=e=>{let t=null;switch(e.key){case"Enter":y(e);break;case"ArrowRight":{const a=m.indexOf(e.currentTarget)+1;t=m[a]??m[0];break}case"ArrowLeft":{const 
a=m.indexOf(e.currentTarget)-1;t=m[a]??m[m.length-1];break}}t?.focus()};return r.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,l.A)("tabs",{"tabs--block":a},t)},p.map((e=>{let{value:t,label:a,attributes:i}=e;return r.createElement("li",(0,n.A)({role:"tab",tabIndex:o===t?0:-1,"aria-selected":o===t,key:t,ref:e=>m.push(e),onKeyDown:s,onClick:y},i,{className:(0,l.A)("tabs__item",N.tabItem,i?.className,{"tabs__item--active":o===t})}),a??t)})))}function b(e){let{lazy:t,children:a,selectedValue:n}=e;const l=(Array.isArray(a)?a:[a]).filter(Boolean);if(t){const e=l.find((e=>e.props.value===n));return e?(0,r.cloneElement)(e,{className:"margin-top--md"}):null}return r.createElement("div",{className:"margin-top--md"},l.map(((e,t)=>(0,r.cloneElement)(e,{key:t,hidden:e.props.value!==n}))))}function D(e){const t=u(e);return r.createElement("div",{className:(0,l.A)("tabs-container",N.tabList)},r.createElement(h,(0,n.A)({},e,t)),r.createElement(b,(0,n.A)({},e,t)))}function v(e){const t=(0,c.A)();return r.createElement(D,(0,n.A)({key:String(t)},e))}},15479:(e,t,a)=>{a.d(t,{A:()=>g});var n=a(96540),r=a(86025);const l=e=>{let{children:t}=e;return n.createElement("div",{style:{position:"relative",display:"flex","justify-content":"center","align-items":"center"}},t)},i=e=>{let{source:t,children:a}=e;return n.createElement("img",{src:(0,r.A)(t),style:{"object-fit":"cover"}})},o=e=>{let{slides:t}=e;const[a,r]=(0,n.useState)(0);return n.createElement(l,null,n.createElement("i",{class:"fa fa-chevron-left",onClick:()=>{r(0===a?t.length-1:a-1)},style:{position:"absolute",top:"50%",left:"0px","font-size":"2rem"}}),n.createElement("i",{class:"fa fa-chevron-right",onClick:()=>{r(a===t.length-1?0:a+1)},style:{position:"absolute",top:"50%",right:"0px","font-size":"2rem"}}),n.createElement("div",{style:{padding:"30px"}},n.createElement(i,{source:t[a].image}),t[a].description))};function g(e){let{ImageData:t}=e;return 
n.createElement(o,{slides:t,style:{"font-family":" sans-serif","text-align":"center"}})}},94799:(e,t,a)=>{a.r(t),a.d(t,{ImageData:()=>s,assets:()=>d,contentTitle:()=>p,default:()=>b,frontMatter:()=>g,metadata:()=>m,toc:()=>y});var n=a(58168),r=(a(96540),a(15680)),l=a(11470),i=a(19365),o=a(15479);const g={sidebar_position:1,title:"Join",id:"join",description:"Join one or more DataFrames on conditions",tags:["gems","join","inner","outer","left join","right join","hints","merge"]},p=void 0,m={unversionedId:"Spark/gems/join-split/join",id:"Spark/gems/join-split/join",title:"Join",description:"Join one or more DataFrames on conditions",source:"@site/docs/Spark/gems/join-split/join.md",sourceDirName:"Spark/gems/join-split",slug:"/Spark/gems/join-split/join",permalink:"/Spark/gems/join-split/join",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"join",permalink:"/tags/join"},{label:"inner",permalink:"/tags/inner"},{label:"outer",permalink:"/tags/outer"},{label:"left join",permalink:"/tags/left-join"},{label:"right join",permalink:"/tags/right-join"},{label:"hints",permalink:"/tags/hints"},{label:"merge",permalink:"/tags/merge"}],version:"current",sidebarPosition:1,frontMatter:{sidebar_position:1,title:"Join",id:"join",description:"Join one or more DataFrames on conditions",tags:["gems","join","inner","outer","left join","right join","hints","merge"]},sidebar:"defaultSidebar",previous:{title:"Join & Split",permalink:"/Spark/gems/join-split/"},next:{title:"Repartition",permalink:"/Spark/gems/join-split/Repartition"}},d={},y=[{value:"Parameters",id:"parameters",level:2},{value:"Adding a new input",id:"adding-a-new-input",level:2},{value:"Examples",id:"examples",level:2},{value:"Example 1 - Join with three DataFrame inputs",id:"example-1---join-with-three-dataframe-inputs",level:3},{value:"Example 2 - Join with Hints",id:"example-2---join-with-hints",level:3},{value:"Example 3 - Join with Propagate 
Columns",id:"example-3---join-with-propagate-columns",level:3},{value:"Types of Join",id:"types-of-join",level:2},{value:"Table A",id:"table-a",level:3},{value:"Table B",id:"table-b",level:3},{value:"INNER JOIN",id:"inner-join",level:3},{value:"LEFT JOIN",id:"left-join",level:3},{value:"RIGHT JOIN",id:"right-join",level:3},{value:"FULL OUTER JOIN",id:"full-outer-join",level:3},{value:"LEFT SEMI JOIN",id:"left-semi-join",level:3},{value:"LEFT ANTI JOIN",id:"left-anti-join",level:3}],s=[{image:"/img/join/join-eg3-conditions.png",description:(0,r.yg)("h3",{style:{padding:"10px"}},"Step 1 - Specify join condition")},{image:"/img/join/join-eg3-expressions.png",description:(0,r.yg)("h3",{style:{padding:"10px"}},"Step 2 - Choose required columns from dataframe")},{image:"/img/join/join-eg3-advanced.png",description:(0,r.yg)("h3",{style:{padding:"10px"}},"Step 3 - Select Propagate all columns from in0")},{image:"/img/join/join-eg3-output.png",description:(0,r.yg)("h3",{style:{padding:"10px"}},"Output - Output with all columns from in0 and selected columns from in1")}],f=e=>function(t){return console.warn("Component "+e+" was not imported, exported, or provided by MDXProvider as global scope"),(0,r.yg)("div",t)},u=f("Tabs1"),c=f("Tabs2"),N={toc:y,ImageData:s},h="wrapper";function b(e){let{components:t,...g}=e;return(0,r.yg)(h,(0,n.A)({},N,g,{components:t,mdxType:"MDXLayout"}),(0,r.yg)("h3",null,(0,r.yg)("span",{class:"badge rounded-pill text-bg-light"},"Spark Gem")),(0,r.yg)("p",null,"Joins 2 or more DataFrames based on the given 
configuration."),(0,r.yg)("h2",{id:"parameters"},"Parameters"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Description"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"DataFrame 1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"First input DataFrame"),(0,r.yg)("td",{parentName:"tr",align:"left"},"True")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"DataFrame 2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Second input DataFrame"),(0,r.yg)("td",{parentName:"tr",align:"left"},"True")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"DataFrame N"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Nth input DataFrame"),(0,r.yg)("td",{parentName:"tr",align:"left"},"False")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Join Condition (Conditions tab)"),(0,r.yg)("td",{parentName:"tr",align:"left"},"The join condition specifies how the rows will be combined."),(0,r.yg)("td",{parentName:"tr",align:"left"},"True")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Type (Conditions tab)"),(0,r.yg)("td",{parentName:"tr",align:"left"},"The type of JOIN ",(0,r.yg)("inlineCode",{parentName:"td"},"(Inner, Full Outer, Left , Right , Left Semi, Left Anti)")),(0,r.yg)("td",{parentName:"tr",align:"left"},"True")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Where Clause (Conditions tab)"),(0,r.yg)("td",{parentName:"tr",align:"left"},(0,r.yg)("inlineCode",{parentName:"td"},"Filter")," applied after the Join 
operation"),(0,r.yg)("td",{parentName:"tr",align:"left"},"False")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Target column (Expressions)"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Output column name"),(0,r.yg)("td",{parentName:"tr",align:"left"},"False")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Expression (Expressions)"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Expression to compute target column. If no expression is given, then all columns from all DataFrames would reflect in output."),(0,r.yg)("td",{parentName:"tr",align:"left"},"False")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Hint Type (Advanced)"),(0,r.yg)("td",{parentName:"tr",align:"left"},"The type of Join Hint (",(0,r.yg)("inlineCode",{parentName:"td"},"Broadcast"),", ",(0,r.yg)("inlineCode",{parentName:"td"},"Merge"),", ",(0,r.yg)("inlineCode",{parentName:"td"},"Shuffle Hash"),", ",(0,r.yg)("inlineCode",{parentName:"td"},"Shuffle Replicate NL")," or ",(0,r.yg)("inlineCode",{parentName:"td"},"None"),"). To read more about join hints ",(0,r.yg)("a",{parentName:"td",href:"https://developpaper.com/analysis-of-five-join-strategies-of-spark/"},"click here")),(0,r.yg)("td",{parentName:"tr",align:"left"},"False")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Propagate All Columns (Advanced)"),(0,r.yg)("td",{parentName:"tr",align:"left"},"If ",(0,r.yg)("inlineCode",{parentName:"td"},"true"),", all columns from that DataFrame would be propagated to output DataFrame. 
Equivalent to selecting ",(0,r.yg)("inlineCode",{parentName:"td"},"df.*")," for the selected DataFrame."),(0,r.yg)("td",{parentName:"tr",align:"left"},"False")))),(0,r.yg)("h2",{id:"adding-a-new-input"},"Adding a new input"),(0,r.yg)("ol",null,(0,r.yg)("li",{parentName:"ol"},"Click on the plus icon to add a new input."),(0,r.yg)("li",{parentName:"ol"},"Then add your condition expression for the newly added input.\n",(0,r.yg)("img",{alt:"Example usage of Join - Add new input to join gem",src:a(92826).A,width:"1616",height:"802"}))),(0,r.yg)("h2",{id:"examples"},"Examples"),(0,r.yg)("h3",{id:"example-1---join-with-three-dataframe-inputs"},"Example 1 - Join with three DataFrame inputs"),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Example usage of Join - Join three DataFrame inputs",src:a(87303).A,width:"1618",height:"806"})),(0,r.yg)(l.A,{mdxType:"Tabs"},(0,r.yg)(i.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},'def Join_1(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return in0\\\n .alias("in0")\\\n .join(in1.alias("in1"), (col("in0.customer_id") == col("in1.customer_id")), "inner")\\\n .join(in2.alias("in2"), (col("in1.customer_id") == col("in2.customer_id")), "inner")\n'))),(0,r.yg)(i.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-scala"},'object Join_1 {\n def apply(spark: SparkSession, in0: DataFrame, in1: DataFrame, in2: DataFrame): DataFrame =\n in0\n .as("in0")\n .join(in1.as("in1"), col("in0.customer_id") === col("in1.customer_id"), "inner")\n .join(in2.as("in2"), col("in1.customer_id") === col("in2.customer_id"), "inner")\n}\n')))),(0,r.yg)("h3",{id:"example-2---join-with-hints"},"Example 2 - Join with Hints"),(0,r.yg)("p",null,"Join hints allow users to suggest the join strategy that Spark should use. 
For a quick overview, see Spark's Join Hints ",(0,r.yg)("a",{parentName:"p",href:"https://spark.apache.org/docs/3.0.0/sql-ref-syntax-qry-select-hints.html#join-hints"},"documentation"),"."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Example usage of Join - Join with hints",src:a(68065).A,width:"1644",height:"668"})),(0,r.yg)(u,{mdxType:"Tabs1"},(0,r.yg)(i.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},'def Join_1(spark: SparkSession, in0: DataFrame, in1: DataFrame, in2: DataFrame) -> DataFrame:\n df1 = in1.hint("merge")\n\n return in0\\\n .alias("in0")\\\n .hint("broadcast")\\\n .join(df1.alias("in1"), col("in0.customer_id") == col("in1.customer_id"), "inner")\\\n .join(in2.alias("in2"), col("in0.customer_id") == col("in1.customer_id"), "inner")\n'))),(0,r.yg)(i.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-scala"},'object Join_1 {\n def apply(spark: SparkSession, in0: DataFrame, in1: DataFrame, in2: DataFrame): DataFrame =\n in0\n .as("in0")\n .hint("broadcast")\n .join(in1.as("in1").hint("merge"), col("in0.customer_id") === col("in1.customer_id"), "inner")\n .join(in2.as("in2"), col("in1.customer_id") === col("in2.customer_id"), "inner")\n}\n')))),(0,r.yg)("h3",{id:"example-3---join-with-propagate-columns"},"Example 3 - Join with Propagate Columns"),(0,r.yg)(o.A,{ImageData:s,mdxType:"App"}),(0,r.yg)(c,{mdxType:"Tabs2"},(0,r.yg)(i.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},'def Join_1(spark: SparkSession, in0: DataFrame, in1: DataFrame, ) -> DataFrame:\n return in0\\\n .alias("in0")\\\n .join(in1.alias("in1"), (col("in0.customer_id") == col("in1.customer_id")), "inner")\\\n .select(*[col("in1.email").alias("email"), col("in1.phone").alias("phone")], 
col("in0.*"))\n'))),(0,r.yg)(i.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-scala"},'object Join_1 {\n\n def apply(spark: SparkSession, in0: DataFrame, in1: DataFrame): DataFrame =\n in0\n .as("in0")\n .join(in1.as("in1"), col("in0.customer_id") === col("in1.customer_id"), "inner")\n .select(col("in1.phone").as("phone"), col("in1.email").as("email"), col("in0.*"))\n\n}\n')))),(0,r.yg)("h2",{id:"types-of-join"},"Types of Join"),(0,r.yg)("p",null,"Suppose there are 2 tables TableA and TableB with only 2 columns (Ref, Data) and following contents:"),(0,r.yg)("h3",{id:"table-a"},"Table A"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"Ref"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Data"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A11")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A12")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A13")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A21")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"3"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A31")))),(0,r.yg)("h3",{id:"table-b"},"Table 
B"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"Ref"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Data"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B11")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B21")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B22")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B23")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"4"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B41")))),(0,r.yg)("h3",{id:"inner-join"},"INNER JOIN"),(0,r.yg)("p",null,"Inner Join on column Ref will return columns from both the tables and only the matching records as long as the condition is 
satisfied:"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"Ref"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Data"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Ref"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Data"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A11"),(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B11")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A12"),(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B11")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A13"),(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B11")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A21"),(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B21")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A21"),(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B22")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A21"),(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B23")))),(0,r.yg)("h3",{id:"left-join"},"LEFT JOIN"),(0,r.yg)("p",null,"Left Join (or Left Outer join) on column Ref will return columns from both 
the tables and match records with records from the left table. The result-set will contain null for the rows for which there is no matching row on the right side."),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"Ref"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Data"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Ref"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Data"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A11"),(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B11")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A12"),(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B11")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A13"),(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B11")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A21"),(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B21")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A21"),(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B22")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A21"),(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Da
ta_B23")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"3"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A31"),(0,r.yg)("td",{parentName:"tr",align:"left"},"NULL"),(0,r.yg)("td",{parentName:"tr",align:"left"},"NULL")))),(0,r.yg)("h3",{id:"right-join"},"RIGHT JOIN"),(0,r.yg)("p",null,"Right Join (or Right Outer join) on column Ref will return columns from both the tables and match records with records from the right table. The result-set will contain null for the rows for which there is no matching row on the left side."),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"Ref"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Data"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Ref"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Data"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A11"),(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B11")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A12"),(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B11")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A13"),(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B11")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A21"),(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B21")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0
,r.yg)("td",{parentName:"tr",align:"left"},"Data_A21"),(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B22")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A21"),(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B23")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"NULL"),(0,r.yg)("td",{parentName:"tr",align:"left"},"NULL"),(0,r.yg)("td",{parentName:"tr",align:"left"},"4"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B41")))),(0,r.yg)("h3",{id:"full-outer-join"},"FULL OUTER JOIN"),(0,r.yg)("p",null,"Full Outer Join on column Ref will return columns from both the tables and matching records with records from the left table and records from the right table . The result-set will contain NULL values for the rows for which there is no matching."),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"Ref"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Data"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Ref"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Data"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A11"),(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B11")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A12"),(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B11")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A13"),(0,r.yg)("td",{parentName:"
tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B11")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A21"),(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B21")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A21"),(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B22")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A21"),(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B23")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"3"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A31"),(0,r.yg)("td",{parentName:"tr",align:"left"},"NULL"),(0,r.yg)("td",{parentName:"tr",align:"left"},"NULL")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"NULL"),(0,r.yg)("td",{parentName:"tr",align:"left"},"NULL"),(0,r.yg)("td",{parentName:"tr",align:"left"},"4"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B41")))),(0,r.yg)("h3",{id:"left-semi-join"},"LEFT SEMI JOIN"),(0,r.yg)("p",null,"Left Semi Join on column Ref will return columns only from left table and matching records only from left 
table."),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"Ref"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Data"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B11")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B21")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B22")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B23")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"3"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B41")))),(0,r.yg)("h3",{id:"left-anti-join"},"LEFT ANTI JOIN"),(0,r.yg)("p",null,"Left anti join on column Ref will return columns from the left for non-matched records :"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"Ref"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Data"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Ref"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Data"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"3"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A31"),(0,r.yg)("td",{parentName:"tr",align:"left"},"NULL"),(0,r.yg)("td",{parentName:"tr",align:"left"},"NULL")))))}b.isMDXComponent=!0},92826:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/add_new_input-2ce622c38f55019de1600c2bd52c1041.png"},68065:(e,t,a)=>{a.d(t,{A:()=>n});const 
n=a.p+"assets/images/join_with_hints-0bed10c358bdb7a5a6710f283cf3704b.png"},87303:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/join_without_hints-3c24895b7c58f07bea0b290f5ca459f7.png"}}]); \ No newline at end of file diff --git a/assets/js/326b65c5.ed0f6532.js b/assets/js/326b65c5.ed0f6532.js new file mode 100644 index 0000000000..39dc632f05 --- /dev/null +++ b/assets/js/326b65c5.ed0f6532.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[10735],{15680:(e,t,a)=>{a.d(t,{xA:()=>m,yg:()=>f});var n=a(96540);function r(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function l(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function i(e){for(var t=1;t=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var g=n.createContext({}),p=function(e){var t=n.useContext(g),a=t;return e&&(a="function"==typeof e?e(t):i(i({},t),e)),a},m=function(e){var t=p(e.components);return n.createElement(g.Provider,{value:t},e.children)},d="mdxType",y={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},s=n.forwardRef((function(e,t){var a=e.components,r=e.mdxType,l=e.originalType,g=e.parentName,m=o(e,["components","mdxType","originalType","parentName"]),d=p(a),s=r,f=d["".concat(g,".").concat(s)]||d[s]||y[s]||l;return a?n.createElement(f,i(i({ref:t},m),{},{components:a})):n.createElement(f,i({ref:t},m))}));function f(e,t){var a=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var l=a.length,i=new Array(l);i[0]=s;var o={};for(var g in t)hasOwnProperty.call(t,g)&&(o[g]=t[g]);o.originalType=e,o[d]="string"==typeof e?e:r,i[1]=o;for(var 
p=2;p{a.d(t,{A:()=>i});var n=a(96540),r=a(20053);const l={tabItem:"tabItem_Ymn6"};function i(e){let{children:t,hidden:a,className:i}=e;return n.createElement("div",{role:"tabpanel",className:(0,r.A)(l.tabItem,i),hidden:a},t)}},11470:(e,t,a)=>{a.d(t,{A:()=>v});var n=a(58168),r=a(96540),l=a(20053),i=a(23104),o=a(56347),g=a(57485),p=a(31682),m=a(89466);function d(e){return function(e){return r.Children.map(e,(e=>{if(!e||(0,r.isValidElement)(e)&&function(e){const{props:t}=e;return!!t&&"object"==typeof t&&"value"in t}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:t,label:a,attributes:n,default:r}}=e;return{value:t,label:a,attributes:n,default:r}}))}function y(e){const{values:t,children:a}=e;return(0,r.useMemo)((()=>{const e=t??d(a);return function(e){const t=(0,p.X)(e,((e,t)=>e.value===t.value));if(t.length>0)throw new Error(`Docusaurus error: Duplicate values "${t.map((e=>e.value)).join(", ")}" found in . Every value needs to be unique.`)}(e),e}),[t,a])}function s(e){let{value:t,tabValues:a}=e;return a.some((e=>e.value===t))}function f(e){let{queryString:t=!1,groupId:a}=e;const n=(0,o.W6)(),l=function(e){let{queryString:t=!1,groupId:a}=e;if("string"==typeof t)return t;if(!1===t)return null;if(!0===t&&!a)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. 
You can also provide an explicit value such as queryString="my-search-param".');return a??null}({queryString:t,groupId:a});return[(0,g.aZ)(l),(0,r.useCallback)((e=>{if(!l)return;const t=new URLSearchParams(n.location.search);t.set(l,e),n.replace({...n.location,search:t.toString()})}),[l,n])]}function u(e){const{defaultValue:t,queryString:a=!1,groupId:n}=e,l=y(e),[i,o]=(0,r.useState)((()=>function(e){let{defaultValue:t,tabValues:a}=e;if(0===a.length)throw new Error("Docusaurus error: the component requires at least one children component");if(t){if(!s({value:t,tabValues:a}))throw new Error(`Docusaurus error: The has a defaultValue "${t}" but none of its children has the corresponding value. Available values are: ${a.map((e=>e.value)).join(", ")}. If you intend to show no default tab, use defaultValue={null} instead.`);return t}const n=a.find((e=>e.default))??a[0];if(!n)throw new Error("Unexpected error: 0 tabValues");return n.value}({defaultValue:t,tabValues:l}))),[g,p]=f({queryString:a,groupId:n}),[d,u]=function(e){let{groupId:t}=e;const a=function(e){return e?`docusaurus.tab.${e}`:null}(t),[n,l]=(0,m.Dv)(a);return[n,(0,r.useCallback)((e=>{a&&l.set(e)}),[a,l])]}({groupId:n}),c=(()=>{const e=g??d;return s({value:e,tabValues:l})?e:null})();(0,r.useLayoutEffect)((()=>{c&&o(c)}),[c]);return{selectedValue:i,selectValue:(0,r.useCallback)((e=>{if(!s({value:e,tabValues:l}))throw new Error(`Can't select invalid tab value=${e}`);o(e),p(e),u(e)}),[p,u,l]),tabValues:l}}var c=a(92303);const N={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function h(e){let{className:t,block:a,selectedValue:o,selectValue:g,tabValues:p}=e;const m=[],{blockElementScrollPositionUntilNextRender:d}=(0,i.a_)(),y=e=>{const t=e.currentTarget,a=m.indexOf(t),n=p[a].value;n!==o&&(d(t),g(n))},s=e=>{let t=null;switch(e.key){case"Enter":y(e);break;case"ArrowRight":{const a=m.indexOf(e.currentTarget)+1;t=m[a]??m[0];break}case"ArrowLeft":{const 
a=m.indexOf(e.currentTarget)-1;t=m[a]??m[m.length-1];break}}t?.focus()};return r.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,l.A)("tabs",{"tabs--block":a},t)},p.map((e=>{let{value:t,label:a,attributes:i}=e;return r.createElement("li",(0,n.A)({role:"tab",tabIndex:o===t?0:-1,"aria-selected":o===t,key:t,ref:e=>m.push(e),onKeyDown:s,onClick:y},i,{className:(0,l.A)("tabs__item",N.tabItem,i?.className,{"tabs__item--active":o===t})}),a??t)})))}function b(e){let{lazy:t,children:a,selectedValue:n}=e;const l=(Array.isArray(a)?a:[a]).filter(Boolean);if(t){const e=l.find((e=>e.props.value===n));return e?(0,r.cloneElement)(e,{className:"margin-top--md"}):null}return r.createElement("div",{className:"margin-top--md"},l.map(((e,t)=>(0,r.cloneElement)(e,{key:t,hidden:e.props.value!==n}))))}function D(e){const t=u(e);return r.createElement("div",{className:(0,l.A)("tabs-container",N.tabList)},r.createElement(h,(0,n.A)({},e,t)),r.createElement(b,(0,n.A)({},e,t)))}function v(e){const t=(0,c.A)();return r.createElement(D,(0,n.A)({key:String(t)},e))}},15479:(e,t,a)=>{a.d(t,{A:()=>g});var n=a(96540),r=a(86025);const l=e=>{let{children:t}=e;return n.createElement("div",{style:{position:"relative",display:"flex","justify-content":"center","align-items":"center"}},t)},i=e=>{let{source:t,children:a}=e;return n.createElement("img",{src:(0,r.A)(t),style:{"object-fit":"cover"}})},o=e=>{let{slides:t}=e;const[a,r]=(0,n.useState)(0);return n.createElement(l,null,n.createElement("i",{class:"fa fa-chevron-left",onClick:()=>{r(0===a?t.length-1:a-1)},style:{position:"absolute",top:"50%",left:"0px","font-size":"2rem"}}),n.createElement("i",{class:"fa fa-chevron-right",onClick:()=>{r(a===t.length-1?0:a+1)},style:{position:"absolute",top:"50%",right:"0px","font-size":"2rem"}}),n.createElement("div",{style:{padding:"30px"}},n.createElement(i,{source:t[a].image}),t[a].description))};function g(e){let{ImageData:t}=e;return 
n.createElement(o,{slides:t,style:{"font-family":" sans-serif","text-align":"center"}})}},94799:(e,t,a)=>{a.r(t),a.d(t,{ImageData:()=>s,assets:()=>d,contentTitle:()=>p,default:()=>b,frontMatter:()=>g,metadata:()=>m,toc:()=>y});var n=a(58168),r=(a(96540),a(15680)),l=a(11470),i=a(19365),o=a(15479);const g={sidebar_position:1,title:"Join",id:"join",description:"Join one or more DataFrames on conditions",tags:["gems","join","inner","outer","left join","right join","hints","merge"]},p=void 0,m={unversionedId:"Spark/gems/join-split/join",id:"Spark/gems/join-split/join",title:"Join",description:"Join one or more DataFrames on conditions",source:"@site/docs/Spark/gems/join-split/join.md",sourceDirName:"Spark/gems/join-split",slug:"/Spark/gems/join-split/join",permalink:"/Spark/gems/join-split/join",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"join",permalink:"/tags/join"},{label:"inner",permalink:"/tags/inner"},{label:"outer",permalink:"/tags/outer"},{label:"left join",permalink:"/tags/left-join"},{label:"right join",permalink:"/tags/right-join"},{label:"hints",permalink:"/tags/hints"},{label:"merge",permalink:"/tags/merge"}],version:"current",sidebarPosition:1,frontMatter:{sidebar_position:1,title:"Join",id:"join",description:"Join one or more DataFrames on conditions",tags:["gems","join","inner","outer","left join","right join","hints","merge"]},sidebar:"defaultSidebar",previous:{title:"Join & Split",permalink:"/Spark/gems/join-split/"},next:{title:"Repartition",permalink:"/Spark/gems/join-split/Repartition"}},d={},y=[{value:"Parameters",id:"parameters",level:2},{value:"Adding a new input",id:"adding-a-new-input",level:2},{value:"Examples",id:"examples",level:2},{value:"Example 1 - Join with three DataFrame inputs",id:"example-1---join-with-three-dataframe-inputs",level:3},{value:"Example 2 - Join with Hints",id:"example-2---join-with-hints",level:3},{value:"Example 3 - Join with Propagate 
Columns",id:"example-3---join-with-propagate-columns",level:3},{value:"Types of Join",id:"types-of-join",level:2},{value:"Table A",id:"table-a",level:3},{value:"Table B",id:"table-b",level:3},{value:"INNER JOIN",id:"inner-join",level:3},{value:"LEFT JOIN",id:"left-join",level:3},{value:"RIGHT JOIN",id:"right-join",level:3},{value:"FULL OUTER JOIN",id:"full-outer-join",level:3},{value:"LEFT SEMI JOIN",id:"left-semi-join",level:3},{value:"LEFT ANTI JOIN",id:"left-anti-join",level:3}],s=[{image:"/img/join/join-eg3-conditions.png",description:(0,r.yg)("h3",{style:{padding:"10px"}},"Step 1 - Specify join condition")},{image:"/img/join/join-eg3-expressions.png",description:(0,r.yg)("h3",{style:{padding:"10px"}},"Step 2 - Choose required columns from dataframe")},{image:"/img/join/join-eg3-advanced.png",description:(0,r.yg)("h3",{style:{padding:"10px"}},"Step 3 - Select Propagate all columns from in0")},{image:"/img/join/join-eg3-output.png",description:(0,r.yg)("h3",{style:{padding:"10px"}},"Output - Output with all columns from in0 and selected columns from in1")}],f=e=>function(t){return console.warn("Component "+e+" was not imported, exported, or provided by MDXProvider as global scope"),(0,r.yg)("div",t)},u=f("Tabs1"),c=f("Tabs2"),N={toc:y,ImageData:s},h="wrapper";function b(e){let{components:t,...g}=e;return(0,r.yg)(h,(0,n.A)({},N,g,{components:t,mdxType:"MDXLayout"}),(0,r.yg)("h3",null,(0,r.yg)("span",{class:"badge"},"Spark Gem")),(0,r.yg)("p",null,"Joins 2 or more DataFrames based on the given configuration."),(0,r.yg)("h2",{id:"parameters"},"Parameters"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Description"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"DataFrame 
1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"First input DataFrame"),(0,r.yg)("td",{parentName:"tr",align:"left"},"True")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"DataFrame 2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Second input DataFrame"),(0,r.yg)("td",{parentName:"tr",align:"left"},"True")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"DataFrame N"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Nth input DataFrame"),(0,r.yg)("td",{parentName:"tr",align:"left"},"False")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Join Condition (Conditions tab)"),(0,r.yg)("td",{parentName:"tr",align:"left"},"The join condition specifies how the rows will be combined."),(0,r.yg)("td",{parentName:"tr",align:"left"},"True")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Type (Conditions tab)"),(0,r.yg)("td",{parentName:"tr",align:"left"},"The type of JOIN ",(0,r.yg)("inlineCode",{parentName:"td"},"(Inner, Full Outer, Left , Right , Left Semi, Left Anti)")),(0,r.yg)("td",{parentName:"tr",align:"left"},"True")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Where Clause (Conditions tab)"),(0,r.yg)("td",{parentName:"tr",align:"left"},(0,r.yg)("inlineCode",{parentName:"td"},"Filter")," applied after the Join operation"),(0,r.yg)("td",{parentName:"tr",align:"left"},"False")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Target column (Expressions)"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Output column name"),(0,r.yg)("td",{parentName:"tr",align:"left"},"False")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Expression (Expressions)"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Expression to compute target column. 
If no expression is given, then all columns from all DataFrames would reflect in output."),(0,r.yg)("td",{parentName:"tr",align:"left"},"False")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Hint Type (Advanced)"),(0,r.yg)("td",{parentName:"tr",align:"left"},"The type of Join Hint (",(0,r.yg)("inlineCode",{parentName:"td"},"Broadcast"),", ",(0,r.yg)("inlineCode",{parentName:"td"},"Merge"),", ",(0,r.yg)("inlineCode",{parentName:"td"},"Shuffle Hash"),", ",(0,r.yg)("inlineCode",{parentName:"td"},"Shuffle Replicate NL")," or ",(0,r.yg)("inlineCode",{parentName:"td"},"None"),"). To read more about join hints ",(0,r.yg)("a",{parentName:"td",href:"https://developpaper.com/analysis-of-five-join-strategies-of-spark/"},"click here")),(0,r.yg)("td",{parentName:"tr",align:"left"},"False")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Propagate All Columns (Advanced)"),(0,r.yg)("td",{parentName:"tr",align:"left"},"If ",(0,r.yg)("inlineCode",{parentName:"td"},"true"),", all columns from that DataFrame would be propagated to output DataFrame. 
Equivalent to selecting ",(0,r.yg)("inlineCode",{parentName:"td"},"df.*")," for the selected DataFrame."),(0,r.yg)("td",{parentName:"tr",align:"left"},"False")))),(0,r.yg)("h2",{id:"adding-a-new-input"},"Adding a new input"),(0,r.yg)("ol",null,(0,r.yg)("li",{parentName:"ol"},"Click on the plus icon to add a new input."),(0,r.yg)("li",{parentName:"ol"},"Then add your condition expression for the newly added input.\n",(0,r.yg)("img",{alt:"Example usage of Join - Add new input to join gem",src:a(92826).A,width:"1616",height:"802"}))),(0,r.yg)("h2",{id:"examples"},"Examples"),(0,r.yg)("h3",{id:"example-1---join-with-three-dataframe-inputs"},"Example 1 - Join with three DataFrame inputs"),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Example usage of Join - Join three DataFrame inputs",src:a(87303).A,width:"1618",height:"806"})),(0,r.yg)(l.A,{mdxType:"Tabs"},(0,r.yg)(i.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},'def Join_1(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return in0\\\n .alias("in0")\\\n .join(in1.alias("in1"), (col("in0.customer_id") == col("in1.customer_id")), "inner")\\\n .join(in2.alias("in2"), (col("in1.customer_id") == col("in2.customer_id")), "inner")\n'))),(0,r.yg)(i.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-scala"},'object Join_1 {\n def apply(spark: SparkSession, in0: DataFrame, in1: DataFrame, in2: DataFrame): DataFrame =\n in0\n .as("in0")\n .join(in1.as("in1"), col("in0.customer_id") === col("in1.customer_id"), "inner")\n .join(in2.as("in2"), col("in1.customer_id") === col("in2.customer_id"), "inner")\n}\n')))),(0,r.yg)("h3",{id:"example-2---join-with-hints"},"Example 2 - Join with Hints"),(0,r.yg)("p",null,"Join hints allow users to suggest the join strategy that Spark should use. 
For a quick overview, see Spark's Join Hints ",(0,r.yg)("a",{parentName:"p",href:"https://spark.apache.org/docs/3.0.0/sql-ref-syntax-qry-select-hints.html#join-hints"},"documentation"),"."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Example usage of Join - Join with hints",src:a(68065).A,width:"1644",height:"668"})),(0,r.yg)(u,{mdxType:"Tabs1"},(0,r.yg)(i.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},'def Join_1(spark: SparkSession, in0: DataFrame, in1: DataFrame, in2: DataFrame) -> DataFrame:\n df1 = in1.hint("merge")\n\n return in0\\\n .alias("in0")\\\n .hint("broadcast")\\\n .join(df1.alias("in1"), col("in0.customer_id") == col("in1.customer_id"), "inner")\\\n .join(in2.alias("in2"), col("in0.customer_id") == col("in1.customer_id"), "inner")\n'))),(0,r.yg)(i.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-scala"},'object Join_1 {\n def apply(spark: SparkSession, in0: DataFrame, in1: DataFrame, in2: DataFrame): DataFrame =\n in0\n .as("in0")\n .hint("broadcast")\n .join(in1.as("in1").hint("merge"), col("in0.customer_id") === col("in1.customer_id"), "inner")\n .join(in2.as("in2"), col("in1.customer_id") === col("in2.customer_id"), "inner")\n}\n')))),(0,r.yg)("h3",{id:"example-3---join-with-propagate-columns"},"Example 3 - Join with Propagate Columns"),(0,r.yg)(o.A,{ImageData:s,mdxType:"App"}),(0,r.yg)(c,{mdxType:"Tabs2"},(0,r.yg)(i.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},'def Join_1(spark: SparkSession, in0: DataFrame, in1: DataFrame, ) -> DataFrame:\n return in0\\\n .alias("in0")\\\n .join(in1.alias("in1"), (col("in0.customer_id") == col("in1.customer_id")), "inner")\\\n .select(*[col("in1.email").alias("email"), col("in1.phone").alias("phone")], 
col("in0.*"))\n'))),(0,r.yg)(i.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-scala"},'object Join_1 {\n\n def apply(spark: SparkSession, in0: DataFrame, in1: DataFrame): DataFrame =\n in0\n .as("in0")\n .join(in1.as("in1"), col("in0.customer_id") === col("in1.customer_id"), "inner")\n .select(col("in1.phone").as("phone"), col("in1.email").as("email"), col("in0.*"))\n\n}\n')))),(0,r.yg)("h2",{id:"types-of-join"},"Types of Join"),(0,r.yg)("p",null,"Suppose there are 2 tables TableA and TableB with only 2 columns (Ref, Data) and following contents:"),(0,r.yg)("h3",{id:"table-a"},"Table A"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"Ref"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Data"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A11")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A12")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A13")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A21")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"3"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A31")))),(0,r.yg)("h3",{id:"table-b"},"Table 
B"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"Ref"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Data"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B11")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B21")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B22")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B23")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"4"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B41")))),(0,r.yg)("h3",{id:"inner-join"},"INNER JOIN"),(0,r.yg)("p",null,"Inner Join on column Ref will return columns from both the tables and only the matching records as long as the condition is 
satisfied:"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"Ref"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Data"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Ref"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Data"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A11"),(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B11")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A12"),(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B11")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A13"),(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B11")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A21"),(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B21")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A21"),(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B22")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A21"),(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B23")))),(0,r.yg)("h3",{id:"left-join"},"LEFT JOIN"),(0,r.yg)("p",null,"Left Join (or Left Outer join) on column Ref will return columns from both 
the tables and match records with records from the left table. The result-set will contain null for the rows for which there is no matching row on the right side."),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"Ref"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Data"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Ref"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Data"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A11"),(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B11")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A12"),(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B11")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A13"),(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B11")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A21"),(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B21")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A21"),(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B22")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A21"),(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Da
ta_B23")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"3"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A31"),(0,r.yg)("td",{parentName:"tr",align:"left"},"NULL"),(0,r.yg)("td",{parentName:"tr",align:"left"},"NULL")))),(0,r.yg)("h3",{id:"right-join"},"RIGHT JOIN"),(0,r.yg)("p",null,"Right Join (or Right Outer join) on column Ref will return columns from both the tables and match records with records from the right table. The result-set will contain null for the rows for which there is no matching row on the left side."),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"Ref"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Data"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Ref"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Data"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A11"),(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B11")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A12"),(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B11")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A13"),(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B11")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A21"),(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B21")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0
,r.yg)("td",{parentName:"tr",align:"left"},"Data_A21"),(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B22")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A21"),(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B23")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"NULL"),(0,r.yg)("td",{parentName:"tr",align:"left"},"NULL"),(0,r.yg)("td",{parentName:"tr",align:"left"},"4"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B41")))),(0,r.yg)("h3",{id:"full-outer-join"},"FULL OUTER JOIN"),(0,r.yg)("p",null,"Full Outer Join on column Ref will return columns from both the tables and matching records with records from the left table and records from the right table . The result-set will contain NULL values for the rows for which there is no matching."),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"Ref"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Data"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Ref"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Data"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A11"),(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B11")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A12"),(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B11")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A13"),(0,r.yg)("td",{parentName:"
tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B11")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A21"),(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B21")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A21"),(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B22")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A21"),(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B23")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"3"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A31"),(0,r.yg)("td",{parentName:"tr",align:"left"},"NULL"),(0,r.yg)("td",{parentName:"tr",align:"left"},"NULL")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"NULL"),(0,r.yg)("td",{parentName:"tr",align:"left"},"NULL"),(0,r.yg)("td",{parentName:"tr",align:"left"},"4"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B41")))),(0,r.yg)("h3",{id:"left-semi-join"},"LEFT SEMI JOIN"),(0,r.yg)("p",null,"Left Semi Join on column Ref will return columns only from left table and matching records only from left 
table."),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"Ref"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Data"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B11")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B21")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B22")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B23")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"3"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_B41")))),(0,r.yg)("h3",{id:"left-anti-join"},"LEFT ANTI JOIN"),(0,r.yg)("p",null,"Left anti join on column Ref will return columns from the left for non-matched records :"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"Ref"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Data"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Ref"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Data"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"3"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Data_A31"),(0,r.yg)("td",{parentName:"tr",align:"left"},"NULL"),(0,r.yg)("td",{parentName:"tr",align:"left"},"NULL")))))}b.isMDXComponent=!0},92826:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/add_new_input-2ce622c38f55019de1600c2bd52c1041.png"},68065:(e,t,a)=>{a.d(t,{A:()=>n});const 
n=a.p+"assets/images/join_with_hints-0bed10c358bdb7a5a6710f283cf3704b.png"},87303:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/join_without_hints-3c24895b7c58f07bea0b290f5ca459f7.png"}}]); \ No newline at end of file diff --git a/assets/js/332c99fa.cbba2096.js b/assets/js/332c99fa.be6a2311.js similarity index 81% rename from assets/js/332c99fa.cbba2096.js rename to assets/js/332c99fa.be6a2311.js index 2839a40d64..31d0bf3556 100644 --- a/assets/js/332c99fa.cbba2096.js +++ b/assets/js/332c99fa.be6a2311.js @@ -1 +1 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[22777],{15680:(e,t,r)=>{r.d(t,{xA:()=>c,yg:()=>f});var a=r(96540);function n(e,t,r){return t in e?Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[t]=r,e}function l(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,a)}return r}function i(e){for(var t=1;t=0||(n[r]=e[r]);return n}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(n[r]=e[r])}return n}var u=a.createContext({}),s=function(e){var t=a.useContext(u),r=t;return e&&(r="function"==typeof e?e(t):i(i({},t),e)),r},c=function(e){var t=s(e.components);return a.createElement(u.Provider,{value:t},e.children)},m="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},d=a.forwardRef((function(e,t){var r=e.components,n=e.mdxType,l=e.originalType,u=e.parentName,c=o(e,["components","mdxType","originalType","parentName"]),m=s(r),d=n,f=m["".concat(u,".").concat(d)]||m[d]||p[d]||l;return r?a.createElement(f,i(i({ref:t},c),{},{components:r})):a.createElement(f,i({ref:t},c))}));function f(e,t){var r=arguments,n=t&&t.mdxType;if("string"==typeof e||n){var l=r.length,i=new Array(l);i[0]=d;var o={};for(var u in 
t)hasOwnProperty.call(t,u)&&(o[u]=t[u]);o.originalType=e,o[m]="string"==typeof e?e:n,i[1]=o;for(var s=2;s{r.d(t,{A:()=>i});var a=r(96540),n=r(20053);const l={tabItem:"tabItem_Ymn6"};function i(e){let{children:t,hidden:r,className:i}=e;return a.createElement("div",{role:"tabpanel",className:(0,n.A)(l.tabItem,i),hidden:r},t)}},11470:(e,t,r)=>{r.d(t,{A:()=>k});var a=r(58168),n=r(96540),l=r(20053),i=r(23104),o=r(56347),u=r(57485),s=r(31682),c=r(89466);function m(e){return function(e){return n.Children.map(e,(e=>{if(!e||(0,n.isValidElement)(e)&&function(e){const{props:t}=e;return!!t&&"object"==typeof t&&"value"in t}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:t,label:r,attributes:a,default:n}}=e;return{value:t,label:r,attributes:a,default:n}}))}function p(e){const{values:t,children:r}=e;return(0,n.useMemo)((()=>{const e=t??m(r);return function(e){const t=(0,s.X)(e,((e,t)=>e.value===t.value));if(t.length>0)throw new Error(`Docusaurus error: Duplicate values "${t.map((e=>e.value)).join(", ")}" found in . Every value needs to be unique.`)}(e),e}),[t,r])}function d(e){let{value:t,tabValues:r}=e;return r.some((e=>e.value===t))}function f(e){let{queryString:t=!1,groupId:r}=e;const a=(0,o.W6)(),l=function(e){let{queryString:t=!1,groupId:r}=e;if("string"==typeof t)return t;if(!1===t)return null;if(!0===t&&!r)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. 
You can also provide an explicit value such as queryString="my-search-param".');return r??null}({queryString:t,groupId:r});return[(0,u.aZ)(l),(0,n.useCallback)((e=>{if(!l)return;const t=new URLSearchParams(a.location.search);t.set(l,e),a.replace({...a.location,search:t.toString()})}),[l,a])]}function g(e){const{defaultValue:t,queryString:r=!1,groupId:a}=e,l=p(e),[i,o]=(0,n.useState)((()=>function(e){let{defaultValue:t,tabValues:r}=e;if(0===r.length)throw new Error("Docusaurus error: the component requires at least one children component");if(t){if(!d({value:t,tabValues:r}))throw new Error(`Docusaurus error: The has a defaultValue "${t}" but none of its children has the corresponding value. Available values are: ${r.map((e=>e.value)).join(", ")}. If you intend to show no default tab, use defaultValue={null} instead.`);return t}const a=r.find((e=>e.default))??r[0];if(!a)throw new Error("Unexpected error: 0 tabValues");return a.value}({defaultValue:t,tabValues:l}))),[u,s]=f({queryString:r,groupId:a}),[m,g]=function(e){let{groupId:t}=e;const r=function(e){return e?`docusaurus.tab.${e}`:null}(t),[a,l]=(0,c.Dv)(r);return[a,(0,n.useCallback)((e=>{r&&l.set(e)}),[r,l])]}({groupId:a}),b=(()=>{const e=u??m;return d({value:e,tabValues:l})?e:null})();(0,n.useLayoutEffect)((()=>{b&&o(b)}),[b]);return{selectedValue:i,selectValue:(0,n.useCallback)((e=>{if(!d({value:e,tabValues:l}))throw new Error(`Can't select invalid tab value=${e}`);o(e),s(e),g(e)}),[s,g,l]),tabValues:l}}var b=r(92303);const y={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function h(e){let{className:t,block:r,selectedValue:o,selectValue:u,tabValues:s}=e;const c=[],{blockElementScrollPositionUntilNextRender:m}=(0,i.a_)(),p=e=>{const t=e.currentTarget,r=c.indexOf(t),a=s[r].value;a!==o&&(m(t),u(a))},d=e=>{let t=null;switch(e.key){case"Enter":p(e);break;case"ArrowRight":{const r=c.indexOf(e.currentTarget)+1;t=c[r]??c[0];break}case"ArrowLeft":{const 
r=c.indexOf(e.currentTarget)-1;t=c[r]??c[c.length-1];break}}t?.focus()};return n.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,l.A)("tabs",{"tabs--block":r},t)},s.map((e=>{let{value:t,label:r,attributes:i}=e;return n.createElement("li",(0,a.A)({role:"tab",tabIndex:o===t?0:-1,"aria-selected":o===t,key:t,ref:e=>c.push(e),onKeyDown:d,onClick:p},i,{className:(0,l.A)("tabs__item",y.tabItem,i?.className,{"tabs__item--active":o===t})}),r??t)})))}function v(e){let{lazy:t,children:r,selectedValue:a}=e;const l=(Array.isArray(r)?r:[r]).filter(Boolean);if(t){const e=l.find((e=>e.props.value===a));return e?(0,n.cloneElement)(e,{className:"margin-top--md"}):null}return n.createElement("div",{className:"margin-top--md"},l.map(((e,t)=>(0,n.cloneElement)(e,{key:t,hidden:e.props.value!==a}))))}function w(e){const t=g(e);return n.createElement("div",{className:(0,l.A)("tabs-container",y.tabList)},n.createElement(h,(0,a.A)({},e,t)),n.createElement(v,(0,a.A)({},e,t)))}function k(e){const t=(0,b.A)();return n.createElement(w,(0,a.A)({key:String(t)},e))}},71361:(e,t,r)=>{r.r(t),r.d(t,{assets:()=>c,contentTitle:()=>u,default:()=>f,frontMatter:()=>o,metadata:()=>s,toc:()=>m});var a=r(58168),n=(r(96540),r(15680)),l=r(11470),i=r(19365);const o={sidebar_position:6,title:"Limit",id:"limit",description:"Limit the number of rows",tags:["gems","limit"]},u=void 0,s={unversionedId:"Spark/gems/transform/limit",id:"Spark/gems/transform/limit",title:"Limit",description:"Limit the number of rows",source:"@site/docs/Spark/gems/transform/limit.md",sourceDirName:"Spark/gems/transform",slug:"/Spark/gems/transform/limit",permalink:"/Spark/gems/transform/limit",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"limit",permalink:"/tags/limit"}],version:"current",sidebarPosition:6,frontMatter:{sidebar_position:6,title:"Limit",id:"limit",description:"Limit the number of 
rows",tags:["gems","limit"]},sidebar:"defaultSidebar",previous:{title:"SchemaTransform",permalink:"/Spark/gems/transform/schema-transform"},next:{title:"Deduplicate",permalink:"/Spark/gems/transform/deduplicate"}},c={},m=[{value:"Parameters",id:"parameters",level:3},{value:"Example",id:"example",level:3},{value:"Spark Code",id:"spark-code",level:3}],p={toc:m},d="wrapper";function f(e){let{components:t,...o}=e;return(0,n.yg)(d,(0,a.A)({},p,o,{components:t,mdxType:"MDXLayout"}),(0,n.yg)("h3",null,(0,n.yg)("span",{class:"badge rounded-pill text-bg-light"},"Spark Gem")),(0,n.yg)("p",null,"Limits the number of rows in the output."),(0,n.yg)("h3",{id:"parameters"},"Parameters"),(0,n.yg)("table",null,(0,n.yg)("thead",{parentName:"table"},(0,n.yg)("tr",{parentName:"thead"},(0,n.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,n.yg)("th",{parentName:"tr",align:"left"},"Description"),(0,n.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,n.yg)("tbody",{parentName:"table"},(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},"DataFrame"),(0,n.yg)("td",{parentName:"tr",align:"left"},"Input DataFrame"),(0,n.yg)("td",{parentName:"tr",align:"left"},"True")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},"Limit"),(0,n.yg)("td",{parentName:"tr",align:"left"},"Number of rows required in output (Allowed range: ","[0, 2",(0,n.yg)("sup",null,"31")," -1]",")"),(0,n.yg)("td",{parentName:"tr",align:"left"},"True")))),(0,n.yg)("h3",{id:"example"},"Example"),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"Example usage of Limit",src:r(26648).A,width:"940",height:"268"})),(0,n.yg)("h3",{id:"spark-code"},"Spark Code"),(0,n.yg)(l.A,{mdxType:"Tabs"},(0,n.yg)(i.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-py"},"def limit(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return 
in0.limit(10)\n\n"))),(0,n.yg)(i.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-scala"},"object limit {\n def apply(spark: SparkSession, in: DataFrame): DataFrame =\n in.limit(10)\n}\n")))))}f.isMDXComponent=!0},26648:(e,t,r)=>{r.d(t,{A:()=>a});const a=r.p+"assets/images/limit_eg_1-541be60d2f406a01a92c7ae2ca28cf29.png"}}]); \ No newline at end of file +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[22777],{15680:(e,t,r)=>{r.d(t,{xA:()=>c,yg:()=>f});var a=r(96540);function n(e,t,r){return t in e?Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[t]=r,e}function l(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,a)}return r}function i(e){for(var t=1;t=0||(n[r]=e[r]);return n}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(n[r]=e[r])}return n}var u=a.createContext({}),s=function(e){var t=a.useContext(u),r=t;return e&&(r="function"==typeof e?e(t):i(i({},t),e)),r},c=function(e){var t=s(e.components);return a.createElement(u.Provider,{value:t},e.children)},m="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},d=a.forwardRef((function(e,t){var r=e.components,n=e.mdxType,l=e.originalType,u=e.parentName,c=o(e,["components","mdxType","originalType","parentName"]),m=s(r),d=n,f=m["".concat(u,".").concat(d)]||m[d]||p[d]||l;return r?a.createElement(f,i(i({ref:t},c),{},{components:r})):a.createElement(f,i({ref:t},c))}));function f(e,t){var r=arguments,n=t&&t.mdxType;if("string"==typeof e||n){var l=r.length,i=new Array(l);i[0]=d;var o={};for(var u in t)hasOwnProperty.call(t,u)&&(o[u]=t[u]);o.originalType=e,o[m]="string"==typeof e?e:n,i[1]=o;for(var 
s=2;s{r.d(t,{A:()=>i});var a=r(96540),n=r(20053);const l={tabItem:"tabItem_Ymn6"};function i(e){let{children:t,hidden:r,className:i}=e;return a.createElement("div",{role:"tabpanel",className:(0,n.A)(l.tabItem,i),hidden:r},t)}},11470:(e,t,r)=>{r.d(t,{A:()=>k});var a=r(58168),n=r(96540),l=r(20053),i=r(23104),o=r(56347),u=r(57485),s=r(31682),c=r(89466);function m(e){return function(e){return n.Children.map(e,(e=>{if(!e||(0,n.isValidElement)(e)&&function(e){const{props:t}=e;return!!t&&"object"==typeof t&&"value"in t}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:t,label:r,attributes:a,default:n}}=e;return{value:t,label:r,attributes:a,default:n}}))}function p(e){const{values:t,children:r}=e;return(0,n.useMemo)((()=>{const e=t??m(r);return function(e){const t=(0,s.X)(e,((e,t)=>e.value===t.value));if(t.length>0)throw new Error(`Docusaurus error: Duplicate values "${t.map((e=>e.value)).join(", ")}" found in . Every value needs to be unique.`)}(e),e}),[t,r])}function d(e){let{value:t,tabValues:r}=e;return r.some((e=>e.value===t))}function f(e){let{queryString:t=!1,groupId:r}=e;const a=(0,o.W6)(),l=function(e){let{queryString:t=!1,groupId:r}=e;if("string"==typeof t)return t;if(!1===t)return null;if(!0===t&&!r)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. 
You can also provide an explicit value such as queryString="my-search-param".');return r??null}({queryString:t,groupId:r});return[(0,u.aZ)(l),(0,n.useCallback)((e=>{if(!l)return;const t=new URLSearchParams(a.location.search);t.set(l,e),a.replace({...a.location,search:t.toString()})}),[l,a])]}function g(e){const{defaultValue:t,queryString:r=!1,groupId:a}=e,l=p(e),[i,o]=(0,n.useState)((()=>function(e){let{defaultValue:t,tabValues:r}=e;if(0===r.length)throw new Error("Docusaurus error: the component requires at least one children component");if(t){if(!d({value:t,tabValues:r}))throw new Error(`Docusaurus error: The has a defaultValue "${t}" but none of its children has the corresponding value. Available values are: ${r.map((e=>e.value)).join(", ")}. If you intend to show no default tab, use defaultValue={null} instead.`);return t}const a=r.find((e=>e.default))??r[0];if(!a)throw new Error("Unexpected error: 0 tabValues");return a.value}({defaultValue:t,tabValues:l}))),[u,s]=f({queryString:r,groupId:a}),[m,g]=function(e){let{groupId:t}=e;const r=function(e){return e?`docusaurus.tab.${e}`:null}(t),[a,l]=(0,c.Dv)(r);return[a,(0,n.useCallback)((e=>{r&&l.set(e)}),[r,l])]}({groupId:a}),b=(()=>{const e=u??m;return d({value:e,tabValues:l})?e:null})();(0,n.useLayoutEffect)((()=>{b&&o(b)}),[b]);return{selectedValue:i,selectValue:(0,n.useCallback)((e=>{if(!d({value:e,tabValues:l}))throw new Error(`Can't select invalid tab value=${e}`);o(e),s(e),g(e)}),[s,g,l]),tabValues:l}}var b=r(92303);const y={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function h(e){let{className:t,block:r,selectedValue:o,selectValue:u,tabValues:s}=e;const c=[],{blockElementScrollPositionUntilNextRender:m}=(0,i.a_)(),p=e=>{const t=e.currentTarget,r=c.indexOf(t),a=s[r].value;a!==o&&(m(t),u(a))},d=e=>{let t=null;switch(e.key){case"Enter":p(e);break;case"ArrowRight":{const r=c.indexOf(e.currentTarget)+1;t=c[r]??c[0];break}case"ArrowLeft":{const 
r=c.indexOf(e.currentTarget)-1;t=c[r]??c[c.length-1];break}}t?.focus()};return n.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,l.A)("tabs",{"tabs--block":r},t)},s.map((e=>{let{value:t,label:r,attributes:i}=e;return n.createElement("li",(0,a.A)({role:"tab",tabIndex:o===t?0:-1,"aria-selected":o===t,key:t,ref:e=>c.push(e),onKeyDown:d,onClick:p},i,{className:(0,l.A)("tabs__item",y.tabItem,i?.className,{"tabs__item--active":o===t})}),r??t)})))}function v(e){let{lazy:t,children:r,selectedValue:a}=e;const l=(Array.isArray(r)?r:[r]).filter(Boolean);if(t){const e=l.find((e=>e.props.value===a));return e?(0,n.cloneElement)(e,{className:"margin-top--md"}):null}return n.createElement("div",{className:"margin-top--md"},l.map(((e,t)=>(0,n.cloneElement)(e,{key:t,hidden:e.props.value!==a}))))}function w(e){const t=g(e);return n.createElement("div",{className:(0,l.A)("tabs-container",y.tabList)},n.createElement(h,(0,a.A)({},e,t)),n.createElement(v,(0,a.A)({},e,t)))}function k(e){const t=(0,b.A)();return n.createElement(w,(0,a.A)({key:String(t)},e))}},71361:(e,t,r)=>{r.r(t),r.d(t,{assets:()=>c,contentTitle:()=>u,default:()=>f,frontMatter:()=>o,metadata:()=>s,toc:()=>m});var a=r(58168),n=(r(96540),r(15680)),l=r(11470),i=r(19365);const o={sidebar_position:6,title:"Limit",id:"limit",description:"Limit the number of rows",tags:["gems","limit"]},u=void 0,s={unversionedId:"Spark/gems/transform/limit",id:"Spark/gems/transform/limit",title:"Limit",description:"Limit the number of rows",source:"@site/docs/Spark/gems/transform/limit.md",sourceDirName:"Spark/gems/transform",slug:"/Spark/gems/transform/limit",permalink:"/Spark/gems/transform/limit",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"limit",permalink:"/tags/limit"}],version:"current",sidebarPosition:6,frontMatter:{sidebar_position:6,title:"Limit",id:"limit",description:"Limit the number of 
rows",tags:["gems","limit"]},sidebar:"defaultSidebar",previous:{title:"SchemaTransform",permalink:"/Spark/gems/transform/schema-transform"},next:{title:"Deduplicate",permalink:"/Spark/gems/transform/deduplicate"}},c={},m=[{value:"Parameters",id:"parameters",level:3},{value:"Example",id:"example",level:3},{value:"Spark Code",id:"spark-code",level:3}],p={toc:m},d="wrapper";function f(e){let{components:t,...o}=e;return(0,n.yg)(d,(0,a.A)({},p,o,{components:t,mdxType:"MDXLayout"}),(0,n.yg)("h3",null,(0,n.yg)("span",{class:"badge"},"Spark Gem")),(0,n.yg)("p",null,"Limits the number of rows in the output."),(0,n.yg)("h3",{id:"parameters"},"Parameters"),(0,n.yg)("table",null,(0,n.yg)("thead",{parentName:"table"},(0,n.yg)("tr",{parentName:"thead"},(0,n.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,n.yg)("th",{parentName:"tr",align:"left"},"Description"),(0,n.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,n.yg)("tbody",{parentName:"table"},(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},"DataFrame"),(0,n.yg)("td",{parentName:"tr",align:"left"},"Input DataFrame"),(0,n.yg)("td",{parentName:"tr",align:"left"},"True")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},"Limit"),(0,n.yg)("td",{parentName:"tr",align:"left"},"Number of rows required in output (Allowed range: ","[0, 2",(0,n.yg)("sup",null,"31")," -1]",")"),(0,n.yg)("td",{parentName:"tr",align:"left"},"True")))),(0,n.yg)("h3",{id:"example"},"Example"),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"Example usage of Limit",src:r(26648).A,width:"940",height:"268"})),(0,n.yg)("h3",{id:"spark-code"},"Spark Code"),(0,n.yg)(l.A,{mdxType:"Tabs"},(0,n.yg)(i.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-py"},"def limit(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return 
in0.limit(10)\n\n"))),(0,n.yg)(i.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-scala"},"object limit {\n def apply(spark: SparkSession, in: DataFrame): DataFrame =\n in.limit(10)\n}\n")))))}f.isMDXComponent=!0},26648:(e,t,r)=>{r.d(t,{A:()=>a});const a=r.p+"assets/images/limit_eg_1-541be60d2f406a01a92c7ae2ca28cf29.png"}}]); \ No newline at end of file diff --git a/assets/js/47cf1bcd.3f1ec781.js b/assets/js/47cf1bcd.71e9a197.js similarity index 74% rename from assets/js/47cf1bcd.3f1ec781.js rename to assets/js/47cf1bcd.71e9a197.js index 4bc51ef94e..97b5cb1bd8 100644 --- a/assets/js/47cf1bcd.3f1ec781.js +++ b/assets/js/47cf1bcd.71e9a197.js @@ -1 +1 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[59811],{15680:(e,t,r)=>{r.d(t,{xA:()=>c,yg:()=>f});var a=r(96540);function n(e,t,r){return t in e?Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[t]=r,e}function s(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,a)}return r}function o(e){for(var t=1;t=0||(n[r]=e[r]);return n}(e,t);if(Object.getOwnPropertySymbols){var s=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(n[r]=e[r])}return n}var u=a.createContext({}),i=function(e){var t=a.useContext(u),r=t;return e&&(r="function"==typeof e?e(t):o(o({},t),e)),r},c=function(e){var t=i(e.components);return a.createElement(u.Provider,{value:t},e.children)},m="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},d=a.forwardRef((function(e,t){var r=e.components,n=e.mdxType,s=e.originalType,u=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),m=i(r),d=n,f=m["".concat(u,".").concat(d)]||m[d]||p[d]||s;return 
r?a.createElement(f,o(o({ref:t},c),{},{components:r})):a.createElement(f,o({ref:t},c))}));function f(e,t){var r=arguments,n=t&&t.mdxType;if("string"==typeof e||n){var s=r.length,o=new Array(s);o[0]=d;var l={};for(var u in t)hasOwnProperty.call(t,u)&&(l[u]=t[u]);l.originalType=e,l[m]="string"==typeof e?e:n,o[1]=l;for(var i=2;i{r.d(t,{A:()=>o});var a=r(96540),n=r(20053);const s={tabItem:"tabItem_Ymn6"};function o(e){let{children:t,hidden:r,className:o}=e;return a.createElement("div",{role:"tabpanel",className:(0,n.A)(s.tabItem,o),hidden:r},t)}},11470:(e,t,r)=>{r.d(t,{A:()=>k});var a=r(58168),n=r(96540),s=r(20053),o=r(23104),l=r(56347),u=r(57485),i=r(31682),c=r(89466);function m(e){return function(e){return n.Children.map(e,(e=>{if(!e||(0,n.isValidElement)(e)&&function(e){const{props:t}=e;return!!t&&"object"==typeof t&&"value"in t}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:t,label:r,attributes:a,default:n}}=e;return{value:t,label:r,attributes:a,default:n}}))}function p(e){const{values:t,children:r}=e;return(0,n.useMemo)((()=>{const e=t??m(r);return function(e){const t=(0,i.X)(e,((e,t)=>e.value===t.value));if(t.length>0)throw new Error(`Docusaurus error: Duplicate values "${t.map((e=>e.value)).join(", ")}" found in . Every value needs to be unique.`)}(e),e}),[t,r])}function d(e){let{value:t,tabValues:r}=e;return r.some((e=>e.value===t))}function f(e){let{queryString:t=!1,groupId:r}=e;const a=(0,l.W6)(),s=function(e){let{queryString:t=!1,groupId:r}=e;if("string"==typeof t)return t;if(!1===t)return null;if(!0===t&&!r)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. 
You can also provide an explicit value such as queryString="my-search-param".');return r??null}({queryString:t,groupId:r});return[(0,u.aZ)(s),(0,n.useCallback)((e=>{if(!s)return;const t=new URLSearchParams(a.location.search);t.set(s,e),a.replace({...a.location,search:t.toString()})}),[s,a])]}function g(e){const{defaultValue:t,queryString:r=!1,groupId:a}=e,s=p(e),[o,l]=(0,n.useState)((()=>function(e){let{defaultValue:t,tabValues:r}=e;if(0===r.length)throw new Error("Docusaurus error: the component requires at least one children component");if(t){if(!d({value:t,tabValues:r}))throw new Error(`Docusaurus error: The has a defaultValue "${t}" but none of its children has the corresponding value. Available values are: ${r.map((e=>e.value)).join(", ")}. If you intend to show no default tab, use defaultValue={null} instead.`);return t}const a=r.find((e=>e.default))??r[0];if(!a)throw new Error("Unexpected error: 0 tabValues");return a.value}({defaultValue:t,tabValues:s}))),[u,i]=f({queryString:r,groupId:a}),[m,g]=function(e){let{groupId:t}=e;const r=function(e){return e?`docusaurus.tab.${e}`:null}(t),[a,s]=(0,c.Dv)(r);return[a,(0,n.useCallback)((e=>{r&&s.set(e)}),[r,s])]}({groupId:a}),b=(()=>{const e=u??m;return d({value:e,tabValues:s})?e:null})();(0,n.useLayoutEffect)((()=>{b&&l(b)}),[b]);return{selectedValue:o,selectValue:(0,n.useCallback)((e=>{if(!d({value:e,tabValues:s}))throw new Error(`Can't select invalid tab value=${e}`);l(e),i(e),g(e)}),[i,g,s]),tabValues:s}}var b=r(92303);const y={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function v(e){let{className:t,block:r,selectedValue:l,selectValue:u,tabValues:i}=e;const c=[],{blockElementScrollPositionUntilNextRender:m}=(0,o.a_)(),p=e=>{const t=e.currentTarget,r=c.indexOf(t),a=i[r].value;a!==l&&(m(t),u(a))},d=e=>{let t=null;switch(e.key){case"Enter":p(e);break;case"ArrowRight":{const r=c.indexOf(e.currentTarget)+1;t=c[r]??c[0];break}case"ArrowLeft":{const 
r=c.indexOf(e.currentTarget)-1;t=c[r]??c[c.length-1];break}}t?.focus()};return n.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,s.A)("tabs",{"tabs--block":r},t)},i.map((e=>{let{value:t,label:r,attributes:o}=e;return n.createElement("li",(0,a.A)({role:"tab",tabIndex:l===t?0:-1,"aria-selected":l===t,key:t,ref:e=>c.push(e),onKeyDown:d,onClick:p},o,{className:(0,s.A)("tabs__item",y.tabItem,o?.className,{"tabs__item--active":l===t})}),r??t)})))}function h(e){let{lazy:t,children:r,selectedValue:a}=e;const s=(Array.isArray(r)?r:[r]).filter(Boolean);if(t){const e=s.find((e=>e.props.value===a));return e?(0,n.cloneElement)(e,{className:"margin-top--md"}):null}return n.createElement("div",{className:"margin-top--md"},s.map(((e,t)=>(0,n.cloneElement)(e,{key:t,hidden:e.props.value!==a}))))}function S(e){const t=g(e);return n.createElement("div",{className:(0,s.A)("tabs-container",y.tabList)},n.createElement(v,(0,a.A)({},e,t)),n.createElement(h,(0,a.A)({},e,t)))}function k(e){const t=(0,b.A)();return n.createElement(S,(0,a.A)({key:String(t)},e))}},13365:(e,t,r)=>{r.r(t),r.d(t,{assets:()=>c,contentTitle:()=>u,default:()=>f,frontMatter:()=>l,metadata:()=>i,toc:()=>m});var a=r(58168),n=(r(96540),r(15680)),s=r(11470),o=r(19365);const l={sidebar_position:1,title:"SQLStatement",id:"sql-statement",description:"Create DataFrames based on custom SQL queries",tags:["gems","sql","custom"]},u=void 0,i={unversionedId:"Spark/gems/custom/sql-statement",id:"Spark/gems/custom/sql-statement",title:"SQLStatement",description:"Create DataFrames based on custom SQL 
queries",source:"@site/docs/Spark/gems/custom/sql-statement.md",sourceDirName:"Spark/gems/custom",slug:"/Spark/gems/custom/sql-statement",permalink:"/Spark/gems/custom/sql-statement",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"sql",permalink:"/tags/sql"},{label:"custom",permalink:"/tags/custom"}],version:"current",sidebarPosition:1,frontMatter:{sidebar_position:1,title:"SQLStatement",id:"sql-statement",description:"Create DataFrames based on custom SQL queries",tags:["gems","sql","custom"]},sidebar:"defaultSidebar",previous:{title:"Custom",permalink:"/Spark/gems/custom/"},next:{title:"Script",permalink:"/Spark/gems/custom/script"}},c={},m=[{value:"Parameters",id:"parameters",level:3},{value:"Example",id:"example",level:3},{value:"Generated Code",id:"generated-code",level:3}],p={toc:m},d="wrapper";function f(e){let{components:t,...l}=e;return(0,n.yg)(d,(0,a.A)({},p,l,{components:t,mdxType:"MDXLayout"}),(0,n.yg)("h3",null,(0,n.yg)("span",{class:"badge rounded-pill text-bg-light"},"Spark Gem")),(0,n.yg)("p",null,"Create one or more DataFrame(s) based on provided SQL queries to run against one or more input DataFrames."),(0,n.yg)("h3",{id:"parameters"},"Parameters"),(0,n.yg)("table",null,(0,n.yg)("thead",{parentName:"table"},(0,n.yg)("tr",{parentName:"thead"},(0,n.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,n.yg)("th",{parentName:"tr",align:"left"},"Meaning"),(0,n.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,n.yg)("tbody",{parentName:"table"},(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},"DataFrame(s)"),(0,n.yg)("td",{parentName:"tr",align:"left"},"Input DataFrame(s)"),(0,n.yg)("td",{parentName:"tr",align:"left"},"True")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},"SQL Queries"),(0,n.yg)("td",{parentName:"tr",align:"left"},"SQL Query for each output 
tab"),(0,n.yg)("td",{parentName:"tr",align:"left"},"True")))),(0,n.yg)("h3",{id:"example"},"Example"),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"SQL example 1",src:r(89461).A,width:"2504",height:"502"})),(0,n.yg)("admonition",{type:"info"},(0,n.yg)("p",{parentName:"admonition"},"Number of inputs and outputs can be changed as needed by clicking the ",(0,n.yg)("inlineCode",{parentName:"p"},"+")," button on the respective tab.")),(0,n.yg)("h3",{id:"generated-code"},"Generated Code"),(0,n.yg)(s.A,{mdxType:"Tabs"},(0,n.yg)(o.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-py"},'def SQLStatement(spark: SparkSession, orders: DataFrame, customers: DataFrame) -> (DataFrame, DataFrame):\n orders.createOrReplaceTempView("orders")\n customers.createOrReplaceTempView("customers")\n df1 = spark.sql("select * from orders inner join customers on orders.customer_id = customers.customer_id")\n df2 = spark.sql("select distinct customer_id from orders")\n\n return df1, df2\n\n'))),(0,n.yg)(o.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-scala"},'object SQLStatement {\n\n def apply(\n spark: SparkSession,\n orders: DataFrame,\n customers: DataFrame\n ): (DataFrame, DataFrame) = {\n orders.createOrReplaceTempView("orders")\n customers.createOrReplaceTempView("customers")\n (\n spark.sql(\n """select * from orders inner join customers on orders.customer_id = customers.customer_id"""\n ),\n spark.sql(\n """select distinct customer_id from orders"""\n )\n )\n }\n\n}\n\n')))))}f.isMDXComponent=!0},89461:(e,t,r)=>{r.d(t,{A:()=>a});const a=r.p+"assets/images/sqlstatement_eg_1-83a269e80f80336bc9cf0e8f3e9eb11e.png"}}]); \ No newline at end of file +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[59811],{15680:(e,t,r)=>{r.d(t,{xA:()=>c,yg:()=>f});var a=r(96540);function n(e,t,r){return t in 
e?Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[t]=r,e}function s(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,a)}return r}function o(e){for(var t=1;t=0||(n[r]=e[r]);return n}(e,t);if(Object.getOwnPropertySymbols){var s=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(n[r]=e[r])}return n}var u=a.createContext({}),i=function(e){var t=a.useContext(u),r=t;return e&&(r="function"==typeof e?e(t):o(o({},t),e)),r},c=function(e){var t=i(e.components);return a.createElement(u.Provider,{value:t},e.children)},m="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},d=a.forwardRef((function(e,t){var r=e.components,n=e.mdxType,s=e.originalType,u=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),m=i(r),d=n,f=m["".concat(u,".").concat(d)]||m[d]||p[d]||s;return r?a.createElement(f,o(o({ref:t},c),{},{components:r})):a.createElement(f,o({ref:t},c))}));function f(e,t){var r=arguments,n=t&&t.mdxType;if("string"==typeof e||n){var s=r.length,o=new Array(s);o[0]=d;var l={};for(var u in t)hasOwnProperty.call(t,u)&&(l[u]=t[u]);l.originalType=e,l[m]="string"==typeof e?e:n,o[1]=l;for(var i=2;i{r.d(t,{A:()=>o});var a=r(96540),n=r(20053);const s={tabItem:"tabItem_Ymn6"};function o(e){let{children:t,hidden:r,className:o}=e;return a.createElement("div",{role:"tabpanel",className:(0,n.A)(s.tabItem,o),hidden:r},t)}},11470:(e,t,r)=>{r.d(t,{A:()=>k});var a=r(58168),n=r(96540),s=r(20053),o=r(23104),l=r(56347),u=r(57485),i=r(31682),c=r(89466);function m(e){return function(e){return n.Children.map(e,(e=>{if(!e||(0,n.isValidElement)(e)&&function(e){const{props:t}=e;return!!t&&"object"==typeof t&&"value"in t}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof 
e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:t,label:r,attributes:a,default:n}}=e;return{value:t,label:r,attributes:a,default:n}}))}function p(e){const{values:t,children:r}=e;return(0,n.useMemo)((()=>{const e=t??m(r);return function(e){const t=(0,i.X)(e,((e,t)=>e.value===t.value));if(t.length>0)throw new Error(`Docusaurus error: Duplicate values "${t.map((e=>e.value)).join(", ")}" found in . Every value needs to be unique.`)}(e),e}),[t,r])}function d(e){let{value:t,tabValues:r}=e;return r.some((e=>e.value===t))}function f(e){let{queryString:t=!1,groupId:r}=e;const a=(0,l.W6)(),s=function(e){let{queryString:t=!1,groupId:r}=e;if("string"==typeof t)return t;if(!1===t)return null;if(!0===t&&!r)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. You can also provide an explicit value such as queryString="my-search-param".');return r??null}({queryString:t,groupId:r});return[(0,u.aZ)(s),(0,n.useCallback)((e=>{if(!s)return;const t=new URLSearchParams(a.location.search);t.set(s,e),a.replace({...a.location,search:t.toString()})}),[s,a])]}function g(e){const{defaultValue:t,queryString:r=!1,groupId:a}=e,s=p(e),[o,l]=(0,n.useState)((()=>function(e){let{defaultValue:t,tabValues:r}=e;if(0===r.length)throw new Error("Docusaurus error: the component requires at least one children component");if(t){if(!d({value:t,tabValues:r}))throw new Error(`Docusaurus error: The has a defaultValue "${t}" but none of its children has the corresponding value. Available values are: ${r.map((e=>e.value)).join(", ")}. 
If you intend to show no default tab, use defaultValue={null} instead.`);return t}const a=r.find((e=>e.default))??r[0];if(!a)throw new Error("Unexpected error: 0 tabValues");return a.value}({defaultValue:t,tabValues:s}))),[u,i]=f({queryString:r,groupId:a}),[m,g]=function(e){let{groupId:t}=e;const r=function(e){return e?`docusaurus.tab.${e}`:null}(t),[a,s]=(0,c.Dv)(r);return[a,(0,n.useCallback)((e=>{r&&s.set(e)}),[r,s])]}({groupId:a}),b=(()=>{const e=u??m;return d({value:e,tabValues:s})?e:null})();(0,n.useLayoutEffect)((()=>{b&&l(b)}),[b]);return{selectedValue:o,selectValue:(0,n.useCallback)((e=>{if(!d({value:e,tabValues:s}))throw new Error(`Can't select invalid tab value=${e}`);l(e),i(e),g(e)}),[i,g,s]),tabValues:s}}var b=r(92303);const y={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function v(e){let{className:t,block:r,selectedValue:l,selectValue:u,tabValues:i}=e;const c=[],{blockElementScrollPositionUntilNextRender:m}=(0,o.a_)(),p=e=>{const t=e.currentTarget,r=c.indexOf(t),a=i[r].value;a!==l&&(m(t),u(a))},d=e=>{let t=null;switch(e.key){case"Enter":p(e);break;case"ArrowRight":{const r=c.indexOf(e.currentTarget)+1;t=c[r]??c[0];break}case"ArrowLeft":{const r=c.indexOf(e.currentTarget)-1;t=c[r]??c[c.length-1];break}}t?.focus()};return n.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,s.A)("tabs",{"tabs--block":r},t)},i.map((e=>{let{value:t,label:r,attributes:o}=e;return n.createElement("li",(0,a.A)({role:"tab",tabIndex:l===t?0:-1,"aria-selected":l===t,key:t,ref:e=>c.push(e),onKeyDown:d,onClick:p},o,{className:(0,s.A)("tabs__item",y.tabItem,o?.className,{"tabs__item--active":l===t})}),r??t)})))}function h(e){let{lazy:t,children:r,selectedValue:a}=e;const s=(Array.isArray(r)?r:[r]).filter(Boolean);if(t){const e=s.find((e=>e.props.value===a));return e?(0,n.cloneElement)(e,{className:"margin-top--md"}):null}return 
n.createElement("div",{className:"margin-top--md"},s.map(((e,t)=>(0,n.cloneElement)(e,{key:t,hidden:e.props.value!==a}))))}function S(e){const t=g(e);return n.createElement("div",{className:(0,s.A)("tabs-container",y.tabList)},n.createElement(v,(0,a.A)({},e,t)),n.createElement(h,(0,a.A)({},e,t)))}function k(e){const t=(0,b.A)();return n.createElement(S,(0,a.A)({key:String(t)},e))}},13365:(e,t,r)=>{r.r(t),r.d(t,{assets:()=>c,contentTitle:()=>u,default:()=>f,frontMatter:()=>l,metadata:()=>i,toc:()=>m});var a=r(58168),n=(r(96540),r(15680)),s=r(11470),o=r(19365);const l={sidebar_position:1,title:"SQLStatement",id:"sql-statement",description:"Create DataFrames based on custom SQL queries",tags:["gems","sql","custom"]},u=void 0,i={unversionedId:"Spark/gems/custom/sql-statement",id:"Spark/gems/custom/sql-statement",title:"SQLStatement",description:"Create DataFrames based on custom SQL queries",source:"@site/docs/Spark/gems/custom/sql-statement.md",sourceDirName:"Spark/gems/custom",slug:"/Spark/gems/custom/sql-statement",permalink:"/Spark/gems/custom/sql-statement",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"sql",permalink:"/tags/sql"},{label:"custom",permalink:"/tags/custom"}],version:"current",sidebarPosition:1,frontMatter:{sidebar_position:1,title:"SQLStatement",id:"sql-statement",description:"Create DataFrames based on custom SQL queries",tags:["gems","sql","custom"]},sidebar:"defaultSidebar",previous:{title:"Custom",permalink:"/Spark/gems/custom/"},next:{title:"Script",permalink:"/Spark/gems/custom/script"}},c={},m=[{value:"Parameters",id:"parameters",level:3},{value:"Example",id:"example",level:3},{value:"Generated Code",id:"generated-code",level:3}],p={toc:m},d="wrapper";function f(e){let{components:t,...l}=e;return(0,n.yg)(d,(0,a.A)({},p,l,{components:t,mdxType:"MDXLayout"}),(0,n.yg)("h3",null,(0,n.yg)("span",{class:"badge"},"Spark Gem")),(0,n.yg)("p",null,"Create one or more DataFrame(s) based on provided SQL queries to run against one or more 
input DataFrames."),(0,n.yg)("h3",{id:"parameters"},"Parameters"),(0,n.yg)("table",null,(0,n.yg)("thead",{parentName:"table"},(0,n.yg)("tr",{parentName:"thead"},(0,n.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,n.yg)("th",{parentName:"tr",align:"left"},"Meaning"),(0,n.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,n.yg)("tbody",{parentName:"table"},(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},"DataFrame(s)"),(0,n.yg)("td",{parentName:"tr",align:"left"},"Input DataFrame(s)"),(0,n.yg)("td",{parentName:"tr",align:"left"},"True")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},"SQL Queries"),(0,n.yg)("td",{parentName:"tr",align:"left"},"SQL Query for each output tab"),(0,n.yg)("td",{parentName:"tr",align:"left"},"True")))),(0,n.yg)("h3",{id:"example"},"Example"),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"SQL example 1",src:r(89461).A,width:"2504",height:"502"})),(0,n.yg)("admonition",{type:"info"},(0,n.yg)("p",{parentName:"admonition"},"Number of inputs and outputs can be changed as needed by clicking the ",(0,n.yg)("inlineCode",{parentName:"p"},"+")," button on the respective tab.")),(0,n.yg)("h3",{id:"generated-code"},"Generated Code"),(0,n.yg)(s.A,{mdxType:"Tabs"},(0,n.yg)(o.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-py"},'def SQLStatement(spark: SparkSession, orders: DataFrame, customers: DataFrame) -> (DataFrame, DataFrame):\n orders.createOrReplaceTempView("orders")\n customers.createOrReplaceTempView("customers")\n df1 = spark.sql("select * from orders inner join customers on orders.customer_id = customers.customer_id")\n df2 = spark.sql("select distinct customer_id from orders")\n\n return df1, df2\n\n'))),(0,n.yg)(o.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-scala"},'object SQLStatement {\n\n def apply(\n spark: 
SparkSession,\n orders: DataFrame,\n customers: DataFrame\n ): (DataFrame, DataFrame) = {\n orders.createOrReplaceTempView("orders")\n customers.createOrReplaceTempView("customers")\n (\n spark.sql(\n """select * from orders inner join customers on orders.customer_id = customers.customer_id"""\n ),\n spark.sql(\n """select distinct customer_id from orders"""\n )\n )\n }\n\n}\n\n')))))}f.isMDXComponent=!0},89461:(e,t,r)=>{r.d(t,{A:()=>a});const a=r.p+"assets/images/sqlstatement_eg_1-83a269e80f80336bc9cf0e8f3e9eb11e.png"}}]); \ No newline at end of file diff --git a/assets/js/486ce9f7.1b8dcbcc.js b/assets/js/486ce9f7.1b8dcbcc.js deleted file mode 100644 index 949fb821b2..0000000000 --- a/assets/js/486ce9f7.1b8dcbcc.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[44281],{15680:(e,t,a)=>{a.d(t,{xA:()=>c,yg:()=>d});var r=a(96540);function n(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function i(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,r)}return a}function o(e){for(var t=1;t=0||(n[a]=e[a]);return n}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(n[a]=e[a])}return n}var s=r.createContext({}),l=function(e){var t=r.useContext(s),a=t;return e&&(a="function"==typeof e?e(t):o(o({},t),e)),a},c=function(e){var t=l(e.components);return r.createElement(s.Provider,{value:t},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},g=r.forwardRef((function(e,t){var a=e.components,n=e.mdxType,i=e.originalType,s=e.parentName,c=p(e,["components","mdxType","originalType","parentName"]),u=l(a),g=n,d=u["".concat(s,".").concat(g)]||u[g]||m[g]||i;return 
a?r.createElement(d,o(o({ref:t},c),{},{components:a})):r.createElement(d,o({ref:t},c))}));function d(e,t){var a=arguments,n=t&&t.mdxType;if("string"==typeof e||n){var i=a.length,o=new Array(i);o[0]=g;var p={};for(var s in t)hasOwnProperty.call(t,s)&&(p[s]=t[s]);p.originalType=e,p[u]="string"==typeof e?e:n,o[1]=p;for(var l=2;l{a.r(t),a.d(t,{assets:()=>s,contentTitle:()=>o,default:()=>m,frontMatter:()=>i,metadata:()=>p,toc:()=>l});var r=a(58168),n=(a(96540),a(15680));const i={sidebar_position:2,title:"Script",id:"script",description:"Provide a place to use a SparkSession and whatever code you wish to use",tags:["custom","code"]},o=void 0,p={unversionedId:"Spark/gems/custom/script",id:"Spark/gems/custom/script",title:"Script",description:"Provide a place to use a SparkSession and whatever code you wish to use",source:"@site/docs/Spark/gems/custom/script.md",sourceDirName:"Spark/gems/custom",slug:"/Spark/gems/custom/script",permalink:"/Spark/gems/custom/script",draft:!1,tags:[{label:"custom",permalink:"/tags/custom"},{label:"code",permalink:"/tags/code"}],version:"current",sidebarPosition:2,frontMatter:{sidebar_position:2,title:"Script",id:"script",description:"Provide a place to use a SparkSession and whatever code you wish to use",tags:["custom","code"]},sidebar:"defaultSidebar",previous:{title:"SQLStatement",permalink:"/Spark/gems/custom/sql-statement"},next:{title:"FileOperation",permalink:"/Spark/gems/custom/file-operations"}},s={},l=[{value:"Parameters",id:"parameters",level:2},{value:"Schema",id:"schema",level:2},{value:"Examples",id:"examples",level:2},{value:"Script Gem with Input and Output: Un-pivoting a DataFrame",id:"script-gem-with-input-and-output-un-pivoting-a-dataframe",level:3},{value:"Script Gem with only Output: Generating a DataFrame",id:"script-gem-with-only-output-generating-a-dataframe",level:3}],c={toc:l},u="wrapper";function 
m(e){let{components:t,...i}=e;return(0,n.yg)(u,(0,r.A)({},c,i,{components:t,mdxType:"MDXLayout"}),(0,n.yg)("h3",null,(0,n.yg)("span",{class:"badge rounded-pill text-bg-light"},"Spark Gem")),(0,n.yg)("p",null,"Provides a ",(0,n.yg)("inlineCode",{parentName:"p"},"SparkSession")," and allows you to run custom code."),(0,n.yg)("h2",{id:"parameters"},"Parameters"),(0,n.yg)("table",null,(0,n.yg)("thead",{parentName:"table"},(0,n.yg)("tr",{parentName:"thead"},(0,n.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,n.yg)("th",{parentName:"tr",align:"left"},"Meaning"),(0,n.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,n.yg)("tbody",{parentName:"table"},(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},"Input DataFrame(s)"),(0,n.yg)("td",{parentName:"tr",align:"left"},"Input DataFrame(s)"),(0,n.yg)("td",{parentName:"tr",align:"left"},"False")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},"Output DataFrame(s)"),(0,n.yg)("td",{parentName:"tr",align:"left"},"Output DataFrame(s)"),(0,n.yg)("td",{parentName:"tr",align:"left"},"False")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},"Code"),(0,n.yg)("td",{parentName:"tr",align:"left"},"Custom code to be executed"),(0,n.yg)("td",{parentName:"tr",align:"left"},"True")))),(0,n.yg)("admonition",{type:"info"},(0,n.yg)("p",{parentName:"admonition"},"To remove input/output DataFrame(s), simply click ",(0,n.yg)("svg",{width:"24",height:"24",viewBox:"0 0 24 24",xmlns:"http://www.w3.org/2000/svg",color:"#667085",class:"sc-jrAFXE hhLaQQ ui-icon undefined"},(0,n.yg)("path",{d:"M11.9999 2C10.2458 2 8.78514 3.31072 8.53998 5H5.11908C5.0765 4.99271 5.03337 4.98912 4.99017 4.98926C4.95285 4.99006 4.91564 4.99365 4.87885 5H3.24994C3.15056 4.99859 3.05188 5.01696 2.95966 5.05402C2.86743 5.09108 2.78349 5.1461 2.71271 5.21588C2.64194 5.28566 2.58573 5.36882 2.54737 5.46051C2.50901 5.5522 2.48926 5.65061 2.48926 5.75C2.48926 5.84939 
2.50901 5.9478 2.54737 6.03949C2.58573 6.13118 2.64194 6.21434 2.71271 6.28412C2.78349 6.3539 2.86743 6.40892 2.95966 6.44598C3.05188 6.48304 3.15056 6.50141 3.24994 6.5H4.31928L5.57806 19.5146C5.7136 20.918 6.90583 22 8.31537 22H15.6835C17.0931 22 18.2854 20.9181 18.4208 19.5146L19.6806 6.5H20.7499C20.8493 6.50141 20.948 6.48304 21.0402 6.44598C21.1324 6.40892 21.2164 6.3539 21.2872 6.28412C21.3579 6.21434 21.4141 6.13118 21.4525 6.03949C21.4909 5.9478 21.5106 5.84939 21.5106 5.75C21.5106 5.65061 21.4909 5.5522 21.4525 5.46051C21.4141 5.36882 21.3579 5.28566 21.2872 5.21588C21.2164 5.1461 21.1324 5.09108 21.0402 5.05402C20.948 5.01696 20.8493 4.99859 20.7499 5H19.122C19.0425 4.98709 18.9613 4.98709 18.8818 5H15.4599C15.2147 3.31072 13.7541 2 11.9999 2ZM11.9999 3.5C12.9395 3.5 13.7103 4.13408 13.9306 5H10.0693C10.2896 4.13408 11.0604 3.5 11.9999 3.5ZM5.82513 6.5H18.1738L16.9277 19.3701C16.8652 20.0177 16.3339 20.5 15.6835 20.5H8.31537C7.66591 20.5 7.13369 20.0168 7.07123 19.3701L5.82513 6.5ZM10.2382 8.98926C10.0395 8.99236 9.8501 9.07423 9.71167 9.21686C9.57324 9.3595 9.49709 9.55125 9.49994 9.75V17.25C9.49853 17.3494 9.5169 17.4481 9.55396 17.5403C9.59102 17.6325 9.64604 17.7164 9.71582 17.7872C9.7856 17.858 9.86876 17.9142 9.96045 17.9526C10.0521 17.9909 10.1505 18.0107 10.2499 18.0107C10.3493 18.0107 10.4477 17.9909 10.5394 17.9526C10.6311 17.9142 10.7143 17.858 10.7841 17.7872C10.8538 17.7164 10.9089 17.6325 10.9459 17.5403C10.983 17.4481 11.0013 17.3494 10.9999 17.25V9.75C11.0014 9.64962 10.9827 9.54997 10.9449 9.45695C10.9071 9.36394 10.851 9.27946 10.78 9.20852C10.709 9.13757 10.6244 9.08161 10.5313 9.04395C10.4383 9.00629 10.3386 8.98769 10.2382 8.98926V8.98926ZM13.7382 8.98926C13.5395 8.99236 13.3501 9.07423 13.2117 9.21686C13.0732 9.3595 12.9971 9.55125 12.9999 9.75V17.25C12.9985 17.3494 13.0169 17.4481 13.054 17.5403C13.091 17.6325 13.146 17.7164 13.2158 17.7872C13.2856 17.858 13.3688 17.9142 13.4605 17.9526C13.5521 17.9909 13.6505 18.0107 13.7499 
18.0107C13.8493 18.0107 13.9477 17.9909 14.0394 17.9526C14.1311 17.9142 14.2143 17.858 14.2841 17.7872C14.3538 17.7164 14.4089 17.6325 14.4459 17.5403C14.483 17.4481 14.5013 17.3494 14.4999 17.25V9.75C14.5014 9.64962 14.4827 9.54997 14.4449 9.45695C14.4071 9.36394 14.351 9.27946 14.28 9.20852C14.209 9.13757 14.1244 9.08161 14.0313 9.04395C13.9383 9.00629 13.8386 8.98769 13.7382 8.98926V8.98926Z"}))," icon on the left sidebar"),(0,n.yg)("p",{parentName:"admonition"},(0,n.yg)("img",{alt:"Script - Remove inputs",src:a(8811).A,width:"1920",height:"344"}))),(0,n.yg)("h2",{id:"schema"},"Schema"),(0,n.yg)("p",null,"When executing a custom script Gem, the output schema is not known by Prophecy so it must be inferred from a sample computation result. Click the ",(0,n.yg)("inlineCode",{parentName:"p"},"Custom Schema")," button and ",(0,n.yg)("inlineCode",{parentName:"p"},"Infer from cluster")," as shown in the ",(0,n.yg)("strong",{parentName:"p"},"Gems --\x3e Outputs")," description ",(0,n.yg)("a",{parentName:"p",href:"/concepts/project/gems#inputs-outputs"},"here.")," The schema will be inferred according to the script and the Spark version running on the connected cluster."),(0,n.yg)("h2",{id:"examples"},"Examples"),(0,n.yg)("hr",null),(0,n.yg)("h3",{id:"script-gem-with-input-and-output-un-pivoting-a-dataframe"},"Script Gem with Input and Output: Un-pivoting a DataFrame"),(0,n.yg)("p",null,"We'll perform the ",(0,n.yg)("inlineCode",{parentName:"p"},"unpivot")," operation using our custom code"),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"Script - Unpivot",src:a(65448).A,width:"1726",height:"812"})),(0,n.yg)("hr",null),(0,n.yg)("h3",{id:"script-gem-with-only-output-generating-a-dataframe"},"Script Gem with only Output: Generating a DataFrame"),(0,n.yg)("p",null,"We'll use the provided ",(0,n.yg)("inlineCode",{parentName:"p"},"SparkSession")," to create and return a DataFrame"),(0,n.yg)("admonition",{type:"note"},(0,n.yg)("p",{parentName:"admonition"},"Since we removed the input 
port, we don't see input DataFrame in the method signature")),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"Script - Unpivot",src:a(45625).A,width:"2110",height:"1228"})))}m.isMDXComponent=!0},45625:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/script_generate_df-0b0db4e4ec1c0de934b618e41102edc5.png"},8811:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/script_remove_inputs-1fa64b1d98805f98b57fc6f20fe50b10.png"},65448:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/script_unpivot-be4b541e680f7028ab24c2a3b37ebda4.png"}}]); \ No newline at end of file diff --git a/assets/js/486ce9f7.c508bfda.js b/assets/js/486ce9f7.c508bfda.js new file mode 100644 index 0000000000..5522ab497a --- /dev/null +++ b/assets/js/486ce9f7.c508bfda.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[44281],{15680:(e,t,a)=>{a.d(t,{xA:()=>c,yg:()=>d});var r=a(96540);function n(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function i(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,r)}return a}function o(e){for(var t=1;t=0||(n[a]=e[a]);return n}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(n[a]=e[a])}return n}var s=r.createContext({}),l=function(e){var t=r.useContext(s),a=t;return e&&(a="function"==typeof e?e(t):o(o({},t),e)),a},c=function(e){var t=l(e.components);return r.createElement(s.Provider,{value:t},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},g=r.forwardRef((function(e,t){var 
a=e.components,n=e.mdxType,i=e.originalType,s=e.parentName,c=p(e,["components","mdxType","originalType","parentName"]),u=l(a),g=n,d=u["".concat(s,".").concat(g)]||u[g]||m[g]||i;return a?r.createElement(d,o(o({ref:t},c),{},{components:a})):r.createElement(d,o({ref:t},c))}));function d(e,t){var a=arguments,n=t&&t.mdxType;if("string"==typeof e||n){var i=a.length,o=new Array(i);o[0]=g;var p={};for(var s in t)hasOwnProperty.call(t,s)&&(p[s]=t[s]);p.originalType=e,p[u]="string"==typeof e?e:n,o[1]=p;for(var l=2;l{a.r(t),a.d(t,{assets:()=>s,contentTitle:()=>o,default:()=>m,frontMatter:()=>i,metadata:()=>p,toc:()=>l});var r=a(58168),n=(a(96540),a(15680));const i={sidebar_position:2,title:"Script",id:"script",description:"Provide a place to use a SparkSession and whatever code you wish to use",tags:["custom","code"]},o=void 0,p={unversionedId:"Spark/gems/custom/script",id:"Spark/gems/custom/script",title:"Script",description:"Provide a place to use a SparkSession and whatever code you wish to use",source:"@site/docs/Spark/gems/custom/script.md",sourceDirName:"Spark/gems/custom",slug:"/Spark/gems/custom/script",permalink:"/Spark/gems/custom/script",draft:!1,tags:[{label:"custom",permalink:"/tags/custom"},{label:"code",permalink:"/tags/code"}],version:"current",sidebarPosition:2,frontMatter:{sidebar_position:2,title:"Script",id:"script",description:"Provide a place to use a SparkSession and whatever code you wish to use",tags:["custom","code"]},sidebar:"defaultSidebar",previous:{title:"SQLStatement",permalink:"/Spark/gems/custom/sql-statement"},next:{title:"FileOperation",permalink:"/Spark/gems/custom/file-operations"}},s={},l=[{value:"Parameters",id:"parameters",level:2},{value:"Schema",id:"schema",level:2},{value:"Examples",id:"examples",level:2},{value:"Script Gem with Input and Output: Un-pivoting a DataFrame",id:"script-gem-with-input-and-output-un-pivoting-a-dataframe",level:3},{value:"Script Gem with only Output: Generating a 
DataFrame",id:"script-gem-with-only-output-generating-a-dataframe",level:3}],c={toc:l},u="wrapper";function m(e){let{components:t,...i}=e;return(0,n.yg)(u,(0,r.A)({},c,i,{components:t,mdxType:"MDXLayout"}),(0,n.yg)("h3",null,(0,n.yg)("span",{class:"badge"},"Spark Gem")),(0,n.yg)("p",null,"Provides a ",(0,n.yg)("inlineCode",{parentName:"p"},"SparkSession")," and allows you to run custom code."),(0,n.yg)("h2",{id:"parameters"},"Parameters"),(0,n.yg)("table",null,(0,n.yg)("thead",{parentName:"table"},(0,n.yg)("tr",{parentName:"thead"},(0,n.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,n.yg)("th",{parentName:"tr",align:"left"},"Meaning"),(0,n.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,n.yg)("tbody",{parentName:"table"},(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},"Input DataFrame(s)"),(0,n.yg)("td",{parentName:"tr",align:"left"},"Input DataFrame(s)"),(0,n.yg)("td",{parentName:"tr",align:"left"},"False")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},"Output DataFrame(s)"),(0,n.yg)("td",{parentName:"tr",align:"left"},"Output DataFrame(s)"),(0,n.yg)("td",{parentName:"tr",align:"left"},"False")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},"Code"),(0,n.yg)("td",{parentName:"tr",align:"left"},"Custom code to be executed"),(0,n.yg)("td",{parentName:"tr",align:"left"},"True")))),(0,n.yg)("admonition",{type:"info"},(0,n.yg)("p",{parentName:"admonition"},"To remove input/output DataFrame(s), simply click ",(0,n.yg)("svg",{width:"24",height:"24",viewBox:"0 0 24 24",xmlns:"http://www.w3.org/2000/svg",color:"#667085",class:"sc-jrAFXE hhLaQQ ui-icon undefined"},(0,n.yg)("path",{d:"M11.9999 2C10.2458 2 8.78514 3.31072 8.53998 5H5.11908C5.0765 4.99271 5.03337 4.98912 4.99017 4.98926C4.95285 4.99006 4.91564 4.99365 4.87885 5H3.24994C3.15056 4.99859 3.05188 5.01696 2.95966 5.05402C2.86743 5.09108 2.78349 5.1461 2.71271 5.21588C2.64194 5.28566 2.58573 5.36882 
2.54737 5.46051C2.50901 5.5522 2.48926 5.65061 2.48926 5.75C2.48926 5.84939 2.50901 5.9478 2.54737 6.03949C2.58573 6.13118 2.64194 6.21434 2.71271 6.28412C2.78349 6.3539 2.86743 6.40892 2.95966 6.44598C3.05188 6.48304 3.15056 6.50141 3.24994 6.5H4.31928L5.57806 19.5146C5.7136 20.918 6.90583 22 8.31537 22H15.6835C17.0931 22 18.2854 20.9181 18.4208 19.5146L19.6806 6.5H20.7499C20.8493 6.50141 20.948 6.48304 21.0402 6.44598C21.1324 6.40892 21.2164 6.3539 21.2872 6.28412C21.3579 6.21434 21.4141 6.13118 21.4525 6.03949C21.4909 5.9478 21.5106 5.84939 21.5106 5.75C21.5106 5.65061 21.4909 5.5522 21.4525 5.46051C21.4141 5.36882 21.3579 5.28566 21.2872 5.21588C21.2164 5.1461 21.1324 5.09108 21.0402 5.05402C20.948 5.01696 20.8493 4.99859 20.7499 5H19.122C19.0425 4.98709 18.9613 4.98709 18.8818 5H15.4599C15.2147 3.31072 13.7541 2 11.9999 2ZM11.9999 3.5C12.9395 3.5 13.7103 4.13408 13.9306 5H10.0693C10.2896 4.13408 11.0604 3.5 11.9999 3.5ZM5.82513 6.5H18.1738L16.9277 19.3701C16.8652 20.0177 16.3339 20.5 15.6835 20.5H8.31537C7.66591 20.5 7.13369 20.0168 7.07123 19.3701L5.82513 6.5ZM10.2382 8.98926C10.0395 8.99236 9.8501 9.07423 9.71167 9.21686C9.57324 9.3595 9.49709 9.55125 9.49994 9.75V17.25C9.49853 17.3494 9.5169 17.4481 9.55396 17.5403C9.59102 17.6325 9.64604 17.7164 9.71582 17.7872C9.7856 17.858 9.86876 17.9142 9.96045 17.9526C10.0521 17.9909 10.1505 18.0107 10.2499 18.0107C10.3493 18.0107 10.4477 17.9909 10.5394 17.9526C10.6311 17.9142 10.7143 17.858 10.7841 17.7872C10.8538 17.7164 10.9089 17.6325 10.9459 17.5403C10.983 17.4481 11.0013 17.3494 10.9999 17.25V9.75C11.0014 9.64962 10.9827 9.54997 10.9449 9.45695C10.9071 9.36394 10.851 9.27946 10.78 9.20852C10.709 9.13757 10.6244 9.08161 10.5313 9.04395C10.4383 9.00629 10.3386 8.98769 10.2382 8.98926V8.98926ZM13.7382 8.98926C13.5395 8.99236 13.3501 9.07423 13.2117 9.21686C13.0732 9.3595 12.9971 9.55125 12.9999 9.75V17.25C12.9985 17.3494 13.0169 17.4481 13.054 17.5403C13.091 17.6325 13.146 17.7164 13.2158 17.7872C13.2856 17.858 
13.3688 17.9142 13.4605 17.9526C13.5521 17.9909 13.6505 18.0107 13.7499 18.0107C13.8493 18.0107 13.9477 17.9909 14.0394 17.9526C14.1311 17.9142 14.2143 17.858 14.2841 17.7872C14.3538 17.7164 14.4089 17.6325 14.4459 17.5403C14.483 17.4481 14.5013 17.3494 14.4999 17.25V9.75C14.5014 9.64962 14.4827 9.54997 14.4449 9.45695C14.4071 9.36394 14.351 9.27946 14.28 9.20852C14.209 9.13757 14.1244 9.08161 14.0313 9.04395C13.9383 9.00629 13.8386 8.98769 13.7382 8.98926V8.98926Z"}))," icon on the left sidebar"),(0,n.yg)("p",{parentName:"admonition"},(0,n.yg)("img",{alt:"Script - Remove inputs",src:a(8811).A,width:"1920",height:"344"}))),(0,n.yg)("h2",{id:"schema"},"Schema"),(0,n.yg)("p",null,"When executing a custom script Gem, the output schema is not known by Prophecy so it must be inferred from a sample computation result. Click the ",(0,n.yg)("inlineCode",{parentName:"p"},"Custom Schema")," button and ",(0,n.yg)("inlineCode",{parentName:"p"},"Infer from cluster")," as shown in the ",(0,n.yg)("strong",{parentName:"p"},"Gems --\x3e Outputs")," description ",(0,n.yg)("a",{parentName:"p",href:"/concepts/project/gems#inputs-outputs"},"here.")," The schema will be inferred according to the script and the Spark version running on the connected cluster."),(0,n.yg)("h2",{id:"examples"},"Examples"),(0,n.yg)("hr",null),(0,n.yg)("h3",{id:"script-gem-with-input-and-output-un-pivoting-a-dataframe"},"Script Gem with Input and Output: Un-pivoting a DataFrame"),(0,n.yg)("p",null,"We'll perform the ",(0,n.yg)("inlineCode",{parentName:"p"},"unpivot")," operation using our custom code"),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"Script - Unpivot",src:a(65448).A,width:"1726",height:"812"})),(0,n.yg)("hr",null),(0,n.yg)("h3",{id:"script-gem-with-only-output-generating-a-dataframe"},"Script Gem with only Output: Generating a DataFrame"),(0,n.yg)("p",null,"We'll use the provided ",(0,n.yg)("inlineCode",{parentName:"p"},"SparkSession")," to create and return a 
DataFrame"),(0,n.yg)("admonition",{type:"note"},(0,n.yg)("p",{parentName:"admonition"},"Since we removed the input port, we don't see input DataFrame in the method signature")),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"Script - Unpivot",src:a(45625).A,width:"2110",height:"1228"})))}m.isMDXComponent=!0},45625:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/script_generate_df-0b0db4e4ec1c0de934b618e41102edc5.png"},8811:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/script_remove_inputs-1fa64b1d98805f98b57fc6f20fe50b10.png"},65448:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/script_unpivot-be4b541e680f7028ab24c2a3b37ebda4.png"}}]); \ No newline at end of file diff --git a/assets/js/4ce6e96f.18e4256a.js b/assets/js/4ce6e96f.e771c6f2.js similarity index 79% rename from assets/js/4ce6e96f.18e4256a.js rename to assets/js/4ce6e96f.e771c6f2.js index 5ba66ccf02..971f7b38ef 100644 --- a/assets/js/4ce6e96f.18e4256a.js +++ b/assets/js/4ce6e96f.e771c6f2.js @@ -1 +1 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[47475],{15680:(e,r,t)=>{t.d(r,{xA:()=>c,yg:()=>g});var a=t(96540);function n(e,r,t){return r in e?Object.defineProperty(e,r,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[r]=t,e}function l(e,r){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);r&&(a=a.filter((function(r){return Object.getOwnPropertyDescriptor(e,r).enumerable}))),t.push.apply(t,a)}return t}function o(e){for(var r=1;r=0||(n[t]=e[t]);return n}(e,r);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(n[t]=e[t])}return n}var u=a.createContext({}),i=function(e){var r=a.useContext(u),t=r;return e&&(t="function"==typeof e?e(r):o(o({},r),e)),t},c=function(e){var r=i(e.components);return a.createElement(u.Provider,{value:r},e.children)},d="mdxType",p={inlineCode:"code",wrapper:function(e){var r=e.children;return 
a.createElement(a.Fragment,{},r)}},m=a.forwardRef((function(e,r){var t=e.components,n=e.mdxType,l=e.originalType,u=e.parentName,c=s(e,["components","mdxType","originalType","parentName"]),d=i(t),m=n,g=d["".concat(u,".").concat(m)]||d[m]||p[m]||l;return t?a.createElement(g,o(o({ref:r},c),{},{components:t})):a.createElement(g,o({ref:r},c))}));function g(e,r){var t=arguments,n=r&&r.mdxType;if("string"==typeof e||n){var l=t.length,o=new Array(l);o[0]=m;var s={};for(var u in r)hasOwnProperty.call(r,u)&&(s[u]=r[u]);s.originalType=e,s[d]="string"==typeof e?e:n,o[1]=s;for(var i=2;i{t.d(r,{A:()=>o});var a=t(96540),n=t(20053);const l={tabItem:"tabItem_Ymn6"};function o(e){let{children:r,hidden:t,className:o}=e;return a.createElement("div",{role:"tabpanel",className:(0,n.A)(l.tabItem,o),hidden:t},r)}},11470:(e,r,t)=>{t.d(r,{A:()=>S});var a=t(58168),n=t(96540),l=t(20053),o=t(23104),s=t(56347),u=t(57485),i=t(31682),c=t(89466);function d(e){return function(e){return n.Children.map(e,(e=>{if(!e||(0,n.isValidElement)(e)&&function(e){const{props:r}=e;return!!r&&"object"==typeof r&&"value"in r}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:r,label:t,attributes:a,default:n}}=e;return{value:r,label:t,attributes:a,default:n}}))}function p(e){const{values:r,children:t}=e;return(0,n.useMemo)((()=>{const e=r??d(t);return function(e){const r=(0,i.X)(e,((e,r)=>e.value===r.value));if(r.length>0)throw new Error(`Docusaurus error: Duplicate values "${r.map((e=>e.value)).join(", ")}" found in . 
Every value needs to be unique.`)}(e),e}),[r,t])}function m(e){let{value:r,tabValues:t}=e;return t.some((e=>e.value===r))}function g(e){let{queryString:r=!1,groupId:t}=e;const a=(0,s.W6)(),l=function(e){let{queryString:r=!1,groupId:t}=e;if("string"==typeof r)return r;if(!1===r)return null;if(!0===r&&!t)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. You can also provide an explicit value such as queryString="my-search-param".');return t??null}({queryString:r,groupId:t});return[(0,u.aZ)(l),(0,n.useCallback)((e=>{if(!l)return;const r=new URLSearchParams(a.location.search);r.set(l,e),a.replace({...a.location,search:r.toString()})}),[l,a])]}function y(e){const{defaultValue:r,queryString:t=!1,groupId:a}=e,l=p(e),[o,s]=(0,n.useState)((()=>function(e){let{defaultValue:r,tabValues:t}=e;if(0===t.length)throw new Error("Docusaurus error: the component requires at least one children component");if(r){if(!m({value:r,tabValues:t}))throw new Error(`Docusaurus error: The has a defaultValue "${r}" but none of its children has the corresponding value. Available values are: ${t.map((e=>e.value)).join(", ")}. 
If you intend to show no default tab, use defaultValue={null} instead.`);return r}const a=t.find((e=>e.default))??t[0];if(!a)throw new Error("Unexpected error: 0 tabValues");return a.value}({defaultValue:r,tabValues:l}))),[u,i]=g({queryString:t,groupId:a}),[d,y]=function(e){let{groupId:r}=e;const t=function(e){return e?`docusaurus.tab.${e}`:null}(r),[a,l]=(0,c.Dv)(t);return[a,(0,n.useCallback)((e=>{t&&l.set(e)}),[t,l])]}({groupId:a}),b=(()=>{const e=u??d;return m({value:e,tabValues:l})?e:null})();(0,n.useLayoutEffect)((()=>{b&&s(b)}),[b]);return{selectedValue:o,selectValue:(0,n.useCallback)((e=>{if(!m({value:e,tabValues:l}))throw new Error(`Can't select invalid tab value=${e}`);s(e),i(e),y(e)}),[i,y,l]),tabValues:l}}var b=t(92303);const f={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function v(e){let{className:r,block:t,selectedValue:s,selectValue:u,tabValues:i}=e;const c=[],{blockElementScrollPositionUntilNextRender:d}=(0,o.a_)(),p=e=>{const r=e.currentTarget,t=c.indexOf(r),a=i[t].value;a!==s&&(d(r),u(a))},m=e=>{let r=null;switch(e.key){case"Enter":p(e);break;case"ArrowRight":{const t=c.indexOf(e.currentTarget)+1;r=c[t]??c[0];break}case"ArrowLeft":{const t=c.indexOf(e.currentTarget)-1;r=c[t]??c[c.length-1];break}}r?.focus()};return n.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,l.A)("tabs",{"tabs--block":t},r)},i.map((e=>{let{value:r,label:t,attributes:o}=e;return n.createElement("li",(0,a.A)({role:"tab",tabIndex:s===r?0:-1,"aria-selected":s===r,key:r,ref:e=>c.push(e),onKeyDown:m,onClick:p},o,{className:(0,l.A)("tabs__item",f.tabItem,o?.className,{"tabs__item--active":s===r})}),t??r)})))}function h(e){let{lazy:r,children:t,selectedValue:a}=e;const l=(Array.isArray(t)?t:[t]).filter(Boolean);if(r){const e=l.find((e=>e.props.value===a));return e?(0,n.cloneElement)(e,{className:"margin-top--md"}):null}return 
n.createElement("div",{className:"margin-top--md"},l.map(((e,r)=>(0,n.cloneElement)(e,{key:r,hidden:e.props.value!==a}))))}function k(e){const r=y(e);return n.createElement("div",{className:(0,l.A)("tabs-container",f.tabList)},n.createElement(v,(0,a.A)({},e,r)),n.createElement(h,(0,a.A)({},e,r)))}function S(e){const r=(0,b.A)();return n.createElement(k,(0,a.A)({key:String(r)},e))}},28690:(e,r,t)=>{t.r(r),t.d(r,{assets:()=>c,contentTitle:()=>u,default:()=>g,frontMatter:()=>s,metadata:()=>i,toc:()=>d});var a=t(58168),n=(t(96540),t(15680)),l=t(11470),o=t(19365);const s={sidebar_position:3,title:"OrderBy",id:"order-by",description:"Sort your data based on one or more Columns",tags:["gems","order by","sort","ascending","descending"]},u=void 0,i={unversionedId:"Spark/gems/transform/order-by",id:"Spark/gems/transform/order-by",title:"OrderBy",description:"Sort your data based on one or more Columns",source:"@site/docs/Spark/gems/transform/order-by.md",sourceDirName:"Spark/gems/transform",slug:"/Spark/gems/transform/order-by",permalink:"/Spark/gems/transform/order-by",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"order by",permalink:"/tags/order-by"},{label:"sort",permalink:"/tags/sort"},{label:"ascending",permalink:"/tags/ascending"},{label:"descending",permalink:"/tags/descending"}],version:"current",sidebarPosition:3,frontMatter:{sidebar_position:3,title:"OrderBy",id:"order-by",description:"Sort your data based on one or more Columns",tags:["gems","order by","sort","ascending","descending"]},sidebar:"defaultSidebar",previous:{title:"Filter",permalink:"/Spark/gems/transform/filter"},next:{title:"Aggregate",permalink:"/Spark/gems/transform/aggregate"}},c={},d=[{value:"Parameters",id:"parameters",level:3},{value:"Example",id:"example",level:3},{value:"Spark Code",id:"spark-code",level:3}],p={toc:d},m="wrapper";function 
g(e){let{components:r,...s}=e;return(0,n.yg)(m,(0,a.A)({},p,s,{components:r,mdxType:"MDXLayout"}),(0,n.yg)("h3",null,(0,n.yg)("span",{class:"badge rounded-pill text-bg-light"},"Spark Gem")),(0,n.yg)("p",null,"Sorts a DataFrame on one or more columns in ascending or descending order."),(0,n.yg)("h3",{id:"parameters"},"Parameters"),(0,n.yg)("table",null,(0,n.yg)("thead",{parentName:"table"},(0,n.yg)("tr",{parentName:"thead"},(0,n.yg)("th",{parentName:"tr",align:null},"Parameter"),(0,n.yg)("th",{parentName:"tr",align:null},"Description"),(0,n.yg)("th",{parentName:"tr",align:null},"Required"))),(0,n.yg)("tbody",{parentName:"table"},(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"DataFrame"),(0,n.yg)("td",{parentName:"tr",align:null},"Input DataFrame to be sorted"),(0,n.yg)("td",{parentName:"tr",align:null},"True")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"Order columns"),(0,n.yg)("td",{parentName:"tr",align:null},"Columns to sort DataFrame by"),(0,n.yg)("td",{parentName:"tr",align:null},"True")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"Sort"),(0,n.yg)("td",{parentName:"tr",align:null},"Order of sorting - ascending or descending"),(0,n.yg)("td",{parentName:"tr",align:null},"True")))),(0,n.yg)("h3",{id:"example"},"Example"),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"Example usage of OrderBy",src:t(98935).A,width:"940",height:"260"})),(0,n.yg)("h3",{id:"spark-code"},"Spark Code"),(0,n.yg)(l.A,{mdxType:"Tabs"},(0,n.yg)(o.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-py"},'def Sort(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return in0.orderBy(col("name").asc(), col("updated_at").desc())\n'))),(0,n.yg)(o.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-scala"},'object Sort {\n def apply(spark: SparkSession, in: 
DataFrame): DataFrame =\n in.orderBy(col("updated_at").desc, col("name").asc)\n}\n')))))}g.isMDXComponent=!0},98935:(e,r,t)=>{t.d(r,{A:()=>a});const a=t.p+"assets/images/orderby_eg_0-860c1a17045d750aa875a39dbcf114fc.png"}}]); \ No newline at end of file +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[47475],{15680:(e,r,t)=>{t.d(r,{xA:()=>c,yg:()=>g});var a=t(96540);function n(e,r,t){return r in e?Object.defineProperty(e,r,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[r]=t,e}function l(e,r){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);r&&(a=a.filter((function(r){return Object.getOwnPropertyDescriptor(e,r).enumerable}))),t.push.apply(t,a)}return t}function o(e){for(var r=1;r=0||(n[t]=e[t]);return n}(e,r);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(n[t]=e[t])}return n}var u=a.createContext({}),i=function(e){var r=a.useContext(u),t=r;return e&&(t="function"==typeof e?e(r):o(o({},r),e)),t},c=function(e){var r=i(e.components);return a.createElement(u.Provider,{value:r},e.children)},d="mdxType",p={inlineCode:"code",wrapper:function(e){var r=e.children;return a.createElement(a.Fragment,{},r)}},m=a.forwardRef((function(e,r){var t=e.components,n=e.mdxType,l=e.originalType,u=e.parentName,c=s(e,["components","mdxType","originalType","parentName"]),d=i(t),m=n,g=d["".concat(u,".").concat(m)]||d[m]||p[m]||l;return t?a.createElement(g,o(o({ref:r},c),{},{components:t})):a.createElement(g,o({ref:r},c))}));function g(e,r){var t=arguments,n=r&&r.mdxType;if("string"==typeof e||n){var l=t.length,o=new Array(l);o[0]=m;var s={};for(var u in r)hasOwnProperty.call(r,u)&&(s[u]=r[u]);s.originalType=e,s[d]="string"==typeof e?e:n,o[1]=s;for(var i=2;i{t.d(r,{A:()=>o});var a=t(96540),n=t(20053);const l={tabItem:"tabItem_Ymn6"};function o(e){let{children:r,hidden:t,className:o}=e;return 
a.createElement("div",{role:"tabpanel",className:(0,n.A)(l.tabItem,o),hidden:t},r)}},11470:(e,r,t)=>{t.d(r,{A:()=>S});var a=t(58168),n=t(96540),l=t(20053),o=t(23104),s=t(56347),u=t(57485),i=t(31682),c=t(89466);function d(e){return function(e){return n.Children.map(e,(e=>{if(!e||(0,n.isValidElement)(e)&&function(e){const{props:r}=e;return!!r&&"object"==typeof r&&"value"in r}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:r,label:t,attributes:a,default:n}}=e;return{value:r,label:t,attributes:a,default:n}}))}function p(e){const{values:r,children:t}=e;return(0,n.useMemo)((()=>{const e=r??d(t);return function(e){const r=(0,i.X)(e,((e,r)=>e.value===r.value));if(r.length>0)throw new Error(`Docusaurus error: Duplicate values "${r.map((e=>e.value)).join(", ")}" found in . Every value needs to be unique.`)}(e),e}),[r,t])}function m(e){let{value:r,tabValues:t}=e;return t.some((e=>e.value===r))}function g(e){let{queryString:r=!1,groupId:t}=e;const a=(0,s.W6)(),l=function(e){let{queryString:r=!1,groupId:t}=e;if("string"==typeof r)return r;if(!1===r)return null;if(!0===r&&!t)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. 
You can also provide an explicit value such as queryString="my-search-param".');return t??null}({queryString:r,groupId:t});return[(0,u.aZ)(l),(0,n.useCallback)((e=>{if(!l)return;const r=new URLSearchParams(a.location.search);r.set(l,e),a.replace({...a.location,search:r.toString()})}),[l,a])]}function y(e){const{defaultValue:r,queryString:t=!1,groupId:a}=e,l=p(e),[o,s]=(0,n.useState)((()=>function(e){let{defaultValue:r,tabValues:t}=e;if(0===t.length)throw new Error("Docusaurus error: the component requires at least one children component");if(r){if(!m({value:r,tabValues:t}))throw new Error(`Docusaurus error: The has a defaultValue "${r}" but none of its children has the corresponding value. Available values are: ${t.map((e=>e.value)).join(", ")}. If you intend to show no default tab, use defaultValue={null} instead.`);return r}const a=t.find((e=>e.default))??t[0];if(!a)throw new Error("Unexpected error: 0 tabValues");return a.value}({defaultValue:r,tabValues:l}))),[u,i]=g({queryString:t,groupId:a}),[d,y]=function(e){let{groupId:r}=e;const t=function(e){return e?`docusaurus.tab.${e}`:null}(r),[a,l]=(0,c.Dv)(t);return[a,(0,n.useCallback)((e=>{t&&l.set(e)}),[t,l])]}({groupId:a}),b=(()=>{const e=u??d;return m({value:e,tabValues:l})?e:null})();(0,n.useLayoutEffect)((()=>{b&&s(b)}),[b]);return{selectedValue:o,selectValue:(0,n.useCallback)((e=>{if(!m({value:e,tabValues:l}))throw new Error(`Can't select invalid tab value=${e}`);s(e),i(e),y(e)}),[i,y,l]),tabValues:l}}var b=t(92303);const f={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function v(e){let{className:r,block:t,selectedValue:s,selectValue:u,tabValues:i}=e;const c=[],{blockElementScrollPositionUntilNextRender:d}=(0,o.a_)(),p=e=>{const r=e.currentTarget,t=c.indexOf(r),a=i[t].value;a!==s&&(d(r),u(a))},m=e=>{let r=null;switch(e.key){case"Enter":p(e);break;case"ArrowRight":{const t=c.indexOf(e.currentTarget)+1;r=c[t]??c[0];break}case"ArrowLeft":{const 
t=c.indexOf(e.currentTarget)-1;r=c[t]??c[c.length-1];break}}r?.focus()};return n.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,l.A)("tabs",{"tabs--block":t},r)},i.map((e=>{let{value:r,label:t,attributes:o}=e;return n.createElement("li",(0,a.A)({role:"tab",tabIndex:s===r?0:-1,"aria-selected":s===r,key:r,ref:e=>c.push(e),onKeyDown:m,onClick:p},o,{className:(0,l.A)("tabs__item",f.tabItem,o?.className,{"tabs__item--active":s===r})}),t??r)})))}function h(e){let{lazy:r,children:t,selectedValue:a}=e;const l=(Array.isArray(t)?t:[t]).filter(Boolean);if(r){const e=l.find((e=>e.props.value===a));return e?(0,n.cloneElement)(e,{className:"margin-top--md"}):null}return n.createElement("div",{className:"margin-top--md"},l.map(((e,r)=>(0,n.cloneElement)(e,{key:r,hidden:e.props.value!==a}))))}function k(e){const r=y(e);return n.createElement("div",{className:(0,l.A)("tabs-container",f.tabList)},n.createElement(v,(0,a.A)({},e,r)),n.createElement(h,(0,a.A)({},e,r)))}function S(e){const r=(0,b.A)();return n.createElement(k,(0,a.A)({key:String(r)},e))}},28690:(e,r,t)=>{t.r(r),t.d(r,{assets:()=>c,contentTitle:()=>u,default:()=>g,frontMatter:()=>s,metadata:()=>i,toc:()=>d});var a=t(58168),n=(t(96540),t(15680)),l=t(11470),o=t(19365);const s={sidebar_position:3,title:"OrderBy",id:"order-by",description:"Sort your data based on one or more Columns",tags:["gems","order by","sort","ascending","descending"]},u=void 0,i={unversionedId:"Spark/gems/transform/order-by",id:"Spark/gems/transform/order-by",title:"OrderBy",description:"Sort your data based on one or more Columns",source:"@site/docs/Spark/gems/transform/order-by.md",sourceDirName:"Spark/gems/transform",slug:"/Spark/gems/transform/order-by",permalink:"/Spark/gems/transform/order-by",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"order 
by",permalink:"/tags/order-by"},{label:"sort",permalink:"/tags/sort"},{label:"ascending",permalink:"/tags/ascending"},{label:"descending",permalink:"/tags/descending"}],version:"current",sidebarPosition:3,frontMatter:{sidebar_position:3,title:"OrderBy",id:"order-by",description:"Sort your data based on one or more Columns",tags:["gems","order by","sort","ascending","descending"]},sidebar:"defaultSidebar",previous:{title:"Filter",permalink:"/Spark/gems/transform/filter"},next:{title:"Aggregate",permalink:"/Spark/gems/transform/aggregate"}},c={},d=[{value:"Parameters",id:"parameters",level:3},{value:"Example",id:"example",level:3},{value:"Spark Code",id:"spark-code",level:3}],p={toc:d},m="wrapper";function g(e){let{components:r,...s}=e;return(0,n.yg)(m,(0,a.A)({},p,s,{components:r,mdxType:"MDXLayout"}),(0,n.yg)("h3",null,(0,n.yg)("span",{class:"badge"},"Spark Gem")),(0,n.yg)("p",null,"Sorts a DataFrame on one or more columns in ascending or descending order."),(0,n.yg)("h3",{id:"parameters"},"Parameters"),(0,n.yg)("table",null,(0,n.yg)("thead",{parentName:"table"},(0,n.yg)("tr",{parentName:"thead"},(0,n.yg)("th",{parentName:"tr",align:null},"Parameter"),(0,n.yg)("th",{parentName:"tr",align:null},"Description"),(0,n.yg)("th",{parentName:"tr",align:null},"Required"))),(0,n.yg)("tbody",{parentName:"table"},(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"DataFrame"),(0,n.yg)("td",{parentName:"tr",align:null},"Input DataFrame to be sorted"),(0,n.yg)("td",{parentName:"tr",align:null},"True")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"Order columns"),(0,n.yg)("td",{parentName:"tr",align:null},"Columns to sort DataFrame by"),(0,n.yg)("td",{parentName:"tr",align:null},"True")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"Sort"),(0,n.yg)("td",{parentName:"tr",align:null},"Order of sorting - ascending or 
descending"),(0,n.yg)("td",{parentName:"tr",align:null},"True")))),(0,n.yg)("h3",{id:"example"},"Example"),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"Example usage of OrderBy",src:t(98935).A,width:"940",height:"260"})),(0,n.yg)("h3",{id:"spark-code"},"Spark Code"),(0,n.yg)(l.A,{mdxType:"Tabs"},(0,n.yg)(o.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-py"},'def Sort(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return in0.orderBy(col("name").asc(), col("updated_at").desc())\n'))),(0,n.yg)(o.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-scala"},'object Sort {\n def apply(spark: SparkSession, in: DataFrame): DataFrame =\n in.orderBy(col("updated_at").desc, col("name").asc)\n}\n')))))}g.isMDXComponent=!0},98935:(e,r,t)=>{t.d(r,{A:()=>a});const a=t.p+"assets/images/orderby_eg_0-860c1a17045d750aa875a39dbcf114fc.png"}}]); \ No newline at end of file diff --git a/assets/js/50ddc816.41a1dded.js b/assets/js/50ddc816.41a1dded.js deleted file mode 100644 index 2d006a6058..0000000000 --- a/assets/js/50ddc816.41a1dded.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[50616],{15680:(e,t,a)=>{a.d(t,{xA:()=>c,yg:()=>m});var n=a(96540);function r(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function l(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function i(e){for(var t=1;t=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var s=n.createContext({}),u=function(e){var t=n.useContext(s),a=t;return e&&(a="function"==typeof 
e?e(t):i(i({},t),e)),a},c=function(e){var t=u(e.components);return n.createElement(s.Provider,{value:t},e.children)},p="mdxType",g={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},d=n.forwardRef((function(e,t){var a=e.components,r=e.mdxType,l=e.originalType,s=e.parentName,c=o(e,["components","mdxType","originalType","parentName"]),p=u(a),d=r,m=p["".concat(s,".").concat(d)]||p[d]||g[d]||l;return a?n.createElement(m,i(i({ref:t},c),{},{components:a})):n.createElement(m,i({ref:t},c))}));function m(e,t){var a=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var l=a.length,i=new Array(l);i[0]=d;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[p]="string"==typeof e?e:r,i[1]=o;for(var u=2;u{a.d(t,{A:()=>i});var n=a(96540),r=a(20053);const l={tabItem:"tabItem_Ymn6"};function i(e){let{children:t,hidden:a,className:i}=e;return n.createElement("div",{role:"tabpanel",className:(0,r.A)(l.tabItem,i),hidden:a},t)}},11470:(e,t,a)=>{a.d(t,{A:()=>w});var n=a(58168),r=a(96540),l=a(20053),i=a(23104),o=a(56347),s=a(57485),u=a(31682),c=a(89466);function p(e){return function(e){return r.Children.map(e,(e=>{if(!e||(0,r.isValidElement)(e)&&function(e){const{props:t}=e;return!!t&&"object"==typeof t&&"value"in t}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:t,label:a,attributes:n,default:r}}=e;return{value:t,label:a,attributes:n,default:r}}))}function g(e){const{values:t,children:a}=e;return(0,r.useMemo)((()=>{const e=t??p(a);return function(e){const t=(0,u.X)(e,((e,t)=>e.value===t.value));if(t.length>0)throw new Error(`Docusaurus error: Duplicate values "${t.map((e=>e.value)).join(", ")}" found in . 
Every value needs to be unique.`)}(e),e}),[t,a])}function d(e){let{value:t,tabValues:a}=e;return a.some((e=>e.value===t))}function m(e){let{queryString:t=!1,groupId:a}=e;const n=(0,o.W6)(),l=function(e){let{queryString:t=!1,groupId:a}=e;if("string"==typeof t)return t;if(!1===t)return null;if(!0===t&&!a)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. You can also provide an explicit value such as queryString="my-search-param".');return a??null}({queryString:t,groupId:a});return[(0,s.aZ)(l),(0,r.useCallback)((e=>{if(!l)return;const t=new URLSearchParams(n.location.search);t.set(l,e),n.replace({...n.location,search:t.toString()})}),[l,n])]}function y(e){const{defaultValue:t,queryString:a=!1,groupId:n}=e,l=g(e),[i,o]=(0,r.useState)((()=>function(e){let{defaultValue:t,tabValues:a}=e;if(0===a.length)throw new Error("Docusaurus error: the component requires at least one children component");if(t){if(!d({value:t,tabValues:a}))throw new Error(`Docusaurus error: The has a defaultValue "${t}" but none of its children has the corresponding value. Available values are: ${a.map((e=>e.value)).join(", ")}. 
If you intend to show no default tab, use defaultValue={null} instead.`);return t}const n=a.find((e=>e.default))??a[0];if(!n)throw new Error("Unexpected error: 0 tabValues");return n.value}({defaultValue:t,tabValues:l}))),[s,u]=m({queryString:a,groupId:n}),[p,y]=function(e){let{groupId:t}=e;const a=function(e){return e?`docusaurus.tab.${e}`:null}(t),[n,l]=(0,c.Dv)(a);return[n,(0,r.useCallback)((e=>{a&&l.set(e)}),[a,l])]}({groupId:n}),h=(()=>{const e=s??p;return d({value:e,tabValues:l})?e:null})();(0,r.useLayoutEffect)((()=>{h&&o(h)}),[h]);return{selectedValue:i,selectValue:(0,r.useCallback)((e=>{if(!d({value:e,tabValues:l}))throw new Error(`Can't select invalid tab value=${e}`);o(e),u(e),y(e)}),[u,y,l]),tabValues:l}}var h=a(92303);const b={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function f(e){let{className:t,block:a,selectedValue:o,selectValue:s,tabValues:u}=e;const c=[],{blockElementScrollPositionUntilNextRender:p}=(0,i.a_)(),g=e=>{const t=e.currentTarget,a=c.indexOf(t),n=u[a].value;n!==o&&(p(t),s(n))},d=e=>{let t=null;switch(e.key){case"Enter":g(e);break;case"ArrowRight":{const a=c.indexOf(e.currentTarget)+1;t=c[a]??c[0];break}case"ArrowLeft":{const a=c.indexOf(e.currentTarget)-1;t=c[a]??c[c.length-1];break}}t?.focus()};return r.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,l.A)("tabs",{"tabs--block":a},t)},u.map((e=>{let{value:t,label:a,attributes:i}=e;return r.createElement("li",(0,n.A)({role:"tab",tabIndex:o===t?0:-1,"aria-selected":o===t,key:t,ref:e=>c.push(e),onKeyDown:d,onClick:g},i,{className:(0,l.A)("tabs__item",b.tabItem,i?.className,{"tabs__item--active":o===t})}),a??t)})))}function v(e){let{lazy:t,children:a,selectedValue:n}=e;const l=(Array.isArray(a)?a:[a]).filter(Boolean);if(t){const e=l.find((e=>e.props.value===n));return e?(0,r.cloneElement)(e,{className:"margin-top--md"}):null}return 
r.createElement("div",{className:"margin-top--md"},l.map(((e,t)=>(0,r.cloneElement)(e,{key:t,hidden:e.props.value!==n}))))}function x(e){const t=y(e);return r.createElement("div",{className:(0,l.A)("tabs-container",b.tabList)},r.createElement(f,(0,n.A)({},e,t)),r.createElement(v,(0,n.A)({},e,t)))}function w(e){const t=(0,h.A)();return r.createElement(x,(0,n.A)({key:String(t)},e))}},62363:(e,t,a)=>{a.r(t),a.d(t,{assets:()=>c,contentTitle:()=>s,default:()=>m,frontMatter:()=>o,metadata:()=>u,toc:()=>p});var n=a(58168),r=(a(96540),a(15680)),l=a(11470),i=a(19365);const o={sidebar_position:1,title:"TextProcessing",id:"ml-text-processing",description:"Text processing to prepare data to submit to a foundational model API.",tags:["generative-ai","machine-learning","llm","text-processing","web-scraping","chunk","extract"]},s=void 0,u={unversionedId:"Spark/gems/machine-learning/ml-text-processing",id:"Spark/gems/machine-learning/ml-text-processing",title:"TextProcessing",description:"Text processing to prepare data to submit to a foundational model API.",source:"@site/docs/Spark/gems/machine-learning/ml-text-processing.md",sourceDirName:"Spark/gems/machine-learning",slug:"/Spark/gems/machine-learning/ml-text-processing",permalink:"/Spark/gems/machine-learning/ml-text-processing",draft:!1,tags:[{label:"generative-ai",permalink:"/tags/generative-ai"},{label:"machine-learning",permalink:"/tags/machine-learning"},{label:"llm",permalink:"/tags/llm"},{label:"text-processing",permalink:"/tags/text-processing"},{label:"web-scraping",permalink:"/tags/web-scraping"},{label:"chunk",permalink:"/tags/chunk"},{label:"extract",permalink:"/tags/extract"}],version:"current",sidebarPosition:1,frontMatter:{sidebar_position:1,title:"TextProcessing",id:"ml-text-processing",description:"Text processing to prepare data to submit to a foundational model 
API.",tags:["generative-ai","machine-learning","llm","text-processing","web-scraping","chunk","extract"]},sidebar:"defaultSidebar",previous:{title:"Machine Learning",permalink:"/Spark/gems/machine-learning/"},next:{title:"OpenAI",permalink:"/Spark/gems/machine-learning/ml-openai"}},c={},p=[{value:"1. Load web URLs and Extract Text",id:"1-load-web-urls-and-extract-text",level:3},{value:"1a. Configure web scrape",id:"1a-configure-web-scrape",level:4},{value:"1b. Input",id:"1b-input",level:4},{value:"1c. Output",id:"1c-output",level:4},{value:"1d. Generated Code",id:"1d-generated-code",level:4},{value:"2. Split text data into equal chunks",id:"2-split-text-data-into-equal-chunks",level:3},{value:"2a. Configure text splitting",id:"2a-configure-text-splitting",level:4},{value:"2b. Input",id:"2b-input",level:4},{value:"2c. Output",id:"2c-output",level:4},{value:"2d. Generated code",id:"2d-generated-code",level:4},{value:"FAQ",id:"faq",level:3},{value:"Troubleshooting",id:"troubleshooting",level:4}],g={toc:p},d="wrapper";function m(e){let{components:t,...o}=e;return(0,r.yg)(d,(0,n.A)({},g,o,{components:t,mdxType:"MDXLayout"}),(0,r.yg)("h3",null,(0,r.yg)("span",{class:"badge rounded-pill text-bg-light"},"Spark Gem")),(0,r.yg)("p",null,"The TextProcessing Gem enables text data preparation for machine learning in two different ways:"),(0,r.yg)("ol",null,(0,r.yg)("li",{parentName:"ol"},(0,r.yg)("a",{parentName:"li",href:"/Spark/gems/machine-learning/ml-text-processing#1-load-web-urls-and-extract-text"},"Load")," web URLs and extract text."),(0,r.yg)("li",{parentName:"ol"},(0,r.yg)("a",{parentName:"li",href:"/Spark/gems/machine-learning/ml-text-processing#2-split-text-data-into-equal-chunks"},"Split")," text data into equal chunks.")),(0,r.yg)("p",null,"Follow along to see how to use the TextProcessing Gem. 
For an example set of Pipelines that uses this Gem to create a Generative AI Chatbot, see this ",(0,r.yg)("a",{parentName:"p",href:"https://docs.prophecy.io/getting-started/gen-ai-chatbot"},"guide.")),(0,r.yg)("div",{class:"wistia_responsive_padding",style:{padding:"56.25% 0 0 0",position:"relative"}},(0,r.yg)("div",{class:"wistia_responsive_wrapper",style:{height:"100%",left:0,position:"absolute",top:0,width:"100%"}},(0,r.yg)("iframe",{src:"https://fast.wistia.net/embed/iframe/7v6y4ldt5x?seo=false?videoFoam=true",title:"Getting Started With SQL Video",allow:"autoplay; fullscreen",allowtransparency:"true",frameborder:"0",scrolling:"no",class:"wistia_embed",name:"wistia_embed",msallowfullscreen:!0,width:"100%",height:"100%"}))),(0,r.yg)("script",{src:"https://fast.wistia.net/assets/external/E-v1.js",async:!0}),(0,r.yg)("br",null),(0,r.yg)("h3",{id:"1-load-web-urls-and-extract-text"},"1. Load web URLs and Extract Text"),(0,r.yg)("p",null,"Given a column with web URLs, the ",(0,r.yg)("inlineCode",{parentName:"p"},"Load web URLs")," operation will scrape the content from each URL, and output the content as a binary format or as a human readable text format, depending on the operation type selected. The figure below shows the ",(0,r.yg)("inlineCode",{parentName:"p"},"Load web URL and Extract Text")," operation."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Overview web scrape and extract text",src:a(11959).A,width:"2376",height:"934"})),(0,r.yg)("h4",{id:"1a-configure-web-scrape"},"1a. Configure web scrape"),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Configure to web scrape",src:a(56144).A,width:"2880",height:"1726"})),(0,r.yg)("p",null,"Configure the ",(0,r.yg)("strong",{parentName:"p"},"(1) Operation Type")," to Load url (web scrape), and optionally extract the text. Specify which input ",(0,r.yg)("strong",{parentName:"p"},"(2) Column name")," contains the web urls. 
If the ",(0,r.yg)("inlineCode",{parentName:"p"},"extract text")," operation is selected, the text will be converted from binary to human readable format. When would you want to use the binary format? Binary web scraping is useful for downloading content including images or archived documents."),(0,r.yg)("h4",{id:"1b-input"},"1b. Input"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Parameter"),(0,r.yg)("th",{parentName:"tr",align:null},"Description"),(0,r.yg)("th",{parentName:"tr",align:null},"Required"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Column name (string with urls)"),(0,r.yg)("td",{parentName:"tr",align:null},"string - the input column which contains the strings of web URLs"),(0,r.yg)("td",{parentName:"tr",align:null},"True")))),(0,r.yg)("h4",{id:"1c-output"},"1c. Output"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Parameter"),(0,r.yg)("th",{parentName:"tr",align:null},"Description"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Result content ",(0,r.yg)("inlineCode",{parentName:"td"},"Load url (web scrape)")),(0,r.yg)("td",{parentName:"tr",align:null},"binary - the contents of each web page")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Result content ",(0,r.yg)("inlineCode",{parentName:"td"},"Load url (web scrape) and extract text")),(0,r.yg)("td",{parentName:"tr",align:null},"string - the contents of each web page, converted from binary to human readable text")))),(0,r.yg)("h4",{id:"1d-generated-code"},"1d. 
Generated Code"),(0,r.yg)(l.A,{mdxType:"Tabs"},(0,r.yg)(i.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},'def scrape_pages(spark: SparkSession, in0: DataFrame) -> DataFrame:\n from pyspark.sql.functions import expr, array, struct\n from spark_ai.webapps import WebUtils\n WebUtils().register_udfs(spark)\n\n return in0.withColumn("result_content", expr(f"web_scrape(loc)"))\n'))),(0,r.yg)(i.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre"},"[Not yet supported]\n")))),(0,r.yg)("h3",{id:"2-split-text-data-into-equal-chunks"},"2. Split text data into equal chunks"),(0,r.yg)("p",null,'Sometimes you\'d like to send text data to a foundational model or store in a vector database, but the text is too long. For this case, just split the text into "chunks" of characters.'),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Overview Chunkify",src:a(48382).A,width:"2376",height:"814"})),(0,r.yg)("h4",{id:"2a-configure-text-splitting"},"2a. Configure text splitting"),(0,r.yg)("p",null,"Given a text input, the ",(0,r.yg)("inlineCode",{parentName:"p"},"Split data")," operation will separate the input column entries into chunks of specified ",(0,r.yg)("inlineCode",{parentName:"p"},"size"),"."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Configure to Chunkify",src:a(2125).A,width:"2880",height:"1726"})),(0,r.yg)("p",null,"Select the ",(0,r.yg)("strong",{parentName:"p"},"(1) Operation type")," to split text into equal chunks. Specify which input ",(0,r.yg)("strong",{parentName:"p"},"(2) Column name")," contains the relevant content. Specify an integer chunk ",(0,r.yg)("strong",{parentName:"p"},"(3) Size")," relevant for your generative AI use case."),(0,r.yg)("h4",{id:"2b-input"},"2b. 
Input"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Parameter"),(0,r.yg)("th",{parentName:"tr",align:null},"Description"),(0,r.yg)("th",{parentName:"tr",align:null},"Required"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Column name"),(0,r.yg)("td",{parentName:"tr",align:null},"string - the text content which should be split into equal chunks"),(0,r.yg)("td",{parentName:"tr",align:null},"True")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Size"),(0,r.yg)("td",{parentName:"tr",align:null},"integer - the size of each chunk, number of characters. Example: ",(0,r.yg)("inlineCode",{parentName:"td"},"1000")),(0,r.yg)("td",{parentName:"tr",align:null},"True")))),(0,r.yg)("h4",{id:"2c-output"},"2c. Output"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Parameter"),(0,r.yg)("th",{parentName:"tr",align:null},"Description"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"result_chunks"),(0,r.yg)("td",{parentName:"tr",align:null},"array(string) - an array of text strings, each string representing one chunk of the larger text content")))),(0,r.yg)("h4",{id:"2d-generated-code"},"2d. 
Generated code"),(0,r.yg)(l.A,{mdxType:"Tabs"},(0,r.yg)(i.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},'def Chunkify(spark: SparkSession, web_bronze_content: DataFrame) -> DataFrame:\n from pyspark.sql.functions import expr, array, struct\n from spark_ai.files.text import FileTextUtils\n FileTextUtils().register_udfs(spark)\n\n return web_bronze_content.withColumn("result_chunks", expr(f"text_split_into_chunks(content, 1000)"))\n'))),(0,r.yg)(i.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre"},"[Not yet supported]\n")))),(0,r.yg)("h3",{id:"faq"},"FAQ"),(0,r.yg)("p",null,(0,r.yg)("strong",{parentName:"p"},"How does this Gem fit into the bigger picture of building a generative AI application?")),(0,r.yg)("p",null,"For an example set of Pipelines that uses this Gem to create a Generative AI Chatbot, see this ",(0,r.yg)("a",{parentName:"p",href:"https://docs.prophecy.io/getting-started/gen-ai-chatbot"},"guide.")," Feel free to ",(0,r.yg)("a",{parentName:"p",href:"https://www.prophecy.io/request-a-demo"},"reach out")," and explore your use case with us."),(0,r.yg)("h4",{id:"troubleshooting"},"Troubleshooting"),(0,r.yg)("p",null,"Select a chunk size according to the limitations of your vector database index."))}m.isMDXComponent=!0},2125:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/ml-text-proc-configure-chunkify-d99e640e07f1831c369ad80aa09339d2.png"},48382:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/ml-text-proc-overview-chunkify-c7292b08f77cc25d9899ef1c0fc88c4f.png"},56144:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/ml-text-proc-scrape-configure-3fc90ca55df1009a84cc9e5a5752eeb6.png"},11959:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/ml-text-proc-scrape-extract-overview-34f31e4baf8a155a7537eb20431d08ac.png"}}]); \ No newline at end of file diff --git a/assets/js/50ddc816.96a6828a.js 
b/assets/js/50ddc816.96a6828a.js new file mode 100644 index 0000000000..b9ce960f96 --- /dev/null +++ b/assets/js/50ddc816.96a6828a.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[50616],{15680:(e,t,a)=>{a.d(t,{xA:()=>c,yg:()=>m});var n=a(96540);function r(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function l(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function i(e){for(var t=1;t=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var s=n.createContext({}),u=function(e){var t=n.useContext(s),a=t;return e&&(a="function"==typeof e?e(t):i(i({},t),e)),a},c=function(e){var t=u(e.components);return n.createElement(s.Provider,{value:t},e.children)},p="mdxType",g={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},d=n.forwardRef((function(e,t){var a=e.components,r=e.mdxType,l=e.originalType,s=e.parentName,c=o(e,["components","mdxType","originalType","parentName"]),p=u(a),d=r,m=p["".concat(s,".").concat(d)]||p[d]||g[d]||l;return a?n.createElement(m,i(i({ref:t},c),{},{components:a})):n.createElement(m,i({ref:t},c))}));function m(e,t){var a=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var l=a.length,i=new Array(l);i[0]=d;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[p]="string"==typeof e?e:r,i[1]=o;for(var u=2;u{a.d(t,{A:()=>i});var n=a(96540),r=a(20053);const l={tabItem:"tabItem_Ymn6"};function i(e){let{children:t,hidden:a,className:i}=e;return n.createElement("div",{role:"tabpanel",className:(0,r.A)(l.tabItem,i),hidden:a},t)}},11470:(e,t,a)=>{a.d(t,{A:()=>w});var 
n=a(58168),r=a(96540),l=a(20053),i=a(23104),o=a(56347),s=a(57485),u=a(31682),c=a(89466);function p(e){return function(e){return r.Children.map(e,(e=>{if(!e||(0,r.isValidElement)(e)&&function(e){const{props:t}=e;return!!t&&"object"==typeof t&&"value"in t}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:t,label:a,attributes:n,default:r}}=e;return{value:t,label:a,attributes:n,default:r}}))}function g(e){const{values:t,children:a}=e;return(0,r.useMemo)((()=>{const e=t??p(a);return function(e){const t=(0,u.X)(e,((e,t)=>e.value===t.value));if(t.length>0)throw new Error(`Docusaurus error: Duplicate values "${t.map((e=>e.value)).join(", ")}" found in . Every value needs to be unique.`)}(e),e}),[t,a])}function d(e){let{value:t,tabValues:a}=e;return a.some((e=>e.value===t))}function m(e){let{queryString:t=!1,groupId:a}=e;const n=(0,o.W6)(),l=function(e){let{queryString:t=!1,groupId:a}=e;if("string"==typeof t)return t;if(!1===t)return null;if(!0===t&&!a)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. 
You can also provide an explicit value such as queryString="my-search-param".');return a??null}({queryString:t,groupId:a});return[(0,s.aZ)(l),(0,r.useCallback)((e=>{if(!l)return;const t=new URLSearchParams(n.location.search);t.set(l,e),n.replace({...n.location,search:t.toString()})}),[l,n])]}function y(e){const{defaultValue:t,queryString:a=!1,groupId:n}=e,l=g(e),[i,o]=(0,r.useState)((()=>function(e){let{defaultValue:t,tabValues:a}=e;if(0===a.length)throw new Error("Docusaurus error: the component requires at least one children component");if(t){if(!d({value:t,tabValues:a}))throw new Error(`Docusaurus error: The has a defaultValue "${t}" but none of its children has the corresponding value. Available values are: ${a.map((e=>e.value)).join(", ")}. If you intend to show no default tab, use defaultValue={null} instead.`);return t}const n=a.find((e=>e.default))??a[0];if(!n)throw new Error("Unexpected error: 0 tabValues");return n.value}({defaultValue:t,tabValues:l}))),[s,u]=m({queryString:a,groupId:n}),[p,y]=function(e){let{groupId:t}=e;const a=function(e){return e?`docusaurus.tab.${e}`:null}(t),[n,l]=(0,c.Dv)(a);return[n,(0,r.useCallback)((e=>{a&&l.set(e)}),[a,l])]}({groupId:n}),h=(()=>{const e=s??p;return d({value:e,tabValues:l})?e:null})();(0,r.useLayoutEffect)((()=>{h&&o(h)}),[h]);return{selectedValue:i,selectValue:(0,r.useCallback)((e=>{if(!d({value:e,tabValues:l}))throw new Error(`Can't select invalid tab value=${e}`);o(e),u(e),y(e)}),[u,y,l]),tabValues:l}}var h=a(92303);const b={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function f(e){let{className:t,block:a,selectedValue:o,selectValue:s,tabValues:u}=e;const c=[],{blockElementScrollPositionUntilNextRender:p}=(0,i.a_)(),g=e=>{const t=e.currentTarget,a=c.indexOf(t),n=u[a].value;n!==o&&(p(t),s(n))},d=e=>{let t=null;switch(e.key){case"Enter":g(e);break;case"ArrowRight":{const a=c.indexOf(e.currentTarget)+1;t=c[a]??c[0];break}case"ArrowLeft":{const 
a=c.indexOf(e.currentTarget)-1;t=c[a]??c[c.length-1];break}}t?.focus()};return r.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,l.A)("tabs",{"tabs--block":a},t)},u.map((e=>{let{value:t,label:a,attributes:i}=e;return r.createElement("li",(0,n.A)({role:"tab",tabIndex:o===t?0:-1,"aria-selected":o===t,key:t,ref:e=>c.push(e),onKeyDown:d,onClick:g},i,{className:(0,l.A)("tabs__item",b.tabItem,i?.className,{"tabs__item--active":o===t})}),a??t)})))}function v(e){let{lazy:t,children:a,selectedValue:n}=e;const l=(Array.isArray(a)?a:[a]).filter(Boolean);if(t){const e=l.find((e=>e.props.value===n));return e?(0,r.cloneElement)(e,{className:"margin-top--md"}):null}return r.createElement("div",{className:"margin-top--md"},l.map(((e,t)=>(0,r.cloneElement)(e,{key:t,hidden:e.props.value!==n}))))}function x(e){const t=y(e);return r.createElement("div",{className:(0,l.A)("tabs-container",b.tabList)},r.createElement(f,(0,n.A)({},e,t)),r.createElement(v,(0,n.A)({},e,t)))}function w(e){const t=(0,h.A)();return r.createElement(x,(0,n.A)({key:String(t)},e))}},62363:(e,t,a)=>{a.r(t),a.d(t,{assets:()=>c,contentTitle:()=>s,default:()=>m,frontMatter:()=>o,metadata:()=>u,toc:()=>p});var n=a(58168),r=(a(96540),a(15680)),l=a(11470),i=a(19365);const o={sidebar_position:1,title:"TextProcessing",id:"ml-text-processing",description:"Text processing to prepare data to submit to a foundational model API.",tags:["generative-ai","machine-learning","llm","text-processing","web-scraping","chunk","extract"]},s=void 0,u={unversionedId:"Spark/gems/machine-learning/ml-text-processing",id:"Spark/gems/machine-learning/ml-text-processing",title:"TextProcessing",description:"Text processing to prepare data to submit to a foundational model 
API.",source:"@site/docs/Spark/gems/machine-learning/ml-text-processing.md",sourceDirName:"Spark/gems/machine-learning",slug:"/Spark/gems/machine-learning/ml-text-processing",permalink:"/Spark/gems/machine-learning/ml-text-processing",draft:!1,tags:[{label:"generative-ai",permalink:"/tags/generative-ai"},{label:"machine-learning",permalink:"/tags/machine-learning"},{label:"llm",permalink:"/tags/llm"},{label:"text-processing",permalink:"/tags/text-processing"},{label:"web-scraping",permalink:"/tags/web-scraping"},{label:"chunk",permalink:"/tags/chunk"},{label:"extract",permalink:"/tags/extract"}],version:"current",sidebarPosition:1,frontMatter:{sidebar_position:1,title:"TextProcessing",id:"ml-text-processing",description:"Text processing to prepare data to submit to a foundational model API.",tags:["generative-ai","machine-learning","llm","text-processing","web-scraping","chunk","extract"]},sidebar:"defaultSidebar",previous:{title:"Machine Learning",permalink:"/Spark/gems/machine-learning/"},next:{title:"OpenAI",permalink:"/Spark/gems/machine-learning/ml-openai"}},c={},p=[{value:"1. Load web URLs and Extract Text",id:"1-load-web-urls-and-extract-text",level:3},{value:"1a. Configure web scrape",id:"1a-configure-web-scrape",level:4},{value:"1b. Input",id:"1b-input",level:4},{value:"1c. Output",id:"1c-output",level:4},{value:"1d. Generated Code",id:"1d-generated-code",level:4},{value:"2. Split text data into equal chunks",id:"2-split-text-data-into-equal-chunks",level:3},{value:"2a. Configure text splitting",id:"2a-configure-text-splitting",level:4},{value:"2b. Input",id:"2b-input",level:4},{value:"2c. Output",id:"2c-output",level:4},{value:"2d. 
Generated code",id:"2d-generated-code",level:4},{value:"FAQ",id:"faq",level:3},{value:"Troubleshooting",id:"troubleshooting",level:4}],g={toc:p},d="wrapper";function m(e){let{components:t,...o}=e;return(0,r.yg)(d,(0,n.A)({},g,o,{components:t,mdxType:"MDXLayout"}),(0,r.yg)("h3",null,(0,r.yg)("span",{class:"badge"},"Spark Gem")),(0,r.yg)("p",null,"The TextProcessing Gem enables text data preparation for machine learning in two different ways:"),(0,r.yg)("ol",null,(0,r.yg)("li",{parentName:"ol"},(0,r.yg)("a",{parentName:"li",href:"/Spark/gems/machine-learning/ml-text-processing#1-load-web-urls-and-extract-text"},"Load")," web URLs and extract text."),(0,r.yg)("li",{parentName:"ol"},(0,r.yg)("a",{parentName:"li",href:"/Spark/gems/machine-learning/ml-text-processing#2-split-text-data-into-equal-chunks"},"Split")," text data into equal chunks.")),(0,r.yg)("p",null,"Follow along to see how to use the TextProcessing Gem. For an example set of Pipelines that uses this Gem to create a Generative AI Chatbot, see this ",(0,r.yg)("a",{parentName:"p",href:"https://docs.prophecy.io/getting-started/gen-ai-chatbot"},"guide.")),(0,r.yg)("div",{class:"wistia_responsive_padding",style:{padding:"56.25% 0 0 0",position:"relative"}},(0,r.yg)("div",{class:"wistia_responsive_wrapper",style:{height:"100%",left:0,position:"absolute",top:0,width:"100%"}},(0,r.yg)("iframe",{src:"https://fast.wistia.net/embed/iframe/7v6y4ldt5x?seo=false?videoFoam=true",title:"Getting Started With SQL Video",allow:"autoplay; fullscreen",allowtransparency:"true",frameborder:"0",scrolling:"no",class:"wistia_embed",name:"wistia_embed",msallowfullscreen:!0,width:"100%",height:"100%"}))),(0,r.yg)("script",{src:"https://fast.wistia.net/assets/external/E-v1.js",async:!0}),(0,r.yg)("br",null),(0,r.yg)("h3",{id:"1-load-web-urls-and-extract-text"},"1. 
Load web URLs and Extract Text"),(0,r.yg)("p",null,"Given a column with web URLs, the ",(0,r.yg)("inlineCode",{parentName:"p"},"Load web URLs")," operation will scrape the content from each URL, and output the content as a binary format or as a human readable text format, depending on the operation type selected. The figure below shows the ",(0,r.yg)("inlineCode",{parentName:"p"},"Load web URL and Extract Text")," operation."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Overview web scrape and extract text",src:a(11959).A,width:"2376",height:"934"})),(0,r.yg)("h4",{id:"1a-configure-web-scrape"},"1a. Configure web scrape"),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Configure to web scrape",src:a(56144).A,width:"2880",height:"1726"})),(0,r.yg)("p",null,"Configure the ",(0,r.yg)("strong",{parentName:"p"},"(1) Operation Type")," to Load url (web scrape), and optionally extract the text. Specify which input ",(0,r.yg)("strong",{parentName:"p"},"(2) Column name")," contains the web urls. If the ",(0,r.yg)("inlineCode",{parentName:"p"},"extract text")," operation is selected, the text will be converted from binary to human readable format. When would you want to use the binary format? Binary web scraping is useful for downloading content including images or archived documents."),(0,r.yg)("h4",{id:"1b-input"},"1b. Input"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Parameter"),(0,r.yg)("th",{parentName:"tr",align:null},"Description"),(0,r.yg)("th",{parentName:"tr",align:null},"Required"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Column name (string with urls)"),(0,r.yg)("td",{parentName:"tr",align:null},"string - the input column which contains the strings of web URLs"),(0,r.yg)("td",{parentName:"tr",align:null},"True")))),(0,r.yg)("h4",{id:"1c-output"},"1c. 
Output"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Parameter"),(0,r.yg)("th",{parentName:"tr",align:null},"Description"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Result content ",(0,r.yg)("inlineCode",{parentName:"td"},"Load url (web scrape)")),(0,r.yg)("td",{parentName:"tr",align:null},"binary - the contents of each web page")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Result content ",(0,r.yg)("inlineCode",{parentName:"td"},"Load url (web scrape) and extract text")),(0,r.yg)("td",{parentName:"tr",align:null},"string - the contents of each web page, converted from binary to human readable text")))),(0,r.yg)("h4",{id:"1d-generated-code"},"1d. Generated Code"),(0,r.yg)(l.A,{mdxType:"Tabs"},(0,r.yg)(i.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},'def scrape_pages(spark: SparkSession, in0: DataFrame) -> DataFrame:\n from pyspark.sql.functions import expr, array, struct\n from spark_ai.webapps import WebUtils\n WebUtils().register_udfs(spark)\n\n return in0.withColumn("result_content", expr(f"web_scrape(loc)"))\n'))),(0,r.yg)(i.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre"},"[Not yet supported]\n")))),(0,r.yg)("h3",{id:"2-split-text-data-into-equal-chunks"},"2. Split text data into equal chunks"),(0,r.yg)("p",null,'Sometimes you\'d like to send text data to a foundational model or store in a vector database, but the text is too long. For this case, just split the text into "chunks" of characters.'),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Overview Chunkify",src:a(48382).A,width:"2376",height:"814"})),(0,r.yg)("h4",{id:"2a-configure-text-splitting"},"2a. 
Configure text splitting"),(0,r.yg)("p",null,"Given a text input, the ",(0,r.yg)("inlineCode",{parentName:"p"},"Split data")," operation will separate the input column entries into chunks of specified ",(0,r.yg)("inlineCode",{parentName:"p"},"size"),"."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Configure to Chunkify",src:a(2125).A,width:"2880",height:"1726"})),(0,r.yg)("p",null,"Select the ",(0,r.yg)("strong",{parentName:"p"},"(1) Operation type")," to split text into equal chunks. Specify which input ",(0,r.yg)("strong",{parentName:"p"},"(2) Column name")," contains the relevant content. Specify an integer chunk ",(0,r.yg)("strong",{parentName:"p"},"(3) Size")," relevant for your generative AI use case."),(0,r.yg)("h4",{id:"2b-input"},"2b. Input"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Parameter"),(0,r.yg)("th",{parentName:"tr",align:null},"Description"),(0,r.yg)("th",{parentName:"tr",align:null},"Required"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Column name"),(0,r.yg)("td",{parentName:"tr",align:null},"string - the text content which should be split into equal chunks"),(0,r.yg)("td",{parentName:"tr",align:null},"True")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Size"),(0,r.yg)("td",{parentName:"tr",align:null},"integer - the size of each chunk, number of characters. Example: ",(0,r.yg)("inlineCode",{parentName:"td"},"1000")),(0,r.yg)("td",{parentName:"tr",align:null},"True")))),(0,r.yg)("h4",{id:"2c-output"},"2c. 
Output"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Parameter"),(0,r.yg)("th",{parentName:"tr",align:null},"Description"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"result_chunks"),(0,r.yg)("td",{parentName:"tr",align:null},"array(string) - an array of text strings, each string representing one chunk of the larger text content")))),(0,r.yg)("h4",{id:"2d-generated-code"},"2d. Generated code"),(0,r.yg)(l.A,{mdxType:"Tabs"},(0,r.yg)(i.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},'def Chunkify(spark: SparkSession, web_bronze_content: DataFrame) -> DataFrame:\n from pyspark.sql.functions import expr, array, struct\n from spark_ai.files.text import FileTextUtils\n FileTextUtils().register_udfs(spark)\n\n return web_bronze_content.withColumn("result_chunks", expr(f"text_split_into_chunks(content, 1000)"))\n'))),(0,r.yg)(i.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre"},"[Not yet supported]\n")))),(0,r.yg)("h3",{id:"faq"},"FAQ"),(0,r.yg)("p",null,(0,r.yg)("strong",{parentName:"p"},"How does this Gem fit into the bigger picture of building a generative AI application?")),(0,r.yg)("p",null,"For an example set of Pipelines that uses this Gem to create a Generative AI Chatbot, see this ",(0,r.yg)("a",{parentName:"p",href:"https://docs.prophecy.io/getting-started/gen-ai-chatbot"},"guide.")," Feel free to ",(0,r.yg)("a",{parentName:"p",href:"https://www.prophecy.io/request-a-demo"},"reach out")," and explore your use case with us."),(0,r.yg)("h4",{id:"troubleshooting"},"Troubleshooting"),(0,r.yg)("p",null,"Select a chunk size according to the limitations of your vector database index."))}m.isMDXComponent=!0},2125:(e,t,a)=>{a.d(t,{A:()=>n});const 
n=a.p+"assets/images/ml-text-proc-configure-chunkify-d99e640e07f1831c369ad80aa09339d2.png"},48382:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/ml-text-proc-overview-chunkify-c7292b08f77cc25d9899ef1c0fc88c4f.png"},56144:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/ml-text-proc-scrape-configure-3fc90ca55df1009a84cc9e5a5752eeb6.png"},11959:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/ml-text-proc-scrape-extract-overview-34f31e4baf8a155a7537eb20431d08ac.png"}}]); \ No newline at end of file diff --git a/assets/js/5714fd1a.3da2ac7e.js b/assets/js/5714fd1a.ca967822.js similarity index 60% rename from assets/js/5714fd1a.3da2ac7e.js rename to assets/js/5714fd1a.ca967822.js index bc825a816f..b739314592 100644 --- a/assets/js/5714fd1a.3da2ac7e.js +++ b/assets/js/5714fd1a.ca967822.js @@ -1 +1 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[21728],{15680:(e,a,t)=>{t.d(a,{xA:()=>c,yg:()=>g});var r=t(96540);function n(e,a,t){return a in e?Object.defineProperty(e,a,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[a]=t,e}function o(e,a){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);a&&(r=r.filter((function(a){return Object.getOwnPropertyDescriptor(e,a).enumerable}))),t.push.apply(t,r)}return t}function l(e){for(var a=1;a=0||(n[t]=e[t]);return n}(e,a);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(n[t]=e[t])}return n}var s=r.createContext({}),i=function(e){var a=r.useContext(s),t=a;return e&&(t="function"==typeof e?e(a):l(l({},a),e)),t},c=function(e){var a=i(e.components);return r.createElement(s.Provider,{value:a},e.children)},p="mdxType",d={inlineCode:"code",wrapper:function(e){var a=e.children;return r.createElement(r.Fragment,{},a)}},m=r.forwardRef((function(e,a){var 
t=e.components,n=e.mdxType,o=e.originalType,s=e.parentName,c=u(e,["components","mdxType","originalType","parentName"]),p=i(t),m=n,g=p["".concat(s,".").concat(m)]||p[m]||d[m]||o;return t?r.createElement(g,l(l({ref:a},c),{},{components:t})):r.createElement(g,l({ref:a},c))}));function g(e,a){var t=arguments,n=a&&a.mdxType;if("string"==typeof e||n){var o=t.length,l=new Array(o);l[0]=m;var u={};for(var s in a)hasOwnProperty.call(a,s)&&(u[s]=a[s]);u.originalType=e,u[p]="string"==typeof e?e:n,l[1]=u;for(var i=2;i{t.d(a,{A:()=>l});var r=t(96540),n=t(20053);const o={tabItem:"tabItem_Ymn6"};function l(e){let{children:a,hidden:t,className:l}=e;return r.createElement("div",{role:"tabpanel",className:(0,n.A)(o.tabItem,l),hidden:t},a)}},11470:(e,a,t)=>{t.d(a,{A:()=>N});var r=t(58168),n=t(96540),o=t(20053),l=t(23104),u=t(56347),s=t(57485),i=t(31682),c=t(89466);function p(e){return function(e){return n.Children.map(e,(e=>{if(!e||(0,n.isValidElement)(e)&&function(e){const{props:a}=e;return!!a&&"object"==typeof a&&"value"in a}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:a,label:t,attributes:r,default:n}}=e;return{value:a,label:t,attributes:r,default:n}}))}function d(e){const{values:a,children:t}=e;return(0,n.useMemo)((()=>{const e=a??p(t);return function(e){const a=(0,i.X)(e,((e,a)=>e.value===a.value));if(a.length>0)throw new Error(`Docusaurus error: Duplicate values "${a.map((e=>e.value)).join(", ")}" found in . 
Every value needs to be unique.`)}(e),e}),[a,t])}function m(e){let{value:a,tabValues:t}=e;return t.some((e=>e.value===a))}function g(e){let{queryString:a=!1,groupId:t}=e;const r=(0,u.W6)(),o=function(e){let{queryString:a=!1,groupId:t}=e;if("string"==typeof a)return a;if(!1===a)return null;if(!0===a&&!t)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. You can also provide an explicit value such as queryString="my-search-param".');return t??null}({queryString:a,groupId:t});return[(0,s.aZ)(o),(0,n.useCallback)((e=>{if(!o)return;const a=new URLSearchParams(r.location.search);a.set(o,e),r.replace({...r.location,search:a.toString()})}),[o,r])]}function y(e){const{defaultValue:a,queryString:t=!1,groupId:r}=e,o=d(e),[l,u]=(0,n.useState)((()=>function(e){let{defaultValue:a,tabValues:t}=e;if(0===t.length)throw new Error("Docusaurus error: the component requires at least one children component");if(a){if(!m({value:a,tabValues:t}))throw new Error(`Docusaurus error: The has a defaultValue "${a}" but none of its children has the corresponding value. Available values are: ${t.map((e=>e.value)).join(", ")}. 
If you intend to show no default tab, use defaultValue={null} instead.`);return a}const r=t.find((e=>e.default))??t[0];if(!r)throw new Error("Unexpected error: 0 tabValues");return r.value}({defaultValue:a,tabValues:o}))),[s,i]=g({queryString:t,groupId:r}),[p,y]=function(e){let{groupId:a}=e;const t=function(e){return e?`docusaurus.tab.${e}`:null}(a),[r,o]=(0,c.Dv)(t);return[r,(0,n.useCallback)((e=>{t&&o.set(e)}),[t,o])]}({groupId:r}),b=(()=>{const e=s??p;return m({value:e,tabValues:o})?e:null})();(0,n.useLayoutEffect)((()=>{b&&u(b)}),[b]);return{selectedValue:l,selectValue:(0,n.useCallback)((e=>{if(!m({value:e,tabValues:o}))throw new Error(`Can't select invalid tab value=${e}`);u(e),i(e),y(e)}),[i,y,o]),tabValues:o}}var b=t(92303);const f={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function h(e){let{className:a,block:t,selectedValue:u,selectValue:s,tabValues:i}=e;const c=[],{blockElementScrollPositionUntilNextRender:p}=(0,l.a_)(),d=e=>{const a=e.currentTarget,t=c.indexOf(a),r=i[t].value;r!==u&&(p(a),s(r))},m=e=>{let a=null;switch(e.key){case"Enter":d(e);break;case"ArrowRight":{const t=c.indexOf(e.currentTarget)+1;a=c[t]??c[0];break}case"ArrowLeft":{const t=c.indexOf(e.currentTarget)-1;a=c[t]??c[c.length-1];break}}a?.focus()};return n.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,o.A)("tabs",{"tabs--block":t},a)},i.map((e=>{let{value:a,label:t,attributes:l}=e;return n.createElement("li",(0,r.A)({role:"tab",tabIndex:u===a?0:-1,"aria-selected":u===a,key:a,ref:e=>c.push(e),onKeyDown:m,onClick:d},l,{className:(0,o.A)("tabs__item",f.tabItem,l?.className,{"tabs__item--active":u===a})}),t??a)})))}function v(e){let{lazy:a,children:t,selectedValue:r}=e;const o=(Array.isArray(t)?t:[t]).filter(Boolean);if(a){const e=o.find((e=>e.props.value===r));return e?(0,n.cloneElement)(e,{className:"margin-top--md"}):null}return 
n.createElement("div",{className:"margin-top--md"},o.map(((e,a)=>(0,n.cloneElement)(e,{key:a,hidden:e.props.value!==r}))))}function k(e){const a=y(e);return n.createElement("div",{className:(0,o.A)("tabs-container",f.tabList)},n.createElement(h,(0,r.A)({},e,a)),n.createElement(v,(0,r.A)({},e,a)))}function N(e){const a=(0,b.A)();return n.createElement(k,(0,r.A)({key:String(a)},e))}},10202:(e,a,t)=>{t.r(a),t.d(a,{assets:()=>c,contentTitle:()=>s,default:()=>g,frontMatter:()=>u,metadata:()=>i,toc:()=>p});var r=t(58168),n=(t(96540),t(15680)),o=t(11470),l=t(19365);const u={title:"Lookup",id:"lookup",description:"Lookup",sidebar_position:2,tags:["gems","lookup"]},s=void 0,i={unversionedId:"Spark/gems/source-target/advanced/lookup",id:"Spark/gems/source-target/advanced/lookup",title:"Lookup",description:"Lookup",source:"@site/docs/Spark/gems/source-target/advanced/lookup.md",sourceDirName:"Spark/gems/source-target/advanced",slug:"/Spark/gems/source-target/advanced/lookup",permalink:"/Spark/gems/source-target/advanced/lookup",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"lookup",permalink:"/tags/lookup"}],version:"current",sidebarPosition:2,frontMatter:{title:"Lookup",id:"lookup",description:"Lookup",sidebar_position:2,tags:["gems","lookup"]},sidebar:"defaultSidebar",previous:{title:"Providers",permalink:"/Spark/gems/source-target/advanced/synthetic-data-generator/providers"},next:{title:"Transform",permalink:"/Spark/gems/transform/"}},c={},p=[{value:"Using Lookups",id:"using-lookups",level:2},{value:"Column-based lookups",id:"column-based-lookups",level:3},{value:"Literal lookups",id:"literal-lookups",level:3}],d={toc:p},m="wrapper";function g(e){let{components:a,...u}=e;return(0,n.yg)(m,(0,r.A)({},d,u,{components:a,mdxType:"MDXLayout"}),(0,n.yg)("h3",null,(0,n.yg)("span",{class:"badge rounded-pill text-bg-light"},"Spark Gem")),(0,n.yg)("p",null,"Lookups are a special kind of Gem that allow you to mark a particular DataFrame as a 
",(0,n.yg)("em",{parentName:"p"},"Broadcast")," DataFrame. Spark will ensure that this data is available on every computation node so that these lookups can be done without shuffling data. This is useful for looking up values in tables, hence the name."),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"Lookup Gem",src:t(28009).A,width:"230",height:"207"})),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"Lookup UI",src:t(13992).A,width:"1283",height:"770"})),(0,n.yg)("table",null,(0,n.yg)("thead",{parentName:"table"},(0,n.yg)("tr",{parentName:"thead"},(0,n.yg)("th",{parentName:"tr",align:"center"}),(0,n.yg)("th",{parentName:"tr",align:null},"Name"),(0,n.yg)("th",{parentName:"tr",align:null},"Description"))),(0,n.yg)("tbody",{parentName:"table"},(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"center"},"1"),(0,n.yg)("td",{parentName:"tr",align:null},"Key Columns"),(0,n.yg)("td",{parentName:"tr",align:null},"Specify one or more columns to use as the lookup key in the source DataFrame")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"center"},"2"),(0,n.yg)("td",{parentName:"tr",align:null},"Value Columns"),(0,n.yg)("td",{parentName:"tr",align:null},"Pick which columns can be referenced wherever this Lookup is used")))),(0,n.yg)("h2",{id:"using-lookups"},"Using Lookups"),(0,n.yg)("p",null,"Lookups can be used wherever any other Expression can be used, but usage depends on your Expression language of choice. 
Lookup references follow a certain pattern:"),(0,n.yg)(o.A,{mdxType:"Tabs"},(0,n.yg)(l.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-py"},'lookup("", ).getField()\n'))),(0,n.yg)(l.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-scala"},'lookup("", ).getField()\n'))),(0,n.yg)(l.A,{value:"sql",label:"SQL",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-sql"},"()['']\n")))),(0,n.yg)("h3",{id:"column-based-lookups"},"Column-based lookups"),(0,n.yg)("p",null,"So, based on our above ",(0,n.yg)("inlineCode",{parentName:"p"},"MyLookup")," example we'd use:"),(0,n.yg)(o.A,{mdxType:"Tabs"},(0,n.yg)(l.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-py"},'lookup("MyLookup", col("customer_id")).getField("order_category")\n'))),(0,n.yg)(l.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-scala"},'lookup("MyLookup", col("customer_id")).getField("order_category")\n'))),(0,n.yg)(l.A,{value:"sql",label:"SQL",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-sql"},"MyLookup(customer_id)['order_category']\n")))),(0,n.yg)("p",null,"For example, let's look at a ",(0,n.yg)("a",{parentName:"p",href:"/Spark/gems/transform/reformat"},"Reformat")," component:"),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"Reformat example",src:t(1781).A,width:"1260",height:"182"})),(0,n.yg)("p",null,"Here we have a column named ",(0,n.yg)("inlineCode",{parentName:"p"},"category")," that is set to the value of ",(0,n.yg)("inlineCode",{parentName:"p"},"MyLookup(customer_id)['order_category']")," in SQL Expression mode. 
Whatever the value of ",(0,n.yg)("inlineCode",{parentName:"p"},"order_category")," is for the key found in the ",(0,n.yg)("inlineCode",{parentName:"p"},"c_id")," column (compared to the source ",(0,n.yg)("inlineCode",{parentName:"p"},"customer_id")," key column) will be used for the new column."),(0,n.yg)("h3",{id:"literal-lookups"},"Literal lookups"),(0,n.yg)("p",null,"Since any column reference can be used in Lookup expressions, you can use Lookups with static keys:"),(0,n.yg)(o.A,{mdxType:"Tabs"},(0,n.yg)(l.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-py"},'lookup("MyLookup", lit("0000")).getField("order_category")\n'))),(0,n.yg)(l.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-scala"},'lookup("MyLookup", lit("0000")).getField("order_category")\n'))),(0,n.yg)(l.A,{value:"sql",label:"SQL",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-sql"},"MyLookup('0000')['order_category']\n")))),(0,n.yg)("p",null,"In this case, the expression evaluates to the value of ",(0,n.yg)("inlineCode",{parentName:"p"},"order_category")," where ",(0,n.yg)("inlineCode",{parentName:"p"},"customer_id")," is ",(0,n.yg)("inlineCode",{parentName:"p"},"0000"),". 
This can be useful in situations when you want to have a table of predefined keys and their values available in Expressions."))}g.isMDXComponent=!0},28009:(e,a,t)=>{t.d(a,{A:()=>r});const r=t.p+"assets/images/lookup-adb6f51a0f1bfd87b22ed4328e57a140.png"},13992:(e,a,t)=>{t.d(a,{A:()=>r});const r=t.p+"assets/images/lookup_ui-dee0b6773250bea2c99b94dbeda25a86.png"},1781:(e,a,t)=>{t.d(a,{A:()=>r});const r=t.p+"assets/images/lookup_use-606f0c4b09ceef3346b9f8e946c5cccb.png"}}]); \ No newline at end of file +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[21728],{15680:(e,a,t)=>{t.d(a,{xA:()=>c,yg:()=>g});var r=t(96540);function n(e,a,t){return a in e?Object.defineProperty(e,a,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[a]=t,e}function o(e,a){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);a&&(r=r.filter((function(a){return Object.getOwnPropertyDescriptor(e,a).enumerable}))),t.push.apply(t,r)}return t}function l(e){for(var a=1;a=0||(n[t]=e[t]);return n}(e,a);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(n[t]=e[t])}return n}var s=r.createContext({}),i=function(e){var a=r.useContext(s),t=a;return e&&(t="function"==typeof e?e(a):l(l({},a),e)),t},c=function(e){var a=i(e.components);return r.createElement(s.Provider,{value:a},e.children)},p="mdxType",d={inlineCode:"code",wrapper:function(e){var a=e.children;return r.createElement(r.Fragment,{},a)}},m=r.forwardRef((function(e,a){var t=e.components,n=e.mdxType,o=e.originalType,s=e.parentName,c=u(e,["components","mdxType","originalType","parentName"]),p=i(t),m=n,g=p["".concat(s,".").concat(m)]||p[m]||d[m]||o;return t?r.createElement(g,l(l({ref:a},c),{},{components:t})):r.createElement(g,l({ref:a},c))}));function g(e,a){var t=arguments,n=a&&a.mdxType;if("string"==typeof e||n){var o=t.length,l=new Array(o);l[0]=m;var u={};for(var s in 
a)hasOwnProperty.call(a,s)&&(u[s]=a[s]);u.originalType=e,u[p]="string"==typeof e?e:n,l[1]=u;for(var i=2;i{t.d(a,{A:()=>l});var r=t(96540),n=t(20053);const o={tabItem:"tabItem_Ymn6"};function l(e){let{children:a,hidden:t,className:l}=e;return r.createElement("div",{role:"tabpanel",className:(0,n.A)(o.tabItem,l),hidden:t},a)}},11470:(e,a,t)=>{t.d(a,{A:()=>N});var r=t(58168),n=t(96540),o=t(20053),l=t(23104),u=t(56347),s=t(57485),i=t(31682),c=t(89466);function p(e){return function(e){return n.Children.map(e,(e=>{if(!e||(0,n.isValidElement)(e)&&function(e){const{props:a}=e;return!!a&&"object"==typeof a&&"value"in a}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:a,label:t,attributes:r,default:n}}=e;return{value:a,label:t,attributes:r,default:n}}))}function d(e){const{values:a,children:t}=e;return(0,n.useMemo)((()=>{const e=a??p(t);return function(e){const a=(0,i.X)(e,((e,a)=>e.value===a.value));if(a.length>0)throw new Error(`Docusaurus error: Duplicate values "${a.map((e=>e.value)).join(", ")}" found in . Every value needs to be unique.`)}(e),e}),[a,t])}function m(e){let{value:a,tabValues:t}=e;return t.some((e=>e.value===a))}function g(e){let{queryString:a=!1,groupId:t}=e;const r=(0,u.W6)(),o=function(e){let{queryString:a=!1,groupId:t}=e;if("string"==typeof a)return a;if(!1===a)return null;if(!0===a&&!t)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. 
You can also provide an explicit value such as queryString="my-search-param".');return t??null}({queryString:a,groupId:t});return[(0,s.aZ)(o),(0,n.useCallback)((e=>{if(!o)return;const a=new URLSearchParams(r.location.search);a.set(o,e),r.replace({...r.location,search:a.toString()})}),[o,r])]}function y(e){const{defaultValue:a,queryString:t=!1,groupId:r}=e,o=d(e),[l,u]=(0,n.useState)((()=>function(e){let{defaultValue:a,tabValues:t}=e;if(0===t.length)throw new Error("Docusaurus error: the component requires at least one children component");if(a){if(!m({value:a,tabValues:t}))throw new Error(`Docusaurus error: The has a defaultValue "${a}" but none of its children has the corresponding value. Available values are: ${t.map((e=>e.value)).join(", ")}. If you intend to show no default tab, use defaultValue={null} instead.`);return a}const r=t.find((e=>e.default))??t[0];if(!r)throw new Error("Unexpected error: 0 tabValues");return r.value}({defaultValue:a,tabValues:o}))),[s,i]=g({queryString:t,groupId:r}),[p,y]=function(e){let{groupId:a}=e;const t=function(e){return e?`docusaurus.tab.${e}`:null}(a),[r,o]=(0,c.Dv)(t);return[r,(0,n.useCallback)((e=>{t&&o.set(e)}),[t,o])]}({groupId:r}),b=(()=>{const e=s??p;return m({value:e,tabValues:o})?e:null})();(0,n.useLayoutEffect)((()=>{b&&u(b)}),[b]);return{selectedValue:l,selectValue:(0,n.useCallback)((e=>{if(!m({value:e,tabValues:o}))throw new Error(`Can't select invalid tab value=${e}`);u(e),i(e),y(e)}),[i,y,o]),tabValues:o}}var b=t(92303);const f={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function h(e){let{className:a,block:t,selectedValue:u,selectValue:s,tabValues:i}=e;const c=[],{blockElementScrollPositionUntilNextRender:p}=(0,l.a_)(),d=e=>{const a=e.currentTarget,t=c.indexOf(a),r=i[t].value;r!==u&&(p(a),s(r))},m=e=>{let a=null;switch(e.key){case"Enter":d(e);break;case"ArrowRight":{const t=c.indexOf(e.currentTarget)+1;a=c[t]??c[0];break}case"ArrowLeft":{const 
t=c.indexOf(e.currentTarget)-1;a=c[t]??c[c.length-1];break}}a?.focus()};return n.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,o.A)("tabs",{"tabs--block":t},a)},i.map((e=>{let{value:a,label:t,attributes:l}=e;return n.createElement("li",(0,r.A)({role:"tab",tabIndex:u===a?0:-1,"aria-selected":u===a,key:a,ref:e=>c.push(e),onKeyDown:m,onClick:d},l,{className:(0,o.A)("tabs__item",f.tabItem,l?.className,{"tabs__item--active":u===a})}),t??a)})))}function v(e){let{lazy:a,children:t,selectedValue:r}=e;const o=(Array.isArray(t)?t:[t]).filter(Boolean);if(a){const e=o.find((e=>e.props.value===r));return e?(0,n.cloneElement)(e,{className:"margin-top--md"}):null}return n.createElement("div",{className:"margin-top--md"},o.map(((e,a)=>(0,n.cloneElement)(e,{key:a,hidden:e.props.value!==r}))))}function k(e){const a=y(e);return n.createElement("div",{className:(0,o.A)("tabs-container",f.tabList)},n.createElement(h,(0,r.A)({},e,a)),n.createElement(v,(0,r.A)({},e,a)))}function N(e){const a=(0,b.A)();return n.createElement(k,(0,r.A)({key:String(a)},e))}},10202:(e,a,t)=>{t.r(a),t.d(a,{assets:()=>c,contentTitle:()=>s,default:()=>g,frontMatter:()=>u,metadata:()=>i,toc:()=>p});var r=t(58168),n=(t(96540),t(15680)),o=t(11470),l=t(19365);const u={title:"Lookup",id:"lookup",description:"Lookup",sidebar_position:2,tags:["gems","lookup"]},s=void 
0,i={unversionedId:"Spark/gems/source-target/advanced/lookup",id:"Spark/gems/source-target/advanced/lookup",title:"Lookup",description:"Lookup",source:"@site/docs/Spark/gems/source-target/advanced/lookup.md",sourceDirName:"Spark/gems/source-target/advanced",slug:"/Spark/gems/source-target/advanced/lookup",permalink:"/Spark/gems/source-target/advanced/lookup",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"lookup",permalink:"/tags/lookup"}],version:"current",sidebarPosition:2,frontMatter:{title:"Lookup",id:"lookup",description:"Lookup",sidebar_position:2,tags:["gems","lookup"]},sidebar:"defaultSidebar",previous:{title:"Providers",permalink:"/Spark/gems/source-target/advanced/synthetic-data-generator/providers"},next:{title:"Transform",permalink:"/Spark/gems/transform/"}},c={},p=[{value:"Using Lookups",id:"using-lookups",level:2},{value:"Column-based lookups",id:"column-based-lookups",level:3},{value:"Literal lookups",id:"literal-lookups",level:3}],d={toc:p},m="wrapper";function g(e){let{components:a,...u}=e;return(0,n.yg)(m,(0,r.A)({},d,u,{components:a,mdxType:"MDXLayout"}),(0,n.yg)("h3",null,(0,n.yg)("span",{class:"badge"},"Spark Gem")),(0,n.yg)("p",null,"Lookups are a special kind of Gem that allow you to mark a particular DataFrame as a ",(0,n.yg)("em",{parentName:"p"},"Broadcast")," DataFrame. Spark will ensure that this data is available on every computation node so that these lookups can be done without shuffling data. 
This is useful for looking up values in tables, hence the name."),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"Lookup Gem",src:t(28009).A,width:"230",height:"207"})),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"Lookup UI",src:t(13992).A,width:"1283",height:"770"})),(0,n.yg)("table",null,(0,n.yg)("thead",{parentName:"table"},(0,n.yg)("tr",{parentName:"thead"},(0,n.yg)("th",{parentName:"tr",align:"center"}),(0,n.yg)("th",{parentName:"tr",align:null},"Name"),(0,n.yg)("th",{parentName:"tr",align:null},"Description"))),(0,n.yg)("tbody",{parentName:"table"},(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"center"},"1"),(0,n.yg)("td",{parentName:"tr",align:null},"Key Columns"),(0,n.yg)("td",{parentName:"tr",align:null},"Specify one or more columns to use as the lookup key in the source DataFrame")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"center"},"2"),(0,n.yg)("td",{parentName:"tr",align:null},"Value Columns"),(0,n.yg)("td",{parentName:"tr",align:null},"Pick which columns can be referenced wherever this Lookup is used")))),(0,n.yg)("h2",{id:"using-lookups"},"Using Lookups"),(0,n.yg)("p",null,"Lookups can be used wherever any other Expression can be used, but usage depends on your Expression language of choice. 
Lookup references follow a certain pattern:"),(0,n.yg)(o.A,{mdxType:"Tabs"},(0,n.yg)(l.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-py"},'lookup("", ).getField()\n'))),(0,n.yg)(l.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-scala"},'lookup("", ).getField()\n'))),(0,n.yg)(l.A,{value:"sql",label:"SQL",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-sql"},"()['']\n")))),(0,n.yg)("h3",{id:"column-based-lookups"},"Column-based lookups"),(0,n.yg)("p",null,"So, based on our above ",(0,n.yg)("inlineCode",{parentName:"p"},"MyLookup")," example we'd use:"),(0,n.yg)(o.A,{mdxType:"Tabs"},(0,n.yg)(l.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-py"},'lookup("MyLookup", col("customer_id")).getField("order_category")\n'))),(0,n.yg)(l.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-scala"},'lookup("MyLookup", col("customer_id")).getField("order_category")\n'))),(0,n.yg)(l.A,{value:"sql",label:"SQL",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-sql"},"MyLookup(customer_id)['order_category']\n")))),(0,n.yg)("p",null,"For example, let's look at a ",(0,n.yg)("a",{parentName:"p",href:"/Spark/gems/transform/reformat"},"Reformat")," component:"),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"Reformat example",src:t(1781).A,width:"1260",height:"182"})),(0,n.yg)("p",null,"Here we have a column named ",(0,n.yg)("inlineCode",{parentName:"p"},"category")," that is set to the value of ",(0,n.yg)("inlineCode",{parentName:"p"},"MyLookup(customer_id)['order_category']")," in SQL Expression mode. 
Whatever the value of ",(0,n.yg)("inlineCode",{parentName:"p"},"order_category")," is for the key found in the ",(0,n.yg)("inlineCode",{parentName:"p"},"c_id")," column (compared to the source ",(0,n.yg)("inlineCode",{parentName:"p"},"customer_id")," key column) will be used for the new column."),(0,n.yg)("h3",{id:"literal-lookups"},"Literal lookups"),(0,n.yg)("p",null,"Since any column reference can be used in Lookup expressions, you can use Lookups with static keys:"),(0,n.yg)(o.A,{mdxType:"Tabs"},(0,n.yg)(l.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-py"},'lookup("MyLookup", lit("0000")).getField("order_category")\n'))),(0,n.yg)(l.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-scala"},'lookup("MyLookup", lit("0000")).getField("order_category")\n'))),(0,n.yg)(l.A,{value:"sql",label:"SQL",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-sql"},"MyLookup('0000')['order_category']\n")))),(0,n.yg)("p",null,"In this case, the expression evaluates to the value of ",(0,n.yg)("inlineCode",{parentName:"p"},"order_category")," where ",(0,n.yg)("inlineCode",{parentName:"p"},"customer_id")," is ",(0,n.yg)("inlineCode",{parentName:"p"},"0000"),". 
This can be useful in situations when you want to have a table of predefined keys and their values available in Expressions."))}g.isMDXComponent=!0},28009:(e,a,t)=>{t.d(a,{A:()=>r});const r=t.p+"assets/images/lookup-adb6f51a0f1bfd87b22ed4328e57a140.png"},13992:(e,a,t)=>{t.d(a,{A:()=>r});const r=t.p+"assets/images/lookup_ui-dee0b6773250bea2c99b94dbeda25a86.png"},1781:(e,a,t)=>{t.d(a,{A:()=>r});const r=t.p+"assets/images/lookup_use-606f0c4b09ceef3346b9f8e946c5cccb.png"}}]); \ No newline at end of file diff --git a/assets/js/5b2eddc9.673748c2.js b/assets/js/5b2eddc9.43f64355.js similarity index 77% rename from assets/js/5b2eddc9.673748c2.js rename to assets/js/5b2eddc9.43f64355.js index 76e64892a2..c4736a5c0d 100644 --- a/assets/js/5b2eddc9.673748c2.js +++ b/assets/js/5b2eddc9.43f64355.js @@ -1 +1 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[71625],{15680:(e,t,r)=>{r.d(t,{xA:()=>c,yg:()=>f});var a=r(96540);function n(e,t,r){return t in e?Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[t]=r,e}function l(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,a)}return r}function o(e){for(var t=1;t=0||(n[r]=e[r]);return n}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(n[r]=e[r])}return n}var s=a.createContext({}),u=function(e){var t=a.useContext(s),r=t;return e&&(r="function"==typeof e?e(t):o(o({},t),e)),r},c=function(e){var t=u(e.components);return a.createElement(s.Provider,{value:t},e.children)},p="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var 
r=e.components,n=e.mdxType,l=e.originalType,s=e.parentName,c=i(e,["components","mdxType","originalType","parentName"]),p=u(r),m=n,f=p["".concat(s,".").concat(m)]||p[m]||d[m]||l;return r?a.createElement(f,o(o({ref:t},c),{},{components:r})):a.createElement(f,o({ref:t},c))}));function f(e,t){var r=arguments,n=t&&t.mdxType;if("string"==typeof e||n){var l=r.length,o=new Array(l);o[0]=m;var i={};for(var s in t)hasOwnProperty.call(t,s)&&(i[s]=t[s]);i.originalType=e,i[p]="string"==typeof e?e:n,o[1]=i;for(var u=2;u{r.d(t,{A:()=>o});var a=r(96540),n=r(20053);const l={tabItem:"tabItem_Ymn6"};function o(e){let{children:t,hidden:r,className:o}=e;return a.createElement("div",{role:"tabpanel",className:(0,n.A)(l.tabItem,o),hidden:r},t)}},11470:(e,t,r)=>{r.d(t,{A:()=>w});var a=r(58168),n=r(96540),l=r(20053),o=r(23104),i=r(56347),s=r(57485),u=r(31682),c=r(89466);function p(e){return function(e){return n.Children.map(e,(e=>{if(!e||(0,n.isValidElement)(e)&&function(e){const{props:t}=e;return!!t&&"object"==typeof t&&"value"in t}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:t,label:r,attributes:a,default:n}}=e;return{value:t,label:r,attributes:a,default:n}}))}function d(e){const{values:t,children:r}=e;return(0,n.useMemo)((()=>{const e=t??p(r);return function(e){const t=(0,u.X)(e,((e,t)=>e.value===t.value));if(t.length>0)throw new Error(`Docusaurus error: Duplicate values "${t.map((e=>e.value)).join(", ")}" found in . 
Every value needs to be unique.`)}(e),e}),[t,r])}function m(e){let{value:t,tabValues:r}=e;return r.some((e=>e.value===t))}function f(e){let{queryString:t=!1,groupId:r}=e;const a=(0,i.W6)(),l=function(e){let{queryString:t=!1,groupId:r}=e;if("string"==typeof t)return t;if(!1===t)return null;if(!0===t&&!r)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. You can also provide an explicit value such as queryString="my-search-param".');return r??null}({queryString:t,groupId:r});return[(0,s.aZ)(l),(0,n.useCallback)((e=>{if(!l)return;const t=new URLSearchParams(a.location.search);t.set(l,e),a.replace({...a.location,search:t.toString()})}),[l,a])]}function g(e){const{defaultValue:t,queryString:r=!1,groupId:a}=e,l=d(e),[o,i]=(0,n.useState)((()=>function(e){let{defaultValue:t,tabValues:r}=e;if(0===r.length)throw new Error("Docusaurus error: the component requires at least one children component");if(t){if(!m({value:t,tabValues:r}))throw new Error(`Docusaurus error: The has a defaultValue "${t}" but none of its children has the corresponding value. Available values are: ${r.map((e=>e.value)).join(", ")}. 
If you intend to show no default tab, use defaultValue={null} instead.`);return t}const a=r.find((e=>e.default))??r[0];if(!a)throw new Error("Unexpected error: 0 tabValues");return a.value}({defaultValue:t,tabValues:l}))),[s,u]=f({queryString:r,groupId:a}),[p,g]=function(e){let{groupId:t}=e;const r=function(e){return e?`docusaurus.tab.${e}`:null}(t),[a,l]=(0,c.Dv)(r);return[a,(0,n.useCallback)((e=>{r&&l.set(e)}),[r,l])]}({groupId:a}),b=(()=>{const e=s??p;return m({value:e,tabValues:l})?e:null})();(0,n.useLayoutEffect)((()=>{b&&i(b)}),[b]);return{selectedValue:o,selectValue:(0,n.useCallback)((e=>{if(!m({value:e,tabValues:l}))throw new Error(`Can't select invalid tab value=${e}`);i(e),u(e),g(e)}),[u,g,l]),tabValues:l}}var b=r(92303);const y={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function h(e){let{className:t,block:r,selectedValue:i,selectValue:s,tabValues:u}=e;const c=[],{blockElementScrollPositionUntilNextRender:p}=(0,o.a_)(),d=e=>{const t=e.currentTarget,r=c.indexOf(t),a=u[r].value;a!==i&&(p(t),s(a))},m=e=>{let t=null;switch(e.key){case"Enter":d(e);break;case"ArrowRight":{const r=c.indexOf(e.currentTarget)+1;t=c[r]??c[0];break}case"ArrowLeft":{const r=c.indexOf(e.currentTarget)-1;t=c[r]??c[c.length-1];break}}t?.focus()};return n.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,l.A)("tabs",{"tabs--block":r},t)},u.map((e=>{let{value:t,label:r,attributes:o}=e;return n.createElement("li",(0,a.A)({role:"tab",tabIndex:i===t?0:-1,"aria-selected":i===t,key:t,ref:e=>c.push(e),onKeyDown:m,onClick:d},o,{className:(0,l.A)("tabs__item",y.tabItem,o?.className,{"tabs__item--active":i===t})}),r??t)})))}function v(e){let{lazy:t,children:r,selectedValue:a}=e;const l=(Array.isArray(r)?r:[r]).filter(Boolean);if(t){const e=l.find((e=>e.props.value===a));return e?(0,n.cloneElement)(e,{className:"margin-top--md"}):null}return 
n.createElement("div",{className:"margin-top--md"},l.map(((e,t)=>(0,n.cloneElement)(e,{key:t,hidden:e.props.value!==a}))))}function k(e){const t=g(e);return n.createElement("div",{className:(0,l.A)("tabs-container",y.tabList)},n.createElement(h,(0,a.A)({},e,t)),n.createElement(v,(0,a.A)({},e,t)))}function w(e){const t=(0,b.A)();return n.createElement(k,(0,a.A)({key:String(t)},e))}},70386:(e,t,r)=>{r.r(t),r.d(t,{assets:()=>c,contentTitle:()=>s,default:()=>f,frontMatter:()=>i,metadata:()=>u,toc:()=>p});var a=r(58168),n=(r(96540),r(15680)),l=r(11470),o=r(19365);const i={sidebar_position:2,title:"Filter",id:"filter",description:"Filter your data based on a custom filter condition",tags:["gems","filter","where"]},s=void 0,u={unversionedId:"Spark/gems/transform/filter",id:"Spark/gems/transform/filter",title:"Filter",description:"Filter your data based on a custom filter condition",source:"@site/docs/Spark/gems/transform/filter.md",sourceDirName:"Spark/gems/transform",slug:"/Spark/gems/transform/filter",permalink:"/Spark/gems/transform/filter",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"filter",permalink:"/tags/filter"},{label:"where",permalink:"/tags/where"}],version:"current",sidebarPosition:2,frontMatter:{sidebar_position:2,title:"Filter",id:"filter",description:"Filter your data based on a custom filter condition",tags:["gems","filter","where"]},sidebar:"defaultSidebar",previous:{title:"Reformat",permalink:"/Spark/gems/transform/reformat"},next:{title:"OrderBy",permalink:"/Spark/gems/transform/order-by"}},c={},p=[{value:"Parameters",id:"parameters",level:3},{value:"Example",id:"example",level:3},{value:"Spark Code",id:"spark-code",level:3}],d={toc:p},m="wrapper";function f(e){let{components:t,...i}=e;return(0,n.yg)(m,(0,a.A)({},d,i,{components:t,mdxType:"MDXLayout"}),(0,n.yg)("h3",null,(0,n.yg)("span",{class:"badge rounded-pill text-bg-light"},"Spark Gem")),(0,n.yg)("p",null,"Filters DataFrame based on the provided filter 
condition"),(0,n.yg)("h3",{id:"parameters"},"Parameters"),(0,n.yg)("table",null,(0,n.yg)("thead",{parentName:"table"},(0,n.yg)("tr",{parentName:"thead"},(0,n.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,n.yg)("th",{parentName:"tr",align:"left"},"Description"),(0,n.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,n.yg)("tbody",{parentName:"table"},(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},"DataFrame"),(0,n.yg)("td",{parentName:"tr",align:"left"},"Input DataFrame on which the filter condition will be applied."),(0,n.yg)("td",{parentName:"tr",align:"left"},"True")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},"Filter Condition"),(0,n.yg)("td",{parentName:"tr",align:"left"},"BooleanType column or boolean expression. Supports SQL, Python and Scala expressions."),(0,n.yg)("td",{parentName:"tr",align:"left"},"True")))),(0,n.yg)("h3",{id:"example"},"Example"),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"Example usage of Filter",src:r(82895).A,width:"2034",height:"802"})),(0,n.yg)("h3",{id:"spark-code"},"Spark Code"),(0,n.yg)(l.A,{mdxType:"Tabs"},(0,n.yg)(o.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-py"},'def Filter_Orders(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return in0.filter(\n (\n ((col("order_category") == lit("Marketing"))\n & ((col("order_status") == lit("Finished")) | (col("order_status") == lit("Approved"))))\n & ~ col("is_discounted")\n )\n )\n'))),(0,n.yg)(o.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-scala"},'object Filter_Orders {\n\n def apply(spark: SparkSession, in: DataFrame): DataFrame =\n in.filter(\n (\n col("order_category") === lit("Marketing"))\n .and(\n (col("order_status") === lit("Finished"))\n .or(col("order_status") === lit("Approved"))\n )\n .and(!col("is_discounted"))\n 
)\n}\n')))))}f.isMDXComponent=!0},82895:(e,t,r)=>{r.d(t,{A:()=>a});const a=r.p+"assets/images/filter_eg_1-ba6f5ebc4ade54f4f8bc7d8d44dae592.png"}}]); \ No newline at end of file +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[71625],{15680:(e,t,r)=>{r.d(t,{xA:()=>c,yg:()=>f});var a=r(96540);function n(e,t,r){return t in e?Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[t]=r,e}function l(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,a)}return r}function o(e){for(var t=1;t=0||(n[r]=e[r]);return n}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(n[r]=e[r])}return n}var s=a.createContext({}),u=function(e){var t=a.useContext(s),r=t;return e&&(r="function"==typeof e?e(t):o(o({},t),e)),r},c=function(e){var t=u(e.components);return a.createElement(s.Provider,{value:t},e.children)},p="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var r=e.components,n=e.mdxType,l=e.originalType,s=e.parentName,c=i(e,["components","mdxType","originalType","parentName"]),p=u(r),m=n,f=p["".concat(s,".").concat(m)]||p[m]||d[m]||l;return r?a.createElement(f,o(o({ref:t},c),{},{components:r})):a.createElement(f,o({ref:t},c))}));function f(e,t){var r=arguments,n=t&&t.mdxType;if("string"==typeof e||n){var l=r.length,o=new Array(l);o[0]=m;var i={};for(var s in t)hasOwnProperty.call(t,s)&&(i[s]=t[s]);i.originalType=e,i[p]="string"==typeof e?e:n,o[1]=i;for(var u=2;u{r.d(t,{A:()=>o});var a=r(96540),n=r(20053);const l={tabItem:"tabItem_Ymn6"};function o(e){let{children:t,hidden:r,className:o}=e;return 
a.createElement("div",{role:"tabpanel",className:(0,n.A)(l.tabItem,o),hidden:r},t)}},11470:(e,t,r)=>{r.d(t,{A:()=>w});var a=r(58168),n=r(96540),l=r(20053),o=r(23104),i=r(56347),s=r(57485),u=r(31682),c=r(89466);function p(e){return function(e){return n.Children.map(e,(e=>{if(!e||(0,n.isValidElement)(e)&&function(e){const{props:t}=e;return!!t&&"object"==typeof t&&"value"in t}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:t,label:r,attributes:a,default:n}}=e;return{value:t,label:r,attributes:a,default:n}}))}function d(e){const{values:t,children:r}=e;return(0,n.useMemo)((()=>{const e=t??p(r);return function(e){const t=(0,u.X)(e,((e,t)=>e.value===t.value));if(t.length>0)throw new Error(`Docusaurus error: Duplicate values "${t.map((e=>e.value)).join(", ")}" found in . Every value needs to be unique.`)}(e),e}),[t,r])}function m(e){let{value:t,tabValues:r}=e;return r.some((e=>e.value===t))}function f(e){let{queryString:t=!1,groupId:r}=e;const a=(0,i.W6)(),l=function(e){let{queryString:t=!1,groupId:r}=e;if("string"==typeof t)return t;if(!1===t)return null;if(!0===t&&!r)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. 
You can also provide an explicit value such as queryString="my-search-param".');return r??null}({queryString:t,groupId:r});return[(0,s.aZ)(l),(0,n.useCallback)((e=>{if(!l)return;const t=new URLSearchParams(a.location.search);t.set(l,e),a.replace({...a.location,search:t.toString()})}),[l,a])]}function g(e){const{defaultValue:t,queryString:r=!1,groupId:a}=e,l=d(e),[o,i]=(0,n.useState)((()=>function(e){let{defaultValue:t,tabValues:r}=e;if(0===r.length)throw new Error("Docusaurus error: the component requires at least one children component");if(t){if(!m({value:t,tabValues:r}))throw new Error(`Docusaurus error: The has a defaultValue "${t}" but none of its children has the corresponding value. Available values are: ${r.map((e=>e.value)).join(", ")}. If you intend to show no default tab, use defaultValue={null} instead.`);return t}const a=r.find((e=>e.default))??r[0];if(!a)throw new Error("Unexpected error: 0 tabValues");return a.value}({defaultValue:t,tabValues:l}))),[s,u]=f({queryString:r,groupId:a}),[p,g]=function(e){let{groupId:t}=e;const r=function(e){return e?`docusaurus.tab.${e}`:null}(t),[a,l]=(0,c.Dv)(r);return[a,(0,n.useCallback)((e=>{r&&l.set(e)}),[r,l])]}({groupId:a}),b=(()=>{const e=s??p;return m({value:e,tabValues:l})?e:null})();(0,n.useLayoutEffect)((()=>{b&&i(b)}),[b]);return{selectedValue:o,selectValue:(0,n.useCallback)((e=>{if(!m({value:e,tabValues:l}))throw new Error(`Can't select invalid tab value=${e}`);i(e),u(e),g(e)}),[u,g,l]),tabValues:l}}var b=r(92303);const y={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function h(e){let{className:t,block:r,selectedValue:i,selectValue:s,tabValues:u}=e;const c=[],{blockElementScrollPositionUntilNextRender:p}=(0,o.a_)(),d=e=>{const t=e.currentTarget,r=c.indexOf(t),a=u[r].value;a!==i&&(p(t),s(a))},m=e=>{let t=null;switch(e.key){case"Enter":d(e);break;case"ArrowRight":{const r=c.indexOf(e.currentTarget)+1;t=c[r]??c[0];break}case"ArrowLeft":{const 
r=c.indexOf(e.currentTarget)-1;t=c[r]??c[c.length-1];break}}t?.focus()};return n.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,l.A)("tabs",{"tabs--block":r},t)},u.map((e=>{let{value:t,label:r,attributes:o}=e;return n.createElement("li",(0,a.A)({role:"tab",tabIndex:i===t?0:-1,"aria-selected":i===t,key:t,ref:e=>c.push(e),onKeyDown:m,onClick:d},o,{className:(0,l.A)("tabs__item",y.tabItem,o?.className,{"tabs__item--active":i===t})}),r??t)})))}function v(e){let{lazy:t,children:r,selectedValue:a}=e;const l=(Array.isArray(r)?r:[r]).filter(Boolean);if(t){const e=l.find((e=>e.props.value===a));return e?(0,n.cloneElement)(e,{className:"margin-top--md"}):null}return n.createElement("div",{className:"margin-top--md"},l.map(((e,t)=>(0,n.cloneElement)(e,{key:t,hidden:e.props.value!==a}))))}function k(e){const t=g(e);return n.createElement("div",{className:(0,l.A)("tabs-container",y.tabList)},n.createElement(h,(0,a.A)({},e,t)),n.createElement(v,(0,a.A)({},e,t)))}function w(e){const t=(0,b.A)();return n.createElement(k,(0,a.A)({key:String(t)},e))}},70386:(e,t,r)=>{r.r(t),r.d(t,{assets:()=>c,contentTitle:()=>s,default:()=>f,frontMatter:()=>i,metadata:()=>u,toc:()=>p});var a=r(58168),n=(r(96540),r(15680)),l=r(11470),o=r(19365);const i={sidebar_position:2,title:"Filter",id:"filter",description:"Filter your data based on a custom filter condition",tags:["gems","filter","where"]},s=void 0,u={unversionedId:"Spark/gems/transform/filter",id:"Spark/gems/transform/filter",title:"Filter",description:"Filter your data based on a custom filter 
condition",source:"@site/docs/Spark/gems/transform/filter.md",sourceDirName:"Spark/gems/transform",slug:"/Spark/gems/transform/filter",permalink:"/Spark/gems/transform/filter",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"filter",permalink:"/tags/filter"},{label:"where",permalink:"/tags/where"}],version:"current",sidebarPosition:2,frontMatter:{sidebar_position:2,title:"Filter",id:"filter",description:"Filter your data based on a custom filter condition",tags:["gems","filter","where"]},sidebar:"defaultSidebar",previous:{title:"Reformat",permalink:"/Spark/gems/transform/reformat"},next:{title:"OrderBy",permalink:"/Spark/gems/transform/order-by"}},c={},p=[{value:"Parameters",id:"parameters",level:3},{value:"Example",id:"example",level:3},{value:"Spark Code",id:"spark-code",level:3}],d={toc:p},m="wrapper";function f(e){let{components:t,...i}=e;return(0,n.yg)(m,(0,a.A)({},d,i,{components:t,mdxType:"MDXLayout"}),(0,n.yg)("h3",null,(0,n.yg)("span",{class:"badge"},"Spark Gem")),(0,n.yg)("p",null,"Filters DataFrame based on the provided filter condition"),(0,n.yg)("h3",{id:"parameters"},"Parameters"),(0,n.yg)("table",null,(0,n.yg)("thead",{parentName:"table"},(0,n.yg)("tr",{parentName:"thead"},(0,n.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,n.yg)("th",{parentName:"tr",align:"left"},"Description"),(0,n.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,n.yg)("tbody",{parentName:"table"},(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},"DataFrame"),(0,n.yg)("td",{parentName:"tr",align:"left"},"Input DataFrame on which the filter condition will be applied."),(0,n.yg)("td",{parentName:"tr",align:"left"},"True")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},"Filter Condition"),(0,n.yg)("td",{parentName:"tr",align:"left"},"BooleanType column or boolean expression. 
Supports SQL, Python and Scala expressions."),(0,n.yg)("td",{parentName:"tr",align:"left"},"True")))),(0,n.yg)("h3",{id:"example"},"Example"),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"Example usage of Filter",src:r(82895).A,width:"2034",height:"802"})),(0,n.yg)("h3",{id:"spark-code"},"Spark Code"),(0,n.yg)(l.A,{mdxType:"Tabs"},(0,n.yg)(o.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-py"},'def Filter_Orders(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return in0.filter(\n (\n ((col("order_category") == lit("Marketing"))\n & ((col("order_status") == lit("Finished")) | (col("order_status") == lit("Approved"))))\n & ~ col("is_discounted")\n )\n )\n'))),(0,n.yg)(o.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-scala"},'object Filter_Orders {\n\n def apply(spark: SparkSession, in: DataFrame): DataFrame =\n in.filter(\n (\n col("order_category") === lit("Marketing"))\n .and(\n (col("order_status") === lit("Finished"))\n .or(col("order_status") === lit("Approved"))\n )\n .and(!col("is_discounted"))\n )\n}\n')))))}f.isMDXComponent=!0},82895:(e,t,r)=>{r.d(t,{A:()=>a});const a=r.p+"assets/images/filter_eg_1-ba6f5ebc4ade54f4f8bc7d8d44dae592.png"}}]); \ No newline at end of file diff --git a/assets/js/5beb85dd.5ace51e4.js b/assets/js/5beb85dd.5ace51e4.js deleted file mode 100644 index 696d643a92..0000000000 --- a/assets/js/5beb85dd.5ace51e4.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[14334],{15680:(e,t,a)=>{a.d(t,{xA:()=>u,yg:()=>c});var r=a(96540);function n(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function i(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return 
Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,r)}return a}function l(e){for(var t=1;t=0||(n[a]=e[a]);return n}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(n[a]=e[a])}return n}var s=r.createContext({}),p=function(e){var t=r.useContext(s),a=t;return e&&(a="function"==typeof e?e(t):l(l({},t),e)),a},u=function(e){var t=p(e.components);return r.createElement(s.Provider,{value:t},e.children)},g="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},m=r.forwardRef((function(e,t){var a=e.components,n=e.mdxType,i=e.originalType,s=e.parentName,u=o(e,["components","mdxType","originalType","parentName"]),g=p(a),m=n,c=g["".concat(s,".").concat(m)]||g[m]||d[m]||i;return a?r.createElement(c,l(l({ref:t},u),{},{components:a})):r.createElement(c,l({ref:t},u))}));function c(e,t){var a=arguments,n=t&&t.mdxType;if("string"==typeof e||n){var i=a.length,l=new Array(i);l[0]=m;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[g]="string"==typeof e?e:n,l[1]=o;for(var p=2;p{a.d(t,{A:()=>l});var r=a(96540),n=a(20053);const i={tabItem:"tabItem_Ymn6"};function l(e){let{children:t,hidden:a,className:l}=e;return r.createElement("div",{role:"tabpanel",className:(0,n.A)(i.tabItem,l),hidden:a},t)}},11470:(e,t,a)=>{a.d(t,{A:()=>w});var r=a(58168),n=a(96540),i=a(20053),l=a(23104),o=a(56347),s=a(57485),p=a(31682),u=a(89466);function g(e){return function(e){return n.Children.map(e,(e=>{if(!e||(0,n.isValidElement)(e)&&function(e){const{props:t}=e;return!!t&&"object"==typeof t&&"value"in t}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" 
prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:t,label:a,attributes:r,default:n}}=e;return{value:t,label:a,attributes:r,default:n}}))}function d(e){const{values:t,children:a}=e;return(0,n.useMemo)((()=>{const e=t??g(a);return function(e){const t=(0,p.X)(e,((e,t)=>e.value===t.value));if(t.length>0)throw new Error(`Docusaurus error: Duplicate values "${t.map((e=>e.value)).join(", ")}" found in . Every value needs to be unique.`)}(e),e}),[t,a])}function m(e){let{value:t,tabValues:a}=e;return a.some((e=>e.value===t))}function c(e){let{queryString:t=!1,groupId:a}=e;const r=(0,o.W6)(),i=function(e){let{queryString:t=!1,groupId:a}=e;if("string"==typeof t)return t;if(!1===t)return null;if(!0===t&&!a)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. You can also provide an explicit value such as queryString="my-search-param".');return a??null}({queryString:t,groupId:a});return[(0,s.aZ)(i),(0,n.useCallback)((e=>{if(!i)return;const t=new URLSearchParams(r.location.search);t.set(i,e),r.replace({...r.location,search:t.toString()})}),[i,r])]}function y(e){const{defaultValue:t,queryString:a=!1,groupId:r}=e,i=d(e),[l,o]=(0,n.useState)((()=>function(e){let{defaultValue:t,tabValues:a}=e;if(0===a.length)throw new Error("Docusaurus error: the component requires at least one children component");if(t){if(!m({value:t,tabValues:a}))throw new Error(`Docusaurus error: The has a defaultValue "${t}" but none of its children has the corresponding value. Available values are: ${a.map((e=>e.value)).join(", ")}. 
If you intend to show no default tab, use defaultValue={null} instead.`);return t}const r=a.find((e=>e.default))??a[0];if(!r)throw new Error("Unexpected error: 0 tabValues");return r.value}({defaultValue:t,tabValues:i}))),[s,p]=c({queryString:a,groupId:r}),[g,y]=function(e){let{groupId:t}=e;const a=function(e){return e?`docusaurus.tab.${e}`:null}(t),[r,i]=(0,u.Dv)(a);return[r,(0,n.useCallback)((e=>{a&&i.set(e)}),[a,i])]}({groupId:r}),b=(()=>{const e=s??g;return m({value:e,tabValues:i})?e:null})();(0,n.useLayoutEffect)((()=>{b&&o(b)}),[b]);return{selectedValue:l,selectValue:(0,n.useCallback)((e=>{if(!m({value:e,tabValues:i}))throw new Error(`Can't select invalid tab value=${e}`);o(e),p(e),y(e)}),[p,y,i]),tabValues:i}}var b=a(92303);const f={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function h(e){let{className:t,block:a,selectedValue:o,selectValue:s,tabValues:p}=e;const u=[],{blockElementScrollPositionUntilNextRender:g}=(0,l.a_)(),d=e=>{const t=e.currentTarget,a=u.indexOf(t),r=p[a].value;r!==o&&(g(t),s(r))},m=e=>{let t=null;switch(e.key){case"Enter":d(e);break;case"ArrowRight":{const a=u.indexOf(e.currentTarget)+1;t=u[a]??u[0];break}case"ArrowLeft":{const a=u.indexOf(e.currentTarget)-1;t=u[a]??u[u.length-1];break}}t?.focus()};return n.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,i.A)("tabs",{"tabs--block":a},t)},p.map((e=>{let{value:t,label:a,attributes:l}=e;return n.createElement("li",(0,r.A)({role:"tab",tabIndex:o===t?0:-1,"aria-selected":o===t,key:t,ref:e=>u.push(e),onKeyDown:m,onClick:d},l,{className:(0,i.A)("tabs__item",f.tabItem,l?.className,{"tabs__item--active":o===t})}),a??t)})))}function v(e){let{lazy:t,children:a,selectedValue:r}=e;const i=(Array.isArray(a)?a:[a]).filter(Boolean);if(t){const e=i.find((e=>e.props.value===r));return e?(0,n.cloneElement)(e,{className:"margin-top--md"}):null}return 
n.createElement("div",{className:"margin-top--md"},i.map(((e,t)=>(0,n.cloneElement)(e,{key:t,hidden:e.props.value!==r}))))}function N(e){const t=y(e);return n.createElement("div",{className:(0,i.A)("tabs-container",f.tabList)},n.createElement(h,(0,r.A)({},e,t)),n.createElement(v,(0,r.A)({},e,t)))}function w(e){const t=(0,b.A)();return n.createElement(N,(0,r.A)({key:String(t)},e))}},84886:(e,t,a)=>{a.r(t),a.d(t,{assets:()=>u,contentTitle:()=>s,default:()=>c,frontMatter:()=>o,metadata:()=>p,toc:()=>g});var r=a(58168),n=(a(96540),a(15680)),i=a(11470),l=a(19365);const o={title:"Repartition",id:"Repartition",description:"Repartition or coalesce a DataFrame",sidebar_position:2,tags:["gems","join-split","partition","repartition","coalesce"]},s=void 0,p={unversionedId:"Spark/gems/join-split/Repartition",id:"Spark/gems/join-split/Repartition",title:"Repartition",description:"Repartition or coalesce a DataFrame",source:"@site/docs/Spark/gems/join-split/repartition.md",sourceDirName:"Spark/gems/join-split",slug:"/Spark/gems/join-split/Repartition",permalink:"/Spark/gems/join-split/Repartition",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"join-split",permalink:"/tags/join-split"},{label:"partition",permalink:"/tags/partition"},{label:"repartition",permalink:"/tags/repartition"},{label:"coalesce",permalink:"/tags/coalesce"}],version:"current",sidebarPosition:2,frontMatter:{title:"Repartition",id:"Repartition",description:"Repartition or coalesce a DataFrame",sidebar_position:2,tags:["gems","join-split","partition","repartition","coalesce"]},sidebar:"defaultSidebar",previous:{title:"Join",permalink:"/Spark/gems/join-split/join"},next:{title:"RowDistributor",permalink:"/Spark/gems/join-split/row-distributor"}},u={},g=[{value:"Hash Repartitoning",id:"hash-repartitoning",level:2},{value:"Parameters",id:"hash-repartitoning",level:3},{value:"Generated Code",id:"hash-repartitoning",level:3},{value:"Random 
Repartitioning",id:"random-repartitioning",level:2},{value:"Parameters",id:"random-repartitioning",level:3},{value:"Generated Code",id:"random-repartitioning",level:3},{value:"Range Repartitoning",id:"range-repartitoning",level:2},{value:"Parameters",id:"range-repartitoning",level:3},{value:"Generated Code",id:"range-repartitoning",level:3},{value:"Coalesce",id:"coalesce",level:2},{value:"Parameters",id:"coalesce",level:3},{value:"Generated Code",id:"coalesce",level:3},{value:"Video demo",id:"video-demo",level:2}],d={toc:g},m="wrapper";function c(e){let{components:t,...a}=e;return(0,n.yg)(m,(0,r.A)({},d,a,{components:t,mdxType:"MDXLayout"}),(0,n.yg)("h3",null,(0,n.yg)("span",{class:"badge rounded-pill text-bg-light"},"Spark Gem")),(0,n.yg)("p",null,"This will repartition or coalesce the input DataFrame based on the specified configuration. There are four different repartitioning options:"),(0,n.yg)("h2",{id:"hash-repartitoning"},"Hash Repartitoning"),(0,n.yg)("p",null,"Repartitions the data evenly across various partitions based on the hash value of the specified key."),(0,n.yg)("h3",{id:"hash-repartitoning"},"Parameters"),(0,n.yg)("table",null,(0,n.yg)("thead",{parentName:"table"},(0,n.yg)("tr",{parentName:"thead"},(0,n.yg)("th",{parentName:"tr",align:null},"Parameter"),(0,n.yg)("th",{parentName:"tr",align:null},"Description"),(0,n.yg)("th",{parentName:"tr",align:null},"Required"))),(0,n.yg)("tbody",{parentName:"table"},(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"DataFrame"),(0,n.yg)("td",{parentName:"tr",align:null},"Input DataFrame"),(0,n.yg)("td",{parentName:"tr",align:null},"True")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"Overwrite default partitions"),(0,n.yg)("td",{parentName:"tr",align:null},"Flag to overwrite default partitions"),(0,n.yg)("td",{parentName:"tr",align:null},"False")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"Number of 
partitions"),(0,n.yg)("td",{parentName:"tr",align:null},"Integer value specifying number of partitions"),(0,n.yg)("td",{parentName:"tr",align:null},"False")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"Repartition expression(s)"),(0,n.yg)("td",{parentName:"tr",align:null},"List of expressions to repartition by"),(0,n.yg)("td",{parentName:"tr",align:null},"True")))),(0,n.yg)("h3",{id:"hash-repartitoning"},"Generated Code"),(0,n.yg)(i.A,{mdxType:"Tabs"},(0,n.yg)(l.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-py"},'def hashRepartition(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return in0.repartition(5, col("customer_id"))\n'))),(0,n.yg)(l.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-scala"},'object hashRepartition {\n\n def apply(spark: SparkSession, in: DataFrame): DataFrame =\n in.repartition(5, col("customer_id"))\n\n}\n')))),(0,n.yg)("h2",{id:"random-repartitioning"},"Random Repartitioning"),(0,n.yg)("p",null,"Repartitions without data distribution defined."),(0,n.yg)("h3",{id:"random-repartitioning"},"Parameters"),(0,n.yg)("table",null,(0,n.yg)("thead",{parentName:"table"},(0,n.yg)("tr",{parentName:"thead"},(0,n.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,n.yg)("th",{parentName:"tr",align:"left"},"Description"),(0,n.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,n.yg)("tbody",{parentName:"table"},(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},"DataFrame"),(0,n.yg)("td",{parentName:"tr",align:"left"},"Input DataFrame"),(0,n.yg)("td",{parentName:"tr",align:"left"},"True")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},"Number of partitions"),(0,n.yg)("td",{parentName:"tr",align:"left"},"Integer value specifying number of 
partitions"),(0,n.yg)("td",{parentName:"tr",align:"left"},"True")))),(0,n.yg)("h3",{id:"random-repartitioning"},"Generated Code"),(0,n.yg)(i.A,{mdxType:"Tabs"},(0,n.yg)(l.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-py"},"def randomRepartition(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return in0.repartition(5)\n"))),(0,n.yg)(l.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-scala"},"object randomRepartition {\n\n def apply(spark: SparkSession, in: DataFrame): DataFrame =\n in.repartition(5)\n\n}\n")))),(0,n.yg)("h2",{id:"range-repartitoning"},"Range Repartitoning"),(0,n.yg)("p",null,"Repartitions the data with tuples having keys within the same range on the same worker."),(0,n.yg)("h3",{id:"range-repartitoning"},"Parameters"),(0,n.yg)("table",null,(0,n.yg)("thead",{parentName:"table"},(0,n.yg)("tr",{parentName:"thead"},(0,n.yg)("th",{parentName:"tr",align:null},"Parameter"),(0,n.yg)("th",{parentName:"tr",align:null},"Description"),(0,n.yg)("th",{parentName:"tr",align:null},"Required"))),(0,n.yg)("tbody",{parentName:"table"},(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"DataFrame"),(0,n.yg)("td",{parentName:"tr",align:null},"Input DataFrame"),(0,n.yg)("td",{parentName:"tr",align:null},"True")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"Overwrite default partitions"),(0,n.yg)("td",{parentName:"tr",align:null},"Flag to overwrite default partitions"),(0,n.yg)("td",{parentName:"tr",align:null},"False")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"Number of partitions"),(0,n.yg)("td",{parentName:"tr",align:null},"Integer value specifying number of partitions"),(0,n.yg)("td",{parentName:"tr",align:null},"False")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"Repartition 
expression(s) with sorting"),(0,n.yg)("td",{parentName:"tr",align:null},"List of expressions to repartition by with corresponding sorting order"),(0,n.yg)("td",{parentName:"tr",align:null},"True")))),(0,n.yg)("h3",{id:"range-repartitoning"},"Generated Code"),(0,n.yg)(i.A,{mdxType:"Tabs"},(0,n.yg)(l.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-py"},'def RepartitionByRange(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return in0.repartitionByRange(5, col("customer_id").asc())\n'))),(0,n.yg)(l.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-scala"},'object RepartitionByRange {\n\n def apply(spark: SparkSession, in: DataFrame): DataFrame =\n in.repartitionByRange(5, col("customer_id").asc())\n\n}\n')))),(0,n.yg)("h2",{id:"coalesce"},"Coalesce"),(0,n.yg)("p",null,"Reduces the number of partitions without shuffling the dataset."),(0,n.yg)("h3",{id:"coalesce"},"Parameters"),(0,n.yg)("table",null,(0,n.yg)("thead",{parentName:"table"},(0,n.yg)("tr",{parentName:"thead"},(0,n.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,n.yg)("th",{parentName:"tr",align:"left"},"Description"),(0,n.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,n.yg)("tbody",{parentName:"table"},(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},"DataFrame"),(0,n.yg)("td",{parentName:"tr",align:"left"},"Input DataFrame"),(0,n.yg)("td",{parentName:"tr",align:"left"},"True")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},"Number of partitions"),(0,n.yg)("td",{parentName:"tr",align:"left"},"Integer value specifying number of partitions"),(0,n.yg)("td",{parentName:"tr",align:"left"},"True")))),(0,n.yg)("h3",{id:"coalesce"},"Generated 
Code"),(0,n.yg)(i.A,{mdxType:"Tabs"},(0,n.yg)(l.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-py"},"def Coalesce(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return in0.coalesce(5)\n"))),(0,n.yg)(l.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-scala"},"object Coalesce {\n\n def apply(spark: SparkSession, in: DataFrame): DataFrame =\n in.coalesce(5)\n\n}\n")))),(0,n.yg)("h2",{id:"video-demo"},"Video demo"),(0,n.yg)("div",{class:"wistia_responsive_padding",style:{padding:"56.25% 0 0 0",position:"relative"}},(0,n.yg)("div",{class:"wistia_responsive_wrapper",style:{height:"100%",left:0,position:"absolute",top:0,width:"100%"}},(0,n.yg)("iframe",{src:"https://user-images.githubusercontent.com/103921419/174014498-277e1037-8634-4752-a4f1-e0e1aae66659.mp4",title:"Repartition",allow:"autoplay;fullscreen",allowtransparency:"true",frameborder:"0",scrolling:"no",class:"wistia_embed",name:"wistia_embed",msallowfullscreen:!0,width:"100%",height:"100%"}))))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/5beb85dd.bb97cd1a.js b/assets/js/5beb85dd.bb97cd1a.js new file mode 100644 index 0000000000..5de044a213 --- /dev/null +++ b/assets/js/5beb85dd.bb97cd1a.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[14334],{15680:(e,t,a)=>{a.d(t,{xA:()=>u,yg:()=>d});var r=a(96540);function n(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function i(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,r)}return a}function l(e){for(var t=1;t=0||(n[a]=e[a]);return n}(e,t);if(Object.getOwnPropertySymbols){var 
i=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(n[a]=e[a])}return n}var s=r.createContext({}),p=function(e){var t=r.useContext(s),a=t;return e&&(a="function"==typeof e?e(t):l(l({},t),e)),a},u=function(e){var t=p(e.components);return r.createElement(s.Provider,{value:t},e.children)},g="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},c=r.forwardRef((function(e,t){var a=e.components,n=e.mdxType,i=e.originalType,s=e.parentName,u=o(e,["components","mdxType","originalType","parentName"]),g=p(a),c=n,d=g["".concat(s,".").concat(c)]||g[c]||m[c]||i;return a?r.createElement(d,l(l({ref:t},u),{},{components:a})):r.createElement(d,l({ref:t},u))}));function d(e,t){var a=arguments,n=t&&t.mdxType;if("string"==typeof e||n){var i=a.length,l=new Array(i);l[0]=c;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[g]="string"==typeof e?e:n,l[1]=o;for(var p=2;p{a.d(t,{A:()=>l});var r=a(96540),n=a(20053);const i={tabItem:"tabItem_Ymn6"};function l(e){let{children:t,hidden:a,className:l}=e;return r.createElement("div",{role:"tabpanel",className:(0,n.A)(i.tabItem,l),hidden:a},t)}},11470:(e,t,a)=>{a.d(t,{A:()=>w});var r=a(58168),n=a(96540),i=a(20053),l=a(23104),o=a(56347),s=a(57485),p=a(31682),u=a(89466);function g(e){return function(e){return n.Children.map(e,(e=>{if(!e||(0,n.isValidElement)(e)&&function(e){const{props:t}=e;return!!t&&"object"==typeof t&&"value"in t}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:t,label:a,attributes:r,default:n}}=e;return{value:t,label:a,attributes:r,default:n}}))}function m(e){const{values:t,children:a}=e;return(0,n.useMemo)((()=>{const e=t??g(a);return function(e){const 
t=(0,p.X)(e,((e,t)=>e.value===t.value));if(t.length>0)throw new Error(`Docusaurus error: Duplicate values "${t.map((e=>e.value)).join(", ")}" found in . Every value needs to be unique.`)}(e),e}),[t,a])}function c(e){let{value:t,tabValues:a}=e;return a.some((e=>e.value===t))}function d(e){let{queryString:t=!1,groupId:a}=e;const r=(0,o.W6)(),i=function(e){let{queryString:t=!1,groupId:a}=e;if("string"==typeof t)return t;if(!1===t)return null;if(!0===t&&!a)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. You can also provide an explicit value such as queryString="my-search-param".');return a??null}({queryString:t,groupId:a});return[(0,s.aZ)(i),(0,n.useCallback)((e=>{if(!i)return;const t=new URLSearchParams(r.location.search);t.set(i,e),r.replace({...r.location,search:t.toString()})}),[i,r])]}function y(e){const{defaultValue:t,queryString:a=!1,groupId:r}=e,i=m(e),[l,o]=(0,n.useState)((()=>function(e){let{defaultValue:t,tabValues:a}=e;if(0===a.length)throw new Error("Docusaurus error: the component requires at least one children component");if(t){if(!c({value:t,tabValues:a}))throw new Error(`Docusaurus error: The has a defaultValue "${t}" but none of its children has the corresponding value. Available values are: ${a.map((e=>e.value)).join(", ")}. 
If you intend to show no default tab, use defaultValue={null} instead.`);return t}const r=a.find((e=>e.default))??a[0];if(!r)throw new Error("Unexpected error: 0 tabValues");return r.value}({defaultValue:t,tabValues:i}))),[s,p]=d({queryString:a,groupId:r}),[g,y]=function(e){let{groupId:t}=e;const a=function(e){return e?`docusaurus.tab.${e}`:null}(t),[r,i]=(0,u.Dv)(a);return[r,(0,n.useCallback)((e=>{a&&i.set(e)}),[a,i])]}({groupId:r}),b=(()=>{const e=s??g;return c({value:e,tabValues:i})?e:null})();(0,n.useLayoutEffect)((()=>{b&&o(b)}),[b]);return{selectedValue:l,selectValue:(0,n.useCallback)((e=>{if(!c({value:e,tabValues:i}))throw new Error(`Can't select invalid tab value=${e}`);o(e),p(e),y(e)}),[p,y,i]),tabValues:i}}var b=a(92303);const f={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function h(e){let{className:t,block:a,selectedValue:o,selectValue:s,tabValues:p}=e;const u=[],{blockElementScrollPositionUntilNextRender:g}=(0,l.a_)(),m=e=>{const t=e.currentTarget,a=u.indexOf(t),r=p[a].value;r!==o&&(g(t),s(r))},c=e=>{let t=null;switch(e.key){case"Enter":m(e);break;case"ArrowRight":{const a=u.indexOf(e.currentTarget)+1;t=u[a]??u[0];break}case"ArrowLeft":{const a=u.indexOf(e.currentTarget)-1;t=u[a]??u[u.length-1];break}}t?.focus()};return n.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,i.A)("tabs",{"tabs--block":a},t)},p.map((e=>{let{value:t,label:a,attributes:l}=e;return n.createElement("li",(0,r.A)({role:"tab",tabIndex:o===t?0:-1,"aria-selected":o===t,key:t,ref:e=>u.push(e),onKeyDown:c,onClick:m},l,{className:(0,i.A)("tabs__item",f.tabItem,l?.className,{"tabs__item--active":o===t})}),a??t)})))}function v(e){let{lazy:t,children:a,selectedValue:r}=e;const i=(Array.isArray(a)?a:[a]).filter(Boolean);if(t){const e=i.find((e=>e.props.value===r));return e?(0,n.cloneElement)(e,{className:"margin-top--md"}):null}return 
n.createElement("div",{className:"margin-top--md"},i.map(((e,t)=>(0,n.cloneElement)(e,{key:t,hidden:e.props.value!==r}))))}function N(e){const t=y(e);return n.createElement("div",{className:(0,i.A)("tabs-container",f.tabList)},n.createElement(h,(0,r.A)({},e,t)),n.createElement(v,(0,r.A)({},e,t)))}function w(e){const t=(0,b.A)();return n.createElement(N,(0,r.A)({key:String(t)},e))}},84886:(e,t,a)=>{a.r(t),a.d(t,{assets:()=>u,contentTitle:()=>s,default:()=>d,frontMatter:()=>o,metadata:()=>p,toc:()=>g});var r=a(58168),n=(a(96540),a(15680)),i=a(11470),l=a(19365);const o={title:"Repartition",id:"Repartition",description:"Repartition or coalesce a DataFrame",sidebar_position:2,tags:["gems","join-split","partition","repartition","coalesce"]},s=void 0,p={unversionedId:"Spark/gems/join-split/Repartition",id:"Spark/gems/join-split/Repartition",title:"Repartition",description:"Repartition or coalesce a DataFrame",source:"@site/docs/Spark/gems/join-split/repartition.md",sourceDirName:"Spark/gems/join-split",slug:"/Spark/gems/join-split/Repartition",permalink:"/Spark/gems/join-split/Repartition",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"join-split",permalink:"/tags/join-split"},{label:"partition",permalink:"/tags/partition"},{label:"repartition",permalink:"/tags/repartition"},{label:"coalesce",permalink:"/tags/coalesce"}],version:"current",sidebarPosition:2,frontMatter:{title:"Repartition",id:"Repartition",description:"Repartition or coalesce a DataFrame",sidebar_position:2,tags:["gems","join-split","partition","repartition","coalesce"]},sidebar:"defaultSidebar",previous:{title:"Join",permalink:"/Spark/gems/join-split/join"},next:{title:"RowDistributor",permalink:"/Spark/gems/join-split/row-distributor"}},u={},g=[{value:"Hash Repartitoning",id:"hash-repartitoning",level:2},{value:"Parameters",id:"hash-repartitoning",level:3},{value:"Generated Code",id:"hash-repartitoning",level:3},{value:"Random 
Repartitioning",id:"random-repartitioning",level:2},{value:"Parameters",id:"random-repartitioning",level:3},{value:"Generated Code",id:"random-repartitioning",level:3},{value:"Range Repartitoning",id:"range-repartitoning",level:2},{value:"Parameters",id:"range-repartitoning",level:3},{value:"Generated Code",id:"range-repartitoning",level:3},{value:"Coalesce",id:"coalesce",level:2},{value:"Parameters",id:"coalesce",level:3},{value:"Generated Code",id:"coalesce",level:3},{value:"Video demo",id:"video-demo",level:2}],m={toc:g},c="wrapper";function d(e){let{components:t,...a}=e;return(0,n.yg)(c,(0,r.A)({},m,a,{components:t,mdxType:"MDXLayout"}),(0,n.yg)("h3",null,(0,n.yg)("span",{class:"badge"},"Spark Gem")),(0,n.yg)("p",null,"This will repartition or coalesce the input DataFrame based on the specified configuration. There are four different repartitioning options:"),(0,n.yg)("h2",{id:"hash-repartitoning"},"Hash Repartitoning"),(0,n.yg)("p",null,"Repartitions the data evenly across various partitions based on the hash value of the specified key."),(0,n.yg)("h3",{id:"hash-repartitoning"},"Parameters"),(0,n.yg)("table",null,(0,n.yg)("thead",{parentName:"table"},(0,n.yg)("tr",{parentName:"thead"},(0,n.yg)("th",{parentName:"tr",align:null},"Parameter"),(0,n.yg)("th",{parentName:"tr",align:null},"Description"),(0,n.yg)("th",{parentName:"tr",align:null},"Required"))),(0,n.yg)("tbody",{parentName:"table"},(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"DataFrame"),(0,n.yg)("td",{parentName:"tr",align:null},"Input DataFrame"),(0,n.yg)("td",{parentName:"tr",align:null},"True")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"Overwrite default partitions"),(0,n.yg)("td",{parentName:"tr",align:null},"Flag to overwrite default partitions"),(0,n.yg)("td",{parentName:"tr",align:null},"False")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"Number of 
partitions"),(0,n.yg)("td",{parentName:"tr",align:null},"Integer value specifying number of partitions"),(0,n.yg)("td",{parentName:"tr",align:null},"False")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"Repartition expression(s)"),(0,n.yg)("td",{parentName:"tr",align:null},"List of expressions to repartition by"),(0,n.yg)("td",{parentName:"tr",align:null},"True")))),(0,n.yg)("h3",{id:"hash-repartitoning"},"Generated Code"),(0,n.yg)(i.A,{mdxType:"Tabs"},(0,n.yg)(l.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-py"},'def hashRepartition(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return in0.repartition(5, col("customer_id"))\n'))),(0,n.yg)(l.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-scala"},'object hashRepartition {\n\n def apply(spark: SparkSession, in: DataFrame): DataFrame =\n in.repartition(5, col("customer_id"))\n\n}\n')))),(0,n.yg)("h2",{id:"random-repartitioning"},"Random Repartitioning"),(0,n.yg)("p",null,"Repartitions without data distribution defined."),(0,n.yg)("h3",{id:"random-repartitioning"},"Parameters"),(0,n.yg)("table",null,(0,n.yg)("thead",{parentName:"table"},(0,n.yg)("tr",{parentName:"thead"},(0,n.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,n.yg)("th",{parentName:"tr",align:"left"},"Description"),(0,n.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,n.yg)("tbody",{parentName:"table"},(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},"DataFrame"),(0,n.yg)("td",{parentName:"tr",align:"left"},"Input DataFrame"),(0,n.yg)("td",{parentName:"tr",align:"left"},"True")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},"Number of partitions"),(0,n.yg)("td",{parentName:"tr",align:"left"},"Integer value specifying number of 
partitions"),(0,n.yg)("td",{parentName:"tr",align:"left"},"True")))),(0,n.yg)("h3",{id:"random-repartitioning"},"Generated Code"),(0,n.yg)(i.A,{mdxType:"Tabs"},(0,n.yg)(l.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-py"},"def randomRepartition(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return in0.repartition(5)\n"))),(0,n.yg)(l.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-scala"},"object randomRepartition {\n\n def apply(spark: SparkSession, in: DataFrame): DataFrame =\n in.repartition(5)\n\n}\n")))),(0,n.yg)("h2",{id:"range-repartitoning"},"Range Repartitoning"),(0,n.yg)("p",null,"Repartitions the data with tuples having keys within the same range on the same worker."),(0,n.yg)("h3",{id:"range-repartitoning"},"Parameters"),(0,n.yg)("table",null,(0,n.yg)("thead",{parentName:"table"},(0,n.yg)("tr",{parentName:"thead"},(0,n.yg)("th",{parentName:"tr",align:null},"Parameter"),(0,n.yg)("th",{parentName:"tr",align:null},"Description"),(0,n.yg)("th",{parentName:"tr",align:null},"Required"))),(0,n.yg)("tbody",{parentName:"table"},(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"DataFrame"),(0,n.yg)("td",{parentName:"tr",align:null},"Input DataFrame"),(0,n.yg)("td",{parentName:"tr",align:null},"True")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"Overwrite default partitions"),(0,n.yg)("td",{parentName:"tr",align:null},"Flag to overwrite default partitions"),(0,n.yg)("td",{parentName:"tr",align:null},"False")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"Number of partitions"),(0,n.yg)("td",{parentName:"tr",align:null},"Integer value specifying number of partitions"),(0,n.yg)("td",{parentName:"tr",align:null},"False")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"Repartition 
expression(s) with sorting"),(0,n.yg)("td",{parentName:"tr",align:null},"List of expressions to repartition by with corresponding sorting order"),(0,n.yg)("td",{parentName:"tr",align:null},"True")))),(0,n.yg)("h3",{id:"range-repartitoning"},"Generated Code"),(0,n.yg)(i.A,{mdxType:"Tabs"},(0,n.yg)(l.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-py"},'def RepartitionByRange(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return in0.repartitionByRange(5, col("customer_id").asc())\n'))),(0,n.yg)(l.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-scala"},'object RepartitionByRange {\n\n def apply(spark: SparkSession, in: DataFrame): DataFrame =\n in.repartitionByRange(5, col("customer_id").asc())\n\n}\n')))),(0,n.yg)("h2",{id:"coalesce"},"Coalesce"),(0,n.yg)("p",null,"Reduces the number of partitions without shuffling the dataset."),(0,n.yg)("h3",{id:"coalesce"},"Parameters"),(0,n.yg)("table",null,(0,n.yg)("thead",{parentName:"table"},(0,n.yg)("tr",{parentName:"thead"},(0,n.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,n.yg)("th",{parentName:"tr",align:"left"},"Description"),(0,n.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,n.yg)("tbody",{parentName:"table"},(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},"DataFrame"),(0,n.yg)("td",{parentName:"tr",align:"left"},"Input DataFrame"),(0,n.yg)("td",{parentName:"tr",align:"left"},"True")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},"Number of partitions"),(0,n.yg)("td",{parentName:"tr",align:"left"},"Integer value specifying number of partitions"),(0,n.yg)("td",{parentName:"tr",align:"left"},"True")))),(0,n.yg)("h3",{id:"coalesce"},"Generated 
Code"),(0,n.yg)(i.A,{mdxType:"Tabs"},(0,n.yg)(l.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-py"},"def Coalesce(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return in0.coalesce(5)\n"))),(0,n.yg)(l.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-scala"},"object Coalesce {\n\n def apply(spark: SparkSession, in: DataFrame): DataFrame =\n in.coalesce(5)\n\n}\n")))),(0,n.yg)("h2",{id:"video-demo"},"Video demo"),(0,n.yg)("div",{class:"wistia_responsive_padding",style:{padding:"56.25% 0 0 0",position:"relative"}},(0,n.yg)("div",{class:"wistia_responsive_wrapper",style:{height:"100%",left:0,position:"absolute",top:0,width:"100%"}},(0,n.yg)("iframe",{src:"https://user-images.githubusercontent.com/103921419/174014498-277e1037-8634-4752-a4f1-e0e1aae66659.mp4",title:"Repartition",allow:"autoplay;fullscreen",allowtransparency:"true",frameborder:"0",scrolling:"no",class:"wistia_embed",name:"wistia_embed",msallowfullscreen:!0,width:"100%",height:"100%"}))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/61a8b9e9.799b17eb.js b/assets/js/61a8b9e9.799b17eb.js new file mode 100644 index 0000000000..9a87156f9a --- /dev/null +++ b/assets/js/61a8b9e9.799b17eb.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[30878],{15680:(e,t,n)=>{n.d(t,{xA:()=>u,yg:()=>g});var a=n(96540);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var 
o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),p=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},u=function(e){var t=p(e.components);return a.createElement(s.Provider,{value:t},e.children)},c="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,o=e.originalType,s=e.parentName,u=l(e,["components","mdxType","originalType","parentName"]),c=p(n),m=r,g=c["".concat(s,".").concat(m)]||c[m]||d[m]||o;return n?a.createElement(g,i(i({ref:t},u),{},{components:n})):a.createElement(g,i({ref:t},u))}));function g(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=n.length,i=new Array(o);i[0]=m;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[c]="string"==typeof e?e:r,i[1]=l;for(var p=2;p{n.d(t,{A:()=>i});var a=n(96540),r=n(20053);const o={tabItem:"tabItem_Ymn6"};function i(e){let{children:t,hidden:n,className:i}=e;return a.createElement("div",{role:"tabpanel",className:(0,r.A)(o.tabItem,i),hidden:n},t)}},11470:(e,t,n)=>{n.d(t,{A:()=>A});var a=n(58168),r=n(96540),o=n(20053),i=n(23104),l=n(56347),s=n(57485),p=n(31682),u=n(89466);function c(e){return function(e){return r.Children.map(e,(e=>{if(!e||(0,r.isValidElement)(e)&&function(e){const{props:t}=e;return!!t&&"object"==typeof t&&"value"in t}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:t,label:n,attributes:a,default:r}}=e;return{value:t,label:n,attributes:a,default:r}}))}function d(e){const{values:t,children:n}=e;return(0,r.useMemo)((()=>{const e=t??c(n);return function(e){const 
t=(0,p.X)(e,((e,t)=>e.value===t.value));if(t.length>0)throw new Error(`Docusaurus error: Duplicate values "${t.map((e=>e.value)).join(", ")}" found in . Every value needs to be unique.`)}(e),e}),[t,n])}function m(e){let{value:t,tabValues:n}=e;return n.some((e=>e.value===t))}function g(e){let{queryString:t=!1,groupId:n}=e;const a=(0,l.W6)(),o=function(e){let{queryString:t=!1,groupId:n}=e;if("string"==typeof t)return t;if(!1===t)return null;if(!0===t&&!n)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. You can also provide an explicit value such as queryString="my-search-param".');return n??null}({queryString:t,groupId:n});return[(0,s.aZ)(o),(0,r.useCallback)((e=>{if(!o)return;const t=new URLSearchParams(a.location.search);t.set(o,e),a.replace({...a.location,search:t.toString()})}),[o,a])]}function h(e){const{defaultValue:t,queryString:n=!1,groupId:a}=e,o=d(e),[i,l]=(0,r.useState)((()=>function(e){let{defaultValue:t,tabValues:n}=e;if(0===n.length)throw new Error("Docusaurus error: the component requires at least one children component");if(t){if(!m({value:t,tabValues:n}))throw new Error(`Docusaurus error: The has a defaultValue "${t}" but none of its children has the corresponding value. Available values are: ${n.map((e=>e.value)).join(", ")}. 
If you intend to show no default tab, use defaultValue={null} instead.`);return t}const a=n.find((e=>e.default))??n[0];if(!a)throw new Error("Unexpected error: 0 tabValues");return a.value}({defaultValue:t,tabValues:o}))),[s,p]=g({queryString:n,groupId:a}),[c,h]=function(e){let{groupId:t}=e;const n=function(e){return e?`docusaurus.tab.${e}`:null}(t),[a,o]=(0,u.Dv)(n);return[a,(0,r.useCallback)((e=>{n&&o.set(e)}),[n,o])]}({groupId:a}),y=(()=>{const e=s??c;return m({value:e,tabValues:o})?e:null})();(0,r.useLayoutEffect)((()=>{y&&l(y)}),[y]);return{selectedValue:i,selectValue:(0,r.useCallback)((e=>{if(!m({value:e,tabValues:o}))throw new Error(`Can't select invalid tab value=${e}`);l(e),p(e),h(e)}),[p,h,o]),tabValues:o}}var y=n(92303);const b={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function f(e){let{className:t,block:n,selectedValue:l,selectValue:s,tabValues:p}=e;const u=[],{blockElementScrollPositionUntilNextRender:c}=(0,i.a_)(),d=e=>{const t=e.currentTarget,n=u.indexOf(t),a=p[n].value;a!==l&&(c(t),s(a))},m=e=>{let t=null;switch(e.key){case"Enter":d(e);break;case"ArrowRight":{const n=u.indexOf(e.currentTarget)+1;t=u[n]??u[0];break}case"ArrowLeft":{const n=u.indexOf(e.currentTarget)-1;t=u[n]??u[u.length-1];break}}t?.focus()};return r.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,o.A)("tabs",{"tabs--block":n},t)},p.map((e=>{let{value:t,label:n,attributes:i}=e;return r.createElement("li",(0,a.A)({role:"tab",tabIndex:l===t?0:-1,"aria-selected":l===t,key:t,ref:e=>u.push(e),onKeyDown:m,onClick:d},i,{className:(0,o.A)("tabs__item",b.tabItem,i?.className,{"tabs__item--active":l===t})}),n??t)})))}function w(e){let{lazy:t,children:n,selectedValue:a}=e;const o=(Array.isArray(n)?n:[n]).filter(Boolean);if(t){const e=o.find((e=>e.props.value===a));return e?(0,r.cloneElement)(e,{className:"margin-top--md"}):null}return 
r.createElement("div",{className:"margin-top--md"},o.map(((e,t)=>(0,r.cloneElement)(e,{key:t,hidden:e.props.value!==a}))))}function v(e){const t=h(e);return r.createElement("div",{className:(0,o.A)("tabs-container",b.tabList)},r.createElement(f,(0,a.A)({},e,t)),r.createElement(w,(0,a.A)({},e,t)))}function A(e){const t=(0,y.A)();return r.createElement(v,(0,a.A)({key:String(t)},e))}},82934:(e,t,n)=>{n.r(t),n.d(t,{assets:()=>u,contentTitle:()=>s,default:()=>g,frontMatter:()=>l,metadata:()=>p,toc:()=>c});var a=n(58168),r=(n(96540),n(15680)),o=n(11470),i=n(19365);const l={sidebar_position:2,title:"OpenAI",id:"ml-openai",description:"Request OpenAI to generate a vector embedding or request OpenAI to answer a question with an optional context.",tags:["generative-ai","machine-learning","llm","openai","embedding","vector","answer","question"]},s=void 0,p={unversionedId:"Spark/gems/machine-learning/ml-openai",id:"Spark/gems/machine-learning/ml-openai",title:"OpenAI",description:"Request OpenAI to generate a vector embedding or request OpenAI to answer a question with an optional context.",source:"@site/docs/Spark/gems/machine-learning/ml-openai.md",sourceDirName:"Spark/gems/machine-learning",slug:"/Spark/gems/machine-learning/ml-openai",permalink:"/Spark/gems/machine-learning/ml-openai",draft:!1,tags:[{label:"generative-ai",permalink:"/tags/generative-ai"},{label:"machine-learning",permalink:"/tags/machine-learning"},{label:"llm",permalink:"/tags/llm"},{label:"openai",permalink:"/tags/openai"},{label:"embedding",permalink:"/tags/embedding"},{label:"vector",permalink:"/tags/vector"},{label:"answer",permalink:"/tags/answer"},{label:"question",permalink:"/tags/question"}],version:"current",sidebarPosition:2,frontMatter:{sidebar_position:2,title:"OpenAI",id:"ml-openai",description:"Request OpenAI to generate a vector embedding or request OpenAI to answer a question with an optional 
context.",tags:["generative-ai","machine-learning","llm","openai","embedding","vector","answer","question"]},sidebar:"defaultSidebar",previous:{title:"TextProcessing",permalink:"/Spark/gems/machine-learning/ml-text-processing"},next:{title:"PineconeLookup",permalink:"/Spark/gems/machine-learning/ml-pinecone-lookup"}},u={},c=[{value:"1. Compute text embeddings",id:"1-compute-text-embeddings",level:3},{value:"1a. Configure",id:"1a-configure",level:4},{value:"1b. Input",id:"1b-input",level:4},{value:"1c. Output",id:"1c-output",level:4},{value:"1d. Generated code",id:"1d-generated-code",level:4},{value:"2. Answer a question with a given context",id:"2-answer-a-question-with-a-given-context",level:3},{value:"2a. Configure",id:"2a-configure",level:4},{value:"2b. Input",id:"2b-input",level:4},{value:"2c. Output",id:"2c-output",level:4},{value:"2d. Generated code",id:"2d-generated-code",level:4},{value:"FAQ",id:"faq",level:3},{value:"Troubleshooting",id:"troubleshooting",level:4},{value:"Can I choose other OpenAI models?",id:"can-i-choose-other-openai-models",level:4}],d={toc:c},m="wrapper";function g(e){let{components:t,...l}=e;return(0,r.yg)(m,(0,a.A)({},d,l,{components:t,mdxType:"MDXLayout"}),(0,r.yg)("h3",null,(0,r.yg)("span",{class:"badge"},"Spark Gem")),(0,r.yg)("p",null,"The OpenAI Gem allows the Prophecy user to interact with the OpenAI API using two different requests:"),(0,r.yg)("ol",null,(0,r.yg)("li",{parentName:"ol"},"Compute text embeddings"),(0,r.yg)("li",{parentName:"ol"},"Answer a question, where the user has the option to provide context")),(0,r.yg)("p",null,"Follow along to learn how to interact with the OpenAI API using Prophecy's easy-to-use interface. 
For an example set of Pipelines that use these Gems to create a Generative AI Chatbot, see this ",(0,r.yg)("a",{parentName:"p",href:"/getting-started/gen-ai-chatbot"},"guide.")),(0,r.yg)("admonition",{type:"caution"},(0,r.yg)("p",{parentName:"admonition"},"As with all applications that interface with Large Language Models (LLMs), the OpenAI Gem can generate results that are incorrect and/or misleading. The OpenAI Gem is subject to the same ",(0,r.yg)("a",{parentName:"p",href:"https://platform.openai.com/docs/guides/embeddings/limitations-risks"},"limitations and risks")," as those posed by OpenAI itself.")),(0,r.yg)("br",null),(0,r.yg)("div",{class:"wistia_responsive_padding",style:{padding:"56.25% 0 0 0",position:"relative"}},(0,r.yg)("div",{class:"wistia_responsive_wrapper",style:{height:"100%",left:0,position:"absolute",top:0,width:"100%"}},(0,r.yg)("iframe",{src:"https://fast.wistia.net/embed/iframe/i1x7g14wn4?seo=false?videoFoam=true",title:"Getting Started With SQL Video",allow:"autoplay; fullscreen",allowtransparency:"true",frameborder:"0",scrolling:"no",class:"wistia_embed",name:"wistia_embed",msallowfullscreen:!0,width:"100%",height:"100%"}))),(0,r.yg)("script",{src:"https://fast.wistia.net/assets/external/E-v1.js",async:!0}),(0,r.yg)("br",null),(0,r.yg)("h3",{id:"1-compute-text-embeddings"},"1. Compute text embeddings"),(0,r.yg)("p",null,"Given a question input, the OpenAI Gem will return a text embedding by calling the OpenAI ",(0,r.yg)("a",{parentName:"p",href:"https://platform.openai.com/docs/guides/embeddings/how-to-get-embeddings"},"ada-002 model"),". View the input and output from this Gem to understand the data formats and sample."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Overview of the Gem showing the input and output for computing a text embedding",src:n(49354).A,width:"2376",height:"814"})),(0,r.yg)("h4",{id:"1a-configure"},"1a. 
Configure"),(0,r.yg)("p",null,"Follow the steps below to configure the OpenAI Gem to compute text embeddings."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Configure the Gem to compute a text embedding",src:n(10687).A,width:"2880",height:"1640"})),(0,r.yg)("p",null,"Storing the OpenAI API token as a ",(0,r.yg)("strong",{parentName:"p"},"(1) Databricks Secret")," is highly recommended. For instructions click ",(0,r.yg)("a",{parentName:"p",href:"https://docs.databricks.com/en/security/secrets/index.html"},"here.")," Be sure to use the ",(0,r.yg)("strong",{parentName:"p"}," (2) Fabric connection")," to the Databricks workspace which contains the Databricks scope and secrets configured in this Gem. Contact us to understand the integrations with other secret managers."),(0,r.yg)("p",null,"Select the Operation type from the dropdown menu. ",(0,r.yg)("strong",{parentName:"p"},"(3) Compute text embeddings")," operation will send the selected ",(0,r.yg)("strong",{parentName:"p"},"(4) Texts column")," to the OpenAI API. For each entry in the Texts column, OpenAI's ada-002 model will return a text embedding."),(0,r.yg)("p",null,"Instead of sending a single row to OpenAI's API, select the ",(0,r.yg)("strong",{parentName:"p"},"(5) Group data")," option. Group data is a window function, using a window of size 20, ",(0,r.yg)("strong",{parentName:"p"},"(6) ordered by")," the selected column. Using the Group data option influences model performance based on the column selected."),(0,r.yg)("h4",{id:"1b-input"},"1b. 
Input"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Column"),(0,r.yg)("th",{parentName:"tr",align:null},"Description"),(0,r.yg)("th",{parentName:"tr",align:null},"Required"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Question/Text"),(0,r.yg)("td",{parentName:"tr",align:null},"string - a question or text string of interest"),(0,r.yg)("td",{parentName:"tr",align:null},"True")))),(0,r.yg)("h4",{id:"1c-output"},"1c. Output"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Column"),(0,r.yg)("th",{parentName:"tr",align:null},"Description"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"openai_embedding"),(0,r.yg)("td",{parentName:"tr",align:null},"array(float) - The vector embedding returned from OpenAI corresponding to the input question/text. Each record is an array of ",(0,r.yg)("inlineCode",{parentName:"td"},"1536")," floating point numbers, such as ",(0,r.yg)("inlineCode",{parentName:"td"},"[-0.0018493991, -0.0059955865, ... -0.02498541]"),".")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"openai_error"),(0,r.yg)("td",{parentName:"tr",align:null},"string - this column is provided to display any error message returned from the OpenAI API; helpful for troubleshooting.")))),(0,r.yg)("h4",{id:"1d-generated-code"},"1d. Generated code"),(0,r.yg)("p",null,"All the visual designs are converted to code and committed to the Prophecy user's Git repository. 
See below for a sample of the code which calls the OpenAI API to compute text embeddings."),(0,r.yg)(o.A,{mdxType:"Tabs"},(0,r.yg)(i.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},'def vectorize(spark: SparkSession, question_seed: DataFrame) -> DataFrame:\n from spark_ai.llms.openai import OpenAiLLM\n from pyspark.dbutils import DBUtils\n OpenAiLLM(api_key = DBUtils(spark).secrets.get(scope = "", key = ""))\\\n .register_udfs(spark = spark)\n\n return question_seed\\\n .withColumn("_row_num", row_number().over(Window.partitionBy().orderBy(col("input"))))\\\n .withColumn("_group_num", ceil(col("_row_num") / 20))\\\n .withColumn("_data", struct(col("*")))\\\n .groupBy(col("_group_num"))\\\n .agg(collect_list(col("_data")).alias("_data"), collect_list(col("input")).alias("_texts"))\\\n .withColumn("_embedded", expr(f"openai_embed_texts(_texts)"))\\\n .select(\n col("_texts"),\n col("_embedded.embeddings").alias("_embeddings"),\n col("_embedded.error").alias("openai_error"),\n col("_data")\n )\\\n .select(expr("explode_outer(arrays_zip(_embeddings, _data))").alias("_content"), col("openai_error"))\\\n .select(col("_content._embeddings").alias("openai_embedding"), col("openai_error"), col("_content._data.*"))\\\n .drop("_row_num")\\\n .drop("_group_num")\n'))),(0,r.yg)(i.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-scala"},"[Not yet supported]\n")))),(0,r.yg)("h3",{id:"2-answer-a-question-with-a-given-context"},"2. Answer a question with a given context"),(0,r.yg)("p",null,"In addition to computing text embeddings, OpenAI's ada-002 model is also very good at answering questions. The Prophecy interface allows users to input a question (and optionally provide a context) as components of the ",(0,r.yg)("inlineCode",{parentName:"p"},"prompt")," sent to OpenAI. 
In response, OpenAI's ada-002 model returns an answer(s) to the question. See the input and output data previews before and after the OpenAI Gem to understand the operation."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Overview of the Gem showing the input and output for answering a question",src:n(96223).A,width:"2376",height:"966"})),(0,r.yg)("h4",{id:"2a-configure"},"2a. Configure"),(0,r.yg)("p",null,"Follow the steps below to configure the OpenAI Gem to answer a question, and to understand how to provide a context if desired."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Configure the gem to answer a question with a given context",src:n(20600).A,width:"2880",height:"1834"})),(0,r.yg)("p",null,"Storing the OpenAI API token as a ",(0,r.yg)("strong",{parentName:"p"},"(1) Databricks Secret")," is highly recommended. For instructions click ",(0,r.yg)("a",{parentName:"p",href:"https://docs.databricks.com/en/security/secrets/index.html"},"here.")," Be sure to use the ",(0,r.yg)("strong",{parentName:"p"}," (2) Fabric connection")," to the Databricks workspace which contains the Databricks scope and secrets configured in this Gem."),(0,r.yg)("p",null,"Hardcoding the OpenAI credential is not recommended. Selecting this option could send credentials to be stored hardcoded in Git; use only for credentials that should be shared with the world. Contact us to understand the integrations with other secret managers. (",(0,r.yg)("a",{parentName:"p",href:"mailto:contact.us@Prophecy.io"},"contact.us@Prophecy.io"),")"),(0,r.yg)("p",null,"Now it's time to craft a prompt to send to the OpenAI ada-002 model. Select the Operation type from the dropdown menu. 
The operation ",(0,r.yg)("inlineCode",{parentName:"p"},"Answer questions")," will prompt OpenAI's ada-002 model to answer the provided question using the datasets the model was trained on, which have some ",(0,r.yg)("a",{parentName:"p",href:"https://platform.openai.com/docs/guides/embeddings/blindness-to-recent-events"},"blindness.")," For many users, you'll want to provide some context as part of your prompt. The operation ",(0,r.yg)("strong",{parentName:"p"},"(3) Answer questions for given context")," will likely generate answers more related to the context. Select the input column which has the question of interest as the ",(0,r.yg)("strong",{parentName:"p"},"(4) Question text column"),". To provide context in addition to the question, select ",(0,r.yg)("strong",{parentName:"p"},"(5) Context text column"),". For example, if the question is ",(0,r.yg)("inlineCode",{parentName:"p"},"Does Prophecy support on-premise environments?"),", an appropriate context would be some section of Prophecy's documentation. The ",(0,r.yg)("strong",{parentName:"p"},"(6) context")," and ",(0,r.yg)("strong",{parentName:"p"},"(7) question (query)")," comprise the prompt sent to OpenAI."),(0,r.yg)("h4",{id:"2b-input"},"2b. Input"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Column"),(0,r.yg)("th",{parentName:"tr",align:null},"Description"),(0,r.yg)("th",{parentName:"tr",align:null},"Required"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Question"),(0,r.yg)("td",{parentName:"tr",align:null},"string - a question of interest to include in the prompt sent to OpenAI. 
Example: ",(0,r.yg)("inlineCode",{parentName:"td"},"What is Prophecy's AI Assistant feature?")),(0,r.yg)("td",{parentName:"tr",align:null},"True")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Context"),(0,r.yg)("td",{parentName:"tr",align:null},"string - a text corpus related to the question of interest, also included in the prompt sent to OpenAI. Frequently the context column should undergo data transformations in the Gems preceding the OpenAI Gem. See ",(0,r.yg)("a",{parentName:"td",href:"/getting-started/gen-ai-chatbot"},"this guide")," for a great example of preparing the text corpus and transforming sufficiently to include in a useful prompt."),(0,r.yg)("td",{parentName:"tr",align:null},"False")))),(0,r.yg)("h4",{id:"2c-output"},"2c. Output"),(0,r.yg)("p",null,"Since OpenAI's models are probabalistic, they return at least one, and frequently more than one, answer. These responses are formatted as a json array of answer choices. The user would usually select the best answer from the choices; we recommend selecting the first answer if you wish to select one by default. This can be done in the Gem following the OpenAI Gem as in this ",(0,r.yg)("a",{parentName:"p",href:"/getting-started/gen-ai-chatbot#3a-chatbot-live-pipeline"},"example"),"."),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Column"),(0,r.yg)("th",{parentName:"tr",align:null},"Description"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"openai_answer"),(0,r.yg)("td",{parentName:"tr",align:null},"struct - this column contains the response from OpenAI in as a json array. 
Example: ",(0,r.yg)("inlineCode",{parentName:"td"},'{"choices":["Prophecy\'s AI Assistant feature is called Data Copilot."]}')," Select/filter from multiple answer choices in a Gem following the OpenAI Gem.")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"openai_error"),(0,r.yg)("td",{parentName:"tr",align:null},"string - this column is provided to display any error message returned from the OpenAI API; helpful for troubleshooting.")))),(0,r.yg)("h4",{id:"2d-generated-code"},"2d. Generated code"),(0,r.yg)("p",null,"See below for a sample of the code which calls the OpenAI API to answer a question provided some context."),(0,r.yg)(o.A,{mdxType:"Tabs"},(0,r.yg)(i.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},'def OpenAI_1(spark: SparkSession, collect_context: DataFrame) -> DataFrame:\n from spark_ai.llms.openai import OpenAiLLM\n from pyspark.dbutils import DBUtils\n OpenAiLLM(api_key = DBUtils(spark).secrets.get(scope = "[redacted]", key = "[redacted]"))\\\n .register_udfs(spark = spark)\n\n return collect_context\\\n .withColumn("_context", col("context"))\\\n .withColumn("_query", col("input"))\\\n .withColumn(\n "openai_answer",\n expr(\n "openai_answer_question(_context, _query, \\" Answer the question based on the context below.\\nContext:\\n```\\n{context}\\n```\\nQuestion: \\n```\\n{query}\\n```\\nAnswer:\\n \\")"\n )\n )\\\n .drop("_context", "_query")\n'))),(0,r.yg)(i.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-scala"}," [page under construction]\n")))),(0,r.yg)("h3",{id:"faq"},"FAQ"),(0,r.yg)("h4",{id:"troubleshooting"},"Troubleshooting"),(0,r.yg)("p",null,"The output data sample following the OpenAI Gem also contains a column for any error message(s) returned from OpenAI. 
This handy column surfaces errors including invalid OpenAI credentials, invalid input questions, or problems with data formatting."),(0,r.yg)("h4",{id:"can-i-choose-other-openai-models"},"Can I choose other OpenAI models?"),(0,r.yg)("p",null,"Currently we use ChatGPT 3.5 Turbo. Contact us for additional options: ",(0,r.yg)("a",{parentName:"p",href:"mailto:contact.us@Prophecy.io"},"contact.us@Prophecy.io")))}g.isMDXComponent=!0},20600:(e,t,n)=>{n.d(t,{A:()=>a});const a=n.p+"assets/images/openai-configure-answer-a2f543becfb0237af252fd218f0dc6ee.png"},10687:(e,t,n)=>{n.d(t,{A:()=>a});const a=n.p+"assets/images/openai-configure-embedding-27b293504f71c3e4c49f5377eb4b887e.png"},96223:(e,t,n)=>{n.d(t,{A:()=>a});const a=n.p+"assets/images/openai-intro-answer-question-context-b81d39354874965e789254f90c6fae32.png"},49354:(e,t,n)=>{n.d(t,{A:()=>a});const a=n.p+"assets/images/openai-intro-compute-text-embeddings-31cbbb227cfe0f3c9c39b1bdb698f95d.png"}}]); \ No newline at end of file diff --git a/assets/js/61a8b9e9.df6b2808.js b/assets/js/61a8b9e9.df6b2808.js deleted file mode 100644 index d5b44d38e4..0000000000 --- a/assets/js/61a8b9e9.df6b2808.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[30878],{15680:(e,t,n)=>{n.d(t,{xA:()=>u,yg:()=>g});var a=n(96540);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),p=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof 
e?e(t):i(i({},t),e)),n},u=function(e){var t=p(e.components);return a.createElement(s.Provider,{value:t},e.children)},c="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,o=e.originalType,s=e.parentName,u=l(e,["components","mdxType","originalType","parentName"]),c=p(n),m=r,g=c["".concat(s,".").concat(m)]||c[m]||d[m]||o;return n?a.createElement(g,i(i({ref:t},u),{},{components:n})):a.createElement(g,i({ref:t},u))}));function g(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=n.length,i=new Array(o);i[0]=m;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[c]="string"==typeof e?e:r,i[1]=l;for(var p=2;p{n.d(t,{A:()=>i});var a=n(96540),r=n(20053);const o={tabItem:"tabItem_Ymn6"};function i(e){let{children:t,hidden:n,className:i}=e;return a.createElement("div",{role:"tabpanel",className:(0,r.A)(o.tabItem,i),hidden:n},t)}},11470:(e,t,n)=>{n.d(t,{A:()=>A});var a=n(58168),r=n(96540),o=n(20053),i=n(23104),l=n(56347),s=n(57485),p=n(31682),u=n(89466);function c(e){return function(e){return r.Children.map(e,(e=>{if(!e||(0,r.isValidElement)(e)&&function(e){const{props:t}=e;return!!t&&"object"==typeof t&&"value"in t}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:t,label:n,attributes:a,default:r}}=e;return{value:t,label:n,attributes:a,default:r}}))}function d(e){const{values:t,children:n}=e;return(0,r.useMemo)((()=>{const e=t??c(n);return function(e){const t=(0,p.X)(e,((e,t)=>e.value===t.value));if(t.length>0)throw new Error(`Docusaurus error: Duplicate values "${t.map((e=>e.value)).join(", ")}" found in . 
Every value needs to be unique.`)}(e),e}),[t,n])}function m(e){let{value:t,tabValues:n}=e;return n.some((e=>e.value===t))}function g(e){let{queryString:t=!1,groupId:n}=e;const a=(0,l.W6)(),o=function(e){let{queryString:t=!1,groupId:n}=e;if("string"==typeof t)return t;if(!1===t)return null;if(!0===t&&!n)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. You can also provide an explicit value such as queryString="my-search-param".');return n??null}({queryString:t,groupId:n});return[(0,s.aZ)(o),(0,r.useCallback)((e=>{if(!o)return;const t=new URLSearchParams(a.location.search);t.set(o,e),a.replace({...a.location,search:t.toString()})}),[o,a])]}function h(e){const{defaultValue:t,queryString:n=!1,groupId:a}=e,o=d(e),[i,l]=(0,r.useState)((()=>function(e){let{defaultValue:t,tabValues:n}=e;if(0===n.length)throw new Error("Docusaurus error: the component requires at least one children component");if(t){if(!m({value:t,tabValues:n}))throw new Error(`Docusaurus error: The has a defaultValue "${t}" but none of its children has the corresponding value. Available values are: ${n.map((e=>e.value)).join(", ")}. 
If you intend to show no default tab, use defaultValue={null} instead.`);return t}const a=n.find((e=>e.default))??n[0];if(!a)throw new Error("Unexpected error: 0 tabValues");return a.value}({defaultValue:t,tabValues:o}))),[s,p]=g({queryString:n,groupId:a}),[c,h]=function(e){let{groupId:t}=e;const n=function(e){return e?`docusaurus.tab.${e}`:null}(t),[a,o]=(0,u.Dv)(n);return[a,(0,r.useCallback)((e=>{n&&o.set(e)}),[n,o])]}({groupId:a}),y=(()=>{const e=s??c;return m({value:e,tabValues:o})?e:null})();(0,r.useLayoutEffect)((()=>{y&&l(y)}),[y]);return{selectedValue:i,selectValue:(0,r.useCallback)((e=>{if(!m({value:e,tabValues:o}))throw new Error(`Can't select invalid tab value=${e}`);l(e),p(e),h(e)}),[p,h,o]),tabValues:o}}var y=n(92303);const b={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function f(e){let{className:t,block:n,selectedValue:l,selectValue:s,tabValues:p}=e;const u=[],{blockElementScrollPositionUntilNextRender:c}=(0,i.a_)(),d=e=>{const t=e.currentTarget,n=u.indexOf(t),a=p[n].value;a!==l&&(c(t),s(a))},m=e=>{let t=null;switch(e.key){case"Enter":d(e);break;case"ArrowRight":{const n=u.indexOf(e.currentTarget)+1;t=u[n]??u[0];break}case"ArrowLeft":{const n=u.indexOf(e.currentTarget)-1;t=u[n]??u[u.length-1];break}}t?.focus()};return r.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,o.A)("tabs",{"tabs--block":n},t)},p.map((e=>{let{value:t,label:n,attributes:i}=e;return r.createElement("li",(0,a.A)({role:"tab",tabIndex:l===t?0:-1,"aria-selected":l===t,key:t,ref:e=>u.push(e),onKeyDown:m,onClick:d},i,{className:(0,o.A)("tabs__item",b.tabItem,i?.className,{"tabs__item--active":l===t})}),n??t)})))}function w(e){let{lazy:t,children:n,selectedValue:a}=e;const o=(Array.isArray(n)?n:[n]).filter(Boolean);if(t){const e=o.find((e=>e.props.value===a));return e?(0,r.cloneElement)(e,{className:"margin-top--md"}):null}return 
r.createElement("div",{className:"margin-top--md"},o.map(((e,t)=>(0,r.cloneElement)(e,{key:t,hidden:e.props.value!==a}))))}function v(e){const t=h(e);return r.createElement("div",{className:(0,o.A)("tabs-container",b.tabList)},r.createElement(f,(0,a.A)({},e,t)),r.createElement(w,(0,a.A)({},e,t)))}function A(e){const t=(0,y.A)();return r.createElement(v,(0,a.A)({key:String(t)},e))}},82934:(e,t,n)=>{n.r(t),n.d(t,{assets:()=>u,contentTitle:()=>s,default:()=>g,frontMatter:()=>l,metadata:()=>p,toc:()=>c});var a=n(58168),r=(n(96540),n(15680)),o=n(11470),i=n(19365);const l={sidebar_position:2,title:"OpenAI",id:"ml-openai",description:"Request OpenAI to generate a vector embedding or request OpenAI to answer a question with an optional context.",tags:["generative-ai","machine-learning","llm","openai","embedding","vector","answer","question"]},s=void 0,p={unversionedId:"Spark/gems/machine-learning/ml-openai",id:"Spark/gems/machine-learning/ml-openai",title:"OpenAI",description:"Request OpenAI to generate a vector embedding or request OpenAI to answer a question with an optional context.",source:"@site/docs/Spark/gems/machine-learning/ml-openai.md",sourceDirName:"Spark/gems/machine-learning",slug:"/Spark/gems/machine-learning/ml-openai",permalink:"/Spark/gems/machine-learning/ml-openai",draft:!1,tags:[{label:"generative-ai",permalink:"/tags/generative-ai"},{label:"machine-learning",permalink:"/tags/machine-learning"},{label:"llm",permalink:"/tags/llm"},{label:"openai",permalink:"/tags/openai"},{label:"embedding",permalink:"/tags/embedding"},{label:"vector",permalink:"/tags/vector"},{label:"answer",permalink:"/tags/answer"},{label:"question",permalink:"/tags/question"}],version:"current",sidebarPosition:2,frontMatter:{sidebar_position:2,title:"OpenAI",id:"ml-openai",description:"Request OpenAI to generate a vector embedding or request OpenAI to answer a question with an optional 
context.",tags:["generative-ai","machine-learning","llm","openai","embedding","vector","answer","question"]},sidebar:"defaultSidebar",previous:{title:"TextProcessing",permalink:"/Spark/gems/machine-learning/ml-text-processing"},next:{title:"PineconeLookup",permalink:"/Spark/gems/machine-learning/ml-pinecone-lookup"}},u={},c=[{value:"1. Compute text embeddings",id:"1-compute-text-embeddings",level:3},{value:"1a. Configure",id:"1a-configure",level:4},{value:"1b. Input",id:"1b-input",level:4},{value:"1c. Output",id:"1c-output",level:4},{value:"1d. Generated code",id:"1d-generated-code",level:4},{value:"2. Answer a question with a given context",id:"2-answer-a-question-with-a-given-context",level:3},{value:"2a. Configure",id:"2a-configure",level:4},{value:"2b. Input",id:"2b-input",level:4},{value:"2c. Output",id:"2c-output",level:4},{value:"2d. Generated code",id:"2d-generated-code",level:4},{value:"FAQ",id:"faq",level:3},{value:"Troubleshooting",id:"troubleshooting",level:4},{value:"Can I choose other OpenAI models?",id:"can-i-choose-other-openai-models",level:4}],d={toc:c},m="wrapper";function g(e){let{components:t,...l}=e;return(0,r.yg)(m,(0,a.A)({},d,l,{components:t,mdxType:"MDXLayout"}),(0,r.yg)("h3",null,(0,r.yg)("span",{class:"badge rounded-pill text-bg-light"},"Spark Gem")),(0,r.yg)("p",null,"The OpenAI Gem allows the Prophecy user to interact with the OpenAI API using two different requests:"),(0,r.yg)("ol",null,(0,r.yg)("li",{parentName:"ol"},"Compute text embeddings"),(0,r.yg)("li",{parentName:"ol"},"Answer a question, where the user has the option to provide context")),(0,r.yg)("p",null,"Follow along to learn how to interact with the OpenAI API using Prophecy's easy-to-use interface. 
For an example set of Pipelines that use these Gems to create a Generative AI Chatbot, see this ",(0,r.yg)("a",{parentName:"p",href:"/getting-started/gen-ai-chatbot"},"guide.")),(0,r.yg)("admonition",{type:"caution"},(0,r.yg)("p",{parentName:"admonition"},"As with all applications that interface with Large Language Models (LLMs), the OpenAI Gem can generate results that are incorrect and/or misleading. The OpenAI Gem is subject to the same ",(0,r.yg)("a",{parentName:"p",href:"https://platform.openai.com/docs/guides/embeddings/limitations-risks"},"limitations and risks")," as those posed by OpenAI itself.")),(0,r.yg)("br",null),(0,r.yg)("div",{class:"wistia_responsive_padding",style:{padding:"56.25% 0 0 0",position:"relative"}},(0,r.yg)("div",{class:"wistia_responsive_wrapper",style:{height:"100%",left:0,position:"absolute",top:0,width:"100%"}},(0,r.yg)("iframe",{src:"https://fast.wistia.net/embed/iframe/i1x7g14wn4?seo=false?videoFoam=true",title:"Getting Started With SQL Video",allow:"autoplay; fullscreen",allowtransparency:"true",frameborder:"0",scrolling:"no",class:"wistia_embed",name:"wistia_embed",msallowfullscreen:!0,width:"100%",height:"100%"}))),(0,r.yg)("script",{src:"https://fast.wistia.net/assets/external/E-v1.js",async:!0}),(0,r.yg)("br",null),(0,r.yg)("h3",{id:"1-compute-text-embeddings"},"1. Compute text embeddings"),(0,r.yg)("p",null,"Given a question input, the OpenAI Gem will return a text embedding by calling the OpenAI ",(0,r.yg)("a",{parentName:"p",href:"https://platform.openai.com/docs/guides/embeddings/how-to-get-embeddings"},"ada-002 model"),". View the input and output from this Gem to understand the data formats and sample."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Overview of the Gem showing the input and output for computing a text embedding",src:n(49354).A,width:"2376",height:"814"})),(0,r.yg)("h4",{id:"1a-configure"},"1a. 
Configure"),(0,r.yg)("p",null,"Follow the steps below to configure the OpenAI Gem to compute text embeddings."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Configure the Gem to compute a text embedding",src:n(10687).A,width:"2880",height:"1640"})),(0,r.yg)("p",null,"Storing the OpenAI API token as a ",(0,r.yg)("strong",{parentName:"p"},"(1) Databricks Secret")," is highly recommended. For instructions click ",(0,r.yg)("a",{parentName:"p",href:"https://docs.databricks.com/en/security/secrets/index.html"},"here.")," Be sure to use the ",(0,r.yg)("strong",{parentName:"p"}," (2) Fabric connection")," to the Databricks workspace which contains the Databricks scope and secrets configured in this Gem. Contact us to understand the integrations with other secret managers."),(0,r.yg)("p",null,"Select the Operation type from the dropdown menu. ",(0,r.yg)("strong",{parentName:"p"},"(3) Compute text embeddings")," operation will send the selected ",(0,r.yg)("strong",{parentName:"p"},"(4) Texts column")," to the OpenAI API. For each entry in the Texts column, OpenAI's ada-002 model will return a text embedding."),(0,r.yg)("p",null,"Instead of sending a single row to OpenAI's API, select the ",(0,r.yg)("strong",{parentName:"p"},"(5) Group data")," option. Group data is a window function, using a window of size 20, ",(0,r.yg)("strong",{parentName:"p"},"(6) ordered by")," the selected column. Using the Group data option influences model performance based on the column selected."),(0,r.yg)("h4",{id:"1b-input"},"1b. 
Input"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Column"),(0,r.yg)("th",{parentName:"tr",align:null},"Description"),(0,r.yg)("th",{parentName:"tr",align:null},"Required"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Question/Text"),(0,r.yg)("td",{parentName:"tr",align:null},"string - a question or text string of interest"),(0,r.yg)("td",{parentName:"tr",align:null},"True")))),(0,r.yg)("h4",{id:"1c-output"},"1c. Output"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Column"),(0,r.yg)("th",{parentName:"tr",align:null},"Description"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"openai_embedding"),(0,r.yg)("td",{parentName:"tr",align:null},"array(float) - The vector embedding returned from OpenAI corresponding to the input question/text. Each record is an array of ",(0,r.yg)("inlineCode",{parentName:"td"},"1536")," floating point numbers, such as ",(0,r.yg)("inlineCode",{parentName:"td"},"[-0.0018493991, -0.0059955865, ... -0.02498541]"),".")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"openai_error"),(0,r.yg)("td",{parentName:"tr",align:null},"string - this column is provided to display any error message returned from the OpenAI API; helpful for troubleshooting.")))),(0,r.yg)("h4",{id:"1d-generated-code"},"1d. Generated code"),(0,r.yg)("p",null,"All the visual designs are converted to code and committed to the Prophecy user's Git repository. 
See below for a sample of the code which calls the OpenAI API to compute text embeddings."),(0,r.yg)(o.A,{mdxType:"Tabs"},(0,r.yg)(i.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},'def vectorize(spark: SparkSession, question_seed: DataFrame) -> DataFrame:\n from spark_ai.llms.openai import OpenAiLLM\n from pyspark.dbutils import DBUtils\n OpenAiLLM(api_key = DBUtils(spark).secrets.get(scope = "", key = ""))\\\n .register_udfs(spark = spark)\n\n return question_seed\\\n .withColumn("_row_num", row_number().over(Window.partitionBy().orderBy(col("input"))))\\\n .withColumn("_group_num", ceil(col("_row_num") / 20))\\\n .withColumn("_data", struct(col("*")))\\\n .groupBy(col("_group_num"))\\\n .agg(collect_list(col("_data")).alias("_data"), collect_list(col("input")).alias("_texts"))\\\n .withColumn("_embedded", expr(f"openai_embed_texts(_texts)"))\\\n .select(\n col("_texts"),\n col("_embedded.embeddings").alias("_embeddings"),\n col("_embedded.error").alias("openai_error"),\n col("_data")\n )\\\n .select(expr("explode_outer(arrays_zip(_embeddings, _data))").alias("_content"), col("openai_error"))\\\n .select(col("_content._embeddings").alias("openai_embedding"), col("openai_error"), col("_content._data.*"))\\\n .drop("_row_num")\\\n .drop("_group_num")\n'))),(0,r.yg)(i.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-scala"},"[Not yet supported]\n")))),(0,r.yg)("h3",{id:"2-answer-a-question-with-a-given-context"},"2. Answer a question with a given context"),(0,r.yg)("p",null,"In addition to computing text embeddings, OpenAI's ada-002 model is also very good at answering questions. The Prophecy interface allows users to input a question (and optionally provide a context) as components of the ",(0,r.yg)("inlineCode",{parentName:"p"},"prompt")," sent to OpenAI. 
In response, OpenAI's ada-002 model returns an answer(s) to the question. See the input and output data previews before and after the OpenAI Gem to understand the operation."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Overview of the Gem showing the input and output for answering a question",src:n(96223).A,width:"2376",height:"966"})),(0,r.yg)("h4",{id:"2a-configure"},"2a. Configure"),(0,r.yg)("p",null,"Follow the steps below to configure the OpenAI Gem to answer a question, and to understand how to provide a context if desired."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Configure the gem to answer a question with a given context",src:n(20600).A,width:"2880",height:"1834"})),(0,r.yg)("p",null,"Storing the OpenAI API token as a ",(0,r.yg)("strong",{parentName:"p"},"(1) Databricks Secret")," is highly recommended. For instructions click ",(0,r.yg)("a",{parentName:"p",href:"https://docs.databricks.com/en/security/secrets/index.html"},"here.")," Be sure to use the ",(0,r.yg)("strong",{parentName:"p"}," (2) Fabric connection")," to the Databricks workspace which contains the Databricks scope and secrets configured in this Gem."),(0,r.yg)("p",null,"Hardcoding the OpenAI credential is not recommended. Selecting this option could send credentials to be stored hardcoded in Git; use only for credentials that should be shared with the world. Contact us to understand the integrations with other secret managers. (",(0,r.yg)("a",{parentName:"p",href:"mailto:contact.us@Prophecy.io"},"contact.us@Prophecy.io"),")"),(0,r.yg)("p",null,"Now it's time to craft a prompt to send to the OpenAI ada-002 model. Select the Operation type from the dropdown menu. 
The operation ",(0,r.yg)("inlineCode",{parentName:"p"},"Answer questions")," will prompt OpenAI's ada-002 model to answer the provided question using the datasets the model was trained on, which have some ",(0,r.yg)("a",{parentName:"p",href:"https://platform.openai.com/docs/guides/embeddings/blindness-to-recent-events"},"blindness.")," For many users, you'll want to provide some context as part of your prompt. The operation ",(0,r.yg)("strong",{parentName:"p"},"(3) Answer questions for given context")," will likely generate answers more related to the context. Select the input column which has the question of interest as the ",(0,r.yg)("strong",{parentName:"p"},"(4) Question text column"),". To provide context in addition to the question, select ",(0,r.yg)("strong",{parentName:"p"},"(5) Context text column"),". For example, if the question is ",(0,r.yg)("inlineCode",{parentName:"p"},"Does Prophecy support on-premise environments?"),", an appropriate context would be some section of Prophecy's documentation. The ",(0,r.yg)("strong",{parentName:"p"},"(6) context")," and ",(0,r.yg)("strong",{parentName:"p"},"(7) question (query)")," comprise the prompt sent to OpenAI."),(0,r.yg)("h4",{id:"2b-input"},"2b. Input"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Column"),(0,r.yg)("th",{parentName:"tr",align:null},"Description"),(0,r.yg)("th",{parentName:"tr",align:null},"Required"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Question"),(0,r.yg)("td",{parentName:"tr",align:null},"string - a question of interest to include in the prompt sent to OpenAI. 
Example: ",(0,r.yg)("inlineCode",{parentName:"td"},"What is Prophecy's AI Assistant feature?")),(0,r.yg)("td",{parentName:"tr",align:null},"True")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Context"),(0,r.yg)("td",{parentName:"tr",align:null},"string - a text corpus related to the question of interest, also included in the prompt sent to OpenAI. Frequently the context column should undergo data transformations in the Gems preceding the OpenAI Gem. See ",(0,r.yg)("a",{parentName:"td",href:"/getting-started/gen-ai-chatbot"},"this guide")," for a great example of preparing the text corpus and transforming sufficiently to include in a useful prompt."),(0,r.yg)("td",{parentName:"tr",align:null},"False")))),(0,r.yg)("h4",{id:"2c-output"},"2c. Output"),(0,r.yg)("p",null,"Since OpenAI's models are probabalistic, they return at least one, and frequently more than one, answer. These responses are formatted as a json array of answer choices. The user would usually select the best answer from the choices; we recommend selecting the first answer if you wish to select one by default. This can be done in the Gem following the OpenAI Gem as in this ",(0,r.yg)("a",{parentName:"p",href:"/getting-started/gen-ai-chatbot#3a-chatbot-live-pipeline"},"example"),"."),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Column"),(0,r.yg)("th",{parentName:"tr",align:null},"Description"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"openai_answer"),(0,r.yg)("td",{parentName:"tr",align:null},"struct - this column contains the response from OpenAI in as a json array. 
Example: ",(0,r.yg)("inlineCode",{parentName:"td"},'{"choices":["Prophecy\'s AI Assistant feature is called Data Copilot."]}')," Select/filter from multiple answer choices in a Gem following the OpenAI Gem.")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"openai_error"),(0,r.yg)("td",{parentName:"tr",align:null},"string - this column is provided to display any error message returned from the OpenAI API; helpful for troubleshooting.")))),(0,r.yg)("h4",{id:"2d-generated-code"},"2d. Generated code"),(0,r.yg)("p",null,"See below for a sample of the code which calls the OpenAI API to answer a question provided some context."),(0,r.yg)(o.A,{mdxType:"Tabs"},(0,r.yg)(i.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},'def OpenAI_1(spark: SparkSession, collect_context: DataFrame) -> DataFrame:\n from spark_ai.llms.openai import OpenAiLLM\n from pyspark.dbutils import DBUtils\n OpenAiLLM(api_key = DBUtils(spark).secrets.get(scope = "[redacted]", key = "[redacted]"))\\\n .register_udfs(spark = spark)\n\n return collect_context\\\n .withColumn("_context", col("context"))\\\n .withColumn("_query", col("input"))\\\n .withColumn(\n "openai_answer",\n expr(\n "openai_answer_question(_context, _query, \\" Answer the question based on the context below.\\nContext:\\n```\\n{context}\\n```\\nQuestion: \\n```\\n{query}\\n```\\nAnswer:\\n \\")"\n )\n )\\\n .drop("_context", "_query")\n'))),(0,r.yg)(i.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-scala"}," [page under construction]\n")))),(0,r.yg)("h3",{id:"faq"},"FAQ"),(0,r.yg)("h4",{id:"troubleshooting"},"Troubleshooting"),(0,r.yg)("p",null,"The output data sample following the OpenAI Gem also contains a column for any error message(s) returned from OpenAI. 
This handy column surfaces errors including invalid OpenAI credentials, invalid input questions, or problems with data formatting."),(0,r.yg)("h4",{id:"can-i-choose-other-openai-models"},"Can I choose other OpenAI models?"),(0,r.yg)("p",null,"Currently we use ChatGPT 3.5 Turbo. Contact us for additional options: ",(0,r.yg)("a",{parentName:"p",href:"mailto:contact.us@Prophecy.io"},"contact.us@Prophecy.io")))}g.isMDXComponent=!0},20600:(e,t,n)=>{n.d(t,{A:()=>a});const a=n.p+"assets/images/openai-configure-answer-a2f543becfb0237af252fd218f0dc6ee.png"},10687:(e,t,n)=>{n.d(t,{A:()=>a});const a=n.p+"assets/images/openai-configure-embedding-27b293504f71c3e4c49f5377eb4b887e.png"},96223:(e,t,n)=>{n.d(t,{A:()=>a});const a=n.p+"assets/images/openai-intro-answer-question-context-b81d39354874965e789254f90c6fae32.png"},49354:(e,t,n)=>{n.d(t,{A:()=>a});const a=n.p+"assets/images/openai-intro-compute-text-embeddings-31cbbb227cfe0f3c9c39b1bdb698f95d.png"}}]); \ No newline at end of file diff --git a/assets/js/6e9ec4f2.50888d14.js b/assets/js/6e9ec4f2.466f344b.js similarity index 64% rename from assets/js/6e9ec4f2.50888d14.js rename to assets/js/6e9ec4f2.466f344b.js index 8528d9809a..1ca1823687 100644 --- a/assets/js/6e9ec4f2.50888d14.js +++ b/assets/js/6e9ec4f2.466f344b.js @@ -1 +1 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[7087],{15680:(e,t,n)=>{n.d(t,{xA:()=>u,yg:()=>g});var r=n(96540);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function l(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var 
i=r.createContext({}),m=function(e){var t=r.useContext(i),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},u=function(e){var t=m(e.components);return r.createElement(i.Provider,{value:t},e.children)},p="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},d=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,i=e.parentName,u=s(e,["components","mdxType","originalType","parentName"]),p=m(n),d=a,g=p["".concat(i,".").concat(d)]||p[d]||c[d]||o;return n?r.createElement(g,l(l({ref:t},u),{},{components:n})):r.createElement(g,l({ref:t},u))}));function g(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,l=new Array(o);l[0]=d;var s={};for(var i in t)hasOwnProperty.call(t,i)&&(s[i]=t[i]);s.originalType=e,s[p]="string"==typeof e?e:a,l[1]=s;for(var m=2;m{n.r(t),n.d(t,{assets:()=>i,contentTitle:()=>l,default:()=>c,frontMatter:()=>o,metadata:()=>s,toc:()=>m});var r=n(58168),a=(n(96540),n(15680));const o={sidebar_position:10,title:"BulkColumnRename",id:"bulk-column-rename",description:"Rename multiple columns in your Dataset in a systematic way.",tags:["gems","rename","columns"]},l=void 0,s={unversionedId:"Spark/gems/transform/bulk-column-rename",id:"Spark/gems/transform/bulk-column-rename",title:"BulkColumnRename",description:"Rename multiple columns in your Dataset in a systematic way.",source:"@site/docs/Spark/gems/transform/bulk-column-rename.md",sourceDirName:"Spark/gems/transform",slug:"/Spark/gems/transform/bulk-column-rename",permalink:"/Spark/gems/transform/bulk-column-rename",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"rename",permalink:"/tags/rename"},{label:"columns",permalink:"/tags/columns"}],version:"current",sidebarPosition:10,frontMatter:{sidebar_position:10,title:"BulkColumnRename",id:"bulk-column-rename",description:"Rename multiple columns in your Dataset in a systematic 
way.",tags:["gems","rename","columns"]},sidebar:"defaultSidebar",previous:{title:"WindowFunction",permalink:"/Spark/gems/transform/window-function"},next:{title:"BulkColumnExpressions",permalink:"/Spark/gems/transform/bulk-column-expressions"}},i={},m=[{value:"Parameters",id:"parameters",level:2},{value:"Examples",id:"examples",level:2},{value:"Add a prefix",id:"add-a-prefix",level:3},{value:"Use a custom expression",id:"use-a-custom-expression",level:3}],u={toc:m},p="wrapper";function c(e){let{components:t,...o}=e;return(0,a.yg)(p,(0,r.A)({},u,o,{components:t,mdxType:"MDXLayout"}),(0,a.yg)("h3",null,(0,a.yg)("span",{class:"badge rounded-pill text-bg-light"},"Spark Gem")),(0,a.yg)("p",null,"Use the BulkColumnRename Gem to rename multiple columns in your Dataset in a systematic way."),(0,a.yg)("h2",{id:"parameters"},"Parameters"),(0,a.yg)("table",null,(0,a.yg)("thead",{parentName:"table"},(0,a.yg)("tr",{parentName:"thead"},(0,a.yg)("th",{parentName:"tr",align:null},"Parameter"),(0,a.yg)("th",{parentName:"tr",align:null},"Description"))),(0,a.yg)("tbody",{parentName:"table"},(0,a.yg)("tr",{parentName:"tbody"},(0,a.yg)("td",{parentName:"tr",align:null},"Columns to rename"),(0,a.yg)("td",{parentName:"tr",align:null},"Select one or more columns to rename from the dropdown.")),(0,a.yg)("tr",{parentName:"tbody"},(0,a.yg)("td",{parentName:"tr",align:null},"Method"),(0,a.yg)("td",{parentName:"tr",align:null},"Choose to add a prefix, add a suffix, or use a custom expression to change column names.")))),(0,a.yg)("p",null,"Based on the method you select, you will see an option to enter the prefix, suffix, or expression of your choice."),(0,a.yg)("h2",{id:"examples"},"Examples"),(0,a.yg)("h3",{id:"add-a-prefix"},"Add a prefix"),(0,a.yg)("p",null,"One example is to add the prefix ",(0,a.yg)("inlineCode",{parentName:"p"},"meta_")," to tag columns that contain metadata."),(0,a.yg)("p",null,(0,a.yg)("img",{alt:"Add prefix to multiple 
columns",src:n(28737).A,width:"2620",height:"1508"})),(0,a.yg)("h3",{id:"use-a-custom-expression"},"Use a custom expression"),(0,a.yg)("p",null,"You can accomplish the same or more complex changes using a custom expression like ",(0,a.yg)("inlineCode",{parentName:"p"},"concat('meta_', column_name)"),"."))}c.isMDXComponent=!0},28737:(e,t,n)=>{n.d(t,{A:()=>r});const r=n.p+"assets/images/bulk-add-prefix-04cac34e9823bce1bf40be9486f03131.png"}}]); \ No newline at end of file +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[7087],{15680:(e,t,n)=>{n.d(t,{xA:()=>u,yg:()=>y});var r=n(96540);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function l(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var i=r.createContext({}),m=function(e){var t=r.useContext(i),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},u=function(e){var t=m(e.components);return r.createElement(i.Provider,{value:t},e.children)},p="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},d=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,i=e.parentName,u=s(e,["components","mdxType","originalType","parentName"]),p=m(n),d=a,y=p["".concat(i,".").concat(d)]||p[d]||c[d]||o;return n?r.createElement(y,l(l({ref:t},u),{},{components:n})):r.createElement(y,l({ref:t},u))}));function y(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,l=new Array(o);l[0]=d;var s={};for(var i in 
t)hasOwnProperty.call(t,i)&&(s[i]=t[i]);s.originalType=e,s[p]="string"==typeof e?e:a,l[1]=s;for(var m=2;m{n.r(t),n.d(t,{assets:()=>i,contentTitle:()=>l,default:()=>c,frontMatter:()=>o,metadata:()=>s,toc:()=>m});var r=n(58168),a=(n(96540),n(15680));const o={sidebar_position:10,title:"BulkColumnRename",id:"bulk-column-rename",description:"Rename multiple columns in your Dataset in a systematic way.",tags:["gems","rename","columns"]},l=void 0,s={unversionedId:"Spark/gems/transform/bulk-column-rename",id:"Spark/gems/transform/bulk-column-rename",title:"BulkColumnRename",description:"Rename multiple columns in your Dataset in a systematic way.",source:"@site/docs/Spark/gems/transform/bulk-column-rename.md",sourceDirName:"Spark/gems/transform",slug:"/Spark/gems/transform/bulk-column-rename",permalink:"/Spark/gems/transform/bulk-column-rename",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"rename",permalink:"/tags/rename"},{label:"columns",permalink:"/tags/columns"}],version:"current",sidebarPosition:10,frontMatter:{sidebar_position:10,title:"BulkColumnRename",id:"bulk-column-rename",description:"Rename multiple columns in your Dataset in a systematic way.",tags:["gems","rename","columns"]},sidebar:"defaultSidebar",previous:{title:"WindowFunction",permalink:"/Spark/gems/transform/window-function"},next:{title:"BulkColumnExpressions",permalink:"/Spark/gems/transform/bulk-column-expressions"}},i={},m=[{value:"Parameters",id:"parameters",level:2},{value:"Examples",id:"examples",level:2},{value:"Add a prefix",id:"add-a-prefix",level:3},{value:"Use a custom expression",id:"use-a-custom-expression",level:3}],u={toc:m},p="wrapper";function c(e){let{components:t,...o}=e;return(0,a.yg)(p,(0,r.A)({},u,o,{components:t,mdxType:"MDXLayout"}),(0,a.yg)("h3",null,(0,a.yg)("span",{class:"badge"},"Spark Gem")),(0,a.yg)("p",null,"Use the BulkColumnRename Gem to rename multiple columns in your Dataset in a systematic 
way."),(0,a.yg)("h2",{id:"parameters"},"Parameters"),(0,a.yg)("table",null,(0,a.yg)("thead",{parentName:"table"},(0,a.yg)("tr",{parentName:"thead"},(0,a.yg)("th",{parentName:"tr",align:null},"Parameter"),(0,a.yg)("th",{parentName:"tr",align:null},"Description"))),(0,a.yg)("tbody",{parentName:"table"},(0,a.yg)("tr",{parentName:"tbody"},(0,a.yg)("td",{parentName:"tr",align:null},"Columns to rename"),(0,a.yg)("td",{parentName:"tr",align:null},"Select one or more columns to rename from the dropdown.")),(0,a.yg)("tr",{parentName:"tbody"},(0,a.yg)("td",{parentName:"tr",align:null},"Method"),(0,a.yg)("td",{parentName:"tr",align:null},"Choose to add a prefix, add a suffix, or use a custom expression to change column names.")))),(0,a.yg)("p",null,"Based on the method you select, you will see an option to enter the prefix, suffix, or expression of your choice."),(0,a.yg)("h2",{id:"examples"},"Examples"),(0,a.yg)("h3",{id:"add-a-prefix"},"Add a prefix"),(0,a.yg)("p",null,"One example is to add the prefix ",(0,a.yg)("inlineCode",{parentName:"p"},"meta_")," to tag columns that contain metadata."),(0,a.yg)("p",null,(0,a.yg)("img",{alt:"Add prefix to multiple columns",src:n(28737).A,width:"2620",height:"1508"})),(0,a.yg)("h3",{id:"use-a-custom-expression"},"Use a custom expression"),(0,a.yg)("p",null,"You can accomplish the same or more complex changes using a custom expression like ",(0,a.yg)("inlineCode",{parentName:"p"},"concat('meta_', column_name)"),"."))}c.isMDXComponent=!0},28737:(e,t,n)=>{n.d(t,{A:()=>r});const r=n.p+"assets/images/bulk-add-prefix-04cac34e9823bce1bf40be9486f03131.png"}}]); \ No newline at end of file diff --git a/assets/js/77c4a354.5bb6b04f.js b/assets/js/77c4a354.5bb6b04f.js new file mode 100644 index 0000000000..f46ba997e9 --- /dev/null +++ b/assets/js/77c4a354.5bb6b04f.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[35415],{15680:(e,t,n)=>{n.d(t,{xA:()=>d,yg:()=>g});var a=n(96540);function l(e,t,n){return t 
in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(l[n]=e[n]);return l}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(l[n]=e[n])}return l}var r=a.createContext({}),c=function(e){var t=a.useContext(r),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},d=function(e){var t=c(e.components);return a.createElement(r.Provider,{value:t},e.children)},p="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var n=e.components,l=e.mdxType,i=e.originalType,r=e.parentName,d=s(e,["components","mdxType","originalType","parentName"]),p=c(n),m=l,g=p["".concat(r,".").concat(m)]||p[m]||u[m]||i;return n?a.createElement(g,o(o({ref:t},d),{},{components:n})):a.createElement(g,o({ref:t},d))}));function g(e,t){var n=arguments,l=t&&t.mdxType;if("string"==typeof e||l){var i=n.length,o=new Array(i);o[0]=m;var s={};for(var r in t)hasOwnProperty.call(t,r)&&(s[r]=t[r]);s.originalType=e,s[p]="string"==typeof e?e:l,o[1]=s;for(var c=2;c{n.r(t),n.d(t,{assets:()=>r,contentTitle:()=>o,default:()=>u,frontMatter:()=>i,metadata:()=>s,toc:()=>c});var a=n(58168),l=(n(96540),n(15680));const i={title:"Flatten Schema",id:"flattenschema",description:"Flatten nested data",sidebar_position:4,tags:["gems","schema","explode","flatten"]},o=void 0,s={unversionedId:"SQL/gems/transform/flattenschema",id:"SQL/gems/transform/flattenschema",title:"Flatten Schema",description:"Flatten nested 
data",source:"@site/docs/SQL/gems/transform/flattenschema.md",sourceDirName:"SQL/gems/transform",slug:"/SQL/gems/transform/flattenschema",permalink:"/SQL/gems/transform/flattenschema",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"schema",permalink:"/tags/schema"},{label:"explode",permalink:"/tags/explode"},{label:"flatten",permalink:"/tags/flatten"}],version:"current",sidebarPosition:4,frontMatter:{title:"Flatten Schema",id:"flattenschema",description:"Flatten nested data",sidebar_position:4,tags:["gems","schema","explode","flatten"]},sidebar:"defaultSidebar",previous:{title:"Deduplicate",permalink:"/SQL/gems/transform/deduplicate"},next:{title:"Join",permalink:"/SQL/gems/data-joins"}},r={},c=[{value:"The Input",id:"the-input",level:2},{value:"The Expressions",id:"the-expressions",level:2},{value:"The Output",id:"the-output",level:2},{value:"Advanced settings",id:"advanced-settings",level:2}],d={toc:c},p="wrapper";function u(e){let{components:t,...i}=e;return(0,l.yg)(p,(0,a.A)({},d,i,{components:t,mdxType:"MDXLayout"}),(0,l.yg)("h3",null,(0,l.yg)("span",{class:"badge"},"SQL Gem")),(0,l.yg)("p",null,"When processing raw data it can be useful to flatten complex data types like ",(0,l.yg)("inlineCode",{parentName:"p"},"Struct"),"s and ",(0,l.yg)("inlineCode",{parentName:"p"},"Array"),"s into simpler, flatter schemas. This allows you to preserve all schemas, and not just the first one. 
You can use FlattenSchema with Snowflake Models."),(0,l.yg)("p",null,(0,l.yg)("img",{alt:"The FlattenSchema gem",src:n(52965).A,width:"290",height:"305"})),(0,l.yg)("h2",{id:"the-input"},"The Input"),(0,l.yg)("p",null,"FlattenSchema works on Snowflake sources that have nested columns that you'd like to extract into a flat schema."),(0,l.yg)("p",null,"For example, with an input schema like so:"),(0,l.yg)("p",null,(0,l.yg)("img",{alt:"Input schema",src:n(51960).A,width:"1310",height:"754"})),(0,l.yg)("p",null,"And the data looks like so:"),(0,l.yg)("p",null,(0,l.yg)("img",{alt:"Input data",src:n(64221).A,width:"2620",height:"1537"})),(0,l.yg)("p",null,"We want to extract the ",(0,l.yg)("inlineCode",{parentName:"p"},"contact"),", and all of the columns from the ",(0,l.yg)("inlineCode",{parentName:"p"},"struct"),"s in ",(0,l.yg)("inlineCode",{parentName:"p"},"content")," into a flattened schema."),(0,l.yg)("h2",{id:"the-expressions"},"The Expressions"),(0,l.yg)("p",null,"Having added a ",(0,l.yg)("inlineCode",{parentName:"p"},"FlattenSchema")," Gem to your Model, all you need to do is click the column names you wish to extract and they'll be added to the ",(0,l.yg)("inlineCode",{parentName:"p"},"Expressions")," section."),(0,l.yg)("admonition",{type:"tip"},(0,l.yg)("p",{parentName:"admonition"},"You can click to add all columns, which would make all nested leaf level values of an object visible as columns.")),(0,l.yg)("p",null,"Once added you can change the ",(0,l.yg)("inlineCode",{parentName:"p"},"Output Column")," for a given row to change the name of the Column in the output."),(0,l.yg)("p",null,(0,l.yg)("img",{alt:"Adding expressions",src:n(73533).A,width:"2620",height:"1507"})),(0,l.yg)("h2",{id:"the-output"},"The Output"),(0,l.yg)("p",null,"If we check the ",(0,l.yg)("inlineCode",{parentName:"p"},"Output")," tab in the Gem, you'll see the schema that we've created using the selected columns."),(0,l.yg)("p",null,"And here's what the output data looks 
like:"),(0,l.yg)("p",null,(0,l.yg)("img",{alt:"Output interim",src:n(41848).A,width:"2620",height:"1507"})),(0,l.yg)("p",null,"The nested contact information has been flatten so that you have individual rows for each content type."),(0,l.yg)("h2",{id:"advanced-settings"},"Advanced settings"),(0,l.yg)("p",null,"If you're familiar with Snowflake's ",(0,l.yg)("inlineCode",{parentName:"p"},"FLATTEN")," table function, you can use the advanced settings to customize the optional column arguments."),(0,l.yg)("p",null,"To use the advanced settings, hover over a column, and click the dropdown arrow."),(0,l.yg)("p",null,(0,l.yg)("img",{alt:"Advanced settings",src:n(12818).A,width:"2620",height:"1507"})),(0,l.yg)("p",null,"You can customize the following options:"),(0,l.yg)("ul",null,(0,l.yg)("li",{parentName:"ul"},"Path to the element: The path to the element within the variant data structure that you want to flatten."),(0,l.yg)("li",{parentName:"ul"},"Flatten all elements recursively: If set to ",(0,l.yg)("inlineCode",{parentName:"li"},"false"),", only the element mentioned in the path is expanded. If set to ",(0,l.yg)("inlineCode",{parentName:"li"},"true"),", all sub-elements are expanded recursively. This is set to false by default."),(0,l.yg)("li",{parentName:"ul"},"Preserve rows with missing fields: If set to ",(0,l.yg)("inlineCode",{parentName:"li"},"false"),", rows with missing fields are omitted from the output. If set to ",(0,l.yg)("inlineCode",{parentName:"li"},"true"),", rows with missing fields are generated with ",(0,l.yg)("inlineCode",{parentName:"li"},"null")," in the key, index, and value columns. This is set to false by default."),(0,l.yg)("li",{parentName:"ul"},"Datatype that needs to be flattened: The data type that you want to flatten. You can choose ",(0,l.yg)("inlineCode",{parentName:"li"},"Object"),", ",(0,l.yg)("inlineCode",{parentName:"li"},"Array"),", or ",(0,l.yg)("inlineCode",{parentName:"li"},"Both"),". 
This is set to ",(0,l.yg)("inlineCode",{parentName:"li"},"Both")," by default.")))}u.isMDXComponent=!0},73533:(e,t,n)=>{n.d(t,{A:()=>a});const a=n.p+"assets/images/flatten_add_exp-e1b9e410e33edebaf180b873544b1152.png"},12818:(e,t,n)=>{n.d(t,{A:()=>a});const a=n.p+"assets/images/flatten_advanced_settings-5e7381dda09858272e7d1b0c1f5c9d60.png"},52965:(e,t,n)=>{n.d(t,{A:()=>a});const a=n.p+"assets/images/flatten_gem-088dcb90a9e1679a18b6f2497692a93b.png"},51960:(e,t,n)=>{n.d(t,{A:()=>a});const a=n.p+"assets/images/flatten_input-6f94e353ebd52d670b50729aecc0dbb1.png"},64221:(e,t,n)=>{n.d(t,{A:()=>a});const a=n.p+"assets/images/flatten_input_interim-32f3884d132f1fba87579a634b4bf47d.png"},41848:(e,t,n)=>{n.d(t,{A:()=>a});const a=n.p+"assets/images/flatten_output_interim-cf9c2a3d1f4c7528791c2a7243b0aee6.png"}}]); \ No newline at end of file diff --git a/assets/js/77c4a354.7dbd59b6.js b/assets/js/77c4a354.7dbd59b6.js deleted file mode 100644 index 099802a7da..0000000000 --- a/assets/js/77c4a354.7dbd59b6.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[35415],{15680:(e,t,n)=>{n.d(t,{xA:()=>c,yg:()=>g});var a=n(96540);function l(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(l[n]=e[n]);return l}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(l[n]=e[n])}return l}var r=a.createContext({}),d=function(e){var t=a.useContext(r),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},c=function(e){var t=d(e.components);return 
a.createElement(r.Provider,{value:t},e.children)},p="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var n=e.components,l=e.mdxType,i=e.originalType,r=e.parentName,c=s(e,["components","mdxType","originalType","parentName"]),p=d(n),m=l,g=p["".concat(r,".").concat(m)]||p[m]||u[m]||i;return n?a.createElement(g,o(o({ref:t},c),{},{components:n})):a.createElement(g,o({ref:t},c))}));function g(e,t){var n=arguments,l=t&&t.mdxType;if("string"==typeof e||l){var i=n.length,o=new Array(i);o[0]=m;var s={};for(var r in t)hasOwnProperty.call(t,r)&&(s[r]=t[r]);s.originalType=e,s[p]="string"==typeof e?e:l,o[1]=s;for(var d=2;d{n.r(t),n.d(t,{assets:()=>r,contentTitle:()=>o,default:()=>u,frontMatter:()=>i,metadata:()=>s,toc:()=>d});var a=n(58168),l=(n(96540),n(15680));const i={title:"Flatten Schema",id:"flattenschema",description:"Flatten nested data",sidebar_position:4,tags:["gems","schema","explode","flatten"]},o=void 0,s={unversionedId:"SQL/gems/transform/flattenschema",id:"SQL/gems/transform/flattenschema",title:"Flatten Schema",description:"Flatten nested data",source:"@site/docs/SQL/gems/transform/flattenschema.md",sourceDirName:"SQL/gems/transform",slug:"/SQL/gems/transform/flattenschema",permalink:"/SQL/gems/transform/flattenschema",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"schema",permalink:"/tags/schema"},{label:"explode",permalink:"/tags/explode"},{label:"flatten",permalink:"/tags/flatten"}],version:"current",sidebarPosition:4,frontMatter:{title:"Flatten Schema",id:"flattenschema",description:"Flatten nested data",sidebar_position:4,tags:["gems","schema","explode","flatten"]},sidebar:"defaultSidebar",previous:{title:"Deduplicate",permalink:"/SQL/gems/transform/deduplicate"},next:{title:"Join",permalink:"/SQL/gems/data-joins"}},r={},d=[{value:"The Input",id:"the-input",level:2},{value:"The Expressions",id:"the-expressions",level:2},{value:"The 
Output",id:"the-output",level:2},{value:"Advanced settings",id:"advanced-settings",level:2}],c={toc:d},p="wrapper";function u(e){let{components:t,...i}=e;return(0,l.yg)(p,(0,a.A)({},c,i,{components:t,mdxType:"MDXLayout"}),(0,l.yg)("h3",null,(0,l.yg)("span",{class:"badge rounded-pill text-bg-light"},"SQL Gem")),(0,l.yg)("p",null,"When processing raw data it can be useful to flatten complex data types like ",(0,l.yg)("inlineCode",{parentName:"p"},"Struct"),"s and ",(0,l.yg)("inlineCode",{parentName:"p"},"Array"),"s into simpler, flatter schemas. This allows you to preserve all schemas, and not just the first one. You can use FlattenSchema with Snowflake Models."),(0,l.yg)("p",null,(0,l.yg)("img",{alt:"The FlattenSchema gem",src:n(52965).A,width:"290",height:"305"})),(0,l.yg)("h2",{id:"the-input"},"The Input"),(0,l.yg)("p",null,"FlattenSchema works on Snowflake sources that have nested columns that you'd like to extract into a flat schema."),(0,l.yg)("p",null,"For example, with an input schema like so:"),(0,l.yg)("p",null,(0,l.yg)("img",{alt:"Input schema",src:n(51960).A,width:"1310",height:"754"})),(0,l.yg)("p",null,"And the data looks like so:"),(0,l.yg)("p",null,(0,l.yg)("img",{alt:"Input data",src:n(64221).A,width:"2620",height:"1537"})),(0,l.yg)("p",null,"We want to extract the ",(0,l.yg)("inlineCode",{parentName:"p"},"contact"),", and all of the columns from the ",(0,l.yg)("inlineCode",{parentName:"p"},"struct"),"s in ",(0,l.yg)("inlineCode",{parentName:"p"},"content")," into a flattened schema."),(0,l.yg)("h2",{id:"the-expressions"},"The Expressions"),(0,l.yg)("p",null,"Having added a ",(0,l.yg)("inlineCode",{parentName:"p"},"FlattenSchema")," Gem to your Model, all you need to do is click the column names you wish to extract and they'll be added to the ",(0,l.yg)("inlineCode",{parentName:"p"},"Expressions")," section."),(0,l.yg)("admonition",{type:"tip"},(0,l.yg)("p",{parentName:"admonition"},"You can click to add all columns, which would make all nested leaf 
level values of an object visible as columns.")),(0,l.yg)("p",null,"Once added you can change the ",(0,l.yg)("inlineCode",{parentName:"p"},"Output Column")," for a given row to change the name of the Column in the output."),(0,l.yg)("p",null,(0,l.yg)("img",{alt:"Adding expressions",src:n(73533).A,width:"2620",height:"1507"})),(0,l.yg)("h2",{id:"the-output"},"The Output"),(0,l.yg)("p",null,"If we check the ",(0,l.yg)("inlineCode",{parentName:"p"},"Output")," tab in the Gem, you'll see the schema that we've created using the selected columns."),(0,l.yg)("p",null,"And here's what the output data looks like:"),(0,l.yg)("p",null,(0,l.yg)("img",{alt:"Output interim",src:n(41848).A,width:"2620",height:"1507"})),(0,l.yg)("p",null,"The nested contact information has been flatten so that you have individual rows for each content type."),(0,l.yg)("h2",{id:"advanced-settings"},"Advanced settings"),(0,l.yg)("p",null,"If you're familiar with Snowflake's ",(0,l.yg)("inlineCode",{parentName:"p"},"FLATTEN")," table function, you can use the advanced settings to customize the optional column arguments."),(0,l.yg)("p",null,"To use the advanced settings, hover over a column, and click the dropdown arrow."),(0,l.yg)("p",null,(0,l.yg)("img",{alt:"Advanced settings",src:n(12818).A,width:"2620",height:"1507"})),(0,l.yg)("p",null,"You can customize the following options:"),(0,l.yg)("ul",null,(0,l.yg)("li",{parentName:"ul"},"Path to the element: The path to the element within the variant data structure that you want to flatten."),(0,l.yg)("li",{parentName:"ul"},"Flatten all elements recursively: If set to ",(0,l.yg)("inlineCode",{parentName:"li"},"false"),", only the element mentioned in the path is expanded. If set to ",(0,l.yg)("inlineCode",{parentName:"li"},"true"),", all sub-elements are expanded recursively. 
This is set to false by default."),(0,l.yg)("li",{parentName:"ul"},"Preserve rows with missing fields: If set to ",(0,l.yg)("inlineCode",{parentName:"li"},"false"),", rows with missing fields are omitted from the output. If set to ",(0,l.yg)("inlineCode",{parentName:"li"},"true"),", rows with missing fields are generated with ",(0,l.yg)("inlineCode",{parentName:"li"},"null")," in the key, index, and value columns. This is set to false by default."),(0,l.yg)("li",{parentName:"ul"},"Datatype that needs to be flattened: The data type that you want to flatten. You can choose ",(0,l.yg)("inlineCode",{parentName:"li"},"Object"),", ",(0,l.yg)("inlineCode",{parentName:"li"},"Array"),", or ",(0,l.yg)("inlineCode",{parentName:"li"},"Both"),". This is set to ",(0,l.yg)("inlineCode",{parentName:"li"},"Both")," by default.")))}u.isMDXComponent=!0},73533:(e,t,n)=>{n.d(t,{A:()=>a});const a=n.p+"assets/images/flatten_add_exp-e1b9e410e33edebaf180b873544b1152.png"},12818:(e,t,n)=>{n.d(t,{A:()=>a});const a=n.p+"assets/images/flatten_advanced_settings-5e7381dda09858272e7d1b0c1f5c9d60.png"},52965:(e,t,n)=>{n.d(t,{A:()=>a});const a=n.p+"assets/images/flatten_gem-088dcb90a9e1679a18b6f2497692a93b.png"},51960:(e,t,n)=>{n.d(t,{A:()=>a});const a=n.p+"assets/images/flatten_input-6f94e353ebd52d670b50729aecc0dbb1.png"},64221:(e,t,n)=>{n.d(t,{A:()=>a});const a=n.p+"assets/images/flatten_input_interim-32f3884d132f1fba87579a634b4bf47d.png"},41848:(e,t,n)=>{n.d(t,{A:()=>a});const a=n.p+"assets/images/flatten_output_interim-cf9c2a3d1f4c7528791c2a7243b0aee6.png"}}]); \ No newline at end of file diff --git a/assets/js/809b845a.5682d8ab.js b/assets/js/809b845a.5682d8ab.js deleted file mode 100644 index 231cb011e0..0000000000 --- a/assets/js/809b845a.5682d8ab.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[3232],{15680:(e,t,a)=>{a.d(t,{xA:()=>p,yg:()=>c});var n=a(96540);function r(e,t,a){return t in 
e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function o(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function l(e){for(var t=1;t=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var m=n.createContext({}),s=function(e){var t=n.useContext(m),a=t;return e&&(a="function"==typeof e?e(t):l(l({},t),e)),a},p=function(e){var t=s(e.components);return n.createElement(m.Provider,{value:t},e.children)},u="mdxType",g={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},d=n.forwardRef((function(e,t){var a=e.components,r=e.mdxType,o=e.originalType,m=e.parentName,p=i(e,["components","mdxType","originalType","parentName"]),u=s(a),d=r,c=u["".concat(m,".").concat(d)]||u[d]||g[d]||o;return a?n.createElement(c,l(l({ref:t},p),{},{components:a})):n.createElement(c,l({ref:t},p))}));function c(e,t){var a=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=a.length,l=new Array(o);l[0]=d;var i={};for(var m in t)hasOwnProperty.call(t,m)&&(i[m]=t[m]);i.originalType=e,i[u]="string"==typeof e?e:r,l[1]=i;for(var s=2;s{a.r(t),a.d(t,{assets:()=>m,contentTitle:()=>l,default:()=>g,frontMatter:()=>o,metadata:()=>i,toc:()=>s});var n=a(58168),r=(a(96540),a(15680));const o={title:"Unpivot",id:"unpivot",description:"Use the Unpivot Gem to transform your data from a wide format to a long format.",tags:["gems","unpivot","wideformat","longformat"]},l=void 0,i={unversionedId:"Spark/gems/transform/unpivot",id:"Spark/gems/transform/unpivot",title:"Unpivot",description:"Use the Unpivot Gem to transform your data from a wide format to a long 
format.",source:"@site/docs/Spark/gems/transform/unpivot.md",sourceDirName:"Spark/gems/transform",slug:"/Spark/gems/transform/unpivot",permalink:"/Spark/gems/transform/unpivot",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"unpivot",permalink:"/tags/unpivot"},{label:"wideformat",permalink:"/tags/wideformat"},{label:"longformat",permalink:"/tags/longformat"}],version:"current",frontMatter:{title:"Unpivot",id:"unpivot",description:"Use the Unpivot Gem to transform your data from a wide format to a long format.",tags:["gems","unpivot","wideformat","longformat"]},sidebar:"defaultSidebar",previous:{title:"DynamicSelect",permalink:"/Spark/gems/transform/dynamic-select"},next:{title:"Join & Split",permalink:"/Spark/gems/join-split/"}},m={},s=[{value:"Parameters",id:"parameters",level:2},{value:"Example",id:"example",level:2}],p={toc:s},u="wrapper";function g(e){let{components:t,...o}=e;return(0,r.yg)(u,(0,n.A)({},p,o,{components:t,mdxType:"MDXLayout"}),(0,r.yg)("h3",null,(0,r.yg)("span",{class:"badge rounded-pill text-bg-light"},"Spark Gem")),(0,r.yg)("p",null,"Use the Unpivot Gem to transform your data from a wide format to a long format."),(0,r.yg)("admonition",{type:"note"},(0,r.yg)("p",{parentName:"admonition"},"If you want to pivot the data, rather than unpivot, use the ",(0,r.yg)("a",{parentName:"p",href:"/Spark/gems/transform/aggregate"},"Aggregate")," Gem.")),(0,r.yg)("h2",{id:"parameters"},"Parameters"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Parameter"),(0,r.yg)("th",{parentName:"tr",align:null},"Description"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Column(s) to use as identifiers"),(0,r.yg)("td",{parentName:"tr",align:null},"The column(s) that will identify to which group or entity the observation corresponds 
to.")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Columns to unpivot"),(0,r.yg)("td",{parentName:"tr",align:null},"The columns (wide format) that you would like to transform into a single column (long format).")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Variable column name"),(0,r.yg)("td",{parentName:"tr",align:null},"The name of the column that contains the names of the unpivoted columns. This helps describe the values in the value column.")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Value column name"),(0,r.yg)("td",{parentName:"tr",align:null},"The name of the column that will contain the values from the unpivoted columns.")))),(0,r.yg)("h2",{id:"example"},"Example"),(0,r.yg)("p",null,"Transforming your data into a long format can be beneficial when creating visualizations, comparing variables, handling dynamic data, and more."),(0,r.yg)("p",null,"Let's think about a time series example. 
If you have product sales data in a wide format, you may want to transform it into a long format before modeling the time series and analyzing the seasonal patterns in sales."),(0,r.yg)("p",null,"The image below shows sample input and output tables for this scenario."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Wide and long formats of time series data",src:a(68541).A,width:"2812",height:"900"})),(0,r.yg)("p",null,"This table describes how this transformation was achieved:"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Parameter"),(0,r.yg)("th",{parentName:"tr",align:null},"Input"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Column(s) to use as identifiers"),(0,r.yg)("td",{parentName:"tr",align:null},"The ",(0,r.yg)("em",{parentName:"td"},"Product")," column is the identifier because it defines which product the sales correspond to.")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Columns to unpivot"),(0,r.yg)("td",{parentName:"tr",align:null},"All of the quarterly sales columns will be unpivoted.")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Variable column name"),(0,r.yg)("td",{parentName:"tr",align:null},"The variable column is named ",(0,r.yg)("em",{parentName:"td"},"Quarter")," because it identifies the sales period.")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Value column name"),(0,r.yg)("td",{parentName:"tr",align:null},"The value column is named ",(0,r.yg)("em",{parentName:"td"},"UnitsSold")," because it contains information about number of units sold.")))))}g.isMDXComponent=!0},68541:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/unpivot-time-series-84c93d408d25ba8cdda39ce7906de127.png"}}]); \ No newline at end of file diff --git a/assets/js/809b845a.697bc25e.js 
b/assets/js/809b845a.697bc25e.js new file mode 100644 index 0000000000..6a640c31f1 --- /dev/null +++ b/assets/js/809b845a.697bc25e.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[3232],{15680:(e,t,a)=>{a.d(t,{xA:()=>p,yg:()=>c});var n=a(96540);function r(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function o(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function l(e){for(var t=1;t=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var m=n.createContext({}),s=function(e){var t=n.useContext(m),a=t;return e&&(a="function"==typeof e?e(t):l(l({},t),e)),a},p=function(e){var t=s(e.components);return n.createElement(m.Provider,{value:t},e.children)},u="mdxType",g={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},d=n.forwardRef((function(e,t){var a=e.components,r=e.mdxType,o=e.originalType,m=e.parentName,p=i(e,["components","mdxType","originalType","parentName"]),u=s(a),d=r,c=u["".concat(m,".").concat(d)]||u[d]||g[d]||o;return a?n.createElement(c,l(l({ref:t},p),{},{components:a})):n.createElement(c,l({ref:t},p))}));function c(e,t){var a=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=a.length,l=new Array(o);l[0]=d;var i={};for(var m in t)hasOwnProperty.call(t,m)&&(i[m]=t[m]);i.originalType=e,i[u]="string"==typeof e?e:r,l[1]=i;for(var s=2;s{a.r(t),a.d(t,{assets:()=>m,contentTitle:()=>l,default:()=>g,frontMatter:()=>o,metadata:()=>i,toc:()=>s});var n=a(58168),r=(a(96540),a(15680));const o={title:"Unpivot",id:"unpivot",description:"Use the Unpivot Gem to transform your data from a wide format to a long 
format.",tags:["gems","unpivot","wideformat","longformat"]},l=void 0,i={unversionedId:"Spark/gems/transform/unpivot",id:"Spark/gems/transform/unpivot",title:"Unpivot",description:"Use the Unpivot Gem to transform your data from a wide format to a long format.",source:"@site/docs/Spark/gems/transform/unpivot.md",sourceDirName:"Spark/gems/transform",slug:"/Spark/gems/transform/unpivot",permalink:"/Spark/gems/transform/unpivot",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"unpivot",permalink:"/tags/unpivot"},{label:"wideformat",permalink:"/tags/wideformat"},{label:"longformat",permalink:"/tags/longformat"}],version:"current",frontMatter:{title:"Unpivot",id:"unpivot",description:"Use the Unpivot Gem to transform your data from a wide format to a long format.",tags:["gems","unpivot","wideformat","longformat"]},sidebar:"defaultSidebar",previous:{title:"DynamicSelect",permalink:"/Spark/gems/transform/dynamic-select"},next:{title:"Join & Split",permalink:"/Spark/gems/join-split/"}},m={},s=[{value:"Parameters",id:"parameters",level:2},{value:"Example",id:"example",level:2}],p={toc:s},u="wrapper";function g(e){let{components:t,...o}=e;return(0,r.yg)(u,(0,n.A)({},p,o,{components:t,mdxType:"MDXLayout"}),(0,r.yg)("h3",null,(0,r.yg)("span",{class:"badge"},"Spark Gem")),(0,r.yg)("p",null,"Use the Unpivot Gem to transform your data from a wide format to a long format."),(0,r.yg)("admonition",{type:"note"},(0,r.yg)("p",{parentName:"admonition"},"If you want to pivot the data, rather than unpivot, use the ",(0,r.yg)("a",{parentName:"p",href:"/Spark/gems/transform/aggregate"},"Aggregate")," 
Gem.")),(0,r.yg)("h2",{id:"parameters"},"Parameters"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Parameter"),(0,r.yg)("th",{parentName:"tr",align:null},"Description"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Column(s) to use as identifiers"),(0,r.yg)("td",{parentName:"tr",align:null},"The column(s) that will identify to which group or entity the observation corresponds to.")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Columns to unpivot"),(0,r.yg)("td",{parentName:"tr",align:null},"The columns (wide format) that you would like to transform into a single column (long format).")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Variable column name"),(0,r.yg)("td",{parentName:"tr",align:null},"The name of the column that contains the names of the unpivoted columns. This helps describe the values in the value column.")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Value column name"),(0,r.yg)("td",{parentName:"tr",align:null},"The name of the column that will contain the values from the unpivoted columns.")))),(0,r.yg)("h2",{id:"example"},"Example"),(0,r.yg)("p",null,"Transforming your data into a long format can be beneficial when creating visualizations, comparing variables, handling dynamic data, and more."),(0,r.yg)("p",null,"Let's think about a time series example. 
If you have product sales data in a wide format, you may want to transform it into a long format before modeling the time series and analyzing the seasonal patterns in sales."),(0,r.yg)("p",null,"The image below shows sample input and output tables for this scenario."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Wide and long formats of time series data",src:a(68541).A,width:"2812",height:"900"})),(0,r.yg)("p",null,"This table describes how this transformation was achieved:"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Parameter"),(0,r.yg)("th",{parentName:"tr",align:null},"Input"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Column(s) to use as identifiers"),(0,r.yg)("td",{parentName:"tr",align:null},"The ",(0,r.yg)("em",{parentName:"td"},"Product")," column is the identifier because it defines which product the sales correspond to.")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Columns to unpivot"),(0,r.yg)("td",{parentName:"tr",align:null},"All of the quarterly sales columns will be unpivoted.")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Variable column name"),(0,r.yg)("td",{parentName:"tr",align:null},"The variable column is named ",(0,r.yg)("em",{parentName:"td"},"Quarter")," because it identifies the sales period.")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Value column name"),(0,r.yg)("td",{parentName:"tr",align:null},"The value column is named ",(0,r.yg)("em",{parentName:"td"},"UnitsSold")," because it contains information about number of units sold.")))))}g.isMDXComponent=!0},68541:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/unpivot-time-series-84c93d408d25ba8cdda39ce7906de127.png"}}]); \ No newline at end of file diff --git a/assets/js/8ddf4ff6.80419e01.js 
b/assets/js/8ddf4ff6.80419e01.js new file mode 100644 index 0000000000..c4c2a26b7d --- /dev/null +++ b/assets/js/8ddf4ff6.80419e01.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[95171],{15680:(e,t,a)=>{a.d(t,{xA:()=>d,yg:()=>c});var n=a(96540);function r(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function l(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function o(e){for(var t=1;t=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var s=n.createContext({}),m=function(e){var t=n.useContext(s),a=t;return e&&(a="function"==typeof e?e(t):o(o({},t),e)),a},d=function(e){var t=m(e.components);return n.createElement(s.Provider,{value:t},e.children)},p="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},g=n.forwardRef((function(e,t){var a=e.components,r=e.mdxType,l=e.originalType,s=e.parentName,d=i(e,["components","mdxType","originalType","parentName"]),p=m(a),g=r,c=p["".concat(s,".").concat(g)]||p[g]||u[g]||l;return a?n.createElement(c,o(o({ref:t},d),{},{components:a})):n.createElement(c,o({ref:t},d))}));function c(e,t){var a=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var l=a.length,o=new Array(l);o[0]=g;var i={};for(var s in t)hasOwnProperty.call(t,s)&&(i[s]=t[s]);i.originalType=e,i[p]="string"==typeof e?e:r,o[1]=i;for(var m=2;m{a.d(t,{A:()=>o});var n=a(96540),r=a(20053);const l={tabItem:"tabItem_Ymn6"};function o(e){let{children:t,hidden:a,className:o}=e;return n.createElement("div",{role:"tabpanel",className:(0,r.A)(l.tabItem,o),hidden:a},t)}},11470:(e,t,a)=>{a.d(t,{A:()=>w});var 
n=a(58168),r=a(96540),l=a(20053),o=a(23104),i=a(56347),s=a(57485),m=a(31682),d=a(89466);function p(e){return function(e){return r.Children.map(e,(e=>{if(!e||(0,r.isValidElement)(e)&&function(e){const{props:t}=e;return!!t&&"object"==typeof t&&"value"in t}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:t,label:a,attributes:n,default:r}}=e;return{value:t,label:a,attributes:n,default:r}}))}function u(e){const{values:t,children:a}=e;return(0,r.useMemo)((()=>{const e=t??p(a);return function(e){const t=(0,m.X)(e,((e,t)=>e.value===t.value));if(t.length>0)throw new Error(`Docusaurus error: Duplicate values "${t.map((e=>e.value)).join(", ")}" found in . Every value needs to be unique.`)}(e),e}),[t,a])}function g(e){let{value:t,tabValues:a}=e;return a.some((e=>e.value===t))}function c(e){let{queryString:t=!1,groupId:a}=e;const n=(0,i.W6)(),l=function(e){let{queryString:t=!1,groupId:a}=e;if("string"==typeof t)return t;if(!1===t)return null;if(!0===t&&!a)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. 
You can also provide an explicit value such as queryString="my-search-param".');return a??null}({queryString:t,groupId:a});return[(0,s.aZ)(l),(0,r.useCallback)((e=>{if(!l)return;const t=new URLSearchParams(n.location.search);t.set(l,e),n.replace({...n.location,search:t.toString()})}),[l,n])]}function y(e){const{defaultValue:t,queryString:a=!1,groupId:n}=e,l=u(e),[o,i]=(0,r.useState)((()=>function(e){let{defaultValue:t,tabValues:a}=e;if(0===a.length)throw new Error("Docusaurus error: the component requires at least one children component");if(t){if(!g({value:t,tabValues:a}))throw new Error(`Docusaurus error: The has a defaultValue "${t}" but none of its children has the corresponding value. Available values are: ${a.map((e=>e.value)).join(", ")}. If you intend to show no default tab, use defaultValue={null} instead.`);return t}const n=a.find((e=>e.default))??a[0];if(!n)throw new Error("Unexpected error: 0 tabValues");return n.value}({defaultValue:t,tabValues:l}))),[s,m]=c({queryString:a,groupId:n}),[p,y]=function(e){let{groupId:t}=e;const a=function(e){return e?`docusaurus.tab.${e}`:null}(t),[n,l]=(0,d.Dv)(a);return[n,(0,r.useCallback)((e=>{a&&l.set(e)}),[a,l])]}({groupId:n}),f=(()=>{const e=s??p;return g({value:e,tabValues:l})?e:null})();(0,r.useLayoutEffect)((()=>{f&&i(f)}),[f]);return{selectedValue:o,selectValue:(0,r.useCallback)((e=>{if(!g({value:e,tabValues:l}))throw new Error(`Can't select invalid tab value=${e}`);i(e),m(e),y(e)}),[m,y,l]),tabValues:l}}var f=a(92303);const h={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function b(e){let{className:t,block:a,selectedValue:i,selectValue:s,tabValues:m}=e;const d=[],{blockElementScrollPositionUntilNextRender:p}=(0,o.a_)(),u=e=>{const t=e.currentTarget,a=d.indexOf(t),n=m[a].value;n!==i&&(p(t),s(n))},g=e=>{let t=null;switch(e.key){case"Enter":u(e);break;case"ArrowRight":{const a=d.indexOf(e.currentTarget)+1;t=d[a]??d[0];break}case"ArrowLeft":{const 
a=d.indexOf(e.currentTarget)-1;t=d[a]??d[d.length-1];break}}t?.focus()};return r.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,l.A)("tabs",{"tabs--block":a},t)},m.map((e=>{let{value:t,label:a,attributes:o}=e;return r.createElement("li",(0,n.A)({role:"tab",tabIndex:i===t?0:-1,"aria-selected":i===t,key:t,ref:e=>d.push(e),onKeyDown:g,onClick:u},o,{className:(0,l.A)("tabs__item",h.tabItem,o?.className,{"tabs__item--active":i===t})}),a??t)})))}function N(e){let{lazy:t,children:a,selectedValue:n}=e;const l=(Array.isArray(a)?a:[a]).filter(Boolean);if(t){const e=l.find((e=>e.props.value===n));return e?(0,r.cloneElement)(e,{className:"margin-top--md"}):null}return r.createElement("div",{className:"margin-top--md"},l.map(((e,t)=>(0,r.cloneElement)(e,{key:t,hidden:e.props.value!==n}))))}function v(e){const t=y(e);return r.createElement("div",{className:(0,l.A)("tabs-container",h.tabList)},r.createElement(b,(0,n.A)({},e,t)),r.createElement(N,(0,n.A)({},e,t)))}function w(e){const t=(0,f.A)();return r.createElement(v,(0,n.A)({key:String(t)},e))}},45202:(e,t,a)=>{a.r(t),a.d(t,{assets:()=>d,contentTitle:()=>s,default:()=>c,frontMatter:()=>i,metadata:()=>m,toc:()=>p});var n=a(58168),r=(a(96540),a(15680)),l=a(11470),o=a(19365);const i={sidebar_position:5,title:"SchemaTransform",id:"schema-transform",description:"Add, Edit, Rename or Drop Columns",tags:["gems","withColumn","transform","schema"]},s=void 0,m={unversionedId:"Spark/gems/transform/schema-transform",id:"Spark/gems/transform/schema-transform",title:"SchemaTransform",description:"Add, Edit, Rename or Drop 
Columns",source:"@site/docs/Spark/gems/transform/schema-transform.md",sourceDirName:"Spark/gems/transform",slug:"/Spark/gems/transform/schema-transform",permalink:"/Spark/gems/transform/schema-transform",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"withColumn",permalink:"/tags/with-column"},{label:"transform",permalink:"/tags/transform"},{label:"schema",permalink:"/tags/schema"}],version:"current",sidebarPosition:5,frontMatter:{sidebar_position:5,title:"SchemaTransform",id:"schema-transform",description:"Add, Edit, Rename or Drop Columns",tags:["gems","withColumn","transform","schema"]},sidebar:"defaultSidebar",previous:{title:"FlattenSchema",permalink:"/Spark/gems/transform/flatten-schema"},next:{title:"Limit",permalink:"/Spark/gems/transform/limit"}},d={},p=[{value:"Parameters",id:"parameters",level:2},{value:"Operation types",id:"operation-types",level:2},{value:"Example",id:"example",level:2},{value:"Spark Code",id:"spark-code",level:2},{value:"Advanced Import",id:"advanced-import",level:2},{value:"Using Advanced Import",id:"using-advanced-import",level:3},{value:"Format",id:"format",level:3},{value:"Advanced Import Operation types",id:"advanced-import-operation-types",level:3}],u={toc:p},g="wrapper";function c(e){let{components:t,...i}=e;return(0,r.yg)(g,(0,n.A)({},u,i,{components:t,mdxType:"MDXLayout"}),(0,r.yg)("h3",null,(0,r.yg)("span",{class:"badge"},"Spark Gem")),(0,r.yg)("p",null,"SchemaTransform is used to add, edit, rename or drop columns from the incoming DataFrame."),(0,r.yg)("admonition",{type:"info"},(0,r.yg)("p",{parentName:"admonition"},"Unlike Reformat which is a set operation where all the transforms are applied in parallel, transformations here are applied ",(0,r.yg)("em",{parentName:"p"},"in order"),".\nReformat is a SQL ",(0,r.yg)("inlineCode",{parentName:"p"},"select")," and is preferable when making many 
changes.")),(0,r.yg)("h2",{id:"parameters"},"Parameters"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Description"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"DataFrame"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Input DataFrame"),(0,r.yg)("td",{parentName:"tr",align:"left"},"True")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Operation"),(0,r.yg)("td",{parentName:"tr",align:"left"},(0,r.yg)("inlineCode",{parentName:"td"},"Add/Replace Column"),", ",(0,r.yg)("inlineCode",{parentName:"td"},"Rename Column")," and ",(0,r.yg)("inlineCode",{parentName:"td"},"Drop Column")),(0,r.yg)("td",{parentName:"tr",align:"left"},"Required if a transformation is added")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"New Column"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Output column name (when Add/Replace operation is selected)"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Required if ",(0,r.yg)("inlineCode",{parentName:"td"},"Add/Replace Column")," is selected")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Expression"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Expression to generate new column (when Add/Replace operation is selected)"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Required if ",(0,r.yg)("inlineCode",{parentName:"td"},"Add/Replace Column")," is selected")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Old Column Name"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Column to be renamed (when Rename operation is selected)"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Required if 
",(0,r.yg)("inlineCode",{parentName:"td"},"Rename Column")," is selected")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"New Column Name"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Output column name (when Rename operation is selected)"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Required if ",(0,r.yg)("inlineCode",{parentName:"td"},"Rename Column")," is selected")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Column to drop"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Column to be dropped (when Drop operation is selected)"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Required if ",(0,r.yg)("inlineCode",{parentName:"td"},"Drop Column")," is selected")))),(0,r.yg)("h2",{id:"operation-types"},"Operation types"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Operation Type"),(0,r.yg)("th",{parentName:"tr",align:null},"Description"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Add/Replace"),(0,r.yg)("td",{parentName:"tr",align:null},"Add a new column or replace an existing one based on an expression")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Drop"),(0,r.yg)("td",{parentName:"tr",align:null},"Removes a single column from the next stages of the pipeline. This is useful if you need 9 out of 10 columns, for example.")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Rename"),(0,r.yg)("td",{parentName:"tr",align:null},"Renames an existing column")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Add if Missing"),(0,r.yg)("td",{parentName:"tr",align:null},"Provide a default value for a column if it's missing from the source. 
For example, if reading from a CSV file daily and want to ensure a column has a value even if it's not in the source files use this option.")))),(0,r.yg)("h2",{id:"example"},"Example"),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Example usage of SchemaTransform",src:a(34994).A,width:"940",height:"365"})),(0,r.yg)("h2",{id:"spark-code"},"Spark Code"),(0,r.yg)(l.A,{mdxType:"Tabs"},(0,r.yg)(o.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},'def transform(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return in0\\\n .withColumn("business_date", to_date(lit("2022-05-05"), "yyyy-MM-dd"))\\\n .withColumnRenamed("bonus_rate", "bonus")\\\n .drop("slug")\n\n'))),(0,r.yg)(o.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-scala"},'object transform {\n def apply(spark: SparkSession, in: DataFrame): DataFrame =\n in.withColumn("business_date", to_date(lit("2022-05-05"), "yyyy-MM-dd"))\n .withColumnRenamed("bonus_rate", "bonus")\n .drop("slug")\n}\n')))),(0,r.yg)("h2",{id:"advanced-import"},"Advanced Import"),(0,r.yg)("p",null,"The Advanced Import feature allows you to bulk import statements that are structured similarly to CSV/TSV files. 
This can be useful if you have your expressions/transformation logic in another format and just want to quickly configure a SchemaTransform Gem based on existing logic."),(0,r.yg)("h3",{id:"using-advanced-import"},"Using Advanced Import"),(0,r.yg)("ol",null,(0,r.yg)("li",{parentName:"ol"},"Click the ",(0,r.yg)("strong",{parentName:"li"},"Advanced")," button in the SchemaTransform Gem UI")),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Advanced import toggle",src:a(91092).A,width:"1418",height:"603"})),(0,r.yg)("ol",{start:2},(0,r.yg)("li",{parentName:"ol"},"Enter the expressions into the text area using the format as described below:")),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Advanced import mode",src:a(96207).A,width:"1433",height:"255"})),(0,r.yg)("ol",{start:3},(0,r.yg)("li",{parentName:"ol"},"Use the button at the top (labeled ",(0,r.yg)("strong",{parentName:"li"},"Expressions"),") to switch back to the expressions view. This will translate the expressions from the CSV format to the table format and will show any errors detected.")),(0,r.yg)("h3",{id:"format"},"Format"),(0,r.yg)("p",null,"The format of these expressions is ",(0,r.yg)("inlineCode",{parentName:"p"},"op_type,target_name,target_expr"),", where ",(0,r.yg)("inlineCode",{parentName:"p"},"op_type")," is the type of operation (see below); ",(0,r.yg)("inlineCode",{parentName:"p"},"target_name")," is the desired new column name and ",(0,r.yg)("inlineCode",{parentName:"p"},"target_expr")," is the Spark expression that will be used to generate the new column. 
Each ",(0,r.yg)("inlineCode",{parentName:"p"},"op_type")," has a different number of extra columns that have to be provided, see below for more details."),(0,r.yg)("admonition",{type:"caution"},(0,r.yg)("p",{parentName:"admonition"},"For ",(0,r.yg)("inlineCode",{parentName:"p"},"target_expr")," values that contain a comma ",(0,r.yg)("inlineCode",{parentName:"p"},",")," or span multiple lines, you must surround them by ",(0,r.yg)("inlineCode",{parentName:"p"},"``")," on either side. For example:"),(0,r.yg)("pre",{parentName:"admonition"},(0,r.yg)("code",{parentName:"pre"},"addrep,customer_id,customer_id\naddrep,full_name,``concat(first_name, ' ', last_name)``\n"))),(0,r.yg)("h3",{id:"advanced-import-operation-types"},"Advanced Import Operation types"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Operation Type"),(0,r.yg)("th",{parentName:"tr",align:null},"Advanced Import name"),(0,r.yg)("th",{parentName:"tr",align:null},":Arguments:"),(0,r.yg)("th",{parentName:"tr",align:null},"Example"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Add/Replace"),(0,r.yg)("td",{parentName:"tr",align:null},(0,r.yg)("inlineCode",{parentName:"td"},"addrep")),(0,r.yg)("td",{parentName:"tr",align:null},"2"),(0,r.yg)("td",{parentName:"tr",align:null},(0,r.yg)("inlineCode",{parentName:"td"},"addrep,foo,CAST(NULL as int)"))),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Drop"),(0,r.yg)("td",{parentName:"tr",align:null},(0,r.yg)("inlineCode",{parentName:"td"},"drop")),(0,r.yg)("td",{parentName:"tr",align:null},"1"),(0,r.yg)("td",{parentName:"tr",align:null},(0,r.yg)("inlineCode",{parentName:"td"},"drop 
bar"))),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Rename"),(0,r.yg)("td",{parentName:"tr",align:null},(0,r.yg)("inlineCode",{parentName:"td"},"rename")),(0,r.yg)("td",{parentName:"tr",align:null},"2"),(0,r.yg)("td",{parentName:"tr",align:null},(0,r.yg)("inlineCode",{parentName:"td"},"rename,foo,bar"))),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Add if missing"),(0,r.yg)("td",{parentName:"tr",align:null},(0,r.yg)("inlineCode",{parentName:"td"},"missing")),(0,r.yg)("td",{parentName:"tr",align:null},"2"),(0,r.yg)("td",{parentName:"tr",align:null},(0,r.yg)("inlineCode",{parentName:"td"},"missing,foo,current_timestamp()"))))))}c.isMDXComponent=!0},34994:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/schemaTransform_eg_1-c37ea99ca92e266556a154dd1bc961ad.png"},91092:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/schematransform_advanced_1-fd79241dd8f49aaf0887ea382dadc2a9.png"},96207:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/schematransform_advanced_2-a4cbe3cff9433842b21d5e17509a3939.png"}}]); \ No newline at end of file diff --git a/assets/js/8ddf4ff6.a1ff605e.js b/assets/js/8ddf4ff6.a1ff605e.js deleted file mode 100644 index 72491d7397..0000000000 --- a/assets/js/8ddf4ff6.a1ff605e.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[95171],{15680:(e,t,a)=>{a.d(t,{xA:()=>d,yg:()=>c});var n=a(96540);function r(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function l(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function o(e){for(var t=1;t=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var 
l=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var s=n.createContext({}),m=function(e){var t=n.useContext(s),a=t;return e&&(a="function"==typeof e?e(t):o(o({},t),e)),a},d=function(e){var t=m(e.components);return n.createElement(s.Provider,{value:t},e.children)},p="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},g=n.forwardRef((function(e,t){var a=e.components,r=e.mdxType,l=e.originalType,s=e.parentName,d=i(e,["components","mdxType","originalType","parentName"]),p=m(a),g=r,c=p["".concat(s,".").concat(g)]||p[g]||u[g]||l;return a?n.createElement(c,o(o({ref:t},d),{},{components:a})):n.createElement(c,o({ref:t},d))}));function c(e,t){var a=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var l=a.length,o=new Array(l);o[0]=g;var i={};for(var s in t)hasOwnProperty.call(t,s)&&(i[s]=t[s]);i.originalType=e,i[p]="string"==typeof e?e:r,o[1]=i;for(var m=2;m{a.d(t,{A:()=>o});var n=a(96540),r=a(20053);const l={tabItem:"tabItem_Ymn6"};function o(e){let{children:t,hidden:a,className:o}=e;return n.createElement("div",{role:"tabpanel",className:(0,r.A)(l.tabItem,o),hidden:a},t)}},11470:(e,t,a)=>{a.d(t,{A:()=>w});var n=a(58168),r=a(96540),l=a(20053),o=a(23104),i=a(56347),s=a(57485),m=a(31682),d=a(89466);function p(e){return function(e){return r.Children.map(e,(e=>{if(!e||(0,r.isValidElement)(e)&&function(e){const{props:t}=e;return!!t&&"object"==typeof t&&"value"in t}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:t,label:a,attributes:n,default:r}}=e;return{value:t,label:a,attributes:n,default:r}}))}function u(e){const{values:t,children:a}=e;return(0,r.useMemo)((()=>{const e=t??p(a);return function(e){const 
t=(0,m.X)(e,((e,t)=>e.value===t.value));if(t.length>0)throw new Error(`Docusaurus error: Duplicate values "${t.map((e=>e.value)).join(", ")}" found in . Every value needs to be unique.`)}(e),e}),[t,a])}function g(e){let{value:t,tabValues:a}=e;return a.some((e=>e.value===t))}function c(e){let{queryString:t=!1,groupId:a}=e;const n=(0,i.W6)(),l=function(e){let{queryString:t=!1,groupId:a}=e;if("string"==typeof t)return t;if(!1===t)return null;if(!0===t&&!a)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. You can also provide an explicit value such as queryString="my-search-param".');return a??null}({queryString:t,groupId:a});return[(0,s.aZ)(l),(0,r.useCallback)((e=>{if(!l)return;const t=new URLSearchParams(n.location.search);t.set(l,e),n.replace({...n.location,search:t.toString()})}),[l,n])]}function y(e){const{defaultValue:t,queryString:a=!1,groupId:n}=e,l=u(e),[o,i]=(0,r.useState)((()=>function(e){let{defaultValue:t,tabValues:a}=e;if(0===a.length)throw new Error("Docusaurus error: the component requires at least one children component");if(t){if(!g({value:t,tabValues:a}))throw new Error(`Docusaurus error: The has a defaultValue "${t}" but none of its children has the corresponding value. Available values are: ${a.map((e=>e.value)).join(", ")}. 
If you intend to show no default tab, use defaultValue={null} instead.`);return t}const n=a.find((e=>e.default))??a[0];if(!n)throw new Error("Unexpected error: 0 tabValues");return n.value}({defaultValue:t,tabValues:l}))),[s,m]=c({queryString:a,groupId:n}),[p,y]=function(e){let{groupId:t}=e;const a=function(e){return e?`docusaurus.tab.${e}`:null}(t),[n,l]=(0,d.Dv)(a);return[n,(0,r.useCallback)((e=>{a&&l.set(e)}),[a,l])]}({groupId:n}),f=(()=>{const e=s??p;return g({value:e,tabValues:l})?e:null})();(0,r.useLayoutEffect)((()=>{f&&i(f)}),[f]);return{selectedValue:o,selectValue:(0,r.useCallback)((e=>{if(!g({value:e,tabValues:l}))throw new Error(`Can't select invalid tab value=${e}`);i(e),m(e),y(e)}),[m,y,l]),tabValues:l}}var f=a(92303);const h={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function b(e){let{className:t,block:a,selectedValue:i,selectValue:s,tabValues:m}=e;const d=[],{blockElementScrollPositionUntilNextRender:p}=(0,o.a_)(),u=e=>{const t=e.currentTarget,a=d.indexOf(t),n=m[a].value;n!==i&&(p(t),s(n))},g=e=>{let t=null;switch(e.key){case"Enter":u(e);break;case"ArrowRight":{const a=d.indexOf(e.currentTarget)+1;t=d[a]??d[0];break}case"ArrowLeft":{const a=d.indexOf(e.currentTarget)-1;t=d[a]??d[d.length-1];break}}t?.focus()};return r.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,l.A)("tabs",{"tabs--block":a},t)},m.map((e=>{let{value:t,label:a,attributes:o}=e;return r.createElement("li",(0,n.A)({role:"tab",tabIndex:i===t?0:-1,"aria-selected":i===t,key:t,ref:e=>d.push(e),onKeyDown:g,onClick:u},o,{className:(0,l.A)("tabs__item",h.tabItem,o?.className,{"tabs__item--active":i===t})}),a??t)})))}function N(e){let{lazy:t,children:a,selectedValue:n}=e;const l=(Array.isArray(a)?a:[a]).filter(Boolean);if(t){const e=l.find((e=>e.props.value===n));return e?(0,r.cloneElement)(e,{className:"margin-top--md"}):null}return 
r.createElement("div",{className:"margin-top--md"},l.map(((e,t)=>(0,r.cloneElement)(e,{key:t,hidden:e.props.value!==n}))))}function v(e){const t=y(e);return r.createElement("div",{className:(0,l.A)("tabs-container",h.tabList)},r.createElement(b,(0,n.A)({},e,t)),r.createElement(N,(0,n.A)({},e,t)))}function w(e){const t=(0,f.A)();return r.createElement(v,(0,n.A)({key:String(t)},e))}},45202:(e,t,a)=>{a.r(t),a.d(t,{assets:()=>d,contentTitle:()=>s,default:()=>c,frontMatter:()=>i,metadata:()=>m,toc:()=>p});var n=a(58168),r=(a(96540),a(15680)),l=a(11470),o=a(19365);const i={sidebar_position:5,title:"SchemaTransform",id:"schema-transform",description:"Add, Edit, Rename or Drop Columns",tags:["gems","withColumn","transform","schema"]},s=void 0,m={unversionedId:"Spark/gems/transform/schema-transform",id:"Spark/gems/transform/schema-transform",title:"SchemaTransform",description:"Add, Edit, Rename or Drop Columns",source:"@site/docs/Spark/gems/transform/schema-transform.md",sourceDirName:"Spark/gems/transform",slug:"/Spark/gems/transform/schema-transform",permalink:"/Spark/gems/transform/schema-transform",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"withColumn",permalink:"/tags/with-column"},{label:"transform",permalink:"/tags/transform"},{label:"schema",permalink:"/tags/schema"}],version:"current",sidebarPosition:5,frontMatter:{sidebar_position:5,title:"SchemaTransform",id:"schema-transform",description:"Add, Edit, Rename or Drop Columns",tags:["gems","withColumn","transform","schema"]},sidebar:"defaultSidebar",previous:{title:"FlattenSchema",permalink:"/Spark/gems/transform/flatten-schema"},next:{title:"Limit",permalink:"/Spark/gems/transform/limit"}},d={},p=[{value:"Parameters",id:"parameters",level:2},{value:"Operation types",id:"operation-types",level:2},{value:"Example",id:"example",level:2},{value:"Spark Code",id:"spark-code",level:2},{value:"Advanced Import",id:"advanced-import",level:2},{value:"Using Advanced 
Import",id:"using-advanced-import",level:3},{value:"Format",id:"format",level:3},{value:"Advanced Import Operation types",id:"advanced-import-operation-types",level:3}],u={toc:p},g="wrapper";function c(e){let{components:t,...i}=e;return(0,r.yg)(g,(0,n.A)({},u,i,{components:t,mdxType:"MDXLayout"}),(0,r.yg)("h3",null,(0,r.yg)("span",{class:"badge rounded-pill text-bg-light"},"Spark Gem")),(0,r.yg)("p",null,"SchemaTransform is used to add, edit, rename or drop columns from the incoming DataFrame."),(0,r.yg)("admonition",{type:"info"},(0,r.yg)("p",{parentName:"admonition"},"Unlike Reformat which is a set operation where all the transforms are applied in parallel, transformations here are applied ",(0,r.yg)("em",{parentName:"p"},"in order"),".\nReformat is a SQL ",(0,r.yg)("inlineCode",{parentName:"p"},"select")," and is preferable when making many changes.")),(0,r.yg)("h2",{id:"parameters"},"Parameters"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Description"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"DataFrame"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Input DataFrame"),(0,r.yg)("td",{parentName:"tr",align:"left"},"True")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Operation"),(0,r.yg)("td",{parentName:"tr",align:"left"},(0,r.yg)("inlineCode",{parentName:"td"},"Add/Replace Column"),", ",(0,r.yg)("inlineCode",{parentName:"td"},"Rename Column")," and ",(0,r.yg)("inlineCode",{parentName:"td"},"Drop Column")),(0,r.yg)("td",{parentName:"tr",align:"left"},"Required if a transformation is added")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"New Column"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Output 
column name (when Add/Replace operation is selected)"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Required if ",(0,r.yg)("inlineCode",{parentName:"td"},"Add/Replace Column")," is selected")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Expression"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Expression to generate new column (when Add/Replace operation is selected)"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Required if ",(0,r.yg)("inlineCode",{parentName:"td"},"Add/Replace Column")," is selected")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Old Column Name"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Column to be renamed (when Rename operation is selected)"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Required if ",(0,r.yg)("inlineCode",{parentName:"td"},"Rename Column")," is selected")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"New Column Name"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Output column name (when Rename operation is selected)"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Required if ",(0,r.yg)("inlineCode",{parentName:"td"},"Rename Column")," is selected")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Column to drop"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Column to be dropped (when Drop operation is selected)"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Required if ",(0,r.yg)("inlineCode",{parentName:"td"},"Drop Column")," is selected")))),(0,r.yg)("h2",{id:"operation-types"},"Operation types"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Operation 
Type"),(0,r.yg)("th",{parentName:"tr",align:null},"Description"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Add/Replace"),(0,r.yg)("td",{parentName:"tr",align:null},"Add a new column or replace an existing one based on an expression")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Drop"),(0,r.yg)("td",{parentName:"tr",align:null},"Removes a single column from the next stages of the pipeline. This is useful if you need 9 out of 10 columns, for example.")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Rename"),(0,r.yg)("td",{parentName:"tr",align:null},"Renames an existing column")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Add if Missing"),(0,r.yg)("td",{parentName:"tr",align:null},"Provide a default value for a column if it's missing from the source. For example, if reading from a CSV file daily and want to ensure a column has a value even if it's not in the source files use this option.")))),(0,r.yg)("h2",{id:"example"},"Example"),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Example usage of SchemaTransform",src:a(34994).A,width:"940",height:"365"})),(0,r.yg)("h2",{id:"spark-code"},"Spark Code"),(0,r.yg)(l.A,{mdxType:"Tabs"},(0,r.yg)(o.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},'def transform(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return in0\\\n .withColumn("business_date", to_date(lit("2022-05-05"), "yyyy-MM-dd"))\\\n .withColumnRenamed("bonus_rate", "bonus")\\\n .drop("slug")\n\n'))),(0,r.yg)(o.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-scala"},'object transform {\n def apply(spark: SparkSession, in: DataFrame): DataFrame =\n in.withColumn("business_date", to_date(lit("2022-05-05"), "yyyy-MM-dd"))\n 
.withColumnRenamed("bonus_rate", "bonus")\n .drop("slug")\n}\n')))),(0,r.yg)("h2",{id:"advanced-import"},"Advanced Import"),(0,r.yg)("p",null,"The Advanced Import feature allows you to bulk import statements that are structured similarly to CSV/TSV files. This can be useful if you have your expressions/transformation logic in another format and just want to quickly configure a SchemaTransform Gem based on existing logic."),(0,r.yg)("h3",{id:"using-advanced-import"},"Using Advanced Import"),(0,r.yg)("ol",null,(0,r.yg)("li",{parentName:"ol"},"Click the ",(0,r.yg)("strong",{parentName:"li"},"Advanced")," button in the SchemaTransform Gem UI")),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Advanced import toggle",src:a(91092).A,width:"1418",height:"603"})),(0,r.yg)("ol",{start:2},(0,r.yg)("li",{parentName:"ol"},"Enter the expressions into the text area using the format as described below:")),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Advanced import mode",src:a(96207).A,width:"1433",height:"255"})),(0,r.yg)("ol",{start:3},(0,r.yg)("li",{parentName:"ol"},"Use the button at the top (labeled ",(0,r.yg)("strong",{parentName:"li"},"Expressions"),") to switch back to the expressions view. This will translate the expressions from the CSV format to the table format and will show any errors detected.")),(0,r.yg)("h3",{id:"format"},"Format"),(0,r.yg)("p",null,"The format of these expressions is ",(0,r.yg)("inlineCode",{parentName:"p"},"op_type,target_name,target_expr"),", where ",(0,r.yg)("inlineCode",{parentName:"p"},"op_type")," is the type of operation (see below); ",(0,r.yg)("inlineCode",{parentName:"p"},"target_name")," is the desired new column name and ",(0,r.yg)("inlineCode",{parentName:"p"},"target_expr")," is the Spark expression that will be used to generate the new column. 
Each ",(0,r.yg)("inlineCode",{parentName:"p"},"op_type")," has a different number of extra columns that have to be provided, see below for more details."),(0,r.yg)("admonition",{type:"caution"},(0,r.yg)("p",{parentName:"admonition"},"For ",(0,r.yg)("inlineCode",{parentName:"p"},"target_expr")," values that contain a comma ",(0,r.yg)("inlineCode",{parentName:"p"},",")," or span multiple lines, you must surround them by ",(0,r.yg)("inlineCode",{parentName:"p"},"``")," on either side. For example:"),(0,r.yg)("pre",{parentName:"admonition"},(0,r.yg)("code",{parentName:"pre"},"addrep,customer_id,customer_id\naddrep,full_name,``concat(first_name, ' ', last_name)``\n"))),(0,r.yg)("h3",{id:"advanced-import-operation-types"},"Advanced Import Operation types"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Operation Type"),(0,r.yg)("th",{parentName:"tr",align:null},"Advanced Import name"),(0,r.yg)("th",{parentName:"tr",align:null},":Arguments:"),(0,r.yg)("th",{parentName:"tr",align:null},"Example"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Add/Replace"),(0,r.yg)("td",{parentName:"tr",align:null},(0,r.yg)("inlineCode",{parentName:"td"},"addrep")),(0,r.yg)("td",{parentName:"tr",align:null},"2"),(0,r.yg)("td",{parentName:"tr",align:null},(0,r.yg)("inlineCode",{parentName:"td"},"addrep,foo,CAST(NULL as int)"))),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Drop"),(0,r.yg)("td",{parentName:"tr",align:null},(0,r.yg)("inlineCode",{parentName:"td"},"drop")),(0,r.yg)("td",{parentName:"tr",align:null},"1"),(0,r.yg)("td",{parentName:"tr",align:null},(0,r.yg)("inlineCode",{parentName:"td"},"drop 
bar"))),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Rename"),(0,r.yg)("td",{parentName:"tr",align:null},(0,r.yg)("inlineCode",{parentName:"td"},"rename")),(0,r.yg)("td",{parentName:"tr",align:null},"2"),(0,r.yg)("td",{parentName:"tr",align:null},(0,r.yg)("inlineCode",{parentName:"td"},"rename,foo,bar"))),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Add if missing"),(0,r.yg)("td",{parentName:"tr",align:null},(0,r.yg)("inlineCode",{parentName:"td"},"missing")),(0,r.yg)("td",{parentName:"tr",align:null},"2"),(0,r.yg)("td",{parentName:"tr",align:null},(0,r.yg)("inlineCode",{parentName:"td"},"missing,foo,current_timestamp()"))))))}c.isMDXComponent=!0},34994:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/schemaTransform_eg_1-c37ea99ca92e266556a154dd1bc961ad.png"},91092:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/schematransform_advanced_1-fd79241dd8f49aaf0887ea382dadc2a9.png"},96207:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/schematransform_advanced_2-a4cbe3cff9433842b21d5e17509a3939.png"}}]); \ No newline at end of file diff --git a/assets/js/92b0f648.268d49ab.js b/assets/js/92b0f648.268d49ab.js deleted file mode 100644 index a8db17c1ca..0000000000 --- a/assets/js/92b0f648.268d49ab.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[60032],{15680:(e,t,a)=>{a.d(t,{xA:()=>m,yg:()=>N});var n=a(96540);function r(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function l(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function i(e){for(var t=1;t=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var 
l=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var g=n.createContext({}),p=function(e){var t=n.useContext(g),a=t;return e&&(a="function"==typeof e?e(t):i(i({},t),e)),a},m=function(e){var t=p(e.components);return n.createElement(g.Provider,{value:t},e.children)},d="mdxType",y={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},s=n.forwardRef((function(e,t){var a=e.components,r=e.mdxType,l=e.originalType,g=e.parentName,m=o(e,["components","mdxType","originalType","parentName"]),d=p(a),s=r,N=d["".concat(g,".").concat(s)]||d[s]||y[s]||l;return a?n.createElement(N,i(i({ref:t},m),{},{components:a})):n.createElement(N,i({ref:t},m))}));function N(e,t){var a=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var l=a.length,i=new Array(l);i[0]=s;var o={};for(var g in t)hasOwnProperty.call(t,g)&&(o[g]=t[g]);o.originalType=e,o[d]="string"==typeof e?e:r,i[1]=o;for(var p=2;p{a.r(t),a.d(t,{assets:()=>g,contentTitle:()=>i,default:()=>y,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(58168),r=(a(96540),a(15680));const l={title:"Deduplicate",id:"deduplicate",description:"Remove rows with duplicate values of specified columns",sidebar_position:3,tags:["gems","dedupe","distinct","unique"]},i=void 0,o={unversionedId:"SQL/gems/transform/deduplicate",id:"SQL/gems/transform/deduplicate",title:"Deduplicate",description:"Remove rows with duplicate values of specified columns",source:"@site/docs/SQL/gems/transform/deduplicate.md",sourceDirName:"SQL/gems/transform",slug:"/SQL/gems/transform/deduplicate",permalink:"/SQL/gems/transform/deduplicate",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"dedupe",permalink:"/tags/dedupe"},{label:"distinct",permalink:"/tags/distinct"},{label:"unique",permalink:"/tags/unique"}],version:"current",sidebarPosition:3,frontMatter:{title:"Deduplicate",id:"deduplicate",description:"Remove rows with duplicate values of specified 
columns",sidebar_position:3,tags:["gems","dedupe","distinct","unique"]},sidebar:"defaultSidebar",previous:{title:"Aggregate",permalink:"/SQL/gems/transform/sql-aggregate"},next:{title:"Flatten Schema",permalink:"/SQL/gems/transform/flattenschema"}},g={},p=[{value:"Parameters",id:"parameters",level:2},{value:"Row to keep options",id:"row-to-keep-options",level:2},{value:"Example",id:"example",level:2}],m={toc:p},d="wrapper";function y(e){let{components:t,...l}=e;return(0,r.yg)(d,(0,n.A)({},m,l,{components:t,mdxType:"MDXLayout"}),(0,r.yg)("h3",null,(0,r.yg)("span",{class:"badge rounded-pill text-bg-light"},"SQL Gem")),(0,r.yg)("p",null,"Removes rows with duplicate values of specified columns."),(0,r.yg)("h2",{id:"parameters"},"Parameters"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Description"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Source"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Input source"),(0,r.yg)("td",{parentName:"tr",align:"left"},"True")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Row to keep"),(0,r.yg)("td",{parentName:"tr",align:"left"},"- ",(0,r.yg)("inlineCode",{parentName:"td"},"Distinct Rows"),": Keeps all distinct rows. 
This is equivalent to performing a ",(0,r.yg)("inlineCode",{parentName:"td"},"select distinct")," operation ",(0,r.yg)("br",null),"- ",(0,r.yg)("inlineCode",{parentName:"td"},"Unique Only"),": Keeps rows that don't have duplicates ",(0,r.yg)("br",null),"- ",(0,r.yg)("inlineCode",{parentName:"td"},"First"),": Keeps first occurrence of the duplicate row ",(0,r.yg)("br",null),"- ",(0,r.yg)("inlineCode",{parentName:"td"},"Last"),": Keeps last occurrence of the duplicate row ",(0,r.yg)("br",null),"Default is ",(0,r.yg)("inlineCode",{parentName:"td"},"Distinct Rows")),(0,r.yg)("td",{parentName:"tr",align:"left"},"True")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Deduplicate On Columns"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Columns to consider while removing duplicate rows (not required for ",(0,r.yg)("inlineCode",{parentName:"td"},"Distinct Rows"),")"),(0,r.yg)("td",{parentName:"tr",align:"left"},"True")))),(0,r.yg)("h2",{id:"row-to-keep-options"},"Row to keep options"),(0,r.yg)("p",null,"As mentioned in the previous parameters, there are four ",(0,r.yg)("strong",{parentName:"p"},"Row to keep")," options that you can use in your deduplicate Gem."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Deduplicate row to keep",src:a(390).A,width:"2620",height:"1507"})),(0,r.yg)("p",null,"In the Code view, you can see that the Deduplicate Gem contains ",(0,r.yg)("inlineCode",{parentName:"p"},"SELECT DISTINCT *")," when using the ",(0,r.yg)("inlineCode",{parentName:"p"},"Distinct Rows")," option."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Deduplicate code view",src:a(86027).A,width:"2620",height:"1539"})),(0,r.yg)("h2",{id:"example"},"Example"),(0,r.yg)("p",null,"Suppose you're deduplicating the following 
table."),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"First_Name"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Last_Name"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Type"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Contact"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"John"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Doe"),(0,r.yg)("td",{parentName:"tr",align:"left"},"phone"),(0,r.yg)("td",{parentName:"tr",align:"left"},"123-456-7890")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"John"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Doe"),(0,r.yg)("td",{parentName:"tr",align:"left"},"phone"),(0,r.yg)("td",{parentName:"tr",align:"left"},"123-456-7890")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"John"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Doe"),(0,r.yg)("td",{parentName:"tr",align:"left"},"phone"),(0,r.yg)("td",{parentName:"tr",align:"left"},"123-456-7890")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Alice"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Johnson"),(0,r.yg)("td",{parentName:"tr",align:"left"},"phone"),(0,r.yg)("td",{parentName:"tr",align:"left"},"246-135-0987")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Alice"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Johnson"),(0,r.yg)("td",{parentName:"tr",align:"left"},"phone"),(0,r.yg)("td",{parentName:"tr",align:"left"},"246-135-0987")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Alice"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Johnson"),(0,r.yg)("td",{parentName:"tr",align:"left"},"email"),(0,r.yg)("td",{parentName:"tr",align:"left"},(0,r.yg)("a",{parentName:"td",href:"mailto:alice@johnson.com"},"alice@johnson.com"))),(0,r.yg)("
tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Alice"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Johnson"),(0,r.yg)("td",{parentName:"tr",align:"left"},"email"),(0,r.yg)("td",{parentName:"tr",align:"left"},(0,r.yg)("a",{parentName:"td",href:"mailto:alice@johnson.com"},"alice@johnson.com"))),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Bob"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Smith"),(0,r.yg)("td",{parentName:"tr",align:"left"},"email"),(0,r.yg)("td",{parentName:"tr",align:"left"},(0,r.yg)("a",{parentName:"td",href:"mailto:bob@smith.com"},"bob@smith.com"))))),(0,r.yg)("p",null,"For ",(0,r.yg)("inlineCode",{parentName:"p"},"Distinct Rows"),", the interim data will show the following:"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"First_Name"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Last_Name"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Type"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Contact"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"John"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Doe"),(0,r.yg)("td",{parentName:"tr",align:"left"},"phone"),(0,r.yg)("td",{parentName:"tr",align:"left"},"123-456-7890")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Alice"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Johnson"),(0,r.yg)("td",{parentName:"tr",align:"left"},"phone"),(0,r.yg)("td",{parentName:"tr",align:"left"},"246-135-0987")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Alice"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Johnson"),(0,r.yg)("td",{parentName:"tr",align:"left"},"email"),(0,r.yg)("td",{parentName:"tr",align:"left"},(0,r.yg)("a",{parentName:"td",href:"mailto:alice@johnson.com"},"alice@johnson.com"))),(0,r.yg)("tr",{parentNam
e:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Bob"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Smith"),(0,r.yg)("td",{parentName:"tr",align:"left"},"email"),(0,r.yg)("td",{parentName:"tr",align:"left"},(0,r.yg)("a",{parentName:"td",href:"mailto:bob@smith.com"},"bob@smith.com"))))),(0,r.yg)("p",null,"The ",(0,r.yg)("inlineCode",{parentName:"p"},"First")," and ",(0,r.yg)("inlineCode",{parentName:"p"},"Last")," options work similarly to ",(0,r.yg)("inlineCode",{parentName:"p"},"Distinct Rows"),", but they keep the first and last occurrence of the duplicate rows respectively."),(0,r.yg)("p",null,"For ",(0,r.yg)("inlineCode",{parentName:"p"},"Unique Only"),", the interim data will look like the following:"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"First_Name"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Last_Name"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Type"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Contact"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Bob"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Smith"),(0,r.yg)("td",{parentName:"tr",align:"left"},"email"),(0,r.yg)("td",{parentName:"tr",align:"left"},(0,r.yg)("a",{parentName:"td",href:"mailto:bob@smith.com"},"bob@smith.com"))))),(0,r.yg)("p",null,"You'll be left with only one unique row since the rest were all duplicates."),(0,r.yg)("hr",null),(0,r.yg)("p",null,"You can add ",(0,r.yg)("inlineCode",{parentName:"p"},"First_Name")," and ",(0,r.yg)("inlineCode",{parentName:"p"},"Last_Name")," to Deduplicate On Columns if you want to further deduplicate the table."),(0,r.yg)("p",null,"For ",(0,r.yg)("inlineCode",{parentName:"p"},"Distinct Rows"),", the interim data will show the 
following:"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"First_Name"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Last_Name"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"John"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Doe")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Alice"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Johnson")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Bob"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Smith")))),(0,r.yg)("admonition",{type:"note"},(0,r.yg)("p",{parentName:"admonition"},"For ",(0,r.yg)("inlineCode",{parentName:"p"},"First"),", ",(0,r.yg)("inlineCode",{parentName:"p"},"Last"),", and ",(0,r.yg)("inlineCode",{parentName:"p"},"Unique Only"),", the interim data will contain all columns, irrespective of the columns that were added."),(0,r.yg)("p",{parentName:"admonition"},"For ",(0,r.yg)("inlineCode",{parentName:"p"},"First")," and ",(0,r.yg)("inlineCode",{parentName:"p"},"Last"),", the interim data will look like the 
following:"),(0,r.yg)("table",{parentName:"admonition"},(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"First_Name"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Last_Name"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Type"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Contact"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"John"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Doe"),(0,r.yg)("td",{parentName:"tr",align:"left"},"phone"),(0,r.yg)("td",{parentName:"tr",align:"left"},"123-456-7890")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Alice"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Johnson"),(0,r.yg)("td",{parentName:"tr",align:"left"},"phone"),(0,r.yg)("td",{parentName:"tr",align:"left"},"246-135-0987")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Alice"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Johnson"),(0,r.yg)("td",{parentName:"tr",align:"left"},"email"),(0,r.yg)("td",{parentName:"tr",align:"left"},(0,r.yg)("a",{parentName:"td",href:"mailto:alice@johnson.com"},"alice@johnson.com"))),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Bob"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Smith"),(0,r.yg)("td",{parentName:"tr",align:"left"},"email"),(0,r.yg)("td",{parentName:"tr",align:"left"},(0,r.yg)("a",{parentName:"td",href:"mailto:bob@smith.com"},"bob@smith.com"))))),(0,r.yg)("p",{parentName:"admonition"},"For ",(0,r.yg)("inlineCode",{parentName:"p"},"Unique Only"),", the interim data will look like the 
following:"),(0,r.yg)("table",{parentName:"admonition"},(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"First_Name"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Last_Name"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Type"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Contact"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Bob"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Smith"),(0,r.yg)("td",{parentName:"tr",align:"left"},"email"),(0,r.yg)("td",{parentName:"tr",align:"left"},(0,r.yg)("a",{parentName:"td",href:"mailto:bob@smith.com"},"bob@smith.com")))))))}y.isMDXComponent=!0},86027:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/deduplicate_code_view-0547325e0c346a5613414aeeb7ac6292.png"},390:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/deduplicate_row_to_keep-d77d5f3a3d4592a545e78c80f5da1c7a.png"}}]); \ No newline at end of file diff --git a/assets/js/92b0f648.8a4e717e.js b/assets/js/92b0f648.8a4e717e.js new file mode 100644 index 0000000000..28ec0ac0e3 --- /dev/null +++ b/assets/js/92b0f648.8a4e717e.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[60032],{15680:(e,t,a)=>{a.d(t,{xA:()=>m,yg:()=>N});var n=a(96540);function r(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function l(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function i(e){for(var t=1;t=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var g=n.createContext({}),p=function(e){var t=n.useContext(g),a=t;return e&&(a="function"==typeof 
e?e(t):i(i({},t),e)),a},m=function(e){var t=p(e.components);return n.createElement(g.Provider,{value:t},e.children)},d="mdxType",y={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},s=n.forwardRef((function(e,t){var a=e.components,r=e.mdxType,l=e.originalType,g=e.parentName,m=o(e,["components","mdxType","originalType","parentName"]),d=p(a),s=r,N=d["".concat(g,".").concat(s)]||d[s]||y[s]||l;return a?n.createElement(N,i(i({ref:t},m),{},{components:a})):n.createElement(N,i({ref:t},m))}));function N(e,t){var a=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var l=a.length,i=new Array(l);i[0]=s;var o={};for(var g in t)hasOwnProperty.call(t,g)&&(o[g]=t[g]);o.originalType=e,o[d]="string"==typeof e?e:r,i[1]=o;for(var p=2;p{a.r(t),a.d(t,{assets:()=>g,contentTitle:()=>i,default:()=>y,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(58168),r=(a(96540),a(15680));const l={title:"Deduplicate",id:"deduplicate",description:"Remove rows with duplicate values of specified columns",sidebar_position:3,tags:["gems","dedupe","distinct","unique"]},i=void 0,o={unversionedId:"SQL/gems/transform/deduplicate",id:"SQL/gems/transform/deduplicate",title:"Deduplicate",description:"Remove rows with duplicate values of specified columns",source:"@site/docs/SQL/gems/transform/deduplicate.md",sourceDirName:"SQL/gems/transform",slug:"/SQL/gems/transform/deduplicate",permalink:"/SQL/gems/transform/deduplicate",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"dedupe",permalink:"/tags/dedupe"},{label:"distinct",permalink:"/tags/distinct"},{label:"unique",permalink:"/tags/unique"}],version:"current",sidebarPosition:3,frontMatter:{title:"Deduplicate",id:"deduplicate",description:"Remove rows with duplicate values of specified columns",sidebar_position:3,tags:["gems","dedupe","distinct","unique"]},sidebar:"defaultSidebar",previous:{title:"Aggregate",permalink:"/SQL/gems/transform/sql-aggregate"},next:{title:"Flatten 
Schema",permalink:"/SQL/gems/transform/flattenschema"}},g={},p=[{value:"Parameters",id:"parameters",level:2},{value:"Row to keep options",id:"row-to-keep-options",level:2},{value:"Example",id:"example",level:2}],m={toc:p},d="wrapper";function y(e){let{components:t,...l}=e;return(0,r.yg)(d,(0,n.A)({},m,l,{components:t,mdxType:"MDXLayout"}),(0,r.yg)("h3",null,(0,r.yg)("span",{class:"badge"},"SQL Gem")),(0,r.yg)("p",null,"Removes rows with duplicate values of specified columns."),(0,r.yg)("h2",{id:"parameters"},"Parameters"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Description"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Source"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Input source"),(0,r.yg)("td",{parentName:"tr",align:"left"},"True")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Row to keep"),(0,r.yg)("td",{parentName:"tr",align:"left"},"- ",(0,r.yg)("inlineCode",{parentName:"td"},"Distinct Rows"),": Keeps all distinct rows. 
This is equivalent to performing a ",(0,r.yg)("inlineCode",{parentName:"td"},"select distinct")," operation ",(0,r.yg)("br",null),"- ",(0,r.yg)("inlineCode",{parentName:"td"},"Unique Only"),": Keeps rows that don't have duplicates ",(0,r.yg)("br",null),"- ",(0,r.yg)("inlineCode",{parentName:"td"},"First"),": Keeps first occurrence of the duplicate row ",(0,r.yg)("br",null),"- ",(0,r.yg)("inlineCode",{parentName:"td"},"Last"),": Keeps last occurrence of the duplicate row ",(0,r.yg)("br",null),"Default is ",(0,r.yg)("inlineCode",{parentName:"td"},"Distinct Rows")),(0,r.yg)("td",{parentName:"tr",align:"left"},"True")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Deduplicate On Columns"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Columns to consider while removing duplicate rows (not required for ",(0,r.yg)("inlineCode",{parentName:"td"},"Distinct Rows"),")"),(0,r.yg)("td",{parentName:"tr",align:"left"},"True")))),(0,r.yg)("h2",{id:"row-to-keep-options"},"Row to keep options"),(0,r.yg)("p",null,"As mentioned in the previous parameters, there are four ",(0,r.yg)("strong",{parentName:"p"},"Row to keep")," options that you can use in your deduplicate Gem."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Deduplicate row to keep",src:a(390).A,width:"2620",height:"1507"})),(0,r.yg)("p",null,"In the Code view, you can see that the Deduplicate Gem contains ",(0,r.yg)("inlineCode",{parentName:"p"},"SELECT DISTINCT *")," when using the ",(0,r.yg)("inlineCode",{parentName:"p"},"Distinct Rows")," option."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Deduplicate code view",src:a(86027).A,width:"2620",height:"1539"})),(0,r.yg)("h2",{id:"example"},"Example"),(0,r.yg)("p",null,"Suppose you're deduplicating the following 
table."),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"First_Name"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Last_Name"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Type"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Contact"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"John"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Doe"),(0,r.yg)("td",{parentName:"tr",align:"left"},"phone"),(0,r.yg)("td",{parentName:"tr",align:"left"},"123-456-7890")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"John"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Doe"),(0,r.yg)("td",{parentName:"tr",align:"left"},"phone"),(0,r.yg)("td",{parentName:"tr",align:"left"},"123-456-7890")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"John"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Doe"),(0,r.yg)("td",{parentName:"tr",align:"left"},"phone"),(0,r.yg)("td",{parentName:"tr",align:"left"},"123-456-7890")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Alice"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Johnson"),(0,r.yg)("td",{parentName:"tr",align:"left"},"phone"),(0,r.yg)("td",{parentName:"tr",align:"left"},"246-135-0987")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Alice"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Johnson"),(0,r.yg)("td",{parentName:"tr",align:"left"},"phone"),(0,r.yg)("td",{parentName:"tr",align:"left"},"246-135-0987")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Alice"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Johnson"),(0,r.yg)("td",{parentName:"tr",align:"left"},"email"),(0,r.yg)("td",{parentName:"tr",align:"left"},(0,r.yg)("a",{parentName:"td",href:"mailto:alice@johnson.com"},"alice@johnson.com"))),(0,r.yg)("
tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Alice"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Johnson"),(0,r.yg)("td",{parentName:"tr",align:"left"},"email"),(0,r.yg)("td",{parentName:"tr",align:"left"},(0,r.yg)("a",{parentName:"td",href:"mailto:alice@johnson.com"},"alice@johnson.com"))),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Bob"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Smith"),(0,r.yg)("td",{parentName:"tr",align:"left"},"email"),(0,r.yg)("td",{parentName:"tr",align:"left"},(0,r.yg)("a",{parentName:"td",href:"mailto:bob@smith.com"},"bob@smith.com"))))),(0,r.yg)("p",null,"For ",(0,r.yg)("inlineCode",{parentName:"p"},"Distinct Rows"),", the interim data will show the following:"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"First_Name"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Last_Name"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Type"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Contact"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"John"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Doe"),(0,r.yg)("td",{parentName:"tr",align:"left"},"phone"),(0,r.yg)("td",{parentName:"tr",align:"left"},"123-456-7890")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Alice"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Johnson"),(0,r.yg)("td",{parentName:"tr",align:"left"},"phone"),(0,r.yg)("td",{parentName:"tr",align:"left"},"246-135-0987")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Alice"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Johnson"),(0,r.yg)("td",{parentName:"tr",align:"left"},"email"),(0,r.yg)("td",{parentName:"tr",align:"left"},(0,r.yg)("a",{parentName:"td",href:"mailto:alice@johnson.com"},"alice@johnson.com"))),(0,r.yg)("tr",{parentNam
e:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Bob"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Smith"),(0,r.yg)("td",{parentName:"tr",align:"left"},"email"),(0,r.yg)("td",{parentName:"tr",align:"left"},(0,r.yg)("a",{parentName:"td",href:"mailto:bob@smith.com"},"bob@smith.com"))))),(0,r.yg)("p",null,"The ",(0,r.yg)("inlineCode",{parentName:"p"},"First")," and ",(0,r.yg)("inlineCode",{parentName:"p"},"Last")," options work similarly to ",(0,r.yg)("inlineCode",{parentName:"p"},"Distinct Rows"),", but they keep the first and last occurrence of the duplicate rows respectively."),(0,r.yg)("p",null,"For ",(0,r.yg)("inlineCode",{parentName:"p"},"Unique Only"),", the interim data will look like the following:"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"First_Name"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Last_Name"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Type"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Contact"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Bob"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Smith"),(0,r.yg)("td",{parentName:"tr",align:"left"},"email"),(0,r.yg)("td",{parentName:"tr",align:"left"},(0,r.yg)("a",{parentName:"td",href:"mailto:bob@smith.com"},"bob@smith.com"))))),(0,r.yg)("p",null,"You'll be left with only one unique row since the rest were all duplicates."),(0,r.yg)("hr",null),(0,r.yg)("p",null,"You can add ",(0,r.yg)("inlineCode",{parentName:"p"},"First_Name")," and ",(0,r.yg)("inlineCode",{parentName:"p"},"Last_Name")," to Deduplicate On Columns if you want to further deduplicate the table."),(0,r.yg)("p",null,"For ",(0,r.yg)("inlineCode",{parentName:"p"},"Distinct Rows"),", the interim data will show the 
following:"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"First_Name"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Last_Name"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"John"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Doe")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Alice"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Johnson")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Bob"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Smith")))),(0,r.yg)("admonition",{type:"note"},(0,r.yg)("p",{parentName:"admonition"},"For ",(0,r.yg)("inlineCode",{parentName:"p"},"First"),", ",(0,r.yg)("inlineCode",{parentName:"p"},"Last"),", and ",(0,r.yg)("inlineCode",{parentName:"p"},"Unique Only"),", the interim data will contain all columns, irrespective of the columns that were added."),(0,r.yg)("p",{parentName:"admonition"},"For ",(0,r.yg)("inlineCode",{parentName:"p"},"First")," and ",(0,r.yg)("inlineCode",{parentName:"p"},"Last"),", the interim data will look like the 
following:"),(0,r.yg)("table",{parentName:"admonition"},(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"First_Name"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Last_Name"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Type"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Contact"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"John"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Doe"),(0,r.yg)("td",{parentName:"tr",align:"left"},"phone"),(0,r.yg)("td",{parentName:"tr",align:"left"},"123-456-7890")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Alice"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Johnson"),(0,r.yg)("td",{parentName:"tr",align:"left"},"phone"),(0,r.yg)("td",{parentName:"tr",align:"left"},"246-135-0987")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Alice"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Johnson"),(0,r.yg)("td",{parentName:"tr",align:"left"},"email"),(0,r.yg)("td",{parentName:"tr",align:"left"},(0,r.yg)("a",{parentName:"td",href:"mailto:alice@johnson.com"},"alice@johnson.com"))),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Bob"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Smith"),(0,r.yg)("td",{parentName:"tr",align:"left"},"email"),(0,r.yg)("td",{parentName:"tr",align:"left"},(0,r.yg)("a",{parentName:"td",href:"mailto:bob@smith.com"},"bob@smith.com"))))),(0,r.yg)("p",{parentName:"admonition"},"For ",(0,r.yg)("inlineCode",{parentName:"p"},"Unique Only"),", the interim data will look like the 
following:"),(0,r.yg)("table",{parentName:"admonition"},(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"First_Name"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Last_Name"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Type"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Contact"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Bob"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Smith"),(0,r.yg)("td",{parentName:"tr",align:"left"},"email"),(0,r.yg)("td",{parentName:"tr",align:"left"},(0,r.yg)("a",{parentName:"td",href:"mailto:bob@smith.com"},"bob@smith.com")))))))}y.isMDXComponent=!0},86027:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/deduplicate_code_view-0547325e0c346a5613414aeeb7ac6292.png"},390:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/deduplicate_row_to_keep-d77d5f3a3d4592a545e78c80f5da1c7a.png"}}]); \ No newline at end of file diff --git a/assets/js/9a47c610.b199ea30.js b/assets/js/9a47c610.b199ea30.js new file mode 100644 index 0000000000..faa3105740 --- /dev/null +++ b/assets/js/9a47c610.b199ea30.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[70778],{15680:(e,t,a)=>{a.d(t,{xA:()=>u,yg:()=>g});var n=a(96540);function l(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function r(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function o(e){for(var t=1;t=0||(l[a]=e[a]);return l}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(l[a]=e[a])}return l}var c=n.createContext({}),i=function(e){var t=n.useContext(c),a=t;return e&&(a="function"==typeof 
e?e(t):o(o({},t),e)),a},u=function(e){var t=i(e.components);return n.createElement(c.Provider,{value:t},e.children)},m="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},d=n.forwardRef((function(e,t){var a=e.components,l=e.mdxType,r=e.originalType,c=e.parentName,u=s(e,["components","mdxType","originalType","parentName"]),m=i(a),d=l,g=m["".concat(c,".").concat(d)]||m[d]||p[d]||r;return a?n.createElement(g,o(o({ref:t},u),{},{components:a})):n.createElement(g,o({ref:t},u))}));function g(e,t){var a=arguments,l=t&&t.mdxType;if("string"==typeof e||l){var r=a.length,o=new Array(r);o[0]=d;var s={};for(var c in t)hasOwnProperty.call(t,c)&&(s[c]=t[c]);s.originalType=e,s[m]="string"==typeof e?e:l,o[1]=s;for(var i=2;i{a.d(t,{A:()=>o});var n=a(96540),l=a(20053);const r={tabItem:"tabItem_Ymn6"};function o(e){let{children:t,hidden:a,className:o}=e;return n.createElement("div",{role:"tabpanel",className:(0,l.A)(r.tabItem,o),hidden:a},t)}},11470:(e,t,a)=>{a.d(t,{A:()=>w});var n=a(58168),l=a(96540),r=a(20053),o=a(23104),s=a(56347),c=a(57485),i=a(31682),u=a(89466);function m(e){return function(e){return l.Children.map(e,(e=>{if(!e||(0,l.isValidElement)(e)&&function(e){const{props:t}=e;return!!t&&"object"==typeof t&&"value"in t}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:t,label:a,attributes:n,default:l}}=e;return{value:t,label:a,attributes:n,default:l}}))}function p(e){const{values:t,children:a}=e;return(0,l.useMemo)((()=>{const e=t??m(a);return function(e){const t=(0,i.X)(e,((e,t)=>e.value===t.value));if(t.length>0)throw new Error(`Docusaurus error: Duplicate values "${t.map((e=>e.value)).join(", ")}" found in . 
Every value needs to be unique.`)}(e),e}),[t,a])}function d(e){let{value:t,tabValues:a}=e;return a.some((e=>e.value===t))}function g(e){let{queryString:t=!1,groupId:a}=e;const n=(0,s.W6)(),r=function(e){let{queryString:t=!1,groupId:a}=e;if("string"==typeof t)return t;if(!1===t)return null;if(!0===t&&!a)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. You can also provide an explicit value such as queryString="my-search-param".');return a??null}({queryString:t,groupId:a});return[(0,c.aZ)(r),(0,l.useCallback)((e=>{if(!r)return;const t=new URLSearchParams(n.location.search);t.set(r,e),n.replace({...n.location,search:t.toString()})}),[r,n])]}function f(e){const{defaultValue:t,queryString:a=!1,groupId:n}=e,r=p(e),[o,s]=(0,l.useState)((()=>function(e){let{defaultValue:t,tabValues:a}=e;if(0===a.length)throw new Error("Docusaurus error: the component requires at least one children component");if(t){if(!d({value:t,tabValues:a}))throw new Error(`Docusaurus error: The has a defaultValue "${t}" but none of its children has the corresponding value. Available values are: ${a.map((e=>e.value)).join(", ")}. 
If you intend to show no default tab, use defaultValue={null} instead.`);return t}const n=a.find((e=>e.default))??a[0];if(!n)throw new Error("Unexpected error: 0 tabValues");return n.value}({defaultValue:t,tabValues:r}))),[c,i]=g({queryString:a,groupId:n}),[m,f]=function(e){let{groupId:t}=e;const a=function(e){return e?`docusaurus.tab.${e}`:null}(t),[n,r]=(0,u.Dv)(a);return[n,(0,l.useCallback)((e=>{a&&r.set(e)}),[a,r])]}({groupId:n}),h=(()=>{const e=c??m;return d({value:e,tabValues:r})?e:null})();(0,l.useLayoutEffect)((()=>{h&&s(h)}),[h]);return{selectedValue:o,selectValue:(0,l.useCallback)((e=>{if(!d({value:e,tabValues:r}))throw new Error(`Can't select invalid tab value=${e}`);s(e),i(e),f(e)}),[i,f,r]),tabValues:r}}var h=a(92303);const y={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function b(e){let{className:t,block:a,selectedValue:s,selectValue:c,tabValues:i}=e;const u=[],{blockElementScrollPositionUntilNextRender:m}=(0,o.a_)(),p=e=>{const t=e.currentTarget,a=u.indexOf(t),n=i[a].value;n!==s&&(m(t),c(n))},d=e=>{let t=null;switch(e.key){case"Enter":p(e);break;case"ArrowRight":{const a=u.indexOf(e.currentTarget)+1;t=u[a]??u[0];break}case"ArrowLeft":{const a=u.indexOf(e.currentTarget)-1;t=u[a]??u[u.length-1];break}}t?.focus()};return l.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,r.A)("tabs",{"tabs--block":a},t)},i.map((e=>{let{value:t,label:a,attributes:o}=e;return l.createElement("li",(0,n.A)({role:"tab",tabIndex:s===t?0:-1,"aria-selected":s===t,key:t,ref:e=>u.push(e),onKeyDown:d,onClick:p},o,{className:(0,r.A)("tabs__item",y.tabItem,o?.className,{"tabs__item--active":s===t})}),a??t)})))}function _(e){let{lazy:t,children:a,selectedValue:n}=e;const r=(Array.isArray(a)?a:[a]).filter(Boolean);if(t){const e=r.find((e=>e.props.value===n));return e?(0,l.cloneElement)(e,{className:"margin-top--md"}):null}return 
l.createElement("div",{className:"margin-top--md"},r.map(((e,t)=>(0,l.cloneElement)(e,{key:t,hidden:e.props.value!==n}))))}function v(e){const t=f(e);return l.createElement("div",{className:(0,r.A)("tabs-container",y.tabList)},l.createElement(b,(0,n.A)({},e,t)),l.createElement(_,(0,n.A)({},e,t)))}function w(e){const t=(0,h.A)();return l.createElement(v,(0,n.A)({key:String(t)},e))}},11002:(e,t,a)=>{a.r(t),a.d(t,{assets:()=>u,contentTitle:()=>c,default:()=>g,frontMatter:()=>s,metadata:()=>i,toc:()=>m});var n=a(58168),l=(a(96540),a(15680)),r=a(11470),o=a(19365);const s={sidebar_position:4,title:"CompareColumns",id:"compare-columns",description:"Compare columns between two dataframes",tags:["gems","compare","diff","compare-columns"]},c=void 0,i={unversionedId:"Spark/gems/join-split/compare-columns",id:"Spark/gems/join-split/compare-columns",title:"CompareColumns",description:"Compare columns between two dataframes",source:"@site/docs/Spark/gems/join-split/compare-columns.md",sourceDirName:"Spark/gems/join-split",slug:"/Spark/gems/join-split/compare-columns",permalink:"/Spark/gems/join-split/compare-columns",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"compare",permalink:"/tags/compare"},{label:"diff",permalink:"/tags/diff"},{label:"compare-columns",permalink:"/tags/compare-columns"}],version:"current",sidebarPosition:4,frontMatter:{sidebar_position:4,title:"CompareColumns",id:"compare-columns",description:"Compare columns between two dataframes",tags:["gems","compare","diff","compare-columns"]},sidebar:"defaultSidebar",previous:{title:"RowDistributor",permalink:"/Spark/gems/join-split/row-distributor"},next:{title:"Custom",permalink:"/Spark/gems/custom/"}},u={},m=[{value:"Parameters",id:"parameters",level:2},{value:"Example - Compare columns of two DataFrames",id:"example---compare-columns-of-two-dataframes",level:3},{value:"Generated code",id:"generated-code",level:3}],p={toc:m},d="wrapper";function 
g(e){let{components:t,...a}=e;return(0,l.yg)(d,(0,n.A)({},p,a,{components:t,mdxType:"MDXLayout"}),(0,l.yg)("h3",null,(0,l.yg)("span",{class:"badge"},"Spark Gem")),(0,l.yg)("p",null,"The CompareColumns Gem lets you compare columns between two DataFrames based on the key id columns defined."),(0,l.yg)("h2",{id:"parameters"},"Parameters"),(0,l.yg)("table",null,(0,l.yg)("thead",{parentName:"table"},(0,l.yg)("tr",{parentName:"thead"},(0,l.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,l.yg)("th",{parentName:"tr",align:"left"},"Description"),(0,l.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,l.yg)("tbody",{parentName:"table"},(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"DataFrame 1"),(0,l.yg)("td",{parentName:"tr",align:"left"},"First input DataFrame"),(0,l.yg)("td",{parentName:"tr",align:"left"},"True")),(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"DataFrame 2"),(0,l.yg)("td",{parentName:"tr",align:"left"},"Second input DataFrame"),(0,l.yg)("td",{parentName:"tr",align:"left"},"True")),(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"ID columns to retain(Select Id Columns)"),(0,l.yg)("td",{parentName:"tr",align:"left"},"List of columns that are used joining two dataframes"),(0,l.yg)("td",{parentName:"tr",align:"left"},"True")),(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"Output Column Name(Select Output Columns)"),(0,l.yg)("td",{parentName:"tr",align:"left"},"In the output, alias name of the column name that was compared among dataframes"),(0,l.yg)("td",{parentName:"tr",align:"left"},"True")),(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"Match Count Column Name(Select Output Columns)"),(0,l.yg)("td",{parentName:"tr",align:"left"},"In the output, alias name of the column that shows the count of rows that matched between two 
dataframes"),(0,l.yg)("td",{parentName:"tr",align:"left"},"True")),(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"Mismatch Count Column Name(Select Output Columns)"),(0,l.yg)("td",{parentName:"tr",align:"left"},"In the output, alias name of the column that shows the count of rows that mismatched between two dataframes"),(0,l.yg)("td",{parentName:"tr",align:"left"},"True")),(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"Mismatch Example Left Column Name(Select Output Columns)"),(0,l.yg)("td",{parentName:"tr",align:"left"},"In the output, alias name of the column displaying an incorrect left column value"),(0,l.yg)("td",{parentName:"tr",align:"left"},"True")),(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"Mismatch Example Right Column Name(Select Output Columns)"),(0,l.yg)("td",{parentName:"tr",align:"left"},"In the output, alias name of the column displaying an incorrect right column value"),(0,l.yg)("td",{parentName:"tr",align:"left"},"True")),(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"Mismatch Example ID Column Prefix(Select Output Columns)"),(0,l.yg)("td",{parentName:"tr",align:"left"},"In the output, alias name of the ID column value that mismatched between two dataframes"),(0,l.yg)("td",{parentName:"tr",align:"left"},"True")))),(0,l.yg)("h3",{id:"example---compare-columns-of-two-dataframes"},"Example - Compare columns of two DataFrames"),(0,l.yg)("div",{class:"wistia_responsive_padding",style:{padding:"56.25% 0 0 0",position:"relative"}},(0,l.yg)("div",{class:"wistia_responsive_wrapper",style:{height:"100%",left:0,position:"absolute",top:0,width:"100%"}},(0,l.yg)("iframe",{src:"https://github.com/SimpleDataLabsInc/prophecy-docs/assets/130362885/23c23ea9-e98b-4624-91a8-597cfaf0e647",title:"Compare 
columns",allow:"autoplay;fullscreen",allowtransparency:"true",frameborder:"0",scrolling:"no",class:"wistia_embed",name:"wistia_embed",msallowfullscreen:!0,width:"100%",height:"100%"}))),(0,l.yg)("h3",{id:"generated-code"},"Generated code"),(0,l.yg)(r.A,{mdxType:"Tabs"},(0,l.yg)(o.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,l.yg)("pre",null,(0,l.yg)("code",{parentName:"pre",className:"language-py"},'def CompareColumns_1(spark: SparkSession, in0: DataFrame, in1: DataFrame) -> DataFrame:\n joined = exploded1\\\n .join(\n exploded2,\n reduce(\n lambda a, c: a & c,\n [col(f"exploded1.column_name") == col(f"exploded2.column_name"), col(f"exploded1.customer_id") == col(f"exploded2.customer_id")],\n lit(True)\n ),\n "full_outer"\n )\\\n .select(\n coalesce(col(f"exploded1.column_name"), col(f"exploded2.column_name")).alias("column_name"),\n coalesce(col(f"exploded1.customer_id"), col(f"exploded2.customer_id")).alias("customer_id"),\n col(\n f"exploded1.##value##"\n )\\\n .alias(\n "##left_value##"\n ),\n col(\n f"exploded2.##value##"\n )\\\n .alias(\n "##right_value##"\n )\n )\\\n .withColumn(\n "match_count",\n when(\n coalesce(\n (\n col("##left_value##")\n == col(\n "##right_value##"\n )\n ),\n (\n col(\n "##left_value##"\n )\\\n .isNull()\n & col(\n "##right_value##"\n )\\\n .isNull()\n )\n ),\n lit(1)\n )\\\n .otherwise(lit(0))\n )\\\n .withColumn(\n "mismatch_count",\n when(\n coalesce(\n (\n col("##left_value##")\n != col(\n "##right_value##"\n )\n ),\n ~ (\n col(\n "##left_value##"\n )\\\n .isNull()\n & col(\n "##right_value##"\n )\\\n .isNull()\n )\n ),\n lit(1)\n )\\\n .otherwise(lit(0))\n )\n mismatchExamples = joined\\\n .select(\n col("column_name"),\n col("customer_id"),\n lit(0).alias("match_count"),\n lit(0).alias("mismatch_count"),\n col(\n "##left_value##"\n )\\\n .alias("mismatch_example_left"),\n col(\n "##right_value##"\n )\\\n .alias("mismatch_example_right")\n )\\\n .dropDuplicates(["column_name"])\n\n return joined\\\n 
.union(mismatchExamples)\\\n .groupBy("column_name")\\\n .agg(\n sum("match_count").alias("match_count"),\n sum("mismatch_count").alias("mismatch_count"),\n first(col("mismatch_example_left"), ignorenulls = True).alias("mismatch_example_left"),\n first(col("mismatch_example_right"), ignorenulls = True).alias("mismatch_example_right"),\n first(\n when(coalesce(col("mismatch_example_left"), col("mismatch_example_right")).isNotNull(), col("customer_id"))\\\n .otherwise(lit(None)),\n ignorenulls = True\n )\\\n .alias("mismatch_example_customer_id")\n )\\\n .orderBy(col("mismatch_count").desc(), col("column_name"))\n\n'))),(0,l.yg)(o.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,l.yg)("pre",null,(0,l.yg)("code",{parentName:"pre",className:"language-scala"},'object CompareColumns_1 {\n def apply(context: Context, in0: DataFrame, in1: DataFrame): DataFrame = {\n import org.apache.spark.sql.expressions.Window\n val joined = in0\n .select(\n col("customer_id"),\n explode_outer(\n map(\n (in0.columns.toSet -- List("customer_id").toSet).toSeq.flatMap(c =>\n List(lit(c), col(c).cast("string"))\n ): _*\n )\n ).as(List("column_name", "##value##"))\n )\n .as("exploded1")\n .join(\n in1\n .select(\n col("customer_id"),\n explode_outer(\n map(\n (in0.columns.toSet -- List("customer_id").toSet).toSeq\n .flatMap(c => List(lit(c), col(c).cast("string"))): _*\n )\n ).as(List("column_name", "##value##"))\n )\n .as("exploded2"),\n lit(true)\n .and(col("exploded1.column_name") === col("exploded2.column_name"))\n .and(col("exploded1.customer_id") === col("exploded2.customer_id")),\n "full_outer"\n )\n .select(\n coalesce(col("exploded1.column_name"), col("exploded2.column_name"))\n .as("column_name"),\n coalesce(col("exploded1.customer_id"), col("exploded2.customer_id"))\n .as("customer_id"),\n col("exploded1.##value##").as("##left_value##"),\n col("exploded2.##value##").as("##right_value##")\n )\n .withColumn(\n "match_count",\n when(\n coalesce(col("##left_value##") === 
col("##right_value##"),\n col("##left_value##").isNull && col("##right_value##").isNull\n ),\n lit(1)\n ).otherwise(lit(0))\n )\n .withColumn(\n "mismatch_count",\n when(coalesce(\n col("##left_value##") =!= col("##right_value##"),\n !(col("##left_value##").isNull && col("##right_value##").isNull)\n ),\n lit(1)\n ).otherwise(lit(0))\n )\n joined\n .groupBy("column_name")\n .agg(\n sum("match_count").as("match_count"),\n sum("mismatch_count").as("mismatch_count"),\n first(col("mismatch_example_left"), ignoreNulls = true)\n .as("mismatch_example_left"),\n first(col("mismatch_example_right"), ignoreNulls = true)\n .as("mismatch_example_right"),\n first(when(coalesce(col("mismatch_example_left"),\n col("mismatch_example_right")\n ).isNotNull,\n col("customer_id")\n ).otherwise(lit(null)),\n ignoreNulls = true\n ).as("mismatch_example_customer_id")\n )\n .orderBy(col("mismatch_count").desc, col("column_name"))\n }\n}\n')))),(0,l.yg)("p",null,"Below are the steps that are performed to compare two DataFrames in compare column Gem:"),(0,l.yg)("ul",null,(0,l.yg)("li",{parentName:"ul"},"Pivot the DataFrame to get the key column's, compare column name and value"),(0,l.yg)("li",{parentName:"ul"},"Join the pivoted DataFrames and compare the column value using key column's"),(0,l.yg)("li",{parentName:"ul"},"Calculate the match and mismatch record counts")),(0,l.yg)("admonition",{type:"note"},(0,l.yg)("p",{parentName:"admonition"},"Repartition the DataFrames as they will be exploded and joined with each other")))}g.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/9a47c610.d2681540.js b/assets/js/9a47c610.d2681540.js deleted file mode 100644 index a68ec1682f..0000000000 --- a/assets/js/9a47c610.d2681540.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[70778],{15680:(e,t,a)=>{a.d(t,{xA:()=>u,yg:()=>g});var n=a(96540);function l(e,t,a){return t in 
e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function r(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function o(e){for(var t=1;t=0||(l[a]=e[a]);return l}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(l[a]=e[a])}return l}var i=n.createContext({}),c=function(e){var t=n.useContext(i),a=t;return e&&(a="function"==typeof e?e(t):o(o({},t),e)),a},u=function(e){var t=c(e.components);return n.createElement(i.Provider,{value:t},e.children)},m="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},d=n.forwardRef((function(e,t){var a=e.components,l=e.mdxType,r=e.originalType,i=e.parentName,u=s(e,["components","mdxType","originalType","parentName"]),m=c(a),d=l,g=m["".concat(i,".").concat(d)]||m[d]||p[d]||r;return a?n.createElement(g,o(o({ref:t},u),{},{components:a})):n.createElement(g,o({ref:t},u))}));function g(e,t){var a=arguments,l=t&&t.mdxType;if("string"==typeof e||l){var r=a.length,o=new Array(r);o[0]=d;var s={};for(var i in t)hasOwnProperty.call(t,i)&&(s[i]=t[i]);s.originalType=e,s[m]="string"==typeof e?e:l,o[1]=s;for(var c=2;c{a.d(t,{A:()=>o});var n=a(96540),l=a(20053);const r={tabItem:"tabItem_Ymn6"};function o(e){let{children:t,hidden:a,className:o}=e;return n.createElement("div",{role:"tabpanel",className:(0,l.A)(r.tabItem,o),hidden:a},t)}},11470:(e,t,a)=>{a.d(t,{A:()=>w});var n=a(58168),l=a(96540),r=a(20053),o=a(23104),s=a(56347),i=a(57485),c=a(31682),u=a(89466);function m(e){return function(e){return l.Children.map(e,(e=>{if(!e||(0,l.isValidElement)(e)&&function(e){const{props:t}=e;return!!t&&"object"==typeof t&&"value"in t}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof 
e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:t,label:a,attributes:n,default:l}}=e;return{value:t,label:a,attributes:n,default:l}}))}function p(e){const{values:t,children:a}=e;return(0,l.useMemo)((()=>{const e=t??m(a);return function(e){const t=(0,c.X)(e,((e,t)=>e.value===t.value));if(t.length>0)throw new Error(`Docusaurus error: Duplicate values "${t.map((e=>e.value)).join(", ")}" found in . Every value needs to be unique.`)}(e),e}),[t,a])}function d(e){let{value:t,tabValues:a}=e;return a.some((e=>e.value===t))}function g(e){let{queryString:t=!1,groupId:a}=e;const n=(0,s.W6)(),r=function(e){let{queryString:t=!1,groupId:a}=e;if("string"==typeof t)return t;if(!1===t)return null;if(!0===t&&!a)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. You can also provide an explicit value such as queryString="my-search-param".');return a??null}({queryString:t,groupId:a});return[(0,i.aZ)(r),(0,l.useCallback)((e=>{if(!r)return;const t=new URLSearchParams(n.location.search);t.set(r,e),n.replace({...n.location,search:t.toString()})}),[r,n])]}function f(e){const{defaultValue:t,queryString:a=!1,groupId:n}=e,r=p(e),[o,s]=(0,l.useState)((()=>function(e){let{defaultValue:t,tabValues:a}=e;if(0===a.length)throw new Error("Docusaurus error: the component requires at least one children component");if(t){if(!d({value:t,tabValues:a}))throw new Error(`Docusaurus error: The has a defaultValue "${t}" but none of its children has the corresponding value. Available values are: ${a.map((e=>e.value)).join(", ")}. 
If you intend to show no default tab, use defaultValue={null} instead.`);return t}const n=a.find((e=>e.default))??a[0];if(!n)throw new Error("Unexpected error: 0 tabValues");return n.value}({defaultValue:t,tabValues:r}))),[i,c]=g({queryString:a,groupId:n}),[m,f]=function(e){let{groupId:t}=e;const a=function(e){return e?`docusaurus.tab.${e}`:null}(t),[n,r]=(0,u.Dv)(a);return[n,(0,l.useCallback)((e=>{a&&r.set(e)}),[a,r])]}({groupId:n}),h=(()=>{const e=i??m;return d({value:e,tabValues:r})?e:null})();(0,l.useLayoutEffect)((()=>{h&&s(h)}),[h]);return{selectedValue:o,selectValue:(0,l.useCallback)((e=>{if(!d({value:e,tabValues:r}))throw new Error(`Can't select invalid tab value=${e}`);s(e),c(e),f(e)}),[c,f,r]),tabValues:r}}var h=a(92303);const y={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function b(e){let{className:t,block:a,selectedValue:s,selectValue:i,tabValues:c}=e;const u=[],{blockElementScrollPositionUntilNextRender:m}=(0,o.a_)(),p=e=>{const t=e.currentTarget,a=u.indexOf(t),n=c[a].value;n!==s&&(m(t),i(n))},d=e=>{let t=null;switch(e.key){case"Enter":p(e);break;case"ArrowRight":{const a=u.indexOf(e.currentTarget)+1;t=u[a]??u[0];break}case"ArrowLeft":{const a=u.indexOf(e.currentTarget)-1;t=u[a]??u[u.length-1];break}}t?.focus()};return l.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,r.A)("tabs",{"tabs--block":a},t)},c.map((e=>{let{value:t,label:a,attributes:o}=e;return l.createElement("li",(0,n.A)({role:"tab",tabIndex:s===t?0:-1,"aria-selected":s===t,key:t,ref:e=>u.push(e),onKeyDown:d,onClick:p},o,{className:(0,r.A)("tabs__item",y.tabItem,o?.className,{"tabs__item--active":s===t})}),a??t)})))}function _(e){let{lazy:t,children:a,selectedValue:n}=e;const r=(Array.isArray(a)?a:[a]).filter(Boolean);if(t){const e=r.find((e=>e.props.value===n));return e?(0,l.cloneElement)(e,{className:"margin-top--md"}):null}return 
l.createElement("div",{className:"margin-top--md"},r.map(((e,t)=>(0,l.cloneElement)(e,{key:t,hidden:e.props.value!==n}))))}function v(e){const t=f(e);return l.createElement("div",{className:(0,r.A)("tabs-container",y.tabList)},l.createElement(b,(0,n.A)({},e,t)),l.createElement(_,(0,n.A)({},e,t)))}function w(e){const t=(0,h.A)();return l.createElement(v,(0,n.A)({key:String(t)},e))}},11002:(e,t,a)=>{a.r(t),a.d(t,{assets:()=>u,contentTitle:()=>i,default:()=>g,frontMatter:()=>s,metadata:()=>c,toc:()=>m});var n=a(58168),l=(a(96540),a(15680)),r=a(11470),o=a(19365);const s={sidebar_position:4,title:"CompareColumns",id:"compare-columns",description:"Compare columns between two dataframes",tags:["gems","compare","diff","compare-columns"]},i=void 0,c={unversionedId:"Spark/gems/join-split/compare-columns",id:"Spark/gems/join-split/compare-columns",title:"CompareColumns",description:"Compare columns between two dataframes",source:"@site/docs/Spark/gems/join-split/compare-columns.md",sourceDirName:"Spark/gems/join-split",slug:"/Spark/gems/join-split/compare-columns",permalink:"/Spark/gems/join-split/compare-columns",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"compare",permalink:"/tags/compare"},{label:"diff",permalink:"/tags/diff"},{label:"compare-columns",permalink:"/tags/compare-columns"}],version:"current",sidebarPosition:4,frontMatter:{sidebar_position:4,title:"CompareColumns",id:"compare-columns",description:"Compare columns between two dataframes",tags:["gems","compare","diff","compare-columns"]},sidebar:"defaultSidebar",previous:{title:"RowDistributor",permalink:"/Spark/gems/join-split/row-distributor"},next:{title:"Custom",permalink:"/Spark/gems/custom/"}},u={},m=[{value:"Parameters",id:"parameters",level:2},{value:"Example - Compare columns of two DataFrames",id:"example---compare-columns-of-two-dataframes",level:3},{value:"Generated code",id:"generated-code",level:3}],p={toc:m},d="wrapper";function 
g(e){let{components:t,...a}=e;return(0,l.yg)(d,(0,n.A)({},p,a,{components:t,mdxType:"MDXLayout"}),(0,l.yg)("h3",null,(0,l.yg)("span",{class:"badge rounded-pill text-bg-light"},"Spark Gem")),(0,l.yg)("p",null,"The CompareColumns Gem lets you compare columns between two DataFrames based on the key id columns defined."),(0,l.yg)("h2",{id:"parameters"},"Parameters"),(0,l.yg)("table",null,(0,l.yg)("thead",{parentName:"table"},(0,l.yg)("tr",{parentName:"thead"},(0,l.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,l.yg)("th",{parentName:"tr",align:"left"},"Description"),(0,l.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,l.yg)("tbody",{parentName:"table"},(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"DataFrame 1"),(0,l.yg)("td",{parentName:"tr",align:"left"},"First input DataFrame"),(0,l.yg)("td",{parentName:"tr",align:"left"},"True")),(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"DataFrame 2"),(0,l.yg)("td",{parentName:"tr",align:"left"},"Second input DataFrame"),(0,l.yg)("td",{parentName:"tr",align:"left"},"True")),(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"ID columns to retain(Select Id Columns)"),(0,l.yg)("td",{parentName:"tr",align:"left"},"List of columns that are used joining two dataframes"),(0,l.yg)("td",{parentName:"tr",align:"left"},"True")),(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"Output Column Name(Select Output Columns)"),(0,l.yg)("td",{parentName:"tr",align:"left"},"In the output, alias name of the column name that was compared among dataframes"),(0,l.yg)("td",{parentName:"tr",align:"left"},"True")),(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"Match Count Column Name(Select Output Columns)"),(0,l.yg)("td",{parentName:"tr",align:"left"},"In the output, alias name of the column that shows the count of rows that matched between two 
dataframes"),(0,l.yg)("td",{parentName:"tr",align:"left"},"True")),(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"Mismatch Count Column Name(Select Output Columns)"),(0,l.yg)("td",{parentName:"tr",align:"left"},"In the output, alias name of the column that shows the count of rows that mismatched between two dataframes"),(0,l.yg)("td",{parentName:"tr",align:"left"},"True")),(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"Mismatch Example Left Column Name(Select Output Columns)"),(0,l.yg)("td",{parentName:"tr",align:"left"},"In the output, alias name of the column displaying an incorrect left column value"),(0,l.yg)("td",{parentName:"tr",align:"left"},"True")),(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"Mismatch Example Right Column Name(Select Output Columns)"),(0,l.yg)("td",{parentName:"tr",align:"left"},"In the output, alias name of the column displaying an incorrect right column value"),(0,l.yg)("td",{parentName:"tr",align:"left"},"True")),(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"Mismatch Example ID Column Prefix(Select Output Columns)"),(0,l.yg)("td",{parentName:"tr",align:"left"},"In the output, alias name of the ID column value that mismatched between two dataframes"),(0,l.yg)("td",{parentName:"tr",align:"left"},"True")))),(0,l.yg)("h3",{id:"example---compare-columns-of-two-dataframes"},"Example - Compare columns of two DataFrames"),(0,l.yg)("div",{class:"wistia_responsive_padding",style:{padding:"56.25% 0 0 0",position:"relative"}},(0,l.yg)("div",{class:"wistia_responsive_wrapper",style:{height:"100%",left:0,position:"absolute",top:0,width:"100%"}},(0,l.yg)("iframe",{src:"https://github.com/SimpleDataLabsInc/prophecy-docs/assets/130362885/23c23ea9-e98b-4624-91a8-597cfaf0e647",title:"Compare 
columns",allow:"autoplay;fullscreen",allowtransparency:"true",frameborder:"0",scrolling:"no",class:"wistia_embed",name:"wistia_embed",msallowfullscreen:!0,width:"100%",height:"100%"}))),(0,l.yg)("h3",{id:"generated-code"},"Generated code"),(0,l.yg)(r.A,{mdxType:"Tabs"},(0,l.yg)(o.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,l.yg)("pre",null,(0,l.yg)("code",{parentName:"pre",className:"language-py"},'def CompareColumns_1(spark: SparkSession, in0: DataFrame, in1: DataFrame) -> DataFrame:\n joined = exploded1\\\n .join(\n exploded2,\n reduce(\n lambda a, c: a & c,\n [col(f"exploded1.column_name") == col(f"exploded2.column_name"), col(f"exploded1.customer_id") == col(f"exploded2.customer_id")],\n lit(True)\n ),\n "full_outer"\n )\\\n .select(\n coalesce(col(f"exploded1.column_name"), col(f"exploded2.column_name")).alias("column_name"),\n coalesce(col(f"exploded1.customer_id"), col(f"exploded2.customer_id")).alias("customer_id"),\n col(\n f"exploded1.##value##"\n )\\\n .alias(\n "##left_value##"\n ),\n col(\n f"exploded2.##value##"\n )\\\n .alias(\n "##right_value##"\n )\n )\\\n .withColumn(\n "match_count",\n when(\n coalesce(\n (\n col("##left_value##")\n == col(\n "##right_value##"\n )\n ),\n (\n col(\n "##left_value##"\n )\\\n .isNull()\n & col(\n "##right_value##"\n )\\\n .isNull()\n )\n ),\n lit(1)\n )\\\n .otherwise(lit(0))\n )\\\n .withColumn(\n "mismatch_count",\n when(\n coalesce(\n (\n col("##left_value##")\n != col(\n "##right_value##"\n )\n ),\n ~ (\n col(\n "##left_value##"\n )\\\n .isNull()\n & col(\n "##right_value##"\n )\\\n .isNull()\n )\n ),\n lit(1)\n )\\\n .otherwise(lit(0))\n )\n mismatchExamples = joined\\\n .select(\n col("column_name"),\n col("customer_id"),\n lit(0).alias("match_count"),\n lit(0).alias("mismatch_count"),\n col(\n "##left_value##"\n )\\\n .alias("mismatch_example_left"),\n col(\n "##right_value##"\n )\\\n .alias("mismatch_example_right")\n )\\\n .dropDuplicates(["column_name"])\n\n return joined\\\n 
.union(mismatchExamples)\\\n .groupBy("column_name")\\\n .agg(\n sum("match_count").alias("match_count"),\n sum("mismatch_count").alias("mismatch_count"),\n first(col("mismatch_example_left"), ignorenulls = True).alias("mismatch_example_left"),\n first(col("mismatch_example_right"), ignorenulls = True).alias("mismatch_example_right"),\n first(\n when(coalesce(col("mismatch_example_left"), col("mismatch_example_right")).isNotNull(), col("customer_id"))\\\n .otherwise(lit(None)),\n ignorenulls = True\n )\\\n .alias("mismatch_example_customer_id")\n )\\\n .orderBy(col("mismatch_count").desc(), col("column_name"))\n\n'))),(0,l.yg)(o.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,l.yg)("pre",null,(0,l.yg)("code",{parentName:"pre",className:"language-scala"},'object CompareColumns_1 {\n def apply(context: Context, in0: DataFrame, in1: DataFrame): DataFrame = {\n import org.apache.spark.sql.expressions.Window\n val joined = in0\n .select(\n col("customer_id"),\n explode_outer(\n map(\n (in0.columns.toSet -- List("customer_id").toSet).toSeq.flatMap(c =>\n List(lit(c), col(c).cast("string"))\n ): _*\n )\n ).as(List("column_name", "##value##"))\n )\n .as("exploded1")\n .join(\n in1\n .select(\n col("customer_id"),\n explode_outer(\n map(\n (in0.columns.toSet -- List("customer_id").toSet).toSeq\n .flatMap(c => List(lit(c), col(c).cast("string"))): _*\n )\n ).as(List("column_name", "##value##"))\n )\n .as("exploded2"),\n lit(true)\n .and(col("exploded1.column_name") === col("exploded2.column_name"))\n .and(col("exploded1.customer_id") === col("exploded2.customer_id")),\n "full_outer"\n )\n .select(\n coalesce(col("exploded1.column_name"), col("exploded2.column_name"))\n .as("column_name"),\n coalesce(col("exploded1.customer_id"), col("exploded2.customer_id"))\n .as("customer_id"),\n col("exploded1.##value##").as("##left_value##"),\n col("exploded2.##value##").as("##right_value##")\n )\n .withColumn(\n "match_count",\n when(\n coalesce(col("##left_value##") === 
col("##right_value##"),\n col("##left_value##").isNull && col("##right_value##").isNull\n ),\n lit(1)\n ).otherwise(lit(0))\n )\n .withColumn(\n "mismatch_count",\n when(coalesce(\n col("##left_value##") =!= col("##right_value##"),\n !(col("##left_value##").isNull && col("##right_value##").isNull)\n ),\n lit(1)\n ).otherwise(lit(0))\n )\n joined\n .groupBy("column_name")\n .agg(\n sum("match_count").as("match_count"),\n sum("mismatch_count").as("mismatch_count"),\n first(col("mismatch_example_left"), ignoreNulls = true)\n .as("mismatch_example_left"),\n first(col("mismatch_example_right"), ignoreNulls = true)\n .as("mismatch_example_right"),\n first(when(coalesce(col("mismatch_example_left"),\n col("mismatch_example_right")\n ).isNotNull,\n col("customer_id")\n ).otherwise(lit(null)),\n ignoreNulls = true\n ).as("mismatch_example_customer_id")\n )\n .orderBy(col("mismatch_count").desc, col("column_name"))\n }\n}\n')))),(0,l.yg)("p",null,"Below are the steps that are performed to compare two DataFrames in compare column Gem:"),(0,l.yg)("ul",null,(0,l.yg)("li",{parentName:"ul"},"Pivot the DataFrame to get the key column's, compare column name and value"),(0,l.yg)("li",{parentName:"ul"},"Join the pivoted DataFrames and compare the column value using key column's"),(0,l.yg)("li",{parentName:"ul"},"Calculate the match and mismatch record counts")),(0,l.yg)("admonition",{type:"note"},(0,l.yg)("p",{parentName:"admonition"},"Repartition the DataFrames as they will be exploded and joined with each other")))}g.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/a135f75b.2c24fc77.js b/assets/js/a135f75b.2c24fc77.js deleted file mode 100644 index 485d68d498..0000000000 --- a/assets/js/a135f75b.2c24fc77.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[83781],{15680:(e,t,a)=>{a.d(t,{xA:()=>p,yg:()=>c});var n=a(96540);function r(e,t,a){return t in 
e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function o(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function i(e){for(var t=1;t=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var l=n.createContext({}),g=function(e){var t=n.useContext(l),a=t;return e&&(a="function"==typeof e?e(t):i(i({},t),e)),a},p=function(e){var t=g(e.components);return n.createElement(l.Provider,{value:t},e.children)},m="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},u=n.forwardRef((function(e,t){var a=e.components,r=e.mdxType,o=e.originalType,l=e.parentName,p=s(e,["components","mdxType","originalType","parentName"]),m=g(a),u=r,c=m["".concat(l,".").concat(u)]||m[u]||d[u]||o;return a?n.createElement(c,i(i({ref:t},p),{},{components:a})):n.createElement(c,i({ref:t},p))}));function c(e,t){var a=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=a.length,i=new Array(o);i[0]=u;var s={};for(var l in t)hasOwnProperty.call(t,l)&&(s[l]=t[l]);s.originalType=e,s[m]="string"==typeof e?e:r,i[1]=s;for(var g=2;g{a.r(t),a.d(t,{assets:()=>l,contentTitle:()=>i,default:()=>d,frontMatter:()=>o,metadata:()=>s,toc:()=>g});var n=a(58168),r=(a(96540),a(15680));const o={title:"Aggregate",id:"sql-aggregate",description:"Understand how to use transformation gems, use expressions, and use variables",sidebar_position:2,tags:["aggregate","groupby","data","gem","transformation"]},i=void 0,s={unversionedId:"SQL/gems/transform/sql-aggregate",id:"SQL/gems/transform/sql-aggregate",title:"Aggregate",description:"Understand how to use transformation gems, use expressions, and use 
variables",source:"@site/docs/SQL/gems/transform/aggregate.md",sourceDirName:"SQL/gems/transform",slug:"/SQL/gems/transform/sql-aggregate",permalink:"/SQL/gems/transform/sql-aggregate",draft:!1,tags:[{label:"aggregate",permalink:"/tags/aggregate"},{label:"groupby",permalink:"/tags/groupby"},{label:"data",permalink:"/tags/data"},{label:"gem",permalink:"/tags/gem"},{label:"transformation",permalink:"/tags/transformation"}],version:"current",sidebarPosition:2,frontMatter:{title:"Aggregate",id:"sql-aggregate",description:"Understand how to use transformation gems, use expressions, and use variables",sidebar_position:2,tags:["aggregate","groupby","data","gem","transformation"]},sidebar:"defaultSidebar",previous:{title:"Transform",permalink:"/SQL/gems/transform/"},next:{title:"Deduplicate",permalink:"/SQL/gems/transform/deduplicate"}},l={},g=[{value:"Using the Gem",id:"using-the-gem",level:2},{value:"Using Expressions",id:"using-expressions",level:2},{value:"GroupBy expression",id:"groupby-expression",level:3},{value:"Aggregate expressions",id:"aggregate-expressions",level:3},{value:"Using Variables",id:"using-variables",level:2}],p={toc:g},m="wrapper";function d(e){let{components:t,...o}=e;return(0,r.yg)(m,(0,n.A)({},p,o,{components:t,mdxType:"MDXLayout"}),(0,r.yg)("h3",null,(0,r.yg)("span",{class:"badge rounded-pill text-bg-light"},"SQL Gem")),(0,r.yg)("p",null,"Together let's deconstruct a commonly used Transformation, the Aggregate Gem. 
Follow along in the ",(0,r.yg)("inlineCode",{parentName:"p"},"HelloWorld_SQL")," Project."),(0,r.yg)("h2",{id:"using-the-gem"},"Using the Gem"),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"1",src:a(92310).A,width:"2880",height:"1084"})),(0,r.yg)("ol",null,(0,r.yg)("li",{parentName:"ol"},(0,r.yg)("strong",{parentName:"li"},"Open")," the HelloWorld_SQL ",(0,r.yg)("a",{parentName:"li",href:"https://app.prophecy.io/metadata"},"Project"),"."),(0,r.yg)("li",{parentName:"ol"},"From the list of Models, select the ",(0,r.yg)("strong",{parentName:"li"},"Orders")," Model. A Model is a series of transformation steps (Gems) that describe how to create a single table or view. The Orders Model defines the steps to create the Orders table."),(0,r.yg)("li",{parentName:"ol"},"Open the ",(0,r.yg)("strong",{parentName:"li"},"Transformation")," dropdown to see the available Transformation Gems. The Aggregate Gem has already been dragged to the canvas and configured in this HelloWorld_SQL example."),(0,r.yg)("li",{parentName:"ol"},"Click the arrow to ",(0,r.yg)("strong",{parentName:"li"},"Run upto the Aggregate Gem"),"."),(0,r.yg)("li",{parentName:"ol"},"Preview a ",(0,r.yg)("strong",{parentName:"li"},"data sample")," before the Aggregate Gem."),(0,r.yg)("li",{parentName:"ol"},"This is the ",(0,r.yg)("inlineCode",{parentName:"li"},"payments")," data sample before the Aggregate Gem. There is one row per payment."),(0,r.yg)("li",{parentName:"ol"},"Click to ",(0,r.yg)("strong",{parentName:"li"},"Run upto the Join Gem"),"."),(0,r.yg)("li",{parentName:"ol"},"Preview a ",(0,r.yg)("strong",{parentName:"li"},"data sample")," after the Aggregate Gem."),(0,r.yg)("li",{parentName:"ol"},"This is the ",(0,r.yg)("inlineCode",{parentName:"li"},"order_payments")," data sample after the Aggregate Gem. 
The individual rows of ",(0,r.yg)("inlineCode",{parentName:"li"},"payments")," have been grouped according to ",(0,r.yg)("inlineCode",{parentName:"li"},"ORDER_ID"),", and the amounts have been summed according to the payment type."),(0,r.yg)("li",{parentName:"ol"},"Let's see how to use expressions. Click to ",(0,r.yg)("strong",{parentName:"li"},"Open"),".")),(0,r.yg)("h2",{id:"using-expressions"},"Using Expressions"),(0,r.yg)("h3",{id:"groupby-expression"},"GroupBy expression"),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"2",src:a(141).A,width:"2880",height:"1958"})),(0,r.yg)("ol",null,(0,r.yg)("li",{parentName:"ol"},"There is one ",(0,r.yg)("strong",{parentName:"li"},"Input")," Dataset, ",(0,r.yg)("inlineCode",{parentName:"li"},"payments"),", and we can see the columns and datatypes below."),(0,r.yg)("li",{parentName:"ol"},"Open the ",(0,r.yg)("strong",{parentName:"li"},"GroupBy")," tab."),(0,r.yg)("li",{parentName:"ol"},"We can see the Gem is configured to group according to the ",(0,r.yg)("inlineCode",{parentName:"li"},"order_id")," column. Just click any column name listed in ",(0,r.yg)("strong",{parentName:"li"},"(1) Input")," to add a column to the GroupBy expressions."),(0,r.yg)("li",{parentName:"ol"},"Syntax ",(0,r.yg)("strong",{parentName:"li"},"errors")," are surfaced here as you're designing your Gem (and Model) on the canvas. That's handy so you don't have to run a Job to discover a typo."),(0,r.yg)("li",{parentName:"ol"},"The ",(0,r.yg)("strong",{parentName:"li"},"Run")," button is available here to test and view data samples. 
This way you can make sure your Aggregate Gem is configured as desired.")),(0,r.yg)("h3",{id:"aggregate-expressions"},"Aggregate expressions"),(0,r.yg)("p",null,"Next we'll walk through the Aggregate tab, where we have a lot more bells and knobs to turn.\n",(0,r.yg)("img",{alt:"3",src:a(89024).A,width:"2880",height:"1958"})),(0,r.yg)("ol",null,(0,r.yg)("li",{parentName:"ol"},"Click the ",(0,r.yg)("strong",{parentName:"li"},"Aggregate")," tab, where we'll define our new column names and data manipulation expressions."),(0,r.yg)("li",{parentName:"ol"},"The list of ",(0,r.yg)("strong",{parentName:"li"},"Expressions")," describe how to manipulate a particular column from the input Dataset."),(0,r.yg)("li",{parentName:"ol"},"The list of ",(0,r.yg)("strong",{parentName:"li"},"Target Columns")," is the list of column names to be manipulated by the Aggregate Gem and included in the Gem's output."),(0,r.yg)("li",{parentName:"ol"},"Click ",(0,r.yg)("strong",{parentName:"li"},"Output")," to see the schema of the Dataset resulting from the Aggregate Gem."),(0,r.yg)("li",{parentName:"ol"},"The ",(0,r.yg)("inlineCode",{parentName:"li"},"order_id")," column is getting passed through the Aggregate step without being changed. Recall this is the column that will be used to group the data. To add any column, just click the column name from the Input list, or start typing the column name and Prophecy Copilot will provide suggestions."),(0,r.yg)("li",{parentName:"ol"},"Since the ",(0,r.yg)("inlineCode",{parentName:"li"},"order_id")," column was ",(0,r.yg)("strong",{parentName:"li"},"(5)selected,")," then this column appears in the output Dataset. It has a number datatype."),(0,r.yg)("li",{parentName:"ol"},"Here is an ",(0,r.yg)("strong",{parentName:"li"},"expression")," that includes some data manipulation logic. The amount is summed according to the payment method. 
",(0,r.yg)("inlineCode",{parentName:"li"},"payment_method")," is being passed as a configurable variable surrounded by curly braces ",(0,r.yg)("inlineCode",{parentName:"li"},"{{ }}"),". We'll see how to configure the variables ",(0,r.yg)("inlineCode",{parentName:"li"},"credit_card"),", ",(0,r.yg)("inlineCode",{parentName:"li"},"coupon"),", ",(0,r.yg)("inlineCode",{parentName:"li"},"bank_transfer"),", ",(0,r.yg)("inlineCode",{parentName:"li"},"gift_card")," in the next section."),(0,r.yg)("li",{parentName:"ol"},"These are the ",(0,r.yg)("strong",{parentName:"li"},"output columns")," according to the ",(0,r.yg)("strong",{parentName:"li"},"(3)Target Column.")," ",(0,r.yg)("inlineCode",{parentName:"li"},"{{ payment_method }}")," is a configurable parameter, and each of the payment methods (e.g. GIFT_CARD) has been appended with the string ",(0,r.yg)("inlineCode",{parentName:"li"},"amount"),". Now we are starting to see how the data sample output from the Aggregate Gem will be constructed."),(0,r.yg)("li",{parentName:"ol"},"Let's ",(0,r.yg)("strong",{parentName:"li"},"AskAI"),' to help write a new expression. Type "Calculate customer size based on the amount purchased." 
Copilot AI generates a SQL expression and we can keep or reject the suggestion.'),(0,r.yg)("li",{parentName:"ol"},"The new expression will be reflected in the Aggregate Gem output, ",(0,r.yg)("inlineCode",{parentName:"li"},"CUSTOMER_SIZE")," column.")),(0,r.yg)("h2",{id:"using-variables"},"Using Variables"),(0,r.yg)("p",null,"Now let's see how to configure the ",(0,r.yg)("inlineCode",{parentName:"p"},"payment_methods")," variable."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"4",src:a(56367).A,width:"2880",height:"1084"})),(0,r.yg)("ol",null,(0,r.yg)("li",{parentName:"ol"},"Click ",(0,r.yg)("strong",{parentName:"li"},"Config")," to open the configuration screen."),(0,r.yg)("li",{parentName:"ol"},"We see the option to apply a configuration at several different ",(0,r.yg)("strong",{parentName:"li"},"levels:")," apply to the entire Model, all the Models in the GitHub folder, or all the Models in the Project. Here we can see there are Configurations that apply to this particular ",(0,r.yg)("inlineCode",{parentName:"li"},"Orders")," Model."),(0,r.yg)("li",{parentName:"ol"},"See the list of ",(0,r.yg)("a",{parentName:"li",href:"https://docs.getdbt.com/reference/configs-and-properties"},"DBT Defined Configs"),'. These are configs every user could employ with their DBT Projects, such as whether to materialize the model as table, view, ephemeral, or incremental. Click the dropdown to select the config of interest, then enter the appropriate value. Hover over the "i" icon for a short description of each DBT Config.'),(0,r.yg)("li",{parentName:"ol"},"See the list of user-defined ",(0,r.yg)("strong",{parentName:"li"},"Variables"),". 
In our HelloWorld_SQL project, the ",(0,r.yg)("inlineCode",{parentName:"li"},"payment_methods")," variable has been defined with the four ",(0,r.yg)("strong",{parentName:"li"},"values")," shown."),(0,r.yg)("li",{parentName:"ol"},"Click ",(0,r.yg)("strong",{parentName:"li"},"Save")," after editing the Config for the Model, Folder, or Project.")),(0,r.yg)("p",null,"Click the code view to see the Config encoded in the ",(0,r.yg)("inlineCode",{parentName:"p"},"dbt_project.yml")," file or the ",(0,r.yg)("inlineCode",{parentName:"p"},"schema.yml/properties.yml")," file. Further information can be found in DBT documentation, as Prophecy's Model Config is based on DBT's ",(0,r.yg)("a",{parentName:"p",href:"https://docs.getdbt.com/reference/model-configs"},"Model Configurations"),"."),(0,r.yg)("p",null,"Using Config variables (and DBT Defined Configs) within a Gem is easy. Just wrap the variable name (e.g. ",(0,r.yg)("inlineCode",{parentName:"p"},"payment_method"),") in curly braces ",(0,r.yg)("inlineCode",{parentName:"p"},"{{ }}")," like this: ",(0,r.yg)("inlineCode",{parentName:"p"},"{{ payment_method }}"),"."),(0,r.yg)("admonition",{type:"info"},(0,r.yg)("p",{parentName:"admonition"},"To learn more about the Aggregate Gem UI, see ",(0,r.yg)("a",{parentName:"p",href:"/concepts/project/gems"},"this page")," which illustrates features common to all ",(0,r.yg)("a",{parentName:"p",href:"/SQL/gems/"},"Gems"),".")),(0,r.yg)("p",null,"Here we used the Aggregate Gem from the HelloWorld_SQL Project as a learning guide. What types of Aggregations will you build? 
",(0,r.yg)("a",{parentName:"p",href:"/getting-started/getting-help"},"Reach out")," with questions and to let us know how you're using Prophecy."))}d.isMDXComponent=!0},92310:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/Snow4.6.1_Aggregate-8acc945fe1ed46a97b9624d9d586f46f.png"},141:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/Snow4.6.2_Aggregate-d8437ffd92ca9d25cdf3d0d28560f2aa.png"},89024:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/Snow4.6.3_Aggregate-3d1d43a68de60bfd3b4e60124fe100a7.png"},56367:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/Snow4.6.4_Aggregate-46e41f2a516d25465ced483161c528c9.png"}}]); \ No newline at end of file diff --git a/assets/js/a135f75b.54f84a18.js b/assets/js/a135f75b.54f84a18.js new file mode 100644 index 0000000000..8390d350c7 --- /dev/null +++ b/assets/js/a135f75b.54f84a18.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[83781],{15680:(e,t,a)=>{a.d(t,{xA:()=>p,yg:()=>c});var n=a(96540);function r(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function o(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function i(e){for(var t=1;t=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var l=n.createContext({}),g=function(e){var t=n.useContext(l),a=t;return e&&(a="function"==typeof e?e(t):i(i({},t),e)),a},p=function(e){var t=g(e.components);return n.createElement(l.Provider,{value:t},e.children)},m="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},u=n.forwardRef((function(e,t){var 
a=e.components,r=e.mdxType,o=e.originalType,l=e.parentName,p=s(e,["components","mdxType","originalType","parentName"]),m=g(a),u=r,c=m["".concat(l,".").concat(u)]||m[u]||d[u]||o;return a?n.createElement(c,i(i({ref:t},p),{},{components:a})):n.createElement(c,i({ref:t},p))}));function c(e,t){var a=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=a.length,i=new Array(o);i[0]=u;var s={};for(var l in t)hasOwnProperty.call(t,l)&&(s[l]=t[l]);s.originalType=e,s[m]="string"==typeof e?e:r,i[1]=s;for(var g=2;g{a.r(t),a.d(t,{assets:()=>l,contentTitle:()=>i,default:()=>d,frontMatter:()=>o,metadata:()=>s,toc:()=>g});var n=a(58168),r=(a(96540),a(15680));const o={title:"Aggregate",id:"sql-aggregate",description:"Understand how to use transformation gems, use expressions, and use variables",sidebar_position:2,tags:["aggregate","groupby","data","gem","transformation"]},i=void 0,s={unversionedId:"SQL/gems/transform/sql-aggregate",id:"SQL/gems/transform/sql-aggregate",title:"Aggregate",description:"Understand how to use transformation gems, use expressions, and use variables",source:"@site/docs/SQL/gems/transform/aggregate.md",sourceDirName:"SQL/gems/transform",slug:"/SQL/gems/transform/sql-aggregate",permalink:"/SQL/gems/transform/sql-aggregate",draft:!1,tags:[{label:"aggregate",permalink:"/tags/aggregate"},{label:"groupby",permalink:"/tags/groupby"},{label:"data",permalink:"/tags/data"},{label:"gem",permalink:"/tags/gem"},{label:"transformation",permalink:"/tags/transformation"}],version:"current",sidebarPosition:2,frontMatter:{title:"Aggregate",id:"sql-aggregate",description:"Understand how to use transformation gems, use expressions, and use variables",sidebar_position:2,tags:["aggregate","groupby","data","gem","transformation"]},sidebar:"defaultSidebar",previous:{title:"Transform",permalink:"/SQL/gems/transform/"},next:{title:"Deduplicate",permalink:"/SQL/gems/transform/deduplicate"}},l={},g=[{value:"Using the Gem",id:"using-the-gem",level:2},{value:"Using 
Expressions",id:"using-expressions",level:2},{value:"GroupBy expression",id:"groupby-expression",level:3},{value:"Aggregate expressions",id:"aggregate-expressions",level:3},{value:"Using Variables",id:"using-variables",level:2}],p={toc:g},m="wrapper";function d(e){let{components:t,...o}=e;return(0,r.yg)(m,(0,n.A)({},p,o,{components:t,mdxType:"MDXLayout"}),(0,r.yg)("h3",null,(0,r.yg)("span",{class:"badge"},"SQL Gem")),(0,r.yg)("p",null,"Together let's deconstruct a commonly used Transformation, the Aggregate Gem. Follow along in the ",(0,r.yg)("inlineCode",{parentName:"p"},"HelloWorld_SQL")," Project."),(0,r.yg)("h2",{id:"using-the-gem"},"Using the Gem"),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"1",src:a(92310).A,width:"2880",height:"1084"})),(0,r.yg)("ol",null,(0,r.yg)("li",{parentName:"ol"},(0,r.yg)("strong",{parentName:"li"},"Open")," the HelloWorld_SQL ",(0,r.yg)("a",{parentName:"li",href:"https://app.prophecy.io/metadata"},"Project"),"."),(0,r.yg)("li",{parentName:"ol"},"From the list of Models, select the ",(0,r.yg)("strong",{parentName:"li"},"Orders")," Model. A Model is a series of transformation steps (Gems) that describe how to create a single table or view. The Orders Model defines the steps to create the Orders table."),(0,r.yg)("li",{parentName:"ol"},"Open the ",(0,r.yg)("strong",{parentName:"li"},"Transformation")," dropdown to see the available Transformation Gems. The Aggregate Gem has already been dragged to the canvas and configured in this HelloWorld_SQL example."),(0,r.yg)("li",{parentName:"ol"},"Click the arrow to ",(0,r.yg)("strong",{parentName:"li"},"Run upto the Aggregate Gem"),"."),(0,r.yg)("li",{parentName:"ol"},"Preview a ",(0,r.yg)("strong",{parentName:"li"},"data sample")," before the Aggregate Gem."),(0,r.yg)("li",{parentName:"ol"},"This is the ",(0,r.yg)("inlineCode",{parentName:"li"},"payments")," data sample before the Aggregate Gem. 
There is one row per payment."),(0,r.yg)("li",{parentName:"ol"},"Click to ",(0,r.yg)("strong",{parentName:"li"},"Run upto the Join Gem"),"."),(0,r.yg)("li",{parentName:"ol"},"Preview a ",(0,r.yg)("strong",{parentName:"li"},"data sample")," after the Aggregate Gem."),(0,r.yg)("li",{parentName:"ol"},"This is the ",(0,r.yg)("inlineCode",{parentName:"li"},"order_payments")," data sample after the Aggregate Gem. The individual rows of ",(0,r.yg)("inlineCode",{parentName:"li"},"payments")," have been grouped according to ",(0,r.yg)("inlineCode",{parentName:"li"},"ORDER_ID"),", and the amounts have been summed according to the payment type."),(0,r.yg)("li",{parentName:"ol"},"Let's see how to use expressions. Click to ",(0,r.yg)("strong",{parentName:"li"},"Open"),".")),(0,r.yg)("h2",{id:"using-expressions"},"Using Expressions"),(0,r.yg)("h3",{id:"groupby-expression"},"GroupBy expression"),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"2",src:a(141).A,width:"2880",height:"1958"})),(0,r.yg)("ol",null,(0,r.yg)("li",{parentName:"ol"},"There is one ",(0,r.yg)("strong",{parentName:"li"},"Input")," Dataset, ",(0,r.yg)("inlineCode",{parentName:"li"},"payments"),", and we can see the columns and datatypes below."),(0,r.yg)("li",{parentName:"ol"},"Open the ",(0,r.yg)("strong",{parentName:"li"},"GroupBy")," tab."),(0,r.yg)("li",{parentName:"ol"},"We can see the Gem is configured to group according to the ",(0,r.yg)("inlineCode",{parentName:"li"},"order_id")," column. Just click any column name listed in ",(0,r.yg)("strong",{parentName:"li"},"(1) Input")," to add a column to the GroupBy expressions."),(0,r.yg)("li",{parentName:"ol"},"Syntax ",(0,r.yg)("strong",{parentName:"li"},"errors")," are surfaced here as you're designing your Gem (and Model) on the canvas. That's handy so you don't have to run a Job to discover a typo."),(0,r.yg)("li",{parentName:"ol"},"The ",(0,r.yg)("strong",{parentName:"li"},"Run")," button is available here to test and view data samples. 
This way you can make sure your Aggregate Gem is configured as desired.")),(0,r.yg)("h3",{id:"aggregate-expressions"},"Aggregate expressions"),(0,r.yg)("p",null,"Next we'll walk through the Aggregate tab, where we have a lot more bells and knobs to turn.\n",(0,r.yg)("img",{alt:"3",src:a(89024).A,width:"2880",height:"1958"})),(0,r.yg)("ol",null,(0,r.yg)("li",{parentName:"ol"},"Click the ",(0,r.yg)("strong",{parentName:"li"},"Aggregate")," tab, where we'll define our new column names and data manipulation expressions."),(0,r.yg)("li",{parentName:"ol"},"The list of ",(0,r.yg)("strong",{parentName:"li"},"Expressions")," describe how to manipulate a particular column from the input Dataset."),(0,r.yg)("li",{parentName:"ol"},"The list of ",(0,r.yg)("strong",{parentName:"li"},"Target Columns")," is the list of column names to be manipulated by the Aggregate Gem and included in the Gem's output."),(0,r.yg)("li",{parentName:"ol"},"Click ",(0,r.yg)("strong",{parentName:"li"},"Output")," to see the schema of the Dataset resulting from the Aggregate Gem."),(0,r.yg)("li",{parentName:"ol"},"The ",(0,r.yg)("inlineCode",{parentName:"li"},"order_id")," column is getting passed through the Aggregate step without being changed. Recall this is the column that will be used to group the data. To add any column, just click the column name from the Input list, or start typing the column name and Prophecy Copilot will provide suggestions."),(0,r.yg)("li",{parentName:"ol"},"Since the ",(0,r.yg)("inlineCode",{parentName:"li"},"order_id")," column was ",(0,r.yg)("strong",{parentName:"li"},"(5)selected,")," then this column appears in the output Dataset. It has a number datatype."),(0,r.yg)("li",{parentName:"ol"},"Here is an ",(0,r.yg)("strong",{parentName:"li"},"expression")," that includes some data manipulation logic. The amount is summed according to the payment method. 
",(0,r.yg)("inlineCode",{parentName:"li"},"payment_method")," is being passed as a configurable variable surrounded by curly braces ",(0,r.yg)("inlineCode",{parentName:"li"},"{{ }}"),". We'll see how to configure the variables ",(0,r.yg)("inlineCode",{parentName:"li"},"credit_card"),", ",(0,r.yg)("inlineCode",{parentName:"li"},"coupon"),", ",(0,r.yg)("inlineCode",{parentName:"li"},"bank_transfer"),", ",(0,r.yg)("inlineCode",{parentName:"li"},"gift_card")," in the next section."),(0,r.yg)("li",{parentName:"ol"},"These are the ",(0,r.yg)("strong",{parentName:"li"},"output columns")," according to the ",(0,r.yg)("strong",{parentName:"li"},"(3)Target Column.")," ",(0,r.yg)("inlineCode",{parentName:"li"},"{{ payment_method }}")," is a configurable parameter, and each of the payment methods (e.g. GIFT_CARD) has been appended with the string ",(0,r.yg)("inlineCode",{parentName:"li"},"amount"),". Now we are starting to see how the data sample output from the Aggregate Gem will be constructed."),(0,r.yg)("li",{parentName:"ol"},"Let's ",(0,r.yg)("strong",{parentName:"li"},"AskAI"),' to help write a new expression. Type "Calculate customer size based on the amount purchased." 
Copilot AI generates a SQL expression and we can keep or reject the suggestion.'),(0,r.yg)("li",{parentName:"ol"},"The new expression will be reflected in the Aggregate Gem output, ",(0,r.yg)("inlineCode",{parentName:"li"},"CUSTOMER_SIZE")," column.")),(0,r.yg)("h2",{id:"using-variables"},"Using Variables"),(0,r.yg)("p",null,"Now let's see how to configure the ",(0,r.yg)("inlineCode",{parentName:"p"},"payment_methods")," variable."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"4",src:a(56367).A,width:"2880",height:"1084"})),(0,r.yg)("ol",null,(0,r.yg)("li",{parentName:"ol"},"Click ",(0,r.yg)("strong",{parentName:"li"},"Config")," to open the configuration screen."),(0,r.yg)("li",{parentName:"ol"},"We see the option to apply a configuration at several different ",(0,r.yg)("strong",{parentName:"li"},"levels:")," apply to the entire Model, all the Models in the GitHub folder, or all the Models in the Project. Here we can see there are Configurations that apply to this particular ",(0,r.yg)("inlineCode",{parentName:"li"},"Orders")," Model."),(0,r.yg)("li",{parentName:"ol"},"See the list of ",(0,r.yg)("a",{parentName:"li",href:"https://docs.getdbt.com/reference/configs-and-properties"},"DBT Defined Configs"),'. These are configs every user could employ with their DBT Projects, such as whether to materialize the model as table, view, ephemeral, or incremental. Click the dropdown to select the config of interest, then enter the appropriate value. Hover over the "i" icon for a short description of each DBT Config.'),(0,r.yg)("li",{parentName:"ol"},"See the list of user-defined ",(0,r.yg)("strong",{parentName:"li"},"Variables"),". 
In our HelloWorld_SQL project, the ",(0,r.yg)("inlineCode",{parentName:"li"},"payment_methods")," variable has been defined with the four ",(0,r.yg)("strong",{parentName:"li"},"values")," shown."),(0,r.yg)("li",{parentName:"ol"},"Click ",(0,r.yg)("strong",{parentName:"li"},"Save")," after editing the Config for the Model, Folder, or Project.")),(0,r.yg)("p",null,"Click the code view to see the Config encoded in the ",(0,r.yg)("inlineCode",{parentName:"p"},"dbt_project.yml")," file or the ",(0,r.yg)("inlineCode",{parentName:"p"},"schema.yml/properties.yml")," file. Further information can be found in DBT documentation, as Prophecy's Model Config is based on DBT's ",(0,r.yg)("a",{parentName:"p",href:"https://docs.getdbt.com/reference/model-configs"},"Model Configurations"),"."),(0,r.yg)("p",null,"Using Config variables (and DBT Defined Configs) within a Gem is easy. Just wrap the variable name (e.g. ",(0,r.yg)("inlineCode",{parentName:"p"},"payment_method"),") in curly braces ",(0,r.yg)("inlineCode",{parentName:"p"},"{{ }}")," like this: ",(0,r.yg)("inlineCode",{parentName:"p"},"{{ payment_method }}"),"."),(0,r.yg)("admonition",{type:"info"},(0,r.yg)("p",{parentName:"admonition"},"To learn more about the Aggregate Gem UI, see ",(0,r.yg)("a",{parentName:"p",href:"/concepts/project/gems"},"this page")," which illustrates features common to all ",(0,r.yg)("a",{parentName:"p",href:"/SQL/gems/"},"Gems"),".")),(0,r.yg)("p",null,"Here we used the Aggregate Gem from the HelloWorld_SQL Project as a learning guide. What types of Aggregations will you build? 
",(0,r.yg)("a",{parentName:"p",href:"/getting-started/getting-help"},"Reach out")," with questions and to let us know how you're using Prophecy."))}d.isMDXComponent=!0},92310:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/Snow4.6.1_Aggregate-8acc945fe1ed46a97b9624d9d586f46f.png"},141:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/Snow4.6.2_Aggregate-d8437ffd92ca9d25cdf3d0d28560f2aa.png"},89024:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/Snow4.6.3_Aggregate-3d1d43a68de60bfd3b4e60124fe100a7.png"},56367:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/Snow4.6.4_Aggregate-46e41f2a516d25465ced483161c528c9.png"}}]); \ No newline at end of file diff --git a/assets/js/a5d53aff.43621ef8.js b/assets/js/a5d53aff.a04c4099.js similarity index 68% rename from assets/js/a5d53aff.43621ef8.js rename to assets/js/a5d53aff.a04c4099.js index 698ab6f815..f7ac7492be 100644 --- a/assets/js/a5d53aff.43621ef8.js +++ b/assets/js/a5d53aff.a04c4099.js @@ -1 +1 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[98692],{15680:(e,t,n)=>{n.d(t,{xA:()=>u,yg:()=>g});var r=n(96540);function o(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function s(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function a(e){for(var t=1;t=0||(o[n]=e[n]);return o}(e,t);if(Object.getOwnPropertySymbols){var s=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(o[n]=e[n])}return o}var c=r.createContext({}),l=function(e){var t=r.useContext(c),n=t;return e&&(n="function"==typeof e?e(t):a(a({},t),e)),n},u=function(e){var t=l(e.components);return r.createElement(c.Provider,{value:t},e.children)},m="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return 
r.createElement(r.Fragment,{},t)}},d=r.forwardRef((function(e,t){var n=e.components,o=e.mdxType,s=e.originalType,c=e.parentName,u=i(e,["components","mdxType","originalType","parentName"]),m=l(n),d=o,g=m["".concat(c,".").concat(d)]||m[d]||p[d]||s;return n?r.createElement(g,a(a({ref:t},u),{},{components:n})):r.createElement(g,a({ref:t},u))}));function g(e,t){var n=arguments,o=t&&t.mdxType;if("string"==typeof e||o){var s=n.length,a=new Array(s);a[0]=d;var i={};for(var c in t)hasOwnProperty.call(t,c)&&(i[c]=t[c]);i.originalType=e,i[m]="string"==typeof e?e:o,a[1]=i;for(var l=2;l{n.r(t),n.d(t,{assets:()=>c,contentTitle:()=>a,default:()=>p,frontMatter:()=>s,metadata:()=>i,toc:()=>l});var r=n(58168),o=(n(96540),n(15680));const s={title:"Custom",id:"custom-sql-gems",description:"Gems that don't fit an existing category",sidebar_position:12,tags:["custom","sql"]},a=void 0,i={unversionedId:"SQL/gems/custom/custom-sql-gems",id:"SQL/gems/custom/custom-sql-gems",title:"Custom",description:"Gems that don't fit an existing category",source:"@site/docs/SQL/gems/custom/custom.md",sourceDirName:"SQL/gems/custom",slug:"/SQL/gems/custom/",permalink:"/SQL/gems/custom/",draft:!1,tags:[{label:"custom",permalink:"/tags/custom"},{label:"sql",permalink:"/tags/sql"}],version:"current",sidebarPosition:12,frontMatter:{title:"Custom",id:"custom-sql-gems",description:"Gems that don't fit an existing category",sidebar_position:12,tags:["custom","sql"]},sidebar:"defaultSidebar",previous:{title:"Join",permalink:"/SQL/gems/data-joins"},next:{title:"Subgraph",permalink:"/SQL/gems/subgraph/"}},c={},l=[],u={toc:l},m="wrapper";function p(e){let{components:t,...n}=e;return(0,o.yg)(m,(0,r.A)({},u,n,{components:t,mdxType:"MDXLayout"}),(0,o.yg)("h3",null,(0,o.yg)("span",{class:"badge rounded-pill text-bg-light"},"SQL Gem")),(0,o.yg)("admonition",{type:"caution"},(0,o.yg)("p",{parentName:"admonition"},"This page about Custom SQL Gems is under construction. 
Please pardon our dust.")),(0,o.yg)("p",null,"Prophecy allows you to define new functions and gems by leveraging dbt macros as the underlying format. Both functions and gems can be easily defined visually and in code."))}p.isMDXComponent=!0}}]); \ No newline at end of file +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[98692],{15680:(e,t,n)=>{n.d(t,{xA:()=>l,yg:()=>g});var r=n(96540);function o(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function s(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function a(e){for(var t=1;t=0||(o[n]=e[n]);return o}(e,t);if(Object.getOwnPropertySymbols){var s=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(o[n]=e[n])}return o}var c=r.createContext({}),u=function(e){var t=r.useContext(c),n=t;return e&&(n="function"==typeof e?e(t):a(a({},t),e)),n},l=function(e){var t=u(e.components);return r.createElement(c.Provider,{value:t},e.children)},m="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},d=r.forwardRef((function(e,t){var n=e.components,o=e.mdxType,s=e.originalType,c=e.parentName,l=i(e,["components","mdxType","originalType","parentName"]),m=u(n),d=o,g=m["".concat(c,".").concat(d)]||m[d]||p[d]||s;return n?r.createElement(g,a(a({ref:t},l),{},{components:n})):r.createElement(g,a({ref:t},l))}));function g(e,t){var n=arguments,o=t&&t.mdxType;if("string"==typeof e||o){var s=n.length,a=new Array(s);a[0]=d;var i={};for(var c in t)hasOwnProperty.call(t,c)&&(i[c]=t[c]);i.originalType=e,i[m]="string"==typeof e?e:o,a[1]=i;for(var u=2;u{n.r(t),n.d(t,{assets:()=>c,contentTitle:()=>a,default:()=>p,frontMatter:()=>s,metadata:()=>i,toc:()=>u});var r=n(58168),o=(n(96540),n(15680));const 
s={title:"Custom",id:"custom-sql-gems",description:"Gems that don't fit an existing category",sidebar_position:12,tags:["custom","sql"]},a=void 0,i={unversionedId:"SQL/gems/custom/custom-sql-gems",id:"SQL/gems/custom/custom-sql-gems",title:"Custom",description:"Gems that don't fit an existing category",source:"@site/docs/SQL/gems/custom/custom.md",sourceDirName:"SQL/gems/custom",slug:"/SQL/gems/custom/",permalink:"/SQL/gems/custom/",draft:!1,tags:[{label:"custom",permalink:"/tags/custom"},{label:"sql",permalink:"/tags/sql"}],version:"current",sidebarPosition:12,frontMatter:{title:"Custom",id:"custom-sql-gems",description:"Gems that don't fit an existing category",sidebar_position:12,tags:["custom","sql"]},sidebar:"defaultSidebar",previous:{title:"Join",permalink:"/SQL/gems/data-joins"},next:{title:"Subgraph",permalink:"/SQL/gems/subgraph/"}},c={},u=[],l={toc:u},m="wrapper";function p(e){let{components:t,...n}=e;return(0,o.yg)(m,(0,r.A)({},l,n,{components:t,mdxType:"MDXLayout"}),(0,o.yg)("h3",null,(0,o.yg)("span",{class:"badge"},"SQL Gem")),(0,o.yg)("admonition",{type:"caution"},(0,o.yg)("p",{parentName:"admonition"},"This page about Custom SQL Gems is under construction. Please pardon our dust.")),(0,o.yg)("p",null,"Prophecy allows you to define new functions and gems by leveraging dbt macros as the underlying format. 
Both functions and gems can be easily defined visually and in code."))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/c327a517.6c3b4d95.js b/assets/js/c327a517.4088179e.js similarity index 60% rename from assets/js/c327a517.6c3b4d95.js rename to assets/js/c327a517.4088179e.js index f7482c2bd2..03546bed7c 100644 --- a/assets/js/c327a517.6c3b4d95.js +++ b/assets/js/c327a517.4088179e.js @@ -1 +1 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[64738],{15680:(e,t,a)=>{a.d(t,{xA:()=>c,yg:()=>g});var r=a(96540);function n(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function o(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,r)}return a}function l(e){for(var t=1;t=0||(n[a]=e[a]);return n}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(n[a]=e[a])}return n}var i=r.createContext({}),u=function(e){var t=r.useContext(i),a=t;return e&&(a="function"==typeof e?e(t):l(l({},t),e)),a},c=function(e){var t=u(e.components);return r.createElement(i.Provider,{value:t},e.children)},m="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},d=r.forwardRef((function(e,t){var a=e.components,n=e.mdxType,o=e.originalType,i=e.parentName,c=s(e,["components","mdxType","originalType","parentName"]),m=u(a),d=n,g=m["".concat(i,".").concat(d)]||m[d]||p[d]||o;return a?r.createElement(g,l(l({ref:t},c),{},{components:a})):r.createElement(g,l({ref:t},c))}));function g(e,t){var a=arguments,n=t&&t.mdxType;if("string"==typeof e||n){var o=a.length,l=new Array(o);l[0]=d;var s={};for(var i in t)hasOwnProperty.call(t,i)&&(s[i]=t[i]);s.originalType=e,s[m]="string"==typeof e?e:n,l[1]=s;for(var 
u=2;u{a.d(t,{A:()=>l});var r=a(96540),n=a(20053);const o={tabItem:"tabItem_Ymn6"};function l(e){let{children:t,hidden:a,className:l}=e;return r.createElement("div",{role:"tabpanel",className:(0,n.A)(o.tabItem,l),hidden:a},t)}},11470:(e,t,a)=>{a.d(t,{A:()=>N});var r=a(58168),n=a(96540),o=a(20053),l=a(23104),s=a(56347),i=a(57485),u=a(31682),c=a(89466);function m(e){return function(e){return n.Children.map(e,(e=>{if(!e||(0,n.isValidElement)(e)&&function(e){const{props:t}=e;return!!t&&"object"==typeof t&&"value"in t}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:t,label:a,attributes:r,default:n}}=e;return{value:t,label:a,attributes:r,default:n}}))}function p(e){const{values:t,children:a}=e;return(0,n.useMemo)((()=>{const e=t??m(a);return function(e){const t=(0,u.X)(e,((e,t)=>e.value===t.value));if(t.length>0)throw new Error(`Docusaurus error: Duplicate values "${t.map((e=>e.value)).join(", ")}" found in . Every value needs to be unique.`)}(e),e}),[t,a])}function d(e){let{value:t,tabValues:a}=e;return a.some((e=>e.value===t))}function g(e){let{queryString:t=!1,groupId:a}=e;const r=(0,s.W6)(),o=function(e){let{queryString:t=!1,groupId:a}=e;if("string"==typeof t)return t;if(!1===t)return null;if(!0===t&&!a)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. 
You can also provide an explicit value such as queryString="my-search-param".');return a??null}({queryString:t,groupId:a});return[(0,i.aZ)(o),(0,n.useCallback)((e=>{if(!o)return;const t=new URLSearchParams(r.location.search);t.set(o,e),r.replace({...r.location,search:t.toString()})}),[o,r])]}function f(e){const{defaultValue:t,queryString:a=!1,groupId:r}=e,o=p(e),[l,s]=(0,n.useState)((()=>function(e){let{defaultValue:t,tabValues:a}=e;if(0===a.length)throw new Error("Docusaurus error: the component requires at least one children component");if(t){if(!d({value:t,tabValues:a}))throw new Error(`Docusaurus error: The has a defaultValue "${t}" but none of its children has the corresponding value. Available values are: ${a.map((e=>e.value)).join(", ")}. If you intend to show no default tab, use defaultValue={null} instead.`);return t}const r=a.find((e=>e.default))??a[0];if(!r)throw new Error("Unexpected error: 0 tabValues");return r.value}({defaultValue:t,tabValues:o}))),[i,u]=g({queryString:a,groupId:r}),[m,f]=function(e){let{groupId:t}=e;const a=function(e){return e?`docusaurus.tab.${e}`:null}(t),[r,o]=(0,c.Dv)(a);return[r,(0,n.useCallback)((e=>{a&&o.set(e)}),[a,o])]}({groupId:r}),y=(()=>{const e=i??m;return d({value:e,tabValues:o})?e:null})();(0,n.useLayoutEffect)((()=>{y&&s(y)}),[y]);return{selectedValue:l,selectValue:(0,n.useCallback)((e=>{if(!d({value:e,tabValues:o}))throw new Error(`Can't select invalid tab value=${e}`);s(e),u(e),f(e)}),[u,f,o]),tabValues:o}}var y=a(92303);const b={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function h(e){let{className:t,block:a,selectedValue:s,selectValue:i,tabValues:u}=e;const c=[],{blockElementScrollPositionUntilNextRender:m}=(0,l.a_)(),p=e=>{const t=e.currentTarget,a=c.indexOf(t),r=u[a].value;r!==s&&(m(t),i(r))},d=e=>{let t=null;switch(e.key){case"Enter":p(e);break;case"ArrowRight":{const a=c.indexOf(e.currentTarget)+1;t=c[a]??c[0];break}case"ArrowLeft":{const 
a=c.indexOf(e.currentTarget)-1;t=c[a]??c[c.length-1];break}}t?.focus()};return n.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,o.A)("tabs",{"tabs--block":a},t)},u.map((e=>{let{value:t,label:a,attributes:l}=e;return n.createElement("li",(0,r.A)({role:"tab",tabIndex:s===t?0:-1,"aria-selected":s===t,key:t,ref:e=>c.push(e),onKeyDown:d,onClick:p},l,{className:(0,o.A)("tabs__item",b.tabItem,l?.className,{"tabs__item--active":s===t})}),a??t)})))}function v(e){let{lazy:t,children:a,selectedValue:r}=e;const o=(Array.isArray(a)?a:[a]).filter(Boolean);if(t){const e=o.find((e=>e.props.value===r));return e?(0,n.cloneElement)(e,{className:"margin-top--md"}):null}return n.createElement("div",{className:"margin-top--md"},o.map(((e,t)=>(0,n.cloneElement)(e,{key:t,hidden:e.props.value!==r}))))}function w(e){const t=f(e);return n.createElement("div",{className:(0,o.A)("tabs-container",b.tabList)},n.createElement(h,(0,r.A)({},e,t)),n.createElement(v,(0,r.A)({},e,t)))}function N(e){const t=(0,y.A)();return n.createElement(w,(0,r.A)({key:String(t)},e))}},84958:(e,t,a)=>{a.r(t),a.d(t,{assets:()=>c,contentTitle:()=>i,default:()=>g,frontMatter:()=>s,metadata:()=>u,toc:()=>m});var r=a(58168),n=(a(96540),a(15680)),o=a(11470),l=a(19365);const s={sidebar_position:1,title:"Reformat",id:"reformat",description:"Select one or more columns or values using expressions and functions.",tags:["gems","select","reformat","transform"]},i=void 0,u={unversionedId:"Spark/gems/transform/reformat",id:"Spark/gems/transform/reformat",title:"Reformat",description:"Select one or more columns or values using expressions and 
functions.",source:"@site/docs/Spark/gems/transform/reformat.md",sourceDirName:"Spark/gems/transform",slug:"/Spark/gems/transform/reformat",permalink:"/Spark/gems/transform/reformat",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"select",permalink:"/tags/select"},{label:"reformat",permalink:"/tags/reformat"},{label:"transform",permalink:"/tags/transform"}],version:"current",sidebarPosition:1,frontMatter:{sidebar_position:1,title:"Reformat",id:"reformat",description:"Select one or more columns or values using expressions and functions.",tags:["gems","select","reformat","transform"]},sidebar:"defaultSidebar",previous:{title:"Transform",permalink:"/Spark/gems/transform/"},next:{title:"Filter",permalink:"/Spark/gems/transform/filter"}},c={},m=[{value:"Parameters",id:"parameters",level:2},{value:"Example",id:"example",level:2},{value:"Spark Code",id:"spark-code",level:2},{value:"Advanced Import",id:"advanced-import",level:2},{value:"Using Advanced Import",id:"using-advanced-import",level:3},{value:"Format",id:"format",level:3}],p={toc:m},d="wrapper";function g(e){let{components:t,...s}=e;return(0,n.yg)(d,(0,r.A)({},p,s,{components:t,mdxType:"MDXLayout"}),(0,n.yg)("h3",null,(0,n.yg)("span",{class:"badge rounded-pill text-bg-light"},"Spark Gem")),(0,n.yg)("p",null,"Transforms one or more column names or values by using expressions and/or functions. 
It's useful when we need to extract only the required columns or make changes column-wise."),(0,n.yg)("h2",{id:"parameters"},"Parameters"),(0,n.yg)("table",null,(0,n.yg)("thead",{parentName:"table"},(0,n.yg)("tr",{parentName:"thead"},(0,n.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,n.yg)("th",{parentName:"tr",align:"left"},"Description"),(0,n.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,n.yg)("tbody",{parentName:"table"},(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},"DataFrame"),(0,n.yg)("td",{parentName:"tr",align:"left"},"Input DataFrame on which changes are required"),(0,n.yg)("td",{parentName:"tr",align:"left"},"True")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},"Target column"),(0,n.yg)("td",{parentName:"tr",align:"left"},"Output column name"),(0,n.yg)("td",{parentName:"tr",align:"left"},"False")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},"Expression"),(0,n.yg)("td",{parentName:"tr",align:"left"},"Expression to compute target column"),(0,n.yg)("td",{parentName:"tr",align:"left"},"Required if a ",(0,n.yg)("inlineCode",{parentName:"td"},"Target column")," is present")))),(0,n.yg)("admonition",{type:"info"},(0,n.yg)("p",{parentName:"admonition"},"If no columns are selected, then all columns are passed through to the output")),(0,n.yg)("h2",{id:"example"},"Example"),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"Example usage of Reformat",src:a(51611).A,width:"1209",height:"724"})),(0,n.yg)("h2",{id:"spark-code"},"Spark Code"),(0,n.yg)("p",null,"Reformat converts to a SQL ",(0,n.yg)("inlineCode",{parentName:"p"},"Select")," or in relational terms into a projection, unlike ",(0,n.yg)("inlineCode",{parentName:"p"},"SchemaTransform")," Gem which uses underlying ",(0,n.yg)("inlineCode",{parentName:"p"},"withColumn")," 
construct"),(0,n.yg)(o.A,{mdxType:"Tabs"},(0,n.yg)(l.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-py"},'def Reformat(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return in0.select(\n col("id"),\n col("email").alias("email_address"),\n col("name"),\n col("updated_at"),\n concat_ws("$$$", col("address_line1"), col("address_line2"), col("postal_code"))\n .alias("address_string")\n )\n'))),(0,n.yg)(l.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-scala"},'object Reformat {\n\n def apply(spark: SparkSession, in: DataFrame): DataFrame =\n in.select(\n col("id"),\n col("email").as("email_address"),\n col("name"),\n col("updated_at"),\n expr("concat_ws(\'$$$\', address_line1, address_line2, postal_code)").as("address_string")\n )\n }\n\n')))),(0,n.yg)("h2",{id:"advanced-import"},"Advanced Import"),(0,n.yg)("p",null,"The Advanced Import feature allows you to bulk import statements that are structured similarly to CSV/TSV files. 
This can be useful if you have your expressions/transformation logic in another format and just want to quickly configure a ",(0,n.yg)("inlineCode",{parentName:"p"},"Reformat")," Gem based on this logic."),(0,n.yg)("h3",{id:"using-advanced-import"},"Using Advanced Import"),(0,n.yg)("ol",null,(0,n.yg)("li",{parentName:"ol"},"Click the ",(0,n.yg)("inlineCode",{parentName:"li"},"Advanced")," button in the `Reformat UI")),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"Advanced import toggle",src:a(45497).A,width:"2090",height:"1395"})),(0,n.yg)("ol",{start:2},(0,n.yg)("li",{parentName:"ol"},"Enter the expressions into the text area using the format as described below:")),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"Advanced import mode",src:a(69858).A,width:"2618",height:"2026"})),(0,n.yg)("ol",{start:3},(0,n.yg)("li",{parentName:"ol"},"Use the button at the top (labeled ",(0,n.yg)("inlineCode",{parentName:"li"},"Expressions"),") to switch back to the expressions view. This will translate the expressions from the CSV format to the table format and will show any errors detected.")),(0,n.yg)("h3",{id:"format"},"Format"),(0,n.yg)("p",null,"The format of these expressions is ",(0,n.yg)("inlineCode",{parentName:"p"},"target_name,target_expr"),", where ",(0,n.yg)("inlineCode",{parentName:"p"},"target_name")," is the desired new column name and ",(0,n.yg)("inlineCode",{parentName:"p"},"target_expr")," is the Spark expression that will be used to generate the new column."),(0,n.yg)("admonition",{type:"caution"},(0,n.yg)("p",{parentName:"admonition"},"For ",(0,n.yg)("inlineCode",{parentName:"p"},"target_expr")," values that contain a comma ",(0,n.yg)("inlineCode",{parentName:"p"},",")," or span multiple lines, you must surround them by ",(0,n.yg)("inlineCode",{parentName:"p"},"``")," on either side. 
For example:"),(0,n.yg)("pre",{parentName:"admonition"},(0,n.yg)("code",{parentName:"pre"},"customer_id,customer_id\nfull_name,``concat(first_name, ' ', last_name)``\n"))))}g.isMDXComponent=!0},45497:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/reformat_advanced-c65c99761ced1311392e26c6c335bf7b.png"},69858:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/reformat_advanced_2-c906ca7673c85cf3c013bdd6923d471b.png"},51611:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/reformat_eg_1-9a9344e1d3174c50427768400047df6c.png"}}]); \ No newline at end of file +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[64738],{15680:(e,t,a)=>{a.d(t,{xA:()=>c,yg:()=>g});var r=a(96540);function n(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function o(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,r)}return a}function l(e){for(var t=1;t=0||(n[a]=e[a]);return n}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(n[a]=e[a])}return n}var i=r.createContext({}),u=function(e){var t=r.useContext(i),a=t;return e&&(a="function"==typeof e?e(t):l(l({},t),e)),a},c=function(e){var t=u(e.components);return r.createElement(i.Provider,{value:t},e.children)},m="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},d=r.forwardRef((function(e,t){var a=e.components,n=e.mdxType,o=e.originalType,i=e.parentName,c=s(e,["components","mdxType","originalType","parentName"]),m=u(a),d=n,g=m["".concat(i,".").concat(d)]||m[d]||p[d]||o;return a?r.createElement(g,l(l({ref:t},c),{},{components:a})):r.createElement(g,l({ref:t},c))}));function g(e,t){var a=arguments,n=t&&t.mdxType;if("string"==typeof e||n){var o=a.length,l=new 
Array(o);l[0]=d;var s={};for(var i in t)hasOwnProperty.call(t,i)&&(s[i]=t[i]);s.originalType=e,s[m]="string"==typeof e?e:n,l[1]=s;for(var u=2;u{a.d(t,{A:()=>l});var r=a(96540),n=a(20053);const o={tabItem:"tabItem_Ymn6"};function l(e){let{children:t,hidden:a,className:l}=e;return r.createElement("div",{role:"tabpanel",className:(0,n.A)(o.tabItem,l),hidden:a},t)}},11470:(e,t,a)=>{a.d(t,{A:()=>N});var r=a(58168),n=a(96540),o=a(20053),l=a(23104),s=a(56347),i=a(57485),u=a(31682),c=a(89466);function m(e){return function(e){return n.Children.map(e,(e=>{if(!e||(0,n.isValidElement)(e)&&function(e){const{props:t}=e;return!!t&&"object"==typeof t&&"value"in t}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:t,label:a,attributes:r,default:n}}=e;return{value:t,label:a,attributes:r,default:n}}))}function p(e){const{values:t,children:a}=e;return(0,n.useMemo)((()=>{const e=t??m(a);return function(e){const t=(0,u.X)(e,((e,t)=>e.value===t.value));if(t.length>0)throw new Error(`Docusaurus error: Duplicate values "${t.map((e=>e.value)).join(", ")}" found in . Every value needs to be unique.`)}(e),e}),[t,a])}function d(e){let{value:t,tabValues:a}=e;return a.some((e=>e.value===t))}function g(e){let{queryString:t=!1,groupId:a}=e;const r=(0,s.W6)(),o=function(e){let{queryString:t=!1,groupId:a}=e;if("string"==typeof t)return t;if(!1===t)return null;if(!0===t&&!a)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. 
You can also provide an explicit value such as queryString="my-search-param".');return a??null}({queryString:t,groupId:a});return[(0,i.aZ)(o),(0,n.useCallback)((e=>{if(!o)return;const t=new URLSearchParams(r.location.search);t.set(o,e),r.replace({...r.location,search:t.toString()})}),[o,r])]}function f(e){const{defaultValue:t,queryString:a=!1,groupId:r}=e,o=p(e),[l,s]=(0,n.useState)((()=>function(e){let{defaultValue:t,tabValues:a}=e;if(0===a.length)throw new Error("Docusaurus error: the component requires at least one children component");if(t){if(!d({value:t,tabValues:a}))throw new Error(`Docusaurus error: The has a defaultValue "${t}" but none of its children has the corresponding value. Available values are: ${a.map((e=>e.value)).join(", ")}. If you intend to show no default tab, use defaultValue={null} instead.`);return t}const r=a.find((e=>e.default))??a[0];if(!r)throw new Error("Unexpected error: 0 tabValues");return r.value}({defaultValue:t,tabValues:o}))),[i,u]=g({queryString:a,groupId:r}),[m,f]=function(e){let{groupId:t}=e;const a=function(e){return e?`docusaurus.tab.${e}`:null}(t),[r,o]=(0,c.Dv)(a);return[r,(0,n.useCallback)((e=>{a&&o.set(e)}),[a,o])]}({groupId:r}),y=(()=>{const e=i??m;return d({value:e,tabValues:o})?e:null})();(0,n.useLayoutEffect)((()=>{y&&s(y)}),[y]);return{selectedValue:l,selectValue:(0,n.useCallback)((e=>{if(!d({value:e,tabValues:o}))throw new Error(`Can't select invalid tab value=${e}`);s(e),u(e),f(e)}),[u,f,o]),tabValues:o}}var y=a(92303);const b={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function h(e){let{className:t,block:a,selectedValue:s,selectValue:i,tabValues:u}=e;const c=[],{blockElementScrollPositionUntilNextRender:m}=(0,l.a_)(),p=e=>{const t=e.currentTarget,a=c.indexOf(t),r=u[a].value;r!==s&&(m(t),i(r))},d=e=>{let t=null;switch(e.key){case"Enter":p(e);break;case"ArrowRight":{const a=c.indexOf(e.currentTarget)+1;t=c[a]??c[0];break}case"ArrowLeft":{const 
a=c.indexOf(e.currentTarget)-1;t=c[a]??c[c.length-1];break}}t?.focus()};return n.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,o.A)("tabs",{"tabs--block":a},t)},u.map((e=>{let{value:t,label:a,attributes:l}=e;return n.createElement("li",(0,r.A)({role:"tab",tabIndex:s===t?0:-1,"aria-selected":s===t,key:t,ref:e=>c.push(e),onKeyDown:d,onClick:p},l,{className:(0,o.A)("tabs__item",b.tabItem,l?.className,{"tabs__item--active":s===t})}),a??t)})))}function v(e){let{lazy:t,children:a,selectedValue:r}=e;const o=(Array.isArray(a)?a:[a]).filter(Boolean);if(t){const e=o.find((e=>e.props.value===r));return e?(0,n.cloneElement)(e,{className:"margin-top--md"}):null}return n.createElement("div",{className:"margin-top--md"},o.map(((e,t)=>(0,n.cloneElement)(e,{key:t,hidden:e.props.value!==r}))))}function w(e){const t=f(e);return n.createElement("div",{className:(0,o.A)("tabs-container",b.tabList)},n.createElement(h,(0,r.A)({},e,t)),n.createElement(v,(0,r.A)({},e,t)))}function N(e){const t=(0,y.A)();return n.createElement(w,(0,r.A)({key:String(t)},e))}},84958:(e,t,a)=>{a.r(t),a.d(t,{assets:()=>c,contentTitle:()=>i,default:()=>g,frontMatter:()=>s,metadata:()=>u,toc:()=>m});var r=a(58168),n=(a(96540),a(15680)),o=a(11470),l=a(19365);const s={sidebar_position:1,title:"Reformat",id:"reformat",description:"Select one or more columns or values using expressions and functions.",tags:["gems","select","reformat","transform"]},i=void 0,u={unversionedId:"Spark/gems/transform/reformat",id:"Spark/gems/transform/reformat",title:"Reformat",description:"Select one or more columns or values using expressions and 
functions.",source:"@site/docs/Spark/gems/transform/reformat.md",sourceDirName:"Spark/gems/transform",slug:"/Spark/gems/transform/reformat",permalink:"/Spark/gems/transform/reformat",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"select",permalink:"/tags/select"},{label:"reformat",permalink:"/tags/reformat"},{label:"transform",permalink:"/tags/transform"}],version:"current",sidebarPosition:1,frontMatter:{sidebar_position:1,title:"Reformat",id:"reformat",description:"Select one or more columns or values using expressions and functions.",tags:["gems","select","reformat","transform"]},sidebar:"defaultSidebar",previous:{title:"Transform",permalink:"/Spark/gems/transform/"},next:{title:"Filter",permalink:"/Spark/gems/transform/filter"}},c={},m=[{value:"Parameters",id:"parameters",level:2},{value:"Example",id:"example",level:2},{value:"Spark Code",id:"spark-code",level:2},{value:"Advanced Import",id:"advanced-import",level:2},{value:"Using Advanced Import",id:"using-advanced-import",level:3},{value:"Format",id:"format",level:3}],p={toc:m},d="wrapper";function g(e){let{components:t,...s}=e;return(0,n.yg)(d,(0,r.A)({},p,s,{components:t,mdxType:"MDXLayout"}),(0,n.yg)("h3",null,(0,n.yg)("span",{class:"badge"},"Spark Gem")),(0,n.yg)("p",null,"Transforms one or more column names or values by using expressions and/or functions. 
It's useful when we need to extract only the required columns or make changes column-wise."),(0,n.yg)("h2",{id:"parameters"},"Parameters"),(0,n.yg)("table",null,(0,n.yg)("thead",{parentName:"table"},(0,n.yg)("tr",{parentName:"thead"},(0,n.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,n.yg)("th",{parentName:"tr",align:"left"},"Description"),(0,n.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,n.yg)("tbody",{parentName:"table"},(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},"DataFrame"),(0,n.yg)("td",{parentName:"tr",align:"left"},"Input DataFrame on which changes are required"),(0,n.yg)("td",{parentName:"tr",align:"left"},"True")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},"Target column"),(0,n.yg)("td",{parentName:"tr",align:"left"},"Output column name"),(0,n.yg)("td",{parentName:"tr",align:"left"},"False")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:"left"},"Expression"),(0,n.yg)("td",{parentName:"tr",align:"left"},"Expression to compute target column"),(0,n.yg)("td",{parentName:"tr",align:"left"},"Required if a ",(0,n.yg)("inlineCode",{parentName:"td"},"Target column")," is present")))),(0,n.yg)("admonition",{type:"info"},(0,n.yg)("p",{parentName:"admonition"},"If no columns are selected, then all columns are passed through to the output")),(0,n.yg)("h2",{id:"example"},"Example"),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"Example usage of Reformat",src:a(51611).A,width:"1209",height:"724"})),(0,n.yg)("h2",{id:"spark-code"},"Spark Code"),(0,n.yg)("p",null,"Reformat converts to a SQL ",(0,n.yg)("inlineCode",{parentName:"p"},"Select")," or in relational terms into a projection, unlike ",(0,n.yg)("inlineCode",{parentName:"p"},"SchemaTransform")," Gem which uses underlying ",(0,n.yg)("inlineCode",{parentName:"p"},"withColumn")," 
construct"),(0,n.yg)(o.A,{mdxType:"Tabs"},(0,n.yg)(l.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-py"},'def Reformat(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return in0.select(\n col("id"),\n col("email").alias("email_address"),\n col("name"),\n col("updated_at"),\n concat_ws("$$$", col("address_line1"), col("address_line2"), col("postal_code"))\n .alias("address_string")\n )\n'))),(0,n.yg)(l.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-scala"},'object Reformat {\n\n def apply(spark: SparkSession, in: DataFrame): DataFrame =\n in.select(\n col("id"),\n col("email").as("email_address"),\n col("name"),\n col("updated_at"),\n expr("concat_ws(\'$$$\', address_line1, address_line2, postal_code)").as("address_string")\n )\n }\n\n')))),(0,n.yg)("h2",{id:"advanced-import"},"Advanced Import"),(0,n.yg)("p",null,"The Advanced Import feature allows you to bulk import statements that are structured similarly to CSV/TSV files. 
This can be useful if you have your expressions/transformation logic in another format and just want to quickly configure a ",(0,n.yg)("inlineCode",{parentName:"p"},"Reformat")," Gem based on this logic."),(0,n.yg)("h3",{id:"using-advanced-import"},"Using Advanced Import"),(0,n.yg)("ol",null,(0,n.yg)("li",{parentName:"ol"},"Click the ",(0,n.yg)("inlineCode",{parentName:"li"},"Advanced")," button in the `Reformat UI")),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"Advanced import toggle",src:a(45497).A,width:"2090",height:"1395"})),(0,n.yg)("ol",{start:2},(0,n.yg)("li",{parentName:"ol"},"Enter the expressions into the text area using the format as described below:")),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"Advanced import mode",src:a(69858).A,width:"2618",height:"2026"})),(0,n.yg)("ol",{start:3},(0,n.yg)("li",{parentName:"ol"},"Use the button at the top (labeled ",(0,n.yg)("inlineCode",{parentName:"li"},"Expressions"),") to switch back to the expressions view. This will translate the expressions from the CSV format to the table format and will show any errors detected.")),(0,n.yg)("h3",{id:"format"},"Format"),(0,n.yg)("p",null,"The format of these expressions is ",(0,n.yg)("inlineCode",{parentName:"p"},"target_name,target_expr"),", where ",(0,n.yg)("inlineCode",{parentName:"p"},"target_name")," is the desired new column name and ",(0,n.yg)("inlineCode",{parentName:"p"},"target_expr")," is the Spark expression that will be used to generate the new column."),(0,n.yg)("admonition",{type:"caution"},(0,n.yg)("p",{parentName:"admonition"},"For ",(0,n.yg)("inlineCode",{parentName:"p"},"target_expr")," values that contain a comma ",(0,n.yg)("inlineCode",{parentName:"p"},",")," or span multiple lines, you must surround them by ",(0,n.yg)("inlineCode",{parentName:"p"},"``")," on either side. 
For example:"),(0,n.yg)("pre",{parentName:"admonition"},(0,n.yg)("code",{parentName:"pre"},"customer_id,customer_id\nfull_name,``concat(first_name, ' ', last_name)``\n"))))}g.isMDXComponent=!0},45497:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/reformat_advanced-c65c99761ced1311392e26c6c335bf7b.png"},69858:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/reformat_advanced_2-c906ca7673c85cf3c013bdd6923d471b.png"},51611:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/reformat_eg_1-9a9344e1d3174c50427768400047df6c.png"}}]); \ No newline at end of file diff --git a/assets/js/d1e881cf.7130d71d.js b/assets/js/d1e881cf.7130d71d.js deleted file mode 100644 index eeed012475..0000000000 --- a/assets/js/d1e881cf.7130d71d.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[59143],{15680:(e,t,n)=>{n.d(t,{xA:()=>u,yg:()=>g});var a=n(96540);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),p=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},u=function(e){var t=p(e.components);return a.createElement(s.Provider,{value:t},e.children)},c="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},d=a.forwardRef((function(e,t){var 
n=e.components,r=e.mdxType,o=e.originalType,s=e.parentName,u=l(e,["components","mdxType","originalType","parentName"]),c=p(n),d=r,g=c["".concat(s,".").concat(d)]||c[d]||m[d]||o;return n?a.createElement(g,i(i({ref:t},u),{},{components:n})):a.createElement(g,i({ref:t},u))}));function g(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=n.length,i=new Array(o);i[0]=d;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[c]="string"==typeof e?e:r,i[1]=l;for(var p=2;p{n.d(t,{A:()=>i});var a=n(96540),r=n(20053);const o={tabItem:"tabItem_Ymn6"};function i(e){let{children:t,hidden:n,className:i}=e;return a.createElement("div",{role:"tabpanel",className:(0,r.A)(o.tabItem,i),hidden:n},t)}},11470:(e,t,n)=>{n.d(t,{A:()=>w});var a=n(58168),r=n(96540),o=n(20053),i=n(23104),l=n(56347),s=n(57485),p=n(31682),u=n(89466);function c(e){return function(e){return r.Children.map(e,(e=>{if(!e||(0,r.isValidElement)(e)&&function(e){const{props:t}=e;return!!t&&"object"==typeof t&&"value"in t}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:t,label:n,attributes:a,default:r}}=e;return{value:t,label:n,attributes:a,default:r}}))}function m(e){const{values:t,children:n}=e;return(0,r.useMemo)((()=>{const e=t??c(n);return function(e){const t=(0,p.X)(e,((e,t)=>e.value===t.value));if(t.length>0)throw new Error(`Docusaurus error: Duplicate values "${t.map((e=>e.value)).join(", ")}" found in . 
Every value needs to be unique.`)}(e),e}),[t,n])}function d(e){let{value:t,tabValues:n}=e;return n.some((e=>e.value===t))}function g(e){let{queryString:t=!1,groupId:n}=e;const a=(0,l.W6)(),o=function(e){let{queryString:t=!1,groupId:n}=e;if("string"==typeof t)return t;if(!1===t)return null;if(!0===t&&!n)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. You can also provide an explicit value such as queryString="my-search-param".');return n??null}({queryString:t,groupId:n});return[(0,s.aZ)(o),(0,r.useCallback)((e=>{if(!o)return;const t=new URLSearchParams(a.location.search);t.set(o,e),a.replace({...a.location,search:t.toString()})}),[o,a])]}function h(e){const{defaultValue:t,queryString:n=!1,groupId:a}=e,o=m(e),[i,l]=(0,r.useState)((()=>function(e){let{defaultValue:t,tabValues:n}=e;if(0===n.length)throw new Error("Docusaurus error: the component requires at least one children component");if(t){if(!d({value:t,tabValues:n}))throw new Error(`Docusaurus error: The has a defaultValue "${t}" but none of its children has the corresponding value. Available values are: ${n.map((e=>e.value)).join(", ")}. 
If you intend to show no default tab, use defaultValue={null} instead.`);return t}const a=n.find((e=>e.default))??n[0];if(!a)throw new Error("Unexpected error: 0 tabValues");return a.value}({defaultValue:t,tabValues:o}))),[s,p]=g({queryString:n,groupId:a}),[c,h]=function(e){let{groupId:t}=e;const n=function(e){return e?`docusaurus.tab.${e}`:null}(t),[a,o]=(0,u.Dv)(n);return[a,(0,r.useCallback)((e=>{n&&o.set(e)}),[n,o])]}({groupId:a}),y=(()=>{const e=s??c;return d({value:e,tabValues:o})?e:null})();(0,r.useLayoutEffect)((()=>{y&&l(y)}),[y]);return{selectedValue:i,selectValue:(0,r.useCallback)((e=>{if(!d({value:e,tabValues:o}))throw new Error(`Can't select invalid tab value=${e}`);l(e),p(e),h(e)}),[p,h,o]),tabValues:o}}var y=n(92303);const b={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function f(e){let{className:t,block:n,selectedValue:l,selectValue:s,tabValues:p}=e;const u=[],{blockElementScrollPositionUntilNextRender:c}=(0,i.a_)(),m=e=>{const t=e.currentTarget,n=u.indexOf(t),a=p[n].value;a!==l&&(c(t),s(a))},d=e=>{let t=null;switch(e.key){case"Enter":m(e);break;case"ArrowRight":{const n=u.indexOf(e.currentTarget)+1;t=u[n]??u[0];break}case"ArrowLeft":{const n=u.indexOf(e.currentTarget)-1;t=u[n]??u[u.length-1];break}}t?.focus()};return r.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,o.A)("tabs",{"tabs--block":n},t)},p.map((e=>{let{value:t,label:n,attributes:i}=e;return r.createElement("li",(0,a.A)({role:"tab",tabIndex:l===t?0:-1,"aria-selected":l===t,key:t,ref:e=>u.push(e),onKeyDown:d,onClick:m},i,{className:(0,o.A)("tabs__item",b.tabItem,i?.className,{"tabs__item--active":l===t})}),n??t)})))}function v(e){let{lazy:t,children:n,selectedValue:a}=e;const o=(Array.isArray(n)?n:[n]).filter(Boolean);if(t){const e=o.find((e=>e.props.value===a));return e?(0,r.cloneElement)(e,{className:"margin-top--md"}):null}return 
r.createElement("div",{className:"margin-top--md"},o.map(((e,t)=>(0,r.cloneElement)(e,{key:t,hidden:e.props.value!==a}))))}function k(e){const t=h(e);return r.createElement("div",{className:(0,o.A)("tabs-container",b.tabList)},r.createElement(f,(0,a.A)({},e,t)),r.createElement(v,(0,a.A)({},e,t)))}function w(e){const t=(0,y.A)();return r.createElement(k,(0,a.A)({key:String(t)},e))}},76140:(e,t,n)=>{n.r(t),n.d(t,{assets:()=>u,contentTitle:()=>s,default:()=>g,frontMatter:()=>l,metadata:()=>p,toc:()=>c});var a=n(58168),r=(n(96540),n(15680)),o=n(11470),i=n(19365);const l={sidebar_position:3,title:"PineconeLookup",id:"ml-pinecone-lookup",description:"Lookup a vector embedding from a Pinecone Database",tags:["generative-ai","machine-learning","llm","pinecone","openai"]},s=void 0,p={unversionedId:"Spark/gems/machine-learning/ml-pinecone-lookup",id:"Spark/gems/machine-learning/ml-pinecone-lookup",title:"PineconeLookup",description:"Lookup a vector embedding from a Pinecone Database",source:"@site/docs/Spark/gems/machine-learning/ml-pinecone-lookup.md",sourceDirName:"Spark/gems/machine-learning",slug:"/Spark/gems/machine-learning/ml-pinecone-lookup",permalink:"/Spark/gems/machine-learning/ml-pinecone-lookup",draft:!1,tags:[{label:"generative-ai",permalink:"/tags/generative-ai"},{label:"machine-learning",permalink:"/tags/machine-learning"},{label:"llm",permalink:"/tags/llm"},{label:"pinecone",permalink:"/tags/pinecone"},{label:"openai",permalink:"/tags/openai"}],version:"current",sidebarPosition:3,frontMatter:{sidebar_position:3,title:"PineconeLookup",id:"ml-pinecone-lookup",description:"Lookup a vector embedding from a Pinecone Database",tags:["generative-ai","machine-learning","llm","pinecone","openai"]},sidebar:"defaultSidebar",previous:{title:"OpenAI",permalink:"/Spark/gems/machine-learning/ml-openai"},next:{title:"Subgraph",permalink:"/Spark/gems/subgraph/"}},u={},c=[{value:"Gem 
Parameters",id:"gem-parameters",level:3},{value:"Credentials",id:"credentials",level:4},{value:"Properties",id:"properties",level:4},{value:"Input",id:"input",level:3},{value:"Output",id:"output",level:3},{value:"FAQ",id:"faq",level:3},{value:"Troubleshooting",id:"troubleshooting",level:4},{value:"Creating a Pinecone Index",id:"creating-a-pinecone-index",level:4}],m={toc:c},d="wrapper";function g(e){let{components:t,...l}=e;return(0,r.yg)(d,(0,a.A)({},m,l,{components:t,mdxType:"MDXLayout"}),(0,r.yg)("h3",null,(0,r.yg)("span",{class:"badge rounded-pill text-bg-light"},"Spark Gem")),(0,r.yg)("p",null,"The PineconeLookup Gem identifies content that is similar to a provided vector embedding. The Gem calls the Pinecone API and returns a set of IDs with highest similarity to the provided embedding."),(0,r.yg)("ul",null,(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("p",{parentName:"li"},(0,r.yg)("a",{parentName:"p",href:"https://docs.prophecy.io/Spark/gems/machine-learning/ml-pinecone-lookup#gem-parameters"},(0,r.yg)("strong",{parentName:"a"},"Parameters:"))," Configure the parameters needed to call the Pinecone API.")),(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("p",{parentName:"li"},(0,r.yg)("a",{parentName:"p",href:"https://docs.prophecy.io/Spark/gems/machine-learning/ml-pinecone-lookup#input"},(0,r.yg)("strong",{parentName:"a"},"Input:"))," This Gem requires an embedding as input. 
The embedding is provided by a foundational model like ",(0,r.yg)("a",{parentName:"p",href:"https://platform.openai.com/docs/introduction"},"OpenAI"),".")),(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("p",{parentName:"li"},(0,r.yg)("a",{parentName:"p",href:"https://docs.prophecy.io/Spark/gems/machine-learning/ml-pinecone-lookup#output"},(0,r.yg)("strong",{parentName:"a"},"Output:"))," This Gem outputs an array of IDs with corresponding similarity scores."))),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Input and Output",src:n(17921).A,width:"2376",height:"814"})),(0,r.yg)("p",null,"Now let\u2019s understand the Gem Parameters, Input, and Output in detail."),(0,r.yg)("div",{class:"wistia_responsive_padding",style:{padding:"56.25% 0 0 0",position:"relative"}},(0,r.yg)("div",{class:"wistia_responsive_wrapper",style:{height:"100%",left:0,position:"absolute",top:0,width:"100%"}},(0,r.yg)("iframe",{src:"https://fast.wistia.net/embed/iframe/nupkza0ir6?seo=false?videoFoam=true",title:"Getting Started With SQL Video",allow:"autoplay; fullscreen",allowtransparency:"true",frameborder:"0",scrolling:"no",class:"wistia_embed",name:"wistia_embed",msallowfullscreen:!0,width:"100%",height:"100%"}))),(0,r.yg)("script",{src:"https://fast.wistia.net/assets/external/E-v1.js",async:!0}),(0,r.yg)("br",null),(0,r.yg)("h3",{id:"gem-parameters"},"Gem Parameters"),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Parameters",src:n(16869).A,width:"2880",height:"1726"})),(0,r.yg)("p",null,"Verify the ",(0,r.yg)("strong",{parentName:"p"},"(1) input columns")," contain a column with the embeddings. The structure of this column's entries must be compatible with the structure of the Pinecone index."),(0,r.yg)("h4",{id:"credentials"},"Credentials"),(0,r.yg)("p",null,"Configure the Pinecone API credentials here. Storing the Pinecone API token as a ",(0,r.yg)("strong",{parentName:"p"},"(2) Databricks Secret")," is highly recommended. 
For instructions click ",(0,r.yg)("a",{parentName:"p",href:"https://docs.databricks.com/en/security/secrets/index.html"},"here.")," Be sure to use the ",(0,r.yg)("strong",{parentName:"p"},"(3) Fabric connection")," to the Databricks workspace which contains the Databricks scope and secrets configured in this Gem."),(0,r.yg)("p",null,"Hardcoding the Pinecone credential is not recommended. Selecting this option could send credentials to be stored hardcoded in Git; ",(0,r.yg)("a",{parentName:"p",href:"https://www.prophecy.io/request-a-demo"},"reach out")," to understand the integrations with other secret managers."),(0,r.yg)("h4",{id:"properties"},"Properties"),(0,r.yg)("p",null,"Pinecone DB uses indexing to map the vectors to a data structure that will enable faster searching. The PineconeLookup Gem searches through a Pinecone index to identify embeddings with similarity to the input embedding. Enter the Pinecone ",(0,r.yg)("strong",{parentName:"p"},(0,r.yg)("a",{parentName:"strong",href:"https://docs.prophecy.io/Spark/gems/machine-learning/ml-pinecone-lookup#faq"},"(4) Index name"))," which you\u2019d like to use for looking up embeddings."),(0,r.yg)("p",null,"Select one of the Gem\u2019s input columns with vector embeddings as the ",(0,r.yg)("strong",{parentName:"p"},"(5) Vector column")," to send to Pinecone\u2019s API. The column ",(0,r.yg)("a",{parentName:"p",href:"https://docs.prophecy.io/Spark/gems/machine-learning/ml-pinecone-lookup#input"},"must")," be compatible with the Pinecone Index. To change the column\u2019s datatype and properties, ",(0,r.yg)("a",{parentName:"p",href:"https://docs.prophecy.io/Spark/gems/machine-learning/ml-pinecone-lookup#faq"},"configure")," the Gem(s) preceding the PineconeLookup Gem."),(0,r.yg)("p",null,"Pinecone\u2019s API can return multiple results. Depending on the use case, select the desired ",(0,r.yg)("strong",{parentName:"p"},"(6) Number of results")," sorted by similarity score. 
The result with highest similarity to the user\u2019s text question will be listed first."),(0,r.yg)("h3",{id:"input"},"Input"),(0,r.yg)("p",null,"PineconeLookup requires a model_embedding column as input. Use one of Prophecy's Machine Learning Gems to provide the model_embedding. For example, the OpenAI Gem can precede the PineconeLookup Gem in the Pipeline. The OpenAI Gem, configured to ",(0,r.yg)("inlineCode",{parentName:"p"},"Compute a text embedding"),", will output an openai_embedding column. This is a suitable input for the PineconeLookup Gem."),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Column"),(0,r.yg)("th",{parentName:"tr",align:null},"Description"),(0,r.yg)("th",{parentName:"tr",align:null},"Required"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"model_embedding"),(0,r.yg)("td",{parentName:"tr",align:null},"array(float) - The format of this embedding is important. It must be an array of floating point numbers that matches the requirements of the Pinecone index. For example, we used a Pinecone index with ",(0,r.yg)("inlineCode",{parentName:"td"},"1536")," dimensions, ",(0,r.yg)("inlineCode",{parentName:"td"},"Cosine")," metric, and an ",(0,r.yg)("inlineCode",{parentName:"td"},"s1")," pod type. So each record in the model_embedding column must be an array of ",(0,r.yg)("inlineCode",{parentName:"td"},"1536")," floating point numbers, such as ",(0,r.yg)("inlineCode",{parentName:"td"},"[-0.0018493991, -0.0059955865, ... -0.02498541]"),"."),(0,r.yg)("td",{parentName:"tr",align:null},"True")))),(0,r.yg)("h3",{id:"output"},"Output"),(0,r.yg)("p",null,"The output Dataset contains the pinecone_matches and pinecone_error columns. For each input content entry, this Gem adds an array to the pinecone_matches column. 
The output array will have ",(0,r.yg)("a",{parentName:"p",href:"https://docs.prophecy.io/Spark/gems/machine-learning/ml-pinecone-lookup#properties"},"Number of Results")," entries."),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Column"),(0,r.yg)("th",{parentName:"tr",align:null},"Description"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"pinecone_matches"),(0,r.yg)("td",{parentName:"tr",align:null},"array - an array of several content IDs and their scores. Example: ",(0,r.yg)("inlineCode",{parentName:"td"},'[{"id":"web-223","score":0.8437653},{"id":"web-224","score":0.8403446}, ...{"id":"web-237","score":0.82916564}]'))),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"pinecone_error"),(0,r.yg)("td",{parentName:"tr",align:null},"string - this column is provided to show any error message returned from Pinecone\u2019s API; helpful for troubleshooting errors related to the PineconeLookup Gem.")))),(0,r.yg)("p",null,"Prophecy converts the visual design into Spark code available on the Prophecy user's Git repository. 
Find the Spark code for the PineconeLookup Gem below."),(0,r.yg)(o.A,{mdxType:"Tabs"},(0,r.yg)(i.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},'def vector_lookup(Spark: SparkSession, in0: DataFrame) -> DataFrame:\n from pySpark.sql.functions import expr, array, struct\n from Spark_ai.dbs.pinecone import PineconeDB, IdVector\n from pySpark.dbutils import DBUtils\n PineconeDB(DBUtils(Spark).secrets.get(scope = "< my_scope >", key = "< my_key >"), "us-east-1-aws")\\\n .register_udfs(Spark)\n\n return in0\\\n .withColumn("_vector", col("_embedding"))\\\n .withColumn("_response", expr(f"pinecone_query(\\\\", _vector, {3})"))\\\n .withColumn("pinecone_matches", col("_response.matches"))\\\n .withColumn("pinecone_error", col("_response.error"))\\\n .drop("_vector", "_response")\n'))),(0,r.yg)(i.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-scala"}," [Not yet supported]\n")))),(0,r.yg)("h3",{id:"faq"},"FAQ"),(0,r.yg)("h4",{id:"troubleshooting"},"Troubleshooting"),(0,r.yg)("p",null,"To troubleshoot the Gem preceding PineconeLookup, open the data preview output from the previous Gem. For example if the embedding structure is incorrect then try adjusting the previous Gem, run, and view that Gem\u2019s output data preview."),(0,r.yg)("h4",{id:"creating-a-pinecone-index"},"Creating a Pinecone Index"),(0,r.yg)("p",null,"If you don\u2019t have one yet, ",(0,r.yg)("a",{parentName:"p",href:"https://docs.pinecone.io/docs/quickstart"},"create a Pinecone index"),". Click ",(0,r.yg)("a",{parentName:"p",href:"https://docs.pinecone.io/docs/choosing-index-type-and-size"},"here")," for pointers on choosing an index type and size. How to populate the index? 
For example, ",(0,r.yg)("a",{parentName:"p",href:"https://docs.prophecy.io/getting-started/gen-ai-chatbot#step-2-build-a-knowledge-warehouse"},"this guide")," shows how to ingest and vectorize web content data to store in a Pinecone Database index."))}g.isMDXComponent=!0},16869:(e,t,n)=>{n.d(t,{A:()=>a});const a=n.p+"assets/images/pinecone_lookup_configure-c0222e658f0a0e83456793ea8e3bffd0.png"},17921:(e,t,n)=>{n.d(t,{A:()=>a});const a=n.p+"assets/images/pinecone_lookup_input_output-268c3aecc7c4cb663c302cdd2682e30f.png"}}]); \ No newline at end of file diff --git a/assets/js/d1e881cf.a8034068.js b/assets/js/d1e881cf.a8034068.js new file mode 100644 index 0000000000..acf84ebc28 --- /dev/null +++ b/assets/js/d1e881cf.a8034068.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[59143],{15680:(e,t,n)=>{n.d(t,{xA:()=>u,yg:()=>g});var a=n(96540);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),p=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},u=function(e){var t=p(e.components);return a.createElement(s.Provider,{value:t},e.children)},c="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},d=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,o=e.originalType,s=e.parentName,u=l(e,["components","mdxType","originalType","parentName"]),c=p(n),d=r,g=c["".concat(s,".").concat(d)]||c[d]||m[d]||o;return 
n?a.createElement(g,i(i({ref:t},u),{},{components:n})):a.createElement(g,i({ref:t},u))}));function g(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=n.length,i=new Array(o);i[0]=d;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[c]="string"==typeof e?e:r,i[1]=l;for(var p=2;p{n.d(t,{A:()=>i});var a=n(96540),r=n(20053);const o={tabItem:"tabItem_Ymn6"};function i(e){let{children:t,hidden:n,className:i}=e;return a.createElement("div",{role:"tabpanel",className:(0,r.A)(o.tabItem,i),hidden:n},t)}},11470:(e,t,n)=>{n.d(t,{A:()=>w});var a=n(58168),r=n(96540),o=n(20053),i=n(23104),l=n(56347),s=n(57485),p=n(31682),u=n(89466);function c(e){return function(e){return r.Children.map(e,(e=>{if(!e||(0,r.isValidElement)(e)&&function(e){const{props:t}=e;return!!t&&"object"==typeof t&&"value"in t}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:t,label:n,attributes:a,default:r}}=e;return{value:t,label:n,attributes:a,default:r}}))}function m(e){const{values:t,children:n}=e;return(0,r.useMemo)((()=>{const e=t??c(n);return function(e){const t=(0,p.X)(e,((e,t)=>e.value===t.value));if(t.length>0)throw new Error(`Docusaurus error: Duplicate values "${t.map((e=>e.value)).join(", ")}" found in . Every value needs to be unique.`)}(e),e}),[t,n])}function d(e){let{value:t,tabValues:n}=e;return n.some((e=>e.value===t))}function g(e){let{queryString:t=!1,groupId:n}=e;const a=(0,l.W6)(),o=function(e){let{queryString:t=!1,groupId:n}=e;if("string"==typeof t)return t;if(!1===t)return null;if(!0===t&&!n)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. 
You can also provide an explicit value such as queryString="my-search-param".');return n??null}({queryString:t,groupId:n});return[(0,s.aZ)(o),(0,r.useCallback)((e=>{if(!o)return;const t=new URLSearchParams(a.location.search);t.set(o,e),a.replace({...a.location,search:t.toString()})}),[o,a])]}function h(e){const{defaultValue:t,queryString:n=!1,groupId:a}=e,o=m(e),[i,l]=(0,r.useState)((()=>function(e){let{defaultValue:t,tabValues:n}=e;if(0===n.length)throw new Error("Docusaurus error: the component requires at least one children component");if(t){if(!d({value:t,tabValues:n}))throw new Error(`Docusaurus error: The has a defaultValue "${t}" but none of its children has the corresponding value. Available values are: ${n.map((e=>e.value)).join(", ")}. If you intend to show no default tab, use defaultValue={null} instead.`);return t}const a=n.find((e=>e.default))??n[0];if(!a)throw new Error("Unexpected error: 0 tabValues");return a.value}({defaultValue:t,tabValues:o}))),[s,p]=g({queryString:n,groupId:a}),[c,h]=function(e){let{groupId:t}=e;const n=function(e){return e?`docusaurus.tab.${e}`:null}(t),[a,o]=(0,u.Dv)(n);return[a,(0,r.useCallback)((e=>{n&&o.set(e)}),[n,o])]}({groupId:a}),y=(()=>{const e=s??c;return d({value:e,tabValues:o})?e:null})();(0,r.useLayoutEffect)((()=>{y&&l(y)}),[y]);return{selectedValue:i,selectValue:(0,r.useCallback)((e=>{if(!d({value:e,tabValues:o}))throw new Error(`Can't select invalid tab value=${e}`);l(e),p(e),h(e)}),[p,h,o]),tabValues:o}}var y=n(92303);const b={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function f(e){let{className:t,block:n,selectedValue:l,selectValue:s,tabValues:p}=e;const u=[],{blockElementScrollPositionUntilNextRender:c}=(0,i.a_)(),m=e=>{const t=e.currentTarget,n=u.indexOf(t),a=p[n].value;a!==l&&(c(t),s(a))},d=e=>{let t=null;switch(e.key){case"Enter":m(e);break;case"ArrowRight":{const n=u.indexOf(e.currentTarget)+1;t=u[n]??u[0];break}case"ArrowLeft":{const 
n=u.indexOf(e.currentTarget)-1;t=u[n]??u[u.length-1];break}}t?.focus()};return r.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,o.A)("tabs",{"tabs--block":n},t)},p.map((e=>{let{value:t,label:n,attributes:i}=e;return r.createElement("li",(0,a.A)({role:"tab",tabIndex:l===t?0:-1,"aria-selected":l===t,key:t,ref:e=>u.push(e),onKeyDown:d,onClick:m},i,{className:(0,o.A)("tabs__item",b.tabItem,i?.className,{"tabs__item--active":l===t})}),n??t)})))}function v(e){let{lazy:t,children:n,selectedValue:a}=e;const o=(Array.isArray(n)?n:[n]).filter(Boolean);if(t){const e=o.find((e=>e.props.value===a));return e?(0,r.cloneElement)(e,{className:"margin-top--md"}):null}return r.createElement("div",{className:"margin-top--md"},o.map(((e,t)=>(0,r.cloneElement)(e,{key:t,hidden:e.props.value!==a}))))}function k(e){const t=h(e);return r.createElement("div",{className:(0,o.A)("tabs-container",b.tabList)},r.createElement(f,(0,a.A)({},e,t)),r.createElement(v,(0,a.A)({},e,t)))}function w(e){const t=(0,y.A)();return r.createElement(k,(0,a.A)({key:String(t)},e))}},76140:(e,t,n)=>{n.r(t),n.d(t,{assets:()=>u,contentTitle:()=>s,default:()=>g,frontMatter:()=>l,metadata:()=>p,toc:()=>c});var a=n(58168),r=(n(96540),n(15680)),o=n(11470),i=n(19365);const l={sidebar_position:3,title:"PineconeLookup",id:"ml-pinecone-lookup",description:"Lookup a vector embedding from a Pinecone Database",tags:["generative-ai","machine-learning","llm","pinecone","openai"]},s=void 0,p={unversionedId:"Spark/gems/machine-learning/ml-pinecone-lookup",id:"Spark/gems/machine-learning/ml-pinecone-lookup",title:"PineconeLookup",description:"Lookup a vector embedding from a Pinecone 
Database",source:"@site/docs/Spark/gems/machine-learning/ml-pinecone-lookup.md",sourceDirName:"Spark/gems/machine-learning",slug:"/Spark/gems/machine-learning/ml-pinecone-lookup",permalink:"/Spark/gems/machine-learning/ml-pinecone-lookup",draft:!1,tags:[{label:"generative-ai",permalink:"/tags/generative-ai"},{label:"machine-learning",permalink:"/tags/machine-learning"},{label:"llm",permalink:"/tags/llm"},{label:"pinecone",permalink:"/tags/pinecone"},{label:"openai",permalink:"/tags/openai"}],version:"current",sidebarPosition:3,frontMatter:{sidebar_position:3,title:"PineconeLookup",id:"ml-pinecone-lookup",description:"Lookup a vector embedding from a Pinecone Database",tags:["generative-ai","machine-learning","llm","pinecone","openai"]},sidebar:"defaultSidebar",previous:{title:"OpenAI",permalink:"/Spark/gems/machine-learning/ml-openai"},next:{title:"Subgraph",permalink:"/Spark/gems/subgraph/"}},u={},c=[{value:"Gem Parameters",id:"gem-parameters",level:3},{value:"Credentials",id:"credentials",level:4},{value:"Properties",id:"properties",level:4},{value:"Input",id:"input",level:3},{value:"Output",id:"output",level:3},{value:"FAQ",id:"faq",level:3},{value:"Troubleshooting",id:"troubleshooting",level:4},{value:"Creating a Pinecone Index",id:"creating-a-pinecone-index",level:4}],m={toc:c},d="wrapper";function g(e){let{components:t,...l}=e;return(0,r.yg)(d,(0,a.A)({},m,l,{components:t,mdxType:"MDXLayout"}),(0,r.yg)("h3",null,(0,r.yg)("span",{class:"badge"},"Spark Gem")),(0,r.yg)("p",null,"The PineconeLookup Gem identifies content that is similar to a provided vector embedding. 
The Gem calls the Pinecone API and returns a set of IDs with highest similarity to the provided embedding."),(0,r.yg)("ul",null,(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("p",{parentName:"li"},(0,r.yg)("a",{parentName:"p",href:"https://docs.prophecy.io/Spark/gems/machine-learning/ml-pinecone-lookup#gem-parameters"},(0,r.yg)("strong",{parentName:"a"},"Parameters:"))," Configure the parameters needed to call the Pinecone API.")),(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("p",{parentName:"li"},(0,r.yg)("a",{parentName:"p",href:"https://docs.prophecy.io/Spark/gems/machine-learning/ml-pinecone-lookup#input"},(0,r.yg)("strong",{parentName:"a"},"Input:"))," This Gem requires an embedding as input. The embedding is provided by a foundational model like ",(0,r.yg)("a",{parentName:"p",href:"https://platform.openai.com/docs/introduction"},"OpenAI"),".")),(0,r.yg)("li",{parentName:"ul"},(0,r.yg)("p",{parentName:"li"},(0,r.yg)("a",{parentName:"p",href:"https://docs.prophecy.io/Spark/gems/machine-learning/ml-pinecone-lookup#output"},(0,r.yg)("strong",{parentName:"a"},"Output:"))," This Gem outputs an array of IDs with corresponding similarity scores."))),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Input and Output",src:n(17921).A,width:"2376",height:"814"})),(0,r.yg)("p",null,"Now let\u2019s understand the Gem Parameters, Input, and Output in detail."),(0,r.yg)("div",{class:"wistia_responsive_padding",style:{padding:"56.25% 0 0 0",position:"relative"}},(0,r.yg)("div",{class:"wistia_responsive_wrapper",style:{height:"100%",left:0,position:"absolute",top:0,width:"100%"}},(0,r.yg)("iframe",{src:"https://fast.wistia.net/embed/iframe/nupkza0ir6?seo=false?videoFoam=true",title:"Getting Started With SQL Video",allow:"autoplay; 
fullscreen",allowtransparency:"true",frameborder:"0",scrolling:"no",class:"wistia_embed",name:"wistia_embed",msallowfullscreen:!0,width:"100%",height:"100%"}))),(0,r.yg)("script",{src:"https://fast.wistia.net/assets/external/E-v1.js",async:!0}),(0,r.yg)("br",null),(0,r.yg)("h3",{id:"gem-parameters"},"Gem Parameters"),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Parameters",src:n(16869).A,width:"2880",height:"1726"})),(0,r.yg)("p",null,"Verify the ",(0,r.yg)("strong",{parentName:"p"},"(1) input columns")," contain a column with the embeddings. The structure of this column's entries must be compatible with the structure of the Pinecone index."),(0,r.yg)("h4",{id:"credentials"},"Credentials"),(0,r.yg)("p",null,"Configure the Pinecone API credentials here. Storing the Pinecone API token as a ",(0,r.yg)("strong",{parentName:"p"},"(2) Databricks Secret")," is highly recommended. For instructions click ",(0,r.yg)("a",{parentName:"p",href:"https://docs.databricks.com/en/security/secrets/index.html"},"here.")," Be sure to use the ",(0,r.yg)("strong",{parentName:"p"},"(3) Fabric connection")," to the Databricks workspace which contains the Databricks scope and secrets configured in this Gem."),(0,r.yg)("p",null,"Hardcoding the Pinecone credential is not recommended. Selecting this option could send credentials to be stored hardcoded in Git; ",(0,r.yg)("a",{parentName:"p",href:"https://www.prophecy.io/request-a-demo"},"reach out")," to understand the integrations with other secret managers."),(0,r.yg)("h4",{id:"properties"},"Properties"),(0,r.yg)("p",null,"Pinecone DB uses indexing to map the vectors to a data structure that will enable faster searching. The PineconeLookup Gem searches through a Pinecone index to identify embeddings with similarity to the input embedding. 
Enter the Pinecone ",(0,r.yg)("strong",{parentName:"p"},(0,r.yg)("a",{parentName:"strong",href:"https://docs.prophecy.io/Spark/gems/machine-learning/ml-pinecone-lookup#faq"},"(4) Index name"))," which you\u2019d like to use for looking up embeddings."),(0,r.yg)("p",null,"Select one of the Gem\u2019s input columns with vector embeddings as the ",(0,r.yg)("strong",{parentName:"p"},"(5) Vector column")," to send to Pinecone\u2019s API. The column ",(0,r.yg)("a",{parentName:"p",href:"https://docs.prophecy.io/Spark/gems/machine-learning/ml-pinecone-lookup#input"},"must")," be compatible with the Pinecone Index. To change the column\u2019s datatype and properties, ",(0,r.yg)("a",{parentName:"p",href:"https://docs.prophecy.io/Spark/gems/machine-learning/ml-pinecone-lookup#faq"},"configure")," the Gem(s) preceding the PineconeLookup Gem."),(0,r.yg)("p",null,"Pinecone\u2019s API can return multiple results. Depending on the use case, select the desired ",(0,r.yg)("strong",{parentName:"p"},"(6) Number of results")," sorted by similarity score. The result with highest similarity to the user\u2019s text question will be listed first."),(0,r.yg)("h3",{id:"input"},"Input"),(0,r.yg)("p",null,"PineconeLookup requires a model_embedding column as input. Use one of Prophecy's Machine Learning Gems to provide the model_embedding. For example, the OpenAI Gem can precede the PineconeLookup Gem in the Pipeline. The OpenAI Gem, configured to ",(0,r.yg)("inlineCode",{parentName:"p"},"Compute a text embedding"),", will output an openai_embedding column. 
This is a suitable input for the PineconeLookup Gem."),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Column"),(0,r.yg)("th",{parentName:"tr",align:null},"Description"),(0,r.yg)("th",{parentName:"tr",align:null},"Required"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"model_embedding"),(0,r.yg)("td",{parentName:"tr",align:null},"array(float) - The format of this embedding is important. It must be an array of floating point numbers that matches the requirements of the Pinecone index. For example, we used a Pinecone index with ",(0,r.yg)("inlineCode",{parentName:"td"},"1536")," dimensions, ",(0,r.yg)("inlineCode",{parentName:"td"},"Cosine")," metric, and an ",(0,r.yg)("inlineCode",{parentName:"td"},"s1")," pod type. So each record in the model_embedding column must be an array of ",(0,r.yg)("inlineCode",{parentName:"td"},"1536")," floating point numbers, such as ",(0,r.yg)("inlineCode",{parentName:"td"},"[-0.0018493991, -0.0059955865, ... -0.02498541]"),"."),(0,r.yg)("td",{parentName:"tr",align:null},"True")))),(0,r.yg)("h3",{id:"output"},"Output"),(0,r.yg)("p",null,"The output Dataset contains the pinecone_matches and pinecone_error columns. For each input content entry, this Gem adds an array to the pinecone_matches column. 
The output array will have ",(0,r.yg)("a",{parentName:"p",href:"https://docs.prophecy.io/Spark/gems/machine-learning/ml-pinecone-lookup#properties"},"Number of Results")," entries."),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Column"),(0,r.yg)("th",{parentName:"tr",align:null},"Description"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"pinecone_matches"),(0,r.yg)("td",{parentName:"tr",align:null},"array - an array of several content IDs and their scores. Example: ",(0,r.yg)("inlineCode",{parentName:"td"},'[{"id":"web-223","score":0.8437653},{"id":"web-224","score":0.8403446}, ...{"id":"web-237","score":0.82916564}]'))),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"pinecone_error"),(0,r.yg)("td",{parentName:"tr",align:null},"string - this column is provided to show any error message returned from Pinecone\u2019s API; helpful for troubleshooting errors related to the PineconeLookup Gem.")))),(0,r.yg)("p",null,"Prophecy converts the visual design into Spark code available on the Prophecy user's Git repository. 
Find the Spark code for the PineconeLookup Gem below."),(0,r.yg)(o.A,{mdxType:"Tabs"},(0,r.yg)(i.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},'def vector_lookup(Spark: SparkSession, in0: DataFrame) -> DataFrame:\n from pySpark.sql.functions import expr, array, struct\n from Spark_ai.dbs.pinecone import PineconeDB, IdVector\n from pySpark.dbutils import DBUtils\n PineconeDB(DBUtils(Spark).secrets.get(scope = "< my_scope >", key = "< my_key >"), "us-east-1-aws")\\\n .register_udfs(Spark)\n\n return in0\\\n .withColumn("_vector", col("_embedding"))\\\n .withColumn("_response", expr(f"pinecone_query(\\\\", _vector, {3})"))\\\n .withColumn("pinecone_matches", col("_response.matches"))\\\n .withColumn("pinecone_error", col("_response.error"))\\\n .drop("_vector", "_response")\n'))),(0,r.yg)(i.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-scala"}," [Not yet supported]\n")))),(0,r.yg)("h3",{id:"faq"},"FAQ"),(0,r.yg)("h4",{id:"troubleshooting"},"Troubleshooting"),(0,r.yg)("p",null,"To troubleshoot the Gem preceding PineconeLookup, open the data preview output from the previous Gem. For example if the embedding structure is incorrect then try adjusting the previous Gem, run, and view that Gem\u2019s output data preview."),(0,r.yg)("h4",{id:"creating-a-pinecone-index"},"Creating a Pinecone Index"),(0,r.yg)("p",null,"If you don\u2019t have one yet, ",(0,r.yg)("a",{parentName:"p",href:"https://docs.pinecone.io/docs/quickstart"},"create a Pinecone index"),". Click ",(0,r.yg)("a",{parentName:"p",href:"https://docs.pinecone.io/docs/choosing-index-type-and-size"},"here")," for pointers on choosing an index type and size. How to populate the index? 
For example, ",(0,r.yg)("a",{parentName:"p",href:"https://docs.prophecy.io/getting-started/gen-ai-chatbot#step-2-build-a-knowledge-warehouse"},"this guide")," shows how to ingest and vectorize web content data to store in a Pinecone Database index."))}g.isMDXComponent=!0},16869:(e,t,n)=>{n.d(t,{A:()=>a});const a=n.p+"assets/images/pinecone_lookup_configure-c0222e658f0a0e83456793ea8e3bffd0.png"},17921:(e,t,n)=>{n.d(t,{A:()=>a});const a=n.p+"assets/images/pinecone_lookup_input_output-268c3aecc7c4cb663c302cdd2682e30f.png"}}]); \ No newline at end of file diff --git a/assets/js/d7087486.38b0dc27.js b/assets/js/d7087486.e025e5a9.js similarity index 51% rename from assets/js/d7087486.38b0dc27.js rename to assets/js/d7087486.e025e5a9.js index 91d1f346b4..87dccc122e 100644 --- a/assets/js/d7087486.38b0dc27.js +++ b/assets/js/d7087486.e025e5a9.js @@ -1 +1 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[18755],{15680:(e,t,n)=>{n.d(t,{xA:()=>u,yg:()=>y});var r=n(96540);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function l(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function o(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var i=r.createContext({}),p=function(e){var t=r.useContext(i),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},u=function(e){var t=p(e.components);return r.createElement(i.Provider,{value:t},e.children)},m="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},g=r.forwardRef((function(e,t){var 
n=e.components,a=e.mdxType,l=e.originalType,i=e.parentName,u=s(e,["components","mdxType","originalType","parentName"]),m=p(n),g=a,y=m["".concat(i,".").concat(g)]||m[g]||c[g]||l;return n?r.createElement(y,o(o({ref:t},u),{},{components:n})):r.createElement(y,o({ref:t},u))}));function y(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var l=n.length,o=new Array(l);o[0]=g;var s={};for(var i in t)hasOwnProperty.call(t,i)&&(s[i]=t[i]);s.originalType=e,s[m]="string"==typeof e?e:a,o[1]=s;for(var p=2;p{n.r(t),n.d(t,{assets:()=>i,contentTitle:()=>o,default:()=>c,frontMatter:()=>l,metadata:()=>s,toc:()=>p});var r=n(58168),a=(n(96540),n(15680));const l={sidebar_position:11,title:"BulkColumnExpressions",id:"bulk-column-expressions",description:"Change the data type of multiple columns at once.",tags:["gems","type","columns"]},o=void 0,s={unversionedId:"Spark/gems/transform/bulk-column-expressions",id:"Spark/gems/transform/bulk-column-expressions",title:"BulkColumnExpressions",description:"Change the data type of multiple columns at once.",source:"@site/docs/Spark/gems/transform/bulk-column-expressions.md",sourceDirName:"Spark/gems/transform",slug:"/Spark/gems/transform/bulk-column-expressions",permalink:"/Spark/gems/transform/bulk-column-expressions",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"type",permalink:"/tags/type"},{label:"columns",permalink:"/tags/columns"}],version:"current",sidebarPosition:11,frontMatter:{sidebar_position:11,title:"BulkColumnExpressions",id:"bulk-column-expressions",description:"Change the data type of multiple columns at once.",tags:["gems","type","columns"]},sidebar:"defaultSidebar",previous:{title:"BulkColumnRename",permalink:"/Spark/gems/transform/bulk-column-rename"},next:{title:"DataCleansing",permalink:"/Spark/gems/transform/data-cleansing"}},i={},p=[{value:"Parameters",id:"parameters",level:2},{value:"Example",id:"example",level:2}],u={toc:p},m="wrapper";function 
c(e){let{components:t,...n}=e;return(0,a.yg)(m,(0,r.A)({},u,n,{components:t,mdxType:"MDXLayout"}),(0,a.yg)("h3",null,(0,a.yg)("span",{class:"badge rounded-pill text-bg-light"},"Spark Gem")),(0,a.yg)("p",null,"The BulkColumnExpressions Gem primarily lets you cast or change the data type of multiple columns at once. It provides additional functionality, including:"),(0,a.yg)("ul",null,(0,a.yg)("li",{parentName:"ul"},"Adding a prefix or suffix to selected columns."),(0,a.yg)("li",{parentName:"ul"},"Applying a custom expression to selected columns.")),(0,a.yg)("h2",{id:"parameters"},"Parameters"),(0,a.yg)("table",null,(0,a.yg)("thead",{parentName:"table"},(0,a.yg)("tr",{parentName:"thead"},(0,a.yg)("th",{parentName:"tr",align:null},"Parameter"),(0,a.yg)("th",{parentName:"tr",align:null},"Description"))),(0,a.yg)("tbody",{parentName:"table"},(0,a.yg)("tr",{parentName:"tbody"},(0,a.yg)("td",{parentName:"tr",align:null},"Data Type of the columns to do operations on"),(0,a.yg)("td",{parentName:"tr",align:null},"The data type of columns to select.")),(0,a.yg)("tr",{parentName:"tbody"},(0,a.yg)("td",{parentName:"tr",align:null},"Selected Columns"),(0,a.yg)("td",{parentName:"tr",align:null},"The columns on which to apply transformations.")),(0,a.yg)("tr",{parentName:"tbody"},(0,a.yg)("td",{parentName:"tr",align:null},"Change output column name"),(0,a.yg)("td",{parentName:"tr",align:null},"An option to add a prefix or suffix to the selected column names.")),(0,a.yg)("tr",{parentName:"tbody"},(0,a.yg)("td",{parentName:"tr",align:null},"Change output column type"),(0,a.yg)("td",{parentName:"tr",align:null},"The data type that the columns will be transformed into.")),(0,a.yg)("tr",{parentName:"tbody"},(0,a.yg)("td",{parentName:"tr",align:null},"Output Expression"),(0,a.yg)("td",{parentName:"tr",align:null},"A Spark SQL expression that can be applied to the selected columns. This field is required. 
If you only want to select the column, use ",(0,a.yg)("inlineCode",{parentName:"td"},"column_value")," as the expression.")))),(0,a.yg)("h2",{id:"example"},"Example"),(0,a.yg)("p",null,"Assume you have some columns in a table that represent zero-based indices and are stored as long data types. You want them to represent one-based indices and be stored as integers to optimize memory use."),(0,a.yg)("p",null,"Using the BulkColumnExpressions Gem, you can:"),(0,a.yg)("ul",null,(0,a.yg)("li",{parentName:"ul"},"Filter your columns by long data types."),(0,a.yg)("li",{parentName:"ul"},"Select the columns you wish to transform."),(0,a.yg)("li",{parentName:"ul"},"Cast the output column(s) to be integers."),(0,a.yg)("li",{parentName:"ul"},"Include ",(0,a.yg)("inlineCode",{parentName:"li"},"column_value + 1")," in the expression field to shift the indices.")))}c.isMDXComponent=!0}}]); \ No newline at end of file +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[18755],{15680:(e,t,n)=>{n.d(t,{xA:()=>u,yg:()=>g});var r=n(96540);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function l(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function o(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var i=r.createContext({}),p=function(e){var t=r.useContext(i),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},u=function(e){var t=p(e.components);return r.createElement(i.Provider,{value:t},e.children)},m="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},y=r.forwardRef((function(e,t){var 
n=e.components,a=e.mdxType,l=e.originalType,i=e.parentName,u=s(e,["components","mdxType","originalType","parentName"]),m=p(n),y=a,g=m["".concat(i,".").concat(y)]||m[y]||c[y]||l;return n?r.createElement(g,o(o({ref:t},u),{},{components:n})):r.createElement(g,o({ref:t},u))}));function g(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var l=n.length,o=new Array(l);o[0]=y;var s={};for(var i in t)hasOwnProperty.call(t,i)&&(s[i]=t[i]);s.originalType=e,s[m]="string"==typeof e?e:a,o[1]=s;for(var p=2;p{n.r(t),n.d(t,{assets:()=>i,contentTitle:()=>o,default:()=>c,frontMatter:()=>l,metadata:()=>s,toc:()=>p});var r=n(58168),a=(n(96540),n(15680));const l={sidebar_position:11,title:"BulkColumnExpressions",id:"bulk-column-expressions",description:"Change the data type of multiple columns at once.",tags:["gems","type","columns"]},o=void 0,s={unversionedId:"Spark/gems/transform/bulk-column-expressions",id:"Spark/gems/transform/bulk-column-expressions",title:"BulkColumnExpressions",description:"Change the data type of multiple columns at once.",source:"@site/docs/Spark/gems/transform/bulk-column-expressions.md",sourceDirName:"Spark/gems/transform",slug:"/Spark/gems/transform/bulk-column-expressions",permalink:"/Spark/gems/transform/bulk-column-expressions",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"type",permalink:"/tags/type"},{label:"columns",permalink:"/tags/columns"}],version:"current",sidebarPosition:11,frontMatter:{sidebar_position:11,title:"BulkColumnExpressions",id:"bulk-column-expressions",description:"Change the data type of multiple columns at once.",tags:["gems","type","columns"]},sidebar:"defaultSidebar",previous:{title:"BulkColumnRename",permalink:"/Spark/gems/transform/bulk-column-rename"},next:{title:"DataCleansing",permalink:"/Spark/gems/transform/data-cleansing"}},i={},p=[{value:"Parameters",id:"parameters",level:2},{value:"Example",id:"example",level:2}],u={toc:p},m="wrapper";function 
c(e){let{components:t,...n}=e;return(0,a.yg)(m,(0,r.A)({},u,n,{components:t,mdxType:"MDXLayout"}),(0,a.yg)("h3",null,(0,a.yg)("span",{class:"badge"},"Spark Gem")),(0,a.yg)("p",null,"The BulkColumnExpressions Gem primarily lets you cast or change the data type of multiple columns at once. It provides additional functionality, including:"),(0,a.yg)("ul",null,(0,a.yg)("li",{parentName:"ul"},"Adding a prefix or suffix to selected columns."),(0,a.yg)("li",{parentName:"ul"},"Applying a custom expression to selected columns.")),(0,a.yg)("h2",{id:"parameters"},"Parameters"),(0,a.yg)("table",null,(0,a.yg)("thead",{parentName:"table"},(0,a.yg)("tr",{parentName:"thead"},(0,a.yg)("th",{parentName:"tr",align:null},"Parameter"),(0,a.yg)("th",{parentName:"tr",align:null},"Description"))),(0,a.yg)("tbody",{parentName:"table"},(0,a.yg)("tr",{parentName:"tbody"},(0,a.yg)("td",{parentName:"tr",align:null},"Data Type of the columns to do operations on"),(0,a.yg)("td",{parentName:"tr",align:null},"The data type of columns to select.")),(0,a.yg)("tr",{parentName:"tbody"},(0,a.yg)("td",{parentName:"tr",align:null},"Selected Columns"),(0,a.yg)("td",{parentName:"tr",align:null},"The columns on which to apply transformations.")),(0,a.yg)("tr",{parentName:"tbody"},(0,a.yg)("td",{parentName:"tr",align:null},"Change output column name"),(0,a.yg)("td",{parentName:"tr",align:null},"An option to add a prefix or suffix to the selected column names.")),(0,a.yg)("tr",{parentName:"tbody"},(0,a.yg)("td",{parentName:"tr",align:null},"Change output column type"),(0,a.yg)("td",{parentName:"tr",align:null},"The data type that the columns will be transformed into.")),(0,a.yg)("tr",{parentName:"tbody"},(0,a.yg)("td",{parentName:"tr",align:null},"Output Expression"),(0,a.yg)("td",{parentName:"tr",align:null},"A Spark SQL expression that can be applied to the selected columns. This field is required. 
If you only want to select the column, use ",(0,a.yg)("inlineCode",{parentName:"td"},"column_value")," as the expression.")))),(0,a.yg)("h2",{id:"example"},"Example"),(0,a.yg)("p",null,"Assume you have some columns in a table that represent zero-based indices and are stored as long data types. You want them to represent one-based indices and be stored as integers to optimize memory use."),(0,a.yg)("p",null,"Using the BulkColumnExpressions Gem, you can:"),(0,a.yg)("ul",null,(0,a.yg)("li",{parentName:"ul"},"Filter your columns by long data types."),(0,a.yg)("li",{parentName:"ul"},"Select the columns you wish to transform."),(0,a.yg)("li",{parentName:"ul"},"Cast the output column(s) to be integers."),(0,a.yg)("li",{parentName:"ul"},"Include ",(0,a.yg)("inlineCode",{parentName:"li"},"column_value + 1")," in the expression field to shift the indices.")))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/db3522d0.6c7d2099.js b/assets/js/db3522d0.4b7a81c5.js similarity index 62% rename from assets/js/db3522d0.6c7d2099.js rename to assets/js/db3522d0.4b7a81c5.js index 3e4b80325c..f2dcd5fdb6 100644 --- a/assets/js/db3522d0.6c7d2099.js +++ b/assets/js/db3522d0.4b7a81c5.js @@ -1 +1 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[489],{15680:(e,t,n)=>{n.d(t,{xA:()=>m,yg:()=>y});var a=n(96540);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),c=function(e){var t=a.useContext(s),n=t;return 
e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},m=function(e){var t=c(e.components);return a.createElement(s.Provider,{value:t},e.children)},p="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},d=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,o=e.originalType,s=e.parentName,m=l(e,["components","mdxType","originalType","parentName"]),p=c(n),d=r,y=p["".concat(s,".").concat(d)]||p[d]||u[d]||o;return n?a.createElement(y,i(i({ref:t},m),{},{components:n})):a.createElement(y,i({ref:t},m))}));function y(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=n.length,i=new Array(o);i[0]=d;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[p]="string"==typeof e?e:r,i[1]=l;for(var c=2;c{n.r(t),n.d(t,{assets:()=>s,contentTitle:()=>i,default:()=>u,frontMatter:()=>o,metadata:()=>l,toc:()=>c});var a=n(58168),r=(n(96540),n(15680));const o={sidebar_position:13,title:"DynamicSelect",id:"dynamic-select",description:"Dynamically filter columns of your dataset based on a set of conditions.",tags:["gems","filter","dynamic"]},i=void 0,l={unversionedId:"Spark/gems/transform/dynamic-select",id:"Spark/gems/transform/dynamic-select",title:"DynamicSelect",description:"Dynamically filter columns of your dataset based on a set of conditions.",source:"@site/docs/Spark/gems/transform/dynamic-select.md",sourceDirName:"Spark/gems/transform",slug:"/Spark/gems/transform/dynamic-select",permalink:"/Spark/gems/transform/dynamic-select",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"filter",permalink:"/tags/filter"},{label:"dynamic",permalink:"/tags/dynamic"}],version:"current",sidebarPosition:13,frontMatter:{sidebar_position:13,title:"DynamicSelect",id:"dynamic-select",description:"Dynamically filter columns of your dataset based on a set of 
conditions.",tags:["gems","filter","dynamic"]},sidebar:"defaultSidebar",previous:{title:"DataCleansing",permalink:"/Spark/gems/transform/data-cleansing"},next:{title:"Unpivot",permalink:"/Spark/gems/transform/unpivot"}},s={},c=[{value:"Configuration",id:"configuration",level:2},{value:"Examples",id:"examples",level:2},{value:"Remove date columns using field type",id:"remove-date-columns-using-field-type",level:3},{value:"Remove date columns with an expression",id:"remove-date-columns-with-an-expression",level:3}],m={toc:c},p="wrapper";function u(e){let{components:t,...o}=e;return(0,r.yg)(p,(0,a.A)({},m,o,{components:t,mdxType:"MDXLayout"}),(0,r.yg)("h3",null,(0,r.yg)("span",{class:"badge rounded-pill text-bg-light"},"Spark Gem")),(0,r.yg)("p",null,"Use the DynamicSelect Gem to dynamically filter columns of your Dataset based on a set of conditions."),(0,r.yg)("h2",{id:"configuration"},"Configuration"),(0,r.yg)("p",null,"There are two ways to configure the DynamicSelect."),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Configuration"),(0,r.yg)("th",{parentName:"tr",align:null},"Description"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Select field types"),(0,r.yg)("td",{parentName:"tr",align:null},"Choose one or more types of columns to keep in the Dataset, such as string, decimal, or date.")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Select via expression"),(0,r.yg)("td",{parentName:"tr",align:null},"Create an expression that limits the type of columns to keep in the Dataset.")))),(0,r.yg)("h2",{id:"examples"},"Examples"),(0,r.yg)("p",null,"You\u2019ll use DynamicSelect when you want to avoid hard-coding your choice of columns. 
In other words, rather than define each column to keep in your Pipeline, you let the system automatically choose the columns based on certain conditions or rules."),(0,r.yg)("h3",{id:"remove-date-columns-using-field-type"},"Remove date columns using field type"),(0,r.yg)("p",null,"Assume you would like to remove irrelevant date and timestamp columns from your Dataset. You can do so with the ",(0,r.yg)("strong",{parentName:"p"},"Select field types")," method by selecting all field types to maintain, except for date and timestamp."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Keep all columns except Date and Timestamp column using the visual interface",src:n(88104).A,width:"2620",height:"1508"})),(0,r.yg)("h3",{id:"remove-date-columns-with-an-expression"},"Remove date columns with an expression"),(0,r.yg)("p",null,"Using the same example, you can accomplish the same task with the ",(0,r.yg)("strong",{parentName:"p"},"Select via expression")," method by inputting the the expression ",(0,r.yg)("inlineCode",{parentName:"p"},"column_type NOT IN ('date', 'timestamp')"),"."))}u.isMDXComponent=!0},88104:(e,t,n)=>{n.d(t,{A:()=>a});const a=n.p+"assets/images/remove-date-timestamp-3a1ea91b088d881397d1d64a7f02d9f7.png"}}]); \ No newline at end of file +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[489],{15680:(e,t,n)=>{n.d(t,{xA:()=>m,yg:()=>y});var a=n(96540);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),c=function(e){var 
t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},m=function(e){var t=c(e.components);return a.createElement(s.Provider,{value:t},e.children)},p="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},d=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,o=e.originalType,s=e.parentName,m=l(e,["components","mdxType","originalType","parentName"]),p=c(n),d=r,y=p["".concat(s,".").concat(d)]||p[d]||u[d]||o;return n?a.createElement(y,i(i({ref:t},m),{},{components:n})):a.createElement(y,i({ref:t},m))}));function y(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=n.length,i=new Array(o);i[0]=d;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[p]="string"==typeof e?e:r,i[1]=l;for(var c=2;c{n.r(t),n.d(t,{assets:()=>s,contentTitle:()=>i,default:()=>u,frontMatter:()=>o,metadata:()=>l,toc:()=>c});var a=n(58168),r=(n(96540),n(15680));const o={sidebar_position:13,title:"DynamicSelect",id:"dynamic-select",description:"Dynamically filter columns of your dataset based on a set of conditions.",tags:["gems","filter","dynamic"]},i=void 0,l={unversionedId:"Spark/gems/transform/dynamic-select",id:"Spark/gems/transform/dynamic-select",title:"DynamicSelect",description:"Dynamically filter columns of your dataset based on a set of conditions.",source:"@site/docs/Spark/gems/transform/dynamic-select.md",sourceDirName:"Spark/gems/transform",slug:"/Spark/gems/transform/dynamic-select",permalink:"/Spark/gems/transform/dynamic-select",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"filter",permalink:"/tags/filter"},{label:"dynamic",permalink:"/tags/dynamic"}],version:"current",sidebarPosition:13,frontMatter:{sidebar_position:13,title:"DynamicSelect",id:"dynamic-select",description:"Dynamically filter columns of your dataset based on a set of 
conditions.",tags:["gems","filter","dynamic"]},sidebar:"defaultSidebar",previous:{title:"DataCleansing",permalink:"/Spark/gems/transform/data-cleansing"},next:{title:"Unpivot",permalink:"/Spark/gems/transform/unpivot"}},s={},c=[{value:"Configuration",id:"configuration",level:2},{value:"Examples",id:"examples",level:2},{value:"Remove date columns using field type",id:"remove-date-columns-using-field-type",level:3},{value:"Remove date columns with an expression",id:"remove-date-columns-with-an-expression",level:3}],m={toc:c},p="wrapper";function u(e){let{components:t,...o}=e;return(0,r.yg)(p,(0,a.A)({},m,o,{components:t,mdxType:"MDXLayout"}),(0,r.yg)("h3",null,(0,r.yg)("span",{class:"badge"},"Spark Gem")),(0,r.yg)("p",null,"Use the DynamicSelect Gem to dynamically filter columns of your Dataset based on a set of conditions."),(0,r.yg)("h2",{id:"configuration"},"Configuration"),(0,r.yg)("p",null,"There are two ways to configure the DynamicSelect."),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Configuration"),(0,r.yg)("th",{parentName:"tr",align:null},"Description"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Select field types"),(0,r.yg)("td",{parentName:"tr",align:null},"Choose one or more types of columns to keep in the Dataset, such as string, decimal, or date.")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"Select via expression"),(0,r.yg)("td",{parentName:"tr",align:null},"Create an expression that limits the type of columns to keep in the Dataset.")))),(0,r.yg)("h2",{id:"examples"},"Examples"),(0,r.yg)("p",null,"You\u2019ll use DynamicSelect when you want to avoid hard-coding your choice of columns. 
In other words, rather than define each column to keep in your Pipeline, you let the system automatically choose the columns based on certain conditions or rules."),(0,r.yg)("h3",{id:"remove-date-columns-using-field-type"},"Remove date columns using field type"),(0,r.yg)("p",null,"Assume you would like to remove irrelevant date and timestamp columns from your Dataset. You can do so with the ",(0,r.yg)("strong",{parentName:"p"},"Select field types")," method by selecting all field types to maintain, except for date and timestamp."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Keep all columns except Date and Timestamp column using the visual interface",src:n(88104).A,width:"2620",height:"1508"})),(0,r.yg)("h3",{id:"remove-date-columns-with-an-expression"},"Remove date columns with an expression"),(0,r.yg)("p",null,"Using the same example, you can accomplish the same task with the ",(0,r.yg)("strong",{parentName:"p"},"Select via expression")," method by inputting the the expression ",(0,r.yg)("inlineCode",{parentName:"p"},"column_type NOT IN ('date', 'timestamp')"),"."))}u.isMDXComponent=!0},88104:(e,t,n)=>{n.d(t,{A:()=>a});const a=n.p+"assets/images/remove-date-timestamp-3a1ea91b088d881397d1d64a7f02d9f7.png"}}]); \ No newline at end of file diff --git a/assets/js/dca7a11b.199de914.js b/assets/js/dca7a11b.687f5c42.js similarity index 67% rename from assets/js/dca7a11b.199de914.js rename to assets/js/dca7a11b.687f5c42.js index 4d17288d8a..4ba5afca96 100644 --- a/assets/js/dca7a11b.199de914.js +++ b/assets/js/dca7a11b.687f5c42.js @@ -1 +1 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[12179],{15680:(e,t,r)=>{r.d(t,{xA:()=>c,yg:()=>b});var a=r(96540);function n(e,t,r){return t in e?Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[t]=r,e}function l(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return 
Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,a)}return r}function o(e){for(var t=1;t=0||(n[r]=e[r]);return n}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(n[r]=e[r])}return n}var s=a.createContext({}),u=function(e){var t=a.useContext(s),r=t;return e&&(r="function"==typeof e?e(t):o(o({},t),e)),r},c=function(e){var t=u(e.components);return a.createElement(s.Provider,{value:t},e.children)},p="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var r=e.components,n=e.mdxType,l=e.originalType,s=e.parentName,c=i(e,["components","mdxType","originalType","parentName"]),p=u(r),m=n,b=p["".concat(s,".").concat(m)]||p[m]||d[m]||l;return r?a.createElement(b,o(o({ref:t},c),{},{components:r})):a.createElement(b,o({ref:t},c))}));function b(e,t){var r=arguments,n=t&&t.mdxType;if("string"==typeof e||n){var l=r.length,o=new Array(l);o[0]=m;var i={};for(var s in t)hasOwnProperty.call(t,s)&&(i[s]=t[s]);i.originalType=e,i[p]="string"==typeof e?e:n,o[1]=i;for(var u=2;u{r.d(t,{A:()=>o});var a=r(96540),n=r(20053);const l={tabItem:"tabItem_Ymn6"};function o(e){let{children:t,hidden:r,className:o}=e;return a.createElement("div",{role:"tabpanel",className:(0,n.A)(l.tabItem,o),hidden:r},t)}},11470:(e,t,r)=>{r.d(t,{A:()=>k});var a=r(58168),n=r(96540),l=r(20053),o=r(23104),i=r(56347),s=r(57485),u=r(31682),c=r(89466);function p(e){return function(e){return n.Children.map(e,(e=>{if(!e||(0,n.isValidElement)(e)&&function(e){const{props:t}=e;return!!t&&"object"==typeof t&&"value"in t}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" 
prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:t,label:r,attributes:a,default:n}}=e;return{value:t,label:r,attributes:a,default:n}}))}function d(e){const{values:t,children:r}=e;return(0,n.useMemo)((()=>{const e=t??p(r);return function(e){const t=(0,u.X)(e,((e,t)=>e.value===t.value));if(t.length>0)throw new Error(`Docusaurus error: Duplicate values "${t.map((e=>e.value)).join(", ")}" found in . Every value needs to be unique.`)}(e),e}),[t,r])}function m(e){let{value:t,tabValues:r}=e;return r.some((e=>e.value===t))}function b(e){let{queryString:t=!1,groupId:r}=e;const a=(0,i.W6)(),l=function(e){let{queryString:t=!1,groupId:r}=e;if("string"==typeof t)return t;if(!1===t)return null;if(!0===t&&!r)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. You can also provide an explicit value such as queryString="my-search-param".');return r??null}({queryString:t,groupId:r});return[(0,s.aZ)(l),(0,n.useCallback)((e=>{if(!l)return;const t=new URLSearchParams(a.location.search);t.set(l,e),a.replace({...a.location,search:t.toString()})}),[l,a])]}function f(e){const{defaultValue:t,queryString:r=!1,groupId:a}=e,l=d(e),[o,i]=(0,n.useState)((()=>function(e){let{defaultValue:t,tabValues:r}=e;if(0===r.length)throw new Error("Docusaurus error: the component requires at least one children component");if(t){if(!m({value:t,tabValues:r}))throw new Error(`Docusaurus error: The has a defaultValue "${t}" but none of its children has the corresponding value. Available values are: ${r.map((e=>e.value)).join(", ")}. 
If you intend to show no default tab, use defaultValue={null} instead.`);return t}const a=r.find((e=>e.default))??r[0];if(!a)throw new Error("Unexpected error: 0 tabValues");return a.value}({defaultValue:t,tabValues:l}))),[s,u]=b({queryString:r,groupId:a}),[p,f]=function(e){let{groupId:t}=e;const r=function(e){return e?`docusaurus.tab.${e}`:null}(t),[a,l]=(0,c.Dv)(r);return[a,(0,n.useCallback)((e=>{r&&l.set(e)}),[r,l])]}({groupId:a}),g=(()=>{const e=s??p;return m({value:e,tabValues:l})?e:null})();(0,n.useLayoutEffect)((()=>{g&&i(g)}),[g]);return{selectedValue:o,selectValue:(0,n.useCallback)((e=>{if(!m({value:e,tabValues:l}))throw new Error(`Can't select invalid tab value=${e}`);i(e),u(e),f(e)}),[u,f,l]),tabValues:l}}var g=r(92303);const y={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function h(e){let{className:t,block:r,selectedValue:i,selectValue:s,tabValues:u}=e;const c=[],{blockElementScrollPositionUntilNextRender:p}=(0,o.a_)(),d=e=>{const t=e.currentTarget,r=c.indexOf(t),a=u[r].value;a!==i&&(p(t),s(a))},m=e=>{let t=null;switch(e.key){case"Enter":d(e);break;case"ArrowRight":{const r=c.indexOf(e.currentTarget)+1;t=c[r]??c[0];break}case"ArrowLeft":{const r=c.indexOf(e.currentTarget)-1;t=c[r]??c[c.length-1];break}}t?.focus()};return n.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,l.A)("tabs",{"tabs--block":r},t)},u.map((e=>{let{value:t,label:r,attributes:o}=e;return n.createElement("li",(0,a.A)({role:"tab",tabIndex:i===t?0:-1,"aria-selected":i===t,key:t,ref:e=>c.push(e),onKeyDown:m,onClick:d},o,{className:(0,l.A)("tabs__item",y.tabItem,o?.className,{"tabs__item--active":i===t})}),r??t)})))}function v(e){let{lazy:t,children:r,selectedValue:a}=e;const l=(Array.isArray(r)?r:[r]).filter(Boolean);if(t){const e=l.find((e=>e.props.value===a));return e?(0,n.cloneElement)(e,{className:"margin-top--md"}):null}return 
n.createElement("div",{className:"margin-top--md"},l.map(((e,t)=>(0,n.cloneElement)(e,{key:t,hidden:e.props.value!==a}))))}function w(e){const t=f(e);return n.createElement("div",{className:(0,l.A)("tabs-container",y.tabList)},n.createElement(h,(0,a.A)({},e,t)),n.createElement(v,(0,a.A)({},e,t)))}function k(e){const t=(0,g.A)();return n.createElement(w,(0,a.A)({key:String(t)},e))}},94587:(e,t,r)=>{r.r(t),r.d(t,{assets:()=>c,contentTitle:()=>s,default:()=>b,frontMatter:()=>i,metadata:()=>u,toc:()=>p});var a=r(58168),n=(r(96540),r(15680)),l=r(11470),o=r(19365);const i={sidebar_position:3,title:"RowDistributor",id:"row-distributor",description:"Create multiple DataFrames based on filter conditions",tags:["gems","split","filter","row distributor"]},s=void 0,u={unversionedId:"Spark/gems/join-split/row-distributor",id:"Spark/gems/join-split/row-distributor",title:"RowDistributor",description:"Create multiple DataFrames based on filter conditions",source:"@site/docs/Spark/gems/join-split/row-distributor.md",sourceDirName:"Spark/gems/join-split",slug:"/Spark/gems/join-split/row-distributor",permalink:"/Spark/gems/join-split/row-distributor",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"split",permalink:"/tags/split"},{label:"filter",permalink:"/tags/filter"},{label:"row distributor",permalink:"/tags/row-distributor"}],version:"current",sidebarPosition:3,frontMatter:{sidebar_position:3,title:"RowDistributor",id:"row-distributor",description:"Create multiple DataFrames based on filter conditions",tags:["gems","split","filter","row distributor"]},sidebar:"defaultSidebar",previous:{title:"Repartition",permalink:"/Spark/gems/join-split/Repartition"},next:{title:"CompareColumns",permalink:"/Spark/gems/join-split/compare-columns"}},c={},p=[{value:"Parameters",id:"parameters",level:3},{value:"Example",id:"example",level:3},{value:"Generated Code",id:"generated-code",level:3}],d={toc:p},m="wrapper";function 
b(e){let{components:t,...i}=e;return(0,n.yg)(m,(0,a.A)({},d,i,{components:t,mdxType:"MDXLayout"}),(0,n.yg)("h3",null,(0,n.yg)("span",{class:"badge rounded-pill text-bg-light"},"Spark Gem")),(0,n.yg)("p",null,"Use the RowDistributor Gem to create multiple DataFrames based on provided filter conditions from an input DataFrame."),(0,n.yg)("p",null,"This is useful for cases where rows from the input DataFrame needs to be distributed into multiple DataFrames in different ways for downstream Gems."),(0,n.yg)("h3",{id:"parameters"},"Parameters"),(0,n.yg)("table",null,(0,n.yg)("thead",{parentName:"table"},(0,n.yg)("tr",{parentName:"thead"},(0,n.yg)("th",{parentName:"tr",align:null},"Parameter"),(0,n.yg)("th",{parentName:"tr",align:null},"Description"),(0,n.yg)("th",{parentName:"tr",align:null},"Required"))),(0,n.yg)("tbody",{parentName:"table"},(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"DataFrame"),(0,n.yg)("td",{parentName:"tr",align:null},"Input DataFrame for which rows needs to be distributed into multiple DataFrames"),(0,n.yg)("td",{parentName:"tr",align:null},"True")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"Filter Conditions"),(0,n.yg)("td",{parentName:"tr",align:null},"Boolean Type column or boolean expression for each output tab. 
Supports SQL, Python and Scala expressions"),(0,n.yg)("td",{parentName:"tr",align:null},"True")))),(0,n.yg)("h3",{id:"example"},"Example"),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"Row distributor 1",src:r(60523).A,width:"1748",height:"850"})),(0,n.yg)("admonition",{type:"info"},(0,n.yg)("p",{parentName:"admonition"},"Number of outputs can be changed as needed by clicking the ",(0,n.yg)("inlineCode",{parentName:"p"},"+")," button.")),(0,n.yg)("h3",{id:"generated-code"},"Generated Code"),(0,n.yg)(l.A,{mdxType:"Tabs"},(0,n.yg)(o.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-py"},'def RowDistributor(spark: SparkSession, in0: DataFrame) -> (DataFrame, DataFrame, DataFrame):\n df1 = in0.filter((col("order_status") == lit("Started")))\n df2 = in0.filter((col("order_status") == lit("Approved")))\n df3 = in0.filter((col("order_status") == lit("Finished")))\n\n return df1, df2, df3\n'))),(0,n.yg)(o.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-scala"},'object RowDistributor {\n\n def apply(\n spark: SparkSession,\n in: DataFrame\n ): (DataFrame, DataFrame, DataFrame) =\n (in.filter(col("order_status") === lit("Started")),\n in.filter(col("order_status") === lit("Approved")),\n in.filter(col("order_status") === lit("Finished"))\n )\n\n}\n')))))}b.isMDXComponent=!0},60523:(e,t,r)=>{r.d(t,{A:()=>a});const a=r.p+"assets/images/rowdistributor_eg_1-45da3213c2f5f108833af1d18986f6d7.png"}}]); \ No newline at end of file +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[12179],{15680:(e,t,r)=>{r.d(t,{xA:()=>c,yg:()=>f});var a=r(96540);function n(e,t,r){return t in e?Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[t]=r,e}function l(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return 
Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,a)}return r}function o(e){for(var t=1;t=0||(n[r]=e[r]);return n}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(n[r]=e[r])}return n}var s=a.createContext({}),u=function(e){var t=a.useContext(s),r=t;return e&&(r="function"==typeof e?e(t):o(o({},t),e)),r},c=function(e){var t=u(e.components);return a.createElement(s.Provider,{value:t},e.children)},p="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var r=e.components,n=e.mdxType,l=e.originalType,s=e.parentName,c=i(e,["components","mdxType","originalType","parentName"]),p=u(r),m=n,f=p["".concat(s,".").concat(m)]||p[m]||d[m]||l;return r?a.createElement(f,o(o({ref:t},c),{},{components:r})):a.createElement(f,o({ref:t},c))}));function f(e,t){var r=arguments,n=t&&t.mdxType;if("string"==typeof e||n){var l=r.length,o=new Array(l);o[0]=m;var i={};for(var s in t)hasOwnProperty.call(t,s)&&(i[s]=t[s]);i.originalType=e,i[p]="string"==typeof e?e:n,o[1]=i;for(var u=2;u{r.d(t,{A:()=>o});var a=r(96540),n=r(20053);const l={tabItem:"tabItem_Ymn6"};function o(e){let{children:t,hidden:r,className:o}=e;return a.createElement("div",{role:"tabpanel",className:(0,n.A)(l.tabItem,o),hidden:r},t)}},11470:(e,t,r)=>{r.d(t,{A:()=>k});var a=r(58168),n=r(96540),l=r(20053),o=r(23104),i=r(56347),s=r(57485),u=r(31682),c=r(89466);function p(e){return function(e){return n.Children.map(e,(e=>{if(!e||(0,n.isValidElement)(e)&&function(e){const{props:t}=e;return!!t&&"object"==typeof t&&"value"in t}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" 
prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:t,label:r,attributes:a,default:n}}=e;return{value:t,label:r,attributes:a,default:n}}))}function d(e){const{values:t,children:r}=e;return(0,n.useMemo)((()=>{const e=t??p(r);return function(e){const t=(0,u.X)(e,((e,t)=>e.value===t.value));if(t.length>0)throw new Error(`Docusaurus error: Duplicate values "${t.map((e=>e.value)).join(", ")}" found in . Every value needs to be unique.`)}(e),e}),[t,r])}function m(e){let{value:t,tabValues:r}=e;return r.some((e=>e.value===t))}function f(e){let{queryString:t=!1,groupId:r}=e;const a=(0,i.W6)(),l=function(e){let{queryString:t=!1,groupId:r}=e;if("string"==typeof t)return t;if(!1===t)return null;if(!0===t&&!r)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. You can also provide an explicit value such as queryString="my-search-param".');return r??null}({queryString:t,groupId:r});return[(0,s.aZ)(l),(0,n.useCallback)((e=>{if(!l)return;const t=new URLSearchParams(a.location.search);t.set(l,e),a.replace({...a.location,search:t.toString()})}),[l,a])]}function b(e){const{defaultValue:t,queryString:r=!1,groupId:a}=e,l=d(e),[o,i]=(0,n.useState)((()=>function(e){let{defaultValue:t,tabValues:r}=e;if(0===r.length)throw new Error("Docusaurus error: the component requires at least one children component");if(t){if(!m({value:t,tabValues:r}))throw new Error(`Docusaurus error: The has a defaultValue "${t}" but none of its children has the corresponding value. Available values are: ${r.map((e=>e.value)).join(", ")}. 
If you intend to show no default tab, use defaultValue={null} instead.`);return t}const a=r.find((e=>e.default))??r[0];if(!a)throw new Error("Unexpected error: 0 tabValues");return a.value}({defaultValue:t,tabValues:l}))),[s,u]=f({queryString:r,groupId:a}),[p,b]=function(e){let{groupId:t}=e;const r=function(e){return e?`docusaurus.tab.${e}`:null}(t),[a,l]=(0,c.Dv)(r);return[a,(0,n.useCallback)((e=>{r&&l.set(e)}),[r,l])]}({groupId:a}),g=(()=>{const e=s??p;return m({value:e,tabValues:l})?e:null})();(0,n.useLayoutEffect)((()=>{g&&i(g)}),[g]);return{selectedValue:o,selectValue:(0,n.useCallback)((e=>{if(!m({value:e,tabValues:l}))throw new Error(`Can't select invalid tab value=${e}`);i(e),u(e),b(e)}),[u,b,l]),tabValues:l}}var g=r(92303);const y={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function h(e){let{className:t,block:r,selectedValue:i,selectValue:s,tabValues:u}=e;const c=[],{blockElementScrollPositionUntilNextRender:p}=(0,o.a_)(),d=e=>{const t=e.currentTarget,r=c.indexOf(t),a=u[r].value;a!==i&&(p(t),s(a))},m=e=>{let t=null;switch(e.key){case"Enter":d(e);break;case"ArrowRight":{const r=c.indexOf(e.currentTarget)+1;t=c[r]??c[0];break}case"ArrowLeft":{const r=c.indexOf(e.currentTarget)-1;t=c[r]??c[c.length-1];break}}t?.focus()};return n.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,l.A)("tabs",{"tabs--block":r},t)},u.map((e=>{let{value:t,label:r,attributes:o}=e;return n.createElement("li",(0,a.A)({role:"tab",tabIndex:i===t?0:-1,"aria-selected":i===t,key:t,ref:e=>c.push(e),onKeyDown:m,onClick:d},o,{className:(0,l.A)("tabs__item",y.tabItem,o?.className,{"tabs__item--active":i===t})}),r??t)})))}function v(e){let{lazy:t,children:r,selectedValue:a}=e;const l=(Array.isArray(r)?r:[r]).filter(Boolean);if(t){const e=l.find((e=>e.props.value===a));return e?(0,n.cloneElement)(e,{className:"margin-top--md"}):null}return 
n.createElement("div",{className:"margin-top--md"},l.map(((e,t)=>(0,n.cloneElement)(e,{key:t,hidden:e.props.value!==a}))))}function w(e){const t=b(e);return n.createElement("div",{className:(0,l.A)("tabs-container",y.tabList)},n.createElement(h,(0,a.A)({},e,t)),n.createElement(v,(0,a.A)({},e,t)))}function k(e){const t=(0,g.A)();return n.createElement(w,(0,a.A)({key:String(t)},e))}},94587:(e,t,r)=>{r.r(t),r.d(t,{assets:()=>c,contentTitle:()=>s,default:()=>f,frontMatter:()=>i,metadata:()=>u,toc:()=>p});var a=r(58168),n=(r(96540),r(15680)),l=r(11470),o=r(19365);const i={sidebar_position:3,title:"RowDistributor",id:"row-distributor",description:"Create multiple DataFrames based on filter conditions",tags:["gems","split","filter","row distributor"]},s=void 0,u={unversionedId:"Spark/gems/join-split/row-distributor",id:"Spark/gems/join-split/row-distributor",title:"RowDistributor",description:"Create multiple DataFrames based on filter conditions",source:"@site/docs/Spark/gems/join-split/row-distributor.md",sourceDirName:"Spark/gems/join-split",slug:"/Spark/gems/join-split/row-distributor",permalink:"/Spark/gems/join-split/row-distributor",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"split",permalink:"/tags/split"},{label:"filter",permalink:"/tags/filter"},{label:"row distributor",permalink:"/tags/row-distributor"}],version:"current",sidebarPosition:3,frontMatter:{sidebar_position:3,title:"RowDistributor",id:"row-distributor",description:"Create multiple DataFrames based on filter conditions",tags:["gems","split","filter","row distributor"]},sidebar:"defaultSidebar",previous:{title:"Repartition",permalink:"/Spark/gems/join-split/Repartition"},next:{title:"CompareColumns",permalink:"/Spark/gems/join-split/compare-columns"}},c={},p=[{value:"Parameters",id:"parameters",level:3},{value:"Example",id:"example",level:3},{value:"Generated Code",id:"generated-code",level:3}],d={toc:p},m="wrapper";function 
f(e){let{components:t,...i}=e;return(0,n.yg)(m,(0,a.A)({},d,i,{components:t,mdxType:"MDXLayout"}),(0,n.yg)("h3",null,(0,n.yg)("span",{class:"badge"},"Spark Gem")),(0,n.yg)("p",null,"Use the RowDistributor Gem to create multiple DataFrames based on provided filter conditions from an input DataFrame."),(0,n.yg)("p",null,"This is useful for cases where rows from the input DataFrame needs to be distributed into multiple DataFrames in different ways for downstream Gems."),(0,n.yg)("h3",{id:"parameters"},"Parameters"),(0,n.yg)("table",null,(0,n.yg)("thead",{parentName:"table"},(0,n.yg)("tr",{parentName:"thead"},(0,n.yg)("th",{parentName:"tr",align:null},"Parameter"),(0,n.yg)("th",{parentName:"tr",align:null},"Description"),(0,n.yg)("th",{parentName:"tr",align:null},"Required"))),(0,n.yg)("tbody",{parentName:"table"},(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"DataFrame"),(0,n.yg)("td",{parentName:"tr",align:null},"Input DataFrame for which rows needs to be distributed into multiple DataFrames"),(0,n.yg)("td",{parentName:"tr",align:null},"True")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"Filter Conditions"),(0,n.yg)("td",{parentName:"tr",align:null},"Boolean Type column or boolean expression for each output tab. 
Supports SQL, Python and Scala expressions"),(0,n.yg)("td",{parentName:"tr",align:null},"True")))),(0,n.yg)("h3",{id:"example"},"Example"),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"Row distributor 1",src:r(60523).A,width:"1748",height:"850"})),(0,n.yg)("admonition",{type:"info"},(0,n.yg)("p",{parentName:"admonition"},"Number of outputs can be changed as needed by clicking the ",(0,n.yg)("inlineCode",{parentName:"p"},"+")," button.")),(0,n.yg)("h3",{id:"generated-code"},"Generated Code"),(0,n.yg)(l.A,{mdxType:"Tabs"},(0,n.yg)(o.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-py"},'def RowDistributor(spark: SparkSession, in0: DataFrame) -> (DataFrame, DataFrame, DataFrame):\n df1 = in0.filter((col("order_status") == lit("Started")))\n df2 = in0.filter((col("order_status") == lit("Approved")))\n df3 = in0.filter((col("order_status") == lit("Finished")))\n\n return df1, df2, df3\n'))),(0,n.yg)(o.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-scala"},'object RowDistributor {\n\n def apply(\n spark: SparkSession,\n in: DataFrame\n ): (DataFrame, DataFrame, DataFrame) =\n (in.filter(col("order_status") === lit("Started")),\n in.filter(col("order_status") === lit("Approved")),\n in.filter(col("order_status") === lit("Finished"))\n )\n\n}\n')))))}f.isMDXComponent=!0},60523:(e,t,r)=>{r.d(t,{A:()=>a});const a=r.p+"assets/images/rowdistributor_eg_1-45da3213c2f5f108833af1d18986f6d7.png"}}]); \ No newline at end of file diff --git a/assets/js/dda96e46.272e2c52.js b/assets/js/dda96e46.272e2c52.js new file mode 100644 index 0000000000..c7eafce97c --- /dev/null +++ b/assets/js/dda96e46.272e2c52.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[33852],{15680:(e,a,t)=>{t.d(a,{xA:()=>g,yg:()=>c});var r=t(96540);function n(e,a,t){return a in 
e?Object.defineProperty(e,a,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[a]=t,e}function o(e,a){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);a&&(r=r.filter((function(a){return Object.getOwnPropertyDescriptor(e,a).enumerable}))),t.push.apply(t,r)}return t}function l(e){for(var a=1;a=0||(n[t]=e[t]);return n}(e,a);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(n[t]=e[t])}return n}var s=r.createContext({}),u=function(e){var a=r.useContext(s),t=a;return e&&(t="function"==typeof e?e(a):l(l({},a),e)),t},g=function(e){var a=u(e.components);return r.createElement(s.Provider,{value:a},e.children)},p="mdxType",d={inlineCode:"code",wrapper:function(e){var a=e.children;return r.createElement(r.Fragment,{},a)}},m=r.forwardRef((function(e,a){var t=e.components,n=e.mdxType,o=e.originalType,s=e.parentName,g=i(e,["components","mdxType","originalType","parentName"]),p=u(t),m=n,c=p["".concat(s,".").concat(m)]||p[m]||d[m]||o;return t?r.createElement(c,l(l({ref:a},g),{},{components:t})):r.createElement(c,l({ref:a},g))}));function c(e,a){var t=arguments,n=a&&a.mdxType;if("string"==typeof e||n){var o=t.length,l=new Array(o);l[0]=m;var i={};for(var s in a)hasOwnProperty.call(a,s)&&(i[s]=a[s]);i.originalType=e,i[p]="string"==typeof e?e:n,l[1]=i;for(var u=2;u{t.d(a,{A:()=>l});var r=t(96540),n=t(20053);const o={tabItem:"tabItem_Ymn6"};function l(e){let{children:a,hidden:t,className:l}=e;return r.createElement("div",{role:"tabpanel",className:(0,n.A)(o.tabItem,l),hidden:t},a)}},11470:(e,a,t)=>{t.d(a,{A:()=>_});var r=t(58168),n=t(96540),o=t(20053),l=t(23104),i=t(56347),s=t(57485),u=t(31682),g=t(89466);function p(e){return function(e){return n.Children.map(e,(e=>{if(!e||(0,n.isValidElement)(e)&&function(e){const{props:a}=e;return!!a&&"object"==typeof a&&"value"in a}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof 
e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:a,label:t,attributes:r,default:n}}=e;return{value:a,label:t,attributes:r,default:n}}))}function d(e){const{values:a,children:t}=e;return(0,n.useMemo)((()=>{const e=a??p(t);return function(e){const a=(0,u.X)(e,((e,a)=>e.value===a.value));if(a.length>0)throw new Error(`Docusaurus error: Duplicate values "${a.map((e=>e.value)).join(", ")}" found in . Every value needs to be unique.`)}(e),e}),[a,t])}function m(e){let{value:a,tabValues:t}=e;return t.some((e=>e.value===a))}function c(e){let{queryString:a=!1,groupId:t}=e;const r=(0,i.W6)(),o=function(e){let{queryString:a=!1,groupId:t}=e;if("string"==typeof a)return a;if(!1===a)return null;if(!0===a&&!t)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. You can also provide an explicit value such as queryString="my-search-param".');return t??null}({queryString:a,groupId:t});return[(0,s.aZ)(o),(0,n.useCallback)((e=>{if(!o)return;const a=new URLSearchParams(r.location.search);a.set(o,e),r.replace({...r.location,search:a.toString()})}),[o,r])]}function y(e){const{defaultValue:a,queryString:t=!1,groupId:r}=e,o=d(e),[l,i]=(0,n.useState)((()=>function(e){let{defaultValue:a,tabValues:t}=e;if(0===t.length)throw new Error("Docusaurus error: the component requires at least one children component");if(a){if(!m({value:a,tabValues:t}))throw new Error(`Docusaurus error: The has a defaultValue "${a}" but none of its children has the corresponding value. Available values are: ${t.map((e=>e.value)).join(", ")}. 
If you intend to show no default tab, use defaultValue={null} instead.`);return a}const r=t.find((e=>e.default))??t[0];if(!r)throw new Error("Unexpected error: 0 tabValues");return r.value}({defaultValue:a,tabValues:o}))),[s,u]=c({queryString:t,groupId:r}),[p,y]=function(e){let{groupId:a}=e;const t=function(e){return e?`docusaurus.tab.${e}`:null}(a),[r,o]=(0,g.Dv)(t);return[r,(0,n.useCallback)((e=>{t&&o.set(e)}),[t,o])]}({groupId:r}),f=(()=>{const e=s??p;return m({value:e,tabValues:o})?e:null})();(0,n.useLayoutEffect)((()=>{f&&i(f)}),[f]);return{selectedValue:l,selectValue:(0,n.useCallback)((e=>{if(!m({value:e,tabValues:o}))throw new Error(`Can't select invalid tab value=${e}`);i(e),u(e),y(e)}),[u,y,o]),tabValues:o}}var f=t(92303);const b={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function h(e){let{className:a,block:t,selectedValue:i,selectValue:s,tabValues:u}=e;const g=[],{blockElementScrollPositionUntilNextRender:p}=(0,l.a_)(),d=e=>{const a=e.currentTarget,t=g.indexOf(a),r=u[t].value;r!==i&&(p(a),s(r))},m=e=>{let a=null;switch(e.key){case"Enter":d(e);break;case"ArrowRight":{const t=g.indexOf(e.currentTarget)+1;a=g[t]??g[0];break}case"ArrowLeft":{const t=g.indexOf(e.currentTarget)-1;a=g[t]??g[g.length-1];break}}a?.focus()};return n.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,o.A)("tabs",{"tabs--block":t},a)},u.map((e=>{let{value:a,label:t,attributes:l}=e;return n.createElement("li",(0,r.A)({role:"tab",tabIndex:i===a?0:-1,"aria-selected":i===a,key:a,ref:e=>g.push(e),onKeyDown:m,onClick:d},l,{className:(0,o.A)("tabs__item",b.tabItem,l?.className,{"tabs__item--active":i===a})}),t??a)})))}function v(e){let{lazy:a,children:t,selectedValue:r}=e;const o=(Array.isArray(t)?t:[t]).filter(Boolean);if(a){const e=o.find((e=>e.props.value===r));return e?(0,n.cloneElement)(e,{className:"margin-top--md"}):null}return 
n.createElement("div",{className:"margin-top--md"},o.map(((e,a)=>(0,n.cloneElement)(e,{key:a,hidden:e.props.value!==r}))))}function N(e){const a=y(e);return n.createElement("div",{className:(0,o.A)("tabs-container",b.tabList)},n.createElement(h,(0,r.A)({},e,a)),n.createElement(v,(0,r.A)({},e,a)))}function _(e){const a=(0,f.A)();return n.createElement(N,(0,r.A)({key:String(a)},e))}},99093:(e,a,t)=>{t.r(a),t.d(a,{assets:()=>g,contentTitle:()=>s,default:()=>c,frontMatter:()=>i,metadata:()=>u,toc:()=>p});var r=t(58168),n=(t(96540),t(15680)),o=t(11470),l=t(19365);const i={sidebar_position:4,title:"Aggregate",id:"aggregate",description:"Group data and apply aggregation methods or pivot operations",tags:["gems","aggregate","group by","sum","count"]},s=void 0,u={unversionedId:"Spark/gems/transform/aggregate",id:"Spark/gems/transform/aggregate",title:"Aggregate",description:"Group data and apply aggregation methods or pivot operations",source:"@site/docs/Spark/gems/transform/aggregate.md",sourceDirName:"Spark/gems/transform",slug:"/Spark/gems/transform/aggregate",permalink:"/Spark/gems/transform/aggregate",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"aggregate",permalink:"/tags/aggregate"},{label:"group by",permalink:"/tags/group-by"},{label:"sum",permalink:"/tags/sum"},{label:"count",permalink:"/tags/count"}],version:"current",sidebarPosition:4,frontMatter:{sidebar_position:4,title:"Aggregate",id:"aggregate",description:"Group data and apply aggregation methods or pivot operations",tags:["gems","aggregate","group by","sum","count"]},sidebar:"defaultSidebar",previous:{title:"OrderBy",permalink:"/Spark/gems/transform/order-by"},next:{title:"FlattenSchema",permalink:"/Spark/gems/transform/flatten-schema"}},g={},p=[{value:"Parameters",id:"parameters",level:2},{value:"Examples",id:"examples",level:2},{value:"Aggregation without Grouping",id:"aggregation-without-grouping",level:3},{value:"Aggregation with 
Grouping",id:"aggregation-with-grouping",level:3},{value:"Pivot Columns",id:"pivot-columns",level:3},{value:"Propagate all input Columns",id:"propagate-all-input-columns",level:3}],d={toc:p},m="wrapper";function c(e){let{components:a,...i}=e;return(0,n.yg)(m,(0,r.A)({},d,i,{components:a,mdxType:"MDXLayout"}),(0,n.yg)("h3",null,(0,n.yg)("span",{class:"badge"},"Spark Gem")),(0,n.yg)("p",null,"Allows you to group the data and apply aggregation methods and pivot operation."),(0,n.yg)("h2",{id:"parameters"},"Parameters"),(0,n.yg)("table",null,(0,n.yg)("thead",{parentName:"table"},(0,n.yg)("tr",{parentName:"thead"},(0,n.yg)("th",{parentName:"tr",align:null},"Parameter"),(0,n.yg)("th",{parentName:"tr",align:null},"Description"),(0,n.yg)("th",{parentName:"tr",align:null},"Required"))),(0,n.yg)("tbody",{parentName:"table"},(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"DataFrame"),(0,n.yg)("td",{parentName:"tr",align:null},"Input DataFrame"),(0,n.yg)("td",{parentName:"tr",align:null},"True")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"Target column (Aggregate Tab)"),(0,n.yg)("td",{parentName:"tr",align:null},"Output column name of aggregated column"),(0,n.yg)("td",{parentName:"tr",align:null},"True")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"Expression (Aggregate Tab)"),(0,n.yg)("td",{parentName:"tr",align:null},"Aggregate function expression",(0,n.yg)("br",null)," Eg: ",(0,n.yg)("inlineCode",{parentName:"td"},'sum("amount")'),", ",(0,n.yg)("inlineCode",{parentName:"td"},"count(*)"),", ",(0,n.yg)("inlineCode",{parentName:"td"},'avg("amount")')),(0,n.yg)("td",{parentName:"tr",align:null},"True")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"Target column (Group By Tab)"),(0,n.yg)("td",{parentName:"tr",align:null},"Output column name of grouped column"),(0,n.yg)("td",{parentName:"tr",align:null},"Required if 
",(0,n.yg)("inlineCode",{parentName:"td"},"Pivot Column")," is present")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"Expression (Group By Tab)"),(0,n.yg)("td",{parentName:"tr",align:null},"Column expression to group on ",(0,n.yg)("br",null)," Eg: ",(0,n.yg)("inlineCode",{parentName:"td"},'col("id")'),", ",(0,n.yg)("inlineCode",{parentName:"td"},'month(col("order_date"))')),(0,n.yg)("td",{parentName:"tr",align:null},"Required if a ",(0,n.yg)("inlineCode",{parentName:"td"},"Target Column"),"(Group By) is present")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"Pivot column"),(0,n.yg)("td",{parentName:"tr",align:null},"Column name to pivot"),(0,n.yg)("td",{parentName:"tr",align:null},"False")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"Unique values"),(0,n.yg)("td",{parentName:"tr",align:null},"List of values in ",(0,n.yg)("inlineCode",{parentName:"td"},"Pivot Column")," that will be translated to columns in the output DataFrame"),(0,n.yg)("td",{parentName:"tr",align:null},"False")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"Propagate All Input Columns"),(0,n.yg)("td",{parentName:"tr",align:null},"If ",(0,n.yg)("inlineCode",{parentName:"td"},"true"),", all columns from the DataFrame would be propagated to output DataFrame. 
By default all columns apart from ones specified in ",(0,n.yg)("inlineCode",{parentName:"td"},"group by"),", ",(0,n.yg)("inlineCode",{parentName:"td"},"pivot"),", ",(0,n.yg)("inlineCode",{parentName:"td"},"aggregate")," expressions are propagated as ",(0,n.yg)("inlineCode",{parentName:"td"},"first(col_name)")," in the output DataFrame"),(0,n.yg)("td",{parentName:"tr",align:null},"False")))),(0,n.yg)("admonition",{type:"info"},(0,n.yg)("p",{parentName:"admonition"},"Providing ",(0,n.yg)("inlineCode",{parentName:"p"},"Unique values")," while performing pivot operation improves the performance of the operation since Spark does not have to first compute the list of distinct values of ",(0,n.yg)("inlineCode",{parentName:"p"},"Pivot Column")," internally.")),(0,n.yg)("h2",{id:"examples"},"Examples"),(0,n.yg)("h3",{id:"aggregation-without-grouping"},"Aggregation without Grouping"),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"Example usage of Aggregate - Aggregation without Grouping",src:t(10246).A,width:"1691",height:"629"})),(0,n.yg)(o.A,{mdxType:"Tabs"},(0,n.yg)(l.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-py"},'def total_orders(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return in0.agg(count(lit(1)).alias("number_of_orders"))\n'))),(0,n.yg)(l.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-scala"},'object total_orders {\n def apply(spark: SparkSession, in: DataFrame): DataFrame =\n in.agg(count(lit(1)).as("number_of_orders"))\n}\n')))),(0,n.yg)("h3",{id:"aggregation-with-grouping"},"Aggregation with Grouping"),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"Example usage of Aggregate - Aggregation with 
Grouping",src:t(91453).A,width:"1851",height:"796"})),(0,n.yg)(o.A,{mdxType:"Tabs"},(0,n.yg)(l.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-py"},'def orders_by_date(spark: SparkSession, in0: DataFrame) -> DataFrame:\n df1 = in0.groupBy(concat(month(col("order_date")), lit("/"), year(col("order_date")))\n .alias("order_month(MM/YYYY)"))\n return df1.agg(count(lit(1)).alias("number_of_orders"))\n'))),(0,n.yg)(l.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-scala"},'object orders_by_date {\n def apply(spark: SparkSession, in: DataFrame): DataFrame =\n in.groupBy(\n concat(month(col("order_date")), lit("/"), year(col("order_date")))\n .as("order_month(MM/YYYY)")\n )\n .agg(count(lit(1)).as("number_of_orders"))\n}\n')))),(0,n.yg)("h3",{id:"pivot-columns"},"Pivot Columns"),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"Example usage of Aggregate - Pivoting",src:t(70324).A,width:"1920",height:"1080"})),(0,n.yg)(o.A,{mdxType:"Tabs"},(0,n.yg)(l.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-py"},'def orders_by_date_N_status(spark: SparkSession, in0: DataFrame) -> DataFrame:\n df1 = in0.groupBy(concat(month(col("order_date")), lit("/"), year(col("order_date"))).alias("order_month(MM/YYYY)"))\n df2 = df1.pivot("order_status", ["Approved", "Finished", "Pending", "Started"])\n return df2.agg(count(lit(1)).alias("number_of_orders"))\n'))),(0,n.yg)(l.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-scala"},'object orders_by_date_N_status {\n def apply(spark: SparkSession, in: DataFrame): DataFrame =\n in.groupBy(\n concat(month(col("order_date")), lit("/"), year(col("order_date")))\n .as("order_month(MM/YYYY)")\n )\n .pivot(col("order_status"),\n List("Approved", "Finished", "Pending", 
"Started")\n )\n .agg(count(lit(1)).as("number_of_orders"))\n}\n')))),(0,n.yg)("h3",{id:"propagate-all-input-columns"},"Propagate all input Columns"),(0,n.yg)("p",null,"This option in used to propagate all columns from input DataFrame to output DataFrame.\nBy default ",(0,n.yg)("inlineCode",{parentName:"p"},"first(col_name)")," is used as aggregate function for columns not specified in ",(0,n.yg)("inlineCode",{parentName:"p"},"group by"),", ",(0,n.yg)("inlineCode",{parentName:"p"},"pivot"),", ",(0,n.yg)("inlineCode",{parentName:"p"},"aggregate")," expressions."),(0,n.yg)("div",{class:"wistia_responsive_padding",style:{padding:"56.25% 0 0 0",position:"relative"}},(0,n.yg)("div",{class:"wistia_responsive_wrapper",style:{height:"100%",left:0,position:"absolute",top:0,width:"100%"}},(0,n.yg)("iframe",{src:"https://user-images.githubusercontent.com/103921419/185245719-2be22f30-c84f-4b85-8712-be626c77e4e4.mp4",title:"Aggregate Propagate columns",allow:"autoplay;fullscreen",allowtransparency:"true",frameborder:"0",scrolling:"no",class:"wistia_embed",name:"wistia_embed",msallowfullscreen:!0,width:"100%",height:"100%"}))),(0,n.yg)(o.A,{mdxType:"Tabs"},(0,n.yg)(l.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-py"},'def Aggregate_1(spark: SparkSession, in0: DataFrame) -> DataFrame:\n df1 = in0.groupBy(col("customer_id"))\n\n return df1.agg(\n *[first(col("order_date")).alias("order_date")],\n *[\n first(col(x)).alias(x)\n for x in in0.columns\n if x not in ["order_date", "customer_id"]\n ]\n )\n'))),(0,n.yg)(l.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-scala"},'object Aggregate {\n\n def apply(spark: SparkSession, in: DataFrame): DataFrame =\n in.agg(first(col("order_date")).as("order_date"),\n List() ++ in.columns.toList\n .diff(List("order_date", "customer_id"))\n .map(x => first(col(x)).as(x)): _*\n 
)\n\n}\n')))))}c.isMDXComponent=!0},10246:(e,a,t)=>{t.d(a,{A:()=>r});const r=t.p+"assets/images/agg_eg_1-40497a04aacf57cef089dc04e8968b62.png"},91453:(e,a,t)=>{t.d(a,{A:()=>r});const r=t.p+"assets/images/agg_eg_2-8f09b2a49e553ed7489fa41295f6f74e.png"},70324:(e,a,t)=>{t.d(a,{A:()=>r});const r=t.p+"assets/images/agg_eg_3-60109ae121ceafd6b5b2dbf9cdb855d7.png"}}]); \ No newline at end of file diff --git a/assets/js/dda96e46.c89988f8.js b/assets/js/dda96e46.c89988f8.js deleted file mode 100644 index 8d9d04d4c6..0000000000 --- a/assets/js/dda96e46.c89988f8.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[33852],{15680:(e,a,t)=>{t.d(a,{xA:()=>g,yg:()=>c});var r=t(96540);function n(e,a,t){return a in e?Object.defineProperty(e,a,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[a]=t,e}function l(e,a){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);a&&(r=r.filter((function(a){return Object.getOwnPropertyDescriptor(e,a).enumerable}))),t.push.apply(t,r)}return t}function o(e){for(var a=1;a=0||(n[t]=e[t]);return n}(e,a);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(n[t]=e[t])}return n}var s=r.createContext({}),u=function(e){var a=r.useContext(s),t=a;return e&&(t="function"==typeof e?e(a):o(o({},a),e)),t},g=function(e){var a=u(e.components);return r.createElement(s.Provider,{value:a},e.children)},p="mdxType",d={inlineCode:"code",wrapper:function(e){var a=e.children;return r.createElement(r.Fragment,{},a)}},m=r.forwardRef((function(e,a){var t=e.components,n=e.mdxType,l=e.originalType,s=e.parentName,g=i(e,["components","mdxType","originalType","parentName"]),p=u(t),m=n,c=p["".concat(s,".").concat(m)]||p[m]||d[m]||l;return t?r.createElement(c,o(o({ref:a},g),{},{components:t})):r.createElement(c,o({ref:a},g))}));function c(e,a){var t=arguments,n=a&&a.mdxType;if("string"==typeof e||n){var 
l=t.length,o=new Array(l);o[0]=m;var i={};for(var s in a)hasOwnProperty.call(a,s)&&(i[s]=a[s]);i.originalType=e,i[p]="string"==typeof e?e:n,o[1]=i;for(var u=2;u{t.d(a,{A:()=>o});var r=t(96540),n=t(20053);const l={tabItem:"tabItem_Ymn6"};function o(e){let{children:a,hidden:t,className:o}=e;return r.createElement("div",{role:"tabpanel",className:(0,n.A)(l.tabItem,o),hidden:t},a)}},11470:(e,a,t)=>{t.d(a,{A:()=>_});var r=t(58168),n=t(96540),l=t(20053),o=t(23104),i=t(56347),s=t(57485),u=t(31682),g=t(89466);function p(e){return function(e){return n.Children.map(e,(e=>{if(!e||(0,n.isValidElement)(e)&&function(e){const{props:a}=e;return!!a&&"object"==typeof a&&"value"in a}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:a,label:t,attributes:r,default:n}}=e;return{value:a,label:t,attributes:r,default:n}}))}function d(e){const{values:a,children:t}=e;return(0,n.useMemo)((()=>{const e=a??p(t);return function(e){const a=(0,u.X)(e,((e,a)=>e.value===a.value));if(a.length>0)throw new Error(`Docusaurus error: Duplicate values "${a.map((e=>e.value)).join(", ")}" found in . Every value needs to be unique.`)}(e),e}),[a,t])}function m(e){let{value:a,tabValues:t}=e;return t.some((e=>e.value===a))}function c(e){let{queryString:a=!1,groupId:t}=e;const r=(0,i.W6)(),l=function(e){let{queryString:a=!1,groupId:t}=e;if("string"==typeof a)return a;if(!1===a)return null;if(!0===a&&!t)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. 
You can also provide an explicit value such as queryString="my-search-param".');return t??null}({queryString:a,groupId:t});return[(0,s.aZ)(l),(0,n.useCallback)((e=>{if(!l)return;const a=new URLSearchParams(r.location.search);a.set(l,e),r.replace({...r.location,search:a.toString()})}),[l,r])]}function y(e){const{defaultValue:a,queryString:t=!1,groupId:r}=e,l=d(e),[o,i]=(0,n.useState)((()=>function(e){let{defaultValue:a,tabValues:t}=e;if(0===t.length)throw new Error("Docusaurus error: the component requires at least one children component");if(a){if(!m({value:a,tabValues:t}))throw new Error(`Docusaurus error: The has a defaultValue "${a}" but none of its children has the corresponding value. Available values are: ${t.map((e=>e.value)).join(", ")}. If you intend to show no default tab, use defaultValue={null} instead.`);return a}const r=t.find((e=>e.default))??t[0];if(!r)throw new Error("Unexpected error: 0 tabValues");return r.value}({defaultValue:a,tabValues:l}))),[s,u]=c({queryString:t,groupId:r}),[p,y]=function(e){let{groupId:a}=e;const t=function(e){return e?`docusaurus.tab.${e}`:null}(a),[r,l]=(0,g.Dv)(t);return[r,(0,n.useCallback)((e=>{t&&l.set(e)}),[t,l])]}({groupId:r}),f=(()=>{const e=s??p;return m({value:e,tabValues:l})?e:null})();(0,n.useLayoutEffect)((()=>{f&&i(f)}),[f]);return{selectedValue:o,selectValue:(0,n.useCallback)((e=>{if(!m({value:e,tabValues:l}))throw new Error(`Can't select invalid tab value=${e}`);i(e),u(e),y(e)}),[u,y,l]),tabValues:l}}var f=t(92303);const b={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function h(e){let{className:a,block:t,selectedValue:i,selectValue:s,tabValues:u}=e;const g=[],{blockElementScrollPositionUntilNextRender:p}=(0,o.a_)(),d=e=>{const a=e.currentTarget,t=g.indexOf(a),r=u[t].value;r!==i&&(p(a),s(r))},m=e=>{let a=null;switch(e.key){case"Enter":d(e);break;case"ArrowRight":{const t=g.indexOf(e.currentTarget)+1;a=g[t]??g[0];break}case"ArrowLeft":{const 
t=g.indexOf(e.currentTarget)-1;a=g[t]??g[g.length-1];break}}a?.focus()};return n.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,l.A)("tabs",{"tabs--block":t},a)},u.map((e=>{let{value:a,label:t,attributes:o}=e;return n.createElement("li",(0,r.A)({role:"tab",tabIndex:i===a?0:-1,"aria-selected":i===a,key:a,ref:e=>g.push(e),onKeyDown:m,onClick:d},o,{className:(0,l.A)("tabs__item",b.tabItem,o?.className,{"tabs__item--active":i===a})}),t??a)})))}function v(e){let{lazy:a,children:t,selectedValue:r}=e;const l=(Array.isArray(t)?t:[t]).filter(Boolean);if(a){const e=l.find((e=>e.props.value===r));return e?(0,n.cloneElement)(e,{className:"margin-top--md"}):null}return n.createElement("div",{className:"margin-top--md"},l.map(((e,a)=>(0,n.cloneElement)(e,{key:a,hidden:e.props.value!==r}))))}function N(e){const a=y(e);return n.createElement("div",{className:(0,l.A)("tabs-container",b.tabList)},n.createElement(h,(0,r.A)({},e,a)),n.createElement(v,(0,r.A)({},e,a)))}function _(e){const a=(0,f.A)();return n.createElement(N,(0,r.A)({key:String(a)},e))}},99093:(e,a,t)=>{t.r(a),t.d(a,{assets:()=>g,contentTitle:()=>s,default:()=>c,frontMatter:()=>i,metadata:()=>u,toc:()=>p});var r=t(58168),n=(t(96540),t(15680)),l=t(11470),o=t(19365);const i={sidebar_position:4,title:"Aggregate",id:"aggregate",description:"Group data and apply aggregation methods or pivot operations",tags:["gems","aggregate","group by","sum","count"]},s=void 0,u={unversionedId:"Spark/gems/transform/aggregate",id:"Spark/gems/transform/aggregate",title:"Aggregate",description:"Group data and apply aggregation methods or pivot operations",source:"@site/docs/Spark/gems/transform/aggregate.md",sourceDirName:"Spark/gems/transform",slug:"/Spark/gems/transform/aggregate",permalink:"/Spark/gems/transform/aggregate",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"aggregate",permalink:"/tags/aggregate"},{label:"group 
by",permalink:"/tags/group-by"},{label:"sum",permalink:"/tags/sum"},{label:"count",permalink:"/tags/count"}],version:"current",sidebarPosition:4,frontMatter:{sidebar_position:4,title:"Aggregate",id:"aggregate",description:"Group data and apply aggregation methods or pivot operations",tags:["gems","aggregate","group by","sum","count"]},sidebar:"defaultSidebar",previous:{title:"OrderBy",permalink:"/Spark/gems/transform/order-by"},next:{title:"FlattenSchema",permalink:"/Spark/gems/transform/flatten-schema"}},g={},p=[{value:"Parameters",id:"parameters",level:2},{value:"Examples",id:"examples",level:2},{value:"Aggregation without Grouping",id:"aggregation-without-grouping",level:3},{value:"Aggregation with Grouping",id:"aggregation-with-grouping",level:3},{value:"Pivot Columns",id:"pivot-columns",level:3},{value:"Propagate all input Columns",id:"propagate-all-input-columns",level:3}],d={toc:p},m="wrapper";function c(e){let{components:a,...i}=e;return(0,n.yg)(m,(0,r.A)({},d,i,{components:a,mdxType:"MDXLayout"}),(0,n.yg)("h3",null,(0,n.yg)("span",{class:"badge rounded-pill text-bg-light"},"Spark Gem")),(0,n.yg)("p",null,"Allows you to group the data and apply aggregation methods and pivot operation."),(0,n.yg)("h2",{id:"parameters"},"Parameters"),(0,n.yg)("table",null,(0,n.yg)("thead",{parentName:"table"},(0,n.yg)("tr",{parentName:"thead"},(0,n.yg)("th",{parentName:"tr",align:null},"Parameter"),(0,n.yg)("th",{parentName:"tr",align:null},"Description"),(0,n.yg)("th",{parentName:"tr",align:null},"Required"))),(0,n.yg)("tbody",{parentName:"table"},(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"DataFrame"),(0,n.yg)("td",{parentName:"tr",align:null},"Input DataFrame"),(0,n.yg)("td",{parentName:"tr",align:null},"True")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"Target column (Aggregate Tab)"),(0,n.yg)("td",{parentName:"tr",align:null},"Output column name of aggregated 
column"),(0,n.yg)("td",{parentName:"tr",align:null},"True")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"Expression (Aggregate Tab)"),(0,n.yg)("td",{parentName:"tr",align:null},"Aggregate function expression",(0,n.yg)("br",null)," Eg: ",(0,n.yg)("inlineCode",{parentName:"td"},'sum("amount")'),", ",(0,n.yg)("inlineCode",{parentName:"td"},"count(*)"),", ",(0,n.yg)("inlineCode",{parentName:"td"},'avg("amount")')),(0,n.yg)("td",{parentName:"tr",align:null},"True")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"Target column (Group By Tab)"),(0,n.yg)("td",{parentName:"tr",align:null},"Output column name of grouped column"),(0,n.yg)("td",{parentName:"tr",align:null},"Required if ",(0,n.yg)("inlineCode",{parentName:"td"},"Pivot Column")," is present")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"Expression (Group By Tab)"),(0,n.yg)("td",{parentName:"tr",align:null},"Column expression to group on ",(0,n.yg)("br",null)," Eg: ",(0,n.yg)("inlineCode",{parentName:"td"},'col("id")'),", ",(0,n.yg)("inlineCode",{parentName:"td"},'month(col("order_date"))')),(0,n.yg)("td",{parentName:"tr",align:null},"Required if a ",(0,n.yg)("inlineCode",{parentName:"td"},"Target Column"),"(Group By) is present")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"Pivot column"),(0,n.yg)("td",{parentName:"tr",align:null},"Column name to pivot"),(0,n.yg)("td",{parentName:"tr",align:null},"False")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"Unique values"),(0,n.yg)("td",{parentName:"tr",align:null},"List of values in ",(0,n.yg)("inlineCode",{parentName:"td"},"Pivot Column")," that will be translated to columns in the output DataFrame"),(0,n.yg)("td",{parentName:"tr",align:null},"False")),(0,n.yg)("tr",{parentName:"tbody"},(0,n.yg)("td",{parentName:"tr",align:null},"Propagate All Input 
Columns"),(0,n.yg)("td",{parentName:"tr",align:null},"If ",(0,n.yg)("inlineCode",{parentName:"td"},"true"),", all columns from the DataFrame would be propagated to output DataFrame. By default all columns apart from ones specified in ",(0,n.yg)("inlineCode",{parentName:"td"},"group by"),", ",(0,n.yg)("inlineCode",{parentName:"td"},"pivot"),", ",(0,n.yg)("inlineCode",{parentName:"td"},"aggregate")," expressions are propagated as ",(0,n.yg)("inlineCode",{parentName:"td"},"first(col_name)")," in the output DataFrame"),(0,n.yg)("td",{parentName:"tr",align:null},"False")))),(0,n.yg)("admonition",{type:"info"},(0,n.yg)("p",{parentName:"admonition"},"Providing ",(0,n.yg)("inlineCode",{parentName:"p"},"Unique values")," while performing pivot operation improves the performance of the operation since Spark does not have to first compute the list of distinct values of ",(0,n.yg)("inlineCode",{parentName:"p"},"Pivot Column")," internally.")),(0,n.yg)("h2",{id:"examples"},"Examples"),(0,n.yg)("h3",{id:"aggregation-without-grouping"},"Aggregation without Grouping"),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"Example usage of Aggregate - Aggregation without Grouping",src:t(10246).A,width:"1691",height:"629"})),(0,n.yg)(l.A,{mdxType:"Tabs"},(0,n.yg)(o.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-py"},'def total_orders(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return in0.agg(count(lit(1)).alias("number_of_orders"))\n'))),(0,n.yg)(o.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-scala"},'object total_orders {\n def apply(spark: SparkSession, in: DataFrame): DataFrame =\n in.agg(count(lit(1)).as("number_of_orders"))\n}\n')))),(0,n.yg)("h3",{id:"aggregation-with-grouping"},"Aggregation with Grouping"),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"Example usage of Aggregate - Aggregation with 
Grouping",src:t(91453).A,width:"1851",height:"796"})),(0,n.yg)(l.A,{mdxType:"Tabs"},(0,n.yg)(o.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-py"},'def orders_by_date(spark: SparkSession, in0: DataFrame) -> DataFrame:\n df1 = in0.groupBy(concat(month(col("order_date")), lit("/"), year(col("order_date")))\n .alias("order_month(MM/YYYY)"))\n return df1.agg(count(lit(1)).alias("number_of_orders"))\n'))),(0,n.yg)(o.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-scala"},'object orders_by_date {\n def apply(spark: SparkSession, in: DataFrame): DataFrame =\n in.groupBy(\n concat(month(col("order_date")), lit("/"), year(col("order_date")))\n .as("order_month(MM/YYYY)")\n )\n .agg(count(lit(1)).as("number_of_orders"))\n}\n')))),(0,n.yg)("h3",{id:"pivot-columns"},"Pivot Columns"),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"Example usage of Aggregate - Pivoting",src:t(70324).A,width:"1920",height:"1080"})),(0,n.yg)(l.A,{mdxType:"Tabs"},(0,n.yg)(o.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-py"},'def orders_by_date_N_status(spark: SparkSession, in0: DataFrame) -> DataFrame:\n df1 = in0.groupBy(concat(month(col("order_date")), lit("/"), year(col("order_date"))).alias("order_month(MM/YYYY)"))\n df2 = df1.pivot("order_status", ["Approved", "Finished", "Pending", "Started"])\n return df2.agg(count(lit(1)).alias("number_of_orders"))\n'))),(0,n.yg)(o.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-scala"},'object orders_by_date_N_status {\n def apply(spark: SparkSession, in: DataFrame): DataFrame =\n in.groupBy(\n concat(month(col("order_date")), lit("/"), year(col("order_date")))\n .as("order_month(MM/YYYY)")\n )\n .pivot(col("order_status"),\n List("Approved", "Finished", "Pending", 
"Started")\n )\n .agg(count(lit(1)).as("number_of_orders"))\n}\n')))),(0,n.yg)("h3",{id:"propagate-all-input-columns"},"Propagate all input Columns"),(0,n.yg)("p",null,"This option in used to propagate all columns from input DataFrame to output DataFrame.\nBy default ",(0,n.yg)("inlineCode",{parentName:"p"},"first(col_name)")," is used as aggregate function for columns not specified in ",(0,n.yg)("inlineCode",{parentName:"p"},"group by"),", ",(0,n.yg)("inlineCode",{parentName:"p"},"pivot"),", ",(0,n.yg)("inlineCode",{parentName:"p"},"aggregate")," expressions."),(0,n.yg)("div",{class:"wistia_responsive_padding",style:{padding:"56.25% 0 0 0",position:"relative"}},(0,n.yg)("div",{class:"wistia_responsive_wrapper",style:{height:"100%",left:0,position:"absolute",top:0,width:"100%"}},(0,n.yg)("iframe",{src:"https://user-images.githubusercontent.com/103921419/185245719-2be22f30-c84f-4b85-8712-be626c77e4e4.mp4",title:"Aggregate Propagate columns",allow:"autoplay;fullscreen",allowtransparency:"true",frameborder:"0",scrolling:"no",class:"wistia_embed",name:"wistia_embed",msallowfullscreen:!0,width:"100%",height:"100%"}))),(0,n.yg)(l.A,{mdxType:"Tabs"},(0,n.yg)(o.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-py"},'def Aggregate_1(spark: SparkSession, in0: DataFrame) -> DataFrame:\n df1 = in0.groupBy(col("customer_id"))\n\n return df1.agg(\n *[first(col("order_date")).alias("order_date")],\n *[\n first(col(x)).alias(x)\n for x in in0.columns\n if x not in ["order_date", "customer_id"]\n ]\n )\n'))),(0,n.yg)(o.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,n.yg)("pre",null,(0,n.yg)("code",{parentName:"pre",className:"language-scala"},'object Aggregate {\n\n def apply(spark: SparkSession, in: DataFrame): DataFrame =\n in.agg(first(col("order_date")).as("order_date"),\n List() ++ in.columns.toList\n .diff(List("order_date", "customer_id"))\n .map(x => first(col(x)).as(x)): _*\n 
)\n\n}\n')))))}c.isMDXComponent=!0},10246:(e,a,t)=>{t.d(a,{A:()=>r});const r=t.p+"assets/images/agg_eg_1-40497a04aacf57cef089dc04e8968b62.png"},91453:(e,a,t)=>{t.d(a,{A:()=>r});const r=t.p+"assets/images/agg_eg_2-8f09b2a49e553ed7489fa41295f6f74e.png"},70324:(e,a,t)=>{t.d(a,{A:()=>r});const r=t.p+"assets/images/agg_eg_3-60109ae121ceafd6b5b2dbf9cdb855d7.png"}}]); \ No newline at end of file diff --git a/assets/js/e5297273.32262110.js b/assets/js/e5297273.32262110.js deleted file mode 100644 index 9e3bf1ed6a..0000000000 --- a/assets/js/e5297273.32262110.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[13395],{15680:(e,t,a)=>{a.d(t,{xA:()=>m,yg:()=>c});var r=a(96540);function l(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function n(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,r)}return a}function i(e){for(var t=1;t=0||(l[a]=e[a]);return l}(e,t);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(l[a]=e[a])}return l}var p=r.createContext({}),s=function(e){var t=r.useContext(p),a=t;return e&&(a="function"==typeof e?e(t):i(i({},t),e)),a},m=function(e){var t=s(e.components);return r.createElement(p.Provider,{value:t},e.children)},g="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},y=r.forwardRef((function(e,t){var a=e.components,l=e.mdxType,n=e.originalType,p=e.parentName,m=o(e,["components","mdxType","originalType","parentName"]),g=s(a),y=l,c=g["".concat(p,".").concat(y)]||g[y]||d[y]||n;return a?r.createElement(c,i(i({ref:t},m),{},{components:a})):r.createElement(c,i({ref:t},m))}));function c(e,t){var a=arguments,l=t&&t.mdxType;if("string"==typeof e||l){var 
n=a.length,i=new Array(n);i[0]=y;var o={};for(var p in t)hasOwnProperty.call(t,p)&&(o[p]=t[p]);o.originalType=e,o[g]="string"==typeof e?e:l,i[1]=o;for(var s=2;s{a.r(t),a.d(t,{assets:()=>p,contentTitle:()=>i,default:()=>d,frontMatter:()=>n,metadata:()=>o,toc:()=>s});var r=a(58168),l=(a(96540),a(15680));const n={sidebar_position:4,title:"DeltaTableOperations",id:"delta-ops",description:"Gem that encompasses some of the import side operations of Delta",tags:["file","delta","devops"]},i=void 0,o={unversionedId:"Spark/gems/custom/delta-ops",id:"Spark/gems/custom/delta-ops",title:"DeltaTableOperations",description:"Gem that encompasses some of the import side operations of Delta",source:"@site/docs/Spark/gems/custom/delta-table-operations.md",sourceDirName:"Spark/gems/custom",slug:"/Spark/gems/custom/delta-ops",permalink:"/Spark/gems/custom/delta-ops",draft:!1,tags:[{label:"file",permalink:"/tags/file"},{label:"delta",permalink:"/tags/delta"},{label:"devops",permalink:"/tags/devops"}],version:"current",sidebarPosition:4,frontMatter:{sidebar_position:4,title:"DeltaTableOperations",id:"delta-ops",description:"Gem that encompasses some of the import side operations of Delta",tags:["file","delta","devops"]},sidebar:"defaultSidebar",previous:{title:"FileOperation",permalink:"/Spark/gems/custom/file-operations"},next:{title:"RestAPIEnrich",permalink:"/Spark/gems/custom/rest-api-enrich"}},p={},s=[{value:"Parameters",id:"parameters",level:2},{value:"Example",id:"example",level:2},{value:"Register table in catalog",id:"register-table-in-catalog",level:2},{value:"Vacuum table",id:"vacuum-table",level:2},{value:"Parameters",id:"vacuum-parameters",level:3},{value:"Optimize table",id:"optimize-table",level:2},{value:"Parameters",id:"optimize-parameters",level:3},{value:"Restore table",id:"restore-table",level:2},{value:"Parameters",id:"restore-parameters",level:3},{value:"Delete from 
table",id:"delete-from-table",level:2},{value:"Parameters",id:"delete-parameters",level:3},{value:"Drop table",id:"drop-table",level:2},{value:"FSCK Repair table",id:"fsck-repair-table",level:2}],m={toc:s},g="wrapper";function d(e){let{components:t,...n}=e;return(0,l.yg)(g,(0,r.A)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,l.yg)("h3",null,(0,l.yg)("span",{class:"badge rounded-pill text-bg-light"},"Spark Gem")),(0,l.yg)("p",null,"Helps perform the following operations on Delta tables."),(0,l.yg)("ol",null,(0,l.yg)("li",{parentName:"ol"},"Register table in catalog"),(0,l.yg)("li",{parentName:"ol"},"Vacuum table"),(0,l.yg)("li",{parentName:"ol"},"Optimize table"),(0,l.yg)("li",{parentName:"ol"},"Restore table"),(0,l.yg)("li",{parentName:"ol"},"Delete from table"),(0,l.yg)("li",{parentName:"ol"},"Drop table"),(0,l.yg)("li",{parentName:"ol"},"FSCK Repair table")),(0,l.yg)("h2",{id:"parameters"},"Parameters"),(0,l.yg)("table",null,(0,l.yg)("thead",{parentName:"table"},(0,l.yg)("tr",{parentName:"thead"},(0,l.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,l.yg)("th",{parentName:"tr",align:"left"},"Description"),(0,l.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,l.yg)("tbody",{parentName:"table"},(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"Database name"),(0,l.yg)("td",{parentName:"tr",align:"left"},"Database name"),(0,l.yg)("td",{parentName:"tr",align:"left"},"False")),(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"Table name"),(0,l.yg)("td",{parentName:"tr",align:"left"},"Table name"),(0,l.yg)("td",{parentName:"tr",align:"left"},"False")),(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"File path"),(0,l.yg)("td",{parentName:"tr",align:"left"},"File path for delta 
table"),(0,l.yg)("td",{parentName:"tr",align:"left"},"False")),(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"Action"),(0,l.yg)("td",{parentName:"tr",align:"left"},"Action to perform on the table"),(0,l.yg)("td",{parentName:"tr",align:"left"},"True")))),(0,l.yg)("admonition",{type:"note"},(0,l.yg)("p",{parentName:"admonition"},"At least one value from table name or file path needs to be provided.")),(0,l.yg)("h2",{id:"example"},"Example"),(0,l.yg)("p",null,(0,l.yg)("img",{alt:"Example usage of Delta Table Operations Gem",src:a(30040).A,width:"3010",height:"1496"})),(0,l.yg)("h2",{id:"register-table-in-catalog"},"Register table in catalog"),(0,l.yg)("p",null,"This will register the data at mentioned file path as a table in the whichever Metadata catalog is available in your execution environment."),(0,l.yg)("h2",{id:"vacuum-table"},"Vacuum table"),(0,l.yg)("p",null,"Recursively vacuum directories associated with the Delta table. VACUUM removes all files from the table directory that are not managed by Delta, as well as data files that are no longer in the latest state of the transaction log for the table and are older than a retention threshold. 
The default threshold is 7 days."),(0,l.yg)("p",null,"To learn more about vacuum ",(0,l.yg)("a",{parentName:"p",href:"https://docs.databricks.com/spark/latest/spark-sql/language-manual/delta-vacuum.html"},"click here"),"."),(0,l.yg)("h3",{id:"vacuum-parameters"},"Parameters"),(0,l.yg)("table",null,(0,l.yg)("thead",{parentName:"table"},(0,l.yg)("tr",{parentName:"thead"},(0,l.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,l.yg)("th",{parentName:"tr",align:"left"},"Description"),(0,l.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,l.yg)("tbody",{parentName:"table"},(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"Retention hours"),(0,l.yg)("td",{parentName:"tr",align:"left"},"Retention threshold"),(0,l.yg)("td",{parentName:"tr",align:"left"},"False")))),(0,l.yg)("h2",{id:"optimize-table"},"Optimize table"),(0,l.yg)("p",null,"Optimizes the layout of Delta Table data. Optionally optimize a subset of data or colocate data by column. If colocation is not specified, bin-packing optimization is performed by default."),(0,l.yg)("p",null,"To learn more about optimize ",(0,l.yg)("a",{parentName:"p",href:"https://docs.databricks.com/spark/latest/spark-sql/language-manual/delta-optimize.html"},"click here"),"."),(0,l.yg)("h3",{id:"optimize-parameters"},"Parameters"),(0,l.yg)("table",null,(0,l.yg)("thead",{parentName:"table"},(0,l.yg)("tr",{parentName:"thead"},(0,l.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,l.yg)("th",{parentName:"tr",align:"left"},"Description"),(0,l.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,l.yg)("tbody",{parentName:"table"},(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"Where clause"),(0,l.yg)("td",{parentName:"tr",align:"left"},"Optimize the subset of rows matching the given partition predicate. 
Only filters involving partition key attributes are supported."),(0,l.yg)("td",{parentName:"tr",align:"left"},"False")),(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"ZOrder By"),(0,l.yg)("td",{parentName:"tr",align:"left"},"List of columns to perform ZOrder on"),(0,l.yg)("td",{parentName:"tr",align:"left"},"False")))),(0,l.yg)("h2",{id:"restore-table"},"Restore table"),(0,l.yg)("p",null,"Restores a Delta table to an earlier state. Restoring to an earlier version number or a timestamp is supported."),(0,l.yg)("h3",{id:"restore-parameters"},"Parameters"),(0,l.yg)("table",null,(0,l.yg)("thead",{parentName:"table"},(0,l.yg)("tr",{parentName:"thead"},(0,l.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,l.yg)("th",{parentName:"tr",align:"left"},"Description"),(0,l.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,l.yg)("tbody",{parentName:"table"},(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"Restore via"),(0,l.yg)("td",{parentName:"tr",align:"left"},"Restore the table via timestamp or version"),(0,l.yg)("td",{parentName:"tr",align:"left"},"False")),(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"Value"),(0,l.yg)("td",{parentName:"tr",align:"left"},"Value to restore on"),(0,l.yg)("td",{parentName:"tr",align:"left"},"False")))),(0,l.yg)("h2",{id:"delete-from-table"},"Delete from table"),(0,l.yg)("p",null,"Delete removes the data from the latest version of the Delta table that matches the specified condition. 
Please note that delete does not remove it from the physical storage until the older versions are explicitly ",(0,l.yg)("a",{parentName:"p",href:"#vacuum-table"},"vacuumed"),"."),(0,l.yg)("h3",{id:"delete-parameters"},"Parameters"),(0,l.yg)("table",null,(0,l.yg)("thead",{parentName:"table"},(0,l.yg)("tr",{parentName:"thead"},(0,l.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,l.yg)("th",{parentName:"tr",align:"left"},"Description"),(0,l.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,l.yg)("tbody",{parentName:"table"},(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"Where clause"),(0,l.yg)("td",{parentName:"tr",align:"left"},"Condition which needs to be satisfied to delete a row"),(0,l.yg)("td",{parentName:"tr",align:"left"},"True")))),(0,l.yg)("h2",{id:"drop-table"},"Drop table"),(0,l.yg)("p",null,"This will drop the table from catalog and remove the files."),(0,l.yg)("h2",{id:"fsck-repair-table"},"FSCK Repair table"),(0,l.yg)("p",null,"Removes the file entries from the transaction log of a Delta table that can no longer be found in the underlying file system. 
This can happen when these files have been manually deleted."),(0,l.yg)("p",null,"To learn more about fsck repair ",(0,l.yg)("a",{parentName:"p",href:"https://docs.databricks.com/spark/latest/spark-sql/language-manual/delta-fsck.html"},"click here"),"."))}d.isMDXComponent=!0},30040:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/delta_operations_eg1-17a89a0e851e87ffb3ef36e8e6cea0ab.png"}}]); \ No newline at end of file diff --git a/assets/js/e5297273.920b27aa.js b/assets/js/e5297273.920b27aa.js new file mode 100644 index 0000000000..f722028687 --- /dev/null +++ b/assets/js/e5297273.920b27aa.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[13395],{15680:(e,t,a)=>{a.d(t,{xA:()=>m,yg:()=>c});var r=a(96540);function l(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function n(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,r)}return a}function i(e){for(var t=1;t=0||(l[a]=e[a]);return l}(e,t);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(l[a]=e[a])}return l}var p=r.createContext({}),s=function(e){var t=r.useContext(p),a=t;return e&&(a="function"==typeof e?e(t):i(i({},t),e)),a},m=function(e){var t=s(e.components);return r.createElement(p.Provider,{value:t},e.children)},g="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},y=r.forwardRef((function(e,t){var a=e.components,l=e.mdxType,n=e.originalType,p=e.parentName,m=o(e,["components","mdxType","originalType","parentName"]),g=s(a),y=l,c=g["".concat(p,".").concat(y)]||g[y]||d[y]||n;return a?r.createElement(c,i(i({ref:t},m),{},{components:a})):r.createElement(c,i({ref:t},m))}));function c(e,t){var 
a=arguments,l=t&&t.mdxType;if("string"==typeof e||l){var n=a.length,i=new Array(n);i[0]=y;var o={};for(var p in t)hasOwnProperty.call(t,p)&&(o[p]=t[p]);o.originalType=e,o[g]="string"==typeof e?e:l,i[1]=o;for(var s=2;s{a.r(t),a.d(t,{assets:()=>p,contentTitle:()=>i,default:()=>d,frontMatter:()=>n,metadata:()=>o,toc:()=>s});var r=a(58168),l=(a(96540),a(15680));const n={sidebar_position:4,title:"DeltaTableOperations",id:"delta-ops",description:"Gem that encompasses some of the import side operations of Delta",tags:["file","delta","devops"]},i=void 0,o={unversionedId:"Spark/gems/custom/delta-ops",id:"Spark/gems/custom/delta-ops",title:"DeltaTableOperations",description:"Gem that encompasses some of the import side operations of Delta",source:"@site/docs/Spark/gems/custom/delta-table-operations.md",sourceDirName:"Spark/gems/custom",slug:"/Spark/gems/custom/delta-ops",permalink:"/Spark/gems/custom/delta-ops",draft:!1,tags:[{label:"file",permalink:"/tags/file"},{label:"delta",permalink:"/tags/delta"},{label:"devops",permalink:"/tags/devops"}],version:"current",sidebarPosition:4,frontMatter:{sidebar_position:4,title:"DeltaTableOperations",id:"delta-ops",description:"Gem that encompasses some of the import side operations of Delta",tags:["file","delta","devops"]},sidebar:"defaultSidebar",previous:{title:"FileOperation",permalink:"/Spark/gems/custom/file-operations"},next:{title:"RestAPIEnrich",permalink:"/Spark/gems/custom/rest-api-enrich"}},p={},s=[{value:"Parameters",id:"parameters",level:2},{value:"Example",id:"example",level:2},{value:"Register table in catalog",id:"register-table-in-catalog",level:2},{value:"Vacuum table",id:"vacuum-table",level:2},{value:"Parameters",id:"vacuum-parameters",level:3},{value:"Optimize table",id:"optimize-table",level:2},{value:"Parameters",id:"optimize-parameters",level:3},{value:"Restore table",id:"restore-table",level:2},{value:"Parameters",id:"restore-parameters",level:3},{value:"Delete from 
table",id:"delete-from-table",level:2},{value:"Parameters",id:"delete-parameters",level:3},{value:"Drop table",id:"drop-table",level:2},{value:"FSCK Repair table",id:"fsck-repair-table",level:2}],m={toc:s},g="wrapper";function d(e){let{components:t,...n}=e;return(0,l.yg)(g,(0,r.A)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,l.yg)("h3",null,(0,l.yg)("span",{class:"badge"},"Spark Gem")),(0,l.yg)("p",null,"Helps perform the following operations on Delta tables."),(0,l.yg)("ol",null,(0,l.yg)("li",{parentName:"ol"},"Register table in catalog"),(0,l.yg)("li",{parentName:"ol"},"Vacuum table"),(0,l.yg)("li",{parentName:"ol"},"Optimize table"),(0,l.yg)("li",{parentName:"ol"},"Restore table"),(0,l.yg)("li",{parentName:"ol"},"Delete from table"),(0,l.yg)("li",{parentName:"ol"},"Drop table"),(0,l.yg)("li",{parentName:"ol"},"FSCK Repair table")),(0,l.yg)("h2",{id:"parameters"},"Parameters"),(0,l.yg)("table",null,(0,l.yg)("thead",{parentName:"table"},(0,l.yg)("tr",{parentName:"thead"},(0,l.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,l.yg)("th",{parentName:"tr",align:"left"},"Description"),(0,l.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,l.yg)("tbody",{parentName:"table"},(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"Database name"),(0,l.yg)("td",{parentName:"tr",align:"left"},"Database name"),(0,l.yg)("td",{parentName:"tr",align:"left"},"False")),(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"Table name"),(0,l.yg)("td",{parentName:"tr",align:"left"},"Table name"),(0,l.yg)("td",{parentName:"tr",align:"left"},"False")),(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"File path"),(0,l.yg)("td",{parentName:"tr",align:"left"},"File path for delta table"),(0,l.yg)("td",{parentName:"tr",align:"left"},"False")),(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"Action"),(0,l.yg)("td",{parentName:"tr",align:"left"},"Action to 
perform on the table"),(0,l.yg)("td",{parentName:"tr",align:"left"},"True")))),(0,l.yg)("admonition",{type:"note"},(0,l.yg)("p",{parentName:"admonition"},"At least one value from table name or file path needs to be provided.")),(0,l.yg)("h2",{id:"example"},"Example"),(0,l.yg)("p",null,(0,l.yg)("img",{alt:"Example usage of Delta Table Operations Gem",src:a(30040).A,width:"3010",height:"1496"})),(0,l.yg)("h2",{id:"register-table-in-catalog"},"Register table in catalog"),(0,l.yg)("p",null,"This will register the data at mentioned file path as a table in the whichever Metadata catalog is available in your execution environment."),(0,l.yg)("h2",{id:"vacuum-table"},"Vacuum table"),(0,l.yg)("p",null,"Recursively vacuum directories associated with the Delta table. VACUUM removes all files from the table directory that are not managed by Delta, as well as data files that are no longer in the latest state of the transaction log for the table and are older than a retention threshold. The default threshold is 7 days."),(0,l.yg)("p",null,"To learn more about vacuum ",(0,l.yg)("a",{parentName:"p",href:"https://docs.databricks.com/spark/latest/spark-sql/language-manual/delta-vacuum.html"},"click here"),"."),(0,l.yg)("h3",{id:"vacuum-parameters"},"Parameters"),(0,l.yg)("table",null,(0,l.yg)("thead",{parentName:"table"},(0,l.yg)("tr",{parentName:"thead"},(0,l.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,l.yg)("th",{parentName:"tr",align:"left"},"Description"),(0,l.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,l.yg)("tbody",{parentName:"table"},(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"Retention hours"),(0,l.yg)("td",{parentName:"tr",align:"left"},"Retention threshold"),(0,l.yg)("td",{parentName:"tr",align:"left"},"False")))),(0,l.yg)("h2",{id:"optimize-table"},"Optimize table"),(0,l.yg)("p",null,"Optimizes the layout of Delta Table data. Optionally optimize a subset of data or colocate data by column. 
If colocation is not specified, bin-packing optimization is performed by default."),(0,l.yg)("p",null,"To learn more about optimize ",(0,l.yg)("a",{parentName:"p",href:"https://docs.databricks.com/spark/latest/spark-sql/language-manual/delta-optimize.html"},"click here"),"."),(0,l.yg)("h3",{id:"optimize-parameters"},"Parameters"),(0,l.yg)("table",null,(0,l.yg)("thead",{parentName:"table"},(0,l.yg)("tr",{parentName:"thead"},(0,l.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,l.yg)("th",{parentName:"tr",align:"left"},"Description"),(0,l.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,l.yg)("tbody",{parentName:"table"},(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"Where clause"),(0,l.yg)("td",{parentName:"tr",align:"left"},"Optimize the subset of rows matching the given partition predicate. Only filters involving partition key attributes are supported."),(0,l.yg)("td",{parentName:"tr",align:"left"},"False")),(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"ZOrder By"),(0,l.yg)("td",{parentName:"tr",align:"left"},"List of columns to perform ZOrder on"),(0,l.yg)("td",{parentName:"tr",align:"left"},"False")))),(0,l.yg)("h2",{id:"restore-table"},"Restore table"),(0,l.yg)("p",null,"Restores a Delta table to an earlier state. 
Restoring to an earlier version number or a timestamp is supported."),(0,l.yg)("h3",{id:"restore-parameters"},"Parameters"),(0,l.yg)("table",null,(0,l.yg)("thead",{parentName:"table"},(0,l.yg)("tr",{parentName:"thead"},(0,l.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,l.yg)("th",{parentName:"tr",align:"left"},"Description"),(0,l.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,l.yg)("tbody",{parentName:"table"},(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"Restore via"),(0,l.yg)("td",{parentName:"tr",align:"left"},"Restore the table via timestamp or version"),(0,l.yg)("td",{parentName:"tr",align:"left"},"False")),(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"Value"),(0,l.yg)("td",{parentName:"tr",align:"left"},"Value to restore on"),(0,l.yg)("td",{parentName:"tr",align:"left"},"False")))),(0,l.yg)("h2",{id:"delete-from-table"},"Delete from table"),(0,l.yg)("p",null,"Delete removes the data from the latest version of the Delta table that matches the specified condition. 
Please note that delete does not remove it from the physical storage until the older versions are explicitly ",(0,l.yg)("a",{parentName:"p",href:"#vacuum-table"},"vacuumed"),"."),(0,l.yg)("h3",{id:"delete-parameters"},"Parameters"),(0,l.yg)("table",null,(0,l.yg)("thead",{parentName:"table"},(0,l.yg)("tr",{parentName:"thead"},(0,l.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,l.yg)("th",{parentName:"tr",align:"left"},"Description"),(0,l.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,l.yg)("tbody",{parentName:"table"},(0,l.yg)("tr",{parentName:"tbody"},(0,l.yg)("td",{parentName:"tr",align:"left"},"Where clause"),(0,l.yg)("td",{parentName:"tr",align:"left"},"Condition which needs to be satisfied to delete a row"),(0,l.yg)("td",{parentName:"tr",align:"left"},"True")))),(0,l.yg)("h2",{id:"drop-table"},"Drop table"),(0,l.yg)("p",null,"This will drop the table from catalog and remove the files."),(0,l.yg)("h2",{id:"fsck-repair-table"},"FSCK Repair table"),(0,l.yg)("p",null,"Removes the file entries from the transaction log of a Delta table that can no longer be found in the underlying file system. 
This can happen when these files have been manually deleted."),(0,l.yg)("p",null,"To learn more about fsck repair ",(0,l.yg)("a",{parentName:"p",href:"https://docs.databricks.com/spark/latest/spark-sql/language-manual/delta-fsck.html"},"click here"),"."))}d.isMDXComponent=!0},30040:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/delta_operations_eg1-17a89a0e851e87ffb3ef36e8e6cea0ab.png"}}]); \ No newline at end of file diff --git a/assets/js/ec996830.339685b4.js b/assets/js/ec996830.339685b4.js new file mode 100644 index 0000000000..04e361a63e --- /dev/null +++ b/assets/js/ec996830.339685b4.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[65210],{15680:(e,t,a)=>{a.d(t,{xA:()=>l,yg:()=>b});var r=a(96540);function n(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function o(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,r)}return a}function i(e){for(var t=1;t=0||(n[a]=e[a]);return n}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(n[a]=e[a])}return n}var p=r.createContext({}),u=function(e){var t=r.useContext(p),a=t;return e&&(a="function"==typeof e?e(t):i(i({},t),e)),a},l=function(e){var t=u(e.components);return r.createElement(p.Provider,{value:t},e.children)},g="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},h=r.forwardRef((function(e,t){var a=e.components,n=e.mdxType,o=e.originalType,p=e.parentName,l=s(e,["components","mdxType","originalType","parentName"]),g=u(a),h=n,b=g["".concat(p,".").concat(h)]||g[h]||c[h]||o;return a?r.createElement(b,i(i({ref:t},l),{},{components:a})):r.createElement(b,i({ref:t},l))}));function b(e,t){var 
a=arguments,n=t&&t.mdxType;if("string"==typeof e||n){var o=a.length,i=new Array(o);i[0]=h;var s={};for(var p in t)hasOwnProperty.call(t,p)&&(s[p]=t[p]);s.originalType=e,s[g]="string"==typeof e?e:n,i[1]=s;for(var u=2;u{a.r(t),a.d(t,{assets:()=>p,contentTitle:()=>i,default:()=>c,frontMatter:()=>o,metadata:()=>s,toc:()=>u});var r=a(58168),n=(a(96540),a(15680));const o={sidebar_position:1,title:"Basic subgraph",id:"basic-subgraph",description:"Basic Subgraph, Group your Gems in reusable Parent Gems.",tags:["subgraph","reusable","group"]},i=void 0,s={unversionedId:"Spark/gems/subgraph/basic-subgraph",id:"Spark/gems/subgraph/basic-subgraph",title:"Basic subgraph",description:"Basic Subgraph, Group your Gems in reusable Parent Gems.",source:"@site/docs/Spark/gems/subgraph/basicSubgraph.md",sourceDirName:"Spark/gems/subgraph",slug:"/Spark/gems/subgraph/basic-subgraph",permalink:"/Spark/gems/subgraph/basic-subgraph",draft:!1,tags:[{label:"subgraph",permalink:"/tags/subgraph"},{label:"reusable",permalink:"/tags/reusable"},{label:"group",permalink:"/tags/group"}],version:"current",sidebarPosition:1,frontMatter:{sidebar_position:1,title:"Basic subgraph",id:"basic-subgraph",description:"Basic Subgraph, Group your Gems in reusable Parent Gems.",tags:["subgraph","reusable","group"]},sidebar:"defaultSidebar",previous:{title:"Subgraph",permalink:"/Spark/gems/subgraph/"},next:{title:"TableIterator",permalink:"/Spark/gems/subgraph/table-iterator"}},p={},u=[{value:"Create a Basic Subgraph",id:"create-a-basic-subgraph",level:2},{value:"Add/Remove Port",id:"addremove-port",level:2},{value:"Subgraph Configurations",id:"subgraph-configurations",level:2},{value:"Pipeline Level Configurations can apply to the Subgraph",id:"pipeline-level-configurations-can-apply-to-the-subgraph",level:3},{value:"Specify configurations only for the Subgraph",id:"specify-configurations-only-for-the-subgraph",level:3}],l={toc:u},g="wrapper";function 
c(e){let{components:t,...o}=e;return(0,n.yg)(g,(0,r.A)({},l,o,{components:t,mdxType:"MDXLayout"}),(0,n.yg)("h3",null,(0,n.yg)("span",{class:"badge"},"Spark Gem")),(0,n.yg)("p",null,"In a ",(0,n.yg)("strong",{parentName:"p"},"Basic Subgraph"),", you have the capability to encapsulate multiple distinct Gems within a single, reusable parent Gem. This organizational approach enhances the visual clarity of your Pipeline by grouping various sections together under a common parent Gem. Additionally, it empowers you to break down intricate logic into modular components, thereby streamlining the Data Engineering processes."),(0,n.yg)("p",null,"Furthermore, the ability to ",(0,n.yg)("a",{parentName:"p",href:"/package-hub/package-builder/shareable-subgraphs"},"Publish a Subgraph")," extends the utility beyond a singular Pipeline. By doing so, you can reuse the encapsulated logic in other Pipelines or Projects, promoting code re-usability and simplifying the overall development workflow."),(0,n.yg)("h2",{id:"create-a-basic-subgraph"},"Create a Basic Subgraph"),(0,n.yg)("p",null,"To create a Basic Subgraph, drag and drop the ",(0,n.yg)("strong",{parentName:"p"},"(1) Basic")," Subgraph Gem from the Subgraph menu, and connect it to any previously created Gem on your canvas."),(0,n.yg)("p",null,"Once you've added the Gem, click ",(0,n.yg)("strong",{parentName:"p"},"(2) Open"),", to open the subgraph canvas."),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"create_basic_subgraph",src:a(92624).A,width:"2880",height:"1084"})),(0,n.yg)("p",null,"Once you are on the canvas, you can start adding Gems to your subgraph by dragging and dropping from the Gems menu."),(0,n.yg)("h2",{id:"addremove-port"},"Add/Remove Port"),(0,n.yg)("p",null,"While using a Subgraph, you can control the Input and Output ports as per the requirements. 
Click on the ",(0,n.yg)("strong",{parentName:"p"},"(1) Add/Remove Part")," Button to open the port settings dialog as shown below."),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"add_remove_port",src:a(23191).A,width:"2880",height:"1084"})),(0,n.yg)("p",null,"In this dialog, you can add or remove the Input or Output ports.\nTo Add an input Put click on ",(0,n.yg)("strong",{parentName:"p"},"(2) +")," button. This will add an input Port.\nTo remove an Input port, Click the ",(0,n.yg)("strong",{parentName:"p"},"(3) Delete")," icon next to the input port you want to delete."),(0,n.yg)("p",null,"Similarly, you can control the Output Ports by switching to the ",(0,n.yg)("strong",{parentName:"p"},"(4) Output")," Tab.\nAs you connect the input and output ports to gems Outside the subgraph, you would be able to see the schema for the port reflected here."),(0,n.yg)("h2",{id:"subgraph-configurations"},"Subgraph Configurations"),(0,n.yg)("p",null,"Subgraphs are configurable just like ",(0,n.yg)("a",{parentName:"p",href:"/Spark/configuration/"},"Pipelines are configurable"),". For Subgraphs, the configurations can apply in one of two ways: (1) Pipeline level Configurations apply to the Subgraph or (2) Specify Configurations only for the Subgraph"),(0,n.yg)("h3",{id:"pipeline-level-configurations-can-apply-to-the-subgraph"},"Pipeline Level Configurations can apply to the Subgraph"),(0,n.yg)("p",null,"Pipeline configs are accessible inside Subgraphs by ",(0,n.yg)("strong",{parentName:"p"},"copying")," the config to the Subgraph. 
Checkout the video below to see how this works."),(0,n.yg)("div",{class:"wistia_responsive_padding",style:{padding:"56.25% 0 0 0",position:"relative"}},(0,n.yg)("div",{class:"wistia_responsive_wrapper",style:{height:"100%",left:0,position:"absolute",top:0,width:"100%"}},(0,n.yg)("iframe",{src:"https://fast.wistia.net/embed/iframe/56j5k1f6ea?seo=false?videoFoam=true",title:"Getting Started With SQL Video",allow:"autoplay; fullscreen",allowtransparency:"true",frameborder:"0",scrolling:"no",class:"wistia_embed",name:"wistia_embed",msallowfullscreen:!0,width:"100%",height:"100%"}))),(0,n.yg)("script",{src:"https://fast.wistia.net/assets/external/E-v1.js",async:!0}),(0,n.yg)("h3",{id:"specify-configurations-only-for-the-subgraph"},"Specify configurations only for the Subgraph"),(0,n.yg)("p",null,"In Subgraph Configs, you can define values that can be set at the Subgraph level and then be accessed inside any component in the Subgraph. These will also reflect under Configurations of Pipelines using these Subgraphs, but they can only be edited from Subgraph configs."))}c.isMDXComponent=!0},92624:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/Create_basic_subgraph-28591dd35a72ea7fd116107fe2bfccb7.png"},23191:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/subgraph_additional_ports-38d18d2005477a5dbf6750838ef7f5eb.png"}}]); \ No newline at end of file diff --git a/assets/js/ec996830.a0758903.js b/assets/js/ec996830.a0758903.js deleted file mode 100644 index 15f5f61adf..0000000000 --- a/assets/js/ec996830.a0758903.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[65210],{15680:(e,t,a)=>{a.d(t,{xA:()=>l,yg:()=>b});var r=a(96540);function n(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function o(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return 
Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,r)}return a}function i(e){for(var t=1;t=0||(n[a]=e[a]);return n}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(n[a]=e[a])}return n}var p=r.createContext({}),u=function(e){var t=r.useContext(p),a=t;return e&&(a="function"==typeof e?e(t):i(i({},t),e)),a},l=function(e){var t=u(e.components);return r.createElement(p.Provider,{value:t},e.children)},g="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},h=r.forwardRef((function(e,t){var a=e.components,n=e.mdxType,o=e.originalType,p=e.parentName,l=s(e,["components","mdxType","originalType","parentName"]),g=u(a),h=n,b=g["".concat(p,".").concat(h)]||g[h]||c[h]||o;return a?r.createElement(b,i(i({ref:t},l),{},{components:a})):r.createElement(b,i({ref:t},l))}));function b(e,t){var a=arguments,n=t&&t.mdxType;if("string"==typeof e||n){var o=a.length,i=new Array(o);i[0]=h;var s={};for(var p in t)hasOwnProperty.call(t,p)&&(s[p]=t[p]);s.originalType=e,s[g]="string"==typeof e?e:n,i[1]=s;for(var u=2;u{a.r(t),a.d(t,{assets:()=>p,contentTitle:()=>i,default:()=>c,frontMatter:()=>o,metadata:()=>s,toc:()=>u});var r=a(58168),n=(a(96540),a(15680));const o={sidebar_position:1,title:"Basic subgraph",id:"basic-subgraph",description:"Basic Subgraph, Group your Gems in reusable Parent Gems.",tags:["subgraph","reusable","group"]},i=void 0,s={unversionedId:"Spark/gems/subgraph/basic-subgraph",id:"Spark/gems/subgraph/basic-subgraph",title:"Basic subgraph",description:"Basic Subgraph, Group your Gems in reusable Parent 
Gems.",source:"@site/docs/Spark/gems/subgraph/basicSubgraph.md",sourceDirName:"Spark/gems/subgraph",slug:"/Spark/gems/subgraph/basic-subgraph",permalink:"/Spark/gems/subgraph/basic-subgraph",draft:!1,tags:[{label:"subgraph",permalink:"/tags/subgraph"},{label:"reusable",permalink:"/tags/reusable"},{label:"group",permalink:"/tags/group"}],version:"current",sidebarPosition:1,frontMatter:{sidebar_position:1,title:"Basic subgraph",id:"basic-subgraph",description:"Basic Subgraph, Group your Gems in reusable Parent Gems.",tags:["subgraph","reusable","group"]},sidebar:"defaultSidebar",previous:{title:"Subgraph",permalink:"/Spark/gems/subgraph/"},next:{title:"TableIterator",permalink:"/Spark/gems/subgraph/table-iterator"}},p={},u=[{value:"Create a Basic Subgraph",id:"create-a-basic-subgraph",level:2},{value:"Add/Remove Port",id:"addremove-port",level:2},{value:"Subgraph Configurations",id:"subgraph-configurations",level:2},{value:"Pipeline Level Configurations can apply to the Subgraph",id:"pipeline-level-configurations-can-apply-to-the-subgraph",level:3},{value:"Specify configurations only for the Subgraph",id:"specify-configurations-only-for-the-subgraph",level:3}],l={toc:u},g="wrapper";function c(e){let{components:t,...o}=e;return(0,n.yg)(g,(0,r.A)({},l,o,{components:t,mdxType:"MDXLayout"}),(0,n.yg)("h3",null,(0,n.yg)("span",{class:"badge rounded-pill text-bg-light"},"Spark Gem")),(0,n.yg)("p",null,"In a ",(0,n.yg)("strong",{parentName:"p"},"Basic Subgraph"),", you have the capability to encapsulate multiple distinct Gems within a single, reusable parent Gem. This organizational approach enhances the visual clarity of your Pipeline by grouping various sections together under a common parent Gem. 
Additionally, it empowers you to break down intricate logic into modular components, thereby streamlining the Data Engineering processes."),(0,n.yg)("p",null,"Furthermore, the ability to ",(0,n.yg)("a",{parentName:"p",href:"/package-hub/package-builder/shareable-subgraphs"},"Publish a Subgraph")," extends the utility beyond a singular Pipeline. By doing so, you can reuse the encapsulated logic in other Pipelines or Projects, promoting code re-usability and simplifying the overall development workflow."),(0,n.yg)("h2",{id:"create-a-basic-subgraph"},"Create a Basic Subgraph"),(0,n.yg)("p",null,"To create a Basic Subgraph, drag and drop the ",(0,n.yg)("strong",{parentName:"p"},"(1) Basic")," Subgraph Gem from the Subgraph menu, and connect it to any previously created Gem on your canvas."),(0,n.yg)("p",null,"Once you've added the Gem, click ",(0,n.yg)("strong",{parentName:"p"},"(2) Open"),", to open the subgraph canvas."),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"create_basic_subgraph",src:a(92624).A,width:"2880",height:"1084"})),(0,n.yg)("p",null,"Once you are on the canvas, you can start adding Gems to your subgraph by dragging and dropping from the Gems menu."),(0,n.yg)("h2",{id:"addremove-port"},"Add/Remove Port"),(0,n.yg)("p",null,"While using a Subgraph, you can control the Input and Output ports as per the requirements. Click on the ",(0,n.yg)("strong",{parentName:"p"},"(1) Add/Remove Part")," Button to open the port settings dialog as shown below."),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"add_remove_port",src:a(23191).A,width:"2880",height:"1084"})),(0,n.yg)("p",null,"In this dialog, you can add or remove the Input or Output ports.\nTo Add an input Put click on ",(0,n.yg)("strong",{parentName:"p"},"(2) +")," button. 
This will add an input Port.\nTo remove an Input port, Click the ",(0,n.yg)("strong",{parentName:"p"},"(3) Delete")," icon next to the input port you want to delete."),(0,n.yg)("p",null,"Similarly, you can control the Output Ports by switching to the ",(0,n.yg)("strong",{parentName:"p"},"(4) Output")," Tab.\nAs you connect the input and output ports to gems Outside the subgraph, you would be able to see the schema for the port reflected here."),(0,n.yg)("h2",{id:"subgraph-configurations"},"Subgraph Configurations"),(0,n.yg)("p",null,"Subgraphs are configurable just like ",(0,n.yg)("a",{parentName:"p",href:"/Spark/configuration/"},"Pipelines are configurable"),". For Subgraphs, the configurations can apply in one of two ways: (1) Pipeline level Configurations apply to the Subgraph or (2) Specify Configurations only for the Subgraph"),(0,n.yg)("h3",{id:"pipeline-level-configurations-can-apply-to-the-subgraph"},"Pipeline Level Configurations can apply to the Subgraph"),(0,n.yg)("p",null,"Pipeline configs are accessible inside Subgraphs by ",(0,n.yg)("strong",{parentName:"p"},"copying")," the config to the Subgraph. 
Checkout the video below to see how this works."),(0,n.yg)("div",{class:"wistia_responsive_padding",style:{padding:"56.25% 0 0 0",position:"relative"}},(0,n.yg)("div",{class:"wistia_responsive_wrapper",style:{height:"100%",left:0,position:"absolute",top:0,width:"100%"}},(0,n.yg)("iframe",{src:"https://fast.wistia.net/embed/iframe/56j5k1f6ea?seo=false?videoFoam=true",title:"Getting Started With SQL Video",allow:"autoplay; fullscreen",allowtransparency:"true",frameborder:"0",scrolling:"no",class:"wistia_embed",name:"wistia_embed",msallowfullscreen:!0,width:"100%",height:"100%"}))),(0,n.yg)("script",{src:"https://fast.wistia.net/assets/external/E-v1.js",async:!0}),(0,n.yg)("h3",{id:"specify-configurations-only-for-the-subgraph"},"Specify configurations only for the Subgraph"),(0,n.yg)("p",null,"In Subgraph Configs, you can define values that can be set at the Subgraph level and then be accessed inside any component in the Subgraph. These will also reflect under Configurations of Pipelines using these Subgraphs, but they can only be edited from Subgraph configs."))}c.isMDXComponent=!0},92624:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/Create_basic_subgraph-28591dd35a72ea7fd116107fe2bfccb7.png"},23191:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/subgraph_additional_ports-38d18d2005477a5dbf6750838ef7f5eb.png"}}]); \ No newline at end of file diff --git a/assets/js/f11ee91b.07a32cba.js b/assets/js/f11ee91b.07a32cba.js new file mode 100644 index 0000000000..6ea3706039 --- /dev/null +++ b/assets/js/f11ee91b.07a32cba.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[97501],{15680:(e,t,a)=>{a.d(t,{xA:()=>u,yg:()=>d});var o=a(96540);function r(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function n(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);t&&(o=o.filter((function(t){return 
Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,o)}return a}function i(e){for(var t=1;t=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);for(o=0;o=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var s=o.createContext({}),p=function(e){var t=o.useContext(s),a=t;return e&&(a="function"==typeof e?e(t):i(i({},t),e)),a},u=function(e){var t=p(e.components);return o.createElement(s.Provider,{value:t},e.children)},g="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return o.createElement(o.Fragment,{},t)}},h=o.forwardRef((function(e,t){var a=e.components,r=e.mdxType,n=e.originalType,s=e.parentName,u=l(e,["components","mdxType","originalType","parentName"]),g=p(a),h=r,d=g["".concat(s,".").concat(h)]||g[h]||c[h]||n;return a?o.createElement(d,i(i({ref:t},u),{},{components:a})):o.createElement(d,i({ref:t},u))}));function d(e,t){var a=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var n=a.length,i=new Array(n);i[0]=h;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[g]="string"==typeof e?e:r,i[1]=l;for(var p=2;p{a.r(t),a.d(t,{assets:()=>s,contentTitle:()=>i,default:()=>c,frontMatter:()=>n,metadata:()=>l,toc:()=>p});var o=a(58168),r=(a(96540),a(15680));const n={sidebar_position:2,title:"TableIterator",id:"table-iterator",description:"Loop over each row of an input Dataframe",tags:["loop","table","iterator"]},i=void 0,l={unversionedId:"Spark/gems/subgraph/table-iterator",id:"Spark/gems/subgraph/table-iterator",title:"TableIterator",description:"Loop over each row of an input 
Dataframe",source:"@site/docs/Spark/gems/subgraph/tableIterator.md",sourceDirName:"Spark/gems/subgraph",slug:"/Spark/gems/subgraph/table-iterator",permalink:"/Spark/gems/subgraph/table-iterator",draft:!1,tags:[{label:"loop",permalink:"/tags/loop"},{label:"table",permalink:"/tags/table"},{label:"iterator",permalink:"/tags/iterator"}],version:"current",sidebarPosition:2,frontMatter:{sidebar_position:2,title:"TableIterator",id:"table-iterator",description:"Loop over each row of an input Dataframe",tags:["loop","table","iterator"]},sidebar:"defaultSidebar",previous:{title:"Basic subgraph",permalink:"/Spark/gems/subgraph/basic-subgraph"},next:{title:"Execution",permalink:"/Spark/execution/"}},s={},p=[{value:"Creating a TableIterator Gem",id:"creating-a-tableiterator-gem",level:2},{value:"Configure the TableIterator",id:"configure-the-tableiterator",level:2},{value:"Running the Loop",id:"running-the-loop",level:2},{value:"Adding Inputs and Outputs to TableIterator",id:"adding-inputs-and-outputs-to-tableiterator",level:2}],u={toc:p},g="wrapper";function c(e){let{components:t,...n}=e;return(0,r.yg)(g,(0,o.A)({},u,n,{components:t,mdxType:"MDXLayout"}),(0,r.yg)("h3",null,(0,r.yg)("span",{class:"badge"},"Spark Gem")),(0,r.yg)("p",null,"TableIterator allows you to iterate over one or more Gems for each row of the first input DataFrame.\nLet's see how to create a Basic Loop which loops over a Metadata Table, and for each row of the table will run the Gems inside the Subgraph."),(0,r.yg)("h2",{id:"creating-a-tableiterator-gem"},"Creating a TableIterator Gem"),(0,r.yg)("p",null,"First add the Input Gem on which you want to Iterate over. 
For this, simply use an existing Dataset or create a new ",(0,r.yg)("a",{parentName:"p",href:"/Spark/gems/source-target/"},"Source Gem")," pointing to your Metadata table.\nYou can run this Source Gem to see the data your loop would be running for."),(0,r.yg)("p",null,"Now, Drag and Drop the ",(0,r.yg)("strong",{parentName:"p"},"(1) TableIterator")," Gem from the Subgraph menu, and connect it to the above created Source Gem."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Create_table_iterator",src:a(68982).A,width:"2880",height:"1084"})),(0,r.yg)("h2",{id:"configure-the-tableiterator"},"Configure the TableIterator"),(0,r.yg)("p",null,"Open the TableIterator Gem, and click on ",(0,r.yg)("strong",{parentName:"p"},"(1) Configure")," to open the Settings dialog.\nHere, on the left side panel you can edit the ",(0,r.yg)("strong",{parentName:"p"},"(2) Name ")," of your Gem, check the ",(0,r.yg)("strong",{parentName:"p"},"(3) Input Schema")," for your DataFrame on which the loop will iterate."),(0,r.yg)("p",null,"On the right side, you can define your Iterator Settings, and any other Subgraph Configs you want to use in the Subgraph.\nIn the ",(0,r.yg)("strong",{parentName:"p"},"(4) Max Iterations")," field, you can control the maximum number of Iterations this loop can have. This is to safeguard that nobody runs the loop on a very large DataFrame by mistake. 
The default value is set to 1000."),(0,r.yg)("p",null,"You can also ",(0,r.yg)("strong",{parentName:"p"},"(5) Enable Parallel Execution"),"."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"configure_loop",src:a(35012).A,width:"2880",height:"1084"})),(0,r.yg)("p",null,"When you check Enable Parallel Execution, you can also control ",(0,r.yg)("strong",{parentName:"p"},"(1) Number of threads")," to run in parallel as shown below."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"loop_settings",src:a(30254).A,width:"2880",height:"1726"})),(0,r.yg)("p",null,"In the table below that, map the columns from the incoming DataFrame, to the configs of the Subgraph.\nWhen you pick a column name in ",(0,r.yg)("strong",{parentName:"p"},"(2) Source column"),", a config is automatically created with the same name in ",(0,r.yg)("strong",{parentName:"p"},"(3) Config name"),". You can see the configs created by switching to the ",(0,r.yg)("strong",{parentName:"p"},"(4) Configuration")," tab."),(0,r.yg)("p",null,"This will show all the configurations created for this Subgraph as shown below."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"loop_configs",src:a(7180).A,width:"2880",height:"1084"})),(0,r.yg)("p",null,"You can add default values for any of these by switching to the ",(0,r.yg)("strong",{parentName:"p"},"(1) Config Tab"),", as shown above. You can also add more configurations in case you want to use in your subgraph."),(0,r.yg)("admonition",{type:"info"},(0,r.yg)("p",{parentName:"admonition"},"Note, the configurations associated with a source column will have different values for each iteration based on the input data's current row value for that column.\nThe configs which are not mapped to a source column, will have a fixed value for each Iteration provided in the Config tab.")),(0,r.yg)("p",null,"Once done, click on ",(0,r.yg)("strong",{parentName:"p"},"(2) Save"),", to save the Iterator configurations."),(0,r.yg)("p",null,"Now you can add the Gems to your Subgraph on which you want to loop on. 
To do this simply Drag and Drop any Gem onto the Subgraph Canvas.\nYou can add any Source, Target or any other transformation Gem and use the configs inside these Gems to change the values for each Iteration."),(0,r.yg)("h2",{id:"running-the-loop"},"Running the Loop"),(0,r.yg)("p",null,"Once you have added the Gems to your Subgraph, Click on the ",(0,r.yg)("strong",{parentName:"p"},"(1) Run button")," to start execution.\nAs the execution starts, you will see a new ",(0,r.yg)("strong",{parentName:"p"},"(2) Iteration")," button. This will show for which Iteration you are seeing the current interims.\nYou can click on the ",(0,r.yg)("strong",{parentName:"p"},"(3) Interim")," to check values for that Iteration."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"run_loop",src:a(41550).A,width:"2880",height:"1726"})),(0,r.yg)("p",null,"Click on the ",(0,r.yg)("strong",{parentName:"p"},"(2) Iteration")," button, and it will open up the Iterations table as shown below. Here you can see all iterations and config values for each of them."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"iterations",src:a(33930).A,width:"2880",height:"1726"})),(0,r.yg)("h2",{id:"adding-inputs-and-outputs-to-tableiterator"},"Adding Inputs and Outputs to TableIterator"),(0,r.yg)("p",null,"For a TableIterator Gem, the first input port is for your DataFrame on which you want to Iterate Over.\nYou can ",(0,r.yg)("strong",{parentName:"p"},"(1)Add")," more Inputs or Switch to ",(0,r.yg)("strong",{parentName:"p"},"(2) Output")," tab to add more Outputs as needed. These extra inputs would not change for every iteration.\nAlso, the output will be a Union of output of all Iterations. 
You can ",(0,r.yg)("strong",{parentName:"p"},"(3) Delete")," any port by hovering over it."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"table_iterator_ports",src:a(36835).A,width:"2880",height:"1084"})))}c.isMDXComponent=!0},35012:(e,t,a)=>{a.d(t,{A:()=>o});const o=a.p+"assets/images/Configure_table_iterator-c60445a1e833bcee9de2733f401015b5.png"},68982:(e,t,a)=>{a.d(t,{A:()=>o});const o=a.p+"assets/images/Create_table_iterator-a3512c5b4d7b155d25e12d6d29c107af.png"},33930:(e,t,a)=>{a.d(t,{A:()=>o});const o=a.p+"assets/images/Iterations-965b7bd7ee678a4dcc7d37764a86f606.png"},41550:(e,t,a)=>{a.d(t,{A:()=>o});const o=a.p+"assets/images/Run_loop-008c14dcf9e7453783ca2aab1b6cc081.png"},36835:(e,t,a)=>{a.d(t,{A:()=>o});const o=a.p+"assets/images/loop_additional_ports-fd25bddd2573f1df164048664248d380.png"},7180:(e,t,a)=>{a.d(t,{A:()=>o});const o=a.p+"assets/images/loop_configs-e654f1ca4d4227838351199e1fe6d235.png"},30254:(e,t,a)=>{a.d(t,{A:()=>o});const o=a.p+"assets/images/loop_settings-3a32aca4935e3fb4db5e9e428226444a.png"}}]); \ No newline at end of file diff --git a/assets/js/f11ee91b.be73946a.js b/assets/js/f11ee91b.be73946a.js deleted file mode 100644 index 96ba5f20c0..0000000000 --- a/assets/js/f11ee91b.be73946a.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[97501],{15680:(e,t,a)=>{a.d(t,{xA:()=>u,yg:()=>d});var o=a(96540);function r(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function n(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);t&&(o=o.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,o)}return a}function i(e){for(var t=1;t=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);for(o=0;o=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var s=o.createContext({}),p=function(e){var 
t=o.useContext(s),a=t;return e&&(a="function"==typeof e?e(t):i(i({},t),e)),a},u=function(e){var t=p(e.components);return o.createElement(s.Provider,{value:t},e.children)},g="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return o.createElement(o.Fragment,{},t)}},h=o.forwardRef((function(e,t){var a=e.components,r=e.mdxType,n=e.originalType,s=e.parentName,u=l(e,["components","mdxType","originalType","parentName"]),g=p(a),h=r,d=g["".concat(s,".").concat(h)]||g[h]||c[h]||n;return a?o.createElement(d,i(i({ref:t},u),{},{components:a})):o.createElement(d,i({ref:t},u))}));function d(e,t){var a=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var n=a.length,i=new Array(n);i[0]=h;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[g]="string"==typeof e?e:r,i[1]=l;for(var p=2;p{a.r(t),a.d(t,{assets:()=>s,contentTitle:()=>i,default:()=>c,frontMatter:()=>n,metadata:()=>l,toc:()=>p});var o=a(58168),r=(a(96540),a(15680));const n={sidebar_position:2,title:"TableIterator",id:"table-iterator",description:"Loop over each row of an input Dataframe",tags:["loop","table","iterator"]},i=void 0,l={unversionedId:"Spark/gems/subgraph/table-iterator",id:"Spark/gems/subgraph/table-iterator",title:"TableIterator",description:"Loop over each row of an input Dataframe",source:"@site/docs/Spark/gems/subgraph/tableIterator.md",sourceDirName:"Spark/gems/subgraph",slug:"/Spark/gems/subgraph/table-iterator",permalink:"/Spark/gems/subgraph/table-iterator",draft:!1,tags:[{label:"loop",permalink:"/tags/loop"},{label:"table",permalink:"/tags/table"},{label:"iterator",permalink:"/tags/iterator"}],version:"current",sidebarPosition:2,frontMatter:{sidebar_position:2,title:"TableIterator",id:"table-iterator",description:"Loop over each row of an input Dataframe",tags:["loop","table","iterator"]},sidebar:"defaultSidebar",previous:{title:"Basic 
subgraph",permalink:"/Spark/gems/subgraph/basic-subgraph"},next:{title:"Execution",permalink:"/Spark/execution/"}},s={},p=[{value:"Creating a TableIterator Gem",id:"creating-a-tableiterator-gem",level:2},{value:"Configure the TableIterator",id:"configure-the-tableiterator",level:2},{value:"Running the Loop",id:"running-the-loop",level:2},{value:"Adding Inputs and Outputs to TableIterator",id:"adding-inputs-and-outputs-to-tableiterator",level:2}],u={toc:p},g="wrapper";function c(e){let{components:t,...n}=e;return(0,r.yg)(g,(0,o.A)({},u,n,{components:t,mdxType:"MDXLayout"}),(0,r.yg)("h3",null,(0,r.yg)("span",{class:"badge rounded-pill text-bg-light"},"Spark Gem")),(0,r.yg)("p",null,"TableIterator allows you to iterate over one or more Gems for each row of the first input DataFrame.\nLet's see how to create a Basic Loop which loops over a Metadata Table, and for each row of the table will run the Gems inside the Subgraph."),(0,r.yg)("h2",{id:"creating-a-tableiterator-gem"},"Creating a TableIterator Gem"),(0,r.yg)("p",null,"First add the Input Gem on which you want to Iterate over. 
For this, simply use an existing Dataset or create a new ",(0,r.yg)("a",{parentName:"p",href:"/Spark/gems/source-target/"},"Source Gem")," pointing to your Metadata table.\nYou can run this Source Gem to see the data your loop would be running for."),(0,r.yg)("p",null,"Now, Drag and Drop the ",(0,r.yg)("strong",{parentName:"p"},"(1) TableIterator")," Gem from the Subgraph menu, and connect it to the above created Source Gem."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Create_table_iterator",src:a(68982).A,width:"2880",height:"1084"})),(0,r.yg)("h2",{id:"configure-the-tableiterator"},"Configure the TableIterator"),(0,r.yg)("p",null,"Open the TableIterator Gem, and click on ",(0,r.yg)("strong",{parentName:"p"},"(1) Configure")," to open the Settings dialog.\nHere, on the left side panel you can edit the ",(0,r.yg)("strong",{parentName:"p"},"(2) Name ")," of your Gem, check the ",(0,r.yg)("strong",{parentName:"p"},"(3) Input Schema")," for your DataFrame on which the loop will iterate."),(0,r.yg)("p",null,"On the right side, you can define your Iterator Settings, and any other Subgraph Configs you want to use in the Subgraph.\nIn the ",(0,r.yg)("strong",{parentName:"p"},"(4) Max Iterations")," field, you can control the maximum number of Iterations this loop can have. This is to safeguard that nobody runs the loop on a very large DataFrame by mistake. 
The default value is set to 1000."),(0,r.yg)("p",null,"You can also ",(0,r.yg)("strong",{parentName:"p"},"(5) Enable Parallel Execution"),"."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"configure_loop",src:a(35012).A,width:"2880",height:"1084"})),(0,r.yg)("p",null,"When you check Enable Parallel Execution, you can also control ",(0,r.yg)("strong",{parentName:"p"},"(1) Number of threads")," to run in parallel as shown below."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"loop_settings",src:a(30254).A,width:"2880",height:"1726"})),(0,r.yg)("p",null,"In the table below that, map the columns from the incoming DataFrame, to the configs of the Subgraph.\nWhen you pick a column name in ",(0,r.yg)("strong",{parentName:"p"},"(2) Source column"),", a config is automatically created with the same name in ",(0,r.yg)("strong",{parentName:"p"},"(3) Config name"),". You can see the configs created by switching to the ",(0,r.yg)("strong",{parentName:"p"},"(4) Configuration")," tab."),(0,r.yg)("p",null,"This will show all the configurations created for this Subgraph as shown below."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"loop_configs",src:a(7180).A,width:"2880",height:"1084"})),(0,r.yg)("p",null,"You can add default values for any of these by switching to the ",(0,r.yg)("strong",{parentName:"p"},"(1) Config Tab"),", as shown above. You can also add more configurations in case you want to use in your subgraph."),(0,r.yg)("admonition",{type:"info"},(0,r.yg)("p",{parentName:"admonition"},"Note, the configurations associated with a source column will have different values for each iteration based on the input data's current row value for that column.\nThe configs which are not mapped to a source column, will have a fixed value for each Iteration provided in the Config tab.")),(0,r.yg)("p",null,"Once done, click on ",(0,r.yg)("strong",{parentName:"p"},"(2) Save"),", to save the Iterator configurations."),(0,r.yg)("p",null,"Now you can add the Gems to your Subgraph on which you want to loop on. 
To do this simply Drag and Drop any Gem onto the Subgraph Canvas.\nYou can add any Source, Target or any other transformation Gem and use the configs inside these Gems to change the values for each Iteration."),(0,r.yg)("h2",{id:"running-the-loop"},"Running the Loop"),(0,r.yg)("p",null,"Once you have added the Gems to your Subgraph, Click on the ",(0,r.yg)("strong",{parentName:"p"},"(1) Run button")," to start execution.\nAs the execution starts, you will see a new ",(0,r.yg)("strong",{parentName:"p"},"(2) Iteration")," button. This will show for which Iteration you are seeing the current interims.\nYou can click on the ",(0,r.yg)("strong",{parentName:"p"},"(3) Interim")," to check values for that Iteration."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"run_loop",src:a(41550).A,width:"2880",height:"1726"})),(0,r.yg)("p",null,"Click on the ",(0,r.yg)("strong",{parentName:"p"},"(2) Iteration")," button, and it will open up the Iterations table as shown below. Here you can see all iterations and config values for each of them."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"iterations",src:a(33930).A,width:"2880",height:"1726"})),(0,r.yg)("h2",{id:"adding-inputs-and-outputs-to-tableiterator"},"Adding Inputs and Outputs to TableIterator"),(0,r.yg)("p",null,"For a TableIterator Gem, the first input port is for your DataFrame on which you want to Iterate Over.\nYou can ",(0,r.yg)("strong",{parentName:"p"},"(1)Add")," more Inputs or Switch to ",(0,r.yg)("strong",{parentName:"p"},"(2) Output")," tab to add more Outputs as needed. These extra inputs would not change for every iteration.\nAlso, the output will be a Union of output of all Iterations. 
You can ",(0,r.yg)("strong",{parentName:"p"},"(3) Delete")," any port by hovering over it."),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"table_iterator_ports",src:a(36835).A,width:"2880",height:"1084"})))}c.isMDXComponent=!0},35012:(e,t,a)=>{a.d(t,{A:()=>o});const o=a.p+"assets/images/Configure_table_iterator-c60445a1e833bcee9de2733f401015b5.png"},68982:(e,t,a)=>{a.d(t,{A:()=>o});const o=a.p+"assets/images/Create_table_iterator-a3512c5b4d7b155d25e12d6d29c107af.png"},33930:(e,t,a)=>{a.d(t,{A:()=>o});const o=a.p+"assets/images/Iterations-965b7bd7ee678a4dcc7d37764a86f606.png"},41550:(e,t,a)=>{a.d(t,{A:()=>o});const o=a.p+"assets/images/Run_loop-008c14dcf9e7453783ca2aab1b6cc081.png"},36835:(e,t,a)=>{a.d(t,{A:()=>o});const o=a.p+"assets/images/loop_additional_ports-fd25bddd2573f1df164048664248d380.png"},7180:(e,t,a)=>{a.d(t,{A:()=>o});const o=a.p+"assets/images/loop_configs-e654f1ca4d4227838351199e1fe6d235.png"},30254:(e,t,a)=>{a.d(t,{A:()=>o});const o=a.p+"assets/images/loop_settings-3a32aca4935e3fb4db5e9e428226444a.png"}}]); \ No newline at end of file diff --git a/assets/js/f29eab11.27d94399.js b/assets/js/f29eab11.27d94399.js deleted file mode 100644 index d7725fca0f..0000000000 --- a/assets/js/f29eab11.27d94399.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[1634],{15680:(e,t,n)=>{n.d(t,{xA:()=>c,yg:()=>f});var r=n(96540);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function l(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var i=r.createContext({}),p=function(e){var 
t=r.useContext(i),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},c=function(e){var t=p(e.components);return r.createElement(i.Provider,{value:t},e.children)},u="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},y=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,i=e.parentName,c=s(e,["components","mdxType","originalType","parentName"]),u=p(n),y=a,f=u["".concat(i,".").concat(y)]||u[y]||d[y]||o;return n?r.createElement(f,l(l({ref:t},c),{},{components:n})):r.createElement(f,l({ref:t},c))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,l=new Array(o);l[0]=y;var s={};for(var i in t)hasOwnProperty.call(t,i)&&(s[i]=t[i]);s.originalType=e,s[u]="string"==typeof e?e:a,l[1]=s;for(var p=2;p{n.d(t,{A:()=>l});var r=n(96540),a=n(20053);const o={tabItem:"tabItem_Ymn6"};function l(e){let{children:t,hidden:n,className:l}=e;return r.createElement("div",{role:"tabpanel",className:(0,a.A)(o.tabItem,l),hidden:n},t)}},11470:(e,t,n)=>{n.d(t,{A:()=>x});var r=n(58168),a=n(96540),o=n(20053),l=n(23104),s=n(56347),i=n(57485),p=n(31682),c=n(89466);function u(e){return function(e){return a.Children.map(e,(e=>{if(!e||(0,a.isValidElement)(e)&&function(e){const{props:t}=e;return!!t&&"object"==typeof t&&"value"in t}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:t,label:n,attributes:r,default:a}}=e;return{value:t,label:n,attributes:r,default:a}}))}function d(e){const{values:t,children:n}=e;return(0,a.useMemo)((()=>{const e=t??u(n);return function(e){const t=(0,p.X)(e,((e,t)=>e.value===t.value));if(t.length>0)throw new Error(`Docusaurus error: Duplicate values "${t.map((e=>e.value)).join(", ")}" found in . 
Every value needs to be unique.`)}(e),e}),[t,n])}function y(e){let{value:t,tabValues:n}=e;return n.some((e=>e.value===t))}function f(e){let{queryString:t=!1,groupId:n}=e;const r=(0,s.W6)(),o=function(e){let{queryString:t=!1,groupId:n}=e;if("string"==typeof t)return t;if(!1===t)return null;if(!0===t&&!n)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. You can also provide an explicit value such as queryString="my-search-param".');return n??null}({queryString:t,groupId:n});return[(0,i.aZ)(o),(0,a.useCallback)((e=>{if(!o)return;const t=new URLSearchParams(r.location.search);t.set(o,e),r.replace({...r.location,search:t.toString()})}),[o,r])]}function m(e){const{defaultValue:t,queryString:n=!1,groupId:r}=e,o=d(e),[l,s]=(0,a.useState)((()=>function(e){let{defaultValue:t,tabValues:n}=e;if(0===n.length)throw new Error("Docusaurus error: the component requires at least one children component");if(t){if(!y({value:t,tabValues:n}))throw new Error(`Docusaurus error: The has a defaultValue "${t}" but none of its children has the corresponding value. Available values are: ${n.map((e=>e.value)).join(", ")}. 
If you intend to show no default tab, use defaultValue={null} instead.`);return t}const r=n.find((e=>e.default))??n[0];if(!r)throw new Error("Unexpected error: 0 tabValues");return r.value}({defaultValue:t,tabValues:o}))),[i,p]=f({queryString:n,groupId:r}),[u,m]=function(e){let{groupId:t}=e;const n=function(e){return e?`docusaurus.tab.${e}`:null}(t),[r,o]=(0,c.Dv)(n);return[r,(0,a.useCallback)((e=>{n&&o.set(e)}),[n,o])]}({groupId:r}),b=(()=>{const e=i??u;return y({value:e,tabValues:o})?e:null})();(0,a.useLayoutEffect)((()=>{b&&s(b)}),[b]);return{selectedValue:l,selectValue:(0,a.useCallback)((e=>{if(!y({value:e,tabValues:o}))throw new Error(`Can't select invalid tab value=${e}`);s(e),p(e),m(e)}),[p,m,o]),tabValues:o}}var b=n(92303);const g={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function h(e){let{className:t,block:n,selectedValue:s,selectValue:i,tabValues:p}=e;const c=[],{blockElementScrollPositionUntilNextRender:u}=(0,l.a_)(),d=e=>{const t=e.currentTarget,n=c.indexOf(t),r=p[n].value;r!==s&&(u(t),i(r))},y=e=>{let t=null;switch(e.key){case"Enter":d(e);break;case"ArrowRight":{const n=c.indexOf(e.currentTarget)+1;t=c[n]??c[0];break}case"ArrowLeft":{const n=c.indexOf(e.currentTarget)-1;t=c[n]??c[c.length-1];break}}t?.focus()};return a.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,o.A)("tabs",{"tabs--block":n},t)},p.map((e=>{let{value:t,label:n,attributes:l}=e;return a.createElement("li",(0,r.A)({role:"tab",tabIndex:s===t?0:-1,"aria-selected":s===t,key:t,ref:e=>c.push(e),onKeyDown:y,onClick:d},l,{className:(0,o.A)("tabs__item",g.tabItem,l?.className,{"tabs__item--active":s===t})}),n??t)})))}function _(e){let{lazy:t,children:n,selectedValue:r}=e;const o=(Array.isArray(n)?n:[n]).filter(Boolean);if(t){const e=o.find((e=>e.props.value===r));return e?(0,a.cloneElement)(e,{className:"margin-top--md"}):null}return 
a.createElement("div",{className:"margin-top--md"},o.map(((e,t)=>(0,a.cloneElement)(e,{key:t,hidden:e.props.value!==r}))))}function v(e){const t=m(e);return a.createElement("div",{className:(0,o.A)("tabs-container",g.tabList)},a.createElement(h,(0,r.A)({},e,t)),a.createElement(_,(0,r.A)({},e,t)))}function x(e){const t=(0,b.A)();return a.createElement(v,(0,r.A)({key:String(t)},e))}},38035:(e,t,n)=>{n.r(t),n.d(t,{assets:()=>c,contentTitle:()=>i,default:()=>f,frontMatter:()=>s,metadata:()=>p,toc:()=>u});var r=n(58168),a=(n(96540),n(15680)),o=n(11470),l=n(19365);const s={sidebar_position:3,title:"FileOperation",id:"file-operations",description:"Perform file operations on different file systems",tags:["file","dbfs"]},i=void 0,p={unversionedId:"Spark/gems/custom/file-operations",id:"Spark/gems/custom/file-operations",title:"FileOperation",description:"Perform file operations on different file systems",source:"@site/docs/Spark/gems/custom/file-operation.md",sourceDirName:"Spark/gems/custom",slug:"/Spark/gems/custom/file-operations",permalink:"/Spark/gems/custom/file-operations",draft:!1,tags:[{label:"file",permalink:"/tags/file"},{label:"dbfs",permalink:"/tags/dbfs"}],version:"current",sidebarPosition:3,frontMatter:{sidebar_position:3,title:"FileOperation",id:"file-operations",description:"Perform file operations on different file systems",tags:["file","dbfs"]},sidebar:"defaultSidebar",previous:{title:"Script",permalink:"/Spark/gems/custom/script"},next:{title:"DeltaTableOperations",permalink:"/Spark/gems/custom/delta-ops"}},c={},u=[{value:"Parameters",id:"parameters",level:2},{value:"Examples",id:"examples",level:2},{value:"Copy Single File",id:"copy-single-file",level:3},{value:"Copy All Files From A Directory",id:"copy-all-files-from-a-directory",level:3},{value:"Move Files",id:"move-files",level:3},{value:"S3 - Sync Entire Directory",id:"s3---sync-entire-directory",level:3}],d={toc:u},y="wrapper";function 
f(e){let{components:t,...n}=e;return(0,a.yg)(y,(0,r.A)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,a.yg)("h3",null,(0,a.yg)("span",{class:"badge rounded-pill text-bg-light"},"Spark Gem")),(0,a.yg)("p",null,"Helps perform file operations like ",(0,a.yg)("inlineCode",{parentName:"p"},"copy")," and ",(0,a.yg)("inlineCode",{parentName:"p"},"move")," on different file systems."),(0,a.yg)("h2",{id:"parameters"},"Parameters"),(0,a.yg)("table",null,(0,a.yg)("thead",{parentName:"table"},(0,a.yg)("tr",{parentName:"thead"},(0,a.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,a.yg)("th",{parentName:"tr",align:"left"},"Description"),(0,a.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,a.yg)("tbody",{parentName:"table"},(0,a.yg)("tr",{parentName:"tbody"},(0,a.yg)("td",{parentName:"tr",align:"left"},"File System"),(0,a.yg)("td",{parentName:"tr",align:"left"},(0,a.yg)("inlineCode",{parentName:"td"},"Local")," - for operations on driver node file system ",(0,a.yg)("br",null)," ",(0,a.yg)("inlineCode",{parentName:"td"},"DBFS")," - for operations on Databricks file system ",(0,a.yg)("br",null)," ",(0,a.yg)("inlineCode",{parentName:"td"},"S3")," - for operations on S3 object store"),(0,a.yg)("td",{parentName:"tr",align:"left"},"True")),(0,a.yg)("tr",{parentName:"tbody"},(0,a.yg)("td",{parentName:"tr",align:"left"},"Operation"),(0,a.yg)("td",{parentName:"tr",align:"left"},"Operation to perform, ",(0,a.yg)("inlineCode",{parentName:"td"},"Copy"),", ",(0,a.yg)("inlineCode",{parentName:"td"},"Move")," or ",(0,a.yg)("inlineCode",{parentName:"td"},"Sync")),(0,a.yg)("td",{parentName:"tr",align:"left"},"True")),(0,a.yg)("tr",{parentName:"tbody"},(0,a.yg)("td",{parentName:"tr",align:"left"},"Filename Regex"),(0,a.yg)("td",{parentName:"tr",align:"left"},"Regex to Filter File Names Eg: stdlog.","*",".","txt"),(0,a.yg)("td",{parentName:"tr",align:"left"},"False")),(0,a.yg)("tr",{parentName:"tbody"},(0,a.yg)("td",{parentName:"tr",align:"left"},"Ignore empty 
files"),(0,a.yg)("td",{parentName:"tr",align:"left"},"Ignore if file size is empty (Size of file is 0 bytes)"),(0,a.yg)("td",{parentName:"tr",align:"left"},"False")),(0,a.yg)("tr",{parentName:"tbody"},(0,a.yg)("td",{parentName:"tr",align:"left"},"Recurse"),(0,a.yg)("td",{parentName:"tr",align:"left"},"Boolean for performing ",(0,a.yg)("inlineCode",{parentName:"td"},"Operation")," recursively. Default is ",(0,a.yg)("inlineCode",{parentName:"td"},"False")),(0,a.yg)("td",{parentName:"tr",align:"left"},"False")),(0,a.yg)("tr",{parentName:"tbody"},(0,a.yg)("td",{parentName:"tr",align:"left"},"Source Path"),(0,a.yg)("td",{parentName:"tr",align:"left"},"Path of source file/directory. ",(0,a.yg)("br",null),"Eg: /dbfs/source_file.txt, dbfs:/source_file.txt, s3://source_bucket/source_prefix/filename.txt"),(0,a.yg)("td",{parentName:"tr",align:"left"},"True")),(0,a.yg)("tr",{parentName:"tbody"},(0,a.yg)("td",{parentName:"tr",align:"left"},"Destination Path"),(0,a.yg)("td",{parentName:"tr",align:"left"},"Path of destination file/directory. ",(0,a.yg)("br",null)," Eg: /dbfs/target_file.txt, dbfs:/target_file.txt, s3://target_bucket/target_prefix/filename.txt"),(0,a.yg)("td",{parentName:"tr",align:"left"},"True")))),(0,a.yg)("admonition",{type:"info"},(0,a.yg)("p",{parentName:"admonition"},"You can perform operations on DBFS files using ",(0,a.yg)("inlineCode",{parentName:"p"},"Local")," file system too by providing path under ",(0,a.yg)("inlineCode",{parentName:"p"},"/dbfs"),"!",(0,a.yg)("br",null)," This is because Databricks uses a FUSE mount to provide local access to the files stored in the cloud. 
A FUSE mount is a secure, virtual filesystem.")),(0,a.yg)("h2",{id:"examples"},"Examples"),(0,a.yg)("hr",null),(0,a.yg)("h3",{id:"copy-single-file"},"Copy Single File"),(0,a.yg)("div",{class:"wistia_responsive_padding",style:{padding:"56.25% 0 0 0",position:"relative"}},(0,a.yg)("div",{class:"wistia_responsive_wrapper",style:{height:"100%",left:0,position:"absolute",top:0,width:"100%"}},(0,a.yg)("iframe",{src:"https://github.com/SimpleDataLabsInc/prophecy-docs/assets/130362885/6db06ea9-27ef-4833-a837-a49adf3ff2c6",title:"File Copy single file",allow:"autoplay;fullscreen",allowtransparency:"true",frameborder:"0",scrolling:"no",class:"wistia_embed",name:"wistia_embed",msallowfullscreen:!0,width:"100%",height:"100%"}))),(0,a.yg)(o.A,{mdxType:"Tabs"},(0,a.yg)(l.A,{value:"py",label:"DBFS",mdxType:"TabItem"},(0,a.yg)("pre",null,(0,a.yg)("code",{parentName:"pre",className:"language-py"},'def copy_file(spark: SparkSession):\n from pyspark.dbutils import DBUtils\n DBUtils(spark).fs.cp(\n "dbfs:/Prophecy/example/source/person.json",\n "dbfs:/Prophecy/example/target/person.json",\n recurse = False\n )\n'))),(0,a.yg)(l.A,{value:"py2",label:"Local",mdxType:"TabItem"},(0,a.yg)("pre",null,(0,a.yg)("code",{parentName:"pre",className:"language-py"},'def copy_file(spark: SparkSession):\n import os\n import shutil\n shutil.copy2("/dbfs/Prophecy/example/source/person.json",\n "/dbfs/Prophecy/example/target/person.json")\n'))),(0,a.yg)(l.A,{value:"py3",label:"S3",mdxType:"TabItem"},(0,a.yg)("pre",null,(0,a.yg)("code",{parentName:"pre",className:"language-py"},"def copy_file(spark: SparkSession):\n for obj in boto3.client(\"s3\").list_objects_v2(Bucket = src_bucket, Prefix = src_url.path.lstrip('/'))['Contents']:\n new_dest_prefix = re.sub(src_prefix, dest_prefix, obj['Key'], 1)\n\n if (\n (\n mode in [\"copy\", \"move\"]\n and not obj['Key'].endswith(\"/\")\n )\n or (\n not obj['Key'].endswith(\"/\")\n and mode == \"sync\"\n and re.sub(src_prefix, dest_prefix, obj['Key'], 1) not in 
dest_files\n )\n ):\n\n if (\n (\n bool(ignoreEmptyFiles) == True\n and (\n s3.head_object(Bucket=src_bucket, Key=obj['Key'])['ContentLength']\n == 0\n )\n )\n or (\n bool(fileRegex)\n and fileRegex != \"\"\n and not bool(\n re.compile(fileRegex).match(obj['Key'].split('/')[- 1])\n )\n )\n ):\n continue\n\n s3.copy(\n {'Bucket' : src_bucket, 'Key' : obj['Key']},\n dest_bucket,\n re.sub(src_prefix, dest_prefix, obj['Key'], 1)\n )\n\n if props.operation == \"move\":\n s3.delete_object(Bucket = src_bucket, Key = obj['Key'])\n\n")))),(0,a.yg)("hr",null),(0,a.yg)("h3",{id:"copy-all-files-from-a-directory"},"Copy All Files From A Directory"),(0,a.yg)("div",{class:"wistia_responsive_padding",style:{padding:"56.25% 0 0 0",position:"relative"}},(0,a.yg)("div",{class:"wistia_responsive_wrapper",style:{height:"100%",left:0,position:"absolute",top:0,width:"100%"}},(0,a.yg)("iframe",{src:"https://github.com/SimpleDataLabsInc/prophecy-docs/assets/130362885/107a8195-e76a-48ab-900f-28e07b7798ed",title:"File Copy a directory",allow:"autoplay;fullscreen",allowtransparency:"true",frameborder:"0",scrolling:"no",class:"wistia_embed",name:"wistia_embed",msallowfullscreen:!0,width:"100%",height:"100%"}))),(0,a.yg)(o.A,{mdxType:"Tabs"},(0,a.yg)(l.A,{value:"py",label:"DBFS",mdxType:"TabItem"},(0,a.yg)("pre",null,(0,a.yg)("code",{parentName:"pre",className:"language-py"},'def copy_file(spark: SparkSession):\n from pyspark.dbutils import DBUtils\n DBUtils(spark).fs.cp(\n "dbfs:/Prophecy/example/source/",\n "dbfs:/Prophecy/example/target/",\n recurse = True\n )\n'))),(0,a.yg)(l.A,{value:"py2",label:"Local",mdxType:"TabItem"},(0,a.yg)("pre",null,(0,a.yg)("code",{parentName:"pre",className:"language-py"},'def copy_file(spark: SparkSession):\n import os\n import shutil\n shutil.copytree(\n "/dbfs/Prophecy/example/source/",\n "/dbfs/Prophecy/example/target/",\n copy_function = shutil.copy2,\n dirs_exist_ok = True\n 
)\n'))),(0,a.yg)(l.A,{value:"py3",label:"S3",mdxType:"TabItem"},(0,a.yg)("pre",null,(0,a.yg)("code",{parentName:"pre",className:"language-py"},"def copy_file(spark: SparkSession):\n for obj in boto3.client(\"s3\").list_objects_v2(Bucket = src_bucket, Prefix = src_url.path.lstrip('/'))['Contents']:\n new_dest_prefix = re.sub(src_prefix, dest_prefix, obj['Key'], 1)\n\n if (\n (\n mode in [\"copy\", \"move\"]\n and not obj['Key'].endswith(\"/\")\n )\n or (\n not obj['Key'].endswith(\"/\")\n and mode == \"sync\"\n and re.sub(src_prefix, dest_prefix, obj['Key'], 1) not in dest_files\n )\n ):\n\n if (\n (\n bool(ignoreEmptyFiles) == True\n and (\n s3.head_object(Bucket=src_bucket, Key=obj['Key'])['ContentLength']\n == 0\n )\n )\n or (\n bool(fileRegex)\n and fileRegex != \"\"\n and not bool(\n re.compile(fileRegex).match(obj['Key'].split('/')[- 1])\n )\n )\n ):\n continue\n\n s3.copy(\n {'Bucket' : src_bucket, 'Key' : obj['Key']},\n dest_bucket,\n re.sub(src_prefix, dest_prefix, obj['Key'], 1)\n )\n\n if props.operation == \"move\":\n s3.delete_object(Bucket = src_bucket, Key = obj['Key'])\n\n")))),(0,a.yg)("hr",null),(0,a.yg)("h3",{id:"move-files"},"Move Files"),(0,a.yg)("div",{class:"wistia_responsive_padding",style:{padding:"56.25% 0 0 0",position:"relative"}},(0,a.yg)("div",{class:"wistia_responsive_wrapper",style:{height:"100%",left:0,position:"absolute",top:0,width:"100%"}},(0,a.yg)("iframe",{src:"https://github.com/SimpleDataLabsInc/prophecy-docs/assets/130362885/6bbd4a4e-2b6a-4cf6-bb07-0712f6720650",title:"Move File",allow:"autoplay;fullscreen",allowtransparency:"true",frameborder:"0",scrolling:"no",class:"wistia_embed",name:"wistia_embed",msallowfullscreen:!0,width:"100%",height:"100%"}))),(0,a.yg)(o.A,{mdxType:"Tabs"},(0,a.yg)(l.A,{value:"py",label:"DBFS",mdxType:"TabItem"},(0,a.yg)("pre",null,(0,a.yg)("code",{parentName:"pre",className:"language-py"},'def move_file(spark: SparkSession):\n from pyspark.dbutils import DBUtils\n 
DBUtils(spark).fs.mv("dbfs:/Prophecy/example/source/", "dbfs:/Prophecy/example/target/", recurse = False)\n\n'))),(0,a.yg)(l.A,{value:"py2",label:"Local",mdxType:"TabItem"},(0,a.yg)("pre",null,(0,a.yg)("code",{parentName:"pre",className:"language-py"},'def move_file(spark: SparkSession):\n import os\n import shutil\n shutil.copy2("/Prophecy/example/source/", "/Prophecy/example/target/")\n shutil.rmtree("/Prophecy/example/source/")\n'))),(0,a.yg)(l.A,{value:"py3",label:"S3",mdxType:"TabItem"},(0,a.yg)("pre",null,(0,a.yg)("code",{parentName:"pre",className:"language-py"},"def move_file(spark: SparkSession):\n for obj in boto3.client(\"s3\").list_objects_v2(Bucket = src_bucket, Prefix = src_url.path.lstrip('/'))['Contents']:\n new_dest_prefix = re.sub(src_prefix, dest_prefix, obj['Key'], 1)\n\n if (\n (\n mode in [\"copy\", \"move\"]\n and not obj['Key'].endswith(\"/\")\n )\n or (\n not obj['Key'].endswith(\"/\")\n and mode == \"sync\"\n and re.sub(src_prefix, dest_prefix, obj['Key'], 1) not in dest_files\n )\n ):\n\n if (\n (\n bool(ignoreEmptyFiles) == True\n and (\n s3.head_object(Bucket=src_bucket, Key=obj['Key'])['ContentLength']\n == 0\n )\n )\n or (\n bool(fileRegex)\n and fileRegex != \"\"\n and not bool(re.compile(fileRegex).match(obj['Key'].split('/')[- 1]))\n )\n ):\n continue\n\n s3.copy(\n {'Bucket' : src_bucket, 'Key' : obj['Key']},\n dest_bucket,\n re.sub(src_prefix, dest_prefix, obj['Key'], 1)\n )\n\n if mode == \"move\":\n s3.delete_object(Bucket = src_bucket, Key = obj['Key'])\n\n\n")))),(0,a.yg)("hr",null),(0,a.yg)("h3",{id:"s3---sync-entire-directory"},"S3 - Sync Entire Directory"),(0,a.yg)("div",{class:"wistia_responsive_padding",style:{padding:"56.25% 0 0 0",position:"relative"}},(0,a.yg)("div",{class:"wistia_responsive_wrapper",style:{height:"100%",left:0,position:"absolute",top:0,width:"100%"}},(0,a.yg)("iframe",{src:"https://github.com/SimpleDataLabsInc/prophecy-docs/assets/130362885/2e579779-3d61-476d-9f04-38f687c96ebf",title:"S3 File 
Sync",allow:"autoplay;fullscreen",allowtransparency:"true",frameborder:"0",scrolling:"no",class:"wistia_embed",name:"wistia_embed",msallowfullscreen:!0,width:"100%",height:"100%"}))),(0,a.yg)(o.A,{mdxType:"Tabs"},(0,a.yg)(l.A,{value:"py",label:"S3",mdxType:"TabItem"},(0,a.yg)("pre",null,(0,a.yg)("code",{parentName:"pre",className:"language-python"},"def sync_file(spark: SparkSession):\n dest_files = set(\n [\n f_object['Key'].lstrip('/')\n for f_object in boto3.client(\"s3\").list_objects_v2(Bucket = dest_bucket, Prefix = dest_url.path.lstrip('/'))['Contents']\n if not f_object['Key'].endswith(\"/\")\n ]\n )\n\n for obj in boto3.client(\"s3\").list_objects_v2(Bucket = src_bucket, Prefix = src_url.path.lstrip('/'))['Contents']:\n new_dest_prefix = re.sub(src_prefix, dest_prefix, obj['Key'], 1)\n\n if (\n (\n mode in [\"copy\", \"move\"]\n and not obj['Key'].endswith(\"/\")\n )\n or (\n not obj['Key'].endswith(\"/\")\n and mode == \"sync\"\n and re.sub(src_prefix, dest_prefix, obj['Key'], 1) not in dest_files\n )\n ):\n\n if (\n (\n bool(ignoreEmptyFiles) == True\n and (\n s3.head_object(Bucket=src_bucket, Key=obj['Key'])['ContentLength']\n == 0\n )\n )\n or (\n bool(fileRegex)\n and fileRegex != \"\"\n and not bool(re.compile(fileRegex).match(obj['Key'].split('/')[- 1]))\n )\n ):\n continue\n\n s3.copy(\n {'Bucket' : src_bucket, 'Key' : obj['Key']},\n dest_bucket,\n re.sub(src_prefix, dest_prefix, obj['Key'], 1)\n )\n\n if mode == \"move\":\n s3.delete_object(Bucket = src_bucket, Key = obj['Key'])\n\n")))),(0,a.yg)("hr",null))}f.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/f29eab11.5672147f.js b/assets/js/f29eab11.5672147f.js new file mode 100644 index 0000000000..0dc0d45b8b --- /dev/null +++ b/assets/js/f29eab11.5672147f.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[1634],{15680:(e,t,n)=>{n.d(t,{xA:()=>c,yg:()=>f});var r=n(96540);function a(e,t,n){return t in 
e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function l(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var i=r.createContext({}),p=function(e){var t=r.useContext(i),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},c=function(e){var t=p(e.components);return r.createElement(i.Provider,{value:t},e.children)},u="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},y=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,i=e.parentName,c=s(e,["components","mdxType","originalType","parentName"]),u=p(n),y=a,f=u["".concat(i,".").concat(y)]||u[y]||d[y]||o;return n?r.createElement(f,l(l({ref:t},c),{},{components:n})):r.createElement(f,l({ref:t},c))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,l=new Array(o);l[0]=y;var s={};for(var i in t)hasOwnProperty.call(t,i)&&(s[i]=t[i]);s.originalType=e,s[u]="string"==typeof e?e:a,l[1]=s;for(var p=2;p{n.d(t,{A:()=>l});var r=n(96540),a=n(20053);const o={tabItem:"tabItem_Ymn6"};function l(e){let{children:t,hidden:n,className:l}=e;return r.createElement("div",{role:"tabpanel",className:(0,a.A)(o.tabItem,l),hidden:n},t)}},11470:(e,t,n)=>{n.d(t,{A:()=>x});var r=n(58168),a=n(96540),o=n(20053),l=n(23104),s=n(56347),i=n(57485),p=n(31682),c=n(89466);function u(e){return function(e){return a.Children.map(e,(e=>{if(!e||(0,a.isValidElement)(e)&&function(e){const{props:t}=e;return!!t&&"object"==typeof t&&"value"in t}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof 
e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:t,label:n,attributes:r,default:a}}=e;return{value:t,label:n,attributes:r,default:a}}))}function d(e){const{values:t,children:n}=e;return(0,a.useMemo)((()=>{const e=t??u(n);return function(e){const t=(0,p.X)(e,((e,t)=>e.value===t.value));if(t.length>0)throw new Error(`Docusaurus error: Duplicate values "${t.map((e=>e.value)).join(", ")}" found in . Every value needs to be unique.`)}(e),e}),[t,n])}function y(e){let{value:t,tabValues:n}=e;return n.some((e=>e.value===t))}function f(e){let{queryString:t=!1,groupId:n}=e;const r=(0,s.W6)(),o=function(e){let{queryString:t=!1,groupId:n}=e;if("string"==typeof t)return t;if(!1===t)return null;if(!0===t&&!n)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. You can also provide an explicit value such as queryString="my-search-param".');return n??null}({queryString:t,groupId:n});return[(0,i.aZ)(o),(0,a.useCallback)((e=>{if(!o)return;const t=new URLSearchParams(r.location.search);t.set(o,e),r.replace({...r.location,search:t.toString()})}),[o,r])]}function m(e){const{defaultValue:t,queryString:n=!1,groupId:r}=e,o=d(e),[l,s]=(0,a.useState)((()=>function(e){let{defaultValue:t,tabValues:n}=e;if(0===n.length)throw new Error("Docusaurus error: the component requires at least one children component");if(t){if(!y({value:t,tabValues:n}))throw new Error(`Docusaurus error: The has a defaultValue "${t}" but none of its children has the corresponding value. Available values are: ${n.map((e=>e.value)).join(", ")}. 
If you intend to show no default tab, use defaultValue={null} instead.`);return t}const r=n.find((e=>e.default))??n[0];if(!r)throw new Error("Unexpected error: 0 tabValues");return r.value}({defaultValue:t,tabValues:o}))),[i,p]=f({queryString:n,groupId:r}),[u,m]=function(e){let{groupId:t}=e;const n=function(e){return e?`docusaurus.tab.${e}`:null}(t),[r,o]=(0,c.Dv)(n);return[r,(0,a.useCallback)((e=>{n&&o.set(e)}),[n,o])]}({groupId:r}),b=(()=>{const e=i??u;return y({value:e,tabValues:o})?e:null})();(0,a.useLayoutEffect)((()=>{b&&s(b)}),[b]);return{selectedValue:l,selectValue:(0,a.useCallback)((e=>{if(!y({value:e,tabValues:o}))throw new Error(`Can't select invalid tab value=${e}`);s(e),p(e),m(e)}),[p,m,o]),tabValues:o}}var b=n(92303);const g={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function h(e){let{className:t,block:n,selectedValue:s,selectValue:i,tabValues:p}=e;const c=[],{blockElementScrollPositionUntilNextRender:u}=(0,l.a_)(),d=e=>{const t=e.currentTarget,n=c.indexOf(t),r=p[n].value;r!==s&&(u(t),i(r))},y=e=>{let t=null;switch(e.key){case"Enter":d(e);break;case"ArrowRight":{const n=c.indexOf(e.currentTarget)+1;t=c[n]??c[0];break}case"ArrowLeft":{const n=c.indexOf(e.currentTarget)-1;t=c[n]??c[c.length-1];break}}t?.focus()};return a.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,o.A)("tabs",{"tabs--block":n},t)},p.map((e=>{let{value:t,label:n,attributes:l}=e;return a.createElement("li",(0,r.A)({role:"tab",tabIndex:s===t?0:-1,"aria-selected":s===t,key:t,ref:e=>c.push(e),onKeyDown:y,onClick:d},l,{className:(0,o.A)("tabs__item",g.tabItem,l?.className,{"tabs__item--active":s===t})}),n??t)})))}function _(e){let{lazy:t,children:n,selectedValue:r}=e;const o=(Array.isArray(n)?n:[n]).filter(Boolean);if(t){const e=o.find((e=>e.props.value===r));return e?(0,a.cloneElement)(e,{className:"margin-top--md"}):null}return 
a.createElement("div",{className:"margin-top--md"},o.map(((e,t)=>(0,a.cloneElement)(e,{key:t,hidden:e.props.value!==r}))))}function v(e){const t=m(e);return a.createElement("div",{className:(0,o.A)("tabs-container",g.tabList)},a.createElement(h,(0,r.A)({},e,t)),a.createElement(_,(0,r.A)({},e,t)))}function x(e){const t=(0,b.A)();return a.createElement(v,(0,r.A)({key:String(t)},e))}},38035:(e,t,n)=>{n.r(t),n.d(t,{assets:()=>c,contentTitle:()=>i,default:()=>f,frontMatter:()=>s,metadata:()=>p,toc:()=>u});var r=n(58168),a=(n(96540),n(15680)),o=n(11470),l=n(19365);const s={sidebar_position:3,title:"FileOperation",id:"file-operations",description:"Perform file operations on different file systems",tags:["file","dbfs"]},i=void 0,p={unversionedId:"Spark/gems/custom/file-operations",id:"Spark/gems/custom/file-operations",title:"FileOperation",description:"Perform file operations on different file systems",source:"@site/docs/Spark/gems/custom/file-operation.md",sourceDirName:"Spark/gems/custom",slug:"/Spark/gems/custom/file-operations",permalink:"/Spark/gems/custom/file-operations",draft:!1,tags:[{label:"file",permalink:"/tags/file"},{label:"dbfs",permalink:"/tags/dbfs"}],version:"current",sidebarPosition:3,frontMatter:{sidebar_position:3,title:"FileOperation",id:"file-operations",description:"Perform file operations on different file systems",tags:["file","dbfs"]},sidebar:"defaultSidebar",previous:{title:"Script",permalink:"/Spark/gems/custom/script"},next:{title:"DeltaTableOperations",permalink:"/Spark/gems/custom/delta-ops"}},c={},u=[{value:"Parameters",id:"parameters",level:2},{value:"Examples",id:"examples",level:2},{value:"Copy Single File",id:"copy-single-file",level:3},{value:"Copy All Files From A Directory",id:"copy-all-files-from-a-directory",level:3},{value:"Move Files",id:"move-files",level:3},{value:"S3 - Sync Entire Directory",id:"s3---sync-entire-directory",level:3}],d={toc:u},y="wrapper";function 
f(e){let{components:t,...n}=e;return(0,a.yg)(y,(0,r.A)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,a.yg)("h3",null,(0,a.yg)("span",{class:"badge"},"Spark Gem")),(0,a.yg)("p",null,"Helps perform file operations like ",(0,a.yg)("inlineCode",{parentName:"p"},"copy")," and ",(0,a.yg)("inlineCode",{parentName:"p"},"move")," on different file systems."),(0,a.yg)("h2",{id:"parameters"},"Parameters"),(0,a.yg)("table",null,(0,a.yg)("thead",{parentName:"table"},(0,a.yg)("tr",{parentName:"thead"},(0,a.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,a.yg)("th",{parentName:"tr",align:"left"},"Description"),(0,a.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,a.yg)("tbody",{parentName:"table"},(0,a.yg)("tr",{parentName:"tbody"},(0,a.yg)("td",{parentName:"tr",align:"left"},"File System"),(0,a.yg)("td",{parentName:"tr",align:"left"},(0,a.yg)("inlineCode",{parentName:"td"},"Local")," - for operations on driver node file system ",(0,a.yg)("br",null)," ",(0,a.yg)("inlineCode",{parentName:"td"},"DBFS")," - for operations on Databricks file system ",(0,a.yg)("br",null)," ",(0,a.yg)("inlineCode",{parentName:"td"},"S3")," - for operations on S3 object store"),(0,a.yg)("td",{parentName:"tr",align:"left"},"True")),(0,a.yg)("tr",{parentName:"tbody"},(0,a.yg)("td",{parentName:"tr",align:"left"},"Operation"),(0,a.yg)("td",{parentName:"tr",align:"left"},"Operation to perform, ",(0,a.yg)("inlineCode",{parentName:"td"},"Copy"),", ",(0,a.yg)("inlineCode",{parentName:"td"},"Move")," or ",(0,a.yg)("inlineCode",{parentName:"td"},"Sync")),(0,a.yg)("td",{parentName:"tr",align:"left"},"True")),(0,a.yg)("tr",{parentName:"tbody"},(0,a.yg)("td",{parentName:"tr",align:"left"},"Filename Regex"),(0,a.yg)("td",{parentName:"tr",align:"left"},"Regex to Filter File Names Eg: stdlog.","*",".","txt"),(0,a.yg)("td",{parentName:"tr",align:"left"},"False")),(0,a.yg)("tr",{parentName:"tbody"},(0,a.yg)("td",{parentName:"tr",align:"left"},"Ignore empty 
files"),(0,a.yg)("td",{parentName:"tr",align:"left"},"Ignore if file size is empty (Size of file is 0 bytes)"),(0,a.yg)("td",{parentName:"tr",align:"left"},"False")),(0,a.yg)("tr",{parentName:"tbody"},(0,a.yg)("td",{parentName:"tr",align:"left"},"Recurse"),(0,a.yg)("td",{parentName:"tr",align:"left"},"Boolean for performing ",(0,a.yg)("inlineCode",{parentName:"td"},"Operation")," recursively. Default is ",(0,a.yg)("inlineCode",{parentName:"td"},"False")),(0,a.yg)("td",{parentName:"tr",align:"left"},"False")),(0,a.yg)("tr",{parentName:"tbody"},(0,a.yg)("td",{parentName:"tr",align:"left"},"Source Path"),(0,a.yg)("td",{parentName:"tr",align:"left"},"Path of source file/directory. ",(0,a.yg)("br",null),"Eg: /dbfs/source_file.txt, dbfs:/source_file.txt, s3://source_bucket/source_prefix/filename.txt"),(0,a.yg)("td",{parentName:"tr",align:"left"},"True")),(0,a.yg)("tr",{parentName:"tbody"},(0,a.yg)("td",{parentName:"tr",align:"left"},"Destination Path"),(0,a.yg)("td",{parentName:"tr",align:"left"},"Path of destination file/directory. ",(0,a.yg)("br",null)," Eg: /dbfs/target_file.txt, dbfs:/target_file.txt, s3://target_bucket/target_prefix/filename.txt"),(0,a.yg)("td",{parentName:"tr",align:"left"},"True")))),(0,a.yg)("admonition",{type:"info"},(0,a.yg)("p",{parentName:"admonition"},"You can perform operations on DBFS files using ",(0,a.yg)("inlineCode",{parentName:"p"},"Local")," file system too by providing path under ",(0,a.yg)("inlineCode",{parentName:"p"},"/dbfs"),"!",(0,a.yg)("br",null)," This is because Databricks uses a FUSE mount to provide local access to the files stored in the cloud. 
A FUSE mount is a secure, virtual filesystem.")),(0,a.yg)("h2",{id:"examples"},"Examples"),(0,a.yg)("hr",null),(0,a.yg)("h3",{id:"copy-single-file"},"Copy Single File"),(0,a.yg)("div",{class:"wistia_responsive_padding",style:{padding:"56.25% 0 0 0",position:"relative"}},(0,a.yg)("div",{class:"wistia_responsive_wrapper",style:{height:"100%",left:0,position:"absolute",top:0,width:"100%"}},(0,a.yg)("iframe",{src:"https://github.com/SimpleDataLabsInc/prophecy-docs/assets/130362885/6db06ea9-27ef-4833-a837-a49adf3ff2c6",title:"File Copy single file",allow:"autoplay;fullscreen",allowtransparency:"true",frameborder:"0",scrolling:"no",class:"wistia_embed",name:"wistia_embed",msallowfullscreen:!0,width:"100%",height:"100%"}))),(0,a.yg)(o.A,{mdxType:"Tabs"},(0,a.yg)(l.A,{value:"py",label:"DBFS",mdxType:"TabItem"},(0,a.yg)("pre",null,(0,a.yg)("code",{parentName:"pre",className:"language-py"},'def copy_file(spark: SparkSession):\n from pyspark.dbutils import DBUtils\n DBUtils(spark).fs.cp(\n "dbfs:/Prophecy/example/source/person.json",\n "dbfs:/Prophecy/example/target/person.json",\n recurse = False\n )\n'))),(0,a.yg)(l.A,{value:"py2",label:"Local",mdxType:"TabItem"},(0,a.yg)("pre",null,(0,a.yg)("code",{parentName:"pre",className:"language-py"},'def copy_file(spark: SparkSession):\n import os\n import shutil\n shutil.copy2("/dbfs/Prophecy/example/source/person.json",\n "/dbfs/Prophecy/example/target/person.json")\n'))),(0,a.yg)(l.A,{value:"py3",label:"S3",mdxType:"TabItem"},(0,a.yg)("pre",null,(0,a.yg)("code",{parentName:"pre",className:"language-py"},"def copy_file(spark: SparkSession):\n for obj in boto3.client(\"s3\").list_objects_v2(Bucket = src_bucket, Prefix = src_url.path.lstrip('/'))['Contents']:\n new_dest_prefix = re.sub(src_prefix, dest_prefix, obj['Key'], 1)\n\n if (\n (\n mode in [\"copy\", \"move\"]\n and not obj['Key'].endswith(\"/\")\n )\n or (\n not obj['Key'].endswith(\"/\")\n and mode == \"sync\"\n and re.sub(src_prefix, dest_prefix, obj['Key'], 1) not in 
dest_files\n )\n ):\n\n if (\n (\n bool(ignoreEmptyFiles) == True\n and (\n s3.head_object(Bucket=src_bucket, Key=obj['Key'])['ContentLength']\n == 0\n )\n )\n or (\n bool(fileRegex)\n and fileRegex != \"\"\n and not bool(\n re.compile(fileRegex).match(obj['Key'].split('/')[- 1])\n )\n )\n ):\n continue\n\n s3.copy(\n {'Bucket' : src_bucket, 'Key' : obj['Key']},\n dest_bucket,\n re.sub(src_prefix, dest_prefix, obj['Key'], 1)\n )\n\n if props.operation == \"move\":\n s3.delete_object(Bucket = src_bucket, Key = obj['Key'])\n\n")))),(0,a.yg)("hr",null),(0,a.yg)("h3",{id:"copy-all-files-from-a-directory"},"Copy All Files From A Directory"),(0,a.yg)("div",{class:"wistia_responsive_padding",style:{padding:"56.25% 0 0 0",position:"relative"}},(0,a.yg)("div",{class:"wistia_responsive_wrapper",style:{height:"100%",left:0,position:"absolute",top:0,width:"100%"}},(0,a.yg)("iframe",{src:"https://github.com/SimpleDataLabsInc/prophecy-docs/assets/130362885/107a8195-e76a-48ab-900f-28e07b7798ed",title:"File Copy a directory",allow:"autoplay;fullscreen",allowtransparency:"true",frameborder:"0",scrolling:"no",class:"wistia_embed",name:"wistia_embed",msallowfullscreen:!0,width:"100%",height:"100%"}))),(0,a.yg)(o.A,{mdxType:"Tabs"},(0,a.yg)(l.A,{value:"py",label:"DBFS",mdxType:"TabItem"},(0,a.yg)("pre",null,(0,a.yg)("code",{parentName:"pre",className:"language-py"},'def copy_file(spark: SparkSession):\n from pyspark.dbutils import DBUtils\n DBUtils(spark).fs.cp(\n "dbfs:/Prophecy/example/source/",\n "dbfs:/Prophecy/example/target/",\n recurse = True\n )\n'))),(0,a.yg)(l.A,{value:"py2",label:"Local",mdxType:"TabItem"},(0,a.yg)("pre",null,(0,a.yg)("code",{parentName:"pre",className:"language-py"},'def copy_file(spark: SparkSession):\n import os\n import shutil\n shutil.copytree(\n "/dbfs/Prophecy/example/source/",\n "/dbfs/Prophecy/example/target/",\n copy_function = shutil.copy2,\n dirs_exist_ok = True\n 
)\n'))),(0,a.yg)(l.A,{value:"py3",label:"S3",mdxType:"TabItem"},(0,a.yg)("pre",null,(0,a.yg)("code",{parentName:"pre",className:"language-py"},"def copy_file(spark: SparkSession):\n for obj in boto3.client(\"s3\").list_objects_v2(Bucket = src_bucket, Prefix = src_url.path.lstrip('/'))['Contents']:\n new_dest_prefix = re.sub(src_prefix, dest_prefix, obj['Key'], 1)\n\n if (\n (\n mode in [\"copy\", \"move\"]\n and not obj['Key'].endswith(\"/\")\n )\n or (\n not obj['Key'].endswith(\"/\")\n and mode == \"sync\"\n and re.sub(src_prefix, dest_prefix, obj['Key'], 1) not in dest_files\n )\n ):\n\n if (\n (\n bool(ignoreEmptyFiles) == True\n and (\n s3.head_object(Bucket=src_bucket, Key=obj['Key'])['ContentLength']\n == 0\n )\n )\n or (\n bool(fileRegex)\n and fileRegex != \"\"\n and not bool(\n re.compile(fileRegex).match(obj['Key'].split('/')[- 1])\n )\n )\n ):\n continue\n\n s3.copy(\n {'Bucket' : src_bucket, 'Key' : obj['Key']},\n dest_bucket,\n re.sub(src_prefix, dest_prefix, obj['Key'], 1)\n )\n\n if props.operation == \"move\":\n s3.delete_object(Bucket = src_bucket, Key = obj['Key'])\n\n")))),(0,a.yg)("hr",null),(0,a.yg)("h3",{id:"move-files"},"Move Files"),(0,a.yg)("div",{class:"wistia_responsive_padding",style:{padding:"56.25% 0 0 0",position:"relative"}},(0,a.yg)("div",{class:"wistia_responsive_wrapper",style:{height:"100%",left:0,position:"absolute",top:0,width:"100%"}},(0,a.yg)("iframe",{src:"https://github.com/SimpleDataLabsInc/prophecy-docs/assets/130362885/6bbd4a4e-2b6a-4cf6-bb07-0712f6720650",title:"Move File",allow:"autoplay;fullscreen",allowtransparency:"true",frameborder:"0",scrolling:"no",class:"wistia_embed",name:"wistia_embed",msallowfullscreen:!0,width:"100%",height:"100%"}))),(0,a.yg)(o.A,{mdxType:"Tabs"},(0,a.yg)(l.A,{value:"py",label:"DBFS",mdxType:"TabItem"},(0,a.yg)("pre",null,(0,a.yg)("code",{parentName:"pre",className:"language-py"},'def move_file(spark: SparkSession):\n from pyspark.dbutils import DBUtils\n 
DBUtils(spark).fs.mv("dbfs:/Prophecy/example/source/", "dbfs:/Prophecy/example/target/", recurse = False)\n\n'))),(0,a.yg)(l.A,{value:"py2",label:"Local",mdxType:"TabItem"},(0,a.yg)("pre",null,(0,a.yg)("code",{parentName:"pre",className:"language-py"},'def move_file(spark: SparkSession):\n import os\n import shutil\n shutil.copy2("/Prophecy/example/source/", "/Prophecy/example/target/")\n shutil.rmtree("/Prophecy/example/source/")\n'))),(0,a.yg)(l.A,{value:"py3",label:"S3",mdxType:"TabItem"},(0,a.yg)("pre",null,(0,a.yg)("code",{parentName:"pre",className:"language-py"},"def move_file(spark: SparkSession):\n for obj in boto3.client(\"s3\").list_objects_v2(Bucket = src_bucket, Prefix = src_url.path.lstrip('/'))['Contents']:\n new_dest_prefix = re.sub(src_prefix, dest_prefix, obj['Key'], 1)\n\n if (\n (\n mode in [\"copy\", \"move\"]\n and not obj['Key'].endswith(\"/\")\n )\n or (\n not obj['Key'].endswith(\"/\")\n and mode == \"sync\"\n and re.sub(src_prefix, dest_prefix, obj['Key'], 1) not in dest_files\n )\n ):\n\n if (\n (\n bool(ignoreEmptyFiles) == True\n and (\n s3.head_object(Bucket=src_bucket, Key=obj['Key'])['ContentLength']\n == 0\n )\n )\n or (\n bool(fileRegex)\n and fileRegex != \"\"\n and not bool(re.compile(fileRegex).match(obj['Key'].split('/')[- 1]))\n )\n ):\n continue\n\n s3.copy(\n {'Bucket' : src_bucket, 'Key' : obj['Key']},\n dest_bucket,\n re.sub(src_prefix, dest_prefix, obj['Key'], 1)\n )\n\n if mode == \"move\":\n s3.delete_object(Bucket = src_bucket, Key = obj['Key'])\n\n\n")))),(0,a.yg)("hr",null),(0,a.yg)("h3",{id:"s3---sync-entire-directory"},"S3 - Sync Entire Directory"),(0,a.yg)("div",{class:"wistia_responsive_padding",style:{padding:"56.25% 0 0 0",position:"relative"}},(0,a.yg)("div",{class:"wistia_responsive_wrapper",style:{height:"100%",left:0,position:"absolute",top:0,width:"100%"}},(0,a.yg)("iframe",{src:"https://github.com/SimpleDataLabsInc/prophecy-docs/assets/130362885/2e579779-3d61-476d-9f04-38f687c96ebf",title:"S3 File 
Sync",allow:"autoplay;fullscreen",allowtransparency:"true",frameborder:"0",scrolling:"no",class:"wistia_embed",name:"wistia_embed",msallowfullscreen:!0,width:"100%",height:"100%"}))),(0,a.yg)(o.A,{mdxType:"Tabs"},(0,a.yg)(l.A,{value:"py",label:"S3",mdxType:"TabItem"},(0,a.yg)("pre",null,(0,a.yg)("code",{parentName:"pre",className:"language-python"},"def sync_file(spark: SparkSession):\n dest_files = set(\n [\n f_object['Key'].lstrip('/')\n for f_object in boto3.client(\"s3\").list_objects_v2(Bucket = dest_bucket, Prefix = dest_url.path.lstrip('/'))['Contents']\n if not f_object['Key'].endswith(\"/\")\n ]\n )\n\n for obj in boto3.client(\"s3\").list_objects_v2(Bucket = src_bucket, Prefix = src_url.path.lstrip('/'))['Contents']:\n new_dest_prefix = re.sub(src_prefix, dest_prefix, obj['Key'], 1)\n\n if (\n (\n mode in [\"copy\", \"move\"]\n and not obj['Key'].endswith(\"/\")\n )\n or (\n not obj['Key'].endswith(\"/\")\n and mode == \"sync\"\n and re.sub(src_prefix, dest_prefix, obj['Key'], 1) not in dest_files\n )\n ):\n\n if (\n (\n bool(ignoreEmptyFiles) == True\n and (\n s3.head_object(Bucket=src_bucket, Key=obj['Key'])['ContentLength']\n == 0\n )\n )\n or (\n bool(fileRegex)\n and fileRegex != \"\"\n and not bool(re.compile(fileRegex).match(obj['Key'].split('/')[- 1]))\n )\n ):\n continue\n\n s3.copy(\n {'Bucket' : src_bucket, 'Key' : obj['Key']},\n dest_bucket,\n re.sub(src_prefix, dest_prefix, obj['Key'], 1)\n )\n\n if mode == \"move\":\n s3.delete_object(Bucket = src_bucket, Key = obj['Key'])\n\n")))),(0,a.yg)("hr",null))}f.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/f779aca8.5d6c571b.js b/assets/js/f779aca8.5d6c571b.js new file mode 100644 index 0000000000..1ce4074667 --- /dev/null +++ b/assets/js/f779aca8.5d6c571b.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[72661],{15680:(e,t,a)=>{a.d(t,{xA:()=>p,yg:()=>g});var n=a(96540);function r(e,t,a){return t in 
e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function l(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function i(e){for(var t=1;t=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var s=n.createContext({}),u=function(e){var t=n.useContext(s),a=t;return e&&(a="function"==typeof e?e(t):i(i({},t),e)),a},p=function(e){var t=u(e.components);return n.createElement(s.Provider,{value:t},e.children)},d="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},c=n.forwardRef((function(e,t){var a=e.components,r=e.mdxType,l=e.originalType,s=e.parentName,p=o(e,["components","mdxType","originalType","parentName"]),d=u(a),c=r,g=d["".concat(s,".").concat(c)]||d[c]||m[c]||l;return a?n.createElement(g,i(i({ref:t},p),{},{components:a})):n.createElement(g,i({ref:t},p))}));function g(e,t){var a=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var l=a.length,i=new Array(l);i[0]=c;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[d]="string"==typeof e?e:r,i[1]=o;for(var u=2;u{a.d(t,{A:()=>i});var n=a(96540),r=a(20053);const l={tabItem:"tabItem_Ymn6"};function i(e){let{children:t,hidden:a,className:i}=e;return n.createElement("div",{role:"tabpanel",className:(0,r.A)(l.tabItem,i),hidden:a},t)}},11470:(e,t,a)=>{a.d(t,{A:()=>w});var n=a(58168),r=a(96540),l=a(20053),i=a(23104),o=a(56347),s=a(57485),u=a(31682),p=a(89466);function d(e){return function(e){return r.Children.map(e,(e=>{if(!e||(0,r.isValidElement)(e)&&function(e){const{props:t}=e;return!!t&&"object"==typeof t&&"value"in t}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof 
e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:t,label:a,attributes:n,default:r}}=e;return{value:t,label:a,attributes:n,default:r}}))}function m(e){const{values:t,children:a}=e;return(0,r.useMemo)((()=>{const e=t??d(a);return function(e){const t=(0,u.X)(e,((e,t)=>e.value===t.value));if(t.length>0)throw new Error(`Docusaurus error: Duplicate values "${t.map((e=>e.value)).join(", ")}" found in . Every value needs to be unique.`)}(e),e}),[t,a])}function c(e){let{value:t,tabValues:a}=e;return a.some((e=>e.value===t))}function g(e){let{queryString:t=!1,groupId:a}=e;const n=(0,o.W6)(),l=function(e){let{queryString:t=!1,groupId:a}=e;if("string"==typeof t)return t;if(!1===t)return null;if(!0===t&&!a)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. You can also provide an explicit value such as queryString="my-search-param".');return a??null}({queryString:t,groupId:a});return[(0,s.aZ)(l),(0,r.useCallback)((e=>{if(!l)return;const t=new URLSearchParams(n.location.search);t.set(l,e),n.replace({...n.location,search:t.toString()})}),[l,n])]}function y(e){const{defaultValue:t,queryString:a=!1,groupId:n}=e,l=m(e),[i,o]=(0,r.useState)((()=>function(e){let{defaultValue:t,tabValues:a}=e;if(0===a.length)throw new Error("Docusaurus error: the component requires at least one children component");if(t){if(!c({value:t,tabValues:a}))throw new Error(`Docusaurus error: The has a defaultValue "${t}" but none of its children has the corresponding value. Available values are: ${a.map((e=>e.value)).join(", ")}. 
If you intend to show no default tab, use defaultValue={null} instead.`);return t}const n=a.find((e=>e.default))??a[0];if(!n)throw new Error("Unexpected error: 0 tabValues");return n.value}({defaultValue:t,tabValues:l}))),[s,u]=g({queryString:a,groupId:n}),[d,y]=function(e){let{groupId:t}=e;const a=function(e){return e?`docusaurus.tab.${e}`:null}(t),[n,l]=(0,p.Dv)(a);return[n,(0,r.useCallback)((e=>{a&&l.set(e)}),[a,l])]}({groupId:n}),f=(()=>{const e=s??d;return c({value:e,tabValues:l})?e:null})();(0,r.useLayoutEffect)((()=>{f&&o(f)}),[f]);return{selectedValue:i,selectValue:(0,r.useCallback)((e=>{if(!c({value:e,tabValues:l}))throw new Error(`Can't select invalid tab value=${e}`);o(e),u(e),y(e)}),[u,y,l]),tabValues:l}}var f=a(92303);const h={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function b(e){let{className:t,block:a,selectedValue:o,selectValue:s,tabValues:u}=e;const p=[],{blockElementScrollPositionUntilNextRender:d}=(0,i.a_)(),m=e=>{const t=e.currentTarget,a=p.indexOf(t),n=u[a].value;n!==o&&(d(t),s(n))},c=e=>{let t=null;switch(e.key){case"Enter":m(e);break;case"ArrowRight":{const a=p.indexOf(e.currentTarget)+1;t=p[a]??p[0];break}case"ArrowLeft":{const a=p.indexOf(e.currentTarget)-1;t=p[a]??p[p.length-1];break}}t?.focus()};return r.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,l.A)("tabs",{"tabs--block":a},t)},u.map((e=>{let{value:t,label:a,attributes:i}=e;return r.createElement("li",(0,n.A)({role:"tab",tabIndex:o===t?0:-1,"aria-selected":o===t,key:t,ref:e=>p.push(e),onKeyDown:c,onClick:m},i,{className:(0,l.A)("tabs__item",h.tabItem,i?.className,{"tabs__item--active":o===t})}),a??t)})))}function N(e){let{lazy:t,children:a,selectedValue:n}=e;const l=(Array.isArray(a)?a:[a]).filter(Boolean);if(t){const e=l.find((e=>e.props.value===n));return e?(0,r.cloneElement)(e,{className:"margin-top--md"}):null}return 
r.createElement("div",{className:"margin-top--md"},l.map(((e,t)=>(0,r.cloneElement)(e,{key:t,hidden:e.props.value!==n}))))}function v(e){const t=y(e);return r.createElement("div",{className:(0,l.A)("tabs-container",h.tabList)},r.createElement(b,(0,n.A)({},e,t)),r.createElement(N,(0,n.A)({},e,t)))}function w(e){const t=(0,f.A)();return r.createElement(v,(0,n.A)({key:String(t)},e))}},80350:(e,t,a)=>{a.r(t),a.d(t,{assets:()=>p,contentTitle:()=>s,default:()=>g,frontMatter:()=>o,metadata:()=>u,toc:()=>d});var n=a(58168),r=(a(96540),a(15680)),l=a(11470),i=a(19365);const o={sidebar_position:5,title:"RestAPIEnrich",id:"rest-api-enrich",description:"Enrich DataFrame with content from rest API response based on configuration",tags:["gems","api","custom","rest"]},s=void 0,u={unversionedId:"Spark/gems/custom/rest-api-enrich",id:"Spark/gems/custom/rest-api-enrich",title:"RestAPIEnrich",description:"Enrich DataFrame with content from rest API response based on configuration",source:"@site/docs/Spark/gems/custom/rest-api-enrich.md",sourceDirName:"Spark/gems/custom",slug:"/Spark/gems/custom/rest-api-enrich",permalink:"/Spark/gems/custom/rest-api-enrich",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"api",permalink:"/tags/api"},{label:"custom",permalink:"/tags/custom"},{label:"rest",permalink:"/tags/rest"}],version:"current",sidebarPosition:5,frontMatter:{sidebar_position:5,title:"RestAPIEnrich",id:"rest-api-enrich",description:"Enrich DataFrame with content from rest API response based on configuration",tags:["gems","api","custom","rest"]},sidebar:"defaultSidebar",previous:{title:"DeltaTableOperations",permalink:"/Spark/gems/custom/delta-ops"},next:{title:"Machine Learning",permalink:"/Spark/gems/machine-learning/"}},p={},d=[{value:"Parameters",id:"parameters",level:3},{value:"Example 1",id:"example-1",level:3},{value:"Example 2",id:"example-2",level:3},{value:"Generated Code",id:"generated-code",level:4}],m={toc:d},c="wrapper";function 
g(e){let{components:t,...a}=e;return(0,r.yg)(c,(0,n.A)({},m,a,{components:t,mdxType:"MDXLayout"}),(0,r.yg)("h3",null,(0,r.yg)("span",{class:"badge"},"Spark Gem")),(0,r.yg)("p",null,"Enriches the DataFrame by adding column(s) with content from REST API output based on the given configuration."),(0,r.yg)("h3",{id:"parameters"},"Parameters"),(0,r.yg)("p",null,"Each property can either be set as a static value or a value from an existing column of the input DataFrame. Please refer\nto the examples in the description column of each parameter for reference on how the string value should be formed."),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Parameter"),(0,r.yg)("th",{parentName:"tr",align:null},"Description"),(0,r.yg)("th",{parentName:"tr",align:null},"Required"),(0,r.yg)("th",{parentName:"tr",align:null},"Default"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"method"),(0,r.yg)("td",{parentName:"tr",align:null},"method for the new Request object: ",(0,r.yg)("inlineCode",{parentName:"td"},"GET"),", ",(0,r.yg)("inlineCode",{parentName:"td"},"OPTIONS"),", ",(0,r.yg)("inlineCode",{parentName:"td"},"HEAD"),", ",(0,r.yg)("inlineCode",{parentName:"td"},"POST"),", ",(0,r.yg)("inlineCode",{parentName:"td"},"PUT"),", ",(0,r.yg)("inlineCode",{parentName:"td"},"PATCH"),", or ",(0,r.yg)("inlineCode",{parentName:"td"},"DELETE"),"."),(0,r.yg)("td",{parentName:"tr",align:null},"true"),(0,r.yg)("td",{parentName:"tr",align:null})),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"url"),(0,r.yg)("td",{parentName:"tr",align:null},"URL for the REST API."),(0,r.yg)("td",{parentName:"tr",align:null},"true"),(0,r.yg)("td",{parentName:"tr",align:null})),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"params"),(0,r.yg)("td",{parentName:"tr",align:null},"Dictionary, list 
of tuples or bytes to send in the query string for the Request. eg: ",(0,r.yg)("inlineCode",{parentName:"td"},'{"key1":"value1", "key2": value2, "key3": ["value1", "value2"]}')),(0,r.yg)("td",{parentName:"tr",align:null},"false"),(0,r.yg)("td",{parentName:"tr",align:null})),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"data"),(0,r.yg)("td",{parentName:"tr",align:null},"Dictionary to send in the body of the Request. eg: ",(0,r.yg)("inlineCode",{parentName:"td"},'{"key1":"value1", "key2": value2}')),(0,r.yg)("td",{parentName:"tr",align:null},"false"),(0,r.yg)("td",{parentName:"tr",align:null})),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"JSON"),(0,r.yg)("td",{parentName:"tr",align:null},"A JSON serializable Python object to send in the body of the Request. eg: ",(0,r.yg)("inlineCode",{parentName:"td"},'{"key1":"value1", "key2": value2}')),(0,r.yg)("td",{parentName:"tr",align:null},"false"),(0,r.yg)("td",{parentName:"tr",align:null})),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"headers"),(0,r.yg)("td",{parentName:"tr",align:null},"Dictionary of HTTP Headers to send with the Request. eg: ",(0,r.yg)("inlineCode",{parentName:"td"},'{"key1":"value1", "key2": "value2"}')),(0,r.yg)("td",{parentName:"tr",align:null},"false"),(0,r.yg)("td",{parentName:"tr",align:null})),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"cookies"),(0,r.yg)("td",{parentName:"tr",align:null},"Dictionary to send with the Request. eg: ",(0,r.yg)("inlineCode",{parentName:"td"},'{"key1":"value1", "key2": "value2"}')),(0,r.yg)("td",{parentName:"tr",align:null},"false"),(0,r.yg)("td",{parentName:"tr",align:null})),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"auth"),(0,r.yg)("td",{parentName:"tr",align:null},"Auth tuple to enable Basic/Digest/Custom HTTP Auth. 
eg: ",(0,r.yg)("inlineCode",{parentName:"td"},"user:pass")),(0,r.yg)("td",{parentName:"tr",align:null},"false"),(0,r.yg)("td",{parentName:"tr",align:null})),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"timeout"),(0,r.yg)("td",{parentName:"tr",align:null},"How many seconds to wait for the server to send data before giving up, as a float, eg: ",(0,r.yg)("inlineCode",{parentName:"td"},"0.5")," or a (connect timeout, read timeout) tuple. eg: ",(0,r.yg)("inlineCode",{parentName:"td"},"0.5:0.25")),(0,r.yg)("td",{parentName:"tr",align:null},"false"),(0,r.yg)("td",{parentName:"tr",align:null})),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"allow redirects"),(0,r.yg)("td",{parentName:"tr",align:null},"Enable/disable ",(0,r.yg)("inlineCode",{parentName:"td"},"GET/OPTIONS/POST/PUT/PATCH/DELETE/HEAD redirection"),". eg: ",(0,r.yg)("inlineCode",{parentName:"td"},"true")," or ",(0,r.yg)("inlineCode",{parentName:"td"},"false")),(0,r.yg)("td",{parentName:"tr",align:null},"false"),(0,r.yg)("td",{parentName:"tr",align:null},"true")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"proxies"),(0,r.yg)("td",{parentName:"tr",align:null},"Dictionary mapping protocol to the URL of the proxy. eg: ",(0,r.yg)("inlineCode",{parentName:"td"},'{"https" : "https://1.1.0.1:80"}')),(0,r.yg)("td",{parentName:"tr",align:null},"false"),(0,r.yg)("td",{parentName:"tr",align:null})),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"verify"),(0,r.yg)("td",{parentName:"tr",align:null},"Either a boolean, in which case it controls whether we verify the server\u2019s TLS certificate eg: ",(0,r.yg)("inlineCode",{parentName:"td"},"true")," or ",(0,r.yg)("inlineCode",{parentName:"td"},"false")," or a string, in which case it must be a path to a CA bundle to use. Defaults to True. 
eg: ",(0,r.yg)("inlineCode",{parentName:"td"},"dbfs:/path-to-file")),(0,r.yg)("td",{parentName:"tr",align:null},"false"),(0,r.yg)("td",{parentName:"tr",align:null},"true")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"stream"),(0,r.yg)("td",{parentName:"tr",align:null},"if False, the response content will be immediately downloaded. eg: ",(0,r.yg)("inlineCode",{parentName:"td"},"true")," or ",(0,r.yg)("inlineCode",{parentName:"td"},"false")),(0,r.yg)("td",{parentName:"tr",align:null},"false"),(0,r.yg)("td",{parentName:"tr",align:null})),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"cert"),(0,r.yg)("td",{parentName:"tr",align:null},"if String, path to SSL client cert file (.pem). eg. ",(0,r.yg)("inlineCode",{parentName:"td"},"dbfs:/path-to-file"),". If Tuple, (\u2018cert\u2019, \u2018key\u2019) pair. eg: ",(0,r.yg)("inlineCode",{parentName:"td"},"cert:key"),"."),(0,r.yg)("td",{parentName:"tr",align:null},"false"),(0,r.yg)("td",{parentName:"tr",align:null})),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"parse content"),(0,r.yg)("td",{parentName:"tr",align:null},"Parse content as JSON (to make the schema available, enable ",(0,r.yg)("inlineCode",{parentName:"td"},"custom schema"),", and click ",(0,r.yg)("inlineCode",{parentName:"td"},"infer from cluster")," at the bottom left in the output tab)"),(0,r.yg)("td",{parentName:"tr",align:null},"false"),(0,r.yg)("td",{parentName:"tr",align:null},"false")))),(0,r.yg)("admonition",{type:"info"},(0,r.yg)("ol",{parentName:"admonition"},(0,r.yg)("li",{parentName:"ol"},"To store sensitive information like API key (headers), auth etc., ",(0,r.yg)("inlineCode",{parentName:"li"},"Databricks secrets")," can be used as shown in ",(0,r.yg)("a",{parentName:"li",href:"#example-1"},"Example")," below."),(0,r.yg)("li",{parentName:"ol"},"If the expected number of rows are very large, it's better to provide 
",(0,r.yg)("inlineCode",{parentName:"li"},"await time")," in the ",(0,r.yg)("inlineCode",{parentName:"li"},"advanced tab")," so you don't overwhelm the source server or exceed any request limits."),(0,r.yg)("li",{parentName:"ol"},"For APIs which takes list of parameters as inputs, window functions like ",(0,r.yg)("inlineCode",{parentName:"li"},"collect_list")," can be used before ",(0,r.yg)("inlineCode",{parentName:"li"},"RestApiEnrich")," Gem to reduce the number of API calls.")),(0,r.yg)("p",{parentName:"admonition"},"Please make sure that cluster is connected while using the ",(0,r.yg)("inlineCode",{parentName:"p"},"parse content")," option to ",(0,r.yg)("inlineCode",{parentName:"p"},"infer the schema from cluster")," for the first time.")),(0,r.yg)("admonition",{type:"note"},(0,r.yg)("p",{parentName:"admonition"},"All input parameters are expected to be in string format. Other column types such as ",(0,r.yg)("inlineCode",{parentName:"p"},"array/JSON/struct")," can be created\nusing combination of aggregate/window Gems along with reformat component and then can be cast as string prior to passing the column in ",(0,r.yg)("inlineCode",{parentName:"p"},"RestAPIEnrich Gem"),"\nas needed.")),(0,r.yg)("h3",{id:"example-1"},"Example 1"),(0,r.yg)("p",null,"Let's try to fetch prices for few cryptocurrencies from ",(0,r.yg)("a",{parentName:"p",href:"https://www.coinapi.io/"},"Coin-API"),"."),(0,r.yg)("p",null,"We would be taking cryptocurrency and currency as input from DataFrame and pass url, headers as static values.\nNote that URL in this example is created using static base url and adding cryptocurrency and currency as inputs\nfrom DataFrame."),(0,r.yg)("p",null,"Also, we would be using Databricks-secrets to pass headers as it requires API-key."),(0,r.yg)("div",{class:"wistia_responsive_padding",style:{padding:"56.25% 0 0 
0",position:"relative"}},(0,r.yg)("div",{class:"wistia_responsive_wrapper",style:{height:"100%",left:0,position:"absolute",top:0,width:"100%"}},(0,r.yg)("iframe",{src:"https://user-images.githubusercontent.com/103921419/184725747-88115fa5-b70b-4caf-b3e0-1f2476e15d6e.mp4",title:"Rest API example 1",allow:"autoplay;fullscreen",allowtransparency:"true",frameborder:"0",scrolling:"no",class:"wistia_embed",name:"wistia_embed",msallowfullscreen:!0,width:"100%",height:"100%"}))),(0,r.yg)("h3",{id:"example-2"},"Example 2"),(0,r.yg)("p",null,"Let's take a more complex example, where all method, url, headers, params etc are passed as values from DataFrame\ncolumns."),(0,r.yg)("div",{class:"wistia_responsive_padding",style:{padding:"56.25% 0 0 0",position:"relative"}},(0,r.yg)("div",{class:"wistia_responsive_wrapper",style:{height:"100%",left:0,position:"absolute",top:0,width:"100%"}},(0,r.yg)("iframe",{src:"https://user-images.githubusercontent.com/103921419/184725732-5cafc278-c1cf-4bad-9078-9f810ede008a.mp4",title:"Rest API example 2",allow:"autoplay;fullscreen",allowtransparency:"true",frameborder:"0",scrolling:"no",class:"wistia_embed",name:"wistia_embed",msallowfullscreen:!0,width:"100%",height:"100%"}))),(0,r.yg)("h4",{id:"generated-code"},"Generated Code"),(0,r.yg)(l.A,{mdxType:"Tabs"},(0,r.yg)(i.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},'def get_data_from_api(spark: SparkSession, in0: DataFrame) -> DataFrame:\n requestDF = in0.withColumn(\n "api_output",\n get_rest_api(\n to_json(struct(lit("GET").alias("method"), col("url"), lit(Config.coin_api_key).alias("headers"))),\n lit("")\n )\n )\n\n return requestDF.withColumn(\n "content_parsed",\n from_json(col("api_output.content"), schema_of_json(requestDF.select("api_output.content").take(1)[0][0]))\n )\n\n')))))}g.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/f779aca8.8fd6a6d1.js 
b/assets/js/f779aca8.8fd6a6d1.js deleted file mode 100644 index e312f70651..0000000000 --- a/assets/js/f779aca8.8fd6a6d1.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[72661],{15680:(e,t,a)=>{a.d(t,{xA:()=>p,yg:()=>g});var n=a(96540);function r(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function l(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function i(e){for(var t=1;t=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var s=n.createContext({}),u=function(e){var t=n.useContext(s),a=t;return e&&(a="function"==typeof e?e(t):i(i({},t),e)),a},p=function(e){var t=u(e.components);return n.createElement(s.Provider,{value:t},e.children)},d="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},c=n.forwardRef((function(e,t){var a=e.components,r=e.mdxType,l=e.originalType,s=e.parentName,p=o(e,["components","mdxType","originalType","parentName"]),d=u(a),c=r,g=d["".concat(s,".").concat(c)]||d[c]||m[c]||l;return a?n.createElement(g,i(i({ref:t},p),{},{components:a})):n.createElement(g,i({ref:t},p))}));function g(e,t){var a=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var l=a.length,i=new Array(l);i[0]=c;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[d]="string"==typeof e?e:r,i[1]=o;for(var u=2;u{a.d(t,{A:()=>i});var n=a(96540),r=a(20053);const l={tabItem:"tabItem_Ymn6"};function i(e){let{children:t,hidden:a,className:i}=e;return n.createElement("div",{role:"tabpanel",className:(0,r.A)(l.tabItem,i),hidden:a},t)}},11470:(e,t,a)=>{a.d(t,{A:()=>w});var 
n=a(58168),r=a(96540),l=a(20053),i=a(23104),o=a(56347),s=a(57485),u=a(31682),p=a(89466);function d(e){return function(e){return r.Children.map(e,(e=>{if(!e||(0,r.isValidElement)(e)&&function(e){const{props:t}=e;return!!t&&"object"==typeof t&&"value"in t}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:t,label:a,attributes:n,default:r}}=e;return{value:t,label:a,attributes:n,default:r}}))}function m(e){const{values:t,children:a}=e;return(0,r.useMemo)((()=>{const e=t??d(a);return function(e){const t=(0,u.X)(e,((e,t)=>e.value===t.value));if(t.length>0)throw new Error(`Docusaurus error: Duplicate values "${t.map((e=>e.value)).join(", ")}" found in . Every value needs to be unique.`)}(e),e}),[t,a])}function c(e){let{value:t,tabValues:a}=e;return a.some((e=>e.value===t))}function g(e){let{queryString:t=!1,groupId:a}=e;const n=(0,o.W6)(),l=function(e){let{queryString:t=!1,groupId:a}=e;if("string"==typeof t)return t;if(!1===t)return null;if(!0===t&&!a)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. 
You can also provide an explicit value such as queryString="my-search-param".');return a??null}({queryString:t,groupId:a});return[(0,s.aZ)(l),(0,r.useCallback)((e=>{if(!l)return;const t=new URLSearchParams(n.location.search);t.set(l,e),n.replace({...n.location,search:t.toString()})}),[l,n])]}function y(e){const{defaultValue:t,queryString:a=!1,groupId:n}=e,l=m(e),[i,o]=(0,r.useState)((()=>function(e){let{defaultValue:t,tabValues:a}=e;if(0===a.length)throw new Error("Docusaurus error: the component requires at least one children component");if(t){if(!c({value:t,tabValues:a}))throw new Error(`Docusaurus error: The has a defaultValue "${t}" but none of its children has the corresponding value. Available values are: ${a.map((e=>e.value)).join(", ")}. If you intend to show no default tab, use defaultValue={null} instead.`);return t}const n=a.find((e=>e.default))??a[0];if(!n)throw new Error("Unexpected error: 0 tabValues");return n.value}({defaultValue:t,tabValues:l}))),[s,u]=g({queryString:a,groupId:n}),[d,y]=function(e){let{groupId:t}=e;const a=function(e){return e?`docusaurus.tab.${e}`:null}(t),[n,l]=(0,p.Dv)(a);return[n,(0,r.useCallback)((e=>{a&&l.set(e)}),[a,l])]}({groupId:n}),f=(()=>{const e=s??d;return c({value:e,tabValues:l})?e:null})();(0,r.useLayoutEffect)((()=>{f&&o(f)}),[f]);return{selectedValue:i,selectValue:(0,r.useCallback)((e=>{if(!c({value:e,tabValues:l}))throw new Error(`Can't select invalid tab value=${e}`);o(e),u(e),y(e)}),[u,y,l]),tabValues:l}}var f=a(92303);const h={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function b(e){let{className:t,block:a,selectedValue:o,selectValue:s,tabValues:u}=e;const p=[],{blockElementScrollPositionUntilNextRender:d}=(0,i.a_)(),m=e=>{const t=e.currentTarget,a=p.indexOf(t),n=u[a].value;n!==o&&(d(t),s(n))},c=e=>{let t=null;switch(e.key){case"Enter":m(e);break;case"ArrowRight":{const a=p.indexOf(e.currentTarget)+1;t=p[a]??p[0];break}case"ArrowLeft":{const 
a=p.indexOf(e.currentTarget)-1;t=p[a]??p[p.length-1];break}}t?.focus()};return r.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,l.A)("tabs",{"tabs--block":a},t)},u.map((e=>{let{value:t,label:a,attributes:i}=e;return r.createElement("li",(0,n.A)({role:"tab",tabIndex:o===t?0:-1,"aria-selected":o===t,key:t,ref:e=>p.push(e),onKeyDown:c,onClick:m},i,{className:(0,l.A)("tabs__item",h.tabItem,i?.className,{"tabs__item--active":o===t})}),a??t)})))}function N(e){let{lazy:t,children:a,selectedValue:n}=e;const l=(Array.isArray(a)?a:[a]).filter(Boolean);if(t){const e=l.find((e=>e.props.value===n));return e?(0,r.cloneElement)(e,{className:"margin-top--md"}):null}return r.createElement("div",{className:"margin-top--md"},l.map(((e,t)=>(0,r.cloneElement)(e,{key:t,hidden:e.props.value!==n}))))}function v(e){const t=y(e);return r.createElement("div",{className:(0,l.A)("tabs-container",h.tabList)},r.createElement(b,(0,n.A)({},e,t)),r.createElement(N,(0,n.A)({},e,t)))}function w(e){const t=(0,f.A)();return r.createElement(v,(0,n.A)({key:String(t)},e))}},80350:(e,t,a)=>{a.r(t),a.d(t,{assets:()=>p,contentTitle:()=>s,default:()=>g,frontMatter:()=>o,metadata:()=>u,toc:()=>d});var n=a(58168),r=(a(96540),a(15680)),l=a(11470),i=a(19365);const o={sidebar_position:5,title:"RestAPIEnrich",id:"rest-api-enrich",description:"Enrich DataFrame with content from rest API response based on configuration",tags:["gems","api","custom","rest"]},s=void 0,u={unversionedId:"Spark/gems/custom/rest-api-enrich",id:"Spark/gems/custom/rest-api-enrich",title:"RestAPIEnrich",description:"Enrich DataFrame with content from rest API response based on 
configuration",source:"@site/docs/Spark/gems/custom/rest-api-enrich.md",sourceDirName:"Spark/gems/custom",slug:"/Spark/gems/custom/rest-api-enrich",permalink:"/Spark/gems/custom/rest-api-enrich",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"api",permalink:"/tags/api"},{label:"custom",permalink:"/tags/custom"},{label:"rest",permalink:"/tags/rest"}],version:"current",sidebarPosition:5,frontMatter:{sidebar_position:5,title:"RestAPIEnrich",id:"rest-api-enrich",description:"Enrich DataFrame with content from rest API response based on configuration",tags:["gems","api","custom","rest"]},sidebar:"defaultSidebar",previous:{title:"DeltaTableOperations",permalink:"/Spark/gems/custom/delta-ops"},next:{title:"Machine Learning",permalink:"/Spark/gems/machine-learning/"}},p={},d=[{value:"Parameters",id:"parameters",level:3},{value:"Example 1",id:"example-1",level:3},{value:"Example 2",id:"example-2",level:3},{value:"Generated Code",id:"generated-code",level:4}],m={toc:d},c="wrapper";function g(e){let{components:t,...a}=e;return(0,r.yg)(c,(0,n.A)({},m,a,{components:t,mdxType:"MDXLayout"}),(0,r.yg)("h3",null,(0,r.yg)("span",{class:"badge rounded-pill text-bg-light"},"Spark Gem")),(0,r.yg)("p",null,"Enriches the DataFrame by adding column(s) with content from REST API output based on the given configuration."),(0,r.yg)("h3",{id:"parameters"},"Parameters"),(0,r.yg)("p",null,"Each property can either be set as a static value or a value from an existing column of the input DataFrame. 
Please refer\nto the examples in the description column of each parameter for reference on how the string value should be formed."),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:null},"Parameter"),(0,r.yg)("th",{parentName:"tr",align:null},"Description"),(0,r.yg)("th",{parentName:"tr",align:null},"Required"),(0,r.yg)("th",{parentName:"tr",align:null},"Default"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"method"),(0,r.yg)("td",{parentName:"tr",align:null},"method for the new Request object: ",(0,r.yg)("inlineCode",{parentName:"td"},"GET"),", ",(0,r.yg)("inlineCode",{parentName:"td"},"OPTIONS"),", ",(0,r.yg)("inlineCode",{parentName:"td"},"HEAD"),", ",(0,r.yg)("inlineCode",{parentName:"td"},"POST"),", ",(0,r.yg)("inlineCode",{parentName:"td"},"PUT"),", ",(0,r.yg)("inlineCode",{parentName:"td"},"PATCH"),", or ",(0,r.yg)("inlineCode",{parentName:"td"},"DELETE"),"."),(0,r.yg)("td",{parentName:"tr",align:null},"true"),(0,r.yg)("td",{parentName:"tr",align:null})),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"url"),(0,r.yg)("td",{parentName:"tr",align:null},"URL for the REST API."),(0,r.yg)("td",{parentName:"tr",align:null},"true"),(0,r.yg)("td",{parentName:"tr",align:null})),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"params"),(0,r.yg)("td",{parentName:"tr",align:null},"Dictionary, list of tuples or bytes to send in the query string for the Request. eg: ",(0,r.yg)("inlineCode",{parentName:"td"},'{"key1":"value1", "key2": value2, "key3": ["value1", "value2"]}')),(0,r.yg)("td",{parentName:"tr",align:null},"false"),(0,r.yg)("td",{parentName:"tr",align:null})),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"data"),(0,r.yg)("td",{parentName:"tr",align:null},"Dictionary to send in the body of the Request. 
eg: ",(0,r.yg)("inlineCode",{parentName:"td"},'{"key1":"value1", "key2": value2}')),(0,r.yg)("td",{parentName:"tr",align:null},"false"),(0,r.yg)("td",{parentName:"tr",align:null})),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"JSON"),(0,r.yg)("td",{parentName:"tr",align:null},"A JSON serializable Python object to send in the body of the Request. eg: ",(0,r.yg)("inlineCode",{parentName:"td"},'{"key1":"value1", "key2": value2}')),(0,r.yg)("td",{parentName:"tr",align:null},"false"),(0,r.yg)("td",{parentName:"tr",align:null})),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"headers"),(0,r.yg)("td",{parentName:"tr",align:null},"Dictionary of HTTP Headers to send with the Request. eg: ",(0,r.yg)("inlineCode",{parentName:"td"},'{"key1":"value1", "key2": "value2"}')),(0,r.yg)("td",{parentName:"tr",align:null},"false"),(0,r.yg)("td",{parentName:"tr",align:null})),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"cookies"),(0,r.yg)("td",{parentName:"tr",align:null},"Dictionary to send with the Request. eg: ",(0,r.yg)("inlineCode",{parentName:"td"},'{"key1":"value1", "key2": "value2"}')),(0,r.yg)("td",{parentName:"tr",align:null},"false"),(0,r.yg)("td",{parentName:"tr",align:null})),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"auth"),(0,r.yg)("td",{parentName:"tr",align:null},"Auth tuple to enable Basic/Digest/Custom HTTP Auth. eg: ",(0,r.yg)("inlineCode",{parentName:"td"},"user:pass")),(0,r.yg)("td",{parentName:"tr",align:null},"false"),(0,r.yg)("td",{parentName:"tr",align:null})),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"timeout"),(0,r.yg)("td",{parentName:"tr",align:null},"How many seconds to wait for the server to send data before giving up, as a float, eg: ",(0,r.yg)("inlineCode",{parentName:"td"},"0.5")," or a (connect timeout, read timeout) tuple. 
eg: ",(0,r.yg)("inlineCode",{parentName:"td"},"0.5:0.25")),(0,r.yg)("td",{parentName:"tr",align:null},"false"),(0,r.yg)("td",{parentName:"tr",align:null})),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"allow redirects"),(0,r.yg)("td",{parentName:"tr",align:null},"Enable/disable ",(0,r.yg)("inlineCode",{parentName:"td"},"GET/OPTIONS/POST/PUT/PATCH/DELETE/HEAD redirection"),". eg: ",(0,r.yg)("inlineCode",{parentName:"td"},"true")," or ",(0,r.yg)("inlineCode",{parentName:"td"},"false")),(0,r.yg)("td",{parentName:"tr",align:null},"false"),(0,r.yg)("td",{parentName:"tr",align:null},"true")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"proxies"),(0,r.yg)("td",{parentName:"tr",align:null},"Dictionary mapping protocol to the URL of the proxy. eg: ",(0,r.yg)("inlineCode",{parentName:"td"},'{"https" : "https://1.1.0.1:80"}')),(0,r.yg)("td",{parentName:"tr",align:null},"false"),(0,r.yg)("td",{parentName:"tr",align:null})),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"verify"),(0,r.yg)("td",{parentName:"tr",align:null},"Either a boolean, in which case it controls whether we verify the server\u2019s TLS certificate eg: ",(0,r.yg)("inlineCode",{parentName:"td"},"true")," or ",(0,r.yg)("inlineCode",{parentName:"td"},"false")," or a string, in which case it must be a path to a CA bundle to use. Defaults to True. eg: ",(0,r.yg)("inlineCode",{parentName:"td"},"dbfs:/path-to-file")),(0,r.yg)("td",{parentName:"tr",align:null},"false"),(0,r.yg)("td",{parentName:"tr",align:null},"true")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"stream"),(0,r.yg)("td",{parentName:"tr",align:null},"if False, the response content will be immediately downloaded. 
eg: ",(0,r.yg)("inlineCode",{parentName:"td"},"true")," or ",(0,r.yg)("inlineCode",{parentName:"td"},"false")),(0,r.yg)("td",{parentName:"tr",align:null},"false"),(0,r.yg)("td",{parentName:"tr",align:null})),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"cert"),(0,r.yg)("td",{parentName:"tr",align:null},"if String, path to SSL client cert file (.pem). eg. ",(0,r.yg)("inlineCode",{parentName:"td"},"dbfs:/path-to-file"),". If Tuple, (\u2018cert\u2019, \u2018key\u2019) pair. eg: ",(0,r.yg)("inlineCode",{parentName:"td"},"cert:key"),"."),(0,r.yg)("td",{parentName:"tr",align:null},"false"),(0,r.yg)("td",{parentName:"tr",align:null})),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:null},"parse content"),(0,r.yg)("td",{parentName:"tr",align:null},"Parse content as JSON (to make the schema available, enable ",(0,r.yg)("inlineCode",{parentName:"td"},"custom schema"),", and click ",(0,r.yg)("inlineCode",{parentName:"td"},"infer from cluster")," at the bottom left in the output tab)"),(0,r.yg)("td",{parentName:"tr",align:null},"false"),(0,r.yg)("td",{parentName:"tr",align:null},"false")))),(0,r.yg)("admonition",{type:"info"},(0,r.yg)("ol",{parentName:"admonition"},(0,r.yg)("li",{parentName:"ol"},"To store sensitive information like API key (headers), auth etc., ",(0,r.yg)("inlineCode",{parentName:"li"},"Databricks secrets")," can be used as shown in ",(0,r.yg)("a",{parentName:"li",href:"#example-1"},"Example")," below."),(0,r.yg)("li",{parentName:"ol"},"If the expected number of rows are very large, it's better to provide ",(0,r.yg)("inlineCode",{parentName:"li"},"await time")," in the ",(0,r.yg)("inlineCode",{parentName:"li"},"advanced tab")," so you don't overwhelm the source server or exceed any request limits."),(0,r.yg)("li",{parentName:"ol"},"For APIs which takes list of parameters as inputs, window functions like ",(0,r.yg)("inlineCode",{parentName:"li"},"collect_list")," can be used before 
",(0,r.yg)("inlineCode",{parentName:"li"},"RestApiEnrich")," Gem to reduce the number of API calls.")),(0,r.yg)("p",{parentName:"admonition"},"Please make sure that cluster is connected while using the ",(0,r.yg)("inlineCode",{parentName:"p"},"parse content")," option to ",(0,r.yg)("inlineCode",{parentName:"p"},"infer the schema from cluster")," for the first time.")),(0,r.yg)("admonition",{type:"note"},(0,r.yg)("p",{parentName:"admonition"},"All input parameters are expected to be in string format. Other column types such as ",(0,r.yg)("inlineCode",{parentName:"p"},"array/JSON/struct")," can be created\nusing combination of aggregate/window Gems along with reformat component and then can be cast as string prior to passing the column in ",(0,r.yg)("inlineCode",{parentName:"p"},"RestAPIEnrich Gem"),"\nas needed.")),(0,r.yg)("h3",{id:"example-1"},"Example 1"),(0,r.yg)("p",null,"Let's try to fetch prices for few cryptocurrencies from ",(0,r.yg)("a",{parentName:"p",href:"https://www.coinapi.io/"},"Coin-API"),"."),(0,r.yg)("p",null,"We would be taking cryptocurrency and currency as input from DataFrame and pass url, headers as static values.\nNote that URL in this example is created using static base url and adding cryptocurrency and currency as inputs\nfrom DataFrame."),(0,r.yg)("p",null,"Also, we would be using Databricks-secrets to pass headers as it requires API-key."),(0,r.yg)("div",{class:"wistia_responsive_padding",style:{padding:"56.25% 0 0 0",position:"relative"}},(0,r.yg)("div",{class:"wistia_responsive_wrapper",style:{height:"100%",left:0,position:"absolute",top:0,width:"100%"}},(0,r.yg)("iframe",{src:"https://user-images.githubusercontent.com/103921419/184725747-88115fa5-b70b-4caf-b3e0-1f2476e15d6e.mp4",title:"Rest API example 1",allow:"autoplay;fullscreen",allowtransparency:"true",frameborder:"0",scrolling:"no",class:"wistia_embed",name:"wistia_embed",msallowfullscreen:!0,width:"100%",height:"100%"}))),(0,r.yg)("h3",{id:"example-2"},"Example 
2"),(0,r.yg)("p",null,"Let's take a more complex example, where all method, url, headers, params etc are passed as values from DataFrame\ncolumns."),(0,r.yg)("div",{class:"wistia_responsive_padding",style:{padding:"56.25% 0 0 0",position:"relative"}},(0,r.yg)("div",{class:"wistia_responsive_wrapper",style:{height:"100%",left:0,position:"absolute",top:0,width:"100%"}},(0,r.yg)("iframe",{src:"https://user-images.githubusercontent.com/103921419/184725732-5cafc278-c1cf-4bad-9078-9f810ede008a.mp4",title:"Rest API example 2",allow:"autoplay;fullscreen",allowtransparency:"true",frameborder:"0",scrolling:"no",class:"wistia_embed",name:"wistia_embed",msallowfullscreen:!0,width:"100%",height:"100%"}))),(0,r.yg)("h4",{id:"generated-code"},"Generated Code"),(0,r.yg)(l.A,{mdxType:"Tabs"},(0,r.yg)(i.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},'def get_data_from_api(spark: SparkSession, in0: DataFrame) -> DataFrame:\n requestDF = in0.withColumn(\n "api_output",\n get_rest_api(\n to_json(struct(lit("GET").alias("method"), col("url"), lit(Config.coin_api_key).alias("headers"))),\n lit("")\n )\n )\n\n return requestDF.withColumn(\n "content_parsed",\n from_json(col("api_output.content"), schema_of_json(requestDF.select("api_output.content").take(1)[0][0]))\n )\n\n')))))}g.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/fe598bea.3b03a0a7.js b/assets/js/fe598bea.3b03a0a7.js new file mode 100644 index 0000000000..0b302d89c8 --- /dev/null +++ b/assets/js/fe598bea.3b03a0a7.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[7054],{15680:(e,t,a)=>{a.d(t,{xA:()=>p,yg:()=>m});var r=a(96540);function n(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function i(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var 
r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,r)}return a}function o(e){for(var t=1;t=0||(n[a]=e[a]);return n}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(n[a]=e[a])}return n}var c=r.createContext({}),l=function(e){var t=r.useContext(c),a=t;return e&&(a="function"==typeof e?e(t):o(o({},t),e)),a},p=function(e){var t=l(e.components);return r.createElement(c.Provider,{value:t},e.children)},g="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},h=r.forwardRef((function(e,t){var a=e.components,n=e.mdxType,i=e.originalType,c=e.parentName,p=s(e,["components","mdxType","originalType","parentName"]),g=l(a),h=n,m=g["".concat(c,".").concat(h)]||g[h]||d[h]||i;return a?r.createElement(m,o(o({ref:t},p),{},{components:a})):r.createElement(m,o({ref:t},p))}));function m(e,t){var a=arguments,n=t&&t.mdxType;if("string"==typeof e||n){var i=a.length,o=new Array(i);o[0]=h;var s={};for(var c in t)hasOwnProperty.call(t,c)&&(s[c]=t[c]);s.originalType=e,s[g]="string"==typeof e?e:n,o[1]=s;for(var l=2;l{a.r(t),a.d(t,{assets:()=>c,contentTitle:()=>o,default:()=>d,frontMatter:()=>i,metadata:()=>s,toc:()=>l});var r=a(58168),n=(a(96540),a(15680));const i={title:"Data Generator",id:"data-generator",description:"Don't have the right data? Create some!",sidebar_position:1,tags:["synthetic","random","fake","data","generator"]},o=void 0,s={unversionedId:"Spark/gems/source-target/advanced/synthetic-data-generator/data-generator",id:"Spark/gems/source-target/advanced/synthetic-data-generator/data-generator",title:"Data Generator",description:"Don't have the right data? 
Create some!",source:"@site/docs/Spark/gems/source-target/advanced/synthetic-data-generator/synthetic-data-generator.md",sourceDirName:"Spark/gems/source-target/advanced/synthetic-data-generator",slug:"/Spark/gems/source-target/advanced/synthetic-data-generator/",permalink:"/Spark/gems/source-target/advanced/synthetic-data-generator/",draft:!1,tags:[{label:"synthetic",permalink:"/tags/synthetic"},{label:"random",permalink:"/tags/random"},{label:"fake",permalink:"/tags/fake"},{label:"data",permalink:"/tags/data"},{label:"generator",permalink:"/tags/generator"}],version:"current",sidebarPosition:1,frontMatter:{title:"Data Generator",id:"data-generator",description:"Don't have the right data? Create some!",sidebar_position:1,tags:["synthetic","random","fake","data","generator"]},sidebar:"defaultSidebar",previous:{title:"Hive Table",permalink:"/Spark/gems/source-target/catalog-table/hive"},next:{title:"Providers",permalink:"/Spark/gems/source-target/advanced/synthetic-data-generator/providers"}},c={},l=[{value:"Cluster requirements",id:"cluster-requirements",level:2},{value:"Prophecy requirements",id:"prophecy-requirements",level:2},{value:"Create the Gem",id:"create-the-gem",level:2},{value:"Properties: Specify Data Structure",id:"properties-specify-data-structure",level:3},{value:"Infer the Schema",id:"infer-the-schema",level:3},{value:"Preview the data",id:"preview-the-data",level:3},{value:"Store the data",id:"store-the-data",level:2}],p={toc:l},g="wrapper";function d(e){let{components:t,...i}=e;return(0,n.yg)(g,(0,r.A)({},p,i,{components:t,mdxType:"MDXLayout"}),(0,n.yg)("h3",null,(0,n.yg)("span",{class:"badge"},"Spark Gem")),(0,n.yg)("p",null,"Generate synthetic data with this special kind of Source Gem."),(0,n.yg)("p",null,"Generating mock data is crucial when building data Pipelines to simulate real-world scenarios for testing, validating, and optimizing Pipeline performance before using actual production data. 
It helps ensure the Pipeline handles various data formats, structures, and edge cases effectively, minimizing potential issues in a live environment."),(0,n.yg)("p",null,"A wide range of synthetic data can be created using any column name and an array of data types. For example, generate browser history data as shown below."),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"img",src:a(48995).A,width:"2880",height:"1726"})),(0,n.yg)("p",null,"Follow the steps below to generate your own mock data using the Data Generator Gem."),(0,n.yg)("h2",{id:"cluster-requirements"},"Cluster requirements"),(0,n.yg)("p",null,"Create a Fabric and configure the ",(0,n.yg)("a",{parentName:"p",href:"/Spark/fabrics/databricks-fabric"},"Job Size")," as below, or log into an existing Spark cluster UI. Here we use Databricks as an example."),(0,n.yg)("ol",null,(0,n.yg)("li",{parentName:"ol"},"Verify the Databricks Runtime uses Python version >= 3.8.\nFor example, ",(0,n.yg)("a",{parentName:"li",href:"https://docs.databricks.com/en/release-notes/runtime/12.2lts.html"},"Databricks Runtime 12.2 LTS")," uses Python 3.9.19. If you are using Databricks Runtime 12.2+, the Python version meets this requirement."),(0,n.yg)("li",{parentName:"ol"},'Create a new Environment variable called "SPARK_VERSION" with value 3.3'),(0,n.yg)("li",{parentName:"ol"},"Confirm and restart the Spark cluster.\n",(0,n.yg)("img",{alt:"requirements",src:a(54077).A,width:"2880",height:"912"}))),(0,n.yg)("h2",{id:"prophecy-requirements"},"Prophecy requirements"),(0,n.yg)("p",null,"Open a Prophecy Project and upgrade the ",(0,n.yg)("inlineCode",{parentName:"p"},"ProphecySparkBasicsPython")," Dependency to ",(0,n.yg)("inlineCode",{parentName:"p"},"0.2.34")," or later. Connecting a Prophecy project to a Spark cluster with a different dependency version will prompt a cluster restart. 
Ideally this is a one-time restart, and you're ready to proceed!"),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"img",src:a(47377).A,width:"2880",height:"912"})),(0,n.yg)("admonition",{title:"Caution",type:"caution"},(0,n.yg)("p",{parentName:"admonition"},"Using two Prophecy projects with the same Spark cluster will cause cluster restarts (when each project attaches to the cluster) unless the ",(0,n.yg)("inlineCode",{parentName:"p"},"ProphecySparkBasicsPython")," versions match across both Projects. The same caution applies to ",(0,n.yg)("inlineCode",{parentName:"p"},"ProphecyLibsPython")," versions."),(0,n.yg)("p",{parentName:"admonition"},(0,n.yg)("em",{parentName:"p"},"The Fix:")," Do yourself a favor and upgrade all your Prophecy projects to the same ",(0,n.yg)("inlineCode",{parentName:"p"},"ProphecySparkBasicsPython")," and ",(0,n.yg)("inlineCode",{parentName:"p"},"ProphecyLibsPython")," versions or use separate Spark clusters.")),(0,n.yg)("h2",{id:"create-the-gem"},"Create the Gem"),(0,n.yg)("p",null,"Create a new Dataset and select the Type as Data Generator. Note we are not specifying a storage location yet; we will ",(0,n.yg)("a",{parentName:"p",href:"#store-the-data"},"store the data")," in a separate Gem.",(0,n.yg)("br",{parentName:"p"}),"\n",(0,n.yg)("img",{alt:"img",src:a(30583).A,width:"2880",height:"640"})),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"img",src:a(46943).A,width:"2880",height:"1726"})),(0,n.yg)("h3",{id:"properties-specify-data-structure"},"Properties: Specify Data Structure"),(0,n.yg)("p",null,"What type of data do you need to generate? Specify the data structure using Random Data Providers. Prophecy offers a selection of ",(0,n.yg)("a",{parentName:"p",href:"./providers"},"Random Data Providers")," including integers, booleans, and elements from a list."),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"img",src:a(65223).A,width:"2620",height:"1507"})),(0,n.yg)("p",null,"Generate column using a sequence of integers (left). 
Generate another column by referencing an existing catalog table (right). Randomly select elements of the foreign key from that table.",(0,n.yg)("br",{parentName:"p"}),"\n",(0,n.yg)("img",{alt:"img",src:a(79758).A,width:"2880",height:"882"})),(0,n.yg)("h3",{id:"infer-the-schema"},"Infer the Schema"),(0,n.yg)("p",null,"Changes to the columns in the Properties tab are incorporated by inferring the schema in the Schema tab."),(0,n.yg)("h3",{id:"preview-the-data"},"Preview the data"),(0,n.yg)("p",null,"This Gem returns a DataFrame with randomly generated values. Preview the first few records to verify the schema is correct. Then save the Gem."),(0,n.yg)("h2",{id:"store-the-data"},"Store the data"),(0,n.yg)("p",null,"The newly generated data from the Data Generator Gem is not saved by default. Store the data (use your favorite file type!) using the Target Gem."),(0,n.yg)("p",null,"Create the target Gem.\n",(0,n.yg)("img",{alt:"img",src:a(48659).A,width:"2880",height:"640"})),(0,n.yg)("p",null,"Connect the Data Generator SOURCE Gem to the Target Gem.\n",(0,n.yg)("img",{alt:"img",src:a(68316).A,width:"2880",height:"640"})),(0,n.yg)("p",null,"Be sure to configure the write mode for the target Gem. This is ",(0,n.yg)("strong",{parentName:"p"},"very important")," because the Data Generator Gem is ",(0,n.yg)("strong",{parentName:"p"},"not")," idempotent. There is a ",(0,n.yg)("strong",{parentName:"p"},"new random seed")," each time the Gem is run.\n",(0,n.yg)("img",{alt:"img",src:a(22018).A,width:"2880",height:"794"})),(0,n.yg)("admonition",{type:"caution"},(0,n.yg)("p",{parentName:"admonition"},"The Data Generator only generates the data. If you want to store the data just connect the output to a target Gem and configure the location, write properties etc. The data generated is new for each run (execution). 
The target write mode can be error, overwrite, append, or ignore as desired.")))}d.isMDXComponent=!0},54077:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/synth_0_1_requirements-2c772b3704bbb6be6781bf853266fdaf.png"},47377:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/synth_0_2_proph_reqiuirements-c45e20cbb84de985a86b3520055b9c1f.png"},48995:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/synth_0_datasample-7848d4eecabe3ea3b12e476431b34d71.png"},30583:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/synth_1_new_dataset-9cac2c831393359c0096d8c32e6c2a7d.png"},46943:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/synth_2_type-0ba8ee2462116e7f329176334a34280f.png"},65223:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/synth_3_properties-ea373dd8f815df6e612adabee3350329.png"},48659:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/synth_4_new_target-9a024f9eba4fe04462b66e94fc4abd12.png"},68316:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/synth_5_connect_target-2ac884bc5fe3848c5b19f2c640d0e81a.png"},22018:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/synth_6_write_mode-d4b873c8829feb7aa978b249dcc2296b.png"},79758:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/synth_7_seq_or_foreign-351453fa09849a080c836b8a5dc2e7ad.png"}}]); \ No newline at end of file diff --git a/assets/js/fe598bea.8d795564.js b/assets/js/fe598bea.8d795564.js deleted file mode 100644 index de1b57c68b..0000000000 --- a/assets/js/fe598bea.8d795564.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[7054],{15680:(e,t,a)=>{a.d(t,{xA:()=>p,yg:()=>m});var r=a(96540);function n(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function i(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return 
Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,r)}return a}function o(e){for(var t=1;t=0||(n[a]=e[a]);return n}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(n[a]=e[a])}return n}var c=r.createContext({}),l=function(e){var t=r.useContext(c),a=t;return e&&(a="function"==typeof e?e(t):o(o({},t),e)),a},p=function(e){var t=l(e.components);return r.createElement(c.Provider,{value:t},e.children)},g="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},h=r.forwardRef((function(e,t){var a=e.components,n=e.mdxType,i=e.originalType,c=e.parentName,p=s(e,["components","mdxType","originalType","parentName"]),g=l(a),h=n,m=g["".concat(c,".").concat(h)]||g[h]||d[h]||i;return a?r.createElement(m,o(o({ref:t},p),{},{components:a})):r.createElement(m,o({ref:t},p))}));function m(e,t){var a=arguments,n=t&&t.mdxType;if("string"==typeof e||n){var i=a.length,o=new Array(i);o[0]=h;var s={};for(var c in t)hasOwnProperty.call(t,c)&&(s[c]=t[c]);s.originalType=e,s[g]="string"==typeof e?e:n,o[1]=s;for(var l=2;l{a.r(t),a.d(t,{assets:()=>c,contentTitle:()=>o,default:()=>d,frontMatter:()=>i,metadata:()=>s,toc:()=>l});var r=a(58168),n=(a(96540),a(15680));const i={title:"Data Generator",id:"data-generator",description:"Don't have the right data? Create some!",sidebar_position:1,tags:["synthetic","random","fake","data","generator"]},o=void 0,s={unversionedId:"Spark/gems/source-target/advanced/synthetic-data-generator/data-generator",id:"Spark/gems/source-target/advanced/synthetic-data-generator/data-generator",title:"Data Generator",description:"Don't have the right data? 
Create some!",source:"@site/docs/Spark/gems/source-target/advanced/synthetic-data-generator/synthetic-data-generator.md",sourceDirName:"Spark/gems/source-target/advanced/synthetic-data-generator",slug:"/Spark/gems/source-target/advanced/synthetic-data-generator/",permalink:"/Spark/gems/source-target/advanced/synthetic-data-generator/",draft:!1,tags:[{label:"synthetic",permalink:"/tags/synthetic"},{label:"random",permalink:"/tags/random"},{label:"fake",permalink:"/tags/fake"},{label:"data",permalink:"/tags/data"},{label:"generator",permalink:"/tags/generator"}],version:"current",sidebarPosition:1,frontMatter:{title:"Data Generator",id:"data-generator",description:"Don't have the right data? Create some!",sidebar_position:1,tags:["synthetic","random","fake","data","generator"]},sidebar:"defaultSidebar",previous:{title:"Hive Table",permalink:"/Spark/gems/source-target/catalog-table/hive"},next:{title:"Providers",permalink:"/Spark/gems/source-target/advanced/synthetic-data-generator/providers"}},c={},l=[{value:"Cluster requirements",id:"cluster-requirements",level:2},{value:"Prophecy requirements",id:"prophecy-requirements",level:2},{value:"Create the Gem",id:"create-the-gem",level:2},{value:"Properties: Specify Data Structure",id:"properties-specify-data-structure",level:3},{value:"Infer the Schema",id:"infer-the-schema",level:3},{value:"Preview the data",id:"preview-the-data",level:3},{value:"Store the data",id:"store-the-data",level:2}],p={toc:l},g="wrapper";function d(e){let{components:t,...i}=e;return(0,n.yg)(g,(0,r.A)({},p,i,{components:t,mdxType:"MDXLayout"}),(0,n.yg)("h3",null,(0,n.yg)("span",{class:"badge rounded-pill text-bg-light"},"Spark Gem")),(0,n.yg)("p",null,"Generate synthetic data with this special kind of Source Gem."),(0,n.yg)("p",null,"Generating mock data is crucial when building data Pipelines to simulate real-world scenarios for testing, validating, and optimizing Pipeline performance before using actual production data. 
It helps ensure the Pipeline handles various data formats, structures, and edge cases effectively, minimizing potential issues in a live environment."),(0,n.yg)("p",null,"A wide range of synthetic data can be created using any column name and an array of data types. For example, generate browser history data as shown below."),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"img",src:a(48995).A,width:"2880",height:"1726"})),(0,n.yg)("p",null,"Follow the steps below to generate your own mock data using the Data Generator Gem."),(0,n.yg)("h2",{id:"cluster-requirements"},"Cluster requirements"),(0,n.yg)("p",null,"Create a Fabric and configure the ",(0,n.yg)("a",{parentName:"p",href:"/Spark/fabrics/databricks-fabric"},"Job Size")," as below, or log into an existing Spark cluster UI. Here we use Databricks as an example."),(0,n.yg)("ol",null,(0,n.yg)("li",{parentName:"ol"},"Verify the Databricks Runtime uses Python version >= 3.8.\nFor example, ",(0,n.yg)("a",{parentName:"li",href:"https://docs.databricks.com/en/release-notes/runtime/12.2lts.html"},"Databricks Runtime 12.2 LTS")," uses Python 3.9.19. If you are using Databricks Runtime 12.2+, the Python version meets this requirement."),(0,n.yg)("li",{parentName:"ol"},'Create a new Environment variable called "SPARK_VERSION" with value 3.3'),(0,n.yg)("li",{parentName:"ol"},"Confirm and restart the Spark cluster.\n",(0,n.yg)("img",{alt:"requirements",src:a(54077).A,width:"2880",height:"912"}))),(0,n.yg)("h2",{id:"prophecy-requirements"},"Prophecy requirements"),(0,n.yg)("p",null,"Open a Prophecy Project and upgrade the ",(0,n.yg)("inlineCode",{parentName:"p"},"ProphecySparkBasicsPython")," Dependency to ",(0,n.yg)("inlineCode",{parentName:"p"},"0.2.34")," or later. Connecting a Prophecy project to a Spark cluster with a different dependency version will prompt a cluster restart. 
Ideally this is a one-time restart, and you're ready to proceed!"),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"img",src:a(47377).A,width:"2880",height:"912"})),(0,n.yg)("admonition",{title:"Caution",type:"caution"},(0,n.yg)("p",{parentName:"admonition"},"Using two Prophecy projects with the same Spark cluster will cause cluster restarts (when each project attaches to the cluster) unless the ",(0,n.yg)("inlineCode",{parentName:"p"},"ProphecySparkBasicsPython")," versions match across both Projects. The same caution applies to ",(0,n.yg)("inlineCode",{parentName:"p"},"ProphecyLibsPython")," versions."),(0,n.yg)("p",{parentName:"admonition"},(0,n.yg)("em",{parentName:"p"},"The Fix:")," Do yourself a favor and upgrade all your Prophecy projects to the same ",(0,n.yg)("inlineCode",{parentName:"p"},"ProphecySparkBasicsPython")," and ",(0,n.yg)("inlineCode",{parentName:"p"},"ProphecyLibsPython")," versions or use separate Spark clusters.")),(0,n.yg)("h2",{id:"create-the-gem"},"Create the Gem"),(0,n.yg)("p",null,"Create a new Dataset and select the Type as Data Generator. Note we are not specifying a storage location yet; we will ",(0,n.yg)("a",{parentName:"p",href:"#store-the-data"},"store the data")," in a separate Gem.",(0,n.yg)("br",{parentName:"p"}),"\n",(0,n.yg)("img",{alt:"img",src:a(30583).A,width:"2880",height:"640"})),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"img",src:a(46943).A,width:"2880",height:"1726"})),(0,n.yg)("h3",{id:"properties-specify-data-structure"},"Properties: Specify Data Structure"),(0,n.yg)("p",null,"What type of data do you need to generate? Specify the data structure using Random Data Providers. Prophecy offers a selection of ",(0,n.yg)("a",{parentName:"p",href:"./providers"},"Random Data Providers")," including integers, booleans, and elements from a list."),(0,n.yg)("p",null,(0,n.yg)("img",{alt:"img",src:a(65223).A,width:"2620",height:"1507"})),(0,n.yg)("p",null,"Generate column using a sequence of integers (left). 
Generate another column by referencing an existing catalog table (right). Randomly select elements of the foreign key from that table.",(0,n.yg)("br",{parentName:"p"}),"\n",(0,n.yg)("img",{alt:"img",src:a(79758).A,width:"2880",height:"882"})),(0,n.yg)("h3",{id:"infer-the-schema"},"Infer the Schema"),(0,n.yg)("p",null,"Changes to the columns in the Properties tab are incorporated by inferring the schema in the Schema tab."),(0,n.yg)("h3",{id:"preview-the-data"},"Preview the data"),(0,n.yg)("p",null,"This Gem returns a DataFrame with randomly generated values. Preview the first few records to verify the schema is correct. Then save the Gem."),(0,n.yg)("h2",{id:"store-the-data"},"Store the data"),(0,n.yg)("p",null,"The newly generated data from the Data Generator Gem is not saved by default. Store the data (use your favorite file type!) using the Target Gem."),(0,n.yg)("p",null,"Create the target Gem.\n",(0,n.yg)("img",{alt:"img",src:a(48659).A,width:"2880",height:"640"})),(0,n.yg)("p",null,"Connect the Data Generator SOURCE Gem to the Target Gem.\n",(0,n.yg)("img",{alt:"img",src:a(68316).A,width:"2880",height:"640"})),(0,n.yg)("p",null,"Be sure to configure the write mode for the target Gem. This is ",(0,n.yg)("strong",{parentName:"p"},"very important")," because the Data Generator Gem is ",(0,n.yg)("strong",{parentName:"p"},"not")," idempotent. There is a ",(0,n.yg)("strong",{parentName:"p"},"new random seed")," each time the Gem is run.\n",(0,n.yg)("img",{alt:"img",src:a(22018).A,width:"2880",height:"794"})),(0,n.yg)("admonition",{type:"caution"},(0,n.yg)("p",{parentName:"admonition"},"The Data Generator only generates the data. If you want to store the data just connect the output to a target Gem and configure the location, write properties etc. The data generated is new for each run (execution). 
The target write mode can be error, overwrite, append, or ignore as desired.")))}d.isMDXComponent=!0},54077:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/synth_0_1_requirements-2c772b3704bbb6be6781bf853266fdaf.png"},47377:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/synth_0_2_proph_reqiuirements-c45e20cbb84de985a86b3520055b9c1f.png"},48995:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/synth_0_datasample-7848d4eecabe3ea3b12e476431b34d71.png"},30583:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/synth_1_new_dataset-9cac2c831393359c0096d8c32e6c2a7d.png"},46943:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/synth_2_type-0ba8ee2462116e7f329176334a34280f.png"},65223:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/synth_3_properties-ea373dd8f815df6e612adabee3350329.png"},48659:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/synth_4_new_target-9a024f9eba4fe04462b66e94fc4abd12.png"},68316:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/synth_5_connect_target-2ac884bc5fe3848c5b19f2c640d0e81a.png"},22018:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/synth_6_write_mode-d4b873c8829feb7aa978b249dcc2296b.png"},79758:(e,t,a)=>{a.d(t,{A:()=>r});const r=a.p+"assets/images/synth_7_seq_or_foreign-351453fa09849a080c836b8a5dc2e7ad.png"}}]); \ No newline at end of file diff --git a/assets/js/fe6a71b8.dd8a21f3.js b/assets/js/fe6a71b8.d1c4ae35.js similarity index 50% rename from assets/js/fe6a71b8.dd8a21f3.js rename to assets/js/fe6a71b8.d1c4ae35.js index ebdeb02310..de2abd015d 100644 --- a/assets/js/fe6a71b8.dd8a21f3.js +++ b/assets/js/fe6a71b8.d1c4ae35.js @@ -1 +1 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[40405],{15680:(e,t,a)=>{a.d(t,{xA:()=>p,yg:()=>g});var n=a(96540);function r(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function o(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var 
n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function l(e){for(var t=1;t=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var s=n.createContext({}),u=function(e){var t=n.useContext(s),a=t;return e&&(a="function"==typeof e?e(t):l(l({},t),e)),a},p=function(e){var t=u(e.components);return n.createElement(s.Provider,{value:t},e.children)},c="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},m=n.forwardRef((function(e,t){var a=e.components,r=e.mdxType,o=e.originalType,s=e.parentName,p=i(e,["components","mdxType","originalType","parentName"]),c=u(a),m=r,g=c["".concat(s,".").concat(m)]||c[m]||d[m]||o;return a?n.createElement(g,l(l({ref:t},p),{},{components:a})):n.createElement(g,l({ref:t},p))}));function g(e,t){var a=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=a.length,l=new Array(o);l[0]=m;var i={};for(var s in t)hasOwnProperty.call(t,s)&&(i[s]=t[s]);i.originalType=e,i[c]="string"==typeof e?e:r,l[1]=i;for(var u=2;u{a.d(t,{A:()=>l});var n=a(96540),r=a(20053);const o={tabItem:"tabItem_Ymn6"};function l(e){let{children:t,hidden:a,className:l}=e;return n.createElement("div",{role:"tabpanel",className:(0,r.A)(o.tabItem,l),hidden:a},t)}},11470:(e,t,a)=>{a.d(t,{A:()=>k});var n=a(58168),r=a(96540),o=a(20053),l=a(23104),i=a(56347),s=a(57485),u=a(31682),p=a(89466);function c(e){return function(e){return r.Children.map(e,(e=>{if(!e||(0,r.isValidElement)(e)&&function(e){const{props:t}=e;return!!t&&"object"==typeof t&&"value"in t}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" 
prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:t,label:a,attributes:n,default:r}}=e;return{value:t,label:a,attributes:n,default:r}}))}function d(e){const{values:t,children:a}=e;return(0,r.useMemo)((()=>{const e=t??c(a);return function(e){const t=(0,u.X)(e,((e,t)=>e.value===t.value));if(t.length>0)throw new Error(`Docusaurus error: Duplicate values "${t.map((e=>e.value)).join(", ")}" found in . Every value needs to be unique.`)}(e),e}),[t,a])}function m(e){let{value:t,tabValues:a}=e;return a.some((e=>e.value===t))}function g(e){let{queryString:t=!1,groupId:a}=e;const n=(0,i.W6)(),o=function(e){let{queryString:t=!1,groupId:a}=e;if("string"==typeof t)return t;if(!1===t)return null;if(!0===t&&!a)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. You can also provide an explicit value such as queryString="my-search-param".');return a??null}({queryString:t,groupId:a});return[(0,s.aZ)(o),(0,r.useCallback)((e=>{if(!o)return;const t=new URLSearchParams(n.location.search);t.set(o,e),n.replace({...n.location,search:t.toString()})}),[o,n])]}function y(e){const{defaultValue:t,queryString:a=!1,groupId:n}=e,o=d(e),[l,i]=(0,r.useState)((()=>function(e){let{defaultValue:t,tabValues:a}=e;if(0===a.length)throw new Error("Docusaurus error: the component requires at least one children component");if(t){if(!m({value:t,tabValues:a}))throw new Error(`Docusaurus error: The has a defaultValue "${t}" but none of its children has the corresponding value. Available values are: ${a.map((e=>e.value)).join(", ")}. 
If you intend to show no default tab, use defaultValue={null} instead.`);return t}const n=a.find((e=>e.default))??a[0];if(!n)throw new Error("Unexpected error: 0 tabValues");return n.value}({defaultValue:t,tabValues:o}))),[s,u]=g({queryString:a,groupId:n}),[c,y]=function(e){let{groupId:t}=e;const a=function(e){return e?`docusaurus.tab.${e}`:null}(t),[n,o]=(0,p.Dv)(a);return[n,(0,r.useCallback)((e=>{a&&o.set(e)}),[a,o])]}({groupId:n}),f=(()=>{const e=s??c;return m({value:e,tabValues:o})?e:null})();(0,r.useLayoutEffect)((()=>{f&&i(f)}),[f]);return{selectedValue:l,selectValue:(0,r.useCallback)((e=>{if(!m({value:e,tabValues:o}))throw new Error(`Can't select invalid tab value=${e}`);i(e),u(e),y(e)}),[u,y,o]),tabValues:o}}var f=a(92303);const b={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function h(e){let{className:t,block:a,selectedValue:i,selectValue:s,tabValues:u}=e;const p=[],{blockElementScrollPositionUntilNextRender:c}=(0,l.a_)(),d=e=>{const t=e.currentTarget,a=p.indexOf(t),n=u[a].value;n!==i&&(c(t),s(n))},m=e=>{let t=null;switch(e.key){case"Enter":d(e);break;case"ArrowRight":{const a=p.indexOf(e.currentTarget)+1;t=p[a]??p[0];break}case"ArrowLeft":{const a=p.indexOf(e.currentTarget)-1;t=p[a]??p[p.length-1];break}}t?.focus()};return r.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,o.A)("tabs",{"tabs--block":a},t)},u.map((e=>{let{value:t,label:a,attributes:l}=e;return r.createElement("li",(0,n.A)({role:"tab",tabIndex:i===t?0:-1,"aria-selected":i===t,key:t,ref:e=>p.push(e),onKeyDown:m,onClick:d},l,{className:(0,o.A)("tabs__item",b.tabItem,l?.className,{"tabs__item--active":i===t})}),a??t)})))}function w(e){let{lazy:t,children:a,selectedValue:n}=e;const o=(Array.isArray(a)?a:[a]).filter(Boolean);if(t){const e=o.find((e=>e.props.value===n));return e?(0,r.cloneElement)(e,{className:"margin-top--md"}):null}return 
r.createElement("div",{className:"margin-top--md"},o.map(((e,t)=>(0,r.cloneElement)(e,{key:t,hidden:e.props.value!==n}))))}function v(e){const t=y(e);return r.createElement("div",{className:(0,o.A)("tabs-container",b.tabList)},r.createElement(h,(0,n.A)({},e,t)),r.createElement(w,(0,n.A)({},e,t)))}function k(e){const t=(0,f.A)();return r.createElement(v,(0,n.A)({key:String(t)},e))}},7318:(e,t,a)=>{a.r(t),a.d(t,{assets:()=>p,contentTitle:()=>s,default:()=>g,frontMatter:()=>i,metadata:()=>u,toc:()=>c});var n=a(58168),r=(a(96540),a(15680)),o=a(11470),l=a(19365);const i={sidebar_position:7,title:"Deduplicate",id:"deduplicate",description:"Remove rows with duplicate values of specified columns",tags:["gems","dedupe","distinct","unique"]},s=void 0,u={unversionedId:"Spark/gems/transform/deduplicate",id:"Spark/gems/transform/deduplicate",title:"Deduplicate",description:"Remove rows with duplicate values of specified columns",source:"@site/docs/Spark/gems/transform/deduplicate.md",sourceDirName:"Spark/gems/transform",slug:"/Spark/gems/transform/deduplicate",permalink:"/Spark/gems/transform/deduplicate",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"dedupe",permalink:"/tags/dedupe"},{label:"distinct",permalink:"/tags/distinct"},{label:"unique",permalink:"/tags/unique"}],version:"current",sidebarPosition:7,frontMatter:{sidebar_position:7,title:"Deduplicate",id:"deduplicate",description:"Remove rows with duplicate values of specified columns",tags:["gems","dedupe","distinct","unique"]},sidebar:"defaultSidebar",previous:{title:"Limit",permalink:"/Spark/gems/transform/limit"},next:{title:"SetOperation",permalink:"/Spark/gems/transform/set-operation"}},p={},c=[{value:"Parameters",id:"parameters",level:2},{value:"Examples",id:"examples",level:2},{value:"Rows to keep - Any",id:"rows-to-keep---any",level:3},{value:"Rows to keep - First",id:"rows-to-keep---first",level:3},{value:"Rows to keep - Last",id:"rows-to-keep---last",level:3},{value:"Rows to keep - Unique 
Only",id:"rows-to-keep---unique-only",level:3},{value:"Rows to keep - Distinct Rows",id:"rows-to-keep---distinct-rows",level:3}],d={toc:c},m="wrapper";function g(e){let{components:t,...i}=e;return(0,r.yg)(m,(0,n.A)({},d,i,{components:t,mdxType:"MDXLayout"}),(0,r.yg)("h3",null,(0,r.yg)("span",{class:"badge rounded-pill text-bg-light"},"Spark Gem")),(0,r.yg)("p",null,"Removes rows with duplicate values of specified columns."),(0,r.yg)("h2",{id:"parameters"},"Parameters"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Description"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Dataframe"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Input dataframe"),(0,r.yg)("td",{parentName:"tr",align:"left"},"True")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Row to keep"),(0,r.yg)("td",{parentName:"tr",align:"left"},"- ",(0,r.yg)("inlineCode",{parentName:"td"},"Any"),": Keeps any one row among duplicates. Uses underlying ",(0,r.yg)("inlineCode",{parentName:"td"},"dropDuplicates")," construct",(0,r.yg)("br",null),"- ",(0,r.yg)("inlineCode",{parentName:"td"},"First"),": Keeps first occurrence of the duplicate row ",(0,r.yg)("br",null),"- ",(0,r.yg)("inlineCode",{parentName:"td"},"Last"),": Keeps last occurrence of the duplicate row ",(0,r.yg)("br",null),"- ",(0,r.yg)("inlineCode",{parentName:"td"},"Unique Only"),": Keeps rows that don't have duplicates ",(0,r.yg)("br",null),"- ",(0,r.yg)("inlineCode",{parentName:"td"},"Distinct Rows"),": Keeps all distinct rows. 
This is equivalent to performing a ",(0,r.yg)("inlineCode",{parentName:"td"},"df.distinct()")," operation ",(0,r.yg)("br",null),"Default is ",(0,r.yg)("inlineCode",{parentName:"td"},"Any")),(0,r.yg)("td",{parentName:"tr",align:"left"},"True")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Deduplicate columns"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Columns to consider while removing duplicate rows (not required for ",(0,r.yg)("inlineCode",{parentName:"td"},"Distinct Rows"),")"),(0,r.yg)("td",{parentName:"tr",align:"left"},"True")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Order columns"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Columns to sort Dataframe on before de-duping in case of ",(0,r.yg)("inlineCode",{parentName:"td"},"First")," and ",(0,r.yg)("inlineCode",{parentName:"td"},"Last")," rows to keep"),(0,r.yg)("td",{parentName:"tr",align:"left"},"False")))),(0,r.yg)("h2",{id:"examples"},"Examples"),(0,r.yg)("hr",null),(0,r.yg)("h3",{id:"rows-to-keep---any"},"Rows to keep - ",(0,r.yg)("inlineCode",{parentName:"h3"},"Any")),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Example usage of Deduplicate",src:a(12627).A,width:"940",height:"283"})),(0,r.yg)(o.A,{mdxType:"Tabs"},(0,r.yg)(l.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},'def dedup(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return in0.dropDuplicates(["tran_id"])\n'))),(0,r.yg)(l.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-scala"},'object dedup {\n def apply(spark: SparkSession, in: DataFrame): DataFrame = {\n in.dropDuplicates(List("tran_id"))\n }\n}\n')))),(0,r.yg)("hr",null),(0,r.yg)("h3",{id:"rows-to-keep---first"},"Rows to keep - ",(0,r.yg)("inlineCode",{parentName:"h3"},"First")),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Example usage of Deduplicate - 
First",src:a(1184).A,width:"2000",height:"845"})),(0,r.yg)(o.A,{mdxType:"Tabs"},(0,r.yg)(l.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},'def earliest_cust_order(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return in0\\\n .withColumn(\n "row_number",\n row_number()\\\n .over(Window\\\n .partitionBy("customer_id")\\\n .orderBy(col("order_dt").asc())\n )\\\n .filter(col("row_number") == lit(1))\\\n .drop("row_number")\n'))),(0,r.yg)(l.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-scala"},'object earliest_cust_order {\n def apply(spark: SparkSession, in: DataFrame): DataFrame = {\n import org.apache.spark.sql.expressions.Window\n in.withColumn(\n "row_number",\n row_number().over(\n Window\n .partitionBy("customer_id")\n .orderBy(col("order_date").asc)\n )\n )\n .filter(col("row_number") === lit(1))\n .drop("row_number")\n }\n}\n')))),(0,r.yg)("hr",null),(0,r.yg)("h3",{id:"rows-to-keep---last"},"Rows to keep - ",(0,r.yg)("inlineCode",{parentName:"h3"},"Last")),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Example usage of Deduplicate - Last",src:a(9184).A,width:"3974",height:"1678"})),(0,r.yg)(o.A,{mdxType:"Tabs"},(0,r.yg)(l.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},'def latest_cust_order(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return in0\\\n .withColumn(\n "row_number",\n row_number()\\\n .over(Window\\\n .partitionBy("customer_id")\\\n .orderBy(col("order_dt").asc())\n )\\\n .withColumn(\n "count",\n count("*")\\\n .over(Window\\\n .partitionBy("customer_id")\n )\\\n .filter(col("row_number") == col("count"))\\\n .drop("row_number")\\\n .drop("count")\n'))),(0,r.yg)(l.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-scala"},'object 
latest_cust_order {\n def apply(spark: SparkSession, in: DataFrame): DataFrame = {\n import org.apache.spark.sql.expressions.Window\n in.withColumn(\n "row_number",\n row_number().over(\n Window\n .partitionBy("customer_id")\n .orderBy(col("order_date").asc)\n )\n )\n .withColumn(\n "count",\n count("*").over(\n Window\n .partitionBy("customer_id")\n )\n )\n .filter(col("row_number") === col("count"))\n .drop("row_number")\n .drop("count")\n }\n}\n')))),(0,r.yg)("h3",{id:"rows-to-keep---unique-only"},"Rows to keep - ",(0,r.yg)("inlineCode",{parentName:"h3"},"Unique Only")),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Example usage of Deduplicate - Unique",src:a(59209).A,width:"1906",height:"809"})),(0,r.yg)(o.A,{mdxType:"Tabs"},(0,r.yg)(l.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},'def single_order_customers(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return in0\\\n .withColumn(\n "count",\n count("*")\\\n .over(Window\\\n .partitionBy("customer_id")\n )\\\n .filter(col("count") == lit(1))\\\n .drop("count")\n'))),(0,r.yg)(l.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-scala"},'object single_order_customers {\n def apply(spark: SparkSession, in: DataFrame): DataFrame = {\n import org.apache.spark.sql.expressions.Window\n in.withColumn(\n "count",\n count("*").over(\n Window\n .partitionBy("customer_id")\n )\n )\n .filter(col("count") === lit(1))\n .drop("count")\n }\n\n}\n')))),(0,r.yg)("h3",{id:"rows-to-keep---distinct-rows"},"Rows to keep - ",(0,r.yg)("inlineCode",{parentName:"h3"},"Distinct Rows")),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Example usage of Deduplicate - Distinct",src:a(21524).A,width:"2042",height:"846"})),(0,r.yg)(o.A,{mdxType:"Tabs"},(0,r.yg)(l.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},"def 
single_order_customers(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return in0.distinct()\n"))),(0,r.yg)(l.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-scala"},"object single_order_customers {\n def apply(spark: SparkSession, in: DataFrame): DataFrame = {\n in.distinct()\n }\n\n}\n")))))}g.isMDXComponent=!0},21524:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/dedup_eg_distinct-ce4df273ebe6eccd4b71e44f1fd66777.png"},1184:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/dedup_eg_first-332e45b4e9c2056f28e516fab0a25776.png"},9184:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/dedup_eg_last-bb06dca08a11dc0d78e142cddf134ea6.png"},59209:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/dedup_eg_unique-1265d14fa8c45ec25a4bdd2e874deb6b.png"},12627:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/deduplicate_eg_1-31f75cb0d1229ebd70c3aa9de7c45f13.png"}}]); \ No newline at end of file +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[40405],{15680:(e,t,a)=>{a.d(t,{xA:()=>p,yg:()=>g});var n=a(96540);function r(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function o(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function l(e){for(var t=1;t=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var s=n.createContext({}),u=function(e){var t=n.useContext(s),a=t;return e&&(a="function"==typeof e?e(t):l(l({},t),e)),a},p=function(e){var t=u(e.components);return n.createElement(s.Provider,{value:t},e.children)},c="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return 
n.createElement(n.Fragment,{},t)}},m=n.forwardRef((function(e,t){var a=e.components,r=e.mdxType,o=e.originalType,s=e.parentName,p=i(e,["components","mdxType","originalType","parentName"]),c=u(a),m=r,g=c["".concat(s,".").concat(m)]||c[m]||d[m]||o;return a?n.createElement(g,l(l({ref:t},p),{},{components:a})):n.createElement(g,l({ref:t},p))}));function g(e,t){var a=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=a.length,l=new Array(o);l[0]=m;var i={};for(var s in t)hasOwnProperty.call(t,s)&&(i[s]=t[s]);i.originalType=e,i[c]="string"==typeof e?e:r,l[1]=i;for(var u=2;u{a.d(t,{A:()=>l});var n=a(96540),r=a(20053);const o={tabItem:"tabItem_Ymn6"};function l(e){let{children:t,hidden:a,className:l}=e;return n.createElement("div",{role:"tabpanel",className:(0,r.A)(o.tabItem,l),hidden:a},t)}},11470:(e,t,a)=>{a.d(t,{A:()=>k});var n=a(58168),r=a(96540),o=a(20053),l=a(23104),i=a(56347),s=a(57485),u=a(31682),p=a(89466);function c(e){return function(e){return r.Children.map(e,(e=>{if(!e||(0,r.isValidElement)(e)&&function(e){const{props:t}=e;return!!t&&"object"==typeof t&&"value"in t}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:t,label:a,attributes:n,default:r}}=e;return{value:t,label:a,attributes:n,default:r}}))}function d(e){const{values:t,children:a}=e;return(0,r.useMemo)((()=>{const e=t??c(a);return function(e){const t=(0,u.X)(e,((e,t)=>e.value===t.value));if(t.length>0)throw new Error(`Docusaurus error: Duplicate values "${t.map((e=>e.value)).join(", ")}" found in . 
Every value needs to be unique.`)}(e),e}),[t,a])}function m(e){let{value:t,tabValues:a}=e;return a.some((e=>e.value===t))}function g(e){let{queryString:t=!1,groupId:a}=e;const n=(0,i.W6)(),o=function(e){let{queryString:t=!1,groupId:a}=e;if("string"==typeof t)return t;if(!1===t)return null;if(!0===t&&!a)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. You can also provide an explicit value such as queryString="my-search-param".');return a??null}({queryString:t,groupId:a});return[(0,s.aZ)(o),(0,r.useCallback)((e=>{if(!o)return;const t=new URLSearchParams(n.location.search);t.set(o,e),n.replace({...n.location,search:t.toString()})}),[o,n])]}function y(e){const{defaultValue:t,queryString:a=!1,groupId:n}=e,o=d(e),[l,i]=(0,r.useState)((()=>function(e){let{defaultValue:t,tabValues:a}=e;if(0===a.length)throw new Error("Docusaurus error: the component requires at least one children component");if(t){if(!m({value:t,tabValues:a}))throw new Error(`Docusaurus error: The has a defaultValue "${t}" but none of its children has the corresponding value. Available values are: ${a.map((e=>e.value)).join(", ")}. 
If you intend to show no default tab, use defaultValue={null} instead.`);return t}const n=a.find((e=>e.default))??a[0];if(!n)throw new Error("Unexpected error: 0 tabValues");return n.value}({defaultValue:t,tabValues:o}))),[s,u]=g({queryString:a,groupId:n}),[c,y]=function(e){let{groupId:t}=e;const a=function(e){return e?`docusaurus.tab.${e}`:null}(t),[n,o]=(0,p.Dv)(a);return[n,(0,r.useCallback)((e=>{a&&o.set(e)}),[a,o])]}({groupId:n}),f=(()=>{const e=s??c;return m({value:e,tabValues:o})?e:null})();(0,r.useLayoutEffect)((()=>{f&&i(f)}),[f]);return{selectedValue:l,selectValue:(0,r.useCallback)((e=>{if(!m({value:e,tabValues:o}))throw new Error(`Can't select invalid tab value=${e}`);i(e),u(e),y(e)}),[u,y,o]),tabValues:o}}var f=a(92303);const b={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function h(e){let{className:t,block:a,selectedValue:i,selectValue:s,tabValues:u}=e;const p=[],{blockElementScrollPositionUntilNextRender:c}=(0,l.a_)(),d=e=>{const t=e.currentTarget,a=p.indexOf(t),n=u[a].value;n!==i&&(c(t),s(n))},m=e=>{let t=null;switch(e.key){case"Enter":d(e);break;case"ArrowRight":{const a=p.indexOf(e.currentTarget)+1;t=p[a]??p[0];break}case"ArrowLeft":{const a=p.indexOf(e.currentTarget)-1;t=p[a]??p[p.length-1];break}}t?.focus()};return r.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,o.A)("tabs",{"tabs--block":a},t)},u.map((e=>{let{value:t,label:a,attributes:l}=e;return r.createElement("li",(0,n.A)({role:"tab",tabIndex:i===t?0:-1,"aria-selected":i===t,key:t,ref:e=>p.push(e),onKeyDown:m,onClick:d},l,{className:(0,o.A)("tabs__item",b.tabItem,l?.className,{"tabs__item--active":i===t})}),a??t)})))}function w(e){let{lazy:t,children:a,selectedValue:n}=e;const o=(Array.isArray(a)?a:[a]).filter(Boolean);if(t){const e=o.find((e=>e.props.value===n));return e?(0,r.cloneElement)(e,{className:"margin-top--md"}):null}return 
r.createElement("div",{className:"margin-top--md"},o.map(((e,t)=>(0,r.cloneElement)(e,{key:t,hidden:e.props.value!==n}))))}function v(e){const t=y(e);return r.createElement("div",{className:(0,o.A)("tabs-container",b.tabList)},r.createElement(h,(0,n.A)({},e,t)),r.createElement(w,(0,n.A)({},e,t)))}function k(e){const t=(0,f.A)();return r.createElement(v,(0,n.A)({key:String(t)},e))}},7318:(e,t,a)=>{a.r(t),a.d(t,{assets:()=>p,contentTitle:()=>s,default:()=>g,frontMatter:()=>i,metadata:()=>u,toc:()=>c});var n=a(58168),r=(a(96540),a(15680)),o=a(11470),l=a(19365);const i={sidebar_position:7,title:"Deduplicate",id:"deduplicate",description:"Remove rows with duplicate values of specified columns",tags:["gems","dedupe","distinct","unique"]},s=void 0,u={unversionedId:"Spark/gems/transform/deduplicate",id:"Spark/gems/transform/deduplicate",title:"Deduplicate",description:"Remove rows with duplicate values of specified columns",source:"@site/docs/Spark/gems/transform/deduplicate.md",sourceDirName:"Spark/gems/transform",slug:"/Spark/gems/transform/deduplicate",permalink:"/Spark/gems/transform/deduplicate",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"dedupe",permalink:"/tags/dedupe"},{label:"distinct",permalink:"/tags/distinct"},{label:"unique",permalink:"/tags/unique"}],version:"current",sidebarPosition:7,frontMatter:{sidebar_position:7,title:"Deduplicate",id:"deduplicate",description:"Remove rows with duplicate values of specified columns",tags:["gems","dedupe","distinct","unique"]},sidebar:"defaultSidebar",previous:{title:"Limit",permalink:"/Spark/gems/transform/limit"},next:{title:"SetOperation",permalink:"/Spark/gems/transform/set-operation"}},p={},c=[{value:"Parameters",id:"parameters",level:2},{value:"Examples",id:"examples",level:2},{value:"Rows to keep - Any",id:"rows-to-keep---any",level:3},{value:"Rows to keep - First",id:"rows-to-keep---first",level:3},{value:"Rows to keep - Last",id:"rows-to-keep---last",level:3},{value:"Rows to keep - Unique 
Only",id:"rows-to-keep---unique-only",level:3},{value:"Rows to keep - Distinct Rows",id:"rows-to-keep---distinct-rows",level:3}],d={toc:c},m="wrapper";function g(e){let{components:t,...i}=e;return(0,r.yg)(m,(0,n.A)({},d,i,{components:t,mdxType:"MDXLayout"}),(0,r.yg)("h3",null,(0,r.yg)("span",{class:"badge"},"Spark Gem")),(0,r.yg)("p",null,"Removes rows with duplicate values of specified columns."),(0,r.yg)("h2",{id:"parameters"},"Parameters"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Description"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Dataframe"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Input dataframe"),(0,r.yg)("td",{parentName:"tr",align:"left"},"True")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Row to keep"),(0,r.yg)("td",{parentName:"tr",align:"left"},"- ",(0,r.yg)("inlineCode",{parentName:"td"},"Any"),": Keeps any one row among duplicates. Uses underlying ",(0,r.yg)("inlineCode",{parentName:"td"},"dropDuplicates")," construct",(0,r.yg)("br",null),"- ",(0,r.yg)("inlineCode",{parentName:"td"},"First"),": Keeps first occurrence of the duplicate row ",(0,r.yg)("br",null),"- ",(0,r.yg)("inlineCode",{parentName:"td"},"Last"),": Keeps last occurrence of the duplicate row ",(0,r.yg)("br",null),"- ",(0,r.yg)("inlineCode",{parentName:"td"},"Unique Only"),": Keeps rows that don't have duplicates ",(0,r.yg)("br",null),"- ",(0,r.yg)("inlineCode",{parentName:"td"},"Distinct Rows"),": Keeps all distinct rows. 
This is equivalent to performing a ",(0,r.yg)("inlineCode",{parentName:"td"},"df.distinct()")," operation ",(0,r.yg)("br",null),"Default is ",(0,r.yg)("inlineCode",{parentName:"td"},"Any")),(0,r.yg)("td",{parentName:"tr",align:"left"},"True")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Deduplicate columns"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Columns to consider while removing duplicate rows (not required for ",(0,r.yg)("inlineCode",{parentName:"td"},"Distinct Rows"),")"),(0,r.yg)("td",{parentName:"tr",align:"left"},"True")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Order columns"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Columns to sort Dataframe on before de-duping in case of ",(0,r.yg)("inlineCode",{parentName:"td"},"First")," and ",(0,r.yg)("inlineCode",{parentName:"td"},"Last")," rows to keep"),(0,r.yg)("td",{parentName:"tr",align:"left"},"False")))),(0,r.yg)("h2",{id:"examples"},"Examples"),(0,r.yg)("hr",null),(0,r.yg)("h3",{id:"rows-to-keep---any"},"Rows to keep - ",(0,r.yg)("inlineCode",{parentName:"h3"},"Any")),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Example usage of Deduplicate",src:a(12627).A,width:"940",height:"283"})),(0,r.yg)(o.A,{mdxType:"Tabs"},(0,r.yg)(l.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},'def dedup(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return in0.dropDuplicates(["tran_id"])\n'))),(0,r.yg)(l.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-scala"},'object dedup {\n def apply(spark: SparkSession, in: DataFrame): DataFrame = {\n in.dropDuplicates(List("tran_id"))\n }\n}\n')))),(0,r.yg)("hr",null),(0,r.yg)("h3",{id:"rows-to-keep---first"},"Rows to keep - ",(0,r.yg)("inlineCode",{parentName:"h3"},"First")),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Example usage of Deduplicate - 
First",src:a(1184).A,width:"2000",height:"845"})),(0,r.yg)(o.A,{mdxType:"Tabs"},(0,r.yg)(l.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},'def earliest_cust_order(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return in0\\\n .withColumn(\n "row_number",\n row_number()\\\n .over(Window\\\n .partitionBy("customer_id")\\\n .orderBy(col("order_dt").asc())\n )\\\n .filter(col("row_number") == lit(1))\\\n .drop("row_number")\n'))),(0,r.yg)(l.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-scala"},'object earliest_cust_order {\n def apply(spark: SparkSession, in: DataFrame): DataFrame = {\n import org.apache.spark.sql.expressions.Window\n in.withColumn(\n "row_number",\n row_number().over(\n Window\n .partitionBy("customer_id")\n .orderBy(col("order_date").asc)\n )\n )\n .filter(col("row_number") === lit(1))\n .drop("row_number")\n }\n}\n')))),(0,r.yg)("hr",null),(0,r.yg)("h3",{id:"rows-to-keep---last"},"Rows to keep - ",(0,r.yg)("inlineCode",{parentName:"h3"},"Last")),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Example usage of Deduplicate - Last",src:a(9184).A,width:"3974",height:"1678"})),(0,r.yg)(o.A,{mdxType:"Tabs"},(0,r.yg)(l.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},'def latest_cust_order(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return in0\\\n .withColumn(\n "row_number",\n row_number()\\\n .over(Window\\\n .partitionBy("customer_id")\\\n .orderBy(col("order_dt").asc())\n )\\\n .withColumn(\n "count",\n count("*")\\\n .over(Window\\\n .partitionBy("customer_id")\n )\\\n .filter(col("row_number") == col("count"))\\\n .drop("row_number")\\\n .drop("count")\n'))),(0,r.yg)(l.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-scala"},'object 
latest_cust_order {\n def apply(spark: SparkSession, in: DataFrame): DataFrame = {\n import org.apache.spark.sql.expressions.Window\n in.withColumn(\n "row_number",\n row_number().over(\n Window\n .partitionBy("customer_id")\n .orderBy(col("order_date").asc)\n )\n )\n .withColumn(\n "count",\n count("*").over(\n Window\n .partitionBy("customer_id")\n )\n )\n .filter(col("row_number") === col("count"))\n .drop("row_number")\n .drop("count")\n }\n}\n')))),(0,r.yg)("h3",{id:"rows-to-keep---unique-only"},"Rows to keep - ",(0,r.yg)("inlineCode",{parentName:"h3"},"Unique Only")),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Example usage of Deduplicate - Unique",src:a(59209).A,width:"1906",height:"809"})),(0,r.yg)(o.A,{mdxType:"Tabs"},(0,r.yg)(l.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},'def single_order_customers(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return in0\\\n .withColumn(\n "count",\n count("*")\\\n .over(Window\\\n .partitionBy("customer_id")\n )\\\n .filter(col("count") == lit(1))\\\n .drop("count")\n'))),(0,r.yg)(l.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-scala"},'object single_order_customers {\n def apply(spark: SparkSession, in: DataFrame): DataFrame = {\n import org.apache.spark.sql.expressions.Window\n in.withColumn(\n "count",\n count("*").over(\n Window\n .partitionBy("customer_id")\n )\n )\n .filter(col("count") === lit(1))\n .drop("count")\n }\n\n}\n')))),(0,r.yg)("h3",{id:"rows-to-keep---distinct-rows"},"Rows to keep - ",(0,r.yg)("inlineCode",{parentName:"h3"},"Distinct Rows")),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Example usage of Deduplicate - Distinct",src:a(21524).A,width:"2042",height:"846"})),(0,r.yg)(o.A,{mdxType:"Tabs"},(0,r.yg)(l.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},"def 
single_order_customers(spark: SparkSession, in0: DataFrame) -> DataFrame:\n return in0.distinct()\n"))),(0,r.yg)(l.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-scala"},"object single_order_customers {\n def apply(spark: SparkSession, in: DataFrame): DataFrame = {\n in.distinct()\n }\n\n}\n")))))}g.isMDXComponent=!0},21524:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/dedup_eg_distinct-ce4df273ebe6eccd4b71e44f1fd66777.png"},1184:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/dedup_eg_first-332e45b4e9c2056f28e516fab0a25776.png"},9184:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/dedup_eg_last-bb06dca08a11dc0d78e142cddf134ea6.png"},59209:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/dedup_eg_unique-1265d14fa8c45ec25a4bdd2e874deb6b.png"},12627:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/deduplicate_eg_1-31f75cb0d1229ebd70c3aa9de7c45f13.png"}}]); \ No newline at end of file diff --git a/assets/js/fec9a08a.1405b60a.js b/assets/js/fec9a08a.f73987e7.js similarity index 61% rename from assets/js/fec9a08a.1405b60a.js rename to assets/js/fec9a08a.f73987e7.js index cdd74070f1..95dca4e9fc 100644 --- a/assets/js/fec9a08a.1405b60a.js +++ b/assets/js/fec9a08a.f73987e7.js @@ -1 +1 @@ -"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[31902],{15680:(e,t,a)=>{a.d(t,{xA:()=>u,yg:()=>g});var n=a(96540);function r(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function l(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function i(e){for(var t=1;t=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return 
r}var s=n.createContext({}),p=function(e){var t=n.useContext(s),a=t;return e&&(a="function"==typeof e?e(t):i(i({},t),e)),a},u=function(e){var t=p(e.components);return n.createElement(s.Provider,{value:t},e.children)},c="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},d=n.forwardRef((function(e,t){var a=e.components,r=e.mdxType,l=e.originalType,s=e.parentName,u=o(e,["components","mdxType","originalType","parentName"]),c=p(a),d=r,g=c["".concat(s,".").concat(d)]||c[d]||m[d]||l;return a?n.createElement(g,i(i({ref:t},u),{},{components:a})):n.createElement(g,i({ref:t},u))}));function g(e,t){var a=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var l=a.length,i=new Array(l);i[0]=d;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[c]="string"==typeof e?e:r,i[1]=o;for(var p=2;p{a.d(t,{A:()=>i});var n=a(96540),r=a(20053);const l={tabItem:"tabItem_Ymn6"};function i(e){let{children:t,hidden:a,className:i}=e;return n.createElement("div",{role:"tabpanel",className:(0,r.A)(l.tabItem,i),hidden:a},t)}},11470:(e,t,a)=>{a.d(t,{A:()=>A});var n=a(58168),r=a(96540),l=a(20053),i=a(23104),o=a(56347),s=a(57485),p=a(31682),u=a(89466);function c(e){return function(e){return r.Children.map(e,(e=>{if(!e||(0,r.isValidElement)(e)&&function(e){const{props:t}=e;return!!t&&"object"==typeof t&&"value"in t}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:t,label:a,attributes:n,default:r}}=e;return{value:t,label:a,attributes:n,default:r}}))}function m(e){const{values:t,children:a}=e;return(0,r.useMemo)((()=>{const e=t??c(a);return function(e){const t=(0,p.X)(e,((e,t)=>e.value===t.value));if(t.length>0)throw new Error(`Docusaurus error: Duplicate values "${t.map((e=>e.value)).join(", ")}" found in . 
Every value needs to be unique.`)}(e),e}),[t,a])}function d(e){let{value:t,tabValues:a}=e;return a.some((e=>e.value===t))}function g(e){let{queryString:t=!1,groupId:a}=e;const n=(0,o.W6)(),l=function(e){let{queryString:t=!1,groupId:a}=e;if("string"==typeof t)return t;if(!1===t)return null;if(!0===t&&!a)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. You can also provide an explicit value such as queryString="my-search-param".');return a??null}({queryString:t,groupId:a});return[(0,s.aZ)(l),(0,r.useCallback)((e=>{if(!l)return;const t=new URLSearchParams(n.location.search);t.set(l,e),n.replace({...n.location,search:t.toString()})}),[l,n])]}function y(e){const{defaultValue:t,queryString:a=!1,groupId:n}=e,l=m(e),[i,o]=(0,r.useState)((()=>function(e){let{defaultValue:t,tabValues:a}=e;if(0===a.length)throw new Error("Docusaurus error: the component requires at least one children component");if(t){if(!d({value:t,tabValues:a}))throw new Error(`Docusaurus error: The has a defaultValue "${t}" but none of its children has the corresponding value. Available values are: ${a.map((e=>e.value)).join(", ")}. 
If you intend to show no default tab, use defaultValue={null} instead.`);return t}const n=a.find((e=>e.default))??a[0];if(!n)throw new Error("Unexpected error: 0 tabValues");return n.value}({defaultValue:t,tabValues:l}))),[s,p]=g({queryString:a,groupId:n}),[c,y]=function(e){let{groupId:t}=e;const a=function(e){return e?`docusaurus.tab.${e}`:null}(t),[n,l]=(0,u.Dv)(a);return[n,(0,r.useCallback)((e=>{a&&l.set(e)}),[a,l])]}({groupId:n}),f=(()=>{const e=s??c;return d({value:e,tabValues:l})?e:null})();(0,r.useLayoutEffect)((()=>{f&&o(f)}),[f]);return{selectedValue:i,selectValue:(0,r.useCallback)((e=>{if(!d({value:e,tabValues:l}))throw new Error(`Can't select invalid tab value=${e}`);o(e),p(e),y(e)}),[p,y,l]),tabValues:l}}var f=a(92303);const b={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function h(e){let{className:t,block:a,selectedValue:o,selectValue:s,tabValues:p}=e;const u=[],{blockElementScrollPositionUntilNextRender:c}=(0,i.a_)(),m=e=>{const t=e.currentTarget,a=u.indexOf(t),n=p[a].value;n!==o&&(c(t),s(n))},d=e=>{let t=null;switch(e.key){case"Enter":m(e);break;case"ArrowRight":{const a=u.indexOf(e.currentTarget)+1;t=u[a]??u[0];break}case"ArrowLeft":{const a=u.indexOf(e.currentTarget)-1;t=u[a]??u[u.length-1];break}}t?.focus()};return r.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,l.A)("tabs",{"tabs--block":a},t)},p.map((e=>{let{value:t,label:a,attributes:i}=e;return r.createElement("li",(0,n.A)({role:"tab",tabIndex:o===t?0:-1,"aria-selected":o===t,key:t,ref:e=>u.push(e),onKeyDown:d,onClick:m},i,{className:(0,l.A)("tabs__item",b.tabItem,i?.className,{"tabs__item--active":o===t})}),a??t)})))}function v(e){let{lazy:t,children:a,selectedValue:n}=e;const l=(Array.isArray(a)?a:[a]).filter(Boolean);if(t){const e=l.find((e=>e.props.value===n));return e?(0,r.cloneElement)(e,{className:"margin-top--md"}):null}return 
r.createElement("div",{className:"margin-top--md"},l.map(((e,t)=>(0,r.cloneElement)(e,{key:t,hidden:e.props.value!==n}))))}function N(e){const t=y(e);return r.createElement("div",{className:(0,l.A)("tabs-container",b.tabList)},r.createElement(h,(0,n.A)({},e,t)),r.createElement(v,(0,n.A)({},e,t)))}function A(e){const t=(0,f.A)();return r.createElement(N,(0,n.A)({key:String(t)},e))}},73330:(e,t,a)=>{a.r(t),a.d(t,{assets:()=>u,contentTitle:()=>s,default:()=>g,frontMatter:()=>o,metadata:()=>p,toc:()=>c});var n=a(58168),r=(a(96540),a(15680)),l=a(11470),i=a(19365);const o={sidebar_position:8,title:"SetOperation",id:"set-operation",description:"Union, Intersect and Difference",tags:["gems","set","union","intersect","difference"]},s=void 0,p={unversionedId:"Spark/gems/transform/set-operation",id:"Spark/gems/transform/set-operation",title:"SetOperation",description:"Union, Intersect and Difference",source:"@site/docs/Spark/gems/transform/set-operation.md",sourceDirName:"Spark/gems/transform",slug:"/Spark/gems/transform/set-operation",permalink:"/Spark/gems/transform/set-operation",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"set",permalink:"/tags/set"},{label:"union",permalink:"/tags/union"},{label:"intersect",permalink:"/tags/intersect"},{label:"difference",permalink:"/tags/difference"}],version:"current",sidebarPosition:8,frontMatter:{sidebar_position:8,title:"SetOperation",id:"set-operation",description:"Union, Intersect and Difference",tags:["gems","set","union","intersect","difference"]},sidebar:"defaultSidebar",previous:{title:"Deduplicate",permalink:"/Spark/gems/transform/deduplicate"},next:{title:"WindowFunction",permalink:"/Spark/gems/transform/window-function"}},u={},c=[{value:"Parameters",id:"parameters",level:3},{value:"Examples",id:"examples",level:3},{value:"Operation Type - Union",id:"operation-type---union",level:4},{value:"Operation Type - Intersect All",id:"operation-type---intersect-all",level:4},{value:"Operation Type - Except 
All",id:"operation-type---except-all",level:4}],m={toc:c},d="wrapper";function g(e){let{components:t,...o}=e;return(0,r.yg)(d,(0,n.A)({},m,o,{components:t,mdxType:"MDXLayout"}),(0,r.yg)("h3",null,(0,r.yg)("span",{class:"badge rounded-pill text-bg-light"},"Spark Gem")),(0,r.yg)("p",null,"Use the SetOperation Gem to perform addition or subtraction of rows from DataFrames with identical schemas and different data."),(0,r.yg)("h3",{id:"parameters"},"Parameters"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Description"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"DataFrame 1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"First input DataFrame"),(0,r.yg)("td",{parentName:"tr",align:"left"},"True")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"DataFrame 2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Second input DataFrame"),(0,r.yg)("td",{parentName:"tr",align:"left"},"True")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"DataFrame N"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Nth input DataFrame"),(0,r.yg)("td",{parentName:"tr",align:"left"},"False")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Operation type"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Operation to perform",(0,r.yg)("br",null),"- ",(0,r.yg)("inlineCode",{parentName:"td"},"Union"),": Returns a DataFrame containing rows in any one of the input DataFrames, while preserving duplicates.",(0,r.yg)("br",null),"- ",(0,r.yg)("inlineCode",{parentName:"td"},"Intersect All"),": Returns a DataFrame containing rows in all of the input DataFrames, while preserving duplicates. 
",(0,r.yg)("br",null),"- ",(0,r.yg)("inlineCode",{parentName:"td"},"Except All"),": Returns a DataFrames containing rows in the first DataFrame, but not in the other DataFrames, while preserving duplicates."),(0,r.yg)("td",{parentName:"tr",align:"left"},"True")))),(0,r.yg)("admonition",{type:"info"},(0,r.yg)("p",{parentName:"admonition"},"To add more input DataFrames, simply click ",(0,r.yg)("inlineCode",{parentName:"p"},"+")," icon on the left sidebar\n",(0,r.yg)("img",{alt:"Set Operation - Add input dataframe",src:a(45740).A,width:"556",height:"136"}))),(0,r.yg)("h3",{id:"examples"},"Examples"),(0,r.yg)("hr",null),(0,r.yg)("h4",{id:"operation-type---union"},"Operation Type - ",(0,r.yg)("inlineCode",{parentName:"h4"},"Union")),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Example usage of Set Operation - Union",src:a(65864).A,width:"1798",height:"904"})),(0,r.yg)(l.A,{mdxType:"Tabs"},(0,r.yg)(i.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},"def union(spark: SparkSession, in0: DataFrame, in1: DataFrame, ) -> DataFrame:\n return in0.unionAll(in1)\n"))),(0,r.yg)(i.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-scala"},"object union {\n def apply(spark: SparkSession, in0: DataFrame, in1: DataFrame): DataFrame =\n in0.unionAll(in1)\n}\n")))),(0,r.yg)("hr",null),(0,r.yg)("h4",{id:"operation-type---intersect-all"},"Operation Type - ",(0,r.yg)("inlineCode",{parentName:"h4"},"Intersect All")),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Example usage of Set Operation - Intersect All",src:a(62272).A,width:"1822",height:"868"})),(0,r.yg)(l.A,{mdxType:"Tabs"},(0,r.yg)(i.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},"def intersectAll(spark: SparkSession, in0: DataFrame, in1: DataFrame, ) -> DataFrame:\n return 
in0.intersectAll(in1)\n"))),(0,r.yg)(i.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-scala"},"object intersectAll {\n def apply(spark: SparkSession, in0: DataFrame, in1: DataFrame): DataFrame =\n in0.intersectAll(in1)\n}\n")))),(0,r.yg)("hr",null),(0,r.yg)("h4",{id:"operation-type---except-all"},"Operation Type - ",(0,r.yg)("inlineCode",{parentName:"h4"},"Except All")),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Example usage of Set Operation - Except All",src:a(86473).A,width:"1812",height:"783"})),(0,r.yg)(l.A,{mdxType:"Tabs"},(0,r.yg)(i.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},"def exceptAll(spark: SparkSession, in0: DataFrame, in1: DataFrame, ) -> DataFrame:\n return in0.exceptAll(in1)\n"))),(0,r.yg)(i.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-scala"},"object exceptAll {\n def apply(spark: SparkSession, in0: DataFrame, in1: DataFrame): DataFrame =\n in0.exceptAll(in1)\n}\n")))))}g.isMDXComponent=!0},45740:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/set_add_inputs-bdf7b6691fc473ea1eee60161a5b9786.png"},65864:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/set_eg_1-46701e21c8cebc491888db3d556e8345.png"},62272:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/set_eg_2-fe430fbd13a44d729c0c0ef00e7ca2d9.png"},86473:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/set_eg_3-5ad43a57037f0e1ada3b964fcf2934a7.png"}}]); \ No newline at end of file +"use strict";(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[31902],{15680:(e,t,a)=>{a.d(t,{xA:()=>u,yg:()=>g});var n=a(96540);function r(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function l(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var 
n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function i(e){for(var t=1;t=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var s=n.createContext({}),p=function(e){var t=n.useContext(s),a=t;return e&&(a="function"==typeof e?e(t):i(i({},t),e)),a},u=function(e){var t=p(e.components);return n.createElement(s.Provider,{value:t},e.children)},c="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},d=n.forwardRef((function(e,t){var a=e.components,r=e.mdxType,l=e.originalType,s=e.parentName,u=o(e,["components","mdxType","originalType","parentName"]),c=p(a),d=r,g=c["".concat(s,".").concat(d)]||c[d]||m[d]||l;return a?n.createElement(g,i(i({ref:t},u),{},{components:a})):n.createElement(g,i({ref:t},u))}));function g(e,t){var a=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var l=a.length,i=new Array(l);i[0]=d;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[c]="string"==typeof e?e:r,i[1]=o;for(var p=2;p{a.d(t,{A:()=>i});var n=a(96540),r=a(20053);const l={tabItem:"tabItem_Ymn6"};function i(e){let{children:t,hidden:a,className:i}=e;return n.createElement("div",{role:"tabpanel",className:(0,r.A)(l.tabItem,i),hidden:a},t)}},11470:(e,t,a)=>{a.d(t,{A:()=>A});var n=a(58168),r=a(96540),l=a(20053),i=a(23104),o=a(56347),s=a(57485),p=a(31682),u=a(89466);function c(e){return function(e){return r.Children.map(e,(e=>{if(!e||(0,r.isValidElement)(e)&&function(e){const{props:t}=e;return!!t&&"object"==typeof t&&"value"in t}(e))return e;throw new Error(`Docusaurus error: Bad child <${"string"==typeof e.type?e.type:e.type.name}>: all children of the component should be , and every should have a unique "value" 
prop.`)}))?.filter(Boolean)??[]}(e).map((e=>{let{props:{value:t,label:a,attributes:n,default:r}}=e;return{value:t,label:a,attributes:n,default:r}}))}function m(e){const{values:t,children:a}=e;return(0,r.useMemo)((()=>{const e=t??c(a);return function(e){const t=(0,p.X)(e,((e,t)=>e.value===t.value));if(t.length>0)throw new Error(`Docusaurus error: Duplicate values "${t.map((e=>e.value)).join(", ")}" found in . Every value needs to be unique.`)}(e),e}),[t,a])}function d(e){let{value:t,tabValues:a}=e;return a.some((e=>e.value===t))}function g(e){let{queryString:t=!1,groupId:a}=e;const n=(0,o.W6)(),l=function(e){let{queryString:t=!1,groupId:a}=e;if("string"==typeof t)return t;if(!1===t)return null;if(!0===t&&!a)throw new Error('Docusaurus error: The component groupId prop is required if queryString=true, because this value is used as the search param name. You can also provide an explicit value such as queryString="my-search-param".');return a??null}({queryString:t,groupId:a});return[(0,s.aZ)(l),(0,r.useCallback)((e=>{if(!l)return;const t=new URLSearchParams(n.location.search);t.set(l,e),n.replace({...n.location,search:t.toString()})}),[l,n])]}function y(e){const{defaultValue:t,queryString:a=!1,groupId:n}=e,l=m(e),[i,o]=(0,r.useState)((()=>function(e){let{defaultValue:t,tabValues:a}=e;if(0===a.length)throw new Error("Docusaurus error: the component requires at least one children component");if(t){if(!d({value:t,tabValues:a}))throw new Error(`Docusaurus error: The has a defaultValue "${t}" but none of its children has the corresponding value. Available values are: ${a.map((e=>e.value)).join(", ")}. 
If you intend to show no default tab, use defaultValue={null} instead.`);return t}const n=a.find((e=>e.default))??a[0];if(!n)throw new Error("Unexpected error: 0 tabValues");return n.value}({defaultValue:t,tabValues:l}))),[s,p]=g({queryString:a,groupId:n}),[c,y]=function(e){let{groupId:t}=e;const a=function(e){return e?`docusaurus.tab.${e}`:null}(t),[n,l]=(0,u.Dv)(a);return[n,(0,r.useCallback)((e=>{a&&l.set(e)}),[a,l])]}({groupId:n}),f=(()=>{const e=s??c;return d({value:e,tabValues:l})?e:null})();(0,r.useLayoutEffect)((()=>{f&&o(f)}),[f]);return{selectedValue:i,selectValue:(0,r.useCallback)((e=>{if(!d({value:e,tabValues:l}))throw new Error(`Can't select invalid tab value=${e}`);o(e),p(e),y(e)}),[p,y,l]),tabValues:l}}var f=a(92303);const b={tabList:"tabList__CuJ",tabItem:"tabItem_LNqP"};function h(e){let{className:t,block:a,selectedValue:o,selectValue:s,tabValues:p}=e;const u=[],{blockElementScrollPositionUntilNextRender:c}=(0,i.a_)(),m=e=>{const t=e.currentTarget,a=u.indexOf(t),n=p[a].value;n!==o&&(c(t),s(n))},d=e=>{let t=null;switch(e.key){case"Enter":m(e);break;case"ArrowRight":{const a=u.indexOf(e.currentTarget)+1;t=u[a]??u[0];break}case"ArrowLeft":{const a=u.indexOf(e.currentTarget)-1;t=u[a]??u[u.length-1];break}}t?.focus()};return r.createElement("ul",{role:"tablist","aria-orientation":"horizontal",className:(0,l.A)("tabs",{"tabs--block":a},t)},p.map((e=>{let{value:t,label:a,attributes:i}=e;return r.createElement("li",(0,n.A)({role:"tab",tabIndex:o===t?0:-1,"aria-selected":o===t,key:t,ref:e=>u.push(e),onKeyDown:d,onClick:m},i,{className:(0,l.A)("tabs__item",b.tabItem,i?.className,{"tabs__item--active":o===t})}),a??t)})))}function v(e){let{lazy:t,children:a,selectedValue:n}=e;const l=(Array.isArray(a)?a:[a]).filter(Boolean);if(t){const e=l.find((e=>e.props.value===n));return e?(0,r.cloneElement)(e,{className:"margin-top--md"}):null}return 
r.createElement("div",{className:"margin-top--md"},l.map(((e,t)=>(0,r.cloneElement)(e,{key:t,hidden:e.props.value!==n}))))}function N(e){const t=y(e);return r.createElement("div",{className:(0,l.A)("tabs-container",b.tabList)},r.createElement(h,(0,n.A)({},e,t)),r.createElement(v,(0,n.A)({},e,t)))}function A(e){const t=(0,f.A)();return r.createElement(N,(0,n.A)({key:String(t)},e))}},73330:(e,t,a)=>{a.r(t),a.d(t,{assets:()=>u,contentTitle:()=>s,default:()=>g,frontMatter:()=>o,metadata:()=>p,toc:()=>c});var n=a(58168),r=(a(96540),a(15680)),l=a(11470),i=a(19365);const o={sidebar_position:8,title:"SetOperation",id:"set-operation",description:"Union, Intersect and Difference",tags:["gems","set","union","intersect","difference"]},s=void 0,p={unversionedId:"Spark/gems/transform/set-operation",id:"Spark/gems/transform/set-operation",title:"SetOperation",description:"Union, Intersect and Difference",source:"@site/docs/Spark/gems/transform/set-operation.md",sourceDirName:"Spark/gems/transform",slug:"/Spark/gems/transform/set-operation",permalink:"/Spark/gems/transform/set-operation",draft:!1,tags:[{label:"gems",permalink:"/tags/gems"},{label:"set",permalink:"/tags/set"},{label:"union",permalink:"/tags/union"},{label:"intersect",permalink:"/tags/intersect"},{label:"difference",permalink:"/tags/difference"}],version:"current",sidebarPosition:8,frontMatter:{sidebar_position:8,title:"SetOperation",id:"set-operation",description:"Union, Intersect and Difference",tags:["gems","set","union","intersect","difference"]},sidebar:"defaultSidebar",previous:{title:"Deduplicate",permalink:"/Spark/gems/transform/deduplicate"},next:{title:"WindowFunction",permalink:"/Spark/gems/transform/window-function"}},u={},c=[{value:"Parameters",id:"parameters",level:3},{value:"Examples",id:"examples",level:3},{value:"Operation Type - Union",id:"operation-type---union",level:4},{value:"Operation Type - Intersect All",id:"operation-type---intersect-all",level:4},{value:"Operation Type - Except 
All",id:"operation-type---except-all",level:4}],m={toc:c},d="wrapper";function g(e){let{components:t,...o}=e;return(0,r.yg)(d,(0,n.A)({},m,o,{components:t,mdxType:"MDXLayout"}),(0,r.yg)("h3",null,(0,r.yg)("span",{class:"badge"},"Spark Gem")),(0,r.yg)("p",null,"Use the SetOperation Gem to perform addition or subtraction of rows from DataFrames with identical schemas and different data."),(0,r.yg)("h3",{id:"parameters"},"Parameters"),(0,r.yg)("table",null,(0,r.yg)("thead",{parentName:"table"},(0,r.yg)("tr",{parentName:"thead"},(0,r.yg)("th",{parentName:"tr",align:"left"},"Parameter"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Description"),(0,r.yg)("th",{parentName:"tr",align:"left"},"Required"))),(0,r.yg)("tbody",{parentName:"table"},(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"DataFrame 1"),(0,r.yg)("td",{parentName:"tr",align:"left"},"First input DataFrame"),(0,r.yg)("td",{parentName:"tr",align:"left"},"True")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"DataFrame 2"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Second input DataFrame"),(0,r.yg)("td",{parentName:"tr",align:"left"},"True")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"DataFrame N"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Nth input DataFrame"),(0,r.yg)("td",{parentName:"tr",align:"left"},"False")),(0,r.yg)("tr",{parentName:"tbody"},(0,r.yg)("td",{parentName:"tr",align:"left"},"Operation type"),(0,r.yg)("td",{parentName:"tr",align:"left"},"Operation to perform",(0,r.yg)("br",null),"- ",(0,r.yg)("inlineCode",{parentName:"td"},"Union"),": Returns a DataFrame containing rows in any one of the input DataFrames, while preserving duplicates.",(0,r.yg)("br",null),"- ",(0,r.yg)("inlineCode",{parentName:"td"},"Intersect All"),": Returns a DataFrame containing rows in all of the input DataFrames, while preserving duplicates. 
",(0,r.yg)("br",null),"- ",(0,r.yg)("inlineCode",{parentName:"td"},"Except All"),": Returns a DataFrames containing rows in the first DataFrame, but not in the other DataFrames, while preserving duplicates."),(0,r.yg)("td",{parentName:"tr",align:"left"},"True")))),(0,r.yg)("admonition",{type:"info"},(0,r.yg)("p",{parentName:"admonition"},"To add more input DataFrames, simply click ",(0,r.yg)("inlineCode",{parentName:"p"},"+")," icon on the left sidebar\n",(0,r.yg)("img",{alt:"Set Operation - Add input dataframe",src:a(45740).A,width:"556",height:"136"}))),(0,r.yg)("h3",{id:"examples"},"Examples"),(0,r.yg)("hr",null),(0,r.yg)("h4",{id:"operation-type---union"},"Operation Type - ",(0,r.yg)("inlineCode",{parentName:"h4"},"Union")),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Example usage of Set Operation - Union",src:a(65864).A,width:"1798",height:"904"})),(0,r.yg)(l.A,{mdxType:"Tabs"},(0,r.yg)(i.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},"def union(spark: SparkSession, in0: DataFrame, in1: DataFrame, ) -> DataFrame:\n return in0.unionAll(in1)\n"))),(0,r.yg)(i.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-scala"},"object union {\n def apply(spark: SparkSession, in0: DataFrame, in1: DataFrame): DataFrame =\n in0.unionAll(in1)\n}\n")))),(0,r.yg)("hr",null),(0,r.yg)("h4",{id:"operation-type---intersect-all"},"Operation Type - ",(0,r.yg)("inlineCode",{parentName:"h4"},"Intersect All")),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Example usage of Set Operation - Intersect All",src:a(62272).A,width:"1822",height:"868"})),(0,r.yg)(l.A,{mdxType:"Tabs"},(0,r.yg)(i.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},"def intersectAll(spark: SparkSession, in0: DataFrame, in1: DataFrame, ) -> DataFrame:\n return 
in0.intersectAll(in1)\n"))),(0,r.yg)(i.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-scala"},"object intersectAll {\n def apply(spark: SparkSession, in0: DataFrame, in1: DataFrame): DataFrame =\n in0.intersectAll(in1)\n}\n")))),(0,r.yg)("hr",null),(0,r.yg)("h4",{id:"operation-type---except-all"},"Operation Type - ",(0,r.yg)("inlineCode",{parentName:"h4"},"Except All")),(0,r.yg)("p",null,(0,r.yg)("img",{alt:"Example usage of Set Operation - Except All",src:a(86473).A,width:"1812",height:"783"})),(0,r.yg)(l.A,{mdxType:"Tabs"},(0,r.yg)(i.A,{value:"py",label:"Python",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-py"},"def exceptAll(spark: SparkSession, in0: DataFrame, in1: DataFrame, ) -> DataFrame:\n return in0.exceptAll(in1)\n"))),(0,r.yg)(i.A,{value:"scala",label:"Scala",mdxType:"TabItem"},(0,r.yg)("pre",null,(0,r.yg)("code",{parentName:"pre",className:"language-scala"},"object exceptAll {\n def apply(spark: SparkSession, in0: DataFrame, in1: DataFrame): DataFrame =\n in0.exceptAll(in1)\n}\n")))))}g.isMDXComponent=!0},45740:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/set_add_inputs-bdf7b6691fc473ea1eee60161a5b9786.png"},65864:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/set_eg_1-46701e21c8cebc491888db3d556e8345.png"},62272:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/set_eg_2-fe430fbd13a44d729c0c0ef00e7ca2d9.png"},86473:(e,t,a)=>{a.d(t,{A:()=>n});const n=a.p+"assets/images/set_eg_3-5ad43a57037f0e1ada3b964fcf2934a7.png"}}]); \ No newline at end of file diff --git a/assets/js/main.d1d4b5f9.js b/assets/js/main.5871d47d.js similarity index 88% rename from assets/js/main.d1d4b5f9.js rename to assets/js/main.5871d47d.js index a2c9662b63..fb21594224 100644 --- a/assets/js/main.d1d4b5f9.js +++ b/assets/js/main.5871d47d.js @@ -1,2 +1,2 @@ -/*! 
For license information please see main.d1d4b5f9.js.LICENSE.txt */ -(self.webpackChunkdocs_4=self.webpackChunkdocs_4||[]).push([[38792],{89188:(e,t,a)=>{"use strict";a.d(t,{W:()=>r});var n=a(96540);function r(){return n.createElement("svg",{width:"20",height:"20",className:"DocSearch-Search-Icon",viewBox:"0 0 20 20","aria-hidden":"true"},n.createElement("path",{d:"M14.386 14.386l4.0877 4.0877-4.0877-4.0877c-2.9418 2.9419-7.7115 2.9419-10.6533 0-2.9419-2.9418-2.9419-7.7115 0-10.6533 2.9418-2.9419 7.7115-2.9419 10.6533 0 2.9419 2.9418 2.9419 7.7115 0 10.6533z",stroke:"currentColor",fill:"none",fillRule:"evenodd",strokeLinecap:"round",strokeLinejoin:"round"}))}},35947:(e,t,a)=>{"use strict";a.d(t,{A:()=>f});var n=a(96540),r=a(58168),o=a(53259),i=a.n(o),s=a(84054);const c={"01e25001":[()=>a.e(36081).then(a.bind(a,30101)),"@site/docs/release_notes/version_chart/versions_support.md",30101],"0207d280":[()=>a.e(29324).then(a.bind(a,29411)),"@site/docs/Spark/gems/transform/flattenschema.md",29411],"025dfd68":[()=>Promise.all([a.e(71869),a.e(40134)]).then(a.bind(a,53719)),"@site/docs/package-hub/package-builder/gem-builder.md",53719],"02719045":[()=>a.e(85623).then(a.t.bind(a,89196,19)),"~docs/default/tag-tags-difference-d5f.json",89196],"02c38545":[()=>a.e(35299).then(a.t.bind(a,19120,19)),"~docs/default/tag-tags-coalesce-531.json",19120],"02cade15":[()=>a.e(74938).then(a.t.bind(a,74101,19)),"~docs/default/tag-tags-generate-886.json",74101],"03436fb1":[()=>a.e(4136).then(a.bind(a,43914)),"@site/docs/architecture/self-hosted/installation-helm/installation-helm.mdx",43914],"03f3436f":[()=>a.e(99350).then(a.t.bind(a,89010,19)),"~docs/default/tag-tags-openai-9ab.json",89010],"044007a9":[()=>a.e(82170).then(a.t.bind(a,91789,19)),"~docs/default/tag-tags-historical-runs-6bf.json",91789],"0591c636":[()=>a.e(57984).then(a.t.bind(a,3395,19)),"~docs/default/tag-tags-version-018.json",3395],"05945682":[()=>a.e(6030).then(a.t.bind(a,97652,19)),"~docs/default/tag-tags-functionality-f85.js
on",97652],"05a5cb4c":[()=>a.e(7844).then(a.t.bind(a,4079,19)),"~docs/default/tag-tags-fabric-341.json",4079],"06376234":[()=>a.e(11978).then(a.t.bind(a,41322,19)),"~docs/default/tag-tags-data-2dd.json",41322],"067f059f":[()=>Promise.all([a.e(71869),a.e(69986)]).then(a.bind(a,30924)),"@site/docs/Spark/gems/source-target/file/orc.md",30924],"07342fab":[()=>a.e(33892).then(a.t.bind(a,45866,19)),"~docs/default/tag-tags-groupby-ff2.json",45866],"0772ef59":[()=>a.e(89908).then(a.t.bind(a,73908,19)),"~docs/default/tag-tags-changelog-e85.json",73908],"07dd44c0":[()=>a.e(9449).then(a.t.bind(a,70218,19)),"~docs/default/tag-tags-text-1df.json",70218],"07e49c2d":[()=>a.e(48045).then(a.bind(a,82987)),"@site/docs/SQL/gems/subgraph/subgraph.md",82987],"07e6e491":[()=>a.e(38208).then(a.t.bind(a,62793,19)),"~docs/default/tag-tags-pipelines-ebc.json",62793],"0886272a":[()=>a.e(31331).then(a.t.bind(a,22540,19)),"~docs/default/tag-tags-redshift-717.json",22540],"08956b7c":[()=>a.e(64139).then(a.bind(a,56986)),"@site/src/pages/mdapi/index.md",56986],"08b7acd5":[()=>a.e(20767).then(a.bind(a,64914)),"@site/docs/Orchestration/alternative-schedulers.md",64914],"091a56e7":[()=>a.e(1101).then(a.bind(a,68837)),"@site/docs/metadata/pull-request-templates.md",68837],"0a0939c2":[()=>a.e(58772).then(a.t.bind(a,29467,19)),"~docs/default/tag-tags-embedding-06b.json",29467],"0af66f68":[()=>Promise.all([a.e(71869),a.e(3076)]).then(a.bind(a,94677)),"@site/docs/metadata/git/git.md",94677],"0afe2388":[()=>a.e(8032).then(a.t.bind(a,57629,19)),"~docs/default/tag-tags-scheduling-683.json",57629],"0b97ee01":[()=>a.e(98106).then(a.t.bind(a,20908,19)),"~docs/default/tag-tags-upload-681.json",20908],"0bfc0a96":[()=>a.e(5768).then(a.t.bind(a,48932,19)),"~docs/default/tag-tags-monitoring-45e.json",48932],"0c58dc48":[()=>a.e(69896).then(a.t.bind(a,90118,19)),"~docs/default/tag-tags-resolve-71d.json",90118],"0c94fcd5":[()=>Promise.all([a.e(71869),a.e(55127)]).then(a.bind(a,53015)),"@site/docs/Orchestration/airflow
/setup/prophecy-managed/connections/connections.md",53015],"0d1d48e9":[()=>a.e(83926).then(a.t.bind(a,68078,19)),"~docs/default/tag-tags-seeds-709.json",68078],"0d663aab":[()=>Promise.all([a.e(71869),a.e(18454)]).then(a.bind(a,81579)),"@site/docs/architecture/deployment/deployment.md",81579],"0d69ada6":[()=>a.e(79814).then(a.bind(a,87298)),"@site/docs/architecture/self-hosted/authentication/security-settings.md",87298],"0d6eb03b":[()=>Promise.all([a.e(71869),a.e(44729)]).then(a.bind(a,98252)),"@site/docs/Spark/gems/source-target/catalog-table/catalog-table.md",98252],"0e0794f1":[()=>a.e(59053).then(a.t.bind(a,63671,19)),"~docs/default/tag-tags-teradata-57c.json",63671],"0e0ad2b2":[()=>a.e(19845).then(a.t.bind(a,33611,19)),"~docs/default/tag-tags-runtime-config-9c6.json",33611],"0e783820":[()=>a.e(76240).then(a.t.bind(a,20126,19)),"~docs/default/tag-tags-helm-6ac.json",20126],"0eeefd26":[()=>a.e(4130).then(a.t.bind(a,52669,19)),"~docs/default/tag-tags-repartition-082.json",52669],"0f4e5027":[()=>a.e(46690).then(a.t.bind(a,40773,19)),"~docs/default/tag-tags-installation-cbb.json",40773],"0f9ddd5e":[()=>a.e(79468).then(a.bind(a,83717)),"@site/docs/Spark/best-practices/use-dbx-secret.md",83717],"0fc340d5":[()=>a.e(78222).then(a.t.bind(a,46851,19)),"~docs/default/tag-tags-synase-514.json",46851],"113d94f9":[()=>a.e(95600).then(a.bind(a,42090)),"@site/docs/release_notes/2024/August_2024/new-ui-sql.md",42090],"115935af":[()=>a.e(18482).then(a.bind(a,14435)),"@site/docs/Spark/gems/source-target/warehouse/db2.md",14435],"11ee4258":[()=>a.e(57654).then(a.bind(a,25437)),"@site/docs/architecture/self-hosted/authentication/azuread-scim.md",25437],"11f9f2ab":[()=>a.e(13655).then(a.t.bind(a,65428,19)),"~docs/default/tag-tags-build-055.json",65428],"12868e5a":[()=>Promise.all([a.e(71869),a.e(5391)]).then(a.bind(a,40339)),"@site/docs/Spark/extensibility/extensibility.md",40339],"12a7552c":[()=>a.e(82045).then(a.bind(a,16821)),"@site/docs/Spark/gems/source-target/warehouse/teradata.m
d",16821],"12a7c48f":[()=>a.e(20745).then(a.bind(a,61299)),"@site/docs/getting-started/getting-started-with-low-code-spark.md",61299],"12a86512":[()=>a.e(73215).then(a.t.bind(a,42607,19)),"~docs/default/tag-tags-september-510.json",42607],"13b3561e":[()=>a.e(23581).then(a.bind(a,36555)),"@site/docs/Spark/gems/transform/data-cleansing.md",36555],"13b67e3b":[()=>a.e(74800).then(a.t.bind(a,36538,19)),"~docs/default/tag-tags-jenkins-756.json",36538],"13d5a781":[()=>a.e(38455).then(a.t.bind(a,71710,19)),"~docs/default/tag-tags-interim-f64.json",71710],"1458b78b":[()=>a.e(74640).then(a.t.bind(a,16126,19)),"~docs/default/tag-tags-continuous-integration-b0d.json",16126],"147f12f7":[()=>a.e(49311).then(a.t.bind(a,88779,19)),"~docs/default/tag-tags-rename-649.json",88779],"14ede2c4":[()=>a.e(39807).then(a.t.bind(a,7602,19)),"~docs/default/tag-tags-security-95f.json",7602],"150c05b5":[()=>a.e(22192).then(a.t.bind(a,79944,19)),"~docs/default/tag-tags-settings-2b7.json",79944],"1574f6a7":[()=>a.e(76432).then(a.t.bind(a,74093,19)),"~docs/default/tag-tags-dedupe-fec.json",74093],"159b5c15":[()=>a.e(62910).then(a.t.bind(a,63002,19)),"~docs/default/tag-tags-webinar-84b.json",63002],"160ba20f":[()=>a.e(26351).then(a.t.bind(a,90372,19)),"~docs/default/tag-tags-data-privacy-60e.json",90372],"170b731c":[()=>Promise.all([a.e(71869),a.e(64847)]).then(a.bind(a,14265)),"@site/docs/release_notes/release_notes.md",14265],17896441:[()=>Promise.all([a.e(71869),a.e(92663),a.e(18401)]).then(a.bind(a,10652)),"@theme/DocItem",10652],"17fe6c8f":[()=>a.e(6287).then(a.t.bind(a,4820,19)),"~docs/default/tag-tags-warehouse-based-65d.json",4820],"18efabfd":[()=>Promise.all([a.e(71869),a.e(56534)]).then(a.bind(a,50259)),"@site/docs/Spark/execution/execution.md",50259],"190d3bf5":[()=>Promise.all([a.e(71869),a.e(26373)]).then(a.bind(a,35951)),"@site/docs/Spark/gems/source-target/file/text.md",35951],"198b88ec":[()=>a.e(73187).then(a.t.bind(a,1784,19)),"~docs/default/tag-tags-limit-6ab.json",1784],"198c73ac"
:[()=>a.e(51500).then(a.t.bind(a,94481,19)),"~docs/default/tag-tags-audit-logs-4ff.json",94481],"1a24edc3":[()=>a.e(99314).then(a.t.bind(a,50434,19)),"~docs/default/tag-tags-github-actions-8e6.json",50434],"1a4e3797":[()=>Promise.all([a.e(71869),a.e(62138)]).then(a.bind(a,74604)),"@theme/SearchPage",74604],"1a6770ff":[()=>a.e(72321).then(a.bind(a,96582)),"@site/docs/release_notes/2024/feb2024.md",96582],"1ae18dac":[()=>a.e(4803).then(a.bind(a,90260)),"@site/docs/package-hub/package-builder/ShareableDatasets.md",90260],"1bba7e1a":[()=>a.e(44127).then(a.t.bind(a,59077,19)),"~docs/default/tag-tags-plib-e07.json",59077],"1be36493":[()=>a.e(84968).then(a.bind(a,55608)),"@site/docs/Orchestration/airflow/setup/prophecy-managed/limits-and-restrictions.md",55608],"1be78505":[()=>Promise.all([a.e(71869),a.e(88714)]).then(a.bind(a,50010)),"@theme/DocPage",50010],"1c2e9cbd":[()=>a.e(69842).then(a.bind(a,26323)),"@site/docs/Spark/fabrics/dataproc/dataproc-tips.md",26323],"1c44a3f7":[()=>a.e(19438).then(a.bind(a,38374)),"@site/docs/Spark/configuration/conditional-execution.md",38374],"1c8a4c5b":[()=>a.e(98584).then(a.bind(a,1042)),"@site/docs/release_notes/2024/may2024.md",1042],"1c8e7482":[()=>a.e(55093).then(a.bind(a,57673)),"@site/docs/architecture/self-hosted/configurations/configure-alerts.md",57673],"1c9c1a7e":[()=>a.e(23621).then(a.t.bind(a,73167,19)),"~docs/default/tag-tags-sandbox-88f.json",73167],"1cb4e630":[()=>a.e(84045).then(a.t.bind(a,15308,19)),"~docs/default/tag-tags-self-managed-4b4.json",15308],"1d1d32fe":[()=>Promise.all([a.e(71869),a.e(35559)]).then(a.bind(a,20904)),"@site/docs/package-hub/package-builder/package-builder.md",20904],"1d337d1d":[()=>a.e(82872).then(a.bind(a,8716)),"@site/docs/metadata/configure-audit-logging.md",8716],"1d545d0e":[()=>a.e(86429).then(a.t.bind(a,33084,19)),"~docs/default/tag-tags-expressions-e64.json",33084],"1d7b424d":[()=>a.e(73041).then(a.bind(a,87847)),"@site/docs/SQL/gems/joins.md",87847],"1dc43ddf":[()=>a.e(98730).then(a.t.b
ind(a,34047,19)),"~docs/default/tag-tags-enterprise-83a.json",34047],"1f391b9e":[()=>Promise.all([a.e(71869),a.e(92663),a.e(66061)]).then(a.bind(a,67973)),"@theme/MDXPage",67973],"1f5216c4":[()=>a.e(35767).then(a.t.bind(a,97723,19)),"~docs/default/tag-tags-transformation-6dd.json",97723],"1fe15682":[()=>a.e(32307).then(a.t.bind(a,40299,19)),"~docs/default/tag-tags-visual-2f5.json",40299],"21048a1d":[()=>a.e(30762).then(a.t.bind(a,8991,19)),"~docs/default/tag-tags-databricksworkflow-b9a.json",8991],"2118534e":[()=>a.e(52362).then(a.t.bind(a,31064,19)),"~docs/default/tag-tags-saml-b12.json",31064],"21afbb45":[()=>Promise.all([a.e(71869),a.e(36656)]).then(a.bind(a,23069)),"@site/docs/concepts/fabrics/fabrics.md",23069],22861438:[()=>Promise.all([a.e(71869),a.e(29407)]).then(a.bind(a,88490)),"@site/docs/Spark/gems/source-target/file/csv.md",88490],"22e0b6cb":[()=>a.e(18019).then(a.bind(a,50464)),"@site/docs/SQL/gems/datasources/upload-files.md",50464],"22e832a8":[()=>a.e(76592).then(a.bind(a,88974)),"@site/docs/concepts/fabrics/prophecy-libs.md",88974],"236df6ea":[()=>a.e(25522).then(a.t.bind(a,63135,19)),"~docs/default/tag-tags-concepts-ff2.json",63135],"23d439be":[()=>Promise.all([a.e(71869),a.e(62745)]).then(a.bind(a,42933)),"@site/docs/Spark/gems/transform/window-function.md",42933],"241eb839":[()=>a.e(6713).then(a.t.bind(a,91983,19)),"~docs/default/tag-tags-cte-755.json",91983],"242c8b4c":[()=>Promise.all([a.e(71869),a.e(40517)]).then(a.bind(a,6663)),"@site/docs/Spark/fabrics/emr.mdx",6663],"245fb99f":[()=>a.e(76166).then(a.bind(a,75261)),"@site/docs/Spark/pipeline-monitoring/use-pipeline-monitoring.md",75261],"247cc2d4":[()=>a.e(70224).then(a.t.bind(a,20797,19)),"~docs/default/tag-tags-bigquery-9fb.json",20797],"25a15c9f":[()=>a.e(24139).then(a.t.bind(a,70338,19)),"~docs/default/tag-tags-emr-721.json",70338],"26d47dc7":[()=>a.e(8124).then(a.bind(a,23481)),"@site/docs/Spark/execution/data-explorer.md",23481],"271365b4":[()=>a.e(7863).then(a.t.bind(a,16009,19)),"~do
cs/default/tag-tags-compatibility-751.json",16009],"284afd25":[()=>Promise.all([a.e(71869),a.e(87093)]).then(a.bind(a,54952)),"@site/docs/Spark/spark-streaming/streaming-sources-and-targets/streaming-sources-and-targets.md",54952],"296637e0":[()=>a.e(29269).then(a.bind(a,18509)),"@site/docs/tutorials/Orchestration/reliable-ci-cd.md",18509],"29664fbe":[()=>a.e(1785).then(a.t.bind(a,44472,19)),"~docs/default/tag-tags-streaming-84f.json",44472],"2ac0262c":[()=>a.e(30776).then(a.bind(a,63546)),"@site/docs/product-feature-matrix.md",63546],"2ac1502a":[()=>Promise.all([a.e(71869),a.e(91552)]).then(a.bind(a,85606)),"@site/docs/Spark/gems/custom/custom.md",85606],"2ae7c5a2":[()=>Promise.all([a.e(71869),a.e(39881)]).then(a.bind(a,42691)),"@site/docs/architecture/self-hosted/self-hosted.md",42691],"2b72b631":[()=>a.e(15444).then(a.t.bind(a,72211,19)),"~docs/default/tag-tags-secret-provider-fb5.json",72211],"2b7e62e1":[()=>a.e(38129).then(a.bind(a,83412)),"@site/docs/Spark/spark-streaming/streaming-sources-and-targets/streaming-warehouse-gem.md",83412],"2baf1f7c":[()=>a.e(71689).then(a.bind(a,10396)),"@site/docs/Orchestration/airflow/setup/prophecy-managed/connections/aws-connection.md",10396],"2c1b1cfa":[()=>Promise.all([a.e(71869),a.e(63004)]).then(a.bind(a,30098)),"@site/docs/Spark/gems/source-target/catalog-table/hive.md",30098],"2c801439":[()=>a.e(80661).then(a.t.bind(a,51156,19)),"~docs/default/tag-tags-partition-a4e.json",51156],"2c9d9dcc":[()=>a.e(75840).then(a.t.bind(a,74284,19)),"~docs/default/tag-tags-pinecone-98f.json",74284],"2cc1edb3":[()=>a.e(45692).then(a.t.bind(a,92531,19)),"~docs/default/tag-tags-june-9c9.json",92531],"2d535854":[()=>Promise.all([a.e(71869),a.e(95181)]).then(a.bind(a,66675)),"@site/docs/tutorials/Spark/excel.md",66675],"2de46ad6":[()=>Promise.all([a.e(71869),a.e(27303)]).then(a.bind(a,38948)),"@site/docs/Spark/gems/machine-learning/machine-learning.md",38948],"2f0dc9a2":[()=>a.e(33290).then(a.t.bind(a,11697,19)),"~docs/default/tag-tags-loop-8
1b.json",11697],"2f261630":[()=>a.e(95103).then(a.t.bind(a,48747,19)),"~docs/default/tag-tags-answer-41d.json",48747],"2f81bee2":[()=>a.e(92824).then(a.t.bind(a,89927,19)),"~docs/default/tag-tags-connect-2c6.json",89927],"2fcc74d2":[()=>a.e(41514).then(a.t.bind(a,66788,19)),"~docs/default/tag-tags-commit-384.json",66788],"30607bf6":[()=>a.e(21079).then(a.bind(a,21585)),"@site/docs/SQL/gems/datasources/datasources.md",21585],"30cf3d77":[()=>a.e(59582).then(a.t.bind(a,99470,19)),"~docs/default/tag-tags-source-e13.json",99470],"30d115e3":[()=>a.e(80593).then(a.bind(a,41482)),"@site/docs/SQL/development/target-models/write-options.md",41482],"32547e8c":[()=>a.e(87908).then(a.bind(a,3593)),"@site/docs/SQL/data-tests/use-project-tests.md",3593],"326b65c5":[()=>Promise.all([a.e(71869),a.e(10735)]).then(a.bind(a,94799)),"@site/docs/Spark/gems/join-split/join.md",94799],"3321a965":[()=>a.e(26699).then(a.bind(a,75250)),"@site/docs/SQL/development/target-models/type-and-format.md",75250],"332c99fa":[()=>Promise.all([a.e(71869),a.e(22777)]).then(a.bind(a,71361)),"@site/docs/Spark/gems/transform/limit.md",71361],"336012cf":[()=>a.e(10461).then(a.t.bind(a,19232,19)),"~docs/default/tag-tags-deployment-7e5.json",19232],"33bdf9dc":[()=>a.e(52839).then(a.t.bind(a,95280,19)),"~docs/default/tag-tags-functions-fe0.json",95280],"33d7ed5a":[()=>a.e(91919).then(a.bind(a,46289)),"@site/docs/Spark/spark-streaming/streaming-sources-and-targets/streaming-file-gem.md",46289],"33ec3803":[()=>Promise.all([a.e(71869),a.e(32627)]).then(a.bind(a,37695)),"@site/docs/Orchestration/airflow/airflow.md",37695],"3542d14b":[()=>a.e(11818).then(a.t.bind(a,71399,19)),"~docs/default/tag-tags-library-9c6.json",71399],"3720c009":[()=>Promise.all([a.e(71869),a.e(84787)]).then(a.bind(a,30876)),"@theme/DocTagsListPage",30876],"37c2ad0b":[()=>a.e(22271).then(a.t.bind(a,50024,19)),"~docs/default/tag-tags-upgrade-b71.json",50024],"3882e233":[()=>a.e(67863).then(a.t.bind(a,35983,19)),"~docs/default/tag-tags-fabrics-b1
8.json",35983],"38c61b12":[()=>a.e(65613).then(a.bind(a,6833)),"@site/docs/release_notes/version_chart/version_chart.md",6833],"3927d3d9":[()=>a.e(66292).then(a.t.bind(a,98402,19)),"~docs/default/tag-tags-tags-13a.json",98402],"39839fb8":[()=>Promise.all([a.e(71869),a.e(70423)]).then(a.bind(a,91669)),"@site/docs/deployment/deployment.md",91669],"3a6423c6":[()=>a.e(74943).then(a.t.bind(a,7224,19)),"~docs/default/tag-tags-how-to-0b2.json",7224],"3a6cc802":[()=>a.e(57873).then(a.bind(a,62426)),"@site/docs/getting-started/getting-started-with-low-code-sql.md",62426],"3b358076":[()=>a.e(70063).then(a.t.bind(a,53822,19)),"~docs/default/tag-tags-format-209.json",53822],"3b8a2e69":[()=>a.e(35957).then(a.t.bind(a,58448,19)),"~docs/default/tag-tags-restore-69e.json",58448],"3d83722c":[()=>a.e(44695).then(a.t.bind(a,57490,19)),"~docs/default/tag-tags-chatbot-364.json",57490],"3df96aa7":[()=>a.e(4602).then(a.bind(a,9053)),"@site/docs/Spark/extensibility/dependencies.md",9053],"3dfe6802":[()=>a.e(48189).then(a.t.bind(a,2510,19)),"~docs/default/tag-tags-smtp-d95.json",2510],"3f70bd78":[()=>a.e(66078).then(a.t.bind(a,86247,19)),"~docs/default/tag-tags-pull-requests-e86.json",86247],"3fc68db4":[()=>a.e(14481).then(a.bind(a,62164)),"@site/src/pages/prophecy-ir.md",62164],"4073213e":[()=>a.e(58130).then(a.t.bind(a,36221,19)),"~docs/default/tag-tags-pr-cf8.json",36221],"4095a46f":[()=>a.e(58354).then(a.bind(a,35587)),"@site/docs/release_notes/2023/july2023.md",35587],"41738d9f":[()=>a.e(34745).then(a.t.bind(a,13460,19)),"~docs/default/tag-tags-code-a1a.json",13460],"4237bab4":[()=>a.e(54110).then(a.t.bind(a,78749,19)),"~docs/default/tag-tags-fake-660.json",78749],"42eff174":[()=>a.e(53978).then(a.bind(a,65604)),"@site/docs/Spark/execution/execution-metrics-on-livy.md",65604],"43ab4b58":[()=>a.e(75736).then(a.bind(a,39945)),"@site/docs/Spark/tests.md",39945],"43d83672":[()=>a.e(41632).then(a.t.bind(a,57111,19)),"~docs/default/tag-tags-unpivot-974.json",57111],"4431fe44":[()=>Promise.al
l([a.e(71869),a.e(43232)]).then(a.bind(a,96762)),"@site/docs/SQL/execution/execution.md",96762],"447e739f":[()=>a.e(62864).then(a.t.bind(a,85785,19)),"~docs/default/tag-tags-merge-e17.json",85785],"45c1aab2":[()=>a.e(34536).then(a.t.bind(a,30161,19)),"~docs/default/tag-tags-email-e39.json",30161],"4601dfc2":[()=>a.e(25113).then(a.t.bind(a,51746,19)),"~docs/default/tag-tags-airflow-e27.json",51746],"463a2bb6":[()=>Promise.all([a.e(71869),a.e(12201)]).then(a.bind(a,72046)),"@site/docs/Spark/gems/transform/transform.md",72046],"46a9fbb4":[()=>a.e(2113).then(a.bind(a,33899)),"@site/docs/Spark/spark-streaming/spark-streaming.md",33899],"4723deab":[()=>a.e(95697).then(a.t.bind(a,4140,19)),"~docs/default/tag-tags-execution-ba9.json",4140],"4767af5f":[()=>a.e(29337).then(a.t.bind(a,21916,19)),"~docs/default/tag-tags-db-2-c26.json",21916],"47cf1bcd":[()=>Promise.all([a.e(71869),a.e(59811)]).then(a.bind(a,13365)),"@site/docs/Spark/gems/custom/sql-statement.md",13365],"47ef1a4e":[()=>a.e(3762).then(a.t.bind(a,10524,19)),"~docs/default/tag-tags-azure-982.json",10524],"48459a62":[()=>a.e(55228).then(a.t.bind(a,36484,19)),"~docs/default/tag-tags-project-7a5.json",36484],"4858d93d":[()=>a.e(33076).then(a.bind(a,86699)),"@site/docs/Spark/secret-management/using-secrets.md",86699],"486ce9f7":[()=>a.e(44281).then(a.bind(a,97430)),"@site/docs/Spark/gems/custom/script.md",97430],"497c9fbe":[()=>a.e(86370).then(a.t.bind(a,20068,19)),"~docs/default/tag-tags-reusable-c8d.json",20068],"498cb46f":[()=>Promise.all([a.e(71869),a.e(94503)]).then(a.bind(a,94489)),"@site/docs/metadata/metadata.md",94489],"49e281cf":[()=>a.e(86929).then(a.t.bind(a,44583,19)),"~docs/default/tag-tags-provider-ec6.json",44583],"49f26d76":[()=>a.e(33239).then(a.t.bind(a,50617,19)),"~docs/default/tag-tags-interactive-f90.json",50617],"4b1126a3":[()=>a.e(31731).then(a.t.bind(a,64917,19)),"~docs/default/tag-tags-guide-58d.json",64917],"4b970f2c":[()=>a.e(6774).then(a.t.bind(a,80402,19)),"~docs/default/tag-tags-aws-e06.j
son",80402],"4bfce9fc":[()=>a.e(11443).then(a.t.bind(a,86631,19)),"~docs/default/tag-tags-configurations-c43.json",86631],"4cae9b66":[()=>a.e(8755).then(a.t.bind(a,76168,19)),"~docs/default/tag-tags-transform-3eb.json",76168],"4ce6e96f":[()=>Promise.all([a.e(71869),a.e(47475)]).then(a.bind(a,28690)),"@site/docs/Spark/gems/transform/order-by.md",28690],"4d3c2eb9":[()=>a.e(17585).then(a.bind(a,87144)),"@site/docs/release_notes/2024/October_2024/October_2024.md",87144],"4d7692f7":[()=>a.e(26608).then(a.bind(a,14782)),"@site/docs/metadata/git/git-fork.md",14782],"4df01b2a":[()=>a.e(26443).then(a.t.bind(a,29995,19)),"~docs/default/tag-tags-capabilities-6ef.json",29995],"4e37ce00":[()=>a.e(72456).then(a.t.bind(a,22015,19)),"~docs/default/tag-tags-ascending-dda.json",22015],"4f12e139":[()=>Promise.all([a.e(71869),a.e(64810)]).then(a.bind(a,86773)),"@site/docs/SQL/development/visual-editor/visual-expression-builder/visual-expression-builder.md",86773],"4fd00004":[()=>a.e(4861).then(a.t.bind(a,75667,19)),"~docs/default/tag-tags-hashicorp-47c.json",75667],"506830f5":[()=>a.e(98508).then(a.bind(a,31922)),"@site/docs/release_notes/2023/feb2023.md",31922],"50ddc816":[()=>Promise.all([a.e(71869),a.e(50616)]).then(a.bind(a,62363)),"@site/docs/Spark/gems/machine-learning/ml-text-processing.md",62363],"510aee0b":[()=>a.e(59872).then(a.t.bind(a,73997,19)),"~docs/default/tag-tags-jobs-d87.json",73997],"523749b8":[()=>a.e(80863).then(a.t.bind(a,55177,19)),"~docs/default/tag-tags-search-b34.json",55177],"523b61ff":[()=>a.e(2790).then(a.bind(a,25461)),"@site/docs/metadata/metadata-connections.md",25461],"52507ae4":[()=>a.e(78074).then(a.t.bind(a,43734,19)),"~docs/default/tag-tags-salesforce-865.json",43734],"535bf742":[()=>a.e(96997).then(a.t.bind(a,17177,19)),"~docs/default/tag-tags-dependencies-0ce.json",17177],"54257f7c":[()=>a.e(39598).then(a.t.bind(a,30927,19)),"~docs/default/tag-tags-right-join-2f6.json",30927],"5428e127":[()=>a.e(65025).then(a.bind(a,94392)),"@site/docs/SQL/execut
ion/data-explorer.md",94392],"550c2c4f":[()=>a.e(13729).then(a.bind(a,84923)),"@site/docs/Spark/gems/source-target/advanced/synthetic-data-generator/providers.md",84923],55156658:[()=>a.e(51592).then(a.t.bind(a,35792,19)),"~docs/default/tag-tags-lookup-6f6.json",35792],"551b107c":[()=>a.e(71769).then(a.bind(a,80433)),"@site/docs/Spark/secret-management/hashicorp-vault.md",80433],"55960ee5":[()=>a.e(35151).then(a.t.bind(a,30350,19)),"~docs/default/tags-list-current-prop-15a.json",30350],"55c5efd5":[()=>a.e(4101).then(a.t.bind(a,24996,19)),"~docs/default/tag-tags-chart-97f.json",24996],"55ce3018":[()=>a.e(10115).then(a.bind(a,91695)),"@site/docs/tutorials/Orchestration/multi-jobs-trigger.md",91695],"56a7a51d":[()=>a.e(51643).then(a.t.bind(a,71422,19)),"~docs/default/tag-tags-expression-builder-bff.json",71422],"56cfb456":[()=>a.e(89570).then(a.t.bind(a,22100,19)),"~docs/default/tag-tags-hints-9a6.json",22100],"5714fd1a":[()=>Promise.all([a.e(71869),a.e(21728)]).then(a.bind(a,10202)),"@site/docs/Spark/gems/source-target/advanced/lookup.md",10202],"57b763fd":[()=>Promise.all([a.e(71869),a.e(12945)]).then(a.bind(a,5797)),"@site/docs/Spark/Spark.md",5797],"5808065b":[()=>a.e(60015).then(a.bind(a,24801)),"@site/docs/Spark/secret-management/databricks-secrets.md",24801],"582be2f7":[()=>a.e(20120).then(a.t.bind(a,65389,19)),"~docs/default/tag-tags-development-9b9.json",65389],"59dfd706":[()=>a.e(61102).then(a.t.bind(a,82023,19)),"~docs/default/tag-tags-explode-fa1.json",82023],"5af0874d":[()=>a.e(25264).then(a.t.bind(a,2533,19)),"~docs/default/tag-tags-cicd-b7a.json",2533],"5b2eddc9":[()=>Promise.all([a.e(71869),a.e(71625)]).then(a.bind(a,70386)),"@site/docs/Spark/gems/transform/filter.md",70386],"5b3e4dee":[()=>a.e(36948).then(a.t.bind(a,1746,19)),"~docs/default/tag-tags-mongodb-7c4.json",1746],"5beb85dd":[()=>Promise.all([a.e(71869),a.e(14334)]).then(a.bind(a,84886)),"@site/docs/Spark/gems/join-split/repartition.md",84886],"5c025541":[()=>a.e(85754).then(a.bind(a,40902)),"
@site/docs/release_notes/2023/may2023.md",40902],"5c1a5735":[()=>a.e(7281).then(a.bind(a,71206)),"@site/docs/SQL/development/code-editor.md",71206],"5c44c6cb":[()=>Promise.all([a.e(71869),a.e(4907)]).then(a.bind(a,97588)),"@site/docs/Spark/gems/subgraph/subgraph.md",97588],"5cf6e758":[()=>a.e(45284).then(a.t.bind(a,88598,19)),"~docs/default/tag-tags-cloudera-92f.json",88598],"5d33ff88":[()=>a.e(52482).then(a.t.bind(a,50129,19)),"~docs/default/tag-tags-rest-863.json",50129],"5d49185a":[()=>a.e(76140).then(a.t.bind(a,60546,19)),"~docs/default/tag-tags-run-6d5.json",60546],"5d910575":[()=>a.e(53631).then(a.bind(a,52909)),"@site/docs/architecture/self-hosted/configurations/configure-object-store.md",52909],"5e2f2c78":[()=>a.e(17429).then(a.t.bind(a,39074,19)),"~docs/default/tag-tags-alerts-4b5.json",39074],"5f3f4636":[()=>a.e(44196).then(a.t.bind(a,68458,19)),"~docs/default/tag-tags-pullrequest-8a7.json",68458],"5fd7f2fd":[()=>a.e(71227).then(a.bind(a,69584)),"@site/docs/metadata/git/git-resolve.md",69584],"5ff98a79":[()=>a.e(80056).then(a.t.bind(a,55100,19)),"~docs/default/tag-tags-set-fb6.json",55100],"60043c2c":[()=>a.e(56598).then(a.bind(a,5055)),"@site/docs/architecture/self-hosted/configurations/configure-audit-logs.md",5055],"60e97ff4":[()=>a.e(18994).then(a.t.bind(a,93320,19)),"~docs/default/tag-tags-location-100.json",93320],"612c4519":[()=>Promise.all([a.e(71869),a.e(46198)]).then(a.bind(a,70922)),"@site/docs/Spark/gems/source-target/warehouse/jdbc.md",70922],"613ec2cf":[()=>a.e(87148).then(a.t.bind(a,70567,19)),"~docs/default/tag-tags-ad-hoc-27d.json",70567],"61827f00":[()=>Promise.all([a.e(71869),a.e(41957)]).then(a.bind(a,88405)),"@site/docs/SQL/sql.md",88405],"61a8b9e9":[()=>Promise.all([a.e(71869),a.e(30878)]).then(a.bind(a,82934)),"@site/docs/Spark/gems/machine-learning/ml-openai.md",82934],"629a71af":[()=>Promise.all([a.e(71869),a.e(8268)]).then(a.bind(a,19661)),"@site/docs/Spark/gems/gems.md",19661],"62a155e1":[()=>a.e(99193).then(a.t.bind(a,8236,19)),
"~docs/default/tag-tags-iceberg-4da.json",8236],"62d56cf7":[()=>a.e(95442).then(a.t.bind(a,41835,19)),"~docs/default/tag-tags-azuread-269.json",41835],"6307e468":[()=>a.e(70296).then(a.t.bind(a,99177,19)),"~docs/default/tag-tags-keytab-416.json",99177],"63384ed2":[()=>Promise.all([a.e(71869),a.e(2040)]).then(a.bind(a,71085)),"@site/docs/getting-started/getting-started.md",71085],"6343b2c9":[()=>a.e(24157).then(a.t.bind(a,6823,19)),"~docs/default/tag-tags-config-8f1.json",6823],"638423a9":[()=>a.e(31177).then(a.t.bind(a,69160,19)),"~docs/default/tag-tags-open-source-spark-5af.json",69160],"63a2de3d":[()=>a.e(11287).then(a.t.bind(a,99160,19)),"~docs/default/tag-tags-tutorial-0ee.json",99160],"63e4eba4":[()=>a.e(15795).then(a.t.bind(a,48433,19)),"~docs/default/tag-tags-okta-bea.json",48433],"63ed3c78":[()=>a.e(27262).then(a.t.bind(a,71044,19)),"~docs/default/tag-tags-license-eea.json",71044],"64dea1ec":[()=>a.e(33518).then(a.t.bind(a,85167,19)),"~docs/default/tag-tags-runs-f0a.json",85167],"6622be1d":[()=>a.e(51879).then(a.t.bind(a,90848,19)),"~docs/default/tag-tags-file-based-cb5.json",90848],"66a7caca":[()=>a.e(3027).then(a.t.bind(a,64929,19)),"~docs/default/tag-tags-clean-fb2.json",64929],"67f04d0c":[()=>a.e(5927).then(a.t.bind(a,77460,19)),"~docs/default/tag-tags-snowflake-671.json",77460],"688fd072":[()=>a.e(70536).then(a.t.bind(a,73048,19)),"~docs/default/tag-tags-jdbc-52d.json",73048],"6952b0df":[()=>a.e(24037).then(a.t.bind(a,19007,19)),"~docs/default/tag-tags-cosmos-d01.json",19007],"69b5c624":[()=>a.e(66970).then(a.bind(a,43453)),"@site/docs/getting-started/getting-help.md",43453],"69dc1991":[()=>a.e(29923).then(a.bind(a,84267)),"@site/docs/release_notes/2024/june2024.md",84267],"6b2fcb41":[()=>Promise.all([a.e(71869),a.e(34349)]).then(a.bind(a,61527)),"@site/docs/Spark/best-practices/best-practices.md",61527],"6c0cffb2":[()=>a.e(7545).then(a.bind(a,22075)),"@site/docs/Spark/fabrics/synapsefabric.md",22075],"6c29375e":[()=>a.e(74657).then(a.t.bind(a,23961,19)
),"~docs/default/tag-tags-compare-112.json",23961],"6ca7c457":[()=>a.e(38075).then(a.t.bind(a,47403,19)),"~docs/default/tag-tags-object-store-cfa.json",47403],"6cbfe791":[()=>a.e(25432).then(a.t.bind(a,56436,19)),"~docs/default/tag-tags-plibs-f7a.json",56436],"6d186070":[()=>a.e(34917).then(a.bind(a,76402)),"@site/docs/Spark/gems/source-target/warehouse/redshift.md",76402],"6d5632b2":[()=>a.e(34147).then(a.bind(a,79982)),"@site/docs/concepts/teamuser.md",79982],"6d71f2f7":[()=>a.e(27620).then(a.t.bind(a,4061,19)),"/home/runner/work/prophecy-docs/prophecy-docs/.docusaurus/docusaurus-plugin-content-pages/default/plugin-route-context-module-100.json",4061],"6d9167cd":[()=>a.e(59619).then(a.t.bind(a,89426,19)),"~docs/default/tag-tags-gem-f69.json",89426],"6df2f38a":[()=>a.e(73229).then(a.t.bind(a,34867,19)),"~docs/default/tag-tags-extract-974.json",34867],"6e9ec4f2":[()=>a.e(7087).then(a.bind(a,92014)),"@site/docs/Spark/gems/transform/bulk-column-rename.md",92014],"6e9fb509":[()=>a.e(6807).then(a.bind(a,88420)),"@site/docs/release_notes/2024/October_2024/webinar_new_features/observability.md",88420],"6f4ebe18":[()=>a.e(86074).then(a.t.bind(a,68416,19)),"~docs/default/tag-tags-count-2b3.json",68416],"6ff1ca16":[()=>a.e(4053).then(a.bind(a,661)),"@site/docs/deployment/use-external-release-tags.md",661],"705f02da":[()=>a.e(48828).then(a.bind(a,16795)),"@site/docs/Orchestration/airflow/setup/prophecy-managed/connections/snowflake-connection.md",16795],"717b9cf2":[()=>a.e(21204).then(a.t.bind(a,81764,19)),"~docs/default/tag-tags-dataproc-758.json",81764],"71966df4":[()=>Promise.all([a.e(71869),a.e(86458)]).then(a.bind(a,94265)),"@site/docs/tutorials/Spark/spark.md",94265],"71d46d69":[()=>a.e(65418).then(a.t.bind(a,41638,19)),"~docs/default/tag-tags-release-bf3.json",41638],"721fc42c":[()=>a.e(61297).then(a.bind(a,43780)),"@site/docs/Orchestration/airflow/setup/prophecy-managed/connections/databricks-sql-connection.md",43780],"7255074c":[()=>a.e(30430).then(a.t.bind(a,12033,1
9)),"~docs/default/tag-tags-row-distributor-7ea.json",12033],"72f219eb":[()=>a.e(41199).then(a.t.bind(a,76594,19)),"~docs/default/tag-tags-dbt-aba.json",76594],"7365a4b8":[()=>a.e(4919).then(a.t.bind(a,77173,19)),"~docs/default/tag-tags-composer-72d.json",77173],"7479f134":[()=>a.e(7948).then(a.t.bind(a,70006,19)),"~docs/default/tag-tags-connections-f8a.json",70006],"750e5736":[()=>a.e(14985).then(a.bind(a,81943)),"@site/docs/Spark/execution/execution-metrics.md",81943],75721059:[()=>a.e(56365).then(a.t.bind(a,84591,19)),"~docs/default/tag-tags-group-653.json",84591],"759d6316":[()=>a.e(42020).then(a.t.bind(a,3040,19)),"~docs/default/tag-tags-select-196.json",3040],"765d0783":[()=>a.e(76631).then(a.t.bind(a,69366,19)),"~docs/default/tag-tags-variant-5f8.json",69366],"76cfe10a":[()=>a.e(49904).then(a.t.bind(a,1933,19)),"~docs/default/tag-tags-sql-e00.json",1933],77051279:[()=>Promise.all([a.e(71869),a.e(75713)]).then(a.bind(a,41289)),"@site/docs/Spark/gems/source-target/warehouse/bigquery.md",41289],"77a3d71c":[()=>a.e(88842).then(a.t.bind(a,32579,19)),"~docs/default/tag-tags-recommendations-775.json",32579],"77c4a354":[()=>a.e(35415).then(a.bind(a,12188)),"@site/docs/SQL/gems/transform/flattenschema.md",12188],"77d88847":[()=>a.e(58630).then(a.bind(a,8323)),"@site/docs/architecture/self-hosted/authentication/saml-okta.md",8323],"790b9f8a":[()=>a.e(4437).then(a.t.bind(a,74188,19)),"~docs/default/tag-tags-matrix-964.json",74188],"79b4b0cc":[()=>a.e(50861).then(a.t.bind(a,17454,19)),"~docs/default/tag-tags-delta-f04.json",17454],"79c36df7":[()=>a.e(12602).then(a.bind(a,79609)),"@site/docs/SQL/development/visual-editor/visual-expression-builder/visual-expression-builder-reference.md",79609],"7abca4d4":[()=>Promise.all([a.e(71869),a.e(13333)]).then(a.bind(a,73852)),"@site/docs/SQL/development/target-models/target-models.md",73852],"7af966b9":[()=>a.e(35003).then(a.t.bind(a,83223,19)),"~docs/default/tag-tags-setup-3f5.json",83223],"7bd997b7":[()=>a.e(75388).then(a.t.bind(
a,96987,19)),"~docs/default/tag-tags-user-90e.json",96987],"7c2e4869":[()=>a.e(41861).then(a.t.bind(a,64839,19)),"~docs/default/tag-tags-unit-tests-803.json",64839],"7c6b4b39":[()=>a.e(36171).then(a.t.bind(a,93594,19)),"~docs/default/tag-tags-dynamic-498.json",93594],"7cc76b06":[()=>Promise.all([a.e(71869),a.e(53881)]).then(a.bind(a,2307)),"@site/docs/Spark/fabrics/fabrics.md",2307],"7debfd10":[()=>a.e(54022).then(a.t.bind(a,35180,19)),"~docs/default/tag-tags-best-practices-9f0.json",35180],"7edf7663":[()=>a.e(52275).then(a.t.bind(a,57757,19)),"/home/runner/work/prophecy-docs/prophecy-docs/.docusaurus/docusaurus-theme-search-algolia/default/plugin-route-context-module-100.json",57757],"7f4dccc0":[()=>a.e(10519).then(a.t.bind(a,99960,19)),"~docs/default/tag-tags-inner-92e.json",99960],"7fa73fce":[()=>a.e(86635).then(a.t.bind(a,77250,19)),"~docs/default/tag-tags-columns-4eb.json",77250],"801bcdab":[()=>a.e(64310).then(a.t.bind(a,14062,19)),"~docs/default/tag-tags-diagnostics-a09.json",14062],"809b845a":[()=>a.e(3232).then(a.bind(a,46579)),"@site/docs/Spark/gems/transform/unpivot.md",46579],"81a085d6":[()=>Promise.all([a.e(71869),a.e(78726)]).then(a.bind(a,39688)),"@site/docs/Spark/gems/source-target/file/delta.md",39688],"81a61c00":[()=>a.e(69831).then(a.t.bind(a,56747,19)),"~docs/default/tag-tags-longformat-afa.json",56747],"81caeba3":[()=>a.e(51115).then(a.t.bind(a,84155,19)),"~docs/default/tag-tags-scala-16f.json",84155],"81ea9dae":[()=>a.e(77).then(a.t.bind(a,79257,19)),"~docs/default/tag-tags-intersect-ef1.json",79257],"825c22f4":[()=>a.e(90783).then(a.t.bind(a,33582,19)),"~docs/default/tag-tags-conditional-1fc.json",33582],"8262ae1a":[()=>a.e(60937).then(a.t.bind(a,28695,19)),"~docs/default/tag-tags-distinct-c59.json",28695],"827dd016":[()=>a.e(42781).then(a.t.bind(a,65980,19)),"~docs/default/tag-tags-with-column-078.json",65980],"82cb6480":[()=>a.e(88528).then(a.t.bind(a,11028,19)),"~docs/default/tag-tags-may-d67.json",11028],"8344c75d":[()=>a.e(821).then(a.t.b
ind(a,81424,19)),"~docs/default/tag-tags-compare-columns-71a.json",81424],"83582f1b":[()=>Promise.all([a.e(71869),a.e(24574)]).then(a.bind(a,54213)),"@site/docs/architecture/architecture.md",54213],"836ce766":[()=>a.e(87953).then(a.bind(a,48213)),"@site/docs/Spark/execution/databricks-clusters-behaviors.md",48213],"8394531b":[()=>a.e(83126).then(a.t.bind(a,39918,19)),"~docs/default/tag-tags-prophecy-managed-d88.json",39918],"83d48d6d":[()=>a.e(45830).then(a.t.bind(a,56566,19)),"~docs/default/tag-tags-disaster-recovery-083.json",56566],"83fad8d1":[()=>a.e(31350).then(a.t.bind(a,74478,19)),"~docs/default/tag-tags-databricks-secrets-bea.json",74478],"843d9c7b":[()=>a.e(32725).then(a.t.bind(a,55927,19)),"~docs/default/tag-tags-subgraph-fb3.json",55927],"875fb614":[()=>a.e(68646).then(a.bind(a,22906)),"@site/docs/Spark/execution/interactive-execution.md",22906],"87de2ab1":[()=>a.e(78525).then(a.t.bind(a,61966,19)),"/home/runner/work/prophecy-docs/prophecy-docs/.docusaurus/docusaurus-plugin-content-docs/default/plugin-route-context-module-100.json",61966],"881f9824":[()=>Promise.all([a.e(71869),a.e(51275)]).then(a.bind(a,34876)),"@site/docs/Spark/gems/source-target/catalog-table/delta.md",34876],"883da182":[()=>a.e(76330).then(a.t.bind(a,55980,19)),"~docs/default/tag-tags-aggregate-a24.json",55980],"88663bc9":[()=>a.e(31840).then(a.bind(a,39768)),"@site/docs/SQL/extensibility/gem-builder/gem-builder.md",39768],"888b809b":[()=>a.e(52152).then(a.bind(a,39067)),"@site/docs/Spark/extensibility/gem-builder/optimization-functions.md",39067],"88bb1ea3":[()=>a.e(2125).then(a.t.bind(a,7742,19)),"~docs/default/tag-tags-unique-0f9.json",7742],"88bbec44":[()=>Promise.all([a.e(71869),a.e(8515)]).then(a.bind(a,17140)),"@site/docs/SQL/development/visual-editor/visual-editor.md",17140],"8907967c":[()=>a.e(48502).then(a.t.bind(a,47272,19)),"~docs/default/tag-tags-copilot-4b0.json",47272],"89c4abfb":[()=>a.e(30382).then(a.bind(a,26240)),"@site/docs/release_notes/2024/October_2024/webinar_n
ew_features/ai_capabilities.md",26240],"89ea6264":[()=>Promise.all([a.e(71869),a.e(61306)]).then(a.bind(a,18479)),"@site/docs/Spark/extensibility/user-defined-functions.md",18479],"89ed3843":[()=>a.e(18663).then(a.t.bind(a,30933,19)),"~docs/default/tag-tags-unity-catalog-d7a.json",30933],"89f23bf8":[()=>a.e(34159).then(a.t.bind(a,96851,19)),"~docs/default/tag-tags-gcp-19f.json",96851],"8a834cbf":[()=>a.e(83938).then(a.bind(a,9463)),"@site/docs/package-hub/package-builder/ShareableUDFs.md",9463],"8a8552d5":[()=>a.e(48439).then(a.t.bind(a,39818,19)),"~docs/default/tag-tags-explorer-d2a.json",39818],"8afe1424":[()=>Promise.all([a.e(71869),a.e(35392)]).then(a.bind(a,89741)),"@site/docs/Spark/gems/source-target/warehouse/mongodb.md",89741],"8b5e518d":[()=>a.e(9233).then(a.t.bind(a,65654,19)),"~docs/default/tag-tags-admin-c1e.json",65654],"8bc5072f":[()=>Promise.all([a.e(71869),a.e(15406)]).then(a.bind(a,45516)),"@site/docs/Orchestration/airflow/setup/prophecy-managed/prophecy-managed.md",45516],"8c3d3b0e":[()=>a.e(99063).then(a.t.bind(a,98122,19)),"~docs/default/tag-tags-mwaa-d53.json",98122],"8ddf4ff6":[()=>Promise.all([a.e(71869),a.e(95171)]).then(a.bind(a,45202)),"@site/docs/Spark/gems/transform/schema-transform.md",45202],"8de4885f":[()=>a.e(88945).then(a.t.bind(a,79813,19)),"~docs/default/tag-tags-november-571.json",79813],"8e989a18":[()=>a.e(61296).then(a.t.bind(a,66194,19)),"~docs/default/tag-tags-alerting-9da.json",66194],"8eb8756a":[()=>a.e(46263).then(a.t.bind(a,8551,19)),"~docs/default/tag-tags-continuous-deployment-76a.json",8551],"8f82902e":[()=>a.e(11787).then(a.t.bind(a,11974,19)),"~docs/default/tag-tags-join-split-148.json",11974],"8fbe11b2":[()=>a.e(37772).then(a.t.bind(a,66320,19)),"~docs/default/tag-tags-generative-ai-cbf.json",66320],"8fd0dee8":[()=>a.e(31323).then(a.bind(a,65003)),"@site/docs/Spark/spark-streaming/streaming-sources-and-targets/streaming-event-gem.md",65003],"8fee5992":[()=>Promise.all([a.e(71869),a.e(39444)]).then(a.bind(a,53065)),"@
site/docs/Spark/extensibility/gem-builder/gem-builder.md",53065],"914d2bef":[()=>a.e(10872).then(a.bind(a,79107)),"@site/docs/Spark/fabrics/livy.md",79107],"91c6c3f3":[()=>a.e(85133).then(a.t.bind(a,99651,19)),"~docs/default/tag-tags-dependency-983.json",99651],"923d0931":[()=>a.e(1474).then(a.t.bind(a,56819,19)),"~docs/default/tag-tags-azure-blob-storage-a69.json",56819],"923e7acb":[()=>a.e(21985).then(a.t.bind(a,6798,19)),"~docs/default/tag-tags-april-541.json",6798],"928dc651":[()=>a.e(34635).then(a.t.bind(a,28294,19)),"~docs/default/tag-tags-teams-987.json",28294],"929e57e2":[()=>a.e(70409).then(a.t.bind(a,77523,19)),"~docs/default/tag-tags-schema-2c7.json",77523],"92b0f648":[()=>a.e(60032).then(a.bind(a,22805)),"@site/docs/SQL/gems/transform/deduplicate.md",22805],"92f1e91b":[()=>a.e(74246).then(a.t.bind(a,33185,19)),"~docs/default/tag-tags-scim-883.json",33185],"93108c41":[()=>a.e(25922).then(a.t.bind(a,90126,19)),"~docs/default/tag-tags-scd-2-69b.json",90126],"9345c039":[()=>Promise.all([a.e(71869),a.e(73263)]).then(a.bind(a,654)),"@site/docs/Spark/gems/source-target/file/file.md",654],"935f2afb":[()=>a.e(98581).then(a.t.bind(a,35610,19)),"~docs/default/version-current-metadata-prop-751.json",35610],"93769bd5":[()=>a.e(43368).then(a.t.bind(a,91697,19)),"~docs/default/tag-tags-udfs-553.json",91697],"9396cbd1":[()=>a.e(32728).then(a.t.bind(a,22494,19)),"~docs/default/tag-tags-window-cc8.json",22494],"93f84d12":[()=>a.e(62756).then(a.t.bind(a,33101,19)),"~docs/default/tag-tags-gems-49d.json",33101],"941e073f":[()=>a.e(45558).then(a.bind(a,41559)),"@site/docs/SQL/development/target-models/location.md",41559],95677701:[()=>a.e(17539).then(a.t.bind(a,59294,19)),"~docs/default/tag-tags-group-by-a64.json",59294],"96d646da":[()=>a.e(5447).then(a.t.bind(a,27238,19)),"~docs/default/tag-tags-generator-70d.json",27238],"977ecb80":[()=>a.e(24528).then(a.bind(a,95619)),"@site/docs/SQL/development/target-models/sql-query.md",95619],"98175f9f":[()=>a.e(96712).then(a.bind(a,69
132)),"@site/docs/metadata/git/git-merge.md",69132],"985d43d9":[()=>a.e(63133).then(a.t.bind(a,46007,19)),"~docs/default/tag-tags-tables-709.json",46007],"98aecf8b":[()=>Promise.all([a.e(71869),a.e(5579)]).then(a.bind(a,59048)),"@site/docs/Spark/gems/source-target/warehouse/salesforce.md",59048],"98f9234f":[()=>a.e(60540).then(a.bind(a,15080)),"@site/docs/tutorials/videos/design-pipeline.md",15080],"9995b2be":[()=>a.e(26552).then(a.t.bind(a,93698,19)),"~docs/default/tag-tags-table-e12.json",93698],"9a47c610":[()=>Promise.all([a.e(71869),a.e(70778)]).then(a.bind(a,11002)),"@site/docs/Spark/gems/join-split/compare-columns.md",11002],"9b15144a":[()=>a.e(76670).then(a.t.bind(a,16382,19)),"~docs/default/tag-tags-where-4b7.json",16382],"9b6bfc41":[()=>a.e(21602).then(a.t.bind(a,40376,19)),"~docs/default/tag-tags-custom-35a.json",40376],"9c0297ce":[()=>Promise.all([a.e(71869),a.e(24161)]).then(a.bind(a,97311)),"@site/docs/Spark/gems/source-target/warehouse/snowflake.md",97311],"9c4c896c":[()=>Promise.all([a.e(71869),a.e(23852)]).then(a.bind(a,69857)),"@site/docs/Spark/gems/source-target/file/kafka-stream.md",69857],"9c5f1e27":[()=>a.e(98429).then(a.t.bind(a,99996,19)),"~docs/default/tag-tags-flatten-d66.json",99996],"9c9d26e6":[()=>Promise.all([a.e(71869),a.e(76065)]).then(a.bind(a,22084)),"@site/docs/Spark/gems/source-target/warehouse/warehouse.md",22084],"9dca7f6c":[()=>a.e(84566).then(a.t.bind(a,1447,19)),"~docs/default/tag-tags-infer-c08.json",1447],"9ddcd7ec":[()=>a.e(60880).then(a.t.bind(a,93037,19)),"~docs/default/tag-tags-random-11d.json",93037],"9e13a3d5":[()=>a.e(28600).then(a.bind(a,79214)),"@site/docs/metadata/lineage/lineage-run-and-diagnose.md",79214],"9e810a23":[()=>a.e(10096).then(a.bind(a,17380)),"@site/src/pages/mdapi/enums.md",17380],"9f792844":[()=>a.e(90564).then(a.t.bind(a,82890,19)),"~docs/default/tag-tags-deploy-eb0.json",82890],"9f8f5e6c":[()=>Promise.all([a.e(71869),a.e(59383)]).then(a.bind(a,73121)),"@site/docs/tutorials/tutorials.md",73121],"9fc
1aaa2":[()=>a.e(39449).then(a.bind(a,98029)),"@site/docs/release_notes/2023/oct2023.md",98029],a0454ec9:[()=>a.e(94740).then(a.t.bind(a,37461,19)),"~docs/default/tag-tags-left-join-aa7.json",37461],a04e0048:[()=>a.e(32409).then(a.bind(a,81770)),"@site/docs/tutorials/videos/test-pipeline.md",81770],a0c0a6fe:[()=>a.e(48203).then(a.bind(a,78598)),"@site/docs/deployment/prophecy-build-tool/pbt-jenkins.md",78598],a1339959:[()=>a.e(58527).then(a.t.bind(a,3801,19)),"~docs/default/tag-tags-instructions-a1b.json",3801],a135f75b:[()=>a.e(83781).then(a.bind(a,8642)),"@site/docs/SQL/gems/transform/aggregate.md",8642],a185f24b:[()=>a.e(32214).then(a.t.bind(a,4212,19)),"~docs/default/tag-tags-diff-881.json",4212],a1a084f0:[()=>a.e(21092).then(a.bind(a,26010)),"@site/docs/concepts/dataset.md",26010],a1ca88e4:[()=>a.e(72250).then(a.t.bind(a,78733,19)),"~docs/default/tag-tags-order-by-ac9.json",78733],a1ee719f:[()=>a.e(43534).then(a.t.bind(a,78939,19)),"~docs/default/tag-tags-passwords-e08.json",78939],a2e33e40:[()=>a.e(21002).then(a.bind(a,69829)),"@site/docs/package-hub/package-hub.md",69829],a375c2f7:[()=>a.e(98545).then(a.t.bind(a,29263,19)),"~docs/default/tag-tags-release-notes-116.json",29263],a48fe1d9:[()=>a.e(3465).then(a.t.bind(a,12051,19)),"~docs/default/tag-tags-job-e29.json",12051],a4c7938e:[()=>a.e(13255).then(a.t.bind(a,16448,19)),"~docs/default/tag-tags-cdp-221.json",16448],a58e7f63:[()=>a.e(3922).then(a.bind(a,36495)),"@site/docs/SQL/development/target-models/schema.md",36495],a5d53aff:[()=>a.e(98692).then(a.bind(a,69581)),"@site/docs/SQL/gems/custom/custom.md",69581],a627b8ac:[()=>a.e(30981).then(a.bind(a,43153)),"@site/docs/SQL/development/visual-editor/visual-expression-builder/use-the-visual-expression-builder.md",43153],a62a3640:[()=>a.e(27558).then(a.t.bind(a,30419,19)),"~docs/default/tag-tags-sum-428.json",30419],a6425293:[()=>a.e(54405).then(a.t.bind(a,65178,19)),"~docs/default/tag-tags-udafs-0da.json",65178],a696530d:[()=>Promise.all([a.e(71869),a.e(82531)])
.then(a.bind(a,13898)),"@site/docs/SQL/development/development.md",13898],a71ccd0b:[()=>a.e(17480).then(a.t.bind(a,83527,19)),"~docs/default/tag-tags-csv-1df.json",83527],a722ba21:[()=>a.e(27367).then(a.t.bind(a,17289,19)),"~docs/default/tag-tags-october-6dc.json",17289],a788a32d:[()=>a.e(46597).then(a.t.bind(a,91566,19)),"~docs/default/tag-tags-query-721.json",91566],a96edeab:[()=>a.e(82406).then(a.t.bind(a,6546,19)),"~docs/default/tag-tags-mock-8a7.json",6546],aa249818:[()=>a.e(20547).then(a.bind(a,52391)),"@site/docs/release_notes/2023/dec2023.md",52391],aa61fc2a:[()=>a.e(67072).then(a.t.bind(a,75801,19)),"~docs/default/tag-tags-metadata-11f.json",75801],aadc993b:[()=>a.e(59791).then(a.t.bind(a,10837,19)),"~docs/default/tag-tags-question-44c.json",10837],ab1d3064:[()=>a.e(99018).then(a.t.bind(a,10587,19)),"~docs/default/tag-tags-parquet-f41.json",10587],ab27f75b:[()=>a.e(43119).then(a.t.bind(a,80228,19)),"~docs/default/tag-tags-trigger-701.json",80228],ab291dc2:[()=>a.e(11669).then(a.t.bind(a,42333,19)),"~docs/default/tag-tags-maven-d79.json",42333],ab56ab22:[()=>a.e(29486).then(a.t.bind(a,55611,19)),"~docs/default/tag-tags-union-a72.json",55611],aba6f2fb:[()=>a.e(22559).then(a.bind(a,37807)),"@site/docs/Spark/fabrics/databricks.md",37807],ac2cd0e2:[()=>a.e(94043).then(a.t.bind(a,77738,19)),"~docs/default/tag-tags-reserve-pods-4ad.json",77738],ac3c8b70:[()=>a.e(61890).then(a.t.bind(a,85637,19)),"~docs/default/tag-tags-package-ee3.json",85637],acd5882d:[()=>a.e(9852).then(a.bind(a,3394)),"@site/docs/package-hub/package-builder/ShareableSubgraphs.md",3394],ad27b778:[()=>a.e(73753).then(a.t.bind(a,47665,19)),"~docs/default/tag-tags-ad-2df.json",47665],ad791090:[()=>a.e(95044).then(a.t.bind(a,86590,19)),"~docs/default/tag-tags-nfs-821.json",86590],ada810e1:[()=>a.e(47958).then(a.t.bind(a,38146,19)),"~docs/default/tag-tags-dbfs-38b.json",38146],adb9059a:[()=>a.e(46036).then(a.t.bind(a,11623,19)),"~docs/default/tag-tags-join-04e.json",11623],aea3dd16:[()=>a.e(11220).th
en(a.t.bind(a,30624,19)),"~docs/default/tag-tags-self-hosted-b33.json",30624],af573f30:[()=>a.e(93844).then(a.t.bind(a,9495,19)),"~docs/default/tag-tags-fixed-format-01d.json",9495],aff5c2a0:[()=>a.e(93929).then(a.t.bind(a,903,19)),"~docs/default/tag-tags-august-2ac.json",903],b06e9c18:[()=>a.e(23309).then(a.bind(a,645)),"@site/docs/Spark/secret-management/env-variable.md",645],b177fa8f:[()=>a.e(83603).then(a.bind(a,75788)),"@site/docs/release_notes/2023/aug2023.md",75788],b18272d1:[()=>a.e(63424).then(a.bind(a,13040)),"@site/docs/release_notes/2024/October_2024/webinar_new_features/development.md",13040],b196ad21:[()=>a.e(5469).then(a.bind(a,21070)),"@site/docs/getting-started/getting-started-with-low-code-airflow.md",21070],b1a1dd54:[()=>a.e(41305).then(a.bind(a,87955)),"@site/docs/Orchestration/airflow/setup/prophecy-managed/connections/databricks-spark-connection.md",87955],b24558ac:[()=>a.e(68585).then(a.t.bind(a,19271,19)),"~docs/default/tag-tags-iterator-4c2.json",19271],b261635c:[()=>a.e(16641).then(a.bind(a,43805)),"@site/docs/release_notes/2023/sept2023.md",43805],b2ab5fcc:[()=>a.e(24061).then(a.bind(a,73152)),"@site/docs/SQL/fabrics/databricks.md",73152],b343e24f:[()=>a.e(28794).then(a.t.bind(a,30871,19)),"~docs/default/tag-tags-transformations-272.json",30871],b345d6c8:[()=>Promise.all([a.e(71869),a.e(25900)]).then(a.bind(a,91305)),"@site/docs/concepts/project/project.md",91305],b378c42d:[()=>a.e(68356).then(a.bind(a,28256)),"@site/docs/SQL/data-tests/data-tests.md",28256],b387da95:[()=>a.e(83794).then(a.bind(a,4458)),"@site/docs/concepts/copilot/enable-data-copilot.md",4458],b3b53493:[()=>a.e(60888).then(a.t.bind(a,33290,19)),"~docs/default/tag-tags-fork-1be.json",33290],b3fba1c0:[()=>a.e(98913).then(a.t.bind(a,66668,19)),"~docs/default/tag-tags-wideformat-fa1.json",66668],b4561505:[()=>a.e(64479).then(a.bind(a,7333)),"@site/docs/architecture/self-hosted/download-logs.md",7333],b4784e86:[()=>a.e(95187).then(a.bind(a,38069)),"@site/docs/SQL/gems/transfor
m/transform.md",38069],b4a98d99:[()=>Promise.all([a.e(71869),a.e(47385)]).then(a.bind(a,42541)),"@site/docs/concepts/copilot/copilot.md",42541],b5931da8:[()=>a.e(77068).then(a.bind(a,95423)),"@site/docs/release_notes/2024/april2024.md",95423],b61b67cb:[()=>a.e(61640).then(a.t.bind(a,96568,19)),"~docs/default/tag-tags-livy-80b.json",96568],b671a259:[()=>a.e(1248).then(a.t.bind(a,47108,19)),"~docs/default/tag-tags-july-2ac.json",47108],b6b7143c:[()=>a.e(60343).then(a.t.bind(a,70241,19)),"~docs/default/tag-tags-oracle-93e.json",70241],b8bedb49:[()=>a.e(209).then(a.bind(a,36896)),"@site/docs/release_notes/2024/August_2024/August_2024.md",36896],b8c254c2:[()=>a.e(89706).then(a.bind(a,67204)),"@site/docs/Spark/spark-streaming/streaming-transformations.md",67204],b8e30058:[()=>Promise.all([a.e(71869),a.e(21038)]).then(a.bind(a,96745)),"@site/docs/concepts/concepts.md",96745],b90ce171:[()=>a.e(69229).then(a.t.bind(a,61559,19)),"~docs/default/tag-tags-model-2e5.json",61559],ba5937ec:[()=>a.e(57188).then(a.t.bind(a,19126,19)),"~docs/default/tag-tags-outer-0c1.json",19126],ba9afd68:[()=>a.e(59674).then(a.t.bind(a,12058,19)),"~docs/default/tag-tags-descending-baa.json",12058],baac4c4f:[()=>a.e(47809).then(a.t.bind(a,38267,19)),"~docs/default/tag-tags-hive-db9.json",38267],bb246428:[()=>Promise.all([a.e(71869),a.e(7577)]).then(a.bind(a,28110)),"@site/docs/architecture/self-hosted/configurations/configurations.md",28110],bb26ea31:[()=>Promise.all([a.e(71869),a.e(92349)]).then(a.bind(a,72674)),"@site/docs/SQL/extensibility/extensibility.md",72674],bc5b2c90:[()=>a.e(47113).then(a.t.bind(a,67494,19)),"~docs/default/tag-tags-env-var-b2a.json",67494],bd679c89:[()=>a.e(43545).then(a.t.bind(a,1346,19)),"~docs/default/tag-tags-databricks-439.json",1346],bddd21fc:[()=>a.e(27003).then(a.t.bind(a,57118,19)),"~docs/default/tag-tags-vault-bf0.json",57118],be80cd00:[()=>a.e(34133).then(a.bind(a,39949)),"@site/docs/metadata/project-metadata.md",39949],bf20dd1a:[()=>a.e(8127).then(a.t.bind(a,284
1,19)),"~docs/default/tag-tags-key-22d.json",2841],bf524578:[()=>a.e(44606).then(a.t.bind(a,88516,19)),"~docs/default/tag-tags-python-7cd.json",88516],bf6deedb:[()=>a.e(77921).then(a.t.bind(a,9028,19)),"~docs/default/tag-tags-reformat-fc1.json",9028],bfebd052:[()=>a.e(17667).then(a.t.bind(a,36357,19)),"~docs/default/tag-tags-s-3-d95.json",36357],bfec6206:[()=>a.e(94045).then(a.t.bind(a,36225,19)),"~docs/default/tag-tags-sources-7d0.json",36225],bff46f23:[()=>Promise.all([a.e(71869),a.e(72288)]).then(a.bind(a,79888)),"@site/docs/Orchestration/airflow/setup/setup.md",79888],c00dea16:[()=>a.e(95641).then(a.t.bind(a,87938,19)),"~docs/default/tag-tags-lineage-ac3.json",87938],c057d7b2:[()=>a.e(59285).then(a.bind(a,80234)),"@site/docs/release_notes/2024/jan2024.md",80234],c0e25ad0:[()=>a.e(80465).then(a.t.bind(a,52775,19)),"~docs/default/tag-tags-test-de6.json",52775],c1177b0e:[()=>a.e(45121).then(a.bind(a,68473)),"@site/docs/architecture/self-hosted/configurations/sandbox-configuration.md",68473],c12d5b23:[()=>a.e(51625).then(a.bind(a,31320)),"@site/docs/metadata/prophecyAPI.md",31320],c260c508:[()=>a.e(25346).then(a.bind(a,1811)),"@site/docs/concepts/copilot/copilot-ai-capabilities.md",1811],c277ac9e:[()=>a.e(36708).then(a.bind(a,48121)),"@site/docs/Spark/gems/source-target/warehouse/oracle.md",48121],c2849c5a:[()=>a.e(9546).then(a.bind(a,27836)),"@site/docs/release_notes/2024/nov2024.md",27836],c29b80fe:[()=>Promise.all([a.e(71869),a.e(32594)]).then(a.bind(a,64922)),"@site/docs/Spark/gems/join-split/join-split.md",64922],c327a517:[()=>Promise.all([a.e(71869),a.e(64738)]).then(a.bind(a,84958)),"@site/docs/Spark/gems/transform/reformat.md",84958],c377a04b:[()=>a.e(45742).then(a.bind(a,61866)),"@site/docs/index.md",61866],c43cae16:[()=>a.e(79351).then(a.bind(a,85767)),"@site/docs/deployment/prophecy-build-tool/pbt-github-actions.md",85767],c50e492b:[()=>Promise.all([a.e(71869),a.e(38655)]).then(a.bind(a,61756)),"@site/docs/concepts/project/models.md",61756],c7291002:[()=>
a.e(66371).then(a.t.bind(a,2652,19)),"~docs/default/tag-tags-gcs-a99.json",2652],c7365c5e:[()=>a.e(42580).then(a.bind(a,95331)),"@site/docs/architecture/self-hosted/authentication/azure-ad.md",95331],c75f905f:[()=>a.e(80882).then(a.bind(a,96920)),"@site/docs/metadata/lineage/lineage-view-and-search.md",96920],c7855b86:[()=>a.e(15203).then(a.t.bind(a,84178,19)),"~docs/default/tag-tags-google-4d3.json",84178],c8675ec5:[()=>a.e(68753).then(a.bind(a,93138)),"@site/docs/Spark/expression-builder.md",93138],c8beefd1:[()=>a.e(5969).then(a.t.bind(a,55191,19)),"~docs/default/tag-tags-datasets-45a.json",55191],c8ec8ce8:[()=>Promise.all([a.e(71869),a.e(10722)]).then(a.bind(a,68851)),"@site/docs/Spark/gems/source-target/file/xlsx.md",68851],c9324529:[()=>a.e(41862).then(a.t.bind(a,39707,19)),"~docs/default/tag-tags-march-db0.json",39707],c9a8325d:[()=>a.e(39504).then(a.t.bind(a,53151,19)),"~docs/default/tag-tags-catalog-9e8.json",53151],ca23fa19:[()=>a.e(5655).then(a.t.bind(a,23411,19)),"~docs/default/tag-tags-azure-blob-491.json",23411],ca3d9138:[()=>a.e(68701).then(a.t.bind(a,13233,19)),"~docs/default/tag-tags-avro-f30.json",13233],ca7752c3:[()=>a.e(75518).then(a.bind(a,36910)),"@site/docs/Spark/gems/source-target/source-target.md",36910],cb0f5c9e:[()=>a.e(50436).then(a.t.bind(a,77205,19)),"~docs/default/tag-tags-generativeai-5fd.json",77205],cb1fc966:[()=>a.e(62125).then(a.bind(a,63302)),"@site/docs/getting-started/genaichatbot.md",63302],cb416cf5:[()=>Promise.all([a.e(71869),a.e(463)]).then(a.bind(a,25109)),"@site/docs/Orchestration/Orchestration.md",25109],cb78d7c2:[()=>a.e(92945).then(a.t.bind(a,60725,19)),"~docs/default/tag-tags-orc-7e3.json",60725],cb821a7c:[()=>a.e(91898).then(a.bind(a,19462)),"@site/docs/SQL/extensibility/dependencies.md",19462],cbb989e8:[()=>a.e(65957).then(a.t.bind(a,18694,19)),"~docs/default/tag-tags-testing-974.json",18694],cbc8e58f:[()=>Promise.all([a.e(71869),a.e(8441)]).then(a.bind(a,30547)),"@site/docs/Spark/gems/source-target/file/iceberg.md",
30547],cc79057c:[()=>a.e(78334).then(a.bind(a,96325)),"@site/docs/release_notes/2024/july2024.md",96325],cc7b4211:[()=>a.e(17066).then(a.bind(a,75079)),"@site/docs/Spark/pipeline-monitoring/enable-pipeline-monitoring.md",75079],cc95c67f:[()=>a.e(39755).then(a.bind(a,74455)),"@site/src/pages/mdapi/types.md",74455],cc9fede1:[()=>a.e(6045).then(a.t.bind(a,72338,19)),"~docs/default/tag-tags-download-e65.json",72338],ccf2653d:[()=>a.e(61036).then(a.bind(a,20287)),"@site/docs/concepts/project/pipelines.md",20287],cd32080b:[()=>a.e(64403).then(a.bind(a,57095)),"@site/docs/metadata/git/git-commit.md",57095],ce924262:[()=>a.e(95376).then(a.t.bind(a,66746,19)),"~docs/default/tag-tags-package-hub-4a1.json",66746],ceb8a478:[()=>a.e(79359).then(a.bind(a,46807)),"@site/docs/release_notes/2023/mar2023.md",46807],cf89b950:[()=>a.e(32469).then(a.bind(a,65865)),"@site/docs/tutorials/videos/schedule-pipeline.md",65865],d02d84d2:[()=>a.e(85679).then(a.t.bind(a,21992,19)),"~docs/default/tag-tags-type-b7c.json",21992],d08e5d2e:[()=>a.e(4326).then(a.t.bind(a,90123,19)),"~docs/default/tag-tags-users-0fa.json",90123],d09e1221:[()=>a.e(48935).then(a.bind(a,35481)),"@site/docs/settings/settings.md",35481],d0e1c0c5:[()=>Promise.all([a.e(71869),a.e(54328)]).then(a.bind(a,32582)),"@site/docs/release_notes/2024/October_2024/webinar_new_features/webinar_new_features.md",32582],d0f2b4b8:[()=>a.e(46907).then(a.bind(a,56605)),"@site/docs/architecture/self-hosted/generate-api-key.md",56605],d12332fc:[()=>Promise.all([a.e(71869),a.e(47009)]).then(a.bind(a,40292)),"@site/docs/Spark/fabrics/dataproc/dataproc.md",40292],d12c5626:[()=>a.e(95854).then(a.t.bind(a,11768,19)),"~docs/default/tag-tags-xlsx-3b0.json",11768],d1b58582:[()=>a.e(54054).then(a.t.bind(a,71368,19)),"~docs/default/tag-tags-target-73a.json",71368],d1e881cf:[()=>Promise.all([a.e(71869),a.e(59143)]).then(a.bind(a,76140)),"@site/docs/Spark/gems/machine-learning/ml-pinecone-lookup.md",76140],d2577bdf:[()=>a.e(82504).then(a.bind(a,81401)),"@si
te/src/pages/mdapi/inputs.md",81401],d29a14d0:[()=>a.e(86124).then(a.bind(a,67727)),"@site/docs/Spark/fabrics/prophecy-managed.md",67727],d34f547c:[()=>a.e(69555).then(a.t.bind(a,89119,19)),"~docs/default/tag-tags-extensibility-eff.json",89119],d3624a2f:[()=>a.e(20753).then(a.bind(a,77747)),"@site/docs/Orchestration/airflow/setup/prophecy-managed/connections/email-connection.md",77747],d3aa5623:[()=>a.e(6638).then(a.t.bind(a,31481,19)),"~docs/default/tag-tags-active-directory-d98.json",31481],d4040588:[()=>a.e(51222).then(a.t.bind(a,93328,19)),"~docs/default/tag-tags-split-90c.json",93328],d40c75e7:[()=>a.e(88132).then(a.bind(a,55589)),"@site/docs/Orchestration/databricks-jobs.md",55589],d5875b60:[()=>a.e(32682).then(a.t.bind(a,90303,19)),"~docs/default/tag-tags-api-f4a.json",90303],d632c082:[()=>a.e(35449).then(a.bind(a,6902)),"@site/docs/Spark/gems/source-target/file/json.md",6902],d69a651a:[()=>a.e(45242).then(a.t.bind(a,78635,19)),"~docs/default/tag-tags-audit-events-3a8.json",78635],d6c3f943:[()=>a.e(10487).then(a.t.bind(a,44391,19)),"~docs/default/tag-tags-machine-learning-7b6.json",44391],d7087486:[()=>a.e(18755).then(a.bind(a,653)),"@site/docs/Spark/gems/transform/bulk-column-expressions.md",653],d851b6ea:[()=>a.e(52691).then(a.bind(a,34663)),"@site/docs/Orchestration/airflow/setup/composer.md",34663],d869e50c:[()=>a.e(80941).then(a.t.bind(a,47352,19)),"~docs/default/tag-tags-web-scraping-e36.json",47352],d8761ce4:[()=>Promise.all([a.e(71869),a.e(1799)]).then(a.bind(a,68129)),"@site/docs/architecture/self-hosted/installation-helm/install-on-aws.mdx",68129],d9210705:[()=>a.e(23607).then(a.bind(a,30162)),"@site/docs/release_notes/2023/apr2023.md",30162],d9a88ca1:[()=>a.e(74072).then(a.bind(a,15604)),"@site/docs/release_notes/2023/nov2023.md",15604],d9aa8ce3:[()=>a.e(45675).then(a.bind(a,51963)),"@site/docs/Orchestration/airflow/setup/mwaa.md",51963],da2c53c3:[()=>a.e(87517).then(a.t.bind(a,67841,19)),"~docs/default/tag-tags-diagnose-014.json",67841],dada2480:[
()=>a.e(96041).then(a.t.bind(a,90742,19)),"~docs/default/tag-tags-backup-d8d.json",90742],daf24bae:[()=>Promise.all([a.e(71869),a.e(76450)]).then(a.bind(a,64944)),"@site/docs/Spark/gems/source-target/file/avro.md",64944],db3522d0:[()=>a.e(489).then(a.bind(a,79056)),"@site/docs/Spark/gems/transform/dynamic-select.md",79056],dba55630:[()=>a.e(99303).then(a.t.bind(a,98508,19)),"~docs/default/tag-tags-git-925.json",98508],dbe71acf:[()=>a.e(50816).then(a.t.bind(a,36703,19)),"~docs/default/tag-tags-secrets-6a9.json",36703],dc1c5b3c:[()=>a.e(19847).then(a.t.bind(a,24602,19)),"~docs/default/tag-tags-json-a8a.json",24602],dc266cc5:[()=>a.e(15437).then(a.t.bind(a,47357,19)),"~docs/default/tag-tags-configuration-fa6.json",47357],dca7a11b:[()=>Promise.all([a.e(71869),a.e(12179)]).then(a.bind(a,94587)),"@site/docs/Spark/gems/join-split/row-distributor.md",94587],dda96e46:[()=>Promise.all([a.e(71869),a.e(33852)]).then(a.bind(a,99093)),"@site/docs/Spark/gems/transform/aggregate.md",99093],ddce3f9a:[()=>a.e(14222).then(a.t.bind(a,77370,19)),"~docs/default/tag-tags-synthetic-2f9.json",77370],ddf10ea6:[()=>a.e(89636).then(a.bind(a,17007)),"@site/docs/concepts/copilot/copilot-data-privacy.md",17007],de14c699:[()=>a.e(71036).then(a.t.bind(a,30134,19)),"~docs/default/tag-tags-view-496.json",30134],de3cce1f:[()=>a.e(48401).then(a.bind(a,91716)),"@site/docs/SQL/fabrics/snowflake.md",91716],ded7b364:[()=>a.e(2794).then(a.t.bind(a,12993,19)),"~docs/default/tag-tags-text-processing-213.json",12993],df203c0f:[()=>a.e(24279).then(a.bind(a,45921)),"@theme/DocTagDocListPage",45921],dffd6c42:[()=>a.e(52164).then(a.t.bind(a,9144,19)),"~docs/default/tag-tags-llm-385.json",9144],e0230970:[()=>a.e(88217).then(a.bind(a,7843)),"@site/docs/architecture/self-hosted/upgrade-backup-restore.md",7843],e1c22bb0:[()=>a.e(26488).then(a.bind(a,46385)),"@site/docs/release_notes/2023/june2023.md",46385],e26d3537:[()=>a.e(83766).then(a.t.bind(a,3936,19)),"~docs/default/tag-tags-environment-438.json",3936],e29b2710:
[()=>a.e(66869).then(a.bind(a,98996)),"@site/docs/architecture/self-hosted/authentication/active-directory.md",98996],e3120721:[()=>a.e(64622).then(a.t.bind(a,6439,19)),"~docs/default/tag-tags-filter-2d5.json",6439],e3260124:[()=>a.e(43084).then(a.bind(a,7309)),"@site/docs/getting-started/getting-started-sql-snowflake.md",7309],e33b1931:[()=>a.e(69202).then(a.bind(a,81238)),"@site/docs/package-hub/package-builder/ShareablePipelines.md",81238],e3722ece:[()=>a.e(79851).then(a.t.bind(a,81303,19)),"~docs/default/tag-tags-expression-95a.json",81303],e51b7957:[()=>Promise.all([a.e(71869),a.e(908)]).then(a.bind(a,68946)),"@site/docs/SQL/fabrics/fabrics.md",68946],e5297273:[()=>a.e(13395).then(a.bind(a,4241)),"@site/docs/Spark/gems/custom/delta-table-operations.md",4241],e593f7dc:[()=>a.e(98367).then(a.t.bind(a,35340,19)),"~docs/default/tag-tags-variable-72a.json",35340],e6f906f8:[()=>a.e(34293).then(a.t.bind(a,13533,19)),"~docs/default/tag-tags-spark-submit-3f6.json",13533],e726655f:[()=>Promise.all([a.e(71869),a.e(83513)]).then(a.bind(a,45175)),"@site/docs/deployment/prophecy-build-tool/prophecy-build-tool.md",45175],e75c2806:[()=>Promise.all([a.e(71869),a.e(82047)]).then(a.bind(a,50199)),"@site/docs/Spark/pipeline-monitoring/pipeline-monitoring.md",50199],e776dea9:[()=>a.e(3519).then(a.bind(a,57133)),"@site/docs/Spark/fabrics/diagnostics.md",57133],e7d08d17:[()=>a.e(28819).then(a.bind(a,32024)),"@site/docs/release_notes/2024/march2024.md",32024],e81d71dd:[()=>Promise.all([a.e(71869),a.e(21367)]).then(a.bind(a,50537)),"@site/docs/tutorials/Orchestration/orchestration.md",50537],e86802c8:[()=>a.e(99209).then(a.t.bind(a,69138,19)),"~docs/default/tag-tags-write-options-a0e.json",69138],e8868f5f:[()=>Promise.all([a.e(71869),a.e(99129)]).then(a.bind(a,11040)),"@site/docs/architecture/self-hosted/authentication/authentication.md",11040],e912c03f:[()=>a.e(30162).then(a.bind(a,6314)),"@site/docs/concepts/project/gems.md",6314],e9fb7e1a:[()=>a.e(38635).then(a.t.bind(a,25211,19)),"
~docs/default/tag-tags-orchestration-952.json",25211],ea49f9a5:[()=>a.e(20857).then(a.bind(a,54960)),"@site/docs/SQL/development/visual-editor/variant-schema.md",54960],ead81094:[()=>a.e(5937).then(a.t.bind(a,92807,19)),"~docs/default/tag-tags-authentication-b1d.json",92807],eae2a1a9:[()=>a.e(81788).then(a.t.bind(a,52314,19)),"~docs/default/tag-tags-gem-builder-670.json",52314],ebc1eaa8:[()=>a.e(3871).then(a.bind(a,41735)),"@site/docs/release_notes/2024/sept2024.md",41735],ebfb664b:[()=>a.e(66240).then(a.t.bind(a,48695,19)),"~docs/default/tag-tags-support-logs-f75.json",48695],ec996830:[()=>a.e(65210).then(a.bind(a,64188)),"@site/docs/Spark/gems/subgraph/basicSubgraph.md",64188],ef3b5271:[()=>a.e(56580).then(a.t.bind(a,86942,19)),"~docs/default/tag-tags-unit-945.json",86942],f07076ec:[()=>Promise.all([a.e(71869),a.e(97083)]).then(a.bind(a,98447)),"@site/docs/Spark/configuration/configuration.md",98447],f07b5009:[()=>a.e(23636).then(a.bind(a,81321)),"@site/docs/SQL/data-tests/use-model-tests.md",81321],f11ee91b:[()=>a.e(97501).then(a.bind(a,27842)),"@site/docs/Spark/gems/subgraph/tableIterator.md",27842],f146c87c:[()=>a.e(88509).then(a.t.bind(a,17669,19)),"~docs/default/tag-tags-hdfs-8f0.json",17669],f1ea0ce9:[()=>Promise.all([a.e(71869),a.e(49727)]).then(a.bind(a,4383)),"@site/docs/Spark/secret-management/secret-management.md",4383],f1f59c9a:[()=>a.e(16389).then(a.t.bind(a,91619,19)),"~docs/default/tag-tags-spark-72e.json",91619],f22c019c:[()=>a.e(93635).then(a.t.bind(a,69629,19)),"~docs/default/tag-tags-vector-4d6.json",69629],f29eab11:[()=>Promise.all([a.e(71869),a.e(1634)]).then(a.bind(a,38035)),"@site/docs/Spark/gems/custom/file-operation.md",38035],f3d40c67:[()=>a.e(51376).then(a.t.bind(a,41056,19)),"~docs/default/tag-tags-file-499.json",41056],f415b733:[()=>a.e(83580).then(a.t.bind(a,86233,19)),"~docs/default/tag-tags-serverless-f69.json",86233],f4191957:[()=>a.e(2101).then(a.t.bind(a,7958,19)),"~docs/default/tag-tags-external-21b.json",7958],f4292763:[()=>a.e
(35562).then(a.t.bind(a,92928,19)),"~docs/default/tag-tags-metrics-478.json",92928],f468eab0:[()=>a.e(83607).then(a.t.bind(a,13086,19)),"~docs/default/tag-tags-concept-022.json",13086],f4763366:[()=>a.e(22868).then(a.t.bind(a,14214,19)),"~docs/default/tag-tags-cli-09a.json",14214],f4f16549:[()=>Promise.all([a.e(71869),a.e(34088)]).then(a.bind(a,39146)),"@site/docs/Spark/gems/source-target/file/fixed-format.md",39146],f6361778:[()=>a.e(58525).then(a.t.bind(a,32013,19)),"~docs/default/tag-tags-models-834.json",32013],f6692c73:[()=>a.e(58891).then(a.t.bind(a,20437,19)),"~docs/default/tag-tags-kafka-384.json",20437],f678853a:[()=>a.e(58447).then(a.bind(a,48817)),"@site/docs/metadata/lineage/lineage.md",48817],f727ae13:[()=>a.e(97805).then(a.bind(a,64212)),"@site/docs/SQL/gems/gems.md",64212],f777afb0:[()=>a.e(33823).then(a.t.bind(a,8848,19)),"~docs/default/tag-tags-warehouse-ba6.json",8848],f779aca8:[()=>Promise.all([a.e(71869),a.e(72661)]).then(a.bind(a,80350)),"@site/docs/Spark/gems/custom/rest-api-enrich.md",80350],f9bb51d1:[()=>a.e(52487).then(a.t.bind(a,69079,19)),"~docs/default/tag-tags-splunk-63c.json",69079],fa210598:[()=>Promise.all([a.e(71869),a.e(42685)]).then(a.bind(a,37102)),"@site/docs/Spark/gems/source-target/file/parquet.md",37102],fb12fd2d:[()=>a.e(54238).then(a.t.bind(a,92833,19)),"~docs/default/tag-tags-sort-d14.json",92833],fc432830:[()=>a.e(56034).then(a.t.bind(a,65098,19)),"~docs/default/tag-tags-username-89e.json",65098],fc9f5876:[()=>Promise.all([a.e(71869),a.e(16263)]).then(a.bind(a,1571)),"@site/docs/tutorials/videos/videos.md",1571],fda3e47c:[()=>a.e(23868).then(a.t.bind(a,28497,19)),"~docs/default/tag-tags-schedule-37b.json",28497],fdfd2aa4:[()=>a.e(4843).then(a.t.bind(a,61823,19)),"~docs/default/tag-tags-devops-bd4.json",61823],fe598bea:[()=>a.e(7054).then(a.bind(a,30899)),"@site/docs/Spark/gems/source-target/advanced/synthetic-data-generator/synthetic-data-generator.md",30899],fe6a71b8:[()=>Promise.all([a.e(71869),a.e(40405)]).then(a.bind(a
,7318)),"@site/docs/Spark/gems/transform/deduplicate.md",7318],fec9a08a:[()=>Promise.all([a.e(71869),a.e(31902)]).then(a.bind(a,73330)),"@site/docs/Spark/gems/transform/set-operation.md",73330],fef83351:[()=>a.e(49574).then(a.t.bind(a,29589,19)),"~docs/default/tag-tags-qa-841.json",29589],ff225128:[()=>a.e(53659).then(a.t.bind(a,9453,19)),"~docs/default/tag-tags-reference-6ce.json",9453],ff978e4b:[()=>a.e(2472).then(a.t.bind(a,13374,19)),"~docs/default/tag-tags-chunk-cbf.json",13374],ffa0a549:[()=>a.e(41319).then(a.t.bind(a,72143,19)),"~docs/default/tag-tags-shared-c2c.json",72143],ffd96de8:[()=>a.e(827).then(a.bind(a,73751)),"@site/docs/Spark/gems/source-target/warehouse/cosmos.md",73751]};function l(e){let{error:t,retry:a,pastDelay:r}=e;return t?n.createElement("div",{style:{textAlign:"center",color:"#fff",backgroundColor:"#fa383e",borderColor:"#fa383e",borderStyle:"solid",borderRadius:"0.25rem",borderWidth:"1px",boxSizing:"border-box",display:"block",padding:"1rem",flex:"0 0 50%",marginLeft:"25%",marginRight:"25%",marginTop:"5rem",maxWidth:"50%",width:"100%"}},n.createElement("p",null,String(t)),n.createElement("div",null,n.createElement("button",{type:"button",onClick:a},"Retry"))):r?n.createElement("div",{style:{display:"flex",justifyContent:"center",alignItems:"center",height:"100vh"}},n.createElement("svg",{id:"loader",style:{width:128,height:110,position:"absolute",top:"calc(100vh - 64%)"},viewBox:"0 0 45 45",xmlns:"http://www.w3.org/2000/svg",stroke:"#61dafb"},n.createElement("g",{fill:"none",fillRule:"evenodd",transform:"translate(1 
1)",strokeWidth:"2"},n.createElement("circle",{cx:"22",cy:"22",r:"6",strokeOpacity:"0"},n.createElement("animate",{attributeName:"r",begin:"1.5s",dur:"3s",values:"6;22",calcMode:"linear",repeatCount:"indefinite"}),n.createElement("animate",{attributeName:"stroke-opacity",begin:"1.5s",dur:"3s",values:"1;0",calcMode:"linear",repeatCount:"indefinite"}),n.createElement("animate",{attributeName:"stroke-width",begin:"1.5s",dur:"3s",values:"2;0",calcMode:"linear",repeatCount:"indefinite"})),n.createElement("circle",{cx:"22",cy:"22",r:"6",strokeOpacity:"0"},n.createElement("animate",{attributeName:"r",begin:"3s",dur:"3s",values:"6;22",calcMode:"linear",repeatCount:"indefinite"}),n.createElement("animate",{attributeName:"stroke-opacity",begin:"3s",dur:"3s",values:"1;0",calcMode:"linear",repeatCount:"indefinite"}),n.createElement("animate",{attributeName:"stroke-width",begin:"3s",dur:"3s",values:"2;0",calcMode:"linear",repeatCount:"indefinite"})),n.createElement("circle",{cx:"22",cy:"22",r:"8"},n.createElement("animate",{attributeName:"r",begin:"0s",dur:"1.5s",values:"6;1;2;3;4;5;6",calcMode:"linear",repeatCount:"indefinite"}))))):null}var d=a(86921),u=a(53102);function p(e,t){if("*"===e)return i()({loading:l,loader:()=>a.e(81774).then(a.bind(a,81774)),modules:["@theme/NotFound"],webpack:()=>[81774],render(e,t){const a=e.default;return n.createElement(u.W,{value:{plugin:{name:"native",id:"default"}}},n.createElement(a,t))}});const o=s[`${e}-${t}`],p={},f=[],g=[],m=(0,d.A)(o);return Object.entries(m).forEach((e=>{let[t,a]=e;const n=c[a];n&&(p[t]=n[0],f.push(n[1]),g.push(n[2]))})),i().Map({loading:l,loader:p,modules:f,webpack:()=>g,render(t,a){const i=JSON.parse(JSON.stringify(o));Object.entries(t).forEach((t=>{let[a,n]=t;const r=n.default;if(!r)throw new Error(`The page component at ${e} doesn't have a default export. This makes it impossible to render anything. 
Consider default-exporting a React component.`);"object"!=typeof r&&"function"!=typeof r||Object.keys(n).filter((e=>"default"!==e)).forEach((e=>{r[e]=n[e]}));let o=i;const s=a.split(".");s.slice(0,-1).forEach((e=>{o=o[e]})),o[s[s.length-1]]=r}));const s=i.__comp;delete i.__comp;const c=i.__context;return delete i.__context,n.createElement(u.W,{value:c},n.createElement(s,(0,r.A)({},i,a)))}})}const f=[{path:"/mdapi/",component:p("/mdapi/","e00"),exact:!0},{path:"/mdapi/enums",component:p("/mdapi/enums","523"),exact:!0},{path:"/mdapi/inputs",component:p("/mdapi/inputs","42c"),exact:!0},{path:"/mdapi/types",component:p("/mdapi/types","436"),exact:!0},{path:"/prophecy-ir",component:p("/prophecy-ir","d32"),exact:!0},{path:"/search",component:p("/search","c98"),exact:!0},{path:"/tags",component:p("/tags","f59"),exact:!0},{path:"/tags/active-directory",component:p("/tags/active-directory","bef"),exact:!0},{path:"/tags/ad",component:p("/tags/ad","ef4"),exact:!0},{path:"/tags/ad-hoc",component:p("/tags/ad-hoc","166"),exact:!0},{path:"/tags/admin",component:p("/tags/admin","862"),exact:!0},{path:"/tags/aggregate",component:p("/tags/aggregate","695"),exact:!0},{path:"/tags/airflow",component:p("/tags/airflow","330"),exact:!0},{path:"/tags/alerting",component:p("/tags/alerting","b83"),exact:!0},{path:"/tags/alerts",component:p("/tags/alerts","9f8"),exact:!0},{path:"/tags/answer",component:p("/tags/answer","686"),exact:!0},{path:"/tags/api",component:p("/tags/api","62f"),exact:!0},{path:"/tags/april",component:p("/tags/april","2b7"),exact:!0},{path:"/tags/ascending",component:p("/tags/ascending","0c3"),exact:!0},{path:"/tags/audit-events",component:p("/tags/audit-events","3c5"),exact:!0},{path:"/tags/audit-logs",component:p("/tags/audit-logs","21b"),exact:!0},{path:"/tags/august",component:p("/tags/august","cf0"),exact:!0},{path:"/tags/authentication",component:p("/tags/authentication","74a"),exact:!0},{path:"/tags/avro",component:p("/tags/avro","84c"),exact:!0},{path:"/tags/aws"
,component:p("/tags/aws","b6f"),exact:!0},{path:"/tags/azure",component:p("/tags/azure","4bc"),exact:!0},{path:"/tags/azure-blob",component:p("/tags/azure-blob","4b4"),exact:!0},{path:"/tags/azure-blob-storage",component:p("/tags/azure-blob-storage","d4e"),exact:!0},{path:"/tags/azuread",component:p("/tags/azuread","326"),exact:!0},{path:"/tags/backup",component:p("/tags/backup","33f"),exact:!0},{path:"/tags/best-practices",component:p("/tags/best-practices","f4d"),exact:!0},{path:"/tags/bigquery",component:p("/tags/bigquery","34f"),exact:!0},{path:"/tags/build",component:p("/tags/build","7fc"),exact:!0},{path:"/tags/capabilities",component:p("/tags/capabilities","39e"),exact:!0},{path:"/tags/catalog",component:p("/tags/catalog","a25"),exact:!0},{path:"/tags/cdp",component:p("/tags/cdp","885"),exact:!0},{path:"/tags/changelog",component:p("/tags/changelog","c99"),exact:!0},{path:"/tags/chart",component:p("/tags/chart","f2b"),exact:!0},{path:"/tags/chatbot",component:p("/tags/chatbot","426"),exact:!0},{path:"/tags/chunk",component:p("/tags/chunk","81f"),exact:!0},{path:"/tags/cicd",component:p("/tags/cicd","18e"),exact:!0},{path:"/tags/clean",component:p("/tags/clean","7d7"),exact:!0},{path:"/tags/cli",component:p("/tags/cli","50a"),exact:!0},{path:"/tags/cloudera",component:p("/tags/cloudera","209"),exact:!0},{path:"/tags/coalesce",component:p("/tags/coalesce","ac3"),exact:!0},{path:"/tags/code",component:p("/tags/code","3f0"),exact:!0},{path:"/tags/columns",component:p("/tags/columns","428"),exact:!0},{path:"/tags/commit",component:p("/tags/commit","d2d"),exact:!0},{path:"/tags/compare",component:p("/tags/compare","b44"),exact:!0},{path:"/tags/compare-columns",component:p("/tags/compare-columns","532"),exact:!0},{path:"/tags/compatibility",component:p("/tags/compatibility","97f"),exact:!0},{path:"/tags/composer",component:p("/tags/composer","ab1"),exact:!0},{path:"/tags/concept",component:p("/tags/concept","65c"),exact:!0},{path:"/tags/concepts",component:p("/tags/
concepts","60c"),exact:!0},{path:"/tags/conditional",component:p("/tags/conditional","c0d"),exact:!0},{path:"/tags/config",component:p("/tags/config","611"),exact:!0},{path:"/tags/configuration",component:p("/tags/configuration","cd6"),exact:!0},{path:"/tags/configurations",component:p("/tags/configurations","382"),exact:!0},{path:"/tags/connect",component:p("/tags/connect","5d0"),exact:!0},{path:"/tags/connections",component:p("/tags/connections","cf8"),exact:!0},{path:"/tags/continuous-deployment",component:p("/tags/continuous-deployment","0b1"),exact:!0},{path:"/tags/continuous-integration",component:p("/tags/continuous-integration","2c9"),exact:!0},{path:"/tags/copilot",component:p("/tags/copilot","446"),exact:!0},{path:"/tags/cosmos",component:p("/tags/cosmos","fa7"),exact:!0},{path:"/tags/count",component:p("/tags/count","997"),exact:!0},{path:"/tags/csv",component:p("/tags/csv","bef"),exact:!0},{path:"/tags/cte",component:p("/tags/cte","055"),exact:!0},{path:"/tags/custom",component:p("/tags/custom","f3a"),exact:!0},{path:"/tags/data",component:p("/tags/data","80c"),exact:!0},{path:"/tags/data-privacy",component:p("/tags/data-privacy","eb5"),exact:!0},{path:"/tags/databricks",component:p("/tags/databricks","fcd"),exact:!0},{path:"/tags/databricks-secrets",component:p("/tags/databricks-secrets","d5a"),exact:!0},{path:"/tags/databricksworkflow",component:p("/tags/databricksworkflow","d20"),exact:!0},{path:"/tags/dataproc",component:p("/tags/dataproc","d38"),exact:!0},{path:"/tags/datasets",component:p("/tags/datasets","46f"),exact:!0},{path:"/tags/db-2",component:p("/tags/db-2","2e4"),exact:!0},{path:"/tags/dbfs",component:p("/tags/dbfs","6c6"),exact:!0},{path:"/tags/dbt",component:p("/tags/dbt","1e6"),exact:!0},{path:"/tags/dedupe",component:p("/tags/dedupe","f72"),exact:!0},{path:"/tags/delta",component:p("/tags/delta","c78"),exact:!0},{path:"/tags/dependencies",component:p("/tags/dependencies","4bf"),exact:!0},{path:"/tags/dependency",component:p("/tags/depe
ndency","79f"),exact:!0},{path:"/tags/deploy",component:p("/tags/deploy","662"),exact:!0},{path:"/tags/deployment",component:p("/tags/deployment","54e"),exact:!0},{path:"/tags/descending",component:p("/tags/descending","759"),exact:!0},{path:"/tags/development",component:p("/tags/development","2da"),exact:!0},{path:"/tags/devops",component:p("/tags/devops","342"),exact:!0},{path:"/tags/diagnose",component:p("/tags/diagnose","cb1"),exact:!0},{path:"/tags/diagnostics",component:p("/tags/diagnostics","738"),exact:!0},{path:"/tags/diff",component:p("/tags/diff","98f"),exact:!0},{path:"/tags/difference",component:p("/tags/difference","ce4"),exact:!0},{path:"/tags/disaster-recovery",component:p("/tags/disaster-recovery","2f8"),exact:!0},{path:"/tags/distinct",component:p("/tags/distinct","0f9"),exact:!0},{path:"/tags/download",component:p("/tags/download","29d"),exact:!0},{path:"/tags/dynamic",component:p("/tags/dynamic","568"),exact:!0},{path:"/tags/email",component:p("/tags/email","d41"),exact:!0},{path:"/tags/embedding",component:p("/tags/embedding","69c"),exact:!0},{path:"/tags/emr",component:p("/tags/emr","d4d"),exact:!0},{path:"/tags/enterprise",component:p("/tags/enterprise","c2f"),exact:!0},{path:"/tags/env-var",component:p("/tags/env-var","742"),exact:!0},{path:"/tags/environment",component:p("/tags/environment","01a"),exact:!0},{path:"/tags/execution",component:p("/tags/execution","6fa"),exact:!0},{path:"/tags/explode",component:p("/tags/explode","4bf"),exact:!0},{path:"/tags/explorer",component:p("/tags/explorer","ebd"),exact:!0},{path:"/tags/expression",component:p("/tags/expression","46c"),exact:!0},{path:"/tags/expression-builder",component:p("/tags/expression-builder","41e"),exact:!0},{path:"/tags/expressions",component:p("/tags/expressions","d9d"),exact:!0},{path:"/tags/extensibility",component:p("/tags/extensibility","a74"),exact:!0},{path:"/tags/external",component:p("/tags/external","688"),exact:!0},{path:"/tags/extract",component:p("/tags/extract","7db
"),exact:!0},{path:"/tags/fabric",component:p("/tags/fabric","f0e"),exact:!0},{path:"/tags/fabrics",component:p("/tags/fabrics","864"),exact:!0},{path:"/tags/fake",component:p("/tags/fake","561"),exact:!0},{path:"/tags/file",component:p("/tags/file","b37"),exact:!0},{path:"/tags/file-based",component:p("/tags/file-based","8b1"),exact:!0},{path:"/tags/filter",component:p("/tags/filter","16c"),exact:!0},{path:"/tags/fixed-format",component:p("/tags/fixed-format","6be"),exact:!0},{path:"/tags/flatten",component:p("/tags/flatten","3be"),exact:!0},{path:"/tags/fork",component:p("/tags/fork","3ea"),exact:!0},{path:"/tags/format",component:p("/tags/format","f31"),exact:!0},{path:"/tags/functionality",component:p("/tags/functionality","f8b"),exact:!0},{path:"/tags/functions",component:p("/tags/functions","322"),exact:!0},{path:"/tags/gcp",component:p("/tags/gcp","f4d"),exact:!0},{path:"/tags/gcs",component:p("/tags/gcs","6f8"),exact:!0},{path:"/tags/gem",component:p("/tags/gem","a69"),exact:!0},{path:"/tags/gem-builder",component:p("/tags/gem-builder","bdb"),exact:!0},{path:"/tags/gems",component:p("/tags/gems","574"),exact:!0},{path:"/tags/generate",component:p("/tags/generate","ecc"),exact:!0},{path:"/tags/generative-ai",component:p("/tags/generative-ai","2af"),exact:!0},{path:"/tags/generativeai",component:p("/tags/generativeai","23d"),exact:!0},{path:"/tags/generator",component:p("/tags/generator","2ac"),exact:!0},{path:"/tags/git",component:p("/tags/git","b2b"),exact:!0},{path:"/tags/github-actions",component:p("/tags/github-actions","340"),exact:!0},{path:"/tags/google",component:p("/tags/google","121"),exact:!0},{path:"/tags/group",component:p("/tags/group","b07"),exact:!0},{path:"/tags/group-by",component:p("/tags/group-by","465"),exact:!0},{path:"/tags/groupby",component:p("/tags/groupby","69a"),exact:!0},{path:"/tags/guide",component:p("/tags/guide","c97"),exact:!0},{path:"/tags/hashicorp",component:p("/tags/hashicorp","f35"),exact:!0},{path:"/tags/hdfs",component
:p("/tags/hdfs","b52"),exact:!0},{path:"/tags/helm",component:p("/tags/helm","6ae"),exact:!0},{path:"/tags/hints",component:p("/tags/hints","2b6"),exact:!0},{path:"/tags/historical-runs",component:p("/tags/historical-runs","2ef"),exact:!0},{path:"/tags/hive",component:p("/tags/hive","45e"),exact:!0},{path:"/tags/how-to",component:p("/tags/how-to","546"),exact:!0},{path:"/tags/iceberg",component:p("/tags/iceberg","aa7"),exact:!0},{path:"/tags/infer",component:p("/tags/infer","cf0"),exact:!0},{path:"/tags/inner",component:p("/tags/inner","1b1"),exact:!0},{path:"/tags/installation",component:p("/tags/installation","e19"),exact:!0},{path:"/tags/instructions",component:p("/tags/instructions","aaa"),exact:!0},{path:"/tags/interactive",component:p("/tags/interactive","7d9"),exact:!0},{path:"/tags/interim",component:p("/tags/interim","ff3"),exact:!0},{path:"/tags/intersect",component:p("/tags/intersect","788"),exact:!0},{path:"/tags/iterator",component:p("/tags/iterator","ebf"),exact:!0},{path:"/tags/jdbc",component:p("/tags/jdbc","a43"),exact:!0},{path:"/tags/jenkins",component:p("/tags/jenkins","21f"),exact:!0},{path:"/tags/job",component:p("/tags/job","694"),exact:!0},{path:"/tags/jobs",component:p("/tags/jobs","ec7"),exact:!0},{path:"/tags/join",component:p("/tags/join","03e"),exact:!0},{path:"/tags/join-split",component:p("/tags/join-split","881"),exact:!0},{path:"/tags/json",component:p("/tags/json","309"),exact:!0},{path:"/tags/july",component:p("/tags/july","268"),exact:!0},{path:"/tags/june",component:p("/tags/june","d22"),exact:!0},{path:"/tags/kafka",component:p("/tags/kafka","043"),exact:!0},{path:"/tags/key",component:p("/tags/key","691"),exact:!0},{path:"/tags/keytab",component:p("/tags/keytab","77d"),exact:!0},{path:"/tags/left-join",component:p("/tags/left-join","63b"),exact:!0},{path:"/tags/library",component:p("/tags/library","48a"),exact:!0},{path:"/tags/license",component:p("/tags/license","8e5"),exact:!0},{path:"/tags/limit",component:p("/tags/limit","d
44"),exact:!0},{path:"/tags/lineage",component:p("/tags/lineage","88c"),exact:!0},{path:"/tags/livy",component:p("/tags/livy","24c"),exact:!0},{path:"/tags/llm",component:p("/tags/llm","15d"),exact:!0},{path:"/tags/location",component:p("/tags/location","59f"),exact:!0},{path:"/tags/longformat",component:p("/tags/longformat","c2b"),exact:!0},{path:"/tags/lookup",component:p("/tags/lookup","e5c"),exact:!0},{path:"/tags/loop",component:p("/tags/loop","273"),exact:!0},{path:"/tags/machine-learning",component:p("/tags/machine-learning","be6"),exact:!0},{path:"/tags/march",component:p("/tags/march","98f"),exact:!0},{path:"/tags/matrix",component:p("/tags/matrix","f94"),exact:!0},{path:"/tags/maven",component:p("/tags/maven","619"),exact:!0},{path:"/tags/may",component:p("/tags/may","79f"),exact:!0},{path:"/tags/merge",component:p("/tags/merge","7ee"),exact:!0},{path:"/tags/metadata",component:p("/tags/metadata","0c3"),exact:!0},{path:"/tags/metrics",component:p("/tags/metrics","6c3"),exact:!0},{path:"/tags/mock",component:p("/tags/mock","0af"),exact:!0},{path:"/tags/model",component:p("/tags/model","c1a"),exact:!0},{path:"/tags/models",component:p("/tags/models","c81"),exact:!0},{path:"/tags/mongodb",component:p("/tags/mongodb","757"),exact:!0},{path:"/tags/monitoring",component:p("/tags/monitoring","1a8"),exact:!0},{path:"/tags/mwaa",component:p("/tags/mwaa","b8e"),exact:!0},{path:"/tags/nfs",component:p("/tags/nfs","2dc"),exact:!0},{path:"/tags/november",component:p("/tags/november","8fb"),exact:!0},{path:"/tags/object-store",component:p("/tags/object-store","5ac"),exact:!0},{path:"/tags/october",component:p("/tags/october","27c"),exact:!0},{path:"/tags/okta",component:p("/tags/okta","d7d"),exact:!0},{path:"/tags/open-source-spark",component:p("/tags/open-source-spark","6ab"),exact:!0},{path:"/tags/openai",component:p("/tags/openai","1a2"),exact:!0},{path:"/tags/oracle",component:p("/tags/oracle","627"),exact:!0},{path:"/tags/orc",component:p("/tags/orc","c4e"),exact:!
0},{path:"/tags/orchestration",component:p("/tags/orchestration","682"),exact:!0},{path:"/tags/order-by",component:p("/tags/order-by","831"),exact:!0},{path:"/tags/outer",component:p("/tags/outer","bf0"),exact:!0},{path:"/tags/package",component:p("/tags/package","14c"),exact:!0},{path:"/tags/package-hub",component:p("/tags/package-hub","ea5"),exact:!0},{path:"/tags/parquet",component:p("/tags/parquet","650"),exact:!0},{path:"/tags/partition",component:p("/tags/partition","78e"),exact:!0},{path:"/tags/passwords",component:p("/tags/passwords","acf"),exact:!0},{path:"/tags/pinecone",component:p("/tags/pinecone","fe8"),exact:!0},{path:"/tags/pipelines",component:p("/tags/pipelines","60a"),exact:!0},{path:"/tags/plib",component:p("/tags/plib","e80"),exact:!0},{path:"/tags/plibs",component:p("/tags/plibs","00e"),exact:!0},{path:"/tags/pr",component:p("/tags/pr","230"),exact:!0},{path:"/tags/project",component:p("/tags/project","98b"),exact:!0},{path:"/tags/prophecy-managed",component:p("/tags/prophecy-managed","d7d"),exact:!0},{path:"/tags/provider",component:p("/tags/provider","af6"),exact:!0},{path:"/tags/pull-requests",component:p("/tags/pull-requests","ae5"),exact:!0},{path:"/tags/pullrequest",component:p("/tags/pullrequest","424"),exact:!0},{path:"/tags/python",component:p("/tags/python","17e"),exact:!0},{path:"/tags/qa",component:p("/tags/qa","ab2"),exact:!0},{path:"/tags/query",component:p("/tags/query","587"),exact:!0},{path:"/tags/question",component:p("/tags/question","944"),exact:!0},{path:"/tags/random",component:p("/tags/random","8dd"),exact:!0},{path:"/tags/recommendations",component:p("/tags/recommendations","795"),exact:!0},{path:"/tags/redshift",component:p("/tags/redshift","365"),exact:!0},{path:"/tags/reference",component:p("/tags/reference","783"),exact:!0},{path:"/tags/reformat",component:p("/tags/reformat","65c"),exact:!0},{path:"/tags/release",component:p("/tags/release","96d"),exact:!0},{path:"/tags/release-notes",component:p("/tags/release-notes"
,"a02"),exact:!0},{path:"/tags/rename",component:p("/tags/rename","28c"),exact:!0},{path:"/tags/repartition",component:p("/tags/repartition","c5d"),exact:!0},{path:"/tags/reserve-pods",component:p("/tags/reserve-pods","f6f"),exact:!0},{path:"/tags/resolve",component:p("/tags/resolve","f3a"),exact:!0},{path:"/tags/rest",component:p("/tags/rest","34a"),exact:!0},{path:"/tags/restore",component:p("/tags/restore","07b"),exact:!0},{path:"/tags/reusable",component:p("/tags/reusable","19d"),exact:!0},{path:"/tags/right-join",component:p("/tags/right-join","992"),exact:!0},{path:"/tags/row-distributor",component:p("/tags/row-distributor","322"),exact:!0},{path:"/tags/run",component:p("/tags/run","af8"),exact:!0},{path:"/tags/runs",component:p("/tags/runs","453"),exact:!0},{path:"/tags/runtime-config",component:p("/tags/runtime-config","ed7"),exact:!0},{path:"/tags/s-3",component:p("/tags/s-3","ae6"),exact:!0},{path:"/tags/salesforce",component:p("/tags/salesforce","c7f"),exact:!0},{path:"/tags/saml",component:p("/tags/saml","973"),exact:!0},{path:"/tags/sandbox",component:p("/tags/sandbox","dd6"),exact:!0},{path:"/tags/scala",component:p("/tags/scala","d1c"),exact:!0},{path:"/tags/scd-2",component:p("/tags/scd-2","c05"),exact:!0},{path:"/tags/schedule",component:p("/tags/schedule","b51"),exact:!0},{path:"/tags/scheduling",component:p("/tags/scheduling","81c"),exact:!0},{path:"/tags/schema",component:p("/tags/schema","ce5"),exact:!0},{path:"/tags/scim",component:p("/tags/scim","bb4"),exact:!0},{path:"/tags/search",component:p("/tags/search","a8a"),exact:!0},{path:"/tags/secret-provider",component:p("/tags/secret-provider","de0"),exact:!0},{path:"/tags/secrets",component:p("/tags/secrets","1d9"),exact:!0},{path:"/tags/security",component:p("/tags/security","e65"),exact:!0},{path:"/tags/seeds",component:p("/tags/seeds","08d"),exact:!0},{path:"/tags/select",component:p("/tags/select","0ad"),exact:!0},{path:"/tags/self-hosted",component:p("/tags/self-hosted","415"),exact:!0},{pa
th:"/tags/self-managed",component:p("/tags/self-managed","7d6"),exact:!0},{path:"/tags/september",component:p("/tags/september","36a"),exact:!0},{path:"/tags/serverless",component:p("/tags/serverless","045"),exact:!0},{path:"/tags/set",component:p("/tags/set","930"),exact:!0},{path:"/tags/settings",component:p("/tags/settings","f7c"),exact:!0},{path:"/tags/setup",component:p("/tags/setup","ac2"),exact:!0},{path:"/tags/shared",component:p("/tags/shared","245"),exact:!0},{path:"/tags/smtp",component:p("/tags/smtp","1de"),exact:!0},{path:"/tags/snowflake",component:p("/tags/snowflake","80d"),exact:!0},{path:"/tags/sort",component:p("/tags/sort","6b5"),exact:!0},{path:"/tags/source",component:p("/tags/source","d6b"),exact:!0},{path:"/tags/sources",component:p("/tags/sources","fc8"),exact:!0},{path:"/tags/spark",component:p("/tags/spark","7e6"),exact:!0},{path:"/tags/spark-submit",component:p("/tags/spark-submit","8af"),exact:!0},{path:"/tags/split",component:p("/tags/split","293"),exact:!0},{path:"/tags/splunk",component:p("/tags/splunk","f1c"),exact:!0},{path:"/tags/sql",component:p("/tags/sql","447"),exact:!0},{path:"/tags/streaming",component:p("/tags/streaming","4e2"),exact:!0},{path:"/tags/subgraph",component:p("/tags/subgraph","415"),exact:!0},{path:"/tags/sum",component:p("/tags/sum","33e"),exact:!0},{path:"/tags/support-logs",component:p("/tags/support-logs","66d"),exact:!0},{path:"/tags/synase",component:p("/tags/synase","b3a"),exact:!0},{path:"/tags/synthetic",component:p("/tags/synthetic","bed"),exact:!0},{path:"/tags/table",component:p("/tags/table","8aa"),exact:!0},{path:"/tags/tables",component:p("/tags/tables","9ec"),exact:!0},{path:"/tags/tags",component:p("/tags/tags","be4"),exact:!0},{path:"/tags/target",component:p("/tags/target","dc1"),exact:!0},{path:"/tags/teams",component:p("/tags/teams","871"),exact:!0},{path:"/tags/teradata",component:p("/tags/teradata","52f"),exact:!0},{path:"/tags/test",component:p("/tags/test","90b"),exact:!0},{path:"/tags/te
sting",component:p("/tags/testing","e26"),exact:!0},{path:"/tags/text",component:p("/tags/text","416"),exact:!0},{path:"/tags/text-processing",component:p("/tags/text-processing","619"),exact:!0},{path:"/tags/transform",component:p("/tags/transform","731"),exact:!0},{path:"/tags/transformation",component:p("/tags/transformation","b4c"),exact:!0},{path:"/tags/transformations",component:p("/tags/transformations","99e"),exact:!0},{path:"/tags/trigger",component:p("/tags/trigger","c7f"),exact:!0},{path:"/tags/tutorial",component:p("/tags/tutorial","f8b"),exact:!0},{path:"/tags/type",component:p("/tags/type","e92"),exact:!0},{path:"/tags/udafs",component:p("/tags/udafs","ca1"),exact:!0},{path:"/tags/udfs",component:p("/tags/udfs","bfc"),exact:!0},{path:"/tags/union",component:p("/tags/union","479"),exact:!0},{path:"/tags/unique",component:p("/tags/unique","b83"),exact:!0},{path:"/tags/unit",component:p("/tags/unit","04f"),exact:!0},{path:"/tags/unit-tests",component:p("/tags/unit-tests","1c1"),exact:!0},{path:"/tags/unity-catalog",component:p("/tags/unity-catalog","fbd"),exact:!0},{path:"/tags/unpivot",component:p("/tags/unpivot","228"),exact:!0},{path:"/tags/upgrade",component:p("/tags/upgrade","b21"),exact:!0},{path:"/tags/upload",component:p("/tags/upload","b20"),exact:!0},{path:"/tags/user",component:p("/tags/user","b59"),exact:!0},{path:"/tags/username",component:p("/tags/username","320"),exact:!0},{path:"/tags/users",component:p("/tags/users","721"),exact:!0},{path:"/tags/variable",component:p("/tags/variable","84c"),exact:!0},{path:"/tags/variant",component:p("/tags/variant","eb6"),exact:!0},{path:"/tags/vault",component:p("/tags/vault","a7c"),exact:!0},{path:"/tags/vector",component:p("/tags/vector","add"),exact:!0},{path:"/tags/version",component:p("/tags/version","8cb"),exact:!0},{path:"/tags/view",component:p("/tags/view","d95"),exact:!0},{path:"/tags/visual",component:p("/tags/visual","ee8"),exact:!0},{path:"/tags/warehouse",component:p("/tags/warehouse","f6b
"),exact:!0},{path:"/tags/warehouse-based",component:p("/tags/warehouse-based","48a"),exact:!0},{path:"/tags/web-scraping",component:p("/tags/web-scraping","139"),exact:!0},{path:"/tags/webinar",component:p("/tags/webinar","594"),exact:!0},{path:"/tags/where",component:p("/tags/where","3fd"),exact:!0},{path:"/tags/wideformat",component:p("/tags/wideformat","ef0"),exact:!0},{path:"/tags/window",component:p("/tags/window","431"),exact:!0},{path:"/tags/with-column",component:p("/tags/with-column","202"),exact:!0},{path:"/tags/write-options",component:p("/tags/write-options","fb1"),exact:!0},{path:"/tags/xlsx",component:p("/tags/xlsx","0fb"),exact:!0},{path:"/",component:p("/","64a"),routes:[{path:"/",component:p("/","d65"),exact:!0,sidebar:"defaultSidebar"},{path:"/architecture/",component:p("/architecture/","ca8"),exact:!0,sidebar:"defaultSidebar"},{path:"/architecture/deployment/",component:p("/architecture/deployment/","eed"),exact:!0,sidebar:"defaultSidebar"},{path:"/architecture/self-hosted/",component:p("/architecture/self-hosted/","c6e"),exact:!0,sidebar:"defaultSidebar"},{path:"/architecture/self-hosted/authentication/",component:p("/architecture/self-hosted/authentication/","19e"),exact:!0,sidebar:"defaultSidebar"},{path:"/architecture/self-hosted/authentication/active_directory",component:p("/architecture/self-hosted/authentication/active_directory","8be"),exact:!0,sidebar:"defaultSidebar"},{path:"/architecture/self-hosted/authentication/azure-ad",component:p("/architecture/self-hosted/authentication/azure-ad","06d"),exact:!0,sidebar:"defaultSidebar"},{path:"/architecture/self-hosted/authentication/azuread-scim",component:p("/architecture/self-hosted/authentication/azuread-scim","070"),exact:!0,sidebar:"defaultSidebar"},{path:"/architecture/self-hosted/authentication/saml-okta",component:p("/architecture/self-hosted/authentication/saml-okta","c4a"),exact:!0,sidebar:"defaultSidebar"},{path:"/architecture/self-hosted/authentication/security-settings",component:
p("/architecture/self-hosted/authentication/security-settings","8ec"),exact:!0,sidebar:"defaultSidebar"},{path:"/architecture/self-hosted/configurations/",component:p("/architecture/self-hosted/configurations/","106"),exact:!0,sidebar:"defaultSidebar"},{path:"/architecture/self-hosted/configurations/configure-alerts",component:p("/architecture/self-hosted/configurations/configure-alerts","9c3"),exact:!0,sidebar:"defaultSidebar"},{path:"/architecture/self-hosted/configurations/configure-audit-logs",component:p("/architecture/self-hosted/configurations/configure-audit-logs","ff6"),exact:!0,sidebar:"defaultSidebar"},{path:"/architecture/self-hosted/configurations/configure-object-store",component:p("/architecture/self-hosted/configurations/configure-object-store","c6b"),exact:!0,sidebar:"defaultSidebar"},{path:"/architecture/self-hosted/configurations/sandbox-configuration",component:p("/architecture/self-hosted/configurations/sandbox-configuration","b1d"),exact:!0,sidebar:"defaultSidebar"},{path:"/architecture/self-hosted/download-logs",component:p("/architecture/self-hosted/download-logs","def"),exact:!0,sidebar:"defaultSidebar"},{path:"/architecture/self-hosted/generate-api-key",component:p("/architecture/self-hosted/generate-api-key","1a6"),exact:!0,sidebar:"defaultSidebar"},{path:"/architecture/self-hosted/installation-helm/",component:p("/architecture/self-hosted/installation-helm/","e36"),exact:!0,sidebar:"defaultSidebar"},{path:"/architecture/self-hosted/installation-helm/install-on-aws",component:p("/architecture/self-hosted/installation-helm/install-on-aws","dc4"),exact:!0,sidebar:"defaultSidebar"},{path:"/architecture/self-hosted/upgrade-backup-restore",component:p("/architecture/self-hosted/upgrade-backup-restore","7d6"),exact:!0,sidebar:"defaultSidebar"},{path:"/concepts/",component:p("/concepts/","009"),exact:!0,sidebar:"defaultSidebar"},{path:"/concepts/copilot/",component:p("/concepts/copilot/","b1c"),exact:!0,sidebar:"defaultSidebar"},{path:"/concepts/
copilot/copilot-ai-capabilities",component:p("/concepts/copilot/copilot-ai-capabilities","e07"),exact:!0,sidebar:"defaultSidebar"},{path:"/concepts/copilot/copilot-data-privacy",component:p("/concepts/copilot/copilot-data-privacy","3c0"),exact:!0,sidebar:"defaultSidebar"},{path:"/concepts/copilot/enable-data-copilot",component:p("/concepts/copilot/enable-data-copilot","c80"),exact:!0,sidebar:"defaultSidebar"},{path:"/concepts/dataset",component:p("/concepts/dataset","c90"),exact:!0,sidebar:"defaultSidebar"},{path:"/concepts/fabrics/",component:p("/concepts/fabrics/","941"),exact:!0,sidebar:"defaultSidebar"},{path:"/concepts/fabrics/prophecy-libraries",component:p("/concepts/fabrics/prophecy-libraries","f64"),exact:!0,sidebar:"defaultSidebar"},{path:"/concepts/project/",component:p("/concepts/project/","285"),exact:!0,sidebar:"defaultSidebar"},{path:"/concepts/project/gems",component:p("/concepts/project/gems","0d0"),exact:!0,sidebar:"defaultSidebar"},{path:"/concepts/project/Model",component:p("/concepts/project/Model","b00"),exact:!0,sidebar:"defaultSidebar"},{path:"/concepts/project/pipeline",component:p("/concepts/project/pipeline","502"),exact:!0,sidebar:"defaultSidebar"},{path:"/concepts/teamuser",component:p("/concepts/teamuser","f96"),exact:!0,sidebar:"defaultSidebar"},{path:"/deployment/",component:p("/deployment/","454"),exact:!0,sidebar:"defaultSidebar"},{path:"/deployment/prophecy-build-tool/",component:p("/deployment/prophecy-build-tool/","7e6"),exact:!0,sidebar:"defaultSidebar"},{path:"/deployment/prophecy-build-tool/prophecy-build-tool-github-actions",component:p("/deployment/prophecy-build-tool/prophecy-build-tool-github-actions","db3"),exact:!0,sidebar:"defaultSidebar"},{path:"/deployment/prophecy-build-tool/prophecy-build-tool-jenkins",component:p("/deployment/prophecy-build-tool/prophecy-build-tool-jenkins","d85"),exact:!0,sidebar:"defaultSidebar"},{path:"/deployment/use-external-release-tags",component:p("/deployment/use-external-release-tags","36
0"),exact:!0,sidebar:"defaultSidebar"},{path:"/feature-matrix",component:p("/feature-matrix","4ae"),exact:!0,sidebar:"defaultSidebar"},{path:"/getting-started/",component:p("/getting-started/","246"),exact:!0,sidebar:"defaultSidebar"},{path:"/getting-started/airflow",component:p("/getting-started/airflow","7c9"),exact:!0,sidebar:"defaultSidebar"},{path:"/getting-started/gen-ai-chatbot",component:p("/getting-started/gen-ai-chatbot","a47"),exact:!0,sidebar:"defaultSidebar"},{path:"/getting-started/getting-help",component:p("/getting-started/getting-help","439"),exact:!0,sidebar:"defaultSidebar"},{path:"/getting-started/spark-with-databricks",component:p("/getting-started/spark-with-databricks","b41"),exact:!0,sidebar:"defaultSidebar"},{path:"/getting-started/sql-with-databricks",component:p("/getting-started/sql-with-databricks","792"),exact:!0,sidebar:"defaultSidebar"},{path:"/getting-started/sql-with-snowflake",component:p("/getting-started/sql-with-snowflake","37c"),exact:!0,sidebar:"defaultSidebar"},{path:"/metadata/",component:p("/metadata/","d5e"),exact:!0,sidebar:"defaultSidebar"},{path:"/metadata/audit-logging",component:p("/metadata/audit-logging","44b"),exact:!0,sidebar:"defaultSidebar"},{path:"/metadata/git/",component:p("/metadata/git/","8b7"),exact:!0,sidebar:"defaultSidebar"},{path:"/metadata/git/git-commit",component:p("/metadata/git/git-commit","828"),exact:!0,sidebar:"defaultSidebar"},{path:"/metadata/git/git-fork",component:p("/metadata/git/git-fork","fae"),exact:!0,sidebar:"defaultSidebar"},{path:"/metadata/git/git-merge",component:p("/metadata/git/git-merge","5d6"),exact:!0,sidebar:"defaultSidebar"},{path:"/metadata/git/git-resolve",component:p("/metadata/git/git-resolve","f74"),exact:!0,sidebar:"defaultSidebar"},{path:"/metadata/lineage/",component:p("/metadata/lineage/","9dc"),exact:!0,sidebar:"defaultSidebar"},{path:"/metadata/lineage/lineage-run-and-diagnose",component:p("/metadata/lineage/lineage-run-and-diagnose","f81"),exact:!0,sidebar:"defa
ultSidebar"},{path:"/metadata/lineage/lineage-view-and-search",component:p("/metadata/lineage/lineage-view-and-search","2f7"),exact:!0,sidebar:"defaultSidebar"},{path:"/metadata/metadata-connections",component:p("/metadata/metadata-connections","32f"),exact:!0,sidebar:"defaultSidebar"},{path:"/metadata/pr-templates",component:p("/metadata/pr-templates","d36"),exact:!0,sidebar:"defaultSidebar"},{path:"/metadata/Project Metadata",component:p("/metadata/Project Metadata","27e"),exact:!0,sidebar:"defaultSidebar"},{path:"/metadata/prophecyAPI",component:p("/metadata/prophecyAPI","92d"),exact:!0,sidebar:"defaultSidebar"},{path:"/Orchestration/",component:p("/Orchestration/","835"),exact:!0,sidebar:"defaultSidebar"},{path:"/Orchestration/airflow/",component:p("/Orchestration/airflow/","ea0"),exact:!0,sidebar:"defaultSidebar"},{path:"/Orchestration/airflow/setup/",component:p("/Orchestration/airflow/setup/","7d5"),exact:!0,sidebar:"defaultSidebar"},{path:"/Orchestration/airflow/setup/composer_fabric",component:p("/Orchestration/airflow/setup/composer_fabric","61b"),exact:!0,sidebar:"defaultSidebar"},{path:"/Orchestration/airflow/setup/MWAA_fabric",component:p("/Orchestration/airflow/setup/MWAA_fabric","41e"),exact:!0,sidebar:"defaultSidebar"},{path:"/Orchestration/airflow/setup/prophecy-managed/",component:p("/Orchestration/airflow/setup/prophecy-managed/","f5b"),exact:!0,sidebar:"defaultSidebar"},{path:"/Orchestration/airflow/setup/prophecy-managed/connections/",component:p("/Orchestration/airflow/setup/prophecy-managed/connections/","53d"),exact:!0,sidebar:"defaultSidebar"},{path:"/Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_aws_connections",component:p("/Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_aws_connections","671"),exact:!0,sidebar:"defaultSidebar"},{path:"/Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_dbx_spark_connections",component
:p("/Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_dbx_spark_connections","c4b"),exact:!0,sidebar:"defaultSidebar"},{path:"/Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_dbx_sql_connections",component:p("/Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_dbx_sql_connections","436"),exact:!0,sidebar:"defaultSidebar"},{path:"/Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_email_connections",component:p("/Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_email_connections","587"),exact:!0,sidebar:"defaultSidebar"},{path:"/Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_snowflake_connections",component:p("/Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_snowflake_connections","f50"),exact:!0,sidebar:"defaultSidebar"},{path:"/Orchestration/airflow/setup/prophecy-managed/prophecy_managed_airflow_fabric_limits",component:p("/Orchestration/airflow/setup/prophecy-managed/prophecy_managed_airflow_fabric_limits","a92"),exact:!0,sidebar:"defaultSidebar"},{path:"/Orchestration/alternative-schedulers",component:p("/Orchestration/alternative-schedulers","2ca"),exact:!0,sidebar:"defaultSidebar"},{path:"/Orchestration/databricks-jobs",component:p("/Orchestration/databricks-jobs","134"),exact:!0,sidebar:"defaultSidebar"},{path:"/package-hub/",component:p("/package-hub/","950"),exact:!0,sidebar:"defaultSidebar"},{path:"/package-hub/package-builder/",component:p("/package-hub/package-builder/","def"),exact:!0,sidebar:"defaultSidebar"},{path:"/package-hub/package-builder/Gem-builder",component:p("/package-hub/package-builder/Gem-builder","b14"),exact:!0,sidebar:"defaultSidebar"},{path:"/package-hub/package-builder/sharable-udfs",component:p("/package-hub/package-builder/sharable-udfs","fa6"),exact:!0,sid
ebar:"defaultSidebar"},{path:"/package-hub/package-builder/shareable-datasets",component:p("/package-hub/package-builder/shareable-datasets","0ff"),exact:!0,sidebar:"defaultSidebar"},{path:"/package-hub/package-builder/shareable-pipelines",component:p("/package-hub/package-builder/shareable-pipelines","628"),exact:!0,sidebar:"defaultSidebar"},{path:"/package-hub/package-builder/shareable-subgraphs",component:p("/package-hub/package-builder/shareable-subgraphs","5c3"),exact:!0,sidebar:"defaultSidebar"},{path:"/release_notes/",component:p("/release_notes/","074"),exact:!0,sidebar:"defaultSidebar"},{path:"/release_notes/2023/Apr_2023",component:p("/release_notes/2023/Apr_2023","320"),exact:!0,sidebar:"defaultSidebar"},{path:"/release_notes/2023/August_2023",component:p("/release_notes/2023/August_2023","98b"),exact:!0,sidebar:"defaultSidebar"},{path:"/release_notes/2023/December_2023",component:p("/release_notes/2023/December_2023","b45"),exact:!0,sidebar:"defaultSidebar"},{path:"/release_notes/2023/Feb_2023",component:p("/release_notes/2023/Feb_2023","8d1"),exact:!0,sidebar:"defaultSidebar"},{path:"/release_notes/2023/July_2023",component:p("/release_notes/2023/July_2023","36c"),exact:!0,sidebar:"defaultSidebar"},{path:"/release_notes/2023/June_2023",component:p("/release_notes/2023/June_2023","cf1"),exact:!0,sidebar:"defaultSidebar"},{path:"/release_notes/2023/Mar_2023",component:p("/release_notes/2023/Mar_2023","692"),exact:!0,sidebar:"defaultSidebar"},{path:"/release_notes/2023/May_2023",component:p("/release_notes/2023/May_2023","ddf"),exact:!0,sidebar:"defaultSidebar"},{path:"/release_notes/2023/November_2023",component:p("/release_notes/2023/November_2023","ed1"),exact:!0,sidebar:"defaultSidebar"},{path:"/release_notes/2023/October_2023",component:p("/release_notes/2023/October_2023","4b3"),exact:!0,sidebar:"defaultSidebar"},{path:"/release_notes/2023/September_2023",component:p("/release_notes/2023/September_2023","759"),exact:!0,sidebar:"defaultSidebar"},{path
:"/release_notes/2024/April_2024",component:p("/release_notes/2024/April_2024","438"),exact:!0,sidebar:"defaultSidebar"},{path:"/release_notes/2024/August_2024/",component:p("/release_notes/2024/August_2024/","695"),exact:!0,sidebar:"defaultSidebar"},{path:"/release_notes/2024/August_2024/new-ui-sql-onboarding",component:p("/release_notes/2024/August_2024/new-ui-sql-onboarding","325"),exact:!0,sidebar:"defaultSidebar"},{path:"/release_notes/2024/Feb_2024",component:p("/release_notes/2024/Feb_2024","bc5"),exact:!0,sidebar:"defaultSidebar"},{path:"/release_notes/2024/Jan_2024",component:p("/release_notes/2024/Jan_2024","503"),exact:!0,sidebar:"defaultSidebar"},{path:"/release_notes/2024/July_2024",component:p("/release_notes/2024/July_2024","f86"),exact:!0,sidebar:"defaultSidebar"},{path:"/release_notes/2024/June_2024",component:p("/release_notes/2024/June_2024","6ce"),exact:!0,sidebar:"defaultSidebar"},{path:"/release_notes/2024/March_2024",component:p("/release_notes/2024/March_2024","3c9"),exact:!0,sidebar:"defaultSidebar"},{path:"/release_notes/2024/May_2024",component:p("/release_notes/2024/May_2024","88f"),exact:!0,sidebar:"defaultSidebar"},{path:"/release_notes/2024/November_2024",component:p("/release_notes/2024/November_2024","4a2"),exact:!0,sidebar:"defaultSidebar"},{path:"/release_notes/2024/October_2024/",component:p("/release_notes/2024/October_2024/","18c"),exact:!0,sidebar:"defaultSidebar"},{path:"/release_notes/2024/October_2024/webinar_new_features/",component:p("/release_notes/2024/October_2024/webinar_new_features/","266"),exact:!0,sidebar:"defaultSidebar"},{path:"/release_notes/2024/October_2024/webinar_new_features/ai_capabilities",component:p("/release_notes/2024/October_2024/webinar_new_features/ai_capabilities","030"),exact:!0,sidebar:"defaultSidebar"},{path:"/release_notes/2024/October_2024/webinar_new_features/development_highlights",component:p("/release_notes/2024/October_2024/webinar_new_features/development_highlights","552"),exact:!0,sid
ebar:"defaultSidebar"},{path:"/release_notes/2024/October_2024/webinar_new_features/observability",component:p("/release_notes/2024/October_2024/webinar_new_features/observability","f31"),exact:!0,sidebar:"defaultSidebar"},{path:"/release_notes/2024/September_2024",component:p("/release_notes/2024/September_2024","bef"),exact:!0,sidebar:"defaultSidebar"},{path:"/release_notes/version_chart/",component:p("/release_notes/version_chart/","53d"),exact:!0,sidebar:"defaultSidebar"},{path:"/release_notes/version_chart/versions_support",component:p("/release_notes/version_chart/versions_support","7b2"),exact:!0,sidebar:"defaultSidebar"},{path:"/settings/",component:p("/settings/","b88"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/",component:p("/Spark/","dab"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/best-practices/",component:p("/Spark/best-practices/","c90"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/best-practices/use-dbx-secrets",component:p("/Spark/best-practices/use-dbx-secrets","6fc"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/configuration/",component:p("/Spark/configuration/","fde"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/configuration/conditional-execution",component:p("/Spark/configuration/conditional-execution","76e"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/execution/",component:p("/Spark/execution/","398"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/execution/data-explorer",component:p("/Spark/execution/data-explorer","1c2"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/execution/execution-metrics",component:p("/Spark/execution/execution-metrics","e61"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/execution/executions_on_databricks_clusters",component:p("/Spark/execution/executions_on_databricks_clusters","7ce"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/execution/executions_on_livy_clusters",component:p("/Spark/execution/executions_on_livy_clusters","9b5"),exact:!0,sidebar:"defaultSidebar"},{path:"/S
park/execution/interactive-execution",component:p("/Spark/execution/interactive-execution","22b"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/expression-builder",component:p("/Spark/expression-builder","bb0"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/extensibility/",component:p("/Spark/extensibility/","246"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/extensibility/dependencies",component:p("/Spark/extensibility/dependencies","a44"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/extensibility/gem-builder/",component:p("/Spark/extensibility/gem-builder/","ad9"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/extensibility/gem-builder/optimization-functions",component:p("/Spark/extensibility/gem-builder/optimization-functions","452"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/extensibility/udfs",component:p("/Spark/extensibility/udfs","c46"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/fabrics/",component:p("/Spark/fabrics/","68a"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/fabrics/azure-synapse-fabric-guide",component:p("/Spark/fabrics/azure-synapse-fabric-guide","457"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/fabrics/databricks-fabric",component:p("/Spark/fabrics/databricks-fabric","91f"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/fabrics/dataproc/",component:p("/Spark/fabrics/dataproc/","d8d"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/fabrics/dataproc/gcp-dataproc-fabric-tips",component:p("/Spark/fabrics/dataproc/gcp-dataproc-fabric-tips","8c3"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/fabrics/emr",component:p("/Spark/fabrics/emr","473"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/fabrics/fabric-diagnostics",component:p("/Spark/fabrics/fabric-diagnostics","18a"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/fabrics/livy",component:p("/Spark/fabrics/livy","d97"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/fabrics/prophecy-managed-databricks",component:p("/Spark/fabrics/prophecy-managed-dat
abricks","8c8"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/",component:p("/Spark/gems/","d64"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/custom/",component:p("/Spark/gems/custom/","d5f"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/custom/delta-ops",component:p("/Spark/gems/custom/delta-ops","1b8"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/custom/file-operations",component:p("/Spark/gems/custom/file-operations","9cd"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/custom/rest-api-enrich",component:p("/Spark/gems/custom/rest-api-enrich","a00"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/custom/script",component:p("/Spark/gems/custom/script","315"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/custom/sql-statement",component:p("/Spark/gems/custom/sql-statement","845"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/join-split/",component:p("/Spark/gems/join-split/","198"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/join-split/compare-columns",component:p("/Spark/gems/join-split/compare-columns","ac1"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/join-split/join",component:p("/Spark/gems/join-split/join","a05"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/join-split/Repartition",component:p("/Spark/gems/join-split/Repartition","0ca"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/join-split/row-distributor",component:p("/Spark/gems/join-split/row-distributor","a71"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/machine-learning/",component:p("/Spark/gems/machine-learning/","2db"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/machine-learning/ml-openai",component:p("/Spark/gems/machine-learning/ml-openai","e8d"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/machine-learning/ml-pinecone-lookup",component:p("/Spark/gems/machine-learning/ml-pinecone-lookup","099"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/machine-learning/ml-text
-processing",component:p("/Spark/gems/machine-learning/ml-text-processing","19f"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/source-target/",component:p("/Spark/gems/source-target/","111"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/source-target/advanced/lookup",component:p("/Spark/gems/source-target/advanced/lookup","7ec"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/source-target/advanced/synthetic-data-generator/",component:p("/Spark/gems/source-target/advanced/synthetic-data-generator/","0e9"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/source-target/advanced/synthetic-data-generator/providers",component:p("/Spark/gems/source-target/advanced/synthetic-data-generator/providers","feb"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/source-target/catalog-table/",component:p("/Spark/gems/source-target/catalog-table/","19a"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/source-target/catalog-table/delta",component:p("/Spark/gems/source-target/catalog-table/delta","8bc"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/source-target/catalog-table/hive",component:p("/Spark/gems/source-target/catalog-table/hive","b31"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/source-target/file/",component:p("/Spark/gems/source-target/file/","3a9"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/source-target/file/avro",component:p("/Spark/gems/source-target/file/avro","8cf"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/source-target/file/csv",component:p("/Spark/gems/source-target/file/csv","635"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/source-target/file/delta",component:p("/Spark/gems/source-target/file/delta","e28"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/source-target/file/fixed-format",component:p("/Spark/gems/source-target/file/fixed-format","b05"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/source-target/file/iceberg",component:p("/Spark/gems/source-targ
et/file/iceberg","018"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/source-target/file/json",component:p("/Spark/gems/source-target/file/json","2b9"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/source-target/file/kafka",component:p("/Spark/gems/source-target/file/kafka","b7e"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/source-target/file/orc",component:p("/Spark/gems/source-target/file/orc","b7c"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/source-target/file/parquet",component:p("/Spark/gems/source-target/file/parquet","6a9"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/source-target/file/text",component:p("/Spark/gems/source-target/file/text","891"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/source-target/file/xlsx",component:p("/Spark/gems/source-target/file/xlsx","337"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/source-target/warehouse/",component:p("/Spark/gems/source-target/warehouse/","0c3"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/source-target/warehouse/bigquery",component:p("/Spark/gems/source-target/warehouse/bigquery","759"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/source-target/warehouse/cosmos",component:p("/Spark/gems/source-target/warehouse/cosmos","f35"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/source-target/warehouse/db2",component:p("/Spark/gems/source-target/warehouse/db2","5c7"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/source-target/warehouse/jdbc",component:p("/Spark/gems/source-target/warehouse/jdbc","4b5"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/source-target/warehouse/mongodb",component:p("/Spark/gems/source-target/warehouse/mongodb","8b8"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/source-target/warehouse/oracle",component:p("/Spark/gems/source-target/warehouse/oracle","3a7"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/source-target/warehouse/redshift",component:p("/Spark/gems/source-
target/warehouse/redshift","190"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/source-target/warehouse/salesforce",component:p("/Spark/gems/source-target/warehouse/salesforce","3a9"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/source-target/warehouse/snowflake",component:p("/Spark/gems/source-target/warehouse/snowflake","145"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/source-target/warehouse/teradata",component:p("/Spark/gems/source-target/warehouse/teradata","418"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/subgraph/",component:p("/Spark/gems/subgraph/","9b6"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/subgraph/basic-subgraph",component:p("/Spark/gems/subgraph/basic-subgraph","65c"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/subgraph/table-iterator",component:p("/Spark/gems/subgraph/table-iterator","d9f"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/transform/",component:p("/Spark/gems/transform/","a7f"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/transform/aggregate",component:p("/Spark/gems/transform/aggregate","927"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/transform/bulk-column-expressions",component:p("/Spark/gems/transform/bulk-column-expressions","ac3"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/transform/bulk-column-rename",component:p("/Spark/gems/transform/bulk-column-rename","f50"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/transform/data-cleansing",component:p("/Spark/gems/transform/data-cleansing","097"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/transform/deduplicate",component:p("/Spark/gems/transform/deduplicate","0e4"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/transform/dynamic-select",component:p("/Spark/gems/transform/dynamic-select","ea8"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/transform/filter",component:p("/Spark/gems/transform/filter","d86"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark
/gems/transform/flatten-schema",component:p("/Spark/gems/transform/flatten-schema","65c"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/transform/limit",component:p("/Spark/gems/transform/limit","bc8"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/transform/order-by",component:p("/Spark/gems/transform/order-by","589"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/transform/reformat",component:p("/Spark/gems/transform/reformat","e86"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/transform/schema-transform",component:p("/Spark/gems/transform/schema-transform","7f9"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/transform/set-operation",component:p("/Spark/gems/transform/set-operation","5cf"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/transform/unpivot",component:p("/Spark/gems/transform/unpivot","b8f"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/gems/transform/window-function",component:p("/Spark/gems/transform/window-function","ab0"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/pipeline-monitoring/",component:p("/Spark/pipeline-monitoring/","e92"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/pipeline-monitoring/enable-pipeline-monitoring",component:p("/Spark/pipeline-monitoring/enable-pipeline-monitoring","055"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/pipeline-monitoring/use-pipeline-monitoring",component:p("/Spark/pipeline-monitoring/use-pipeline-monitoring","c8c"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/secret-management/",component:p("/Spark/secret-management/","b87"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/secret-management/databricks-secrets",component:p("/Spark/secret-management/databricks-secrets","75c"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/secret-management/env-variable",component:p("/Spark/secret-management/env-variable","670"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/secret-management/hashicorp-vault",component:p("/Spark/secret-management/hashicor
p-vault","8ab"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/secret-management/using-secrets",component:p("/Spark/secret-management/using-secrets","396"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/spark-streaming/",component:p("/Spark/spark-streaming/","f9d"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/spark-streaming/streaming-sources-and-targets/",component:p("/Spark/spark-streaming/streaming-sources-and-targets/","f9f"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/spark-streaming/streaming-sources-and-targets/streaming-event-apps",component:p("/Spark/spark-streaming/streaming-sources-and-targets/streaming-event-apps","8b1"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/spark-streaming/streaming-sources-and-targets/streaming-file-apps",component:p("/Spark/spark-streaming/streaming-sources-and-targets/streaming-file-apps","9a8"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/spark-streaming/streaming-sources-and-targets/streaming-warehouse-apps",component:p("/Spark/spark-streaming/streaming-sources-and-targets/streaming-warehouse-apps","bb3"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/spark-streaming/transformations-streaming",component:p("/Spark/spark-streaming/transformations-streaming","5a8"),exact:!0,sidebar:"defaultSidebar"},{path:"/Spark/tests",component:p("/Spark/tests","ce3"),exact:!0,sidebar:"defaultSidebar"},{path:"/SQL/",component:p("/SQL/","e11"),exact:!0,sidebar:"defaultSidebar"},{path:"/SQL/data-tests/",component:p("/SQL/data-tests/","c67"),exact:!0,sidebar:"defaultSidebar"},{path:"/SQL/data-tests/use-model-tests",component:p("/SQL/data-tests/use-model-tests","aa5"),exact:!0,sidebar:"defaultSidebar"},{path:"/SQL/data-tests/use-project-tests",component:p("/SQL/data-tests/use-project-tests","493"),exact:!0,sidebar:"defaultSidebar"},{path:"/SQL/development/",component:p("/SQL/development/","2a0"),exact:!0,sidebar:"defaultSidebar"},{path:"/SQL/development/code-editor",component:p("/SQL/development/code-editor","676"),exact:!0
,sidebar:"defaultSidebar"},{path:"/SQL/development/target-models/",component:p("/SQL/development/target-models/","9f3"),exact:!0,sidebar:"defaultSidebar"},{path:"/SQL/development/target-models/location",component:p("/SQL/development/target-models/location","00a"),exact:!0,sidebar:"defaultSidebar"},{path:"/SQL/development/target-models/schema",component:p("/SQL/development/target-models/schema","a1d"),exact:!0,sidebar:"defaultSidebar"},{path:"/SQL/development/target-models/sql-query",component:p("/SQL/development/target-models/sql-query","4cc"),exact:!0,sidebar:"defaultSidebar"},{path:"/SQL/development/target-models/type-and-format",component:p("/SQL/development/target-models/type-and-format","ae0"),exact:!0,sidebar:"defaultSidebar"},{path:"/SQL/development/target-models/write-options",component:p("/SQL/development/target-models/write-options","61b"),exact:!0,sidebar:"defaultSidebar"},{path:"/SQL/development/visual-editor/",component:p("/SQL/development/visual-editor/","3c7"),exact:!0,sidebar:"defaultSidebar"},{path:"/SQL/development/visual-editor/variant-schema",component:p("/SQL/development/visual-editor/variant-schema","bba"),exact:!0,sidebar:"defaultSidebar"},{path:"/SQL/development/visual-editor/visual-expression-builder/",component:p("/SQL/development/visual-editor/visual-expression-builder/","c22"),exact:!0,sidebar:"defaultSidebar"},{path:"/SQL/development/visual-editor/visual-expression-builder/use-the-expression-builder",component:p("/SQL/development/visual-editor/visual-expression-builder/use-the-expression-builder","49d"),exact:!0,sidebar:"defaultSidebar"},{path:"/SQL/development/visual-editor/visual-expression-builder/visual-expression-builder-reference",component:p("/SQL/development/visual-editor/visual-expression-builder/visual-expression-builder-reference","04c"),exact:!0,sidebar:"defaultSidebar"},{path:"/SQL/execution/",component:p("/SQL/execution/","242"),exact:!0,sidebar:"defaultSidebar"},{path:"/SQL/execution/data-explorer",component:p("/SQL/execut
ion/data-explorer","07a"),exact:!0,sidebar:"defaultSidebar"},{path:"/SQL/extensibility/",component:p("/SQL/extensibility/","ccb"),exact:!0,sidebar:"defaultSidebar"},{path:"/SQL/extensibility/dependencies",component:p("/SQL/extensibility/dependencies","028"),exact:!0,sidebar:"defaultSidebar"},{path:"/SQL/extensibility/gem-builder/",component:p("/SQL/extensibility/gem-builder/","729"),exact:!0,sidebar:"defaultSidebar"},{path:"/SQL/fabrics/",component:p("/SQL/fabrics/","81b"),exact:!0,sidebar:"defaultSidebar"},{path:"/SQL/fabrics/databricks",component:p("/SQL/fabrics/databricks","2ed"),exact:!0,sidebar:"defaultSidebar"},{path:"/SQL/fabrics/snowflake",component:p("/SQL/fabrics/snowflake","1eb"),exact:!0,sidebar:"defaultSidebar"},{path:"/SQL/gems/",component:p("/SQL/gems/","5a4"),exact:!0,sidebar:"defaultSidebar"},{path:"/SQL/gems/custom/",component:p("/SQL/gems/custom/","86e"),exact:!0,sidebar:"defaultSidebar"},{path:"/SQL/gems/data-joins",component:p("/SQL/gems/data-joins","064"),exact:!0,sidebar:"defaultSidebar"},{path:"/SQL/gems/datasources/",component:p("/SQL/gems/datasources/","f26"),exact:!0,sidebar:"defaultSidebar"},{path:"/SQL/gems/datasources/upload-files",component:p("/SQL/gems/datasources/upload-files","7a7"),exact:!0,sidebar:"defaultSidebar"},{path:"/SQL/gems/subgraph/",component:p("/SQL/gems/subgraph/","b08"),exact:!0,sidebar:"defaultSidebar"},{path:"/SQL/gems/transform/",component:p("/SQL/gems/transform/","8a8"),exact:!0,sidebar:"defaultSidebar"},{path:"/SQL/gems/transform/deduplicate",component:p("/SQL/gems/transform/deduplicate","a64"),exact:!0,sidebar:"defaultSidebar"},{path:"/SQL/gems/transform/flattenschema",component:p("/SQL/gems/transform/flattenschema","c89"),exact:!0,sidebar:"defaultSidebar"},{path:"/SQL/gems/transform/sql-aggregate",component:p("/SQL/gems/transform/sql-aggregate","324"),exact:!0,sidebar:"defaultSidebar"},{path:"/tutorials/",component:p("/tutorials/","6b0"),exact:!0,sidebar:"defaultSidebar"},{path:"/tutorials/Orchestration/",compo
nent:p("/tutorials/Orchestration/","a5c"),exact:!0,sidebar:"defaultSidebar"},{path:"/tutorials/Orchestration/multi-jobs-trigger",component:p("/tutorials/Orchestration/multi-jobs-trigger","67e"),exact:!0,sidebar:"defaultSidebar"},{path:"/tutorials/Orchestration/reliable-ci-cd",component:p("/tutorials/Orchestration/reliable-ci-cd","75c"),exact:!0,sidebar:"defaultSidebar"},{path:"/tutorials/Spark/",component:p("/tutorials/Spark/","5f5"),exact:!0,sidebar:"defaultSidebar"},{path:"/tutorials/Spark/working-with-excel",component:p("/tutorials/Spark/working-with-excel","dae"),exact:!0,sidebar:"defaultSidebar"},{path:"/tutorials/videos/",component:p("/tutorials/videos/","109"),exact:!0,sidebar:"defaultSidebar"},{path:"/tutorials/videos/design-Pipeline",component:p("/tutorials/videos/design-Pipeline","8e3"),exact:!0,sidebar:"defaultSidebar"},{path:"/tutorials/videos/schedule-Pipeline",component:p("/tutorials/videos/schedule-Pipeline","634"),exact:!0,sidebar:"defaultSidebar"},{path:"/tutorials/videos/test-Pipeline",component:p("/tutorials/videos/test-Pipeline","5ff"),exact:!0,sidebar:"defaultSidebar"}]},{path:"*",component:p("*")}]},6125:(e,t,a)=>{"use strict";a.d(t,{o:()=>r,x:()=>o});var n=a(96540);const r=n.createContext(!1);function o(e){let{children:t}=e;const[a,o]=(0,n.useState)(!1);return(0,n.useEffect)((()=>{o(!0)}),[]),n.createElement(r.Provider,{value:a},t)}},38536:(e,t,a)=>{"use strict";var n=a(96540),r=a(40961),o=a(54625),i=a(80545),s=a(38193);const c=[a(10119),a(26134),a(76294),a(51043),a(71609)];var l=a(35947),d=a(56347),u=a(22831);function p(e){let{children:t}=e;return n.createElement(n.Fragment,null,t)}var f=a(58168),g=a(5260),m=a(44586),b=a(86025),h=a(6342),_=a(69024),v=a(32131),y=a(14090),S=a(2967),k=a(70440),x=a(41463);function w(){const{i18n:{defaultLocale:e,localeConfigs:t}}=(0,m.A)(),a=(0,v.o)();return n.createElement(g.A,null,Object.entries(t).map((e=>{let[t,{htmlLang:r}]=e;return 
n.createElement("link",{key:t,rel:"alternate",href:a.createUrl({locale:t,fullyQualified:!0}),hrefLang:r})})),n.createElement("link",{rel:"alternate",href:a.createUrl({locale:e,fullyQualified:!0}),hrefLang:"x-default"}))}function E(e){let{permalink:t}=e;const{siteConfig:{url:a}}=(0,m.A)(),r=function(){const{siteConfig:{url:e,baseUrl:t,trailingSlash:a}}=(0,m.A)(),{pathname:n}=(0,d.zy)();return e+(0,k.applyTrailingSlash)((0,b.A)(n),{trailingSlash:a,baseUrl:t})}(),o=t?`${a}${t}`:r;return n.createElement(g.A,null,n.createElement("meta",{property:"og:url",content:o}),n.createElement("link",{rel:"canonical",href:o}))}function A(){const{i18n:{currentLocale:e}}=(0,m.A)(),{metadata:t,image:a}=(0,h.p)();return n.createElement(n.Fragment,null,n.createElement(g.A,null,n.createElement("meta",{name:"twitter:card",content:"summary_large_image"}),n.createElement("body",{className:y.w})),a&&n.createElement(_.be,{image:a}),n.createElement(E,null),n.createElement(w,null),n.createElement(x.A,{tag:S.Cy,locale:e}),n.createElement(g.A,null,t.map(((e,t)=>n.createElement("meta",(0,f.A)({key:t},e))))))}const T=new Map;function C(e){if(T.has(e.pathname))return{...e,pathname:T.get(e.pathname)};if((0,u.u)(l.A,e.pathname).some((e=>{let{route:t}=e;return!0===t.exact})))return T.set(e.pathname,e.pathname),e;const t=e.pathname.trim().replace(/(?:\/index)?\.html$/,"")||"/";return T.set(e.pathname,t),{...e,pathname:t}}var L=a(6125),O=a(26988);function j(e){for(var t=arguments.length,a=new Array(t>1?t-1:0),n=1;n{const n=t.default?.[e]??t[e];return n?.(...a)}));return()=>r.forEach((e=>e?.()))}const P=function(e){let{children:t,location:a,previousLocation:r}=e;return(0,n.useLayoutEffect)((()=>{r!==a&&(!function(e){let{location:t,previousLocation:a}=e;if(!a)return;const n=t.pathname===a.pathname,r=t.hash===a.hash,o=t.search===a.search;if(n&&r&&!o)return;const{hash:i}=t;if(i){const e=decodeURIComponent(i.substring(1)),t=document.getElementById(e);t?.scrollIntoView()}else 
window.scrollTo(0,0)}({location:a,previousLocation:r}),j("onRouteDidUpdate",{previousLocation:r,location:a}))}),[r,a]),t};function N(e){const t=Array.from(new Set([e,decodeURI(e)])).map((e=>(0,u.u)(l.A,e))).flat();return Promise.all(t.map((e=>e.route.component.preload?.())))}class R extends n.Component{previousLocation;routeUpdateCleanupCb;constructor(e){super(e),this.previousLocation=null,this.routeUpdateCleanupCb=s.A.canUseDOM?j("onRouteUpdate",{previousLocation:null,location:this.props.location}):()=>{},this.state={nextRouteHasLoaded:!0}}shouldComponentUpdate(e,t){if(e.location===this.props.location)return t.nextRouteHasLoaded;const a=e.location;return this.previousLocation=this.props.location,this.setState({nextRouteHasLoaded:!1}),this.routeUpdateCleanupCb=j("onRouteUpdate",{previousLocation:this.previousLocation,location:a}),N(a.pathname).then((()=>{this.routeUpdateCleanupCb(),this.setState({nextRouteHasLoaded:!0})})).catch((e=>{console.warn(e),window.location.reload()})),!1}render(){const{children:e,location:t}=this.props;return n.createElement(P,{previousLocation:this.previousLocation,location:t},n.createElement(d.qh,{location:t,render:()=>e}))}}const I=R,M="__docusaurus-base-url-issue-banner-container",D="__docusaurus-base-url-issue-banner",z="__docusaurus-base-url-issue-banner-suggestion-container",F="__DOCUSAURUS_INSERT_BASEURL_BANNER";function B(e){return`\nwindow['${F}'] = true;\n\ndocument.addEventListener('DOMContentLoaded', maybeInsertBanner);\n\nfunction maybeInsertBanner() {\n var shouldInsert = window['${F}'];\n shouldInsert && insertBanner();\n}\n\nfunction insertBanner() {\n var bannerContainer = document.getElementById('${M}');\n if (!bannerContainer) {\n return;\n }\n var bannerHtml = ${JSON.stringify(function(e){return`\n
    \n

    Your Docusaurus site did not load properly.

    \n

    A very common reason is a wrong site baseUrl configuration.

    \n

    Current configured baseUrl = ${e} ${"/"===e?" (default value)":""}

    \n

    We suggest trying baseUrl =

    \n
    \n`}(e)).replace(/{window[F]=!1}),[]),n.createElement(n.Fragment,null,!s.A.canUseDOM&&n.createElement(g.A,null,n.createElement("script",null,B(e))),n.createElement("div",{id:M}))}function U(){const{siteConfig:{baseUrl:e,baseUrlIssueBanner:t}}=(0,m.A)(),{pathname:a}=(0,d.zy)();return t&&a===e?n.createElement($,null):null}function Q(){const{siteConfig:{favicon:e,title:t,noIndex:a},i18n:{currentLocale:r,localeConfigs:o}}=(0,m.A)(),i=(0,b.A)(e),{htmlLang:s,direction:c}=o[r];return n.createElement(g.A,null,n.createElement("html",{lang:s,dir:c}),n.createElement("title",null,t),n.createElement("meta",{property:"og:title",content:t}),n.createElement("meta",{name:"viewport",content:"width=device-width, initial-scale=1.0"}),a&&n.createElement("meta",{name:"robots",content:"noindex, nofollow"}),e&&n.createElement("link",{rel:"icon",href:i}))}var q=a(67489),H=a(92303);function V(){const e=(0,H.A)();return n.createElement(g.A,null,n.createElement("html",{"data-has-hydrated":e}))}function G(){const e=(0,u.v)(l.A),t=(0,d.zy)();return n.createElement(q.A,null,n.createElement(O.l,null,n.createElement(L.x,null,n.createElement(p,null,n.createElement(Q,null),n.createElement(A,null),n.createElement(U,null),n.createElement(I,{location:C(t)},e)),n.createElement(V,null))))}var W=a(84054);const K=function(e){try{return document.createElement("link").relList.supports(e)}catch{return!1}}("prefetch")?function(e){return new Promise(((t,a)=>{if("undefined"==typeof document)return void a();const n=document.createElement("link");n.setAttribute("rel","prefetch"),n.setAttribute("href",e),n.onload=()=>t(),n.onerror=()=>a();const r=document.getElementsByTagName("head")[0]??document.getElementsByName("script")[0]?.parentNode;r?.appendChild(n)}))}:function(e){return new Promise(((t,a)=>{const n=new XMLHttpRequest;n.open("GET",e,!0),n.withCredentials=!0,n.onload=()=>{200===n.status?t():a()},n.send(null)}))};var Y=a(86921);const X=new Set,Z=new 
Set,J=()=>navigator.connection?.effectiveType.includes("2g")||navigator.connection?.saveData,ee={prefetch(e){if(!(e=>!J()&&!Z.has(e)&&!X.has(e))(e))return!1;X.add(e);const t=(0,u.u)(l.A,e).flatMap((e=>{return t=e.route.path,Object.entries(W).filter((e=>{let[a]=e;return a.replace(/-[^-]+$/,"")===t})).flatMap((e=>{let[,t]=e;return Object.values((0,Y.A)(t))}));var t}));return Promise.all(t.map((e=>{const t=a.gca(e);return t&&!t.includes("undefined")?K(t).catch((()=>{})):Promise.resolve()})))},preload:e=>!!(e=>!J()&&!Z.has(e))(e)&&(Z.add(e),N(e))},te=Object.freeze(ee);if(s.A.canUseDOM){window.docusaurus=te;const e=r.hydrate;N(window.location.pathname).then((()=>{e(n.createElement(i.vd,null,n.createElement(o.Kd,null,n.createElement(G,null))),document.getElementById("__docusaurus"))}))}},26988:(e,t,a)=>{"use strict";a.d(t,{o:()=>d,l:()=>u});var n=a(96540),r=a(4784);const o=JSON.parse('{"docusaurus-plugin-content-docs":{"default":{"path":"/","versions":[{"name":"current","label":"Next","isLast":true,"path":"/","mainDocId":"index","docs":[{"id":"architecture/architecture","path":"/architecture/","sidebar":"defaultSidebar"},{"id":"architecture/deployment/deployment","path":"/architecture/deployment/","sidebar":"defaultSidebar"},{"id":"architecture/self-hosted/authentication/active_directory","path":"/architecture/self-hosted/authentication/active_directory","sidebar":"defaultSidebar"},{"id":"architecture/self-hosted/authentication/authentication","path":"/architecture/self-hosted/authentication/","sidebar":"defaultSidebar"},{"id":"architecture/self-hosted/authentication/azure-ad","path":"/architecture/self-hosted/authentication/azure-ad","sidebar":"defaultSidebar"},{"id":"architecture/self-hosted/authentication/azuread-scim","path":"/architecture/self-hosted/authentication/azuread-scim","sidebar":"defaultSidebar"},{"id":"architecture/self-hosted/authentication/saml-okta","path":"/architecture/self-hosted/authentication/saml-okta","sidebar":"defaultSidebar"},{"id":"architectu
re/self-hosted/authentication/security-settings","path":"/architecture/self-hosted/authentication/security-settings","sidebar":"defaultSidebar"},{"id":"architecture/self-hosted/configurations/configurations","path":"/architecture/self-hosted/configurations/","sidebar":"defaultSidebar"},{"id":"architecture/self-hosted/configurations/configure-alerts","path":"/architecture/self-hosted/configurations/configure-alerts","sidebar":"defaultSidebar"},{"id":"architecture/self-hosted/configurations/configure-audit-logs","path":"/architecture/self-hosted/configurations/configure-audit-logs","sidebar":"defaultSidebar"},{"id":"architecture/self-hosted/configurations/configure-object-store","path":"/architecture/self-hosted/configurations/configure-object-store","sidebar":"defaultSidebar"},{"id":"architecture/self-hosted/configurations/sandbox-configuration","path":"/architecture/self-hosted/configurations/sandbox-configuration","sidebar":"defaultSidebar"},{"id":"architecture/self-hosted/download-logs","path":"/architecture/self-hosted/download-logs","sidebar":"defaultSidebar"},{"id":"architecture/self-hosted/generate-api-key","path":"/architecture/self-hosted/generate-api-key","sidebar":"defaultSidebar"},{"id":"architecture/self-hosted/installation-helm/install-on-aws","path":"/architecture/self-hosted/installation-helm/install-on-aws","sidebar":"defaultSidebar"},{"id":"architecture/self-hosted/installation-helm/installation-helm","path":"/architecture/self-hosted/installation-helm/","sidebar":"defaultSidebar"},{"id":"architecture/self-hosted/self-hosted","path":"/architecture/self-hosted/","sidebar":"defaultSidebar"},{"id":"architecture/self-hosted/upgrade-backup-restore","path":"/architecture/self-hosted/upgrade-backup-restore","sidebar":"defaultSidebar"},{"id":"concepts/copilot/copilot","path":"/concepts/copilot/","sidebar":"defaultSidebar"},{"id":"concepts/copilot/copilot-ai-capabilities","path":"/concepts/copilot/copilot-ai-capabilities","sidebar":"defaultSidebar"},{"id":"c
oncepts/copilot/copilot-data-privacy","path":"/concepts/copilot/copilot-data-privacy","sidebar":"defaultSidebar"},{"id":"concepts/copilot/enable-data-copilot","path":"/concepts/copilot/enable-data-copilot","sidebar":"defaultSidebar"},{"id":"concepts/dataset","path":"/concepts/dataset","sidebar":"defaultSidebar"},{"id":"concepts/fabrics/Fabric","path":"/concepts/fabrics/","sidebar":"defaultSidebar"},{"id":"concepts/fabrics/prophecy-libraries","path":"/concepts/fabrics/prophecy-libraries","sidebar":"defaultSidebar"},{"id":"concepts/key-concepts","path":"/concepts/","sidebar":"defaultSidebar"},{"id":"concepts/project/gems","path":"/concepts/project/gems","sidebar":"defaultSidebar"},{"id":"concepts/project/Model","path":"/concepts/project/Model","sidebar":"defaultSidebar"},{"id":"concepts/project/pipeline","path":"/concepts/project/pipeline","sidebar":"defaultSidebar"},{"id":"concepts/project/project","path":"/concepts/project/","sidebar":"defaultSidebar"},{"id":"concepts/teamuser","path":"/concepts/teamuser","sidebar":"defaultSidebar"},{"id":"deployment/Deployment","path":"/deployment/","sidebar":"defaultSidebar"},{"id":"deployment/prophecy-build-tool/prophecy-build-tool","path":"/deployment/prophecy-build-tool/","sidebar":"defaultSidebar"},{"id":"deployment/prophecy-build-tool/prophecy-build-tool-github-actions","path":"/deployment/prophecy-build-tool/prophecy-build-tool-github-actions","sidebar":"defaultSidebar"},{"id":"deployment/prophecy-build-tool/prophecy-build-tool-jenkins","path":"/deployment/prophecy-build-tool/prophecy-build-tool-jenkins","sidebar":"defaultSidebar"},{"id":"deployment/use-external-release-tags","path":"/deployment/use-external-release-tags","sidebar":"defaultSidebar"},{"id":"feature-matrix","path":"/feature-matrix","sidebar":"defaultSidebar"},{"id":"getting-started/airflow","path":"/getting-started/airflow","sidebar":"defaultSidebar"},{"id":"getting-started/gen-ai-chatbot","path":"/getting-started/gen-ai-chatbot","sidebar":"defaultSidebar"},{"
id":"getting-started/getting-help","path":"/getting-started/getting-help","sidebar":"defaultSidebar"},{"id":"getting-started/getting-started","path":"/getting-started/","sidebar":"defaultSidebar"},{"id":"getting-started/spark-with-databricks","path":"/getting-started/spark-with-databricks","sidebar":"defaultSidebar"},{"id":"getting-started/sql-with-databricks","path":"/getting-started/sql-with-databricks","sidebar":"defaultSidebar"},{"id":"getting-started/sql-with-snowflake","path":"/getting-started/sql-with-snowflake","sidebar":"defaultSidebar"},{"id":"index","path":"/","sidebar":"defaultSidebar"},{"id":"metadata/audit-logging","path":"/metadata/audit-logging","sidebar":"defaultSidebar"},{"id":"metadata/git/Git","path":"/metadata/git/","sidebar":"defaultSidebar"},{"id":"metadata/git/git-commit","path":"/metadata/git/git-commit","sidebar":"defaultSidebar"},{"id":"metadata/git/git-fork","path":"/metadata/git/git-fork","sidebar":"defaultSidebar"},{"id":"metadata/git/git-merge","path":"/metadata/git/git-merge","sidebar":"defaultSidebar"},{"id":"metadata/git/git-resolve","path":"/metadata/git/git-resolve","sidebar":"defaultSidebar"},{"id":"metadata/lineage/lineage","path":"/metadata/lineage/","sidebar":"defaultSidebar"},{"id":"metadata/lineage/lineage-run-and-diagnose","path":"/metadata/lineage/lineage-run-and-diagnose","sidebar":"defaultSidebar"},{"id":"metadata/lineage/lineage-view-and-search","path":"/metadata/lineage/lineage-view-and-search","sidebar":"defaultSidebar"},{"id":"metadata/metadata","path":"/metadata/","sidebar":"defaultSidebar"},{"id":"metadata/metadata-connections","path":"/metadata/metadata-connections","sidebar":"defaultSidebar"},{"id":"metadata/pr-templates","path":"/metadata/pr-templates","sidebar":"defaultSidebar"},{"id":"metadata/Project Metadata","path":"/metadata/Project 
Metadata","sidebar":"defaultSidebar"},{"id":"metadata/prophecyAPI","path":"/metadata/prophecyAPI","sidebar":"defaultSidebar"},{"id":"Orchestration/airflow/airflow","path":"/Orchestration/airflow/","sidebar":"defaultSidebar"},{"id":"Orchestration/airflow/setup/composer_fabric","path":"/Orchestration/airflow/setup/composer_fabric","sidebar":"defaultSidebar"},{"id":"Orchestration/airflow/setup/MWAA_fabric","path":"/Orchestration/airflow/setup/MWAA_fabric","sidebar":"defaultSidebar"},{"id":"Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_aws_connections","path":"/Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_aws_connections","sidebar":"defaultSidebar"},{"id":"Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_connections","path":"/Orchestration/airflow/setup/prophecy-managed/connections/","sidebar":"defaultSidebar"},{"id":"Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_dbx_spark_connections","path":"/Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_dbx_spark_connections","sidebar":"defaultSidebar"},{"id":"Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_dbx_sql_connections","path":"/Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_dbx_sql_connections","sidebar":"defaultSidebar"},{"id":"Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_email_connections","path":"/Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_email_connections","sidebar":"defaultSidebar"},{"id":"Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_snowflake_connections","path":"/Orchestration/airflow/setup/prophecy-managed/connections/prophecy_managed_airflow_fabric_snowflake_connections","sidebar":"defaultSide
bar"},{"id":"Orchestration/airflow/setup/prophecy-managed/prophecy_managed_airflow_fabric","path":"/Orchestration/airflow/setup/prophecy-managed/","sidebar":"defaultSidebar"},{"id":"Orchestration/airflow/setup/prophecy-managed/prophecy_managed_airflow_fabric_limits","path":"/Orchestration/airflow/setup/prophecy-managed/prophecy_managed_airflow_fabric_limits","sidebar":"defaultSidebar"},{"id":"Orchestration/airflow/setup/setup_airflow","path":"/Orchestration/airflow/setup/","sidebar":"defaultSidebar"},{"id":"Orchestration/alternative-schedulers","path":"/Orchestration/alternative-schedulers","sidebar":"defaultSidebar"},{"id":"Orchestration/databricks-jobs","path":"/Orchestration/databricks-jobs","sidebar":"defaultSidebar"},{"id":"Orchestration/Orchestration","path":"/Orchestration/","sidebar":"defaultSidebar"},{"id":"package-hub/package-builder/Gem-builder","path":"/package-hub/package-builder/Gem-builder","sidebar":"defaultSidebar"},{"id":"package-hub/package-builder/Package-builder","path":"/package-hub/package-builder/","sidebar":"defaultSidebar"},{"id":"package-hub/package-builder/sharable-udfs","path":"/package-hub/package-builder/sharable-udfs","sidebar":"defaultSidebar"},{"id":"package-hub/package-builder/shareable-datasets","path":"/package-hub/package-builder/shareable-datasets","sidebar":"defaultSidebar"},{"id":"package-hub/package-builder/shareable-pipelines","path":"/package-hub/package-builder/shareable-pipelines","sidebar":"defaultSidebar"},{"id":"package-hub/package-builder/shareable-subgraphs","path":"/package-hub/package-builder/shareable-subgraphs","sidebar":"defaultSidebar"},{"id":"package-hub/package-hub","path":"/package-hub/","sidebar":"defaultSidebar"},{"id":"release_notes/2023/Apr_2023","path":"/release_notes/2023/Apr_2023","sidebar":"defaultSidebar"},{"id":"release_notes/2023/August_2023","path":"/release_notes/2023/August_2023","sidebar":"defaultSidebar"},{"id":"release_notes/2023/December_2023","path":"/release_notes/2023/December_2023","si
debar":"defaultSidebar"},{"id":"release_notes/2023/Feb_2023","path":"/release_notes/2023/Feb_2023","sidebar":"defaultSidebar"},{"id":"release_notes/2023/July_2023","path":"/release_notes/2023/July_2023","sidebar":"defaultSidebar"},{"id":"release_notes/2023/June_2023","path":"/release_notes/2023/June_2023","sidebar":"defaultSidebar"},{"id":"release_notes/2023/Mar_2023","path":"/release_notes/2023/Mar_2023","sidebar":"defaultSidebar"},{"id":"release_notes/2023/May_2023","path":"/release_notes/2023/May_2023","sidebar":"defaultSidebar"},{"id":"release_notes/2023/November_2023","path":"/release_notes/2023/November_2023","sidebar":"defaultSidebar"},{"id":"release_notes/2023/October_2023","path":"/release_notes/2023/October_2023","sidebar":"defaultSidebar"},{"id":"release_notes/2023/September_2023","path":"/release_notes/2023/September_2023","sidebar":"defaultSidebar"},{"id":"release_notes/2024/April_2024","path":"/release_notes/2024/April_2024","sidebar":"defaultSidebar"},{"id":"release_notes/2024/August_2024/August_2024","path":"/release_notes/2024/August_2024/","sidebar":"defaultSidebar"},{"id":"release_notes/2024/August_2024/new-ui-sql-onboarding","path":"/release_notes/2024/August_2024/new-ui-sql-onboarding","sidebar":"defaultSidebar"},{"id":"release_notes/2024/Feb_2024","path":"/release_notes/2024/Feb_2024","sidebar":"defaultSidebar"},{"id":"release_notes/2024/Jan_2024","path":"/release_notes/2024/Jan_2024","sidebar":"defaultSidebar"},{"id":"release_notes/2024/July_2024","path":"/release_notes/2024/July_2024","sidebar":"defaultSidebar"},{"id":"release_notes/2024/June_2024","path":"/release_notes/2024/June_2024","sidebar":"defaultSidebar"},{"id":"release_notes/2024/March_2024","path":"/release_notes/2024/March_2024","sidebar":"defaultSidebar"},{"id":"release_notes/2024/May_2024","path":"/release_notes/2024/May_2024","sidebar":"defaultSidebar"},{"id":"release_notes/2024/November_2024","path":"/release_notes/2024/November_2024","sidebar":"defaultSidebar"},{"id":"release
_notes/2024/October_2024/October_2024","path":"/release_notes/2024/October_2024/","sidebar":"defaultSidebar"},{"id":"release_notes/2024/October_2024/webinar_new_features/ai_capabilities","path":"/release_notes/2024/October_2024/webinar_new_features/ai_capabilities","sidebar":"defaultSidebar"},{"id":"release_notes/2024/October_2024/webinar_new_features/development_highlights","path":"/release_notes/2024/October_2024/webinar_new_features/development_highlights","sidebar":"defaultSidebar"},{"id":"release_notes/2024/October_2024/webinar_new_features/observability","path":"/release_notes/2024/October_2024/webinar_new_features/observability","sidebar":"defaultSidebar"},{"id":"release_notes/2024/October_2024/webinar_new_features/webinar_new_features","path":"/release_notes/2024/October_2024/webinar_new_features/","sidebar":"defaultSidebar"},{"id":"release_notes/2024/September_2024","path":"/release_notes/2024/September_2024","sidebar":"defaultSidebar"},{"id":"release_notes/release_notes","path":"/release_notes/","sidebar":"defaultSidebar"},{"id":"release_notes/version_chart/version_chart","path":"/release_notes/version_chart/","sidebar":"defaultSidebar"},{"id":"release_notes/version_chart/versions_support","path":"/release_notes/version_chart/versions_support","sidebar":"defaultSidebar"},{"id":"settings/settings","path":"/settings/","sidebar":"defaultSidebar"},{"id":"Spark/best-practices/best-practices-spark","path":"/Spark/best-practices/","sidebar":"defaultSidebar"},{"id":"Spark/best-practices/use-dbx-secrets","path":"/Spark/best-practices/use-dbx-secrets","sidebar":"defaultSidebar"},{"id":"Spark/configuration/conditional-execution","path":"/Spark/configuration/conditional-execution","sidebar":"defaultSidebar"},{"id":"Spark/configuration/configuration","path":"/Spark/configuration/","sidebar":"defaultSidebar"},{"id":"Spark/copilot-for-spark-users","path":"/Spark/","sidebar":"defaultSidebar"},{"id":"Spark/execution/data-explorer","path":"/Spark/execution/data-explorer","s
idebar":"defaultSidebar"},{"id":"Spark/execution/execution","path":"/Spark/execution/","sidebar":"defaultSidebar"},{"id":"Spark/execution/execution-metrics","path":"/Spark/execution/execution-metrics","sidebar":"defaultSidebar"},{"id":"Spark/execution/executions_on_databricks_clusters","path":"/Spark/execution/executions_on_databricks_clusters","sidebar":"defaultSidebar"},{"id":"Spark/execution/executions_on_livy_clusters","path":"/Spark/execution/executions_on_livy_clusters","sidebar":"defaultSidebar"},{"id":"Spark/execution/interactive-execution","path":"/Spark/execution/interactive-execution","sidebar":"defaultSidebar"},{"id":"Spark/expression-builder","path":"/Spark/expression-builder","sidebar":"defaultSidebar"},{"id":"Spark/extensibility/dependencies","path":"/Spark/extensibility/dependencies","sidebar":"defaultSidebar"},{"id":"Spark/extensibility/extensibility","path":"/Spark/extensibility/","sidebar":"defaultSidebar"},{"id":"Spark/extensibility/gem-builder/gem-builder","path":"/Spark/extensibility/gem-builder/","sidebar":"defaultSidebar"},{"id":"Spark/extensibility/gem-builder/optimization-functions","path":"/Spark/extensibility/gem-builder/optimization-functions","sidebar":"defaultSidebar"},{"id":"Spark/extensibility/udfs","path":"/Spark/extensibility/udfs","sidebar":"defaultSidebar"},{"id":"Spark/fabrics/azure-synapse-fabric-guide","path":"/Spark/fabrics/azure-synapse-fabric-guide","sidebar":"defaultSidebar"},{"id":"Spark/fabrics/databricks-fabric","path":"/Spark/fabrics/databricks-fabric","sidebar":"defaultSidebar"},{"id":"Spark/fabrics/dataproc/gcp-dataproc-fabric-guide","path":"/Spark/fabrics/dataproc/","sidebar":"defaultSidebar"},{"id":"Spark/fabrics/dataproc/gcp-dataproc-fabric-tips","path":"/Spark/fabrics/dataproc/gcp-dataproc-fabric-tips","sidebar":"defaultSidebar"},{"id":"Spark/fabrics/emr","path":"/Spark/fabrics/emr","sidebar":"defaultSidebar"},{"id":"Spark/fabrics/fabric-diagnostics","path":"/Spark/fabrics/fabric-diagnostics","sidebar":"defaultSi
debar"},{"id":"Spark/fabrics/Fabrics","path":"/Spark/fabrics/","sidebar":"defaultSidebar"},{"id":"Spark/fabrics/livy","path":"/Spark/fabrics/livy","sidebar":"defaultSidebar"},{"id":"Spark/fabrics/prophecy-managed-databricks","path":"/Spark/fabrics/prophecy-managed-databricks","sidebar":"defaultSidebar"},{"id":"Spark/gems/custom/custom","path":"/Spark/gems/custom/","sidebar":"defaultSidebar"},{"id":"Spark/gems/custom/delta-ops","path":"/Spark/gems/custom/delta-ops","sidebar":"defaultSidebar"},{"id":"Spark/gems/custom/file-operations","path":"/Spark/gems/custom/file-operations","sidebar":"defaultSidebar"},{"id":"Spark/gems/custom/rest-api-enrich","path":"/Spark/gems/custom/rest-api-enrich","sidebar":"defaultSidebar"},{"id":"Spark/gems/custom/script","path":"/Spark/gems/custom/script","sidebar":"defaultSidebar"},{"id":"Spark/gems/custom/sql-statement","path":"/Spark/gems/custom/sql-statement","sidebar":"defaultSidebar"},{"id":"Spark/gems/join-split/compare-columns","path":"/Spark/gems/join-split/compare-columns","sidebar":"defaultSidebar"},{"id":"Spark/gems/join-split/join","path":"/Spark/gems/join-split/join","sidebar":"defaultSidebar"},{"id":"Spark/gems/join-split/join-split","path":"/Spark/gems/join-split/","sidebar":"defaultSidebar"},{"id":"Spark/gems/join-split/Repartition","path":"/Spark/gems/join-split/Repartition","sidebar":"defaultSidebar"},{"id":"Spark/gems/join-split/row-distributor","path":"/Spark/gems/join-split/row-distributor","sidebar":"defaultSidebar"},{"id":"Spark/gems/machine-learning/Machine 
Learning","path":"/Spark/gems/machine-learning/","sidebar":"defaultSidebar"},{"id":"Spark/gems/machine-learning/ml-openai","path":"/Spark/gems/machine-learning/ml-openai","sidebar":"defaultSidebar"},{"id":"Spark/gems/machine-learning/ml-pinecone-lookup","path":"/Spark/gems/machine-learning/ml-pinecone-lookup","sidebar":"defaultSidebar"},{"id":"Spark/gems/machine-learning/ml-text-processing","path":"/Spark/gems/machine-learning/ml-text-processing","sidebar":"defaultSidebar"},{"id":"Spark/gems/source-target/advanced/lookup","path":"/Spark/gems/source-target/advanced/lookup","sidebar":"defaultSidebar"},{"id":"Spark/gems/source-target/advanced/synthetic-data-generator/data-generator","path":"/Spark/gems/source-target/advanced/synthetic-data-generator/","sidebar":"defaultSidebar"},{"id":"Spark/gems/source-target/advanced/synthetic-data-generator/providers","path":"/Spark/gems/source-target/advanced/synthetic-data-generator/providers","sidebar":"defaultSidebar"},{"id":"Spark/gems/source-target/catalog-table/catalog-table","path":"/Spark/gems/source-target/catalog-table/","sidebar":"defaultSidebar"},{"id":"Spark/gems/source-target/catalog-table/delta","path":"/Spark/gems/source-target/catalog-table/delta","sidebar":"defaultSidebar"},{"id":"Spark/gems/source-target/catalog-table/hive","path":"/Spark/gems/source-target/catalog-table/hive","sidebar":"defaultSidebar"},{"id":"Spark/gems/source-target/file/avro","path":"/Spark/gems/source-target/file/avro","sidebar":"defaultSidebar"},{"id":"Spark/gems/source-target/file/csv","path":"/Spark/gems/source-target/file/csv","sidebar":"defaultSidebar"},{"id":"Spark/gems/source-target/file/delta","path":"/Spark/gems/source-target/file/delta","sidebar":"defaultSidebar"},{"id":"Spark/gems/source-target/file/file","path":"/Spark/gems/source-target/file/","sidebar":"defaultSidebar"},{"id":"Spark/gems/source-target/file/fixed-format","path":"/Spark/gems/source-target/file/fixed-format","sidebar":"defaultSidebar"},{"id":"Spark/gems/source-tar
get/file/iceberg","path":"/Spark/gems/source-target/file/iceberg","sidebar":"defaultSidebar"},{"id":"Spark/gems/source-target/file/json","path":"/Spark/gems/source-target/file/json","sidebar":"defaultSidebar"},{"id":"Spark/gems/source-target/file/kafka","path":"/Spark/gems/source-target/file/kafka","sidebar":"defaultSidebar"},{"id":"Spark/gems/source-target/file/orc","path":"/Spark/gems/source-target/file/orc","sidebar":"defaultSidebar"},{"id":"Spark/gems/source-target/file/parquet","path":"/Spark/gems/source-target/file/parquet","sidebar":"defaultSidebar"},{"id":"Spark/gems/source-target/file/text","path":"/Spark/gems/source-target/file/text","sidebar":"defaultSidebar"},{"id":"Spark/gems/source-target/file/xlsx","path":"/Spark/gems/source-target/file/xlsx","sidebar":"defaultSidebar"},{"id":"Spark/gems/source-target/source-target","path":"/Spark/gems/source-target/","sidebar":"defaultSidebar"},{"id":"Spark/gems/source-target/warehouse/bigquery","path":"/Spark/gems/source-target/warehouse/bigquery","sidebar":"defaultSidebar"},{"id":"Spark/gems/source-target/warehouse/cosmos","path":"/Spark/gems/source-target/warehouse/cosmos","sidebar":"defaultSidebar"},{"id":"Spark/gems/source-target/warehouse/db2","path":"/Spark/gems/source-target/warehouse/db2","sidebar":"defaultSidebar"},{"id":"Spark/gems/source-target/warehouse/jdbc","path":"/Spark/gems/source-target/warehouse/jdbc","sidebar":"defaultSidebar"},{"id":"Spark/gems/source-target/warehouse/mongodb","path":"/Spark/gems/source-target/warehouse/mongodb","sidebar":"defaultSidebar"},{"id":"Spark/gems/source-target/warehouse/oracle","path":"/Spark/gems/source-target/warehouse/oracle","sidebar":"defaultSidebar"},{"id":"Spark/gems/source-target/warehouse/redshift","path":"/Spark/gems/source-target/warehouse/redshift","sidebar":"defaultSidebar"},{"id":"Spark/gems/source-target/warehouse/salesforce","path":"/Spark/gems/source-target/warehouse/salesforce","sidebar":"defaultSidebar"},{"id":"Spark/gems/source-target/warehouse/sno
wflake","path":"/Spark/gems/source-target/warehouse/snowflake","sidebar":"defaultSidebar"},{"id":"Spark/gems/source-target/warehouse/teradata","path":"/Spark/gems/source-target/warehouse/teradata","sidebar":"defaultSidebar"},{"id":"Spark/gems/source-target/warehouse/warehouse","path":"/Spark/gems/source-target/warehouse/","sidebar":"defaultSidebar"},{"id":"Spark/gems/spark-gems","path":"/Spark/gems/","sidebar":"defaultSidebar"},{"id":"Spark/gems/subgraph/basic-subgraph","path":"/Spark/gems/subgraph/basic-subgraph","sidebar":"defaultSidebar"},{"id":"Spark/gems/subgraph/subgraph","path":"/Spark/gems/subgraph/","sidebar":"defaultSidebar"},{"id":"Spark/gems/subgraph/table-iterator","path":"/Spark/gems/subgraph/table-iterator","sidebar":"defaultSidebar"},{"id":"Spark/gems/transform/aggregate","path":"/Spark/gems/transform/aggregate","sidebar":"defaultSidebar"},{"id":"Spark/gems/transform/bulk-column-expressions","path":"/Spark/gems/transform/bulk-column-expressions","sidebar":"defaultSidebar"},{"id":"Spark/gems/transform/bulk-column-rename","path":"/Spark/gems/transform/bulk-column-rename","sidebar":"defaultSidebar"},{"id":"Spark/gems/transform/data-cleansing","path":"/Spark/gems/transform/data-cleansing","sidebar":"defaultSidebar"},{"id":"Spark/gems/transform/deduplicate","path":"/Spark/gems/transform/deduplicate","sidebar":"defaultSidebar"},{"id":"Spark/gems/transform/dynamic-select","path":"/Spark/gems/transform/dynamic-select","sidebar":"defaultSidebar"},{"id":"Spark/gems/transform/filter","path":"/Spark/gems/transform/filter","sidebar":"defaultSidebar"},{"id":"Spark/gems/transform/flatten-schema","path":"/Spark/gems/transform/flatten-schema","sidebar":"defaultSidebar"},{"id":"Spark/gems/transform/limit","path":"/Spark/gems/transform/limit","sidebar":"defaultSidebar"},{"id":"Spark/gems/transform/order-by","path":"/Spark/gems/transform/order-by","sidebar":"defaultSidebar"},{"id":"Spark/gems/transform/reformat","path":"/Spark/gems/transform/reformat","sidebar":"default
Sidebar"},{"id":"Spark/gems/transform/schema-transform","path":"/Spark/gems/transform/schema-transform","sidebar":"defaultSidebar"},{"id":"Spark/gems/transform/set-operation","path":"/Spark/gems/transform/set-operation","sidebar":"defaultSidebar"},{"id":"Spark/gems/transform/transform","path":"/Spark/gems/transform/","sidebar":"defaultSidebar"},{"id":"Spark/gems/transform/unpivot","path":"/Spark/gems/transform/unpivot","sidebar":"defaultSidebar"},{"id":"Spark/gems/transform/window-function","path":"/Spark/gems/transform/window-function","sidebar":"defaultSidebar"},{"id":"Spark/pipeline-monitoring/enable-pipeline-monitoring","path":"/Spark/pipeline-monitoring/enable-pipeline-monitoring","sidebar":"defaultSidebar"},{"id":"Spark/pipeline-monitoring/pipeline-monitoring","path":"/Spark/pipeline-monitoring/","sidebar":"defaultSidebar"},{"id":"Spark/pipeline-monitoring/use-pipeline-monitoring","path":"/Spark/pipeline-monitoring/use-pipeline-monitoring","sidebar":"defaultSidebar"},{"id":"Spark/secret-management/databricks-secrets","path":"/Spark/secret-management/databricks-secrets","sidebar":"defaultSidebar"},{"id":"Spark/secret-management/env-variable","path":"/Spark/secret-management/env-variable","sidebar":"defaultSidebar"},{"id":"Spark/secret-management/hashicorp-vault","path":"/Spark/secret-management/hashicorp-vault","sidebar":"defaultSidebar"},{"id":"Spark/secret-management/secret-management-spark","path":"/Spark/secret-management/","sidebar":"defaultSidebar"},{"id":"Spark/secret-management/using-secrets","path":"/Spark/secret-management/using-secrets","sidebar":"defaultSidebar"},{"id":"Spark/spark-streaming/streaming","path":"/Spark/spark-streaming/","sidebar":"defaultSidebar"},{"id":"Spark/spark-streaming/streaming-sources-and-targets/streaming-event-apps","path":"/Spark/spark-streaming/streaming-sources-and-targets/streaming-event-apps","sidebar":"defaultSidebar"},{"id":"Spark/spark-streaming/streaming-sources-and-targets/streaming-file-apps","path":"/Spark/spark
-streaming/streaming-sources-and-targets/streaming-file-apps","sidebar":"defaultSidebar"},{"id":"Spark/spark-streaming/streaming-sources-and-targets/streaming-sources-and-targets","path":"/Spark/spark-streaming/streaming-sources-and-targets/","sidebar":"defaultSidebar"},{"id":"Spark/spark-streaming/streaming-sources-and-targets/streaming-warehouse-apps","path":"/Spark/spark-streaming/streaming-sources-and-targets/streaming-warehouse-apps","sidebar":"defaultSidebar"},{"id":"Spark/spark-streaming/transformations-streaming","path":"/Spark/spark-streaming/transformations-streaming","sidebar":"defaultSidebar"},{"id":"Spark/tests","path":"/Spark/tests","sidebar":"defaultSidebar"},{"id":"SQL/copilot-for-sql-users","path":"/SQL/","sidebar":"defaultSidebar"},{"id":"SQL/data-tests/data-tests","path":"/SQL/data-tests/","sidebar":"defaultSidebar"},{"id":"SQL/data-tests/use-model-tests","path":"/SQL/data-tests/use-model-tests","sidebar":"defaultSidebar"},{"id":"SQL/data-tests/use-project-tests","path":"/SQL/data-tests/use-project-tests","sidebar":"defaultSidebar"},{"id":"SQL/development/code-editor","path":"/SQL/development/code-editor","sidebar":"defaultSidebar"},{"id":"SQL/development/development","path":"/SQL/development/","sidebar":"defaultSidebar"},{"id":"SQL/development/target-models/location","path":"/SQL/development/target-models/location","sidebar":"defaultSidebar"},{"id":"SQL/development/target-models/schema","path":"/SQL/development/target-models/schema","sidebar":"defaultSidebar"},{"id":"SQL/development/target-models/sql-query","path":"/SQL/development/target-models/sql-query","sidebar":"defaultSidebar"},{"id":"SQL/development/target-models/target-models","path":"/SQL/development/target-models/","sidebar":"defaultSidebar"},{"id":"SQL/development/target-models/type-and-format","path":"/SQL/development/target-models/type-and-format","sidebar":"defaultSidebar"},{"id":"SQL/development/target-models/write-options","path":"/SQL/development/target-models/write-options","sid
ebar":"defaultSidebar"},{"id":"SQL/development/visual-editor/variant-schema","path":"/SQL/development/visual-editor/variant-schema","sidebar":"defaultSidebar"},{"id":"SQL/development/visual-editor/visual-editor","path":"/SQL/development/visual-editor/","sidebar":"defaultSidebar"},{"id":"SQL/development/visual-editor/visual-expression-builder/use-the-expression-builder","path":"/SQL/development/visual-editor/visual-expression-builder/use-the-expression-builder","sidebar":"defaultSidebar"},{"id":"SQL/development/visual-editor/visual-expression-builder/visual-expression-builder","path":"/SQL/development/visual-editor/visual-expression-builder/","sidebar":"defaultSidebar"},{"id":"SQL/development/visual-editor/visual-expression-builder/visual-expression-builder-reference","path":"/SQL/development/visual-editor/visual-expression-builder/visual-expression-builder-reference","sidebar":"defaultSidebar"},{"id":"SQL/execution/data-explorer","path":"/SQL/execution/data-explorer","sidebar":"defaultSidebar"},{"id":"SQL/execution/execution","path":"/SQL/execution/","sidebar":"defaultSidebar"},{"id":"SQL/extensibility/dependencies","path":"/SQL/extensibility/dependencies","sidebar":"defaultSidebar"},{"id":"SQL/extensibility/extensibility","path":"/SQL/extensibility/","sidebar":"defaultSidebar"},{"id":"SQL/extensibility/gem-builder/gem-builder","path":"/SQL/extensibility/gem-builder/","sidebar":"defaultSidebar"},{"id":"SQL/fabrics/databricks","path":"/SQL/fabrics/databricks","sidebar":"defaultSidebar"},{"id":"SQL/fabrics/Fabrics","path":"/SQL/fabrics/","sidebar":"defaultSidebar"},{"id":"SQL/fabrics/snowflake","path":"/SQL/fabrics/snowflake","sidebar":"defaultSidebar"},{"id":"SQL/gems/custom/custom-sql-gems","path":"/SQL/gems/custom/","sidebar":"defaultSidebar"},{"id":"SQL/gems/data-joins","path":"/SQL/gems/data-joins","sidebar":"defaultSidebar"},{"id":"SQL/gems/datasources/datasources-sql","path":"/SQL/gems/datasources/","sidebar":"defaultSidebar"},{"id":"SQL/gems/datasources/upload
-files","path":"/SQL/gems/datasources/upload-files","sidebar":"defaultSidebar"},{"id":"SQL/gems/sql-gems","path":"/SQL/gems/","sidebar":"defaultSidebar"},{"id":"SQL/gems/subgraph/subgraph","path":"/SQL/gems/subgraph/","sidebar":"defaultSidebar"},{"id":"SQL/gems/transform/deduplicate","path":"/SQL/gems/transform/deduplicate","sidebar":"defaultSidebar"},{"id":"SQL/gems/transform/flattenschema","path":"/SQL/gems/transform/flattenschema","sidebar":"defaultSidebar"},{"id":"SQL/gems/transform/sql-aggregate","path":"/SQL/gems/transform/sql-aggregate","sidebar":"defaultSidebar"},{"id":"SQL/gems/transform/transform","path":"/SQL/gems/transform/","sidebar":"defaultSidebar"},{"id":"tutorials/Orchestration/multi-jobs-trigger","path":"/tutorials/Orchestration/multi-jobs-trigger","sidebar":"defaultSidebar"},{"id":"tutorials/Orchestration/orchestration-tutorials","path":"/tutorials/Orchestration/","sidebar":"defaultSidebar"},{"id":"tutorials/Orchestration/reliable-ci-cd","path":"/tutorials/Orchestration/reliable-ci-cd","sidebar":"defaultSidebar"},{"id":"tutorials/Spark/spark-tutorials","path":"/tutorials/Spark/","sidebar":"defaultSidebar"},{"id":"tutorials/Spark/working-with-excel","path":"/tutorials/Spark/working-with-excel","sidebar":"defaultSidebar"},{"id":"tutorials/tutorials","path":"/tutorials/","sidebar":"defaultSidebar"},{"id":"tutorials/videos/design-Pipeline","path":"/tutorials/videos/design-Pipeline","sidebar":"defaultSidebar"},{"id":"tutorials/videos/schedule-Pipeline","path":"/tutorials/videos/schedule-Pipeline","sidebar":"defaultSidebar"},{"id":"tutorials/videos/test-Pipeline","path":"/tutorials/videos/test-Pipeline","sidebar":"defaultSidebar"},{"id":"tutorials/videos/video-tutorials","path":"/tutorials/videos/","sidebar":"defaultSidebar"}],"draftIds":["Spark/data-quality","Spark/development","Spark/parameterized-gems","metadata/crawlers","metadata/graphql-api","metadata/search","Spark/spark/functions","Spark/spark/optimizations","Orchestration/airflow/development/de
velopment_airflow","Orchestration/airflow/development/functions_airflow","Orchestration/airflow/development/settings_airflow","Orchestration/airflow/testingAndMonitoring/testing_monitoring_airflow","Spark/gems/source-target/file/ftp"],"sidebars":{"defaultSidebar":{"link":{"path":"/","label":"Prophecy"}}}}],"breadcrumbs":true}}}'),i=JSON.parse('{"defaultLocale":"en","locales":["en"],"path":"i18n","currentLocale":"en","localeConfigs":{"en":{"label":"English","direction":"ltr","htmlLang":"en","calendar":"gregory","path":"en"}}}');var s=a(22654);const c=JSON.parse('{"docusaurusVersion":"2.4.3","siteVersion":"0.0.0","pluginVersions":{"docusaurus-plugin-content-docs":{"type":"package","name":"@docusaurus/plugin-content-docs","version":"2.4.3"},"docusaurus-plugin-content-pages":{"type":"package","name":"@docusaurus/plugin-content-pages","version":"2.4.3"},"docusaurus-plugin-sitemap":{"type":"package","name":"@docusaurus/plugin-sitemap","version":"2.4.3"},"docusaurus-theme-classic":{"type":"package","name":"@docusaurus/theme-classic","version":"2.4.3"},"docusaurus-theme-search-algolia":{"type":"package","name":"@docusaurus/theme-search-algolia","version":"2.4.3"},"docusaurus-plugin-image-zoom":{"type":"package","name":"docusaurus-plugin-image-zoom","version":"1.0.1"},"docusaurus-plugin-client-redirects":{"type":"package","name":"@docusaurus/plugin-client-redirects","version":"2.4.3"}}}'),l={siteConfig:r.default,siteMetadata:c,globalData:o,i18n:i,codeTranslations:s},d=n.createContext(l);function u(e){let{children:t}=e;return n.createElement(d.Provider,{value:l},t)}},67489:(e,t,a)=>{"use strict";a.d(t,{A:()=>p});var n=a(96540),r=a(38193),o=a(5260),i=a(70440),s=a(78511);function c(e){let{error:t,tryAgain:a}=e;return n.createElement("div",{style:{display:"flex",flexDirection:"column",justifyContent:"center",alignItems:"flex-start",minHeight:"100vh",width:"100%",maxWidth:"80ch",fontSize:"20px",margin:"0 auto",padding:"1rem"}},n.createElement("h1",{style:{fontSize:"3rem"}},"This 
page crashed"),n.createElement("button",{type:"button",onClick:a,style:{margin:"1rem 0",fontSize:"2rem",cursor:"pointer",borderRadius:20,padding:"1rem"}},"Try again"),n.createElement(l,{error:t}))}function l(e){let{error:t}=e;const a=(0,i.getErrorCausalChain)(t).map((e=>e.message)).join("\n\nCause:\n");return n.createElement("p",{style:{whiteSpace:"pre-wrap"}},a)}function d(e){let{error:t,tryAgain:a}=e;return n.createElement(p,{fallback:()=>n.createElement(c,{error:t,tryAgain:a})},n.createElement(o.A,null,n.createElement("title",null,"Page Error")),n.createElement(s.A,null,n.createElement(c,{error:t,tryAgain:a})))}const u=e=>n.createElement(d,e);class p extends n.Component{constructor(e){super(e),this.state={error:null}}componentDidCatch(e){r.A.canUseDOM&&this.setState({error:e})}render(){const{children:e}=this.props,{error:t}=this.state;if(t){const e={error:t,tryAgain:()=>this.setState({error:null})};return(this.props.fallback??u)(e)}return e??null}}},38193:(e,t,a)=>{"use strict";a.d(t,{A:()=>r});const n="undefined"!=typeof window&&"document"in window&&"createElement"in window.document,r={canUseDOM:n,canUseEventListeners:n&&("addEventListener"in window||"attachEvent"in window),canUseIntersectionObserver:n&&"IntersectionObserver"in window,canUseViewport:n&&"screen"in window}},5260:(e,t,a)=>{"use strict";a.d(t,{A:()=>o});var n=a(96540),r=a(80545);function o(e){return n.createElement(r.mg,e)}},75489:(e,t,a)=>{"use strict";a.d(t,{A:()=>f});var n=a(58168),r=a(96540),o=a(54625),i=a(70440),s=a(44586),c=a(16654),l=a(38193);const d=r.createContext({collectLink:()=>{}});var u=a(86025);function p(e,t){let{isNavLink:a,to:p,href:f,activeClassName:g,isActive:m,"data-noBrokenLinkCheck":b,autoAddBaseUrl:h=!0,..._}=e;const{siteConfig:{trailingSlash:v,baseUrl:y}}=(0,s.A)(),{withBaseUrl:S}=(0,u.h)(),k=(0,r.useContext)(d),x=(0,r.useRef)(null);(0,r.useImperativeHandle)(t,(()=>x.current));const w=p||f;const E=(0,c.A)(w),A=w?.replace("pathname://","");let T=void 
0!==A?(C=A,h&&(e=>e.startsWith("/"))(C)?S(C):C):void 0;var C;T&&E&&(T=(0,i.applyTrailingSlash)(T,{trailingSlash:v,baseUrl:y}));const L=(0,r.useRef)(!1),O=a?o.k2:o.N_,j=l.A.canUseIntersectionObserver,P=(0,r.useRef)(),N=()=>{L.current||null==T||(window.docusaurus.preload(T),L.current=!0)};(0,r.useEffect)((()=>(!j&&E&&null!=T&&window.docusaurus.prefetch(T),()=>{j&&P.current&&P.current.disconnect()})),[P,T,j,E]);const R=T?.startsWith("#")??!1,I=!T||!E||R;return I||b||k.collectLink(T),I?r.createElement("a",(0,n.A)({ref:x,href:T},w&&!E&&{target:"_blank",rel:"noopener noreferrer"},_)):r.createElement(O,(0,n.A)({},_,{onMouseEnter:N,onTouchStart:N,innerRef:e=>{x.current=e,j&&e&&E&&(P.current=new window.IntersectionObserver((t=>{t.forEach((t=>{e===t.target&&(t.isIntersecting||t.intersectionRatio>0)&&(P.current.unobserve(e),P.current.disconnect(),null!=T&&window.docusaurus.prefetch(T))}))})),P.current.observe(e))},to:T},a&&{isActive:m,activeClassName:g}))}const f=r.forwardRef(p)},21312:(e,t,a)=>{"use strict";a.d(t,{A:()=>c,T:()=>s});var n=a(96540);function r(e,t){const a=e.split(/(\{\w+\})/).map(((e,a)=>{if(a%2==1){const a=t?.[e.slice(1,-1)];if(void 0!==a)return a}return e}));return a.some((e=>(0,n.isValidElement)(e)))?a.map(((e,t)=>(0,n.isValidElement)(e)?n.cloneElement(e,{key:t}):e)).filter((e=>""!==e)):a.join("")}var o=a(22654);function i(e){let{id:t,message:a}=e;if(void 0===t&&void 0===a)throw new Error("Docusaurus translation declarations must have at least a translation id or a default translation message");return o[t??a]??a??t}function s(e,t){let{message:a,id:n}=e;return r(i({message:a,id:n}),t)}function c(e){let{children:t,id:a,values:o}=e;if(t&&"string"!=typeof t)throw console.warn("Illegal children",t),new Error("The Docusaurus component only accept simple string values");const s=i({message:t,id:a});return n.createElement(n.Fragment,null,r(s,o))}},17065:(e,t,a)=>{"use strict";a.d(t,{W:()=>n});const n="default"},16654:(e,t,a)=>{"use strict";function 
n(e){return/^(?:\w*:|\/\/)/.test(e)}function r(e){return void 0!==e&&!n(e)}a.d(t,{A:()=>r,z:()=>n})},86025:(e,t,a)=>{"use strict";a.d(t,{A:()=>s,h:()=>i});var n=a(96540),r=a(44586),o=a(16654);function i(){const{siteConfig:{baseUrl:e,url:t}}=(0,r.A)(),a=(0,n.useCallback)(((a,n)=>function(e,t,a,n){let{forcePrependBaseUrl:r=!1,absolute:i=!1}=void 0===n?{}:n;if(!a||a.startsWith("#")||(0,o.z)(a))return a;if(r)return t+a.replace(/^\//,"");if(a===t.replace(/\/$/,""))return t;const s=a.startsWith(t)?a:t+a.replace(/^\//,"");return i?e+s:s}(t,e,a,n)),[t,e]);return{withBaseUrl:a}}function s(e,t){void 0===t&&(t={});const{withBaseUrl:a}=i();return a(e,t)}},44586:(e,t,a)=>{"use strict";a.d(t,{A:()=>o});var n=a(96540),r=a(26988);function o(){return(0,n.useContext)(r.o)}},92303:(e,t,a)=>{"use strict";a.d(t,{A:()=>o});var n=a(96540),r=a(6125);function o(){return(0,n.useContext)(r.o)}},86921:(e,t,a)=>{"use strict";a.d(t,{A:()=>r});const n=e=>"object"==typeof e&&!!e&&Object.keys(e).length>0;function r(e){const t={};return function e(a,r){Object.entries(a).forEach((a=>{let[o,i]=a;const s=r?`${r}.${o}`:o;n(i)?e(i,s):t[s]=i}))}(e),t}},53102:(e,t,a)=>{"use strict";a.d(t,{W:()=>o,o:()=>r});var n=a(96540);const r=n.createContext(null);function o(e){let{children:t,value:a}=e;const o=n.useContext(r),i=(0,n.useMemo)((()=>function(e){let{parent:t,value:a}=e;if(!t){if(!a)throw new Error("Unexpected: no Docusaurus route context found");if(!("plugin"in a))throw new Error("Unexpected: Docusaurus topmost route context has no `plugin` attribute");return a}const n={...t.data,...a?.data};return{plugin:t.plugin,data:n}}({parent:o,value:a})),[o,a]);return n.createElement(r.Provider,{value:i},t)}},44070:(e,t,a)=>{"use strict";a.d(t,{zK:()=>h,vT:()=>f,gk:()=>g,Gy:()=>u,HW:()=>_,ht:()=>p,r7:()=>b,jh:()=>m});var n=a(56347),r=a(44586),o=a(17065);function i(e,t){void 0===t&&(t={});const a=function(){const{globalData:e}=(0,r.A)();return e}()[e];if(!a&&t.failfast)throw new Error(`Docusaurus plugin global data 
not found for "${e}" plugin.`);return a}const s=e=>e.versions.find((e=>e.isLast));function c(e,t){const a=s(e);return[...e.versions.filter((e=>e!==a)),a].find((e=>!!(0,n.B6)(t,{path:e.path,exact:!1,strict:!1})))}function l(e,t){const a=c(e,t),r=a?.docs.find((e=>!!(0,n.B6)(t,{path:e.path,exact:!0,strict:!1})));return{activeVersion:a,activeDoc:r,alternateDocVersions:r?function(t){const a={};return e.versions.forEach((e=>{e.docs.forEach((n=>{n.id===t&&(a[e.name]=n)}))})),a}(r.id):{}}}const d={},u=()=>i("docusaurus-plugin-content-docs")??d,p=e=>function(e,t,a){void 0===t&&(t=o.W),void 0===a&&(a={});const n=i(e),r=n?.[t];if(!r&&a.failfast)throw new Error(`Docusaurus plugin global data not found for "${e}" plugin with id "${t}".`);return r}("docusaurus-plugin-content-docs",e,{failfast:!0});function f(e){void 0===e&&(e={});const t=u(),{pathname:a}=(0,n.zy)();return function(e,t,a){void 0===a&&(a={});const r=Object.entries(e).sort(((e,t)=>t[1].path.localeCompare(e[1].path))).find((e=>{let[,a]=e;return!!(0,n.B6)(t,{path:a.path,exact:!1,strict:!1})})),o=r?{pluginId:r[0],pluginData:r[1]}:void 0;if(!o&&a.failfast)throw new Error(`Can't find active docs plugin for "${t}" pathname, while it was expected to be found. Maybe you tried to use a docs feature that can only be used on a docs-related page? 
Existing docs plugin paths are: ${Object.values(e).map((e=>e.path)).join(", ")}`);return o}(t,a,e)}function g(e){void 0===e&&(e={});const t=f(e),{pathname:a}=(0,n.zy)();if(!t)return;return{activePlugin:t,activeVersion:c(t.pluginData,a)}}function m(e){return p(e).versions}function b(e){const t=p(e);return s(t)}function h(e){const t=p(e),{pathname:a}=(0,n.zy)();return l(t,a)}function _(e){const t=p(e),{pathname:a}=(0,n.zy)();return function(e,t){const a=s(e);return{latestDocSuggestion:l(e,t).alternateDocVersions[a.name],latestVersionSuggestion:a}}(t,a)}},76294:(e,t,a)=>{"use strict";a.r(t),a.d(t,{default:()=>o});var n=a(5947),r=a.n(n);r().configure({showSpinner:!1});const o={onRouteUpdate(e){let{location:t,previousLocation:a}=e;if(a&&t.pathname!==a.pathname){const e=window.setTimeout((()=>{r().start()}),200);return()=>window.clearTimeout(e)}},onRouteDidUpdate(){r().done()}}},26134:(e,t,a)=>{"use strict";a.r(t);var n=a(61258),r=a(4784);!function(e){const{themeConfig:{prism:t}}=r.default,{additionalLanguages:n}=t;globalThis.Prism=e,n.forEach((e=>{a(56864)(`./prism-${e}`)})),delete globalThis.Prism}(n.A)},43186:(e,t,a)=>{"use strict";a.d(t,{A:()=>o});var n=a(96540);const r={iconExternalLink:"iconExternalLink_nPIU"};function o(e){let{width:t=13.5,height:a=13.5}=e;return n.createElement("svg",{width:t,height:a,"aria-hidden":"true",viewBox:"0 0 24 24",className:r.iconExternalLink},n.createElement("path",{fill:"currentColor",d:"M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"}))}},78511:(e,t,a)=>{"use strict";a.d(t,{A:()=>jt});var n=a(96540),r=a(20053),o=a(67489),i=a(69024),s=a(58168),c=a(56347),l=a(21312),d=a(75062);const u="__docusaurus_skipToContent_fallback";function p(e){e.setAttribute("tabindex","-1"),e.focus(),e.removeAttribute("tabindex")}function f(){const e=(0,n.useRef)(null),{action:t}=(0,c.W6)(),a=(0,n.useCallback)((e=>{e.preventDefault();const 
t=document.querySelector("main:first-of-type")??document.getElementById(u);t&&p(t)}),[]);return(0,d.$)((a=>{let{location:n}=a;e.current&&!n.hash&&"PUSH"===t&&p(e.current)})),{containerRef:e,onClick:a}}const g=(0,l.T)({id:"theme.common.skipToMainContent",description:"The skip to content label used for accessibility, allowing to rapidly navigate to main content with keyboard tab/enter navigation",message:"Skip to main content"});function m(e){const t=e.children??g,{containerRef:a,onClick:r}=f();return n.createElement("div",{ref:a,role:"region","aria-label":g},n.createElement("a",(0,s.A)({},e,{href:`#${u}`,onClick:r}),t))}var b=a(17559),h=a(14090);const _={skipToContent:"skipToContent_fXgn"};function v(){return n.createElement(m,{className:_.skipToContent})}var y=a(6342),S=a(65041);function k(e){let{width:t=21,height:a=21,color:r="currentColor",strokeWidth:o=1.2,className:i,...c}=e;return n.createElement("svg",(0,s.A)({viewBox:"0 0 15 15",width:t,height:a},c),n.createElement("g",{stroke:r,strokeWidth:o},n.createElement("path",{d:"M.75.75l13.5 13.5M14.25.75L.75 14.25"})))}const x={closeButton:"closeButton_CVFx"};function w(e){return n.createElement("button",(0,s.A)({type:"button","aria-label":(0,l.T)({id:"theme.AnnouncementBar.closeButtonAriaLabel",message:"Close",description:"The ARIA label for close button of announcement bar"})},e,{className:(0,r.A)("clean-btn close",x.closeButton,e.className)}),n.createElement(k,{width:14,height:14,strokeWidth:3.1}))}const E={content:"content_knG7"};function A(e){const{announcementBar:t}=(0,y.p)(),{content:a}=t;return n.createElement("div",(0,s.A)({},e,{className:(0,r.A)(E.content,e.className),dangerouslySetInnerHTML:{__html:a}}))}const T={announcementBar:"announcementBar_mb4j",announcementBarPlaceholder:"announcementBarPlaceholder_vyr4",announcementBarClose:"announcementBarClose_gvF7",announcementBarContent:"announcementBarContent_xLdY"};function C(){const{announcementBar:e}=(0,y.p)(),{isActive:t,close:a}=(0,S.Mj)();if(!t)return 
null;const{backgroundColor:r,textColor:o,isCloseable:i}=e;return n.createElement("div",{className:T.announcementBar,style:{backgroundColor:r,color:o},role:"banner"},i&&n.createElement("div",{className:T.announcementBarPlaceholder}),n.createElement(A,{className:T.announcementBarContent}),i&&n.createElement(w,{onClick:a,className:T.announcementBarClose}))}var L=a(22069),O=a(23104);var j=a(89532),P=a(75600);const N=n.createContext(null);function R(e){let{children:t}=e;const a=function(){const e=(0,L.M)(),t=(0,P.YL)(),[a,r]=(0,n.useState)(!1),o=null!==t.component,i=(0,j.ZC)(o);return(0,n.useEffect)((()=>{o&&!i&&r(!0)}),[o,i]),(0,n.useEffect)((()=>{o?e.shown||r(!0):r(!1)}),[e.shown,o]),(0,n.useMemo)((()=>[a,r]),[a])}();return n.createElement(N.Provider,{value:a},t)}function I(e){if(e.component){const t=e.component;return n.createElement(t,e.props)}}function M(){const e=(0,n.useContext)(N);if(!e)throw new j.dV("NavbarSecondaryMenuDisplayProvider");const[t,a]=e,r=(0,n.useCallback)((()=>a(!1)),[a]),o=(0,P.YL)();return(0,n.useMemo)((()=>({shown:t,hide:r,content:I(o)})),[r,o,t])}function D(e){let{header:t,primaryMenu:a,secondaryMenu:o}=e;const{shown:i}=M();return n.createElement("div",{className:"navbar-sidebar"},t,n.createElement("div",{className:(0,r.A)("navbar-sidebar__items",{"navbar-sidebar__items--show-secondary":i})},n.createElement("div",{className:"navbar-sidebar__item menu"},a),n.createElement("div",{className:"navbar-sidebar__item menu"},o)))}var z=a(95293),F=a(92303);function B(e){return n.createElement("svg",(0,s.A)({viewBox:"0 0 24 24",width:24,height:24},e),n.createElement("path",{fill:"currentColor",d:"M12,9c1.65,0,3,1.35,3,3s-1.35,3-3,3s-3-1.35-3-3S10.35,9,12,9 M12,7c-2.76,0-5,2.24-5,5s2.24,5,5,5s5-2.24,5-5 S14.76,7,12,7L12,7z M2,13l2,0c0.55,0,1-0.45,1-1s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S1.45,13,2,13z M20,13l2,0c0.55,0,1-0.45,1-1 s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S19.45,13,20,13z M11,2v2c0,0.55,0.45,1,1,1s1-0.45,1-1V2c0-0.55-0.45-1-1-1S11,1.45,11,2z 
M11,20v2c0,0.55,0.45,1,1,1s1-0.45,1-1v-2c0-0.55-0.45-1-1-1C11.45,19,11,19.45,11,20z M5.99,4.58c-0.39-0.39-1.03-0.39-1.41,0 c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0s0.39-1.03,0-1.41L5.99,4.58z M18.36,16.95 c-0.39-0.39-1.03-0.39-1.41,0c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0c0.39-0.39,0.39-1.03,0-1.41 L18.36,16.95z M19.42,5.99c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06c-0.39,0.39-0.39,1.03,0,1.41 s1.03,0.39,1.41,0L19.42,5.99z M7.05,18.36c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06 c-0.39,0.39-0.39,1.03,0,1.41s1.03,0.39,1.41,0L7.05,18.36z"}))}function $(e){return n.createElement("svg",(0,s.A)({viewBox:"0 0 24 24",width:24,height:24},e),n.createElement("path",{fill:"currentColor",d:"M9.37,5.51C9.19,6.15,9.1,6.82,9.1,7.5c0,4.08,3.32,7.4,7.4,7.4c0.68,0,1.35-0.09,1.99-0.27C17.45,17.19,14.93,19,12,19 c-3.86,0-7-3.14-7-7C5,9.07,6.81,6.55,9.37,5.51z M12,3c-4.97,0-9,4.03-9,9s4.03,9,9,9s9-4.03,9-9c0-0.46-0.04-0.92-0.1-1.36 c-0.98,1.37-2.58,2.26-4.4,2.26c-2.98,0-5.4-2.42-5.4-5.4c0-1.81,0.89-3.42,2.26-4.4C12.92,3.04,12.46,3,12,3L12,3z"}))}const U={toggle:"toggle_vylO",toggleButton:"toggleButton_gllP",darkToggleIcon:"darkToggleIcon_wfgR",lightToggleIcon:"lightToggleIcon_pyhR",toggleButtonDisabled:"toggleButtonDisabled_aARS"};function Q(e){let{className:t,buttonClassName:a,value:o,onChange:i}=e;const s=(0,F.A)(),c=(0,l.T)({message:"Switch between dark and light mode (currently {mode})",id:"theme.colorToggle.ariaLabel",description:"The ARIA label for the navbar color mode toggle"},{mode:"dark"===o?(0,l.T)({message:"dark mode",id:"theme.colorToggle.ariaLabel.mode.dark",description:"The name for the dark color mode"}):(0,l.T)({message:"light mode",id:"theme.colorToggle.ariaLabel.mode.light",description:"The name for the light color mode"})});return 
n.createElement("div",{className:(0,r.A)(U.toggle,t)},n.createElement("button",{className:(0,r.A)("clean-btn",U.toggleButton,!s&&U.toggleButtonDisabled,a),type:"button",onClick:()=>i("dark"===o?"light":"dark"),disabled:!s,title:c,"aria-label":c,"aria-live":"polite"},n.createElement(B,{className:(0,r.A)(U.toggleIcon,U.lightToggleIcon)}),n.createElement($,{className:(0,r.A)(U.toggleIcon,U.darkToggleIcon)})))}const q=n.memo(Q),H={darkNavbarColorModeToggle:"darkNavbarColorModeToggle_X3D1"};function V(e){let{className:t}=e;const a=(0,y.p)().navbar.style,r=(0,y.p)().colorMode.disableSwitch,{colorMode:o,setColorMode:i}=(0,z.G)();return r?null:n.createElement(q,{className:t,buttonClassName:"dark"===a?H.darkNavbarColorModeToggle:void 0,value:o,onChange:i})}var G=a(23465);function W(){return n.createElement(G.A,{className:"navbar__brand",imageClassName:"navbar__logo",titleClassName:"navbar__title text--truncate"})}function K(){const e=(0,L.M)();return n.createElement("button",{type:"button","aria-label":(0,l.T)({id:"theme.docs.sidebar.closeSidebarButtonAriaLabel",message:"Close navigation bar",description:"The ARIA label for close button of mobile sidebar"}),className:"clean-btn navbar-sidebar__close",onClick:()=>e.toggle()},n.createElement(k,{color:"var(--ifm-color-emphasis-600)"}))}function Y(){return n.createElement("div",{className:"navbar-sidebar__brand"},n.createElement(W,null),n.createElement(V,{className:"margin-right--md"}),n.createElement(K,null))}var X=a(75489),Z=a(86025),J=a(16654),ee=a(91252),te=a(43186);function ae(e){let{activeBasePath:t,activeBaseRegex:a,to:r,href:o,label:i,html:c,isDropdownLink:l,prependBaseUrlToHref:d,...u}=e;const p=(0,Z.A)(r),f=(0,Z.A)(t),g=(0,Z.A)(o,{forcePrependBaseUrl:!0}),m=i&&o&&!(0,J.A)(o),b=c?{dangerouslySetInnerHTML:{__html:c}}:{children:n.createElement(n.Fragment,null,i,m&&n.createElement(te.A,l&&{width:12,height:12}))};return 
o?n.createElement(X.A,(0,s.A)({href:d?g:o},u,b)):n.createElement(X.A,(0,s.A)({to:p,isNavLink:!0},(t||a)&&{isActive:(e,t)=>a?(0,ee.G)(a,t.pathname):t.pathname.startsWith(f)},u,b))}function ne(e){let{className:t,isDropdownItem:a=!1,...o}=e;const i=n.createElement(ae,(0,s.A)({className:(0,r.A)(a?"dropdown__link":"navbar__item navbar__link",t),isDropdownLink:a},o));return a?n.createElement("li",null,i):i}function re(e){let{className:t,isDropdownItem:a,...o}=e;return n.createElement("li",{className:"menu__list-item"},n.createElement(ae,(0,s.A)({className:(0,r.A)("menu__link",t)},o)))}function oe(e){let{mobile:t=!1,position:a,...r}=e;const o=t?re:ne;return n.createElement(o,(0,s.A)({},r,{activeClassName:r.activeClassName??(t?"menu__link--active":"navbar__link--active")}))}var ie=a(41422),se=a(99169),ce=a(44586);function le(e,t){return e.some((e=>function(e,t){return!!(0,se.ys)(e.to,t)||!!(0,ee.G)(e.activeBaseRegex,t)||!(!e.activeBasePath||!t.startsWith(e.activeBasePath))}(e,t)))}function de(e){let{items:t,position:a,className:o,onClick:i,...c}=e;const l=(0,n.useRef)(null),[d,u]=(0,n.useState)(!1);return(0,n.useEffect)((()=>{const e=e=>{l.current&&!l.current.contains(e.target)&&u(!1)};return document.addEventListener("mousedown",e),document.addEventListener("touchstart",e),document.addEventListener("focusin",e),()=>{document.removeEventListener("mousedown",e),document.removeEventListener("touchstart",e),document.removeEventListener("focusin",e)}}),[l]),n.createElement("div",{ref:l,className:(0,r.A)("navbar__item","dropdown","dropdown--hoverable",{"dropdown--right":"right"===a,"dropdown--show":d})},n.createElement(ae,(0,s.A)({"aria-haspopup":"true","aria-expanded":d,role:"button",href:c.to?void 0:"#",className:(0,r.A)("navbar__link",o)},c,{onClick:c.to?void 
0:e=>e.preventDefault(),onKeyDown:e=>{"Enter"===e.key&&(e.preventDefault(),u(!d))}}),c.children??c.label),n.createElement("ul",{className:"dropdown__menu"},t.map(((e,t)=>n.createElement(qe,(0,s.A)({isDropdownItem:!0,activeClassName:"dropdown__link--active"},e,{key:t}))))))}function ue(e){let{items:t,className:a,position:o,onClick:i,...l}=e;const d=function(){const{siteConfig:{baseUrl:e}}=(0,ce.A)(),{pathname:t}=(0,c.zy)();return t.replace(e,"/")}(),u=le(t,d),{collapsed:p,toggleCollapsed:f,setCollapsed:g}=(0,ie.u)({initialState:()=>!u});return(0,n.useEffect)((()=>{u&&g(!u)}),[d,u,g]),n.createElement("li",{className:(0,r.A)("menu__list-item",{"menu__list-item--collapsed":p})},n.createElement(ae,(0,s.A)({role:"button",className:(0,r.A)("menu__link menu__link--sublist menu__link--sublist-caret",a)},l,{onClick:e=>{e.preventDefault(),f()}}),l.children??l.label),n.createElement(ie.N,{lazy:!0,as:"ul",className:"menu__list",collapsed:p},t.map(((e,t)=>n.createElement(qe,(0,s.A)({mobile:!0,isDropdownItem:!0,onClick:i,activeClassName:"menu__link--active"},e,{key:t}))))))}function pe(e){let{mobile:t=!1,...a}=e;const r=t?ue:de;return n.createElement(r,a)}var fe=a(32131);function ge(e){let{width:t=20,height:a=20,...r}=e;return n.createElement("svg",(0,s.A)({viewBox:"0 0 24 24",width:t,height:a,"aria-hidden":!0},r),n.createElement("path",{fill:"currentColor",d:"M12.87 15.07l-2.54-2.51.03-.03c1.74-1.94 2.98-4.17 3.71-6.53H17V4h-7V2H8v2H1v1.99h11.17C11.5 7.92 10.44 9.75 9 11.35 8.07 10.32 7.3 9.19 6.69 8h-2c.73 1.63 1.73 3.17 2.98 4.56l-5.09 5.02L4 19l5-5 3.11 3.11.76-2.04zM18.5 10h-2L12 22h2l1.12-3h4.75L21 22h2l-4.5-12zm-2.62 7l1.62-4.33L19.12 17h-3.24z"}))}const me="iconLanguage_nlXk";function be(){return n.createElement("svg",{width:"15",height:"15",className:"DocSearch-Control-Key-Icon"},n.createElement("path",{d:"M4.505 4.496h2M5.505 5.496v5M8.216 4.496l.055 5.993M10 7.5c.333.333.5.667.5 1v2M12.326 4.5v5.996M8.384 4.496c1.674 0 2.116 0 2.116 1.5s-.442 1.5-2.116 1.5M3.205 
9.303c-.09.448-.277 1.21-1.241 1.203C1 10.5.5 9.513.5 8V7c0-1.57.5-2.5 1.464-2.494.964.006 1.134.598 1.24 1.342M12.553 10.5h1.953",strokeWidth:"1.2",stroke:"currentColor",fill:"none",strokeLinecap:"square"}))}var he=a(89188),_e=["translations"];function ve(){return ve=Object.assign||function(e){for(var t=1;te.length)&&(t=e.length);for(var a=0,n=new Array(t);a=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var xe="Ctrl";var we=n.forwardRef((function(e,t){var a=e.translations,r=void 0===a?{}:a,o=ke(e,_e),i=r.buttonText,s=void 0===i?"Search":i,c=r.buttonAriaLabel,l=void 0===c?"Search":c,d=ye((0,n.useState)(null),2),u=d[0],p=d[1];return(0,n.useEffect)((function(){"undefined"!=typeof navigator&&(/(Mac|iPhone|iPod|iPad)/i.test(navigator.platform)?p("\u2318"):p(xe))}),[]),n.createElement("button",ve({type:"button",className:"DocSearch DocSearch-Button","aria-label":l},o,{ref:t}),n.createElement("span",{className:"DocSearch-Button-Container"},n.createElement(he.W,null),n.createElement("span",{className:"DocSearch-Button-Placeholder"},s)),n.createElement("span",{className:"DocSearch-Button-Keys"},null!==u&&n.createElement(n.Fragment,null,n.createElement(Ee,{reactsToKey:u===xe?xe:"Meta"},u===xe?n.createElement(be,null):u),n.createElement(Ee,{reactsToKey:"k"},"K"))))}));function Ee(e){var t=e.reactsToKey,a=e.children,r=ye((0,n.useState)(!1),2),o=r[0],i=r[1];return(0,n.useEffect)((function(){if(t)return window.addEventListener("keydown",e),window.addEventListener("keyup",a),function(){window.removeEventListener("keydown",e),window.removeEventListener("keyup",a)};function e(e){e.key===t&&i(!0)}function a(e){e.key!==t&&"Meta"!==e.key||i(!1)}}),[t]),n.createElement("kbd",{className:o?"DocSearch-Button-Key DocSearch-Button-Key--pressed":"DocSearch-Button-Key"},a)}var Ae=a(5260),Te=a(24255),Ce=a(51062),Le=a(2967);var Oe=a(40961);const 
je={button:{buttonText:(0,l.T)({id:"theme.SearchBar.label",message:"Search",description:"The ARIA label and placeholder for search button"}),buttonAriaLabel:(0,l.T)({id:"theme.SearchBar.label",message:"Search",description:"The ARIA label and placeholder for search button"})},modal:{searchBox:{resetButtonTitle:(0,l.T)({id:"theme.SearchModal.searchBox.resetButtonTitle",message:"Clear the query",description:"The label and ARIA label for search box reset button"}),resetButtonAriaLabel:(0,l.T)({id:"theme.SearchModal.searchBox.resetButtonTitle",message:"Clear the query",description:"The label and ARIA label for search box reset button"}),cancelButtonText:(0,l.T)({id:"theme.SearchModal.searchBox.cancelButtonText",message:"Cancel",description:"The label and ARIA label for search box cancel button"}),cancelButtonAriaLabel:(0,l.T)({id:"theme.SearchModal.searchBox.cancelButtonText",message:"Cancel",description:"The label and ARIA label for search box cancel button"})},startScreen:{recentSearchesTitle:(0,l.T)({id:"theme.SearchModal.startScreen.recentSearchesTitle",message:"Recent",description:"The title for recent searches"}),noRecentSearchesText:(0,l.T)({id:"theme.SearchModal.startScreen.noRecentSearchesText",message:"No recent searches",description:"The text when no recent searches"}),saveRecentSearchButtonTitle:(0,l.T)({id:"theme.SearchModal.startScreen.saveRecentSearchButtonTitle",message:"Save this search",description:"The label for save recent search button"}),removeRecentSearchButtonTitle:(0,l.T)({id:"theme.SearchModal.startScreen.removeRecentSearchButtonTitle",message:"Remove this search from history",description:"The label for remove recent search button"}),favoriteSearchesTitle:(0,l.T)({id:"theme.SearchModal.startScreen.favoriteSearchesTitle",message:"Favorite",description:"The title for favorite searches"}),removeFavoriteSearchButtonTitle:(0,l.T)({id:"theme.SearchModal.startScreen.removeFavoriteSearchButtonTitle",message:"Remove this search from 
favorites",description:"The label for remove favorite search button"})},errorScreen:{titleText:(0,l.T)({id:"theme.SearchModal.errorScreen.titleText",message:"Unable to fetch results",description:"The title for error screen of search modal"}),helpText:(0,l.T)({id:"theme.SearchModal.errorScreen.helpText",message:"You might want to check your network connection.",description:"The help text for error screen of search modal"})},footer:{selectText:(0,l.T)({id:"theme.SearchModal.footer.selectText",message:"to select",description:"The explanatory text of the action for the enter key"}),selectKeyAriaLabel:(0,l.T)({id:"theme.SearchModal.footer.selectKeyAriaLabel",message:"Enter key",description:"The ARIA label for the Enter key button that makes the selection"}),navigateText:(0,l.T)({id:"theme.SearchModal.footer.navigateText",message:"to navigate",description:"The explanatory text of the action for the Arrow up and Arrow down key"}),navigateUpKeyAriaLabel:(0,l.T)({id:"theme.SearchModal.footer.navigateUpKeyAriaLabel",message:"Arrow up",description:"The ARIA label for the Arrow up key button that makes the navigation"}),navigateDownKeyAriaLabel:(0,l.T)({id:"theme.SearchModal.footer.navigateDownKeyAriaLabel",message:"Arrow down",description:"The ARIA label for the Arrow down key button that makes the navigation"}),closeText:(0,l.T)({id:"theme.SearchModal.footer.closeText",message:"to close",description:"The explanatory text of the action for Escape key"}),closeKeyAriaLabel:(0,l.T)({id:"theme.SearchModal.footer.closeKeyAriaLabel",message:"Escape key",description:"The ARIA label for the Escape key button that close the modal"}),searchByText:(0,l.T)({id:"theme.SearchModal.footer.searchByText",message:"Search by",description:"The text explain that the search is making by Algolia"})},noResultsScreen:{noResultsText:(0,l.T)({id:"theme.SearchModal.noResultsScreen.noResultsText",message:"No results for",description:"The text explains that there are no results for the following 
search"}),suggestedQueryText:(0,l.T)({id:"theme.SearchModal.noResultsScreen.suggestedQueryText",message:"Try searching for",description:"The text for the suggested query when no results are found for the following search"}),reportMissingResultsText:(0,l.T)({id:"theme.SearchModal.noResultsScreen.reportMissingResultsText",message:"Believe this query should return results?",description:"The text for the question where the user thinks there are missing results"}),reportMissingResultsLinkText:(0,l.T)({id:"theme.SearchModal.noResultsScreen.reportMissingResultsLinkText",message:"Let us know.",description:"The text for the link to report missing results"})}},placeholder:(0,l.T)({id:"theme.SearchModal.placeholder",message:"Search docs",description:"The placeholder of the input of the DocSearch pop-up modal"})};let Pe=null;function Ne(e){let{hit:t,children:a}=e;return n.createElement(X.A,{to:t.url},a)}function Re(e){let{state:t,onClose:a}=e;const r=(0,Te.w)();return n.createElement(X.A,{to:r(t.query),onClick:a},n.createElement(l.A,{id:"theme.SearchBar.seeAll",values:{count:t.context.nbHits}},"See all {count} results"))}function Ie(e){let{contextualSearch:t,externalUrlRegex:r,...o}=e;const{siteMetadata:i}=(0,ce.A)(),l=(0,Ce.C)(),d=function(){const{locale:e,tags:t}=(0,Le.af)();return[`language:${e}`,t.map((e=>`docusaurus_tag:${e}`))]}(),u=o.searchParameters?.facetFilters??[],p=t?function(e,t){const a=e=>"string"==typeof e?[e]:e;return[...a(e),...a(t)]}(d,u):u,f={...o.searchParameters,facetFilters:p},g=(0,c.W6)(),m=(0,n.useRef)(null),b=(0,n.useRef)(null),[h,_]=(0,n.useState)(!1),[v,y]=(0,n.useState)(void 
0),S=(0,n.useCallback)((()=>Pe?Promise.resolve():Promise.all([a.e(9462).then(a.bind(a,9462)),Promise.all([a.e(71869),a.e(58913)]).then(a.bind(a,58913)),Promise.all([a.e(71869),a.e(90416)]).then(a.bind(a,90416))]).then((e=>{let[{DocSearchModal:t}]=e;Pe=t}))),[]),k=(0,n.useCallback)((()=>{S().then((()=>{m.current=document.createElement("div"),document.body.insertBefore(m.current,document.body.firstChild),_(!0)}))}),[S,_]),x=(0,n.useCallback)((()=>{_(!1),m.current?.remove()}),[_]),w=(0,n.useCallback)((e=>{S().then((()=>{_(!0),y(e.key)}))}),[S,_,y]),E=(0,n.useRef)({navigate(e){let{itemUrl:t}=e;(0,ee.G)(r,t)?window.location.href=t:g.push(t)}}).current,A=(0,n.useRef)((e=>o.transformItems?o.transformItems(e):e.map((e=>({...e,url:l(e.url)}))))).current,T=(0,n.useMemo)((()=>e=>n.createElement(Re,(0,s.A)({},e,{onClose:x}))),[x]),C=(0,n.useCallback)((e=>(e.addAlgoliaAgent("docusaurus",i.docusaurusVersion),e)),[i.docusaurusVersion]);return function(e){var t=e.isOpen,a=e.onOpen,r=e.onClose,o=e.onInput,i=e.searchButtonRef;n.useEffect((function(){function e(e){var n;(27===e.keyCode&&t||"k"===(null===(n=e.key)||void 0===n?void 0:n.toLowerCase())&&(e.metaKey||e.ctrlKey)||!function(e){var t=e.target,a=t.tagName;return t.isContentEditable||"INPUT"===a||"SELECT"===a||"TEXTAREA"===a}(e)&&"/"===e.key&&!t)&&(e.preventDefault(),t?r():document.body.classList.contains("DocSearch--active")||document.body.classList.contains("DocSearch--active")||a()),i&&i.current===document.activeElement&&o&&/[a-zA-Z0-9]/.test(String.fromCharCode(e.keyCode))&&o(e)}return 
window.addEventListener("keydown",e),function(){window.removeEventListener("keydown",e)}}),[t,a,r,o,i])}({isOpen:h,onOpen:k,onClose:x,onInput:w,searchButtonRef:b}),n.createElement(n.Fragment,null,n.createElement(Ae.A,null,n.createElement("link",{rel:"preconnect",href:`https://${o.appId}-dsn.algolia.net`,crossOrigin:"anonymous"})),n.createElement(we,{onTouchStart:S,onFocus:S,onMouseOver:S,onClick:k,ref:b,translations:je.button}),h&&Pe&&m.current&&(0,Oe.createPortal)(n.createElement(Pe,(0,s.A)({onClose:x,initialScrollY:window.scrollY,initialQuery:v,navigator:E,transformItems:A,hitComponent:Ne,transformSearchClient:C},o.searchPagePath&&{resultsFooterComponent:T},o,{searchParameters:f,placeholder:je.placeholder,translations:je.modal})),m.current))}function Me(){const{siteConfig:e}=(0,ce.A)();return n.createElement(Ie,e.themeConfig.algolia)}const De={searchBox:"searchBox_ZlJk"};function ze(e){let{children:t,className:a}=e;return n.createElement("div",{className:(0,r.A)(a,De.searchBox)},t)}var Fe=a(44070),Be=a(84142);var $e=a(55597);const Ue=e=>e.docs.find((t=>t.id===e.mainDocId));const Qe={default:oe,localeDropdown:function(e){let{mobile:t,dropdownItemsBefore:a,dropdownItemsAfter:r,...o}=e;const{i18n:{currentLocale:i,locales:d,localeConfigs:u}}=(0,ce.A)(),p=(0,fe.o)(),{search:f,hash:g}=(0,c.zy)(),m=[...a,...d.map((e=>{const a=`${`pathname://${p.createUrl({locale:e,fullyQualified:!1})}`}${f}${g}`;return{label:u[e].label,lang:u[e].htmlLang,to:a,target:"_self",autoAddBaseUrl:!1,className:e===i?t?"menu__link--active":"dropdown__link--active":""}})),...r],b=t?(0,l.T)({message:"Languages",id:"theme.navbar.mobileLanguageDropdown.label",description:"The label for the mobile language switcher dropdown"}):u[i].label;return n.createElement(pe,(0,s.A)({},o,{mobile:t,label:n.createElement(n.Fragment,null,n.createElement(ge,{className:me}),b),items:m}))},search:function(e){let{mobile:t,className:a}=e;return 
t?null:n.createElement(ze,{className:a},n.createElement(Me,null))},dropdown:pe,html:function(e){let{value:t,className:a,mobile:o=!1,isDropdownItem:i=!1}=e;const s=i?"li":"div";return n.createElement(s,{className:(0,r.A)({navbar__item:!o&&!i,"menu__list-item":o},a),dangerouslySetInnerHTML:{__html:t}})},doc:function(e){let{docId:t,label:a,docsPluginId:r,...o}=e;const{activeDoc:i}=(0,Fe.zK)(r),c=(0,Be.QB)(t,r);return null===c?null:n.createElement(oe,(0,s.A)({exact:!0},o,{isActive:()=>i?.path===c.path||!!i?.sidebar&&i.sidebar===c.sidebar,label:a??c.id,to:c.path}))},docSidebar:function(e){let{sidebarId:t,label:a,docsPluginId:r,...o}=e;const{activeDoc:i}=(0,Fe.zK)(r),c=(0,Be.fW)(t,r).link;if(!c)throw new Error(`DocSidebarNavbarItem: Sidebar with ID "${t}" doesn't have anything to be linked to.`);return n.createElement(oe,(0,s.A)({exact:!0},o,{isActive:()=>i?.sidebar===t,label:a??c.label,to:c.path}))},docsVersion:function(e){let{label:t,to:a,docsPluginId:r,...o}=e;const i=(0,Be.Vd)(r)[0],c=t??i.label,l=a??(e=>e.docs.find((t=>t.id===e.mainDocId)))(i).path;return n.createElement(oe,(0,s.A)({},o,{label:c,to:l}))},docsVersionDropdown:function(e){let{mobile:t,docsPluginId:a,dropdownActiveClassDisabled:r,dropdownItemsBefore:o,dropdownItemsAfter:i,...d}=e;const{search:u,hash:p}=(0,c.zy)(),f=(0,Fe.zK)(a),g=(0,Fe.jh)(a),{savePreferredVersionName:m}=(0,$e.g1)(a),b=[...o,...g.map((e=>{const t=f.alternateDocVersions[e.name]??Ue(e);return{label:e.label,to:`${t.path}${u}${p}`,isActive:()=>e===f.activeVersion,onClick:()=>m(e.name)}})),...i],h=(0,Be.Vd)(a)[0],_=t&&b.length>1?(0,l.T)({id:"theme.navbar.mobileVersionsDropdown.label",message:"Versions",description:"The label for the navbar versions dropdown on mobile view"}):h.label,v=t&&b.length>1?void 0:Ue(h).path;return b.length<=1?n.createElement(oe,(0,s.A)({},d,{mobile:t,label:_,to:v,isActive:r?()=>!1:void 0})):n.createElement(pe,(0,s.A)({},d,{mobile:t,label:_,to:v,items:b,isActive:r?()=>!1:void 0}))}};function 
qe(e){let{type:t,...a}=e;const r=function(e,t){return e&&"default"!==e?e:"items"in t?"dropdown":"default"}(t,a),o=Qe[r];if(!o)throw new Error(`No NavbarItem component found for type "${t}".`);return n.createElement(o,a)}function He(){const e=(0,L.M)(),t=(0,y.p)().navbar.items;return n.createElement("ul",{className:"menu__list"},t.map(((t,a)=>n.createElement(qe,(0,s.A)({mobile:!0},t,{onClick:()=>e.toggle(),key:a})))))}function Ve(e){return n.createElement("button",(0,s.A)({},e,{type:"button",className:"clean-btn navbar-sidebar__back"}),n.createElement(l.A,{id:"theme.navbar.mobileSidebarSecondaryMenu.backButtonLabel",description:"The label of the back button to return to main menu, inside the mobile navbar sidebar secondary menu (notably used to display the docs sidebar)"},"\u2190 Back to main menu"))}function Ge(){const e=0===(0,y.p)().navbar.items.length,t=M();return n.createElement(n.Fragment,null,!e&&n.createElement(Ve,{onClick:()=>t.hide()}),t.content)}function We(){const e=(0,L.M)();var t;return void 0===(t=e.shown)&&(t=!0),(0,n.useEffect)((()=>(document.body.style.overflow=t?"hidden":"visible",()=>{document.body.style.overflow="visible"})),[t]),e.shouldRender?n.createElement(D,{header:n.createElement(Y,null),primaryMenu:n.createElement(He,null),secondaryMenu:n.createElement(Ge,null)}):null}const Ke={navbarHideable:"navbarHideable_m1mJ",navbarHidden:"navbarHidden_jGov"};function Ye(e){return n.createElement("div",(0,s.A)({role:"presentation"},e,{className:(0,r.A)("navbar-sidebar__backdrop",e.className)}))}function Xe(e){let{children:t}=e;const{navbar:{hideOnScroll:a,style:o}}=(0,y.p)(),i=(0,L.M)(),{navbarRef:s,isNavbarVisible:c}=function(e){const[t,a]=(0,n.useState)(e),r=(0,n.useRef)(!1),o=(0,n.useRef)(0),i=(0,n.useCallback)((e=>{null!==e&&(o.current=e.getBoundingClientRect().height)}),[]);return(0,O.Mq)(((t,n)=>{let{scrollY:i}=t;if(!e)return;if(i=s?a(!1):i+l{if(!e)return;const n=t.location.hash;if(n?document.getElementById(n.substring(1)):void 0)return 
r.current=!0,void a(!1);a(!0)})),{navbarRef:i,isNavbarVisible:t}}(a);return n.createElement("nav",{ref:s,"aria-label":(0,l.T)({id:"theme.NavBar.navAriaLabel",message:"Main",description:"The ARIA label for the main navigation"}),className:(0,r.A)("navbar","navbar--fixed-top",a&&[Ke.navbarHideable,!c&&Ke.navbarHidden],{"navbar--dark":"dark"===o,"navbar--primary":"primary"===o,"navbar-sidebar--show":i.shown})},t,n.createElement(Ye,{onClick:i.toggle}),n.createElement(We,null))}var Ze=a(70440);const Je={errorBoundaryError:"errorBoundaryError_a6uf"};function et(e){return n.createElement("button",(0,s.A)({type:"button"},e),n.createElement(l.A,{id:"theme.ErrorPageContent.tryAgain",description:"The label of the button to try again rendering when the React error boundary captures an error"},"Try again"))}function tt(e){let{error:t}=e;const a=(0,Ze.getErrorCausalChain)(t).map((e=>e.message)).join("\n\nCause:\n");return n.createElement("p",{className:Je.errorBoundaryError},a)}class at extends n.Component{componentDidCatch(e,t){throw this.props.onError(e,t)}render(){return this.props.children}}const nt="right";function rt(e){let{width:t=30,height:a=30,className:r,...o}=e;return n.createElement("svg",(0,s.A)({className:r,width:t,height:a,viewBox:"0 0 30 30","aria-hidden":"true"},o),n.createElement("path",{stroke:"currentColor",strokeLinecap:"round",strokeMiterlimit:"10",strokeWidth:"2",d:"M4 7h22M4 15h22M4 23h22"}))}function ot(){const{toggle:e,shown:t}=(0,L.M)();return n.createElement("button",{onClick:e,"aria-label":(0,l.T)({id:"theme.docs.sidebar.toggleSidebarButtonAriaLabel",message:"Toggle navigation bar",description:"The ARIA label for hamburger menu button of mobile navigation"}),"aria-expanded":t,className:"navbar__toggle clean-btn",type:"button"},n.createElement(rt,null))}const it={colorModeToggle:"colorModeToggle_DEke"};function st(e){let{items:t}=e;return n.createElement(n.Fragment,null,t.map(((e,t)=>n.createElement(at,{key:t,onError:t=>new Error(`A theme navbar item 
failed to render.\nPlease double-check the following navbar item (themeConfig.navbar.items) of your Docusaurus config:\n${JSON.stringify(e,null,2)}`,{cause:t})},n.createElement(qe,e)))))}function ct(e){let{left:t,right:a}=e;return n.createElement("div",{className:"navbar__inner"},n.createElement("div",{className:"navbar__items"},t),n.createElement("div",{className:"navbar__items navbar__items--right"},a))}function lt(){const e=(0,L.M)(),t=(0,y.p)().navbar.items,[a,r]=function(e){function t(e){return"left"===(e.position??nt)}return[e.filter(t),e.filter((e=>!t(e)))]}(t),o=t.find((e=>"search"===e.type));return n.createElement(ct,{left:n.createElement(n.Fragment,null,!e.disabled&&n.createElement(ot,null),n.createElement(W,null),n.createElement(st,{items:a})),right:n.createElement(n.Fragment,null,n.createElement(st,{items:r}),n.createElement(V,{className:it.colorModeToggle}),!o&&n.createElement(ze,null,n.createElement(Me,null)))})}function dt(){return n.createElement(Xe,null,n.createElement(lt,null))}function ut(e){let{item:t}=e;const{to:a,href:r,label:o,prependBaseUrlToHref:i,...c}=t,l=(0,Z.A)(a),d=(0,Z.A)(r,{forcePrependBaseUrl:!0});return n.createElement(X.A,(0,s.A)({className:"footer__link-item"},r?{href:i?d:r}:{to:l},c),o,r&&!(0,J.A)(r)&&n.createElement(te.A,null))}function pt(e){let{item:t}=e;return t.html?n.createElement("li",{className:"footer__item",dangerouslySetInnerHTML:{__html:t.html}}):n.createElement("li",{key:t.href??t.to,className:"footer__item"},n.createElement(ut,{item:t}))}function ft(e){let{column:t}=e;return n.createElement("div",{className:"col footer__col"},n.createElement("div",{className:"footer__title"},t.title),n.createElement("ul",{className:"footer__items clean-list"},t.items.map(((e,t)=>n.createElement(pt,{key:t,item:e})))))}function gt(e){let{columns:t}=e;return n.createElement("div",{className:"row footer__links"},t.map(((e,t)=>n.createElement(ft,{key:t,column:e}))))}function mt(){return 
n.createElement("span",{className:"footer__link-separator"},"\xb7")}function bt(e){let{item:t}=e;return t.html?n.createElement("span",{className:"footer__link-item",dangerouslySetInnerHTML:{__html:t.html}}):n.createElement(ut,{item:t})}function ht(e){let{links:t}=e;return n.createElement("div",{className:"footer__links text--center"},n.createElement("div",{className:"footer__links"},t.map(((e,a)=>n.createElement(n.Fragment,{key:a},n.createElement(bt,{item:e}),t.length!==a+1&&n.createElement(mt,null))))))}function _t(e){let{links:t}=e;return function(e){return"title"in e[0]}(t)?n.createElement(gt,{columns:t}):n.createElement(ht,{links:t})}var vt=a(41653);const yt={footerLogoLink:"footerLogoLink_BH7S"};function St(e){let{logo:t}=e;const{withBaseUrl:a}=(0,Z.h)(),o={light:a(t.src),dark:a(t.srcDark??t.src)};return n.createElement(vt.A,{className:(0,r.A)("footer__logo",t.className),alt:t.alt,sources:o,width:t.width,height:t.height,style:t.style})}function kt(e){let{logo:t}=e;return t.href?n.createElement(X.A,{href:t.href,className:yt.footerLogoLink,target:t.target},n.createElement(St,{logo:t})):n.createElement(St,{logo:t})}function xt(e){let{copyright:t}=e;return n.createElement("div",{className:"footer__copyright",dangerouslySetInnerHTML:{__html:t}})}function wt(e){let{style:t,links:a,logo:o,copyright:i}=e;return n.createElement("footer",{className:(0,r.A)("footer",{"footer--dark":"dark"===t})},n.createElement("div",{className:"container container-fluid"},a,(o||i)&&n.createElement("div",{className:"footer__bottom text--center"},o&&n.createElement("div",{className:"margin-bottom--sm"},o),i)))}function Et(){const{footer:e}=(0,y.p)();if(!e)return null;const{copyright:t,links:a,logo:r,style:o}=e;return n.createElement(wt,{style:o,links:a&&a.length>0&&n.createElement(_t,{links:a}),logo:r&&n.createElement(kt,{logo:r}),copyright:t&&n.createElement(xt,{copyright:t})})}const At=n.memo(Et),Tt=(0,j.fM)([z.a,S.oq,O.Tv,$e.VQ,i.Jx,function(e){let{children:t}=e;return 
n.createElement(P.y_,null,n.createElement(L.e,null,n.createElement(R,null,t)))}]);function Ct(e){let{children:t}=e;return n.createElement(Tt,null,t)}function Lt(e){let{error:t,tryAgain:a}=e;return n.createElement("main",{className:"container margin-vert--xl"},n.createElement("div",{className:"row"},n.createElement("div",{className:"col col--6 col--offset-3"},n.createElement("h1",{className:"hero__title"},n.createElement(l.A,{id:"theme.ErrorPageContent.title",description:"The title of the fallback page when the page crashed"},"This page crashed.")),n.createElement("div",{className:"margin-vert--lg"},n.createElement(et,{onClick:a,className:"button button--primary shadow--lw"})),n.createElement("hr",null),n.createElement("div",{className:"margin-vert--md"},n.createElement(tt,{error:t})))))}const Ot={mainWrapper:"mainWrapper_z2l0"};function jt(e){const{children:t,noFooter:a,wrapperClassName:s,title:c,description:l}=e;return(0,h.J)(),n.createElement(Ct,null,n.createElement(i.be,{title:c,description:l}),n.createElement(v,null),n.createElement(C,null),n.createElement(dt,null),n.createElement("div",{id:u,className:(0,r.A)(b.G.wrapper.main,Ot.mainWrapper,s)},n.createElement(o.A,{fallback:e=>n.createElement(Lt,e)},t)),!a&&n.createElement(At,null))}},23465:(e,t,a)=>{"use strict";a.d(t,{A:()=>u});var n=a(58168),r=a(96540),o=a(75489),i=a(86025),s=a(44586),c=a(6342),l=a(41653);function d(e){let{logo:t,alt:a,imageClassName:n}=e;const o={light:(0,i.A)(t.src),dark:(0,i.A)(t.srcDark||t.src)},s=r.createElement(l.A,{className:t.className,sources:o,height:t.height,width:t.width,alt:a,style:t.style});return n?r.createElement("div",{className:n},s):s}function u(e){const{siteConfig:{title:t}}=(0,s.A)(),{navbar:{title:a,logo:l}}=(0,c.p)(),{imageClassName:u,titleClassName:p,...f}=e,g=(0,i.A)(l?.href||"/"),m=a?"":t,b=l?.alt??m;return 
r.createElement(o.A,(0,n.A)({to:g},f,l?.target&&{target:l.target}),l&&r.createElement(d,{logo:l,alt:b,imageClassName:u}),null!=a&&r.createElement("b",{className:p},a))}},41463:(e,t,a)=>{"use strict";a.d(t,{A:()=>o});var n=a(96540),r=a(5260);function o(e){let{locale:t,version:a,tag:o}=e;const i=t;return n.createElement(r.A,null,t&&n.createElement("meta",{name:"docusaurus_locale",content:t}),a&&n.createElement("meta",{name:"docusaurus_version",content:a}),o&&n.createElement("meta",{name:"docusaurus_tag",content:o}),i&&n.createElement("meta",{name:"docsearch:language",content:i}),a&&n.createElement("meta",{name:"docsearch:version",content:a}),o&&n.createElement("meta",{name:"docsearch:docusaurus_tag",content:o}))}},41653:(e,t,a)=>{"use strict";a.d(t,{A:()=>l});var n=a(58168),r=a(96540),o=a(20053),i=a(92303),s=a(95293);const c={themedImage:"themedImage_ToTc","themedImage--light":"themedImage--light_HNdA","themedImage--dark":"themedImage--dark_i4oU"};function l(e){const t=(0,i.A)(),{colorMode:a}=(0,s.G)(),{sources:l,className:d,alt:u,...p}=e,f=t?"dark"===a?["dark"]:["light"]:["light","dark"];return r.createElement(r.Fragment,null,f.map((e=>r.createElement("img",(0,n.A)({key:e,src:l[e],alt:u,className:(0,o.A)(c.themedImage,c[`themedImage--${e}`],d)},p)))))}},41422:(e,t,a)=>{"use strict";a.d(t,{N:()=>b,u:()=>c});var n=a(58168),r=a(96540),o=a(38193),i=a(53109);const s="ease-in-out";function c(e){let{initialState:t}=e;const[a,n]=(0,r.useState)(t??!1),o=(0,r.useCallback)((()=>{n((e=>!e))}),[]);return{collapsed:a,setCollapsed:n,toggleCollapsed:o}}const l={display:"none",overflow:"hidden",height:"0px"},d={display:"block",overflow:"visible",height:"auto"};function u(e,t){const a=t?l:d;e.style.display=a.display,e.style.overflow=a.overflow,e.style.height=a.height}function p(e){let{collapsibleRef:t,collapsed:a,animation:n}=e;const o=(0,r.useRef)(!1);(0,r.useEffect)((()=>{const e=t.current;function r(){const t=e.scrollHeight,a=n?.duration??function(e){if((0,i.O)())return 1;const 
t=e/36;return Math.round(10*(4+15*t**.25+t/5))}(t);return{transition:`height ${a}ms ${n?.easing??s}`,height:`${t}px`}}function c(){const t=r();e.style.transition=t.transition,e.style.height=t.height}if(!o.current)return u(e,a),void(o.current=!0);return e.style.willChange="height",function(){const t=requestAnimationFrame((()=>{a?(c(),requestAnimationFrame((()=>{e.style.height=l.height,e.style.overflow=l.overflow}))):(e.style.display="block",requestAnimationFrame((()=>{c()})))}));return()=>cancelAnimationFrame(t)}()}),[t,a,n])}function f(e){if(!o.A.canUseDOM)return e?l:d}function g(e){let{as:t="div",collapsed:a,children:n,animation:o,onCollapseTransitionEnd:i,className:s,disableSSRStyle:c}=e;const l=(0,r.useRef)(null);return p({collapsibleRef:l,collapsed:a,animation:o}),r.createElement(t,{ref:l,style:c?void 0:f(a),onTransitionEnd:e=>{"height"===e.propertyName&&(u(l.current,a),i?.(a))},className:s},n)}function m(e){let{collapsed:t,...a}=e;const[o,i]=(0,r.useState)(!t),[s,c]=(0,r.useState)(t);return(0,r.useLayoutEffect)((()=>{t||i(!0)}),[t]),(0,r.useLayoutEffect)((()=>{o&&c(t)}),[o,t]),o?r.createElement(g,(0,n.A)({},a,{collapsed:s})):null}function b(e){let{lazy:t,...a}=e;const n=t?m:g;return r.createElement(n,a)}},65041:(e,t,a)=>{"use strict";a.d(t,{Mj:()=>g,oq:()=>f});var n=a(96540),r=a(92303),o=a(89466),i=a(89532),s=a(6342);const c=(0,o.Wf)("docusaurus.announcement.dismiss"),l=(0,o.Wf)("docusaurus.announcement.id"),d=()=>"true"===c.get(),u=e=>c.set(String(e)),p=n.createContext(null);function f(e){let{children:t}=e;const a=function(){const{announcementBar:e}=(0,s.p)(),t=(0,r.A)(),[a,o]=(0,n.useState)((()=>!!t&&d()));(0,n.useEffect)((()=>{o(d())}),[]);const i=(0,n.useCallback)((()=>{u(!0),o(!0)}),[]);return(0,n.useEffect)((()=>{if(!e)return;const{id:t}=e;let a=l.get();"annoucement-bar"===a&&(a="announcement-bar");const n=t!==a;l.set(t),n&&u(!1),!n&&d()||o(!1)}),[e]),(0,n.useMemo)((()=>({isActive:!!e&&!a,close:i})),[e,a,i])}();return 
n.createElement(p.Provider,{value:a},t)}function g(){const e=(0,n.useContext)(p);if(!e)throw new i.dV("AnnouncementBarProvider");return e}},95293:(e,t,a)=>{"use strict";a.d(t,{G:()=>b,a:()=>m});var n=a(96540),r=a(38193),o=a(89532),i=a(89466),s=a(6342);const c=n.createContext(void 0),l="theme",d=(0,i.Wf)(l),u={light:"light",dark:"dark"},p=e=>e===u.dark?u.dark:u.light,f=e=>r.A.canUseDOM?p(document.documentElement.getAttribute("data-theme")):p(e),g=e=>{d.set(p(e))};function m(e){let{children:t}=e;const a=function(){const{colorMode:{defaultMode:e,disableSwitch:t,respectPrefersColorScheme:a}}=(0,s.p)(),[r,o]=(0,n.useState)(f(e));(0,n.useEffect)((()=>{t&&d.del()}),[t]);const i=(0,n.useCallback)((function(t,n){void 0===n&&(n={});const{persist:r=!0}=n;t?(o(t),r&&g(t)):(o(a?window.matchMedia("(prefers-color-scheme: dark)").matches?u.dark:u.light:e),d.del())}),[a,e]);(0,n.useEffect)((()=>{document.documentElement.setAttribute("data-theme",p(r))}),[r]),(0,n.useEffect)((()=>{if(t)return;const e=e=>{if(e.key!==l)return;const t=d.get();null!==t&&i(p(t))};return window.addEventListener("storage",e),()=>window.removeEventListener("storage",e)}),[t,i]);const c=(0,n.useRef)(!1);return(0,n.useEffect)((()=>{if(t&&!a)return;const e=window.matchMedia("(prefers-color-scheme: dark)"),n=()=>{window.matchMedia("print").matches||c.current?c.current=window.matchMedia("print").matches:i(null)};return e.addListener(n),()=>e.removeListener(n)}),[i,t,a]),(0,n.useMemo)((()=>({colorMode:r,setColorMode:i,get isDarkTheme(){return r===u.dark},setLightTheme(){i(u.light)},setDarkTheme(){i(u.dark)}})),[r,i])}();return n.createElement(c.Provider,{value:a},t)}function b(){const e=(0,n.useContext)(c);if(null==e)throw new o.dV("ColorModeProvider","Please see https://docusaurus.io/docs/api/themes/configuration#use-color-mode.");return e}},55597:(e,t,a)=>{"use strict";a.d(t,{VQ:()=>b,XK:()=>v,g1:()=>_});var n=a(96540),r=a(44070),o=a(17065),i=a(6342),s=a(84142),c=a(89532),l=a(89466);const 
d=e=>`docs-preferred-version-${e}`,u={save:(e,t,a)=>{(0,l.Wf)(d(e),{persistence:t}).set(a)},read:(e,t)=>(0,l.Wf)(d(e),{persistence:t}).get(),clear:(e,t)=>{(0,l.Wf)(d(e),{persistence:t}).del()}},p=e=>Object.fromEntries(e.map((e=>[e,{preferredVersionName:null}])));const f=n.createContext(null);function g(){const e=(0,r.Gy)(),t=(0,i.p)().docs.versionPersistence,a=(0,n.useMemo)((()=>Object.keys(e)),[e]),[o,s]=(0,n.useState)((()=>p(a)));(0,n.useEffect)((()=>{s(function(e){let{pluginIds:t,versionPersistence:a,allDocsData:n}=e;function r(e){const t=u.read(e,a);return n[e].versions.some((e=>e.name===t))?{preferredVersionName:t}:(u.clear(e,a),{preferredVersionName:null})}return Object.fromEntries(t.map((e=>[e,r(e)])))}({allDocsData:e,versionPersistence:t,pluginIds:a}))}),[e,t,a]);return[o,(0,n.useMemo)((()=>({savePreferredVersion:function(e,a){u.save(e,t,a),s((t=>({...t,[e]:{preferredVersionName:a}})))}})),[t])]}function m(e){let{children:t}=e;const a=g();return n.createElement(f.Provider,{value:a},t)}function b(e){let{children:t}=e;return s.C5?n.createElement(m,null,t):n.createElement(n.Fragment,null,t)}function h(){const e=(0,n.useContext)(f);if(!e)throw new c.dV("DocsPreferredVersionContextProvider");return e}function _(e){void 0===e&&(e=o.W);const t=(0,r.ht)(e),[a,i]=h(),{preferredVersionName:s}=a[e];return{preferredVersion:t.versions.find((e=>e.name===s))??null,savePreferredVersionName:(0,n.useCallback)((t=>{i.savePreferredVersion(e,t)}),[i,e])}}function v(){const e=(0,r.Gy)(),[t]=h();function a(a){const n=e[a],{preferredVersionName:r}=t[a];return n.versions.find((e=>e.name===r))??null}const n=Object.keys(e);return Object.fromEntries(n.map((e=>[e,a(e)])))}},26588:(e,t,a)=>{"use strict";a.d(t,{V:()=>s,t:()=>c});var n=a(96540),r=a(89532);const o=Symbol("EmptyContext"),i=n.createContext(o);function s(e){let{children:t,name:a,items:r}=e;const o=(0,n.useMemo)((()=>a&&r?{name:a,items:r}:null),[a,r]);return n.createElement(i.Provider,{value:o},t)}function c(){const 
e=(0,n.useContext)(i);if(e===o)throw new r.dV("DocsSidebarProvider");return e}},32252:(e,t,a)=>{"use strict";a.d(t,{n:()=>i,r:()=>s});var n=a(96540),r=a(89532);const o=n.createContext(null);function i(e){let{children:t,version:a}=e;return n.createElement(o.Provider,{value:a},t)}function s(){const e=(0,n.useContext)(o);if(null===e)throw new r.dV("DocsVersionProvider");return e}},22069:(e,t,a)=>{"use strict";a.d(t,{M:()=>p,e:()=>u});var n=a(96540),r=a(75600),o=a(24581),i=a(57485),s=a(6342),c=a(89532);const l=n.createContext(void 0);function d(){const e=function(){const e=(0,r.YL)(),{items:t}=(0,s.p)().navbar;return 0===t.length&&!e.component}(),t=(0,o.l)(),a=!e&&"mobile"===t,[c,l]=(0,n.useState)(!1);(0,i.$Z)((()=>{if(c)return l(!1),!1}));const d=(0,n.useCallback)((()=>{l((e=>!e))}),[]);return(0,n.useEffect)((()=>{"desktop"===t&&l(!1)}),[t]),(0,n.useMemo)((()=>({disabled:e,shouldRender:a,toggle:d,shown:c})),[e,a,d,c])}function u(e){let{children:t}=e;const a=d();return n.createElement(l.Provider,{value:a},t)}function p(){const e=n.useContext(l);if(void 0===e)throw new c.dV("NavbarMobileSidebarProvider");return e}},75600:(e,t,a)=>{"use strict";a.d(t,{GX:()=>c,YL:()=>s,y_:()=>i});var n=a(96540),r=a(89532);const o=n.createContext(null);function i(e){let{children:t}=e;const a=(0,n.useState)({component:null,props:null});return n.createElement(o.Provider,{value:a},t)}function s(){const e=(0,n.useContext)(o);if(!e)throw new r.dV("NavbarSecondaryMenuContentProvider");return e[0]}function c(e){let{component:t,props:a}=e;const i=(0,n.useContext)(o);if(!i)throw new r.dV("NavbarSecondaryMenuContentProvider");const[,s]=i,c=(0,r.Be)(a);return(0,n.useEffect)((()=>{s({component:t,props:c})}),[s,t,c]),(0,n.useEffect)((()=>()=>s({component:null,props:null})),[s]),null}},14090:(e,t,a)=>{"use strict";a.d(t,{w:()=>r,J:()=>o});var n=a(96540);const r="navigation-with-keyboard";function o(){(0,n.useEffect)((()=>{function 
e(e){"keydown"===e.type&&"Tab"===e.key&&document.body.classList.add(r),"mousedown"===e.type&&document.body.classList.remove(r)}return document.addEventListener("keydown",e),document.addEventListener("mousedown",e),()=>{document.body.classList.remove(r),document.removeEventListener("keydown",e),document.removeEventListener("mousedown",e)}}),[])}},24255:(e,t,a)=>{"use strict";a.d(t,{b:()=>s,w:()=>c});var n=a(96540),r=a(44586),o=a(57485);const i="q";function s(){return(0,o.l)(i)}function c(){const{siteConfig:{baseUrl:e,themeConfig:t}}=(0,r.A)(),{algolia:{searchPagePath:a}}=t;return(0,n.useCallback)((t=>`${e}${a}?${i}=${encodeURIComponent(t)}`),[e,a])}},24581:(e,t,a)=>{"use strict";a.d(t,{l:()=>l});var n=a(96540),r=a(38193);const o={desktop:"desktop",mobile:"mobile",ssr:"ssr"},i=996;function s(){return r.A.canUseDOM?window.innerWidth>i?o.desktop:o.mobile:o.ssr}const c=!1;function l(){const[e,t]=(0,n.useState)((()=>c?"ssr":s()));return(0,n.useEffect)((()=>{function e(){t(s())}const a=c?window.setTimeout(e,1e3):void 0;return window.addEventListener("resize",e),()=>{window.removeEventListener("resize",e),clearTimeout(a)}}),[]),e}},17559:(e,t,a)=>{"use strict";a.d(t,{G:()=>n});const 
n={page:{blogListPage:"blog-list-page",blogPostPage:"blog-post-page",blogTagsListPage:"blog-tags-list-page",blogTagPostListPage:"blog-tags-post-list-page",docsDocPage:"docs-doc-page",docsTagsListPage:"docs-tags-list-page",docsTagDocListPage:"docs-tags-doc-list-page",mdxPage:"mdx-page"},wrapper:{main:"main-wrapper",blogPages:"blog-wrapper",docsPages:"docs-wrapper",mdxPages:"mdx-wrapper"},common:{editThisPage:"theme-edit-this-page",lastUpdated:"theme-last-updated",backToTopButton:"theme-back-to-top-button",codeBlock:"theme-code-block",admonition:"theme-admonition",admonitionType:e=>`theme-admonition-${e}`},layout:{},docs:{docVersionBanner:"theme-doc-version-banner",docVersionBadge:"theme-doc-version-badge",docBreadcrumbs:"theme-doc-breadcrumbs",docMarkdown:"theme-doc-markdown",docTocMobile:"theme-doc-toc-mobile",docTocDesktop:"theme-doc-toc-desktop",docFooter:"theme-doc-footer",docFooterTagsRow:"theme-doc-footer-tags-row",docFooterEditMetaRow:"theme-doc-footer-edit-meta-row",docSidebarContainer:"theme-doc-sidebar-container",docSidebarMenu:"theme-doc-sidebar-menu",docSidebarItemCategory:"theme-doc-sidebar-item-category",docSidebarItemLink:"theme-doc-sidebar-item-link",docSidebarItemCategoryLevel:e=>`theme-doc-sidebar-item-category-level-${e}`,docSidebarItemLinkLevel:e=>`theme-doc-sidebar-item-link-level-${e}`},blog:{}}},53109:(e,t,a)=>{"use strict";function n(){return window.matchMedia("(prefers-reduced-motion: reduce)").matches}a.d(t,{O:()=>n})},84142:(e,t,a)=>{"use strict";a.d(t,{$S:()=>m,C5:()=>p,OF:()=>y,QB:()=>x,Vd:()=>S,_o:()=>g,cC:()=>f,d1:()=>E,fW:()=>k,mz:()=>w,w8:()=>_});var n=a(96540),r=a(56347),o=a(22831),i=a(44070),s=a(55597),c=a(32252),l=a(26588),d=a(31682),u=a(99169);const p=!!i.Gy;function f(e){const t=(0,c.r)();if(!e)return;const a=t.docs[e];if(!a)throw new Error(`no version doc found by id=${e}`);return a}function g(e){if(e.href)return e.href;for(const t of e.items){if("link"===t.type)return t.href;if("category"===t.type){const e=g(t);if(e)return 
e}}}function m(){const{pathname:e}=(0,r.zy)(),t=(0,l.t)();if(!t)throw new Error("Unexpected: cant find current sidebar in context");const a=v({sidebarItems:t.items,pathname:e,onlyCategories:!0}).slice(-1)[0];if(!a)throw new Error(`${e} is not associated with a category. useCurrentSidebarCategory() should only be used on category index pages.`);return a}const b=(e,t)=>void 0!==e&&(0,u.ys)(e,t),h=(e,t)=>e.some((e=>_(e,t)));function _(e,t){return"link"===e.type?b(e.href,t):"category"===e.type&&(b(e.href,t)||h(e.items,t))}function v(e){let{sidebarItems:t,pathname:a,onlyCategories:n=!1}=e;const r=[];return function e(t){for(const o of t)if("category"===o.type&&((0,u.ys)(o.href,a)||e(o.items))||"link"===o.type&&(0,u.ys)(o.href,a)){return n&&"category"!==o.type||r.unshift(o),!0}return!1}(t),r}function y(){const e=(0,l.t)(),{pathname:t}=(0,r.zy)(),a=(0,i.vT)()?.pluginData.breadcrumbs;return!1!==a&&e?v({sidebarItems:e.items,pathname:t}):null}function S(e){const{activeVersion:t}=(0,i.zK)(e),{preferredVersion:a}=(0,s.g1)(e),r=(0,i.r7)(e);return(0,n.useMemo)((()=>(0,d.s)([t,a,r].filter(Boolean))),[t,a,r])}function k(e,t){const a=S(t);return(0,n.useMemo)((()=>{const t=a.flatMap((e=>e.sidebars?Object.entries(e.sidebars):[])),n=t.find((t=>t[0]===e));if(!n)throw new Error(`Can't find any sidebar with id "${e}" in version${a.length>1?"s":""} ${a.map((e=>e.name)).join(", ")}".\nAvailable sidebar ids are:\n- ${t.map((e=>e[0])).join("\n- ")}`);return n[1]}),[e,a])}function x(e,t){const a=S(t);return(0,n.useMemo)((()=>{const t=a.flatMap((e=>e.docs)),n=t.find((t=>t.id===e));if(!n){if(a.flatMap((e=>e.draftIds)).includes(e))return null;throw new Error(`Couldn't find any doc with id "${e}" in version${a.length>1?"s":""} "${a.map((e=>e.name)).join(", ")}".\nAvailable doc ids are:\n- ${(0,d.s)(t.map((e=>e.id))).join("\n- ")}`)}return n}),[e,a])}function w(e){let{route:t,versionMetadata:a}=e;const n=(0,r.zy)(),i=t.routes,s=i.find((e=>(0,r.B6)(n.pathname,e)));if(!s)return null;const 
c=s.sidebar,l=c?a.docsSidebars[c]:void 0;return{docElement:(0,o.v)(i),sidebarName:c,sidebarItems:l}}function E(e){return e.filter((e=>"category"!==e.type||!!g(e)))}},20481:(e,t,a)=>{"use strict";a.d(t,{s:()=>r});var n=a(44586);function r(e){const{siteConfig:t}=(0,n.A)(),{title:a,titleDelimiter:r}=t;return e?.trim().length?`${e.trim()} ${r} ${a}`:a}},57485:(e,t,a)=>{"use strict";a.d(t,{$Z:()=>s,aZ:()=>c,l:()=>l});var n=a(96540),r=a(56347),o=a(19888),i=a(89532);function s(e){!function(e){const t=(0,r.W6)(),a=(0,i._q)(e);(0,n.useEffect)((()=>t.block(((e,t)=>a(e,t)))),[t,a])}(((t,a)=>{if("POP"===a)return e(t,a)}))}function c(e){return function(e){const t=(0,r.W6)();return(0,o.useSyncExternalStore)(t.listen,(()=>e(t)),(()=>e(t)))}((t=>null===e?null:new URLSearchParams(t.location.search).get(e)))}function l(e){const t=c(e)??"",a=function(){const e=(0,r.W6)();return(0,n.useCallback)(((t,a,n)=>{const r=new URLSearchParams(e.location.search);a?r.set(t,a):r.delete(t),(n?.push?e.push:e.replace)({search:r.toString()})}),[e])}();return[t,(0,n.useCallback)(((t,n)=>{a(e,t,n)}),[a,e])]}},31682:(e,t,a)=>{"use strict";function n(e,t){return void 0===t&&(t=(e,t)=>e===t),e.filter(((a,n)=>e.findIndex((e=>t(e,a)))!==n))}function r(e){return Array.from(new Set(e))}a.d(t,{X:()=>n,s:()=>r})},69024:(e,t,a)=>{"use strict";a.d(t,{e3:()=>p,be:()=>d,Jx:()=>f});var n=a(96540),r=a(20053),o=a(5260),i=a(53102);function s(){const e=n.useContext(i.o);if(!e)throw new Error("Unexpected: no Docusaurus route context found");return e}var c=a(86025),l=a(20481);function d(e){let{title:t,description:a,keywords:r,image:i,children:s}=e;const d=(0,l.s)(t),{withBaseUrl:u}=(0,c.h)(),p=i?u(i,{absolute:!0}):void 0;return 
n.createElement(o.A,null,t&&n.createElement("title",null,d),t&&n.createElement("meta",{property:"og:title",content:d}),a&&n.createElement("meta",{name:"description",content:a}),a&&n.createElement("meta",{property:"og:description",content:a}),r&&n.createElement("meta",{name:"keywords",content:Array.isArray(r)?r.join(","):r}),p&&n.createElement("meta",{property:"og:image",content:p}),p&&n.createElement("meta",{name:"twitter:image",content:p}),s)}const u=n.createContext(void 0);function p(e){let{className:t,children:a}=e;const i=n.useContext(u),s=(0,r.A)(i,t);return n.createElement(u.Provider,{value:s},n.createElement(o.A,null,n.createElement("html",{className:s})),a)}function f(e){let{children:t}=e;const a=s(),o=`plugin-${a.plugin.name.replace(/docusaurus-(?:plugin|theme)-(?:content-)?/gi,"")}`;const i=`plugin-id-${a.plugin.id}`;return n.createElement(p,{className:(0,r.A)(o,i)},t)}},89532:(e,t,a)=>{"use strict";a.d(t,{Be:()=>c,ZC:()=>i,_q:()=>o,dV:()=>s,fM:()=>l});var n=a(96540);const r=a(38193).A.canUseDOM?n.useLayoutEffect:n.useEffect;function o(e){const t=(0,n.useRef)(e);return r((()=>{t.current=e}),[e]),(0,n.useCallback)((function(){return t.current(...arguments)}),[])}function i(e){const t=(0,n.useRef)();return r((()=>{t.current=e})),t.current}class s extends Error{constructor(e,t){super(),this.name="ReactContextError",this.message=`Hook ${this.stack?.split("\n")[1]?.match(/at (?:\w+\.)?(?\w+)/)?.groups.name??""} is called outside the <${e}>. 
${t??""}`}}function c(e){const t=Object.entries(e);return t.sort(((e,t)=>e[0].localeCompare(t[0]))),(0,n.useMemo)((()=>e),t.flat())}function l(e){return t=>{let{children:a}=t;return n.createElement(n.Fragment,null,e.reduceRight(((e,t)=>n.createElement(t,null,e)),a))}}},91252:(e,t,a)=>{"use strict";function n(e,t){return void 0!==e&&void 0!==t&&new RegExp(e,"gi").test(t)}a.d(t,{G:()=>n})},99169:(e,t,a)=>{"use strict";a.d(t,{Dt:()=>s,ys:()=>i});var n=a(96540),r=a(35947),o=a(44586);function i(e,t){const a=e=>(!e||e.endsWith("/")?e:`${e}/`)?.toLowerCase();return a(e)===a(t)}function s(){const{baseUrl:e}=(0,o.A)().siteConfig;return(0,n.useMemo)((()=>function(e){let{baseUrl:t,routes:a}=e;function n(e){return e.path===t&&!0===e.exact}function r(e){return e.path===t&&!e.exact}return function e(t){if(0===t.length)return;return t.find(n)||e(t.filter(r).flatMap((e=>e.routes??[])))}(a)}({routes:r.A,baseUrl:e})),[e])}},23104:(e,t,a)=>{"use strict";a.d(t,{Mq:()=>u,Tv:()=>c,a_:()=>p,gk:()=>f});var n=a(96540),r=a(38193),o=a(92303),i=a(89532);const s=n.createContext(void 0);function c(e){let{children:t}=e;const a=function(){const e=(0,n.useRef)(!0);return(0,n.useMemo)((()=>({scrollEventsEnabledRef:e,enableScrollEvents:()=>{e.current=!0},disableScrollEvents:()=>{e.current=!1}})),[])}();return n.createElement(s.Provider,{value:a},t)}function l(){const e=(0,n.useContext)(s);if(null==e)throw new i.dV("ScrollControllerProvider");return e}const d=()=>r.A.canUseDOM?{scrollX:window.pageXOffset,scrollY:window.pageYOffset}:null;function u(e,t){void 0===t&&(t=[]);const{scrollEventsEnabledRef:a}=l(),r=(0,n.useRef)(d()),o=(0,i._q)(e);(0,n.useEffect)((()=>{const e=()=>{if(!a.current)return;const e=d();o(e,r.current),r.current=e},t={passive:!0};return e(),window.addEventListener("scroll",e,t),()=>window.removeEventListener("scroll",e,t)}),[o,a,...t])}function p(){const e=l(),t=function(){const 
e=(0,n.useRef)({elem:null,top:0}),t=(0,n.useCallback)((t=>{e.current={elem:t,top:t.getBoundingClientRect().top}}),[]),a=(0,n.useCallback)((()=>{const{current:{elem:t,top:a}}=e;if(!t)return{restored:!1};const n=t.getBoundingClientRect().top-a;return n&&window.scrollBy({left:0,top:n}),e.current={elem:null,top:0},{restored:0!==n}}),[]);return(0,n.useMemo)((()=>({save:t,restore:a})),[a,t])}(),a=(0,n.useRef)(void 0),r=(0,n.useCallback)((n=>{t.save(n),e.disableScrollEvents(),a.current=()=>{const{restored:n}=t.restore();if(a.current=void 0,n){const t=()=>{e.enableScrollEvents(),window.removeEventListener("scroll",t)};window.addEventListener("scroll",t)}else e.enableScrollEvents()}}),[e,t]);return(0,n.useLayoutEffect)((()=>{queueMicrotask((()=>a.current?.()))})),{blockElementScrollPositionUntilNextRender:r}}function f(){const e=(0,n.useRef)(null),t=(0,o.A)()&&"smooth"===getComputedStyle(document.documentElement).scrollBehavior;return{startScroll:a=>{e.current=t?function(e){return window.scrollTo({top:e,behavior:"smooth"}),()=>{}}(a):function(e){let t=null;const a=document.documentElement.scrollTop>e;return function n(){const r=document.documentElement.scrollTop;(a&&r>e||!a&&rt&&cancelAnimationFrame(t)}(a)},cancelScroll:()=>e.current?.()}}},2967:(e,t,a)=>{"use strict";a.d(t,{Cy:()=>i,af:()=>c,tU:()=>s});var n=a(44070),r=a(44586),o=a(55597);const i="default";function s(e,t){return`docs-${e}-${t}`}function c(){const{i18n:e}=(0,r.A)(),t=(0,n.Gy)(),a=(0,n.gk)(),c=(0,o.XK)();const l=[i,...Object.keys(t).map((function(e){const n=a?.activePlugin.pluginId===e?a.activeVersion:void 0,r=c[e],o=t[e].versions.find((e=>e.isLast));return s(e,(n??r??o).name)}))];return{locale:e.currentLocale,tags:l}}},89466:(e,t,a)=>{"use strict";a.d(t,{Dv:()=>u,Wf:()=>d});var n=a(96540),r=a(19888);const o="localStorage";function i(e){let{key:t,oldValue:a,newValue:n,storage:r}=e;if(a===n)return;const 
o=document.createEvent("StorageEvent");o.initStorageEvent("storage",!1,!1,t,a,n,window.location.href,r),window.dispatchEvent(o)}function s(e){if(void 0===e&&(e=o),"undefined"==typeof window)throw new Error("Browser storage is not available on Node.js/Docusaurus SSR process.");if("none"===e)return null;try{return window[e]}catch(a){return t=a,c||(console.warn("Docusaurus browser storage is not available.\nPossible reasons: running Docusaurus in an iframe, in an incognito browser session, or using too strict browser privacy settings.",t),c=!0),null}var t}let c=!1;const l={get:()=>null,set:()=>{},del:()=>{},listen:()=>()=>{}};function d(e,t){if("undefined"==typeof window)return function(e){function t(){throw new Error(`Illegal storage API usage for storage key "${e}".\nDocusaurus storage APIs are not supposed to be called on the server-rendering process.\nPlease only call storage APIs in effects and event handlers.`)}return{get:t,set:t,del:t,listen:t}}(e);const a=s(t?.persistence);return null===a?l:{get:()=>{try{return a.getItem(e)}catch(t){return console.error(`Docusaurus storage error, can't get key=${e}`,t),null}},set:t=>{try{const n=a.getItem(e);a.setItem(e,t),i({key:e,oldValue:n,newValue:t,storage:a})}catch(n){console.error(`Docusaurus storage error, can't set ${e}=${t}`,n)}},del:()=>{try{const t=a.getItem(e);a.removeItem(e),i({key:e,oldValue:t,newValue:null,storage:a})}catch(t){console.error(`Docusaurus storage error, can't delete key=${e}`,t)}},listen:t=>{try{const n=n=>{n.storageArea===a&&n.key===e&&t(n)};return window.addEventListener("storage",n),()=>window.removeEventListener("storage",n)}catch(n){return console.error(`Docusaurus storage error, can't listen for changes of key=${e}`,n),()=>{}}}}}function u(e,t){const a=(0,n.useRef)((()=>null===e?l:d(e,t))).current(),o=(0,n.useCallback)((e=>"undefined"==typeof window?()=>{}:a.listen(e)),[a]);return[(0,r.useSyncExternalStore)(o,(()=>"undefined"==typeof window?null:a.get()),(()=>null)),a]}},32131:(e,t,a)=>{"use 
strict";a.d(t,{o:()=>i});var n=a(44586),r=a(56347),o=a(70440);function i(){const{siteConfig:{baseUrl:e,url:t,trailingSlash:a},i18n:{defaultLocale:i,currentLocale:s}}=(0,n.A)(),{pathname:c}=(0,r.zy)(),l=(0,o.applyTrailingSlash)(c,{trailingSlash:a,baseUrl:e}),d=s===i?e:e.replace(`/${s}/`,"/"),u=l.replace(e,"");return{createUrl:function(e){let{locale:a,fullyQualified:n}=e;return`${n?t:""}${function(e){return e===i?`${d}`:`${d}${e}/`}(a)}${u}`}}}},75062:(e,t,a)=>{"use strict";a.d(t,{$:()=>i});var n=a(96540),r=a(56347),o=a(89532);function i(e){const t=(0,r.zy)(),a=(0,o.ZC)(t),i=(0,o._q)(e);(0,n.useEffect)((()=>{a&&t!==a&&i({location:t,previousLocation:a})}),[i,t,a])}},6342:(e,t,a)=>{"use strict";a.d(t,{p:()=>r});var n=a(44586);function r(){return(0,n.A)().siteConfig.themeConfig}},38126:(e,t,a)=>{"use strict";a.d(t,{c:()=>r});var n=a(44586);function r(){const{siteConfig:{themeConfig:e}}=(0,n.A)();return e}},51062:(e,t,a)=>{"use strict";a.d(t,{C:()=>s});var n=a(96540),r=a(91252),o=a(86025),i=a(38126);function s(){const{withBaseUrl:e}=(0,o.h)(),{algolia:{externalUrlRegex:t,replaceSearchResultPathname:a}}=(0,i.c)();return(0,n.useCallback)((n=>{const o=new URL(n);if((0,r.G)(t,o.href))return n;const i=`${o.pathname+o.hash}`;return e(function(e,t){return t?e.replaceAll(new RegExp(t.from,"g"),t.to):e}(i,a))}),[e,t,a])}},12983:(e,t)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.default=function(e,t){const{trailingSlash:a,baseUrl:n}=t;if(e.startsWith("#"))return e;if(void 0===a)return e;const[r]=e.split(/[#?]/),o="/"===r||r===n?r:(i=r,a?function(e){return e.endsWith("/")?e:`${e}/`}(i):function(e){return e.endsWith("/")?e.slice(0,-1):e}(i));var i;return e.replace(r,o)}},80253:(e,t)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.getErrorCausalChain=void 0,t.getErrorCausalChain=function e(t){return t.cause?[t,...e(t.cause)]:[t]}},70440:function(e,t,a){"use strict";var n=this&&this.__importDefault||function(e){return 
e&&e.__esModule?e:{default:e}};Object.defineProperty(t,"__esModule",{value:!0}),t.getErrorCausalChain=t.applyTrailingSlash=t.blogPostContainerID=void 0,t.blogPostContainerID="__blog-post-container";var r=a(12983);Object.defineProperty(t,"applyTrailingSlash",{enumerable:!0,get:function(){return n(r).default}});var o=a(80253);Object.defineProperty(t,"getErrorCausalChain",{enumerable:!0,get:function(){return o.getErrorCausalChain}})},71609:(e,t,a)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0});const n=a(31635),r=n.__importDefault(a(4784)),o=n.__importDefault(a(79739)),{themeConfig:i}=r.default;function s(e){var t,a;return document.querySelector('html[data-theme="dark"]')?(null===(t=e.background)||void 0===t?void 0:t.dark)||"rgb(50, 50, 50)":(null===(a=e.background)||void 0===a?void 0:a.light)||"rgb(255, 255, 255)"}t.default=function(){if("undefined"==typeof window)return null;let e;const{zoom:t}=i,{selector:a=".markdown img",config:n={}}=t||{};if(!t)return null;n.background=s(t);var r=new MutationObserver((function(){e&&e.update({background:s(t)})}));const c=document.querySelector("html");return r.observe(c,{attributes:!0,attributeFilter:["data-theme"]}),setTimeout((()=>{e&&e.detach(),e=(0,o.default)(a,n)}),1e3),{onRouteUpdate(){setTimeout((()=>{e&&e.detach(),e=(0,o.default)(a,n)}),1e3)}}}()},20053:(e,t,a)=>{"use strict";function n(e){var t,a,r="";if("string"==typeof e||"number"==typeof e)r+=e;else if("object"==typeof e)if(Array.isArray(e))for(t=0;tr});const r=function(){for(var e,t,a=0,r="";a{"use strict";a.d(t,{zR:()=>y,TM:()=>A,yJ:()=>f,sC:()=>C,AO:()=>p});var n=a(58168);function r(e){return"/"===e.charAt(0)}function o(e,t){for(var a=t,n=a+1,r=e.length;n=0;p--){var f=i[p];"."===f?o(i,p):".."===f?(o(i,p),u++):u&&(o(i,p),u--)}if(!l)for(;u--;u)i.unshift("..");!l||""===i[0]||i[0]&&r(i[0])||i.unshift("");var g=i.join("/");return a&&"/"!==g.substr(-1)&&(g+="/"),g};var s=a(11561);function c(e){return"/"===e.charAt(0)?e:"/"+e}function 
l(e){return"/"===e.charAt(0)?e.substr(1):e}function d(e,t){return function(e,t){return 0===e.toLowerCase().indexOf(t.toLowerCase())&&-1!=="/?#".indexOf(e.charAt(t.length))}(e,t)?e.substr(t.length):e}function u(e){return"/"===e.charAt(e.length-1)?e.slice(0,-1):e}function p(e){var t=e.pathname,a=e.search,n=e.hash,r=t||"/";return a&&"?"!==a&&(r+="?"===a.charAt(0)?a:"?"+a),n&&"#"!==n&&(r+="#"===n.charAt(0)?n:"#"+n),r}function f(e,t,a,r){var o;"string"==typeof e?(o=function(e){var t=e||"/",a="",n="",r=t.indexOf("#");-1!==r&&(n=t.substr(r),t=t.substr(0,r));var o=t.indexOf("?");return-1!==o&&(a=t.substr(o),t=t.substr(0,o)),{pathname:t,search:"?"===a?"":a,hash:"#"===n?"":n}}(e),o.state=t):(void 0===(o=(0,n.A)({},e)).pathname&&(o.pathname=""),o.search?"?"!==o.search.charAt(0)&&(o.search="?"+o.search):o.search="",o.hash?"#"!==o.hash.charAt(0)&&(o.hash="#"+o.hash):o.hash="",void 0!==t&&void 0===o.state&&(o.state=t));try{o.pathname=decodeURI(o.pathname)}catch(s){throw s instanceof URIError?new URIError('Pathname "'+o.pathname+'" could not be decoded. 
This is likely caused by an invalid percent-encoding.'):s}return a&&(o.key=a),r?o.pathname?"/"!==o.pathname.charAt(0)&&(o.pathname=i(o.pathname,r.pathname)):o.pathname=r.pathname:o.pathname||(o.pathname="/"),o}function g(){var e=null;var t=[];return{setPrompt:function(t){return e=t,function(){e===t&&(e=null)}},confirmTransitionTo:function(t,a,n,r){if(null!=e){var o="function"==typeof e?e(t,a):e;"string"==typeof o?"function"==typeof n?n(o,r):r(!0):r(!1!==o)}else r(!0)},appendListener:function(e){var a=!0;function n(){a&&e.apply(void 0,arguments)}return t.push(n),function(){a=!1,t=t.filter((function(e){return e!==n}))}},notifyListeners:function(){for(var e=arguments.length,a=new Array(e),n=0;nt?a.splice(t,a.length-t,r):a.push(r),u({action:n,location:r,index:t,entries:a})}}))},replace:function(e,t){var n="REPLACE",r=f(e,t,m(),y.location);d.confirmTransitionTo(r,n,a,(function(e){e&&(y.entries[y.index]=r,u({action:n,location:r}))}))},go:v,goBack:function(){v(-1)},goForward:function(){v(1)},canGo:function(e){var t=y.index+e;return t>=0&&t{"use strict";var n=a(44363),r={childContextTypes:!0,contextType:!0,contextTypes:!0,defaultProps:!0,displayName:!0,getDefaultProps:!0,getDerivedStateFromError:!0,getDerivedStateFromProps:!0,mixins:!0,propTypes:!0,type:!0},o={name:!0,length:!0,prototype:!0,caller:!0,callee:!0,arguments:!0,arity:!0},i={$$typeof:!0,compare:!0,defaultProps:!0,displayName:!0,propTypes:!0,type:!0},s={};function c(e){return n.isMemo(e)?i:s[e.$$typeof]||r}s[n.ForwardRef]={$$typeof:!0,render:!0,defaultProps:!0,displayName:!0,propTypes:!0},s[n.Memo]=i;var l=Object.defineProperty,d=Object.getOwnPropertyNames,u=Object.getOwnPropertySymbols,p=Object.getOwnPropertyDescriptor,f=Object.getPrototypeOf,g=Object.prototype;e.exports=function e(t,a,n){if("string"!=typeof a){if(g){var r=f(a);r&&r!==g&&e(t,r,n)}var i=d(a);u&&(i=i.concat(u(a)));for(var s=c(t),m=c(a),b=0;b{"use strict";e.exports=function(e,t,a,n,r,o,i,s){if(!e){var c;if(void 0===t)c=new Error("Minified exception 
occurred; use the non-minified dev environment for the full error message and additional helpful warnings.");else{var l=[a,n,r,o,i,s],d=0;(c=new Error(t.replace(/%s/g,(function(){return l[d++]})))).name="Invariant Violation"}throw c.framesToPop=1,c}}},64634:e=>{e.exports=Array.isArray||function(e){return"[object Array]"==Object.prototype.toString.call(e)}},79739:(e,t,a)=>{"use strict";a.r(t),a.d(t,{default:()=>l});var n=Object.assign||function(e){for(var t=1;t1&&void 0!==arguments[1]?arguments[1]:{},r=window.Promise||function(e){function t(){}e(t,t)},l=function(){for(var e=arguments.length,t=Array(e),a=0;a0&&void 0!==arguments[0]?arguments[0]:{}).target,t=function(){var e={width:document.documentElement.clientWidth,height:document.documentElement.clientHeight,left:0,top:0,right:0,bottom:0},t=void 0,a=void 0;if(h.container)if(h.container instanceof Object)t=(e=n({},e,h.container)).width-e.left-e.right-2*h.margin,a=e.height-e.top-e.bottom-2*h.margin;else{var r=(o(h.container)?h.container:document.querySelector(h.container)).getBoundingClientRect(),s=r.width,c=r.height,l=r.left,d=r.top;e=n({},e,{width:s,height:c,left:l,top:d})}t=t||e.width-2*h.margin,a=a||e.height-2*h.margin;var u=_.zoomedHd||_.original,p=i(u)?t:u.naturalWidth||t,f=i(u)?a:u.naturalHeight||a,g=u.getBoundingClientRect(),m=g.top,b=g.left,v=g.width,y=g.height,S=Math.min(Math.max(v,p),t)/v,k=Math.min(Math.max(y,f),a)/y,x=Math.min(S,k),w="scale("+x+") translate3d("+((t-v)/2-b+h.margin+e.left)/x+"px, "+((a-y)/2-m+h.margin+e.top)/x+"px, 0)";_.zoomed.style.transform=w,_.zoomedHd&&(_.zoomedHd.style.transform=w)};return new r((function(a){if(e&&-1===f.indexOf(e))a(y);else{if(_.zoomed)a(y);else{if(e)_.original=e;else{if(!(f.length>0))return void a(y);var n=f;_.original=n[0]}if(_.original.dispatchEvent(c("medium-zoom:open",{detail:{zoom:y}})),b=window.pageYOffset||document.documentElement.scrollTop||document.body.scrollTop||0,m=!0,_.zoomed=function(e){var 
t=e.getBoundingClientRect(),a=t.top,n=t.left,r=t.width,o=t.height,i=e.cloneNode(),s=window.pageYOffset||document.documentElement.scrollTop||document.body.scrollTop||0,c=window.pageXOffset||document.documentElement.scrollLeft||document.body.scrollLeft||0;return i.removeAttribute("id"),i.style.position="absolute",i.style.top=a+s+"px",i.style.left=n+c+"px",i.style.width=r+"px",i.style.height=o+"px",i.style.transform="",i}(_.original),document.body.appendChild(v),h.template){var r=o(h.template)?h.template:document.querySelector(h.template);_.template=document.createElement("div"),_.template.appendChild(r.content.cloneNode(!0)),document.body.appendChild(_.template)}if(_.original.parentElement&&"PICTURE"===_.original.parentElement.tagName&&_.original.currentSrc&&(_.zoomed.src=_.original.currentSrc),document.body.appendChild(_.zoomed),window.requestAnimationFrame((function(){document.body.classList.add("medium-zoom--opened")})),_.original.classList.add("medium-zoom-image--hidden"),_.zoomed.classList.add("medium-zoom-image--opened"),_.zoomed.addEventListener("click",u),_.zoomed.addEventListener("transitionend",(function e(){m=!1,_.zoomed.removeEventListener("transitionend",e),_.original.dispatchEvent(c("medium-zoom:opened",{detail:{zoom:y}})),a(y)})),_.original.getAttribute("data-zoom-src")){_.zoomedHd=_.zoomed.cloneNode(),_.zoomedHd.removeAttribute("srcset"),_.zoomedHd.removeAttribute("sizes"),_.zoomedHd.removeAttribute("loading"),_.zoomedHd.src=_.zoomed.getAttribute("data-zoom-src"),_.zoomedHd.onerror=function(){clearInterval(i),console.warn("Unable to reach the zoom image target "+_.zoomedHd.src),_.zoomedHd=null,t()};var i=setInterval((function(){_.zoomedHd.complete&&(clearInterval(i),_.zoomedHd.classList.add("medium-zoom-image--opened"),_.zoomedHd.addEventListener("click",u),document.body.appendChild(_.zoomedHd),t())}),10)}else 
if(_.original.hasAttribute("srcset")){_.zoomedHd=_.zoomed.cloneNode(),_.zoomedHd.removeAttribute("sizes"),_.zoomedHd.removeAttribute("loading");var s=_.zoomedHd.addEventListener("load",(function(){_.zoomedHd.removeEventListener("load",s),_.zoomedHd.classList.add("medium-zoom-image--opened"),_.zoomedHd.addEventListener("click",u),document.body.appendChild(_.zoomedHd),t()}))}else t()}}}))},u=function(){return new r((function(e){if(!m&&_.original){m=!0,document.body.classList.remove("medium-zoom--opened"),_.zoomed.style.transform="",_.zoomedHd&&(_.zoomedHd.style.transform=""),_.template&&(_.template.style.transition="opacity 150ms",_.template.style.opacity=0),_.original.dispatchEvent(c("medium-zoom:close",{detail:{zoom:y}})),_.zoomed.addEventListener("transitionend",(function t(){_.original.classList.remove("medium-zoom-image--hidden"),document.body.removeChild(_.zoomed),_.zoomedHd&&document.body.removeChild(_.zoomedHd),document.body.removeChild(v),_.zoomed.classList.remove("medium-zoom-image--opened"),_.template&&document.body.removeChild(_.template),m=!1,_.zoomed.removeEventListener("transitionend",t),_.original.dispatchEvent(c("medium-zoom:closed",{detail:{zoom:y}})),_.original=null,_.zoomed=null,_.zoomedHd=null,_.template=null,e(y)}))}else e(y)}))},p=function(){var e=(arguments.length>0&&void 0!==arguments[0]?arguments[0]:{}).target;return _.original?u():d({target:e})},f=[],g=[],m=!1,b=0,h=a,_={original:null,zoomed:null,zoomedHd:null,template:null};"[object Object]"===Object.prototype.toString.call(t)?h=t:(t||"string"==typeof t)&&l(t);var v=function(e){var t=document.createElement("div");return t.classList.add("medium-zoom-overlay"),t.style.background=e,t}((h=n({margin:0,background:"#fff",scrollOffset:40,container:null,template:null},h)).background);document.addEventListener("click",(function(e){var t=e.target;t!==v?-1!==f.indexOf(t)&&p({target:t}):u()})),document.addEventListener("keyup",(function(e){var 
t=e.key||e.keyCode;"Escape"!==t&&"Esc"!==t&&27!==t||u()})),document.addEventListener("scroll",(function(){if(!m&&_.original){var e=window.pageYOffset||document.documentElement.scrollTop||document.body.scrollTop||0;Math.abs(b-e)>h.scrollOffset&&setTimeout(u,150)}})),window.addEventListener("resize",u);var y={open:d,close:u,toggle:p,update:function(){var e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:{},t=e;if(e.background&&(v.style.background=e.background),e.container&&e.container instanceof Object&&(t.container=n({},h.container,e.container)),e.template){var a=o(e.template)?e.template:document.querySelector(e.template);t.template=a}return h=n({},h,t),f.forEach((function(e){e.dispatchEvent(c("medium-zoom:update",{detail:{zoom:y}}))})),y},clone:function(){return e(n({},h,arguments.length>0&&void 0!==arguments[0]?arguments[0]:{}))},attach:l,detach:function(){for(var e=arguments.length,t=Array(e),a=0;a0?t.reduce((function(e,t){return[].concat(e,s(t))}),[]):f;return n.forEach((function(e){e.classList.remove("medium-zoom-image"),e.dispatchEvent(c("medium-zoom:detach",{detail:{zoom:y}}))})),f=f.filter((function(e){return-1===n.indexOf(e)})),y},on:function(e,t){var a=arguments.length>2&&void 0!==arguments[2]?arguments[2]:{};return f.forEach((function(n){n.addEventListener("medium-zoom:"+e,t,a)})),g.push({type:"medium-zoom:"+e,listener:t,options:a}),y},off:function(e,t){var a=arguments.length>2&&void 0!==arguments[2]?arguments[2]:{};return f.forEach((function(n){n.removeEventListener("medium-zoom:"+e,t,a)})),g=g.filter((function(a){return!(a.type==="medium-zoom:"+e&&a.listener.toString()===t.toString())})),y},getOptions:function(){return h},getImages:function(){return f},getZoomedImage:function(){return _.original}};return y}},10119:(e,t,a)=>{"use strict";a.r(t)},51043:(e,t,a)=>{"use strict";a.r(t)},5947:function(e,t,a){var n,r;n=function(){var 
e,t,a={version:"0.2.0"},n=a.settings={minimum:.08,easing:"ease",positionUsing:"",speed:200,trickle:!0,trickleRate:.02,trickleSpeed:800,showSpinner:!0,barSelector:'[role="bar"]',spinnerSelector:'[role="spinner"]',parent:"body",template:'
    '};function r(e,t,a){return ea?a:e}function o(e){return 100*(-1+e)}function i(e,t,a){var r;return(r="translate3d"===n.positionUsing?{transform:"translate3d("+o(e)+"%,0,0)"}:"translate"===n.positionUsing?{transform:"translate("+o(e)+"%,0)"}:{"margin-left":o(e)+"%"}).transition="all "+t+"ms "+a,r}a.configure=function(e){var t,a;for(t in e)void 0!==(a=e[t])&&e.hasOwnProperty(t)&&(n[t]=a);return this},a.status=null,a.set=function(e){var t=a.isStarted();e=r(e,n.minimum,1),a.status=1===e?null:e;var o=a.render(!t),l=o.querySelector(n.barSelector),d=n.speed,u=n.easing;return o.offsetWidth,s((function(t){""===n.positionUsing&&(n.positionUsing=a.getPositioningCSS()),c(l,i(e,d,u)),1===e?(c(o,{transition:"none",opacity:1}),o.offsetWidth,setTimeout((function(){c(o,{transition:"all "+d+"ms linear",opacity:0}),setTimeout((function(){a.remove(),t()}),d)}),d)):setTimeout(t,d)})),this},a.isStarted=function(){return"number"==typeof a.status},a.start=function(){a.status||a.set(0);var e=function(){setTimeout((function(){a.status&&(a.trickle(),e())}),n.trickleSpeed)};return n.trickle&&e(),this},a.done=function(e){return e||a.status?a.inc(.3+.5*Math.random()).set(1):this},a.inc=function(e){var t=a.status;return t?("number"!=typeof e&&(e=(1-t)*r(Math.random()*t,.1,.95)),t=r(t+e,0,.994),a.set(t)):a.start()},a.trickle=function(){return a.inc(Math.random()*n.trickleRate)},e=0,t=0,a.promise=function(n){return n&&"resolved"!==n.state()?(0===t&&a.start(),e++,t++,n.always((function(){0==--t?(e=0,a.done()):a.set((e-t)/e)})),this):this},a.render=function(e){if(a.isRendered())return document.getElementById("nprogress");d(document.documentElement,"nprogress-busy");var t=document.createElement("div");t.id="nprogress",t.innerHTML=n.template;var r,i=t.querySelector(n.barSelector),s=e?"-100":o(a.status||0),l=document.querySelector(n.parent);return c(i,{transition:"all 0 
linear",transform:"translate3d("+s+"%,0,0)"}),n.showSpinner||(r=t.querySelector(n.spinnerSelector))&&f(r),l!=document.body&&d(l,"nprogress-custom-parent"),l.appendChild(t),t},a.remove=function(){u(document.documentElement,"nprogress-busy"),u(document.querySelector(n.parent),"nprogress-custom-parent");var e=document.getElementById("nprogress");e&&f(e)},a.isRendered=function(){return!!document.getElementById("nprogress")},a.getPositioningCSS=function(){var e=document.body.style,t="WebkitTransform"in e?"Webkit":"MozTransform"in e?"Moz":"msTransform"in e?"ms":"OTransform"in e?"O":"";return t+"Perspective"in e?"translate3d":t+"Transform"in e?"translate":"margin"};var s=function(){var e=[];function t(){var a=e.shift();a&&a(t)}return function(a){e.push(a),1==e.length&&t()}}(),c=function(){var e=["Webkit","O","Moz","ms"],t={};function a(e){return e.replace(/^-ms-/,"ms-").replace(/-([\da-z])/gi,(function(e,t){return t.toUpperCase()}))}function n(t){var a=document.body.style;if(t in a)return t;for(var n,r=e.length,o=t.charAt(0).toUpperCase()+t.slice(1);r--;)if((n=e[r]+o)in a)return n;return t}function r(e){return e=a(e),t[e]||(t[e]=n(e))}function o(e,t,a){t=r(t),e.style[t]=a}return function(e,t){var a,n,r=arguments;if(2==r.length)for(a in t)void 0!==(n=t[a])&&t.hasOwnProperty(a)&&o(e,a,n);else o(e,r[1],r[2])}}();function l(e,t){return("string"==typeof e?e:p(e)).indexOf(" "+t+" ")>=0}function d(e,t){var a=p(e),n=a+t;l(a,t)||(e.className=n.substring(1))}function u(e,t){var a,n=p(e);l(e,t)&&(a=n.replace(" "+t+" "," "),e.className=a.substring(1,a.length-1))}function p(e){return(" "+(e.className||"")+" ").replace(/\s+/gi," ")}function f(e){e&&e.parentNode&&e.parentNode.removeChild(e)}return a},void 0===(r="function"==typeof n?n.call(t,a,t,e):n)||(e.exports=r)},45228:e=>{"use strict";var t=Object.getOwnPropertySymbols,a=Object.prototype.hasOwnProperty,n=Object.prototype.propertyIsEnumerable;e.exports=function(){try{if(!Object.assign)return!1;var e=new 
String("abc");if(e[5]="de","5"===Object.getOwnPropertyNames(e)[0])return!1;for(var t={},a=0;a<10;a++)t["_"+String.fromCharCode(a)]=a;if("0123456789"!==Object.getOwnPropertyNames(t).map((function(e){return t[e]})).join(""))return!1;var n={};return"abcdefghijklmnopqrst".split("").forEach((function(e){n[e]=e})),"abcdefghijklmnopqrst"===Object.keys(Object.assign({},n)).join("")}catch(r){return!1}}()?Object.assign:function(e,r){for(var o,i,s=function(e){if(null==e)throw new TypeError("Object.assign cannot be called with null or undefined");return Object(e)}(e),c=1;c{"use strict";a.d(t,{A:()=>o});var n=function(){var e=/(?:^|\s)lang(?:uage)?-([\w-]+)(?=\s|$)/i,t=0,a={},n={util:{encode:function e(t){return t instanceof r?new r(t.type,e(t.content),t.alias):Array.isArray(t)?t.map(e):t.replace(/&/g,"&").replace(/=u.reach);x+=k.value.length,k=k.next){var w=k.value;if(t.length>e.length)return;if(!(w instanceof r)){var E,A=1;if(_){if(!(E=o(S,x,e,h))||E.index>=e.length)break;var T=E.index,C=E.index+E[0].length,L=x;for(L+=k.value.length;T>=L;)L+=(k=k.next).value.length;if(x=L-=k.value.length,k.value instanceof r)continue;for(var O=k;O!==t.tail&&(Lu.reach&&(u.reach=R);var I=k.prev;if(P&&(I=c(t,I,P),x+=P.length),l(t,I,A),k=c(t,I,new r(p,b?n.tokenize(j,b):j,v,j)),N&&c(t,k,N),A>1){var M={cause:p+","+g,reach:R};i(e,t,a,k.prev,x,M),u&&M.reach>u.reach&&(u.reach=M.reach)}}}}}}function s(){var e={value:null,prev:null,next:null},t={value:null,prev:e,next:null};e.next=t,this.head=e,this.tail=t,this.length=0}function c(e,t,a){var n=t.next,r={value:a,prev:t,next:n};return t.next=r,n.prev=r,e.length++,r}function l(e,t,a){for(var 
n=t.next,r=0;r"+o.content+""},n}(),r=n;n.default=n,r.languages.markup={comment:{pattern://,greedy:!0},prolog:{pattern:/<\?[\s\S]+?\?>/,greedy:!0},doctype:{pattern:/"'[\]]|"[^"]*"|'[^']*')+(?:\[(?:[^<"'\]]|"[^"]*"|'[^']*'|<(?!!--)|)*\]\s*)?>/i,greedy:!0,inside:{"internal-subset":{pattern:/(^[^\[]*\[)[\s\S]+(?=\]>$)/,lookbehind:!0,greedy:!0,inside:null},string:{pattern:/"[^"]*"|'[^']*'/,greedy:!0},punctuation:/^$|[[\]]/,"doctype-tag":/^DOCTYPE/i,name:/[^\s<>'"]+/}},cdata:{pattern://i,greedy:!0},tag:{pattern:/<\/?(?!\d)[^\s>\/=$<%]+(?:\s(?:\s*[^\s>\/=]+(?:\s*=\s*(?:"[^"]*"|'[^']*'|[^\s'">=]+(?=[\s>]))|(?=[\s/>])))+)?\s*\/?>/,greedy:!0,inside:{tag:{pattern:/^<\/?[^\s>\/]+/,inside:{punctuation:/^<\/?/,namespace:/^[^\s>\/:]+:/}},"special-attr":[],"attr-value":{pattern:/=\s*(?:"[^"]*"|'[^']*'|[^\s'">=]+)/,inside:{punctuation:[{pattern:/^=/,alias:"attr-equals"},/"|'/]}},punctuation:/\/?>/,"attr-name":{pattern:/[^\s>\/]+/,inside:{namespace:/^[^\s>\/:]+:/}}}},entity:[{pattern:/&[\da-z]{1,8};/i,alias:"named-entity"},/&#x?[\da-f]{1,8};/i]},r.languages.markup.tag.inside["attr-value"].inside.entity=r.languages.markup.entity,r.languages.markup.doctype.inside["internal-subset"].inside=r.languages.markup,r.hooks.add("wrap",(function(e){"entity"===e.type&&(e.attributes.title=e.content.replace(/&/,"&"))})),Object.defineProperty(r.languages.markup.tag,"addInlined",{value:function(e,t){var a={};a["language-"+t]={pattern:/(^$)/i,lookbehind:!0,inside:r.languages[t]},a.cdata=/^$/i;var n={"included-cdata":{pattern://i,inside:a}};n["language-"+t]={pattern:/[\s\S]+/,inside:r.languages[t]};var o={};o[e]={pattern:RegExp(/(<__[^>]*>)(?:))*\]\]>|(?!)/.source.replace(/__/g,(function(){return 
e})),"i"),lookbehind:!0,greedy:!0,inside:n},r.languages.insertBefore("markup","cdata",o)}}),Object.defineProperty(r.languages.markup.tag,"addAttribute",{value:function(e,t){r.languages.markup.tag.inside["special-attr"].push({pattern:RegExp(/(^|["'\s])/.source+"(?:"+e+")"+/\s*=\s*(?:"[^"]*"|'[^']*'|[^\s'">=]+(?=[\s>]))/.source,"i"),lookbehind:!0,inside:{"attr-name":/^[^\s=]+/,"attr-value":{pattern:/=[\s\S]+/,inside:{value:{pattern:/(^=\s*(["']|(?!["'])))\S[\s\S]*(?=\2$)/,lookbehind:!0,alias:[t,"language-"+t],inside:r.languages[t]},punctuation:[{pattern:/^=/,alias:"attr-equals"},/"|'/]}}}})}}),r.languages.html=r.languages.markup,r.languages.mathml=r.languages.markup,r.languages.svg=r.languages.markup,r.languages.xml=r.languages.extend("markup",{}),r.languages.ssml=r.languages.xml,r.languages.atom=r.languages.xml,r.languages.rss=r.languages.xml,function(e){var t="\\b(?:BASH|BASHOPTS|BASH_ALIASES|BASH_ARGC|BASH_ARGV|BASH_CMDS|BASH_COMPLETION_COMPAT_DIR|BASH_LINENO|BASH_REMATCH|BASH_SOURCE|BASH_VERSINFO|BASH_VERSION|COLORTERM|COLUMNS|COMP_WORDBREAKS|DBUS_SESSION_BUS_ADDRESS|DEFAULTS_PATH|DESKTOP_SESSION|DIRSTACK|DISPLAY|EUID|GDMSESSION|GDM_LANG|GNOME_KEYRING_CONTROL|GNOME_KEYRING_PID|GPG_AGENT_INFO|GROUPS|HISTCONTROL|HISTFILE|HISTFILESIZE|HISTSIZE|HOME|HOSTNAME|HOSTTYPE|IFS|INSTANCE|JOB|LANG|LANGUAGE|LC_ADDRESS|LC_ALL|LC_IDENTIFICATION|LC_MEASUREMENT|LC_MONETARY|LC_NAME|LC_NUMERIC|LC_PAPER|LC_TELEPHONE|LC_TIME|LESSCLOSE|LESSOPEN|LINES|LOGNAME|LS_COLORS|MACHTYPE|MAILCHECK|MANDATORY_PATH|NO_AT_BRIDGE|OLDPWD|OPTERR|OPTIND|ORBIT_SOCKETDIR|OSTYPE|PAPERSIZE|PATH|PIPESTATUS|PPID|PS1|PS2|PS3|PS4|PWD|RANDOM|REPLY|SECONDS|SELINUX_INIT|SESSION|SESSIONTYPE|SESSION_MANAGER|SHELL|SHELLOPTS|SHLVL|SSH_AUTH_SOCK|TERM|UID|UPSTART_EVENTS|UPSTART_INSTANCE|UPSTART_JOB|UPSTART_SESSION|USER|WINDOWID|XAUTHORITY|XDG_CONFIG_DIRS|XDG_CURRENT_DESKTOP|XDG_DATA_DIRS|XDG_GREETER_DATA_DIR|XDG_MENU_PREFIX|XDG_RUNTIME_DIR|XDG_SEAT|XDG_SEAT_PATH|XDG_SESSION_DESKTOP|XDG_SESSION_ID|XDG_SESSION_PATH|XDG_SESS
ION_TYPE|XDG_VTNR|XMODIFIERS)\\b",a={pattern:/(^(["']?)\w+\2)[ \t]+\S.*/,lookbehind:!0,alias:"punctuation",inside:null},n={bash:a,environment:{pattern:RegExp("\\$"+t),alias:"constant"},variable:[{pattern:/\$?\(\([\s\S]+?\)\)/,greedy:!0,inside:{variable:[{pattern:/(^\$\(\([\s\S]+)\)\)/,lookbehind:!0},/^\$\(\(/],number:/\b0x[\dA-Fa-f]+\b|(?:\b\d+(?:\.\d*)?|\B\.\d+)(?:[Ee]-?\d+)?/,operator:/--|\+\+|\*\*=?|<<=?|>>=?|&&|\|\||[=!+\-*/%<>^&|]=?|[?~:]/,punctuation:/\(\(?|\)\)?|,|;/}},{pattern:/\$\((?:\([^)]+\)|[^()])+\)|`[^`]+`/,greedy:!0,inside:{variable:/^\$\(|^`|\)$|`$/}},{pattern:/\$\{[^}]+\}/,greedy:!0,inside:{operator:/:[-=?+]?|[!\/]|##?|%%?|\^\^?|,,?/,punctuation:/[\[\]]/,environment:{pattern:RegExp("(\\{)"+t),lookbehind:!0,alias:"constant"}}},/\$(?:\w+|[#?*!@$])/],entity:/\\(?:[abceEfnrtv\\"]|O?[0-7]{1,3}|U[0-9a-fA-F]{8}|u[0-9a-fA-F]{4}|x[0-9a-fA-F]{1,2})/};e.languages.bash={shebang:{pattern:/^#!\s*\/.*/,alias:"important"},comment:{pattern:/(^|[^"{\\$])#.*/,lookbehind:!0},"function-name":[{pattern:/(\bfunction\s+)[\w-]+(?=(?:\s*\(?:\s*\))?\s*\{)/,lookbehind:!0,alias:"function"},{pattern:/\b[\w-]+(?=\s*\(\s*\)\s*\{)/,alias:"function"}],"for-or-select":{pattern:/(\b(?:for|select)\s+)\w+(?=\s+in\s)/,alias:"variable",lookbehind:!0},"assign-left":{pattern:/(^|[\s;|&]|[<>]\()\w+(?=\+?=)/,inside:{environment:{pattern:RegExp("(^|[\\s;|&]|[<>]\\()"+t),lookbehind:!0,alias:"constant"}},alias:"variable",lookbehind:!0},string:[{pattern:/((?:^|[^<])<<-?\s*)(\w+)\s[\s\S]*?(?:\r?\n|\r)\2/,lookbehind:!0,greedy:!0,inside:n},{pattern:/((?:^|[^<])<<-?\s*)(["'])(\w+)\2\s[\s\S]*?(?:\r?\n|\r)\3/,lookbehind:!0,greedy:!0,inside:{bash:a}},{pattern:/(^|[^\\](?:\\\\)*)"(?:\\[\s\S]|\$\([^)]+\)|\$(?!\()|`[^`]+`|[^"\\`$])*"/,lookbehind:!0,greedy:!0,inside:n},{pattern:/(^|[^$\\])'[^']*'/,lookbehind:!0,greedy:!0},{pattern:/\$'(?:[^'\\]|\\[\s\S])*'/,greedy:!0,inside:{entity:n.entity}}],environment:{pattern:RegExp("\\$?"+t),alias:"constant"},variable:n.variable,function:{pattern:/(^|[\s;|&]|[<>]\()(?
:add|apropos|apt|apt-cache|apt-get|aptitude|aspell|automysqlbackup|awk|basename|bash|bc|bconsole|bg|bzip2|cal|cat|cfdisk|chgrp|chkconfig|chmod|chown|chroot|cksum|clear|cmp|column|comm|composer|cp|cron|crontab|csplit|curl|cut|date|dc|dd|ddrescue|debootstrap|df|diff|diff3|dig|dir|dircolors|dirname|dirs|dmesg|docker|docker-compose|du|egrep|eject|env|ethtool|expand|expect|expr|fdformat|fdisk|fg|fgrep|file|find|fmt|fold|format|free|fsck|ftp|fuser|gawk|git|gparted|grep|groupadd|groupdel|groupmod|groups|grub-mkconfig|gzip|halt|head|hg|history|host|hostname|htop|iconv|id|ifconfig|ifdown|ifup|import|install|ip|jobs|join|kill|killall|less|link|ln|locate|logname|logrotate|look|lpc|lpr|lprint|lprintd|lprintq|lprm|ls|lsof|lynx|make|man|mc|mdadm|mkconfig|mkdir|mke2fs|mkfifo|mkfs|mkisofs|mknod|mkswap|mmv|more|most|mount|mtools|mtr|mutt|mv|nano|nc|netstat|nice|nl|node|nohup|notify-send|npm|nslookup|op|open|parted|passwd|paste|pathchk|ping|pkill|pnpm|podman|podman-compose|popd|pr|printcap|printenv|ps|pushd|pv|quota|quotacheck|quotactl|ram|rar|rcp|reboot|remsync|rename|renice|rev|rm|rmdir|rpm|rsync|scp|screen|sdiff|sed|sendmail|seq|service|sftp|sh|shellcheck|shuf|shutdown|sleep|slocate|sort|split|ssh|stat|strace|su|sudo|sum|suspend|swapon|sync|tac|tail|tar|tee|time|timeout|top|touch|tr|traceroute|tsort|tty|umount|uname|unexpand|uniq|units|unrar|unshar|unzip|update-grub|uptime|useradd|userdel|usermod|users|uudecode|uuencode|v|vcpkg|vdir|vi|vim|virsh|vmstat|wait|watch|wc|wget|whereis|which|who|whoami|write|xargs|xdg-open|yarn|yes|zenity|zip|zsh|zypper)(?=$|[)\s;|&])/,lookbehind:!0},keyword:{pattern:/(^|[\s;|&]|[<>]\()(?:case|do|done|elif|else|esac|fi|for|function|if|in|select|then|until|while)(?=$|[)\s;|&])/,lookbehind:!0},builtin:{pattern:/(^|[\s;|&]|[<>]\()(?:\.|:|alias|bind|break|builtin|caller|cd|command|continue|declare|echo|enable|eval|exec|exit|export|getopts|hash|help|let|local|logout|mapfile|printf|pwd|read|readarray|readonly|return|set|shift|shopt|source|test|times|trap|type|
typeset|ulimit|umask|unalias|unset)(?=$|[)\s;|&])/,lookbehind:!0,alias:"class-name"},boolean:{pattern:/(^|[\s;|&]|[<>]\()(?:false|true)(?=$|[)\s;|&])/,lookbehind:!0},"file-descriptor":{pattern:/\B&\d\b/,alias:"important"},operator:{pattern:/\d?<>|>\||\+=|=[=~]?|!=?|<<[<-]?|[&\d]?>>|\d[<>]&?|[<>][&=]?|&[>&]?|\|[&|]?/,inside:{"file-descriptor":{pattern:/^\d/,alias:"important"}}},punctuation:/\$?\(\(?|\)\)?|\.\.|[{}[\];\\]/,number:{pattern:/(^|\s)(?:[1-9]\d*|0)(?:[.,]\d+)?\b/,lookbehind:!0}},a.inside=e.languages.bash;for(var r=["comment","function-name","for-or-select","assign-left","string","environment","function","keyword","builtin","boolean","file-descriptor","operator","punctuation","number"],o=n.variable[1].inside,i=0;i]=?|[!=]=?=?|--?|\+\+?|&&?|\|\|?|[?*/~^%]/,punctuation:/[{}[\];(),.:]/},r.languages.c=r.languages.extend("clike",{comment:{pattern:/\/\/(?:[^\r\n\\]|\\(?:\r\n?|\n|(?![\r\n])))*|\/\*[\s\S]*?(?:\*\/|$)/,greedy:!0},string:{pattern:/"(?:\\(?:\r\n|[\s\S])|[^"\\\r\n])*"/,greedy:!0},"class-name":{pattern:/(\b(?:enum|struct)\s+(?:__attribute__\s*\(\([\s\S]*?\)\)\s*)?)\w+|\b[a-z]\w*_t\b/,lookbehind:!0},keyword:/\b(?:_Alignas|_Alignof|_Atomic|_Bool|_Complex|_Generic|_Imaginary|_Noreturn|_Static_assert|_Thread_local|__attribute__|asm|auto|break|case|char|const|continue|default|do|double|else|enum|extern|float|for|goto|if|inline|int|long|register|return|short|signed|sizeof|static|struct|switch|typedef|typeof|union|unsigned|void|volatile|while)\b/,function:/\b[a-z_]\w*(?=\s*\()/i,number:/(?:\b0x(?:[\da-f]+(?:\.[\da-f]*)?|\.[\da-f]+)(?:p[+-]?\d+)?|(?:\b\d+(?:\.\d*)?|\B\.\d+)(?:e[+-]?\d+)?)[ful]{0,4}/i,operator:/>>=?|<<=?|->|([-+&|:])\1|[?:~]|[-+*/%&|^!=<>]=?/}),r.languages.insertBefore("c","string",{char:{pattern:/'(?:\\(?:\r\n|[\s\S])|[^'\\\r\n]){0,32}'/,greedy:!0}}),r.languages.insertBefore("c","string",{macro:{pattern:/(^[\t 
]*)#\s*[a-z](?:[^\r\n\\/]|\/(?!\*)|\/\*(?:[^*]|\*(?!\/))*\*\/|\\(?:\r\n|[\s\S]))*/im,lookbehind:!0,greedy:!0,alias:"property",inside:{string:[{pattern:/^(#\s*include\s*)<[^>]+>/,lookbehind:!0},r.languages.c.string],char:r.languages.c.char,comment:r.languages.c.comment,"macro-name":[{pattern:/(^#\s*define\s+)\w+\b(?!\()/i,lookbehind:!0},{pattern:/(^#\s*define\s+)\w+\b(?=\()/i,lookbehind:!0,alias:"function"}],directive:{pattern:/^(#\s*)[a-z]+/,lookbehind:!0,alias:"keyword"},"directive-hash":/^#/,punctuation:/##|\\(?=[\r\n])/,expression:{pattern:/\S[\s\S]*/,inside:r.languages.c}}}}),r.languages.insertBefore("c","function",{constant:/\b(?:EOF|NULL|SEEK_CUR|SEEK_END|SEEK_SET|__DATE__|__FILE__|__LINE__|__TIMESTAMP__|__TIME__|__func__|stderr|stdin|stdout)\b/}),delete r.languages.c.boolean,function(e){var t=/\b(?:alignas|alignof|asm|auto|bool|break|case|catch|char|char16_t|char32_t|char8_t|class|co_await|co_return|co_yield|compl|concept|const|const_cast|consteval|constexpr|constinit|continue|decltype|default|delete|do|double|dynamic_cast|else|enum|explicit|export|extern|final|float|for|friend|goto|if|import|inline|int|int16_t|int32_t|int64_t|int8_t|long|module|mutable|namespace|new|noexcept|nullptr|operator|override|private|protected|public|register|reinterpret_cast|requires|return|short|signed|sizeof|static|static_assert|static_cast|struct|switch|template|this|thread_local|throw|try|typedef|typeid|typename|uint16_t|uint32_t|uint64_t|uint8_t|union|unsigned|using|virtual|void|volatile|wchar_t|while)\b/,a=/\b(?!)\w+(?:\s*\.\s*\w+)*\b/.source.replace(//g,(function(){return t.source}));e.languages.cpp=e.languages.extend("c",{"class-name":[{pattern:RegExp(/(\b(?:class|concept|enum|struct|typename)\s+)(?!)\w+/.source.replace(//g,(function(){return 
t.source}))),lookbehind:!0},/\b[A-Z]\w*(?=\s*::\s*\w+\s*\()/,/\b[A-Z_]\w*(?=\s*::\s*~\w+\s*\()/i,/\b\w+(?=\s*<(?:[^<>]|<(?:[^<>]|<[^<>]*>)*>)*>\s*::\s*\w+\s*\()/],keyword:t,number:{pattern:/(?:\b0b[01']+|\b0x(?:[\da-f']+(?:\.[\da-f']*)?|\.[\da-f']+)(?:p[+-]?[\d']+)?|(?:\b[\d']+(?:\.[\d']*)?|\B\.[\d']+)(?:e[+-]?[\d']+)?)[ful]{0,4}/i,greedy:!0},operator:/>>=?|<<=?|->|--|\+\+|&&|\|\||[?:~]|<=>|[-+*/%&|^!=<>]=?|\b(?:and|and_eq|bitand|bitor|not|not_eq|or|or_eq|xor|xor_eq)\b/,boolean:/\b(?:false|true)\b/}),e.languages.insertBefore("cpp","string",{module:{pattern:RegExp(/(\b(?:import|module)\s+)/.source+"(?:"+/"(?:\\(?:\r\n|[\s\S])|[^"\\\r\n])*"|<[^<>\r\n]*>/.source+"|"+/(?:\s*:\s*)?|:\s*/.source.replace(//g,(function(){return a}))+")"),lookbehind:!0,greedy:!0,inside:{string:/^[<"][\s\S]+/,operator:/:/,punctuation:/\./}},"raw-string":{pattern:/R"([^()\\ ]{0,16})\([\s\S]*?\)\1"/,alias:"string",greedy:!0}}),e.languages.insertBefore("cpp","keyword",{"generic-function":{pattern:/\b(?!operator\b)[a-z_]\w*\s*<(?:[^<>]|<[^<>]*>)*>(?=\s*\()/i,inside:{function:/^\w+/,generic:{pattern:/<[\s\S]+/,alias:"class-name",inside:e.languages.cpp}}}}),e.languages.insertBefore("cpp","operator",{"double-colon":{pattern:/::/,alias:"punctuation"}}),e.languages.insertBefore("cpp","class-name",{"base-clause":{pattern:/(\b(?:class|struct)\s+\w+\s*:\s*)[^;{}"'\s]+(?:\s+[^;{}"'\s]+)*(?=\s*[;{])/,lookbehind:!0,greedy:!0,inside:e.languages.extend("cpp",{})}}),e.languages.insertBefore("inside","double-colon",{"class-name":/\b[a-z_]\w*\b(?!\s*::)/i},e.languages.cpp["base-clause"])}(r),function(e){var 
t=/(?:"(?:\\(?:\r\n|[\s\S])|[^"\\\r\n])*"|'(?:\\(?:\r\n|[\s\S])|[^'\\\r\n])*')/;e.languages.css={comment:/\/\*[\s\S]*?\*\//,atrule:{pattern:/@[\w-](?:[^;{\s]|\s+(?![\s{]))*(?:;|(?=\s*\{))/,inside:{rule:/^@[\w-]+/,"selector-function-argument":{pattern:/(\bselector\s*\(\s*(?![\s)]))(?:[^()\s]|\s+(?![\s)])|\((?:[^()]|\([^()]*\))*\))+(?=\s*\))/,lookbehind:!0,alias:"selector"},keyword:{pattern:/(^|[^\w-])(?:and|not|only|or)(?![\w-])/,lookbehind:!0}}},url:{pattern:RegExp("\\burl\\((?:"+t.source+"|"+/(?:[^\\\r\n()"']|\\[\s\S])*/.source+")\\)","i"),greedy:!0,inside:{function:/^url/i,punctuation:/^\(|\)$/,string:{pattern:RegExp("^"+t.source+"$"),alias:"url"}}},selector:{pattern:RegExp("(^|[{}\\s])[^{}\\s](?:[^{};\"'\\s]|\\s+(?![\\s{])|"+t.source+")*(?=\\s*\\{)"),lookbehind:!0},string:{pattern:t,greedy:!0},property:{pattern:/(^|[^-\w\xA0-\uFFFF])(?!\s)[-_a-z\xA0-\uFFFF](?:(?!\s)[-\w\xA0-\uFFFF])*(?=\s*:)/i,lookbehind:!0},important:/!important\b/i,function:{pattern:/(^|[^-a-z0-9])[-a-z0-9]+(?=\()/i,lookbehind:!0},punctuation:/[(){};:,]/},e.languages.css.atrule.inside.rest=e.languages.css;var a=e.languages.markup;a&&(a.tag.addInlined("style","css"),a.tag.addAttribute("style","css"))}(r),function(e){var 
t,a=/("|')(?:\\(?:\r\n|[\s\S])|(?!\1)[^\\\r\n])*\1/;e.languages.css.selector={pattern:e.languages.css.selector.pattern,lookbehind:!0,inside:t={"pseudo-element":/:(?:after|before|first-letter|first-line|selection)|::[-\w]+/,"pseudo-class":/:[-\w]+/,class:/\.[-\w]+/,id:/#[-\w]+/,attribute:{pattern:RegExp("\\[(?:[^[\\]\"']|"+a.source+")*\\]"),greedy:!0,inside:{punctuation:/^\[|\]$/,"case-sensitivity":{pattern:/(\s)[si]$/i,lookbehind:!0,alias:"keyword"},namespace:{pattern:/^(\s*)(?:(?!\s)[-*\w\xA0-\uFFFF])*\|(?!=)/,lookbehind:!0,inside:{punctuation:/\|$/}},"attr-name":{pattern:/^(\s*)(?:(?!\s)[-\w\xA0-\uFFFF])+/,lookbehind:!0},"attr-value":[a,{pattern:/(=\s*)(?:(?!\s)[-\w\xA0-\uFFFF])+(?=\s*$)/,lookbehind:!0}],operator:/[|~*^$]?=/}},"n-th":[{pattern:/(\(\s*)[+-]?\d*[\dn](?:\s*[+-]\s*\d+)?(?=\s*\))/,lookbehind:!0,inside:{number:/[\dn]+/,operator:/[+-]/}},{pattern:/(\(\s*)(?:even|odd)(?=\s*\))/i,lookbehind:!0}],combinator:/>|\+|~|\|\|/,punctuation:/[(),]/}},e.languages.css.atrule.inside["selector-function-argument"].inside=t,e.languages.insertBefore("css","property",{variable:{pattern:/(^|[^-\w\xA0-\uFFFF])--(?!\s)[-_a-z\xA0-\uFFFF](?:(?!\s)[-\w\xA0-\uFFFF])*/i,lookbehind:!0}});var 
n={pattern:/(\b\d+)(?:%|[a-z]+(?![\w-]))/,lookbehind:!0},r={pattern:/(^|[^\w.-])-?(?:\d+(?:\.\d+)?|\.\d+)/,lookbehind:!0};e.languages.insertBefore("css","function",{operator:{pattern:/(\s)[+\-*\/](?=\s)/,lookbehind:!0},hexcode:{pattern:/\B#[\da-f]{3,8}\b/i,alias:"color"},color:[{pattern:/(^|[^\w-])(?:AliceBlue|AntiqueWhite|Aqua|Aquamarine|Azure|Beige|Bisque|Black|BlanchedAlmond|Blue|BlueViolet|Brown|BurlyWood|CadetBlue|Chartreuse|Chocolate|Coral|CornflowerBlue|Cornsilk|Crimson|Cyan|DarkBlue|DarkCyan|DarkGoldenRod|DarkGr[ae]y|DarkGreen|DarkKhaki|DarkMagenta|DarkOliveGreen|DarkOrange|DarkOrchid|DarkRed|DarkSalmon|DarkSeaGreen|DarkSlateBlue|DarkSlateGr[ae]y|DarkTurquoise|DarkViolet|DeepPink|DeepSkyBlue|DimGr[ae]y|DodgerBlue|FireBrick|FloralWhite|ForestGreen|Fuchsia|Gainsboro|GhostWhite|Gold|GoldenRod|Gr[ae]y|Green|GreenYellow|HoneyDew|HotPink|IndianRed|Indigo|Ivory|Khaki|Lavender|LavenderBlush|LawnGreen|LemonChiffon|LightBlue|LightCoral|LightCyan|LightGoldenRodYellow|LightGr[ae]y|LightGreen|LightPink|LightSalmon|LightSeaGreen|LightSkyBlue|LightSlateGr[ae]y|LightSteelBlue|LightYellow|Lime|LimeGreen|Linen|Magenta|Maroon|MediumAquaMarine|MediumBlue|MediumOrchid|MediumPurple|MediumSeaGreen|MediumSlateBlue|MediumSpringGreen|MediumTurquoise|MediumVioletRed|MidnightBlue|MintCream|MistyRose|Moccasin|NavajoWhite|Navy|OldLace|Olive|OliveDrab|Orange|OrangeRed|Orchid|PaleGoldenRod|PaleGreen|PaleTurquoise|PaleVioletRed|PapayaWhip|PeachPuff|Peru|Pink|Plum|PowderBlue|Purple|Red|RosyBrown|RoyalBlue|SaddleBrown|Salmon|SandyBrown|SeaGreen|SeaShell|Sienna|Silver|SkyBlue|SlateBlue|SlateGr[ae]y|Snow|SpringGreen|SteelBlue|Tan|Teal|Thistle|Tomato|Transparent|Turquoise|Violet|Wheat|White|WhiteSmoke|Yellow|YellowGreen)(?![\w-])/i,lookbehind:!0},{pattern:/\b(?:hsl|rgb)\(\s*\d{1,3}\s*,\s*\d{1,3}%?\s*,\s*\d{1,3}%?\s*\)\B|\b(?:hsl|rgb)a\(\s*\d{1,3}\s*,\s*\d{1,3}%?\s*,\s*\d{1,3}%?\s*,\s*(?:0|0?\.\d+|1)\s*\)\B/i,inside:{unit:n,number:r,function:/[\w-]+(?=\()/,punctuation:/[(),]/}}],entity:/\\[\da-f]
{1,8}/i,unit:n,number:r})}(r),r.languages.javascript=r.languages.extend("clike",{"class-name":[r.languages.clike["class-name"],{pattern:/(^|[^$\w\xA0-\uFFFF])(?!\s)[_$A-Z\xA0-\uFFFF](?:(?!\s)[$\w\xA0-\uFFFF])*(?=\.(?:constructor|prototype))/,lookbehind:!0}],keyword:[{pattern:/((?:^|\})\s*)catch\b/,lookbehind:!0},{pattern:/(^|[^.]|\.\.\.\s*)\b(?:as|assert(?=\s*\{)|async(?=\s*(?:function\b|\(|[$\w\xA0-\uFFFF]|$))|await|break|case|class|const|continue|debugger|default|delete|do|else|enum|export|extends|finally(?=\s*(?:\{|$))|for|from(?=\s*(?:['"]|$))|function|(?:get|set)(?=\s*(?:[#\[$\w\xA0-\uFFFF]|$))|if|implements|import|in|instanceof|interface|let|new|null|of|package|private|protected|public|return|static|super|switch|this|throw|try|typeof|undefined|var|void|while|with|yield)\b/,lookbehind:!0}],function:/#?(?!\s)[_$a-zA-Z\xA0-\uFFFF](?:(?!\s)[$\w\xA0-\uFFFF])*(?=\s*(?:\.\s*(?:apply|bind|call)\s*)?\()/,number:{pattern:RegExp(/(^|[^\w$])/.source+"(?:"+/NaN|Infinity/.source+"|"+/0[bB][01]+(?:_[01]+)*n?/.source+"|"+/0[oO][0-7]+(?:_[0-7]+)*n?/.source+"|"+/0[xX][\dA-Fa-f]+(?:_[\dA-Fa-f]+)*n?/.source+"|"+/\d+(?:_\d+)*n/.source+"|"+/(?:\d+(?:_\d+)*(?:\.(?:\d+(?:_\d+)*)?)?|\.\d+(?:_\d+)*)(?:[Ee][+-]?\d+(?:_\d+)*)?/.source+")"+/(?![\w$])/.source),lookbehind:!0},operator:/--|\+\+|\*\*=?|=>|&&=?|\|\|=?|[!=]==|<<=?|>>>?=?|[-+*/%&|^!=<>]=?|\.{3}|\?\?=?|\?\.?|[~:]/}),r.languages.javascript["class-name"][0].pattern=/(\b(?:class|extends|implements|instanceof|interface|new)\s+)[\w.\\]+/,r.languages.insertBefore("javascript","keyword",{regex:{pattern:/((?:^|[^$\w\xA0-\uFFFF."'\])\s]|\b(?:return|yield))\s*)\/(?:\[(?:[^\]\\\r\n]|\\.)*\]|\\.|[^/\\\[\r\n])+\/[dgimyus]{0,7}(?=(?:\s|\/\*(?:[^*]|\*(?!\/))*\*\/)*(?:$|[\r\n,.;:})\]]|\/\/))/,lookbehind:!0,greedy:!0,inside:{"regex-source":{pattern:/^(\/)[\s\S]+(?=\/[a-z]*$)/,lookbehind:!0,alias:"language-regex",inside:r.languages.regex},"regex-delimiter":/^\/|\/$/,"regex-flags":/^[a-z]+$/}},"function-variable":{pattern:/#?(?!\s)[_$a-zA-Z\xA0-\uF
FFF](?:(?!\s)[$\w\xA0-\uFFFF])*(?=\s*[=:]\s*(?:async\s*)?(?:\bfunction\b|(?:\((?:[^()]|\([^()]*\))*\)|(?!\s)[_$a-zA-Z\xA0-\uFFFF](?:(?!\s)[$\w\xA0-\uFFFF])*)\s*=>))/,alias:"function"},parameter:[{pattern:/(function(?:\s+(?!\s)[_$a-zA-Z\xA0-\uFFFF](?:(?!\s)[$\w\xA0-\uFFFF])*)?\s*\(\s*)(?!\s)(?:[^()\s]|\s+(?![\s)])|\([^()]*\))+(?=\s*\))/,lookbehind:!0,inside:r.languages.javascript},{pattern:/(^|[^$\w\xA0-\uFFFF])(?!\s)[_$a-z\xA0-\uFFFF](?:(?!\s)[$\w\xA0-\uFFFF])*(?=\s*=>)/i,lookbehind:!0,inside:r.languages.javascript},{pattern:/(\(\s*)(?!\s)(?:[^()\s]|\s+(?![\s)])|\([^()]*\))+(?=\s*\)\s*=>)/,lookbehind:!0,inside:r.languages.javascript},{pattern:/((?:\b|\s|^)(?!(?:as|async|await|break|case|catch|class|const|continue|debugger|default|delete|do|else|enum|export|extends|finally|for|from|function|get|if|implements|import|in|instanceof|interface|let|new|null|of|package|private|protected|public|return|set|static|super|switch|this|throw|try|typeof|undefined|var|void|while|with|yield)(?![$\w\xA0-\uFFFF]))(?:(?!\s)[_$a-zA-Z\xA0-\uFFFF](?:(?!\s)[$\w\xA0-\uFFFF])*\s*)\(\s*|\]\s*\(\s*)(?!\s)(?:[^()\s]|\s+(?![\s)])|\([^()]*\))+(?=\s*\)\s*\{)/,lookbehind:!0,inside:r.languages.javascript}],constant:/\b[A-Z](?:[A-Z_]|\dx?)*\b/}),r.languages.insertBefore("javascript","string",{hashbang:{pattern:/^#!.*/,greedy:!0,alias:"comment"},"template-string":{pattern:/`(?:\\[\s\S]|\$\{(?:[^{}]|\{(?:[^{}]|\{[^}]*\})*\})+\}|(?!\$\{)[^\\`])*`/,greedy:!0,inside:{"template-punctuation":{pattern:/^`|`$/,alias:"string"},interpolation:{pattern:/((?:^|[^\\])(?:\\{2})*)\$\{(?:[^{}]|\{(?:[^{}]|\{[^}]*\})*\})+\}/,lookbehind:!0,inside:{"interpolation-punctuation":{pattern:/^\$\{|\}$/,alias:"punctuation"},rest:r.languages.javascript}},string:/[\s\S]+/}},"string-property":{pattern:/((?:^|[,{])[ \t]*)(["'])(?:\\(?:\r\n|[\s\S])|(?!\2)[^\\\r\n])*\2(?=\s*:)/m,lookbehind:!0,greedy:!0,alias:"property"}}),r.languages.insertBefore("javascript","operator",{"literal-property":{pattern:/((?:^|[,{])[ 
\t]*)(?!\s)[_$a-zA-Z\xA0-\uFFFF](?:(?!\s)[$\w\xA0-\uFFFF])*(?=\s*:)/m,lookbehind:!0,alias:"property"}}),r.languages.markup&&(r.languages.markup.tag.addInlined("script","javascript"),r.languages.markup.tag.addAttribute(/on(?:abort|blur|change|click|composition(?:end|start|update)|dblclick|error|focus(?:in|out)?|key(?:down|up)|load|mouse(?:down|enter|leave|move|out|over|up)|reset|resize|scroll|select|slotchange|submit|unload|wheel)/.source,"javascript")),r.languages.js=r.languages.javascript,function(e){var t=/#(?!\{).+/,a={pattern:/#\{[^}]+\}/,alias:"variable"};e.languages.coffeescript=e.languages.extend("javascript",{comment:t,string:[{pattern:/'(?:\\[\s\S]|[^\\'])*'/,greedy:!0},{pattern:/"(?:\\[\s\S]|[^\\"])*"/,greedy:!0,inside:{interpolation:a}}],keyword:/\b(?:and|break|by|catch|class|continue|debugger|delete|do|each|else|extend|extends|false|finally|for|if|in|instanceof|is|isnt|let|loop|namespace|new|no|not|null|of|off|on|or|own|return|super|switch|then|this|throw|true|try|typeof|undefined|unless|until|when|while|window|with|yes|yield)\b/,"class-member":{pattern:/@(?!\d)\w+/,alias:"variable"}}),e.languages.insertBefore("coffeescript","comment",{"multiline-comment":{pattern:/###[\s\S]+?###/,alias:"comment"},"block-regex":{pattern:/\/{3}[\s\S]*?\/{3}/,alias:"regex",inside:{comment:t,interpolation:a}}}),e.languages.insertBefore("coffeescript","string",{"inline-javascript":{pattern:/`(?:\\[\s\S]|[^\\`])*`/,inside:{delimiter:{pattern:/^`|`$/,alias:"punctuation"},script:{pattern:/[\s\S]+/,alias:"language-javascript",inside:e.languages.javascript}}},"multiline-string":[{pattern:/'''[\s\S]*?'''/,greedy:!0,alias:"string"},{pattern:/"""[\s\S]*?"""/,greedy:!0,alias:"string",inside:{interpolation:a}}]}),e.languages.insertBefore("coffeescript","keyword",{property:/(?!\d)\w+(?=\s*:(?!:))/}),delete e.languages.coffeescript["template-string"],e.languages.coffee=e.languages.coffeescript}(r),function(e){var 
t=/[*&][^\s[\]{},]+/,a=/!(?:<[\w\-%#;/?:@&=+$,.!~*'()[\]]+>|(?:[a-zA-Z\d-]*!)?[\w\-%#;/?:@&=+$.~*'()]+)?/,n="(?:"+a.source+"(?:[ \t]+"+t.source+")?|"+t.source+"(?:[ \t]+"+a.source+")?)",r=/(?:[^\s\x00-\x08\x0e-\x1f!"#%&'*,\-:>?@[\]`{|}\x7f-\x84\x86-\x9f\ud800-\udfff\ufffe\uffff]|[?:-])(?:[ \t]*(?:(?![#:])|:))*/.source.replace(//g,(function(){return/[^\s\x00-\x08\x0e-\x1f,[\]{}\x7f-\x84\x86-\x9f\ud800-\udfff\ufffe\uffff]/.source})),o=/"(?:[^"\\\r\n]|\\.)*"|'(?:[^'\\\r\n]|\\.)*'/.source;function i(e,t){t=(t||"").replace(/m/g,"")+"m";var a=/([:\-,[{]\s*(?:\s<>[ \t]+)?)(?:<>)(?=[ \t]*(?:$|,|\]|\}|(?:[\r\n]\s*)?#))/.source.replace(/<>/g,(function(){return n})).replace(/<>/g,(function(){return e}));return RegExp(a,t)}e.languages.yaml={scalar:{pattern:RegExp(/([\-:]\s*(?:\s<>[ \t]+)?[|>])[ \t]*(?:((?:\r?\n|\r)[ \t]+)\S[^\r\n]*(?:\2[^\r\n]+)*)/.source.replace(/<>/g,(function(){return n}))),lookbehind:!0,alias:"string"},comment:/#.*/,key:{pattern:RegExp(/((?:^|[:\-,[{\r\n?])[ \t]*(?:<>[ \t]+)?)<>(?=\s*:\s)/.source.replace(/<>/g,(function(){return n})).replace(/<>/g,(function(){return"(?:"+r+"|"+o+")"}))),lookbehind:!0,greedy:!0,alias:"atrule"},directive:{pattern:/(^[ \t]*)%.+/m,lookbehind:!0,alias:"important"},datetime:{pattern:i(/\d{4}-\d\d?-\d\d?(?:[tT]|[ \t]+)\d\d?:\d{2}:\d{2}(?:\.\d*)?(?:[ \t]*(?:Z|[-+]\d\d?(?::\d{2})?))?|\d{4}-\d{2}-\d{2}|\d\d?:\d{2}(?::\d{2}(?:\.\d*)?)?/.source),lookbehind:!0,alias:"number"},boolean:{pattern:i(/false|true/.source,"i"),lookbehind:!0,alias:"important"},null:{pattern:i(/null|~/.source,"i"),lookbehind:!0,alias:"important"},string:{pattern:i(o),lookbehind:!0,greedy:!0},number:{pattern:i(/[+-]?(?:0x[\da-f]+|0o[0-7]+|(?:\d+(?:\.\d*)?|\.\d+)(?:e[+-]?\d+)?|\.inf|\.nan)/.source,"i"),lookbehind:!0},tag:a,important:t,punctuation:/---|[:[\]{}\-,|>?]|\.\.\./},e.languages.yml=e.languages.yaml}(r),function(e){var t=/(?:\\.|[^\\\n\r]|(?:\n|\r\n?)(?![\r\n]))/.source;function a(e){return e=e.replace(//g,(function(){return 
t})),RegExp(/((?:^|[^\\])(?:\\{2})*)/.source+"(?:"+e+")")}var n=/(?:\\.|``(?:[^`\r\n]|`(?!`))+``|`[^`\r\n]+`|[^\\|\r\n`])+/.source,r=/\|?__(?:\|__)+\|?(?:(?:\n|\r\n?)|(?![\s\S]))/.source.replace(/__/g,(function(){return n})),o=/\|?[ \t]*:?-{3,}:?[ \t]*(?:\|[ \t]*:?-{3,}:?[ \t]*)+\|?(?:\n|\r\n?)/.source;e.languages.markdown=e.languages.extend("markup",{}),e.languages.insertBefore("markdown","prolog",{"front-matter-block":{pattern:/(^(?:\s*[\r\n])?)---(?!.)[\s\S]*?[\r\n]---(?!.)/,lookbehind:!0,greedy:!0,inside:{punctuation:/^---|---$/,"front-matter":{pattern:/\S+(?:\s+\S+)*/,alias:["yaml","language-yaml"],inside:e.languages.yaml}}},blockquote:{pattern:/^>(?:[\t ]*>)*/m,alias:"punctuation"},table:{pattern:RegExp("^"+r+o+"(?:"+r+")*","m"),inside:{"table-data-rows":{pattern:RegExp("^("+r+o+")(?:"+r+")*$"),lookbehind:!0,inside:{"table-data":{pattern:RegExp(n),inside:e.languages.markdown},punctuation:/\|/}},"table-line":{pattern:RegExp("^("+r+")"+o+"$"),lookbehind:!0,inside:{punctuation:/\||:?-{3,}:?/}},"table-header-row":{pattern:RegExp("^"+r+"$"),inside:{"table-header":{pattern:RegExp(n),alias:"important",inside:e.languages.markdown},punctuation:/\|/}}}},code:[{pattern:/((?:^|\n)[ \t]*\n|(?:^|\r\n?)[ \t]*\r\n?)(?: {4}|\t).+(?:(?:\n|\r\n?)(?: {4}|\t).+)*/,lookbehind:!0,alias:"keyword"},{pattern:/^```[\s\S]*?^```$/m,greedy:!0,inside:{"code-block":{pattern:/^(```.*(?:\n|\r\n?))[\s\S]+?(?=(?:\n|\r\n?)^```$)/m,lookbehind:!0},"code-language":{pattern:/^(```).+/,lookbehind:!0},punctuation:/```/}}],title:[{pattern:/\S.*(?:\n|\r\n?)(?:==+|--+)(?=[ \t]*$)/m,alias:"important",inside:{punctuation:/==+$|--+$/}},{pattern:/(^\s*)#.+/m,lookbehind:!0,alias:"important",inside:{punctuation:/^#+|#+$/}}],hr:{pattern:/(^\s*)([*-])(?:[\t ]*\2){2,}(?=\s*$)/m,lookbehind:!0,alias:"punctuation"},list:{pattern:/(^\s*)(?:[*+-]|\d+\.)(?=[\t ].)/m,lookbehind:!0,alias:"punctuation"},"url-reference":{pattern:/!?\[[^\]]+\]:[\t ]+(?:\S+|<(?:\\.|[^>\\])+>)(?:[\t 
]+(?:"(?:\\.|[^"\\])*"|'(?:\\.|[^'\\])*'|\((?:\\.|[^)\\])*\)))?/,inside:{variable:{pattern:/^(!?\[)[^\]]+/,lookbehind:!0},string:/(?:"(?:\\.|[^"\\])*"|'(?:\\.|[^'\\])*'|\((?:\\.|[^)\\])*\))$/,punctuation:/^[\[\]!:]|[<>]/},alias:"url"},bold:{pattern:a(/\b__(?:(?!_)|_(?:(?!_))+_)+__\b|\*\*(?:(?!\*)|\*(?:(?!\*))+\*)+\*\*/.source),lookbehind:!0,greedy:!0,inside:{content:{pattern:/(^..)[\s\S]+(?=..$)/,lookbehind:!0,inside:{}},punctuation:/\*\*|__/}},italic:{pattern:a(/\b_(?:(?!_)|__(?:(?!_))+__)+_\b|\*(?:(?!\*)|\*\*(?:(?!\*))+\*\*)+\*/.source),lookbehind:!0,greedy:!0,inside:{content:{pattern:/(^.)[\s\S]+(?=.$)/,lookbehind:!0,inside:{}},punctuation:/[*_]/}},strike:{pattern:a(/(~~?)(?:(?!~))+\2/.source),lookbehind:!0,greedy:!0,inside:{content:{pattern:/(^~~?)[\s\S]+(?=\1$)/,lookbehind:!0,inside:{}},punctuation:/~~?/}},"code-snippet":{pattern:/(^|[^\\`])(?:``[^`\r\n]+(?:`[^`\r\n]+)*``(?!`)|`[^`\r\n]+`(?!`))/,lookbehind:!0,greedy:!0,alias:["code","keyword"]},url:{pattern:a(/!?\[(?:(?!\]))+\](?:\([^\s)]+(?:[\t ]+"(?:\\.|[^"\\])*")?\)|[ \t]?\[(?:(?!\]))+\])/.source),lookbehind:!0,greedy:!0,inside:{operator:/^!/,content:{pattern:/(^\[)[^\]]+(?=\])/,lookbehind:!0,inside:{}},variable:{pattern:/(^\][ \t]?\[)[^\]]+(?=\]$)/,lookbehind:!0},url:{pattern:/(^\]\()[^\s)]+/,lookbehind:!0},string:{pattern:/(^[ \t]+)"(?:\\.|[^"\\])*"(?=\)$)/,lookbehind:!0}}}}),["url","bold","italic","strike"].forEach((function(t){["url","bold","italic","strike","code-snippet"].forEach((function(a){t!==a&&(e.languages.markdown[t].inside.content.inside[a]=e.languages.markdown[a])}))})),e.hooks.add("after-tokenize",(function(e){"markdown"!==e.language&&"md"!==e.language||function e(t){if(t&&"string"!=typeof t)for(var 
a=0,n=t.length;a",quot:'"'},c=String.fromCodePoint||String.fromCharCode;e.languages.md=e.languages.markdown}(r),r.languages.graphql={comment:/#.*/,description:{pattern:/(?:"""(?:[^"]|(?!""")")*"""|"(?:\\.|[^\\"\r\n])*")(?=\s*[a-z_])/i,greedy:!0,alias:"string",inside:{"language-markdown":{pattern:/(^"(?:"")?)(?!\1)[\s\S]+(?=\1$)/,lookbehind:!0,inside:r.languages.markdown}}},string:{pattern:/"""(?:[^"]|(?!""")")*"""|"(?:\\.|[^\\"\r\n])*"/,greedy:!0},number:/(?:\B-|\b)\d+(?:\.\d+)?(?:e[+-]?\d+)?\b/i,boolean:/\b(?:false|true)\b/,variable:/\$[a-z_]\w*/i,directive:{pattern:/@[a-z_]\w*/i,alias:"function"},"attr-name":{pattern:/\b[a-z_]\w*(?=\s*(?:\((?:[^()"]|"(?:\\.|[^\\"\r\n])*")*\))?:)/i,greedy:!0},"atom-input":{pattern:/\b[A-Z]\w*Input\b/,alias:"class-name"},scalar:/\b(?:Boolean|Float|ID|Int|String)\b/,constant:/\b[A-Z][A-Z_\d]*\b/,"class-name":{pattern:/(\b(?:enum|implements|interface|on|scalar|type|union)\s+|&\s*|:\s*|\[)[A-Z_]\w*/,lookbehind:!0},fragment:{pattern:/(\bfragment\s+|\.{3}\s*(?!on\b))[a-zA-Z_]\w*/,lookbehind:!0,alias:"function"},"definition-mutation":{pattern:/(\bmutation\s+)[a-zA-Z_]\w*/,lookbehind:!0,alias:"function"},"definition-query":{pattern:/(\bquery\s+)[a-zA-Z_]\w*/,lookbehind:!0,alias:"function"},keyword:/\b(?:directive|enum|extend|fragment|implements|input|interface|mutation|on|query|repeatable|scalar|schema|subscription|type|union)\b/,operator:/[!=|&]|\.{3}/,"property-query":/\w+(?=\s*\()/,object:/\w+(?=\s*\{)/,punctuation:/[!(){}\[\]:=,]/,property:/\w+/},r.hooks.add("after-tokenize",(function(e){if("graphql"===e.language)for(var t=e.tokens.filter((function(e){return"string"!=typeof e&&"comment"!==e.type&&"scalar"!==e.type})),a=0;a0)){var s=p(/^\{$/,/^\}$/);if(-1===s)continue;for(var c=a;c=0&&f(l,"variable-input")}}}}function d(e){return t[a+e]}function u(e,t){t=t||0;for(var a=0;a?|<|>)?|>[>=]?|\b(?:AND|BETWEEN|DIV|ILIKE|IN|IS|LIKE|NOT|OR|REGEXP|RLIKE|SOUNDS LIKE|XOR)\b/i,punctuation:/[;[\]()`,.]/},function(e){var 
t=e.languages.javascript["template-string"],a=t.pattern.source,n=t.inside.interpolation,r=n.inside["interpolation-punctuation"],o=n.pattern.source;function i(t,n){if(e.languages[t])return{pattern:RegExp("((?:"+n+")\\s*)"+a),lookbehind:!0,greedy:!0,inside:{"template-punctuation":{pattern:/^`|`$/,alias:"string"},"embedded-code":{pattern:/[\s\S]+/,alias:t}}}}function s(e,t){return"___"+t.toUpperCase()+"_"+e+"___"}function c(t,a,n){var r={code:t,grammar:a,language:n};return e.hooks.run("before-tokenize",r),r.tokens=e.tokenize(r.code,r.grammar),e.hooks.run("after-tokenize",r),r.tokens}function l(t){var a={};a["interpolation-punctuation"]=r;var o=e.tokenize(t,a);if(3===o.length){var i=[1,1];i.push.apply(i,c(o[1],e.languages.javascript,"javascript")),o.splice.apply(o,i)}return new e.Token("interpolation",o,n.alias,t)}function d(t,a,n){var r=e.tokenize(t,{interpolation:{pattern:RegExp(o),lookbehind:!0}}),i=0,d={},u=c(r.map((function(e){if("string"==typeof e)return e;for(var a,r=e.content;-1!==t.indexOf(a=s(i++,n)););return d[a]=r,a})).join(""),a,n),p=Object.keys(d);return i=0,function e(t){for(var a=0;a=p.length)return;var n=t[a];if("string"==typeof n||"string"==typeof n.content){var r=p[i],o="string"==typeof n?n:n.content,s=o.indexOf(r);if(-1!==s){++i;var c=o.substring(0,s),u=l(d[r]),f=o.substring(s+r.length),g=[];if(c&&g.push(c),g.push(u),f){var m=[f];e(m),g.push.apply(g,m)}"string"==typeof n?(t.splice.apply(t,[a,1].concat(g)),a+=g.length-1):n.content=g}}else{var b=n.content;Array.isArray(b)?e(b):e([b])}}}(u),new e.Token(n,u,"language-"+n,t)}e.languages.javascript["template-string"]=[i("css",/\b(?:styled(?:\([^)]*\))?(?:\s*\.\s*\w+(?:\([^)]*\))*)*|css(?:\s*\.\s*(?:global|resolve))?|createGlobalStyle|keyframes)/.source),i("html",/\bhtml|\.\s*(?:inner|outer)HTML\s*\+?=/.source),i("svg",/\bsvg/.source),i("markdown",/\b(?:markdown|md)/.source),i("graphql",/\b(?:gql|graphql(?:\s*\.\s*experimental)?)/.source),i("sql",/\bsql/.source),t].filter(Boolean);var 
u={javascript:!0,js:!0,typescript:!0,ts:!0,jsx:!0,tsx:!0};function p(e){return"string"==typeof e?e:Array.isArray(e)?e.map(p).join(""):p(e.content)}e.hooks.add("after-tokenize",(function(t){t.language in u&&function t(a){for(var n=0,r=a.length;n]|<(?:[^<>]|<[^<>]*>)*>)*>)?/,lookbehind:!0,greedy:!0,inside:null},builtin:/\b(?:Array|Function|Promise|any|boolean|console|never|number|string|symbol|unknown)\b/}),e.languages.typescript.keyword.push(/\b(?:abstract|declare|is|keyof|readonly|require)\b/,/\b(?:asserts|infer|interface|module|namespace|type)\b(?=\s*(?:[{_$a-zA-Z\xA0-\uFFFF]|$))/,/\btype\b(?=\s*(?:[\{*]|$))/),delete e.languages.typescript.parameter,delete e.languages.typescript["literal-property"];var t=e.languages.extend("typescript",{});delete t["class-name"],e.languages.typescript["class-name"].inside=t,e.languages.insertBefore("typescript","function",{decorator:{pattern:/@[$\w\xA0-\uFFFF]+/,inside:{at:{pattern:/^@/,alias:"operator"},function:/^[\s\S]+/}},"generic-function":{pattern:/#?(?!\s)[_$a-zA-Z\xA0-\uFFFF](?:(?!\s)[$\w\xA0-\uFFFF])*\s*<(?:[^<>]|<(?:[^<>]|<[^<>]*>)*>)*>(?=\s*\()/,greedy:!0,inside:{function:/^#?(?!\s)[_$a-zA-Z\xA0-\uFFFF](?:(?!\s)[$\w\xA0-\uFFFF])*/,generic:{pattern:/<[\s\S]+/,alias:"class-name",inside:t}}}}),e.languages.ts=e.languages.typescript}(r),function(e){function t(e,t){return 
RegExp(e.replace(//g,(function(){return/(?!\s)[_$a-zA-Z\xA0-\uFFFF](?:(?!\s)[$\w\xA0-\uFFFF])*/.source})),t)}e.languages.insertBefore("javascript","function-variable",{"method-variable":{pattern:RegExp("(\\.\\s*)"+e.languages.javascript["function-variable"].pattern.source),lookbehind:!0,alias:["function-variable","method","function","property-access"]}}),e.languages.insertBefore("javascript","function",{method:{pattern:RegExp("(\\.\\s*)"+e.languages.javascript.function.source),lookbehind:!0,alias:["function","property-access"]}}),e.languages.insertBefore("javascript","constant",{"known-class-name":[{pattern:/\b(?:(?:Float(?:32|64)|(?:Int|Uint)(?:8|16|32)|Uint8Clamped)?Array|ArrayBuffer|BigInt|Boolean|DataView|Date|Error|Function|Intl|JSON|(?:Weak)?(?:Map|Set)|Math|Number|Object|Promise|Proxy|Reflect|RegExp|String|Symbol|WebAssembly)\b/,alias:"class-name"},{pattern:/\b(?:[A-Z]\w*)Error\b/,alias:"class-name"}]}),e.languages.insertBefore("javascript","keyword",{imports:{pattern:t(/(\bimport\b\s*)(?:(?:\s*,\s*(?:\*\s*as\s+|\{[^{}]*\}))?|\*\s*as\s+|\{[^{}]*\})(?=\s*\bfrom\b)/.source),lookbehind:!0,inside:e.languages.javascript},exports:{pattern:t(/(\bexport\b\s*)(?:\*(?:\s*as\s+)?(?=\s*\bfrom\b)|\{[^{}]*\})/.source),lookbehind:!0,inside:e.languages.javascript}}),e.languages.javascript.keyword.unshift({pattern:/\b(?:as|default|export|from|import)\b/,alias:"module"},{pattern:/\b(?:await|break|catch|continue|do|else|finally|for|if|return|switch|throw|try|while|yield)\b/,alias:"control-flow"},{pattern:/\bnull\b/,alias:["null","nil"]},{pattern:/\bundefined\b/,alias:"nil"}),e.languages.insertBefore("javascript","operator",{spread:{pattern:/\.{3}/,alias:"operator"},arrow:{pattern:/=>/,alias:"operator"}}),e.languages.insertBefore("javascript","punctuation",{"property-access":{pattern:t(/(\.\s*)#?/.source),lookbehind:!0},"maybe-class-name":{pattern:/(^|[^$\w\xA0-\uFFFF])[A-Z][$\w\xA0-\uFFFF]+/,lookbehind:!0},dom:{pattern:/\b(?:document|(?:local|session)Storage|location|navigator|
performance|window)\b/,alias:"variable"},console:{pattern:/\bconsole(?=\s*\.)/,alias:"class-name"}});for(var a=["function","function-variable","method","method-variable","property-access"],n=0;n*\.{3}(?:[^{}]|)*\})/.source;function o(e,t){return e=e.replace(//g,(function(){return a})).replace(//g,(function(){return n})).replace(//g,(function(){return r})),RegExp(e,t)}r=o(r).source,e.languages.jsx=e.languages.extend("markup",t),e.languages.jsx.tag.pattern=o(/<\/?(?:[\w.:-]+(?:+(?:[\w.:$-]+(?:=(?:"(?:\\[\s\S]|[^\\"])*"|'(?:\\[\s\S]|[^\\'])*'|[^\s{'"/>=]+|))?|))**\/?)?>/.source),e.languages.jsx.tag.inside.tag.pattern=/^<\/?[^\s>\/]*/,e.languages.jsx.tag.inside["attr-value"].pattern=/=(?!\{)(?:"(?:\\[\s\S]|[^\\"])*"|'(?:\\[\s\S]|[^\\'])*'|[^\s'">]+)/,e.languages.jsx.tag.inside.tag.inside["class-name"]=/^[A-Z]\w*(?:\.[A-Z]\w*)*$/,e.languages.jsx.tag.inside.comment=t.comment,e.languages.insertBefore("inside","attr-name",{spread:{pattern:o(//.source),inside:e.languages.jsx}},e.languages.jsx.tag),e.languages.insertBefore("inside","special-attr",{script:{pattern:o(/=/.source),alias:"language-javascript",inside:{"script-punctuation":{pattern:/^=(?=\{)/,alias:"punctuation"},rest:e.languages.jsx}}},e.languages.jsx.tag);var i=function(e){return e?"string"==typeof e?e:"string"==typeof e.content?e.content:e.content.map(i).join(""):""},s=function(t){for(var a=[],n=0;n0&&a[a.length-1].tagName===i(r.content[0].content[1])&&a.pop():"/>"===r.content[r.content.length-1].content||a.push({tagName:i(r.content[0].content[1]),openedBraces:0}):a.length>0&&"punctuation"===r.type&&"{"===r.content?a[a.length-1].openedBraces++:a.length>0&&a[a.length-1].openedBraces>0&&"punctuation"===r.type&&"}"===r.content?a[a.length-1].openedBraces--:o=!0),(o||"string"==typeof r)&&a.length>0&&0===a[a.length-1].openedBraces){var c=i(r);n0&&("string"==typeof t[n-1]||"plain-text"===t[n-1].type)&&(c=i(t[n-1])+c,t.splice(n-1,1),n--),t[n]=new e.Token("plain-text",c,null,c)}r.content&&"string"!=typeof 
r.content&&s(r.content)}};e.hooks.add("after-tokenize",(function(e){"jsx"!==e.language&&"tsx"!==e.language||s(e.tokens)}))}(r),function(e){e.languages.diff={coord:[/^(?:\*{3}|-{3}|\+{3}).*$/m,/^@@.*@@$/m,/^\d.*$/m]};var t={"deleted-sign":"-","deleted-arrow":"<","inserted-sign":"+","inserted-arrow":">",unchanged:" ",diff:"!"};Object.keys(t).forEach((function(a){var n=t[a],r=[];/^\w+$/.test(a)||r.push(/\w+/.exec(a)[0]),"diff"===a&&r.push("bold"),e.languages.diff[a]={pattern:RegExp("^(?:["+n+"].*(?:\r\n?|\n|(?![\\s\\S])))+","m"),alias:r,inside:{line:{pattern:/(.)(?=[\s\S]).*(?:\r\n?|\n)?/,lookbehind:!0},prefix:{pattern:/[\s\S]/,alias:/\w+/.exec(a)[0]}}}})),Object.defineProperty(e.languages.diff,"PREFIXES",{value:t})}(r),r.languages.git={comment:/^#.*/m,deleted:/^[-\u2013].*/m,inserted:/^\+.*/m,string:/("|')(?:\\.|(?!\1)[^\\\r\n])*\1/,command:{pattern:/^.*\$ git .*$/m,inside:{parameter:/\s--?\w+/}},coord:/^@@.*@@$/m,"commit-sha1":/^commit \w{40}$/m},r.languages.go=r.languages.extend("clike",{string:{pattern:/(^|[^\\])"(?:\\.|[^"\\\r\n])*"|`[^`]*`/,lookbehind:!0,greedy:!0},keyword:/\b(?:break|case|chan|const|continue|default|defer|else|fallthrough|for|func|go(?:to)?|if|import|interface|map|package|range|return|select|struct|switch|type|var)\b/,boolean:/\b(?:_|false|iota|nil|true)\b/,number:[/\b0(?:b[01_]+|o[0-7_]+)i?\b/i,/\b0x(?:[a-f\d_]+(?:\.[a-f\d_]*)?|\.[a-f\d_]+)(?:p[+-]?\d+(?:_\d+)*)?i?(?!\w)/i,/(?:\b\d[\d_]*(?:\.[\d_]*)?|\B\.\d[\d_]*)(?:e[+-]?[\d_]+)?i?(?!\w)/i],operator:/[*\/%^!=]=?|\+[=+]?|-[=-]?|\|[=|]?|&(?:=|&|\^=?)?|>(?:>=?|=)?|<(?:<=?|=|-)?|:=|\.\.\./,builtin:/\b(?:append|bool|byte|cap|close|complex|complex(?:64|128)|copy|delete|error|float(?:32|64)|u?int(?:8|16|32|64)?|imag|len|make|new|panic|print(?:ln)?|real|recover|rune|string|uintptr)\b/}),r.languages.insertBefore("go","string",{char:{pattern:/'(?:\\.|[^'\\\r\n]){0,10}'/,greedy:!0}}),delete r.languages.go["class-name"],function(e){function 
t(e,t){return"___"+e.toUpperCase()+t+"___"}Object.defineProperties(e.languages["markup-templating"]={},{buildPlaceholders:{value:function(a,n,r,o){if(a.language===n){var i=a.tokenStack=[];a.code=a.code.replace(r,(function(e){if("function"==typeof o&&!o(e))return e;for(var r,s=i.length;-1!==a.code.indexOf(r=t(n,s));)++s;return i[s]=e,r})),a.grammar=e.languages.markup}}},tokenizePlaceholders:{value:function(a,n){if(a.language===n&&a.tokenStack){a.grammar=e.languages[n];var r=0,o=Object.keys(a.tokenStack);!function i(s){for(var c=0;c=o.length);c++){var l=s[c];if("string"==typeof l||l.content&&"string"==typeof l.content){var d=o[r],u=a.tokenStack[d],p="string"==typeof l?l:l.content,f=t(n,d),g=p.indexOf(f);if(g>-1){++r;var m=p.substring(0,g),b=new e.Token(n,e.tokenize(u,a.grammar),"language-"+n,u),h=p.substring(g+f.length),_=[];m&&_.push.apply(_,i([m])),_.push(b),h&&_.push.apply(_,i([h])),"string"==typeof l?s.splice.apply(s,[c,1].concat(_)):l.content=_}}else l.content&&i(l.content)}return s}(a.tokens)}}}})}(r),function(e){e.languages.handlebars={comment:/\{\{![\s\S]*?\}\}/,delimiter:{pattern:/^\{\{\{?|\}\}\}?$/,alias:"punctuation"},string:/(["'])(?:\\.|(?!\1)[^\\\r\n])*\1/,number:/\b0x[\dA-Fa-f]+\b|(?:\b\d+(?:\.\d*)?|\B\.\d+)(?:[Ee][+-]?\d+)?/,boolean:/\b(?:false|true)\b/,block:{pattern:/^(\s*(?:~\s*)?)[#\/]\S+?(?=\s*(?:~\s*)?$|\s)/,lookbehind:!0,alias:"keyword"},brackets:{pattern:/\[[^\]]+\]/,inside:{punctuation:/\[|\]/,variable:/[\s\S]+/}},punctuation:/[!"#%&':()*+,.\/;<=>@\[\\\]^`{|}~]/,variable:/[^!"#%&'()*+,\/;<=>@\[\\\]^`{|}~\s]+/},e.hooks.add("before-tokenize",(function(t){e.languages["markup-templating"].buildPlaceholders(t,"handlebars",/\{\{\{[\s\S]+?\}\}\}|\{\{[\s\S]+?\}\}/g)})),e.hooks.add("after-tokenize",(function(t){e.languages["markup-templating"].tokenizePlaceholders(t,"handlebars")})),e.languages.hbs=e.languages.handlebars}(r),r.languages.json={property:{pattern:/(^|[^\\])"(?:\\.|[^\\"\r\n])*"(?=\s*:)/,lookbehind:!0,greedy:!0},string:{pattern:/(^|[^\\])"
(?:\\.|[^\\"\r\n])*"(?!\s*:)/,lookbehind:!0,greedy:!0},comment:{pattern:/\/\/.*|\/\*[\s\S]*?(?:\*\/|$)/,greedy:!0},number:/-?\b\d+(?:\.\d+)?(?:e[+-]?\d+)?\b/i,punctuation:/[{}[\],]/,operator:/:/,boolean:/\b(?:false|true)\b/,null:{pattern:/\bnull\b/,alias:"keyword"}},r.languages.webmanifest=r.languages.json,r.languages.less=r.languages.extend("css",{comment:[/\/\*[\s\S]*?\*\//,{pattern:/(^|[^\\])\/\/.*/,lookbehind:!0}],atrule:{pattern:/@[\w-](?:\((?:[^(){}]|\([^(){}]*\))*\)|[^(){};\s]|\s+(?!\s))*?(?=\s*\{)/,inside:{punctuation:/[:()]/}},selector:{pattern:/(?:@\{[\w-]+\}|[^{};\s@])(?:@\{[\w-]+\}|\((?:[^(){}]|\([^(){}]*\))*\)|[^(){};@\s]|\s+(?!\s))*?(?=\s*\{)/,inside:{variable:/@+[\w-]+/}},property:/(?:@\{[\w-]+\}|[\w-])+(?:\+_?)?(?=\s*:)/,operator:/[+\-*\/]/}),r.languages.insertBefore("less","property",{variable:[{pattern:/@[\w-]+\s*:/,inside:{punctuation:/:/}},/@@?[\w-]+/],"mixin-usage":{pattern:/([{;]\s*)[.#](?!\d)[\w-].*?(?=[(;])/,lookbehind:!0,alias:"function"}}),r.languages.makefile={comment:{pattern:/(^|[^\\])#(?:\\(?:\r\n|[\s\S])|[^\\\r\n])*/,lookbehind:!0},string:{pattern:/(["'])(?:\\(?:\r\n|[\s\S])|(?!\1)[^\\\r\n])*\1/,greedy:!0},"builtin-target":{pattern:/\.[A-Z][^:#=\s]+(?=\s*:(?!=))/,alias:"builtin"},target:{pattern:/^(?:[^:=\s]|[ \t]+(?![\s:]))+(?=\s*:(?!=))/m,alias:"symbol",inside:{variable:/\$+(?:(?!\$)[^(){}:#=\s]+|(?=[({]))/}},variable:/\$+(?:(?!\$)[^(){}:#=\s]+|\([@*%<^+?][DF]\)|(?=[({]))/,keyword:/-include\b|\b(?:define|else|endef|endif|export|ifn?def|ifn?eq|include|override|private|sinclude|undefine|unexport|vpath)\b/,function:{pattern:/(\()(?:abspath|addsuffix|and|basename|call|dir|error|eval|file|filter(?:-out)?|findstring|firstword|flavor|foreach|guile|if|info|join|lastword|load|notdir|or|origin|patsubst|realpath|shell|sort|strip|subst|suffix|value|warning|wildcard|word(?:list|s)?)(?=[ 
\t])/,lookbehind:!0},operator:/(?:::|[?:+!])?=|[|@]/,punctuation:/[:;(){}]/},r.languages.objectivec=r.languages.extend("c",{string:{pattern:/@?"(?:\\(?:\r\n|[\s\S])|[^"\\\r\n])*"/,greedy:!0},keyword:/\b(?:asm|auto|break|case|char|const|continue|default|do|double|else|enum|extern|float|for|goto|if|in|inline|int|long|register|return|self|short|signed|sizeof|static|struct|super|switch|typedef|typeof|union|unsigned|void|volatile|while)\b|(?:@interface|@end|@implementation|@protocol|@class|@public|@protected|@private|@property|@try|@catch|@finally|@throw|@synthesize|@dynamic|@selector)\b/,operator:/-[->]?|\+\+?|!=?|<>?=?|==?|&&?|\|\|?|[~^%?*\/@]/}),delete r.languages.objectivec["class-name"],r.languages.objc=r.languages.objectivec,r.languages.ocaml={comment:{pattern:/\(\*[\s\S]*?\*\)/,greedy:!0},char:{pattern:/'(?:[^\\\r\n']|\\(?:.|[ox]?[0-9a-f]{1,3}))'/i,greedy:!0},string:[{pattern:/"(?:\\(?:[\s\S]|\r\n)|[^\\\r\n"])*"/,greedy:!0},{pattern:/\{([a-z_]*)\|[\s\S]*?\|\1\}/,greedy:!0}],number:[/\b(?:0b[01][01_]*|0o[0-7][0-7_]*)\b/i,/\b0x[a-f0-9][a-f0-9_]*(?:\.[a-f0-9_]*)?(?:p[+-]?\d[\d_]*)?(?!\w)/i,/\b\d[\d_]*(?:\.[\d_]*)?(?:e[+-]?\d[\d_]*)?(?!\w)/i],directive:{pattern:/\B#\w+/,alias:"property"},label:{pattern:/\B~\w+/,alias:"property"},"type-variable":{pattern:/\B'\w+/,alias:"function"},variant:{pattern:/`\w+/,alias:"symbol"},keyword:/\b(?:as|assert|begin|class|constraint|do|done|downto|else|end|exception|external|for|fun|function|functor|if|in|include|inherit|initializer|lazy|let|match|method|module|mutable|new|nonrec|object|of|open|private|rec|sig|struct|then|to|try|type|val|value|virtual|when|where|while|with)\b/,boolean:/\b(?:false|true)\b/,"operator-like-punctuation":{pattern:/\[[<>|]|[>|]\]|\{<|>\}/,alias:"punctuation"},operator:/\.[.~]|:[=>]|[=<>@^|&+\-*\/$%!?~][!$%&*+\-.\/:<=>?@^|~]*|\b(?:and|asr|land|lor|lsl|lsr|lxor|mod|or)\b/,punctuation:/;;|::|[(){}\[\].,:;#]|\b_\b/},r.languages.python={comment:{pattern:/(^|[^\\])#.*/,lookbehind:!0,greedy:!0},"string-interpolatio
n":{pattern:/(?:f|fr|rf)(?:("""|''')[\s\S]*?\1|("|')(?:\\.|(?!\2)[^\\\r\n])*\2)/i,greedy:!0,inside:{interpolation:{pattern:/((?:^|[^{])(?:\{\{)*)\{(?!\{)(?:[^{}]|\{(?!\{)(?:[^{}]|\{(?!\{)(?:[^{}])+\})+\})+\}/,lookbehind:!0,inside:{"format-spec":{pattern:/(:)[^:(){}]+(?=\}$)/,lookbehind:!0},"conversion-option":{pattern:/![sra](?=[:}]$)/,alias:"punctuation"},rest:null}},string:/[\s\S]+/}},"triple-quoted-string":{pattern:/(?:[rub]|br|rb)?("""|''')[\s\S]*?\1/i,greedy:!0,alias:"string"},string:{pattern:/(?:[rub]|br|rb)?("|')(?:\\.|(?!\1)[^\\\r\n])*\1/i,greedy:!0},function:{pattern:/((?:^|\s)def[ \t]+)[a-zA-Z_]\w*(?=\s*\()/g,lookbehind:!0},"class-name":{pattern:/(\bclass\s+)\w+/i,lookbehind:!0},decorator:{pattern:/(^[\t ]*)@\w+(?:\.\w+)*/m,lookbehind:!0,alias:["annotation","punctuation"],inside:{punctuation:/\./}},keyword:/\b(?:_(?=\s*:)|and|as|assert|async|await|break|case|class|continue|def|del|elif|else|except|exec|finally|for|from|global|if|import|in|is|lambda|match|nonlocal|not|or|pass|print|raise|return|try|while|with|yield)\b/,builtin:/\b(?:__import__|abs|all|any|apply|ascii|basestring|bin|bool|buffer|bytearray|bytes|callable|chr|classmethod|cmp|coerce|compile|complex|delattr|dict|dir|divmod|enumerate|eval|execfile|file|filter|float|format|frozenset|getattr|globals|hasattr|hash|help|hex|id|input|int|intern|isinstance|issubclass|iter|len|list|locals|long|map|max|memoryview|min|next|object|oct|open|ord|pow|property|range|raw_input|reduce|reload|repr|reversed|round|set|setattr|slice|sorted|staticmethod|str|sum|super|tuple|type|unichr|unicode|vars|xrange|zip)\b/,boolean:/\b(?:False|None|True)\b/,number:/\b0(?:b(?:_?[01])+|o(?:_?[0-7])+|x(?:_?[a-f0-9])+)\b|(?:\b\d+(?:_\d+)*(?:\.(?:\d+(?:_\d+)*)?)?|\B\.\d+(?:_\d+)*)(?:e[+-]?\d+(?:_\d+)*)?j?(?!\w)/i,operator:/[-+%=]=?|!=|:=|\*\*?=?|\/\/?=?|<[<=>]?|>[=>]?|[&|^~]/,punctuation:/[{}[\];(),.:]/},r.languages.python["string-interpolation"].inside.interpolation.inside.rest=r.languages.python,r.languages.py=r.languages.python,r.la
nguages.reason=r.languages.extend("clike",{string:{pattern:/"(?:\\(?:\r\n|[\s\S])|[^\\\r\n"])*"/,greedy:!0},"class-name":/\b[A-Z]\w*/,keyword:/\b(?:and|as|assert|begin|class|constraint|do|done|downto|else|end|exception|external|for|fun|function|functor|if|in|include|inherit|initializer|lazy|let|method|module|mutable|new|nonrec|object|of|open|or|private|rec|sig|struct|switch|then|to|try|type|val|virtual|when|while|with)\b/,operator:/\.{3}|:[:=]|\|>|->|=(?:==?|>)?|<=?|>=?|[|^?'#!~`]|[+\-*\/]\.?|\b(?:asr|land|lor|lsl|lsr|lxor|mod)\b/}),r.languages.insertBefore("reason","class-name",{char:{pattern:/'(?:\\x[\da-f]{2}|\\o[0-3][0-7][0-7]|\\\d{3}|\\.|[^'\\\r\n])'/,greedy:!0},constructor:/\b[A-Z]\w*\b(?!\s*\.)/,label:{pattern:/\b[a-z]\w*(?=::)/,alias:"symbol"}}),delete r.languages.reason.function,function(e){e.languages.sass=e.languages.extend("css",{comment:{pattern:/^([ \t]*)\/[\/*].*(?:(?:\r?\n|\r)\1[ \t].+)*/m,lookbehind:!0,greedy:!0}}),e.languages.insertBefore("sass","atrule",{"atrule-line":{pattern:/^(?:[ \t]*)[@+=].+/m,greedy:!0,inside:{atrule:/(?:@[\w-]+|[+=])/}}}),delete e.languages.sass.atrule;var t=/\$[-\w]+|#\{\$[-\w]+\}/,a=[/[+*\/%]|[=!]=|<=?|>=?|\b(?:and|not|or)\b/,{pattern:/(\s)-(?=\s)/,lookbehind:!0}];e.languages.insertBefore("sass","property",{"variable-line":{pattern:/^[ \t]*\$.+/m,greedy:!0,inside:{punctuation:/:/,variable:t,operator:a}},"property-line":{pattern:/^[ \t]*(?:[^:\s]+ *:.*|:[^:\s].*)/m,greedy:!0,inside:{property:[/[^:\s]+(?=\s*:)/,{pattern:/(:)[^:\s]+/,lookbehind:!0}],punctuation:/:/,variable:t,operator:a,important:e.languages.sass.important}}}),delete e.languages.sass.property,delete e.languages.sass.important,e.languages.insertBefore("sass","punctuation",{selector:{pattern:/^([ \t]*)\S(?:,[^,\r\n]+|[^,\r\n]*)(?:,[^,\r\n]+)*(?:,(?:\r?\n|\r)\1[ 
\t]+\S(?:,[^,\r\n]+|[^,\r\n]*)(?:,[^,\r\n]+)*)*/m,lookbehind:!0,greedy:!0}})}(r),r.languages.scss=r.languages.extend("css",{comment:{pattern:/(^|[^\\])(?:\/\*[\s\S]*?\*\/|\/\/.*)/,lookbehind:!0},atrule:{pattern:/@[\w-](?:\([^()]+\)|[^()\s]|\s+(?!\s))*?(?=\s+[{;])/,inside:{rule:/@[\w-]+/}},url:/(?:[-a-z]+-)?url(?=\()/i,selector:{pattern:/(?=\S)[^@;{}()]?(?:[^@;{}()\s]|\s+(?!\s)|#\{\$[-\w]+\})+(?=\s*\{(?:\}|\s|[^}][^:{}]*[:{][^}]))/,inside:{parent:{pattern:/&/,alias:"important"},placeholder:/%[-\w]+/,variable:/\$[-\w]+|#\{\$[-\w]+\}/}},property:{pattern:/(?:[-\w]|\$[-\w]|#\{\$[-\w]+\})+(?=\s*:)/,inside:{variable:/\$[-\w]+|#\{\$[-\w]+\}/}}}),r.languages.insertBefore("scss","atrule",{keyword:[/@(?:content|debug|each|else(?: if)?|extend|for|forward|function|if|import|include|mixin|return|use|warn|while)\b/i,{pattern:/( )(?:from|through)(?= )/,lookbehind:!0}]}),r.languages.insertBefore("scss","important",{variable:/\$[-\w]+|#\{\$[-\w]+\}/}),r.languages.insertBefore("scss","function",{"module-modifier":{pattern:/\b(?:as|hide|show|with)\b/i,alias:"keyword"},placeholder:{pattern:/%[-\w]+/,alias:"selector"},statement:{pattern:/\B!(?:default|optional)\b/i,alias:"keyword"},boolean:/\b(?:false|true)\b/,null:{pattern:/\bnull\b/,alias:"keyword"},operator:{pattern:/(\s)(?:[-+*\/%]|[=!]=|<=?|>=?|and|not|or)(?=\s)/,lookbehind:!0}}),r.languages.scss.atrule.inside.rest=r.languages.scss,function(e){var 
t={pattern:/(\b\d+)(?:%|[a-z]+)/,lookbehind:!0},a={pattern:/(^|[^\w.-])-?(?:\d+(?:\.\d+)?|\.\d+)/,lookbehind:!0},n={comment:{pattern:/(^|[^\\])(?:\/\*[\s\S]*?\*\/|\/\/.*)/,lookbehind:!0},url:{pattern:/\burl\((["']?).*?\1\)/i,greedy:!0},string:{pattern:/("|')(?:(?!\1)[^\\\r\n]|\\(?:\r\n|[\s\S]))*\1/,greedy:!0},interpolation:null,func:null,important:/\B!(?:important|optional)\b/i,keyword:{pattern:/(^|\s+)(?:(?:else|for|if|return|unless)(?=\s|$)|@[\w-]+)/,lookbehind:!0},hexcode:/#[\da-f]{3,6}/i,color:[/\b(?:AliceBlue|AntiqueWhite|Aqua|Aquamarine|Azure|Beige|Bisque|Black|BlanchedAlmond|Blue|BlueViolet|Brown|BurlyWood|CadetBlue|Chartreuse|Chocolate|Coral|CornflowerBlue|Cornsilk|Crimson|Cyan|DarkBlue|DarkCyan|DarkGoldenRod|DarkGr[ae]y|DarkGreen|DarkKhaki|DarkMagenta|DarkOliveGreen|DarkOrange|DarkOrchid|DarkRed|DarkSalmon|DarkSeaGreen|DarkSlateBlue|DarkSlateGr[ae]y|DarkTurquoise|DarkViolet|DeepPink|DeepSkyBlue|DimGr[ae]y|DodgerBlue|FireBrick|FloralWhite|ForestGreen|Fuchsia|Gainsboro|GhostWhite|Gold|GoldenRod|Gr[ae]y|Green|GreenYellow|HoneyDew|HotPink|IndianRed|Indigo|Ivory|Khaki|Lavender|LavenderBlush|LawnGreen|LemonChiffon|LightBlue|LightCoral|LightCyan|LightGoldenRodYellow|LightGr[ae]y|LightGreen|LightPink|LightSalmon|LightSeaGreen|LightSkyBlue|LightSlateGr[ae]y|LightSteelBlue|LightYellow|Lime|LimeGreen|Linen|Magenta|Maroon|MediumAquaMarine|MediumBlue|MediumOrchid|MediumPurple|MediumSeaGreen|MediumSlateBlue|MediumSpringGreen|MediumTurquoise|MediumVioletRed|MidnightBlue|MintCream|MistyRose|Moccasin|NavajoWhite|Navy|OldLace|Olive|OliveDrab|Orange|OrangeRed|Orchid|PaleGoldenRod|PaleGreen|PaleTurquoise|PaleVioletRed|PapayaWhip|PeachPuff|Peru|Pink|Plum|PowderBlue|Purple|Red|RosyBrown|RoyalBlue|SaddleBrown|Salmon|SandyBrown|SeaGreen|SeaShell|Sienna|Silver|SkyBlue|SlateBlue|SlateGr[ae]y|Snow|SpringGreen|SteelBlue|Tan|Teal|Thistle|Tomato|Transparent|Turquoise|Violet|Wheat|White|WhiteSmoke|Yellow|YellowGreen)\b/i,{pattern:/\b(?:hsl|rgb)\(\s*\d{1,3}\s*,\s*\d{1,3}%?\s*,\s*\d{1,3}%?
\s*\)\B|\b(?:hsl|rgb)a\(\s*\d{1,3}\s*,\s*\d{1,3}%?\s*,\s*\d{1,3}%?\s*,\s*(?:0|0?\.\d+|1)\s*\)\B/i,inside:{unit:t,number:a,function:/[\w-]+(?=\()/,punctuation:/[(),]/}}],entity:/\\[\da-f]{1,8}/i,unit:t,boolean:/\b(?:false|true)\b/,operator:[/~|[+!\/%<>?=]=?|[-:]=|\*[*=]?|\.{2,3}|&&|\|\||\B-\B|\b(?:and|in|is(?: a| defined| not|nt)?|not|or)\b/],number:a,punctuation:/[{}()\[\];:,]/};n.interpolation={pattern:/\{[^\r\n}:]+\}/,alias:"variable",inside:{delimiter:{pattern:/^\{|\}$/,alias:"punctuation"},rest:n}},n.func={pattern:/[\w-]+\([^)]*\).*/,inside:{function:/^[^(]+/,rest:n}},e.languages.stylus={"atrule-declaration":{pattern:/(^[ \t]*)@.+/m,lookbehind:!0,inside:{atrule:/^@[\w-]+/,rest:n}},"variable-declaration":{pattern:/(^[ \t]*)[\w$-]+\s*.?=[ \t]*(?:\{[^{}]*\}|\S.*|$)/m,lookbehind:!0,inside:{variable:/^\S+/,rest:n}},statement:{pattern:/(^[ \t]*)(?:else|for|if|return|unless)[ \t].+/m,lookbehind:!0,inside:{keyword:/^\S+/,rest:n}},"property-declaration":{pattern:/((?:^|\{)([ \t]*))(?:[\w-]|\{[^}\r\n]+\})+(?:\s*:\s*|[ \t]+)(?!\s)[^{\r\n]*(?:;|[^{\r\n,]$(?!(?:\r?\n|\r)(?:\{|\2[ \t])))/m,lookbehind:!0,inside:{property:{pattern:/^[^\s:]+/,inside:{interpolation:n.interpolation}},rest:n}},selector:{pattern:/(^[ \t]*)(?:(?=\S)(?:[^{}\r\n:()]|::?[\w-]+(?:\([^)\r\n]*\)|(?![\w-]))|\{[^}\r\n]+\})+)(?:(?:\r?\n|\r)(?:\1(?:(?=\S)(?:[^{}\r\n:()]|::?[\w-]+(?:\([^)\r\n]*\)|(?![\w-]))|\{[^}\r\n]+\})+)))*(?:,$|\{|(?=(?:\r?\n|\r)(?:\{|\1[ \t])))/m,lookbehind:!0,inside:{interpolation:n.interpolation,comment:n.comment,punctuation:/[{},]/}},func:n.func,string:n.string,comment:{pattern:/(^|[^\\])(?:\/\*[\s\S]*?\*\/|\/\/.*)/,lookbehind:!0,greedy:!0},interpolation:n.interpolation,punctuation:/[{}()\[\];:.]/}}(r),function(e){var t=e.util.clone(e.languages.typescript);e.languages.tsx=e.languages.extend("jsx",t),delete e.languages.tsx.parameter,delete e.languages.tsx["literal-property"];var 
a=e.languages.tsx.tag;a.pattern=RegExp(/(^|[^\w$]|(?=<\/))/.source+"(?:"+a.pattern.source+")",a.pattern.flags),a.lookbehind=!0}(r),r.languages.wasm={comment:[/\(;[\s\S]*?;\)/,{pattern:/;;.*/,greedy:!0}],string:{pattern:/"(?:\\[\s\S]|[^"\\])*"/,greedy:!0},keyword:[{pattern:/\b(?:align|offset)=/,inside:{operator:/=/}},{pattern:/\b(?:(?:f32|f64|i32|i64)(?:\.(?:abs|add|and|ceil|clz|const|convert_[su]\/i(?:32|64)|copysign|ctz|demote\/f64|div(?:_[su])?|eqz?|extend_[su]\/i32|floor|ge(?:_[su])?|gt(?:_[su])?|le(?:_[su])?|load(?:(?:8|16|32)_[su])?|lt(?:_[su])?|max|min|mul|neg?|nearest|or|popcnt|promote\/f32|reinterpret\/[fi](?:32|64)|rem_[su]|rot[lr]|shl|shr_[su]|sqrt|store(?:8|16|32)?|sub|trunc(?:_[su]\/f(?:32|64))?|wrap\/i64|xor))?|memory\.(?:grow|size))\b/,inside:{punctuation:/\./}},/\b(?:anyfunc|block|br(?:_if|_table)?|call(?:_indirect)?|data|drop|elem|else|end|export|func|get_(?:global|local)|global|if|import|local|loop|memory|module|mut|nop|offset|param|result|return|select|set_(?:global|local)|start|table|tee_local|then|type|unreachable)\b/],variable:/\$[\w!#$%&'*+\-./:<=>?@\\^`|~]+/,number:/[+-]?\b(?:\d(?:_?\d)*(?:\.\d(?:_?\d)*)?(?:[eE][+-]?\d(?:_?\d)*)?|0x[\da-fA-F](?:_?[\da-fA-F])*(?:\.[\da-fA-F](?:_?[\da-fA-D])*)?(?:[pP][+-]?\d(?:_?\d)*)?)\b|\binf\b|\bnan(?::0x[\da-fA-F](?:_?[\da-fA-D])*)?\b/,punctuation:/[()]/};const o=r},75538:()=>{!function(e){var 
t={pattern:/((?:^|[^\\$])(?:\\{2})*)\$(?:\w+|\{[^{}]*\})/,lookbehind:!0,inside:{"interpolation-punctuation":{pattern:/^\$\{?|\}$/,alias:"punctuation"},expression:{pattern:/[\s\S]+/,inside:null}}};e.languages.groovy=e.languages.extend("clike",{string:{pattern:/'''(?:[^\\]|\\[\s\S])*?'''|'(?:\\.|[^\\'\r\n])*'/,greedy:!0},keyword:/\b(?:abstract|as|assert|boolean|break|byte|case|catch|char|class|const|continue|def|default|do|double|else|enum|extends|final|finally|float|for|goto|if|implements|import|in|instanceof|int|interface|long|native|new|package|private|protected|public|return|short|static|strictfp|super|switch|synchronized|this|throw|throws|trait|transient|try|void|volatile|while)\b/,number:/\b(?:0b[01_]+|0x[\da-f_]+(?:\.[\da-f_p\-]+)?|[\d_]+(?:\.[\d_]+)?(?:e[+-]?\d+)?)[glidf]?\b/i,operator:{pattern:/(^|[^.])(?:~|==?~?|\?[.:]?|\*(?:[.=]|\*=?)?|\.[@&]|\.\.<|\.\.(?!\.)|-[-=>]?|\+[+=]?|!=?|<(?:<=?|=>?)?|>(?:>>?=?|=)?|&[&=]?|\|[|=]?|\/=?|\^=?|%=?)/,lookbehind:!0},punctuation:/\.+|[{}[\];(),:$]/}),e.languages.insertBefore("groovy","string",{shebang:{pattern:/#!.+/,alias:"comment",greedy:!0},"interpolation-string":{pattern:/"""(?:[^\\]|\\[\s\S])*?"""|(["/])(?:\\.|(?!\1)[^\\\r\n])*\1|\$\/(?:[^/$]|\$(?:[/$]|(?![/$]))|\/(?!\$))*\/\$/,greedy:!0,inside:{interpolation:t,string:/[\s\S]+/}}}),e.languages.insertBefore("groovy","punctuation",{"spock-block":/\b(?:and|cleanup|expect|given|setup|then|when|where):/}),e.languages.insertBefore("groovy","function",{annotation:{pattern:/(^|[^.])@\w+/,lookbehind:!0,alias:"punctuation"}}),t.inside.expression.inside=e.languages.groovy}(Prism)},96976:()=>{!function(e){var 
t=/\b(?:abstract|assert|boolean|break|byte|case|catch|char|class|const|continue|default|do|double|else|enum|exports|extends|final|finally|float|for|goto|if|implements|import|instanceof|int|interface|long|module|native|new|non-sealed|null|open|opens|package|permits|private|protected|provides|public|record(?!\s*[(){}[\]<>=%~.:,;?+\-*/&|^])|requires|return|sealed|short|static|strictfp|super|switch|synchronized|this|throw|throws|to|transient|transitive|try|uses|var|void|volatile|while|with|yield)\b/,a=/(?:[a-z]\w*\s*\.\s*)*(?:[A-Z]\w*\s*\.\s*)*/.source,n={pattern:RegExp(/(^|[^\w.])/.source+a+/[A-Z](?:[\d_A-Z]*[a-z]\w*)?\b/.source),lookbehind:!0,inside:{namespace:{pattern:/^[a-z]\w*(?:\s*\.\s*[a-z]\w*)*(?:\s*\.)?/,inside:{punctuation:/\./}},punctuation:/\./}};e.languages.java=e.languages.extend("clike",{string:{pattern:/(^|[^\\])"(?:\\.|[^"\\\r\n])*"/,lookbehind:!0,greedy:!0},"class-name":[n,{pattern:RegExp(/(^|[^\w.])/.source+a+/[A-Z]\w*(?=\s+\w+\s*[;,=()]|\s*(?:\[[\s,]*\]\s*)?::\s*new\b)/.source),lookbehind:!0,inside:n.inside},{pattern:RegExp(/(\b(?:class|enum|extends|implements|instanceof|interface|new|record|throws)\s+)/.source+a+/[A-Z]\w*\b/.source),lookbehind:!0,inside:n.inside}],keyword:t,function:[e.languages.clike.function,{pattern:/(::\s*)[a-z_]\w*/,lookbehind:!0}],number:/\b0b[01][01_]*L?\b|\b0x(?:\.[\da-f_p+-]+|[\da-f_]+(?:\.[\da-f_p+-]+)?)\b|(?:\b\d[\d_]*(?:\.[\d_]*)?|\B\.\d[\d_]*)(?:e[+-]?\d[\d_]*)?[dfl]?/i,operator:{pattern:/(^|[^.])(?:<<=?|>>>?=?|->|--|\+\+|&&|\|\||::|[?:~]|[-+*/%&|^!=<>]=?)/m,lookbehind:!0},constant:/\b[A-Z][A-Z_\d]+\b/}),e.languages.insertBefore("java","string",{"triple-quoted-string":{pattern:/"""[ 
\t]*[\r\n](?:(?:"|"")?(?:\\.|[^"\\]))*"""/,greedy:!0,alias:"string"},char:{pattern:/'(?:\\.|[^'\\\r\n]){1,6}'/,greedy:!0}}),e.languages.insertBefore("java","class-name",{annotation:{pattern:/(^|[^.])@\w+(?:\s*\.\s*\w+)*/,lookbehind:!0,alias:"punctuation"},generics:{pattern:/<(?:[\w\s,.?]|&(?!&)|<(?:[\w\s,.?]|&(?!&)|<(?:[\w\s,.?]|&(?!&)|<(?:[\w\s,.?]|&(?!&))*>)*>)*>)*>/,inside:{"class-name":n,keyword:t,punctuation:/[<>(),.:]/,operator:/[?&|]/}},import:[{pattern:RegExp(/(\bimport\s+)/.source+a+/(?:[A-Z]\w*|\*)(?=\s*;)/.source),lookbehind:!0,inside:{namespace:n.inside.namespace,punctuation:/\./,operator:/\*/,"class-name":/\w+/}},{pattern:RegExp(/(\bimport\s+static\s+)/.source+a+/(?:\w+|\*)(?=\s*;)/.source),lookbehind:!0,alias:"static",inside:{namespace:n.inside.namespace,static:/\b\w+$/,punctuation:/\./,operator:/\*/,"class-name":/\w+/}}],namespace:{pattern:RegExp(/(\b(?:exports|import(?:\s+static)?|module|open|opens|package|provides|requires|to|transitive|uses|with)\s+)(?!)[a-z]\w*(?:\.[a-z]\w*)*\.?/.source.replace(//g,(function(){return 
t.source}))),lookbehind:!0,inside:{punctuation:/\./}}})}(Prism)},64252:()=>{Prism.languages.scala=Prism.languages.extend("java",{"triple-quoted-string":{pattern:/"""[\s\S]*?"""/,greedy:!0,alias:"string"},string:{pattern:/("|')(?:\\.|(?!\1)[^\\\r\n])*\1/,greedy:!0},keyword:/<-|=>|\b(?:abstract|case|catch|class|def|derives|do|else|enum|extends|extension|final|finally|for|forSome|given|if|implicit|import|infix|inline|lazy|match|new|null|object|opaque|open|override|package|private|protected|return|sealed|self|super|this|throw|trait|transparent|try|type|using|val|var|while|with|yield)\b/,number:/\b0x(?:[\da-f]*\.)?[\da-f]+|(?:\b\d+(?:\.\d*)?|\B\.\d+)(?:e\d+)?[dfl]?/i,builtin:/\b(?:Any|AnyRef|AnyVal|Boolean|Byte|Char|Double|Float|Int|Long|Nothing|Short|String|Unit)\b/,symbol:/'[^\d\s\\]\w*/}),Prism.languages.insertBefore("scala","triple-quoted-string",{"string-interpolation":{pattern:/\b[a-z]\w*(?:"""(?:[^$]|\$(?:[^{]|\{(?:[^{}]|\{[^{}]*\})*\}))*?"""|"(?:[^$"\r\n]|\$(?:[^{]|\{(?:[^{}]|\{[^{}]*\})*\}))*")/i,greedy:!0,inside:{id:{pattern:/^\w+/,greedy:!0,alias:"function"},escape:{pattern:/\\\$"|\$[$"]/,greedy:!0,alias:"symbol"},interpolation:{pattern:/\$(?:\w+|\{(?:[^{}]|\{[^{}]*\})*\})/,greedy:!0,inside:{punctuation:/^\$\{?|\}$/,expression:{pattern:/[\s\S]+/,inside:Prism.languages.scala}}},string:/[\s\S]+/}}}),delete Prism.languages.scala["class-name"],delete Prism.languages.scala.function,delete Prism.languages.scala.constant},56864:(e,t,a)=>{var n={"./prism-groovy":75538,"./prism-java":96976,"./prism-scala":64252};function r(e){var t=o(e);return a(t)}function o(e){if(!a.o(n,e)){var t=new Error("Cannot find module '"+e+"'");throw t.code="MODULE_NOT_FOUND",t}return n[e]}r.keys=function(){return Object.keys(n)},r.resolve=o,e.exports=r,r.id=56864},2694:(e,t,a)=>{"use strict";var n=a(6925);function r(){}function o(){}o.resetWarningCache=r,e.exports=function(){function e(e,t,a,r,o,i){if(i!==n){var s=new Error("Calling PropTypes validators directly is not supported by the 
`prop-types` package. Use PropTypes.checkPropTypes() to call them. Read more at http://fb.me/use-check-prop-types");throw s.name="Invariant Violation",s}}function t(){return e}e.isRequired=e;var a={array:e,bigint:e,bool:e,func:e,number:e,object:e,string:e,symbol:e,any:e,arrayOf:t,element:e,elementType:e,instanceOf:t,node:e,objectOf:t,oneOf:t,oneOfType:t,shape:t,exact:t,checkPropTypes:o,resetWarningCache:r};return a.PropTypes=a,a}},5556:(e,t,a)=>{e.exports=a(2694)()},6925:e=>{"use strict";e.exports="SECRET_DO_NOT_PASS_THIS_OR_YOU_WILL_BE_FIRED"},22551:(e,t,a)=>{"use strict";var n=a(96540),r=a(45228),o=a(69982);function i(e){for(var t="https://reactjs.org/docs/error-decoder.html?invariant="+e,a=1;a