diff --git a/README.md b/README.md deleted file mode 100644 index 78a4c64f..00000000 --- a/README.md +++ /dev/null @@ -1 +0,0 @@ -# data-platform-field-docs diff --git a/_images/VAST_Data_Source_dialog_1.png b/_images/VAST_Data_Source_dialog_1.png new file mode 100644 index 00000000..187998ff Binary files /dev/null and b/_images/VAST_Data_Source_dialog_1.png differ diff --git a/_images/VAST_Data_Source_dialog_2.png b/_images/VAST_Data_Source_dialog_2.png new file mode 100644 index 00000000..7cee62f0 Binary files /dev/null and b/_images/VAST_Data_Source_dialog_2.png differ diff --git a/_images/awscredprovider.png b/_images/awscredprovider.png new file mode 100644 index 00000000..4110010c Binary files /dev/null and b/_images/awscredprovider.png differ diff --git a/_images/common_options.png b/_images/common_options.png new file mode 100644 index 00000000..8a7e4f2e Binary files /dev/null and b/_images/common_options.png differ diff --git a/_images/importvastdb.png b/_images/importvastdb.png new file mode 100644 index 00000000..4a5d669b Binary files /dev/null and b/_images/importvastdb.png differ diff --git a/_images/lists3processor.png b/_images/lists3processor.png new file mode 100644 index 00000000..ab7e7ba2 Binary files /dev/null and b/_images/lists3processor.png differ diff --git a/_images/overview.png b/_images/overview.png new file mode 100644 index 00000000..1b9da90b Binary files /dev/null and b/_images/overview.png differ diff --git a/_images/sparkui.png b/_images/sparkui.png new file mode 100644 index 00000000..312b184b Binary files /dev/null and b/_images/sparkui.png differ diff --git a/_sources/intro.md b/_sources/intro.md new file mode 100644 index 00000000..b97d7e4e --- /dev/null +++ b/_sources/intro.md @@ -0,0 +1,5 @@ +# Data Platform Field Docs + +Welcome to the VAST Data Platform Field Documentation + +This site is a comprehensive collection of documentation curated by VAST Data’s Field Engineering team. Our goal is to help accelerate your journey with VAST, offering insights, best practices, and technical guidance to ensure you maximize the potential of VAST’s cutting-edge technology. Whether you’re deploying, optimizing, or scaling with VAST, this resource will provide you with the tools and knowledge you need to succeed. Explore the documentation to discover powerful solutions, practical examples, and expert advice tailored to your needs. diff --git a/_sources/vast_catalog/overview.md b/_sources/vast_catalog/overview.md new file mode 100644 index 00000000..dabc56a7 --- /dev/null +++ b/_sources/vast_catalog/overview.md @@ -0,0 +1,7 @@ +# Vast Catalog Introduction + +The VAST Catalog is the foundation for a Semantic Layer. This layer, composed on top of files and objects, eliminates the need to create or maintain separate systems. Now line of business and domain experts can quickly discover and access data using standard search terms. + +See also: + +- Blog: [VAST Catalog: Treat Your File System Like a Database](https://www.vastdata.com/blog/vast-catalog-treat-your-file-system-like-a-database) diff --git a/_sources/vast_catalog/python-sdk-vast-catalog.ipynb b/_sources/vast_catalog/python-sdk-vast-catalog.ipynb new file mode 100644 index 00000000..b2145df1 --- /dev/null +++ b/_sources/vast_catalog/python-sdk-vast-catalog.ipynb @@ -0,0 +1,1849 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "55877738", + "metadata": {}, + "source": [ + "# Python SDK Reference\n", + "\n", + "**WARNING**: This notebook is in the process of being migrated to Vast Data Platform Field Docs. It will probably not run yet." + ] + }, + { + "cell_type": "markdown", + "id": "97c45f3e", + "metadata": {}, + "source": [ + "## Deploy boto3 library for S3\n", + "\n", + "We will download the boto3 standard S3 library to get data into the S3 compatible VAST Datastore" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e32e952e", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install --quiet boto3 | tail -5" + ] + }, + { + "cell_type": "markdown", + "id": "7f33ed49", + "metadata": {}, + "source": [ + "## Setup demo environment" + ] + }, + { + "cell_type": "markdown", + "id": "cae3b134", + "metadata": {}, + "source": [ + "Setup Python's behavior to be more strict, including raising errors for some cases where variables are accidentally undefined." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "70414184", + "metadata": {}, + "outputs": [], + "source": [ + "from __future__ import annotations # Enable stricter type checking" + ] + }, + { + "cell_type": "markdown", + "id": "c1dd49f8", + "metadata": {}, + "source": [ + "Import some utility functions for this demo." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1614f62", + "metadata": {}, + "outputs": [], + "source": [ + "import sys \n", + "sys.path.insert(1, '../misc/')\n", + "\n", + "from vastdb_demo_util import get_connection_vars, upload_to_s3, delete_s3_object, list_objects_in_bucket" + ] + }, + { + "cell_type": "markdown", + "id": "74bf128e-5b91-41e0-965b-7769192363b0", + "metadata": {}, + "source": [ + "## Upload S3 Object" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "5b61ebd5-8561-4a04-ba30-386a4e0945aa", + "metadata": { + "execution": { + "iopub.execute_input": "2024-03-13T12:23:00.192680Z", + "iopub.status.busy": "2024-03-13T12:23:00.191972Z", + "iopub.status.idle": "2024-03-13T12:23:00.249357Z", + "shell.execute_reply": "2024-03-13T12:23:00.248658Z", + "shell.execute_reply.started": "2024-03-13T12:23:00.192618Z" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Uploaded Object\n", + "Objects in bucket vastdb with prefix '':\n", + "nyc-taxi/2011-01.data.parquet\n", + "nyc-taxi/2011-02.data.parquet\n", + "nyc-taxi/2011-03.data.parquet\n", + "nyc-taxi/2011-04.data.parquet\n", + "nyc-taxi/2011-05.data.parquet\n", + "nyc-taxi/2011-06.data.parquet\n", + "nyc-taxi/2011-07.data.parquet\n", + "nyc-taxi/2011-08.data.parquet\n", + "nyc-taxi/2011-09.data.parquet\n", + "nyc-taxi/2011-10.data.parquet\n", + "nyc-taxi/2011-11.data.parquet\n", + "nyc-taxi/2011-12.data.parquet\n", + "nyc-taxi/2012-01.data.parquet\n", + "nyc-taxi/2012-02.data.parquet\n", + "nyc-taxi/2012-03.data.parquet\n", + "nyc-taxi/2012-04.data.parquet\n", + "nyc-taxi/2012-05.data.parquet\n", + "nyc-taxi/2012-06.data.parquet\n", + "nyc-taxi/2012-07.data.parquet\n", + "nyc-taxi/2012-08.data.parquet\n", + "nyc-taxi/2012-09.data.parquet\n", + "nyc-taxi/2012-10.data.parquet\n", + "nyc-taxi/2012-11.data.parquet\n", + "nyc-taxi/2012-12.data.parquet\n", + "nyc-taxi/2013-01.data.parquet\n", + "nyc-taxi/2013-02.data.parquet\n", + "nyc-taxi/2013-03.data.parquet\n", + "nyc-taxi/2013-04.data.parquet\n", + "nyc-taxi/2013-05.data.parquet\n", + "nyc-taxi/2013-06.data.parquet\n", + "nyc-taxi/2013-07.data.parquet\n", + "nyc-taxi/2013-08.data.parquet\n", + "nyc-taxi/2013-09.data.parquet\n", + "nyc-taxi/2013-10.data.parquet\n", + "nyc-taxi/2013-11.data.parquet\n", + "nyc-taxi/2013-12.data.parquet\n", + "nyc-taxi/2014-01.data.parquet\n", + "nyc-taxi/2014-02.data.parquet\n", + "nyc-taxi/2014-03.data.parquet\n", + "nyc-taxi/2014-04.data.parquet\n", + "nyc-taxi/2014-05.data.parquet\n", + "nyc-taxi/2014-06.data.parquet\n", + "nyc-taxi/2014-07.data.parquet\n", + "nyc-taxi/2014-08.data.parquet\n", + "nyc-taxi/2014-09.data.parquet\n", + "nyc-taxi/2014-10.data.parquet\n", + "nyc-taxi/2014-11.data.parquet\n", + "nyc-taxi/2014-12.data.parquet\n", + "nyc-taxi/2015-01.data.parquet\n", + "nyc-taxi/2015-02.data.parquet\n", + "nyc-taxi/2015-03.data.parquet\n", + "nyc-taxi/2015-04.data.parquet\n", + "nyc-taxi/2015-05.data.parquet\n", + "nyc-taxi/2015-06.data.parquet\n", + "nyc-taxi/2015-07.data.parquet\n", + "nyc-taxi/2015-08.data.parquet\n", + "nyc-taxi/2015-09.data.parquet\n", + "nyc-taxi/2015-10.data.parquet\n", + "nyc-taxi/2015-11.data.parquet\n", + "nyc-taxi/2015-12.data.parquet\n", + "nyc-taxi/2016-01.data.parquet\n", + "nyc-taxi/2016-02.data.parquet\n", + "nyc-taxi/2016-03.data.parquet\n", + "nyc-taxi/2016-04.data.parquet\n", + "nyc-taxi/2016-05.data.parquet\n", + "nyc-taxi/2016-06.data.parquet\n", + "nyc-taxi/2016-07.data.parquet\n", + "nyc-taxi/2016-08.data.parquet\n", + "nyc-taxi/2016-09.data.parquet\n", + "nyc-taxi/2016-10.data.parquet\n", + "nyc-taxi/2016-11.data.parquet\n", + "nyc-taxi/2016-12.data.parquet\n", + "nyc-taxi/2017-01.data.parquet\n", + "nyc-taxi/2017-02.data.parquet\n", + "nyc-taxi/2017-03.data.parquet\n", + "nyc-taxi/2017-04.data.parquet\n", + "nyc-taxi/2017-05.data.parquet\n", + "nyc-taxi/2017-06.data.parquet\n", + "nyc-taxi/2017-07.data.parquet\n", + "nyc-taxi/2017-08.data.parquet\n", + "nyc-taxi/2017-09.data.parquet\n", + "nyc-taxi/2017-10.data.parquet\n", + "nyc-taxi/2017-11.data.parquet\n", + "nyc-taxi/2017-12.data.parquet\n", + "nyc-taxi/2018-01.data.parquet\n", + "nyc-taxi/2018-02.data.parquet\n", + "nyc-taxi/2018-03.data.parquet\n", + "nyc-taxi/2018-04.data.parquet\n", + "nyc-taxi/2018-05.data.parquet\n", + "nyc-taxi/2018-06.data.parquet\n", + "nyc-taxi/2018-07.data.parquet\n", + "nyc-taxi/2018-08.data.parquet\n", + "nyc-taxi/2018-09.data.parquet\n", + "nyc-taxi/2018-10.data.parquet\n", + "nyc-taxi/2018-11.data.parquet\n", + "nyc-taxi/2018-12.data.parquet\n", + "nyc-taxi/2019-01.data.parquet\n", + "nyc-taxi/2019-02.data.parquet\n", + "nyc-taxi/2019-03.data.parquet\n", + "nyc-taxi/2019-04.data.parquet\n", + "nyc-taxi/2019-05.data.parquet\n", + "nyc-taxi/2019-06.data.parquet\n", + "tabular_schema_table\n", + "tpcds/\n", + "vastschema/taxi\n" + ] + } + ], + "source": [ + "import os\n", + "import boto3\n", + "from botocore.exceptions import NoCredentialsError\n", + "\n", + "#upload_to_s3(local_file_path, bucket_name, s3_file_key, aws_access_key_id, aws_secret_access_key, s3_endpoint)\n", + "\n", + "# List objects in the bucket\n", + "print(\"Uploaded Object\")\n", + "list_objects_in_bucket(bucket_name, aws_access_key_id, aws_secret_access_key, s3_endpoint, prefix='')" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "1bb625a2-5204-4572-bb03-cd69aa2c8df7", + "metadata": { + "execution": { + "iopub.execute_input": "2024-03-13T12:22:48.238175Z", + "iopub.status.busy": "2024-03-13T12:22:48.236861Z", + "iopub.status.idle": "2024-03-13T12:22:48.258636Z", + "shell.execute_reply": "2024-03-13T12:22:48.257208Z", + "shell.execute_reply.started": "2024-03-13T12:22:48.238109Z" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Uploaded Object\n", + "No objects found in bucket vastdb with prefix 'labimage4.png'.\n" + ] + } + ], + "source": [ + "print(\"Uploaded Object\")\n", + "list_objects_in_bucket(bucket_name, aws_access_key_id, aws_secret_access_key, s3_endpoint, prefix='labimage4.png')" + ] + }, + { + "cell_type": "markdown", + "id": "fae3669f-311f-4f34-a3f8-ad7d9097ef5b", + "metadata": {}, + "source": [ + "### Add user tags to existing files/objects using S3 API" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "3578579c-ebff-40d3-a00b-91df474f0e6b", + "metadata": { + "execution": { + "iopub.execute_input": "2024-03-13T13:38:50.822735Z", + "iopub.status.busy": "2024-03-13T13:38:50.821838Z", + "iopub.status.idle": "2024-03-13T13:38:50.855109Z", + "shell.execute_reply": "2024-03-13T13:38:50.854515Z", + "shell.execute_reply.started": "2024-03-13T13:38:50.822675Z" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tags added successfully to S3 object: s3://vastdb/nyc-taxi/2015-10.data.parquet\n" + ] + } + ], + "source": [ + "import boto3\n", + "from botocore.exceptions import NoCredentialsError\n", + "\n", + "def add_tags_to_s3_object(access_key, secret_key, endpoint_url, bucket_name, key, tags):\n", + " # Create an S3 client with custom configurations\n", + " s3 = boto3.client(\n", + " 's3',\n", + " aws_access_key_id=access_key,\n", + " aws_secret_access_key=secret_key,\n", + " endpoint_url=endpoint_url,\n", + " verify=False # Set to False to disable SSL verification\n", + " )\n", + "\n", + " # Add tags to the existing S3 object\n", + " s3.put_object_tagging(\n", + " Bucket=bucket_name,\n", + " Key=key,\n", + " Tagging={'TagSet': tags}\n", + " )\n", + "\n", + "if __name__ == \"__main__\":\n", + " # Replace these values with your AWS credentials, S3 endpoint, bucket name, object key, and desired tags\n", + " aws_access_key = 'BGU86QBVQBX0A71S4PTL'\n", + " aws_secret_key = 'LOUtdwqsgoyUxc5EG+204RRi1gQqbm1wNWJGMbJ0'\n", + " s3_endpoint = 'http://localhost:55555' # e.g., 'https://s3.example.com'\n", + " bucket_name = 'vastdb'\n", + " object_key = 'nyc-taxi/2015-10.data.parquet'\n", + " user_defined_tags = [\n", + " {'Key': 'Location', 'Value': 'Boston'},\n", + " {'Key': 'Lab_Device_ID', 'Value': '11111'},\n", + " {'Key': 'Type', 'Value': 'xRNA'},\n", + " {'Key': 'Experiment_ID', 'Value': '3333'},\n", + " {'Key': 'Status', 'Value': 'Processing'}\n", + " # Add more tags as needed\n", + " ]\n", + "\n", + " try:\n", + " add_tags_to_s3_object(aws_access_key, aws_secret_key, s3_endpoint, bucket_name, object_key, user_defined_tags)\n", + " print(f\"Tags added successfully to S3 object: s3://{bucket_name}/{object_key}\")\n", + " except NoCredentialsError:\n", + " print(\"AWS credentials not available.\")\n", + " except Exception as e:\n", + " print(f\"Error adding tags: {e}\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "7cc45b0b-7e3c-4503-9cfa-af7db7507fa0", + "metadata": { + "execution": { + "iopub.execute_input": "2024-03-13T15:13:42.387162Z", + "iopub.status.busy": "2024-03-13T15:13:42.385508Z", + "iopub.status.idle": "2024-03-13T15:13:42.394774Z", + "shell.execute_reply": "2024-03-13T15:13:42.392926Z", + "shell.execute_reply.started": "2024-03-13T15:13:42.387098Z" + }, + "tags": [] + }, + "source": [ + "## Get user tags to existing files/objects using S3 API" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "711cdd58-a3a9-4eba-b02d-41dc23c35f58", + "metadata": { + "execution": { + "iopub.execute_input": "2024-03-13T13:39:18.255283Z", + "iopub.status.busy": "2024-03-13T13:39:18.254636Z", + "iopub.status.idle": "2024-03-13T13:39:18.286670Z", + "shell.execute_reply": "2024-03-13T13:39:18.284373Z", + "shell.execute_reply.started": "2024-03-13T13:39:18.255226Z" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tags for S3 object: s3://vastdb/nyc-taxi/2018-06.data.parquet\n", + "Key: Experiment_ID, Value: 3333\n", + "Key: Lab_Device_ID, Value: 11111\n", + "Key: Location, Value: Basel\n", + "Key: Status, Value: Processing\n", + "Key: Type, Value: xRNA\n" + ] + } + ], + "source": [ + "import boto3\n", + "from botocore.exceptions import NoCredentialsError\n", + "\n", + "def list_tags_of_s3_object(access_key, secret_key, endpoint_url, bucket_name, key):\n", + " # Create an S3 client with custom configurations\n", + " s3 = boto3.client(\n", + " 's3',\n", + " aws_access_key_id=access_key,\n", + " aws_secret_access_key=secret_key,\n", + " endpoint_url=endpoint_url,\n", + " verify=False # Set to False to disable SSL verification\n", + " )\n", + "\n", + " # Get the tags associated with the S3 object\n", + " response = s3.get_object_tagging(\n", + " Bucket=bucket_name,\n", + " Key=key\n", + " )\n", + "\n", + " return response['TagSet']\n", + "\n", + "if __name__ == \"__main__\":\n", + " # Replace these values with your AWS credentials, S3 endpoint, bucket name, and object key\n", + " aws_access_key = 'BGU86QBVQBX0A71S4PTL'\n", + " aws_secret_key = 'LOUtdwqsgoyUxc5EG+204RRi1gQqbm1wNWJGMbJ0'\n", + " s3_endpoint = 'http://localhost:55555' # e.g., 'https://s3.example.com'\n", + " bucket_name = 'vastdb'\n", + " object_key = 'nyc-taxi/2018-06.data.parquet'\n", + "\n", + " try:\n", + " object_tags = list_tags_of_s3_object(aws_access_key, aws_secret_key, s3_endpoint, bucket_name, object_key)\n", + " \n", + " # Print the tags associated with the S3 object\n", + " print(f\"Tags for S3 object: s3://{bucket_name}/{object_key}\")\n", + " for tag in object_tags:\n", + " print(f\"Key: {tag['Key']}, Value: {tag['Value']}\")\n", + " \n", + " except NoCredentialsError:\n", + " print(\"AWS credentials not available.\")\n", + " except Exception as e:\n", + " print(f\"Error listing tags: {e}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20292766-0a25-46f9-9d95-d59f50cfb5ea", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "47172f3e-839c-4621-9910-bc8bc668e29d", + "metadata": {}, + "source": [ + "### Create VAST DB Catalog Session" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "fd430495-6500-4afe-bbd1-adc6e9356ae7", + "metadata": { + "execution": { + "iopub.execute_input": "2024-03-13T12:30:31.498419Z", + "iopub.status.busy": "2024-03-13T12:30:31.497727Z", + "iopub.status.idle": "2024-03-13T12:30:31.789569Z", + "shell.execute_reply": "2024-03-13T12:30:31.788792Z", + "shell.execute_reply.started": "2024-03-13T12:30:31.498357Z" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "from vastdb.api import VastdbApi\n", + "import pyarrow as pa\n", + "\n", + "def create_vastdb_session(access_key, secret_key):\n", + " return VastdbApi(host='localhost:55555', access_key=access_key, secret_key=secret_key)\n", + "\n", + "\n", + "bucket_name='vastdb'\n", + "schema_name='python-sdk'\n", + "table='pythonsdk'\n", + "\n", + "access_key='BGU86QBVQBX0A71S4PTL'\n", + "secret_key='LOUtdwqsgoyUxc5EG+204RRi1gQqbm1wNWJGMbJ0'\n", + "vastdb_session = create_vastdb_session(access_key, secret_key)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "7f6f67b5-fbae-47b6-958d-3e430b7f0e77", + "metadata": { + "execution": { + "iopub.execute_input": "2024-03-13T12:30:42.635238Z", + "iopub.status.busy": "2024-03-13T12:30:42.633581Z", + "iopub.status.idle": "2024-03-13T12:30:43.181064Z", + "shell.execute_reply": "2024-03-13T12:30:43.180320Z", + "shell.execute_reply.started": "2024-03-13T12:30:42.635166Z" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "field_names = ['element_type'] # Only need the element_type field for counting\n", + "table = vastdb_session.query('vast-big-catalog-bucket', 'vast_big_catalog_schema', 'vast_big_catalog_table', field_names=field_names, num_sub_splits=8)\n", + "df = table.to_pandas()" + ] + }, + { + "cell_type": "markdown", + "id": "c67e2259-ecaf-4e2a-9a5e-de3f05042184", + "metadata": {}, + "source": [ + "### How many elements are in the catalog\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "3cf3ae56-0737-4fb0-8b73-fb5867b032df", + "metadata": { + "execution": { + "iopub.execute_input": "2024-03-13T12:30:56.207438Z", + "iopub.status.busy": "2024-03-13T12:30:56.206081Z", + "iopub.status.idle": "2024-03-13T12:30:56.216512Z", + "shell.execute_reply": "2024-03-13T12:30:56.215325Z", + "shell.execute_reply.started": "2024-03-13T12:30:56.207371Z" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total elements in the catalog: 345\n" + ] + } + ], + "source": [ + "total_elements = len(df)\n", + "print(f\"Total elements in the catalog: {total_elements}\")" + ] + }, + { + "cell_type": "markdown", + "id": "627bc337-4f0f-4800-b590-70fd1a4ef27a", + "metadata": {}, + "source": [ + "### How many files/objects?\n" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "469562ed-9aff-4fcf-9baf-131366958c13", + "metadata": { + "execution": { + "iopub.execute_input": "2024-03-13T12:31:01.256380Z", + "iopub.status.busy": "2024-03-13T12:31:01.254952Z", + "iopub.status.idle": "2024-03-13T12:31:01.264410Z", + "shell.execute_reply": "2024-03-13T12:31:01.263695Z", + "shell.execute_reply.started": "2024-03-13T12:31:01.256273Z" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of files/objects: 163\n" + ] + } + ], + "source": [ + "file_count = len(df[df['element_type'] == 'FILE'])\n", + "print(f\"Number of files/objects: {file_count}\")" + ] + }, + { + "cell_type": "markdown", + "id": "071c92c2-4dae-41ec-ab8b-d434177d24f2", + "metadata": {}, + "source": [ + "### How many directories?\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "f3b01199-eeef-410b-a8f4-b41d607e902c", + "metadata": { + "execution": { + "iopub.execute_input": "2024-03-13T12:31:17.862220Z", + "iopub.status.busy": "2024-03-13T12:31:17.861452Z", + "iopub.status.idle": "2024-03-13T12:31:17.875324Z", + "shell.execute_reply": "2024-03-13T12:31:17.874509Z", + "shell.execute_reply.started": "2024-03-13T12:31:17.862193Z" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of directories: 179\n" + ] + } + ], + "source": [ + "dir_count = len(df[df['element_type'] == 'DIR'])\n", + "print(f\"Number of directories: {dir_count}\")" + ] + }, + { + "cell_type": "markdown", + "id": "1d87b37b-bffb-4d72-805a-bfcf57f80a9f", + "metadata": {}, + "source": [ + "### How many Database tables?" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "6619861f-a3da-4ffa-9cf8-cd77948e767f", + "metadata": { + "execution": { + "iopub.execute_input": "2024-03-13T12:31:18.407462Z", + "iopub.status.busy": "2024-03-13T12:31:18.406802Z", + "iopub.status.idle": "2024-03-13T12:31:18.416753Z", + "shell.execute_reply": "2024-03-13T12:31:18.416148Z", + "shell.execute_reply.started": "2024-03-13T12:31:18.407402Z" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of database tables: 2\n" + ] + } + ], + "source": [ + "table_count = len(df[df['element_type'] == 'TABLE'])\n", + "print(f\"Number of database tables: {table_count}\")" + ] + }, + { + "cell_type": "markdown", + "id": "a85c4e86-45ae-410e-89d1-8e9688eee85f", + "metadata": {}, + "source": [ + "### What are all of the elements on my system anyway?\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "834f14bb-cf91-4b47-aa32-fdcc5f2782ac", + "metadata": { + "execution": { + "iopub.execute_input": "2024-03-13T12:31:22.629293Z", + "iopub.status.busy": "2024-03-13T12:31:22.628729Z", + "iopub.status.idle": "2024-03-13T12:31:22.634016Z", + "shell.execute_reply": "2024-03-13T12:31:22.633317Z", + "shell.execute_reply.started": "2024-03-13T12:31:22.629262Z" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Distinct element types on the system:\n", + "['FILE' 'DIR' 'SCHEMA' 'TABLE']\n" + ] + } + ], + "source": [ + "distinct_elements = df['element_type'].unique()\n", + "print(\"Distinct element types on the system:\")\n", + "print(distinct_elements)" + ] + }, + { + "cell_type": "markdown", + "id": "d42cc7d7-9435-498c-9958-08cb5dc0c07b", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-07T17:12:48.624016Z", + "iopub.status.busy": "2024-01-07T17:12:48.623443Z", + "iopub.status.idle": "2024-01-07T17:12:48.628368Z", + "shell.execute_reply": "2024-01-07T17:12:48.627455Z", + "shell.execute_reply.started": "2024-01-07T17:12:48.623984Z" + }, + "tags": [] + }, + "source": [ + "### Simplified example of count of elements returned from parallel execution\n", + "\n", + "The query_iterator iteratively executes a query on a database table, returning results in chunks as PyArrow RecordBatches, enabling efficient handling of large datasets by processing data in smaller, manageable segments.\n", + "Simplified example of count of elements returned from parallel execution." + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "518d57d8-c16e-4c10-8bf9-bf8e20fdebe1", + "metadata": { + "execution": { + "iopub.execute_input": "2024-03-13T12:31:32.085675Z", + "iopub.status.busy": "2024-03-13T12:31:32.085148Z", + "iopub.status.idle": "2024-03-13T12:31:32.389202Z", + "shell.execute_reply": "2024-03-13T12:31:32.387545Z", + "shell.execute_reply.started": "2024-03-13T12:31:32.085644Z" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total elements in the catalog: 345\n" + ] + } + ], + "source": [ + "def query_and_count_elements(session, bucket, schema, table, field_names):\n", + " elements_count = 0\n", + "\n", + " for record_batch in session.query_iterator(bucket, schema, table, field_names=field_names, num_sub_splits=8):\n", + " elements_count += len(record_batch)\n", + "\n", + " return elements_count\n", + "\n", + "# Query Parameters\n", + "field_names = ['element_type'] # Only need the element_type field for counting\n", + "\n", + "# Perform the query\n", + "total_elements = query_and_count_elements(\n", + " vastdb_session, 'vast-big-catalog-bucket', 'vast_big_catalog_schema', 'vast_big_catalog_table', field_names\n", + ")\n", + "print(f\"Total elements in the catalog: {total_elements}\")" + ] + }, + { + "cell_type": "markdown", + "id": "cd360703-a361-47a0-8611-ca880edba24e", + "metadata": {}, + "source": [ + "### Simple Filtering\n" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "6dd13c71-e7a2-4bfa-9fdf-13350a1e0da0", + "metadata": { + "execution": { + "iopub.execute_input": "2024-03-13T14:36:43.586994Z", + "iopub.status.busy": "2024-03-13T14:36:43.586433Z", + "iopub.status.idle": "2024-03-13T14:36:43.808815Z", + "shell.execute_reply": "2024-03-13T14:36:43.808371Z", + "shell.execute_reply.started": "2024-03-13T14:36:43.586967Z" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | name | \n", + "creation_time | \n", + "uid | \n", + "owner_name | \n", + "size | \n", + "user_metadata | \n", + "user_tags | \n", + "tag_Location | \n", + "
---|---|---|---|---|---|---|---|---|
0 | \n", + "2018-06.data.parquet | \n", + "2024-03-05 15:03:21.610385219 | \n", + "99999 | \n", + "trinos3 | \n", + "141083662 | \n", + "[(s3cmd-attrs, atime:1709650655/ctime:17096506... | \n", + "[(Experiment_ID, 3333), (Lab_Device_ID, 11111)... | \n", + "Basel | \n", + "
1 | \n", + "2019-02.data.parquet | \n", + "2024-03-05 15:03:28.300958652 | \n", + "99999 | \n", + "trinos3 | \n", + "120274946 | \n", + "[(s3cmd-attrs, atime:1709650666/ctime:17096506... | \n", + "[(Experiment_ID, 3333), (Lab_Device_ID, 11111)... | \n", + "Basel | \n", + "
\n", + " | uid | \n", + "owner_name | \n", + "element_type | \n", + "
---|---|---|---|
0 | \n", + "1000 | \n", + "vastdata | \n", + "FILE | \n", + "
1 | \n", + "1000 | \n", + "vastdata | \n", + "FILE | \n", + "
2 | \n", + "1000 | \n", + "vastdata | \n", + "FILE | \n", + "
3 | \n", + "1000 | \n", + "vastdata | \n", + "FILE | \n", + "
4 | \n", + "1000 | \n", + "vastdata | \n", + "FILE | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "
477787 | \n", + "1000 | \n", + "vastdata | \n", + "FILE | \n", + "
477788 | \n", + "1000 | \n", + "vastdata | \n", + "FILE | \n", + "
477789 | \n", + "1000 | \n", + "vastdata | \n", + "FILE | \n", + "
477790 | \n", + "1000 | \n", + "vastdata | \n", + "FILE | \n", + "
477791 | \n", + "1000 | \n", + "vastdata | \n", + "FILE | \n", + "
477792 rows × 3 columns
\n", + "\n", + " | uid | \n", + "extension | \n", + "size | \n", + "
---|---|---|---|
0 | \n", + "1000 | \n", + "log | \n", + "45511 | \n", + "
1 | \n", + "1000 | \n", + "log | \n", + "4050387 | \n", + "
2 | \n", + "1000 | \n", + "log | \n", + "1730 | \n", + "
3 | \n", + "1000 | \n", + "log | \n", + "104 | \n", + "
4 | \n", + "1000 | \n", + "log | \n", + "240974 | \n", + "
5 | \n", + "1000 | \n", + "log | \n", + "47233 | \n", + "
6 | \n", + "1000 | \n", + "log | \n", + "73391 | \n", + "
7 | \n", + "1000 | \n", + "log | \n", + "77396 | \n", + "
8 | \n", + "1000 | \n", + "log | \n", + "47334 | \n", + "
9 | \n", + "1000 | \n", + "log | \n", + "40836 | \n", + "
10 | \n", + "1000 | \n", + "log | \n", + "31460 | \n", + "
11 | \n", + "1000 | \n", + "log | \n", + "1 | \n", + "
12 | \n", + "1000 | \n", + "log | \n", + "20002 | \n", + "
13 | \n", + "1000 | \n", + "log | \n", + "16541 | \n", + "
14 | \n", + "1000 | \n", + "log | \n", + "30185 | \n", + "
15 | \n", + "1000 | \n", + "log | \n", + "16649 | \n", + "
16 | \n", + "1000 | \n", + "log | \n", + "65283 | \n", + "
17 | \n", + "1000 | \n", + "log | \n", + "16591 | \n", + "
18 | \n", + "1000 | \n", + "log | \n", + "17126 | \n", + "
19 | \n", + "1000 | \n", + "log | \n", + "2297 | \n", + "
20 | \n", + "1000 | \n", + "log | \n", + "6602790 | \n", + "
21 | \n", + "1000 | \n", + "log | \n", + "2421070 | \n", + "
22 | \n", + "1000 | \n", + "log | \n", + "30947 | \n", + "
23 | \n", + "1000 | \n", + "log | \n", + "112871 | \n", + "
24 | \n", + "1000 | \n", + "log | \n", + "963022 | \n", + "
25 | \n", + "1000 | \n", + "log | \n", + "12696 | \n", + "
26 | \n", + "1000 | \n", + "log | \n", + "53285 | \n", + "
27 | \n", + "1000 | \n", + "log | \n", + "30947 | \n", + "
\n", + " | element_type | \n", + "size | \n", + "name | \n", + "
---|---|---|---|
0 | \n", + "FILE | \n", + "442558237 | \n", + "2012-02.data.parquet | \n", + "
1 | \n", + "FILE | \n", + "16777216 | \n", + "upload | \n", + "
2 | \n", + "FILE | \n", + "146758259 | \n", + "2018-04.data.parquet | \n", + "
3 | \n", + "FILE | \n", + "144467145 | \n", + "2018-10.data.parquet | \n", + "
4 | \n", + "FILE | \n", + "390453487 | \n", + "2013-09.data.parquet | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "
158 | \n", + "FILE | \n", + "67108864 | \n", + "upload | \n", + "
159 | \n", + "FILE | \n", + "346955670 | \n", + "2014-01.data.parquet | \n", + "
160 | \n", + "FILE | \n", + "376498312 | \n", + "2013-01.data.parquet | \n", + "
161 | \n", + "FILE | \n", + "145782225 | \n", + "2017-11.data.parquet | \n", + "
162 | \n", + "FILE | \n", + "67108864 | \n", + "upload | \n", + "
163 rows × 3 columns
\n", + "\n", + " | Files | \n", + "KB_Used | \n", + "Avg_Size_KB | \n", + "Oldest_data | \n", + "Last_access | \n", + "
---|---|---|---|---|---|
owner_name | \n", + "\n", + " | \n", + " | \n", + " | \n", + " | \n", + " |
0 | \n", + "40 | \n", + "0 | \n", + "0.00 | \n", + "2024-03-05 15:21:31.916288773 | \n", + "2024-03-05 15:25:42.213137875 | \n", + "
trinos3 | \n", + "123 | \n", + "30,963,037 | \n", + "251,732.01 | \n", + "2024-03-05 15:00:39.594759193 | \n", + "2024-03-05 15:25:42.864619921 | \n", + "
SparkSession - in-memory
\n", + " \n", + "SparkContext
\n", + "\n", + " \n", + "\n", + "v3.5.2
local[*]
KafkaToVastDB
\n", + " | FL_DATE | \n", + "DEP_DELAY | \n", + "ARR_DELAY | \n", + "AIR_TIME | \n", + "DISTANCE | \n", + "DEP_TIME | \n", + "ARR_TIME | \n", + "
---|---|---|---|---|---|---|---|
0 | \n", + "2006-01-01 | \n", + "5 | \n", + "19 | \n", + "350 | \n", + "2475 | \n", + "9.083333 | \n", + "12.483334 | \n", + "
1 | \n", + "2006-01-02 | \n", + "167 | \n", + "216 | \n", + "343 | \n", + "2475 | \n", + "11.783334 | \n", + "15.766666 | \n", + "
2 | \n", + "2006-01-03 | \n", + "-7 | \n", + "-2 | \n", + "344 | \n", + "2475 | \n", + "8.883333 | \n", + "12.133333 | \n", + "
3 | \n", + "2006-01-04 | \n", + "-5 | \n", + "-13 | \n", + "331 | \n", + "2475 | \n", + "8.916667 | \n", + "11.950000 | \n", + "
4 | \n", + "2006-01-05 | \n", + "-3 | \n", + "-17 | \n", + "321 | \n", + "2475 | \n", + "8.950000 | \n", + "11.883333 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
999995 | \n", + "2006-01-19 | \n", + "5 | \n", + "4 | \n", + "244 | \n", + "1781 | \n", + "15.000000 | \n", + "17.350000 | \n", + "
999996 | \n", + "2006-01-20 | \n", + "14 | \n", + "12 | \n", + "240 | \n", + "1781 | \n", + "15.150000 | \n", + "17.483334 | \n", + "
999997 | \n", + "2006-01-21 | \n", + "9 | \n", + "12 | \n", + "241 | \n", + "1781 | \n", + "15.066667 | \n", + "17.483334 | \n", + "
999998 | \n", + "2006-01-22 | \n", + "-2 | \n", + "8 | \n", + "242 | \n", + "1781 | \n", + "14.883333 | \n", + "17.416666 | \n", + "
999999 | \n", + "2006-01-23 | \n", + "1 | \n", + "-12 | \n", + "232 | \n", + "1781 | \n", + "14.933333 | \n", + "17.083334 | \n", + "
1000000 rows × 7 columns
\n", + "\n", + " | FL_DATE | \n", + "DEP_DELAY | \n", + "ARR_DELAY | \n", + "AIR_TIME | \n", + "DISTANCE | \n", + "DEP_TIME | \n", + "ARR_TIME | \n", + "
---|---|---|---|---|---|---|---|
0 | \n", + "2006-01-01 | \n", + "5 | \n", + "19 | \n", + "350 | \n", + "2475 | \n", + "9.083333 | \n", + "12.483334 | \n", + "
1 | \n", + "2006-01-02 | \n", + "167 | \n", + "216 | \n", + "343 | \n", + "2475 | \n", + "11.783334 | \n", + "15.766666 | \n", + "
2 | \n", + "2006-01-03 | \n", + "-7 | \n", + "-2 | \n", + "344 | \n", + "2475 | \n", + "8.883333 | \n", + "12.133333 | \n", + "
3 | \n", + "2006-01-04 | \n", + "-5 | \n", + "-13 | \n", + "331 | \n", + "2475 | \n", + "8.916667 | \n", + "11.950000 | \n", + "
4 | \n", + "2006-01-05 | \n", + "-3 | \n", + "-17 | \n", + "321 | \n", + "2475 | \n", + "8.950000 | \n", + "11.883333 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
999995 | \n", + "2006-01-19 | \n", + "5 | \n", + "4 | \n", + "244 | \n", + "1781 | \n", + "15.000000 | \n", + "17.350000 | \n", + "
999996 | \n", + "2006-01-20 | \n", + "14 | \n", + "12 | \n", + "240 | \n", + "1781 | \n", + "15.150000 | \n", + "17.483334 | \n", + "
999997 | \n", + "2006-01-21 | \n", + "9 | \n", + "12 | \n", + "241 | \n", + "1781 | \n", + "15.066667 | \n", + "17.483334 | \n", + "
999998 | \n", + "2006-01-22 | \n", + "-2 | \n", + "8 | \n", + "242 | \n", + "1781 | \n", + "14.883333 | \n", + "17.416666 | \n", + "
999999 | \n", + "2006-01-23 | \n", + "1 | \n", + "-12 | \n", + "232 | \n", + "1781 | \n", + "14.933333 | \n", + "17.083334 | \n", + "
1000000 rows × 7 columns
\n", + "\n", + " | Citizen_Age | \n", + "Citizen_Name | \n", + "Citizen_experience | \n", + "Is_married | \n", + "
---|---|---|---|---|
0 | \n", + "45 | \n", + "Alice | \n", + "25.5 | \n", + "True | \n", + "
1 | \n", + "38 | \n", + "Bob | \n", + "17.9 | \n", + "False | \n", + "
2 | \n", + "27 | \n", + "Koko | \n", + "5.3 | \n", + "False | \n", + "
3 | \n", + "51 | \n", + "Menny | \n", + "28.2 | \n", + "True | \n", + "
Short
+ */ + .o-tooltip--left { + position: relative; + } + + .o-tooltip--left:after { + opacity: 0; + visibility: hidden; + position: absolute; + content: attr(data-tooltip); + padding: .2em; + font-size: .8em; + left: -.2em; + background: grey; + color: white; + white-space: nowrap; + z-index: 2; + border-radius: 2px; + transform: translateX(-102%) translateY(0); + transition: opacity 0.2s cubic-bezier(0.64, 0.09, 0.08, 1), transform 0.2s cubic-bezier(0.64, 0.09, 0.08, 1); +} + +.o-tooltip--left:hover:after { + display: block; + opacity: 1; + visibility: visible; + transform: translateX(-100%) translateY(0); + transition: opacity 0.2s cubic-bezier(0.64, 0.09, 0.08, 1), transform 0.2s cubic-bezier(0.64, 0.09, 0.08, 1); + transition-delay: .5s; +} + +/* By default the copy button shouldn't show up when printing a page */ +@media print { + button.copybtn { + display: none; + } +} diff --git a/_static/copybutton.js b/_static/copybutton.js new file mode 100644 index 00000000..2ea7ff3e --- /dev/null +++ b/_static/copybutton.js @@ -0,0 +1,248 @@ +// Localization support +const messages = { + 'en': { + 'copy': 'Copy', + 'copy_to_clipboard': 'Copy to clipboard', + 'copy_success': 'Copied!', + 'copy_failure': 'Failed to copy', + }, + 'es' : { + 'copy': 'Copiar', + 'copy_to_clipboard': 'Copiar al portapapeles', + 'copy_success': '¡Copiado!', + 'copy_failure': 'Error al copiar', + }, + 'de' : { + 'copy': 'Kopieren', + 'copy_to_clipboard': 'In die Zwischenablage kopieren', + 'copy_success': 'Kopiert!', + 'copy_failure': 'Fehler beim Kopieren', + }, + 'fr' : { + 'copy': 'Copier', + 'copy_to_clipboard': 'Copier dans le presse-papier', + 'copy_success': 'Copié !', + 'copy_failure': 'Échec de la copie', + }, + 'ru': { + 'copy': 'Скопировать', + 'copy_to_clipboard': 'Скопировать в буфер', + 'copy_success': 'Скопировано!', + 'copy_failure': 'Не удалось скопировать', + }, + 'zh-CN': { + 'copy': '复制', + 'copy_to_clipboard': '复制到剪贴板', + 'copy_success': '复制成功!', + 'copy_failure': '复制失败', + }, + 'it' : { + 'copy': 'Copiare', + 'copy_to_clipboard': 'Copiato negli appunti', + 'copy_success': 'Copiato!', + 'copy_failure': 'Errore durante la copia', + } +} + +let locale = 'en' +if( document.documentElement.lang !== undefined + && messages[document.documentElement.lang] !== undefined ) { + locale = document.documentElement.lang +} + +let doc_url_root = DOCUMENTATION_OPTIONS.URL_ROOT; +if (doc_url_root == '#') { + doc_url_root = ''; +} + +/** + * SVG files for our copy buttons + */ +let iconCheck = `` + +// If the user specified their own SVG use that, otherwise use the default +let iconCopy = ``; +if (!iconCopy) { + iconCopy = `` +} + +/** + * Set up copy/paste for code blocks + */ + +const runWhenDOMLoaded = cb => { + if (document.readyState != 'loading') { + cb() + } else if (document.addEventListener) { + document.addEventListener('DOMContentLoaded', cb) + } else { + document.attachEvent('onreadystatechange', function() { + if (document.readyState == 'complete') cb() + }) + } +} + +const codeCellId = index => `codecell${index}` + +// Clears selected text since ClipboardJS will select the text when copying +const clearSelection = () => { + if (window.getSelection) { + window.getSelection().removeAllRanges() + } else if (document.selection) { + document.selection.empty() + } +} + +// Changes tooltip text for a moment, then changes it back +// We want the timeout of our `success` class to be a bit shorter than the +// tooltip and icon change, so that we can hide the icon before changing back. +var timeoutIcon = 2000; +var timeoutSuccessClass = 1500; + +const temporarilyChangeTooltip = (el, oldText, newText) => { + el.setAttribute('data-tooltip', newText) + el.classList.add('success') + // Remove success a little bit sooner than we change the tooltip + // So that we can use CSS to hide the copybutton first + setTimeout(() => el.classList.remove('success'), timeoutSuccessClass) + setTimeout(() => el.setAttribute('data-tooltip', oldText), timeoutIcon) +} + +// Changes the copy button icon for two seconds, then changes it back +const temporarilyChangeIcon = (el) => { + el.innerHTML = iconCheck; + setTimeout(() => {el.innerHTML = iconCopy}, timeoutIcon) +} + +const addCopyButtonToCodeCells = () => { + // If ClipboardJS hasn't loaded, wait a bit and try again. This + // happens because we load ClipboardJS asynchronously. + if (window.ClipboardJS === undefined) { + setTimeout(addCopyButtonToCodeCells, 250) + return + } + + // Add copybuttons to all of our code cells + const COPYBUTTON_SELECTOR = 'div.highlight pre'; + const codeCells = document.querySelectorAll(COPYBUTTON_SELECTOR) + codeCells.forEach((codeCell, index) => { + const id = codeCellId(index) + codeCell.setAttribute('id', id) + + const clipboardButton = id => + `` + codeCell.insertAdjacentHTML('afterend', clipboardButton(id)) + }) + +function escapeRegExp(string) { + return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string +} + +/** + * Removes excluded text from a Node. + * + * @param {Node} target Node to filter. + * @param {string} exclude CSS selector of nodes to exclude. + * @returns {DOMString} Text from `target` with text removed. + */ +function filterText(target, exclude) { + const clone = target.cloneNode(true); // clone as to not modify the live DOM + if (exclude) { + // remove excluded nodes + clone.querySelectorAll(exclude).forEach(node => node.remove()); + } + return clone.innerText; +} + +// Callback when a copy button is clicked. Will be passed the node that was clicked +// should then grab the text and replace pieces of text that shouldn't be used in output +function formatCopyText(textContent, copybuttonPromptText, isRegexp = false, onlyCopyPromptLines = true, removePrompts = true, copyEmptyLines = true, lineContinuationChar = "", hereDocDelim = "") { + var regexp; + var match; + + // Do we check for line continuation characters and "HERE-documents"? + var useLineCont = !!lineContinuationChar + var useHereDoc = !!hereDocDelim + + // create regexp to capture prompt and remaining line + if (isRegexp) { + regexp = new RegExp('^(' + copybuttonPromptText + ')(.*)') + } else { + regexp = new RegExp('^(' + escapeRegExp(copybuttonPromptText) + ')(.*)') + } + + const outputLines = []; + var promptFound = false; + var gotLineCont = false; + var gotHereDoc = false; + const lineGotPrompt = []; + for (const line of textContent.split('\n')) { + match = line.match(regexp) + if (match || gotLineCont || gotHereDoc) { + promptFound = regexp.test(line) + lineGotPrompt.push(promptFound) + if (removePrompts && promptFound) { + outputLines.push(match[2]) + } else { + outputLines.push(line) + } + gotLineCont = line.endsWith(lineContinuationChar) & useLineCont + if (line.includes(hereDocDelim) & useHereDoc) + gotHereDoc = !gotHereDoc + } else if (!onlyCopyPromptLines) { + outputLines.push(line) + } else if (copyEmptyLines && line.trim() === '') { + outputLines.push(line) + } + } + + // If no lines with the prompt were found then just use original lines + if (lineGotPrompt.some(v => v === true)) { + textContent = outputLines.join('\n'); + } + + // Remove a trailing newline to avoid auto-running when pasting + if (textContent.endsWith("\n")) { + textContent = textContent.slice(0, -1) + } + return textContent +} + + +var copyTargetText = (trigger) => { + var target = document.querySelector(trigger.attributes['data-clipboard-target'].value); + + // get filtered text + let exclude = '.linenos'; + + let text = filterText(target, exclude); + return formatCopyText(text, '', false, true, true, true, '', '') +} + + // Initialize with a callback so we can modify the text before copy + const clipboard = new ClipboardJS('.copybtn', {text: copyTargetText}) + + // Update UI with error/success messages + clipboard.on('success', event => { + clearSelection() + temporarilyChangeTooltip(event.trigger, messages[locale]['copy'], messages[locale]['copy_success']) + temporarilyChangeIcon(event.trigger) + }) + + clipboard.on('error', event => { + temporarilyChangeTooltip(event.trigger, messages[locale]['copy'], messages[locale]['copy_failure']) + }) +} + +runWhenDOMLoaded(addCopyButtonToCodeCells) \ No newline at end of file diff --git a/_static/copybutton_funcs.js b/_static/copybutton_funcs.js new file mode 100644 index 00000000..dbe1aaad --- /dev/null +++ b/_static/copybutton_funcs.js @@ -0,0 +1,73 @@ +function escapeRegExp(string) { + return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string +} + +/** + * Removes excluded text from a Node. + * + * @param {Node} target Node to filter. + * @param {string} exclude CSS selector of nodes to exclude. + * @returns {DOMString} Text from `target` with text removed. + */ +export function filterText(target, exclude) { + const clone = target.cloneNode(true); // clone as to not modify the live DOM + if (exclude) { + // remove excluded nodes + clone.querySelectorAll(exclude).forEach(node => node.remove()); + } + return clone.innerText; +} + +// Callback when a copy button is clicked. Will be passed the node that was clicked +// should then grab the text and replace pieces of text that shouldn't be used in output +export function formatCopyText(textContent, copybuttonPromptText, isRegexp = false, onlyCopyPromptLines = true, removePrompts = true, copyEmptyLines = true, lineContinuationChar = "", hereDocDelim = "") { + var regexp; + var match; + + // Do we check for line continuation characters and "HERE-documents"? + var useLineCont = !!lineContinuationChar + var useHereDoc = !!hereDocDelim + + // create regexp to capture prompt and remaining line + if (isRegexp) { + regexp = new RegExp('^(' + copybuttonPromptText + ')(.*)') + } else { + regexp = new RegExp('^(' + escapeRegExp(copybuttonPromptText) + ')(.*)') + } + + const outputLines = []; + var promptFound = false; + var gotLineCont = false; + var gotHereDoc = false; + const lineGotPrompt = []; + for (const line of textContent.split('\n')) { + match = line.match(regexp) + if (match || gotLineCont || gotHereDoc) { + promptFound = regexp.test(line) + lineGotPrompt.push(promptFound) + if (removePrompts && promptFound) { + outputLines.push(match[2]) + } else { + outputLines.push(line) + } + gotLineCont = line.endsWith(lineContinuationChar) & useLineCont + if (line.includes(hereDocDelim) & useHereDoc) + gotHereDoc = !gotHereDoc + } else if (!onlyCopyPromptLines) { + outputLines.push(line) + } else if (copyEmptyLines && line.trim() === '') { + outputLines.push(line) + } + } + + // If no lines with the prompt were found then just use original lines + if (lineGotPrompt.some(v => v === true)) { + textContent = outputLines.join('\n'); + } + + // Remove a trailing newline to avoid auto-running when pasting + if (textContent.endsWith("\n")) { + textContent = textContent.slice(0, -1) + } + return textContent +} diff --git a/_static/design-tabs.js b/_static/design-tabs.js new file mode 100644 index 00000000..b25bd6a4 --- /dev/null +++ b/_static/design-tabs.js @@ -0,0 +1,101 @@ +// @ts-check + +// Extra JS capability for selected tabs to be synced +// The selection is stored in local storage so that it persists across page loads. + +/** + * @type {Record