From 8602fb3b84a1a249559d10ad6baa0d645116c465 Mon Sep 17 00:00:00 2001 From: Vamsi Manohar Date: Thu, 3 Nov 2022 17:33:13 -0700 Subject: [PATCH] Catalog to Datasource changes Signed-off-by: Vamsi Manohar --- .../physical/catalog/CatalogTableScan.java | 2 +- .../catalog/CatalogTableScanTest.java | 2 +- .../physical/catalog/CatalogTableTest.java | 2 +- docs/user/general/identifiers.rst | 30 +++---- docs/user/ppl/admin/catalog.rst | 83 ------------------- docs/user/ppl/admin/datasources.rst | 83 +++++++++++++++++++ docs/user/ppl/cmd/information_schema.rst | 12 +-- docs/user/ppl/cmd/showcatalogs.rst | 30 +++---- docs/user/ppl/index.rst | 2 +- doctest/build.gradle | 2 +- integ-test/build.gradle | 2 +- .../sql/ppl/ShowCatalogsCommandIT.java | 4 +- .../sql/plugin/catalog/CatalogSettings.java | 2 +- .../catalog/CatalogServiceImplTest.java | 2 +- ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 2 +- 15 files changed, 130 insertions(+), 130 deletions(-) delete mode 100644 docs/user/ppl/admin/catalog.rst create mode 100644 docs/user/ppl/admin/datasources.rst diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/catalog/CatalogTableScan.java b/core/src/main/java/org/opensearch/sql/planner/physical/catalog/CatalogTableScan.java index 894ff9f216..efc59c97ec 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/catalog/CatalogTableScan.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/catalog/CatalogTableScan.java @@ -51,7 +51,7 @@ public void open() { for (Catalog catalog : catalogs) { exprValues.add( new ExprTupleValue(new LinkedHashMap<>(ImmutableMap.of( - "CATALOG_NAME", ExprValueUtils.stringValue(catalog.getName()), + "DATASOURCE_NAME", ExprValueUtils.stringValue(catalog.getName()), "CONNECTOR_TYPE", ExprValueUtils.stringValue(catalog.getConnectorType().name()))))); } iterator = exprValues.iterator(); diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/catalog/CatalogTableScanTest.java 
b/core/src/test/java/org/opensearch/sql/planner/physical/catalog/CatalogTableScanTest.java index 26374ff042..cf9b5fe016 100644 --- a/core/src/test/java/org/opensearch/sql/planner/physical/catalog/CatalogTableScanTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/physical/catalog/CatalogTableScanTest.java @@ -61,7 +61,7 @@ void testIterator() { assertTrue(catalogTableScan.hasNext()); for (Catalog catalog : catalogSet) { assertEquals(new ExprTupleValue(new LinkedHashMap<>(ImmutableMap.of( - "CATALOG_NAME", ExprValueUtils.stringValue(catalog.getName()), + "DATASOURCE_NAME", ExprValueUtils.stringValue(catalog.getName()), "CONNECTOR_TYPE", ExprValueUtils.stringValue(catalog.getConnectorType().name())))), catalogTableScan.next()); } diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/catalog/CatalogTableTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/catalog/CatalogTableTest.java index 59e57a97b3..def92bf5e3 100644 --- a/core/src/test/java/org/opensearch/sql/planner/physical/catalog/CatalogTableTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/physical/catalog/CatalogTableTest.java @@ -33,7 +33,7 @@ void testGetFieldTypes() { CatalogTable catalogTable = new CatalogTable(catalogService); Map fieldTypes = catalogTable.getFieldTypes(); Map expectedTypes = new HashMap<>(); - expectedTypes.put("CATALOG_NAME", ExprCoreType.STRING); + expectedTypes.put("DATASOURCE_NAME", ExprCoreType.STRING); expectedTypes.put("CONNECTOR_TYPE", ExprCoreType.STRING); assertEquals(expectedTypes, fieldTypes); } diff --git a/docs/user/general/identifiers.rst b/docs/user/general/identifiers.rst index affd381d41..8bb42bb7e7 100644 --- a/docs/user/general/identifiers.rst +++ b/docs/user/general/identifiers.rst @@ -184,18 +184,18 @@ Fully Qualified Table Names Description ----------- -With the introduction of different datasource catalogs along with Opensearch, support for fully qualified table names became compulsory to resolve tables to a 
catalog. +With the introduction of different datasources along with Opensearch, support for fully qualified table names became compulsory to resolve tables to a datasource. Format for fully qualified table name. -``<catalogName>.<schemaName>.<tableName>`` +``<datasourceName>.<schemaName>.<tableName>`` -* catalogName:[Mandatory] Catalog information is mandatory when querying over tables from catalogs other than opensearch connector. +* datasourceName:[Mandatory] Datasource information is mandatory when querying over tables from datasources other than opensearch connector. * schemaName:[Optional] Schema is a logical abstraction for a group of tables. In the current state, we only support ``default`` and ``information_schema``. Any schema mentioned in the fully qualified name other than these two will be resolved to be part of tableName. * tableName:[Mandatory] tableName is mandatory. -The current resolution algorithm works in such a way, the old queries on opensearch work without specifying any catalog name. +The current resolution algorithm works in such a way, the old queries on opensearch work without specifying any datasource name. So queries on opensearch indices doesn't need a fully qualified table name. Table Name Resolution Algorithm. @@ -205,24 +205,24 @@ Fully qualified Name is divided into parts based on ``.`` character. TableName resolution algorithm works in the following manner. -1. Take the first part of the qualified name and resolve it to a catalog from the list of catalogs configured. -If it doesn't resolve to any of the catalog names configured, catalog name will default to ``@opensearch`` catalog. +1. Take the first part of the qualified name and resolve it to a datasource from the list of datasources configured. +If it doesn't resolve to any of the datasource names configured, datasource name will default to ``@opensearch`` datasource. -2. Take the first part of the remaining qualified name after capturing the catalog name.
-If this part represents any of the supported schemas under catalog, it will resolve to the same otherwise schema name will resolve to ``default`` schema. +2. Take the first part of the remaining qualified name after capturing the datasource name. +If this part represents any of the supported schemas under datasource, it will resolve to the same otherwise schema name will resolve to ``default`` schema. Currently ``default`` and ``information_schema`` are the only schemas supported. 3. Rest of the parts are combined to resolve tablename. -** Only table name identifiers are supported with fully qualified names, identifiers used for columns and other attributes doesn't require prefixing with catalog and schema information.** +**Only table name identifiers are supported with fully qualified names; identifiers used for columns and other attributes don't require prefixing with datasource and schema information.** Examples -------- -Assume [my_prometheus] is the only catalog configured other than default opensearch engine. +Assume [my_prometheus] is the only datasource configured other than default opensearch engine. 1. ``my_prometheus.default.http_requests_total`` -catalogName = ``my_prometheus`` [Is in the list of catalogs configured]. +datasourceName = ``my_prometheus`` [Is in the list of datasources configured]. schemaName = ``default`` [Is in the list of schemas supported]. @@ -231,7 +231,7 @@ tableName = ``http_requests_total``. 2. ``logs.12.13.1`` -catalogName = ``@opensearch`` [Resolves to default @opensearch connector since [my_prometheus] is the only catalog configured name.] +datasourceName = ``@opensearch`` [Resolves to default @opensearch connector since [my_prometheus] is the only datasource configured name.] schemaName = ``default`` [No supported schema found, so default to `default`]. @@ -241,7 +241,7 @@ tableName = ``logs.12.13.1``. 3. ``my_prometheus.http_requests_total`` -catalogName = ```my_prometheus`` [Is in the list of catalogs configured].
+datasourceName = ``my_prometheus`` [Is in the list of datasources configured]. schemaName = ``default`` [No supported schema found, so default to `default`]. @@ -249,7 +249,7 @@ tableName = ``http_requests_total``. 4. ``prometheus.http_requests_total`` -catalogName = ``@opensearch`` [Resolves to default @opensearch connector since [my_prometheus] is the only catalog configured name.] +datasourceName = ``@opensearch`` [Resolves to default @opensearch connector since [my_prometheus] is the only datasource configured name.] schemaName = ``default`` [No supported schema found, so default to `default`]. @@ -257,7 +257,7 @@ tableName = ``prometheus.http_requests_total``. 5. ``prometheus.default.http_requests_total.1.2.3`` -catalogName = ``@opensearch`` [Resolves to default @opensearch connector since [my_prometheus] is the only catalog configured name.] +datasourceName = ``@opensearch`` [Resolves to default @opensearch connector since [my_prometheus] is the only datasource configured name.] schemaName = ``default`` [No supported schema found, so default to `default`]. diff --git a/docs/user/ppl/admin/catalog.rst b/docs/user/ppl/admin/catalog.rst deleted file mode 100644 index ccaab342a5..0000000000 --- a/docs/user/ppl/admin/catalog.rst +++ /dev/null @@ -1,83 +0,0 @@ -.. highlight:: sh - -================= -Catalog Settings -================= - -.. rubric:: Table of contents - -.. contents:: - :local: - :depth: 1 - -Introduction -============ - -The concept of ``catalog`` is introduced to support the federation of SQL/PPL query engine to multiple data sources. -This helps PPL users to leverage data from multiple data sources and derive correlation and insights. -Catalog definition provides the information to connect to a datasource and also gives a name to them to refer in PPL commands. - - -Definitions of catalog and connector -==================================== -* Connector is a component that adapts the query engine to a datasource.
For example, Prometheus connector would adapt and help execute the queries to run on Prometheus data source. connector name is enough in the catalog definition json. -* Catalog is a construct to define how to connect to a datasource and which connector to adapt by query engine. - -Example Prometheus Catalog Definition :: - - [{ - "name" : "my_prometheus", - "connector": "prometheus", - "properties" : { - "prometheus.uri" : "http://localhost:8080", - "prometheus.auth.type" : "basicauth", - "prometheus.auth.username" : "admin", - "prometheus.auth.password" : "admin" - } - }] -Catalog configuration Restrictions. - -* ``name``, ``connector``, ``properties`` are required fields in the catalog configuration. -* All the catalog names should be unique and match the following regex[``[@*A-Za-z]+?[*a-zA-Z_\-0-9]*``]. -* Allowed Connectors. - * ``prometheus`` [More details: `Prometheus Connector `_] -* All the allowed config parameters in ``properties`` are defined in individual connector pages mentioned above. - -Configuring catalog in OpenSearch -==================================== - -* Catalogs are configured in opensearch keystore as secure settings under ``plugins.query.federation.catalog.config`` key as they contain credential info. -* A json file containing array of catalog configurations should be injected into keystore with the above mentioned key. sample json file can be seen in the above section. - - -[**To be run on all the nodes in the cluster**] Command to add catalog.json file to OpenSearch Keystore :: - - >> bin/opensearch-keystore add-file plugins.query.federation.catalog.config catalog.json - -Catalogs can be configured during opensearch start up or can be updated while the opensearch is running. -If we update catalog configuration during runtime, the following api should be triggered to update the query engine with the latest changes. 
- -[**Required only if we update keystore settings during runtime**] Secure Settings refresh api:: - - >> curl --request POST \ - --url http://{{opensearch-domain}}:9200/_nodes/reload_secure_settings \ - --data '{"secure_settings_password":"{{keystore-password}}"}' - - -Using a catalog in PPL command -==================================== -Catalog is referred in source command as show in the code block below. -Based on the abstraction designed by the connector, -one can refer the corresponding entity as table in the source command. -For example in prometheus connector, each metric is abstracted as a table. -so we can refer a metric and apply stats over it in the following way. - -Example source command with prometheus catalog :: - - >> source = my_prometheus.prometheus_http_requests_total | stats avg(@value) by job; - - -Limitations of catalog -==================================== -Catalog settings are global and users with PPL access are allowed to fetch data from all the defined catalogs. -PPL access can be controlled using roles.(More details: `Security Settings `_) \ No newline at end of file diff --git a/docs/user/ppl/admin/datasources.rst b/docs/user/ppl/admin/datasources.rst new file mode 100644 index 0000000000..2974ac20ce --- /dev/null +++ b/docs/user/ppl/admin/datasources.rst @@ -0,0 +1,83 @@ +.. highlight:: sh + +=================== +Datasource Settings +=================== + +.. rubric:: Table of contents + +.. contents:: + :local: + :depth: 1 + +Introduction +============ + +The concept of ``datasource`` is introduced to support the federation of SQL/PPL query engine to multiple data stores. +This helps PPL users to leverage data from multiple data stores and derive correlation and insights. +Datasource definition provides the information to connect to a data store and also gives a name to them to refer in PPL commands. 
+ + +Definitions of datasource and connector +======================================= +* Connector is a component that adapts the query engine to a datastore. For example, Prometheus connector would adapt and help execute the queries to run on Prometheus datastore. connector name is enough in the datasource definition json. +* Datasource is a construct to define how to connect to a data store and which connector to adapt by query engine. + +Example Prometheus Datasource Definition :: + + [{ + "name" : "my_prometheus", + "connector": "prometheus", + "properties" : { + "prometheus.uri" : "http://localhost:8080", + "prometheus.auth.type" : "basicauth", + "prometheus.auth.username" : "admin", + "prometheus.auth.password" : "admin" + } + }] +Datasource configuration Restrictions. + +* ``name``, ``connector``, ``properties`` are required fields in the datasource configuration. +* All the datasource names should be unique and match the following regex[``[@*A-Za-z]+?[*a-zA-Z_\-0-9]*``]. +* Allowed Connectors. + * ``prometheus`` [More details: `Prometheus Connector `_] +* All the allowed config parameters in ``properties`` are defined in individual connector pages mentioned above. + +Configuring a datasource in OpenSearch +====================================== + +* Datasources are configured in opensearch keystore as secure settings under ``plugins.query.federation.datasources.config`` key as they contain credential info. +* A json file containing array of datasource configurations should be injected into keystore with the above mentioned key. sample json file can be seen in the above section. + + +[**To be run on all the nodes in the cluster**] Command to add datasources.json file to OpenSearch Keystore :: + + >> bin/opensearch-keystore add-file plugins.query.federation.datasources.config datasources.json + +Datasources can be configured during opensearch start up or can be updated while the opensearch is running.
+If we update a datasource configuration during runtime, the following api should be triggered to update the query engine with the latest changes. + +[**Required only if we update keystore settings during runtime**] Secure Settings refresh api:: + + >> curl --request POST \ + --url http://{{opensearch-domain}}:9200/_nodes/reload_secure_settings \ + --data '{"secure_settings_password":"{{keystore-password}}"}' + + +Using a datasource in PPL command +==================================== +Datasource is referred to in source command as shown in the code block below. +Based on the abstraction designed by the connector, +one can refer to the corresponding entity as a table in the source command. +For example in prometheus connector, each metric is abstracted as a table. +so we can refer to a metric and apply stats over it in the following way. + +Example source command with prometheus datasource :: + + >> source = my_prometheus.prometheus_http_requests_total | stats avg(@value) by job; + + +Limitations of datasource +==================================== +Datasource settings are global and users with PPL access are allowed to fetch data from all the defined datasources. +PPL access can be controlled using roles. (More details: `Security Settings `_) \ No newline at end of file diff --git a/docs/user/ppl/cmd/information_schema.rst b/docs/user/ppl/cmd/information_schema.rst index a756fb080e..17ef1af5d7 100644 --- a/docs/user/ppl/cmd/information_schema.rst +++ b/docs/user/ppl/cmd/information_schema.rst @@ -11,19 +11,19 @@ Metadata queries using information_schema Description ============ -| Use ``information_schema`` in source command to query tables information under a catalog. +| Use ``information_schema`` in source command to query tables information under a datasource. In the current state, ``information_schema`` only support metadata of tables. This schema will be extended for views, columns and other metadata info in future.
Syntax ============ -source = catalog.information_schema.tables; +source = datasource.information_schema.tables; -Example 1: Fetch tables in prometheus catalog. +Example 1: Fetch tables in prometheus datasource. ============================================== -The examples fetches tables in the prometheus catalog. +The examples fetches tables in the prometheus datasource. PPL query for fetching PROMETHEUS TABLES with where clause:: @@ -36,8 +36,8 @@ PPL query for fetching PROMETHEUS TABLES with where clause:: +-----------------+----------------+--------------------------------+--------------+--------+---------------------------+ -Example 2: Search tables in prometheus catalog. -=============================================== +Example 2: Search tables in prometheus datasource. +================================================= The examples searches tables in the prometheus catalog. diff --git a/docs/user/ppl/cmd/showcatalogs.rst b/docs/user/ppl/cmd/showcatalogs.rst index d304cba768..f7c6beb82f 100644 --- a/docs/user/ppl/cmd/showcatalogs.rst +++ b/docs/user/ppl/cmd/showcatalogs.rst @@ -1,6 +1,6 @@ -============= -show catalogs -============= +================ +show datasources +================ .. rubric:: Table of contents @@ -11,26 +11,26 @@ show catalogs Description ============ -| Using ``show catalogs`` command to query catalogs configured in the PPL engine. ``show catalogs`` command could be only used as the first command in the PPL query. +| Using ``show datasources`` command to query datasources configured in the PPL engine. ``show datasources`` command could be only used as the first command in the PPL query. Syntax ============ -show catalogs +show datasources -Example 1: Fetch all PROMETHEUS catalogs -================================= +Example 1: Fetch all PROMETHEUS datasources +=========================================== -The example fetches all the catalogs configured. +The example fetches all the datasources of type prometheus. 
-PPL query for all PROMETHEUS CATALOGS:: +PPL query for all PROMETHEUS DATASOURCES:: - os> show catalogs | where CONNECTOR_TYPE='PROMETHEUS'; + os> show datasources | where CONNECTOR_TYPE='PROMETHEUS'; fetched rows / total rows = 1/1 - +----------------+------------------+ - | CATALOG_NAME | CONNECTOR_TYPE | - |----------------+------------------| - | my_prometheus | PROMETHEUS | - +----------------+------------------+ + +-------------------+------------------+ + | DATASOURCE_NAME | CONNECTOR_TYPE | + |-------------------+------------------| + | my_prometheus | PROMETHEUS | + +-------------------+------------------+ diff --git a/docs/user/ppl/index.rst b/docs/user/ppl/index.rst index e09315b1c3..e2bf6ec679 100644 --- a/docs/user/ppl/index.rst +++ b/docs/user/ppl/index.rst @@ -34,7 +34,7 @@ The query start with search command and then flowing a set of command delimited - `Monitoring `_ - - `Catalog Settings `_ + - `Datasource Settings `_ - `Prometheus Connector `_ diff --git a/doctest/build.gradle b/doctest/build.gradle index bf789e7434..3f2cfbd816 100644 --- a/doctest/build.gradle +++ b/doctest/build.gradle @@ -104,7 +104,7 @@ String mlCommonsPlugin = 'opensearch-ml' testClusters { docTestCluster { - keystore 'plugins.query.federation.catalog.config', new File("$projectDir/catalog", 'catalog.json') + keystore 'plugins.query.federation.datasources.config', new File("$projectDir/catalog", 'catalog.json') plugin(provider(new Callable(){ @Override RegularFile call() throws Exception { diff --git a/integ-test/build.gradle b/integ-test/build.gradle index 11ba5542fd..f723c7d67c 100644 --- a/integ-test/build.gradle +++ b/integ-test/build.gradle @@ -111,7 +111,7 @@ testClusters.all { testClusters.integTest { plugin ":opensearch-sql-plugin" - keystore 'plugins.query.federation.catalog.config', new File("$projectDir/src/test/resources/catalog/", 'catalog.json') + keystore 'plugins.query.federation.datasources.config', new File("$projectDir/src/test/resources/catalog/", 
'catalog.json') } task startPrometheus(type: SpawnProcessTask) { mustRunAfter ':doctest:doctest' diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ShowCatalogsCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ShowCatalogsCommandIT.java index 23418366be..2c6f449e46 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/ShowCatalogsCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ShowCatalogsCommandIT.java @@ -26,7 +26,7 @@ public void testShowCatalogsCommands() throws IOException { rows("@opensearch", "OPENSEARCH")); verifyColumn( result, - columnName("CATALOG_NAME"), + columnName("DATASOURCE_NAME"), columnName("CONNECTOR_TYPE") ); } @@ -38,7 +38,7 @@ public void testShowCatalogsCommandsWithWhereClause() throws IOException { rows("my_prometheus", "PROMETHEUS")); verifyColumn( result, - columnName("CATALOG_NAME"), + columnName("DATASOURCE_NAME"), columnName("CONNECTOR_TYPE") ); } diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/catalog/CatalogSettings.java b/plugin/src/main/java/org/opensearch/sql/plugin/catalog/CatalogSettings.java index 20efce1b7a..558e7558ca 100644 --- a/plugin/src/main/java/org/opensearch/sql/plugin/catalog/CatalogSettings.java +++ b/plugin/src/main/java/org/opensearch/sql/plugin/catalog/CatalogSettings.java @@ -12,6 +12,6 @@ public class CatalogSettings { public static final Setting CATALOG_CONFIG = SecureSetting.secureFile( - "plugins.query.federation.catalog.config", + "plugins.query.federation.datasources.config", null); } diff --git a/plugin/src/test/java/org/opensearch/sql/plugin/catalog/CatalogServiceImplTest.java b/plugin/src/test/java/org/opensearch/sql/plugin/catalog/CatalogServiceImplTest.java index 07ee458e5c..cdbce55cb1 100644 --- a/plugin/src/test/java/org/opensearch/sql/plugin/catalog/CatalogServiceImplTest.java +++ b/plugin/src/test/java/org/opensearch/sql/plugin/catalog/CatalogServiceImplTest.java @@ -30,7 +30,7 @@ public class CatalogServiceImplTest { public static 
final String CATALOG_SETTING_METADATA_KEY = - "plugins.query.federation.catalog.config"; + "plugins.query.federation.datasources.config"; @Mock private StorageEngine storageEngine; diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 79c812949f..e0aeb0ac47 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -43,7 +43,7 @@ SOURCE: 'SOURCE'; INDEX: 'INDEX'; D: 'D'; DESC: 'DESC'; -CATALOGS: 'CATALOGS'; +DATASOURCES: 'DATASOURCES'; // CLAUSE KEYWORDS SORTBY: 'SORTBY';