From 9c176c0f8a6362dc3c4d3276ff655256cc98c3bd Mon Sep 17 00:00:00 2001 From: Michael Chin Date: Mon, 17 Jun 2024 21:05:27 -0700 Subject: [PATCH] Enhancements for Gremlin HTTP (#624) * Gremlin HTTP enhancments * remove unused import * update changelog --- ChangeLog.md | 1 + .../configuration/get_config.py | 8 +++- src/graph_notebook/magics/graph_magic.py | 21 ++++++---- src/graph_notebook/magics/metadata.py | 7 +++- src/graph_notebook/neptune/client.py | 42 +++++++++++++++---- 5 files changed, 58 insertions(+), 21 deletions(-) diff --git a/ChangeLog.md b/ChangeLog.md index c9231906..36b872da 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -8,6 +8,7 @@ Starting with v1.31.6, this file will contain a record of major features and upd - Added `--connection-protocol` option to `%%gremlin` ([Link to PR](https://github.com/aws/graph-notebook/pull/617)) - Added global Gremlin `connection_protocol` setting to `%%graph_notebook_config` ([Link to PR](https://github.com/aws/graph-notebook/pull/621)) +- Added various enhancements for `%%gremlin` HTTP connections to Neptune ([Link to PR](https://github.com/aws/graph-notebook/pull/624)) - Restored left alignment of numeric value columns in results table widget ([Link to PR](https://github.com/aws/graph-notebook/pull/620)) ## Release 4.4.0 (June 10, 2024) diff --git a/src/graph_notebook/configuration/get_config.py b/src/graph_notebook/configuration/get_config.py index 0c5d713f..c5efd1d2 100644 --- a/src/graph_notebook/configuration/get_config.py +++ b/src/graph_notebook/configuration/get_config.py @@ -9,7 +9,7 @@ SparqlSection, GremlinSection, Neo4JSection from graph_notebook.neptune.client import NEPTUNE_CONFIG_HOST_IDENTIFIERS, is_allowed_neptune_host, false_str_variants, \ DEFAULT_NEO4J_USERNAME, DEFAULT_NEO4J_PASSWORD, DEFAULT_NEO4J_DATABASE, \ - NEPTUNE_DB_SERVICE_NAME, NEPTUNE_ANALYTICS_SERVICE_NAME, NEPTUNE_DB_CONFIG_NAMES, NEPTUNE_ANALYTICS_CONFIG_NAMES + NEPTUNE_DB_SERVICE_NAME, DEFAULT_WS_PROTOCOL, DEFAULT_HTTP_PROTOCOL neptune_params = ['neptune_service', 'auth_mode', 'load_from_s3_arn', 'aws_region'] neptune_gremlin_params = ['connection_protocol'] @@ -33,11 +33,15 @@ def get_config_from_dict(data: dict, neptune_hosts: list = NEPTUNE_CONFIG_HOST_I neptune_service = data['neptune_service'] if 'neptune_service' in data else NEPTUNE_DB_SERVICE_NAME if 'gremlin' in data: data['gremlin']['include_protocol'] = True + if 'connection_protocol' not in data['gremlin']: + data['gremlin']['connection_protocol'] = DEFAULT_WS_PROTOCOL \ + if neptune_service == NEPTUNE_DB_SERVICE_NAME else DEFAULT_HTTP_PROTOCOL gremlin_section = GremlinSection(**data['gremlin']) if gremlin_section.to_dict()['traversal_source'] != 'g': print('Ignoring custom traversal source, Amazon Neptune does not support this functionality.\n') else: - gremlin_section = GremlinSection(include_protocol=True) + protocol = DEFAULT_WS_PROTOCOL if neptune_service == NEPTUNE_DB_SERVICE_NAME else DEFAULT_HTTP_PROTOCOL + gremlin_section = GremlinSection(include_protocol=True, connection_protocol=protocol) if neo4j_section.to_dict()['username'] != DEFAULT_NEO4J_USERNAME \ or neo4j_section.to_dict()['password'] != DEFAULT_NEO4J_PASSWORD: print('Ignoring Neo4J custom authentication, Amazon Neptune does not support this functionality.\n') diff --git a/src/graph_notebook/magics/graph_magic.py b/src/graph_notebook/magics/graph_magic.py index 97298ed2..737d0b28 100644 --- a/src/graph_notebook/magics/graph_magic.py +++ b/src/graph_notebook/magics/graph_magic.py @@ -44,15 +44,16 @@ neptune_db_only, neptune_graph_only from graph_notebook.magics.ml import neptune_ml_magic_handler, generate_neptune_ml_parser from graph_notebook.magics.streams import StreamViewer -from graph_notebook.neptune.client import ClientBuilder, Client, PARALLELISM_OPTIONS, PARALLELISM_HIGH, \ +from graph_notebook.neptune.client import (ClientBuilder, Client, PARALLELISM_OPTIONS, PARALLELISM_HIGH, \ LOAD_JOB_MODES, MODE_AUTO, FINAL_LOAD_STATUSES, SPARQL_ACTION, FORMAT_CSV, FORMAT_OPENCYPHER, FORMAT_NTRIPLE, \ DB_LOAD_TYPES, ANALYTICS_LOAD_TYPES, VALID_BULK_FORMATS, VALID_INCREMENTAL_FORMATS, \ FORMAT_NQUADS, FORMAT_RDFXML, FORMAT_TURTLE, STREAM_RDF, STREAM_PG, STREAM_ENDPOINTS, \ NEPTUNE_CONFIG_HOST_IDENTIFIERS, is_allowed_neptune_host, \ STATISTICS_LANGUAGE_INPUTS, STATISTICS_LANGUAGE_INPUTS_SPARQL, STATISTICS_MODES, SUMMARY_MODES, \ - SPARQL_EXPLAIN_MODES, OPENCYPHER_EXPLAIN_MODES, OPENCYPHER_PLAN_CACHE_MODES, OPENCYPHER_DEFAULT_TIMEOUT, \ - OPENCYPHER_STATUS_STATE_MODES, normalize_service_name, GRAPH_PG_INFO_METRICS, \ - DEFAULT_GREMLIN_PROTOCOL, GREMLIN_PROTOCOL_FORMATS, DEFAULT_HTTP_PROTOCOL, normalize_protocol_name + SPARQL_EXPLAIN_MODES, OPENCYPHER_EXPLAIN_MODES, GREMLIN_EXPLAIN_MODES, \ + OPENCYPHER_PLAN_CACHE_MODES, OPENCYPHER_DEFAULT_TIMEOUT, OPENCYPHER_STATUS_STATE_MODES, \ + normalize_service_name, GRAPH_PG_INFO_METRICS, \ + DEFAULT_GREMLIN_PROTOCOL, GREMLIN_PROTOCOL_FORMATS, DEFAULT_HTTP_PROTOCOL, normalize_protocol_name) from graph_notebook.network import SPARQLNetwork from graph_notebook.network.gremlin.GremlinNetwork import parse_pattern_list_str, GremlinNetwork from graph_notebook.visualization.rows_and_columns import sparql_get_rows_and_columns, opencypher_get_rows_and_columns @@ -534,7 +535,7 @@ def stream_viewer(self, line): language = args.language limit = args.limit - uri = self.client.get_uri_with_port() + uri = self.client.get_uri(include_port=True) viewer = StreamViewer(self.client, uri, language, limit=limit) viewer.show() @@ -1034,8 +1035,9 @@ def gremlin(self, line, cell, local_ns: dict = None): f'If not specified, defaults to the value of the gremlin.connection_protocol field ' f'in %graph_notebook_config. Please note that this option has no effect on the ' f'Profile and Explain modes, which must use HTTP.') - parser.add_argument('--explain-type', type=str.lower, default='', - help='Explain mode to use when using the explain query mode.') + parser.add_argument('--explain-type', type=str.lower, default='dynamic', + help=f'Explain mode to use when using the explain query mode. ' + f'Accepted values: {GREMLIN_EXPLAIN_MODES}') parser.add_argument('-p', '--path-pattern', default='', help='path pattern') parser.add_argument('-g', '--group-by', type=str, default='', help='Property used to group nodes (e.g. code, T.region) default is T.label') @@ -1074,6 +1076,8 @@ def gremlin(self, line, cell, local_ns: dict = None): 'TinkerPop driver "Serializers" enum values. Default is GRAPHSON_V3_UNTYPED') parser.add_argument('--profile-indexOps', action='store_true', default=False, help='Show a detailed report of all index operations.') + parser.add_argument('--profile-debug', action='store_true', default=False, + help='Enable debug mode.') parser.add_argument('--profile-misc-args', type=str, default='{}', help='Additional profile options, passed in as a map.') parser.add_argument('-sp', '--stop-physics', action='store_true', default=False, @@ -1154,7 +1158,8 @@ def gremlin(self, line, cell, local_ns: dict = None): profile_args = {"profile.results": args.profile_no_results, "profile.chop": args.profile_chop, "profile.serializer": serializer, - "profile.indexOps": args.profile_indexOps} + "profile.indexOps": args.profile_indexOps, + "profile.debug": args.profile_debug} try: profile_misc_args_dict = json.loads(args.profile_misc_args) profile_args.update(profile_misc_args_dict) diff --git a/src/graph_notebook/magics/metadata.py b/src/graph_notebook/magics/metadata.py index f50d8583..b36765ed 100644 --- a/src/graph_notebook/magics/metadata.py +++ b/src/graph_notebook/magics/metadata.py @@ -204,8 +204,11 @@ def build_gremlin_metadata_from_query(query_type: str, results: any, res: Respon if query_type == 'explain': gremlin_metadata = create_propertygraph_metadata_obj('explain') gremlin_metadata.set_request_metrics(res) - gremlin_metadata.set_metric_value('predicates', int((re.search(r'# of predicates: (.*?)\n', results).group(1)) - .replace(".", '').replace(",", ''))) + try: + gremlin_metadata.set_metric_value('predicates', int((re.search(r'# of predicates: (.*?)\n', results).group(1)) + .replace(".", '').replace(",", ''))) + except AttributeError: + pass return gremlin_metadata elif query_type == 'profile': gremlin_metadata = create_propertygraph_metadata_obj('profile') diff --git a/src/graph_notebook/neptune/client.py b/src/graph_notebook/neptune/client.py index a11198b0..98cfe132 100644 --- a/src/graph_notebook/neptune/client.py +++ b/src/graph_notebook/neptune/client.py @@ -137,6 +137,7 @@ SPARQL_EXPLAIN_MODES = ['dynamic', 'static', 'details'] OPENCYPHER_EXPLAIN_MODES = ['dynamic', 'static', 'details'] +GREMLIN_EXPLAIN_MODES = ['dynamic', 'static', 'details'] OPENCYPHER_PLAN_CACHE_MODES = ['auto', 'enabled', 'disabled'] OPENCYPHER_DEFAULT_TIMEOUT = 120000 OPENCYPHER_STATUS_STATE_MODES = ['ALL', 'RUNNING', 'WAITING', 'CANCELLING'] @@ -257,7 +258,7 @@ def is_neptune_domain(self): def is_analytics_domain(self): return self.service == NEPTUNE_ANALYTICS_SERVICE_NAME - def get_uri_with_port(self, use_websocket=False, use_proxy=False): + def get_uri(self, use_websocket=False, use_proxy=False, include_port=True): if use_websocket is True: protocol = self._ws_protocol else: @@ -270,7 +271,9 @@ def get_uri_with_port(self, use_websocket=False, use_proxy=False): uri_host = self.target_host uri_port = self.target_port - uri = f'{protocol}://{uri_host}:{uri_port}' + uri = f'{protocol}://{uri_host}' + if include_port: + uri += f':{uri_port}' return uri def get_graph_id(self): @@ -347,9 +350,9 @@ def sparql_cancel(self, query_id: str, silent: bool = False): def get_gremlin_connection(self, transport_kwargs) -> client.Client: nest_asyncio.apply() - ws_url = f'{self.get_uri_with_port(use_websocket=True, use_proxy=False)}/gremlin' + ws_url = f'{self.get_uri(use_websocket=True, use_proxy=False)}/gremlin' if self.proxy_host != '': - proxy_http_url = f'{self.get_uri_with_port(use_websocket=False, use_proxy=True)}/gremlin' + proxy_http_url = f'{self.get_uri(use_websocket=False, use_proxy=True)}/gremlin' transport_factory_args = lambda: AiohttpTransport(call_from_event_loop=True, proxy=proxy_http_url, **transport_kwargs) request = self._prepare_request('GET', proxy_http_url) @@ -387,9 +390,17 @@ def gremlin_http_query(self, query, headers=None) -> requests.Response: if headers is None: headers = {} + data = {} use_proxy = True if self.proxy_host != '' else False - uri = f'{self.get_uri_with_port(use_websocket=False, use_proxy=use_proxy)}/gremlin' - data = {'gremlin': query} + if self.is_analytics_domain(): + uri = f'{self.get_uri(use_websocket=False, use_proxy=use_proxy, include_port=False)}/queries' + data['language'] = 'gremlin' + data['gremlin'] = query + headers['content-type'] = 'application/json' + else: + uri = f'{self.get_uri(use_websocket=False, use_proxy=use_proxy)}/gremlin' + data['gremlin'] = query + req = self._prepare_request('POST', uri, data=json.dumps(data), headers=headers) res = self._http_session.send(req, verify=self.ssl_verify) return res @@ -412,12 +423,25 @@ def gremlin_profile(self, query: str, args={}) -> requests.Response: return self._gremlin_query_plan(query=query, plan_type='profile', args=args) def _gremlin_query_plan(self, query: str, plan_type: str, args: dict, ) -> requests.Response: - url = f'{self._http_protocol}://{self.host}:{self.port}/gremlin/{plan_type}' - data = {'gremlin': query} + data = {} + headers = {} + url = f'{self._http_protocol}://{self.host}' + if self.is_analytics_domain(): + url += '/queries' + data['gremlin'] = query + data['language'] = 'gremlin' + headers['content-type'] = 'application/json' + if plan_type == 'explain': + data['explain.mode'] = args.pop('explain.mode') + elif plan_type == 'profile': + data['profile.debug'] = args.pop('profile.debug') + else: + url += f':{self.port}/gremlin/{plan_type}' + data['gremlin'] = query if args: for param, value in args.items(): data[param] = value - req = self._prepare_request('POST', url, data=json.dumps(data)) + req = self._prepare_request('POST', url, data=json.dumps(data), headers=headers) res = self._http_session.send(req, verify=self.ssl_verify) return res