Dev/gfql endpoint #615

Open
wants to merge 16 commits into master
13 changes: 13 additions & 0 deletions CHANGELOG.md
@@ -11,6 +11,19 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm

* New tutorial on GPU memory management and capacity planning in the GPU section

### Feat

* `plot(render=)` supports literal-typed mode values: `"auto"`, `"g"`, `"url"`, `"ipython"`, `"databricks"`, where `"g"` is a new Plottable
* Expose and track `._dataset_id`, `._url`, `._nodes_file_id`, and `._edges_file_id`
* Upload: Factor out explicit upload method `g2 = g1.upload(); assert g2._dataset_id` from plot interface
* bind: Remote dataset binding via `g1 = graphistry.bind(dataset_id='abc123')`
* chain: Remote GFQL calls via `g2 = g1.chain_remote([...])` and `meta_df = g1.chain_remote_shape([...])`
* python: Remote Python calls via `g2 = g1.python_remote(...)`

### Changed

* `plot(render=)` now `Union[bool, RenderMode]`, not just `bool`

## [0.34.17 - 2024-10-20]

### Added
24 changes: 24 additions & 0 deletions docs/source/gfql/about.rst
@@ -272,6 +272,30 @@ Use PyGraphistry's visualization capabilities to explore your graph.
- Filters nodes where `pagerank > 0.1`.
- Visualizes the subgraph consisting of high PageRank nodes.

10. Run remotely

You may want to run GFQL remotely because the data is remote or a GPU is available remotely:

**Example: Run GFQL remotely**

::

from graphistry import n, e
g2 = g1.chain_remote([n(), e(), n()])

**Example: Run GFQL remotely, and decouple the upload step**

::

from graphistry import n, e
g2 = g1.upload()
assert g2._dataset_id is not None, "Uploading sets `dataset_id` for subsequent calls"
g3 = g2.chain_remote([n(), e(), n()])


Additional parameters control options such as the execution engine and which results are returned.
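
For example, a minimal sketch that selects the GPU engine and fetches only the nodes (see :ref:`gfql-remote` for the full parameter list):

::

    from graphistry import n, e
    g4 = g2.chain_remote([n(), e(), n()], engine='cudf', output_type='nodes')
    print(g4._nodes.shape)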


Conclusion and Next Steps
-------------------------

1 change: 1 addition & 0 deletions docs/source/gfql/index.rst
Expand Up @@ -13,6 +13,7 @@ See also:

about
overview
remote
GFQL CPU & GPU Acceleration <performance>
translate
combo
146 changes: 146 additions & 0 deletions docs/source/gfql/remote.rst
@@ -0,0 +1,146 @@
.. _gfql-remote:

GFQL Remote Mode
====================

You can run GFQL queries remotely, such as when the data is already remote, grows large, or you want to use a remote GPU.

Basic Usage
-----------

Run chain remotely and fetch results
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. code-block:: python

from graphistry import n, e
g2 = g1.chain_remote([n(), e(), n()])
assert len(g2._nodes) <= len(g1._nodes)

Method :meth:`chain_remote <graphistry.compute.ComputeMixin.ComputeMixin.chain_remote>` runs the chain remotely and fetches the computed graph. Parameters (see the combined example after this list):

- **chain**: Sequence of graph node and edge matchers (:class:`ASTObject <graphistry.compute.ast.ASTObject>` instances).
- **output_type**: Defaulting to `'all'`, whether to return nodes (`'nodes'`), edges (`'edges'`), or both (`'all'`). See :meth:`chain_remote_shape <graphistry.compute.ComputeMixin.ComputeMixin.chain_remote_shape>` to return only metadata.
- **node_col_subset**: Optionally limit which node attributes are returned to an allowlist.
- **edge_col_subset**: Optionally limit which edge attributes are returned to an allowlist.
- **engine**: Optional execution engine. Engine is typically not set, defaulting to `'auto'`. Use `'cudf'` for GPU acceleration and `'pandas'` for CPU.
- **validate**: Defaulting to `True`, whether to validate the query and data.
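
For example, a minimal sketch combining several of these options, assuming the nodes have a `time` column:

.. code-block:: python

    from graphistry import n, e

    cols = [g1._node, 'time']
    g2 = g1.chain_remote(
        [n(), e(), n()],
        output_type='nodes',
        node_col_subset=cols,
        engine='cudf')
    assert len(g2._nodes.columns) == len(cols)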


Manual CPU, GPU engine selection
---------------------------------

By default, GFQL will decide which engine to use based on workload characteristics like the dataset size. You can override this default by specifying which engine to use.

GPU
~~~~~

Run on GPU remotely and fetch results

.. code-block:: python

from graphistry import n, e
g2 = g1.chain_remote([n(), e(), n()], engine='cudf')
assert len(g2._nodes) <= len(g1._nodes)

CPU
~~~~~~~~~

Run on CPU remotely and fetch results

.. code-block:: python

from graphistry import n, e
g2 = g1.chain_remote([n(), e(), n()], engine='pandas')




Explicit uploads
-----------------

Explicit uploads via :meth:`upload <graphistry.PlotterBase.PlotterBase.upload>` bind the field `Plottable._dataset_id`, so subsequent remote calls know to skip re-uploading. Always uploading explicitly can make code more predictable in larger codebases.


.. code-block:: python

from graphistry import n, e
g2 = g1.upload()
assert g2._dataset_id is not None, "Uploading sets `dataset_id` for subsequent calls"

g3a = g2.chain_remote([n()])
g3b = g2.chain_remote([n(), e(), n()])
assert len(g3a._nodes) >= len(g3b._nodes)


Bind to existing remote data
-------------------------------

If data is already uploaded and your user has access to it, such as from a previous session or shared by another user, you can bind it to a local `Plottable` for remote access.

.. code-block:: python

import graphistry
from graphistry import n, e

g1 = graphistry.bind(dataset_id='abc123')
assert g1._nodes is None, "Binding does not fetch data"

g2 = g1.chain_remote([n(), e(), n()])
print(g2._nodes.shape)


Download less
----------------

You may not need to download all -- or any -- of your results, which can significantly speed up execution.


Return only nodes
~~~~~~~~~~~~~~~~~

.. code-block:: python

g1.chain_remote([n(), e(), n()], output_type="nodes")

Return only nodes and specific columns
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. code-block:: python

cols = [g1._node, 'time']
g2b = g1.chain_remote(
[n(), e(), n()],
output_type="nodes",
node_col_subset=cols)
assert len(g2b._nodes.columns) == len(cols)


Return only edges
~~~~~~~~~~~~~~~~~

.. code-block:: python

g2a = g1.chain_remote([n(), e(), n()], output_type="edges")

Return only edges and specific columns
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. code-block:: python

cols = [g1._source, g1._destination, 'time']
g2b = g1.chain_remote([n(), e(), n()],
output_type="edges",
edge_col_subset=cols)
assert len(g2b._edges.columns) == len(cols)

Return metadata but not the actual graph
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. code-block:: python

from graphistry import n, e
shape_df = g1.chain_remote_shape([n(), e(), n()])
assert len(shape_df) == 2
print(shape_df)

104 changes: 98 additions & 6 deletions graphistry/Plottable.py
@@ -1,9 +1,11 @@
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Set, Tuple, Union
from typing_extensions import Literal
import pandas as pd

from graphistry.models.compute.chain_remote import FormatType, OutputType
from graphistry.plugins_types.cugraph_types import CuGraphKind
from graphistry.Engine import Engine, EngineAbstract
from graphistry.utils.json import JSONVal


if TYPE_CHECKING:
@@ -19,6 +21,12 @@
UMAP = Any
Pipeline = Any


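# Render modes accepted by plot(render=...) and settings(render=...); bool values map to "auto" (True) and "url" (False)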
RenderModesConcrete = Literal["g", "url", "ipython", "databricks", "browser"]
RENDER_MODE_CONCRETE_VALUES: Set[RenderModesConcrete] = set(["g", "url", "ipython", "databricks", "browser"])
RenderModes = Union[Literal["auto"], RenderModesConcrete]
RENDER_MODE_VALUES: Set[RenderModes] = set(["auto", "g", "url", "ipython", "databricks", "browser"])

class Plottable(object):

_edges : Any
@@ -55,6 +63,11 @@ class Plottable(object):
_bolt_driver : Any
_tigergraph : Any

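    # Remote dataset and file identifiers tracked across uploads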
_dataset_id: Optional[str]
_url: Optional[str]
_nodes_file_id: Optional[str]
_edges_file_id: Optional[str]

_node_embedding : Optional[pd.DataFrame]
_node_encoder : Optional[Any]
_node_features : Optional[pd.DataFrame]
@@ -236,13 +249,65 @@ def filter_edges_by_dict(self, filter_dict: Optional[dict] = None) -> 'Plottable
return self

# FIXME python recursive typing issues
def chain(self, ops: List[Any]) -> 'Plottable':
def chain(self, ops: Union[Any, List[Any]]) -> 'Plottable':
"""
ops is List[ASTObject]
ops is Union[List[ASTObject], Chain]
"""
if 1 + 1:
raise RuntimeError('should not happen')
return self

def chain_remote(
self: 'Plottable',
chain: Union[Any, Dict[str, JSONVal]],
api_token: Optional[str] = None,
dataset_id: Optional[str] = None,
output_type: OutputType = "all",
format: Optional[FormatType] = None,
df_export_args: Optional[Dict[str, Any]] = None,
node_col_subset: Optional[List[str]] = None,
edge_col_subset: Optional[List[str]] = None,
engine: Optional[Literal["pandas", "cudf"]] = None
) -> 'Plottable':
"""
chain is Union[List[ASTObject], Chain]
"""
if 1 + 1:
raise RuntimeError('should not happen')
return self

def chain_remote_shape(
self: 'Plottable',
chain: Union[Any, Dict[str, JSONVal]],
api_token: Optional[str] = None,
dataset_id: Optional[str] = None,
format: Optional[FormatType] = None,
df_export_args: Optional[Dict[str, Any]] = None,
node_col_subset: Optional[List[str]] = None,
edge_col_subset: Optional[List[str]] = None,
engine: Optional[Literal["pandas", "cudf"]] = None
) -> pd.DataFrame:
"""
chain is Union[List[ASTObject], Chain]
"""
if 1 + 1:
raise RuntimeError('should not happen')
return pd.DataFrame({})

def python_remote(
self: 'Plottable',
code: str,
api_token: Optional[str] = None,
dataset_id: Optional[str] = None,
engine: Literal["pandas", "cudf"] = "cudf",
validate: bool = True
) -> Any:
"""
Return JSON literal
"""
if 1 + 1:
raise RuntimeError('should not happen')
return {}

def to_igraph(self,
directed: bool = True,
@@ -399,7 +464,11 @@ def encode_axis(self, rows=[]) -> 'Plottable':
raise RuntimeError('should not happen')
return self

def settings(self, height: Optional[float] = None, url_params: Dict[str, Any] = {}, render: Optional[bool] = None) -> 'Plottable':
def settings(self,
height: Optional[float] = None,
url_params: Dict[str, Any] = {},
render: Optional[Union[bool, RenderModes]] = None
) -> 'Plottable':
"""Specify iframe height and add URL parameter dictionary.

The library takes care of URI component encoding for the dictionary.
@@ -410,8 +479,8 @@ def settings(self, height: Optional[float] = None, url_params: Dict[str, Any] =
:param url_params: Dictionary of querystring parameters to append to the URL.
:type url_params: dict

:param render: Whether to render the visualization using the native notebook environment (default True), or return the visualization URL
:type render: bool
:param render: Set default render mode from RenderModes types, where True/None is "auto" and False is "url"
:type render: Optional[Union[bool, RenderModes]]

"""

Expand All @@ -430,3 +499,26 @@ def to_pandas(self) -> 'Plottable':
if 1 + 1:
raise RuntimeError('should not happen')
return self

def upload(
self,
memoize: bool = True,
validate: bool = True
) -> 'Plottable':
if 1 + 1:
raise RuntimeError('should not happen')
return self

def plot(
self,
graph=None,
nodes=None,
name=None,
description=None,
render: Optional[Union[bool, RenderModes]] = "auto",
skip_upload=False, as_files=False, memoize=True,
extra_html="", override_html_style=None, validate: bool = True
) -> 'Plottable':
if 1 + 1:
raise RuntimeError('should not happen')
return self