Initial Dask trimesh support #696

Merged: 3 commits, Feb 7, 2019
1 change: 0 additions & 1 deletion .appveyor.yml
@@ -10,7 +10,6 @@ environment:
 install:
 - "SET PATH=%CONDA%;%CONDA%\\Scripts;%PATH%"
 - "conda install -y -c pyviz pyctdev && doit ecosystem_setup"
-- conda install -y "conda<4.6"
 - "doit env_create %CHANNELS% --name=test --python=%PY%"
 - "activate test"
 - "doit develop_install %CHANNELS%"
3 changes: 2 additions & 1 deletion datashader/glyphs.py
@@ -123,7 +123,8 @@ def __init__(self, x, y, z=None, weight_type=True, interp=True):

     @property
     def inputs(self):
-        return tuple([self.x, self.y] + list(self.z))
+        return (tuple([self.x, self.y] + list(self.z)) +
+                (self.weight_type, self.interpolate))
Collaborator Author:
This change was needed because the inputs tuple is used by the parent class to implement hashing and equality, which are in turn used for memoization. Without this, I was seeing cases where repeated use of canvas.trimesh with different values for interpolate was not resulting in updated aggregation behavior.
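For background, a minimal sketch (a hypothetical base class, not datashader's actual implementation) of how an inputs tuple typically drives hashing, equality, and therefore memoization:

# Hypothetical sketch: a parent class derives __hash__/__eq__ from `inputs`.
# If `interpolate` is left out of `inputs`, two glyphs that differ only in
# `interpolate` compare equal, so a memoized aggregation is wrongly reused.
class GlyphBase(object):
    @property
    def inputs(self):
        raise NotImplementedError

    def __hash__(self):
        return hash((type(self).__name__, self.inputs))

    def __eq__(self, other):
        return type(self) is type(other) and self.inputs == other.inputs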


def validate(self, in_dshape):
for col in [self.x, self.y] + list(self.z):
52 changes: 52 additions & 0 deletions datashader/tests/test_dask.py
@@ -1,10 +1,12 @@
from __future__ import division
from dask.context import config
import dask.dataframe as dd
import numpy as np
import pandas as pd
import xarray as xr

import datashader as ds
import datashader.utils as du

import pytest

@@ -476,3 +478,53 @@ def test_trimesh_no_double_edge():
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
], dtype='i4')
np.testing.assert_array_equal(np.flipud(agg.fillna(0).astype('i4').values)[:5], sol)


@pytest.mark.parametrize('npartitions', list(range(1, 6)))
def test_trimesh_dask_partitions(npartitions):
"""Assert that when two triangles share an edge that would normally get
double-drawn, the edge is only drawn for the rightmost (or bottommost)
triangle.
"""
# Test left/right edge shared
verts = dd.from_pandas(pd.DataFrame({'x': [4, 1, 5, 5, 5, 4],
'y': [4, 5, 5, 5, 4, 4]}),
npartitions=npartitions)
tris = dd.from_pandas(
pd.DataFrame(
{'v0': [0, 3], 'v1': [1, 4], 'v2': [2, 5], 'val': [1, 2]}),
npartitions=npartitions)

cvs = ds.Canvas(plot_width=20, plot_height=20,
x_range=(0, 5), y_range=(0, 5))

# Precompute mesh with dask dataframes
mesh = du.mesh(verts, tris)

# Make sure mesh is a dask DataFrame
assert isinstance(mesh, dd.DataFrame)

# Check mesh length
n = len(mesh)
assert n == 6

# Make sure we have expected number of partitions
expected_chunksize = int(np.ceil(len(mesh) / (3*npartitions)) * 3)
expected_npartitions = int(np.ceil(n / expected_chunksize))
assert expected_npartitions == mesh.npartitions

# Make sure triangles don't straddle partitions
partitions_lens = mesh.map_partitions(len).compute()
for partitions_len in partitions_lens:
assert partitions_len % 3 == 0

agg = cvs.trimesh(verts, tris, mesh)
sol = np.array([
[0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
], dtype='i4')
np.testing.assert_array_equal(
np.flipud(agg.fillna(0).astype('i4').values)[:5], sol)
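
To run just this parametrized test locally (standard pytest usage, with the repository path shown above):

pytest datashader/tests/test_dask.py -k test_trimesh_dask_partitions -v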
18 changes: 8 additions & 10 deletions datashader/utils.py
@@ -450,18 +450,16 @@ def _dd_mesh(vertices, simplices):
     Dask DataFrame objects.
     """
     # Construct mesh by indexing into vertices with simplex indices
-    # TODO: For dask: avoid .compute() calls, and add winding auto-detection
-    vertex_idxs = simplices.values[:, :3].astype(np.int64)
-    vals = vertices.values.compute()[vertex_idxs]
-    vals = vals.reshape(np.prod(vals.shape[:2]), vals.shape[2])
-    res = pd.DataFrame(vals, columns=vertices.columns)
+    # TODO: For dask: avoid .compute() calls
+    res = _pd_mesh(vertices.compute(), simplices.compute())
Collaborator Author:
We were calling compute on both vertices and simplices anyway, so I opted to just call the pandas version. In addition to making this more concise, the pandas version has winding auto-detection enabled, which was not previously enabled here.
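As background on the winding auto-detection mentioned above (a generic illustration, not datashader's actual implementation): a triangle's winding order can be read off the sign of its signed area, and inconsistently wound triangles fixed by swapping two vertices.

import numpy as np

def signed_area(p0, p1, p2):
    # Positive for counter-clockwise vertex order, negative for clockwise.
    return 0.5 * ((p1[0] - p0[0]) * (p2[1] - p0[1])
                  - (p2[0] - p0[0]) * (p1[1] - p0[1]))

tri = (np.array([0., 0.]), np.array([1., 0.]), np.array([0., 1.]))
if signed_area(*tri) < 0:  # clockwise: swap two vertices to flip winding
    tri = (tri[0], tri[2], tri[1])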


-    # If vertices don't have weights, use simplex weights
-    verts_have_weights = len(vertices.columns) > 2
-    if not verts_have_weights:
-        weight_col = simplices.columns[3]
-        res[weight_col] = simplices.values[:, 3].compute().repeat(3)
+    # Compute a chunksize that will not split the vertices of a single
+    # triangle across partitions
+    approx_npartitions = max(vertices.npartitions, simplices.npartitions)
+    chunksize = int(np.ceil(len(res) / (3*approx_npartitions)) * 3)
 
+    # Create dask dataframe
+    res = dd.from_pandas(res, chunksize=chunksize)
     return res


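For intuition on the chunksize formula above: each triangle contributes three consecutive rows to the mesh, so rounding the chunk size up to a multiple of 3 guarantees that dd.from_pandas never splits a triangle across partitions. A small standalone check of the same arithmetic (illustrative numbers, not taken from the PR):

import numpy as np

n_rows = 30  # e.g. 10 triangles x 3 vertices each
approx_npartitions = 4
chunksize = int(np.ceil(n_rows / (3 * approx_npartitions)) * 3)  # ceil(2.5)*3 = 9
# Partition sizes produced by chunking 30 rows in groups of 9:
sizes = [min(chunksize, n_rows - i) for i in range(0, n_rows, chunksize)]
print(chunksize, sizes)  # 9 [9, 9, 9, 3] -- every partition holds whole triangles
assert all(size % 3 == 0 for size in sizes)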
32 changes: 32 additions & 0 deletions examples/user_guide/6_Trimesh.ipynb
@@ -47,6 +47,7 @@
"import numpy as np, datashader as ds, pandas as pd\n",
"import datashader.utils as du, datashader.transfer_functions as tf\n",
"from scipy.spatial import Delaunay\n",
"import dask.dataframe as dd\n",
"\n",
"n = 10\n",
"np.random.seed(2)\n",
@@ -345,6 +346,37 @@
" tf.shade(cvs.trimesh(verts, tris, mesh=mesh, agg=ds.std('z')), name='std')).cols(3)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Parallelizing trimesh aggregation with Dask\n",
"The trimesh aggregation process can be parallelized by providing `du.mesh` and `Canvas.trimesh` with partitioned Dask dataframes.\n",
"\n",
"**Note:** While the calls to `Canvas.trimesh` will be parallelized across the partitions of the Dask dataframe, the construction of the partitioned mesh using `du.mesh` is not currently parallelized. Furthermore, it currently requires loading the entire `verts` and `tris` dataframes into memory in order to construct the partitioned mesh. Because of these constraints, this approach is most useful for the repeated aggregation of large meshes that fit in memory on a single multicore machine."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"verts_ddf = dd.from_pandas(verts, npartitions=4)\n",
"tris_ddf = dd.from_pandas(tris, npartitions=4)\n",
"mesh_ddf = du.mesh(verts_ddf, tris_ddf)\n",
"mesh_ddf"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tf.shade(cvs.trimesh(verts_ddf, tris_ddf, mesh=mesh_ddf))"
]
},
{
"cell_type": "markdown",
"metadata": {},