diff --git a/dask/array/routines.py b/dask/array/routines.py
index 96bcedd6cef..205173b3e37 100644
--- a/dask/array/routines.py
+++ b/dask/array/routines.py
@@ -978,12 +978,12 @@ def histogram(a, bins=None, range=None, normed=False, weights=None, density=None
         return n, bins
 
 
-def _block_histogramdd(sample, bins, range=None, weights=None):
+def _block_histogramdd_rect(sample, bins, range, weights):
     """Call numpy.histogramdd for a blocked/chunked calculation.
 
-    Slurps the result into an additional outer axis via [np.newaxis].
-    This new axis will be used to stack chunked calls of the numpy
-    function and add them together later.
+    Slurps the result into an additional outer axis; this new axis
+    will be used to stack chunked calls of the numpy function and add
+    them together later.
 
     Returns
     -------
@@ -994,6 +994,28 @@ def _block_histogramdd(sample, bins, range=None, weights=None):
     return np.histogramdd(sample, bins, range=range, weights=weights)[0:1]
 
 
+def _block_histogramdd_multiarg(*args):
+    """Call numpy.histogramdd for a multi argument blocked/chunked calculation.
+
+    Slurps the result into an additional outer axis; this new axis
+    will be used to stack chunked calls of the numpy function and add
+    them together later.
+
+    The last three arguments _must be_ (bins, range, weights).
+
+    The difference between this function and
+    _block_histogramdd_rect is that here we expect the sample
+    to be composed of multiple arguments (multiple 1D arrays, each one
+    representing a coordinate), while _block_histogramdd_rect
+    expects a single rectangular (2D array where columns are
+    coordinates) sample.
+
+    """
+    bins, range, weights = args[-3:]
+    sample = args[:-3]
+    return np.histogramdd(sample, bins=bins, range=range, weights=weights)[0:1]
+
+
 def histogramdd(sample, bins, range=None, normed=None, weights=None, density=None):
     """Blocked variant of :func:`numpy.histogramdd`.
 
@@ -1003,10 +1025,10 @@ def histogramdd(sample, bins, range=None, normed=None, weights=None, density=Non
     compatible with this function. If weights are used, they must be
     chunked along the 0th axis identically to the input sample.
 
-    A proper example setup for a three dimensional histogram, where
-    the sample shape is ``(8, 3)`` and weights are shape ``(8,)``,
-    sample chunks would be ``((4, 4), (3,))`` and the weights chunks
-    would be ``((4, 4),)`` a table of the structure:
+    An example setup for a three dimensional histogram, where the
+    sample shape is ``(8, 3)`` and weights are shape ``(8,)``, sample
+    chunks would be ``((4, 4), (3,))`` and the weights chunks would be
+    ``((4, 4),)`` a table of the structure:
 
     +-------+-----------------------+-----------+
     |       |      sample (8 x 3)   |  weights  |
@@ -1042,6 +1064,10 @@ def histogramdd(sample, bins, range=None, normed=None, weights=None, density=Non
     from ``dask.dataframe``, i.e. :class:`dask.dataframe.Series` or
     :class:`dask.dataframe.DataFrame`.
 
+    The function is also compatible with `x`, `y`, and `z` being
+    individual 1D arrays with equal chunking. In that case, the data
+    should be passed as a tuple: ``histogramdd((x, y, z), ...)``
+
     Parameters
     ----------
     sample : dask.array.Array (N, D) or sequence of dask.array.Array
@@ -1053,9 +1079,7 @@ def histogramdd(sample, bins, range=None, normed=None, weights=None, density=Non
         * When a (N, D) dask Array, each row is an entry in the sample
           (coordinate in D dimensional space).
         * When a sequence of dask Arrays, each element in the sequence
-          is the array of values for a single coordinate. This type of
-          input will be automatically rechunked along the column axis
-          if necessary. This may induce a runtime increase.
+          is the array of values for a single coordinate.
     bins : sequence of arrays describing bin edges, int, or sequence of ints
         The bin specification.
 
@@ -1126,8 +1150,31 @@ def histogramdd(sample, bins, range=None, normed=None, weights=None, density=Non
     >>> np.isclose(h.sum().compute(), w.sum().compute())
     True
 
-    """
+    Using a sequence of 1D arrays as the input:
+
+    >>> x = da.array([2, 4, 2, 4, 2, 4])
+    >>> y = da.array([2, 2, 4, 4, 2, 4])
+    >>> z = da.array([4, 2, 4, 2, 4, 2])
+    >>> bins = ([0, 3, 6],) * 3
+    >>> h, edges = da.histogramdd((x, y, z), bins)
+    >>> h
+    dask.array<sum-aggregate, shape=(2, 2, 2), dtype=float64, chunksize=(2, 2, 2), chunktype=numpy.ndarray>
+    >>> edges[0]
+    dask.array<array, shape=(3,), dtype=int64, chunksize=(3,), chunktype=numpy.ndarray>
+    >>> h.compute()
+    array([[[0., 2.],
+            [0., 1.]],
+    <BLANKLINE>
+           [[1., 0.],
+            [2., 0.]]])
+    >>> edges[0].compute()
+    array([0, 3, 6])
+    >>> edges[1].compute()
+    array([0, 3, 6])
+    >>> edges[2].compute()
+    array([0, 3, 6])
 
+    """
     # logic used in numpy.histogramdd to handle normed/density.
     if normed is None:
         if density is None:
@@ -1157,16 +1204,27 @@ def histogramdd(sample, bins, range=None, normed=None, weights=None, density=Non
 
     # N == total number of samples
     # D == total number of dimensions
-    try:
-        # Recommended input ND-array
-        N, D = sample.shape
-    except (AttributeError, ValueError):
-        # If we have a sequence of 1D arrays
-        sample = atleast_2d(sample).T
-        N, D = sample.shape
-        # rechunk if necessary
-        if sample.chunksize[1] != D:
-            sample = sample.rechunk((sample.chunksize[0], D))
+    if hasattr(sample, "shape"):
+        if len(sample.shape) != 2:
+            raise ValueError("Single array input to histogramdd should be columnar")
+        else:
+            _, D = sample.shape
+        n_chunks = sample.numblocks[0]
+        rectangular_sample = True
+        # Require data to be chunked along the first axis only.
+        if sample.shape[1:] != sample.chunksize[1:]:
+            raise ValueError("Input array can only be chunked along the 0th axis.")
+    elif isinstance(sample, (tuple, list)):
+        rectangular_sample = False
+        D = len(sample)
+        n_chunks = sample[0].numblocks[0]
+        for i in _range(1, D):
+            if sample[i].chunks != sample[0].chunks:
+                raise ValueError("All coordinate arrays must be chunked identically.")
+    else:
+        raise ValueError(
+            "Incompatible sample. Must be a 2D array or a sequence of 1D arrays."
+        )
 
     # Require only Array or Delayed objects for bins, range, and weights.
     for argname, val in [("bins", bins), ("range", range), ("weights", weights)]:
@@ -1176,16 +1234,18 @@ def histogramdd(sample, bins, range=None, normed=None, weights=None, density=Non
                 "for `histogramdd`. For argument `{}`, got: {!r}".format(argname, val)
             )
 
-    # Require data to be chunked along the first axis only.
-    if sample.shape[1:] != sample.chunksize[1:]:
-        raise ValueError("Input array can only be chunked along the 0th axis")
-
     # Require that the chunking of the sample and weights are compatible.
-    if weights is not None and weights.chunks[0] != sample.chunks[0]:
-        raise ValueError(
-            "Input array and weights must have the same shape "
-            "and chunk structure along the first dimension."
-        )
+    if weights is not None:
+        if rectangular_sample and weights.chunks[0] != sample.chunks[0]:
+            raise ValueError(
+                "Input array and weights must have the same shape "
+                "and chunk structure along the first dimension."
+            )
+        elif not rectangular_sample and weights.numblocks != n_chunks:
+            raise ValueError(
+                "Input arrays and weights must have the same shape "
+                "and chunk structure."
+            )
 
     # if bins is a list, tuple, then make sure the length is the same
     # as the number dimensions.
@@ -1205,50 +1265,73 @@ def histogramdd(sample, bins, range=None, normed=None, weights=None, density=Non
         if not all(len(r) == 2 for r in range):
             raise ValueError("range argument should be a sequence of pairs")
 
-    # we will return the edges to mimic the NumPy API (we also use the
-    # edges later as a way to calculate the total number of bins).
+    # If bins is a single int, create a tuple of len `D` containing `bins`.
     if isinstance(bins, int):
         bins = (bins,) * D
+
+    # we will return the edges to mimic the NumPy API (we also use the
+    # edges later as a way to calculate the total number of bins).
     if all(isinstance(b, int) for b in bins) and all(len(r) == 2 for r in range):
         edges = [np.linspace(r[0], r[1], b + 1) for b, r in zip(bins, range)]
     else:
         edges = [np.asarray(b) for b in bins]
 
-    # With dsk below, we will construct a (D + 1) dimensional array
-    # stacked for each chunk. For example, if the histogram is going
-    # to be 3 dimensions, this creates a stack of cubes (1 cube for
-    # each sample chunk) that will be collapsed into a final cube (the
-    # result).
+    if rectangular_sample:
+        deps = (sample,)
+    else:
+        deps = tuple(sample)
+
+    if weights is not None:
+        w_keys = flatten(weights.__dask_keys__())
+        deps += (weights,)
+        dtype = weights.dtype
+    else:
+        w_keys = (None,) * n_chunks
+        dtype = np.histogramdd([])[0].dtype
 
     # This tuple of zeros represents the chunk index along the columns
     # (we only allow chunking along the rows).
     column_zeros = tuple(0 for _ in _range(D))
 
-    if weights is None:
+    # With dsk below, we will construct a (D + 1) dimensional array
+    # stacked for each chunk. For example, if the histogram is going
+    # to be 3 dimensions, this creates a stack of cubes (1 cube for
+    # each sample chunk) that will be collapsed into a final cube (the
+    # result). Depending on the input data, we can do this in two ways
+    #
+    # 1. The rectangular case: when the sample is a single 2D array
+    #    where each column in the sample represents a coordinate of
+    #    the sample).
+    #
+    # 2. The sequence-of-arrays case, when the sample is a tuple or
+    #    list of arrays, with each array in that sequence representing
+    #    the entirety of one coordinate of the complete sample.
+
+    if rectangular_sample:
+        sample_keys = flatten(sample.__dask_keys__())
         dsk = {
-            (name, i, *column_zeros): (_block_histogramdd, k, bins, range)
-            for i, k in enumerate(flatten(sample.__dask_keys__()))
+            (name, i, *column_zeros): (_block_histogramdd_rect, k, bins, range, w)
+            for i, (k, w) in enumerate(zip(sample_keys, w_keys))
         }
-        dtype = np.histogramdd([])[0].dtype
     else:
-        a_keys = flatten(sample.__dask_keys__())
-        w_keys = flatten(weights.__dask_keys__())
+        sample_keys = [
+            list(flatten(sample[i].__dask_keys__())) for i in _range(len(sample))
+        ]
+        fused_on_chunk_keys = [
+            tuple(sample_keys[j][i] for j in _range(D)) for i in _range(n_chunks)
+        ]
         dsk = {
-            (name, i, *column_zeros): (_block_histogramdd, k, bins, range, w)
-            for i, (k, w) in enumerate(zip(a_keys, w_keys))
+            (name, i, *column_zeros): (
+                _block_histogramdd_multiarg,
+                *(*k, bins, range, w),
+            )
+            for i, (k, w) in enumerate(zip(fused_on_chunk_keys, w_keys))
         }
-        dtype = weights.dtype
 
-    deps = (sample,)
-    if weights is not None:
-        deps += (weights,)
     graph = HighLevelGraph.from_collections(name, dsk, dependencies=deps)
-
-    nchunks = len(list(flatten(sample.__dask_keys__())))
     all_nbins = tuple((b.size - 1,) for b in edges)
-    stacked_chunks = ((1,) * nchunks, *all_nbins)
+    stacked_chunks = ((1,) * n_chunks, *all_nbins)
     mapped = Array(graph, name, stacked_chunks, dtype=dtype)
-
     # Finally, sum over chunks providing to get the final D
     # dimensional result array.
     n = mapped.sum(axis=0)
diff --git a/dask/array/tests/test_routines.py b/dask/array/tests/test_routines.py
index 30dea6de963..429bb7feb95 100644
--- a/dask/array/tests/test_routines.py
+++ b/dask/array/tests/test_routines.py
@@ -850,6 +850,7 @@ def test_histogramdd():
     assert a1.sum() == n1
     assert a2.sum() == n1
     assert same_keys(da.histogramdd(x, bins=bins)[0], a1)
+    assert a1.compute().shape == a3.shape
 
 
 def test_histogramdd_seq_of_arrays():
@@ -860,7 +861,9 @@ def test_histogramdd_seq_of_arrays():
     by = [0.0, 0.30, 0.70, 0.8, 1.0]
     (a1, b1) = da.histogramdd([x, y], bins=[bx, by])
     (a2, b2) = np.histogramdd([x, y], bins=[bx, by])
+    (a3, b3) = np.histogramdd((x.compute(), y.compute()), bins=[bx, by])
     assert_eq(a1, a2)
+    assert_eq(a1, a3)
 
 
 def test_histogramdd_alternative_bins_range():
@@ -871,7 +874,9 @@ def test_histogramdd_alternative_bins_range():
     ranges = ((0, 1),) * len(bins)
     (a1, b1) = da.histogramdd(x, bins=bins, range=ranges)
     (a2, b2) = np.histogramdd(x, bins=bins, range=ranges)
+    (a3, b3) = np.histogramdd(x.compute(), bins=bins, range=ranges)
     assert_eq(a1, a2)
+    assert_eq(a1, a3)
     bins = 4
     (a1, b1) = da.histogramdd(x, bins=bins, range=ranges)
     (a2, b2) = np.histogramdd(x, bins=bins, range=ranges)
@@ -891,22 +896,13 @@ def test_histogramdd_weighted():
     ranges = ((0, 1),) * len(bins)
     (a1, b1) = da.histogramdd(x, bins=bins, range=ranges, weights=w)
     (a2, b2) = np.histogramdd(x, bins=bins, range=ranges, weights=w)
+    (a3, b3) = np.histogramdd(x.compute(), bins=bins, range=ranges, weights=w.compute())
     assert_eq(a1, a2)
+    assert_eq(a1, a3)
     bins = 4
     (a1, b1) = da.histogramdd(x, bins=bins, range=ranges, weights=w)
     (a2, b2) = np.histogramdd(x, bins=bins, range=ranges, weights=w)
-    assert_eq(a1, a2)
-
-
-def test_histogramdd_trigger_rechunk():
-    n = 600
-    x = da.random.uniform(0, 1, size=(n,), chunks=200)
-    y = da.random.uniform(0, 1, size=(n,), chunks=200)
-    bins = (6, 7)
-    ranges = ((0, 1),) * len(bins)
-    (a1, b1) = da.histogramdd([x, y], bins=bins, range=ranges)
-    (a2, b2) = np.histogramdd([x, y], bins=bins, range=ranges)
-    (a3, b3) = np.histogramdd([x.compute(), y.compute()], bins=bins, range=ranges)
+    (a3, b3) = np.histogramdd(x.compute(), bins=bins, range=ranges, weights=w.compute())
     assert_eq(a1, a2)
     assert_eq(a1, a3)
 
@@ -918,8 +914,10 @@ def test_histogramdd_density():
     (a1, b1) = da.histogramdd(x, bins=bins, density=True)
     (a2, b2) = np.histogramdd(x, bins=bins, density=True)
     (a3, b3) = da.histogramdd(x, bins=bins, normed=True)
+    (a4, b4) = np.histogramdd(x.compute(), bins=bins, density=True)
     assert_eq(a1, a2)
     assert_eq(a1, a3)
+    assert_eq(a1, a4)
     assert same_keys(da.histogramdd(x, bins=bins, density=True)[0], a1)
 
 
@@ -936,7 +934,7 @@ def test_histogramdd_weighted_density():
     assert_eq(a1, a3)
 
 
-def test_histogramdd_raises_incompat_sample_chuks():
+def test_histogramdd_raises_incompat_sample_chunks():
     data = da.random.random(size=(10, 3), chunks=(5, 1))
     with pytest.raises(
         ValueError, match="Input array can only be chunked along the 0th axis"
@@ -944,6 +942,33 @@ def test_histogramdd_raises_incompat_sample_chuks():
         da.histogramdd(data, bins=10, range=((0, 1),) * 3)
 
 
+def test_histogramdd_raises_incompat_multiarg_chunks():
+    x = da.random.random(size=(10,), chunks=2)
+    y = da.random.random(size=(10,), chunks=2)
+    z = da.random.random(size=(10,), chunks=5)
+    with pytest.raises(
+        ValueError, match="All coordinate arrays must be chunked identically."
+    ):
+        da.histogramdd((x, y, z), bins=(3,) * 3, range=((0, 1),) * 3)
+
+
+def test_histogramdd_raises_incompat_weight_chunks():
+    x = da.random.random(size=(10,), chunks=2)
+    y = da.random.random(size=(10,), chunks=2)
+    z = da.atleast_2d((x, y)).T.rechunk((2, 2))
+    w = da.random.random(size=(10,), chunks=5)
+    with pytest.raises(
+        ValueError,
+        match="Input arrays and weights must have the same shape and chunk structure.",
+    ):
+        da.histogramdd((x, y), bins=(3,) * 2, range=((0, 1),) * 2, weights=w)
+    with pytest.raises(
+        ValueError,
+        match="Input array and weights must have the same shape and chunk structure along the first dimension.",
+    ):
+        da.histogramdd(z, bins=(3,) * 2, range=((0, 1),) * 2, weights=w)
+
+
 def test_histogramdd_raises_incompat_bins_or_range():
     data = da.random.random(size=(10, 4), chunks=(5, 4))
     bins = (2, 3, 4, 5)
@@ -981,6 +1006,21 @@ def test_histogramdd_raise_normed_and_density():
         da.histogramdd(data, bins=bins, range=ranges, normed=True, density=True)
 
 
+def test_histogramdd_raise_incompat_shape():
+    # 1D
+    data = da.random.random(size=(10,), chunks=(2,))
+    with pytest.raises(
+        ValueError, match="Single array input to histogramdd should be columnar"
+    ):
+        da.histogramdd(data, bins=4, range=((-3, 3),))
+    # 3D (not columnar)
+    data = da.random.random(size=(4, 4, 4), chunks=(2, 2, 2))
+    with pytest.raises(
+        ValueError, match="Single array input to histogramdd should be columnar"
+    ):
+        da.histogramdd(data, bins=4, range=((-3, 3),))
+
+
 def test_histogramdd_edges():
     data = da.random.random(size=(10, 3), chunks=(5, 3))
     edges = [