diff --git a/Documentation/config/survey.txt b/Documentation/config/survey.txt
index 9e594a2092f225..f3ae768933fe1b 100644
--- a/Documentation/config/survey.txt
+++ b/Documentation/config/survey.txt
@@ -4,6 +4,10 @@ survey.*::
 	background with these options.
 +
 --
+	survey.namerev::
+		Boolean to show/hide `git name-rev` information for each
+		reported commit and the containing commit of each
+		reported tree and blob.
 	verbose::
 		This boolean value implies the `--[no-]verbose` option.
 	progress::
@@ -11,4 +15,33 @@ survey.*::
 	top::
 		This integer value implies `--top=<N>`, specifying the
 		number of entries in the detail tables.
+	showBlobSizes::
+		A non-negative integer value.  Requests details on the
+		<n> largest file blobs by size in bytes.  Provides a
+		default value for `--blob-sizes=<n>` in
+		linkgit:git-survey[1].
+	showCommitParents::
+		A non-negative integer value.  Requests details on the
+		<n> commits with the most number of parents.  Provides a
+		default value for `--commit-parents=<n>` in
+		linkgit:git-survey[1].
+	showCommitSizes::
+		A non-negative integer value.  Requests details on the
+		<n> largest commits by size in bytes.  Generally, these
+		are the commits with the largest commit messages.
+		Provides a default value for `--commit-sizes=<n>` in
+		linkgit:git-survey[1].
+	showTreeEntries::
+		A non-negative integer value.  Requests details on the
+		<n> trees (directories) with the most number of entries
+		(files and subdirectories).  Provides a default value
+		for `--tree-entries=<n>` in linkgit:git-survey[1].
+	showTreeSizes::
+		A non-negative integer value.  Requests details on the
+		<n> largest trees (directories) by size in bytes.  This
+		will set will usually be equal to the
+		`survey.showTreeEntries` set, but may be skewed by very
+		long file or subdirectory entry names.  Provides a
+		default value for `--tree-sizes=<n>` in
+		linkgit:git-survey[1].
 --
diff --git a/Documentation/git-survey.txt b/Documentation/git-survey.txt
index 894c7be3053eb9..cb1fb3c37c3706 100644
--- a/Documentation/git-survey.txt
+++ b/Documentation/git-survey.txt
@@ -32,6 +32,10 @@ OPTIONS
 --progress::
 	Show progress.  This is automatically enabled when interactive.
 
+--[no-]name-rev::
+	Print `git name-rev` output for each commit, tree, and blob.
+	Defaults to true.
+
 Ref Selection
 ~~~~~~~~~~~~~
 
@@ -59,6 +63,32 @@ only refs for the given options are added.
 --other::
 	Add notes (`refs/notes/`) and stashes (`refs/stash/`) to the set.
 
+Large Item Selection
+~~~~~~~~~~~~~~~~~~~~
+
+The following options control the optional display of large items under
+various dimensions of scale.  The OID of the largest `n` objects will be
+displayed in reverse sorted order.  For each, `n` defaults to 10.
+
+--commit-parents::
+	Shows the OIDs of the commits with the most parent commits.
+
+--commit-sizes::
+	Shows the OIDs of the largest commits by size in bytes.  This is
+	usually the ones with the largest commit messages.
+
+--tree-entries::
+	Shows the OIDs of the trees with the most number of entries.  These
+	are the directories with the most number of files or subdirectories.
+
+--tree-sizes::
+	Shows the OIDs of the largest trees by size in bytes.  This set
+	will usually be the same as the vector of number of entries unless
+	skewed by very long entry names.
+
+--blob-sizes::
+	Shows the OIDs of the largest blobs by size in bytes.
+
 OUTPUT
 ------
 
@@ -78,6 +108,11 @@ Reachable Object Summary
 The reachable object summary shows the total number of each kind of Git
 object, including tags, commits, trees, and blobs.
 
+CONFIGURATION
+-------------
+
+include::config/survey.txt[]
+
 GIT
 ---
 Part of the linkgit:git[1] suite
diff --git a/builtin/survey.c b/builtin/survey.c
index 34850399fd601a..5bf1adb6668eed 100644
--- a/builtin/survey.c
+++ b/builtin/survey.c
@@ -13,11 +13,12 @@
 #include "ref-filter.h"
 #include "refs.h"
 #include "revision.h"
+#include "run-command.h"
 #include "strbuf.h"
 #include "strvec.h"
-#include "tag.h"
 #include "trace2.h"
-#include "color.h"
+#include "tree.h"
+#include "tree-walk.h"
 
 static const char * const survey_usage[] = {
 	N_("(EXPERIMENTAL!) git survey <options>"),
@@ -41,6 +42,16 @@ static struct survey_refs_wanted default_ref_options = {
 struct survey_opts {
 	int verbose;
 	int show_progress;
+	int show_name_rev;
+
+	int show_largest_commits_by_nr_parents;
+	int show_largest_commits_by_size_bytes;
+
+	int show_largest_trees_by_nr_entries;
+	int show_largest_trees_by_size_bytes;
+
+	int show_largest_blobs_by_size_bytes;
+
 	int top_nr;
 	struct survey_refs_wanted refs;
 };
@@ -53,6 +64,312 @@ struct survey_report_ref_summary {
 	size_t tags_annotated_nr;
 	size_t others_nr;
 	size_t unknown_nr;
+
+	size_t cnt_symref;
+
+	size_t cnt_packed;
+	size_t cnt_loose;
+
+	/*
+	 * Measure the length of the refnames.  We can look for
+	 * potential platform limits.  The partial sums may help us
+	 * estimate the size of a haves/wants conversation, since each
+	 * refname and a SHA must be transmitted.
+	 */
+	size_t len_max_local_refname;
+	size_t len_sum_local_refnames;
+	size_t len_max_remote_refname;
+	size_t len_sum_remote_refnames;
+};
+
+/*
+ * HBIN -- hex binning (histogram bucketing).
+ *
+ * We create histograms for various counts and sums.  Since we have a
+ * wide range of values (objects range in size from 1 to 4G bytes), a
+ * linear bucketing is not interesting.  Instead, lets use a
+ * log16()-based bucketing.  This gives us a better spread on the low
+ * and middle range and a coarse bucketing on the high end.
+ *
+ * The idea here is that it doesn't matter if you have n 1GB blobs or
+ * n/2 1GB blobs and n/2 1.5GB blobs -- either way you have a scaling
+ * problem that we want to report on.
+ */
+#define HBIN_LEN (sizeof(unsigned long) * 2)
+#define HBIN_MASK (0xF)
+#define HBIN_SHIFT (4)
+
+static int hbin(unsigned long value)
+{
+	for (int k = 0; k < HBIN_LEN; k++) {
+		if ((value & ~(HBIN_MASK)) == 0)
+			return k;
+		value >>= HBIN_SHIFT;
+	}
+
+	return 0; /* should not happen */
+}
+
+/*
+ * QBIN -- base4 binning (histogram bucketing).
+ *
+ * This is the same idea as the above, but we want better granularity
+ * in the low end and don't expect as many large values.
+ */
+#define QBIN_LEN (sizeof(unsigned long) * 4)
+#define QBIN_MASK (0x3)
+#define QBIN_SHIFT (2)
+
+static int qbin(unsigned long value)
+{
+	for (int k = 0; k < QBIN_LEN; k++) {
+		if ((value & ~(QBIN_MASK)) == 0)
+			return k;
+		value >>= (QBIN_SHIFT);
+	}
+
+	return 0; /* should not happen */
+}
+
+/*
+ * histogram bin for objects.
+ */
+struct obj_hist_bin {
+	uint64_t sum_size;      /* sum(object_size) for all objects in this bin */
+	uint64_t sum_disk_size; /* sum(on_disk_size) for all objects in this bin */
+	uint32_t cnt_seen;      /* number seen in this bin */
+};
+
+static void incr_obj_hist_bin(struct obj_hist_bin *pbin,
+			       unsigned long object_length,
+			       off_t disk_sizep)
+{
+	pbin->sum_size += object_length;
+	pbin->sum_disk_size += disk_sizep;
+	pbin->cnt_seen++;
+}
+
+/*
+ * Remember the largest n objects for some scaling dimension.  This
+ * could be the observed object size or number of entries in a tree.
+ * We'll use this to generate a sorted vector in the output for that
+ * dimension.
+ */
+struct large_item {
+	uint64_t size;
+	struct object_id oid;
+
+	/*
+	 * For blobs and trees the name field is the pathname of the
+	 * file or directory.  Root trees will have a zero-length
+	 * name.  The name field is not currenly used for commits.
+	 */
+	struct strbuf name;
+
+	/*
+	 * For blobs and trees remember the transient commit from
+	 * the treewalk so that we can say that this large item
+	 * first appeared in this commit (relative to the treewalk
+	 * order).
+	 */
+	struct object_id containing_commit_oid;
+
+	/*
+	 * Lookup `containing_commit_oid` using `git name-rev`.
+	 * Lazy allocate this post-treewalk.
+	 */
+	struct strbuf name_rev;
+};
+
+struct large_item_vec {
+	char *dimension_label;
+	char *item_label;
+	uint64_t nr_items;
+	struct large_item items[FLEX_ARRAY]; /* nr_items */
+};
+
+static struct large_item_vec *alloc_large_item_vec(const char *dimension_label,
+						   const char *item_label,
+						   uint64_t nr_items)
+{
+	struct large_item_vec *vec;
+	size_t flex_len = nr_items * sizeof(struct large_item);
+	size_t k;
+
+	if (!nr_items)
+		return NULL;
+
+	vec = xcalloc(1, (sizeof(struct large_item_vec) + flex_len));
+	vec->dimension_label = strdup(dimension_label);
+	vec->item_label = strdup(item_label);
+	vec->nr_items = nr_items;
+
+	for (k = 0; k < nr_items; k++)
+		strbuf_init(&vec->items[k].name, 0);
+
+	return vec;
+}
+
+static void free_large_item_vec(struct large_item_vec *vec)
+{
+	if (!vec)
+		return;
+
+	for (size_t k = 0; k < vec->nr_items; k++) {
+		strbuf_release(&vec->items[k].name);
+		strbuf_release(&vec->items[k].name_rev);
+	}
+
+	free(vec->dimension_label);
+	free(vec->item_label);
+	free(vec);
+}
+
+static void maybe_insert_large_item(struct large_item_vec *vec,
+				    uint64_t size,
+				    struct object_id *oid,
+				    const char *name,
+				    const struct object_id *containing_commit_oid)
+{
+	size_t rest_len;
+	size_t k;
+
+	if (!vec || !vec->nr_items)
+		return;
+
+	/*
+	 * Since the odds an object being among the largest n
+	 * is small, shortcut and see if it is smaller than
+	 * the smallest one in our set and quickly reject it.
+	 */
+	if (size < vec->items[vec->nr_items - 1].size)
+		return;
+
+	for (k = 0; k < vec->nr_items; k++) {
+		if (size < vec->items[k].size)
+			continue;
+
+		/*
+		 * The last large_item in the vector is about to be
+		 * overwritten by the previous one during the shift.
+		 * Steal its allocated strbuf and reuse it.
+		 *
+		 * We can ignore .name_rev because it will not be
+		 * allocated until after the treewalk.
+		 */
+		strbuf_release(&vec->items[vec->nr_items - 1].name);
+
+		/* push items[k..] down one and insert data for this item here */
+
+		rest_len = (vec->nr_items - k - 1) * sizeof(struct large_item);
+		if (rest_len)
+			memmove(&vec->items[k + 1], &vec->items[k], rest_len);
+
+		memset(&vec->items[k], 0, sizeof(struct large_item));
+		vec->items[k].size = size;
+		oidcpy(&vec->items[k].oid, oid);
+		oidcpy(&vec->items[k].containing_commit_oid, containing_commit_oid ? containing_commit_oid : null_oid());
+		strbuf_init(&vec->items[k].name, 0);
+		if (name && *name)
+			strbuf_addstr(&vec->items[k].name, name);
+
+		return;
+	}
+}
+
+/*
+ * Common fields for any type of object.
+ */
+struct survey_stats_base_object {
+	uint32_t cnt_seen;
+
+	uint32_t cnt_missing; /* we may have a partial clone. */
+
+	/*
+	 * Number of objects grouped by where they are stored on disk.
+	 * This is a function of how the ODB is packed.
+	 */
+	uint32_t cnt_cached;   /* see oi.whence */
+	uint32_t cnt_loose;    /* see oi.whence */
+	uint32_t cnt_packed;   /* see oi.whence */
+	uint32_t cnt_dbcached; /* see oi.whence */
+
+	uint64_t sum_size; /* sum(object_size) */
+	uint64_t sum_disk_size; /* sum(disk_size) */
+
+	/*
+	 * A histogram of the count of objects, the observed size, and
+	 * the on-disk size grouped by the observed size.
+	 */
+	struct obj_hist_bin size_hbin[HBIN_LEN];
+};
+
+/*
+ * PBIN -- parent vector binning (histogram bucketing).
+ *
+ * We create a histogram based upon the number of parents
+ * in a commit.  This is a simple linear vector.  It starts
+ * at zero for "initial" commits.
+ *
+ * If a commit has more parents, just put it in the last bin.
+ */
+#define PBIN_VEC_LEN (32)
+
+struct survey_stats_commits {
+	struct survey_stats_base_object base;
+
+	/*
+	 * Count of commits with k parents.
+	 */
+	uint32_t parent_cnt_pbin[PBIN_VEC_LEN];
+
+	struct large_item_vec *vec_largest_by_nr_parents;
+	struct large_item_vec *vec_largest_by_size_bytes;
+};
+
+/*
+ * Stats for reachable trees.
+ */
+struct survey_stats_trees {
+	struct survey_stats_base_object base;
+
+	/*
+	 * Keep a vector of the trees with the most number of entries.
+	 * This gives us a feel for the width of a tree when there are
+	 * gigantic directories.
+	 */
+	struct large_item_vec *vec_largest_by_nr_entries;
+
+	/*
+	 * Keep a vector of the trees with the largest size in bytes.
+	 * The contents of this may or may not match items in the other
+	 * vector, since entryname length can alter the results.
+	 */
+	struct large_item_vec *vec_largest_by_size_bytes;
+
+	/*
+	 * Computing the sum of the number of entries across all trees
+	 * is probably not that interesting.
+	 */
+	uint64_t sum_entries; /* sum(nr_entries) -- sum across all trees */
+
+	/*
+	 * A histogram of the count of trees, the observed size, and
+	 * the on-disk size grouped by the number of entries in the tree.
+	 */
+	struct obj_hist_bin entry_qbin[QBIN_LEN];
+};
+
+/*
+ * Stats for reachable blobs.
+ */
+struct survey_stats_blobs {
+	struct survey_stats_base_object base;
+
+	/*
+	 * Remember the OIDs of the largest n blobs.
+	 */
+	struct large_item_vec *vec_largest_by_size_bytes;
 };
 
 struct survey_report_object_summary {
@@ -60,6 +377,10 @@ struct survey_report_object_summary {
 	size_t tags_nr;
 	size_t trees_nr;
 	size_t blobs_nr;
+
+	struct survey_stats_commits commits;
+	struct survey_stats_trees   trees;
+	struct survey_stats_blobs   blobs;
 };
 
 /**
@@ -229,6 +550,12 @@ struct survey_context {
 
 static void clear_survey_context(struct survey_context *ctx)
 {
+	free_large_item_vec(ctx->report.reachable_objects.commits.vec_largest_by_nr_parents);
+	free_large_item_vec(ctx->report.reachable_objects.commits.vec_largest_by_size_bytes);
+	free_large_item_vec(ctx->report.reachable_objects.trees.vec_largest_by_nr_entries);
+	free_large_item_vec(ctx->report.reachable_objects.trees.vec_largest_by_size_bytes);
+	free_large_item_vec(ctx->report.reachable_objects.blobs.vec_largest_by_size_bytes);
+
 	ref_array_clear(&ctx->ref_array);
 	strvec_clear(&ctx->refs);
 }
@@ -349,6 +676,128 @@ static void print_table_plaintext(struct survey_table *table)
 	free(column_widths);
 }
 
+static void pretty_print_bin_table(const char *title_caption,
+				   const char *bucket_header,
+				   struct obj_hist_bin *bin,
+				   uint64_t bin_len, int bin_shift, uint64_t bin_mask)
+{
+	struct survey_table table = SURVEY_TABLE_INIT;
+	struct strbuf bucket = STRBUF_INIT, cnt_seen = STRBUF_INIT;
+	struct strbuf sum_size = STRBUF_INIT, sum_disk_size = STRBUF_INIT;
+	uint64_t lower = 0;
+	uint64_t upper = bin_mask;
+
+	table.table_name = title_caption;
+	strvec_pushl(&table.header, bucket_header, "Count", "Size", "Disk Size", NULL);
+
+	for (int k = 0; k < bin_len; k++) {
+		struct obj_hist_bin *p = bin + k;
+		uint64_t lower_k = lower;
+		uint64_t upper_k = upper;
+
+		lower = upper+1;
+		upper = (upper << bin_shift) + bin_mask;
+
+		if (!p->cnt_seen)
+			continue;
+
+		strbuf_reset(&bucket);
+		strbuf_addf(&bucket, "%"PRIu64"..%"PRIu64, lower_k, upper_k);
+
+		strbuf_reset(&cnt_seen);
+		strbuf_addf(&cnt_seen, "%"PRIu64, (uintmax_t)p->cnt_seen);
+
+		strbuf_reset(&sum_size);
+		strbuf_addf(&sum_size, "%"PRIu64, (uintmax_t)p->sum_size);
+
+		strbuf_reset(&sum_disk_size);
+		strbuf_addf(&sum_disk_size, "%"PRIu64, (uintmax_t)p->sum_disk_size);
+
+		insert_table_rowv(&table, bucket.buf,
+			     cnt_seen.buf, sum_size.buf, sum_disk_size.buf);
+	}
+	strbuf_release(&bucket);
+	strbuf_release(&cnt_seen);
+	strbuf_release(&sum_size);
+	strbuf_release(&sum_disk_size);
+
+	print_table_plaintext(&table);
+	clear_table(&table);
+}
+
+static void survey_report_hbin(const char *title_caption,
+			       struct obj_hist_bin *bin)
+{
+	pretty_print_bin_table(title_caption,
+			       "Byte Range",
+			       bin,
+			       HBIN_LEN, HBIN_SHIFT, HBIN_MASK);
+}
+
+static void survey_report_tree_lengths(struct survey_context *ctx)
+{
+	pretty_print_bin_table(_("TREE HISTOGRAM BY NUMBER OF ENTRIES"),
+			       "Entry Range",
+			       ctx->report.reachable_objects.trees.entry_qbin,
+			       QBIN_LEN, QBIN_SHIFT, QBIN_MASK);
+}
+
+static void survey_report_commit_parents(struct survey_context *ctx)
+{
+	struct survey_stats_commits *psc = &ctx->report.reachable_objects.commits;
+	struct survey_table table = SURVEY_TABLE_INIT;
+	struct strbuf parents = STRBUF_INIT, counts = STRBUF_INIT;
+
+	table.table_name = _("HISTOGRAM BY NUMBER OF COMMIT PARENTS");
+	strvec_pushl(&table.header, "Parents", "Counts", NULL);
+
+	for (int k = 0; k < PBIN_VEC_LEN; k++)
+		if (psc->parent_cnt_pbin[k]) {
+			strbuf_reset(&parents);
+			strbuf_addf(&parents, "%02d", k);
+
+			strbuf_reset(&counts);
+			strbuf_addf(&counts, "%14"PRIuMAX, (uintmax_t)psc->parent_cnt_pbin[k]);
+
+			insert_table_rowv(&table, parents.buf, counts.buf, NULL);
+		}
+	strbuf_release(&parents);
+	strbuf_release(&counts);
+
+	print_table_plaintext(&table);
+	clear_table(&table);
+}
+
+static void survey_report_largest_vec(struct survey_context *ctx, struct large_item_vec *vec)
+{
+	struct survey_table table = SURVEY_TABLE_INIT;
+	struct strbuf size = STRBUF_INIT;
+
+	if (!vec || !vec->nr_items)
+		return;
+
+	table.table_name = vec->dimension_label;
+	strvec_pushl(&table.header, "Size", "OID", "Name", "Commit", ctx->opts.show_name_rev ? "Name-Rev" : NULL, NULL);
+
+	for (int k = 0; k < vec->nr_items; k++) {
+		struct large_item *pk = &vec->items[k];
+		if (!is_null_oid(&pk->oid)) {
+			strbuf_reset(&size);
+			strbuf_addf(&size, "%"PRIuMAX, (uintmax_t)pk->size);
+
+			insert_table_rowv(&table, size.buf, oid_to_hex(&pk->oid), pk->name.buf,
+					  is_null_oid(&pk->containing_commit_oid) ?
+					  "" : oid_to_hex(&pk->containing_commit_oid),
+					  !ctx->opts.show_name_rev ? NULL : pk->name_rev.len ? pk->name_rev.buf : "",
+					  NULL);
+		}
+	}
+	strbuf_release(&size);
+
+	print_table_plaintext(&table);
+	clear_table(&table);
+}
+
 static void survey_report_plaintext_refs(struct survey_context *ctx)
 {
 	struct survey_report_ref_summary *refs = &ctx->report.refs;
@@ -380,6 +829,42 @@ static void survey_report_plaintext_refs(struct survey_context *ctx)
 		free(fmt);
 	}
 
+	/*
+	 * SymRefs are somewhat orthogonal to the above classification (e.g.
+	 * "HEAD" --> detached and "refs/remotes/origin/HEAD" --> remote) so the
+	 * above classified counts will already include them, but it is less
+	 * confusing to display them here than to create a whole new section.
+	 */
+	if (ctx->report.refs.cnt_symref) {
+		char *fmt = xstrfmt("%"PRIuMAX"", (uintmax_t)refs->cnt_symref);
+		insert_table_rowv(&table, _("Symbolic refs"), fmt, NULL);
+		free(fmt);
+	}
+
+	if (ctx->report.refs.cnt_loose || ctx->report.refs.cnt_packed) {
+		char *fmt = xstrfmt("%"PRIuMAX"", (uintmax_t)refs->cnt_loose);
+		insert_table_rowv(&table, _("Loose refs"), fmt, NULL);
+		free(fmt);
+		fmt = xstrfmt("%"PRIuMAX"", (uintmax_t)refs->cnt_packed);
+		insert_table_rowv(&table, _("Packed refs"), fmt, NULL);
+		free(fmt);
+	}
+
+	if (ctx->report.refs.len_max_local_refname || ctx->report.refs.len_max_remote_refname) {
+		char *fmt = xstrfmt("%"PRIuMAX"", (uintmax_t)refs->len_max_local_refname);
+		insert_table_rowv(&table, _("Max local refname length"), fmt, NULL);
+		free(fmt);
+		fmt = xstrfmt("%"PRIuMAX"", (uintmax_t)refs->len_sum_local_refnames);
+		insert_table_rowv(&table, _("Sum local refnames length"), fmt, NULL);
+		free(fmt);
+		fmt = xstrfmt("%"PRIuMAX"", (uintmax_t)refs->len_max_remote_refname);
+		insert_table_rowv(&table, _("Max remote refname length"), fmt, NULL);
+		free(fmt);
+		fmt = xstrfmt("%"PRIuMAX"", (uintmax_t)refs->len_sum_remote_refnames);
+		insert_table_rowv(&table, _("Sum remote refnames length"), fmt, NULL);
+		free(fmt);
+	}
+
 	print_table_plaintext(&table);
 	clear_table(&table);
 }
@@ -465,6 +950,19 @@ static void survey_report_plaintext(struct survey_context *ctx)
 				   ctx->report.by_type,
 				   REPORT_TYPE_COUNT);
 
+	survey_report_commit_parents(ctx);
+
+	survey_report_hbin(_("COMMITS HISTOGRAM BY SIZE IN BYTES"),
+			   ctx->report.reachable_objects.commits.base.size_hbin);
+
+	survey_report_tree_lengths(ctx);
+
+	survey_report_hbin(_("TREES HISTOGRAM BY SIZE IN BYTES"),
+			   ctx->report.reachable_objects.trees.base.size_hbin);
+
+	survey_report_hbin(_("BLOBS HISTOGRAM BY SIZE IN BYTES"),
+			   ctx->report.reachable_objects.blobs.base.size_hbin);
+
 	survey_report_plaintext_sorted_size(
 		&ctx->report.top_paths_by_count[REPORT_TYPE_TREE]);
 	survey_report_plaintext_sorted_size(
@@ -479,6 +977,12 @@ static void survey_report_plaintext(struct survey_context *ctx)
 		&ctx->report.top_paths_by_inflate[REPORT_TYPE_TREE]);
 	survey_report_plaintext_sorted_size(
 		&ctx->report.top_paths_by_inflate[REPORT_TYPE_BLOB]);
+
+	survey_report_largest_vec(ctx, ctx->report.reachable_objects.commits.vec_largest_by_nr_parents);
+	survey_report_largest_vec(ctx, ctx->report.reachable_objects.commits.vec_largest_by_size_bytes);
+	survey_report_largest_vec(ctx, ctx->report.reachable_objects.trees.vec_largest_by_nr_entries);
+	survey_report_largest_vec(ctx, ctx->report.reachable_objects.trees.vec_largest_by_size_bytes);
+	survey_report_largest_vec(ctx, ctx->report.reachable_objects.blobs.vec_largest_by_size_bytes);
 }
 
 /*
@@ -550,6 +1054,31 @@ static int survey_load_config_cb(const char *var, const char *value,
 		ctx->opts.show_progress = git_config_bool(var, value);
 		return 0;
 	}
+	if (!strcmp(var, "survey.namerev")) {
+		ctx->opts.show_name_rev = git_config_bool(var, value);
+		return 0;
+	}
+	if (!strcmp(var, "survey.showcommitparents")) {
+		ctx->opts.show_largest_commits_by_nr_parents = git_config_ulong(var, value, cctx->kvi);
+		return 0;
+	}
+	if (!strcmp(var, "survey.showcommitsizes")) {
+		ctx->opts.show_largest_commits_by_size_bytes = git_config_ulong(var, value, cctx->kvi);
+		return 0;
+	}
+
+	if (!strcmp(var, "survey.showtreeentries")) {
+		ctx->opts.show_largest_trees_by_nr_entries = git_config_ulong(var, value, cctx->kvi);
+		return 0;
+	}
+	if (!strcmp(var, "survey.showtreesizes")) {
+		ctx->opts.show_largest_trees_by_size_bytes = git_config_ulong(var, value, cctx->kvi);
+		return 0;
+	}
+	if (!strcmp(var, "survey.showblobsizes")) {
+		ctx->opts.show_largest_blobs_by_size_bytes = git_config_ulong(var, value, cctx->kvi);
+		return 0;
+	}
 	if (!strcmp(var, "survey.top")) {
 		ctx->opts.top_nr = git_config_bool(var, value);
 		return 0;
@@ -614,6 +1143,80 @@ static void do_load_refs(struct survey_context *ctx,
 	ref_sorting_release(sorting);
 }
 
+/*
+ * Try to run `git name-rev` on each of the containing-commit-oid's
+ * in this large-item-vec to get a pretty name for each OID.  Silently
+ * ignore errors if it fails because this info is nice to have but not
+ * essential.
+ */
+static void large_item_vec_lookup_name_rev(struct survey_context *ctx,
+					   struct large_item_vec *vec)
+{
+	struct child_process cp = CHILD_PROCESS_INIT;
+	struct strbuf in = STRBUF_INIT;
+	struct strbuf out = STRBUF_INIT;
+	const char *line;
+	size_t k;
+
+	if (!vec || !vec->nr_items)
+		return;
+
+	ctx->progress_total += vec->nr_items;
+	display_progress(ctx->progress, ctx->progress_total);
+
+	for (k = 0; k < vec->nr_items; k++)
+		strbuf_addf(&in, "%s\n", oid_to_hex(&vec->items[k].containing_commit_oid));
+
+	cp.git_cmd = 1;
+	strvec_pushl(&cp.args, "name-rev", "--name-only", "--annotate-stdin", NULL);
+	if (pipe_command(&cp, in.buf, in.len, &out, 0, NULL, 0)) {
+		strbuf_release(&in);
+		strbuf_release(&out);
+		return;
+	}
+
+	line = out.buf;
+	k = 0;
+	while (*line) {
+		const char *eol = strchrnul(line, '\n');
+
+		strbuf_init(&vec->items[k].name_rev, 0);
+		strbuf_add(&vec->items[k].name_rev, line, (eol - line));
+
+		line = eol + 1;
+		k++;
+	}
+
+	strbuf_release(&in);
+	strbuf_release(&out);
+}
+
+static void do_lookup_name_rev(struct survey_context *ctx)
+{
+	/*
+	 * `git name-rev` can be very expensive when there are lots of
+	 * refs, so make it optional.
+	 */
+	if (!ctx->opts.show_name_rev)
+		return;
+
+	if (ctx->opts.show_progress) {
+		ctx->progress_total = 0;
+		ctx->progress = start_progress(_("Resolving name-revs..."), 0);
+	}
+
+	large_item_vec_lookup_name_rev(ctx, ctx->report.reachable_objects.commits.vec_largest_by_nr_parents);
+	large_item_vec_lookup_name_rev(ctx, ctx->report.reachable_objects.commits.vec_largest_by_size_bytes);
+
+	large_item_vec_lookup_name_rev(ctx, ctx->report.reachable_objects.trees.vec_largest_by_nr_entries);
+	large_item_vec_lookup_name_rev(ctx, ctx->report.reachable_objects.trees.vec_largest_by_size_bytes);
+
+	large_item_vec_lookup_name_rev(ctx, ctx->report.reachable_objects.blobs.vec_largest_by_size_bytes);
+
+	if (ctx->opts.show_progress)
+		stop_progress(&ctx->progress);
+}
+
 /*
  * The REFS phase:
  *
@@ -637,6 +1240,7 @@ static void survey_phase_refs(struct survey_context *ctx)
 	for (size_t i = 0; i < ctx->ref_array.nr; i++) {
 		unsigned long size;
 		struct ref_array_item *item = ctx->ref_array.items[i];
+		size_t len = strlen(item->refname);
 
 		switch (item->kind) {
 		case FILTER_REFS_TAGS:
@@ -663,6 +1267,33 @@ static void survey_phase_refs(struct survey_context *ctx)
 			ctx->report.refs.unknown_nr++;
 			break;
 		}
+
+		/*
+		 * SymRefs are somewhat orthogonal to the above
+		 * classification (e.g. "HEAD" --> detached
+		 * and "refs/remotes/origin/HEAD" --> remote) so
+		 * our totals will already include them.
+		 */
+		if (item->flag & REF_ISSYMREF)
+			ctx->report.refs.cnt_symref++;
+
+		/*
+		 * Where/how is the ref stored in GITDIR.
+		 */
+		if (item->flag & REF_ISPACKED)
+			ctx->report.refs.cnt_packed++;
+		else
+			ctx->report.refs.cnt_loose++;
+
+		if (item->kind == FILTER_REFS_REMOTES) {
+			ctx->report.refs.len_sum_remote_refnames += len;
+			if (len > ctx->report.refs.len_max_remote_refname)
+				ctx->report.refs.len_max_remote_refname = len;
+		} else {
+			ctx->report.refs.len_sum_local_refnames += len;
+			if (len > ctx->report.refs.len_max_local_refname)
+				ctx->report.refs.len_max_local_refname = len;
+		}
 	}
 
 	trace2_region_leave("survey", "phase/refs", ctx->repo);
@@ -697,7 +1328,8 @@ static void increment_object_counts(
 
 static void increment_totals(struct survey_context *ctx,
 			     struct oid_array *oids,
-			     struct survey_report_object_size_summary *summary)
+			     struct survey_report_object_size_summary *summary,
+			     const char *path)
 {
 	for (size_t i = 0; i < oids->nr; i++) {
 		struct object_info oi = OBJECT_INFO_INIT;
@@ -705,6 +1337,8 @@ static void increment_totals(struct survey_context *ctx,
 		unsigned long object_length = 0;
 		off_t disk_sizep = 0;
 		enum object_type type;
+		struct survey_stats_base_object *base;
+		int hb;
 
 		oi.typep = &type;
 		oi.sizep = &object_length;
@@ -713,11 +1347,86 @@ static void increment_totals(struct survey_context *ctx,
 		if (oid_object_info_extended(ctx->repo, &oids->oid[i],
 					     &oi, oi_flags) < 0) {
 			summary->num_missing++;
-		} else {
-			summary->nr++;
-			summary->disk_size += disk_sizep;
-			summary->inflated_size += object_length;
+			continue;
+		}
+
+		summary->nr++;
+		summary->disk_size += disk_sizep;
+		summary->inflated_size += object_length;
+
+		switch (type) {
+		case OBJ_COMMIT: {
+			struct commit *commit = lookup_commit(ctx->repo, &oids->oid[i]);
+			unsigned k = commit_list_count(commit->parents);
+
+			if (k >= PBIN_VEC_LEN)
+				k = PBIN_VEC_LEN - 1;
+
+			ctx->report.reachable_objects.commits.parent_cnt_pbin[k]++;
+			base = &ctx->report.reachable_objects.commits.base;
+
+			maybe_insert_large_item(ctx->report.reachable_objects.commits.vec_largest_by_nr_parents, k, &commit->object.oid, NULL, &commit->object.oid);
+			maybe_insert_large_item(ctx->report.reachable_objects.commits.vec_largest_by_size_bytes, object_length, &commit->object.oid, NULL, &commit->object.oid);
+			break;
+		}
+		case OBJ_TREE: {
+			struct tree *tree = lookup_tree(ctx->repo, &oids->oid[i]);
+			if (tree) {
+				struct survey_stats_trees *pst = &ctx->report.reachable_objects.trees;
+				struct tree_desc desc;
+				struct name_entry entry;
+				int nr_entries;
+				int qb;
+
+				parse_tree(tree);
+				init_tree_desc(&desc, &oids->oid[i], tree->buffer, tree->size);
+				nr_entries = 0;
+				while (tree_entry(&desc, &entry))
+					nr_entries++;
+
+				pst->sum_entries += nr_entries;
+
+				maybe_insert_large_item(pst->vec_largest_by_nr_entries, nr_entries, &tree->object.oid, path, NULL);
+				maybe_insert_large_item(pst->vec_largest_by_size_bytes, object_length, &tree->object.oid, path, NULL);
+
+				qb = qbin(nr_entries);
+				incr_obj_hist_bin(&pst->entry_qbin[qb], object_length, disk_sizep);
+			}
+			base = &ctx->report.reachable_objects.trees.base;
+			break;
+		}
+		case OBJ_BLOB:
+			base = &ctx->report.reachable_objects.blobs.base;
+
+			maybe_insert_large_item(ctx->report.reachable_objects.blobs.vec_largest_by_size_bytes, object_length, &oids->oid[i], path, NULL);
+			break;
+		default:
+			continue;
+		}
+
+		switch (oi.whence) {
+		case OI_CACHED:
+			base->cnt_cached++;
+			break;
+		case OI_LOOSE:
+			base->cnt_loose++;
+			break;
+		case OI_PACKED:
+			base->cnt_packed++;
+			break;
+		case OI_DBCACHED:
+			base->cnt_dbcached++;
+			break;
+		default:
+			break;
 		}
+
+		base->sum_size += object_length;
+		base->sum_disk_size += disk_sizep;
+
+		hb = hbin(object_length);
+		incr_obj_hist_bin(&base->size_hbin[hb], object_length, disk_sizep);
+
 	}
 }
 
@@ -729,7 +1438,7 @@ static void increment_object_totals(struct survey_context *ctx,
 	struct survey_report_object_size_summary *total;
 	struct survey_report_object_size_summary summary = { 0 };
 
-	increment_totals(ctx, oids, &summary);
+	increment_totals(ctx, oids, &summary, path);
 
 	switch (type) {
 	case OBJ_COMMIT:
@@ -861,6 +1570,12 @@ static void survey_phase_objects(struct survey_context *ctx)
 
 	release_revisions(&revs);
 	trace2_region_leave("survey", "phase/objects", ctx->repo);
+
+	if (ctx->opts.show_name_rev) {
+		trace2_region_enter("survey", "phase/namerev", the_repository);
+		do_lookup_name_rev(ctx);
+		trace2_region_enter("survey", "phase/namerev", the_repository);
+	}
 }
 
 int cmd_survey(int argc, const char **argv, const char *prefix, struct repository *repo)
@@ -869,7 +1584,7 @@ int cmd_survey(int argc, const char **argv, const char *prefix, struct repositor
 		.opts = {
 			.verbose = 0,
 			.show_progress = -1, /* defaults to isatty(2) */
-			.top_nr = 100,
+			.top_nr = 10,
 
 			.refs.want_all_refs = -1,
 
@@ -885,6 +1600,7 @@ int cmd_survey(int argc, const char **argv, const char *prefix, struct repositor
 	static struct option survey_options[] = {
 		OPT__VERBOSE(&ctx.opts.verbose, N_("verbose output")),
 		OPT_BOOL(0, "progress", &ctx.opts.show_progress, N_("show progress")),
+		OPT_BOOL(0, "name-rev", &ctx.opts.show_name_rev, N_("run name-rev on each reported commit")),
 		OPT_INTEGER('n', "top", &ctx.opts.top_nr,
 			    N_("number of entries to include in detail tables")),
 
@@ -896,6 +1612,14 @@ int cmd_survey(int argc, const char **argv, const char *prefix, struct repositor
 		OPT_BOOL_F(0, "detached", &ctx.opts.refs.want_detached, N_("include detached HEAD"),     PARSE_OPT_NONEG),
 		OPT_BOOL_F(0, "other",    &ctx.opts.refs.want_other,    N_("include notes and stashes"), PARSE_OPT_NONEG),
 
+		OPT_INTEGER_F(0, "commit-parents", &ctx.opts.show_largest_commits_by_nr_parents, N_("show N largest commits by parent count"),  PARSE_OPT_NONEG),
+		OPT_INTEGER_F(0, "commit-sizes",   &ctx.opts.show_largest_commits_by_size_bytes, N_("show N largest commits by size in bytes"), PARSE_OPT_NONEG),
+
+		OPT_INTEGER_F(0, "tree-entries",   &ctx.opts.show_largest_trees_by_nr_entries,   N_("show N largest trees by entry count"),     PARSE_OPT_NONEG),
+		OPT_INTEGER_F(0, "tree-sizes",     &ctx.opts.show_largest_trees_by_size_bytes,   N_("show N largest trees by size in bytes"),   PARSE_OPT_NONEG),
+
+		OPT_INTEGER_F(0, "blob-sizes",     &ctx.opts.show_largest_blobs_by_size_bytes,   N_("show N largest blobs by size in bytes"),   PARSE_OPT_NONEG),
+
 		OPT_END(),
 	};
 
@@ -919,6 +1643,39 @@ int cmd_survey(int argc, const char **argv, const char *prefix, struct repositor
 
 	fixup_refs_wanted(&ctx);
 
+	if (ctx.opts.show_largest_commits_by_nr_parents)
+		ctx.report.reachable_objects.commits.vec_largest_by_nr_parents =
+			alloc_large_item_vec(
+				"largest_commits_by_nr_parents",
+				"nr_parents",
+				ctx.opts.show_largest_commits_by_nr_parents);
+	if (ctx.opts.show_largest_commits_by_size_bytes)
+		ctx.report.reachable_objects.commits.vec_largest_by_size_bytes =
+			alloc_large_item_vec(
+				"largest_commits_by_size_bytes",
+				"size",
+				ctx.opts.show_largest_commits_by_size_bytes);
+
+	if (ctx.opts.show_largest_trees_by_nr_entries)
+		ctx.report.reachable_objects.trees.vec_largest_by_nr_entries =
+			alloc_large_item_vec(
+				"largest_trees_by_nr_entries",
+				"nr_entries",
+				ctx.opts.show_largest_trees_by_nr_entries);
+	if (ctx.opts.show_largest_trees_by_size_bytes)
+		ctx.report.reachable_objects.trees.vec_largest_by_size_bytes =
+			alloc_large_item_vec(
+				"largest_trees_by_size_bytes",
+				"size",
+				ctx.opts.show_largest_trees_by_size_bytes);
+
+	if (ctx.opts.show_largest_blobs_by_size_bytes)
+		ctx.report.reachable_objects.blobs.vec_largest_by_size_bytes =
+			alloc_large_item_vec(
+				"largest_blobs_by_size_bytes",
+				"size",
+				ctx.opts.show_largest_blobs_by_size_bytes);
+
 	survey_phase_refs(&ctx);
 
 	survey_phase_objects(&ctx);
@@ -928,3 +1685,143 @@ int cmd_survey(int argc, const char **argv, const char *prefix, struct repositor
 	clear_survey_context(&ctx);
 	return 0;
 }
+
+/*
+ * NEEDSWORK: So far, I only have iteration on the requested set of
+ * refs and treewalk/reachable objects on that set of refs.  The
+ * following is a bit of a laundry list of things that I'd like to
+ * add.
+ *
+ * [] Dump stats on all of the packfiles. The number and size of each.
+ *    Whether each is in the .git directory or in an alternate.  The
+ *    state of the IDX or MIDX files and etc.  Delta chain stats.  All
+ *    of this data is relative to the "lived-in" state of the
+ *    repository.  Stuff that may change after a GC or repack.
+ *
+ * [] Clone and Index stats. partial, shallow, sparse-checkout,
+ *    sparse-index, etc.  Hydration stats.
+ *
+ * [] Dump stats on each remote.  When we fetch from a remote the size
+ *    of the response is related to the set of haves on the server.
+ *    You can see this in `GIT_TRACE_CURL=1 git fetch`. We get a
+ *    `ls-refs` payload that lists all of the branches and tags on the
+ *    server, so at a minimum the RefName and SHA for each. But for
+ *    annotated tags we also get the peeled SHA.  The size of this
+ *    overhead on every fetch is proporational to the size of the `git
+ *    ls-remote` response (roughly, although the latter repeats the
+ *    RefName of the peeled tag).  If, for example, you have 500K refs
+ *    on a remote, you're going to have a long "haves" message, so
+ *    every fetch will be slow just because of that overhead (not
+ *    counting new objects to be downloaded).
+ *
+ *    Note that the local set of tags in "refs/tags/" is a union over
+ *    all remotes.  However, since most people only have one remote,
+ *    we can probaly estimate the overhead value directly from the
+ *    size of the set of "refs/tags/" that we visited while building
+ *    the `ref_info` and `ref_array` and not need to ask the remote.
+ *
+ *    [] Should the "string length of refnames / remote refs", for
+ *       example, be sub-divided by remote so we can project the
+ *       cost of the haves/wants overhead a fetch.
+ *
+ * [] Can we examine the merge commits and classify them as clean or
+ *    dirty?  (ie. ones with merge conflicts that needed to be
+ *    addressed during the merge itself.)
+ *
+ *    [] Do dirty merges affect performance of later operations?
+ *
+ * [] Dump info on the complexity of the DAG.  Criss-cross merges.
+ *    The number of edges that must be touched to compute merge bases.
+ *    Edge length. The number of parallel lanes in the history that
+ *    must be navigated to get to the merge base.  What affects the
+ *    cost of the Ahead/Behind computation?  How often do
+ *    criss-crosses occur and do they cause various operations to slow
+ *    down?
+ *
+ * [] If there are primary branches (like "main" or "master") are they
+ *    always on the left side of merges?  Does the graph have a clean
+ *    left edge?  Or are there normal and "backwards" merges?  Do
+ *    these cause problems at scale?
+ *
+ * [] If we have a hierarchy of FI/RI branches like "L1", "L2, ...,
+ *    can we learn anything about the shape of the repo around these
+ *    FI and RI integrations?
+ *
+ * [] Do we need a no-PII flag to omit pathnames or branch/tag names
+ *    in the various histograms?  (This would turn off --name-rev
+ *    too.)
+ *
+ * [] I have so far avoided adding opinions about individual fields
+ *    (such as the way `git-sizer` prints a row of stars or bangs in
+ *    the last column).
+ *
+ *    I'm wondering if that is a job of this executable or if it
+ *    should be done in a post-processing step using the JSON output.
+ *
+ *    My problem with the `git-sizer` approach is that it doesn't give
+ *    the (casual) user any information on why it has stars or bangs.
+ *    And there isn't a good way to print detailed information in the
+ *    ASCII-art tables that would be easy to understand.
+ *
+ *    [] For example, a large number of refs does not define a cliff.
+ *       Performance will drop off (linearly, quadratically, ... ??).
+ *       The tool should refer them to article(s) talking about the
+ *       different problems that it could cause.  So should `git
+ *       survey` just print the number and (implicitly) refer them to
+ *       the man page (chapter/verse) or to a tool that will interpret
+ *       the number and explain it?
+ *
+ *    [] Alternatively, should `git survey` do that analysis too and
+ *       just print footnotes for each large number?
+ *
+ *    [] The computation of the raw survey JSON data can take HOURS on
+ *       a very large repo (like Windows), so I'm wondering if we
+ *       want to keep the opinion portion separate.
+ *
+ * [] In addition to opinions based on the static data, I would like
+ *    to dump the JSON results (or the Trace2 telemetry) into a DB and
+ *    aggregate it with other users.
+ *
+ *    Granted, they should all see the same DAG and the same set of
+ *    reachable objects, but we could average across all datasets
+ *    generated on a particular date and detect outlier users.
+ *
+ *    [] Maybe someone cloned from the `_full` endpoint rather than
+ *       the limited refs endpoint.
+ *
+ *    [] Maybe that user is having problems with repacking / GC /
+ *       maintenance without knowing it.
+ *
+ * [] I'd also like to dump use the DB to compare survey datasets over
+ *    a time.  How fast is their repository growing and in what ways?
+ *
+ *    [] I'd rather have the delta analysis NOT be inside `git
+ *       survey`, so it makes sense to consider having all of it in a
+ *       post-process step.
+ *
+ * [] Another reason to put the opinion analysis in a post-process
+ *    is that it would be easier to generate plots on the data tables.
+ *    Granted, we can get plots from telemetry, but a stand-alone user
+ *    could run the JSON thru python or jq or something and generate
+ *    something nicer than ASCII-art and it could handle cross-referencing
+ *    and hyperlinking to helpful information on each issue.
+ *
+ * [] I think there are several classes of data that we can report on:
+ *
+ *    [] The "inherit repo properties", such as the shape and size of
+ *       the DAG -- these should be universal in each enlistment.
+ *
+ *    [] The "ODB lived in properties", such as the efficiency
+ *       of the repack and things like partial and shallow clone.
+ *       These will vary, but indicate health of the ODB.
+ *
+ *    [] The "index related properties", such as sparse-checkout,
+ *       sparse-index, cache-tree, untracked-cache, fsmonitor, and
+ *       etc.  These will also vary, but are more like knobs for
+ *       the user to adjust.
+ *
+ *    [] I want to compare these with Matt's "dimensions of scale"
+ *       notes and see if there are other pieces of data that we
+ *       could compute/consider.
+ *
+ */
diff --git a/t/t8100-git-survey.sh b/t/t8100-git-survey.sh
index 8c6edfcae0c6c2..0d35dfcf311827 100755
--- a/t/t8100-git-survey.sh
+++ b/t/t8100-git-survey.sh
@@ -59,13 +59,20 @@ test_expect_success 'git survey (default)' '
 	-----------------------------------------------------
 
 	REFERENCES SUMMARY
-	========================
-	,       Ref Type | Count
-	-----------------+------
-	,       Branches |     1
-	     Remote refs |     0
-	      Tags (all) |     2
-	Tags (annotated) |     2
+	==================================
+	,                 Ref Type | Count
+	---------------------------+------
+	,                 Branches |     1
+	,              Remote refs |     0
+	,               Tags (all) |     2
+	,         Tags (annotated) |     2
+	,            Symbolic refs |     1
+	,               Loose refs |     4
+	,              Packed refs |     0
+	  Max local refname length |    15
+	 Sum local refnames length |    46
+	 Max remote refname length |     0
+	Sum remote refnames length |     0
 
 	REACHABLE OBJECT SUMMARY
 	========================