[PATCH v2 03/11] diff: halt tree-diff early after max_changes
From: Derrick Stolee via GitGitGadget <hidden>
Date: 2020-02-05 22:56:43
Subsystem:
the rest · Maintainer:
Linus Torvalds
From: Derrick Stolee <redacted> When computing the changed-paths bloom filters for the commit-graph, we limit the size of the filter by restricting the number of paths in the diff. Instead of computing a large diff and then ignoring the result, it is better to halt the diff computation early. Create a new "max_changes" option in struct diff_options. If non-zero, then halt the diff computation after discovering strictly more changed paths. This includes paths corresponding to trees that change. Use this max_changes option in the bloom filter calculations. This reduces the time taken to compute the filters for the Linux kernel repo from 2m50s to 2m35s. On a large internal repository with ~500 commits that perform tree-wide changes, the time reduced from 6m15s to 3m48s. Signed-off-by: Derrick Stolee <redacted> Signed-off-by: Garima Singh <redacted> --- bloom.c | 4 +++- diff.h | 5 +++++ tree-diff.c | 6 ++++++ 3 files changed, 14 insertions(+), 1 deletion(-)
diff --git a/bloom.c b/bloom.c
index 6082193a75..818382c03b 100644
--- a/bloom.c
+++ b/bloom.c@@ -134,6 +134,7 @@ struct bloom_filter *get_bloom_filter(struct repository *r, struct bloom_filter_settings settings = DEFAULT_BLOOM_FILTER_SETTINGS; int i; struct diff_options diffopt; + int max_changes = 512; if (!bloom_filters.slab_size) return NULL;
@@ -142,6 +143,7 @@ struct bloom_filter *get_bloom_filter(struct repository *r, repo_diff_setup(r, &diffopt); diffopt.flags.recursive = 1; + diffopt.max_changes = max_changes; diff_setup_done(&diffopt); if (c->parents)
@@ -150,7 +152,7 @@ struct bloom_filter *get_bloom_filter(struct repository *r, diff_tree_oid(NULL, &c->object.oid, "", &diffopt); diffcore_std(&diffopt); - if (diff_queued_diff.nr <= 512) { + if (diff_queued_diff.nr <= max_changes) { struct hashmap pathmap; struct pathmap_hash_entry* e; struct hashmap_iter iter;
diff --git a/diff.h b/diff.h
index 6febe7e365..9443dc1b00 100644
--- a/diff.h
+++ b/diff.h@@ -285,6 +285,11 @@ struct diff_options { /* Number of hexdigits to abbreviate raw format output to. */ int abbrev; + /* If non-zero, then stop computing after this many changes. */ + int max_changes; + /* For internal use only. */ + int num_changes; + int ita_invisible_in_index; /* white-space error highlighting */ #define WSEH_NEW (1<<12)
diff --git a/tree-diff.c b/tree-diff.c
index 33ded7f8b3..f3d303c6e5 100644
--- a/tree-diff.c
+++ b/tree-diff.c@@ -434,6 +434,9 @@ static struct combine_diff_path *ll_diff_tree_paths( if (diff_can_quit_early(opt)) break; + if (opt->max_changes && opt->num_changes > opt->max_changes) + break; + if (opt->pathspec.nr) { skip_uninteresting(&t, base, opt); for (i = 0; i < nparent; i++)
@@ -518,6 +521,7 @@ static struct combine_diff_path *ll_diff_tree_paths( /* t↓ */ update_tree_entry(&t); + opt->num_changes++; } /* t > p[imin] */
@@ -535,6 +539,7 @@ static struct combine_diff_path *ll_diff_tree_paths( skip_emit_tp: /* ∀ pi=p[imin] pi↓ */ update_tp_entries(tp, nparent); + opt->num_changes++; } }
@@ -552,6 +557,7 @@ struct combine_diff_path *diff_tree_paths( const struct object_id **parents_oid, int nparent, struct strbuf *base, struct diff_options *opt) { + opt->num_changes = 0; p = ll_diff_tree_paths(p, oid, parents_oid, nparent, base, opt); /*
--
gitgitgadget