[PATCH 4/5] blame: consult diff process for zero-hunk detection
From: Michael Montalbo via GitGitGadget <hidden>
Date: 2026-05-22 02:11:31
Subsystem:
documentation, the rest · Maintainers:
Jonathan Corbet, Linus Torvalds
From: Michael Montalbo <redacted>

When a diff process is configured via diff.<driver>.process, consult it
during blame's per-commit diffing. If the process returns zero hunks
for a commit's changes to a file, treat the commit as having no
changes, causing blame to attribute lines to earlier commits.

The subprocess is long-running (one startup cost amortized across the
blame traversal), but each commit in the file's history incurs a
round-trip to the tool.

Signed-off-by: Michael Montalbo <redacted>
---
 Documentation/gitattributes.adoc |  3 +++
 blame.c                          | 43 +++++++++++++++++++++++++++++---
 t/t4080-diff-process.sh          | 32 ++++++++++++++++++++++++
 3 files changed, 74 insertions(+), 4 deletions(-)

diff --git a/Documentation/gitattributes.adoc b/Documentation/gitattributes.adoc
index cc724f8c63..7d66fa3aa1 100644
--- a/Documentation/gitattributes.adoc
+++ b/Documentation/gitattributes.adoc
@@ -857,6 +857,9 @@ The tool responds with lines of the form
 
 If the tool returns zero hunks with `status=success`, Git treats the
 file as having no changes and produces no diff output.
+`git blame` also consults the diff process and skips commits
+where it reports zero hunks, attributing lines to earlier commits
+instead.
 
 Tools should ignore unknown keys in the per-file request to remain
 forward-compatible.
diff --git a/blame.c b/blame.c
index a3c49d132e..8a5f14db7a 100644
--- a/blame.c
+++ b/blame.c@@ -19,6 +19,8 @@ #include "tag.h" #include "trace2.h" #include "blame.h" +#include "diff-process.h" +#include "userdiff.h" #include "alloc.h" #include "commit-slab.h" #include "bloom.h"
@@ -315,16 +317,47 @@ static struct commit *fake_working_tree_commit(struct repository *r,
 
 
 static int diff_hunks(mmfile_t *file_a, mmfile_t *file_b,
-		      xdl_emit_hunk_consume_func_t hunk_func, void *cb_data, int xdl_opts)
+		      xdl_emit_hunk_consume_func_t hunk_func, void *cb_data,
+		      int xdl_opts, struct index_state *istate,
+		      const char *path)
 {
 	xpparam_t xpp = {0};
 	xdemitconf_t xecfg = {0};
 	xdemitcb_t ecb = {NULL};
+	struct xdl_hunk *ext_hunks = NULL;
+	int ret;
 
 	xpp.flags = xdl_opts;
 	xecfg.hunk_func = hunk_func;
 	ecb.priv = cb_data;
-	return xdi_diff(file_a, file_b, &xpp, &xecfg, &ecb);
+
+	if (path && istate) {
+		struct userdiff_driver *drv;
+		drv = userdiff_find_by_path(istate, path);
+		if (drv && drv->process) {
+			size_t nr = 0;
+			if (!diff_process_get_hunks(drv, path,
+						    file_a->ptr, file_a->size,
+						    file_b->ptr, file_b->size,
+						    &ext_hunks, &nr)) {
+				if (!nr) {
+					/*
+					 * Zero hunks: the diff process deems
+					 * the files equivalent. Release the
+					 * buffer and look past this commit.
+					 */
+					free(ext_hunks);
+					return 0;
+				}
+				xpp.external_hunks = ext_hunks;
+				xpp.external_hunks_nr = nr;
+			}
+		}
+	}
+
+	ret = xdi_diff(file_a, file_b, &xpp, &xecfg, &ecb);
+	free(ext_hunks);
+	return ret;
 }
 
 static const char *get_next_line(const char *start, const char *end)
@@ -1961,7 +1994,8 @@ static void pass_blame_to_parent(struct blame_scoreboard *sb, &sb->num_read_blob, ignore_diffs); sb->num_get_patch++; - if (diff_hunks(&file_p, &file_o, blame_chunk_cb, &d, sb->xdl_opts)) + if (diff_hunks(&file_p, &file_o, blame_chunk_cb, &d, sb->xdl_opts, + sb->revs->diffopt.repo->index, target->path)) die("unable to generate diff (%s -> %s)", oid_to_hex(&parent->commit->object.oid), oid_to_hex(&target->commit->object.oid));
@@ -2114,7 +2148,8 @@ static void find_copy_in_blob(struct blame_scoreboard *sb, * file_p partially may match that image. */ memset(split, 0, sizeof(struct blame_entry [3])); - if (diff_hunks(file_p, &file_o, handle_split_cb, &d, sb->xdl_opts)) + if (diff_hunks(file_p, &file_o, handle_split_cb, &d, sb->xdl_opts, + NULL, NULL)) die("unable to generate diff (%s)", oid_to_hex(&parent->commit->object.oid)); /* remainder, if any, all match the preimage */
diff --git a/t/t4080-diff-process.sh b/t/t4080-diff-process.sh
index 6f49f4e66b..5ed644b786 100755
--- a/t/t4080-diff-process.sh
+++ b/t/t4080-diff-process.sh@@ -335,4 +335,36 @@ test_expect_success PYTHON 'diff process zero hunks suppresses diff output' ' test_must_be_empty actual ' +test_expect_success PYTHON 'blame skips commits with zero hunks from diff process' ' + cat >blame.c <<-\EOF && + int main(void) + { + return 0; + } + EOF + git add blame.c && + git commit -m "add blame.c" && + + cat >blame.c <<-\EOF && + int main(void) + { + return 0; + } + EOF + git add blame.c && + git commit -m "reformat blame.c" && + BLAME_COMMIT=$(git rev-parse --short HEAD) && + + # Without zero-hunk mode, blame attributes the change. + git blame blame.c >without && + grep "$BLAME_COMMIT" without && + + # With zero-hunk mode, the process considers the files equivalent + # and blame skips the reformat commit. + git -c diff.cdiff.process="$BACKEND --mode=zero-hunk" \ + blame blame.c >with && + ! grep "$BLAME_COMMIT" with +' + + test_done
--
gitgitgadget