Thread (28 messages), 3 authors, 7h ago
HOT today
Revisions (2)
  1. v1 current
  2. v2 [diff vs current]

[PATCH 4/5] blame: consult diff process for zero-hunk detection

From: Michael Montalbo via GitGitGadget <hidden>
Date: 2026-05-22 02:11:31
Subsystem: documentation, the rest · Maintainers: Jonathan Corbet, Linus Torvalds

From: Michael Montalbo <redacted>

When a diff process is configured via diff.<driver>.process,
consult it during blame's per-commit diffing.  If the process
succeeds and returns zero hunks for a commit's changes to a file,
treat the commit as having made no changes to that file, so that
blame attributes its lines to earlier commits.  If the process
cannot provide hunks, blame diffs the two blobs as before.

The subprocess is long-running (one startup cost amortized
across the blame traversal), but each commit in the file's
history incurs a round-trip to the tool.

Signed-off-by: Michael Montalbo <redacted>
---
 Documentation/gitattributes.adoc |  3 +++
 blame.c                          | 43 +++++++++++++++++++++++++++++---
 t/t4080-diff-process.sh          | 32 ++++++++++++++++++++++++
 3 files changed, 74 insertions(+), 4 deletions(-)
diff --git a/Documentation/gitattributes.adoc b/Documentation/gitattributes.adoc
index cc724f8c63..7d66fa3aa1 100644
--- a/Documentation/gitattributes.adoc
+++ b/Documentation/gitattributes.adoc
@@ -857,6 +857,9 @@ The tool responds with lines of the form
 
 If the tool returns zero hunks with `status=success`, Git treats
 the file as having no changes and produces no diff output.
+`git blame` also consults the diff process and skips commits
+where it reports zero hunks, attributing lines to earlier commits
+instead.
 
 Tools should ignore unknown keys in the per-file request to
 remain forward-compatible.
diff --git a/blame.c b/blame.c
index a3c49d132e..8a5f14db7a 100644
--- a/blame.c
+++ b/blame.c
@@ -19,6 +19,8 @@
 #include "tag.h"
 #include "trace2.h"
 #include "blame.h"
+#include "diff-process.h"
+#include "userdiff.h"
 #include "alloc.h"
 #include "commit-slab.h"
 #include "bloom.h"
@@ -315,16 +317,47 @@ static struct commit *fake_working_tree_commit(struct repository *r,
 
 
 static int diff_hunks(mmfile_t *file_a, mmfile_t *file_b,
-		      xdl_emit_hunk_consume_func_t hunk_func, void *cb_data, int xdl_opts)
+		      xdl_emit_hunk_consume_func_t hunk_func, void *cb_data,
+		      int xdl_opts, struct index_state *istate,
+		      const char *path)
 {
 	xpparam_t xpp = {0};
 	xdemitconf_t xecfg = {0};
 	xdemitcb_t ecb = {NULL};
+	struct xdl_hunk *ext_hunks = NULL;
+	int ret;
 
 	xpp.flags = xdl_opts;
 	xecfg.hunk_func = hunk_func;
 	ecb.priv = cb_data;
-	return xdi_diff(file_a, file_b, &xpp, &xecfg, &ecb);
+
+	if (path && istate) {
+		struct userdiff_driver *drv;
+		drv = userdiff_find_by_path(istate, path);
+		if (drv && drv->process) {
+			size_t nr = 0;
+			if (!diff_process_get_hunks(drv, path,
+						    file_a->ptr, file_a->size,
+						    file_b->ptr, file_b->size,
+						    &ext_hunks, &nr)) {
+				if (!nr) {
+					/*
+					 * Zero hunks: the process deems the
+					 * files equivalent; emit no hunks so
+					 * blame looks past this commit.
+					 */
+					free(ext_hunks);
+					return 0;
+				}
+				xpp.external_hunks = ext_hunks;
+				xpp.external_hunks_nr = nr;
+			}
+		}
+	}
+
+	ret = xdi_diff(file_a, file_b, &xpp, &xecfg, &ecb);
+	free(ext_hunks);
+	return ret;
 }
 
 static const char *get_next_line(const char *start, const char *end)
@@ -1961,7 +1994,8 @@ static void pass_blame_to_parent(struct blame_scoreboard *sb,
 			 &sb->num_read_blob, ignore_diffs);
 	sb->num_get_patch++;
 
-	if (diff_hunks(&file_p, &file_o, blame_chunk_cb, &d, sb->xdl_opts))
+	if (diff_hunks(&file_p, &file_o, blame_chunk_cb, &d, sb->xdl_opts,
+		       sb->revs->diffopt.repo->index, target->path))
 		die("unable to generate diff (%s -> %s)",
 		    oid_to_hex(&parent->commit->object.oid),
 		    oid_to_hex(&target->commit->object.oid));
@@ -2114,7 +2148,8 @@ static void find_copy_in_blob(struct blame_scoreboard *sb,
 	 * file_p partially may match that image.
 	 */
 	memset(split, 0, sizeof(struct blame_entry [3]));
-	if (diff_hunks(file_p, &file_o, handle_split_cb, &d, sb->xdl_opts))
+	if (diff_hunks(file_p, &file_o, handle_split_cb, &d, sb->xdl_opts,
+		       NULL, NULL))
 		die("unable to generate diff (%s)",
 		    oid_to_hex(&parent->commit->object.oid));
 	/* remainder, if any, all match the preimage */
diff --git a/t/t4080-diff-process.sh b/t/t4080-diff-process.sh
index 6f49f4e66b..5ed644b786 100755
--- a/t/t4080-diff-process.sh
+++ b/t/t4080-diff-process.sh
@@ -335,4 +335,36 @@ test_expect_success PYTHON 'diff process zero hunks suppresses diff output' '
 	test_must_be_empty actual
 '
 
+test_expect_success PYTHON 'blame skips commits with zero hunks from diff process' '
+	cat >blame.c <<-\EOF &&
+	int main(void)
+	{
+	    return 0;
+	}
+	EOF
+	git add blame.c &&
+	git commit -m "add blame.c" &&
+
+	cat >blame.c <<-\EOF &&
+	int main(void)
+	{
+	        return 0;
+	}
+	EOF
+	git add blame.c &&
+	git commit -m "reformat blame.c" &&
+	BLAME_COMMIT=$(git rev-parse --short HEAD) &&
+
+	# Without zero-hunk mode, blame attributes the change.
+	git blame blame.c >without &&
+	grep "$BLAME_COMMIT" without &&
+
+	# With zero-hunk mode, the process considers the files equivalent
+	# and blame skips the reformat commit.
+	git -c diff.cdiff.process="$BACKEND --mode=zero-hunk" \
+		blame blame.c >with &&
+	! grep "$BLAME_COMMIT" with
+'
+
+
 test_done
-- 
gitgitgadget
Keyboard shortcuts
h    back out one level
j    next message in thread
k    previous message in thread
l    drill in
Esc  close help / fold thread tree
?    toggle this help