Thread (94 messages) 94 messages, 5 authors, 2018-02-26

Re: [PATCH v2 06/14] commit-graph: implement git-commit-graph --read

From: SZEDER Gábor <hidden>
Date: 2018-02-02 00:02:45

quoted hunk ↗ jump to hunk
Teach git-commit-graph to read commit graph files and summarize their contents.

Use the --read option to verify the contents of a commit graph file in the
tests.

Signed-off-by: Derrick Stolee <redacted>
---
 Documentation/git-commit-graph.txt |   7 ++
 builtin/commit-graph.c             |  55 +++++++++++++++
 commit-graph.c                     | 138 ++++++++++++++++++++++++++++++++++++-
 commit-graph.h                     |  25 +++++++
 t/t5318-commit-graph.sh            |  28 ++++++--
 5 files changed, 247 insertions(+), 6 deletions(-)
diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt
index 3f3790d9a8..09aeaf6c82 100644
--- a/Documentation/git-commit-graph.txt
+++ b/Documentation/git-commit-graph.txt
@@ -10,6 +10,7 @@ SYNOPSIS
 --------
 [verse]
 'git commit-graph' --write <options> [--pack-dir <pack_dir>]
+'git commit-graph' --read <options> [--pack-dir <pack_dir>]
Again, what does this option do?
quoted hunk ↗ jump to hunk
 EXAMPLES
 --------
@@ -20,6 +21,12 @@ EXAMPLES
 $ git commit-graph --write
 ------------------------------------------------
 
+* Read basic information from a graph file.
++
+------------------------------------------------
+$ git commit-graph --read --graph-hash=<hash>
+------------------------------------------------
+
 GIT
 ---
 Part of the linkgit:git[1] suite
diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c
index 7affd512f1..218740b1f8 100644
--- a/builtin/commit-graph.c
+++ b/builtin/commit-graph.c
+int close_commit_graph(struct commit_graph *g)
static, perhaps?  I see it's declared as extern in the headeer file
below, but I don't see it called outside of this source file by the
end of the patch series.
+{
+	if (g->graph_fd < 0)
+		return 0;
+
+	munmap((void *)g->data, g->data_len);
+	g->data = 0;
+
+	close(g->graph_fd);
+	g->graph_fd = -1;
+
+	return 1;
+}
+
+static void free_commit_graph(struct commit_graph **g)
+{
+	if (!g || !*g)
+		return;
+
+	close_commit_graph(*g);
+
+	free(*g);
+	*g = NULL;
+}
+
+struct commit_graph *load_commit_graph_one(const char *graph_file, const char *pack_dir)
+{
+	void *graph_map;
+	const unsigned char *data;
+	struct commit_graph_header *hdr;
+	size_t graph_size;
+	struct stat st;
+	uint32_t i;
+	struct commit_graph *graph;
+	int fd = git_open(graph_file);
+	uint64_t last_chunk_offset;
+	uint32_t last_chunk_id;
+
+	if (fd < 0)
+		return 0;
+	if (fstat(fd, &st)) {
+		close(fd);
+		return 0;
+	}
+	graph_size = xsize_t(st.st_size);
+
+	if (graph_size < GRAPH_MIN_SIZE) {
+		close(fd);
+		die("graph file %s is too small", graph_file);
+	}
+	graph_map = xmmap(NULL, graph_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	data = (const unsigned char *)graph_map;
+
+	hdr = graph_map;
+	if (ntohl(hdr->graph_signature) != GRAPH_SIGNATURE) {
+		uint32_t signature = ntohl(hdr->graph_signature);
+		munmap(graph_map, graph_size);
+		close(fd);
+		die("graph signature %X does not match signature %X",
+			signature, GRAPH_SIGNATURE);
+	}
+	if (hdr->graph_version != GRAPH_VERSION) {
+		unsigned char version = hdr->graph_version;
+		munmap(graph_map, graph_size);
+		close(fd);
+		die("graph version %X does not match version %X",
+			version, GRAPH_VERSION);
+	}
+
+	graph = alloc_commit_graph(strlen(pack_dir) + 1);
+
+	graph->hdr = hdr;
+	graph->graph_fd = fd;
+	graph->data = graph_map;
+	graph->data_len = graph_size;
+
+	last_chunk_id = 0;
+	last_chunk_offset = (uint64_t)sizeof(*hdr);
+	for (i = 0; i < hdr->num_chunks; i++) {
+		uint32_t chunk_id = ntohl(*(uint32_t*)(data + sizeof(*hdr) + 12 * i));
+		uint64_t chunk_offset1 = ntohl(*(uint32_t*)(data + sizeof(*hdr) + 12 * i + 4));
+		uint32_t chunk_offset2 = ntohl(*(uint32_t*)(data + sizeof(*hdr) + 12 * i + 8));
There are a lot of magic number in these three lines, but at least
they are all multiples of 4.
quoted hunk ↗ jump to hunk
+		uint64_t chunk_offset = (chunk_offset1 << 32) | chunk_offset2;
+
+		if (chunk_offset > graph_size - GIT_MAX_RAWSZ)
+			die("improper chunk offset %08x%08x", (uint32_t)(chunk_offset >> 32),
+			    (uint32_t)chunk_offset);
+
+		switch (chunk_id) {
+			case GRAPH_CHUNKID_OIDFANOUT:
+				graph->chunk_oid_fanout = data + chunk_offset;
+				break;
+
+			case GRAPH_CHUNKID_OIDLOOKUP:
+				graph->chunk_oid_lookup = data + chunk_offset;
+				break;
+
+			case GRAPH_CHUNKID_DATA:
+				graph->chunk_commit_data = data + chunk_offset;
+				break;
+
+			case GRAPH_CHUNKID_LARGEEDGES:
+				graph->chunk_large_edges = data + chunk_offset;
+				break;
+
+			case 0:
+				break;
+
+			default:
+				free_commit_graph(&graph);
+				die("unrecognized graph chunk id: %08x", chunk_id);
+		}
+
+		if (last_chunk_id == GRAPH_CHUNKID_OIDLOOKUP)
+		{
+			graph->num_commits = (chunk_offset - last_chunk_offset)
+					     / hdr->hash_len;
+		}
+
+		last_chunk_id = chunk_id;
+		last_chunk_offset = chunk_offset;
+	}
+
+	strcpy(graph->pack_dir, pack_dir);
+	return graph;
+}
+
 static void write_graph_chunk_fanout(struct sha1file *f,
 				     struct commit **commits,
 				     int nr_commits)
@@ -361,7 +497,7 @@ struct object_id *construct_commit_graph(const char *pack_dir)
 	sha1close(f, final_hash, CSUM_CLOSE | CSUM_FSYNC);
 
 	f_hash = (struct object_id *)malloc(sizeof(struct object_id));
-	memcpy(f_hash->hash, final_hash, GIT_MAX_RAWSZ);
+	hashcpy(f_hash->hash, final_hash);
Oh, look, I told you it's hashcpy()! ;)
quoted hunk ↗ jump to hunk
 	fname = get_commit_graph_filename_hash(pack_dir, f_hash);
 
 	if (rename(graph_name, fname))
diff --git a/commit-graph.h b/commit-graph.h
index 7b3469a7df..e046ae575c 100644
--- a/commit-graph.h
+++ b/commit-graph.h
@@ -15,6 +15,31 @@ struct commit_graph_header {
 	unsigned char num_chunks;
 };
 
+extern struct commit_graph {
+	int graph_fd;
+
+	const unsigned char *data;
+	size_t data_len;
+
+	const struct commit_graph_header *hdr;
+
+	struct object_id oid;
+
+	uint32_t num_commits;
+
+	const unsigned char *chunk_oid_fanout;
+	const unsigned char *chunk_oid_lookup;
+	const unsigned char *chunk_commit_data;
+	const unsigned char *chunk_large_edges;
+
+	/* something like ".git/objects/pack" */
+	char pack_dir[FLEX_ARRAY]; /* more */
+} *commit_graph;
+
+extern int close_commit_graph(struct commit_graph *g);
+
+extern struct commit_graph *load_commit_graph_one(const char *graph_file, const char *pack_dir);
+
 extern struct object_id *construct_commit_graph(const char *pack_dir);
 
 #endif
diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh
index 6bcd1cc264..da565624e3 100755
--- a/t/t5318-commit-graph.sh
+++ b/t/t5318-commit-graph.sh
@@ -25,11 +25,23 @@ test_expect_success 'create commits and repack' \
      done &&
      git repack'
 
+_graph_read_expect() {
+    cat >expect <<- EOF
+header: 43475048 01 01 14 04
+num_commits: $1
+chunks: oid_fanout oid_lookup commit_metadata large_edges
+pack_dir: $2
+EOF
+}
Style nit: since you are already using the '<<-' operator for the
here-doc, you could indent it with tabs.
+
 test_expect_success 'write graph' \
     'graph1=$(git commit-graph --write) &&
-     test_path_is_file ${packdir}/graph-${graph1}.graph'
+     test_path_is_file ${packdir}/graph-${graph1}.graph &&
+     git commit-graph --read --graph-hash=${graph1} >output &&
+     _graph_read_expect "5" "${packdir}" &&
+     cmp expect output'
Please use the 'test_cmp' helper throughout the tests instead.
If the two files don't match, 'cmp' will only tell you where they
start to differ, whereas 'test_cmp' will actually show the difference.
-t_expect_success 'Add more commits' \
+test_expect_success 'Add more commits' \
This should be squashed into the earlier commit.
quoted hunk ↗ jump to hunk
     'git reset --hard commits/3 &&
      for i in $(test_seq 6 10)
      do
@@ -79,7 +91,10 @@ t_expect_success 'Add more commits' \
 
 test_expect_success 'write graph with merges' \
     'graph2=$(git commit-graph --write) &&
-     test_path_is_file ${packdir}/graph-${graph2}.graph'
+     test_path_is_file ${packdir}/graph-${graph2}.graph &&
+     git commit-graph --read --graph-hash=${graph2} >output &&
+     _graph_read_expect "18" "${packdir}" &&
+     cmp expect output'
 
 test_expect_success 'setup bare repo' \
     'cd .. &&
@@ -87,10 +102,13 @@ test_expect_success 'setup bare repo' \
      cd bare &&
      git config core.graph true &&
      git config pack.threads 1 &&
-     baredir="objects/pack"'
+     baredir="./objects/pack"'
Is this change really necessary?  If it is, then perhaps it should
have been written this way upon its introduction.
 test_expect_success 'write graph in bare repo' \
     'graphbare=$(git commit-graph --write) &&
-     test_path_is_file ${baredir}/graph-${graphbare}.graph'
+     test_path_is_file ${baredir}/graph-${graphbare}.graph &&
+     git commit-graph --read --graph-hash=${graphbare} >output &&
+     _graph_read_expect "18" "${baredir}" &&
+     cmp expect output'
 
 test_done
-- 
2.16.0.15.g9c3cf44.dirty
  
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help