[PATCH v9 0/5] unpack large blobs in stream
From: Han Xin <hidden>
Date: 2022-01-20 11:22:50
From: Han Xin <redacted>
Changes since v8:
* Rename "assert_no_loose ()" to "test_no_loose ()" in
"t5328-unpack-large-objects.sh" (renumbered from t5329). Remove
"assert_no_pack ()" and use "test_dir_is_empty" instead.
* Revert the changes to "create_tmpfile()"; error handling is now done in
"start_loose_object_common()".
* Remove "finalize_object_file_with_mtime()", which now seems to be overkill
for "write_loose_object()".
* Remove the commit "object-file.c: remove the slash for directory_size()";
it can be sent as a separate patch if necessary.
Han Xin (4):
unpack-objects: low memory footprint for get_data() in dry_run mode
object-file.c: refactor write_loose_object() to several steps
object-file.c: add "stream_loose_object()" to handle large object
unpack-objects: unpack_non_delta_entry() read data in a stream
Ævar Arnfjörð Bjarmason (1):
object-file API: add a format_object_header() function
builtin/index-pack.c | 3 +-
builtin/unpack-objects.c | 110 ++++++++++++++--
bulk-checkin.c | 4 +-
cache.h | 21 +++
http-push.c | 2 +-
object-file.c | 220 +++++++++++++++++++++++++++-----
object-store.h | 9 ++
t/t5328-unpack-large-objects.sh | 65 ++++++++++
8 files changed, 384 insertions(+), 50 deletions(-)
create mode 100755 t/t5328-unpack-large-objects.sh
Range-diff against v8:
1: bd34da5816 ! 1: 6a6c11ba93 unpack-objects: low memory footprint for get_data() in dry_run mode
@@ builtin/unpack-objects.c: static void unpack_delta_entry(enum object_type type,
hi = nr;
while (lo < hi) {
- ## t/t5329-unpack-large-objects.sh (new) ##
+ ## t/t5328-unpack-large-objects.sh (new) ##
@@
+#!/bin/sh
+#
-+# Copyright (c) 2021 Han Xin
++# Copyright (c) 2022 Han Xin
+#
+
+test_description='git unpack-objects with large objects'
@@ t/t5329-unpack-large-objects.sh (new)
+ git init --bare dest.git
+}
+
-+assert_no_loose () {
++test_no_loose () {
+ glob=dest.git/objects/?? &&
+ echo "$glob" >expect &&
+ eval "echo $glob" >actual &&
+ test_cmp expect actual
+}
+
-+assert_no_pack () {
-+ rmdir dest.git/objects/pack
-+}
-+
+test_expect_success "create large objects (1.5 MB) and PACK" '
+ test-tool genrandom foo 1500000 >big-blob &&
+ test_commit --append foo big-blob &&
@@ t/t5329-unpack-large-objects.sh (new)
+test_expect_success 'unpack-objects works with memory limitation in dry-run mode' '
+ prepare_dest &&
+ git -C dest.git unpack-objects -n <test-$PACK.pack &&
-+ assert_no_loose &&
-+ assert_no_pack
++ test_no_loose &&
++ test_dir_is_empty dest.git/objects/pack
+'
+
+test_done
2: f9a4365a7d ! 2: bab9e0402f object-file.c: refactor write_loose_object() to several steps
@@ Commit message
Signed-off-by: Han Xin [off-list ref]
## object-file.c ##
-@@ object-file.c: static void write_object_file_prepare(const struct git_hash_algo *algo,
- algo->final_oid_fn(oid, &c);
- }
-
-+/*
-+ * Move the just written object with proper mtime into its final resting place.
-+ */
-+static int finalize_object_file_with_mtime(const char *tmpfile,
-+ const char *filename,
-+ time_t mtime,
-+ unsigned flags)
-+{
-+ struct utimbuf utb;
-+
-+ if (mtime) {
-+ utb.actime = mtime;
-+ utb.modtime = mtime;
-+ if (utime(tmpfile, &utb) < 0 && !(flags & HASH_SILENT))
-+ warning_errno(_("failed utime() on %s"), tmpfile);
-+ }
-+ return finalize_object_file(tmpfile, filename);
-+}
-+
- /*
- * Move the just written object into its final resting place.
- */
-@@ object-file.c: static inline int directory_size(const char *filename)
- * We want to avoid cross-directory filename renames, because those
- * can have problems on various filesystems (FAT, NFS, Coda).
- */
--static int create_tmpfile(struct strbuf *tmp, const char *filename)
-+static int create_tmpfile(struct strbuf *tmp, const char *filename,
-+ unsigned flags)
- {
- int fd, dirlen = directory_size(filename);
-
-@@ object-file.c: static int create_tmpfile(struct strbuf *tmp, const char *filename)
- strbuf_add(tmp, filename, dirlen);
- strbuf_addstr(tmp, "tmp_obj_XXXXXX");
- fd = git_mkstemp_mode(tmp->buf, 0444);
-- if (fd < 0 && dirlen && errno == ENOENT) {
-+ do {
-+ if (fd >= 0 || !dirlen || errno != ENOENT)
-+ break;
- /*
- * Make sure the directory exists; note that the contents
- * of the buffer are undefined after mkstemp returns an
@@ object-file.c: static int create_tmpfile(struct strbuf *tmp, const char *filename)
- strbuf_reset(tmp);
- strbuf_add(tmp, filename, dirlen - 1);
- if (mkdir(tmp->buf, 0777) && errno != EEXIST)
-- return -1;
-+ break;
- if (adjust_shared_perm(tmp->buf))
-- return -1;
-+ break;
-
- /* Try again */
- strbuf_addstr(tmp, "/tmp_obj_XXXXXX");
- fd = git_mkstemp_mode(tmp->buf, 0444);
-+ } while (0);
-+
-+ if (fd < 0 && !(flags & HASH_SILENT)) {
-+ if (errno == EACCES)
-+ return error(_("insufficient permission for adding an "
-+ "object to repository database %s"),
-+ get_object_directory());
-+ else
-+ return error_errno(_("unable to create temporary file"));
- }
-+
return fd;
}
@@ object-file.c: static int create_tmpfile(struct strbuf *tmp, const char *filenam
+ git_zstream *stream,
+ unsigned char *buf, size_t buflen,
+ git_hash_ctx *c,
-+ enum object_type type, size_t len,
+ char *hdr, int hdrlen)
+{
+ int fd;
+
-+ fd = create_tmpfile(tmp_file, filename, flags);
-+ if (fd < 0)
-+ return -1;
++ fd = create_tmpfile(tmp_file, filename);
++ if (fd < 0) {
++ if (flags & HASH_SILENT)
++ return -1;
++ else if (errno == EACCES)
++ return error(_("insufficient permission for adding "
++ "an object to repository database %s"),
++ get_object_directory());
++ else
++ return error_errno(
++ _("unable to create temporary file"));
++ }
+
+ /* Setup zlib stream for compression */
+ git_deflate_init(stream, zlib_compression_level);
@@ object-file.c: static int write_loose_object(const struct object_id *oid, char *
+ */
+ fd = start_loose_object_common(&tmp_file, filename.buf, flags,
+ &stream, compressed, sizeof(compressed),
-+ &c, OBJ_NONE, 0, hdr, hdrlen);
++ &c, hdr, hdrlen);
+ if (fd < 0)
+ return -1;
@@ object-file.c: static int write_loose_object(const struct object_id *oid, char *
if (!oideq(oid, &parano_oid))
die(_("confused by unstable object source data for %s"),
oid_to_hex(oid));
-
- close_loose_object(fd);
-
-- if (mtime) {
-- struct utimbuf utb;
-- utb.actime = mtime;
-- utb.modtime = mtime;
-- if (utime(tmp_file.buf, &utb) < 0 &&
-- !(flags & HASH_SILENT))
-- warning_errno(_("failed utime() on %s"), tmp_file.buf);
-- }
--
-- return finalize_object_file(tmp_file.buf, filename.buf);
-+ return finalize_object_file_with_mtime(tmp_file.buf, filename.buf,
-+ mtime, flags);
- }
-
- static int freshen_loose_object(const struct object_id *oid)
3: 18dd21122d < -: ---------- object-file.c: remove the slash for directory_size()
4: 964715451b ! 3: dd13614985 object-file.c: add "stream_loose_object()" to handle large object
@@ object-file.c: static int freshen_packed_object(const struct object_id *oid)
+ */
+ fd = start_loose_object_common(&tmp_file, filename.buf, 0,
+ &stream, compressed, sizeof(compressed),
-+ &c, OBJ_BLOB, len, hdr, hdrlen);
++ &c, hdr, hdrlen);
+ if (fd < 0) {
+ err = -1;
+ goto cleanup;
5: 3f620466fe ! 4: cd84e27b08 unpack-objects: unpack_non_delta_entry() read data in a stream
@@ builtin/unpack-objects.c: static void added_object(unsigned nr, enum object_type
write_object(nr, type, buf, size);
}
- ## t/t5329-unpack-large-objects.sh ##
-@@ t/t5329-unpack-large-objects.sh: test_description='git unpack-objects with large objects'
+ ## t/t5328-unpack-large-objects.sh ##
+@@ t/t5328-unpack-large-objects.sh: test_description='git unpack-objects with large objects'
prepare_dest () {
test_when_finished "rm -rf dest.git" &&
@@ t/t5329-unpack-large-objects.sh: test_description='git unpack-objects with large
+ fi
}
- assert_no_loose () {
-@@ t/t5329-unpack-large-objects.sh: test_expect_success 'set memory limitation to 1MB' '
+ test_no_loose () {
+@@ t/t5328-unpack-large-objects.sh: test_expect_success 'set memory limitation to 1MB' '
'
test_expect_success 'unpack-objects failed under memory limitation' '
@@ t/t5329-unpack-large-objects.sh: test_expect_success 'set memory limitation to 1
- prepare_dest &&
+ prepare_dest 2m &&
git -C dest.git unpack-objects -n <test-$PACK.pack &&
- assert_no_loose &&
- assert_no_pack
+ test_no_loose &&
+ test_dir_is_empty dest.git/objects/pack
'
+test_expect_success 'unpack big object in stream' '
+ prepare_dest 1m &&
+ git -C dest.git unpack-objects <test-$PACK.pack &&
-+ assert_no_pack
++ test_dir_is_empty dest.git/objects/pack
+'
+
+test_expect_success 'do not unpack existing large objects' '
+ prepare_dest 1m &&
+ git -C dest.git index-pack --stdin <test-$PACK.pack &&
+ git -C dest.git unpack-objects <test-$PACK.pack &&
-+ assert_no_loose
++ test_no_loose
+'
+
test_done
6: 8073a3888d = 5: 59f0ad95c7 object-file API: add a format_object_header() function
--
2.34.1.52.gc288e771b4.agit.6.5.6