Thread (33 messages) 33 messages, 2 authors, 2021-04-19

Re: [PATCH 2/5] parallel-checkout: make it truly parallel

From: Christian Couder <hidden>
Date: 2021-03-31 04:33:24

On Wed, Mar 17, 2021 at 10:12 PM Matheus Tavares
[off-list ref] wrote:
quoted hunk ↗ jump to hunk
diff --git a/.gitignore b/.gitignore
index 3dcdb6bb5a..26f8ddfc55 100644
--- a/.gitignore
+++ b/.gitignore
@@ -33,6 +33,7 @@
 /git-check-mailmap
 /git-check-ref-format
 /git-checkout
+/git-checkout--helper
I wonder if "checkout--worker" would be better than "checkout--helper".
 /git-checkout-index
 /git-cherry
 /git-cherry-pick
[...]
+#define ASSERT_PC_ITEM_RESULT_SIZE(got, exp) \
+{ \
+       if (got != exp) \
+               BUG("corrupted result from checkout worker (got %dB, exp %dB)", \
Maybe precompilers are smart enough to not replace the "got" and "exp"
in the above string, but it might be a bit confusing for readers.
Anway I wonder if this macro could just be a regular (possibly inline)
function.
+                   got, exp); \
+} while(0)
+static void parse_and_save_result(const char *line, int len,
+                                 struct pc_worker *worker)
+{
+       struct pc_item_result *res;
+       struct parallel_checkout_item *pc_item;
+       struct stat *st = NULL;
+
+       if (len < PC_ITEM_RESULT_BASE_SIZE)
+               BUG("too short result from checkout worker (got %dB, exp %dB)",
+                   len, (int)PC_ITEM_RESULT_BASE_SIZE);
+
+       res = (struct pc_item_result *)line;
+
+       /*
+        * Worker should send either the full result struct on success, or
+        * just the base (i.e. no stat data), otherwise.
+        */
+       if (res->status == PC_ITEM_WRITTEN) {
+               ASSERT_PC_ITEM_RESULT_SIZE(len, (int)sizeof(struct pc_item_result));
+               st = &res->st;
+       } else {
+               ASSERT_PC_ITEM_RESULT_SIZE(len, (int)PC_ITEM_RESULT_BASE_SIZE);
+       }
+
+       if (!worker->nr_items_to_complete || res->id != worker->next_item_to_complete)
Nit: maybe it could be useful to distinguish between these 2 potential
bugs and have a specific BUG() for each one.
+               BUG("checkout worker sent unexpected item id");
+       worker->next_item_to_complete++;
+       worker->nr_items_to_complete--;
+
+       pc_item = &parallel_checkout.items[res->id];
+       pc_item->status = res->status;
+       if (st)
+               pc_item->st = *st;
+}
+
+
One blank line is enough.
+static void gather_results_from_workers(struct pc_worker *workers,
+                                       int num_workers)
+{
+       int i, active_workers = num_workers;
+       struct pollfd *pfds;
+
+       CALLOC_ARRAY(pfds, num_workers);
+       for (i = 0; i < num_workers; i++) {
+               pfds[i].fd = workers[i].cp.out;
+               pfds[i].events = POLLIN;
+       }
+
+       while (active_workers) {
+               int nr = poll(pfds, num_workers, -1);
+
+               if (nr < 0) {
+                       if (errno == EINTR)
+                               continue;
+                       die_errno("failed to poll checkout workers");
+               }
+
+               for (i = 0; i < num_workers && nr > 0; i++) {
Is it possible that nr is 0? If that happens, it looks like we would
be in an infinite `while (active_workers) { ... }` loop.

Actually in poll(2) there is: "A value of 0 indicates that the call
timed out and no file descriptors were ready." So it seems that it
could, at least theorically, happen.
+                       struct pc_worker *worker = &workers[i];
+                       struct pollfd *pfd = &pfds[i];
+
+                       if (!pfd->revents)
+                               continue;
+
+                       if (pfd->revents & POLLIN) {
+                               int len;
+                               const char *line = packet_read_line(pfd->fd, &len);
+
+                               if (!line) {
+                                       pfd->fd = -1;
+                                       active_workers--;
+                               } else {
+                                       parse_and_save_result(line, len, worker);
+                               }
+                       } else if (pfd->revents & POLLHUP) {
+                               pfd->fd = -1;
+                               active_workers--;
+                       } else if (pfd->revents & (POLLNVAL | POLLERR)) {
+                               die(_("error polling from checkout worker"));
+                       }
+
+                       nr--;
+               }
+       }
+
+       free(pfds);
+}
[...]
+enum pc_item_status {
+       PC_ITEM_PENDING = 0,
+       PC_ITEM_WRITTEN,
+       /*
+        * The entry could not be written because there was another file
+        * already present in its path or leading directories. Since
+        * checkout_entry_ca() removes such files from the working tree before
+        * enqueueing the entry for parallel checkout, it means that there was
+        * a path collision among the entries being written.
+        */
+       PC_ITEM_COLLIDED,
+       PC_ITEM_FAILED,
+};
+
+struct parallel_checkout_item {
+       /*
+        * In main process ce points to a istate->cache[] entry. Thus, it's not
+        * owned by us. In workers they own the memory, which *must be* released.
+        */
+       struct cache_entry *ce;
+       struct conv_attrs ca;
+       size_t id; /* position in parallel_checkout.items[] of main process */
+
+       /* Output fields, sent from workers. */
+       enum pc_item_status status;
+       struct stat st;
+};
Maybe the previous patch could have declared both 'enum
pc_item_status' and 'struct parallel_checkout_item' here, in
parallel-checkout.h, so that this patch wouldn't need to move them
here.
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help