Re: [PATCH 2/5] parallel-checkout: make it truly parallel
From: Christian Couder <hidden>
Date: 2021-03-31 04:33:24
On Wed, Mar 17, 2021 at 10:12 PM Matheus Tavares [off-list ref] wrote:
quoted hunk ↗ jump to hunk
diff --git a/.gitignore b/.gitignore index 3dcdb6bb5a..26f8ddfc55 100644 --- a/.gitignore +++ b/.gitignore@@ -33,6 +33,7 @@ /git-check-mailmap /git-check-ref-format /git-checkout +/git-checkout--helper
I wonder if "checkout--worker" would be better than "checkout--helper".
/git-checkout-index /git-cherry /git-cherry-pick
[...]
+#define ASSERT_PC_ITEM_RESULT_SIZE(got, exp) \
+{ \
+ if (got != exp) \
+ BUG("corrupted result from checkout worker (got %dB, exp %dB)", \Maybe precompilers are smart enough to not replace the "got" and "exp" in the above string, but it might be a bit confusing for readers. Anway I wonder if this macro could just be a regular (possibly inline) function.
+ got, exp); \ +} while(0)
+static void parse_and_save_result(const char *line, int len,
+ struct pc_worker *worker)
+{
+ struct pc_item_result *res;
+ struct parallel_checkout_item *pc_item;
+ struct stat *st = NULL;
+
+ if (len < PC_ITEM_RESULT_BASE_SIZE)
+ BUG("too short result from checkout worker (got %dB, exp %dB)",
+ len, (int)PC_ITEM_RESULT_BASE_SIZE);
+
+ res = (struct pc_item_result *)line;
+
+ /*
+ * Worker should send either the full result struct on success, or
+ * just the base (i.e. no stat data), otherwise.
+ */
+ if (res->status == PC_ITEM_WRITTEN) {
+ ASSERT_PC_ITEM_RESULT_SIZE(len, (int)sizeof(struct pc_item_result));
+ st = &res->st;
+ } else {
+ ASSERT_PC_ITEM_RESULT_SIZE(len, (int)PC_ITEM_RESULT_BASE_SIZE);
+ }
+
+ if (!worker->nr_items_to_complete || res->id != worker->next_item_to_complete)Nit: maybe it could be useful to distinguish between these 2 potential bugs and have a specific BUG() for each one.
+ BUG("checkout worker sent unexpected item id");+ worker->next_item_to_complete++; + worker->nr_items_to_complete--; + + pc_item = ¶llel_checkout.items[res->id]; + pc_item->status = res->status; + if (st) + pc_item->st = *st; +} + +
One blank line is enough.
+static void gather_results_from_workers(struct pc_worker *workers,
+ int num_workers)
+{
+ int i, active_workers = num_workers;
+ struct pollfd *pfds;
+
+ CALLOC_ARRAY(pfds, num_workers);
+ for (i = 0; i < num_workers; i++) {
+ pfds[i].fd = workers[i].cp.out;
+ pfds[i].events = POLLIN;
+ }
+
+ while (active_workers) {
+ int nr = poll(pfds, num_workers, -1);
+
+ if (nr < 0) {
+ if (errno == EINTR)
+ continue;
+ die_errno("failed to poll checkout workers");
+ }
+
+ for (i = 0; i < num_workers && nr > 0; i++) {
Is it possible that nr is 0? If that happens, it looks like we would
be in an infinite `while (active_workers) { ... }` loop.
Actually in poll(2) there is: "A value of 0 indicates that the call
timed out and no file descriptors were ready." So it seems that it
could, at least theorically, happen.
+ struct pc_worker *worker = &workers[i];
+ struct pollfd *pfd = &pfds[i];
+
+ if (!pfd->revents)
+ continue;
+
+ if (pfd->revents & POLLIN) {
+ int len;
+ const char *line = packet_read_line(pfd->fd, &len);
+
+ if (!line) {
+ pfd->fd = -1;
+ active_workers--;
+ } else {
+ parse_and_save_result(line, len, worker);
+ }
+ } else if (pfd->revents & POLLHUP) {
+ pfd->fd = -1;
+ active_workers--;
+ } else if (pfd->revents & (POLLNVAL | POLLERR)) {
+ die(_("error polling from checkout worker"));
+ }
+
+ nr--;
+ }
+ }
+
+ free(pfds);
+}[...]
+enum pc_item_status {
+ PC_ITEM_PENDING = 0,
+ PC_ITEM_WRITTEN,
+ /*
+ * The entry could not be written because there was another file
+ * already present in its path or leading directories. Since
+ * checkout_entry_ca() removes such files from the working tree before
+ * enqueueing the entry for parallel checkout, it means that there was
+ * a path collision among the entries being written.
+ */
+ PC_ITEM_COLLIDED,
+ PC_ITEM_FAILED,
+};
+
+struct parallel_checkout_item {
+ /*
+ * In main process ce points to a istate->cache[] entry. Thus, it's not
+ * owned by us. In workers they own the memory, which *must be* released.
+ */
+ struct cache_entry *ce;
+ struct conv_attrs ca;
+ size_t id; /* position in parallel_checkout.items[] of main process */
+
+ /* Output fields, sent from workers. */
+ enum pc_item_status status;
+ struct stat st;
+};Maybe the previous patch could have declared both 'enum pc_item_status' and 'struct parallel_checkout_item' here, in parallel-checkout.h, so that this patch wouldn't need to move them here.