From 7b57429c48826f997ad9c147afc2bc4b9bd8a89b Mon Sep 17 00:00:00 2001 From: Xavi Hernandez Date: Thu, 27 Jun 2024 15:41:19 +0200 Subject: [PATCH] Fix starvation of pending writes in CTDB queues CTDB uses a queue to receive requests and send answers. It works asynchronously using the tevent framework. However, there was an issue that gave priority to the receiving side: when a request was processed and the answer posted to the queue, if another incoming request arrived, it was served before the previous answer was sent. This scenario could repeat for long periods of time if the frequency of incoming requests was high enough. Eventually, a small time gap between incoming requests gave a chance to process the pending output queue, sending many answers in a burst. This patch makes sure that both queues (input and output) are processed whenever the event contains the corresponding flag. BUG: https://bugzilla.samba.org/show_bug.cgi?id=15678 RN: Fix unnecessary delays in CTDB while processing requests under high load. 
Signed-off-by: Xavi Hernandez Reviewed-by: Volker Lendecke Reviewed-by: Martin Schwenke Autobuild-User(master): Volker Lendecke Autobuild-Date(master): Mon Jul 1 09:17:43 UTC 2024 on atb-devel-224 (cherry picked from commit 60550fbe184a5cefa55a8f0bab508f70def7a684) --- ctdb/common/ctdb_io.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/ctdb/common/ctdb_io.c b/ctdb/common/ctdb_io.c index bf8bc73b77d..9ac9b84a7fb 100644 --- a/ctdb/common/ctdb_io.c +++ b/ctdb/common/ctdb_io.c @@ -272,7 +272,7 @@ static void queue_dead(struct tevent_context *ev, struct tevent_immediate *im, /* called when an incoming connection is writeable */ -static void queue_io_write(struct ctdb_queue *queue) +static bool queue_io_write(struct ctdb_queue *queue) { while (queue->out_queue) { struct ctdb_queue_pkt *pkt = queue->out_queue; @@ -294,14 +294,14 @@ static void queue_io_write(struct ctdb_queue *queue) queue->fd = -1; tevent_schedule_immediate(queue->im, queue->ctdb->ev, queue_dead, queue); - return; + return false; } - if (n <= 0) return; + if (n <= 0) return true; if (n != pkt->length) { pkt->length -= n; pkt->data += n; - return; + return true; } DLIST_REMOVE(queue->out_queue, pkt); @@ -310,6 +310,8 @@ static void queue_io_write(struct ctdb_queue *queue) } TEVENT_FD_NOT_WRITEABLE(queue->fde); + + return true; } /* @@ -320,10 +322,13 @@ static void queue_io_handler(struct tevent_context *ev, struct tevent_fd *fde, { struct ctdb_queue *queue = talloc_get_type(private_data, struct ctdb_queue); + if (flags & TEVENT_FD_WRITE) { + if (!queue_io_write(queue)) { + return; + } + } if (flags & TEVENT_FD_READ) { queue_io_read(queue); - } else { - queue_io_write(queue); } } -- GitLab