From 6b2129736858f32b595c17f1a4fd282a7069e701 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Thu, 8 Aug 2019 16:20:44 +1000 Subject: [PATCH 1/7] ctdb-daemon: Add function ctdb_ip_to_node() This is the core logic from ctdb_ip_to_pnn(), so re-implement that that function using ctdb_ip_to_node(). Something similar (ctdb_ip_to_nodeid()) was recently removed in commit 010c1d77cd7e192b1fff39b7b91fccbdbbf4a786 because it wasn't required. Now there is a use case. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14084 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 3acb8e9d1c854b577d6be282257269df83055d31) --- ctdb/include/ctdb_private.h | 2 ++ ctdb/server/ctdb_server.c | 24 +++++++++++++++++++----- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index 2bcc7c94156..1e9619faddf 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -831,6 +831,8 @@ void ctdb_stop_recoverd(struct ctdb_context *ctdb); int ctdb_set_transport(struct ctdb_context *ctdb, const char *transport); +struct ctdb_node *ctdb_ip_to_node(struct ctdb_context *ctdb, + const ctdb_sock_addr *nodeip); uint32_t ctdb_ip_to_pnn(struct ctdb_context *ctdb, const ctdb_sock_addr *nodeip); diff --git a/ctdb/server/ctdb_server.c b/ctdb/server/ctdb_server.c index dcd761a2961..9724d1fe0a8 100644 --- a/ctdb/server/ctdb_server.c +++ b/ctdb/server/ctdb_server.c @@ -45,9 +45,9 @@ int ctdb_set_transport(struct ctdb_context *ctdb, const char *transport) return 0; } -/* Return the PNN for nodeip, CTDB_UNKNOWN_PNN if nodeip is invalid */ -uint32_t ctdb_ip_to_pnn(struct ctdb_context *ctdb, - const ctdb_sock_addr *nodeip) +/* Return the node structure for nodeip, NULL if nodeip is invalid */ +struct ctdb_node *ctdb_ip_to_node(struct ctdb_context *ctdb, + const ctdb_sock_addr *nodeip) { unsigned int nodeid; @@ -56,11 +56,25 @@ uint32_t ctdb_ip_to_pnn(struct ctdb_context *ctdb, continue; } if (ctdb_same_ip(&ctdb->nodes[nodeid]->address, nodeip)) { - return ctdb->nodes[nodeid]->pnn; + return ctdb->nodes[nodeid]; } } - return CTDB_UNKNOWN_PNN; + return NULL; +} + +/* Return the PNN for nodeip, CTDB_UNKNOWN_PNN if nodeip is invalid */ +uint32_t ctdb_ip_to_pnn(struct ctdb_context *ctdb, + const ctdb_sock_addr *nodeip) +{ + struct ctdb_node *node; + + node = ctdb_ip_to_node(ctdb, nodeip); + if (node == NULL) { + return CTDB_UNKNOWN_PNN; + } + + return node->pnn; } /* Load a nodes list file into a nodes array */ -- 2.23.0.rc1 From f98e111f16c007ea4482cc8a19baa0e558f97df0 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Fri, 9 Aug 2019 15:06:34 +1000 Subject: [PATCH 2/7] ctdb-tcp: Rename fd -> out_fd in_fd is coming soon. Fix coding style violations in the affected and adjacent lines. Modernise some debug macros and make them more consistent (e.g. drop logging of errno when strerror(errno) is already logged. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14084 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit c06620169fc178ea6db2631f03edf008285d8cf2) --- ctdb/tcp/ctdb_tcp.h | 2 +- ctdb/tcp/tcp_connect.c | 97 +++++++++++++++++++++++++----------------- ctdb/tcp/tcp_init.c | 22 ++++++---- 3 files changed, 72 insertions(+), 49 deletions(-) diff --git a/ctdb/tcp/ctdb_tcp.h b/ctdb/tcp/ctdb_tcp.h index 0a998c94da4..acd343fb8f3 100644 --- a/ctdb/tcp/ctdb_tcp.h +++ b/ctdb/tcp/ctdb_tcp.h @@ -39,7 +39,7 @@ struct ctdb_incoming { state associated with one tcp node */ struct ctdb_tcp_node { - int fd; + int out_fd; struct ctdb_queue *out_queue; struct tevent_fd *connect_fde; struct tevent_timer *connect_te; diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c index d757abdf26c..4253f3bef7c 100644 --- a/ctdb/tcp/tcp_connect.c +++ b/ctdb/tcp/tcp_connect.c @@ -50,9 +50,9 @@ void ctdb_tcp_stop_connection(struct ctdb_node *node) talloc_free(tnode->connect_fde); tnode->connect_fde = NULL; tnode->connect_te = NULL; - if (tnode->fd != -1) { - close(tnode->fd); - tnode->fd = -1; + if (tnode->out_fd != -1) { + close(tnode->out_fd); + tnode->out_fd = -1; } } @@ -93,12 +93,13 @@ static void ctdb_node_connect_write(struct tevent_context *ev, int error = 0; socklen_t len = sizeof(error); int one = 1; + int ret; talloc_free(tnode->connect_te); tnode->connect_te = NULL; - if (getsockopt(tnode->fd, SOL_SOCKET, SO_ERROR, &error, &len) != 0 || - error != 0) { + ret = getsockopt(tnode->out_fd, SOL_SOCKET, SO_ERROR, &error, &len); + if (ret != 0 || error != 0) { ctdb_tcp_stop_connection(node); tnode->connect_te = tevent_add_timer(ctdb->ev, tnode, timeval_current_ofs(1, 0), @@ -109,19 +110,28 @@ static void ctdb_node_connect_write(struct tevent_context *ev, talloc_free(tnode->connect_fde); tnode->connect_fde = NULL; - if (setsockopt(tnode->fd,IPPROTO_TCP,TCP_NODELAY,(char *)&one,sizeof(one)) == -1) { - DEBUG(DEBUG_WARNING, ("Failed to set TCP_NODELAY on fd - %s\n", - strerror(errno))); + ret = setsockopt(tnode->out_fd, + IPPROTO_TCP, + TCP_NODELAY, + (char *)&one, + sizeof(one)); + if (ret == -1) { + DBG_WARNING("Failed to set TCP_NODELAY on fd - %s\n", + strerror(errno)); } - if (setsockopt(tnode->fd,SOL_SOCKET,SO_KEEPALIVE,(char *)&one,sizeof(one)) == -1) { - DEBUG(DEBUG_WARNING, ("Failed to set KEEPALIVE on fd - %s\n", - strerror(errno))); + ret = setsockopt(tnode->out_fd, + SOL_SOCKET, + SO_KEEPALIVE,(char *)&one, + sizeof(one)); + if (ret == -1) { + DBG_WARNING("Failed to set KEEPALIVE on fd - %s\n", + strerror(errno)); } - ctdb_queue_set_fd(tnode->out_queue, tnode->fd); + ctdb_queue_set_fd(tnode->out_queue, tnode->out_fd); /* the queue subsystem now owns this fd */ - tnode->fd = -1; + tnode->out_fd = -1; /* tell the ctdb layer we are connected */ node->ctdb->upcalls->node_connected(node); @@ -149,26 +159,24 @@ void ctdb_tcp_node_connect(struct tevent_context *ev, struct tevent_timer *te, sock_out = node->address; - tnode->fd = socket(sock_out.sa.sa_family, SOCK_STREAM, IPPROTO_TCP); - if (tnode->fd == -1) { - DEBUG(DEBUG_ERR, (__location__ " Failed to create socket\n")); + tnode->out_fd = socket(sock_out.sa.sa_family, SOCK_STREAM, IPPROTO_TCP); + if (tnode->out_fd == -1) { + DBG_ERR("Failed to create socket\n"); return; } - ret = set_blocking(tnode->fd, false); + ret = set_blocking(tnode->out_fd, false); if (ret != 0) { - DEBUG(DEBUG_ERR, - (__location__ - " failed to set socket non-blocking (%s)\n", - strerror(errno))); - close(tnode->fd); - tnode->fd = -1; + DBG_ERR("Failed to set socket non-blocking (%s)\n", + strerror(errno)); + close(tnode->out_fd); + tnode->out_fd = -1; return; } - set_close_on_exec(tnode->fd); + set_close_on_exec(tnode->out_fd); - DEBUG(DEBUG_DEBUG, (__location__ " Created TCP SOCKET FD:%d\n", tnode->fd)); + DBG_DEBUG("Created TCP SOCKET FD:%d\n", tnode->out_fd); /* Bind our side of the socketpair to the same address we use to listen * on incoming CTDB traffic. @@ -197,39 +205,48 @@ void ctdb_tcp_node_connect(struct tevent_context *ev, struct tevent_timer *te, default: DEBUG(DEBUG_ERR, (__location__ " unknown family %u\n", sock_in.sa.sa_family)); - close(tnode->fd); - tnode->fd = -1; + close(tnode->out_fd); + tnode->out_fd = -1; return; } - if (bind(tnode->fd, (struct sockaddr *)&sock_in, sockin_size) == -1) { - DEBUG(DEBUG_ERR, (__location__ " Failed to bind socket %s(%d)\n", - strerror(errno), errno)); - close(tnode->fd); - tnode->fd = -1; + ret = bind(tnode->out_fd, (struct sockaddr *)&sock_in, sockin_size); + if (ret == -1) { + DBG_ERR("Failed to bind socket (%s)\n", strerror(errno)); + close(tnode->out_fd); + tnode->out_fd = -1; return; } - if (connect(tnode->fd, (struct sockaddr *)&sock_out, sockout_size) != 0 && - errno != EINPROGRESS) { + ret = connect(tnode->out_fd, + (struct sockaddr *)&sock_out, + sockout_size); + if (ret != 0 && errno != EINPROGRESS) { ctdb_tcp_stop_connection(node); - tnode->connect_te = tevent_add_timer(ctdb->ev, tnode, + tnode->connect_te = tevent_add_timer(ctdb->ev, + tnode, timeval_current_ofs(1, 0), - ctdb_tcp_node_connect, node); + ctdb_tcp_node_connect, + node); return; } /* non-blocking connect - wait for write event */ - tnode->connect_fde = tevent_add_fd(node->ctdb->ev, tnode, tnode->fd, + tnode->connect_fde = tevent_add_fd(node->ctdb->ev, + tnode, + tnode->out_fd, TEVENT_FD_WRITE|TEVENT_FD_READ, - ctdb_node_connect_write, node); + ctdb_node_connect_write, + node); /* don't give it long to connect - retry in one second. This ensures that we find a node is up quickly (tcp normally backs off a syn reply delay by quite a lot) */ - tnode->connect_te = tevent_add_timer(ctdb->ev, tnode, + tnode->connect_te = tevent_add_timer(ctdb->ev, + tnode, timeval_current_ofs(1, 0), - ctdb_tcp_node_connect, node); + ctdb_tcp_node_connect, + node); } /* diff --git a/ctdb/tcp/tcp_init.c b/ctdb/tcp/tcp_init.c index 87d628aba93..6b1299f32b5 100644 --- a/ctdb/tcp/tcp_init.c +++ b/ctdb/tcp/tcp_init.c @@ -38,16 +38,16 @@ static int tnode_destructor(struct ctdb_tcp_node *tnode) { // struct ctdb_node *node = talloc_find_parent_bytype(tnode, struct ctdb_node); - if (tnode->fd != -1) { - close(tnode->fd); - tnode->fd = -1; + if (tnode->out_fd != -1) { + close(tnode->out_fd); + tnode->out_fd = -1; } return 0; } /* - initialise tcp portion of a ctdb node + initialise tcp portion of a ctdb node */ static int ctdb_tcp_add_node(struct ctdb_node *node) { @@ -55,13 +55,19 @@ static int ctdb_tcp_add_node(struct ctdb_node *node) tnode = talloc_zero(node, struct ctdb_tcp_node); CTDB_NO_MEMORY(node->ctdb, tnode); - tnode->fd = -1; + tnode->out_fd = -1; node->private_data = tnode; talloc_set_destructor(tnode, tnode_destructor); - tnode->out_queue = ctdb_queue_setup(node->ctdb, node, tnode->fd, CTDB_TCP_ALIGNMENT, - ctdb_tcp_tnode_cb, node, "to-node-%s", node->name); - + tnode->out_queue = ctdb_queue_setup(node->ctdb, + node, + tnode->out_fd, + CTDB_TCP_ALIGNMENT, + ctdb_tcp_tnode_cb, + node, + "to-node-%s", + node->name); + return 0; } -- 2.23.0.rc1 From 1df84d21471886ec9997344304d60133057e8e11 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Fri, 9 Aug 2019 15:29:36 +1000 Subject: [PATCH 3/7] ctdb-tcp: Move incoming fd and queue into struct ctdb_tcp_node This makes it easy to track both incoming and outgoing connectivity states. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14084 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit c68b6f96f26664459187ab2fbd56767fb31767e0) --- ctdb/tcp/ctdb_tcp.h | 14 ++++----- ctdb/tcp/tcp_connect.c | 64 +++++++++++++++++++++++++++--------------- ctdb/tcp/tcp_init.c | 8 ++++++ ctdb/tcp/tcp_io.c | 9 ++++-- 4 files changed, 61 insertions(+), 34 deletions(-) diff --git a/ctdb/tcp/ctdb_tcp.h b/ctdb/tcp/ctdb_tcp.h index acd343fb8f3..9a615fc6393 100644 --- a/ctdb/tcp/ctdb_tcp.h +++ b/ctdb/tcp/ctdb_tcp.h @@ -26,23 +26,19 @@ struct ctdb_tcp { int listen_fd; }; -/* - state associated with an incoming connection -*/ -struct ctdb_incoming { - struct ctdb_context *ctdb; - int fd; - struct ctdb_queue *queue; -}; - /* state associated with one tcp node */ struct ctdb_tcp_node { int out_fd; struct ctdb_queue *out_queue; + struct tevent_fd *connect_fde; struct tevent_timer *connect_te; + + struct ctdb_context *ctdb; + int in_fd; + struct ctdb_queue *in_queue; }; diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c index 4253f3bef7c..866454f3d29 100644 --- a/ctdb/tcp/tcp_connect.c +++ b/ctdb/tcp/tcp_connect.c @@ -262,8 +262,8 @@ static void ctdb_listen_event(struct tevent_context *ev, struct tevent_fd *fde, ctdb_sock_addr addr; socklen_t len; int fd; - uint32_t pnn; - struct ctdb_incoming *in; + struct ctdb_node *node; + struct ctdb_tcp_node *tnode; int one = 1; int ret; @@ -273,41 +273,61 @@ static void ctdb_listen_event(struct tevent_context *ev, struct tevent_fd *fde, if (fd == -1) return; smb_set_close_on_exec(fd); - pnn = ctdb_ip_to_pnn(ctdb, &addr); - - if (pnn == CTDB_UNKNOWN_PNN) { + node = ctdb_ip_to_node(ctdb, &addr); + if (node == NULL) { D_ERR("Refused connection from unknown node %s\n", ctdb_addr_to_str(&addr)); close(fd); return; } - in = talloc_zero(ctcp, struct ctdb_incoming); - in->fd = fd; - in->ctdb = ctdb; + tnode = talloc_get_type_abort(node->private_data, + struct ctdb_tcp_node); + if (tnode == NULL) { + /* This can't happen - see ctdb_tcp_initialise() */ + DBG_ERR("INTERNAL ERROR setting up connection from node %s\n", + ctdb_addr_to_str(&addr)); + close(fd); + return; + } - ret = set_blocking(in->fd, false); + ret = set_blocking(fd, false); if (ret != 0) { - DEBUG(DEBUG_ERR, - (__location__ - " failed to set socket non-blocking (%s)\n", - strerror(errno))); - close(in->fd); - in->fd = -1; + DBG_ERR("Failed to set socket non-blocking (%s)\n", + strerror(errno)); + close(fd); return; } - set_close_on_exec(in->fd); + set_close_on_exec(fd); - DEBUG(DEBUG_DEBUG, (__location__ " Created SOCKET FD:%d to incoming ctdb connection\n", fd)); + DBG_DEBUG("Created SOCKET FD:%d to incoming ctdb connection\n", fd); - if (setsockopt(in->fd,SOL_SOCKET,SO_KEEPALIVE,(char *)&one,sizeof(one)) == -1) { - DEBUG(DEBUG_WARNING, ("Failed to set KEEPALIVE on fd - %s\n", - strerror(errno))); + ret = setsockopt(fd, + SOL_SOCKET, + SO_KEEPALIVE, + (char *)&one, + sizeof(one)); + if (ret == -1) { + DBG_WARNING("Failed to set KEEPALIVE on fd - %s\n", + strerror(errno)); + } + + tnode->in_queue = ctdb_queue_setup(ctdb, + tnode, + fd, + CTDB_TCP_ALIGNMENT, + ctdb_tcp_read_cb, + tnode, + "ctdbd-%s", + ctdb_addr_to_str(&addr)); + if (tnode->in_queue == NULL) { + DBG_ERR("Failed to set up incoming queue\n"); + close(fd); + return; } - in->queue = ctdb_queue_setup(ctdb, in, in->fd, CTDB_TCP_ALIGNMENT, - ctdb_tcp_read_cb, in, "ctdbd-%s", ctdb_addr_to_str(&addr)); + tnode->in_fd = fd; } diff --git a/ctdb/tcp/tcp_init.c b/ctdb/tcp/tcp_init.c index 6b1299f32b5..d4b42b0d0f2 100644 --- a/ctdb/tcp/tcp_init.c +++ b/ctdb/tcp/tcp_init.c @@ -43,6 +43,11 @@ static int tnode_destructor(struct ctdb_tcp_node *tnode) tnode->out_fd = -1; } + if (tnode->in_fd != -1) { + close(tnode->in_fd); + tnode->in_fd = -1; + } + return 0; } @@ -56,6 +61,9 @@ static int ctdb_tcp_add_node(struct ctdb_node *node) CTDB_NO_MEMORY(node->ctdb, tnode); tnode->out_fd = -1; + tnode->in_fd = -1; + tnode->ctdb = node->ctdb; + node->private_data = tnode; talloc_set_destructor(tnode, tnode_destructor); diff --git a/ctdb/tcp/tcp_io.c b/ctdb/tcp/tcp_io.c index 0eb8e25eea3..be4558b16ea 100644 --- a/ctdb/tcp/tcp_io.c +++ b/ctdb/tcp/tcp_io.c @@ -37,7 +37,8 @@ */ void ctdb_tcp_read_cb(uint8_t *data, size_t cnt, void *args) { - struct ctdb_incoming *in = talloc_get_type(args, struct ctdb_incoming); + struct ctdb_tcp_node *tnode = talloc_get_type_abort( + args, struct ctdb_tcp_node); struct ctdb_req_header *hdr = (struct ctdb_req_header *)data; if (data == NULL) { @@ -69,11 +70,13 @@ void ctdb_tcp_read_cb(uint8_t *data, size_t cnt, void *args) } /* tell the ctdb layer above that we have a packet */ - in->ctdb->upcalls->recv_pkt(in->ctdb, data, cnt); + tnode->ctdb->upcalls->recv_pkt(tnode->ctdb, data, cnt); return; failed: - TALLOC_FREE(in); + TALLOC_FREE(tnode->in_queue); + close(tnode->in_fd); + tnode->in_fd = -1; TALLOC_FREE(data); } -- 2.23.0.rc1 From 34cc4b0420c825913355f658cd87cc901a42df4a Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Thu, 15 Aug 2019 15:45:16 +1000 Subject: [PATCH 4/7] ctdb-tcp: Use TALLOC_FREE() BUG: https://bugzilla.samba.org/show_bug.cgi?id=14084 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit d80d9edb4dc107b15a35a39e5c966a3eaed6453a) --- ctdb/tcp/tcp_connect.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c index 866454f3d29..16d75481050 100644 --- a/ctdb/tcp/tcp_connect.c +++ b/ctdb/tcp/tcp_connect.c @@ -46,10 +46,8 @@ void ctdb_tcp_stop_connection(struct ctdb_node *node) node->private_data, struct ctdb_tcp_node); ctdb_queue_set_fd(tnode->out_queue, -1); - talloc_free(tnode->connect_te); - talloc_free(tnode->connect_fde); - tnode->connect_fde = NULL; - tnode->connect_te = NULL; + TALLOC_FREE(tnode->connect_te); + TALLOC_FREE(tnode->connect_fde); if (tnode->out_fd != -1) { close(tnode->out_fd); tnode->out_fd = -1; -- 2.23.0.rc1 From 92410448f42b68cdbab0c8ed0989dd9fe1b8c7b3 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Thu, 15 Aug 2019 15:57:31 +1000 Subject: [PATCH 5/7] ctdb-tcp: Create outbound queue when the connection becomes writable Since commit ddd97553f0a8bfaada178ec4a7460d76fa21f079 ctdb_queue_send() doesn't queue a packet if the connection isn't yet established (i.e. when fd == -1). So, don't bother creating the outbound queue during initialisation but create it when the connection becomes writable. Now the presence of the queue indicates that the outbound connection is up. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14084 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 7f4854d9643a096a6d8a354fcd27b7c6ed24a75e) --- ctdb/tcp/tcp_connect.c | 23 ++++++++++++++++++++--- ctdb/tcp/tcp_init.c | 9 --------- ctdb/tcp/tcp_io.c | 5 +++++ 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c index 16d75481050..4374242015c 100644 --- a/ctdb/tcp/tcp_connect.c +++ b/ctdb/tcp/tcp_connect.c @@ -44,8 +44,8 @@ void ctdb_tcp_stop_connection(struct ctdb_node *node) { struct ctdb_tcp_node *tnode = talloc_get_type( node->private_data, struct ctdb_tcp_node); - - ctdb_queue_set_fd(tnode->out_queue, -1); + + TALLOC_FREE(tnode->out_queue); TALLOC_FREE(tnode->connect_te); TALLOC_FREE(tnode->connect_fde); if (tnode->out_fd != -1) { @@ -126,7 +126,24 @@ static void ctdb_node_connect_write(struct tevent_context *ev, strerror(errno)); } - ctdb_queue_set_fd(tnode->out_queue, tnode->out_fd); + tnode->out_queue = ctdb_queue_setup(node->ctdb, + tnode, + tnode->out_fd, + CTDB_TCP_ALIGNMENT, + ctdb_tcp_tnode_cb, + node, + "to-node-%s", + node->name); + if (tnode->out_queue == NULL) { + DBG_ERR("Failed to set up outgoing queue\n"); + ctdb_tcp_stop_connection(node); + tnode->connect_te = tevent_add_timer(ctdb->ev, + tnode, + timeval_current_ofs(1, 0), + ctdb_tcp_node_connect, + node); + return; + } /* the queue subsystem now owns this fd */ tnode->out_fd = -1; diff --git a/ctdb/tcp/tcp_init.c b/ctdb/tcp/tcp_init.c index d4b42b0d0f2..a9cb9b36a01 100644 --- a/ctdb/tcp/tcp_init.c +++ b/ctdb/tcp/tcp_init.c @@ -67,15 +67,6 @@ static int ctdb_tcp_add_node(struct ctdb_node *node) node->private_data = tnode; talloc_set_destructor(tnode, tnode_destructor); - tnode->out_queue = ctdb_queue_setup(node->ctdb, - node, - tnode->out_fd, - CTDB_TCP_ALIGNMENT, - ctdb_tcp_tnode_cb, - node, - "to-node-%s", - node->name); - return 0; } diff --git a/ctdb/tcp/tcp_io.c b/ctdb/tcp/tcp_io.c index be4558b16ea..e33ed44048e 100644 --- a/ctdb/tcp/tcp_io.c +++ b/ctdb/tcp/tcp_io.c @@ -87,5 +87,10 @@ int ctdb_tcp_queue_pkt(struct ctdb_node *node, uint8_t *data, uint32_t length) { struct ctdb_tcp_node *tnode = talloc_get_type(node->private_data, struct ctdb_tcp_node); + if (tnode->out_queue == NULL) { + DBG_DEBUG("No outgoing connection, dropping packet\n"); + return 0; + } + return ctdb_queue_send(tnode->out_queue, data, length); } -- 2.23.0.rc1 From 002c7996619b3b8329972814e9a16c8436efd1e7 Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Fri, 9 Aug 2019 15:33:05 +1000 Subject: [PATCH 6/7] ctdb-tcp: Only mark a node connected if both directions are up Nodes are currently marked as up if the outgoing connection is established. However, if the incoming connection is not yet established then this node could send a request where the replying node can not queue its reply. Wait until both directions are up before marking a node as connected. BUG: https://bugzilla.samba.org/show_bug.cgi?id=14084 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs (cherry picked from commit 8c98c10f242bc722beffc711e85c0e4f2e74cd57) --- ctdb/tcp/tcp_connect.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c index 4374242015c..fd635b9abf2 100644 --- a/ctdb/tcp/tcp_connect.c +++ b/ctdb/tcp/tcp_connect.c @@ -148,8 +148,14 @@ static void ctdb_node_connect_write(struct tevent_context *ev, /* the queue subsystem now owns this fd */ tnode->out_fd = -1; - /* tell the ctdb layer we are connected */ - node->ctdb->upcalls->node_connected(node); + /* + * Mark the node to which this connection has been established + * as connected, but only if the corresponding listening + * socket is also connected + */ + if (tnode->in_fd != -1) { + node->ctdb->upcalls->node_connected(node); + } } @@ -343,7 +349,15 @@ static void ctdb_listen_event(struct tevent_context *ev, struct tevent_fd *fde, } tnode->in_fd = fd; -} + + /* + * Mark the connecting node as connected, but only if the + * corresponding outbound connected is also up + */ + if (tnode->out_queue != NULL) { + node->ctdb->upcalls->node_connected(node); + } + } /* -- 2.23.0.rc1 From e539555123dd1d7e9e3baaec75c30c6c10b1e01f Mon Sep 17 00:00:00 2001 From: Martin Schwenke Date: Tue, 13 Aug 2019 17:08:43 +1000 Subject: [PATCH 7/7] ctdb-tcp: Mark node as disconnected if incoming connection goes away To make it easy to pass the node data to the upcall, the private data for ctdb_tcp_read_cb() needs to be changed from tnode to node. RN: Avoid marking a node as connected before it can receive packets BUG: https://bugzilla.samba.org/show_bug.cgi?id=14084 Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs Autobuild-User(master): Martin Schwenke Autobuild-Date(master): Fri Aug 16 22:50:35 UTC 2019 on sn-devel-184 (cherry picked from commit 73c850eda4209b688a169aeeb20c453b738cbb35) --- ctdb/tcp/tcp_connect.c | 2 +- ctdb/tcp/tcp_io.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c index fd635b9abf2..6123380ca9f 100644 --- a/ctdb/tcp/tcp_connect.c +++ b/ctdb/tcp/tcp_connect.c @@ -339,7 +339,7 @@ static void ctdb_listen_event(struct tevent_context *ev, struct tevent_fd *fde, fd, CTDB_TCP_ALIGNMENT, ctdb_tcp_read_cb, - tnode, + node, "ctdbd-%s", ctdb_addr_to_str(&addr)); if (tnode->in_queue == NULL) { diff --git a/ctdb/tcp/tcp_io.c b/ctdb/tcp/tcp_io.c index e33ed44048e..e8ebff887e1 100644 --- a/ctdb/tcp/tcp_io.c +++ b/ctdb/tcp/tcp_io.c @@ -37,8 +37,9 @@ */ void ctdb_tcp_read_cb(uint8_t *data, size_t cnt, void *args) { + struct ctdb_node *node = talloc_get_type_abort(args, struct ctdb_node); struct ctdb_tcp_node *tnode = talloc_get_type_abort( - args, struct ctdb_tcp_node); + node->private_data, struct ctdb_tcp_node); struct ctdb_req_header *hdr = (struct ctdb_req_header *)data; if (data == NULL) { @@ -77,6 +78,8 @@ failed: TALLOC_FREE(tnode->in_queue); close(tnode->in_fd); tnode->in_fd = -1; + node->ctdb->upcalls->node_dead(node); + TALLOC_FREE(data); } -- 2.23.0.rc1