[Pkg-ofed-commits] [libfabric] 83/123: prov/psm: properly terminate the name server thread

Ana Beatriz Guerrero López ana at moszumanska.debian.org
Sat Oct 22 12:28:33 UTC 2016


This is an automated email from the git hooks/post-receive script.

ana pushed a commit to annotated tag v1.1.1
in repository libfabric.

commit adeea1c66c8f9bfbfc8f529d972bf8dce8486f32
Author: Jianxin Xiong <jianxin.xiong at intel.com>
Date:   Thu Sep 17 23:35:47 2015 -0700

    prov/psm: properly terminate the name server thread
    
    Instead of letting the name server thread run detached, terminate
    it when the associated fabric object is closed. Since the thread is
    started when the fabric object is opened, this change better aligns
    the life cycles of the two.
    
    Signed-off-by: Jianxin Xiong <jianxin.xiong at intel.com>
---
 prov/psm/src/psmx.h      |  1 +
 prov/psm/src/psmx_init.c | 34 +++++++++++++++++++++++++++++-----
 prov/psm/src/psmx_util.c | 13 +++++++++++++
 3 files changed, 43 insertions(+), 5 deletions(-)

diff --git a/prov/psm/src/psmx.h b/prov/psm/src/psmx.h
index c152b5d..6b8243d 100644
--- a/prov/psm/src/psmx.h
+++ b/prov/psm/src/psmx.h
@@ -227,6 +227,7 @@ struct psmx_fid_fabric {
 	int			refcnt;
 	struct psmx_fid_domain	*active_domain;
 	psm_uuid_t		uuid;
+	pthread_t		name_server_thread;
 };
 
 struct psmx_fid_domain {
diff --git a/prov/psm/src/psmx_init.c b/prov/psm/src/psmx_init.c
index 4901bbb..64c3e4e 100644
--- a/prov/psm/src/psmx_init.c
+++ b/prov/psm/src/psmx_init.c
@@ -496,11 +496,32 @@ err_out:
 static int psmx_fabric_close(fid_t fid)
 {
 	struct psmx_fid_fabric *fabric;
+	void *exit_code;
+	int ret;
 
 	FI_INFO(&psmx_prov, FI_LOG_CORE, "\n");
 
 	fabric = container_of(fid, struct psmx_fid_fabric, fabric.fid);
 	if (! --fabric->refcnt) {
+		if (psmx_env.name_server &&
+		    !pthread_equal(fabric->name_server_thread, pthread_self())) {
+			ret = pthread_cancel(fabric->name_server_thread);
+			if (ret) {
+				FI_INFO(&psmx_prov, FI_LOG_CORE,
+					"pthread_cancel returns %d\n", ret);
+			}
+			ret = pthread_join(fabric->name_server_thread, &exit_code);
+			if (ret) {
+				FI_INFO(&psmx_prov, FI_LOG_CORE,
+					"pthread_join returns %d\n", ret);
+			}
+			else {
+				FI_INFO(&psmx_prov, FI_LOG_CORE,
+					"name server thread exited with code %ld (%s)\n",
+					(uintptr_t)exit_code,
+					(exit_code == PTHREAD_CANCELED) ? "PTHREAD_CANCELED" : "?");
+			}
+		}
 		if (fabric->active_domain)
 			fi_close(&fabric->active_domain->domain.fid);
 		assert(fabric == psmx_active_fabric);
@@ -528,8 +549,7 @@ static int psmx_fabric(struct fi_fabric_attr *attr,
 		       struct fid_fabric **fabric, void *context)
 {
 	struct psmx_fid_fabric *fabric_priv;
-	pthread_t thread;
-	pthread_attr_t thread_attr;
+	int ret;
 
 	FI_INFO(&psmx_prov, FI_LOG_CORE, "\n");
 
@@ -554,9 +574,13 @@ static int psmx_fabric(struct fi_fabric_attr *attr,
 	psmx_get_uuid(fabric_priv->uuid);
 
 	if (psmx_env.name_server) {
-		pthread_attr_init(&thread_attr);
-		pthread_attr_setdetachstate(&thread_attr,PTHREAD_CREATE_DETACHED);
-		pthread_create(&thread, &thread_attr, psmx_name_server, (void *)fabric_priv);
+		ret = pthread_create(&fabric_priv->name_server_thread, NULL,
+				     psmx_name_server, (void *)fabric_priv);
+		if (ret) {
+			FI_INFO(&psmx_prov, FI_LOG_CORE, "pthread_create returns %d\n", ret);
+			/* use the main thread's ID as invalid value for the new thread */
+			fabric_priv->name_server_thread = pthread_self();
+		}
 	}
 
 	psmx_query_mpi();
diff --git a/prov/psm/src/psmx_util.c b/prov/psm/src/psmx_util.c
index 520e7c9..745e3b0 100644
--- a/prov/psm/src/psmx_util.c
+++ b/prov/psm/src/psmx_util.c
@@ -98,6 +98,12 @@ char *psmx_uuid_to_string(psm_uuid_t uuid)
  * have the transport address of the server in the "dest_addr"
  * field. Both sides have to use the same UUID.
  *************************************************************/
+static void psmx_name_server_cleanup(void *arg)
+{
+	FI_INFO(&psmx_prov, FI_LOG_CORE, "\n");
+	close((uintptr_t)arg);
+}
+
 void *psmx_name_server(void *args)
 {
 	struct psmx_fid_fabric *fabric;
@@ -116,6 +122,8 @@ void *psmx_name_server(void *args)
 	fabric = args;
 	port = psmx_uuid_to_port(fabric->uuid);
 
+	FI_INFO(&psmx_prov, FI_LOG_CORE, "port: %d\n", port);
+
 	if (asprintf(&service, "%d", port) < 0)
 		return NULL;
 
@@ -151,6 +159,9 @@ void *psmx_name_server(void *args)
 
 	listen(listenfd, 256);
 
+	pthread_cleanup_push(psmx_name_server_cleanup, (void *)(uintptr_t)listenfd);
+	FI_INFO(&psmx_prov, FI_LOG_CORE, "Start working ...\n");
+
 	while (1) {
 		connfd = accept(listenfd, NULL, 0);
 		if (connfd >= 0) {
@@ -165,6 +176,8 @@ void *psmx_name_server(void *args)
 		}
 	}
 
+	pthread_cleanup_pop(1);
+
 	return NULL;
 }
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-ofed/libfabric.git



More information about the Pkg-ofed-commits mailing list