[Pkg-ofed-commits] [libpsm2] 01/03: New upstream version 10.3-37
Brian Smith
bsmith-guest at moszumanska.debian.org
Thu Dec 7 22:30:00 UTC 2017
This is an automated email from the git hooks/post-receive script.
bsmith-guest pushed a commit to branch master
in repository libpsm2.
commit 03e1122f5f6edcf34ebc789c45ccd51aabb72538
Author: Brian T. Smith <bsmith at systemfabricworks.com>
Date: Thu Dec 7 10:33:37 2017 -0600
New upstream version 10.3-37
---
COMMIT | 2 +-
Makefile | 38 +++++++++++++--------
buildflags.mak | 32 +++++++++--------
compat/Makefile | 2 +-
compat/buildflags.mak | 7 ++--
libpsm2.spec.in | 4 ++-
libuuid/Makefile | 7 ++--
makesrpm.sh | 2 +-
opa/Makefile | 10 +++---
opa/opa_sysfs.c | 1 +
opa/opa_time.c | 15 +++++++-
psm.c | 93 +++++++++++++++++++-------------------------------
psm2_am.h | 57 ++++++++++++++++++++++++++++++-
psm2_mq.h | 56 +++++++++++++++++++++++++++---
psm_am.c | 73 +++++++++++++++++++++++++++++++++------
psm_am_internal.h | 24 ++++++++++---
psm_context.c | 15 ++++----
psm_ep.h | 2 +-
psm_ep_connect.c | 2 +-
psm_mq.c | 32 +++++++++++++++++
psm_mq_internal.h | 8 ++++-
psm_perf.c | 80 +++++++++++++++++++++++++------------------
psm_perf.h | 16 ++++-----
psm_user.h | 32 ++++++++++-------
psm_utils.c | 66 +++++++++++++++++++++++------------
psm_utils.h | 5 +--
ptl_am/Makefile | 5 ++-
ptl_am/ptl.c | 18 ++++++++--
ptl_ips/Makefile | 5 ++-
ptl_ips/ips_path_rec.c | 2 +-
ptl_ips/ips_proto.c | 38 +++++++++++++++++++++
ptl_ips/ips_proto_am.c | 20 +++++++++--
ptl_ips/ips_recvhdrq.c | 19 +++++++++++
ptl_ips/ips_tidcache.c | 11 ++++--
ptl_self/Makefile | 5 ++-
ptl_self/ptl.c | 38 +++++++++++++++++----
rpm_release_extension | 2 +-
37 files changed, 607 insertions(+), 237 deletions(-)
diff --git a/COMMIT b/COMMIT
index d81e4f3..b55e71f 100644
--- a/COMMIT
+++ b/COMMIT
@@ -1 +1 @@
-f8df7f0de7139df384ea8b94dc8567885bf76070
\ No newline at end of file
+295d2ac9ced1415c309531cbb478eab943e174e8
\ No newline at end of file
diff --git a/Makefile b/Makefile
index 8db7c1d..dc11046 100644
--- a/Makefile
+++ b/Makefile
@@ -84,16 +84,15 @@ override OUTDIR := $(shell readlink -m $(OUTDIR))
endif
endif
-LINKER_SCRIPT_FILE := ${OUTDIR}/psm2_linker_script.map
PSM2_VERNO_MAJOR := $(shell sed -n 's/^\#define.*PSM2_VERNO_MAJOR.*0x0\?\([1-9a-f]\?[0-9a-f]\+\).*/\1/p' $(top_srcdir)/psm2.h)
PSM2_VERNO_MINOR := $(shell sed -n 's/^\#define.*PSM2_VERNO_MINOR.*0x\([0-9]\?[0-9a-f]\+\).*/\1/p' $(top_srcdir)/psm2.h)
PSM2_LIB_MAJOR := $(shell printf "%d" ${PSM2_VERNO_MAJOR})
PSM2_LIB_MINOR := $(shell printf "%d" `sed -n 's/^\#define.*PSM2_VERNO_MINOR.*\(0x[0-9a-f]\+\).*/\1/p' $(top_srcdir)/psm2.h`)
+LINKER_SCRIPT_FILE = ${OUTDIR}/psm2_linker_script.map
SOURCES_CHKSUM_FILES = Makefile buildflags.mak $(LINKER_SCRIPT_FILE) \
`find . -regex '\(.*\.h\|.*\.c\)' -not -path "./test/*" -not -path "./tools/*" -not -path "_revision.c" | sort`
SOURCES_CHKSUM_VALUE = $(shell cat ${SOURCES_CHKSUM_FILES} | sha1sum | cut -d' ' -f 1)
-
OPA_LIB_MAJOR := 4
OPA_LIB_MINOR := 0
@@ -105,8 +104,16 @@ export OPA_LIB_MAJOR
export OPA_LIB_MINOR
export CCARCH ?= gcc
export FCARCH ?= gfortran
+export AR ?= ar
include $(top_srcdir)/buildflags.mak
+# We need to unexport these environs as during mock testing and normal calls,
+# if they are exported then during each submake they will be evaluated again.
+# This is costly and the LINKER_SCRIPT_FILE doesn't exist until after its
+# target rule runs.
+unexport SOURCES_CHKSUM_FILES
+unexport SOURCES_CHKSUM_VALUE
+unexport LINKER_SCRIPT_FILE
INCLUDES += -I$(top_srcdir)
ifneq (x86_64,$(arch))
@@ -272,18 +279,20 @@ all: outdir symlinks
@if [ ! -e $(HISTORY) ] || [ -z "`grep -E '^$(OUTDIR)$$' $(HISTORY)`" ]; then \
echo $(OUTDIR) >> $(HISTORY); \
fi
+ # Our buildflags.mak exports all variables, all are propogated to submakes.
@for subdir in $(SUBDIRS); do \
mkdir -p $(OUTDIR)/$$subdir; \
- $(MAKE) -j $(nthreads) -C $$subdir OUTDIR=$(OUTDIR)/$$subdir $(OPTIONS); \
+ $(MAKE) -j $(nthreads) -C $$subdir OUTDIR=$(OUTDIR)/$$subdir; \
done
- $(MAKE) -j $(nthreads) OUTDIR=$(OUTDIR) $(OPTIONS) $(OUTDIR)/${TARGLIB}.so
+ $(MAKE) -j $(nthreads) $(OUTDIR)/${TARGLIB}.so
+ $(MAKE) -j $(nthreads) $(OUTDIR)/${TARGLIB}.a
@mkdir -p $(OUTDIR)/compat
- $(MAKE) -j $(nthreads) -C compat OUTDIR=$(OUTDIR)/compat $(OPTIONS)
+ $(MAKE) -j $(nthreads) -C compat OUTDIR=$(OUTDIR)/compat
%_clean:
make OUTDIR=$* clean
-clean: linker_script_file_clean cleanlinks
+clean: cleanlinks
rm -rf ${OUTDIR}
@if [ -e $(HISTORY) ]; then \
grep -v -E "^$(OUTDIR)$$" $(HISTORY) > $(HISTORY)_tmp; \
@@ -294,12 +303,11 @@ clean: linker_script_file_clean cleanlinks
fi
mock: OUTDIR := $(MOCK_OUTDIR)
-mock: OPTIONS = PSM2_MOCK_TESTING=1
mock:
- $(MAKE) OUTDIR=$(OUTDIR) OPTIONS=$(OPTIONS)
+ $(MAKE) OUTDIR=$(OUTDIR) PSM2_MOCK_TESTING=1
debug: OUTDIR := $(DEBUG_OUTDIR)
-debug: OPTIONS = PSM_DEBUG=1
+debug: OPTIONS := PSM_DEBUG=1
debug:
$(MAKE) OUTDIR=$(OUTDIR) OPTIONS=$(OPTIONS)
@@ -338,6 +346,8 @@ install: all
(cd ${DESTDIR}${INSTALL_LIB_TARG} ; \
ln -sf ${TARGLIB}.so.${MAJOR}.${MINOR} ${TARGLIB}.so.${MAJOR} ; \
ln -sf ${TARGLIB}.so.${MAJOR} ${TARGLIB}.so)
+ install -D $(OUTDIR)/${TARGLIB}.a \
+ ${DESTDIR}${INSTALL_LIB_TARG}/${TARGLIB}.a
install -m 0644 -D psm2.h ${DESTDIR}/usr/include/psm2.h
install -m 0644 -D psm2_mq.h ${DESTDIR}/usr/include/psm2_mq.h
install -m 0644 -D psm2_am.h ${DESTDIR}/usr/include/psm2_am.h
@@ -501,16 +511,16 @@ $(OUTDIR)/${TARGLIB}.so.${MAJOR}.${MINOR}: ${${TARGLIB}-objs} $(LINKER_SCRIPT_FI
date -u -d@$${SOURCE_DATE_EPOCH:-$$(date +%s)} +'char psmi_hfi_build_timestamp[] ="%F %T%:z";' >> ${OUTDIR}/_revision.c
echo "char psmi_hfi_sources_checksum[] =\"${SOURCES_CHKSUM_VALUE}\";" >> ${OUTDIR}/_revision.c
echo "char psmi_hfi_git_checksum[] =\"`git rev-parse HEAD`\";" >> ${OUTDIR}/_revision.c
- $(CC) -c $(BASECFLAGS) $(INCLUDES) ${OUTDIR}/_revision.c -o $(OUTDIR)/_revision.o
+ $(CC) -c $(CFLAGS) $(BASECFLAGS) $(INCLUDES) ${OUTDIR}/_revision.c -o $(OUTDIR)/_revision.o
$(CC) $(LINKER_SCRIPT) $(LDFLAGS) -o $@ -Wl,-soname=${TARGLIB}.so.${MAJOR} -shared \
${${TARGLIB}-objs} $(OUTDIR)/_revision.o -Lopa $(LDLIBS)
+$(OUTDIR)/${TARGLIB}.a: $(OUTDIR)/${TARGLIB}.so.${MAJOR}.${MINOR}
+ $(AR) rcs $(OUTDIR)/${TARGLIB}.a ${${TARGLIB}-objs} $(OUTDIR)/_revision.o
+
${OUTDIR}/%.o: ${top_srcdir}/%.c
- $(CC) $(CFLAGS) $(INCLUDES) -MMD -c $< -o $@
+ $(CC) $(CFLAGS) $(BASECFLAGS) $(INCLUDES) -MMD -c $< -o $@
$(LINKER_SCRIPT_FILE): psm2_linker_script_map.in
sed "s/_psm2_additional_globals_;/$(PSM2_ADDITIONAL_GLOBALS)/" \
psm2_linker_script_map.in > ${OUTDIR}/psm2_linker_script.map
-
-linker_script_file_clean:
- rm -f $(LINKER_SCRIPT_FILE)
diff --git a/buildflags.mak b/buildflags.mak
index 67593f5..f59958c 100644
--- a/buildflags.mak
+++ b/buildflags.mak
@@ -86,8 +86,9 @@ ASFLAGS += $(BASE_FLAGS)
ifeq ($(PSM2_MOCK_TESTING),1)
BASECFLAGS += -DPSM2_MOCK_TESTING=1
-# we skip the linker script for testing version, we want all symbols to be
-# reachable from outside the library
+unexport LINKER_SCRIPT
+# We skip the linker script for mock testing version, we want all symbols
+# to be reachable from outside the library
else
LINKER_SCRIPT := -Wl,--version-script $(LINKER_SCRIPT_FILE)
endif
@@ -178,14 +179,10 @@ endif
BASECFLAGS += -fpic -fPIC -D_GNU_SOURCE
-ifeq (${CCARCH},gcc)
- BASECFLAGS += -funwind-tables
-endif
-
ifneq (,${PSM_VALGRIND})
- CFLAGS += -DPSM_VALGRIND
+ BASECFLAGS += -DPSM_VALGRIND
else
- CFLAGS += -DNVALGRIND
+ BASECFLAGS += -DNVALGRIND
endif
ASFLAGS += -g3 -fpic
@@ -193,18 +190,25 @@ ASFLAGS += -g3 -fpic
BASECFLAGS += ${OPA_CFLAGS}
ifeq (${CCARCH},icc)
- BASECFLAGS += -O3 -g3 -fpic -fPIC -D_GNU_SOURCE -DPACK_STRUCT_STL=packed,
- CFLAGS += $(BASECFLAGS)
+ BASECFLAGS += -fpic -fPIC -D_GNU_SOURCE -DPACK_STRUCT_STL=packed,
LDFLAGS += -static-intel
else
ifeq (${CCARCH},gcc)
- CFLAGS += $(BASECFLAGS) -Wno-strict-aliasing -Wformat-security
+ BASECFLAGS += -funwind-tables -Wno-strict-aliasing -Wformat-security
else
- ifeq (${CCARCH},gcc4)
- CFLAGS += $(BASECFLAGS)
- else
+ ifneq (${CCARCH},gcc4)
$(error Unknown compiler arch "${CCARCH}")
endif # gcc4
endif # gcc
endif # icc
+# We run export here to ensure all the above setup is in the environment
+# for sub makes. However, we exclude this during clean and distclean
+# to avoid resolution of some variables that don't need to be resolved
+# and avoid unnecessary missing file warnings during cleanup.
+ifneq ($(MAKECMDGOALS), clean)
+ifneq ($(MAKECMDGOALS), distclean)
+export
+endif
+endif
+
diff --git a/compat/Makefile b/compat/Makefile
index 092775f..996b7e9 100644
--- a/compat/Makefile
+++ b/compat/Makefile
@@ -76,7 +76,7 @@ install: all
install -D $(OUTDIR)/${COMPATLIB}.so.${MAJOR} ${DESTDIR}${COMPAT_LIB_TARG}/${COMPATLIB}.so.${MAJOR}
$(OUTDIR)/%.o: $(compat_build_dir)/%.c
- $(CC) $(CFLAGS) $(INCLUDES) -MMD -c $< -o $@
+ $(CC) $(CFLAGS) $(BASECFLAGS) $(INCLUDES) -MMD -c $< -o $@
$(OUTDIR)/${COMPATLIB}.so.${MAJOR}: ${${COMPATLIB}-objs}
$(CC) $(BASECFLAGS) $(LINKER_SCRIPT) $(LDFLAGS) -Wl,-soname=${COMPATLIB}.so.${MAJOR} -shared \
diff --git a/compat/buildflags.mak b/compat/buildflags.mak
index c677989..b448e4e 100644
--- a/compat/buildflags.mak
+++ b/compat/buildflags.mak
@@ -88,15 +88,12 @@ ASFLAGS += -g3 -fpic
ifeq (${CCARCH},icc)
BASECFLAGS += -O3 -g3
- CFLAGS += $(BASECFLAGS)
LDFLAGS += -static-intel
else
ifeq (${CCARCH},gcc)
- CFLAGS += $(BASECFLAGS) -Wno-strict-aliasing
+ BASECFLAGS += -Wno-strict-aliasing
else
- ifeq (${CCARCH},gcc4)
- CFLAGS += $(BASECFLAGS)
- else
+ ifneq (${CCARCH},gcc4)
$(error Unknown compiler arch "${CCARCH}")
endif
endif
diff --git a/libpsm2.spec.in b/libpsm2.spec.in
index c5ddf62..7bd7836 100644
--- a/libpsm2.spec.in
+++ b/libpsm2.spec.in
@@ -73,7 +73,8 @@ Obsoletes: hfi1-psm < 1.0.0
%package -n @RPM_NAME@@RPM_NAME_BASEEXT@
%endif
Summary: Intel PSM2 Libraries
-Provides: @RPM_NAME@
+Provides: @RPM_NAME@ = %{version}-%{release}
+Provides: @RPM_NAME@%{_isa} = %{version}-%{release}
%if 0%{?suse_version}
BuildRequires: libnuma-devel
Requires: libnuma1
@@ -155,6 +156,7 @@ make %{?_smp_mflags}
%files -n @RPM_NAME@-devel
%{_libdir}/@TARGLIB@.so
+%{_libdir}/@TARGLIB@.a
%{_includedir}/psm2.h
%{_includedir}/psm2_mq.h
%{_includedir}/psm2_am.h
diff --git a/libuuid/Makefile b/libuuid/Makefile
index aa3f5ac..2f5babe 100644
--- a/libuuid/Makefile
+++ b/libuuid/Makefile
@@ -55,8 +55,7 @@ OUTDIR = .
this_srcdir := $(shell readlink -m .)
top_srcdir := $(this_srcdir)/..
-include $(top_srcdir)/buildflags.mak
-CFLAGS += -DPSM_UUID=1 -Wno-unused-function
+BASECFLAGS += -DPSM_UUID=1 -Wno-unused-function
INCLUDES += -I$(top_srcdir)
${TARGLIB}-objs := psm_uuid.o parse.o pack.o unpack.o unparse.o
@@ -70,10 +69,10 @@ IGNORE_DEP_TARGETS = clean
all .DEFAULT: ${${TARGLIB}-objs}
$(OUTDIR)/%.d: $(this_srcdir)/%.c
- $(CC) $(CFLAGS) $(INCLUDES) $< -MM -MF $@ -MQ $(@:.d=.o)
+ $(CC) $(CFLAGS) $(BASECFLAGS) $(INCLUDES) $< -MM -MF $@ -MQ $(@:.d=.o)
$(OUTDIR)/%.o: $(this_srcdir)/%.c | ${DEPS}
- $(CC) $(CFLAGS) $(INCLUDES) -c $< -o $@
+ $(CC) $(CFLAGS) $(BASECFLAGS) $(INCLUDES) -c $< -o $@
clean:
@if [ -d $(OUTDIR) ]; then \
diff --git a/makesrpm.sh b/makesrpm.sh
index e673b35..5fc4939 100755
--- a/makesrpm.sh
+++ b/makesrpm.sh
@@ -113,7 +113,7 @@ while [ "$1" != "" ]; do
if [ -z "$1" ]; then
usage
fi
- $RPM_NAME_BASEEXT="$1"
+ RPM_NAME_BASEEXT="$1"
export RPM_NAME_BASEEXT="$1"
;;
-r | -rpmname) shift
diff --git a/opa/Makefile b/opa/Makefile
index d065429..97c51bc 100644
--- a/opa/Makefile
+++ b/opa/Makefile
@@ -59,10 +59,8 @@ MINOR := $(OPA_LIB_MINOR)
this_srcdir := $(shell readlink -m .)
top_srcdir := $(this_srcdir)/..
-include $(top_srcdir)/buildflags.mak
-BASECFLAGS += -D_GNU_SOURCE
-INCLUDES += -I$(top_srcdir) -I$(top_srcdir)/ptl_ips
+INCLUDES += -I$(top_srcdir) -I$(top_srcdir)/ptl_ips
ifeq (${arch},x86_64)
PLATFORM_OBJ=opa_dwordcpy-x86_64-fast.o
else
@@ -86,13 +84,13 @@ install: all
@echo "Nothing to do for install."
$(OUTDIR)/%.d: $(this_srcdir)/%.c
- $(CC) $(CFLAGS) $(INCLUDES) $< -MM -MF $@ -MQ $(@:.d=.o)
+ $(CC) $(CFLAGS) $(BASECFLAGS) $(INCLUDES) $< -MM -MF $@ -MQ $(@:.d=.o)
$(OUTDIR)/%.d: $(this_srcdir)/%.S
- $(CC) $(CFLAGS) $(INCLUDES) $< -MM -MF $@ -MQ $(@:.d=.o)
+ $(CC) $(CFLAGS) $(BASECFLAGS) $(INCLUDES) $< -MM -MF $@ -MQ $(@:.d=.o)
$(OUTDIR)/%.o: $(this_srcdir)/%.c | ${DEPS}
- $(CC) $(CFLAGS) $(INCLUDES) -c $< -o $@
+ $(CC) $(CFLAGS) $(BASECFLAGS) $(INCLUDES) -c $< -o $@
$(OUTDIR)/%.o: $(this_srcdir)/%.S | ${DEPS}
$(CC) $(ASFLAGS) -c $< -o $@
diff --git a/opa/opa_sysfs.c b/opa/opa_sysfs.c
index f0cec91..00cc18a 100644
--- a/opa/opa_sysfs.c
+++ b/opa/opa_sysfs.c
@@ -266,6 +266,7 @@ static int hfi_sysfs_unit_open_for_node(uint32_t unit, int flags)
snprintf(buf, sizeof(buf), "%s/hfi1_%u/device/numa_node",
dirname(path_copy), unit);
+ free(path_copy);
fd = open(buf, flags);
saved_errno = errno;
diff --git a/opa/opa_time.c b/opa/opa_time.c
index 1b636ed..272fdb0 100644
--- a/opa/opa_time.c
+++ b/opa/opa_time.c
@@ -69,6 +69,16 @@
#include "opa_user.h"
+#ifdef min
+#undef min
+#endif
+#define min(a, b) ((a) < (b) ? (a) : (b))
+
+#ifdef max
+#undef max
+#endif
+#define max(a, b) ((a) > (b) ? (a) : (b))
+
/* init the cycle counter to picosecs/cycle conversion automatically */
/* at program startup, if it's using timing functions. */
static void init_picos_per_cycle(void) __attribute__ ((constructor));
@@ -224,6 +234,7 @@ static uint32_t hfi_timebase_from_cpuinfo(uint32_t old_pico_per_cycle)
{
/* we only validate once */
uint32_t new_pico_per_cycle = old_pico_per_cycle;
+ uint32_t max_bet_new_old_pico, min_bet_new_old_pico;
char hostname[80];
gethostname(hostname, 80);
@@ -262,8 +273,10 @@ static uint32_t hfi_timebase_from_cpuinfo(uint32_t old_pico_per_cycle)
}
#endif
+ max_bet_new_old_pico = max(new_pico_per_cycle, old_pico_per_cycle);
+ min_bet_new_old_pico = min(new_pico_per_cycle, old_pico_per_cycle);
/* If there's no change (within a small range), just return the old one */
- if (abs(new_pico_per_cycle - old_pico_per_cycle) < 5)
+ if ((max_bet_new_old_pico - min_bet_new_old_pico) < 5)
return old_pico_per_cycle;
if (hfi_timebase_isvalid(new_pico_per_cycle)) {
diff --git a/psm.c b/psm.c
index 16a2ceb..cd543d8 100644
--- a/psm.c
+++ b/psm.c
@@ -154,60 +154,34 @@ int psmi_cuda_initialize()
goto fail;
}
-
- psmi_cuCtxGetCurrent = dlsym(psmi_cuda_lib, "cuCtxGetCurrent");
- psmi_cuCtxSetCurrent = dlsym(psmi_cuda_lib, "cuCtxSetCurrent");
- psmi_cuPointerGetAttribute = dlsym(psmi_cuda_lib, "cuPointerGetAttribute");
- psmi_cuPointerSetAttribute = dlsym(psmi_cuda_lib, "cuPointerSetAttribute");
-
- psmi_cudaGetDeviceCount = dlsym(psmi_cudart_lib, "cudaGetDeviceCount");
- psmi_cudaGetDeviceProperties = dlsym(psmi_cudart_lib, "cudaGetDeviceProperties");
- psmi_cudaGetDevice = dlsym(psmi_cudart_lib, "cudaGetDevice");
- psmi_cudaSetDevice = dlsym(psmi_cudart_lib, "cudaSetDevice");
- psmi_cudaStreamCreate = dlsym(psmi_cudart_lib, "cudaStreamCreate");
- psmi_cudaDeviceSynchronize = dlsym(psmi_cudart_lib, "cudaDeviceSynchronize");
- psmi_cudaStreamSynchronize = dlsym(psmi_cudart_lib, "cudaStreamSynchronize");
- psmi_cudaEventCreate = dlsym(psmi_cudart_lib, "cudaEventCreate");
- psmi_cudaEventDestroy = dlsym(psmi_cudart_lib, "cudaEventDestroy");
- psmi_cudaEventQuery = dlsym(psmi_cudart_lib, "cudaEventQuery");
- psmi_cudaEventRecord = dlsym(psmi_cudart_lib, "cudaEventRecord");
- psmi_cudaEventSynchronize = dlsym(psmi_cudart_lib, "cudaEventSynchronize");
- psmi_cudaMalloc = dlsym(psmi_cudart_lib, "cudaMalloc");
- psmi_cudaHostAlloc = dlsym(psmi_cudart_lib, "cudaHostAlloc");
- psmi_cudaFreeHost = dlsym(psmi_cudart_lib, "cudaFreeHost");
- psmi_cudaMemcpy = dlsym(psmi_cudart_lib, "cudaMemcpy");
- psmi_cudaMemcpyAsync = dlsym(psmi_cudart_lib, "cudaMemcpyAsync");
-
- psmi_cudaIpcGetMemHandle = dlsym(psmi_cudart_lib, "cudaIpcGetMemHandle");
- psmi_cudaIpcOpenMemHandle = dlsym(psmi_cudart_lib, "cudaIpcOpenMemHandle");
- psmi_cudaIpcCloseMemHandle = dlsym(psmi_cudart_lib, "cudaIpcCloseMemHandle");
-
- if (!psmi_cuCtxGetCurrent || !psmi_cuCtxSetCurrent ||
- !psmi_cuPointerGetAttribute || !psmi_cuPointerSetAttribute ||
- !psmi_cudaGetDeviceCount || !psmi_cudaGetDeviceProperties ||
- !psmi_cudaGetDevice || !psmi_cudaSetDevice ||
- !psmi_cudaStreamCreate ||
- !psmi_cudaDeviceSynchronize || !psmi_cudaStreamSynchronize ||
- !psmi_cudaEventCreate || !psmi_cudaEventDestroy ||
- !psmi_cudaEventQuery || !psmi_cudaEventRecord ||
- !psmi_cudaEventSynchronize ||
- !psmi_cudaMalloc || !psmi_cudaHostAlloc || !psmi_cudaFreeHost ||
- !psmi_cudaMemcpy || !psmi_cudaMemcpyAsync || !psmi_cudaIpcGetMemHandle ||
- !psmi_cudaIpcOpenMemHandle || !psmi_cudaIpcCloseMemHandle) {
- _HFI_ERROR
- ("Unable to resolve symbols in CUDA libraries.\n");
- goto fail;
- }
-
- if (cuda_runtime_version > 7000) {
- psmi_cudaStreamCreateWithFlags = dlsym(psmi_cudart_lib,
- "cudaStreamCreateWithFlags");
- if (!psmi_cudaStreamCreateWithFlags) {
- _HFI_ERROR
- ("Unable to resolve symbols in CUDA libraries.\n");
- goto fail;
- }
- }
+ PSMI_CUDA_DLSYM(psmi_cuda_lib, cuCtxGetCurrent);
+ PSMI_CUDA_DLSYM(psmi_cuda_lib, cuCtxSetCurrent);
+ PSMI_CUDA_DLSYM(psmi_cuda_lib, cuPointerGetAttribute);
+ PSMI_CUDA_DLSYM(psmi_cuda_lib, cuPointerSetAttribute);
+
+ PSMI_CUDA_DLSYM(psmi_cudart_lib, cudaGetDeviceCount);
+ PSMI_CUDA_DLSYM(psmi_cudart_lib, cudaGetDeviceProperties);
+ PSMI_CUDA_DLSYM(psmi_cudart_lib, cudaGetDevice);
+ PSMI_CUDA_DLSYM(psmi_cudart_lib, cudaSetDevice);
+ PSMI_CUDA_DLSYM(psmi_cudart_lib, cudaStreamCreate);
+ PSMI_CUDA_DLSYM(psmi_cudart_lib, cudaDeviceSynchronize);
+ PSMI_CUDA_DLSYM(psmi_cudart_lib, cudaStreamSynchronize);
+ PSMI_CUDA_DLSYM(psmi_cudart_lib, cudaEventCreate);
+ PSMI_CUDA_DLSYM(psmi_cudart_lib, cudaEventDestroy);
+ PSMI_CUDA_DLSYM(psmi_cudart_lib, cudaEventQuery);
+ PSMI_CUDA_DLSYM(psmi_cudart_lib, cudaEventRecord);
+ PSMI_CUDA_DLSYM(psmi_cudart_lib, cudaEventSynchronize);
+ PSMI_CUDA_DLSYM(psmi_cudart_lib, cudaMalloc);
+ PSMI_CUDA_DLSYM(psmi_cudart_lib, cudaHostAlloc);
+ PSMI_CUDA_DLSYM(psmi_cudart_lib, cudaFreeHost);
+ PSMI_CUDA_DLSYM(psmi_cudart_lib, cudaMemcpy);
+ PSMI_CUDA_DLSYM(psmi_cudart_lib, cudaMemcpyAsync);
+ PSMI_CUDA_DLSYM(psmi_cudart_lib, cudaIpcGetMemHandle);
+ PSMI_CUDA_DLSYM(psmi_cudart_lib, cudaIpcOpenMemHandle);
+ PSMI_CUDA_DLSYM(psmi_cudart_lib, cudaIpcCloseMemHandle);
+
+ if (cuda_runtime_version > 7000)
+ PSMI_CUDA_DLSYM(psmi_cudart_lib, cudaStreamCreateWithFlags);
/* Check if all devices support Unified Virtual Addressing. */
PSMI_CUDA_CALL(cudaGetDeviceCount, &num_devices);
@@ -243,11 +217,13 @@ psm2_error_t __psm2_init(int *major, int *minor)
psmi_log_initialize();
PSM2_LOG_MSG("entering");
-#ifdef RDPMC_PERF_FRAMEWORK
- psmi_rdpmc_perf_framework_init();
-#endif /* RDPMC_PERF_FRAMEWORK */
+ /* When PSM_PERF is enabled, the following code causes the
+ PMU to be programmed to measure instruction cycles of the
+ TX/RX speedpaths of PSM. */
GENERIC_PERF_INIT();
+ GENERIC_PERF_SET_SLOT_NAME(PSM_TX_SPEEDPATH_CTR, "TX");
+ GENERIC_PERF_SET_SLOT_NAME(PSM_RX_SPEEDPATH_CTR, "RX");
if (psmi_isinit == PSMI_INITIALIZED)
goto update;
@@ -457,6 +433,9 @@ psm2_error_t __psm2_finalize(void)
PSMI_ERR_UNLESS_INITIALIZED(NULL);
+ /* When PSM_PERF is enabled, the following line causes the
+ instruction cycles gathered in the current run to be dumped
+ to stderr. */
GENERIC_PERF_DUMP(stderr);
ep = psmi_opened_endpoint;
while (ep != NULL) {
diff --git a/psm2_am.h b/psm2_am.h
index 1383fbb..f085ea0 100644
--- a/psm2_am.h
+++ b/psm2_am.h
@@ -161,7 +161,7 @@ struct psm2_amarg {
/** @brief The AM handler function type
*
- * psm2_am_handler_fm_t is the datatype for an AM handler. PSM2 AM will call-back
+ * psm2_am_handler_fn_t is the datatype for an AM handler. PSM2 AM will call-back
* into an AM handler using this function prototype. The parameters and result
* of these handler functions are described here.
*
@@ -183,6 +183,32 @@ int (*psm2_am_handler_fn_t) (psm2_am_token_t token,
psm2_amarg_t *args, int nargs,
void *src, uint32_t len);
+/** @brief The AM handler function type with caller context
+ *
+ * psm2_am_handler_2_fn_t is the datatype for an AM handler that
+ * includes a user context. PSM2 AM will call-back into an AM handler using
+ * this function prototype. The parameters and result
+ * of these handler functions are described here.
+ *
+ * @param[in] token This is an opaque token value passed into a handler.
+ * A request handler may send at most one reply back to the
+ * original requestor, and must pass this value as the token
+ * parameter to the psm2_am_reply_short() function. A reply
+ * handler is also passed a token value, but must not attempt
+ * to reply.
+ * @param[in] args A pointer to the arguments provided to this handler.
+ * @param[in] nargs The number of arguments.
+ * @param[in] src A pointer to the data payload provided to this handler.
+ * @param[in] len The length of the data payload in bytes.
+ * @param[in] hctx The user context pointer provided at handler registration.
+ *
+ * @returns 0 The handler should always return a result of 0.
+ */
+typedef
+int (*psm2_am_handler_2_fn_t) (psm2_am_token_t token,
+ psm2_amarg_t *args, int nargs,
+ void *src, uint32_t len, void *hctx);
+
/** @brief Type for a completion call-back handler.
*
* A completion handler can be specified to give a call-back on the initiation
@@ -226,6 +252,35 @@ psm2_error_t psm2_am_register_handlers(psm2_ep_t ep,
handlers, int num_handlers,
int *handlers_idx);
+/** @brief Register AM call-back handlers at the specified end-point.
+ *
+ * This function is used to register an array of handlers, and may be called
+ * multiple times to register additional handlers. The maximum number of
+ * handlers that can be registered is limited to the max_handlers value
+ * returned by psm2_am_get_parameters(). Handlers are associated with a PSM
+ * end-point. The handlers are allocated index numbers in the handler table
+ * for that end-point. The allocated index for the handler function in
+ * handlers[i] is returned in handlers_idx[i] for i in [0, num_handlers). These
+ * handler index values are used in the psm2_am_request_short() and
+ * psm2_am_reply_short() functions.
+ *
+ * @param[in] ep End-point value
+ * @param[in] handlers Array of handler functions
+ * @param[in] num_handlers Number of handlers (sizes the handlers and
+ * handlers_idx arrays)
+ * @param[in] hctx Array of void* pointers to user contexts for identifying the
+ * target ep that registered these handlers.
+ * @param[out] handlers_idx Used to return handler index mapping table
+ *
+ * @returns PSM2_OK Indicates success
+ * @returns PSM2_EP_NO_RESOURCES Insufficient slots in the AM handler table
+ */
+psm2_error_t psm2_am_register_handlers_2(psm2_ep_t ep,
+ const psm2_am_handler_2_fn_t *
+ handlers, int num_handlers,
+ void **hctx,
+ int *handlers_idx);
+
/** @brief Generate an AM request.
*
* This function generates an AM request causing an AM handler function to be
diff --git a/psm2_mq.h b/psm2_mq.h
index 6c23b10..b9cbf4e 100644
--- a/psm2_mq.h
+++ b/psm2_mq.h
@@ -334,9 +334,10 @@ psm2_mq_init(psm2_ep_t ep, uint64_t tag_order_mask,
psm2_error_t
psm2_mq_finalize(psm2_mq_t mq);
-#define PSM2_MQ_TAG_ELEMENTS 3
+#define PSM2_MQ_TAG_ELEMENTS 4
/**< Represents the number of 32-bit tag elements in the psm2_mq_tag_t
- * type. */
+ * type plus one extra element to keep alignment and padding
+ * as 16 bytes. */
/** @struct psm2_mq_tag
** @brief MQ Message tag
@@ -356,7 +357,11 @@ typedef
//struct psm2_mq_tag {
union psm2_mq_tag {
// union {
- uint32_t tag[PSM2_MQ_TAG_ELEMENTS] __attribute__ ((aligned(16)));
+ uint32_t tag[PSM2_MQ_TAG_ELEMENTS]; /* No longer specifying
+ * alignment as it makes
+ * code break with newer
+ * compilers. */
+
/**< 3 x 32bit array representation of @ref psm2_mq_tag */
struct {
uint32_t tag0; /**< 1 of 3 uint32_t tag values */
@@ -403,7 +408,11 @@ struct psm2_mq_status2 {
/** Remote peer's epaddr */
psm2_epaddr_t msg_peer;
/** Sender's original message tag */
- psm2_mq_tag_t msg_tag;
+ psm2_mq_tag_t msg_tag __attribute__ ((aligned(16)));/* Alignment added
+ * to preserve the
+ * layout as is
+ * expected by
+ * existent code */
/** Sender's original message length */
uint32_t msg_length;
/** Actual number of bytes transfered (receiver only) */
@@ -1081,6 +1090,45 @@ psm2_mq_ipeek(psm2_mq_t mq, psm2_mq_req_t *req, psm2_mq_status_t *status);
psm2_error_t
psm2_mq_ipeek2(psm2_mq_t mq, psm2_mq_req_t *req, psm2_mq_status2_t *status);
+/** @brief Check and dequeue the first request entry from the completed queue.
+ *
+ * Function to atomically check and dequeue the first entry from the completed
+ * queue. It must be paired with function psm2_mq_req_free, which returns the
+ * request to PSM2 library.
+ *
+ * @param[in] mq Matched Queue Handle
+ * @param[out] req PSM MQ Request handle, to be used for receiving the matched
+ * message.
+ *
+ * The following error codes are returned.
+ *
+ * @retval PSM2_OK The dequeue operation was successful and @c req is updated
+ * with a request ready for completion.
+ *
+ * @retval PSM2_MQ_NO_COMPLETIONS The dequeue operation was not successful,
+ * meaning that there are no further requests ready
+ * for completion. The contents of @c req remain
+ * unchanged.
+ */
+psm2_error_t
+psm2_mq_ipeek_dequeue(psm2_mq_t mq, psm2_mq_req_t *req);
+
+/** @brief Return the request to PSM2 library.
+ *
+ * Function returns the request previously obtained via psm2_mq_ipeek_dequeue
+ * to the PSM2 library.
+ *
+ * @param[in] mq Matched Queue Handle
+ * @param[in] req PSM MQ Request handle to be returned to PSM2 library.
+ * If @p req is NULL, no operation is performed.
+ *
+ * The following error codes are returned.
+ *
+ * @retval PSM2_OK Return of an object to PSM2 library pool was successful.
+ */
+psm2_error_t
+psm2_mq_req_free(psm2_mq_t mq, psm2_mq_req_t req);
+
/** @brief Wait until a non-blocking request completes
*
* Function to wait on requests created from either preposted receive buffers
diff --git a/psm_am.c b/psm_am.c
index df193da..bef1a92 100644
--- a/psm_am.c
+++ b/psm_am.c
@@ -90,7 +90,7 @@ static void psmi_am_min_parameters(struct psm2_am_parameters *dest,
psm2_error_t psmi_am_init_internal(psm2_ep_t ep)
{
int i;
- psm2_am_handler_fn_t *am_htable;
+ struct psm2_ep_am_handle_entry *am_htable;
struct psm2_am_parameters params;
psmi_am_parameters.max_handlers = INT_MAX;
@@ -115,15 +115,19 @@ psm2_error_t psmi_am_init_internal(psm2_ep_t ep)
ep->am_htable =
psmi_malloc(ep, UNDEFINED,
- sizeof(psm2_am_handler_fn_t) * PSMI_AM_NUM_HANDLERS);
+ sizeof(struct psm2_ep_am_handle_entry) * PSMI_AM_NUM_HANDLERS);
if (ep->am_htable == NULL)
return PSM2_NO_MEMORY;
- am_htable = (psm2_am_handler_fn_t *) ep->am_htable;
- for (i = 0; i < PSMI_AM_NUM_HANDLERS; i++)
- am_htable[i] = _ignore_handler;
+ am_htable = (struct psm2_ep_am_handle_entry *) ep->am_htable;
+ for (i = 0; i < PSMI_AM_NUM_HANDLERS; i++) {
+ am_htable[i].hfn = _ignore_handler;
+ am_htable[i].hctx = NULL;
+ am_htable[i].version = PSM2_AM_HANDLER_V2;
+ }
return PSM2_OK;
+
}
psm2_error_t
@@ -133,11 +137,15 @@ __psm2_am_register_handlers(psm2_ep_t ep,
{
int i, j;
+ psmi_assert_always(ep->am_htable != NULL);
+
PSM2_LOG_MSG("entering");
/* For now just assign any free one */
- for (i = 0, j = 0; i < PSMI_AM_NUM_HANDLERS; i++) {
- if (ep->am_htable[i] == _ignore_handler) {
- ep->am_htable[i] = handlers[j];
+ for (i = 0, j = 0; (i < PSMI_AM_NUM_HANDLERS) && (j < num_handlers); i++) {
+ if (ep->am_htable[i].hfn == _ignore_handler) {
+ ep->am_htable[i].hfn = handlers[j];
+ ep->am_htable[i].hctx = NULL;
+ ep->am_htable[i].version = PSM2_AM_HANDLER_V1;
handlers_idx[j] = i;
if (++j == num_handlers) /* all registered */
break;
@@ -146,8 +154,11 @@ __psm2_am_register_handlers(psm2_ep_t ep,
if (j < num_handlers) {
/* Not enough free handlers, restore unused handlers */
- for (i = 0; i < j; i++)
- ep->am_htable[handlers_idx[i]] = _ignore_handler;
+ for (i = 0; i < j; i++) {
+ ep->am_htable[handlers_idx[i]].hfn = _ignore_handler;
+ ep->am_htable[handlers_idx[i]].hctx = NULL;
+ ep->am_htable[handlers_idx[i]].version = PSM2_AM_HANDLER_V2;
+ }
PSM2_LOG_MSG("leaving");
return psmi_handle_error(ep, PSM2_EP_NO_RESOURCES,
"Insufficient "
@@ -162,6 +173,48 @@ __psm2_am_register_handlers(psm2_ep_t ep,
PSMI_API_DECL(psm2_am_register_handlers)
psm2_error_t
+__psm2_am_register_handlers_2(psm2_ep_t ep,
+ const psm2_am_handler_2_fn_t *handlers,
+ int num_handlers, void **hctx, int *handlers_idx)
+{
+ int i, j;
+
+ psmi_assert_always(ep->am_htable != NULL);
+
+ PSM2_LOG_MSG("entering");
+ /* For now just assign any free one */
+ for (i = 0, j = 0; (i < PSMI_AM_NUM_HANDLERS) && (j < num_handlers); i++) {
+ if (ep->am_htable[i].hfn == _ignore_handler) {
+ ep->am_htable[i].hfn = handlers[j];
+ ep->am_htable[i].hctx = hctx[j];
+ ep->am_htable[i].version = PSM2_AM_HANDLER_V2;
+ handlers_idx[j] = i;
+ if (++j == num_handlers) /* all registered */
+ break;
+ }
+ }
+
+ if (j < num_handlers) {
+ /* Not enough free handlers, restore unused handlers */
+ for (i = 0; i < j; i++) {
+ ep->am_htable[handlers_idx[i]].hfn = _ignore_handler;
+ ep->am_htable[handlers_idx[i]].hctx = NULL;
+ ep->am_htable[handlers_idx[i]].version = PSM2_AM_HANDLER_V2;
+ }
+ PSM2_LOG_MSG("leaving");
+ return psmi_handle_error(ep, PSM2_EP_NO_RESOURCES,
+ "Insufficient "
+ "available AM handlers: registered %d of %d requested handlers",
+ j, num_handlers);
+ }
+ else {
+ PSM2_LOG_MSG("leaving");
+ return PSM2_OK;
+ }
+}
+PSMI_API_DECL(psm2_am_register_handlers_2)
+
+psm2_error_t
__psm2_am_request_short(psm2_epaddr_t epaddr, psm2_handler_t handler,
psm2_amarg_t *args, int nargs, void *src, size_t len,
int flags, psm2_am_completion_fn_t completion_fn,
diff --git a/psm_am_internal.h b/psm_am_internal.h
index 29edfb8..bc2c128 100644
--- a/psm_am_internal.h
+++ b/psm_am_internal.h
@@ -61,7 +61,21 @@
#define PSMI_AM_ARGS_DEFAULT psm2_am_token_t token, \
psm2_amarg_t *args, int nargs, \
- void *src, uint32_t len
+ void *src, uint32_t len, \
+ void *hctx
+
+enum psm2_am_handler_version
+{
+ PSM2_AM_HANDLER_V1 = 0,
+ PSM2_AM_HANDLER_V2,
+};
+
+struct psm2_ep_am_handle_entry
+{
+ void *hfn;
+ void *hctx;
+ enum psm2_am_handler_version version;
+};
struct psmi_am_token {
psm2_epaddr_t epaddr_incoming;
@@ -77,14 +91,14 @@ struct psmi_am_token {
various assertions reference these parameters for sanity checking. */
extern struct psm2_am_parameters psmi_am_parameters;
-PSMI_ALWAYS_INLINE(psm2_am_handler_fn_t
+PSMI_ALWAYS_INLINE(struct psm2_ep_am_handle_entry *
psm_am_get_handler_function(psm2_ep_t ep,
psm2_handler_t handler_idx))
{
int hidx = handler_idx & (PSMI_AM_NUM_HANDLERS - 1);
- psm2_am_handler_fn_t fn = (psm2_am_handler_fn_t) ep->am_htable[hidx];
- psmi_assert_always(fn != NULL);
- return fn;
+ struct psm2_ep_am_handle_entry *hentry = &ep->am_htable[hidx];
+ psmi_assert_always(hentry != NULL);
+ return hentry;
}
/* PSM internal initialization */
diff --git a/psm_context.c b/psm_context.c
index e87b69e..b2181b1 100644
--- a/psm_context.c
+++ b/psm_context.c
@@ -515,8 +515,10 @@ psm2_error_t psmi_context_close(psmi_context_t *context)
/* only unmap the RTAIL if it was enabled in the first place */
if (cinfo->runtime_flags & HFI1_CAP_DMA_RTAIL) {
munmap((void*)PSMI_ALIGNDOWN(binfo->rcvhdrtail_base, __hfi_pg_sz),
- __hfi_pg_sz);
+ __hfi_pg_sz);
}
+ munmap((void*)PSMI_ALIGNDOWN(binfo->user_regbase, __hfi_pg_sz),
+ __hfi_pg_sz);
munmap((void*)PSMI_ALIGNDOWN(binfo->events_bufbase, __hfi_pg_sz),
__hfi_pg_sz);
munmap((void*)PSMI_ALIGNDOWN(binfo->status_bufbase, __hfi_pg_sz),
@@ -669,12 +671,13 @@ psmi_init_userinfo_params(psm2_ep_t ep, int unit_id,
max_contexts = max(env_maxctxt.e_int, 1); /* needs to be non-negative */
ask_contexts = min(max_contexts, avail_contexts); /* needs to be available */
} else if (!psmi_getenv("PSM2_SHAREDCONTEXTS_MAX",
- "Maximum number of contexts for this PSM2 job",
- PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_INT,
- (union psmi_envvar_val)avail_contexts, &env_maxctxt)) {
+ "", /* deprecated */
+ PSMI_ENVVAR_LEVEL_HIDDEN | PSMI_ENVVAR_LEVEL_NEVER_PRINT,
+ PSMI_ENVVAR_TYPE_INT,
+ (union psmi_envvar_val)avail_contexts, &env_maxctxt)) {
_HFI_INFO
- ("This env variable is deprecated. Please use PSM2_MAX_CONTEXTS_PER_JOB in future.\n");
+ ("The PSM2_SHAREDCONTEXTS_MAX env variable is deprecated. Please use PSM2_MAX_CONTEXTS_PER_JOB in future.\n");
max_contexts = max(env_maxctxt.e_int, 1); /* needs to be non-negative */
ask_contexts = min(max_contexts, avail_contexts); /* needs to be available */
@@ -717,7 +720,7 @@ psmi_init_userinfo_params(psm2_ep_t ep, int unit_id,
if (contexts > ask_contexts) {
err = psmi_handle_error(NULL, PSM2_EP_NO_DEVICE,
"Incompatible settings for "
- "(PSM2_SHAREDCONTEXTS_MAX / PSM2_MAX_CONTEXTS_PER_JOB) and PSM2_RANKS_PER_CONTEXT");
+ "PSM2_MAX_CONTEXTS_PER_JOB and PSM2_RANKS_PER_CONTEXT");
goto fail;
}
ask_contexts = contexts;
diff --git a/psm_ep.h b/psm_ep.h
index 78b12f1..4354e75 100644
--- a/psm_ep.h
+++ b/psm_ep.h
@@ -173,7 +173,7 @@ struct psm2_ep {
struct psm2_ep *mctxt_master;
/* Active Message handler table */
- void **am_htable;
+ struct psm2_ep_am_handle_entry *am_htable;
uint64_t gid_hi;
uint64_t gid_lo;
diff --git a/psm_ep_connect.c b/psm_ep_connect.c
index 9657209..1eb836f 100644
--- a/psm_ep_connect.c
+++ b/psm_ep_connect.c
@@ -132,7 +132,7 @@ __psm2_ep_connect(psm2_ep_t ep, int num_of_epid, psm2_epid_t const *array_of_epi
if (psmi_epid_version(array_of_epid[j]) >
PSMI_EPID_VERSION) {
psmi_handle_error(PSMI_EP_NORETURN, PSM2_INTERNAL_ERR,
- " Unkown version of EPID - %"PRIu64" \n"
+ " Unknown version of EPID - %"PRIu64" \n"
"Please upgrade PSM2 or set PSM2_ADDR_FMT=1 in the environment to force EPID version 1 \n",
psmi_epid_version(array_of_epid[j]));
}
diff --git a/psm_mq.c b/psm_mq.c
index 44b602a..37290bd 100644
--- a/psm_mq.c
+++ b/psm_mq.c
@@ -1166,6 +1166,38 @@ __psm2_mq_ipeek(psm2_mq_t mq, psm2_mq_req_t *oreq, psm2_mq_status_t *status)
}
PSMI_API_DECL(psm2_mq_ipeek)
+psm2_error_t __psm2_mq_ipeek_dequeue(psm2_mq_t mq, psm2_mq_req_t *oreq)
+{
+ psm2_mq_req_t req;
+
+ PSMI_ASSERT_INITIALIZED();
+ PSMI_LOCK(mq->progress_lock);
+ if (mq->completed_q.first == NULL)
+ psmi_poll_internal(mq->ep, 1);
+ if ((req = mq->completed_q.first) == NULL) {
+ PSMI_UNLOCK(mq->progress_lock);
+ return PSM2_MQ_NO_COMPLETIONS;
+ }
+ mq_qq_remove(&mq->completed_q, req);
+ PSMI_UNLOCK(mq->progress_lock);
+ *oreq = req;
+ return PSM2_OK;
+}
+PSMI_API_DECL(psm2_mq_ipeek_dequeue)
+
+psm2_error_t __psm2_mq_req_free(psm2_mq_t mq, psm2_mq_req_t req)
+{
+ PSMI_ASSERT_INITIALIZED();
+ if (req == NULL)
+ return PSM2_OK;
+ PSMI_LOCK(mq->progress_lock);
+ psmi_mq_req_free(req);
+ PSMI_UNLOCK(mq->progress_lock);
+
+ return PSM2_OK;
+}
+PSMI_API_DECL(psm2_mq_req_free)
+
static
psm2_error_t psmi_mqopt_ctl(psm2_mq_t mq, uint32_t key, void *value, int get)
{
diff --git a/psm_mq_internal.h b/psm_mq_internal.h
index f20bf34..0f30e5c 100644
--- a/psm_mq_internal.h
+++ b/psm_mq_internal.h
@@ -231,7 +231,11 @@ struct psm2_mq_req {
/* Tag matching vars */
psm2_epaddr_t peer;
- psm2_mq_tag_t tag;
+ psm2_mq_tag_t tag __attribute__ ((aligned(16)));/* Alignment added
+ * to preserve the
* layout that is
* expected by
* existing code */
psm2_mq_tag_t tagsel; /* used for receives */
/* Some PTLs want to get notified when there's a test/wait event */
@@ -279,6 +283,8 @@ struct psm2_mq_req {
uint8_t cuda_ipc_handle_attached;
#endif
+ uint64_t user_reserved[4];
+
/* PTLs get to store their own per-request data. MQ manages the allocation
* by allocating psm2_mq_req so that ptl_req_data has enough space for all
* possible PTLs.
diff --git a/psm_perf.c b/psm_perf.c
index f3d7e94..aaf3fd0 100644
--- a/psm_perf.c
+++ b/psm_perf.c
@@ -62,8 +62,14 @@
#include <stdint.h>
#include <stdlib.h>
#include <asm/unistd.h>
+#include <linux/perf_event.h>
+
+/* Configuration */
-struct rdpmc_ctx global_rdpmc_ctx;
+#define RDPMC_PERF_DEFAULT_TYPE (PERF_TYPE_HARDWARE)
+#define RDPMC_PERF_DEFAULT_CONFIG (PERF_COUNT_HW_CPU_CYCLES)
+
+__thread struct rdpmc_ctx global_rdpmc_ctx;
u64 global_rdpmc_begin[RDPMC_PERF_MAX_SLOT_NUMBER];
u64 global_rdpmc_summ[RDPMC_PERF_MAX_SLOT_NUMBER];
@@ -71,8 +77,8 @@ u64 global_rdpmc_number[RDPMC_PERF_MAX_SLOT_NUMBER];
char global_rdpmc_slot_name[RDPMC_PERF_MAX_SLOT_NUMBER][RDPMC_PERF_MAX_SLOT_NAME];
-unsigned int global_rdpmc_type = RDPMC_PERF_DEFAULT_TYPE;
-unsigned int global_rdpmc_config = RDPMC_PERF_DEFAULT_CONFIG;
+__thread unsigned int global_rdpmc_type = RDPMC_PERF_DEFAULT_TYPE;
+__thread unsigned int global_rdpmc_config = RDPMC_PERF_DEFAULT_CONFIG;
struct rdpmc_ctx {
int fd;
@@ -160,36 +166,7 @@ PSMI_ALWAYS_INLINE(void rdpmc_close(struct rdpmc_ctx *ctx))
munmap(ctx->buf, sysconf(_SC_PAGESIZE));
}
-/**
- * rdpmc_read: read a ring 3 readable performance counter
- * @ctx: Pointer to initialized &rdpmc_ctx structure.
- *
- * Read the current value of a running performance counter.
- */
-unsigned long long rdpmc_read(struct rdpmc_ctx *ctx)
-{
- u64 val;
- unsigned seq;
- u64 offset = 0;
-
- typeof (ctx->buf) buf = ctx->buf;
- do {
- seq = buf->lock;
- ips_rmb();
- if (buf->index <= 0)
- return buf->offset;
-#if defined(__ICC) || defined(__INTEL_COMPILER)
- val = _rdpmc(buf->index - 1);
-#else /* GCC */
- val = __builtin_ia32_rdpmc(buf->index - 1);
-#endif
- offset = buf->offset;
- ips_rmb();
- } while (buf->lock != seq);
- return val + offset;
-}
-
-void psmi_rdpmc_perf_framework_init()
+static void psmi_rdpmc_perf_framework_init()
{
int rdpmc_retval;
@@ -243,4 +220,41 @@ void psmi_rdpmc_perf_framework_init()
}
}
+/**
+ * rdpmc_read: read a ring 3 readable performance counter
+ * @ctx: Pointer to initialized &rdpmc_ctx structure.
+ *
+ * Read the current value of a running performance counter.
+ */
+unsigned long long rdpmc_read(struct rdpmc_ctx *ctx)
+{
+ static __thread int rdpmc_perf_initialized = 0;
+
+ if_pf(!rdpmc_perf_initialized)
+ {
+ psmi_rdpmc_perf_framework_init();
+ rdpmc_perf_initialized = 1;
+ }
+
+ u64 val;
+ unsigned seq;
+ u64 offset = 0;
+
+ typeof (ctx->buf) buf = ctx->buf;
+ do {
+ seq = buf->lock;
+ ips_rmb();
+ if (buf->index <= 0)
+ return buf->offset;
+#if defined(__ICC) || defined(__INTEL_COMPILER)
+ val = _rdpmc(buf->index - 1);
+#else /* GCC */
+ val = __builtin_ia32_rdpmc(buf->index - 1);
+#endif
+ offset = buf->offset;
+ ips_rmb();
+ } while (buf->lock != seq);
+ return val + offset;
+}
+
#endif /* RDPMC_PERF_FRAMEWORK */
diff --git a/psm_perf.h b/psm_perf.h
index 6fa06d2..b6b77f0 100644
--- a/psm_perf.h
+++ b/psm_perf.h
@@ -51,21 +51,19 @@
*/
-#ifdef RDPMC_PERF_FRAMEWORK
+#define PSM_TX_SPEEDPATH_CTR 0
+#define PSM_RX_SPEEDPATH_CTR 1
-#include <linux/perf_event.h>
+#ifdef RDPMC_PERF_FRAMEWORK
/* Configuration */
-#define RDPMC_PERF_DEFAULT_TYPE (PERF_TYPE_HARDWARE)
-#define RDPMC_PERF_DEFAULT_CONFIG (PERF_COUNT_HW_CPU_CYCLES)
-
#define RDPMC_PERF_MAX_SLOT_NUMBER (8)
#define RDPMC_PERF_MAX_SLOT_NAME (256)
/* RDPMC infrastructure */
-extern struct rdpmc_ctx global_rdpmc_ctx;
+extern __thread struct rdpmc_ctx global_rdpmc_ctx;
typedef unsigned long long u64;
@@ -75,10 +73,8 @@ extern u64 global_rdpmc_number[RDPMC_PERF_MAX_SLOT_NUMBER];
extern char global_rdpmc_slot_name[RDPMC_PERF_MAX_SLOT_NUMBER][RDPMC_PERF_MAX_SLOT_NAME];
-extern unsigned int global_rdpmc_type;
-extern unsigned int global_rdpmc_config;
-
-extern void psmi_rdpmc_perf_framework_init();
+extern __thread unsigned int global_rdpmc_type;
+extern __thread unsigned int global_rdpmc_config;
extern unsigned long long rdpmc_read(struct rdpmc_ctx *ctx);
diff --git a/psm_user.h b/psm_user.h
index dd5384f..437c983 100644
--- a/psm_user.h
+++ b/psm_user.h
@@ -154,13 +154,13 @@ _psmi_get_epid_version()) {
return psmi_epid_ver;
}
-#define PSMI_EPID_VERSION_SHM 0
+#define PSMI_EPID_VERSION_SHM 0
#define PSMI_EPID_SHM_ONLY 1
#define PSMI_EPID_IPS_SHM 0
#define PSMI_EPID_VERSION _psmi_get_epid_version()
-#define PSMI_MAX_EPID_VERNO_SUPPORTED 2
-#define PSMI_MIN_EPID_VERNO_SUPPORTED 1
-#define PSMI_EPID_VERNO_DEFAULT 2
+#define PSMI_MAX_EPID_VERNO_SUPPORTED 2
+#define PSMI_MIN_EPID_VERNO_SUPPORTED 1
+#define PSMI_EPID_VERNO_DEFAULT 2
#define PSMI_EPID_V1 1
#define PSMI_EPID_V2 2
@@ -240,18 +240,18 @@ _psmi_mutex_unlock_inner(pthread_mutex_t *mutex,
#define _PSMI_LOCK_INIT(pl) /* static initialization */
#define _PSMI_LOCK_TRY(pl) \
- _psmi_mutex_trylock_inner(&((pl).lock), PSMI_CURLOC, \
+ _psmi_mutex_trylock_inner(&((pl).lock), PSMI_CURLOC, \
&((pl).lock_owner))
#define _PSMI_LOCK(pl) \
- _psmi_mutex_lock_inner(&((pl).lock), PSMI_CURLOC, \
+ _psmi_mutex_lock_inner(&((pl).lock), PSMI_CURLOC, \
&((pl).lock_owner))
#define _PSMI_UNLOCK(pl) \
- _psmi_mutex_unlock_inner(&((pl).lock), PSMI_CURLOC, \
+ _psmi_mutex_unlock_inner(&((pl).lock), PSMI_CURLOC, \
&((pl).lock_owner))
#define _PSMI_LOCK_ASSERT(pl) \
- psmi_assert_always(pl.lock_owner == pthread_self());
-#define _PSMI_UNLOCK_ASSERT(pl) \
- psmi_assert_always(pl.lock_owner != pthread_self());
+ psmi_assert_always((pl).lock_owner == pthread_self());
+#define _PSMI_UNLOCK_ASSERT(pl) \
+ psmi_assert_always((pl).lock_owner != pthread_self());
#define PSMI_LOCK_DISABLED 0
#elif defined(PSMI_LOCK_IS_MUTEXLOCK)
@@ -409,7 +409,7 @@ cudaError_t (*psmi_cudaIpcCloseMemHandle)(void* devPtr);
#define PSMI_CUDA_CHECK_EVENT(event, cudaerr) do { \
cudaerr = psmi_cudaEventQuery(event); \
- if ((cudaerr != cudaSuccess) && \
+ if ((cudaerr != cudaSuccess) && \
(cudaerr != cudaErrorNotReady)) { \
_HFI_ERROR( \
"CUDA failure: %s() returned %d\n", \
@@ -420,7 +420,15 @@ cudaError_t (*psmi_cudaIpcCloseMemHandle)(void* devPtr);
} \
} while (0)
-
+#define PSMI_CUDA_DLSYM(psmi_cuda_lib,func) do { \
+ psmi_##func = dlsym(psmi_cuda_lib, STRINGIFY(func)); \
+ if (!psmi_##func) { \
+ psmi_handle_error(PSMI_EP_NORETURN, \
+ PSM2_INTERNAL_ERR, \
+ " Unable to resolve %s symbol" \
+ " in CUDA libraries.\n",STRINGIFY(func));\
+ } \
+} while (0)
PSMI_ALWAYS_INLINE(
int
diff --git a/psm_utils.c b/psm_utils.c
index df45cdd..e5d4fbc 100644
--- a/psm_utils.c
+++ b/psm_utils.c
@@ -401,12 +401,15 @@ static int psmi_getenv_is_verblevel(int printlevel)
return (printlevel <= psmi_getenv_verblevel);
}
-#define GETENV_PRINTF(_level, _fmt, ...) \
- do { \
- int nlevel = _level; \
- if (psmi_getenv_is_verblevel(nlevel)) \
- nlevel = 0; \
- _HFI_ENVDBG(nlevel, _fmt, ##__VA_ARGS__); \
+#define GETENV_PRINTF(_level, _fmt, ...) \
+ do { \
+ if ((_level & PSMI_ENVVAR_LEVEL_NEVER_PRINT) == 0) \
+ { \
+ int nlevel = _level; \
+ if (psmi_getenv_is_verblevel(nlevel)) \
+ nlevel = 0; \
+ _HFI_ENVDBG(nlevel, _fmt, ##__VA_ARGS__); \
+ } \
} while (0)
int
@@ -476,10 +479,14 @@ MOCKABLE(psmi_getenv)(const char *name, const char *descr, int level,
used_default = 1;
} else {
char *ep;
- tval.e_int = (int)strtol(env, &ep, 0);
+ /* Avoid base 8 (octal) on purpose, so don't pass in 0 for radix */
+ tval.e_int = (int)strtol(env, &ep, 10);
if (ep == env) {
- used_default = 1;
- tval = defval;
+ tval.e_int = (int)strtol(env, &ep, 16);
+ if (ep == env) {
+ used_default = 1;
+ tval = defval;
+ }
}
}
_GETENV_PRINT(used_default, "%d", tval.e_int, defval.e_int);
@@ -492,10 +499,14 @@ MOCKABLE(psmi_getenv)(const char *name, const char *descr, int level,
used_default = 1;
} else {
char *ep;
- tval.e_int = (unsigned int)strtoul(env, &ep, 0);
+ /* Avoid base 8 (octal) on purpose, so don't pass in 0 for radix */
+ tval.e_int = (unsigned int)strtoul(env, &ep, 10);
if (ep == env) {
- used_default = 1;
- tval = defval;
+ tval.e_int = (unsigned int)strtoul(env, &ep, 16);
+ if (ep == env) {
+ used_default = 1;
+ tval = defval;
+ }
}
}
if (type == PSMI_ENVVAR_TYPE_UINT_FLAGS)
@@ -512,10 +523,14 @@ MOCKABLE(psmi_getenv)(const char *name, const char *descr, int level,
used_default = 1;
} else {
char *ep;
- tval.e_long = strtol(env, &ep, 0);
+ /* Avoid base 8 (octal) on purpose, so don't pass in 0 for radix */
+ tval.e_long = strtol(env, &ep, 10);
if (ep == env) {
- used_default = 1;
- tval = defval;
+ tval.e_long = strtol(env, &ep, 16);
+ if (ep == env) {
+ used_default = 1;
+ tval = defval;
+ }
}
}
_GETENV_PRINT(used_default, "%ld", tval.e_long, defval.e_long);
@@ -526,11 +541,16 @@ MOCKABLE(psmi_getenv)(const char *name, const char *descr, int level,
used_default = 1;
} else {
char *ep;
+ /* Avoid base 8 (octal) on purpose, so don't pass in 0 for radix */
tval.e_ulonglong =
- (unsigned long long)strtoull(env, &ep, 0);
+ (unsigned long long)strtoull(env, &ep, 10);
if (ep == env) {
- used_default = 1;
- tval = defval;
+ tval.e_ulonglong =
+ (unsigned long long)strtoull(env, &ep, 16);
+ if (ep == env) {
+ used_default = 1;
+ tval = defval;
+ }
}
}
_GETENV_PRINT(used_default, "%llu",
@@ -544,10 +564,14 @@ MOCKABLE(psmi_getenv)(const char *name, const char *descr, int level,
used_default = 1;
} else {
char *ep;
- tval.e_ulong = (unsigned long)strtoul(env, &ep, 0);
+ /* Avoid base 8 (octal) on purpose, so don't pass in 0 for radix */
+ tval.e_ulong = (unsigned long)strtoul(env, &ep, 10);
if (ep == env) {
- used_default = 1;
- tval = defval;
+ tval.e_ulong = (unsigned long)strtoul(env, &ep, 16);
+ if (ep == env) {
+ used_default = 1;
+ tval = defval;
+ }
}
}
if (type == PSMI_ENVVAR_TYPE_ULONG_FLAGS)
diff --git a/psm_utils.h b/psm_utils.h
index 07d198b..3358704 100644
--- a/psm_utils.h
+++ b/psm_utils.h
@@ -259,8 +259,9 @@ union psmi_envvar_val {
unsigned long long e_ulonglong;
};
-#define PSMI_ENVVAR_LEVEL_USER 1
-#define PSMI_ENVVAR_LEVEL_HIDDEN 2
+#define PSMI_ENVVAR_LEVEL_USER 1
+#define PSMI_ENVVAR_LEVEL_HIDDEN 2
+#define PSMI_ENVVAR_LEVEL_NEVER_PRINT 4
#define PSMI_ENVVAR_TYPE_YESNO 0
#define PSMI_ENVVAR_TYPE_STR 1
diff --git a/ptl_am/Makefile b/ptl_am/Makefile
index 5aa5a46..1109e89 100644
--- a/ptl_am/Makefile
+++ b/ptl_am/Makefile
@@ -55,7 +55,6 @@ OUTDIR = .
this_srcdir := $(shell readlink -m .)
top_srcdir := $(this_srcdir)/..
-include $(top_srcdir)/buildflags.mak
INCLUDES += -I$(top_srcdir)
${TARGLIB}-objs := am_reqrep.o am_reqrep_shmem.o ptl.o cmarwu.o
@@ -69,10 +68,10 @@ IGNORE_DEP_TARGETS = clean
all .DEFAULT: ${${TARGLIB}-objs}
$(OUTDIR)/%.d: $(this_srcdir)/%.c
- $(CC) $(CFLAGS) $(INCLUDES) $< -MM -MF $@ -MQ $(@:.d=.o)
+ $(CC) $(CFLAGS) $(BASECFLAGS) $(INCLUDES) $< -MM -MF $@ -MQ $(@:.d=.o)
$(OUTDIR)/%.o: $(this_srcdir)/%.c | ${DEPS}
- $(CC) $(CFLAGS) $(INCLUDES) -c $< -o $@
+ $(CC) $(CFLAGS) $(BASECFLAGS) $(INCLUDES) -c $< -o $@
clean:
@if [ -d $(OUTDIR) ]; then \
diff --git a/ptl_am/ptl.c b/ptl_am/ptl.c
index 1f20cdf..99479c5 100644
--- a/ptl_am/ptl.c
+++ b/ptl_am/ptl.c
@@ -350,15 +350,27 @@ void
psmi_am_handler(void *toki, psm2_amarg_t *args, int narg, void *buf, size_t len)
{
amsh_am_token_t *tok = (amsh_am_token_t *) toki;
- psm2_am_handler_fn_t hfn;
+ struct psm2_ep_am_handle_entry *hentry;
psmi_assert(toki != NULL);
- hfn = psm_am_get_handler_function(tok->mq->ep,
+ hentry = psm_am_get_handler_function(tok->mq->ep,
(psm2_handler_t) args[0].u32w0);
+ /* Note: a guard here for hentry != NULL is not needed because, at
+ * initialization, a psmi_assert_always() ensures the entry will be
+ * non-NULL. */
+
/* Invoke handler function. For AM we do not support break functionality */
- hfn(toki, args + 1, narg - 1, buf, len);
+ if (likely(hentry->version == PSM2_AM_HANDLER_V2)) {
+ psm2_am_handler_2_fn_t hfn2 =
+ (psm2_am_handler_2_fn_t)hentry->hfn;
+ hfn2(toki, args + 1, narg - 1, buf, len, hentry->hctx);
+ } else {
+ psm2_am_handler_fn_t hfn1 =
+ (psm2_am_handler_fn_t)hentry->hfn;
+ hfn1(toki, args + 1, narg - 1, buf, len);
+ }
return;
}
diff --git a/ptl_ips/Makefile b/ptl_ips/Makefile
index d48c883..86e2055 100644
--- a/ptl_ips/Makefile
+++ b/ptl_ips/Makefile
@@ -55,7 +55,6 @@ OUTDIR = .
this_srcdir = $(shell readlink -m .)
top_srcdir := $(this_srcdir)/..
-include $(top_srcdir)/buildflags.mak
INCLUDES += -I$(top_srcdir)
${TARGLIB}-objs := ptl.o ptl_rcvthread.o ips_proto.o ipserror.o ips_recvq.o \
@@ -74,10 +73,10 @@ IGNORE_DEP_TARGETS = clean
all .DEFAULT: ${${TARGLIB}-objs}
$(OUTDIR)/%.d: $(this_srcdir)/%.c
- $(CC) $(CFLAGS) $(INCLUDES) $< -MM -MF $@ -MQ $(@:.d=.o)
+ $(CC) $(CFLAGS) $(BASECFLAGS) $(INCLUDES) $< -MM -MF $@ -MQ $(@:.d=.o)
$(OUTDIR)/%.o: $(this_srcdir)/%.c | ${DEPS}
- $(CC) $(CFLAGS) $(INCLUDES) -c $< -o $@
+ $(CC) $(CFLAGS) $(BASECFLAGS) $(INCLUDES) -c $< -o $@
clean:
@if [ -d $(OUTDIR) ]; then \
diff --git a/ptl_ips/ips_path_rec.c b/ptl_ips/ips_path_rec.c
index 647b111..1d52a55 100644
--- a/ptl_ips/ips_path_rec.c
+++ b/ptl_ips/ips_path_rec.c
@@ -659,7 +659,7 @@ MOCKABLE(ips_ibta_init)(struct ips_proto *proto)
_HFI_PRDBG("Static path selection: Base LID\n");
psmi_getenv("PSM2_DISABLE_CCA",
- "Disable use of Congestion Control Architecure (CCA) [enabled] ",
+ "Disable use of Congestion Control Architecture (CCA) [enabled] ",
PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_UINT,
(union psmi_envvar_val)0, &disable_cca);
if (disable_cca.e_uint)
diff --git a/ptl_ips/ips_proto.c b/ptl_ips/ips_proto.c
index 150bda1..2c4ebd9 100644
--- a/ptl_ips/ips_proto.c
+++ b/ptl_ips/ips_proto.c
@@ -1097,6 +1097,10 @@ ips_proto_timer_ctrlq_callback(struct psmi_timer *timer, uint64_t t_cyc_expire)
while (ctrlq->ctrlq_cqe[ctrlq->ctrlq_tail].msg_queue_mask) {
cqe = &ctrlq->ctrlq_cqe[ctrlq->ctrlq_tail];
+ /* When PSM_PERF is enabled, the following line causes the
+ PMU to start a stop watch to measure instruction cycles of the
+ TX speedpath of PSM. The stop watch is stopped below. */
+ GENERIC_PERF_BEGIN(PSM_TX_SPEEDPATH_CTR);
if (cqe->msg_scb.flow->transfer == PSM_TRANSFER_PIO) {
err = ips_spio_transfer_frame(proto,
cqe->msg_scb.flow, &cqe->msg_scb.pbc,
@@ -1112,6 +1116,10 @@ ips_proto_timer_ctrlq_callback(struct psmi_timer *timer, uint64_t t_cyc_expire)
cqe->msg_scb.cksum, 0,
have_cksum, cqe->msg_scb.cksum[0]);
}
+ /* When PSM_PERF is enabled, the following line causes the
+ PMU to stop a stop watch to measure instruction cycles of the
+ TX speedpath of PSM. The stop watch was started above. */
+ GENERIC_PERF_END(PSM_TX_SPEEDPATH_CTR);
if (err == PSM2_OK) {
ips_proto_epaddr_stats_set(proto, cqe->message_type);
@@ -1197,6 +1205,10 @@ ips_proto_send_ctrl_message(struct ips_flow *flow, uint8_t message_type,
switch (flow->transfer) {
case PSM_TRANSFER_PIO:
+ /* When PSM_PERF is enabled, the following line causes the
+ PMU to start a stop watch to measure instruction cycles of the
+ TX speedpath of PSM. The stop watch is stopped below. */
+ GENERIC_PERF_BEGIN(PSM_TX_SPEEDPATH_CTR);
err = ips_spio_transfer_frame(proto, flow,
&ctrlscb->pbc, payload, paylen,
PSMI_TRUE, have_cksum, ctrlscb->cksum[0]
@@ -1204,11 +1216,23 @@ ips_proto_send_ctrl_message(struct ips_flow *flow, uint8_t message_type,
, 0
#endif
);
+ /* When PSM_PERF is enabled, the following line causes the
+ PMU to stop a stop watch to measure instruction cycles of the
+ TX speedpath of PSM. The stop watch was started above. */
+ GENERIC_PERF_END(PSM_TX_SPEEDPATH_CTR);
break;
case PSM_TRANSFER_DMA:
+ /* When PSM_PERF is enabled, the following line causes the
+ PMU to start a stop watch to measure instruction cycles of the
+ TX speedpath of PSM. The stop watch is stopped below. */
+ GENERIC_PERF_BEGIN(PSM_TX_SPEEDPATH_CTR);
err = ips_dma_transfer_frame(proto, flow,
ctrlscb, payload, paylen,
have_cksum, ctrlscb->cksum[0]);
+ /* When PSM_PERF is enabled, the following line causes the
+ PMU to stop a stop watch to measure instruction cycles of the
+ TX speedpath of PSM. The stop watch was started above. */
+ GENERIC_PERF_END(PSM_TX_SPEEDPATH_CTR);
break;
default:
err = PSM2_INTERNAL_ERR;
@@ -1347,6 +1371,10 @@ ips_proto_flow_flush_pio(struct ips_flow *flow, int *nflushed)
scb = SLIST_FIRST(scb_pend);
psmi_assert(scb->nfrag == 1);
+ /* When PSM_PERF is enabled, the following line causes the
+ PMU to start a stop watch to measure instruction cycles of the
+ TX speedpath of PSM. The stop watch is stopped below. */
+ GENERIC_PERF_BEGIN(PSM_TX_SPEEDPATH_CTR);
if ((err = ips_spio_transfer_frame(proto, flow, &scb->pbc,
ips_scb_buffer(scb),
scb->payload_size,
@@ -1359,6 +1387,10 @@ ips_proto_flow_flush_pio(struct ips_flow *flow, int *nflushed)
, IS_TRANSFER_BUF_GPU_MEM(scb)
#endif
)) == PSM2_OK) {
+ /* When PSM_PERF is enabled, the following line causes the
+ PMU to stop a stop watch to measure instruction cycles of the
+ TX speedpath of PSM. The stop watch was started above. */
+ GENERIC_PERF_END(PSM_TX_SPEEDPATH_CTR);
t_cyc = get_cycles();
scb->flags &= ~IPS_SEND_FLAG_PENDING;
scb->ack_timeout = proto->epinfo.ep_timeout_ack;
@@ -1373,7 +1405,13 @@ ips_proto_flow_flush_pio(struct ips_flow *flow, int *nflushed)
#endif
} else
+ {
+ /* When PSM_PERF is enabled, the following line causes the
+ PMU to stop a stop watch to measure instruction cycles of the
+ TX speedpath of PSM. The stop watch was started above. */
+ GENERIC_PERF_END(PSM_TX_SPEEDPATH_CTR);
break;
+ }
}
/* If out of flow credits re-schedule send timer */
diff --git a/ptl_ips/ips_proto_am.c b/ptl_ips/ips_proto_am.c
index 98a7460..f5eb1cf 100644
--- a/ptl_ips/ips_proto_am.c
+++ b/ptl_ips/ips_proto_am.c
@@ -410,7 +410,8 @@ ips_am_run_handler(const struct ips_message_header *p_hdr,
{
struct ips_am_token token;
int nargs = p_hdr->amhdr_nargs;
- psm2_am_handler_fn_t hfn;
+ int ret;
+ struct psm2_ep_am_handle_entry *hentry;
psm2_amarg_t *args = (psm2_amarg_t *)p_hdr->data;
token.tok.flags = p_hdr->flags;
@@ -449,10 +450,23 @@ ips_am_run_handler(const struct ips_message_header *p_hdr,
paylen -= p_hdr->amhdr_len;
}
- hfn = psm_am_get_handler_function(proto_am->proto->ep,
+ hentry = psm_am_get_handler_function(proto_am->proto->ep,
p_hdr->amhdr_hidx);
- int ret = hfn(&token, args, nargs, payload, paylen);
+ /* Note: a guard here for hentry != NULL is not needed because, at
+ * initialization, a psmi_assert_always() ensures the entry will be
+ * non-NULL. */
+
+ if (likely(hentry->version == PSM2_AM_HANDLER_V2)) {
+ psm2_am_handler_2_fn_t hfn2 =
+ (psm2_am_handler_2_fn_t)hentry->hfn;
+ ret = hfn2(&token, args, nargs, payload, paylen, hentry->hctx);
+ } else {
+ psm2_am_handler_fn_t hfn1 =
+ (psm2_am_handler_fn_t)hentry->hfn;
+ ret = hfn1(&token, args, nargs, payload, paylen);
+ }
+
return ret;
}
diff --git a/ptl_ips/ips_recvhdrq.c b/ptl_ips/ips_recvhdrq.c
index 4b2617f..7c61399 100644
--- a/ptl_ips/ips_recvhdrq.c
+++ b/ptl_ips/ips_recvhdrq.c
@@ -438,6 +438,10 @@ process_pending_acks(struct ips_recvhdrq *recvq))
*/
psm2_error_t ips_recvhdrq_progress(struct ips_recvhdrq *recvq)
{
+ /* When PSM_PERF is enabled, the following line causes the
+ PMU to start a stop watch to measure instruction cycles of the
+ RX speedpath of PSM. The stop watch is stopped below. */
+ GENERIC_PERF_BEGIN(PSM_RX_SPEEDPATH_CTR);
struct ips_recvhdrq_state *state = recvq->state;
const __le32 *rhf;
PSMI_CACHEALIGN struct ips_recvhdrq_event rcv_ev = {.proto =
@@ -545,6 +549,11 @@ psm2_error_t ips_recvhdrq_progress(struct ips_recvhdrq *recvq)
if (ret == IPS_RECVHDRQ_REVISIT)
{
PSM2_LOG_MSG("leaving");
+ /* When PSM_PERF is enabled, the following line causes the
+ PMU to stop a stop watch to measure instruction cycles of
+ the RX speedpath of PSM. The stop watch was started
+ above. */
+ GENERIC_PERF_END(PSM_RX_SPEEDPATH_CTR);
return PSM2_OK_NO_PROGRESS;
}
@@ -631,6 +640,11 @@ psm2_error_t ips_recvhdrq_progress(struct ips_recvhdrq *recvq)
if (ret == IPS_RECVHDRQ_REVISIT)
{
PSM2_LOG_MSG("leaving");
+ /* When PSM_PERF is enabled, the following line causes the
+ PMU to stop a stop watch to measure instruction cycles of
+ the RX speedpath of PSM. The stop watch was started
+ above. */
+ GENERIC_PERF_END(PSM_RX_SPEEDPATH_CTR);
return PSM2_OK_NO_PROGRESS;
}
}
@@ -726,6 +740,11 @@ skip_packet_no_egr_update:
process_pending_acks(recvq);
PSM2_LOG_MSG("leaving");
+ /* When PSM_PERF is enabled, the following line causes the
+ PMU to stop a stop watch to measure instruction cycles of
+ the RX speedpath of PSM. The stop watch was started
+ above. */
+ GENERIC_PERF_END(PSM_RX_SPEEDPATH_CTR);
return num_hdrq_done ? PSM2_OK : PSM2_OK_NO_PROGRESS;
}
diff --git a/ptl_ips/ips_tidcache.c b/ptl_ips/ips_tidcache.c
index ecc0bba..aad1ee9 100644
--- a/ptl_ips/ips_tidcache.c
+++ b/ptl_ips/ips_tidcache.c
@@ -203,14 +203,21 @@ retry:
* PSM frees tidcache enteries when the driver sends
* EINVAL there by unpinning pages and freeing some
* BAR1 space.*/
- || (PSMI_IS_CUDA_ENABLED && errno == EINVAL)
+ || (PSMI_IS_CUDA_ENABLED && PSMI_IS_CUDA_MEM((void*)start) && errno == EINVAL)
#endif
) && NIDLE) {
uint64_t lengthEvicted = ips_tidcache_evict(tidc,length);
if (lengthEvicted >= length)
goto retry;
- }
+ } else if (errno == EFAULT)
+ psmi_handle_error(PSMI_EP_NORETURN, PSM2_INTERNAL_ERR,
+ " Unhandled error in TID Update: %s\n", strerror(errno));
+#ifdef PSM_CUDA
+ else if (PSMI_IS_CUDA_ENABLED && errno == ENOTSUP)
+ psmi_handle_error(PSMI_EP_NORETURN, PSM2_INTERNAL_ERR,
+ " Nvidia driver apis mismatch: %s\n", strerror(errno));
+#endif
/* Unable to pin pages? retry later */
return PSM2_EP_DEVICE_FAILURE;
diff --git a/ptl_self/Makefile b/ptl_self/Makefile
index daeac5b..6af8bf7 100644
--- a/ptl_self/Makefile
+++ b/ptl_self/Makefile
@@ -55,7 +55,6 @@ OUTDIR = .
this_srcdir = $(shell readlink -m .)
top_srcdir := $(this_srcdir)/..
-include $(top_srcdir)/buildflags.mak
INCLUDES += -I$(top_srcdir)
${TARGLIB}-objs := ptl.o
@@ -68,10 +67,10 @@ IGNORE_DEP_TARGETS = clean
all .DEFAULT: ${${TARGLIB}-objs}
$(OUTDIR)/%.d: $(this_srcdir)/%.c
- $(CC) $(CFLAGS) $(INCLUDES) $< -MM -MF $@ -MQ $(@:.d=.o)
+ $(CC) $(CFLAGS) $(BASECFLAGS) $(INCLUDES) $< -MM -MF $@ -MQ $(@:.d=.o)
$(OUTDIR)/%.o: $(this_srcdir)/%.c | ${DEPS}
- $(CC) $(CFLAGS) $(INCLUDES) -c $< -o $@
+ $(CC) $(CFLAGS) $(BASECFLAGS) $(INCLUDES) -c $< -o $@
clean:
@if [ -d $(OUTDIR) ]; then \
diff --git a/ptl_self/ptl.c b/ptl_self/ptl.c
index da613d9..4e42bef 100644
--- a/ptl_self/ptl.c
+++ b/ptl_self/ptl.c
@@ -223,14 +223,27 @@ self_am_short_request(psm2_epaddr_t epaddr,
psm2_am_completion_fn_t completion_fn,
void *completion_ctxt)
{
- psm2_am_handler_fn_t hfn;
+ struct psm2_ep_am_handle_entry *hentry;
psm2_ep_t ep = epaddr->ptlctl->ptl->ep;
struct psmi_am_token tok;
tok.epaddr_incoming = epaddr;
- hfn = psm_am_get_handler_function(ep, handler);
- hfn(&tok, args, nargs, src, len);
+ hentry = psm_am_get_handler_function(ep, handler);
+
+ /* Note: a guard here for hentry != NULL is not needed because, at
+ * initialization, a psmi_assert_always() ensures the entry will be
+ * non-NULL. */
+
+ if (likely(hentry->version == PSM2_AM_HANDLER_V2)) {
+ psm2_am_handler_2_fn_t hfn2 =
+ (psm2_am_handler_2_fn_t)hentry->hfn;
+ hfn2(&tok, args, nargs, src, len, hentry->hctx);
+ } else {
+ psm2_am_handler_fn_t hfn1 =
+ (psm2_am_handler_fn_t)hentry->hfn;
+ hfn1(&tok, args, nargs, src, len);
+ }
if (completion_fn) {
completion_fn(completion_ctxt);
@@ -246,12 +259,25 @@ self_am_short_reply(psm2_am_token_t token,
void *src, size_t len, int flags,
psm2_am_completion_fn_t completion_fn, void *completion_ctxt)
{
- psm2_am_handler_fn_t hfn;
+ struct psm2_ep_am_handle_entry *hentry;
struct psmi_am_token *tok = token;
psm2_ep_t ep = tok->epaddr_incoming->ptlctl->ptl->ep;
- hfn = psm_am_get_handler_function(ep, handler);
- hfn(token, args, nargs, src, len);
+ hentry = psm_am_get_handler_function(ep, handler);
+
+ /* Note: a guard here for hentry != NULL is not needed because, at
+ * initialization, a psmi_assert_always() ensures the entry will be
+ * non-NULL. */
+
+ if (likely(hentry->version == PSM2_AM_HANDLER_V2)) {
+ psm2_am_handler_2_fn_t hfn2 =
+ (psm2_am_handler_2_fn_t)hentry->hfn;
+ hfn2(token, args, nargs, src, len, hentry->hctx);
+ } else {
+ psm2_am_handler_fn_t hfn1 =
+ (psm2_am_handler_fn_t)hentry->hfn;
+ hfn1(token, args, nargs, src, len);
+ }
if (completion_fn) {
completion_fn(completion_ctxt);
diff --git a/rpm_release_extension b/rpm_release_extension
index 98d9bcb..81b5c5d 100644
--- a/rpm_release_extension
+++ b/rpm_release_extension
@@ -1 +1 @@
-17
+37
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-ofed/libpsm2.git
More information about the Pkg-ofed-commits
mailing list