[Pkg-ofed-commits] [dapl] 01/06: Imported Upstream version 2.1.6

Ana Beatriz Guerrero López ana at moszumanska.debian.org
Fri Mar 25 17:45:17 UTC 2016


This is an automated email from the git hooks/post-receive script.

ana pushed a commit to branch master
in repository dapl.

commit c11fc1052b7be61455c38d6e1efce1e7628f10d4
Author: Ana Beatriz Guerrero Lopez <ana at debian.org>
Date:   Fri Mar 25 18:44:18 2016 +0100

    Imported Upstream version 2.1.6
---
 AUTHORS                                          |    0
 COPYING                                          |    0
 ChangeLog                                        |  317 +++
 INSTALL                                          |    0
 LICENSE.txt                                      |    0
 LICENSE2.txt                                     |    0
 LICENSE3.txt                                     |    0
 NEWS                                             |    0
 README                                           | 2828 ++++++++++++++++++----
 README.mcm                                       |  309 +--
 config.h.in                                      |    0
 config/ltmain.sh                                 |    0
 configure                                        |   20 +-
 configure.in                                     |    4 +-
 dapl.spec                                        |    5 +-
 dapl.spec.in                                     |    3 +
 dapl/common/dapl_cookie.c                        |    0
 dapl/common/dapl_cookie.h                        |    0
 dapl/common/dapl_cr_accept.c                     |    0
 dapl/common/dapl_cr_callback.c                   |    0
 dapl/common/dapl_cr_handoff.c                    |    0
 dapl/common/dapl_cr_query.c                      |    0
 dapl/common/dapl_cr_util.c                       |    0
 dapl/common/dapl_cr_util.h                       |    0
 dapl/common/dapl_debug.c                         |    0
 dapl/common/dapl_ep_create.c                     |    0
 dapl/common/dapl_ep_create_with_srq.c            |    0
 dapl/common/dapl_ep_disconnect.c                 |    0
 dapl/common/dapl_ep_dup_connect.c                |    0
 dapl/common/dapl_ep_free.c                       |    0
 dapl/common/dapl_ep_get_status.c                 |    0
 dapl/common/dapl_ep_modify.c                     |    0
 dapl/common/dapl_ep_post_rdma_read.c             |    0
 dapl/common/dapl_ep_post_rdma_write.c            |    0
 dapl/common/dapl_ep_post_recv.c                  |    0
 dapl/common/dapl_ep_post_send.c                  |    0
 dapl/common/dapl_ep_query.c                      |    0
 dapl/common/dapl_ep_recv_query.c                 |    0
 dapl/common/dapl_ep_reset.c                      |    0
 dapl/common/dapl_ep_set_watermark.c              |    0
 dapl/common/dapl_ep_util.c                       |    0
 dapl/common/dapl_ep_util.h                       |    0
 dapl/common/dapl_evd_connection_callb.c          |    0
 dapl/common/dapl_evd_cq_async_error_callb.c      |    0
 dapl/common/dapl_evd_dequeue.c                   |    0
 dapl/common/dapl_evd_post_se.c                   |    0
 dapl/common/dapl_evd_qp_async_error_callb.c      |    0
 dapl/common/dapl_evd_resize.c                    |    0
 dapl/common/dapl_evd_un_async_error_callb.c      |    0
 dapl/common/dapl_evd_util.c                      |    1 +
 dapl/common/dapl_evd_util.h                      |    0
 dapl/common/dapl_get_consumer_context.c          |    0
 dapl/common/dapl_get_handle_type.c               |    0
 dapl/common/dapl_hash.c                          |    0
 dapl/common/dapl_hash.h                          |    0
 dapl/common/dapl_hca_util.c                      |    0
 dapl/common/dapl_hca_util.h                      |    0
 dapl/common/dapl_ia_close.c                      |    0
 dapl/common/dapl_ia_open.c                       |    0
 dapl/common/dapl_ia_util.h                       |    0
 dapl/common/dapl_init.h                          |    0
 dapl/common/dapl_llist.c                         |    0
 dapl/common/dapl_lmr_free.c                      |    0
 dapl/common/dapl_lmr_query.c                     |    0
 dapl/common/dapl_lmr_sync_rdma_read.c            |    0
 dapl/common/dapl_lmr_sync_rdma_write.c           |    0
 dapl/common/dapl_lmr_util.c                      |    0
 dapl/common/dapl_lmr_util.h                      |    0
 dapl/common/dapl_mr_util.c                       |    0
 dapl/common/dapl_mr_util.h                       |    0
 dapl/common/dapl_name_service.c                  |    0
 dapl/common/dapl_name_service.h                  |    0
 dapl/common/dapl_provider.h                      |    0
 dapl/common/dapl_psp_create.c                    |    0
 dapl/common/dapl_psp_create_any.c                |    0
 dapl/common/dapl_psp_free.c                      |    0
 dapl/common/dapl_psp_query.c                     |    0
 dapl/common/dapl_pz_create.c                     |    0
 dapl/common/dapl_pz_free.c                       |    0
 dapl/common/dapl_pz_query.c                      |    0
 dapl/common/dapl_pz_util.c                       |    0
 dapl/common/dapl_pz_util.h                       |    0
 dapl/common/dapl_ring_buffer_util.c              |    0
 dapl/common/dapl_ring_buffer_util.h              |    0
 dapl/common/dapl_rmr_free.c                      |    0
 dapl/common/dapl_rmr_query.c                     |    0
 dapl/common/dapl_rmr_util.c                      |    0
 dapl/common/dapl_rmr_util.h                      |    0
 dapl/common/dapl_rsp_create.c                    |    0
 dapl/common/dapl_rsp_free.c                      |    0
 dapl/common/dapl_rsp_query.c                     |    0
 dapl/common/dapl_set_consumer_context.c          |    0
 dapl/common/dapl_sp_util.c                       |    0
 dapl/common/dapl_sp_util.h                       |    0
 dapl/common/dapl_srq_create.c                    |    0
 dapl/common/dapl_srq_free.c                      |    0
 dapl/common/dapl_srq_post_recv.c                 |    0
 dapl/common/dapl_srq_query.c                     |    0
 dapl/common/dapl_srq_resize.c                    |    0
 dapl/common/dapl_srq_set_lw.c                    |    0
 dapl/common/dapl_srq_util.c                      |    0
 dapl/common/dapl_srq_util.h                      |    0
 dapl/common/dapl_timer_util.c                    |    0
 dapl/common/dapl_timer_util.h                    |    0
 dapl/include/dapl.h                              |    1 +
 dapl/include/dapl_debug.h                        |    0
 dapl/include/dapl_ipoib_names.h                  |    0
 dapl/include/dapl_vendor.h                       |    0
 dapl/openib_cma/cm.c                             |    0
 dapl/openib_cma/device.c                         |    0
 dapl/openib_cma/linux/openib_osd.h               |    0
 dapl/openib_common/collectives/fca_provider.c    |    0
 dapl/openib_common/collectives/fca_provider.h    |    0
 dapl/openib_common/collectives/ib_collectives.h  |    0
 dapl/openib_common/cq.c                          |   18 +-
 dapl/openib_common/dapl_ib_common.h              |    0
 dapl/openib_common/dapl_ib_dto.h                 |    5 +-
 dapl/openib_common/dapl_mic_common.h             |  100 +-
 dapl/openib_common/ib_extensions.c               |    4 +-
 dapl/openib_common/mem.c                         |  150 +-
 dapl/openib_common/qp.c                          |   45 +-
 dapl/openib_common/srq.c                         |    0
 dapl/openib_common/util.c                        |   58 +-
 dapl/openib_mcm/cm.c                             |  130 +-
 dapl/openib_mcm/dapl_ib_util.h                   |    6 +
 dapl/openib_mcm/device.c                         |  203 +-
 dapl/openib_mcm/linux/openib_osd.h               |    0
 dapl/openib_mcm/mix.c                            |  390 ++-
 dapl/openib_mcm/proxy.c                          |    8 +-
 dapl/openib_scm/cm.c                             |    0
 dapl/openib_scm/dapl_ib_util.h                   |    0
 dapl/openib_scm/device.c                         |    0
 dapl/openib_scm/linux/openib_osd.h               |    0
 dapl/openib_ucm/cm.c                             |    0
 dapl/openib_ucm/dapl_ib_util.h                   |    7 +
 dapl/openib_ucm/device.c                         |   25 +-
 dapl/openib_ucm/linux/openib_osd.h               |    0
 dapl/svc/mcm.c                                   |  114 +-
 dapl/svc/mix.c                                   |  316 ++-
 dapl/svc/mpxy_in.c                               |   73 +-
 dapl/svc/mpxy_out.c                              |   54 +-
 dapl/svc/mpxyd.c                                 |   42 +-
 dapl/svc/mpxyd.h                                 |   16 +-
 dapl/svc/util.c                                  |    3 +-
 dapl/udapl/dapl_cno_create.c                     |    0
 dapl/udapl/dapl_cno_free.c                       |    0
 dapl/udapl/dapl_cno_modify_agent.c               |    0
 dapl/udapl/dapl_cno_query.c                      |    0
 dapl/udapl/dapl_cno_wait.c                       |    0
 dapl/udapl/dapl_evd_clear_unwaitable.c           |    0
 dapl/udapl/dapl_evd_create.c                     |    0
 dapl/udapl/dapl_evd_disable.c                    |    0
 dapl/udapl/dapl_evd_enable.c                     |    0
 dapl/udapl/dapl_evd_modify_cno.c                 |    0
 dapl/udapl/dapl_evd_query.c                      |    0
 dapl/udapl/dapl_evd_set_unwaitable.c             |    0
 dapl/udapl/dapl_evd_wait.c                       |    0
 dapl/udapl/dapl_init.c                           |    0
 dapl/udapl/dapl_lmr_create.c                     |    0
 dapl/udapl/libdaplomcm.map                       |    0
 dapl/udapl/linux/dapl_osd.c                      |    0
 dapl/udapl/linux/dapl_osd.h                      |    0
 dat/common/dat_dictionary.c                      |    0
 dat/common/dat_dictionary.h                      |    0
 dat/common/dat_dr.c                              |    0
 dat/common/dat_dr.h                              |    0
 dat/common/dat_init.c                            |    0
 dat/common/dat_init.h                            |    0
 dat/common/dat_sr.h                              |    0
 dat/common/dat_strerror.c                        |    0
 dat/include/dat2/dat_error.h                     |    0
 dat/include/dat2/dat_platform_specific.h         |    0
 dat/include/dat2/dat_registry.h                  |    0
 dat/include/dat2/dat_vendor_specific.h           |    0
 dat/include/dat2/udat_config.h                   |    0
 dat/include/dat2/udat_vendor_specific.h          |    0
 dat/udat/linux/dat_osd.c                         |    0
 dat/udat/linux/dat_osd.h                         |    0
 dat/udat/udat_api.c                              |    0
 dat/udat/udat_sr_parser.c                        |    0
 dat/udat/udat_sr_parser.h                        |    0
 doc/dat.conf                                     |    6 +-
 doc/mpxyd.conf                                   |    0
 m4/libtool.m4                                    |    0
 m4/ltoptions.m4                                  |    0
 m4/ltsugar.m4                                    |    0
 m4/ltversion.m4                                  |    0
 m4/lt~obsolete.m4                                |    0
 man/dapltest.1                                   |    0
 man/dat.conf.5                                   |    0
 mpxyd.init.in                                    |    0
 test/dapltest/README                             |    0
 test/dapltest/cmd/dapl_fft_cmd.c                 |    0
 test/dapltest/cmd/dapl_getopt.c                  |    0
 test/dapltest/cmd/dapl_limit_cmd.c               |    0
 test/dapltest/cmd/dapl_main.c                    |    0
 test/dapltest/cmd/dapl_netaddr.c                 |    0
 test/dapltest/cmd/dapl_params.c                  |    0
 test/dapltest/cmd/dapl_performance_cmd.c         |    0
 test/dapltest/cmd/dapl_qos_util.c                |    0
 test/dapltest/cmd/dapl_quit_cmd.c                |    2 +-
 test/dapltest/cmd/dapl_server_cmd.c              |    0
 test/dapltest/cmd/dapl_transaction_cmd.c         |    0
 test/dapltest/common/dapl_endian.c               |    0
 test/dapltest/common/dapl_global.c               |    0
 test/dapltest/common/dapl_performance_cmd_util.c |    0
 test/dapltest/common/dapl_quit_cmd_util.c        |    0
 test/dapltest/common/dapl_transaction_cmd_util.c |    0
 test/dapltest/include/dapl_bpool.h               |    0
 test/dapltest/include/dapl_client_info.h         |    0
 test/dapltest/include/dapl_common.h              |    0
 test/dapltest/include/dapl_execute.h             |    0
 test/dapltest/include/dapl_fft_cmd.h             |    0
 test/dapltest/include/dapl_fft_util.h            |    0
 test/dapltest/include/dapl_getopt.h              |    0
 test/dapltest/include/dapl_global.h              |    0
 test/dapltest/include/dapl_limit_cmd.h           |    0
 test/dapltest/include/dapl_mdep.h                |    0
 test/dapltest/include/dapl_memlist.h             |    0
 test/dapltest/include/dapl_params.h              |    0
 test/dapltest/include/dapl_performance_cmd.h     |    0
 test/dapltest/include/dapl_performance_stats.h   |    0
 test/dapltest/include/dapl_performance_test.h    |    0
 test/dapltest/include/dapl_proto.h               |    0
 test/dapltest/include/dapl_quit_cmd.h            |    0
 test/dapltest/include/dapl_server_cmd.h          |    0
 test/dapltest/include/dapl_server_info.h         |    0
 test/dapltest/include/dapl_tdep.h                |    0
 test/dapltest/include/dapl_tdep_print.h          |    0
 test/dapltest/include/dapl_test_data.h           |    0
 test/dapltest/include/dapl_transaction_cmd.h     |    0
 test/dapltest/include/dapl_transaction_stats.h   |    0
 test/dapltest/include/dapl_transaction_test.h    |    0
 test/dapltest/include/dapl_version.h             |    0
 test/dapltest/mdep/linux/dapl_mdep_user.c        |    0
 test/dapltest/test/dapl_bpool.c                  |    0
 test/dapltest/test/dapl_client.c                 |    0
 test/dapltest/test/dapl_client_info.c            |    0
 test/dapltest/test/dapl_cnxn.c                   |    0
 test/dapltest/test/dapl_execute.c                |    0
 test/dapltest/test/dapl_fft_connmgt.c            |    0
 test/dapltest/test/dapl_fft_endpoint.c           |    0
 test/dapltest/test/dapl_fft_hwconn.c             |    0
 test/dapltest/test/dapl_fft_mem.c                |    0
 test/dapltest/test/dapl_fft_pz.c                 |    0
 test/dapltest/test/dapl_fft_queryinfo.c          |    0
 test/dapltest/test/dapl_fft_test.c               |    0
 test/dapltest/test/dapl_fft_util.c               |    0
 test/dapltest/test/dapl_limit.c                  |    0
 test/dapltest/test/dapl_memlist.c                |    0
 test/dapltest/test/dapl_performance_client.c     |    0
 test/dapltest/test/dapl_performance_server.c     |    0
 test/dapltest/test/dapl_performance_stats.c      |    0
 test/dapltest/test/dapl_performance_util.c       |    0
 test/dapltest/test/dapl_quit_util.c              |    0
 test/dapltest/test/dapl_server.c                 |    0
 test/dapltest/test/dapl_server_info.c            |    0
 test/dapltest/test/dapl_test_data.c              |    0
 test/dapltest/test/dapl_test_util.c              |    0
 test/dapltest/test/dapl_thread.c                 |    0
 test/dapltest/test/dapl_transaction_stats.c      |    0
 test/dapltest/test/dapl_transaction_test.c       |    0
 test/dapltest/test/dapl_transaction_util.c       |    0
 test/dapltest/test/dapl_util.c                   |    0
 test/dapltest/udapl/udapl_tdep.c                 |    0
 test/dtest/README                                |    0
 test/dtest/dtest.c                               |  222 +-
 test/dtest/dtestcm.c                             |    0
 test/dtest/dtestsrq.c                            |    0
 269 files changed, 4304 insertions(+), 1181 deletions(-)

diff --git a/AUTHORS b/AUTHORS
old mode 100644
new mode 100755
diff --git a/COPYING b/COPYING
old mode 100644
new mode 100755
diff --git a/ChangeLog b/ChangeLog
old mode 100644
new mode 100755
index e0db172..3b10b3c
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,320 @@
+commit ad43b8d3ca9f67d3231525b2808776719686deba
+Author: Arlin Davis <arlin.r.davis at intel.com>
+Date:   Wed Aug 12 17:30:23 2015 -0700
+
+    ucm: add cluster size environments to adjust CM timers
+    
+    Signed-off-by: Arlin Davis <arlin.r.davis at intel.com>
+
+commit b140211771b3fb212784c514c58198de22fa3dfc
+Author: Arlin Davis <arlin.r.davis at intel.com>
+Date:   Wed Aug 12 09:46:30 2015 -0700
+
+    mpxyd: proxy_in data transfers can improperly start before RTU received
+    
+    Proxy-in data transfers must be deferred until RTU is received
+    and QP is in CONN state. Otherwise, the remote PI WC address/rkey
+    information is still uninitialized.
+    
+    Check for initial CONN state before processing RR or WT data phase
+    and set RR to pause state until RTU and remote PI WRC information
+    is processed. Update pi_req_event error logging.
+    
+    Signed-off-by: Arlin Davis <arlin.r.davis at intel.com>
+
+commit 7e1be9682ff4c97eb2ba47a4b194b1ee0e5f3f07
+Author: Arlin Davis <arlin.r.davis at intel.com>
+Date:   Wed Aug 12 09:19:07 2015 -0700
+
+    mcm: forward open/query for MFO devices in query only mode
+    
+    Signed-off-by: Arlin Davis <arlin.r.davis at intel.com>
+
+commit 49749253cce22c22581df40039891639589dd7ac
+Author: Arlin Davis <arlin.r.davis at intel.com>
+Date:   Wed Aug 12 08:51:03 2015 -0700
+
+    mpxyd: byte swap incorrect on WRC wr_len
+    
+    Signed-off-by: Arlin Davis <arlin.r.davis at intel.com>
+
+commit c70efa65a08389b3b169d8904b23cce9960431cd
+Author: Amir Hanania <amir.hanania at intel.com>
+Date:   Mon Aug 10 17:24:15 2015 -0700
+
+    dtest: remove ERR message from flush QP function
+    
+    Signed-off-by: Arlin Davis <arlin.r.davis at intel.com>
+    Signed-off-by: Amir Hanania <amir.hanania at intel.com>
+
+commit a9a417be3f2b42dc7b8777d88be020dfa2dba1bf
+Author: David Dai <zdai at linux.vnet.ibm.com>
+Date:   Fri Aug 7 13:05:56 2015 -0700
+
+    dapltest: Quit command with "-n port" number will core dump
+    
+    -n option specified with n, should be n:
+    
+    Signed-off-by: David Dai <zdai at linux.vnet.ibm.com>
+    Signed-off-by: Arlin Davis <arlin.r.davis at intel.com>
+
+commit 11471f50ccd8e2021824fe46f68d27b82ec10c19
+Author: Amir Hanania <amir.hanania at intel.com>
+Date:   Wed Aug 5 15:01:49 2015 -0700
+
+    config: update dat.conf for MFO qib devices, 2 adapters/ports
+    
+    ofa-v2-qib0-1m and libdaplomcm.so
+    
+    Signed-off-by: Arlin Davis <arlin.r.davis at intel.com>
+    Signed-off-by: Amir Hanania <amir.hanania at intel.com>
+
+commit d6afeedb96a4ba3662a30de663b1775a0a0dda7e
+Author: Amir Hanania <amir.hanania at intel.com>
+Date:   Wed Aug 5 14:55:30 2015 -0700
+
+    mpxyd: add MFO support on proxy side
+    
+    Add checking for MFO and MXS and provide proxy-in and proxy-out
+    services for each mode. MXS_EP check is now MXF_EP (MFO or MXS).
+    Add new MIX device open, query, port query, pz operations.
+    Add new pz list and object management via scif_dev structure.
+    
+    Signed-off-by: Arlin Davis <arlin.r.davis at intel.com>
+    Signed-off-by: Amir Hanania <amir.hanania at intel.com>
+
+commit 77ce17348275a90c49cf17766f073b961897269b
+Author: Amir Hanania <amir.hanania at intel.com>
+Date:   Wed Aug 5 14:46:20 2015 -0700
+
+    mcm: add MFO proxy commands, device, and CM support
+    
+    CM will support Proxy-in services on both MFO and MXS modes.
+    CM thread will not process ibv channels when in MFO mode.
+    
+    Device open/close will export all verbs calls in MFO mode.
+    
+    Add MIX (MIC to Proxy) functions for pz, device query, port query.
+    
+    Signed-off-by: Arlin Davis <arlin.r.davis at intel.com>
+    Signed-off-by: Amir Hanania <amir.hanania at intel.com>
+
+commit 586c010c57883d6aed5cc2880eeba84933222342
+Author: Amir Hanania <amir.hanania at intel.com>
+Date:   Wed Aug 5 13:41:32 2015 -0700
+
+    mcm: add MFO support to openib_common code base
+    
+    Provide full proxy support of CQ, QP, PZ, MR and device.
+    Use new MXF_EP macro to switch proxy service based
+    on MXS (cross socket) or MFO (full offload) modes.
+    
+    Signed-off-by: Arlin Davis <arlin.r.davis at intel.com>
+    Signed-off-by: Amir Hanania <amir.hanania at intel.com>
+
+commit bff1eaa83360b4012e20ed8c440993a099e887a9
+Author: Amir Hanania <amir.hanania at intel.com>
+Date:   Wed Aug 5 13:35:28 2015 -0700
+
+    mcm: add full offload (MFO) mode to provider to support qib on MIC
+    
+    Add new MIX proxy definitions and commands for query device, query port,
+    pz create, and pz free.
+    
+    Signed-off-by: Arlin Davis <arlin.r.davis at intel.com>
+    Signed-off-by: Amir Hanania <amir.hanania at intel.com>
+
+commit a3afd24a0ff9c77bc073cb27dfc79da7236beb47
+Author: Amir Hanania <amir.hanania at intel.com>
+Date:   Wed Aug 5 13:16:12 2015 -0700
+
+    dtest: pre-allocated buffer too small for RMR, DTO ops timeout
+    
+    The buf_len settings (-b) for small IO may cause segfault.
+    Increase allocation and adjust DTO operations to infinite.
+    
+    Signed-off-by: Arlin Davis <arlin.r.davis at intel.com>
+    Signed-off-by: Amir Hanania <amir.hanania at intel.com>
+
+commit 2d8348ec1f2099d083b35f63890bb37225c25ff5
+Author: Amir Hanania <amir.hanania at intel.com>
+Date:   Fri Jul 31 15:35:12 2015 -0700
+
+    mpxyd: fix buffer initialization when no-inline support is active
+    
+    wr_buf buffer was zeroed instead of wr_buf_rx
+    
+    Signed-off-by: Amir Hanania <amir.hanania at intel.com>
+    Signed-off-by: Arlin Davis <arlin.r.davis at intel.com>
+
+commit a94eacfe4e7d8b25cb45f98f0373e5b31c5e2cf6
+Author: Arlin Davis <arlin.r.davis at intel.com>
+Date:   Thu Jul 30 08:16:17 2015 -0700
+
+    mpxyd: reduce log level on qp_flush to CM level
+    
+    Signed-off-by: Arlin Davis <arlin.r.davis at intel.com>
+
+commit f72e3b3f31096898ee86dbe511b33e4f31d1d4c4
+Author: Arlin Davis <arlin.r.davis at intel.com>
+Date:   Thu Jul 30 08:15:22 2015 -0700
+
+    mcm: intra-node proxy missing LID setup on rejects
+    
+    Signed-off-by: Arlin Davis <arlin.r.davis at intel.com>
+
+commit 03f3b77c6061380b1130c5df95c1808d679dc455
+Author: Arlin Davis <arlin.r.davis at intel.com>
+Date:   Fri Jul 24 16:01:29 2015 -0700
+
+    mcm: add intra-node support via ibscif device and mcm provider
+    
+    - New device entry ofa-v2-scif0-m
+    - Support for different CM and EP locality (MIC vs proxy LID)
+    - MSS mode for all scif device opens via proxy
+    - logging changes for multi-lid options
+    
+    Signed-off-by: Arlin Davis <arlin.r.davis at intel.com>
+
+commit 8cbf658bf7982e5f057f73349ec6bf993c13bc1c
+Author: Arlin Davis <arlin.r.davis at intel.com>
+Date:   Fri Jul 24 12:48:52 2015 -0700
+
+    mcm: provide MIC address info with proxy device open
+    
+    Signed-off-by: Arlin Davis <arlin.r.davis at intel.com>
+
+commit 8442a490aadf4ccee40b65ba007c0d38f5c8b65a
+Author: Arlin Davis <arlin.r.davis at intel.com>
+Date:   Fri Jul 24 12:45:11 2015 -0700
+
+    mcm: add device info to non-debug log
+    
+    Signed-off-by: Arlin Davis <arlin.r.davis at intel.com>
+
+commit 77e255d4ea22b41ccfe0adcc46db1f80b6ad6ec9
+Author: Arlin Davis <arlin.r.davis at intel.com>
+Date:   Tue Jul 14 15:41:35 2015 -0700
+
+    common: add DAPL_DTO_TYPE_EXTENSION_IMM for rdma_write_imm DTO type checking
+    
+    Add new extended DTO type to request cookie to identify rdma write operations
+    with immediate data during completions.
+    
+    Signed-off-by: Arlin Davis <arlin.r.davis at intel.com>
+
+commit 042a03dfd0a6b259ca7859a3c3146681df62b52e
+Author: Arlin Davis <arlin.r.davis at intel.com>
+Date:   Tue Jul 14 15:39:52 2015 -0700
+
+    mpxyd: fix up some of the PI logging
+    
+    Signed-off-by: Arlin Davis <arlin.r.davis at intel.com>
+
+commit 27fcdc8da49f6af40d84090dfc38b16ddb6c9f61
+Author: Arlin Davis <arlin.r.davis at intel.com>
+Date:   Tue Jul 14 15:30:16 2015 -0700
+
+    dtest: modify rdma_write_with_msg to support uni-direction streaming
+    
+    add proper client->server handshake at end of rdma data stream
+    to ensure all data is delivered before disconnecting.
+    
+    Signed-off-by: Arlin Davis <arlin.r.davis at intel.com>
+
+commit 0b39116c18d43faf367bac08263fa62341d8ecf8
+Author: Arlin Davis <arlin.r.davis at intel.com>
+Date:   Tue Jul 14 14:58:32 2015 -0700
+
+    mcm,mpxyd: fix dreq processing to defer QP flush when proxy WRs still pending
+    
+    The proxy will now defer DREQ flushing of proxy QPs if PI and PO
+    data engines have outstanding requests. Add mcm_qp_busy routine
+    for checking PI and PO data engines. When MIC calls disconnect
+    always send DREQ up to proxy in order to handle deferred flush
+    of proxy side posted rcv messages.
+    
+    Change QP free to modify both local and proxy QPs and check for
+    outstanding rcv message before qp_destroy to avoid infinite wait
+    in dapls_ep_flush_cqs.
+    
+    Signed-off-by: Arlin Davis <arlin.r.davis at intel.com>
+
+commit 40d7d9fc01b376fc18ac9ac7f21ab790f720241b
+Author: Arlin Davis <arlin.r.davis at intel.com>
+Date:   Tue Jul 14 14:47:24 2015 -0700
+
+    mpxyd: update byte_len and comp_cnt for PO to remote HST communications
+    
+    Signed-off-by: Arlin Davis <arlin.r.davis at intel.com>
+
+commit 57924cce8b5abbd659931d7fa10836f5dc121511
+Author: Amir Hanania <amir.hanania at intel.com>
+Date:   Wed Jun 17 10:12:24 2015 -0700
+
+    mcm: bug fixes for non-inline devices
+    
+    mcm proxy mi_send_pi setup registered WR structure properly for no
+    inline data support but incorrectly overwrote sg.addr with the
+    WR structure on stack.
+    
+    qp create didn't check for no inline and setup create accordingly
+    
+    Signed-off-by: Amir Hanania <amir.hanania at intel.com>
+    Signed-off-by: Arlin Davis <arlin.r.davis at intel.com>
+
+commit 175d2334fa704c4656b0f4e69422992cc8545698
+Author: Arlin Davis <arlin.r.davis at intel.com>
+Date:   Fri Jun 12 13:56:38 2015 -0700
+
+    mcm: return CM_rej with CM_req_in errors
+    
+    Signed-off-by: Arlin Davis <arlin.r.davis at intel.com>
+
+commit 6374ae6f588a53330d7a6dd5c030bad82d74bb4e
+Author: Arlin Davis <arlin.r.davis at intel.com>
+Date:   Fri Jun 5 12:14:37 2015 -0700
+
+    mpxyd,mcm: RDMA write with immed data not signaled on request side
+    
+    With eager completions set, the wc_flags is not set properly on event.
+    With eager completions not set, the proxy CQ reference is incorrect
+    and event is forwarded to MCM receive EVD instead of transmit EVD.
+    
+    Signed-off-by: Arlin Davis <arlin.r.davis at intel.com>
+
+commit e6667bc2062824822f07da7f168ad8f4a0986f4c
+Author: Arlin Davis <arlin.r.davis at intel.com>
+Date:   Thu Jun 4 16:53:59 2015 -0700
+
+    mcm: add WC opcode and wc_flags in debug log message
+    
+    Signed-off-by: Arlin Davis <arlin.r.davis at intel.com>
+
+commit 01963af15a6c88b2f5ea676d6cbbb230fbd8cf0b
+Author: Arlin Davis <arlin.r.davis at intel.com>
+Date:   Thu Jun 4 16:52:11 2015 -0700
+
+    mpxyd: set options bug fix for mcm_ib_inline
+    
+    Signed-off-by: Arlin Davis <arlin.r.davis at intel.com>
+
+commit 387bfa365b5d13de795a84bd1b882c3bb6ac56b8
+Author: Arlin Davis <arlin.r.davis at intel.com>
+Date:   Thu May 28 08:22:24 2015 -0700
+
+    Update release notes with latest CM times
+    
+    Signed-off-by: Arlin Davis <arlin.r.davis at intel.com>
+
+commit facfb793374e4030ebf2ed539d77270a3e90d26e
+Author: Arlin Davis <arlin.r.davis at intel.com>
+Date:   Tue May 26 10:28:11 2015 -0700
+
+    Release 2.1.5
+    
+    Signed-off-by: Arlin Davis <arlin.r.davis at intel.com>
+
 commit 2973531db7e0cb98c2524f33cd26d2dfc07c7435
 Author: Arlin Davis <arlin.r.davis at intel.com>
 Date:   Tue May 26 10:06:44 2015 -0700
diff --git a/INSTALL b/INSTALL
old mode 100644
new mode 100755
diff --git a/LICENSE.txt b/LICENSE.txt
old mode 100644
new mode 100755
diff --git a/LICENSE2.txt b/LICENSE2.txt
old mode 100644
new mode 100755
diff --git a/LICENSE3.txt b/LICENSE3.txt
old mode 100644
new mode 100755
diff --git a/NEWS b/NEWS
old mode 100644
new mode 100755
diff --git a/README b/README
old mode 100644
new mode 100755
index f56cb66..77cb5ae
--- a/README
+++ b/README
@@ -1,539 +1,2389 @@
+		      README/Release Notes 
+	  	  OFED 3.18 DAPL Release 2.1.6
+		          August 2015
+
+	User space libraries/utilities for Direct Access Transport (DAT) v2.0. DAT is 
+	a transport-independent, platform-independent Application Programming 
+	Interface that supports RDMA (remote direct memory access) devices. 
+	Note: v1.2 is no longer supported and will not be included with OFED releases
+	
+	MIC support is provided with the new MCM provider and MPXYD service, since dapl-2.1.0. 
+        MCM requires the Intel(R) MPSS 3.x (YOCTO) release for Linux to be installed on your system. 
+        MPSS 3.x for Linux can be downloaded from: http://software.intel.com/mic-developer
+
+	For latest documentation and packages: http://www.openfabrics.org/downloads/dapl/ 
+
+	=================
+	1.0 Release Notes
+	=================
+	
+	dapl-2.1.5 changes include improvements for large scale UD communication management:
+
+	- AH caching, reduced memory footprint (grows as needed)
+	- Port space increased to 24 bits
+	- Hash table for port space, CM object management
+	- Optimized CM wire protocol for fast index lookup 
+	
+	Tested on 1200n 28ppn cluster, AlltoAll Intel MPI, UD mode.
+	Both static and dynamic modes, over 500m UD QP connections.
+	
+	dapl-2.1.6 changes include MIC support for full offload mode
+	
+	- Add support for Truescale qib devices with no CCL Direct verbs support on MIC.
+	- Enhancement for inside the box transfers without IB adapter via ibscif.
+	- Add DAPL_NETWORK_NODES, DAPL_NETWORK_PPN environment variables. 
+	
+	==========
+	2.0 BUILD:
+	==========
+
+	# NON_DEBUG build/install example for x86_64, OFED targets
+	./configure --prefix /usr --sysconf=/etc --libdir /usr/lib64 LDFLAGS=-L/usr/lib64 CPPFLAGS="-I/usr/include"
+	make install
+
+	# DEBUG build/install example for x86_64, using OFED targets
+	./configure --enable-debug --prefix /usr --sysconf=/etc --libdir /usr/lib64 LDFLAGS=-L/usr/lib64 CPPFLAGS="-I/usr/include"
+	make install
+
+	# COUNTERS build/install example for x86_64, using OFED targets
+	./configure --prefix /usr --sysconf=/etc --libdir /usr/lib64 LDFLAGS=-L/usr/lib64 CPPFLAGS="-I/usr/include -DDAPL_COUNTERS"
+	make install
+
+	=========================================================
+	3.0 Provider descriptions and CM results (cma, scm, ucm):
+	=========================================================
+
+	1. CMA - uses OFA rdma_cm to setup QP's. IPoIB, ARP, and SA queries required.
+       
+	Provider name: ofa-v2-ib0
+	PROs:	OFA rdma_cm has the most testing across many applications.
+		Supports both iWARP and IB.
+                            
+	CONs:	Serialization of conn processing with kernel based CM service
+		Requires IPoIB ARP for name resolution, storms
+		Requires SA for path record queries for IB fabrics.
+		Conn Request private data limited to 52 bytes.
+        
+	Settings for larger clusters (512+ cores):
+
+	setenv DAPL_CM_ROUTE_TIMEOUT_MS 20000
+	setenv DAPL_CM_ARP_TIMEOUT_MS 10000
+
+	2. SCM - uses sockets to exchange QP information. IPoIB, ARP, and SA queries NOT required.
+       
+	Provider name (connectx): ofa-v2-mlx4_0-1
+	PROs:	Each rank has own instance of socket cm. More private data with requests. 
+		Doesn't require path-record lookup.   	
+                            
+	CONs:	Socket resources grow with scale-out, serialization of
+		connections with kernel based tcp sockets, 
+		Competes for MPI socket resources/port space and other TCP applications. 
+		Sockets remain in TIMEWAIT state for minutes after closure. 
+		Requires ARP for name resolution.
+		Doesn't support iWARP devices.
+        
+	Settings for larger clusters (512+ cores):
+
+	setenv DAPL_ACK_RETRY 7         /* IB RC Ack retry count */
+	setenv DAPL_ACK_TIMER 20        /* IB RC Ack retry timer */
+
+	3. UCM - uses IB UD QP to exchange QP info. Sockets, ARP, IPoIB, and SA queries NOT required.
+       
+	Provider name (connectx): ofa-v2-mlx4_0-1u
+	PROs:	Each rank has own instance of CM in user process 
+		Resources fixed per rank regardless of scale-out size
+		No serialization of user or kernel resources establishing connections, 
+		Simple 3-way msg handshake, CM messages fit in inline data for lowest message latency,
+		Supports alternate paths
+		No address resolution required. 
+		No path resolution required.
+                            
+	CONs:	New provider with limited testing, a little tougher to debug. 
+		Doesn't support iWARP	
+        
+	Settings for larger clusters (512+ cores):
+
+	setenv DAPL_UCM_REP_TIME 2000   /* REQUEST timer, waiting for REPLY in millisecs */
+	setenv DAPL_UCM_RTU_TIME 2000   /* REPLY timer, waiting for RTU in millisecs */
+	setenv DAPL_UCM_CQ_SIZE  2000   /* CM completion queue */
+	setenv DAPL_UCM_QP_SIZE  2000   /* CM message queue */
+	setenv DAPL_UCM_RETRY 7         /* REQUEST and REPLY retries */
+	setenv DAPL_ACK_RETRY 7         /* IB RC Ack retry count */
+	setenv DAPL_ACK_TIMER 20        /* IB RC Ack retry timer */
+
+	CM Performance: CPS profile for cma, scm, and ucm v2 uDAPL providers:
+	-----------------------------------------------------------------------
+ 	Intel(R) Xeon(R) CPU E5-2690 v2 @ 3.00GHz (IVT)
+	Mellanox MLX4 IB FDR, no switch.
+
+	dtestcm (server/client):
+
+        cma: Connections: 313.10 usec, CPS  3193.83 Total 0.31 secs, poll_cnt=6300, Num=1000
+        scm: Connections: 167.65 usec, CPS  5964.92 Total 0.17 secs, poll_cnt=2394, Num=1000
+        ucm: Connections:  71.85 usec, CPS 13918.06 Total 0.07 secs, poll_cnt=2360, Num=1000
+
+        dapl_cm_bw: MPI uDAPL/CM profiling application (all-to-all connections, all ranks)
+
+        CMA
+        2  Connect times (10):   Total 0.0049 per 0.0005 CPS=2051.38
+        4  Connect times (40):   Total 0.0151 per 0.0004 CPS=2650.16
+        8  Connect times (240):  Total 0.0548 per 0.0002 CPS=4380.59
+        16 Connect times (1120): Total 4.0356 per 0.0036 CPS=277.53
+        32 Connect times (4800): Total 4.4704 per 0.0009 CPS=1073.72
+
+        SCM
+        2  Connect times (10):   Total 0.0029 per 0.0003 CPS=3441.31
+        4  Connect times (40):   Total 0.0060 per 0.0002 CPS=6635.97
+        8  Connect times (240):  Total 0.0194 per 0.0001 CPS=12383.47
+        16 Connect times (1120): Total 0.0649 per 0.0001 CPS=17246.93
+        32 Connect times (4800): Total 1.0193 per 0.0002 CPS=4708.95
+
+        UCM
+        2  Connect times (10):   Total 0.0014 per 0.0001 CPS=6993.91
+        4  Connect times (40):   Total 0.0045 per 0.0001 CPS=8837.87
+        8  Connect times (240):  Total 0.0155 per 0.0001 CPS=15477.13
+        16 Connect times (1120): Total 0.0630 per 0.0001 CPS=17765.12
+        32 Connect times (4800): Total 0.2632 per 0.0001 CPS=18236.54
+
+	===================================================================================================
+	4.0 BKM for installing new DAPL library on your cluster without any impact on existing OFED install:
+	====================================================================================================
+	
+	Note: example for user /home/user1, (assumes /home/user1 is exported) and MLX4 adapter, port 1
+
+	Download latest 2.1.x package: http://www.openfabrics.org/downloads/dapl/dapl-2.1.6.tar.gz
+
+	untar in /home/user1 
+	cd /home/user1/dapl-2.1.6
+	./configure LDFLAGS=-L/usr/lib64 CPPFLAGS="-I/usr/include" 
+	make 
+
+	Create /home/user1/dat.conf with the following 4 lines (entries with path to new libraries):
+
+	  ofa-v2-mlx4_0-1u u2.0 nonthreadsafe default /home/user1/dapl-2.1.6/dapl/udapl/.libs/libdaploucm.so.2 dapl.2.0 "mlx4_0 1" ""
+	  ofa-v2-mlx4_0-1m u2.0 nonthreadsafe default /home/user1/dapl-2.1.6/dapl/udapl/.libs/libdaplomcm.so.2 dapl.2.0 "mlx4_0 1" ""
+	  ofa-v2-mlx4_0-1 u2.0 nonthreadsafe default /home/user1/dapl-2.1.6/dapl/udapl/.libs/libdaploscm.so.2 dapl.2.0 "mlx4_0 1" ""
+	  ofa-v2-ib0 u2.0 nonthreadsafe default /home/user1/dapl-2.1.6/dapl/udapl/.libs/libdaplcma.so.1 dapl.2.0 "ib0 0" ""
+
+	Run uDAPL application or Intel MPI that uses uDAPL, with (assuming mlx4_0 adapters) following:
+
+	  setenv DAT_OVERRIDE /home/user1/dat.conf
+	  setenv LD_LIBRARY_PATH /home/user1/dapl-2.1.6/dapl/udapl/.libs:$LD_LIBRARY_PATH
+
+	If running Intel MPI and uDAPL IB UD cm, set the following (recommended):
+
+  	  setenv I_MPI_DAPL_PROVIDER ofa-v2-mlx4_0-1u
+	
+	If running Intel MPI and uDAPL IB mcm with MIC, set the following:
+
+  	  setenv I_MPI_DAPL_PROVIDER ofa-v2-mlx4_0-1m
+	
+	If running Intel MPI and uDAPL socket cm, set the following:
+
+  	  setenv I_MPI_DAPL_PROVIDER ofa-v2-mlx4_0-1
+
+	
+	If running Intel MPI and uDAPL rdma_cm, set the following:
+
+	  setenv I_MPI_DAPL_PROVIDER ofa-v2-ib0
+
+
+	============================================================
+	5.0 MCM Provider, MPXYD Daemon (CCL-proxy) Build and Install
+	============================================================
+	 
+	MCM is a new uDAPL provider that is an extension to standard DAT 2.0 libraries. The purpose of this service
+	is to proxy RDMA writes from the MIC to the HOST to improve large IO performance. The provider will support
+	MIC to MIC, HOST to HOST, and MIC to HOST environments. The mcm client will NOT use MPXYD when running on the host.
+	It requires a new MPXYD daemon service when clients are running on a MIC KNC adapter. This package installs all the
+	host side libraries and daemon service. The MIC libraries must be built and moved over to MIC adapter. This version
+	is currently included with MPSS and all libraries and services will be installed by default.
+
+	Current release package: dapl-2.1.6.tar.gz 
+
+	* Sample host build from source package (ofed must be installed)
+
+  	./configure --enable-mcm --prefix=/usr --libdir=/usr/lib64 --sysconfdir=/etc
+  	make
+  	make install
+
+	* Sample host rpmbuild/update from release tarball, /root:
+
+	rpmbuild -ta dapl-2.1.6.tar.gz
+	rpm -U /root/rpmbuild/RPMS/x86_64/dapl*
+
+	* Sample MIC build from source package for MPSS 3.x (MPSS must be installed)
+	* Assume /opt is nfs mounted across cluster
+
+  	source /opt/mpss/3.x/environment-setup-k1om-mpss-linux 
+	./configure --enable-mcm --prefix /opt/dapl/mic --host=x86_64-k1om-linux
+	make
+	make install
+
+	copy /opt/dapl/mic/* files out to all MIC cards
+   
+	* Cluster deployment
+
+  	(1) Build once on the head or on one of the nodes (with MPSS) as described in the above steps.
+
+  	(2) HOST: Install dapl libraries and mpxyd service, "rpm -U" all dapl RPM files on host nodes:
+
+  	(3) MIC: Setup dapl overlay for new package (/opt/intel/dapl):
+	
+		Create /etc/mpss/conf.d/dapl.conf with following entry:
+
+			Overlay Filelist /opt/dapl /opt/dapl/dapl.filelist on
+		
+		Create /opt/dapl/dapl.filelist with following entries: 
+
+			file /etc/dat.conf mic/etc/dat.conf 755 0 0
+			file /usr/bin/dtest mic/bin/dtest 755 0 0
+			file /usr/bin/dtestx mic/bin/dtestx 755 0 0
+			file /usr/bin/dtestcm mic/bin/dtestcm 755 0 0
+			file /usr/bin/dapltest mic/bin/dapltest 755 0 0
+			file /usr/lib64/libdat.so.2.0.0 mic/lib/libdat.so.2.0.0 755 0 0
+			file /usr/lib64/libdaplofa.so.2.0.0 mic/lib/libdaplofa.so.2.0.0 755 0 0
+			file /usr/lib64/libdaplomcm.so.2.0.0 mic/lib/libdaplomcm.so.2.0.0 755 0 0
+			file /usr/lib64/libdaploscm.so.2.0.0 mic/lib/libdaploscm.so.2.0.0 755 0 0
+			file /usr/lib64/libdaploucm.so.2.0.0 mic/lib/libdaploucm.so.2.0.0 755 0 0
+
+			slink /usr/lib64/libdat.so libdat.so.2.0.0 777 0 0
+			slink /usr/lib64/libdat.so.2 libdat.so.2.0.0 777 0 0
+			slink /usr/lib64/libdaplofa.so libdaplofa.so.2.0.0 777 0 0
+			slink /usr/lib64/libdaplofa.so.2 libdaplofa.so.2.0.0 777 0 0
+			slink /usr/lib64/libdaplomcm.so libdaplomcm.so.2.0.0 777 0 0
+			slink /usr/lib64/libdaplomcm.so.2 libdaplomcm.so.2.0.0 777 0 0
+			slink /usr/lib64/libdaploscm.so libdaploscm.so.2.0.0 777 0 0
+			slink /usr/lib64/libdaploscm.so.2 libdaploscm.so.2.0.0 777 0 0
+			slink /usr/lib64/libdaploucm.so libdaploucm.so.2.0.0 777 0 0
+			slink /usr/lib64/libdaploucm.so.2 libdaploucm.so.2.0.0 777 0 0
+	
+		Reboot or restart MPSS and ofed-mic services
+
+		Check for dapl overlay
+			micctrl --config  
+
+	* Setup for non-root CCL Proxy testing, MPXYD running as process with different service port from your /home directory:
+
+   	Using build instructions above, change prefix as follows and "make install":
+
+   	Build MIC:
+		--prefix=/home/username/ccl-proxy-mic
+
+   	Build host:
+		--prefix=/home/username/ccl-proxy-host
+	
+	edit /home/username/ccl-proxy-host/etc/mpxyd.conf and change the following entries:
+	
+	log_file /var/log/mpxyd.log  	to log_file /tmp/username/mpxyd.log
+	lock_file /var/log/mpxyd.pid 	to lock_file /tmp/username/mpxyd.pid
+	scif_port_id 68 		to scif_port_id 1068
+	
+	start the mpxyd process on each node
+	
+	ssh node1-hostname /home/username/ccl-proxy-host/sbin/mpxyd -P -O /home/username/ccl-proxy-host/etc/mpxyd.conf&
+	
+	Note: override default port id using following environment variable:
+	
+	export DAPL_MCM_PORT_ID=1068
+   
+   	* Notes
+
+  	(1) Modify "/etc/mpxyd.conf" to change the settings for the proxy. Especially, try different values
+      	of "buffer_segment_size" for performance tuning. Use a smaller value for "buffer_pool_mb"   
+      	to reduce the memory footprint of mpxyd. Use a larger value for "scif_listen_qlen" to run 
+      	more MPI ranks per card. Also modify mcm_affinity_base to the desired CPU_id to ensure
+      	socket to adapter affinity. Best performance when HCA, MIC, and CPU are on same socket.
+      	Default settings are on CPU socket 0.
+
+  	(2) By default, only writes originated from MIC are proxied. However, it is also possible to proxy 
+      	host-originated writes (e.g. for debugging purpose). To do this, set the environment variable
+      	"DAPL_MCM_ALWAYS_PROXY=1". This variable applies to the provider, not the proxy.
+
+	* Use the MCM provider with Intel MPI 5.1 or greater for best out of box experiences with MIC.
+
+  	(1) Recommended settings:
+
+		export I_MPI_MIC=1
+		export I_MPI_DEBUG=2
+		export I_MPI_FALLBACK=0
+		
+	=============================
+	6.0 Environment Variables
+	=============================
+	
+	 - IB UD options using UCM provider, large scale settings (Xeon)
+	
+	export DAPL_NETWORK_NODES= 	/* set to active nodes on network for CM */
+	export DAPL_NETWORK_PPN= 	/* set to active processes per node for CM */ 
+	
+	/* The following will be adjusted by provider based on NODES, PPN */
+	export DAPL_UCM_REP_TIME=8000   /* REQUEST timer, waiting on REPLY, msecs, default = 800 */
+	export DAPL_UCM_RTU_TIME=8000   /* REPLY timer, waiting for RTU in msecs, default=400 */
+	export DAPL_UCM_RETRY=7       	/* REQUEST & REPLY retries, default = 7 */
+	export DAPL_UCM_QP_SIZE=4000	/* CM req/reply work queue size, default = 500 entries */
+	export DAPL_UCM_CQ_SIZE=4000	/* CM req/reply completion queue size, default = 500 entries */
+	export DAPL_UCM_TX_BURST=100	/* CM signal rate on send messages */
+	export DAPL_UCM_ENTRY_BITS=11	/* default = 11-bit, 2KB entries, allocation blocks */
+	export DAPL_UCM_ARRAY_BITS=18	/* default = 18 bit, 256KB total */
+	
+	- IB RC options using SCM provider
+	
+	export DAPL_SCM_NETDEV=ib0	/* default is first non-loopback netdev */
+	
+	- Other IB settings for all providers:
+	
+	export DAPL_MAX_INLINE=64	/*  IB RC inline optimization, best small msg latency, def=64 */
+	export DAPL_ACK_RETRY=7         /*  IB RC Ack retry count, default 7 */
+	export DAPL_ACK_TIMER=20       	/* IB RC Ack retry timer, 5 bits, 4.096us*2^ack_timer. 16== 268ms, 20==4.2s */
+	export DAPL_IB_MTU=2048		/* IB MTU size, default = 2048 */
+	export DAPL_RNR_TIMER=12	/* 5 bits, 12 =.64ms, 28 =163ms, 31 =491ms */
+	export DAPL_RNR_RETRY=7		/* 3 bits, 7 == infinite */
+	export DAPL_IB_PKEY=0		/* override IB partition key, default is pkey index 0 */
+	export DAPL_IB_SL=0		/* override IB Service level, default = 0 */
+	
+	- Other options:
+	export DAPL_WR_MAX=500 		/* used to reduce max qp depth on all IB providers, default = dev attributes */
+	
+	Debug logging and Counter settings ( --enable-counters)
+	
+	export DAPL_DBG_SYS_MEM=10	/* threshold for low sys memory warning, def = 10 percent */
+	export DAPL_DBG_TYPE=0x0000003 	/* set log, monitor, and error checking, default = warnings and errors */
+	
+	DAPL_DBG_TYPE bit settings as follows:
+	
+	DAPL_DBG_TYPE_ERR          = 0x0001,
+	DAPL_DBG_TYPE_WARN         = 0x0002,
+	DAPL_DBG_TYPE_EVD          = 0x0004,
+	DAPL_DBG_TYPE_CM           = 0x0008,
+	DAPL_DBG_TYPE_EP           = 0x0010,
+	DAPL_DBG_TYPE_UTIL         = 0x0020,
+	DAPL_DBG_TYPE_CALLBACK     = 0x0040,
+	DAPL_DBG_TYPE_DTO_COMP_ERR = 0x0080,
+	DAPL_DBG_TYPE_API          = 0x0100,
+	DAPL_DBG_TYPE_RTN          = 0x0200,
+	DAPL_DBG_TYPE_EXCEPTION   = 0x0400,
+	DAPL_DBG_TYPE_SRQ         = 0x0800,
+	DAPL_DBG_TYPE_CNTR        = 0x1000,
+	DAPL_DBG_TYPE_CM_LIST     = 0x2000,
+	DAPL_DBG_TYPE_THREAD      = 0x4000,
+	DAPL_DBG_TYPE_CM_EST      = 0x8000,
+	DAPL_DBG_TYPE_CM_WARN    = 0x10000,
+	DAPL_DBG_TYPE_EXTENSION  = 0x20000,
+	DAPL_DBG_TYPE_CM_STATS   = 0x40000,
+	DAPL_DBG_TYPE_CM_ERRS    = 0x80000,    /* print any cm errors on device close */
+	DAPL_DBG_TYPE_LINK_ERRS  = 0x100000,   /* print any link errors on device close */
+	DAPL_DBG_TYPE_LINK_WARN  = 0x200000,   /* print any link warning on device close */
+	DAPL_DBG_TYPE_DIAG_ERRS  = 0x400000,   /* print any diag_counter errors on dev close */
+	DAPL_DBG_TYPE_SYS_WARN   = 0x800000,   /* print low mem warning during alloc, reg_mem */
+	DAPL_DBG_TYPE_VER        = 0x1000000,  /* print dapl ver and build date during dev open */
+	
+	=============================
+	7.0 SAMPLE uDAPL APPLICATION:
+	=============================
+	
+	There are 2 sample programs, with manpages, provided with this package.
+	
+	(dapl/test/dtest/)
+	
+	NAME
+	       dtest - simple uDAPL send/receive and RDMA test
+	
+	SYNOPSIS
+	       dtest [-P provider] [-b buf size] [-B burst count][-v] [-c] [-p] [-d] [-s]
+	
+	       dtest [-P provider] [-b buf size] [-B burst count][-v] [-c] [-p] [-d] [-h HOSTNAME]
+	
+	DESCRIPTION
+	       dtest  is a simple test used to exercise and verify the uDAPL interfaces.  At least two instantia-
+	       tions of the test must be run. One acts as the server and the other the client. The server side of
+	       the  test,  once invoked listens for connection requests, until timing out or killed. Upon receipt
+	       of a cd connection request, the connection is established, the server and  client  sides  exchange
+	       information necessary to perform RDMA writes and reads.
+	
+	OPTIONS
+	       -P=PROVIDER
+	              use PROVIDER to specify uDAPL interface using /etc/dat.conf (default OpenIB-cma)
+	
+	       -b=BUFFER_SIZE
+	              use buffer size BUFFER_SIZE for RDMA(default 64)
+	
+	       -B=BURST_COUNT
+	              use burst count BURST_COUNT for iterations (default 10)
+	
+	       -v, verbose output(default off)
+	
+	       -c, use consumer notification events (default off)
+	
+	       -p, use polling (default wait for event)
+	
+	       -d, delay in seconds before close (default off)
+	
+	       -s, run as server (default - run as server)
+	
+	       -h=HOSTNAME
+	              use HOSTNAME to specify server hostname or IP address (default - none)
+	
+	EXAMPLES
+	       dtest -P OpenIB-cma -v -s
+	            Starts a server process with debug verbosity using provider OpenIB-cma.
+	
+	       dtest -P OpenIB-cma -h server1-ib0
+	
+	            Starts a client process, using OpenIB-cma provider to connect to hostname server1-ib0.
+	
+	SEE ALSO
+	       dapltest(1)
+	
+	AUTHORS
+	       Arlin Davis
+	              <ardavis at ichips.intel.com>
+	
+	BUGS
+	
+	/dapl/test/dapltest/
+	
+	NAME
+	        dapltest - test for the Direct Access Programming Library (DAPL)
+	
+	DESCRIPTION
+	       Dapltest  is  a  set  of tests developed to exercise, characterize, and verify the DAPL interfaces
+	       during development and porting.  At least two instantiations of the test must be run. One acts  as
+	       the  server, fielding requests and spawning server-side test threads as needed. Other client invo-
+	       cations connect to the server and issue test requests. The server side of the test, once  invoked,
+	       listens  continuously for client connection requests, until quit or killed. Upon receipt of a con-
+	       nection request, the connection is established, the server and client sides swap  version  numbers
+	       to  verify that they are able to communicate, and the client sends the test request to the server.
+	       If the version numbers match, and the test request is well-formed, the server spawns  the  threads
+	       needed to run the test before awaiting further connections.
+	
+	USAGE
+	       dapltest [ -f script_file_name ] [ -T S|Q|T|P|L ] [ -D device_name ] [ -d ] [ -R HT|LL|EC|PM|BE ]
+	
+	       With  no  arguments,  dapltest runs as a server using default values, and loops accepting requests
+	       from clients.
+	
+	       The -f option allows all arguments to be placed in a file, to ease test automation.
+	
+	       The following arguments are common to all tests:
+	
+	       [ -T S|Q|T|P|L ]
+	              Test function to be performed:
+	
+	              S      - server loop
+	
+	              Q      - quit, client requests that server wait for any outstanding tests to complete, then
+	                     clean up and exit
+	
+	              T      - transaction test, transfers data between client and server
+	
+	              P      - performance test, times DTO operations
+	
+	              L      -  limit  test,  exhausts  various  resources, runs in client w/o server interaction
+	                     Default: S
+	
+	      [ -D device_name ]
+	              Specifies the interface adapter name as documented in the /etc/dat.conf  static  configura-
+	              tion file. This name corresponds to the provider library to open.  Default: none
+	
+	       [ -d ] Enables  extra  debug  verbosity,  primarily tracing of the various DAPL operations as they
+	              progress.  Repeating this parameter increases debug spew.  Errors encountered result in the
+	              test  spewing some explanatory text and stopping; this flag provides more detail about what
+	              led up to the error.  Default: zero
+	
+	       [ -R BE ]
+	              Indicate the quality of service (QoS) desired.  Choices are:
+	
+	              HT     - high throughput
+	
+	              LL     - low latency
+	
+	              EC     - economy (neither HT nor LL)
+	
+	              PM     - premium
+	
+	              BE     - best effort Default: BE
+	
+	       Usage - Quit test client
+	
+	           dapltest [Common_Args] [ -s server_name ]
+	
+	           Quit testing (-T Q) connects to the server to ask it to clean up and
+	           exit (after it waits for any outstanding test runs to complete).
+	           In addition to being more polite than simply killing the server,
+	           this test exercises the DAPL object teardown code paths.
+	           There is only one argument other than those supported by all tests:
+	
+	           -s server_name      Specifies the name of the server interface.
+	                               No default.
+	
+	       Usage - Transaction test client
+	
+	           dapltest [Common_Args] [ -s server_name ]
+	                    [ -t threads ] [ -w endpoints ] [ -i iterations ] [ -Q ]
+	                    [ -V ] [ -P ] OPclient OPserver [ op3, ... ]
+	
+	           Transaction testing (-T T) transfers a variable amount of data between
+	           client and server.  The data transfer can be described as a sequence of
+	           individual operations; that entire sequence is transferred ’iterations’
+	           times by each thread over all of its endpoint(s).
+	
+	           The following parameters determine the behavior of the transaction test:
+	
+	           -s server_name      Specifies the name or IP address of the server interface.
+	                               No default.
+	
+	           [ -t threads ]      Specify the number of threads to be used.
+	                               Default: 1
+	
+	           [ -w endpoints ]    Specify the number of connected endpoints per thread.
+	                               Default: 1
+	
+	           [ -i iterations ]   Specify the number of times the entire sequence
+	                               of data transfers will be made over each endpoint.
+	                               Default: 1000
+	
+	           [ -Q ]              Funnel completion events into a CNO.
+	                               Default: use EVDs
+	
+	           [ -V ]              Validate the data being transferred.
+	                               Default: ignore the data
+	
+	           [ -P ]              Turn on DTO completion polling
+	                               Default: off
+	
+	           OP1 OP2 [ OP3, ... ]
+	                               A single transaction (OPx) consists of:
+	
+	                               server|client   Indicates who initiates the
+	                                               data transfer.
+	
+	                               SR|RR|RW        Indicates the type of transfer:
+	                                               SR  send/recv
+	                                               RR  RDMA read
+	                                               RW  RDMA write
+	                               Defaults: none
+	
+	                               [ seg_size [ num_segs ] ]
+	
+	
+	                                              Indicates the amount and format
+	                                               of the data to be transferred.
+	                                               Default:  4096  1
+	                                                         (i.e., 1 4KB buffer)
+	
+	                               [ -f ]          For SR transfers only, indicates
+	                                               that a client’s send transfer
+	                                               completion should be reaped when
+	                                               the next recv completion is reaped.
+	                                               Sends and receives must be paired
+	                                               (one client, one server, and in that
+	                                               order) for this option to be used.
+	           Restrictions:
+	
+	           Due to the flow control algorithm used by the transaction test, there
+	           must be at least one SR OP for both the client and the server.
+	
+	           Requesting data validation (-V) causes the test to automatically append
+	           three OPs to those specified. These additional operations provide
+	           synchronization points during each iteration, at which all user-specified
+	           transaction buffers are checked. These three appended operations satisfy
+	           the "one SR in each direction" requirement.
+	
+	           The transaction OP list is printed out if -d is supplied.
+	
+	       Usage - Performance test client
+	
+	           dapltest [Common_Args] -s server_name [ -m p|b ]
+	                    [ -i iterations ] [ -p pipeline ] OP
+	
+	           Performance testing (-T P) times the transfer of an operation.
+	           The operation is posted ’iterations’ times.
+	
+	           The following parameters determine the behavior of the transaction test:
+	
+	           -s server_name      Specifies the name or IP address of the server interface.
+	                               No default.
+	
+	           -m b|p              Used to choose either blocking (b) or polling (p)
+	                               Default: blocking (b)
+	          [ -i iterations ]   Specify the number of times the entire sequence
+	                               of data transfers will be made over each endpoint.
+	                               Default: 1000
+	
+	           [ -p pipeline ]     Specify the pipeline length, valid arguments are in
+	                               the range [0,MAX_SEND_DTOS]. If a value greater than
+	                               MAX_SEND_DTOS is requested the value will be
+	                               adjusted down to MAX_SEND_DTOS.
+	                               Default: MAX_SEND_DTOS
+	
+	           OP                  Specifies the operation as follows:
+	
+	                               RR|RW           Indicates the type of transfer:
+	                                               RR  RDMA read
+	                                               RW  RDMA write
+	                                               Defaults: none
+	
+	                               [ seg_size [ num_segs ] ]
+	                                               Indicates the amount and format
+	                                               of the data to be transferred.
+	                                               Default:  4096  1
+	                                                         (i.e., 1 4KB buffer)
+	       Usage - Limit test client
+	
+	           Limit testing (-T L) neither requires nor connects to any server
+	           instance.  The client runs one or more tests which attempt to
+	           exhaust various resources to determine DAPL limits and exercise
+	           DAPL error paths.  If no arguments are given, all tests are run.
+	
+	           Limit testing creates the sequence of DAT objects needed to
+	           move data back and forth, attempting to find the limits supported
+	           for the DAPL object requested.  For example, if the LMR creation
+	           limit is being examined, the test will create a set of
+	           {IA, PZ, CNO, EVD, EP} before trying to run dat_lmr_create() to
+	           failure using that set of DAPL objects.  The ’width’ parameter
+	           can be used to control how many of these parallel DAPL object
+	           sets are created before beating upon the requested constructor.
+	           Use of -m limits the number of dat_*_create() calls that will
+	           be attempted, which can be helpful if the DAPL in use supports
+	           essentially unlimited numbers of some objects.
+	           The limit test arguments are:
+	
+	           [ -m maximum ]      Specify the maximum number of dat_*_create()
+	                               attempts.
+	                               Default: run to object creation failure
+	
+	           [ -w width ]        Specify the number of DAPL object sets to
+	                               create while initializing.
+	                               Default: 1
+	
+	           [ limit_ia ]        Attempt to exhaust dat_ia_open()
+	
+	           [ limit_pz ]        Attempt to exhaust dat_pz_create()
+	
+	           [ limit_cno ]       Attempt to exhaust dat_cno_create()
+	
+	           [ limit_evd ]       Attempt to exhaust dat_evd_create()
+	
+	           [ limit_ep ]        Attempt to exhaust dat_ep_create()
+	
+	           [ limit_rsp ]       Attempt to exhaust dat_rsp_create()
+	
+	           [ limit_psp ]       Attempt to exhaust dat_psp_create()
+	
+	           [ limit_lmr ]       Attempt to exhaust dat_lmr_create(4KB)
+	
+	           [ limit_rpost ]     Attempt to exhaust dat_ep_post_recv(4KB)
+	
+	           [ limit_size_lmr ]  Probe maximum size dat_lmr_create()
+	
+	                               Default: run all tests
+	EXAMPLES
+	       dapltest -T S -d -D OpenIB-cma
+	
+	                               Starts a server process with debug verbosity.
+	
+	       dapltest -T T -d -s host1-ib0 -D OpenIB-cma -i 100 client SR 4096 2 server SR 4096 2
+	
+	                               Runs a transaction test, with both sides
+	                               sending one buffer with two 4KB segments,
+	                              one hundred times.
+	
+	       dapltest -T P -d -s host1-ib0 -D OpenIB-cma -i 100 SR 4096 2
+	
+	                               Runs a performance test, with the client
+	                               sending one buffer with two 4KB segments,
+	                               one hundred times.
+	
+	       dapltest -T Q -s host1-ib0 -D OpenIB-cma
+	
+	                               Asks the server to clean up and exit.
+	
+	       dapltest -T L -D OpenIB-cma -d -w 16 -m 1000
+	
+	                               Runs all of the limit tests, setting up
+	                               16 complete sets of DAPL objects, and
+	                               creating at most a thousand instances
+	                               when trying to exhaust resources.
+	
+	       dapltest -T T -V -d -t 2 -w 4 -i 55555 -s linux3 -D OpenIB-cma client RW 4096 1 server RW  2048  4
+	       client SR 1024 4 server SR 4096 2 client SR 1024 3 -f server SR 2048 1 -f
+	
+	                               Runs a more complicated transaction test,
+	                               with two threads using four EPs each,
+	                               sending a more complicated buffer pattern
+	                               for a larger number of iterations,
+	                               validating the data received.
+	
+	=============================
+	8.0 Summary of Fixes/Changes:
+	=============================
+		
+	 Release 2.1.6 (OFED 3.18-1)
+	 ucm: add cluster size environments to adjust CM timers
+	 mpxyd: proxy_in data transfers can improperly start before RTU received
+	 mcm: forward open/query for MFO devices in query only mode
+	 mpxyd: byte swap incorrect on WRC wr_len
+	 dtest: remove ERR message from flush QP function
+	 dapltest: Quit command with "-n port" number will core dump
+	 config: update dat.conf for MFO qib devices, 2 adapters/ports
+	 mpxyd: add MFO support on proxy side
+	 mcm: add MFO proxy commands, device, and CM support
+	 mcm: add MFO support to openib_common code base
+	 mcm: add full offload (MFO) mode to provider to support qib on MIC
+	 dtest: pre-allocated buffer too small for RMR, DTO ops timeout
+	 mpxyd: fix buffer initialization when no-inline support is active
+	 mpxyd: reduce log level on qp_flush to CM level
+	 mcm: intra-node proxy missing LID setup on rejects
+	 mcm: add intra-node support via ibscif device and mcm provider
+	 mcm: provide MIC address info with proxy device open
+	 mcm: add device info to non-debug log
+	 common: add DAPL_DTO_TYPE_EXTENSION_IMM for rdma_write_imm DTO type checking
+	 mpxyd: fix up some of the PI logging
+	 dtest: modify rdma_write_with_msg to support uni-direction streaming
+	 mcm,mpxyd: fix dreq processing to defer QP flush when proxy WRs still pending
+	 mpxyd: update byte_len and comp_cnt for PO to remote HST communications
+	 mcm: bug fixes for non-inline devices
+	 mcm: return CM_rej with CM_req_in errors
+	 mpxyd,mcm: RDMA write with immed data not signaled on request side
+	 mcm: add WC opcode and wc_flags in debug log message
+	 mpxyd: set options bug fix for mcm_ib_inline
+	 Update release notes with latest CM times
+	
+	Release 2.1.5 (OFED 3.18 RC3)
+	update release notes, readme
+	dat.conf: update comments regarding versions
+	dtest: add logging of provider private data size with -v
+	scm: remove use of msg.resv field for process id logging
+	cma: report correct CM req private data size on query
+	mpxyd: memset ib_wr structure before post_send on WC and WR requests
+	mcm: add HST side provider support for device without inline data capability
+	ucm: CM changes for UD extended port space and indexer
+	ucm: add device support for new port space hash table
+	ucm: allocate/free AH hash table for UD endpoint types
+	ucm: check for AH caching when destroying via UD extension
+	ucm: optimizations for large scale UD communication management
+	mpxyd: use wr opcode instead of wc opcode to support logging on error cases
+	mcm: HST->MXS mode, using RDMA_WRITE_WITH_IMM, fails with dtest -w
+	dapl: aarch64 support for linux
+	dapltest: add scripts to dist, set default device to IPoIB
+	mpxyd: add wc_flags to proxy work completions
+	
+	Release 2.1.4 (OFED 3.18 RC1)
+	mpxyd: fix typo in configuration file
+	cma: RR attributes moved to common ib_cm struct
+	mpxyd: tx thread incorrectly sleeps with negative pi_rw_cnt value
+	dat.conf: add entries for True Scale qib device
+	mpxyd: add support for devices without inline data support
+	ucm: long disconnect times with many-to-one applications
+	openib: add inline data support check during device open
+	cleanup ib/cm attribute management across openib providers
+	dapltest: fix -Werror=format-security issue with printf
+	Release 2.1.3 (targeting OFED 3.18)
+	dapl: mpxyd service changes to support multi-thread single-core option
+	dapl: add rdma_write_imm and write only option to dtest
+	ucm: add time wait override capability for CM services
+	common: dapl_ep_free must serialize CM object destroy
+	dtestx: allow scale up to 1000 EP's
+	ucm: RTU not retransmitted in TIMEWAIT state
+	mpxyd: increase max open files for service
+	mpxyd: DTO completion ERR: status 12, op RDMA_WRITE running MPI alltoall test
+	mcm: HST->MXS mode incorrectly signals multiple fragments per WR
+	mcm: add segmentation to HST->MXS mode for improved performance
+	mpxyd: set global seg_sz to 128KB for proxy data service
+	openib: add port_num to provider named attributes
+	mcm: provide CPU family/model attribute on both host and mic sides
+	dtestx: update IB extension example test with new v2.0.9 features
+	dtest: add dtestsrq for SRQ example and provider testing
+	common: add srq support for openib verbs providers
+	openib: add IB UD cm_free/ah_free extension support in UCM provider
+	openib: add new TIMEWAIT state for CM
+	extension: add IB UD extensions to reduce provider CM and AH memory footprint
+	mpxyd/mcm: add provider specific attribute DAT_IB_PROXY_VERSION
+	mpxyd: log warning if running in COMPAT mode
+	add provider and proxy support for GUID across platform
+	common: return appropriate handles with affiliated EP and EVD async events
+	
+	Release 2.1.2 (OFED 3.12-1)
+	mpxyd: add global routing support for proxy connections
+	mcm: only call mix_get_attr if running on MIC
+	openib: modify check for link_layer to handle unspecified
+	dapl: add support for the s390x platform
+	dtest server exchange connection info with client
+	mpxyd: 2 MICs in same numa_node will overlap CPU affinity, don't reset base
+	mcm: implement proxy mix_prov_attr function, add fields CPU model and family
+	mpxyd: tx thread may not be signaled on small segment writes
+	
+	Release 2.1.1 (OFED 3.12-1 RC1)
+	common: add provider name to log messages
+	mpxyd: log warning message if numa_node invalid
+	build: include debuginfo with build
+	mpxyd: tx thread doesn't sleep during no pending IO state
+	mpxyd: change MIC cpu_mask to per numa node instead of adapter
+	mpxyd: set to MXS mode if device numa_node is invalid (-1)
+	mpxyd: MXS based alltoall benchmark hangs or returns post_send timeout
+	mpxyd: add IO profile capabilities to help debug alltoall stall cases
+	mpxyd: retry stalled inline post_send, init m_idx only when signaled
+	
+	Release 2.1.0 (OFED 3.12-1, MIC support added)
+	build: add missing NEWS file
+	update autogen.sh
+	add MCM provider and MPXYD service to build
+	mpxyd: service startup script and configuration file
+	add readme for MCM provider and MPXYD service
+	update Copyright dates
+	add new MIC RDMA proxy service daemon (MPXYD)
+	add new dapl MIC provider (MCM) to support MIC RDMA proxy services
+	MCM: new MIC provider and proxy service definitions
+	cleanup build warnings
+	common: add CQ,QP,MR abstractions for new MIC provider and data proxy service
+	openib: cleanup, use inet_ntop for GIDs, remove some logs, destroy pipes on release
+	common: new dapls_evd_cqe_to_event call, cqe to event
+	common: init ring_buffer, assign hd/tl pos in range
+	allow log level changes during device open
+	ucm: fix cm rbuf setup, include grh pad on initialization
+	ucm: remove duplicate async_event code, use common async event call
+	new lightweight open_query/close_query IB extension for fast attribute query
+	dtestcm: add more detailed debug during disconnect phase
+	cma: long delays when opening cma provider with no IPoIB configured
+	common: new debug levels for low system memory, IA stats, and package info
+	build: remove library check for mverbs with --enable-fca
+	IB extension: segfault in create collective group with non-vector type IA handle
+	build: change configure help to correctly state collective default=none
+
+	Release 2.0.42 fixes (OFED 3.12 GA)
+	dapltest: increase DTO evd size to prevent CQ overflow on limit_rpost test
+	dapltest: RSP limit test fails. Creation of reserved SP moves EP state to DAT_EP_STATE_RESERVED in error cases.
+	dapl: fix string bug in dapls_dto_op_str
+
+	Release 2.0.41 fixes (OFED 3.12 RC1)
+	dapltest: change server port, from 45278 to 62000, out of registered IANA range
+	dat: lower log level on load errors of provider library
+	dat: dat_ia_open should close provider after failure
+	dapltest: set default limit max to 1000
+	openib: add new provider specific attributes
+	dapltest: update scripts for regression testing purposes
+	dapltest: Add final send/recv "sync" for transaction tests.
+
+	Release 2.0.40 fixes (OFED 3.12)
+	dist: ib collective extension include files missing
+	dapltest: the quit command is missing changes for -n option
+	dat.conf: remove v1, add Mellanox Connect-IB and Intel Xeon Phi MIC
+	NULL undefined on Fedora, incorrectly using kernel stddef.h
+
+	Release 2.0.39 fixes (OFED 3.5-2 GA)
+	dapltest: fix endian swap issue with performance test
+	scm: getifaddrs modifications for better out of the box experience
+	ucm, scm: UD mode triggers list_head assert with large scale alltoall test
+
+	Release 2.0.38
+	dapltest: add -n parameter to override default server port number (45278)
+	ucm,scm: UD mode creates many CR objects per EP that needs cleaned up
+	cma: add DAPL_CM_TOS environment variable to enable passing a TOS to the RDMA CM
+
+	Release 2.0.37
+	common: add support for ia name during dat_ia_query
+	common: dapl_os_atomic_inc/dec() not working as expected on ppc64 machines.
+	dapltest: ppc64 endian issue with exchanged mem handle and address
+
+	Release 2.0.36
+	scm: increase ACK timeout to 20 for a default value to match other providers.
+	common: allow qp modify in init state
+	common: check for valid states during ep posting
+	dat.conf: keep list of providers in order for backward compatibility
+	ucm: record and silently drop a duplicate reject CM message
+	windows: new version of getlocalipaddr not portable
+	dapltest: DFLT_QLEN is defined in multiple tests
+
+	Release 2.0.35
+	config/build: remove post/postun hacking used to modify dat.conf
+	config: clean up help option displays with ext-type options
+	windows: Provide auto-detect between RoCE and Infiniband for Windows.
+	ucm: update UD cm provider to support new CM stat and error counters
+	scm: update socket cm provider to support new CM stat and error counters
+	common: add cm, link, and diag event counters in IB extended builds
+	scm: use ioctl SIOCIFCONF to get complete list of configured netdev interfaces
+	ucm: UD send failures at scale, ucm_send ERR: get_smsg(hd=149,tl=150)
+	scm: fix retry count on connection pending timeout
+	ucm: cleanup debug message, ntohl on p_size is incorrect
+	cma, scm, ucm: allow EP (QP) creation without EVD (CQ)
+	common: add DAPL_DBG_TYPE_CM_STATS (0x40000) to debug log options
+	common: dapls_ep_flush_cq will segfault when no CQ is attached to EP
+	common: ep_create should allow max_request_iov attribute setting of zero
+	common: add check for NULL handle on ext calls, SRQ free, and helper functions
+	common: add missing sub-types to dat_strerror()
+	common: extended CR event processing missing rejects on errors
+	ucm: incorrectly sends user reject during CR callback errors
+	common: change dbg level on CR callback if not listening on SP
+	scm: incorrectly sends user reject during CR callback errors
+	dat: add check for NULL handle on IA calls
+	cma,scm,ucm: extra reference on EP, with RSP, causes dat_ep_free() to hang
+	common: RSP service points incorrectly freed during CR callback
+	common: clean up dat_rsp_create log message
+	common: cleanup debug message on EVD overflows
+	scm: return correct event error code when remote host refuses requests
+	dapltest: server CR EVD is too small for multi-client configurations.
+	Common: CR EVD overflow causes segfault.
+
+	Release 2.0.34
+	scm: change debug message level for listen/bind errors
+	common: increase default IB ack timer from 16 to 20
+	common: remote ia address null pointer creates seg fault
+	common: posting events on full queue returns wrong error code
+	common: dat_ep_modify seg faults with null ep_param ptr
+	common: dat_evd_free seg faults with resized software EVD
+	common: remove assert for incorrect events during cm_request
+	dat: dat_cno_query with NULL cno_handle causes segmentation fault
+	scm: dat_psp_create returns wrong error code on bind/listen failure
+	scm: socket connect request count is reset improperly on retry
+	scm: when hostname has loopback addr assigned, default to eth0 instead of failing
+	scm: add port number to error log during hca_open failures
+	common: query calls return incorrect IA handle to consumer
+	common: srq create asserts with !dapl_llist_is_empty(head) failed
+
+	Release 2.0.33
+	scm,ucm: fix compatibility issues and set minimum protocol support
+	build: link librdmacm dependency to ib_acm usage for ucm and scm providers
+	build: add selective enable/disable-xxx build switch for each provider
+	build: add extended header files to EXTRA_DIST and fix missing backslash
+	build: set IB extended coll-type to none by default
+	common: change errno mapping of EINVAL to DAT_INVALID_PARAMETER
+	build: add IB collective and FCA provider to dapl build package as an option
+	common: add new dapls_evd_post_event_ext call for extended events
+	ucm: add support for IB collective providers
+	scm: add support for IB collective providers
+	cma: add support for IB collective providers
+	common: add supported collective types in named attributes for query
+	common: add collective call mappings via standard dapli_post_ext()
+	common: new debug bitmask definition for extension logging
+	common: new IB collective provider for Mellanox Fabric Collective Agent
+	dat: add definitions for MPI offloaded collectives in IB transport extensions
+	common: cleanup debug messages when building with ibacm feature
+
+	Release 2.0.32 fixes (OFED 1.5.3 GA): 
+
+	cma: reduce output log level in disconnect from WARN to CM_WARN 
+	ucm: delay freeing of active side UD cm object in case RTU is dropped 
+	ucm: cm object needs to be on work queue before req sent on wire 
+	ucm,scm: remove use of usec_sleep delays and use events for disc and destroy 
+	common: reduce default max inline data size because of performance anomaly 
+	common: dapls_evd_dto_wait() dbg message should print status and not errno 
+	ucm, scm: exchange max_qp_rd_atom and limit outstanding requests 
+	scm: retry socket connect on ECONNREFUSED under heavy load 
+	common: qp modify RTR using wrong ep attribute parameter for dest_rd_atomic 
+
+	Release 2.0.31 fixes (OFED 1.5.3 RC1): 
+
+	common: clean up build warning for unused variable event_ptr 
+	scm, ucm: set RAI_NOROUTE flag with rdma_getaddrinfo() call to avoid blocking. 
+	cma: definition for dapl_sp_remove_ep() is missing in cm.c 
+	libdat: static provider entries created for local SR database not freed 
+	libdat: memory leak in static registration during parsing 
+	common: increase default IB inline send threshold to 400 
+	common cq: a mixup of errno and the -1 return from poll in dapls_wait_comp_channel 
+	ucm: release UD cm objects after AH is exchanged to avoid duplicate request drops 
+	ucm: decrease timeout retry count for disconnect requests 
+	ucm: hold lock when sending cm_msgs to sync timer start with packet send 
+	ucm: add debugging to include process id for better scale up debug aids 
+	cma: disconnect can block for excessive times waiting for rdma_cm DREP timeout 
+	ucm: configure the recv channel FD to non-blocking 
+	windows: Missing librdmacm include path for build 
+	debug build: only timestamp if sending to stdout to avoid performance hit 
+	common: print out errors on free build and not just debug builds 
+	cma: fix debug build issue 
+	scm, ucm: MPI spawn test on oversubscribed server taking excessive time to complete 
+	common: add high resolution time stamps and thread id to stdout debug logs 
+	common: modify debug in dat_evd_dequeue to reduce noise, only output on non-empty 
+	cma: rdma_destroy_id called twice during device open bind error 
+	common: dat_evd_dequeue (poll_cq) fails with invalid parameter after EP (qp) free 
+	ucm: allow configuration of CM burst (signal) threshold on posting 
+	cma: fix debug build 
+	windows: debug version of windows does not build. 
+	Allow DAPL out of band connection models to use ibacm to obtain path record data. 
+	ucm: add missing map file for UCM provider 
+	ibal: delay QP transition during disconnect phase 
+	Revert "ibal: delay QP transition during disconnect phase" 
+	ibal: delay QP transition during disconnect phase 
+	common: restructure EVD processing to handle EP destruction phase 
+	ibal: sync QP destruction and device close 
+	ucm: remove unnecessary debug warning in async callback 
+
+	v1.2 Package:
+
+	Release 1.2.19 fixes (OFED 1.5.2 GA): 
+
+	common, cma: disconnect and cleanup CR linkings after DTO error on EP 
+	common: race conditions with DTO error, disconnect and dapl_reset_ep 
+	common: add new dapl_os_sleep_usec() function 
+	configure: need a false conditional for verbs attr.link_layer member check 
+	config: add conditional check for new verbs port_attr.link_layer 
+	cma, scm: new provider entries for Mellanox RDMA over Ethernet device for uDAPL v1.2 
+	cma: memory leak of verbs CQ and completion channels created during dat_ia_open 
+	cma: memory leak of FD's (pipe) created during dat_evd_create 
+
+
+--- HISTORY -----------
+
+        OFED 1.5.1 RELEASE NOTES
+        uDAPL v1 (1.2.16-1) and v2 (2.0.27-1)
+
+	----------------
+        
+	* New Features (v2 only) - UCM provider with IB UD based CM per process. 
+				   More scalable than rdma_cm (cma) or socket cm (scm). 
+	----------------
+
+	* Bug Fixes
+
+	V2.0 Package
+
+	Release 2.0.27
+	windows: add scm makefile 
+	windows does not require rdma_cma_abi.h, move the include from common code 
+	windows patch to fix IB_INVALID_HANDLE name collision 
+	scm: dat_ep_connect fails on 32bit servers 
+	undefined symbol: dapls_print_cm_list 
+	cleanup CM object lock before freeing CM object memory 
+	destroy verbs completion channels created via ia_open or ep_create. 
+	package: update Copyright file and include the 3 license files in distribution 
+	common: when copying private_data out of rdma_cm events, use the 
+	cma: fix referencing freed address 
+	dapl: move close device after async thread is done 
+
+	Release 2.0.26
+	openib_common: add check for both gid and global routing in RTR
+	openib_common: remote memory read privilege set multi times
+	ucm, scm: DAPL_GLOBAL_ROUTING enabled causes segv
+
+	Release 2.0.25
+	winof scm: initialize opt for NODELAY setsockopt
+	winof cma: windows definition for EADDRNOTAVAIL missing
+	scm: client side setsockopt NODELAY fails if data arrives before setting
+	cma: setup_listener Cannot assign requested address
+	common: seg fault in dapl_evd_wait with multi-thread application using CNO's.
+	ucm: inbound DREQ/DREP handshake should transition QP.
+	winof: Remove duplicate include of comp_channel.cpp from cm.c as it is
+	included in opensm_ucb/device.c.
+
+	Release 2.0.24
+	winof: Utilize WinOF version of inet_ntop() for Windows OSes which do not
+	support inet_ntop().
+	ucm: windows build issue with new CQ completion channel
+	winof: add ucm provider to windows build
+	winof: add missing build files for ibal, scm
+	scm: connection peer resets under heavy load, incorrect event on error
+	ucm: increase default reply and rtu timeout values.
+	ucm: change some debug message levels and add check for valid UD REPLY during retries.
+	ucm: increase timers during subsequent retries
+	ucm, scm: address handles need destroyed when freeing Endpoints with UD QP's.
+	openib_common: ignore pd free errors, clear pd_handle and return.
+	ucm: using UD type QP's, ucm reports wrong reject event when user rejects AH resolution request.
+	ucm, scm, cma: Fix CNO support on DTO type EVD's
+	ucm: fix lock init bug in ucm_cm_find
+	ucm: fix build problem with latest windows ucm changes
+	ucm: The HCA should not be closed until all resources have been released.
+	ucm: Fix build warning when compiling on 32-bit systems.
+	ucm: Trying to deregister the same memory region twice leads to an
+	dat: reduce debug message level when parsing for location of dat.conf
+	ucm: update ucm provider for windows environment
+	ucm: add timer/retry CM logic to the ucm provider
+
+	Release 2.0.23
+	cma: cannot reuse the cm_id and qp for new connection, must reallocate a new one.
+	scm, cma: update DAPL cm protocol revision with latest address/port changes
+	ucm: modify IB address format to align better with sockaddr_in6
+	Add definition for getpid similar to that used by the other dtest apps.
+	WinOF provides a common implementation of gettimeofday that should
+	The completion manager was updated to provide an abstraction that
+	dtestcm: remove IB verb definitions
+	dtest, dtestx: remove IB verb definitions
+	scm: tighten up socket options to ensure similar behavior on Windows and Linux.
+	cma: improve serialization of destroy and event processing
+	scm: improve serialization of destroy and state changes
+	common: no cleanup/release code for timer thread
+	scm, cma: dapli_thread doesn't always get terminated on library close.
+	ucm: tighten up locking with CM processing, state changes
+	ucm: For UD type QP's, return CR p_data with CONN_EST event on passive side.
+	ucm: cleanup extra cr/lf
+	ucm: fix issues with UD QP's.
+	winof: Convert windows version of dapl and dat libraries to use private heaps.
+	dtest, dtestx: modifications for UD QP testing with ucm provider.
+	scm, ucm: UD QP support was broken when porting to common openib code base.
+	cma: cleanup warning with unused local variable, ret, in disconnect
+	cma: remove debug message after rdma_disconnect failure
+	scm: socket errno check needs O/S dependent wrapper
+	dapltest: update script files for WinOF
+	cma: conditional check for new rdma_cm definition.
+
+	Release 2.0.22
+	dapltest: add mdep processor yield and use with dapltest
+	ucm: Add new provider using a DAPL based IB-UD cm mechanism for MPI implementations.
+
+	Release 2.0.21
+	scm: Fix disconnect. QP's need to move to ERROR state in
+	modify dtest.c to cleanup CNO wait code and consolidate into
+	CNO events, once triggered will not be returned during the cno wait.
+	CNO support broken in both CMA and SCM providers.
+	common osd: include winsock2.h for IPv6 definitions.
+	common osd: include w2tcpip.h for sockaddr_in6 definitions.
+	DAPL introduced the concept of directly waiting on the CQ for
+	dapltest: Implement a malloc() threshold for the completion reaping.
+	scm: handle connected state when freeing CM objects
+	scm, dtest: changes for winof gettimeofday and FD_SETSIZE settings.
+	scm: set TCP_NODELAY sockopt on the server side for sends.
+	remove obsolete files in dapl/udapl source tree
+	dtestcm: add UD type QP option to test
+	scm: destroy QP called before disconnect
+	cma: add support for rdma_cm TIME_WAIT event.
+	scm: remove old udapl_scm code replaced by openib_scm.
+	winof: fix issues after consolidating cma, scm code base.
+	cma: lock held when exiting as a result of a rdma_create_event_channel failure.
+	windows: all dlist functions have been moved to the header file.
+	dtestcm windows: add build infrastructure for new dtestcm test suite
+	openib_common: reorganize code base to share common mem, cq, qp, dto functions
+	scm: fixes and optimizations for connection scaling
+	scm: double the default fd_set_size
+	scm: EP reference in CR should be cleared during ep_destroy
+	dtestx: fix conn establishment event checking
+	dtestcm: new test to measure dapl connection rates.
+
+	Release 2.0.20
+	common,scm: add debug capabilities to print in-process CM lists
+	scm: disconnect EP before cleaning up orphaned CR's during dat_ep_free
+	dapltest: windows scripts updated
+	scm: private data is not handled properly via CR rejects.
+	scm: cleanup orphaned UD CR's when destroying the EP
+	scm: provider specific query for default UD MTU is wrong.
+	scm: update CM code to shutdown before closing socket
+	dapltest: windows script dt-cli.bat updated
+	dapl/windows cma provider: add support for network devices based on index
+	openib: remove 1st gen provider, replaced with openib_cma and openib_scm
+	dapltest: update windows script files
+	dapltest: windows batch files in scripts directory
+	windows_osd/linux_osd: new dapl_os_gettid macro to return thread id
+	windows: missing build files for common and udapl sub-directories
+	windows: add build files for openib_scm, remove /Wp64 build option.
+	scm: multi-hca CM processing broken. Need cr thread wakeup mechanism per HCA.
+	dtest: add connection timers on client side
+	linux_osd: use pthread_self instead of getpid for debug messages
+	windows ibal-scm: dapl/dirs file needs updated to remove ibal-scm
+
+	v1.2 Package:
+
+	Release 1.2.16
+	package: update Copyright file and include the 3 license files in distribution 
+	cma: max sge incorrectly decremented during ibv_device_query 
+
+	Release 1.2.15
+	dtest, dapltest: conflict with dapl-2 utils package, change to dapl1, dapltest1
+	scm: fix compiler warning, unused variable
+
+	----------------
+
+	* BKM for running new DAPL library on your cluster without any impact on existing OFED installation:
+
+	Note: example for user /home/user1, (assumes /home/user1 is exported) and MLX4 adapter, port 1
+
+	Download latest 2.x package: http://www.openfabrics.org/downloads/dapl/dapl-2.0.25.tar.gz
+
+	untar in /home/user1 
+	cd /home/user1/dapl-2.0.25
+	./configure && make (build on node with OFED 1.3 or higher installed, dependency on verb/rdma_cm libraries)
+
+	create /home/user1/dat.conf with following 3 lines. (entries with path to new libraries):
+
+	  ofa-v2-ib0 u2.0 nonthreadsafe default /home/user1/dapl-2.0.25/dapl/udapl/.libs/libdaplcma.so.1 dapl.2.0 "ib0 0" ""
+	  ofa-v2-mlx4_0-1 u2.0 nonthreadsafe default /home/user1/dapl-2.0.25/dapl/udapl/.libs/libdaploscm.so.2 dapl.2.0 "mlx4_0 1" ""
+	  ofa-v2-mlx4_0-1u u2.0 nonthreadsafe default /home/user1/dapl-2.0.25/dapl/udapl/.libs/libdaploucm.so.2 dapl.2.0 "mlx4_0 1" ""
+
+	Run uDAPL application or an MPI that uses uDAPL, with (assuming MLX4 connectx adapters) following:
+
+	  setenv DAT_OVERRIDE /home/user1/dat.conf
+
+	If running Intel MPI and uDAPL socket cm, set the following:
 
-==========
-1.0 BUILD:
-==========
-
-The default build includes a non-debug version of libdat and libdapl-cma uDAPL provider. It will also builds test suites dtest and dapltest and provides manpages for each. This version requires libibverbs and librdmacm installation, IPoIB installation, and IPoIB configuration with an IP address. 
-
-Building :
-----------
-./autogen.sh 
-./configure 
-make
-
-Building debug version:
-----------------------
-./autogen.sh
-./configure --enable-debug
-make
-
-Build example with OFED prefix (x86_64)
----------------------------------------------
-./autogen.sh
-./configure --prefix /usr --sysconf=/etc --libdir /usr/lib64 LDFLAGS=-L/usr/lib64 CPPFLAGS="-I/usr/include"
-make
-
-Installing:
-----------
-make install
-
-NOTE: to link these libraries you must either use libtool and 
-specify the full pathname of the library, or use the `-LLIBDIR' 
-flag during linking and do at least one of the following:
-   - add LIBDIR to the `LD_LIBRARY_PATH' environment variable
-     during execution
-   - add LIBDIR to the `LD_RUN_PATH' environment variable
-     during linking
-   - use the `-Wl,--rpath -Wl,LIBDIR' linker flag
-   - have your system administrator add LIBDIR to `/etc/ld.so.conf'
-
-See any operating system documentation about shared libraries for
-more information, such as the ld(1) and ld.so(8) manual pages.
-
-===================
-2.0 CONFIGURATION:
-===================
-
-/etc/dat.conf 
-
-# DAT v2.0
-#
-# Note: Both API and Provider versions are based on DAT specification, v2.0
-#  
-# Each entry should have the following fields:
-#
-# <ia_name> <api_version> <threadsafety> <default> <lib_path> \
-#           <provider_version> <ia_params> <platform_params>
-#
-# For uDAPL cma provder, <ia_params> is one of the following:
-#       network address, network hostname, or netdev name and 0 for port
-#
-# For uDAPL scm provider, <ia_params> is device name and port
-# For uDAPL ucm provider, <ia_params> is device name and port
-# For uDAPL iWARP provider, <ia_params> is netdev device name and 0 
-# For uDAPL iWARP provider, <ia_params> is netdev device name and 0 
-# For uDAPL RoCE provider, <ia_params> is device name and 0 
-# 
-ofa-v2-mlx4_0-1 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 "mlx4_0 1" ""
-ofa-v2-mlx4_0-2 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 "mlx4_0 2" ""
-ofa-v2-ib0 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 "ib0 0" ""
-ofa-v2-ib1 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 "ib1 0" ""
-ofa-v2-mthca0-1 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 "mthca0 1" ""
-ofa-v2-mthca0-2 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 "mthca0 2" ""
-ofa-v2-ipath0-1 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 "ipath0 1" ""
-ofa-v2-ipath0-2 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 "ipath0 2" ""
-ofa-v2-ehca0-2 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 "ehca0 1" ""
-ofa-v2-iwarp u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 "eth2 0" ""
-ofa-v2-mlx4_0-1u u2.0 nonthreadsafe default libdaploucm.so.2 dapl.2.0 "mlx4_0 1" ""
-ofa-v2-mlx4_0-2u u2.0 nonthreadsafe default libdaploucm.so.2 dapl.2.0 "mlx4_0 2" ""
-ofa-v2-mthca0-1u u2.0 nonthreadsafe default libdaploucm.so.2 dapl.2.0 "mthca0 1" ""
-ofa-v2-mthca0-2u u2.0 nonthreadsafe default libdaploucm.so.2 dapl.2.0 "mthca0 2" ""
-ofa-v2-cma-roe-eth2 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 "eth2 0" ""
-ofa-v2-cma-roe-eth3 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 "eth3 0" ""
-ofa-v2-scm-roe-mlx4_0-1 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 "mlx4_0 1" ""
-ofa-v2-scm-roe-mlx4_0-2 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 "mlx4_0 2" ""
-ofa-v2-mcm-1 u2.0 nonthreadsafe default libdaplomcm.so.2 dapl.2.0 "mlx4_0 1" ""
-ofa-v2-mcm-2 u2.0 nonthreadsafe default libdaplomcm.so.2 dapl.2.0 "mlx4_0 2" ""
-ofa-v2-scif0 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 "scif0 1" ""
-ofa-v2-scif0-u u2.0 nonthreadsafe default libdaploucm.so.2 dapl.2.0 "scif0 1" ""
-ofa-v2-mic0 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 "mic0:ib 1" ""
-ofa-v2-mlx4_0-1s u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 "mlx4_0 1" ""
-ofa-v2-mlx4_0-2s u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 "mlx4_0 2" ""
-ofa-v2-mlx4_1-1s u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 "mlx4_1 1" ""
-ofa-v2-mlx4_1-2s u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 "mlx4_1 2" ""
-ofa-v2-mlx4_1-1u u2.0 nonthreadsafe default libdaploucm.so.2 dapl.2.0 "mlx4_1 1" ""
-ofa-v2-mlx4_1-2u u2.0 nonthreadsafe default libdaploucm.so.2 dapl.2.0 "mlx4_1 2" ""
-ofa-v2-mlx4_0-1m u2.0 nonthreadsafe default libdaplomcm.so.2 dapl.2.0 "mlx4_0 1" ""
-ofa-v2-mlx4_0-2m u2.0 nonthreadsafe default libdaplomcm.so.2 dapl.2.0 "mlx4_0 2" ""
-ofa-v2-mlx4_1-1m u2.0 nonthreadsafe default libdaplomcm.so.2 dapl.2.0 "mlx4_1 1" ""
-ofa-v2-mlx4_1-2m u2.0 nonthreadsafe default libdaplomcm.so.2 dapl.2.0 "mlx4_1 2" ""
-ofa-v2-mlx5_0-1s u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 "mlx5_0 1" ""
-ofa-v2-mlx5_0-2s u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 "mlx5_0 2" ""
-ofa-v2-mlx5_1-1s u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 "mlx5_1 1" ""
-ofa-v2-mlx5_1-2s u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 "mlx5_1 2" ""
-ofa-v2-mlx5_0-1u u2.0 nonthreadsafe default libdaploucm.so.2 dapl.2.0 "mlx5_0 1" ""
-ofa-v2-mlx5_0-2u u2.0 nonthreadsafe default libdaploucm.so.2 dapl.2.0 "mlx5_0 2" ""
-ofa-v2-mlx5_1-1u u2.0 nonthreadsafe default libdaploucm.so.2 dapl.2.0 "mlx5_1 1" ""
-ofa-v2-mlx5_1-2u u2.0 nonthreadsafe default libdaploucm.so.2 dapl.2.0 "mlx5_1 2" ""
-ofa-v2-mlx5_0-1m u2.0 nonthreadsafe default libdaplomcm.so.2 dapl.2.0 "mlx5_0 1" ""
-ofa-v2-mlx5_0-2m u2.0 nonthreadsafe default libdaplomcm.so.2 dapl.2.0 "mlx5_0 2" ""
-ofa-v2-mlx5_1-1m u2.0 nonthreadsafe default libdaplomcm.so.2 dapl.2.0 "mlx5_1 1" ""
-ofa-v2-mlx5_1-2m u2.0 nonthreadsafe default libdaplomcm.so.2 dapl.2.0 "mlx5_1 2" ""
-ofa-v2-qib0-1s u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 "qib0 1" ""
-ofa-v2-qib0-2s u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 "qib0 2" ""
-ofa-v2-qib1-1s u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 "qib1 1" ""
-ofa-v2-qib1-2s u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 "qib1 2" ""
-
-=============================
-3.0 Environment Variables
-=============================
-
- - IB UD options using UCM provider, large scale settings (Xeon)
- 
-export DAPL_UCM_REP_TIME=2000   /*  REQUEST timer, waiting on REPLY, msecs, default = 800 */
-export DAPL_UCM_RTU_TIME=1000   /* REPLY timer, waiting for RTU in msecs, default=400 */
-export DAPL_UCM_RETRY=7       	/* REQUEST & REPLY retries, default = 7 */
-export DAPL_UCM_QP_SIZE=1000	/* CM req/reply work queue size, default = 500 entries */
-export DAPL_UCM_CQ_SIZE=1000	/* CM req/reply completion queue size, default = 500 entries */
-export DAPL_UCM_TX_BURST=100	/* CM signal rate on send messages */
-
- - IB RC options using UCM provider, large scale settings (Xeon)
- 
-export DAPL_MAX_INLINE=64	/*  IB RC inline optimization, best small msg latency, def=64 */
-export DAPL_ACK_RETRY=7         /*  IB RC Ack retry count, default 7 */
-export DAPL_ACK_TIMER=20       	/* IB RC Ack retry timer, default 20 */
-					IB formula:: 5 bits, 4.096us*2^ack_timer. 16== 268ms, 20==4.2s
-- IB RC options using SCM provider
-
-export DAPL_SCM_NETDEV=mic0	/* default is first non-loopback netdev,  use mic0 with KNCs */
-
-Other  IB settings:
-export DAPL_IB_MTU=2048		/* IB MTU size, default = 2048 */
-export DAPL_RNR_TIMER=12	/* 5 bits, 12 =.64ms, 28 =163ms, 31 =491ms */
-export DAPL_RNR_RETRY=7		/* 3 bits, 7 == infinite */
-export DAPL_IB_PKEY=0		/* override IB partition key, default is pkey index 0 */
-export DAPL_IB_SL=0		/* override IB Service level, default = 0 */
-
-- Other options:
-export DAPL_WR_MAX=500 		/* used to reduce max qp depth on all IB providers, default = dev attributes */
-
-Debug logging and Counter settings ( --enable-counters)
-
-export DAPL_DBG_SYS_MEM=10	/* threshold for low sys memory warning, def = 10 percent */
-export DAPL_DBG_TYPE=0x0000003 	/* set log, monitor, and error checking, default = warnings and errors */
-
-DAPL_DBG_TYPE bit settings as follow:
-
-DAPL_DBG_TYPE_ERR          = 0x0001,
-DAPL_DBG_TYPE_WARN         = 0x0002,
-DAPL_DBG_TYPE_EVD          = 0x0004,
-DAPL_DBG_TYPE_CM           = 0x0008,
-DAPL_DBG_TYPE_EP           = 0x0010,
-DAPL_DBG_TYPE_UTIL         = 0x0020,
-DAPL_DBG_TYPE_CALLBACK     = 0x0040,
-DAPL_DBG_TYPE_DTO_COMP_ERR = 0x0080,
-DAPL_DBG_TYPE_API          = 0x0100,
-DAPL_DBG_TYPE_RTN          = 0x0200,
-DAPL_DBG_TYPE_EXCEPTION   = 0x0400,
-DAPL_DBG_TYPE_SRQ         = 0x0800,
-DAPL_DBG_TYPE_CNTR        = 0x1000,
-DAPL_DBG_TYPE_CM_LIST     = 0x2000,
-DAPL_DBG_TYPE_THREAD      = 0x4000,
-DAPL_DBG_TYPE_CM_EST      = 0x8000,
-DAPL_DBG_TYPE_CM_WARN    = 0x10000,
-DAPL_DBG_TYPE_EXTENSION  = 0x20000,
-DAPL_DBG_TYPE_CM_STATS   = 0x40000,
-DAPL_DBG_TYPE_CM_ERRS    = 0x80000,    /* print any cm errors on device close */
-DAPL_DBG_TYPE_LINK_ERRS  = 0x100000,   /* print any link errors on device close */
-DAPL_DBG_TYPE_LINK_WARN  = 0x200000,   /* print any link warning on device close */
-DAPL_DBG_TYPE_DIAG_ERRS  = 0x400000,   /* print any diag_counter errors on dev close */
-DAPL_DBG_TYPE_SYS_WARN   = 0x800000,   /* print low mem warning during alloc, reg_mem */
-DAPL_DBG_TYPE_VER        = 0x1000000,  /* print dapl ver and build date during dev open */
-
-
-=============================
-4.0 Bugs/Known issues
-=============================
-
-mlx5 support included, new dat.conf entries. beta level.
-
-=============================
-5.0 SAMPLE uDAPL APPLICATION:
-=============================
-
-There are 2 sample programs, with manpages, provided with this package.
-
-(dapl/test/dtest/)
-
-NAME
-       dtest - simple uDAPL send/receive and RDMA test
-
-SYNOPSIS
-       dtest [-P provider] [-b buf size] [-B burst count][-v] [-c] [-p] [-d] [-s]
-
-       dtest [-P provider] [-b buf size] [-B burst count][-v] [-c] [-p] [-d] [-h HOSTNAME]
-
-DESCRIPTION
-       dtest  is a simple test used to exercise and verify the uDAPL interfaces.  At least two instantia-
-       tions of the test must be run. One acts as the server and the other the client. The server side of
-       the  test,  once invoked listens for connection requests, until timing out or killed. Upon receipt
-       of a cd connection request, the connection is established, the server and  client  sides  exchange
-       information necessary to perform RDMA writes and reads.
-
-OPTIONS
-       -P=PROVIDER
-              use PROVIDER to specify uDAPL interface using /etc/dat.conf (default OpenIB-cma)
-
-       -b=BUFFER_SIZE
-              use buffer size BUFFER_SIZE for RDMA(default 64)
-
-       -B=BURST_COUNT
-              use burst count BURST_COUNT for iterations (default 10)
-
-       -v, verbose output(default off)
-
-       -c, use consumer notification events (default off)
-
-       -p, use polling (default wait for event)
-
-       -d, delay in seconds before close (default off)
-
-       -s, run as server (default - run as server)
-
-       -h=HOSTNAME
-              use HOSTNAME to specify server hostname or IP address (default - none)
-
-EXAMPLES
-       dtest -P OpenIB-cma -v -s
-            Starts a server process with debug verbosity using provider OpenIB-cma.
-
-       dtest -P OpenIB-cma -h server1-ib0
-
-            Starts a client process, using OpenIB-cma provider to connect to hostname server1-ib0.
-
-SEE ALSO
-       dapltest(1)
-
-AUTHORS
-       Arlin Davis
-              <ardavis at ichips.intel.com>
-
-BUGS
-
-/dapl/test/dapltest/
-
-NAME
-        dapltest - test for the Direct Access Programming Library (DAPL)
-
-DESCRIPTION
-       Dapltest  is  a  set  of tests developed to exercise, characterize, and verify the DAPL interfaces
-       during development and porting.  At least two instantiations of the test must be run. One acts  as
-       the  server, fielding requests and spawning server-side test threads as needed. Other client invo-
-       cations connect to the server and issue test requests. The server side of the test, once  invoked,
-       listens  continuously for client connection requests, until quit or killed. Upon receipt of a con-
-       nection request, the connection is established, the server and client sides swap  version  numbers
-       to  verify that they are able to communicate, and the client sends the test request to the server.
-       If the version numbers match, and the test request is well-formed, the server spawns  the  threads
-       needed to run the test before awaiting further connections.
-
-USAGE
-       dapltest [ -f script_file_name ] [ -T S|Q|T|P|L ] [ -D device_name ] [ -d ] [ -R HT|LL|EC|PM|BE ]
-
-       With  no  arguments,  dapltest runs as a server using default values, and loops accepting requests
-       from clients.
+  	  setenv I_MPI_DEVICE=rdssm:ofa-v2-mlx4_0-1
 
-       The -f option allows all arguments to be placed in a file, to ease test automation.
+	or if running Intel MPI and uDAPL IB UD cm, set the following:
 
-       The following arguments are common to all tests:
+  	  setenv I_MPI_DEVICE=rdssm:ofa-v2-mlx4_0-1u
 
-       [ -T S|Q|T|P|L ]
-              Test function to be performed:
+	or if running Intel MPI and uDAPL rdma_cm, set the following:
 
-              S      - server loop
+	  setenv I_MPI_DEVICE=rdssm:ofa-v2-ib0
 
-              Q      - quit, client requests that server wait for any outstanding tests to complete, then
-                     clean up and exit
+-------------------------
 
-              T      - transaction test, transfers data between client and server
+        OFED 1.4.1 RELEASE NOTES
 
-              P      - performance test, times DTO operations
+        NEW SINCE OFED 1.4 - new versions of uDAPL v1 (1.2.14-1) and v2 (2.0.19-1)
 
-              L      -  limit  test,  exhausts  various  resources, runs in client w/o server interaction
-                     Default: S
+        * New Features - optional counters, must be configured/built with -DDAPL_COUNTERS
 
-      [ -D device_name ]
-              Specifies the interface adapter name as documented in the /etc/dat.conf  static  configura-
-              tion file. This name corresponds to the provider library to open.  Default: none
+        * Bug Fixes
 
-       [ -d ] Enables  extra  debug  verbosity,  primarily tracing of the various DAPL operations as they
-              progress.  Repeating this parameter increases debug spew.  Errors encountered result in the
-              test  spewing some explanatory text and stopping; this flag provides more detail about what
-              led up to the error.  Default: zero
+	v2 - scm, cma: dat max_lmr_block_size is 32 bit, verbs max_mr_size is 64 bit 
+	v2 - scm, cma: use direct SGE mappings from dat_lmr_triplet to ibv_sge 
+	v2 - dtest: add flush EVD call after data transfer errors 
+	v2 - scm: increase default MTU size from 1024 to 2048 
+	v2 - dapltest: reset server listen ports to avoid collisions during long runs 
+	v2 - dapltest: avoid duplicating ports, increment based on ep/thread count 
+	v2 - dapltest: fix assumptions that multiple EP's will connect in order 
+	v2 - common: sync missing with when removing items off of EVD pending queue 
+	v2 - scm: reduce open time with thread start up 
+	v2 - scm: getsockopt optlen needs initialized to size of optval 
+	v2 - scm: cr_thread cleanup 
+	v2 - OFED and WinOF code sync 
+	v2 - scm: remove unnecessary query gid/lid from connection phase code. 
+	v2 - scm: add optional 64-bit counters, build with -DDAPL_COUNTERS. 
+	v1,v2 - spec files missing Requires(post) statements for sed/coreutils 
+	v1,v2 - dtest/dapltest: use $(top_builddir) for .la files during test builds 
+	v1,v2 - scm: remove unnecessary thread when using direct objects 
+	v1,v2 - Fix SuSE 11 build issues, asm/atomic.h no longer exists 
 
-       [ -R BE ]
-              Indicate the quality of service (QoS) desired.  Choices are:
+	* Build Notes:
 
-              HT     - high throughput
+	# NON_DEBUG build/install example for x86_64, OFED targets
+	./configure --prefix /usr --sysconf=/etc --libdir /usr/lib64 LDFLAGS=-L/usr/lib64 CPPFLAGS="-I/usr/include"
+	make install
 
-              LL     - low latency
+	# DEBUG build/install example for x86_64, using OFED targets
+	./configure --enable-debug --prefix /usr --sysconf=/etc --libdir /usr/lib64 LDFLAGS=-L/usr/lib64 CPPFLAGS="-I/usr/include"
+	make install
 
-              EC     - economy (neither HT nor LL)
+	# COUNTERS build/install example for x86_64, using OFED targets
+	./configure --prefix /usr --sysconf=/etc --libdir /usr/lib64 LDFLAGS=-L/usr/lib64 CPPFLAGS="-I/usr/include -DDAPL_COUNTERS"
+	make install
 
-              PM     - premium
+	* BKM for running new DAPL library on your cluster without any impact on existing OFED installation:
 
-              BE     - best effort Default: BE
+	Note: example for user /home/user1, (assumes /home/user1 is exported) and MLX4 adapter, port 1
 
-       Usage - Quit test client
+	Download latest 2.x package: http://www.openfabrics.org/downloads/dapl/dapl-2.0.19.tar.gz
 
-           dapltest [Common_Args] [ -s server_name ]
+	untar in /home/user1 
+	cd /home/user1/dapl-2.0.19
+	./configure && make (build on node with OFED 1.3 or higher installed, dependency on verb/rdma_cm libraries)
 
-           Quit testing (-T Q) connects to the server to ask it to clean up and
-           exit (after it waits for any outstanding test runs to complete).
-           In addition to being more polite than simply killing the server,
-           this test exercises the DAPL object teardown code paths.
-           There is only one argument other than those supported by all tests:
+	create /home/user1/dat.conf with following 2 lines. (entries with path to new libraries):
 
-           -s server_name      Specifies the name of the server interface.
-                               No default.
+	  ofa-v2-ib0 u2.0 nonthreadsafe default /home/user1/dapl-2.0.19/dapl/udapl/.libs/libdaplcma.so.1 dapl.2.0 "ib0 0" ""
+	  ofa-v2-mlx4_0-1 u2.0 nonthreadsafe default /home/user1/dapl-2.0.19/dapl/udapl/.libs/libdaploscm.so.2 dapl.2.0 "mlx4_0 1" ""
 
-       Usage - Transaction test client
+	Run uDAPL application or an MPI that uses uDAPL, with (assuming MLX4 connectx adapters) following:
 
-           dapltest [Common_Args] [ -s server_name ]
-                    [ -t threads ] [ -w endpoints ] [ -i iterations ] [ -Q ]
-                    [ -V ] [ -P ] OPclient OPserver [ op3,
+	  setenv DAT_OVERRIDE=/home/user1/dat.conf
 
-           Transaction testing (-T T) transfers a variable amount of data between
-:
-          client and server.  The data transfer can be described as a sequence of
-           individual operations; that entire sequence is transferred ’iterations’
-           times by each thread over all of its endpoint(s).
+	If running Intel MPI and uDAPL socket cm, set the following:
 
-           The following parameters determine the behavior of the transaction test:
+  	  setenv I_MPI_DEVICE=rdssm:ofa-v2-mlx4_0-1
 
-           -s server_name      Specifies the name or IP address of the server interface.
-                               No default.
+	if running Intel MPI and uDAPL rdma_cm, set the following:
 
-           [ -t threads ]      Specify the number of threads to be used.
-                               Default: 1
+	  setenv I_MPI_DEVICE=rdssm:ofa-v2-ib0
 
-           [ -w endpoints ]    Specify the number of connected endpoints per thread.
-                               Default: 1
+-------------------------
 
-           [ -i iterations ]   Specify the number of times the entire sequence
-                               of data transfers will be made over each endpoint.
-                               Default: 1000
+        OFED 1.4 RELEASE NOTES
 
-           [ -Q ]              Funnel completion events into a CNO.
-                               Default: use EVDs
+        NEW SINCE OFED 1.3.1 - new versions of uDAPL v1 (1.2.12-1) and v2 (2.0.15-1)
 
-           [ -V ]              Validate the data being transferred.
-                               Default: ignore the data
+        * New Features 
 
-           [ -P ]              Turn on DTO completion polling
-                               Default: off
+	1. The new socket CM provider, introduced in 1.2.8 and 2.0.11 packages,
+	assumes homogeneous cluster and will setup the QP's based on local HCA port
+	attributes and exchanges QP information via socket's using the hostname of
+	each node. IPoIB and rdma_cm are NOT required for this provider. QP attributes
+	can be adjusted via the following environment parameters: 
 
-           OP1 OP2 [ OP3, ... ]
-                               A single transaction (OPx) consists of:
-
-                               server|client   Indicates who initiates the
-                                               data transfer.
-
-                               SR|RR|RW        Indicates the type of transfer:
-                                               SR  send/recv
-                                               RR  RDMA read
-                                               RW  RDMA write
-                               Defaults: none
-
-                               [ seg_size [ num_segs ] ]
-:
-
-                                              Indicates the amount and format
-                                               of the data to be transferred.
-                                               Default:  4096  1
-                                                         (i.e., 1 4KB buffer)
-
-                               [ -f ]          For SR transfers only, indicates
-                                               that a client’s send transfer
-                                               completion should be reaped when
-                                               the next recv completion is reaped.
-                                               Sends and receives must be paired
-                                               (one client, one server, and in that
-                                               order) for this option to be used.
-
-           Restrictions:
-
-           Due to the flow control algorithm used by the transaction test, there
-           must be at least one SR OP for both the client and the server.
-
-           Requesting data validation (-V) causes the test to automatically append
-           three OPs to those specified. These additional operations provide
-           synchronization points during each iteration, at which all user-specified
-           transaction buffers are checked. These three appended operations satisfy
-           the "one SR in each direction" requirement.
-
-           The transaction OP list is printed out if -d is supplied.
-
-       Usage - Performance test client
-
-           dapltest [Common_Args] -s server_name [ -m p|b ]
-                    [ -i iterations ] [ -p pipeline ] OP
-
-           Performance testing (-T P) times the transfer of an operation.
-           The operation is posted ’iterations’ times.
-
-           The following parameters determine the behavior of the transaction test:
-
-           -s server_name      Specifies the name or IP address of the server interface.
-                               No default.
-
-           -m b|p              Used to choose either blocking (b) or polling (p)
-                               Default: blocking (b)
-          [ -i iterations ]   Specify the number of times the entire sequence
-                               of data transfers will be made over each endpoint.
-                               Default: 1000
-
-           [ -p pipeline ]     Specify the pipeline length, valid arguments are in
-                               the range [0,MAX_SEND_DTOS]. If a value greater than
-                               MAX_SEND_DTOS is requested the value will be
-                               adjusted down to MAX_SEND_DTOS.
-                               Default: MAX_SEND_DTOS
-
-           OP                  Specifies the operation as follow:
-
-                               RR|RW           Indicates the type of transfer:
-                                               RR  RDMA read
-                                               RW  RDMA write
-                                               Defaults: none
-
-                               [ seg_size [ num_segs ] ]
-                                               Indicates the amount and format
-                                               of the data to be transferred.
-                                               Default:  4096  1
-                                                         (i.e., 1 4KB buffer)
-
-       Usage - Limit test client
-
-           Limit testing (-T L) neither requires nor connects to any server
-           instance.  The client runs one or more tests which attempt to
-           exhaust various resources to determine DAPL limits and exercise
-           DAPL error paths.  If no arguments are given, all tests are run.
-
-           Limit testing creates the sequence of DAT objects needed to
-           move data back and forth, attempting to find the limits supported
-           for the DAPL object requested.  For example, if the LMR creation
-           limit is being examined, the test will create a set of
-           {IA, PZ, CNO, EVD, EP} before trying to run dat_lmr_create() to
-           failure using that set of DAPL objects.  The ’width’ parameter
-           can be used to control how many of these parallel DAPL object
-           sets are created before beating upon the requested constructor.
-           Use of -m limits the number of dat_*_create() calls that will
-           be attempted, which can be helpful if the DAPL in use supports
-           essentially unlimited numbers of some objects.
-           The limit test arguments are:
-
-           [ -m maximum ]      Specify the maximum number of dapl_*_create()
-                               attempts.
-                               Default: run to object creation failure
-
-           [ -w width ]        Specify the number of DAPL object sets to
-                               create while initializing.
-                               Default: 1
-
-           [ limit_ia ]        Attempt to exhaust dat_ia_open()
-
-           [ limit_pz ]        Attempt to exhaust dat_pz_create()
-
-           [ limit_cno ]       Attempt to exhaust dat_cno_create()
-
-           [ limit_evd ]       Attempt to exhaust dat_evd_create()
-
-           [ limit_ep ]        Attempt to exhaust dat_ep_create()
-
-           [ limit_rsp ]       Attempt to exhaust dat_rsp_create()
-
-           [ limit_psp ]       Attempt to exhaust dat_psp_create()
-
-           [ limit_lmr ]       Attempt to exhaust dat_lmr_create(4KB)
-
-           [ limit_rpost ]     Attempt to exhaust dat_ep_post_recv(4KB)
-
-           [ limit_size_lmr ]  Probe maximum size dat_lmr_create()
-
-                               Default: run all tests
-
-EXAMPLES
-       dapltest -T S -d -D OpenIB-cma
-
-                               Starts a server process with debug verbosity.
+	DAPL_ACK_TIMER (default=16 5 bits, 4.096us*2^ack_timer. 16 == 268ms) 
+	DAPL_ACK_RETRY (default=7 3 bits, 7 * 268ms = 1.8 seconds) 
+	DAPL_RNR_TIMER (default=12 5 bits, 12 == 0.64ms, 28 == 163ms, 31 == 491ms) 
+	DAPL_RNR_RETRY (default=7 3 bits, 7 == infinite) 
+	DAPL_IB_MTU (default=1024 limited to active MTU max) 
+
+	The new socket cm entries in /etc/dat.conf provide a link to the actual HCA
+	device and port. Example v1 and v2 entries for a Mellanox connectx device, port 1: 
+
+	OpenIB-mlx4_0-1 u1.2 nonthreadsafe default libdaplscm.so.1 dapl.1.2 "mlx4_0 1" "" 
+	ofa-v2-mlx4_0-1 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 "mlx4_0 1" "" 
+
+	This new socket cm provider, was successfully tested on the TATA CRL cluster
+	(#8 on Top500) with Intel MPI, achieving a HPLinpack score of 132.8TFlops on
+	1798 nodes, 14384 cores at ~76.9% of peak. DAPL_ACK_TIMER was increased to 21
+	for this scale. 
+
+	2. New v2 definitions for IB unreliable datagram extension (only supported in
+	scm provider, libdaploscm.so.2) 
+
+	Extended EP dat_service_type, with DAT_IB_SERVICE_TYPE_UD 
+	Add IB extension call dat_ib_post_send_ud(). 
+	Add address handle definition for UD calls. 
+	Add IB event definitions to provide remote AH via connect and connect requests 
+	See dtestx (-d) source for example usage model 
+
+        * Bug Fixes
+
+	v1,v2 - dapltest: trans test moves to cleanup stage before rdma_read processing is complete
+	v1,v2 - Fix static registration (dat.conf) to include sysconfdir override
+	v1,v2 - dat.conf: add default iwarp entry for eth2
+	v1,v2 - dapl: adjust max_rdma_read_iov to 1 for iWARP devices
+	v1,v2 - dtest: reduce default IOV's for ep_create to support iWARP
+	v1,v2 - dtest: fix 32-bit build issues
+	v1,v2 - build: $(DESTDIR) prepend needed on install hooks for dat.conf
+	v2 - scm: UD shares EPs which requires serialization
+	v2 - dapl: fixes for IB UD extensions in common code and socket cm provider.
+	v2 - dapl: add provider specific attribute query option for IB UD MTU size
+	v2 - dapl build: add correct CFLAGS, set non-debug build by default for v2
+	v2 - dtestx: fix stack corruption problem with hostname strcpy
+	v2 - dapl extension: dapli_post_ext should always allocate cookie for requests.
+	v2 - dapltest: manpage - rdma write example incorrect
+	v1,v2 - dat, dapl, dtest, dapltest, providers: fix compiler warnings in dat common code
+	v1,v2 - dapl cma: debug message during query needs definition for inet_ntoa
+	v1,v2 - dapl scm: fix corner case that delivers duplicate disconnect events
+	v1,v2 - dat: include stddef.h for NULL definition in dat_platform_specific.h
+	v1,v2 - dapl: add debug messages during async and overflow events
+	v1,v2 - dapltest: add check for duplicate disconnect events in transaction test
+	v1,v2 - dapl scm: use correct device attribute for max_rdma_read_out, max_qp_init_rd_atom
+	v1,v2 - dapl scm: change IB RC qp inline and timer defaults.
+	v1,v2 - dapl scm: add mtu adjustments via environment, default = 1024.
+	v1,v2 - dapl scm: change connect and accept to non-blocking to avoid blocking user thread.
+	v1,v2 - dapl scm: update max_rdma_read_iov, max_rdma_write_iov EP attributes during query
+	v1,v2 - dat: allow TYPE_ERR messages to be turned off with DAT_DBG_TYPE
+	v1,v2 - dapl: remove needless terminating 0 in dto_op_str functions.
+	v1,v2 - dat: remove reference to doc/dat.conf in makefile.am
+	v1,v2 - dapl scm: fix ibv_destroy_cq busy error condition during dat_evd_free.
+	v1,v2 - dapl scm: add stdout logging for uname and gethostbyname errors during open.
+	v1,v2 - dapl scm: support global routing and set mtu based on active_mtu
+	v1,v2 - dapl: add opcode to string function to report opcode during failures.
+	v1,v2 - dapl: remove unused iov buffer allocation on the endpoint
+	v1,v2 - dapl: endpoint pending request count is wrong
+	
+-------------------------
+
+        OFED 1.3.1 RELEASE NOTES
+
+        NEW SINCE OFED 1.3 - new versions of uDAPL v1 (1.2.7-1) and v2 (2.0.9-1)
+	
+        * New Features - None
+
+        * Bug Fixes
+	v2 - add private data exchange with reject 
+	v1,v2 - better error reporting in non-debug builds 
+	v1,v2 - update only OFA entries in dat.conf, cooperate with non-ofa providers 
+	v1,v2 - support for zero byte operations, iov==NULL 
+	v1,v2 - multi-transport support for inline data and private data differences 
+	v1,v2 - fix memory leaks and other reported bugs since OFED 1.3 
+	v1,v2 - dtest,dtestx,dapltest build issues on RHEL5.1 
+	v1,v2 - long delay during dat_ia_open when DNS not configured 
+	v1,v2 - use rdma_read_in/out from ep_attr per consumer instead of HCA max 
+        
+-------------------------
+
+        OFED 1.3 RELEASE NOTES
+
+        NEW SINCE OFED 1.2
+
+        * New Features
+
+          1. Add v2.0 library support for new 2.0 API Specification
+          2. Separate v1.2 library release to co-exist with v2.0 libraries.
+          3. New dat.conf with both 1.2 and 2.0 support
+          4. New v2.0 dtestx utilities to test IB extensions
+
+        * Bug Fixes
+
+          v1.2 and v2.0
+           - uDAT: static/dynamic registry parsing fixes 
+           - uDAPL: provider fixes for dat_psp_create_any 
+           - dtest/dapltest: change default provider names to sync with dat.conf
+           - openib_cma: issues with destroy_cm_id and init/resp exchange
+           - dapltest: use gettimeofday instead of get_cycles for better portability
+           - dapltest: endian issue with mem_handle, mem_address
+           - dapltest fix to include inet_ntoa definitions
+           - fix build problems on 32-bit and 64-bit PowerPC 
+           - cleanup packaging
+
+          v2.0
+          - set default config options to match spec file, --enable-debug --enable-ext-type=ib 
+          - use unique devel target names, libdat2.so, /usr/include/dat2
+          - dtestx fix memory leak, freeaddrinfo after getaddrinfo
+          - Fix for IB extended DTO cookie deallocation on inbound rdma_Write_immed
+          - WinOF: Update OFED code base to include WinOF changes, work from same code base
+          - WinOF: add DAT_API definition, __stdcall for windows, nothing for linux
+          - dtest: add dat_evd_query to check correct size
+          - openib_cma: add macro to convert SID to PORT
+          - dtest: endian support for exchanging RMR info
+          - openib_cma: lower default settings, inline and RDMA init/resp
+          - openib_cma: missing ia_query for max_iov_segments_per_rdma_write
+  
+          v1.2
+          - openib_cma: turn down dbg noise level on rejects
+          - dtest: typo in memset
+  
+
+        BUILD: v1 and v2 uDAPL source install/build instructions (redhat example):
+
+        # cd to distribution SRPMS directory
+	cd /tmp/OFED-1.3/SRPMS
+        rpm -i dapl-1.2*.rpm
+        rpm -i dapl-2.0*.rpm
+        cd /usr/src/redhat/SOURCES
+        tar zxf dapl-1.2*.tgz
+        tar zxf dapl-2.0*.tgz
+        
+	# NON_DEBUG build example for x86_64, using OFED targets
+
+	./configure --prefix /usr --sysconf=/etc --libdir /usr/lib64 
+        LDFLAGS=-L/usr/lib64 CPPFLAGS="-I/usr/include"
+
+	# build and install 
+
+	make
+	make install
+
+	# DEBUG build example for x86_64, using OFED targets
+
+	./configure --enable-debug --prefix /usr --sysconf=/etc --libdir /usr/lib64 
+        LDFLAGS=-L/usr/lib64 CPPFLAGS="-I/usr/include"
+
+	# build and install 
+
+	make
+	make install
+
+	# DEBUG messages: set environment variable DAPL_DBG_TYPE, default
+	  mapping is 0x0003
+
+	DAPL_DBG_TYPE_ERR       = 0x0001,
+	DAPL_DBG_TYPE_WARN      = 0x0002,
+	DAPL_DBG_TYPE_EVD       = 0x0004,
+	DAPL_DBG_TYPE_CM        = 0x0008,
+	DAPL_DBG_TYPE_EP        = 0x0010,
+	DAPL_DBG_TYPE_UTIL      = 0x0020,
+	DAPL_DBG_TYPE_CALLBACK  = 0x0040,
+	DAPL_DBG_TYPE_DTO_COMP_ERR= 0x0080,
+	DAPL_DBG_TYPE_API       = 0x0100,
+	DAPL_DBG_TYPE_RTN       = 0x0200,
+	DAPL_DBG_TYPE_EXCEPTION = 0x0400,
+	DAPL_DBG_TYPE_SRQ       = 0x0800,
+	DAPL_DBG_TYPE_CNTR      = 0x1000
+
+-------------------------
+
+        OFED 1.2 RELEASE NOTES
+
+        NEW SINCE Gamma 3.2 and OFED 1.1
+
+        * New Features
+
+          1. Added dtest and dapltest to the openfabrics build and utils rpm. 
+             Includes manpages.
+          2. Added following environment variables to configure connection management
+             timers (default settings) for larger clusters:
+
+             DAPL_CM_ARP_TIMEOUT_MS      4000
+             DAPL_CM_ARP_RETRY_COUNT       15
+             DAPL_CM_ROUTE_TIMEOUT_MS    4000
+             DAPL_CM_ROUTE_RETRY_COUNT     15
+            
+        * Bug Fixes
+
+          + Added support for new ib verbs client register event. No extra 
+            processing required at the uDAPL level.
+          + Fix some issues supporting create qp without recv cq handle or 
+            recv qp resources. IB verbs assume a recv_cq handle and uDAPL 
+            dapl_ep_create assumes there is always recv_sge resources specified.
+          + Fix some timeout and long disconnect delay issues discovered during 
+            scale-out testing. Added support to retry rdma_cm address and route 
+            resolution with configuration options. Provide a disconnect call
+            when receiving the disconnect request to guarantee a disconnect reply 
+            and event on the remote side. The rdma_disconnect was not being called 
+            from dat_ep_disconnect() as a result of the state changing
+            to DISCONNECTED in the event callback.
+          + Changes to support exchanging and validation of the device 
+            responder_resources and the initiator_depth during conn establishment
+          + Fix some build issues with dapltest on 32 bit arch, and on ia64 SUSE arch
+          + Add support for multiple IB devices to dat.conf to support IPoIB HA failover
+          + Fix atomic operation build problem with ia64 and RHEL5.
+          + Add support to return local and remote port information with dat_ep_query
+          + Cleanup RPM specfile for the dapl package, move to 1.2-1 release.
+
+        NEW SINCE Gamma 3.1 and OFED 1.0
+ 
+        * BUG FIXES
+
+	  + Update obsolete CLK_TCK to CLOCKS_PER_SEC
+ 	  + Fill out some uninitialized fields in the ia_attr structure returned by
+	  dat_ia_query().
+        + Update dtest to support multiple segments on rdma write and change
+	  makefile to use OpenIB-cma by default.
+        + Add support for dat_evd_set_unwaitable on a DTO evd in openib_cma
+	  provider
+        + Added errno reporting (message and return codes) during open to help
+	  diagnose create thread issues.
+        + Fix some suspicious inline assembly  EIEIO_ON_SMP and ISYNC_ON_SMP 
+        + Fix IA64 build problems 
+        + Lower the reject debug message level so we don't see warnings when
+	  consumers reject.
+        + Added support for active side TIMED_OUT event from a provider.
+        + Fix bug in dapls_ib_get_dat_event() call after adding new unreachable
+	  event.
+        + Update for new rdma_create_id() function signature.
+        + Set max rdma read per EP attributes
+        + Report the proper error and timeout events.
+        + Socket CM fix to guard against using a loopback address as the local
+	  device address.
+        + Use the uCM set_option feature to adjust connect request timeout
+	  retry values. 
+        + Fix to disallow any event after a disconnect event.
+
+	* OFED 1.1 uDAPL source build instructions:
+
+	cd /usr/local/ofed/src/openib-1.1/src/userspace/dapl
+
+	# NON_DEBUG build configuration
+
+	./configure --disable-libcheck --prefix /usr/local/ofed 
+	--libdir /usr/local/ofed/lib64 LDFLAGS=-L/usr/local/ofed/lib64	
+	CPPFLAGS="-I../libibverbs/include -I../librdmacm/include"
+
+	# build and install 
+
+	make
+	make install
+
+	# DEBUG build configuration
+
+	./configure --disable-libcheck --enable-debug --prefix /usr/local/ofed 	
+	--libdir /usr/local/ofed/lib64 LDFLAGS=-L/usr/local/ofed/lib64
+	CPPFLAGS="-I../libibverbs/include -I../librdmacm/include"
+
+	# build and install 
+
+	make
+	make install
+
+	# DEBUG messages: set environment variable DAPL_DBG_TYPE, default
+	  mapping is 0x0003
+
+	DAPL_DBG_TYPE_ERR       = 0x0001,
+	DAPL_DBG_TYPE_WARN      = 0x0002,
+	DAPL_DBG_TYPE_EVD       = 0x0004,
+	DAPL_DBG_TYPE_CM        = 0x0008,
+	DAPL_DBG_TYPE_EP        = 0x0010,
+	DAPL_DBG_TYPE_UTIL      = 0x0020,
+	DAPL_DBG_TYPE_CALLBACK  = 0x0040,
+	DAPL_DBG_TYPE_DTO_COMP_ERR= 0x0080,
+	DAPL_DBG_TYPE_API       = 0x0100,
+	DAPL_DBG_TYPE_RTN       = 0x0200,
+	DAPL_DBG_TYPE_EXCEPTION = 0x0400,
+	DAPL_DBG_TYPE_SRQ       = 0x0800,
+	DAPL_DBG_TYPE_CNTR      = 0x1000
+
+
+	Note: The udapl provider library libdaplscm.so is untested and 
+	unsupported, thus customers should not use it.
+	It will be removed in the next OFED release. 
+	
+        DAPL GAMMA 3.1 RELEASE NOTES
+
+        This release of the DAPL reference implementation 
+        is timed to coincide with the first release of the 
+        Open Fabrics (www.openfabrics.org) software stack.
+        This release adds support for this new stack, which 
+        is now the native Linux RDMA stack.
+        
+        This release also adds a new licensing option. In 
+        addition to the Common Public License and BSD License,
+	  the code can now be licensed under the terms of the GNU 
+        General Public License (GPL) version 2.
+
+        NEW SINCE Gamma 3.0
+
+        - GPL v2 added as a licensing option
+        - OpenFabrics (aka OpenIB) gen2 verbs support
+        - dapltest support for Solaris 10
+
+        * BUG FIXES
+
+        + Fixed a disconnect event processing race
+        + Fix to destroy all QPs on IA close
+        + Removed compiler warnings
+        + Removed unused variables
+        + And many more...
+
+        DAPL GAMMA 3.0 RELEASE NOTES
+
+        This is the first release based on version 1.2 of the spec. There 
+        are some components, such as shared receive queues (SRQs), which 
+        are not implemented yet. 
+
+        Once again there were numerous bug fixes submitted by the 
+        DAPL community.
+
+        NEW SINCE Beta 2.06
+
+        - DAT 1.2 headers
+        - DAT_IA_HANDLEs implemented as small integers
+	- Changed default device name to be "ia0a"
+        - Initial support for Linux 2.6.X kernels
+        - Updates to the OpenIB gen 1 provider 
+
+        * BUG FIXES
+
+        + Updated Makefile for differentiation between OS releases. 
+        + Updated atomic routines to use appropriate API
+        + Removed unnecessary assert from atomic_dec. 
+        + Fixed bugs when freeing a PSP.
+        + Fixed error codes returned by the DAT static registry.
+        + Kernel updates for dat_strerror.
+        + Cleaned up the transport layer/adapter interface to use DAPL 
+          types rather than transport types.
+        + Fixed ring buffer reallocation.
+        + Removed old test/udapl/dapltest directory.
+        + Fixed DAT_IA_HANDLE translation (from pointer to int and 
+          vice versa) on 64-bit platforms.
+
+	DAPL BETA 2.06 RELEASE NOTES
+
+	We are not planning any further releases of the Beta series,
+	which are based  on the 1.1 version of the spec. There may be
+	further releases for bug fixes, but we anticipate the DAPL
+	community to move to the new 1.2 version of the spec and the
+	changes mandated in the reference implementation.
+
+	The biggest item in this release is the first inclusion of the
+	OpenIB Gen 1 provider, an item generating a lot of interest in
+	the IB community. This implementation has graciously been
+	provided by the Mellanox team. The kdapl implementation is in
+	progress, and we imagine work will soon begin on Gen 2.
+
+	There are also a handful of bug fixes available, as well as a long
+	awaited update to the endpoint design document.
+
+	NEW SINCE Beta 2.05
+
+	- OpenIB gen 1 provider support has been added
+	- Added dapls_evd_post_generic_event(), routine to post generic 
+	  event types as requested by some providers. Also cleaned up 
+	  error reporting.
+	- Updated the endpoint design document in the doc/ directory.
+
+	* BUG FIXES
+
+	+ Cleaned up memory leak on close by freeing the HCA structure;
+	+ Removed bogus #defs for rdtsc calls on IA64.
+	+ Changed dapltest thread types to use internal types for 
+	  portability & correctness
+	+ Various 64 bit enhancements & updates
+	+ Fixes to conformance test that were defining CONN_QUAL twice
+	  and using it in different ways
+	+ Cleaned up private data handling in ep_connect & provider 
+	  support: we now avoid extra copy in connect code; reduced
+	  stack requirements by using private_data structure in the EP;
+	  removed provider variable.
+	+ Fixed problem in the dat conformance test where cno_wait would
+	  attempt to dereference a timer value and SEGV.
+	+ Removed old vestiges of deprecated POLLING_COMPLETIONS 
+	  conditionals.
+
+	DAPL BETA 2.05 RELEASE NOTES
+
+	This was to be a very minor release, the primary change was
+	going to be the new wording of the DAT license as contained in
+	the header for all source files. But the interest and
+	development occurring in DAPL provided some extra bug fixes, and
+	some new functionality that has been requested for a while.
+
+	First, you may notice that every single source file was
+	changed. If you read the release notes from DAPL BETA 2.04, you
+	were warned this would happen. There was a legal issue with the
+	wording in the header, the end result was that every source file
+	was required to change the word 'either of' to 'both'. We've
+	been putting this change off as long as possible, but we wanted
+	to do it in a clean drop before we start working on DAT 1.2
+	changes in the reference implementation, just to keep things
+	reasonably sane.
+
+	kdapltest has enabled three of the subtests supported by
+	dapltest. The Performance test in particular has been very
+	useful to dapltest in getting minima and maxima. The Limit test
+	pushes the limits by allocating the maximum number of specific
+	resources. And the FFT tests are also available.
+
+	Most vendors have supported shared memory regions for a while,
+	several of which have asked the reference implementation team to
+	provide a common implementation. Shared memory registration has
+	been tested on ibapi, and compiled into vapi. Both InfiniBand
+	providers have the restriction that a memory region must be
+	created before it can be shared; not all RDMA APIs are this way,
+	several allow you to declare a memory region shared when it is
+	registered. Hence, details of the implementation are hidden in
+	the provider layer, rather than forcing other APIs to do
+	something strange.
+
+	This release also contains some changes that will allow dapl to
+	work on Opteron processors, as well as some preliminary support
+	for Power PC architecture. These features are not well tested
+	and may be incomplete at this time.
+
+	Finally, we have been asked several times over the course of the
+	project for a canonical interface between the common and
+	provider layers. This release includes a dummy provider to meet
+	that need. Anyone should be able to download the release and do
+	a:
+	   make VERBS=DUMMY
+
+	And have a cleanly compiled dapl library. This will be useful
+	both to those porting new transport providers, as well as those
+	going to new machines.
+
+	The DUMMY provider has been compiled on both Linux and Windows
+	machines.
+
+
+	NEW SINCE Beta 2.4
+	- kdapltest enhancements:
+	  * Limit subtests now work
+	  * Performance subtests now work.
+	  * FFT tests now work.
+
+	- The VAPI headers have been refreshed by Mellanox
+
+	- Initial Opteron and PPC support.
+
+	- Atomic data types now have consistent treatment, allowing us to
+	  use native data types other than integers. The Linux kdapl
+	  uses atomic_t, allowing dapl to use the kernel macros and
+	  eliminate the assembly code in dapl_osd.h
+
+	- The license language was updated per the direction of the
+	  DAT Collaborative. This two word change affected the header
+	  of every file in the tree.
+
+	- SHARED memory regions are now supported.
+
+	- Initial support for the TOPSPIN provider.
+
+	- Added a dummy provider, essentially the NULL provider. Its
+	  purpose is to aid in porting and to clarify exactly what is
+	  expected in a provider implementation.
+
+	- Removed memory allocation from the DTO path for VAPI
+
+	- cq_resize will now allow the CQ to be resized smaller. Not all
+	  providers support this, but it's a provider problem, not a
+	  limitation of the common code.
+
+	* BUG FIXES
+
+	+ Removed spurious lock in dapl_evd_connection_callb.c that
+	  would have caused a deadlock.
+	+ The Async EVD was getting torn down too early, potentially
+	  causing lost errors. Has been moved later in the teardown
+	  process.
+	+ kDAPL replaced mem_map_reserve() with newer SetPageReserved()
+	  for better Linux integration.
+	+ kdapltest no longer allocates large print buffers on the stack,
+	  is more careful to ensure buffers don't overflow.
+	+ Put dapl_os_dbg_print() under DAPL_DBG conditional, it is
+	  supposed to go away in a production build. 
+	+ dapltest protocol version has been bumped to reflect the
+	  change in the Service ID.
+	+ Corrected several instances of routines that did not adhere
+	  to the DAT 1.1 error code scheme.
+	+ Cleaned up vapi ib_reject_connection to pass DAT types rather
+	  than provider specific types. Also cleaned up naming interface
+	  declarations and their use in vapi_cm.c; fixed incorrect
+	  #ifdef for naming.  
+	+ Initialize missing uDAPL provider attr, pz_support.
+	+ Changes for better layering: first, moved
+	  dapl_lmr_convert_privileges to the provider layer as memory
+	  permissions are clearly transport specific and are not always
+	  defined in an integer bitfield; removed common routines for
+	  lmr and rmr. Second, move init and release setup/teardown
+	  routines into adapter_util.h, which defined the provider
+	  interface.
+	+ Cleaned up the HCA name cruft that allowed different types
+	  of names such as strings or ints to be dealt with in common
+	  code; but all names are presented by the dat_registry as
+	  strings, so pushed conversions down to the provider
+	  level. Greatly simplifies names.
+	+ Changed deprecated true/false to DAT_TRUE/DAT_FALSE.
+	+ Removed old IB_HCA_NAME type in favor of char *.
+	+ Fixed race condition in kdapltest's use of dat_evd_dequeue. 
+	+ Changed cast for SERVER_PORT_NUMBER to DAT_CONN_QUAL as it
+	  should be. 
+	+ Small code reorg to put the CNO into the EVD when it is
+	  allocated, which simplifies things. 
+	+ Removed gratuitous ib_hca_port_t and ib_send_op_type_t types,
+	  replaced with standard int.
+	+ Pass a pointer to cqe debug routine, not a structure. Some
+	  clean up of data types.
+	+ kdapl threads now invoke reparent_to_init() on exit to allow
+	  threads to get cleaned up.
+
+
+
+	DAPL BETA 2.04 RELEASE NOTES
+
+	The big changes for this release involve a more strict adherence
+	to the original dapl architecture. Originally, only InfiniBand
+	providers were available, so allowing various data types and
+	event codes to show through into common code wasn't a big deal.
+
+	But today, there are an increasing number of providers available
+	on a number of transports. Requiring an IP iWarp provider to
+	match up to InfiniBand events is silly, for example.
+
+	Restructuring the code allows more flexibility in providing an
+	implementation.
+
+	There are also a large number of bug fixes available in this
+	release, particularly in kdapl related code.
+
+	Be warned that the next release will change every file in the
+	tree as we move to the newly approved DAT license. This is a
+	small change, but all files are affected.
+
+	Future releases will also support the soon-to-be-ratified DAT
+	1.2 specification.
+
+	This release has benefited from many bug reports and fixes from
+	a number of individuals and companies. On behalf of the DAPL
+	community, thank you!
+
+
+	NEW SINCE Beta 2.3
+
+	- Made several changes to be more rigorous on the layering
+	  design of dapl. The intent is to make it easier for non
+	  InfiniBand transports to use dapl. These changes include:
+	  
+	  * Revamped the ib_hca_open/close code to use an hca_ptr
+	    rather than an ib_handle, giving the transport layer more
+	    flexibility in assigning transport handles and resources.
+
+	  * Removed the CQD calls, they are specific to the IBM API;
+	    folded this functionality into the provider open/close calls.
+
+	  * Moved VAPI, IBAPI transport specific items into a transport
+	    structure placed inside of the HCA structure. Also updated
+	    routines using these fields to use the new location. Cleaned
+	    up provider knobs that have been exposed for too long.
+
+	  * Changed a number of provider routines to use DAPL structure
+	    pointers rather than exposing provider handles & values. Moved
+	    provider specific items out of common code, including provider
+	    data types (e.g. ib_uint32_t).
+
+	  * Pushed provider completion codes and type back into the
+            provider layer. We no longer use EVD or CM completion types at
+            the common layer, instead we obtain the appropriate DAT type
+            from the provider and process only DAT types.
+
+	  * Change private_data handling such that we can now accommodate
+            variable length private data.
+
+	- Remove DAT 1.0 cruft from the DAT header files.
+
+	- Better spec compliance in headers and various routines.
+
+	- Major updates to the VAPI implementation from
+          Mellanox. Includes initial kdapl implementation
+
+	- Move kdapl platform specific support for hash routines into
+          OSD file.
+
+	- Cleanups to make the code more readable, including comments
+          and certain variable and structure names.
+
+	- Fixed CM_BUSTED code so that it works again: very useful for
+          new dapl ports where infrastructure is lacking. Also made
+	  some fixes for IBHOSTS_NAMING conditional code.
+
+	- Added DAPL_MERGE_CM_DTO as a compile time switch to support
+	  EVD stream merging of CM and DTO events. Default is off.
+
+	- 'Quit' test ported to kdapltest
+
+	- uDAPL now builds on Linux 2.6 platform (SuSE 9.1).
+
+	- kDAPL now builds for a larger range of Linux kernels, but
+          still lacks 2.6 support.
+
+	- Added shared memory ID to LMR structure. Shared memory is
+          still not fully supported in the reference implementation, but
+          the common code will appear soon.
+
+	* Bug fixes
+	  - Various Makefiles fixed to use the correct dat registry
+	    library in its new location (as of Beta 2.03)
+	  - Simple reorg of dat headers files to be consistent with
+	    the spec.
+	  - fixed bug in vapi_dto.h recv macro where we could have an
+	    uninitialized pointer.
+	  - Simple fix in dat_dr.c to initialize a variable early in the
+	    routine before errors occur.
+	  - Removed private data pointers from a CONNECTED event, as
+	    there should be no private data here.
+	  - dat_strerror no longer returns an uninitialized pointer if
+	    the error code is not recognized.
+	  - dat_dup_connect() will reject 0 timeout values, per the
+	    spec.
+	  - Removed unused internal_hca_names parameter from
+	    ib_enum_hcas() interface. 
+	  - Use a temporary DAT_EVENT for kdapl up-calls rather than
+	    making assumptions about the current event queue.
+	  - Relocated some platform dependent code to an OSD file.
+	  - Eliminated several #ifdefs in .c files.
+	  - Inserted a missing unlock() on an error path.
+	  - Added bounds checking on size of private data to make sure
+	    we don't overrun the buffer
+	  - Fixed a kdapltest problem that caused a machine to panic if
+	    the user hit ^C
+	  - kdapltest now uses spin locks more appropriate for their
+	    context, e.g. spin_lock_bh or spin_lock_irq. Under a
+	    conditional. 
+	  - Fixed kdapltest loops that drain EVDs so they don't go into
+	    endless loops.
+	  - Fixed bug in dapl_llist_add_entry link list code.
+	  - Better error reporting from provider code.
+	  - Handle case of user trying to reap DTO completions on an
+	    EP that has been freed.
+	  - No longer hold lock when ep_free() calls into provider layer
+	  - Fixed cr_accept() to not have an extra copy of
+	    private_data. 
+	  - Verify private_data pointers before using them, avoid
+	    panic. 
+	  - Fixed memory leak in kdapltest where print buffers were not
+	    getting reclaimed.
+
+
+
+	DAPL BETA 2.03 RELEASE NOTES
+
+	There are some  prominent features in this release:
+	1) dapltest/kdapltest. The dapltest test program has been
+	   rearchitected such that a kernel version is now available
+	   to test with kdapl. The most obvious change is a new
+	   directory structure that more closely matches other core
+	   dapl software. But there are a large number of changes
+	   throughout the source files to accommodate not only the
+	   differences in udapl/kdapl interfaces, but also more mundane
+	   things such as printing.
+
+	   The new dapltest is in the tree at ./test/dapltest, while the
+	   old remains at ./test/udapl/dapltest. For this release, we
+	   have maintained both versions. In a future release, perhaps
+	   the next release, the old dapltest directory will be
+	   removed. Ongoing development will only occur in the new tree.
+
+	2) DAT 1.1 compliance. The DAT Collaborative has been busy
+	   finalizing the 1.1 revision of the spec. The header files
+	   have been reviewed and posted on the DAT Collaborative web
+	   site, they are now in full compliance.
+
+	   The reference implementation has been at a 1.1 level for a
+	   while. The current implementation has some features that will
+	   be part of the 1.2 DAT specification, but only in places
+	   where full compatibility can be maintained.
+
+	3) The DAT Registry has undergone some positive changes for
+           robustness and support of more platforms. It now has the
+           ability to support several identical provider names
+           simultaneously, which enables the same dat.conf file to
+           support multiple platforms. The registry will open each
+           library and return when successful. For example, a dat.conf
+           file may contain multiple provider names for ex0a, each
+           pointing to a different library that may represent different
+           platforms or vendors. This simplifies distribution into
+           different environments by enabling the use of common
+           dat.conf files.
+
+	In addition, there are a large number of bug fixes throughout
+	the code. Bug reports and fixes have come from a number of
+	companies.
+
+	Also note that the Release notes are cleaned up, no longer
+	containing the complete text of previous releases.
+
+	* EVDs no longer support DTO and CONNECTION event types on the
+          same EVD. NOTE: The problem is maintaining the event ordering
+          between two channels such that no DTO completes before a
+          connection is received; and no DTO completes after a
+          disconnect is received. For 90% of the cases this can be made
+          to work, but the remaining 10% will cause serious performance
+          degradation to get right.
+
+	NEW SINCE Beta 2.2
+
+	* DAT 1.1 spec compliance. This includes some new types, error
+          codes, and moving structures around in the header files,
+          among other things. Note the Class bits of dat_error.h have
+	  returned to a #define (from an enum) to cover the broadest
+	  range of platforms.
+
+	* Several additions for robustness, including handle and
+          pointer checking, better argument checking, state
+          verification, etc. Better recovery from error conditions,
+	  and some assert()s have been replaced with 'if' statements to
+          handle the error.
+
+	* EVDs now maintain the actual queue length, rather than the
+	  requested amount. Both the DAT spec and IB (and other
+	  transports) allow the underlying implementation to provide
+	  more CQ entries than requested.
+
+	  Requests for the same number of entries contained by an EVD
+	  return immediate success.
+
+	* kDAPL enhancements:
+	  - module parameters & OS support calls updated to work with
+            more recent Linux kernels.
+	  - kDAPL build options changes to match the Linux kernel, vastly
+	    reducing the size and making it more robust.
+	  - kDAPL unload now works properly
+	  - kDAPL takes a reference on the provider driver when it
+	    obtains a verbs vector, to prevent an accidental unload
+	  - Cleaned out all of the uDAPL cruft from the linux/osd files.
+
+	* New dapltest (see above).
+
+	* Added a new I/O trace facility, enabling a developer to debug
+          all I/O that are in progress or recently completed. Default
+          is OFF in the build.
+
+	* 0 timeout connections now refused, per the spec.
+
+	* Moved the remaining uDAPL specific files from the common/
+          directory to udapl/. Also removed udapl files from the kdapl
+	  build.
+
+	* Bug fixes
+	  - Better error reporting from provider layer  
+	  - Fixed race condition on reference counts for posting DTO
+	    ops.
+	  - Use DAT_COMPLETION_SUPPRESS_FLAG to suppress successful
+	    completion of dapl_rmr_bind  (instead of
+	    DAT_COMPLETION_UNSIGNALLED, which is for non-notification
+	    completion). 
+	  - Verify psp_flags value per the spec
+	  - Bug in psp_create_any() checking psp_flags fixed
+	  - Fixed type of flags in ib_disconnect from
+	    DAT_COMPLETION_FLAGS to DAT_CLOSE_FLAGS
+	  - Removed hard coded check for ASYNC_EVD. Placed all EVD
+	    prevention in evd_stream_merging_supported array, and
+	    prevent ASYNC_EVD from being created by an app.
+	  - ep_free() fixed to comply with the spec
+	  - Replaced various printfs with dbg_log statements
+	  - Fixed kDAPL interaction with the Linux kernel
+	  - Corrected phy_register prototype
+	  - Corrected kDAPL wait/wakeup synchronization
+	  - Fixed kDAPL evd_kcreate() such that it no longer depends
+	    on uDAPL only code.
+	  - dapl_provider.h had wrong guard #def: changed DAT_PROVIDER_H
+	    to DAPL_PROVIDER_H
+	  - removed extra (and bogus) call to dapls_ib_completion_notify()
+	    in evd_kcreate.c
+	  - Inserted missing error code assignment in
+	    dapls_rbuf_realloc() 
+	  - When a CONNECTED event arrives, make sure we are ready for
+	    it, else something bad may have happened to the EP and we
+	    just return; this replaces an explicit check for a single
+	    error condition, replacing it with the general check for the
+	    state capable of dealing with the request.
+	  - Better context pointer verification. Removed locks around
+	    call to ib_disconnect on an error path, which would result
+	    in a deadlock. Added code for BROKEN events.
+	  - Brought the vapi code more up to date: added conditional
+	    compile switches, removed obsolete __ActivePort, deal
+	    with 0 length DTO
+	  - Several dapltest fixes to bring the code up to the 1.1
+	    specification.
+	  - Fixed mismatched dapl_os_dbg_print() #else dapl_Dbg_Print();
+	    the latter was replaced with the former.
+	  - ep_state_subtype() now includes UNCONNECTED.
+	  - Added some missing ibapi error codes.
+ 
 
-       dapltest -T T -d -s host1-ib0 -D OpenIB-cma -i 100 client SR 4096 2 server SR 4096 2
 
-                               Runs a transaction test, with both sides
-                               sending one buffer with two 4KB segments,
-                              one hundred times.
+	NEW SINCE Beta 2.1
+
+	* Changes for Errata and 1.1 Spec
+	  - Removed DAT_NAME_NOT_FOUND, per DAT errata
+	  - EVD's with DTO and CONNECTION flags set no longer valid.
+	  - Removed DAT_IS_SUCCESS macro
+	  - Moved provider attribute structures from vendor files to udat.h
+	    and kdat.h
+	  - kdapl UPCALL_OBJECT now passed by reference
+
+	* Completed dat_strerr return strings
+
+	* Now support interrupted system calls
+
+	* dapltest now uses dat_strerror for error reporting.
+
+	* Large number of files were formatted to meet project standard,
+	  very cosmetic changes but improves readability and
+	  maintainability.  Also cleaned up a number of comments during
+	  this effort.
+
+	* dat_registry and RPM file changes (contributed by Steffen Persvold):
+	  - Renamed the RPM name of the registry to be dat-registry 
+	    (renamed the .spec file too, some cvs add/remove needed)
+	  - Added the ability to create RPMs as normal user (using 
+	    temporal paths), works on SuSE, Fedora, and RedHat.
+	  - 'make rpm' now works even if you didn't build first.
+	  - Changed to using the GNU __attribute__((constructor)) and
+	    __attribute__((destructor)) on the dat_init functions, dat_init
+	    and dat_fini. The old -init and -fini options to LD makes 
+	    applications crash on some platforms (Fedora for example).
+	  - Added support for 64 bit platforms.
+	  - Added code to allow multiple provider names in the registry,
+	    primarily to support ia32 and ia64 libraries simultaneously. 
+	    Provider names are now kept in a list, the first successful
+	    library open will be the provider.
+
+	* Added initial infrastructure for DAPL_DCNTR, a feature that
+	  will aid in debug and tuning of a dapl implementation. Partial
+	  implementation only at this point.
+
+	* Bug fixes
+	- Prevent debug messages from crashing dapl in EVD completions by
+	  verifying the error code to ensure data is valid.
+	- Verify CNO before using it to clean up in evd_free()
+	- CNO timeouts now return correct error codes, per the spec.
+	- cr_accept now complies with the spec concerning connection 
+	  requests that go away before the accept is invoked.
+	- Verify valid EVD before posting connection events on active side
+	  of a connection. EP locking also corrected.
+	- Clean up of dapltest Makefile, no longer need to declare
+	  DAT_THREADSAFE
+	- Fixed check of EP states to see if we need to disconnect when an
+	  IA is closed.
+	- ep_free() code reworked such that we can properly close a 
+	  connection pending EP.
+	- Changed disconnect processing to comply with the spec: user will
+	   see a BROKEN event, not DISCONNECTED.
+	- If we get a DTO error, issue a disconnect to let the CM and
+	  the user know the EP state changed to disconnect; checked IBA
+	  spec to make sure we disconnect on correct error codes.
+	- ep_disconnect now properly deals with abrupt disconnects on the
+	  active side of a connection.
+	- PSP now created in the correct state for psp_create_any(), making
+	  it usable.
+	- dapl_evd_resize() now returns correct status, instead of always
+	  DAT_NOT_IMPLEMENTED.
+	- dapl_evd_modify_cno() does better error checking before invoking
+	  the provider layer, avoiding bugs.
+	- Simple change to allow dapl_evd_modify_cno() to set the CNO to 
+	  NULL, per the spec.
+	- Added required locking around call to dapl_sp_remove_cr.
+
+	- Fixed problems related to dapl_ep_free: the new
+	  disconnect(abrupt) allows us to do a more immediate teardown of
+	  connections, removing the need for the MAGIC_EP_EXIT magic
+	  number/state, which has been removed. Much cleanup of paths,
+	  and made more robust.
+	- Made changes to meet the spec, uDAPL 1.1 6.3.2.3: CNO is
+	  triggered if there are waiters when the last EVD is removed
+	  or when the IA is freed.
+	- Added code to deal with the provider synchronously telling us
+	   a connection is unreachable, and generate the appropriate
+	   event.
+	- Changed timer routine type from unsigned long to uintptr_t
+	  to better fit with machine architectures.
+	- ep.param data now initialized in ep_create, not ep_alloc.
+	- Or Gerlitz provided updates to Mellanox files for evd_resize,
+	  fw attributes, many others. Also implemented changes for correct
+	  sizes on REP side of a connection request.
+
+
+
+	NEW SINCE Beta 2.0
+
+	* dat_echo now DAT 1.1 compliant. Various small enhancements.
+
+	* Revamped atomic_inc/dec to be void, the return value was never
+	  used. This allows kdapl to use Linux kernel equivalents, and
+	  is a small performance advantage.
+
+	* kDAPL: dapl_evd_modify_upcall implemented and tested.
+
+	* kDAPL: physical memory registration implemented and tested.
+
+	* uDAPL now builds cleanly for non-debug versions.
+
+	* Default RDMA credits increased to 8.
+
+	* Default ACK_TIMEOUT now a reasonable value (2 sec vs old 2
+	  months).
+
+	* Cleaned up dat_error.h, now 1.1 compliant in comments.
+
+	* evd_resize initial implementation. Untested.
+
+	* Bug fixes
+	  - __KDAPL__ is defined in kdat_config.h, so apps don't need
+	    to define it.
+	  - Changed include file ordering in kdat.h to put kdat_config.h
+	    first.
+	  - resolved connection/tear-down race on the client side.
+	  - kDAPL timeouts now scaled properly; fixed 3 orders of
+	    magnitude difference.
+	  - kDAPL EVD callbacks now get invoked for all completions; old
+	    code would drop them in heavy utilization.
+	  - Fixed error path in kDAPL evd creation, so we no longer
+	    leak CNOs.
+	  - create_psp_any returns correct error code if it can't create
+	    a connection qualifier.
+	  - lock fix in ibapi disconnect code.
+	  - kDAPL INFINITE waits now work properly (non connection
+	    waits) 
+	  - kDAPL driver unload now works properly
+	  - dapl_lmr_[k]create now returns 1.1 error codes
+	  - ibapi routines now return DAT 1.1 error codes
+	  
+
+
+	NEW SINCE Beta 1.10
+
+	* kDAPL is now part of the DAPL distribution. See the release
+	  notes above.
+
+	  The kDAPL 1.1 spec is now contained in the doc/ subdirectory.
+
+	* Several files have been moved around as part of the kDAPL
+	  checkin. Some files that were previously in udapl/ are now
+	  in common/, some in common are now in udapl/. The goal was
+	  to make sure files are properly located and make sense for
+	  the build.
+
+	* Source code formatting changes for consistency.
+
+	* Bug fixes
+	  - dapl_evd_create() was comparing the wrong bit combinations,
+	    allowing bogus EVDs to be created.
+	  - Removed code that swallowed zero length I/O requests, which
+	    are allowed by the spec and are useful to applications.
+	  - Locking in dapli_get_sp_ep was asymmetric; fixed it so the
+	    routine will take and release the lock. Cosmetic change.
+	  - dapl_get_consumer_context() will now verify the pointer
+	    argument 'context' is not NULL.
+
+
+	OBTAIN THE CODE
+
+	To obtain the tree for your local machine you can check it
+	out of the source repository using CVS tools. CVS is common
+	on Unix systems and available as freeware on Windows machines.
+	The command to anonymously obtain the source code from 
+	Source Forge (with no password) is:
+	
+	cvs -d:pserver:anonymous at cvs.dapl.sourceforge.net:/cvsroot/dapl login
+	cvs -z3 -d:pserver:anonymous at cvs.dapl.sourceforge.net:/cvsroot/dapl co .
+
+	When prompted for a password, simply  press the Enter key.
+
+	Source Forge also contains explicit directions on how to become
+	a developer, as well as how to use different CVS commands. You may
+	also browse the source code using the URL:
 
-       dapltest -T P -d -s host1-ib0 -D OpenIB-cma -i 100 SR 4096 2
+        http://svn.sourceforge.net/viewvc/dapl/trunk/
 
-                               Runs a performance test, with the client
-                               sending one buffer with two 4KB segments,
-                               one hundred times.
+	SYSTEM REQUIREMENTS
 
-       dapltest -T Q -s host1-ib0 -D OpenIB-cma
+	This project has been implemented on Red Hat Linux 7.3, SuSE
+	SLES 8, 9, and 10, Windows 2000, RHEL 3.0, 4.0 and 5.0 and a few 
+        other Linux distributions. The structure of the code is designed 
+        to allow other operating systems to easily be adapted.
 
-                               Asks the server to clean up and exit.
+	The DAPL team has used Mellanox Tavor based InfiniBand HCAs for
+	development, and continues with this platform. Our HCAs use the
+	IB verbs API submitted by IBM. Mellanox has contributed an
+	adapter layer using their VAPI verbs API. Either platform is
+	available to any group considering DAPL work. The structure of
+	the uDAPL source allows other provider API sets to be easily
+	integrated.
 
-       dapltest -T L -D OpenIB-cma -d -w 16 -m 1000
+	The development team uses any one of three topologies: two HCAs
+	in a single machine; a single HCA in each of two machines; and
+	most commonly, a switch. Machines connected to a switch may have
+	more than one HCA.
+
+	The DAPL Plugfest revealed that switches and HCAs available from
+	most vendors will interoperate with little trouble, given the
+	most recent releases of software. The dapl reference team makes
+	no recommendation on HCA or switch vendors.
+
+	Explicit machine configurations are available upon request.	
+
+	IN THE TREE
+
+	The DAPL tree contains source code for the uDAPL and kDAPL
+	implementations, and also includes tests and documentation.
+
+	Included documentation has the base level API of the
+	providers: OpenFabrics, IBM Access, and Mellanox Verbs API. Also
+	included are a growing number of DAPL design documents which
+	lead the reader through specific DAPL subsystems. More
+	design documents are in progress and will appear in the tree in
+	the near future.
+
+	A small number of test applications and a unit test framework
+	are also included. dapltest is the primary testing application
+	used by the DAPL team; it is capable of simulating a variety of
+	loads and exercises a large number of interfaces. Full
+	documentation is included for each of the tests.
+
+	Recently, the dapl conformance test has been added to the source
+	repository. The test provides coverage of the most common
+	interfaces, doing both positive and negative testing. Vendors
+	providing DAPL implementations are strongly encouraged to run
+	this set of tests.
 
-                               Runs all of the limit tests, setting up
-                               16 complete sets of DAPL objects, and
-                               creating at most a thousand instances
-                               when trying to exhaust resources.
+	MAKEFILE NOTES
 
-       dapltest -T T -V -d -t 2 -w 4 -i 55555 -s linux3 -D OpenIB-cma client RW 4096 1 server RW  2048  4
-       client SR 1024 4 server SR 4096 2 client SR 1024 3 -f server SR 2048 1 -f
+	There are a number of #ifdefs in the code that were necessary
+	during early development. They are disappearing as we
+	have time to take advantage of features and work available from
+	newer releases of provider software. These #ifdefs are not 
+        documented as the intent is to remove them as soon as possible.
 
-                               Runs a more complicated transaction test,
-                               with two thread using four EPs each,
-                               sending a more complicated buffer pattern
-                               for a larger number of iterations,
-                               validating the data received.
+	CONTRIBUTIONS
+
+	As is common to Source Forge projects, there are a small number
+	of developers directly associated with the source tree and having
+	privileges to change the tree. Requested updates, changes, bug
+	fixes, enhancements, or contributions should be sent to 
+        James Lentini at jlentinit at netapp.com for review. We welcome your
+	contributions and expect the quality of the project will
+	improve thanks to your help.
+
+	The core DAPL team is:
+
+	  James Lentini
+          Arlin Davis
+	  Steve Sears
 
-       BUGS   (and  To Do List)
+	  ... with contributions from a number of excellent engineers in
+	  various companies contributing to the open source effort.
 
-           Use of CNOs (-Q) is not yet supported.
 
-           Further limit tests could be added.
+	ONGOING WORK
 
+	Not all of the DAPL spec is implemented at this time.
+	Functionality such as shared memory will probably not be
+	implemented by the reference implementation (there is a write up
+	on this in the doc/ area), and there are yet various cases where
+	work remains to be done.  And of course, not all of the
+	implemented functionality has been tested yet.  The DAPL team
+	continues to develop and test the tree with the intent of
+	completing the specification and delivering a robust and useful
+	implementation.
 
 
+The DAPL Team
 
diff --git a/README.mcm b/README.mcm
old mode 100644
new mode 100755
index f87d9af..bf592a8
--- a/README.mcm
+++ b/README.mcm
@@ -1,99 +1,109 @@
-	uDAPL MCM Provider and MPXYD Daemon (CCL-proxy) 
-	           dapl-2.1.x
-		    May 2015
-
-MCM is a new uDAPL provider that is an extension to standard DAT 2.0 libraries. The purpose of this service
-is to proxy RDMA writes from the MIC to the HOST to improve large IO performance. The provider will support
-MIC to MIC, HOST to HOST, and MIC to HOST environments. The mcm client will NOT use MPXYD when running on the host.
-It requires a new MPXYD daemon service when clients are running on a MIC KNC adapter. This package installs all the
-host side libraries and daemon service. The MIC libraries must be built and moved over to MIC adapter. This verion
-is currently included with MPSS and all libraries and services will be installed by default.
-
-Current release package: dapl-2.1.5.tar.gz 
-
-* Sample host build from source package (ofed must installed)
-
-  ./autogen.sh
-  ./configure \
-	--enable-mcm \
-	--prefix=/usr \
-	--libdir=/usr/lib64 \
-	--sysconfdir=/etc
-  make
-  sudo make install
-
-* Sample /home/user1 MIC build from source package for MPSS 3.x (MPSS must be installed)
-
-  source /opt/mpss/3.x/environment-setup-k1om-mpss-linux 
-   ./autogen.sh
-  ./configure \
-	--enable-mcm 
-	--host=x86_64-k1om-linux \
-	--prefix=/home/user1/dapl-mic-install \
- 	CC=/usr/linux-k1om-4.7/bin/x86_64-k1om-linux-gcc \
- 	CFLAGS="-I/opt/mpss/3.x/sysroots/k1om-mpss-linux/usr/include 
- 	LDFLAGS="-L/opt/mpss/3.x/sysroots/k1om-mpss-linux/usr/lib64"
-  make
-  sudo make install
-  
-* Sample /home/user1 MIC build from source package for MPSS 2.x (MPSS must be installed)
-    
-  export PATH=$PATH:/usr/linux-k1om-4.7/bin
-  ./autogen.sh
-  ./configure \
-	--enable-mcm \
-	--prefix=/home/user1/dapl-mic-install \
-	--libdir=/opt/intel/mic/ofed/card/usr/lib64 \
-	--sysconfdir=/opt/intel/mic/ofed/card/etc \
-	--host=x86_64-k1om-linux \
-	CFLAGS="-I/opt/intel/mic/ofed/card/usr/include" \
-	LDFLAGS="-L/opt/intel/mic/ofed/card/usr/lib64"
-  make
-  sudo make install
- 
-* Cluster deployment
+		uDAPL MCM Provider and MPXYD Daemon (CCL-proxy) 
+		   
+	MCM is a new uDAPL provider that is an extension to standard DAT 2.0 libraries. The purpose of this service
+	is to proxy RDMA writes from the MIC to the HOST to improve large IO performance. The provider will support
+	MIC to MIC, HOST to HOST, and MIC to HOST environments. The mcm client will NOT use MPXYD when running on the host.
+	It requires a new MPXYD daemon service when clients are running on a MIC KNC adapter. This package installs all the
+	host side libraries and daemon service. The MIC libraries must be built and moved over to MIC adapter. This version
+	is currently included with MPSS and all libraries and services will be installed by default.
+	
+	=================
+	1.0 Release Notes
+	=================
+	
+	Current release package: dapl-2.1.6.tar.gz 
+	
+	dapl-2.1.6 changes include MIC support for full offload mode
+	
+	- Add support for Truescale qib devices with no CCL Direct verbs support on MIC.
+	- Enhancement for inside the box transfers without IB adapter via ibscif.
+	- Add DAPL_NETWORK_NODES, DAPL_NETWORK_PPN environment variables. 
+
+	============================================================
+	2.0 MCM Provider, MPXYD Daemon (CCL-proxy) Build and Install
+	============================================================
+
+	* Sample host build from source package (ofed must be installed)
+
+  	./configure --enable-mcm --prefix=/usr --libdir=/usr/lib64 --sysconfdir=/etc
+  	make
+  	make install
+
+	* Sample host rpmbuild/update from release tarball, /root:
 
-  (1) Build once on the head or on one of the nodes as described in the above steps.
+	rpmbuild -ta dapl-2.1.6.tar.gz
+	rpm -U /root/rpmbuild/RPMS/x86_64/dapl*
 
-  (2) Replicate these files on all the nodes:
+	* Sample MIC build from source package for MPSS 3.x (MPSS must be installed)
+	* Assume /opt is nfs mounted across cluster
 
-	/etc/dat.conf
-	/etc/mpxyd.conf
-	/usr/sbin/mpxyd
-	/usr/lib64/libdaplomcm.so.2
-	/opt/intel/mic/ofed/card/etc/dat.conf
-	/opt/intel/mic/ofed/card/usr/lib64/libdaplomcm.so.2
-	/opt/intel/mic/ofed/card/ofed.filelist
+  	source /opt/mpss/3.x/environment-setup-k1om-mpss-linux 
+	./configure --enable-mcm --prefix /opt/dapl/mic --host=x86_64-k1om-linux
+	make
+	make install
 
-  (3) Unload and then restart MPSS on all the nodes.
+	copy /opt/dapl/mic/* files out to all MIC cards
+  
+ 
+	* Cluster deployment
+
+  	(1) Build once on the head or on one of the nodes (with MPSS) as described in the above steps.
 
-* Start the proxy daemon on all the nodes (host only)
+  	(2) HOST: Install dapl libraries and mpxyd service, "rpm -U" all dapl RPM files on host nodes:
+
+  	(3) MIC: Setup dapl overlay for new package (/opt/intel/dapl):
+	
+		Create /etc/mpss/conf.d/dapl.conf with following entry:
+
+			Overlay Filelist /opt/dapl /opt/dapl/dapl.filelist on
+		
+		Create /opt/dapl/dapl.filelist with following entries: 
+
+			file /etc/dat.conf mic/etc/dat.conf 755 0 0
+			file /usr/bin/dtest mic/bin/dtest 755 0 0
+			file /usr/bin/dtestx mic/bin/dtestx 755 0 0
+			file /usr/bin/dtestcm mic/bin/dtestcm 755 0 0
+			file /usr/bin/dapltest mic/bin/dapltest 755 0 0
+			file /usr/lib64/libdat.so.2.0.0 mic/lib/libdat.so.2.0.0 755 0 0
+			file /usr/lib64/libdaplofa.so.2.0.0 mic/lib/libdaplofa.so.2.0.0 755 0 0
+			file /usr/lib64/libdaplomcm.so.2.0.0 mic/lib/libdaplomcm.so.2.0.0 755 0 0
+			file /usr/lib64/libdaploscm.so.2.0.0 mic/lib/libdaploscm.so.2.0.0 755 0 0
+			file /usr/lib64/libdaploucm.so.2.0.0 mic/lib/libdaploucm.so.2.0.0 755 0 0
+
+			slink /usr/lib64/libdat.so libdat.so.2.0.0 777 0 0
+			slink /usr/lib64/libdat.so.2 libdat.so.2.0.0 777 0 0
+			slink /usr/lib64/libdaplofa.so libdaplofa.so.2.0.0 777 0 0
+			slink /usr/lib64/libdaplofa.so.2 libdaplofa.so.2.0.0 777 0 0
+			slink /usr/lib64/libdaplomcm.so libdaplomcm.so.2.0.0 777 0 0
+			slink /usr/lib64/libdaplomcm.so.2 libdaplomcm.so.2.0.0 777 0 0
+			slink /usr/lib64/libdaploscm.so libdaploscm.so.2.0.0 777 0 0
+			slink /usr/lib64/libdaploscm.so.2 libdaploscm.so.2.0.0 777 0 0
+			slink /usr/lib64/libdaploucm.so libdaploucm.so.2.0.0 777 0 0
+			slink /usr/lib64/libdaploucm.so.2 libdaploucm.so.2.0.0 777 0 0
+	
+		Reboot or restart MPSS and ofed-mic services
 
-	sudo /usr/sbin/mpxyd
+		Check for dapl overlay
+			micctrl --config  
 
-* Use the MCM provider with Intel MPI 4.1.3 or greater for best out of box experiences.
 
-  (1) Recommended settings:
+	* Use the MCM provider with Intel MPI 5.1 or greater for best out of box experiences.
 
-	export I_MPI_MIC=1
-	export I_MPI_DEBUG=2
-	export I_MPI_FALLBACK=0
-	export I_MPI_MIC_DAPL_DIRECT_COPY_THRESHOLD=8192,262144
+  	(1) Recommended settings:
 
-      With these settings on MIC, messages less than 8192 bytes will be sent via pre-registered buffers; messages 
-      between 8192 and 262144 bytes will be sent via the Rendezvous protocol throught the first provider; and 
-      larger messages will be sent via the Rendezvous protocol through the second provider. Fine tune these
-      two sizes for the best performance.
+		export I_MPI_MIC=1
+		export I_MPI_DEBUG=2
+		export I_MPI_FALLBACK=0
   
-* Setup for non-root CCL Proxy testing, MPXYD running as process with different service port from your /home directory:
+	* Setup for non-root CCL Proxy - MPXYD running as process with different service port from your /home directory:
 
-   Using build instructions above, change prefix as follow and "make install":
+   	Using build instructions above, change prefix as follows and "make install":
 
-   Build MIC:
-	--prefix=/home/username/ccl-proxy-mic
+   	Build MIC:
+		--prefix=/home/username/ccl-proxy-mic
 
-   Build host:
-	--prefix=/home/username/ccl-proxy-host
+   	Build host:
+		--prefix=/home/username/ccl-proxy-host
 	
 	edit /home/username/ccl-proxy-host/etc/mpxyd.conf and change the following entries:
 	
@@ -109,126 +119,17 @@ Current release package: dapl-2.1.5.tar.gz
 	
 	export DAPL_MCM_PORT_ID=1068
    
-* Notes
-
-  (1) Modify "/etc/mpxyd.conf" to change the settings for the proxy. Especially, try different values
-      of "buffer_segment_size" for performance tuning. Use a smaller value for "buffer_pool_mb"   
-      to reduce the memory foorprint of mpxyd. Use a larger value for "scif_listen_qlen" to run 
-      more MPI ranks per card. Also modify mcm_affinity_base to the desired CPU_id to insure
-      socket to adapter affinity. Best performance when HCA, MIC, and CPU are on same socket.
-      Default settings are on CPU socket 0.
-
-  (2) By default, only writes originated from MIC is proxied. However, it is also possible to proxy 
-      host-originated writes (e.g. for debugging purpose). To do this, set the environment variable
-      "DAPL_MCM_ALWAYS_PROXY=1". This variable applies to the provider, not the proxy.
-
-ChangeLog:
-
-Release 2.1.5 (OFED 3.18 RC3)
-
- dat.conf: update comments regarding versions
- dtest: add logging of provider private data size with -v
- scm: remove use of msg.resv field for process id logging
- cma: report correct CM req private data size on query
- mpxyd: memset ib_wr structure before post_send on WC and WR requests
- mcm: add HST side provider support for device without inline data capability
- ucm: CM changes for UD extended port space and indexer
- ucm: add device support for new port space hash table
- ucm: allocate/free AH hash table for UD endpoint types
- ucm: check for AH caching when destroying via UD extension
- ucm: optimizations for large scale UD communication management
- mpxyd: use wr opcode instead of wc opcode to support logging on error cases
- mcm: HST->MXS mode, using RDMA_WRITE_WITH_IMM, fails with dtest -w
- dapl: aarch64 support for linux
- dapltest: add scripts to dist, set default device to IPoIB
- mpxyd: add wc_flags to proxy work completions
-
-Release 2.1.4 (OFED 3.18 RC1)
-
-mpxyd: fix typo in configuration file
-cma: RR attributes moved to common ib_cm struct
-mpxyd: tx thread incorrectly sleeps with negative pi_rw_cnt value
-dat.conf: add entries for True Scale qib device
-mpxyd: add support for devices without inline data support
-ucm: long disconnect times with many-to-one applications
-openib: add inline data support check during device open
-cleanup ib/cm attribute management across openib providers
-dapltest: fix -Werror=format-security issue with printf
-Release 2.1.3 (targeting OFED 3.18)
-dapl: mpxyd service changes to support multi-thread single-core option
-dapl: add rdma_write_imm and write only option to dtest
-ucm: add time wait override capability for CM services
-common: dapl_ep_free must serialize CM object destroy
-dtestx: allow scale up to 1000 EP's
-ucm: RTU not retransmitted in TIMEWAIT state
-mpxyd: increase max open files for service
-mpxyd: DTO completion ERR: status 12, op RDMA_WRITE running MPI alltoall test
-mcm: HST->MXS mode incorrectly signals multiple fragments per WR
-mcm: add segmentation to HST->MXS mode for improved performance
-mpxyd: set global seg_sz to 128KB for proxy data service
-openib: add port_num to provider named attributes
-mcm: provide CPU family/model attribute on both host and mic sides
-dtestx: update IB extension example test with new v2.0.9 features
-dtest: add dtestsrq for SRQ example and provider testing
-common: add srq support for openib verbs providers
-openib: add IB UD cm_free/ah_free extension support in UCM provider
-openib: add new TIMEWAIT state for CM
-extension: add IB UD extensions to reduce provider CM and AH memory footprint
-mpxyd/mcm: add provider specific attribute DAT_IB_PROXY_VERSION
-mpxyd: log warning if running in COMPAT mode
-add provider and proxy support for GUID across platform
-common: return appropriate handles with affiliated EP and EVD async events
-
-Release 2.1.2 (OFED 3.12-1)
-mpxyd: add global routing support for proxy connections
-mcm: only call mix_get_attr if running on MIC
-openib: modify check for link_layer to handle unspecified
-dapl: add support for the s390x platform
-dtest server exchange connection info with client
-mpxyd: 2 MICs in same numa_node will overlap CPU affinity, don't reset base
-mcm: implement proxy mix_prov_attr function, add fields CPU model and family
-mpxyd: tx thread may not be signaled on small segment writes
-
-Release 2.1.1 (OFED 3.12-1 RC1)
-common: add provider name to log messages
-mpxyd: log warning message if numa_node invalid include debuginfo with build
-build: include debuginfo with build
-mpxyd: tx thread doesn't sleep during no pending IO state
-mpxyd: change MIC cpu_mask to per numa node instead of adapter
-mpxyd: set to MXS mode if device numa_node is invalid (-1)
-mpxyd: MXS based alltoall benchmark hangs or returns post_send timeout
-mpxyd: add IO profile capabilities to help debug alltoall stall cases
-mpxyd: retry stalled inline post_send, init m_idx only when signaled
-
-Release 2.1.0 (OFED 3.12-1, MIC support added)
-build: add missing NEWS file
-update autogen.sh
-add MCM provider and MPXYD service to build
-mpxyd: service startup script and configuration file
-add readme for MCM provider and MPXYD service
-update Copyright dates
-add new MIC RDMA proxy service daemon (MPXYD)
-add new dapl MIC provider (MCM) to support MIC RDMA proxy services
-MCM: new MIC provider and proxy service definitions
-cleanup build warnings
-common: add CQ,QP,MR abstractions for new MIC provider and data proxy service
-openib: cleanup, use inet_ntop for GIDs, remove some logs, destroy pipes on release
-common: new dapls_evd_cqe_to_event call, cqe to event
-common: init ring_buffer, assign hd/tl pos in range
-allow log level changes during device open
-ucm: fix cm rbuf setup, include grh pad on initialization
-ucm: remove duplicate async_event code, use common async event call
-new lightweight open_query/close_query IB extension for fast attribute query
-dtestcm: add more detailed debug during disconnect phase
-cma: long delays when opening cma provider with no IPoIB configured
-common: new debug levels for low system memory, IA stats, and package info
-build: remove library check for mverbs with --enable-fca
-IB extension: segfault in create collective group with non-vector type IA handle"
-build: change configure help to correctly state collective default=none
-
-
-
-
-      
-  
+	* Notes
+
+  	(1) Modify "/etc/mpxyd.conf" to change the settings for the proxy. Especially, try different values
+      	of "buffer_segment_size" for performance tuning. Use a smaller value for "buffer_pool_mb"   
+      	to reduce the memory footprint of mpxyd. Use a larger value for "scif_listen_qlen" to run 
+      	more MPI ranks per card. Also modify mcm_affinity_base to the desired CPU_id to ensure
+      	socket to adapter affinity. Best performance when HCA, MIC, and CPU are on same socket.
+      	Default settings are on CPU socket 0.
+
+  	(2) By default, only writes originated from MIC are proxied. However, it is also possible to proxy 
+      	host-originated writes (e.g. for debugging purpose). To do this, set the environment variable
+      	"DAPL_MCM_ALWAYS_PROXY=1". This variable applies to the provider, not the proxy.
+
 
diff --git a/config.h.in b/config.h.in
old mode 100644
new mode 100755
diff --git a/config/ltmain.sh b/config/ltmain.sh
old mode 100644
new mode 100755
diff --git a/configure b/configure
index da554e6..8648901 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.67 for dapl 2.1.5.
+# Generated by GNU Autoconf 2.67 for dapl 2.1.6.
 #
 # Report bugs to <linux-rdma at vger.kernel.org>.
 #
@@ -562,8 +562,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='dapl'
 PACKAGE_TARNAME='dapl'
-PACKAGE_VERSION='2.1.5'
-PACKAGE_STRING='dapl 2.1.5'
+PACKAGE_VERSION='2.1.6'
+PACKAGE_STRING='dapl 2.1.6'
 PACKAGE_BUGREPORT='linux-rdma at vger.kernel.org'
 PACKAGE_URL=''
 
@@ -1318,7 +1318,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures dapl 2.1.5 to adapt to many kinds of systems.
+\`configure' configures dapl 2.1.6 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1388,7 +1388,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of dapl 2.1.5:";;
+     short | recursive ) echo "Configuration of dapl 2.1.6:";;
    esac
   cat <<\_ACEOF
 
@@ -1509,7 +1509,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-dapl configure 2.1.5
+dapl configure 2.1.6
 generated by GNU Autoconf 2.67
 
 Copyright (C) 2010 Free Software Foundation, Inc.
@@ -1935,7 +1935,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by dapl $as_me 2.1.5, which was
+It was created by dapl $as_me 2.1.6, which was
 generated by GNU Autoconf 2.67.  Invocation command line was
 
   $ $0 $@
@@ -2803,7 +2803,7 @@ fi
 # Define the identity of the package.
 
  PACKAGE=dapl
- VERSION=2.1.5
+ VERSION=2.1.6
 
 
 cat >>confdefs.h <<_ACEOF
@@ -13281,7 +13281,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by dapl $as_me 2.1.5, which was
+This file was extended by dapl $as_me 2.1.6, which was
 generated by GNU Autoconf 2.67.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -13347,7 +13347,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-dapl config.status 2.1.5
+dapl config.status 2.1.6
 configured by $0, generated by GNU Autoconf 2.67,
   with options \\"\$ac_cs_config\\"
 
diff --git a/configure.in b/configure.in
old mode 100644
new mode 100755
index 5e9f9e6..6a792d6
--- a/configure.in
+++ b/configure.in
@@ -1,12 +1,12 @@
 dnl Process this file with autoconf to produce a configure script.
 
 AC_PREREQ(2.57)
-AC_INIT(dapl, 2.1.5, linux-rdma at vger.kernel.org)
+AC_INIT(dapl, 2.1.6, linux-rdma at vger.kernel.org)
 AC_CONFIG_SRCDIR([dat/udat/udat.c])
 AC_CONFIG_AUX_DIR(config)
 AC_CONFIG_MACRO_DIR([m4])
 AM_CONFIG_HEADER(config.h)
-AM_INIT_AUTOMAKE(dapl, 2.1.5)
+AM_INIT_AUTOMAKE(dapl, 2.1.6)
 m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
 
 AM_PROG_LIBTOOL
diff --git a/dapl.spec b/dapl.spec
index f593567..17e00d7 100644
--- a/dapl.spec
+++ b/dapl.spec
@@ -37,7 +37,7 @@
 %{!?_CONF: %define _CONF ""}
 
 Name: dapl
-Version: 2.1.5
+Version: 2.1.6
 Release: 1%{?dist}
 Summary: A Library for userspace access to RDMA devices using OS Agnostic DAT APIs, proxy daemon for offloading RDMA 
 
@@ -153,6 +153,9 @@ fi
 mv /tmp/%{version}-dat.conf %{_sysconfdir}/dat.conf
 
 %changelog
+* Wed Aug 12 2015 Arlin Davis <ardavis at ichips.intel.com> - 2.1.6
+- DAT/DAPL Version 2.1.6 Release 1, OFED 3.18-1
+
 * Mon May 26 2015 Arlin Davis <ardavis at ichips.intel.com> - 2.1.5
 - DAT/DAPL Version 2.1.5 Release 1, OFED 3.18
 
diff --git a/dapl.spec.in b/dapl.spec.in
old mode 100644
new mode 100755
index 92ad33f..f6ed8fd
--- a/dapl.spec.in
+++ b/dapl.spec.in
@@ -153,6 +153,9 @@ fi
 mv /tmp/%{version}-dat.conf %{_sysconfdir}/dat.conf
 
 %changelog
+* Wed Aug 12 2015 Arlin Davis <ardavis at ichips.intel.com> - 2.1.6
+- DAT/DAPL Version 2.1.6 Release 1, OFED 3.18-1
+
 * Mon May 26 2015 Arlin Davis <ardavis at ichips.intel.com> - 2.1.5
 - DAT/DAPL Version 2.1.5 Release 1, OFED 3.18
 
diff --git a/dapl/common/dapl_cookie.c b/dapl/common/dapl_cookie.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_cookie.h b/dapl/common/dapl_cookie.h
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_cr_accept.c b/dapl/common/dapl_cr_accept.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_cr_callback.c b/dapl/common/dapl_cr_callback.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_cr_handoff.c b/dapl/common/dapl_cr_handoff.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_cr_query.c b/dapl/common/dapl_cr_query.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_cr_util.c b/dapl/common/dapl_cr_util.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_cr_util.h b/dapl/common/dapl_cr_util.h
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_debug.c b/dapl/common/dapl_debug.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_ep_create.c b/dapl/common/dapl_ep_create.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_ep_create_with_srq.c b/dapl/common/dapl_ep_create_with_srq.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_ep_disconnect.c b/dapl/common/dapl_ep_disconnect.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_ep_dup_connect.c b/dapl/common/dapl_ep_dup_connect.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_ep_free.c b/dapl/common/dapl_ep_free.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_ep_get_status.c b/dapl/common/dapl_ep_get_status.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_ep_modify.c b/dapl/common/dapl_ep_modify.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_ep_post_rdma_read.c b/dapl/common/dapl_ep_post_rdma_read.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_ep_post_rdma_write.c b/dapl/common/dapl_ep_post_rdma_write.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_ep_post_recv.c b/dapl/common/dapl_ep_post_recv.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_ep_post_send.c b/dapl/common/dapl_ep_post_send.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_ep_query.c b/dapl/common/dapl_ep_query.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_ep_recv_query.c b/dapl/common/dapl_ep_recv_query.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_ep_reset.c b/dapl/common/dapl_ep_reset.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_ep_set_watermark.c b/dapl/common/dapl_ep_set_watermark.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_ep_util.c b/dapl/common/dapl_ep_util.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_ep_util.h b/dapl/common/dapl_ep_util.h
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_evd_connection_callb.c b/dapl/common/dapl_evd_connection_callb.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_evd_cq_async_error_callb.c b/dapl/common/dapl_evd_cq_async_error_callb.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_evd_dequeue.c b/dapl/common/dapl_evd_dequeue.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_evd_post_se.c b/dapl/common/dapl_evd_post_se.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_evd_qp_async_error_callb.c b/dapl/common/dapl_evd_qp_async_error_callb.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_evd_resize.c b/dapl/common/dapl_evd_resize.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_evd_un_async_error_callb.c b/dapl/common/dapl_evd_un_async_error_callb.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_evd_util.c b/dapl/common/dapl_evd_util.c
old mode 100644
new mode 100755
index 85d8f64..53ed05b
--- a/dapl/common/dapl_evd_util.c
+++ b/dapl/common/dapl_evd_util.c
@@ -1111,6 +1111,7 @@ dapli_evd_cqe_to_event(IN DAPL_EVD * evd_ptr,
 #ifdef DAT_EXTENSIONS
 			/* Extended via request post or message receive */
 			if ((cookie->val.dto.type == DAPL_DTO_TYPE_EXTENSION) ||
+			    (cookie->val.dto.type == DAPL_DTO_TYPE_EXTENSION_IMM) ||
 			    (cookie->val.dto.type == DAPL_DTO_TYPE_RECV &&
 			     DAPL_GET_CQE_OPTYPE(cqe_ptr) != OP_RECEIVE)) {
 				dapls_cqe_to_event_extension(ep_ptr, cookie,
diff --git a/dapl/common/dapl_evd_util.h b/dapl/common/dapl_evd_util.h
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_get_consumer_context.c b/dapl/common/dapl_get_consumer_context.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_get_handle_type.c b/dapl/common/dapl_get_handle_type.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_hash.c b/dapl/common/dapl_hash.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_hash.h b/dapl/common/dapl_hash.h
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_hca_util.c b/dapl/common/dapl_hca_util.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_hca_util.h b/dapl/common/dapl_hca_util.h
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_ia_close.c b/dapl/common/dapl_ia_close.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_ia_open.c b/dapl/common/dapl_ia_open.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_ia_util.h b/dapl/common/dapl_ia_util.h
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_init.h b/dapl/common/dapl_init.h
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_llist.c b/dapl/common/dapl_llist.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_lmr_free.c b/dapl/common/dapl_lmr_free.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_lmr_query.c b/dapl/common/dapl_lmr_query.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_lmr_sync_rdma_read.c b/dapl/common/dapl_lmr_sync_rdma_read.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_lmr_sync_rdma_write.c b/dapl/common/dapl_lmr_sync_rdma_write.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_lmr_util.c b/dapl/common/dapl_lmr_util.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_lmr_util.h b/dapl/common/dapl_lmr_util.h
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_mr_util.c b/dapl/common/dapl_mr_util.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_mr_util.h b/dapl/common/dapl_mr_util.h
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_name_service.c b/dapl/common/dapl_name_service.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_name_service.h b/dapl/common/dapl_name_service.h
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_provider.h b/dapl/common/dapl_provider.h
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_psp_create.c b/dapl/common/dapl_psp_create.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_psp_create_any.c b/dapl/common/dapl_psp_create_any.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_psp_free.c b/dapl/common/dapl_psp_free.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_psp_query.c b/dapl/common/dapl_psp_query.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_pz_create.c b/dapl/common/dapl_pz_create.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_pz_free.c b/dapl/common/dapl_pz_free.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_pz_query.c b/dapl/common/dapl_pz_query.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_pz_util.c b/dapl/common/dapl_pz_util.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_pz_util.h b/dapl/common/dapl_pz_util.h
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_ring_buffer_util.c b/dapl/common/dapl_ring_buffer_util.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_ring_buffer_util.h b/dapl/common/dapl_ring_buffer_util.h
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_rmr_free.c b/dapl/common/dapl_rmr_free.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_rmr_query.c b/dapl/common/dapl_rmr_query.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_rmr_util.c b/dapl/common/dapl_rmr_util.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_rmr_util.h b/dapl/common/dapl_rmr_util.h
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_rsp_create.c b/dapl/common/dapl_rsp_create.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_rsp_free.c b/dapl/common/dapl_rsp_free.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_rsp_query.c b/dapl/common/dapl_rsp_query.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_set_consumer_context.c b/dapl/common/dapl_set_consumer_context.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_sp_util.c b/dapl/common/dapl_sp_util.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_sp_util.h b/dapl/common/dapl_sp_util.h
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_srq_create.c b/dapl/common/dapl_srq_create.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_srq_free.c b/dapl/common/dapl_srq_free.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_srq_post_recv.c b/dapl/common/dapl_srq_post_recv.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_srq_query.c b/dapl/common/dapl_srq_query.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_srq_resize.c b/dapl/common/dapl_srq_resize.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_srq_set_lw.c b/dapl/common/dapl_srq_set_lw.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_srq_util.c b/dapl/common/dapl_srq_util.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_srq_util.h b/dapl/common/dapl_srq_util.h
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_timer_util.c b/dapl/common/dapl_timer_util.c
old mode 100644
new mode 100755
diff --git a/dapl/common/dapl_timer_util.h b/dapl/common/dapl_timer_util.h
old mode 100644
new mode 100755
diff --git a/dapl/include/dapl.h b/dapl/include/dapl.h
index 60de914..77df53e 100755
--- a/dapl/include/dapl.h
+++ b/dapl/include/dapl.h
@@ -586,6 +586,7 @@ typedef enum dapl_dto_type
     DAPL_DTO_TYPE_RDMA_READ,
 #ifdef DAT_EXTENSIONS
     DAPL_DTO_TYPE_EXTENSION,
+    DAPL_DTO_TYPE_EXTENSION_IMM,
 #endif
 } DAPL_DTO_TYPE;
 
diff --git a/dapl/include/dapl_debug.h b/dapl/include/dapl_debug.h
old mode 100644
new mode 100755
diff --git a/dapl/include/dapl_ipoib_names.h b/dapl/include/dapl_ipoib_names.h
old mode 100644
new mode 100755
diff --git a/dapl/include/dapl_vendor.h b/dapl/include/dapl_vendor.h
old mode 100644
new mode 100755
diff --git a/dapl/openib_cma/cm.c b/dapl/openib_cma/cm.c
old mode 100644
new mode 100755
diff --git a/dapl/openib_cma/device.c b/dapl/openib_cma/device.c
old mode 100644
new mode 100755
diff --git a/dapl/openib_cma/linux/openib_osd.h b/dapl/openib_cma/linux/openib_osd.h
old mode 100644
new mode 100755
diff --git a/dapl/openib_common/collectives/fca_provider.c b/dapl/openib_common/collectives/fca_provider.c
old mode 100644
new mode 100755
diff --git a/dapl/openib_common/collectives/fca_provider.h b/dapl/openib_common/collectives/fca_provider.h
old mode 100644
new mode 100755
diff --git a/dapl/openib_common/collectives/ib_collectives.h b/dapl/openib_common/collectives/ib_collectives.h
old mode 100644
new mode 100755
diff --git a/dapl/openib_common/cq.c b/dapl/openib_common/cq.c
old mode 100644
new mode 100755
index feffded..968cf44
--- a/dapl/openib_common/cq.c
+++ b/dapl/openib_common/cq.c
@@ -208,7 +208,7 @@ dapls_ib_cq_alloc(IN DAPL_IA * ia_ptr,
 			goto err;
 
 		/* cross-socket: shadow both RX and TX, no IB CQ on MIC */
-		if (MXS_EP(&ia_ptr->hca_ptr->ib_trans.addr))
+		if (MXF_EP(&ia_ptr->hca_ptr->ib_trans.addr))
 			return DAT_SUCCESS;
 	}
 	dapl_llist_init_entry(&evd_ptr->ib_cq_handle->entry);
@@ -559,6 +559,17 @@ DAT_RETURN dapls_ib_completion_poll(IN DAPL_HCA * hca_ptr,
 #endif
 	ret = ibv_poll_cq(evd_ptr->ib_cq_handle->cq, 1, wc_ptr);
 
+#ifdef DAT_EXTENSIONS
+	if (ret==1) {
+		DAPL_COOKIE *cookie = (DAPL_COOKIE *)(uintptr_t) DAPL_GET_CQE_WRID(wc_ptr);
+		dapl_os_assert((NULL != cookie));
+
+		/* some rNICs don't set wc_flags correctly for immed data */
+		if (cookie->val.dto.type == DAPL_DTO_TYPE_EXTENSION_IMM)
+			wc_ptr->wc_flags |= IBV_WC_WITH_IMM;
+	}
+#endif
+
 #ifdef _OPENIB_MCM_
 	/*
 	 * HST->MXS, we need to intercept direct TX WC in flight
@@ -573,8 +584,9 @@ DAT_RETURN dapls_ib_completion_poll(IN DAPL_HCA * hca_ptr,
 		ep_ptr = cookie->ep;
 		dapl_os_assert((NULL != ep_ptr));
 		if ((!ep_ptr->qp_handle->tp->scif_ep) &&
-		    (ep_ptr->qp_handle->ep_map == MIC_XSOCK_DEV) &&
-		    (DAPL_GET_CQE_OPTYPE(wc_ptr) == OP_RDMA_WRITE_IMM)) {
+		    (MXF_EP(ep_ptr->qp_handle)) &&
+		    ((DAPL_GET_CQE_OPTYPE(wc_ptr) == OP_RDMA_WRITE_IMM) ||
+		     (DAPL_GET_CQE_OPTYPE(wc_ptr) == OP_RDMA_WRITE))) {
 			dapl_log(DAPL_DBG_TYPE_EP,
 				 " cq_dto_event: MCM RW_pi: evd %p ep %p st %d op %s ln %d wr_id %p\n",
 				 evd_ptr, ep_ptr,
diff --git a/dapl/openib_common/dapl_ib_common.h b/dapl/openib_common/dapl_ib_common.h
old mode 100644
new mode 100755
diff --git a/dapl/openib_common/dapl_ib_dto.h b/dapl/openib_common/dapl_ib_dto.h
old mode 100644
new mode 100755
index de8fdc9..97341d2
--- a/dapl/openib_common/dapl_ib_dto.h
+++ b/dapl/openib_common/dapl_ib_dto.h
@@ -236,11 +236,10 @@ dapls_ib_post_send (
 		 " post_snd: %s ep %p op %x flgs %x sgl %p,%d ln %d wr_id %Lx\n",
 		 PROVIDER_NAME, ep_ptr, wr.opcode, wr.send_flags, wr.sg_list,
 		 wr.num_sge, total_len, wr.wr_id);
-
 #ifdef _OPENIB_MCM_
 	if (ep_ptr->qp_handle->tp->scif_ep)
 		ret = dapli_mix_post_send(ep_ptr->qp_handle, total_len, &wr, &bad_wr);
-	else if (ep_ptr->qp_handle->ep_map == MIC_XSOCK_DEV)
+	else if (MXF_EP(ep_ptr->qp_handle))
 		ret = mcm_send_pi(ep_ptr->qp_handle, total_len, &wr, &bad_wr);
 	else
 		ret = ibv_post_send(ep_ptr->qp_handle->qp2, &wr, &bad_wr);
@@ -448,7 +447,7 @@ dapls_ib_post_ext_send (
 #ifdef _OPENIB_MCM_
 	if (ep_ptr->qp_handle->tp->scif_ep)
 		ret = dapli_mix_post_send(ep_ptr->qp_handle, total_len, &wr, &bad_wr);
-	else if (ep_ptr->qp_handle->ep_map == MIC_XSOCK_DEV)
+	else if (MXF_EP(ep_ptr->qp_handle))
 		ret = mcm_send_pi(ep_ptr->qp_handle, total_len, &wr, &bad_wr);
 	else
 		ret = ibv_post_send(ep_ptr->qp_handle->qp2, &wr, &bad_wr);
diff --git a/dapl/openib_common/dapl_mic_common.h b/dapl/openib_common/dapl_mic_common.h
index e7dd385..5afc8ec 100755
--- a/dapl/openib_common/dapl_mic_common.h
+++ b/dapl/openib_common/dapl_mic_common.h
@@ -311,6 +311,11 @@ typedef enum dat_mix_ops
 	MIX_PROV_ATTR,
 	MIX_RECV,
 	MIX_CM_REJECT_USER,
+	MIX_PZ_CREATE,
+	MIX_PZ_FREE,
+	MIX_QUERY_DEVICE,
+	MIX_QUERY_PORT,
+	MIX_LAST_OP,	/* Keep last */
 
 } dat_mix_ops_t;
 
@@ -347,8 +352,12 @@ static inline char * mix_op_str(IN int op)
 		"PROV_ATTR",
 		"POST_RECV",
 		"CM_REJECT_USER",
+		"PZ_CREATE",
+		"PZ_FREE",
+		"QUERY_DEVICE",
+		"QUERY_PORT",
 	};
-	return ((op < 2 || op > 28) ? "Invalid OP?" : mix_ops[op]);
+	return ((op < 2 || op >= MIX_LAST_OP) ? "Invalid OP?" : mix_ops[op]);
 }
 
 typedef enum dat_mix_op_flags
@@ -450,9 +459,88 @@ typedef struct dat_mix_open
 	uint16_t		port;		/* ib physical port number */
 	dat_mix_dev_attr_t	dev_attr;
 	dat_mcm_addr_t		dev_addr;
-
 }  __attribute__((packed)) dat_mix_open_t;
 
+/***** MIX PZ, PZ handle info returned */
+typedef struct dat_mix_pz
+{
+	dat_mix_hdr_t		hdr;
+	uint64_t		ctx;   /* client handle from mic */
+	uint64_t		ib_pd; /* IB PZ handle from proxy */
+}  __attribute__((packed)) dat_mix_pz_t;
+
+/***** MIX QUERY PORT info returned */
+typedef struct dat_mix_port_attr
+{
+	dat_mix_hdr_t           hdr;
+	uint64_t                gid_tbl_len;
+	uint32_t                port_cap_flags;
+	uint32_t                max_msg_sz;
+	uint32_t                bad_pkey_cntr;
+	uint32_t                qkey_viol_cntr;
+	uint16_t                pkey_tbl_len;
+	uint16_t                lid;
+	uint16_t                sm_lid;
+	uint8_t                 lmc;
+	uint8_t                 max_vl_num;
+	uint8_t                 sm_sl;
+	uint8_t                 subnet_timeout;
+	uint8_t                 init_type_reply;
+	uint8_t                 active_width;
+	uint8_t                 active_speed;
+	uint8_t                 phys_state;
+	uint8_t                 link_layer;
+	uint8_t                 state;
+	uint8_t                 max_mtu;
+	uint8_t                 active_mtu;
+}  __attribute__((packed)) dat_mix_port_attr_t;
+
+/***** MIX get,get device attributes, 289 bytes */
+typedef struct dat_mix_device_attr
+{
+dat_mix_hdr_t           hdr;
+	char                    fw_ver[64];
+	uint64_t                node_guid;
+	uint64_t                sys_image_guid;
+	uint64_t                max_mr_size;
+	uint64_t                page_size_cap;
+	uint64_t                max_qp;
+	uint64_t                max_qp_wr;
+	uint64_t                device_cap_flags;
+	uint64_t                max_sge;
+	uint64_t                max_sge_rd;
+	uint64_t                max_cq;
+	uint64_t                max_cqe;
+	uint64_t                max_mr;
+	uint64_t                max_pd;
+	uint64_t                max_qp_rd_atom;
+	uint64_t                max_ee_rd_atom;
+	uint64_t                max_res_rd_atom;
+	uint64_t                max_qp_init_rd_atom;
+	uint64_t                max_ee_init_rd_atom;
+	uint64_t                max_ee;
+	uint64_t                max_rdd;
+	uint64_t                max_mw;
+	uint64_t                max_raw_ipv6_qp;
+	uint64_t                max_raw_ethy_qp;
+	uint64_t                max_mcast_grp;
+	uint64_t                max_mcast_qp_attach;
+	uint64_t                max_total_mcast_qp_attach;
+	uint64_t                max_ah;
+	uint64_t                max_fmr;
+	uint64_t                max_map_per_fmr;
+	uint64_t                max_srq;
+	uint64_t                max_srq_wr;
+	uint64_t                max_srq_sge;
+	uint32_t                vendor_id;
+	uint32_t                vendor_part_id;
+	uint32_t                hw_ver;
+	uint16_t                max_pkeys;
+	uint8_t                 local_ca_ack_delay;
+	uint8_t                 phys_port_cnt;
+	uint8_t                 atomic_cap;
+}  __attribute__((packed)) dat_mix_device_attr_t;
+
 /***** MIX get,set attributes, 128 bytes */
 typedef struct dat_mix_attr
 {
@@ -826,14 +914,14 @@ static inline void mcm_hton_wrc(mcm_wrc_info_t *dst, mcm_wrc_info_t *src)
 	if (src->wr_addr) {
 		dst->wr_addr = htonll(src->wr_addr);
 		dst->wr_rkey = htonl(src->wr_rkey);
-		dst->wr_len = htons(src->wr_len);
+		dst->wr_len = htonl(src->wr_len);
 		dst->wr_sz = htons(src->wr_sz);
 		dst->wr_end = htons(src->wr_end);
 	}
 	if (src->wc_addr) {
 		dst->wc_addr = htonll(src->wc_addr);
 		dst->wc_rkey = htonl(src->wc_rkey);
-		dst->wc_len = htons(src->wc_len);
+		dst->wc_len = htonl(src->wc_len);
 		dst->wc_sz = htons(src->wc_sz);
 		dst->wc_end = htons(src->wc_end);
 	}
@@ -844,13 +932,13 @@ static inline void mcm_ntoh_wrc(mcm_wrc_info_t *dst, mcm_wrc_info_t *src)
 {
 	dst->wr_addr = ntohll(src->wr_addr);
 	dst->wr_rkey = ntohl(src->wr_rkey);
-	dst->wr_len = ntohs(src->wr_len);
+	dst->wr_len = ntohl(src->wr_len);
 	dst->wr_sz = ntohs(src->wr_sz);
 	dst->wr_end = ntohs(src->wr_end);
 
 	dst->wc_addr = ntohll(src->wc_addr);
 	dst->wc_rkey = ntohl(src->wc_rkey);
-	dst->wc_len = ntohs(src->wc_len);
+	dst->wc_len = ntohl(src->wc_len);
 	dst->wc_sz = ntohs(src->wc_sz);
 	dst->wc_end = ntohs(src->wc_end);
 }
diff --git a/dapl/openib_common/ib_extensions.c b/dapl/openib_common/ib_extensions.c
old mode 100644
new mode 100755
index 25db541..427c708
--- a/dapl/openib_common/ib_extensions.c
+++ b/dapl/openib_common/ib_extensions.c
@@ -525,7 +525,9 @@ dapli_post_ext(IN DAT_EP_HANDLE ep_handle,
 	 * requests, which aren't allowed to race with each other.
 	 */
 	dat_status = dapls_dto_cookie_alloc(&ep_ptr->req_buffer,
-					    DAPL_DTO_TYPE_EXTENSION,
+					    op_type == OP_RDMA_WRITE_IMM ?
+						DAPL_DTO_TYPE_EXTENSION_IMM:
+						DAPL_DTO_TYPE_EXTENSION,
 					    user_cookie, &cookie);
 	if (dat_status != DAT_SUCCESS)
 		goto bail;
diff --git a/dapl/openib_common/mem.c b/dapl/openib_common/mem.c
old mode 100644
new mode 100755
index 0a82c00..7f5ea6a
--- a/dapl/openib_common/mem.c
+++ b/dapl/openib_common/mem.c
@@ -83,7 +83,13 @@ STATIC _INLINE_ int dapls_convert_privileges(IN DAT_MEM_PRIV_FLAGS privileges)
 DAT_RETURN dapls_ib_pd_alloc(IN DAPL_IA * ia_ptr, IN DAPL_PZ * pz)
 {
 	/* get a protection domain */
-	pz->pd_handle = ibv_alloc_pd(ia_ptr->hca_ptr->ib_hca_handle);
+#ifdef _OPENIB_MCM_
+	if(MFO_EP(&ia_ptr->hca_ptr->ib_trans.addr) && ia_ptr->hca_ptr->ib_trans.scif_tx_ep)
+		dapli_mix_pz_create(ia_ptr, pz);
+	else
+#endif
+		pz->pd_handle = ibv_alloc_pd(ia_ptr->hca_ptr->ib_hca_handle);
+
 	if (!pz->pd_handle)
 		return (dapl_convert_errno(ENOMEM, "alloc_pd"));
 
@@ -112,11 +118,23 @@ DAT_RETURN dapls_ib_pd_alloc(IN DAPL_IA * ia_ptr, IN DAPL_PZ * pz)
  */
 DAT_RETURN dapls_ib_pd_free(IN DAPL_PZ * pz)
 {
+	int ret = DAT_INVALID_STATE;
+
 	if (pz->pd_handle != IB_INVALID_HANDLE) {
-		ibv_dealloc_pd(pz->pd_handle);
+#ifdef _OPENIB_MCM_
+		DAPL_IA * ia_ptr = pz->header.owner_ia;
+		if (MFO_EP(&ia_ptr->hca_ptr->ib_trans.addr) && ia_ptr->hca_ptr->ib_trans.scif_tx_ep)
+			ret = dapli_mix_pz_free(pz);
+		else
+#endif
+			ret = ibv_dealloc_pd(pz->pd_handle);
+
 		pz->pd_handle = IB_INVALID_HANDLE;
 	}
-	return DAT_SUCCESS;
+	if (!ret)
+		return DAT_SUCCESS;
+
+	return DAT_INVALID_STATE;
 }
 
 /*
@@ -138,15 +156,15 @@ DAT_RETURN dapls_ib_pd_free(IN DAPL_PZ * pz)
  *	DAT_INSUFFICIENT_RESOURCES
  *
  */
-
 DAT_RETURN
 dapls_ib_mr_register(IN DAPL_IA * ia_ptr,
 		     IN DAPL_LMR * lmr,
 		     IN DAT_PVOID virt_addr,
 		     IN DAT_VLEN length,
 		     IN DAT_MEM_PRIV_FLAGS privileges, IN DAT_VA_TYPE va_type)
+#ifdef _OPENIB_MCM_
 {
-	struct ibv_device *ibv_dev = ia_ptr->hca_ptr->ib_hca_handle->device;
+	struct ibv_device *ibv_dev;
 	int ib_access = dapls_convert_privileges(privileges);
 
 	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
@@ -159,40 +177,37 @@ dapls_ib_mr_register(IN DAPL_IA * ia_ptr,
 		return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
 	}
 
-	/* iWARP only support */
-	if ((va_type == DAT_VA_TYPE_ZB) &&
-	    (ibv_dev->transport_type != IBV_TRANSPORT_IWARP)) {
+	if (va_type == DAT_VA_TYPE_ZB) {
 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
 			     " va_type == DAT_VA_TYPE_ZB: NOT SUPPORTED\n");
 		return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
 	}
 
-#ifdef _OPENIB_MCM_
 	ib_access |= IBV_ACCESS_REMOTE_READ; /* HST->MXS, peer PI RR */
-#endif
 
 	/* local read is default on IB */
-	lmr->mr_handle =
-	    ibv_reg_mr(((DAPL_PZ *) lmr->param.pz_handle)->pd_handle,
-		       virt_addr, length, ib_access);
+	if (!MFO_EP(&ia_ptr->hca_ptr->ib_trans.addr) || !ia_ptr->hca_ptr->ib_trans.scif_tx_ep) {
+		lmr->mr_handle =
+		    ibv_reg_mr(((DAPL_PZ *) lmr->param.pz_handle)->pd_handle,
+			       virt_addr, length, ib_access);
 
-	if (!lmr->mr_handle)
-		return (dapl_convert_errno(ENOMEM, "reg_mr"));
+		if (!lmr->mr_handle)
+			return (dapl_convert_errno(ENOMEM, "reg_mr"));
 
-	lmr->param.lmr_context = lmr->mr_handle->lkey;
-	lmr->param.rmr_context = lmr->mr_handle->rkey;
+		lmr->param.lmr_context = lmr->mr_handle->lkey;
+		lmr->param.rmr_context = lmr->mr_handle->rkey;
+
+		dapl_log(DAPL_DBG_TYPE_UTIL,
+			     " mr_register: mr=%p addr=%p pd %p ctx %p "
+			     "lkey=0x%x rkey=0x%x priv=%x\n",
+			     lmr->mr_handle, lmr->mr_handle->addr,
+			     lmr->mr_handle->pd, lmr->mr_handle->context,
+			     lmr->mr_handle->lkey, lmr->mr_handle->rkey,
+			     length, dapls_convert_privileges(privileges));
+	}
 	lmr->param.registered_size = length;
 	lmr->param.registered_address = (DAT_VADDR) (uintptr_t) virt_addr;
 
-	dapl_log(DAPL_DBG_TYPE_UTIL,
-		     " mr_register: mr=%p addr=%p pd %p ctx %p "
-		     "lkey=0x%x rkey=0x%x priv=%x\n",
-		     lmr->mr_handle, lmr->mr_handle->addr,
-		     lmr->mr_handle->pd, lmr->mr_handle->context,
-		     lmr->mr_handle->lkey, lmr->mr_handle->rkey,
-		     length, dapls_convert_privileges(privileges));
-
-#ifdef _OPENIB_MCM_
 	/* RDMA proxy with MPXYD register with SCIF */
 	if (ia_ptr->hca_ptr->ib_trans.scif_tx_ep) {
 		DAT_VLEN offset = (DAT_VLEN)(virt_addr - ALIGN_DOWN_PPAGE(virt_addr));
@@ -218,20 +233,76 @@ dapls_ib_mr_register(IN DAPL_IA * ia_ptr,
 			     mcm_map_str(ia_ptr->hca_ptr->ib_trans.addr.ep_map));
 
 		/* plus cross socket proxy-in; send registration info to MPXYD */
-		if (MXS_EP(&ia_ptr->hca_ptr->ib_trans.addr)) {
+		if (MXF_EP(&ia_ptr->hca_ptr->ib_trans.addr)) {
+			/* MFO - Mic in Mic Full Offload */
 			if (dapli_mix_mr_create(&ia_ptr->hca_ptr->ib_trans, lmr)) {
 				dapls_ib_mr_deregister(lmr);
 				return (dapl_convert_errno(ENOMEM, "reg_mr"));
 			}
+			if (MFO_EP(&ia_ptr->hca_ptr->ib_trans.addr)) {
+				lmr->param.lmr_context = lmr->mr_id;
+				lmr->param.rmr_context = lmr->mr_id;
+				lmr->mr_handle->lkey = lmr->param.lmr_context;
+				lmr->mr_handle->rkey = lmr->param.rmr_context;
+			}
 		}
 	}
-#endif
 	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
 		     " mr_register: ia=%p, lmr=%p va=%p ln=%d priv=%d return\n",
 		     ia_ptr, lmr, virt_addr, length, privileges);
 
 	return DAT_SUCCESS;
 }
+#else
+{
+	struct ibv_device *ibv_dev;
+	int ib_access = dapls_convert_privileges(privileges);
+
+	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+		     " mr_register: ia=%p, lmr=%p va=%p ln=%d pv=0x%x\n",
+		     ia_ptr, lmr, virt_addr, length, privileges);
+
+	if (lmr->param.mem_type == DAT_MEM_TYPE_SHARED_VIRTUAL) {
+		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+			     " mr_register_shared: NOT IMPLEMENTED\n");
+		return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
+	}
+
+	/* iWARP only support */
+	ibv_dev = ia_ptr->hca_ptr->ib_hca_handle->device;
+	if ((va_type == DAT_VA_TYPE_ZB) && (ibv_dev->transport_type != IBV_TRANSPORT_IWARP)) {
+		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
+		     " va_type == DAT_VA_TYPE_ZB: NOT SUPPORTED\n");
+		return DAT_ERROR(DAT_NOT_IMPLEMENTED, DAT_NO_SUBTYPE);
+	}
+
+	/* local read is default on IB */
+	lmr->mr_handle = ibv_reg_mr(((DAPL_PZ *)lmr->param.pz_handle)->pd_handle,
+				    virt_addr, length, ib_access);
+	if (!lmr->mr_handle)
+		return (dapl_convert_errno(ENOMEM, "reg_mr"));
+
+	lmr->param.lmr_context = lmr->mr_handle->lkey;
+	lmr->param.rmr_context = lmr->mr_handle->rkey;
+
+	dapl_log(DAPL_DBG_TYPE_UTIL,
+		     " mr_register: mr=%p addr=%p pd %p ctx %p "
+		     "lkey=0x%x rkey=0x%x priv=%x\n",
+		     lmr->mr_handle, lmr->mr_handle->addr,
+		     lmr->mr_handle->pd, lmr->mr_handle->context,
+		     lmr->mr_handle->lkey, lmr->mr_handle->rkey,
+		     length, dapls_convert_privileges(privileges));
+
+	lmr->param.registered_size = length;
+	lmr->param.registered_address = (DAT_VADDR) (uintptr_t) virt_addr;
+
+	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+		     " mr_register: ia=%p, lmr=%p va=%p ln=%d priv=%d return\n",
+		     ia_ptr, lmr, virt_addr, length, privileges);
+
+	return DAT_SUCCESS;
+}
+#endif
 
 /*
  * dapl_ib_mr_deregister
@@ -250,20 +321,22 @@ dapls_ib_mr_register(IN DAPL_IA * ia_ptr,
  *
  */
 DAT_RETURN dapls_ib_mr_deregister(IN DAPL_LMR * lmr)
-{
 #ifdef _OPENIB_MCM_
-	if (lmr->header.owner_ia->hca_ptr->ib_trans.scif_ep) {
+{
+	DAPL_IA * ia_ptr = (DAPL_IA *)lmr->param.ia_handle;
+	if (ia_ptr->hca_ptr->ib_trans.scif_ep) {
 		int ret;
 
-		if (lmr->header.owner_ia->hca_ptr->ib_trans.addr.ep_map == MIC_XSOCK_DEV) {
-			if (dapli_mix_mr_free(&lmr->header.owner_ia->hca_ptr->ib_trans, lmr)) {
+		if (MXF_EP(&ia_ptr->hca_ptr->ib_trans.addr)) {
+			if (dapli_mix_mr_free(&ia_ptr->hca_ptr->ib_trans, lmr)) {
 				dapl_log(DAPL_DBG_TYPE_ERR,
 					 " mr_deregister: ERR: lmr_remove; id %d sci_addr %p, off 0x%llx, len %d ret=%s\n",
 					 lmr->mr_id, lmr->param.registered_address, lmr->sci_addr,
 					 ALIGN_PAGE(lmr->param.registered_size+lmr->sci_off), strerror(errno));
+				return DAT_INVALID_STATE;
 			}
 		}
-		ret = scif_unregister(lmr->header.owner_ia->hca_ptr->ib_trans.scif_tx_ep,
+		ret = scif_unregister(ia_ptr->hca_ptr->ib_trans.scif_tx_ep,
 				      lmr->sci_addr, ALIGN_PAGE(lmr->param.registered_size+lmr->sci_off));
 		if (ret)
 			dapl_log(DAPL_DBG_TYPE_ERR,
@@ -276,15 +349,24 @@ DAT_RETURN dapls_ib_mr_deregister(IN DAPL_LMR * lmr)
 		lmr->sci_off = 0;
 		lmr->mr_id = 0;
 	}
-#endif
 	if (lmr->mr_handle != IB_INVALID_HANDLE) {
 		if (ibv_dereg_mr(lmr->mr_handle))
 			return (dapl_convert_errno(errno, "dereg_mr"));
 		lmr->mr_handle = IB_INVALID_HANDLE;
 	}
+	return DAT_SUCCESS;
+}
+#else
+{
+	if (lmr->mr_handle != IB_INVALID_HANDLE) {
+		if (ibv_dereg_mr(lmr->mr_handle))
+			return (dapl_convert_errno(errno, "dereg_mr"));
 
+		lmr->mr_handle = IB_INVALID_HANDLE;
+	}
 	return DAT_SUCCESS;
 }
+#endif
 
 /*
  * dapl_ib_mr_register_shared
diff --git a/dapl/openib_common/qp.c b/dapl/openib_common/qp.c
old mode 100644
new mode 100755
index 95ea70d..527fc1d
--- a/dapl/openib_common/qp.c
+++ b/dapl/openib_common/qp.c
@@ -223,7 +223,7 @@ dapls_ib_qp_alloc(IN DAPL_IA * ia_ptr,
 	/* Don't create any QP if MIC xsocket, QPt and QPr both on MPXYD */
 	if (!ia_ptr->hca_ptr->ib_trans.scif_ep ||
 	    (ia_ptr->hca_ptr->ib_trans.scif_ep &&
-	    !MXS_EP(&ia_ptr->hca_ptr->ib_trans.addr))) {
+	    !MXF_EP(&ia_ptr->hca_ptr->ib_trans.addr))) {
 		/* QP1 needed for RX only, set QP1 req_cq empty */
 		qp_create.send_cq = ia_ptr->hca_ptr->ib_trans.ib_cq_empty->cq;
 	} else {
@@ -258,7 +258,7 @@ skip_qp:
 	if (ia_ptr->hca_ptr->ib_trans.scif_ep) { /* MIC: shadow QPt on proxy */
 		req_cq->flags |= DCM_CQ_TX_INDIRECT;
 		qp_create.send_cq = req_cq->cq;
-		qp_create.cap.max_inline_data = 32; /* setup for bw not latency */
+		qp_create.cap.max_inline_data = max_inline;
 		qp_create.cap.max_send_wr = attr->max_request_dtos;
 		qp_create.cap.max_send_sge = attr->max_request_iov;
 		if (ep_ptr->qp_handle->qp) { /* MIC: unused shadow QPr on proxy */
@@ -281,8 +281,7 @@ skip_qp:
 	} else {
 		/* NON-MIC: need QPt, in case of shadowed QP's on remote MIC's */
 		/* Prep for HST -> MXS: xfers via remote PI instead of direct */
-		ia_ptr->hca_ptr->ib_trans.ib_cm.max_inline =
-				DAPL_MAX(sizeof(struct mcm_wr_rx), max_inline);
+
 		/* create CQ for peer PI, HST->MXS case */
 		if (mcm_create_pi_cq(ep_ptr->qp_handle, MCM_WRC_QLEN))
 			goto err;
@@ -292,7 +291,9 @@ skip_qp:
 		qp_create.cap.max_recv_sge = 1;
 		qp_create.cap.max_send_wr = DAPL_MAX(MCM_WRC_QLEN, attr->max_request_dtos);
 		qp_create.cap.max_send_sge = attr->max_request_iov;
-		qp_create.cap.max_inline_data = ia_ptr->hca_ptr->ib_trans.ib_cm.max_inline;
+		if (max_inline)
+			qp_create.cap.max_inline_data =
+				DAPL_MAX(sizeof(struct mcm_wr_rx), max_inline);
 
 		ep_ptr->qp_handle->qp2 = ibv_create_qp(ib_pd_handle, &qp_create);
 		if (!ep_ptr->qp_handle->qp2) {
@@ -373,7 +374,6 @@ err:
 DAT_RETURN dapls_ib_qp_free(IN DAPL_IA * ia_ptr, IN DAPL_EP * ep_ptr)
 {
 	struct ibv_qp *qp;
-	struct ibv_qp_attr qp_attr;
 
 	dapl_log(DAPL_DBG_TYPE_EP,
 		 " dapls_ib_qp_free: ep_ptr %p qp_handle %p\n",
@@ -389,39 +389,44 @@ DAT_RETURN dapls_ib_qp_free(IN DAPL_IA * ia_ptr, IN DAPL_EP * ep_ptr)
 	if (ep_ptr->qp_handle) {
 		qp = ep_ptr->qp_handle->qp;
 		dapl_os_unlock(&ep_ptr->header.lock);
-
-		qp_attr.qp_state = IBV_QPS_ERR;
-		if (qp)
-			ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE);
-		dapls_ep_flush_cqs(ep_ptr);
+		if (qp) {
+			dapls_modify_qp_state(qp, IBV_QPS_ERR, 0, 0, 0);
+			dapls_ep_flush_cqs(ep_ptr);
+		}
 
 #ifdef _OPENIB_CMA_
 		rdma_destroy_qp(cm_ptr->cm_id);
 		cm_ptr->cm_id->qp = NULL;
-#else
-		if (qp && ibv_destroy_qp(qp)) {
-			dapl_log(DAPL_DBG_TYPE_ERR, 
-				 " qp_free: ibv_destroy_qp error - %s\n",
-				 strerror(errno));
-		}
 #endif
 
 #ifdef _OPENIB_MCM_
+		if (ep_ptr->qp_handle->qp2)
+			dapls_modify_qp_state(ep_ptr->qp_handle->qp2,
+					      IBV_QPS_ERR, 0, 0, 0);
+
 		/* MIC: shadow support on MPXYD node */
 		if (ia_ptr->hca_ptr->ib_trans.scif_ep)
 			dapli_mix_qp_free(ep_ptr->qp_handle);
 
-		else 	/* NON MIC: local shadow queue */
+		dapls_ep_flush_cqs(ep_ptr);
+
+		if (ep_ptr->qp_handle->qp2)
 			ibv_destroy_qp(ep_ptr->qp_handle->qp2);
 
 		dapl_os_lock_destroy(&ep_ptr->qp_handle->lock);
-		mcm_destroy_pi_cq(ep_ptr->qp_handle);
-		mcm_destroy_wc_q(ep_ptr->qp_handle);
 #endif
+		if (qp)
+			ibv_destroy_qp(qp);
 	} else {
 		dapl_os_unlock(&ep_ptr->header.lock);
 	}
 
+#ifdef _OPENIB_MCM_
+	if (ep_ptr->qp_handle) {
+		mcm_destroy_pi_cq(ep_ptr->qp_handle);
+		mcm_destroy_wc_q(ep_ptr->qp_handle);
+	}
+#endif
 	/* destroy any UD address handles */
 	if (ep_ptr->qp_handle->ah) {
 		int i;
diff --git a/dapl/openib_common/srq.c b/dapl/openib_common/srq.c
old mode 100644
new mode 100755
diff --git a/dapl/openib_common/util.c b/dapl/openib_common/util.c
old mode 100644
new mode 100755
index c44f78a..55bda3b
--- a/dapl/openib_common/util.c
+++ b/dapl/openib_common/util.c
@@ -407,12 +407,19 @@ DAT_RETURN dapls_ib_query_hca(IN DAPL_HCA * hca_ptr,
 		tp->na.mode = "DIRECT";
 		tp->na.read = "TRUE";
 #endif
-
-	/* query verbs for this device and port attributes */
-	if (ibv_query_device(hca_ptr->ib_hca_handle, &dev_attr) ||
-	    ibv_query_port(hca_ptr->ib_hca_handle,
-			   hca_ptr->port_num, &port_attr))
-		return (dapl_convert_errno(errno, "ib_query_hca"));
+#ifdef _OPENIB_MCM_
+	if (MFO_EP(&hca_ptr->ib_trans.addr) && tp->self.node) {
+		if (dapli_mix_query_device(tp, &dev_attr) ||
+			dapli_mix_query_port(tp, hca_ptr->port_num, &port_attr))
+				return (dapl_convert_errno(errno, "mix_query_hca"));
+	}
+	else
+#endif
+		/* query verbs for this device and port attributes */
+		if (ibv_query_device(hca_ptr->ib_hca_handle, &dev_attr) ||
+				ibv_query_port(hca_ptr->ib_hca_handle,
+						hca_ptr->port_num, &port_attr))
+			return (dapl_convert_errno(errno, "ib_query_hca"));
 
 	dev_attr.max_qp_wr = DAPL_MIN(dev_attr.max_qp_wr,
 				      dapl_os_get_env_val("DAPL_WR_MAX", dev_attr.max_qp_wr));
@@ -428,9 +435,16 @@ DAT_RETURN dapls_ib_query_hca(IN DAPL_HCA * hca_ptr,
 
 	if (ia_attr != NULL) {
 		(void)dapl_os_memzero(ia_attr, sizeof(*ia_attr));
-		strncpy(ia_attr->adapter_name,
-		        ibv_get_device_name(tp->ib_dev),
-		        DAT_NAME_MAX_LENGTH - 1);
+#ifdef _OPENIB_MCM_
+		/* MIC at MFO mode - Initialized at Open */
+		if (MFO_EP(&hca_ptr->ib_trans.addr) && tp->self.node) {
+			strncpy(ia_attr->adapter_name, hca_ptr->name, DAT_NAME_MAX_LENGTH);
+		}
+		else
+#endif
+			strncpy(ia_attr->adapter_name, ibv_get_device_name(tp->ib_dev),
+					DAT_NAME_MAX_LENGTH - 1);
+
 		ia_attr->adapter_name[DAT_NAME_MAX_LENGTH - 1] = '\0';
 		ia_attr->vendor_name[DAT_NAME_MAX_LENGTH - 1] = '\0';
 		ia_attr->ia_address_ptr = (DAT_IA_ADDRESS_PTR) &hca_ptr->hca_address;
@@ -465,10 +479,12 @@ DAT_RETURN dapls_ib_query_hca(IN DAPL_HCA * hca_ptr,
 		ia_attr->max_message_size = port_attr.max_msg_sz;
 		ia_attr->max_rdma_size = port_attr.max_msg_sz;
 		/* iWARP spec. - 1 sge for RDMA reads */
-		if (hca_ptr->ib_hca_handle->device->transport_type == IBV_TRANSPORT_IWARP)
-			ia_attr->max_iov_segments_per_rdma_read = 1;
-		else
-			ia_attr->max_iov_segments_per_rdma_read = dev_attr.max_sge;
+		ia_attr->max_iov_segments_per_rdma_read = dev_attr.max_sge;
+#ifdef _OPENIB_MCM_
+		if (!MFO_EP(&hca_ptr->ib_trans.addr))
+#endif
+			if (hca_ptr->ib_hca_handle->device->transport_type == IBV_TRANSPORT_IWARP)
+				ia_attr->max_iov_segments_per_rdma_read = 1;
 		ia_attr->max_iov_segments_per_rdma_write = dev_attr.max_sge;
 		ia_attr->num_transport_attr = 0;
 		ia_attr->transport_attr = NULL;
@@ -489,8 +505,14 @@ DAT_RETURN dapls_ib_query_hca(IN DAPL_HCA * hca_ptr,
 		tp->na.mtu = dapl_ib_mtu_str(tp->ib_cm.mtu);
 		tp->na.port = dapl_ib_port_str(port_attr.state);
 		tp->na.port_num = dapl_ib_port_num_str(hca_ptr->port_num);
-		if (!tp->guid)
-			tp->guid = ntohll(ibv_get_device_guid(tp->ib_dev));
+		if (!tp->guid) {
+#ifdef _OPENIB_MCM_
+			if (MFO_EP(&hca_ptr->ib_trans.addr) && tp->self.node)
+				tp->guid = ntohll(dev_attr.node_guid);
+			else
+#endif
+				tp->guid = ntohll(ibv_get_device_guid(tp->ib_dev));
+		}
 
 		sprintf(tp->guid_str, "%04x:%04x:%04x:%04x",
 			(unsigned) (tp->guid >> 48) & 0xffff,
@@ -498,6 +520,12 @@ DAT_RETURN dapls_ib_query_hca(IN DAPL_HCA * hca_ptr,
 			(unsigned) (tp->guid >> 16) & 0xffff,
 			(unsigned) (tp->guid >>  0) & 0xffff);
 
+#ifdef _OPENIB_MCM_
+		/* initialized at open */
+		if (MFO_EP(&hca_ptr->ib_trans.addr) && tp->self.node)
+			goto skip_ib;
+#endif
+
 		if (hca_ptr->ib_hca_handle->device->transport_type != IBV_TRANSPORT_IB)
 			goto skip_ib;
 
diff --git a/dapl/openib_mcm/cm.c b/dapl/openib_mcm/cm.c
old mode 100644
new mode 100755
index fc623cc..56ed23a
--- a/dapl/openib_mcm/cm.c
+++ b/dapl/openib_mcm/cm.c
@@ -478,7 +478,7 @@ retry_listenq:
 		if (!listenq && 
 		    cm->msg.sport == msg->dport && cm->msg.sqpn == msg->dqpn && 
 		    cm->msg.dport == msg->sport && cm->msg.dqpn == msg->sqpn && 
-		    cm->msg.daddr1.lid == msg->saddr1.lid) {
+		    cm->msg.daddr2.lid == msg->saddr2.lid) {
 			if (ntohs(msg->op) != MCM_REQ) {
 				found = cm;
 				break; 
@@ -492,13 +492,13 @@ retry_listenq:
 					 cm, dapl_cm_op_str(ntohs(msg->op)),
 					 dapl_cm_op_str(ntohs(cm->msg.op)),
 					 dapl_cm_state_str(cm->state),
-					 ntohs(cm->msg.daddr1.lid), ntohs(cm->msg.dport),
+					 ntohs(cm->msg.daddr2.lid), ntohs(cm->msg.dport),
 					 ntohl(cm->msg.dqpn), ntohl(cm->msg.daddr1.qpn),
-					 ntohs(msg->saddr1.lid), ntohs(msg->sport),
+					 ntohs(msg->saddr2.lid), ntohs(msg->sport),
 					 ntohl(msg->sqpn), ntohl(msg->saddr1.qpn),
-					 ntohs(msg->daddr1.lid), ntohs(msg->dport),
+					 ntohs(msg->daddr2.lid), ntohs(msg->dport),
 					 ntohl(msg->dqpn), ntohl(msg->daddr1.qpn),
-					 ntohs(cm->msg.saddr1.lid), ntohs(cm->msg.sport),
+					 ntohs(cm->msg.saddr2.lid), ntohs(cm->msg.sport),
 					 ntohl(cm->msg.sqpn), ntohl(cm->msg.saddr1.qpn));
 
 				DAPL_CNTR(((DAPL_IA *)dapl_llist_peek_head(&cm->hca->ia_list_head)),
@@ -524,9 +524,9 @@ retry_listenq:
 			" mcm_recv: NO LISTENER for %s %x %x i%x c%x"
 			" < %x %x %x, sending reject\n", 
 			dapl_cm_op_str(ntohs(msg->op)), 
-			ntohs(msg->daddr1.lid), ntohs(msg->dport),
+			ntohs(msg->daddr2.lid), ntohs(msg->dport),
 			ntohl(msg->daddr1.qpn), ntohl(msg->sqpn),
-			ntohs(msg->saddr1.lid), ntohs(msg->sport),
+			ntohs(msg->saddr2.lid), ntohs(msg->sport),
 			ntohl(msg->saddr1.qpn));
 
 		mcm_reject(tp, msg);
@@ -537,9 +537,9 @@ retry_listenq:
 			 " NO MATCH: op %s [lid, port, cqp, iqp, pid]:"
 			 " %x %x %x %x %x <- %x %x %x %x l_pid %x r_pid %x\n",
 			 dapl_cm_op_str(ntohs(msg->op)),
-			 ntohs(msg->daddr1.lid), ntohs(msg->dport),
+			 ntohs(msg->daddr2.lid), ntohs(msg->dport),
 			 ntohl(msg->dqpn), ntohl(msg->daddr1.qpn),
-			 ntohl(msg->d_id), ntohs(msg->saddr1.lid),
+			 ntohl(msg->d_id), ntohs(msg->saddr2.lid),
 			 ntohs(msg->sport), ntohl(msg->sqpn),
 			 ntohl(msg->saddr1.qpn), ntohl(msg->s_id),
 			 ntohl(msg->d_id));
@@ -617,7 +617,7 @@ static int mcm_send(ib_hca_transport_t *tp, dat_mcm_msg_t *msg, DAT_PVOID p_data
 	struct ibv_send_wr wr, *bad_wr;
 	struct ibv_sge sge;
 	int len, ret = -1;
-	uint16_t dlid = ntohs(msg->daddr1.lid);
+	uint16_t dlid = ntohs(msg->daddr2.lid);
 
 	/* Get message from send queue, copy data, and send */
 	dapl_os_lock(&tp->slock);
@@ -772,7 +772,7 @@ dp_ib_cm_handle_t dapls_cm_create(DAPL_HCA *hca, DAPL_EP *ep)
 
 		}
 		/* QPr is on proxy when xsocket from device */
-		if (!MXS_EP(&hca->ib_trans.addr)) {
+		if (!MXF_EP(&hca->ib_trans.addr)) {
 			cm->msg.saddr1.qpn = htonl(ep->qp_handle->qp->qp_num); /* QPr local*/
 			cm->msg.saddr1.qp_type = ep->qp_handle->qp->qp_type;
 			cm->msg.saddr1.lid = hca->ib_trans.addr.lid;
@@ -987,16 +987,28 @@ DAT_RETURN dapli_cm_disconnect(dp_ib_cm_handle_t cm)
 DAT_RETURN
 dapli_cm_connect(DAPL_EP *ep, dp_ib_cm_handle_t cm)
 {
+	/* ibscif is intra-node and the MIC lid != HST lid.
+	 * For HST -> HST/MIC the CM always uses the HST lid, so:
+	 *	CM locality is carried in addr2 (set to the local lid/gid here)
+	 *	EP locality is carried in addr1
+	 */
+	if (!strncmp(cm->hca->name, "scif", 4) &&
+	    HST_EP(&cm->hca->ib_trans.addr)) {
+		cm->msg.daddr2.lid = cm->tp->addr.lid;
+		memcpy(cm->msg.daddr2.gid, cm->tp->addr.gid, 16);
+	}
+
 	dapl_log(DAPL_DBG_TYPE_CM,
 		 " MCM connect: lid %x QPr %x QPt %x lport %x p_sz=%d -> "
-		 " lid %x c_qpn %x rport %x ep_map %d %s -> %d %s, retries=%d\n",
+		 " lid %x clid %x cqpn %x rport %x, %s -> %s"
+		 " retries=%d\n",
 		 htons(cm->tp->addr.lid), htonl(cm->msg.saddr1.qpn),
 		 htonl(cm->msg.saddr2.qpn),
 		 htons(cm->msg.sport), htons(cm->msg.p_size),
-		 htons(cm->msg.daddr1.lid), htonl(cm->msg.dqpn),
-		 htons(cm->msg.dport),
-		 cm->tp->addr.ep_map, mcm_map_str(cm->tp->addr.ep_map),
-		 cm->msg.daddr1.ep_map, mcm_map_str(cm->msg.daddr1.ep_map),
+		 htons(cm->msg.daddr1.lid), htons(cm->msg.daddr2.lid),
+		 htonl(cm->msg.dqpn), htons(cm->msg.dport),
+		 mcm_map_str(cm->tp->addr.ep_map),
+		 mcm_map_str(cm->msg.daddr1.ep_map),
 		 cm->tp->retries);
 
 	dapl_os_lock(&cm->lock);
@@ -1158,7 +1170,7 @@ void mcm_connect_rtu(dp_ib_cm_handle_t cm, dat_mcm_msg_t *msg)
  	/* QP to RTR-RTS with remote QPt (daddr2) info */
 	dapl_os_lock(&cm->ep->header.lock);
 
-	if (!MXS_EP(&cm->hca->ib_trans.addr)) {
+	if (!MXF_EP(&cm->hca->ib_trans.addr)) {
 		ret = dapls_modify_qp_rtu(cm->ep->qp_handle->qp,
 					  cm->msg.daddr2.qpn,
 					  cm->msg.daddr2.lid,
@@ -1183,7 +1195,7 @@ void mcm_connect_rtu(dp_ib_cm_handle_t cm, dat_mcm_msg_t *msg)
 			goto bail;
 		}
 		/* MXS peer: setup PI WC and save peer WR queue info */
-		if (MXS_EP(&cm->msg.daddr1)) {
+		if (MXF_EP(&cm->msg.daddr1)) {
 			/* save PI WR info, create local WC_q, send back WC info */
 			mcm_ntoh_wrc(&ep->qp_handle->wrc_rem, (mcm_wrc_info_t*)cm->msg.p_proxy);
 			mcm_create_wc_q(ep->qp_handle, ep->qp_handle->wrc_rem.wr_end + 1);
@@ -1471,7 +1483,7 @@ dapli_accept_usr(DAPL_EP *ep, DAPL_CR *cr, DAT_COUNT p_size, DAT_PVOID p_data)
 
 	/* modify QPr to RTR and then to RTS, QPr (qp) to remote QPt (daddr2), !xsocket */
 	dapl_os_lock(&ep->header.lock);
-	if (!MXS_EP(&cm->hca->ib_trans.addr)) {
+	if (!MXF_EP(&cm->hca->ib_trans.addr)) {
 		ret = dapls_modify_qp_rtu(ep->qp_handle->qp,
 					  cm->msg.daddr2.qpn,
 					  cm->msg.daddr2.lid,
@@ -1479,8 +1491,8 @@ dapli_accept_usr(DAPL_EP *ep, DAPL_CR *cr, DAT_COUNT p_size, DAT_PVOID p_data)
 		if (ret) {
 			dapl_log(DAPL_DBG_TYPE_ERR,
 				 " ACCEPT_USR: QPS_RTR ERR %s -> lid %x qpn %x\n",
-				 strerror(errno), ntohs(cm->msg.daddr1.lid),
-				 ntohl(cm->msg.daddr1.qpn));
+				 strerror(errno), ntohs(cm->msg.daddr2.lid),
+				 ntohl(cm->msg.daddr2.qpn));
 			dapl_os_unlock(&ep->header.lock);
 			goto bail;
 		}
@@ -1507,7 +1519,7 @@ dapli_accept_usr(DAPL_EP *ep, DAPL_CR *cr, DAT_COUNT p_size, DAT_PVOID p_data)
 			       &cm->hca->ib_trans.addr.gid, 16);
 
 		/* MXS peer: setup PI WC and save peer WR queue info */
-		if (MXS_EP(&cm->msg.daddr1)) {
+		if (MXF_EP(&cm->msg.daddr1)) {
 			/* save PI WR info, create local WC_q, send back WC info */
 			mcm_ntoh_wrc(&ep->qp_handle->wrc_rem, (mcm_wrc_info_t*)cm->msg.p_proxy);
 			mcm_create_wc_q(ep->qp_handle, ep->qp_handle->wrc_rem.wr_end + 1);
@@ -1540,7 +1552,7 @@ dapli_accept_usr(DAPL_EP *ep, DAPL_CR *cr, DAT_COUNT p_size, DAT_PVOID p_data)
 	cm->msg.op = htons(MCM_REP);
 	cm->msg.rd_in = ep->param.ep_attr.max_rdma_read_in;
 
-	if (!MXS_EP(&cm->hca->ib_trans.addr)) {
+	if (!MXF_EP(&cm->hca->ib_trans.addr)) {
 		cm->msg.saddr1.qpn = htonl(ep->qp_handle->qp->qp_num);
 		cm->msg.saddr1.qp_type = ep->qp_handle->qp->qp_type;
 		cm->msg.saddr1.lid = cm->hca->ib_trans.addr.lid;
@@ -1585,8 +1597,8 @@ dapli_accept_usr(DAPL_EP *ep, DAPL_CR *cr, DAT_COUNT p_size, DAT_PVOID p_data)
 	dapls_thread_signal(&cm->hca->ib_trans.signal);
 	return DAT_SUCCESS;
 bail:
+	dapls_ib_reject_connection(cm, ~IB_CM_REJ_REASON_CONSUMER_REJ, 0, 0);
 	DAPL_CNTR(ia, DCNT_IA_CM_ERR);
-	dapli_cm_free(cm);
 	return DAT_INTERNAL_ERROR;
 }
 
@@ -1639,8 +1651,10 @@ dapls_ib_connect(IN DAT_EP_HANDLE ep_handle,
 	dapl_os_memcpy(&cm->msg.daddr1, r_addr, sizeof(struct dat_mcm_addr));
 	dapl_os_memcpy(&cm->msg.daddr2, r_addr, sizeof(struct dat_mcm_addr));
 
+	/* HST dev scif0, remote LID is host proxy, not MIC */
+
 	/* validate port and ep_map range */
-	if ((mcm_ia->port > 2) || (mcm_ia->ep_map > 3))
+	if ((mcm_ia->port > 2) || (mcm_ia->ep_map > 4))
 		cm->msg.daddr1.ep_map = 0;
 
 	/* remote uCM information, comes from consumer provider r_addr */
@@ -1684,6 +1698,13 @@ dapls_ib_disconnect(IN DAPL_EP *ep_ptr, IN DAT_CLOSE_FLAGS close_flags)
 {
 	dp_ib_cm_handle_t cm_ptr = dapl_get_cm_from_ep(ep_ptr);
 
+	if (cm_ptr && cm_ptr->tp->scif_ep) { /* always force proxy flush */
+		dapli_mix_cm_dreq_out(cm_ptr);
+		if (ep_ptr->qp_handle->qp)
+			dapls_modify_qp_state(ep_ptr->qp_handle->qp,
+					      IBV_QPS_ERR,0,0,0);
+	}
+
 	dapl_os_lock(&ep_ptr->header.lock);
 	if (ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECTED ||
 	    ep_ptr->param.ep_attr.service_type != DAT_SERVICE_TYPE_RC ||
@@ -1693,16 +1714,13 @@ dapls_ib_disconnect(IN DAPL_EP *ep_ptr, IN DAT_CLOSE_FLAGS close_flags)
 	} 
 	dapl_os_unlock(&ep_ptr->header.lock);
 	
-	if (cm_ptr->tp->scif_ep) { /* QPt on MPXYD, QPr local or on MPXYD */
-		dapli_mix_cm_dreq_out(cm_ptr);
-		if (ep_ptr->qp_handle->qp)
-			dapls_modify_qp_state(ep_ptr->qp_handle->qp, IBV_QPS_ERR,0,0,0);
-	} else { /* QPt and QPr local */
+	/* HST: QPt and QPr local */
+	if (!cm_ptr->tp->scif_ep) {
 		dapli_cm_disconnect(cm_ptr);
 		dapls_modify_qp_state(ep_ptr->qp_handle->qp2, IBV_QPS_ERR,0,0,0);
 	}
 
- 	return DAT_SUCCESS;
+	return DAT_SUCCESS;
 }
 
 /*
@@ -2054,11 +2072,14 @@ void cm_thread(void *arg)
 		time_ms = -1; /* reset to blocking */
 		dapl_fd_zero(set);
 		dapl_fd_set(hca->ib_trans.signal.scm[0], set, DAPL_FD_READ);	
-		dapl_fd_set(hca->ib_hca_handle->async_fd, set, DAPL_FD_READ);
-		dapl_fd_set(hca->ib_trans.rch_fd, set, DAPL_FD_READ);
 		dapl_fd_set(hca->ib_trans.scif_ev_ep, set, DAPL_FD_READ);
-		dapl_fd_set(hca->ib_trans.ib_cq->fd, set, DAPL_FD_READ);
-		
+		if (!MFO_EP(&hca->ib_trans.addr) || !hca->ib_trans.self.node)
+		{
+			dapl_fd_set(hca->ib_hca_handle->async_fd, set, DAPL_FD_READ);
+			dapl_fd_set(hca->ib_trans.rch_fd, set, DAPL_FD_READ);
+			dapl_fd_set(hca->ib_trans.ib_cq->fd, set, DAPL_FD_READ);
+		}
+
 		dapl_os_lock(&hca->ib_trans.cqlock); /* CQt for HST->MXS */
 		if (!dapl_llist_is_empty(&hca->ib_trans.cqlist))
 			m_cq = dapl_llist_peek_head(&hca->ib_trans.cqlist);
@@ -2113,9 +2134,12 @@ void cm_thread(void *arg)
 		dapl_os_unlock(&hca->ib_trans.lock);
 		dapl_select(set, time_ms);
 
-		if (dapl_poll(hca->ib_trans.rch_fd,
-					DAPL_FD_READ) == DAPL_FD_READ) {
-			mcm_recv(&hca->ib_trans);
+		if (!MFO_EP(&hca->ib_trans.addr) || !hca->ib_trans.self.node)
+		{
+			if (dapl_poll(hca->ib_trans.rch_fd,
+				      DAPL_FD_READ) == DAPL_FD_READ) {
+				mcm_recv(&hca->ib_trans);
+			}
 		}
 		ret = dapl_poll(hca->ib_trans.scif_ev_ep, DAPL_FD_READ);
 		if (ret == DAPL_FD_READ)
@@ -2127,20 +2151,26 @@ void cm_thread(void *arg)
 				 hca->ib_trans.dev_id, hca->ib_trans.scif_ev_ep);
 
 			event.event_type = IBV_EVENT_DEVICE_FATAL;
-			dapl_evd_un_async_error_callback(hca->ib_hca_handle,
-							 &event,
-							 hca->ib_trans.async_un_ctx);
+			if (!MFO_EP(&hca->ib_trans.addr) || !hca->ib_trans.self.node)
+			{
+				dapl_evd_un_async_error_callback(hca->ib_hca_handle,
+								 &event,
+								 hca->ib_trans.async_un_ctx);
+			}
 			dapl_os_lock(&hca->ib_trans.lock);
 			hca->ib_trans.cm_state = IB_THREAD_CANCEL;
 			continue;
 		}
-		if (dapl_poll(hca->ib_hca_handle->async_fd, 
-			      DAPL_FD_READ) == DAPL_FD_READ) {
-			dapli_async_event_cb(&hca->ib_trans);
-		}
-		if (dapl_poll(hca->ib_trans.ib_cq->fd, 
-			      DAPL_FD_READ) == DAPL_FD_READ) {
-			dapli_cq_event_cb(&hca->ib_trans);
+		if (!MFO_EP(&hca->ib_trans.addr) || !hca->ib_trans.self.node)
+		{
+			if (dapl_poll(hca->ib_hca_handle->async_fd,
+				      DAPL_FD_READ) == DAPL_FD_READ) {
+				dapli_async_event_cb(&hca->ib_trans);
+			}
+			if (dapl_poll(hca->ib_trans.ib_cq->fd,
+				      DAPL_FD_READ) == DAPL_FD_READ) {
+				dapli_cq_event_cb(&hca->ib_trans);
+			}
 		}
 		while (dapl_poll(hca->ib_trans.signal.scm[0], 
 				 DAPL_FD_READ) == DAPL_FD_READ) {
@@ -2164,7 +2194,7 @@ out:
 static void mcm_log_addrs(int lvl, struct dat_mcm_msg *msg, int state, int in)
 {
 	if (in) {
-		if (MXS_EP(&msg->daddr1) && MXS_EP(&msg->saddr1)) {
+		if (MXF_EP(&msg->daddr1) && MXF_EP(&msg->saddr1)) {
 			dapl_log(lvl, " QPr_t addr2: %s 0x%x %x 0x%x %s <- QPt_r addr2: 0x%x %x 0x%x %s\n",
 				mcm_state_str(state), htons(msg->daddr2.lid),
 				htonl(msg->daddr2.qpn), htons(msg->dport),
@@ -2186,7 +2216,7 @@ static void mcm_log_addrs(int lvl, struct dat_mcm_msg *msg, int state, int in)
 				htons(msg->sport), mcm_map_str(msg->saddr1.ep_map));
 		}
 	} else {
-		if (MXS_EP(&msg->saddr1) && MXS_EP(&msg->daddr1)) {
+		if (MXF_EP(&msg->saddr1) && MXF_EP(&msg->daddr1)) {
 			dapl_log(lvl, " QPr_t addr2: %s 0x%x %x 0x%x %s -> QPt_r addr2: 0x%x %x 0x%x %s\n",
 				mcm_state_str(state), htons(msg->saddr2.lid),
 				htonl(msg->saddr2.qpn), htons(msg->sport),
diff --git a/dapl/openib_mcm/dapl_ib_util.h b/dapl/openib_mcm/dapl_ib_util.h
old mode 100644
new mode 100755
index 2d3e1bb..89d3650
--- a/dapl/openib_mcm/dapl_ib_util.h
+++ b/dapl/openib_mcm/dapl_ib_util.h
@@ -166,9 +166,12 @@ int  mcm_create_pi_cq(struct dcm_ib_qp *m_qp, int len);
 void mcm_destroy_pi_cq(struct dcm_ib_qp *m_qp);
 
 /* MIC eXchange (MIX) operations, mix.c */
+int  dapli_mix_mode(ib_hca_transport_t *tp, char *name);
 int  dapli_mix_open(ib_hca_transport_t *tp, char *name, int port, int query);
 void dapli_mix_close(ib_hca_transport_t *tp);
 int  dapli_mix_get_attr(ib_hca_transport_t *tp, dat_mix_prov_attr_t *pr_attr);
+int  dapli_mix_query_device(ib_hca_transport_t *tp, struct ibv_device_attr *dev_attr);
+int  dapli_mix_query_port(ib_hca_transport_t *tp, unsigned long port_num, struct ibv_port_attr *port_attr);
 int  dapli_mix_listen(dp_ib_cm_handle_t cm, uint16_t sid);
 int  dapli_mix_listen_free(dp_ib_cm_handle_t cm);
 int  dapli_mix_qp_create(ib_qp_handle_t m_qp, struct ibv_qp_init_attr *attr,
@@ -178,6 +181,9 @@ int  dapli_mix_cq_create(ib_cq_handle_t m_cq, int cq_len);
 int  dapli_mix_cq_free(ib_cq_handle_t m_cq);
 int  dapli_mix_cq_wait(ib_cq_handle_t m_cq, int time);
 int  dapli_mix_cq_poll(ib_cq_handle_t m_cq, struct ibv_wc *wc);
+int  dapli_mix_pz_create(DAPL_IA * ia_ptr, DAPL_PZ *m_pz);
+DAT_RETURN dapls_ib_pd_free(IN DAPL_PZ * pz);
+int  dapli_mix_pz_free(DAPL_PZ *m_pz);
 int  dapli_mix_cm_req_out(dp_ib_cm_handle_t m_cm, ib_qp_handle_t m_qp);
 int  dapli_mix_cm_rtu_out(dp_ib_cm_handle_t m_cm);
 void dapli_mix_cm_dreq_out(dp_ib_cm_handle_t m_cm);
diff --git a/dapl/openib_mcm/device.c b/dapl/openib_mcm/device.c
old mode 100644
new mode 100755
index fdf66bd..92ab201
--- a/dapl/openib_mcm/device.c
+++ b/dapl/openib_mcm/device.c
@@ -180,7 +180,7 @@ DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name,
 			     IN DAPL_HCA * hca_ptr,
 			     IN DAPL_OPEN_FLAGS flags)
 {
-	struct ibv_device **dev_list;
+	struct ibv_device **dev_list = NULL;
 	struct dat_mcm_addr *mcm_ia = (struct dat_mcm_addr *) &hca_ptr->hca_address;
 	struct ibv_port_attr port_attr;
 	int i, nd = 0;
@@ -190,13 +190,71 @@ DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name,
 		 PROVIDER_NAME, hca_name, hca_ptr,
 		 flags & DAPL_OPEN_QUERY ? "QUERY MODE":"STD MODE");
 
+	/* set RC tunables via environment or default */
+	hca_ptr->ib_trans.ib_cm.max_inline =
+	    dapl_os_get_env_val("DAPL_MAX_INLINE", INLINE_SEND_IB_DEFAULT);
+	hca_ptr->ib_trans.ib_cm.ack_retry =
+	    dapl_os_get_env_val("DAPL_ACK_RETRY", DCM_ACK_RETRY);
+	hca_ptr->ib_trans.ib_cm.ack_timer =
+	    dapl_os_get_env_val("DAPL_ACK_TIMER", DCM_ACK_TIMER);
+	hca_ptr->ib_trans.ib_cm.rnr_retry =
+	    dapl_os_get_env_val("DAPL_RNR_RETRY", DCM_RNR_RETRY);
+	hca_ptr->ib_trans.ib_cm.rnr_timer =
+	    dapl_os_get_env_val("DAPL_RNR_TIMER", DCM_RNR_TIMER);
+	hca_ptr->ib_trans.ib_cm.global =
+	    dapl_os_get_env_val("DAPL_GLOBAL_ROUTING", DCM_GLOBAL);
+	hca_ptr->ib_trans.ib_cm.hop_limit =
+	    dapl_os_get_env_val("DAPL_HOP_LIMIT", DCM_HOP_LIMIT);
+	hca_ptr->ib_trans.ib_cm.tclass =
+	    dapl_os_get_env_val("DAPL_TCLASS", DCM_TCLASS);
+	hca_ptr->ib_trans.ib_cm.mtu =
+	    dapl_ib_mtu(dapl_os_get_env_val("DAPL_IB_MTU", DCM_IB_MTU));
+
+	/* initialize CM list, LISTEN, SND queue, PSP array, locks */
+	if ((dapl_os_lock_init(&hca_ptr->ib_trans.lock)) != DAT_SUCCESS)
+		goto err;
+
+	if ((dapl_os_lock_init(&hca_ptr->ib_trans.llock)) != DAT_SUCCESS)
+		goto err;
+
+	if ((dapl_os_lock_init(&hca_ptr->ib_trans.slock)) != DAT_SUCCESS)
+		goto err;
+
+	if ((dapl_os_lock_init(&hca_ptr->ib_trans.plock)) != DAT_SUCCESS)
+		goto err;
+
+	if ((dapl_os_lock_init(&hca_ptr->ib_trans.cqlock)) != DAT_SUCCESS)
+		goto err;
+
+	/* initialize CM and listen lists on this HCA uCM QP */
+	dapl_llist_init_head(&hca_ptr->ib_trans.list);
+	dapl_llist_init_head(&hca_ptr->ib_trans.llist);
+	dapl_llist_init_head(&hca_ptr->ib_trans.cqlist);
+
+	/* Get MIC mode and EP mappings */
+	if (dapli_mix_mode(&hca_ptr->ib_trans, hca_name))
+		goto err;
+
+	/* MIC EP with Full offload, no local verbs device */
+	if (MFO_EP(&hca_ptr->ib_trans.addr)) {
+		if (dapli_mix_open(&hca_ptr->ib_trans, hca_name,
+				   hca_ptr->port_num,
+				   flags & DAPL_OPEN_QUERY)) {
+			dapl_log(DAPL_DBG_TYPE_ERR,
+				" open_hca: SCIF init ERR on %s\n", hca_name);
+			goto err;
+		}
+		hca_ptr->ib_hca_handle = hca_ptr->ib_trans.ib_ctx;
+		goto cm_init;
+	}
+
 	/* Get list of all IB devices, find match, open */
 	dev_list = ibv_get_device_list(&nd);
 	if (!dev_list) {
 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
 			     " open_hca: ibv_get_device_list() failed\n",
 			     hca_name);
-		return DAT_INTERNAL_ERROR;
+		goto err;
 	}
 	dapl_log(DAPL_DBG_TYPE_UTIL, " open_hca %p: %d devices found\n", hca_ptr, nd);
 	hca_ptr->ib_trans.ib_dev = NULL;
@@ -211,7 +269,7 @@ DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name,
 	if (hca_ptr->ib_trans.ib_dev == NULL) {
 		dapl_log(DAPL_DBG_TYPE_ERR, " open_hca: device %s not found\n", hca_name);
 		dat_status = DAT_PROVIDER_NOT_FOUND;
-		goto err;
+		goto err2;
 	}
 
 	hca_ptr->ib_hca_handle = ibv_open_device(hca_ptr->ib_trans.ib_dev);
@@ -219,7 +277,7 @@ DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name,
 		dapl_log(DAPL_DBG_TYPE_ERR,
 			 " open_hca: dev open failed for %s\n",
 			 ibv_get_device_name(hca_ptr->ib_trans.ib_dev));
-		goto err;
+		goto err2;
 	}
 	hca_ptr->ib_trans.ib_ctx = hca_ptr->ib_hca_handle;
 	dapls_config_verbs(hca_ptr->ib_hca_handle);
@@ -232,11 +290,11 @@ DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name,
 			 ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
 			 strerror(errno));
 		dat_status = DAT_INVALID_ADDRESS;
-		goto bail;
+		goto err2;
 	} else {
 		if (port_attr.state != IBV_PORT_ACTIVE) {
 			dat_status = DAT_INVALID_ADDRESS;
-			goto bail;
+			goto err2;
 		}
 		hca_ptr->ib_trans.addr.lid = htons(port_attr.lid);
 		hca_ptr->ib_trans.lid = htons(port_attr.lid);
@@ -251,58 +309,20 @@ DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name,
 			 ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
 			 strerror(errno));
 		dat_status = DAT_INVALID_ADDRESS;
-		goto bail;
+		goto err2;
 	}
 
-	/* set RC tunables via enviroment or default */
+	/* max inline value when running on MIC is returned from host */
 	if (dapl_ib_inline_data(hca_ptr->ib_hca_handle)) {
 		hca_ptr->ib_trans.ib_cm.max_inline =
 			dapl_os_get_env_val("DAPL_MAX_INLINE",
 					    INLINE_SEND_IB_DEFAULT);
 	}
-	hca_ptr->ib_trans.ib_cm.ack_retry =
-	    dapl_os_get_env_val("DAPL_ACK_RETRY", DCM_ACK_RETRY);
-	hca_ptr->ib_trans.ib_cm.ack_timer =
-	    dapl_os_get_env_val("DAPL_ACK_TIMER", DCM_ACK_TIMER);
-	hca_ptr->ib_trans.ib_cm.rnr_retry =
-	    dapl_os_get_env_val("DAPL_RNR_RETRY", DCM_RNR_RETRY);
-	hca_ptr->ib_trans.ib_cm.rnr_timer =
-	    dapl_os_get_env_val("DAPL_RNR_TIMER", DCM_RNR_TIMER);
-	hca_ptr->ib_trans.ib_cm.global =
-	    dapl_os_get_env_val("DAPL_GLOBAL_ROUTING", DCM_GLOBAL);
-	hca_ptr->ib_trans.ib_cm.hop_limit =
-	    dapl_os_get_env_val("DAPL_HOP_LIMIT", DCM_HOP_LIMIT);
-	hca_ptr->ib_trans.ib_cm.tclass =
-	    dapl_os_get_env_val("DAPL_TCLASS", DCM_TCLASS);
-	hca_ptr->ib_trans.ib_cm.mtu =
-	    dapl_ib_mtu(dapl_os_get_env_val("DAPL_IB_MTU", DCM_IB_MTU));
+	else
+		hca_ptr->ib_trans.ib_cm.max_inline = 0;
 
-	if (dapli_mix_open(&hca_ptr->ib_trans, hca_name,
-			   hca_ptr->port_num, flags & DAPL_OPEN_QUERY)) {
-		dapl_log(DAPL_DBG_TYPE_ERR,
-			 " open_hca: SCIF init ERR for %s\n",
-			 ibv_get_device_name(hca_ptr->ib_trans.ib_dev));
-		goto bail;
-	}
-
-	if (flags & DAPL_OPEN_QUERY)
-		goto done;
-
-	/* initialize CM list, LISTEN, SND queue, PSP array, locks */
-	if ((dapl_os_lock_init(&hca_ptr->ib_trans.lock)) != DAT_SUCCESS)
-		goto bail;
-	
-	if ((dapl_os_lock_init(&hca_ptr->ib_trans.llock)) != DAT_SUCCESS)
-		goto bail;
-	
-	if ((dapl_os_lock_init(&hca_ptr->ib_trans.slock)) != DAT_SUCCESS)
-		goto bail;
-
-	if ((dapl_os_lock_init(&hca_ptr->ib_trans.plock)) != DAT_SUCCESS)
-		goto bail;
-
-	if ((dapl_os_lock_init(&hca_ptr->ib_trans.cqlock)) != DAT_SUCCESS)
-		goto bail;
+	dapl_log(DAPL_DBG_TYPE_UTIL, " open_hca: max inline data set to %d\n",
+		hca_ptr->ib_trans.ib_cm.max_inline);
 
 	/* EVD events without direct CQ channels, CNO support */
 	hca_ptr->ib_trans.ib_cq =
@@ -311,7 +331,7 @@ DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name,
 		dapl_log(DAPL_DBG_TYPE_ERR,
 			 " open_hca: ibv_create_comp_channel ERR %s\n",
 			 strerror(errno));
-		goto bail;
+		goto err2;
 	}
 	dapls_config_comp_channel(hca_ptr->ib_trans.ib_cq);
 
@@ -321,23 +341,29 @@ DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name,
 		dapl_log(DAPL_DBG_TYPE_ERR,
 			 " open_hca: ERR: create_empty_cq = %s\n",
 			 strerror(errno));
-		goto bail;
+		goto err2;
 	}
 
-	/* initialize CM and listen lists on this HCA uCM QP */
-	dapl_llist_init_head(&hca_ptr->ib_trans.list);
-	dapl_llist_init_head(&hca_ptr->ib_trans.llist);
-	dapl_llist_init_head(&hca_ptr->ib_trans.cqlist);
+	if (dapli_mix_open(&hca_ptr->ib_trans, hca_name,
+			   hca_ptr->port_num, flags & DAPL_OPEN_QUERY)) {
+		dapl_log(DAPL_DBG_TYPE_ERR,
+			 " open_hca: SCIF init ERR for %s\n", hca_name);
+		goto err2;
+	}
 
-	/* create uCM qp services */
+cm_init:
+	if (flags & DAPL_OPEN_QUERY)
+		goto done;
+
+	/* create MCM qp services */
 	if (mcm_service_create(hca_ptr))
-		goto bail;
+		goto err2;
 
 	if (create_os_signal(hca_ptr)) {
 		dapl_log(DAPL_DBG_TYPE_ERR,
 			 " open_hca: failed to init cr pipe - %s\n",
 			 strerror(errno));
-		goto bail;
+		goto err2;
 	}
 
 	/* create thread to process inbound connect request */
@@ -348,13 +374,12 @@ DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name,
 	if (dat_status != DAT_SUCCESS) {
 		dapl_log(DAPL_DBG_TYPE_ERR,
 			 " open_hca: failed to create thread\n");
-		goto bail;
+		goto err2;
 	}
 
 	dapl_log(DAPL_DBG_TYPE_UTIL,
-		     " open_hca: MCM devname %s port %d, dev_IP %s ep_map %s\n",
-		     ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
-		     hca_ptr->port_num, 
+		     " open_hca: devname %s port %d, dev_IP %s ep_map %s\n",
+		     hca_name, hca_ptr->port_num,
 		     inet_ntoa(((struct sockaddr_in *)
 			       &hca_ptr->hca_address)->sin_addr),
 		     mcm_map_str(hca_ptr->ib_trans.addr.ep_map));
@@ -364,33 +389,39 @@ DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name,
 		dapl_os_sleep_usec(1000);
 
 done:
+	if (dev_list)
+		ibv_free_device_list(dev_list);
+
 	/* save LID, GID, QPN, PORT address information, for ia_queries */
 	/* Set AF_INET6 to insure callee address storage of 28 bytes */
 	hca_ptr->ib_trans.hca = hca_ptr;
 	hca_ptr->ib_trans.addr.family = AF_INET6;
 	hca_ptr->ib_trans.addr.qp_type = IBV_QPT_UD;
-	memcpy(&hca_ptr->hca_address, 
-	       &hca_ptr->ib_trans.addr, 
+	memcpy(&hca_ptr->hca_address,
+	       &hca_ptr->ib_trans.addr,
 	       sizeof(struct dat_mcm_addr));
 
-	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
+	dapl_log(DAPL_DBG_TYPE_UTIL,
 		 "%s open: dev %s port %d, GID %s, LID %x qpn %x sl %d %s\n",
 		 PROVIDER_NAME, hca_name, hca_ptr->port_num,
 		 inet_ntop(AF_INET6, &mcm_ia->gid, gid_str, sizeof(gid_str)),
 		 ntohs(mcm_ia->lid), ntohl(mcm_ia->qpn),
 		 mcm_ia->sl, mcm_map_str(mcm_ia->ep_map));
 
-	ibv_free_device_list(dev_list);
 	return DAT_SUCCESS;
-bail:
+
+err2:
 	mcm_service_destroy(hca_ptr);
-	ibv_close_device(hca_ptr->ib_hca_handle);
+	if (dev_list) {
+		if (hca_ptr->ib_hca_handle)
+			ibv_close_device(hca_ptr->ib_hca_handle);
+		ibv_free_device_list(dev_list);
+	}
+err:
 	hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
 	hca_ptr->ib_trans.ib_ctx = NULL;
 	hca_ptr->ib_trans.ib_dev = NULL;
-
-err:
-	ibv_free_device_list(dev_list);
+	hca_ptr->ib_trans.hca = NULL;
 	return dat_status;
 }
 
@@ -447,8 +478,10 @@ done:
 	}
 
 	if (hca_ptr->ib_hca_handle != IB_INVALID_HANDLE) {
-		if (ibv_close_device(hca_ptr->ib_hca_handle))
-			return (dapl_convert_errno(errno, "ib_close_device"));
+		if (!MFO_EP(&hca_ptr->ib_trans.addr) || hca_ptr->ib_trans.self.node == 0) {
+			if (ibv_close_device(hca_ptr->ib_hca_handle))
+				return (dapl_convert_errno(errno, "ib_close_device"));
+		}
 		hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
 	}
 
@@ -506,10 +539,10 @@ static void mcm_service_destroy(IN DAPL_HCA *hca)
 
 static int mcm_service_create(IN DAPL_HCA *hca)
 {
-        struct ibv_qp_init_attr qp_create;
+	struct ibv_qp_init_attr qp_create;
 	ib_hca_transport_t *tp = &hca->ib_trans;
 	struct ibv_recv_wr recv_wr, *recv_err;
-        struct ibv_sge sge;
+	struct ibv_sge sge;
 	int i, mlen = sizeof(dat_mcm_msg_t);
 	int hlen = sizeof(struct ibv_grh); /* hdr included with UD recv */
 	char *rbuf;
@@ -530,12 +563,12 @@ static int mcm_service_create(IN DAPL_HCA *hca)
 		return 0;
 
 	tp->pd = ibv_alloc_pd(hca->ib_hca_handle);
-        if (!tp->pd) 
-                goto bail;
-        
-        dapl_log(DAPL_DBG_TYPE_UTIL,
-                 " create_service: pd %p ctx %p handle 0x%x\n",
-                  tp->pd, tp->pd->context, tp->pd->handle);
+	if (!tp->pd) 
+		goto bail;
+	
+	dapl_log(DAPL_DBG_TYPE_UTIL,
+		 " create_service: pd %p ctx %p handle 0x%x\n",
+		  tp->pd, tp->pd->context, tp->pd->handle);
 
     	tp->rch = ibv_create_comp_channel(hca->ib_hca_handle);
 	if (!tp->rch) 
@@ -546,7 +579,7 @@ static int mcm_service_create(IN DAPL_HCA *hca)
 	tp->scq = ibv_create_cq(hca->ib_hca_handle, tp->cqe, hca, NULL, 0);
 	if (!tp->scq) 
 		goto bail;
-        
+	
 	tp->rcq = ibv_create_cq(hca->ib_hca_handle, tp->cqe, hca, tp->rch, 0);
 	if (!tp->rcq) 
 		goto bail;
@@ -565,7 +598,7 @@ static int mcm_service_create(IN DAPL_HCA *hca)
 
 	tp->qp = ibv_create_qp(tp->pd, &qp_create);
 	if (!tp->qp) 
-                goto bail;
+		goto bail;
 
 	tp->ah = (ib_ah_handle_t*) dapl_os_alloc(sizeof(ib_ah_handle_t) * 0xffff);
 	tp->sid = (uint8_t*) dapl_os_alloc(sizeof(uint8_t) * 0xffff);
@@ -616,7 +649,7 @@ static int mcm_service_create(IN DAPL_HCA *hca)
 
 	/* save qp_num as part of ia_address, network order */
 	tp->addr.qpn = htonl(tp->qp->qp_num);
-        return 0;
+	return 0;
 bail:
 	dapl_log(DAPL_DBG_TYPE_ERR,
 		 " ucm_create_services: ERR %s\n", strerror(errno));
diff --git a/dapl/openib_mcm/linux/openib_osd.h b/dapl/openib_mcm/linux/openib_osd.h
old mode 100644
new mode 100755
diff --git a/dapl/openib_mcm/mix.c b/dapl/openib_mcm/mix.c
old mode 100644
new mode 100755
index a97dfe5..970b372
--- a/dapl/openib_mcm/mix.c
+++ b/dapl/openib_mcm/mix.c
@@ -35,8 +35,42 @@
 #include "dapl_osd.h"
 
 /*
- * CM proxy services, MCM on MIC to MPXYD via SCIF
- *
+ * MCM Provider Proxy services, MIC to MPXYD via SCIF or HST with SCIF
+ */
+
+/*
+ * MIX_IA_MODE
+ */
+
+int dapli_mix_mode(ib_hca_transport_t *tp, char *name)
+{
+	int ret, mfo_dev, mfo_mode;
+
+	mfo_mode = dapl_os_get_env_val("DAPL_MCM_MFO", 0); /* Force MIC Full Offload */
+
+	ret = scif_get_nodeIDs(NULL, 0, &tp->self.node);
+	if (ret < 0) {
+		dapl_log(1, " scif_get_nodeIDs() failed with error %s\n", strerror(errno));
+		return -1;
+	}
+	dapl_log(DAPL_DBG_TYPE_EXTENSION,
+		 " SCIF node_id: %d client req_id 0x%x, %s\n",
+		 (uint16_t)tp->self.node, dapl_os_getpid(), name);
+
+	if (tp->self.node == 0) {
+		tp->addr.ep_map = HOST_SOCK_DEV;  /* non-MIC mapping */
+		return 0;
+	}
+
+	/*  MIC node: "qib" device requires full offload */
+	mfo_dev = !dapl_os_pstrcmp("qib", name);
+	if (mfo_mode || mfo_dev) {
+		tp->addr.ep_map = MIC_FULL_DEV; /* MIC with full proxy offload, no direct verbs */
+	}
+	return 0;
+}
+
+/*
  * MIX_IA_OPEN
  */
 int dapli_mix_open(ib_hca_transport_t *tp, char *name, int port, int query_only)
@@ -52,24 +86,17 @@ int dapli_mix_open(ib_hca_transport_t *tp, char *name, int port, int query_only)
 	always_proxy = dapl_os_get_env_val("DAPL_MCM_ALWAYS_PROXY", 0);
 	scif_port_id = dapl_os_get_env_val("DAPL_MCM_PORT_ID", SCIF_OFED_PORT_8);
 
-	ret = scif_get_nodeIDs(NULL, 0, &tp->self.node);
-	if (ret < 0) {
-		dapl_log(1, " scif_get_nodeIDs() failed with error %s\n", strerror(errno));
-		return -1;
-	}
-	dapl_log(DAPL_DBG_TYPE_EXTENSION,
-		 " SCIF node_id: %d client req_id 0x%x\n",
-		 (uint16_t)tp->self.node, dapl_os_getpid());
-
-	if (tp->self.node == 0)
-		tp->addr.ep_map = HOST_SOCK_DEV;  /* non-MIC mapping */
-
-	if (query_only || (tp->self.node == 0 && !always_proxy)){
-		dapl_log(DAPL_DBG_TYPE_EXTENSION," Not running on MIC, no MPXY connect required\n");
+	if ((query_only && !MFO_EP(&tp->addr)) ||
+	    (tp->self.node == 0 && !always_proxy)) {
+		dapl_log(DAPL_DBG_TYPE_EXTENSION,
+			" %s, no MPXYD connect required\n",
+			query_only ? "Query only,":"Host node,");
 		tp->scif_ep = 0;
 		return 0;
 	}
-	dapl_log(DAPL_DBG_TYPE_EXTENSION," Running on MIC, MPXY connect required\n");
+	dapl_log(DAPL_DBG_TYPE_EXTENSION,
+		 " Running on MIC at %s ep_map, MPXY connect required\n",
+		 mcm_map_str(tp->addr.ep_map));
 
 	/* Create an endpoint for MPXYD to connect back */
 	listen_ep = scif_open();
@@ -159,6 +186,9 @@ int dapli_mix_open(ib_hca_transport_t *tp, char *name, int port, int query_only)
 	msg.dev_attr.pkey_idx = tp->ib_cm.pkey_idx;
 	msg.dev_attr.pkey = tp->ib_cm.pkey;
 	msg.dev_attr.max_inline = tp->ib_cm.max_inline;
+	msg.dev_addr.ep_map = tp->addr.ep_map;
+
+	memcpy(&msg.dev_addr, &tp->addr, sizeof(dat_mcm_addr_t));
 
 	len = sizeof(dat_mix_open_t);
 	ret = scif_send(tp->scif_ep, &msg, len, SCIF_SEND_BLOCK);
@@ -199,19 +229,22 @@ int dapli_mix_open(ib_hca_transport_t *tp, char *name, int port, int query_only)
 	tp->ib_cm.hop_limit = msg.dev_attr.hop_limit;
 	tp->ib_cm.tclass = msg.dev_attr.tclass;
 	tp->ib_cm.sl = msg.dev_attr.sl;
-	tp->ib_cm.mtu = msg.dev_attr.mtu;
 	tp->ib_cm.rd_atom_in = msg.dev_attr.rd_atom_in;
 	tp->ib_cm.rd_atom_out = msg.dev_attr.rd_atom_out;
 	tp->ib_cm.pkey_idx = msg.dev_attr.pkey_idx;
 	tp->ib_cm.pkey = msg.dev_attr.pkey;
 	tp->ib_cm.max_inline = msg.dev_attr.max_inline;
-
 	tp->dev_id = msg.hdr.req_id;
 
+	if (MFO_EP(&tp->addr))
+		/* ib_ctx is not used in MFO mode; set it non-NULL only as a flag to signal a successful open */
+		tp->ib_ctx = (struct ibv_context *)0xdeadbeef;
+
 	dapl_log(DAPL_DBG_TYPE_EXTENSION,
-		 " mix_open reply (msg %p, ln %d) EPs %d %d %d - dev_id %d\n",
+		 " mix_open reply (msg %p, ln %d) EPs %d %d %d - dev_id %d lid 0x%x\n",
 		 &msg, len, tp->scif_ep, tp->scif_ev_ep,
-		 tp->scif_tx_ep, tp->dev_id);
+		 tp->scif_tx_ep, tp->dev_id, ntohs(tp->addr.lid));
+
 	return 0;
 }
 
@@ -236,6 +269,99 @@ void dapli_mix_close(ib_hca_transport_t *tp)
 	}
 }
 
+/* MIX_QUERY_DEVICE: ask the proxy over tp->scif_ep for its device attributes and copy them into dev_attr. Returns 0 on success or when no proxy EP is open (dev_attr untouched), the MIX status when the reply carries a failure status, else -1. */
+int dapli_mix_query_device(ib_hca_transport_t *tp, struct ibv_device_attr *dev_attr)
+{
+	dat_mix_device_attr_t msg;
+	scif_epd_t mix_ep = tp->scif_ep;
+	int ret, len;
+
+	if (!mix_ep)
+		return 0;
+
+	dapl_log(DAPL_DBG_TYPE_EXTENSION, " MIX_QUERY_DEVICE_ATTR tp = %p\n", tp);
+
+	/* request: header only, no payload */
+	msg.hdr.ver = DAT_MIX_VER;
+	msg.hdr.op = MIX_QUERY_DEVICE;
+	msg.hdr.status = 0;
+	msg.hdr.flags = MIX_OP_REQ;
+
+	len = sizeof(dat_mix_hdr_t);
+	ret = scif_send(mix_ep, &msg, len, SCIF_SEND_BLOCK);
+	if (ret != len) {
+		dapl_log(1, " ERR: %s msg %p send on %d, ret %d, exp %d, error %s\n",
+			 mix_op_str(msg.hdr.op), &msg, mix_ep, ret, len, strerror(errno));
+		return -1;
+	}
+	dapl_log(DAPL_DBG_TYPE_EXTENSION," Sent %s request on SCIF EP %d\n", mix_op_str(msg.hdr.op), mix_ep);
+
+	/* response: full dat_mix_device_attr_t, header included */
+	len = sizeof(dat_mix_device_attr_t);
+	ret = scif_recv(mix_ep, &msg, len, SCIF_RECV_BLOCK);
+	if (ret != len) {
+		dapl_log(1, " ERR: rcv on new_ep %d, ret %d, exp %d, error %s\n", mix_ep, ret, len, strerror(errno));
+		return -1;
+	}
+	dapl_log(DAPL_DBG_TYPE_EXTENSION," Recv'd %s reply on SCIF EP %d for dev_id %d\n",
+		 mix_op_str(msg.hdr.op), mix_ep, msg.hdr.req_id);
+
+	if (msg.hdr.ver != DAT_MIX_VER || msg.hdr.op != MIX_QUERY_DEVICE ||
+	    msg.hdr.flags != MIX_OP_RSP || msg.hdr.status != MIX_SUCCESS) {
+		dapl_log(1, " ERR: MIX_QUERY_DEVICE ver %d, op %s, flgs %d, st %d dev_id %d\n",
+			     msg.hdr.ver, mix_op_str(msg.hdr.op),
+			     msg.hdr.flags, msg.hdr.status, msg.hdr.req_id);
+		if (msg.hdr.status != MIX_SUCCESS)
+			return msg.hdr.status;
+		else
+			return -1;
+	}
+	strncpy(dev_attr->fw_ver, msg.fw_ver, sizeof(dev_attr->fw_ver));
+	dev_attr->fw_ver[sizeof(dev_attr->fw_ver) - 1] = '\0'; /* strncpy leaves the buffer unterminated when the source fills it */
+	dev_attr->node_guid = msg.node_guid;
+	dev_attr->sys_image_guid = msg.sys_image_guid;
+	dev_attr->max_mr_size = msg.max_mr_size;
+	dev_attr->page_size_cap = msg.page_size_cap;
+	dev_attr->vendor_id = msg.vendor_id;
+	dev_attr->vendor_part_id = msg.vendor_part_id;
+	dev_attr->hw_ver = msg.hw_ver;
+	dev_attr->max_qp = msg.max_qp;
+	dev_attr->max_qp_wr = msg.max_qp_wr;
+	dev_attr->device_cap_flags = msg.device_cap_flags;
+	dev_attr->max_sge = msg.max_sge;
+	dev_attr->max_sge_rd = msg.max_sge_rd;
+	dev_attr->max_cq = msg.max_cq;
+	dev_attr->max_cqe = msg.max_cqe;
+	dev_attr->max_mr = msg.max_mr;
+	dev_attr->max_pd = msg.max_pd;
+	dev_attr->max_qp_rd_atom = msg.max_qp_rd_atom;
+	dev_attr->max_ee_rd_atom = msg.max_ee_rd_atom;
+	dev_attr->max_res_rd_atom = msg.max_ee_rd_atom; /* NOTE(review): reads max_ee_rd_atom, not a max_res_rd_atom field; must stay in step with whatever the sender stores there - confirm before changing */
+	dev_attr->max_qp_init_rd_atom = msg.max_qp_init_rd_atom;
+	dev_attr->max_ee_init_rd_atom = msg.max_ee_init_rd_atom;
+	dev_attr->atomic_cap = msg.atomic_cap;
+	dev_attr->max_ee = msg.max_ee;
+	dev_attr->max_rdd = msg.max_rdd;
+	dev_attr->max_mw = msg.max_mw;
+	dev_attr->max_raw_ipv6_qp = msg.max_raw_ipv6_qp;
+	dev_attr->max_raw_ethy_qp = msg.max_raw_ethy_qp;
+	dev_attr->max_mcast_grp = msg.max_mcast_grp;
+	dev_attr->max_mcast_qp_attach = msg.max_mcast_qp_attach;
+	dev_attr->max_total_mcast_qp_attach = msg.max_total_mcast_qp_attach;
+	dev_attr->max_ah = msg.max_ah;
+	dev_attr->max_fmr = msg.max_fmr;
+	dev_attr->max_map_per_fmr = msg.max_map_per_fmr;
+	dev_attr->max_srq = msg.max_srq;
+	dev_attr->max_srq_wr = msg.max_srq_wr;
+	dev_attr->max_srq_sge = msg.max_srq_sge;
+	dev_attr->max_pkeys = msg.max_pkeys;
+	dev_attr->local_ca_ack_delay = msg.local_ca_ack_delay;
+	dev_attr->phys_port_cnt = msg.phys_port_cnt;
+
+	dapl_log(DAPL_DBG_TYPE_EXTENSION," MIX_QUERY_DEVICE successful on SCIF EP %d\n", mix_ep);
+	return 0;
+}
+
 /* MIX_PROV_ATTR */
 int dapli_mix_get_attr(ib_hca_transport_t *tp, dat_mix_prov_attr_t *pr_attr)
 {
@@ -243,7 +369,10 @@ int dapli_mix_get_attr(ib_hca_transport_t *tp, dat_mix_prov_attr_t *pr_attr)
 	scif_epd_t mix_ep = tp->scif_ep;
 	int ret, len;
 
-	dapl_log(DAPL_DBG_TYPE_EXTENSION, " MIX_GET_ATTR tp = %p\n", tp);
+	if (!mix_ep)
+		return 0;
+
+	dapl_log(DAPL_DBG_TYPE_EXTENSION, " MIX_QUERY_PROV_ATTR tp = %p\n", tp);
 
 	/* get attr request */
 	msg.hdr.ver = DAT_MIX_VER;
@@ -283,6 +412,12 @@ int dapli_mix_get_attr(ib_hca_transport_t *tp, dat_mix_prov_attr_t *pr_attr)
 
 	memcpy(pr_attr, &msg.attr, sizeof(dat_mix_prov_attr_t));
 
+	/* update local TP CM attributes */
+	tp->retries = pr_attr->cm_retry;
+	tp->rep_time = pr_attr->cm_rep_time_ms;
+	tp->rtu_time = pr_attr->cm_rtu_time_ms;
+	tp->cm_timer = DAPL_MIN(tp->rep_time, tp->rtu_time);
+
 	dapl_log(DAPL_DBG_TYPE_EXTENSION," MIX_PROV_ATTR successful on SCIF EP %d\n", mix_ep);
 	return 0;
 }
@@ -390,9 +525,16 @@ int dapli_mix_mr_create(ib_hca_transport_t *tp, DAPL_LMR * lmr)
 	scif_epd_t mix_ep = tp->scif_ep;
 	int ret, len;
 
-	dapl_log(DAPL_DBG_TYPE_EXTENSION," lmr create %p, addr %p %p rmr_context %x mr->rkey %x\n",
-		 lmr, lmr->mr_handle->addr, lmr->param.registered_address,
-		 lmr->param.rmr_context, lmr->mr_handle->rkey );
+	dapl_log(DAPL_DBG_TYPE_EXTENSION," lmr create %p, addr %p rmr_context %x\n",
+		 lmr, lmr->param.registered_address, lmr->param.rmr_context);
+
+	if (MFO_EP(&tp->addr)) {
+		lmr->mr_handle = (ib_mr_handle_t) dapl_os_alloc (sizeof(struct ibv_mr));
+		if (NULL == lmr->mr_handle) {
+			dapl_log(1, " ERR: Could not allocat mr_hadle\n");
+			return -1;
+		}
+	}
 
 	/* request: */
 	msg.hdr.ver = DAT_MIX_VER;
@@ -404,7 +546,10 @@ int dapli_mix_mr_create(ib_hca_transport_t *tp, DAPL_LMR * lmr)
 	msg.mr_len = lmr->param.registered_size;
 	msg.sci_addr = lmr->sci_addr;
 	msg.sci_off = lmr->sci_off;
-	msg.ib_addr = (uint64_t) lmr->mr_handle->addr;
+	if (MFO_EP(&tp->addr))
+		msg.ib_addr = (uint64_t) lmr->param.registered_address;
+	else
+		msg.ib_addr = (uint64_t) lmr->mr_handle->addr;
 	msg.ib_rkey = lmr->param.rmr_context;
 	msg.ctx = (uint64_t)lmr;
 
@@ -476,6 +621,10 @@ int dapli_mix_mr_free(ib_hca_transport_t *tp, DAPL_LMR * lmr)
 			 msg.hdr.ver, msg.hdr.op, msg.hdr.flags, msg.hdr.status);
 		return -1;
 	}
+	if (MFO_EP(&tp->addr) && lmr->mr_handle) {
+		dapl_os_free(lmr->mr_handle, sizeof(struct ibv_mr));
+		lmr->mr_handle = IB_INVALID_HANDLE;
+	}
 	dapl_log(DAPL_DBG_TYPE_EXTENSION," removed lmr %p, id %d\n", lmr, lmr->mr_id);
 	return 0;
 }
@@ -705,6 +854,118 @@ int dapli_mix_cq_poll(ib_cq_handle_t m_cq, struct ibv_wc *wc)
 	return 0;
 }
 
+/* MIX_PZ_CREATE: ask the proxy to create a protection zone for m_pz and store the returned id in m_pz->pd_handle. Returns 0 on success, -1 on any send/receive/validation failure (pd_handle is then left as IB_INVALID_HANDLE). */
+int dapli_mix_pz_create(DAPL_IA * ia_ptr, DAPL_PZ *m_pz)
+{
+	dat_mix_pz_t msg;
+	scif_epd_t mix_ep = ia_ptr->hca_ptr->ib_trans.scif_ep;
+	int ret, len;
+
+	m_pz->pd_handle = IB_INVALID_HANDLE;
+
+	/* request: ctx carries the local PZ pointer and is checked against the reply below */
+	msg.hdr.ver = DAT_MIX_VER;
+	msg.hdr.op = MIX_PZ_CREATE;
+	msg.hdr.status = 0;
+	msg.hdr.flags = MIX_OP_REQ;
+	msg.ctx = (uint64_t)m_pz;
+	msg.ib_pd = 0;
+
+	len = sizeof(dat_mix_pz_t);
+	ret = scif_send(mix_ep, &msg, len, SCIF_SEND_BLOCK);
+	if (ret != len) {
+		dapl_log(1, " ERR: %s snd on %d, ret %d, exp %d, err %s\n",
+			 mix_op_str(msg.hdr.op), mix_ep, ret, len,
+			 strerror(errno));
+		return -1;
+	}
+	dapl_log(DAPL_DBG_TYPE_EXTENSION," Sent %s request on SCIF EP\n",
+		 mix_op_str(msg.hdr.op));
+
+	/* wait for response; clear ctx first so a stale value cannot pass the echo check */
+	msg.ctx = 0;
+	ret = scif_recv(mix_ep, &msg, len, SCIF_RECV_BLOCK);
+	if (ret != len) {
+		dapl_log(1, " ERR: rcv on ep %d, ret %d, exp %d, err %s\n",
+			    mix_ep, ret, len, strerror(errno));
+		return -1;
+	}
+	if (msg.hdr.ver != DAT_MIX_VER || msg.hdr.op != MIX_PZ_CREATE ||
+	    msg.hdr.flags != MIX_OP_RSP || msg.hdr.status != MIX_SUCCESS) {
+		dapl_log(1, " ERR: %s %p ver %d, op %d, flags %d, stat %d\n",
+			    mix_op_str(msg.hdr.op), m_pz, msg.hdr.ver,
+			    msg.hdr.op, msg.hdr.flags, msg.hdr.status);
+		return -1;
+	}
+
+	if (msg.ctx != (uint64_t)m_pz) {
+		dapl_log(1, " ERR: response ctx (0x%llx) != sent one (0x%llx)\n",
+				(unsigned long long)msg.ctx, (unsigned long long)(uint64_t)m_pz); /* 64-bit values need %llx, not %x */
+		return -1;
+	}
+
+	/* save id from proxy PZ create */
+	m_pz->pd_handle = (ib_pd_handle_t)msg.ib_pd;
+
+	dapl_log(DAPL_DBG_TYPE_EXTENSION,
+		 " MIX_PZ_CREATE: pz %p, reply, proxy IB_PD %p\n",
+		 m_pz, m_pz->pd_handle);
+	return 0;
+}
+
+/* MIX_PZ_FREE: ask the proxy to release the protection zone identified by m_pz->pd_handle; clears pd_handle and returns 0 on success, -1 on any send/receive/validation failure. */
+int dapli_mix_pz_free(DAPL_PZ *m_pz)
+{
+	dat_mix_pz_t msg;
+	DAPL_IA * ia_ptr = m_pz->header.owner_ia;
+	scif_epd_t mix_ep = ia_ptr->hca_ptr->ib_trans.scif_ep;
+	int ret, len;
+
+	dapl_log(DAPL_DBG_TYPE_EXTENSION,
+		 " MIX_PZ_FREE: pz %p, send, proxy IB_PD %p\n",
+		 m_pz, m_pz->pd_handle);
+
+	/* request */
+	msg.hdr.ver = DAT_MIX_VER;
+	msg.hdr.op = MIX_PZ_FREE;
+	msg.hdr.status = 0;
+	msg.hdr.flags = MIX_OP_REQ;
+	msg.ctx = (uint64_t)m_pz;
+	msg.ib_pd = (uint64_t)m_pz->pd_handle;
+
+	len = sizeof(dat_mix_pz_t);
+	ret = scif_send(mix_ep, &msg, len, SCIF_SEND_BLOCK);
+	if (ret != len) {
+		dapl_log(1, " ERR: %s send on %d, ret %d, exp %d, error %s\n",
+			 mix_op_str(msg.hdr.op), mix_ep, ret, len, strerror(errno));
+		return -1; /* do not block in scif_recv waiting on a request that was not fully sent */
+	}
+	dapl_log(DAPL_DBG_TYPE_EXTENSION," Sent %s request on SCIF EP\n",
+		 mix_op_str(msg.hdr.op));
+
+	/* response */
+	ret = scif_recv(mix_ep, &msg, len, SCIF_RECV_BLOCK);
+	if (ret != len) {
+		dapl_log(1, " ERR: rcv on new_ep %d, ret %d, exp %d, error %s\n",
+			    mix_ep, ret, len, strerror(errno));
+		return -1;
+	}
+	if (msg.hdr.ver != DAT_MIX_VER || msg.hdr.op != MIX_PZ_FREE ||
+	    msg.hdr.flags != MIX_OP_RSP || msg.hdr.status != MIX_SUCCESS) {
+		dapl_log(1, " MIX_PZ_FREE ERR: ver %d, op %d, flags %d, or stat %d ln %d\n",
+			 msg.hdr.ver, msg.hdr.op, msg.hdr.flags, msg.hdr.status, ret);
+		return -1;
+	}
+
+	dapl_log(DAPL_DBG_TYPE_EXTENSION,
+		 " MIX_PZ_FREE: reply, proxy IB_PD 0x%llx\n", (unsigned long long)msg.ib_pd);
+
+	m_pz->pd_handle = 0;
+
+	return 0;
+}
+
+
 /* SCIF DMA outbound writes and inbound msg receives; translate to scif_off via LMR */
 /* TODO: faster translation for post_send? */
 static inline int mix_proxy_data(ib_qp_handle_t m_qp, dat_mix_sr_t *msg, struct ibv_sge *sglist, int txlen, int mix_ep)
@@ -1152,9 +1413,11 @@ int dapli_mix_dto_event_in(ib_hca_transport_t *tp, scif_epd_t scif_ep, dat_mix_d
 			}
 			pmsg->wc[i].byte_len = cookie->val.dto.size;
 			dapl_log(DAPL_DBG_TYPE_EP,
-				 " mix_dto_event: MCM evd %p ep %p wr_id=%Lx ln=%d\n",
+				 " mix_dto_event: MCM evd %p ep %p wr_id=%Lx"
+				 " ln=%d op %d flgs %d\n",
 				 m_cq->evd, cookie->ep, pmsg->wc[i].wr_id,
-				 cookie->val.dto.size);
+				 cookie->val.dto.size, pmsg->wc[i].opcode,
+				 pmsg->wc[i].wc_flags);
 		}
 		mcm_const_ib_wc(&ib_wc, &pmsg->wc[i], 1);
 		dapl_os_lock(&m_cq->evd->header.lock);
@@ -1363,14 +1626,73 @@ int dapli_mix_recv(DAPL_HCA *hca, int scif_ep)
 	return ret;
 }
 
+int dapli_mix_query_port(ib_hca_transport_t *tp, unsigned long port_num, struct ibv_port_attr *port_attr) /* MIX_QUERY_PORT: fetch port attributes for port_num from the proxy into port_attr; 0 on success, -1 on failure */
+{
+	dat_mix_port_attr_t msg;
+	scif_epd_t mix_ep = tp->scif_ep;
+	int ret, len;
 
+	ret = scif_get_nodeIDs(NULL, 0, &tp->self.node); /* stores the local SCIF node id in tp->self.node as a side effect */
+	if (ret < 0) {
+		dapl_log(1, " scif_get_nodeIDs() failed with error %s\n", strerror(errno));
+		return -1;
+	}
 
+	/* request: header only, the port number travels in hdr.req_id */
+	msg.hdr.ver = DAT_MIX_VER;
+	msg.hdr.op = MIX_QUERY_PORT;
+	msg.hdr.status = 0;
+	msg.hdr.flags = MIX_OP_REQ;
+	msg.hdr.req_id = port_num;
 
+	len = sizeof(dat_mix_hdr_t);
+	ret = scif_send(mix_ep, &msg, len, SCIF_SEND_BLOCK);
+	if (ret != len) {
+		dapl_log(1, " ERR: %s snd on %d, ret %d, exp %d, err %s\n",
+			 mix_op_str(msg.hdr.op), mix_ep, ret, len,
+			 strerror(errno));
+		return -1;
+	}
+	dapl_log(DAPL_DBG_TYPE_EXTENSION," Sent %s request on SCIF EP\n",
+		 mix_op_str(msg.hdr.op));
 
+	/* wait for response: full dat_mix_port_attr_t, header included */
+	len = sizeof(dat_mix_port_attr_t);
+	ret = scif_recv(mix_ep, &msg, len, SCIF_RECV_BLOCK);
+	if (ret != len) {
+		dapl_log(1, " ERR: rcv on ep %d, ret %d, exp %d, err %s\n",
+			    mix_ep, ret, len, strerror(errno));
+		return -1;
+	}
+	if (msg.hdr.ver != DAT_MIX_VER || msg.hdr.op != MIX_QUERY_PORT ||
+	    msg.hdr.flags != MIX_OP_RSP || msg.hdr.status != MIX_SUCCESS) {
+		dapl_log(1, " ERR: %s ver %d, op %d, flags %d, stat %d\n",
+			    mix_op_str(msg.hdr.op), msg.hdr.ver,
+			    msg.hdr.op, msg.hdr.flags, msg.hdr.status);
+		return -1;
+	}
 
-
-
-
-
-
-
+	port_attr->gid_tbl_len = msg.gid_tbl_len; /* reply validated: copy every field into the caller's ibv_port_attr */
+	port_attr->port_cap_flags = msg.port_cap_flags;
+	port_attr->max_msg_sz = msg.max_msg_sz;
+	port_attr->bad_pkey_cntr = msg.bad_pkey_cntr;
+	port_attr->qkey_viol_cntr = msg.qkey_viol_cntr;
+	port_attr->pkey_tbl_len = msg.pkey_tbl_len;
+	port_attr->lid = msg.lid;
+	port_attr->sm_lid = msg.sm_lid;
+	port_attr->lmc = msg.lmc;
+	port_attr->max_vl_num = msg.max_vl_num;
+	port_attr->sm_sl = msg.sm_sl;
+	port_attr->subnet_timeout = msg.subnet_timeout;
+	port_attr->init_type_reply = msg.init_type_reply;
+	port_attr->active_width = msg.active_width;
+	port_attr->active_speed = msg.active_speed;
+	port_attr->phys_state = msg.phys_state;
+	port_attr->link_layer = msg.link_layer;
+	port_attr->state = msg.state;
+	port_attr->max_mtu = msg.max_mtu;
+	port_attr->active_mtu = msg.active_mtu;
+
+	dapl_log(DAPL_DBG_TYPE_EXTENSION," MIX_QUERY_PORT successful on SCIF EP %d\n", mix_ep);
+	return 0;
+}
diff --git a/dapl/openib_mcm/proxy.c b/dapl/openib_mcm/proxy.c
old mode 100644
new mode 100755
index cb06161..824f575
--- a/dapl/openib_mcm/proxy.c
+++ b/dapl/openib_mcm/proxy.c
@@ -162,10 +162,6 @@ int mcm_send_pi(struct dcm_ib_qp *m_qp,
 				(uint64_t)(uintptr_t)
 				((struct mcm_wr_rx *) (m_qp->wrc_rem.wr_addr + (m_qp->wrc_rem.wr_sz * wr_idx)));
 
-			sge.addr = (uint64_t)(uintptr_t) &m_wr_rx;
-			sge.length = (uint32_t) sizeof(struct mcm_wr_rx); /* 160 byte WR */
-			sge.lkey = 0; /* inline doesn't need registered */
-
 			dapl_log(DAPL_DBG_TYPE_EP,
 				 " mcm_send_pi[%d]: WR_RX wr_id %Lx qn %x op %d flgs 0x%x"
 				 " imm %x laddr %p raddr %p rkey %x wr_ln %d ln %d\n",
@@ -186,7 +182,7 @@ int mcm_send_pi(struct dcm_ib_qp *m_qp,
 				dapl_log(DAPL_DBG_TYPE_ERR,
 					" mcm_send_pi ERR: m_wr %p idx %d laddr=%p ln=%d lkey=%x flgs %x"
 					" tl %d hd %d\n",
-					m_wr_rx, wr_idx, wr->sg_list[0].addr,
+					wr_rx_ptr, wr_idx, wr->sg_list[0].addr,
 					wr->sg_list[0].length, wr->sg_list[0].lkey,
 					wr_rx_ptr->flags, m_qp->wr_tl, m_qp->wr_hd);
 				dapl_log(DAPL_DBG_TYPE_ERR,
@@ -218,7 +214,7 @@ static inline void mcm_dto_req(struct dcm_ib_cq *m_cq, struct ibv_wc *wc)
 	cookie = (DAPL_COOKIE *)(uintptr_t)wc->wr_id;
 	m_qp = cookie->ep->qp_handle;
 
-	if (!m_qp->tp->scif_ep && MXS_EP(m_qp) &&
+	if (!m_qp->tp->scif_ep && MXF_EP(m_qp) &&
 	    (wc->opcode == (uint32_t)IBV_WR_RDMA_WRITE_WITH_IMM)) {
 		dapl_log(DAPL_DBG_TYPE_EP,
 			 " mcm_dto_req: RW_imm -> WR, wr_id %Lx\n", wc->wr_id);
diff --git a/dapl/openib_scm/cm.c b/dapl/openib_scm/cm.c
old mode 100644
new mode 100755
diff --git a/dapl/openib_scm/dapl_ib_util.h b/dapl/openib_scm/dapl_ib_util.h
old mode 100644
new mode 100755
diff --git a/dapl/openib_scm/device.c b/dapl/openib_scm/device.c
old mode 100644
new mode 100755
diff --git a/dapl/openib_scm/linux/openib_osd.h b/dapl/openib_scm/linux/openib_osd.h
old mode 100644
new mode 100755
diff --git a/dapl/openib_ucm/cm.c b/dapl/openib_ucm/cm.c
old mode 100644
new mode 100755
diff --git a/dapl/openib_ucm/dapl_ib_util.h b/dapl/openib_ucm/dapl_ib_util.h
old mode 100644
new mode 100755
index ece9c88..ac74bab
--- a/dapl/openib_ucm/dapl_ib_util.h
+++ b/dapl/openib_ucm/dapl_ib_util.h
@@ -57,6 +57,10 @@
 #define UCM_ARRAY_IDX(idx, abits) (idx >> abits)
 #define UCM_ENTRY_IDX(idx, abits) (idx & (abits - 1))
 
+/* Default UCM timers set for small cluster */
+#define UCM_DEF_NODES 32
+#define UCM_DEF_PPN   32
+#define UCM_DEF_THRESHOLD (UCM_DEF_NODES * UCM_DEF_PPN)
 
 /* DAPL CM objects MUST include list_entry, ref_count, event for EP linking */
 struct ib_cm_handle
@@ -125,6 +129,9 @@ typedef struct _ib_hca_transport
 	int			wait_time;
 	int			dreq_cnt;
 	int			drep_time;
+	int			nodes;
+	int			ppn;
+	int 			threshold;
 	DAPL_OS_LOCK		slock;	
 	int			s_hd;
 	int			s_tl;
diff --git a/dapl/openib_ucm/device.c b/dapl/openib_ucm/device.c
old mode 100644
new mode 100755
index 94ce812..98693b2
--- a/dapl/openib_ucm/device.c
+++ b/dapl/openib_ucm/device.c
@@ -517,17 +517,30 @@ static int ucm_service_create(IN DAPL_HCA *hca)
 	int hlen = sizeof(struct ibv_grh); /* hdr included with UD recv */
 	char *rbuf;
 
-	/* setup CM timers and queue sizes */
+	/* setup CM timers and queue sizes based on cluster size */
+	tp->nodes = dapl_os_get_env_val("DAPL_NETWORK_NODES", UCM_DEF_NODES);
+	tp->ppn = dapl_os_get_env_val("DAPL_NETWORK_PPN", UCM_DEF_PPN);
+	tp->threshold = dapl_os_get_env_val("DAPL_NETWORK_TRESHOLD", UCM_DEF_THRESHOLD);
 	tp->retries = dapl_os_get_env_val("DAPL_UCM_RETRY", DCM_RETRY_CNT);
-	tp->rep_time = dapl_os_get_env_val("DAPL_UCM_REP_TIME", DCM_REP_TIME);
-	tp->rtu_time = dapl_os_get_env_val("DAPL_UCM_RTU_TIME", DCM_RTU_TIME);
 	tp->wait_time = dapl_os_get_env_val("DAPL_UCM_WAIT_TIME", DCM_WAIT_TIME);
-	tp->qpe = dapl_os_get_env_val("DAPL_UCM_QP_SIZE", DCM_QP_SIZE);
-	tp->cqe = dapl_os_get_env_val("DAPL_UCM_CQ_SIZE", DCM_CQ_SIZE);
-	tp->burst = dapl_os_get_env_val("DAPL_UCM_TX_BURST", DCM_TX_BURST);
 	tp->dreq_cnt = dapl_os_get_env_val("DAPL_UCM_DREQ_RETRY", DCM_DREQ_CNT);
 	tp->drep_time = dapl_os_get_env_val("DAPL_UCM_DREP_TIME", DCM_DREP_TIME);
 	tp->cm_timer = dapl_os_get_env_val("DAPL_UCM_TIMER", DCM_CM_TIMER);
+
+	if ((tp->nodes * tp->ppn) <= tp->threshold) {
+		tp->rep_time = dapl_os_get_env_val("DAPL_UCM_REP_TIME", DCM_REP_TIME);
+		tp->rtu_time = dapl_os_get_env_val("DAPL_UCM_RTU_TIME", DCM_RTU_TIME);
+		tp->qpe = dapl_os_get_env_val("DAPL_UCM_QP_SIZE", DCM_QP_SIZE);
+		tp->cqe = dapl_os_get_env_val("DAPL_UCM_CQ_SIZE", DCM_CQ_SIZE);
+		tp->burst = dapl_os_get_env_val("DAPL_UCM_TX_BURST", DCM_TX_BURST);
+	} else {
+		tp->rep_time = dapl_os_get_env_val("DAPL_UCM_REP_TIME", DCM_REP_TIME*10);
+		tp->rtu_time = dapl_os_get_env_val("DAPL_UCM_RTU_TIME", DCM_RTU_TIME*10);
+		tp->qpe = dapl_os_get_env_val("DAPL_UCM_QP_SIZE", DCM_QP_SIZE*10);
+		tp->cqe = dapl_os_get_env_val("DAPL_UCM_CQ_SIZE", DCM_CQ_SIZE*10);
+		tp->burst = dapl_os_get_env_val("DAPL_UCM_TX_BURST", DCM_TX_BURST*2);
+	}
+
 	/* default = 11-bit, 2KB entries; 18 bit, 256KB total */
 	tp->cm_entry_bits = dapl_os_get_env_val("DAPL_UCM_ENTRY_BITS", UCM_ENTRY_BITS);
 	tp->cm_array_bits = DAPL_MAX(dapl_os_get_env_val("DAPL_UCM_ARRAY_BITS", UCM_ARRAY_BITS), tp->cm_entry_bits);
diff --git a/dapl/openib_ucm/linux/openib_osd.h b/dapl/openib_ucm/linux/openib_osd.h
old mode 100644
new mode 100755
diff --git a/dapl/svc/mcm.c b/dapl/svc/mcm.c
old mode 100644
new mode 100755
index 7374835..bd3149f
--- a/dapl/svc/mcm.c
+++ b/dapl/svc/mcm.c
@@ -163,6 +163,12 @@ int mcm_init_cm_service(mcm_ib_dev_t *md)
 	md->cqe = mcm_depth;
 	md->signal = mcm_signal;
 
+	/* ibscif: MIC and HST different, save MIC addresses */
+	if (!strncmp(md->ibdev->name, "scif", 4)) {
+		md->m_lid = md->addr.lid;
+		memcpy(md->m_gid, md->addr.gid, 16);
+	}
+
 	/* Save addr information */
 	/* get lid for this hca-port, convert to network order */
 	if (ibv_query_port(md->ibctx, md->port, &port_attr)) {
@@ -187,12 +193,14 @@ int mcm_init_cm_service(mcm_ib_dev_t *md)
 		return -1;
 	}
 	/* EP mapping hint for MIC to HCA, set MSS if compat or PI disabled */
-	if (((md->numa_node != -1) &&
-	     (md->numa_node == md->mc->numa_node)) ||
-	      md->mc->ver == MIX_COMP || mcm_proxy_in == 0)
-		md->addr.ep_map = MIC_SSOCK_DEV;
-	else
+	if (md->addr.ep_map != MIC_FULL_DEV) {
+		if (((md->numa_node != -1) &&
+			 (md->numa_node == md->mc->numa_node)) ||
+			  md->mc->ver == MIX_COMP || mcm_proxy_in == 0)
+			md->addr.ep_map = MIC_SSOCK_DEV;
+		else
 		md->addr.ep_map = MIC_XSOCK_DEV;
+	}
 
 	/* setup CM timers and queue sizes */
 	md->pd = ibv_alloc_pd(md->ibctx);
@@ -285,11 +293,11 @@ int mcm_init_cm_service(mcm_ib_dev_t *md)
 	/* save qp_num as part of ia_address, network order */
 	md->addr.qpn = htonl(md->qp->qp_num);
 
-	mlog(0, " IB LID 0x%x PORT %d GID %s QPN 0x%x: mic%d -> %s - %s, mic_ver %d %s\n",
-		ntohs(md->addr.lid), md->port,
+	mlog(0, " IB LID %x (%x) PORT %d GID %s QPN %x: mic%d -> %s - %s, mic_ver %d %s\n",
+		ntohs(md->addr.lid), md->m_lid, md->port,
 		inet_ntop(AF_INET6, md->addr.gid, gid_str, sizeof(gid_str)),
 		md->qp->qp_num, md->mc->scif_id - 1, md->ibdev->name,
-	        md->addr.ep_map == MIC_SSOCK_DEV ? "MSS":"MXS", md->mc->ver,
+		mcm_map_str(md->addr.ep_map), md->mc->ver,
 	        md->mc->ver == MIX_COMP ? "WARN: OLD MPSS -> RUN COMP MODE":"");
 
 	return 0;
@@ -442,22 +450,19 @@ int mcm_modify_qp(struct ibv_qp	*qp_handle,
 	return ret;
 }
 
-/* move QP's to error state and destroy. Flush the proxy SR queue is exists */
+/* move QPs to error state. Flush the proxy SR queue if it exists */
 void mcm_flush_qp(struct mcm_qp *m_qp)
 {
 	struct mcm_sr *m_sr;
 	struct dat_mix_wc wc;
 
-	if (m_qp->ib_qp1) {
+	mlog(2, " QP1 %p QP2 %p\n", m_qp->ib_qp1, m_qp->ib_qp2);
+
+	if (m_qp->ib_qp1)
 		mcm_modify_qp(m_qp->ib_qp1, IBV_QPS_ERR, 0, 0, NULL);
-		ibv_destroy_qp(m_qp->ib_qp1);
-		m_qp->ib_qp1 = NULL;
-	}
-	if (m_qp->ib_qp2) {
+
+	if (m_qp->ib_qp2)
 		mcm_modify_qp(m_qp->ib_qp2, IBV_QPS_ERR, 0, 0, NULL);
-		ibv_destroy_qp(m_qp->ib_qp2);
-		m_qp->ib_qp2 = NULL;
-	}
 
 	mpxy_lock(&m_qp->rxlock);
 	while (m_qp->sr_tl != m_qp->sr_hd) {
@@ -557,14 +562,23 @@ mcm_cm_t *m_cm_create(mcm_scif_dev_t *smd, mcm_qp_t *m_qp, dat_mcm_addr_t *r_add
 		/* MPXYD SRC IB info, QP2t = saddr2 all cases */
 		cm->msg.saddr2.qpn = htonl(m_qp->ib_qp2->qp_num);
 		cm->msg.saddr2.qp_type = m_qp->qp_attr2.qp_type;
-                cm->msg.saddr2.lid = smd->md->addr.lid;
-                cm->msg.saddr2.ep_map = smd->md->addr.ep_map;
-                memcpy(&cm->msg.saddr2.gid[0], &smd->md->addr.gid, 16);
+		cm->msg.saddr2.ep_map = smd->md->addr.ep_map;
 
                 /* MPXYD RCV IB info */
-                cm->msg.saddr1.lid = smd->md->addr.lid;
                 cm->msg.saddr1.ep_map = smd->md->addr.ep_map;
-                memcpy(&cm->msg.saddr1.gid[0], &smd->md->addr.gid, 16);
+
+		/* intra-node: QPt addr 2 - ibscif HST lid != MICx lid's */
+		if (smd->md->m_lid) {
+			cm->msg.saddr1.lid = smd->md->m_lid;
+			cm->msg.saddr2.lid = smd->md->addr.lid;
+			memcpy(&cm->msg.saddr1.gid[0], &smd->md->m_gid, 16);
+			memcpy(&cm->msg.saddr2.gid[0], &smd->md->addr.gid, 16);
+		} else {
+			cm->msg.saddr1.lid = smd->md->lid;
+			cm->msg.saddr2.lid = smd->md->lid;
+			memcpy(&cm->msg.saddr1.gid[0], &smd->md->addr.gid, 16);
+			memcpy(&cm->msg.saddr2.gid[0], &smd->md->addr.gid, 16);
+		}
 
                 /* MSS, QPr is on MIC, QP1r == saddr1 */
 	        if (MSS_EP(l_addr)) {
@@ -572,14 +586,14 @@ mcm_cm_t *m_cm_create(mcm_scif_dev_t *smd, mcm_qp_t *m_qp, dat_mcm_addr_t *r_add
 			cm->msg.saddr1.qp_type = cm->m_qp->qp_attr1.qp_type;
 
 			/* MSS_EP -> (MXS or unknown) WC queue for MXS peer PI service */
-			if (MXS_EP(r_addr) || UND_EP(r_addr)) {
+			if (MXF_EP(r_addr) || UND_EP(r_addr)) {
 				if (m_pi_create_wc_q(m_qp, mcm_rx_entries))
 					goto bail;
 			}
 	        }
 
         	/* MXS -> (MSS, HOST, or unknown), need a QPr on mpxyd, QP1r == saddr1 */
-                if (MXS_EP(l_addr) && !MXS_EP(r_addr)) {
+                if (MXF_EP(l_addr) && !MXF_EP(r_addr)) {
                 	/* note: pi_wr_q and pi_wc_q created via MXS create_qp */
                 	if (m_qp_create_pi(smd, m_qp))
                 		goto bail;
@@ -588,7 +602,7 @@ mcm_cm_t *m_cm_create(mcm_scif_dev_t *smd, mcm_qp_t *m_qp, dat_mcm_addr_t *r_add
                 }
 
                 /* MXS -> MXS, QPs and QPr is QP2 on mpxyd, saddr 1 == saddr2 */
-                if (MXS_EP(l_addr) && MXS_EP(r_addr))
+                if (MXF_EP(l_addr) && MXF_EP(r_addr))
                 	memcpy(&cm->msg.saddr1, &cm->msg.saddr2, sizeof(dat_mcm_addr_t));
 
 		mlog(8, " SRC: QPt qpn 0x%x lid 0x%x, QPr qpn 0x%x lid 0x%x"
@@ -780,6 +794,10 @@ static int mcm_send(mcm_ib_dev_t *md, dat_mcm_msg_t *msg, DAT_PVOID p_data, DAT_
 	int len, ret = -1;
 	uint16_t dlid = ntohs(msg->daddr1.lid);
 
+	/* intra-node, CM on host side, HST lid != MIC lid */
+	if (md->m_lid)
+		dlid = ntohs(md->addr.lid);
+
 	/* Get message from send queue, copy data, and send */
 	mpxy_lock(&md->txlock);
 	if ((smsg = mcm_get_smsg(md)) == NULL) {
@@ -816,7 +834,7 @@ static int mcm_send(mcm_ib_dev_t *md, dat_mcm_msg_t *msg, DAT_PVOID p_data, DAT_
         }
 
 	mlog(8," cm_send: op %s ln %d lid %x c_qpn %x rport %x, p_size %d\n",
-		mcm_op_str(ntohs(smsg->op)), sge.length, ntohs(smsg->daddr1.lid),
+		mcm_op_str(ntohs(smsg->op)), sge.length, dlid,
 		ntohl(smsg->dqpn), ntohs(smsg->dport), p_size);
 
 	/* empty slot, then create AH */
@@ -888,9 +906,17 @@ int mcm_cm_rej_out(mcm_ib_dev_t *md, dat_mcm_msg_t *msg, DAT_MCM_OP type, int sw
 		memcpy(&smsg.saddr2, &msg->saddr2, sizeof(dat_mcm_addr_t));
 	}
 
+	/* intra-node; addr1 == mic lid, addr2 == cm lid */
+	if (md->m_lid) {
+		smsg.saddr1.lid = md->m_lid;
+		memcpy(&smsg.saddr1.gid[0], &md->m_gid, 16);
+	}
+	smsg.saddr2.lid = md->addr.lid;
+	memcpy(&smsg.saddr2.gid[0], &md->addr.gid, 16);
+
 	mlog(2," sLID %x, sQPN %x sPORT %x -> dLID %x, dQPN %x dPORT %x\n",
-	     ntohs(smsg.saddr1.lid), ntohl(smsg.sqpn), ntohs(smsg.sport),
-	     ntohs(smsg.daddr1.lid), ntohl(smsg.dqpn), ntohs(smsg.dport));
+	     ntohs(smsg.saddr2.lid), ntohl(smsg.sqpn), ntohs(smsg.sport),
+	     ntohs(smsg.daddr2.lid), ntohl(smsg.dqpn), ntohs(smsg.dport));
 
 	if (type == MCM_REJ_USER)
 		MCNTR(md, MCM_CM_REJ_USER_OUT);
@@ -900,6 +926,29 @@ int mcm_cm_rej_out(mcm_ib_dev_t *md, dat_mcm_msg_t *msg, DAT_MCM_OP type, int sw
 	return (mcm_send(md, &smsg, NULL, 0));
 }
 
+int mcm_qp_busy(struct mcm_qp *m_qp) /* returns the sum of the QP's proxy-in and proxy-out work counters; callers treat non-zero as busy */
+{
+	int busy = 0;
+
+	mpxy_lock(&m_qp->rxlock); /* first group of counters is summed under rxlock */
+	busy += (m_qp->pi_rw_cnt + m_qp->pi_rr_cnt);
+	busy += (m_qp->stall_cnt_rr + m_qp->post_cnt_wt);
+	mpxy_unlock(&m_qp->rxlock);
+	mlog(2," PI[%d]: rw_cnt %d rr_cnt %d stall %d post %d\n", /* NOTE(review): counters are re-read here after the unlock, so logged values may differ from those summed */
+	     busy, m_qp->pi_rw_cnt, m_qp->pi_rr_cnt,
+	     m_qp->stall_cnt_rr, m_qp->post_cnt_wt);
+
+	mpxy_lock(&m_qp->txlock); /* second group is summed under txlock */
+	busy += (m_qp->wr_pp + m_qp->wr_pp_rem);
+	busy += (m_qp->post_sig_cnt - m_qp->comp_cnt); /* signaled posts minus completions */
+	mpxy_unlock(&m_qp->txlock);
+	mlog(2," PO[%d]: wr_pp %d pp_rem %d pst_sig %d cmp_sig %d\n", /* busy printed here is the running total, PI part included */
+	     busy, m_qp->wr_pp, m_qp->wr_pp_rem,
+	     m_qp->post_sig_cnt, m_qp->comp_cnt);
+
+	return busy;
+}
+
 void mcm_cm_disc(mcm_cm_t *cm)
 {
 	int finalize = 1;
@@ -909,7 +958,7 @@ void mcm_cm_disc(mcm_cm_t *cm)
 	switch (cm->state) {
 	case MCM_CONNECTED:
 		/* CONSUMER: move to err state to flush */
-		if (cm->m_qp)
+		if (cm->m_qp && !mcm_qp_busy(cm->m_qp))
 			mcm_flush_qp(cm->m_qp);
 
 		/* send DREQ, event after DREP or DREQ timeout */
@@ -950,7 +999,7 @@ void mcm_cm_disc(mcm_cm_t *cm)
 	case MCM_DISC_RECV:
 		MCNTR(cm->md, MCM_CM_DREQ_IN);
 		/* CM_THREAD: move to err state to flush */
-		if (cm->m_qp)
+		if (cm->m_qp && !mcm_qp_busy(cm->m_qp))
 			mcm_flush_qp(cm->m_qp);
 
 		/* DREQ received, send DREP and schedule event, finalize */
@@ -964,6 +1013,8 @@ void mcm_cm_disc(mcm_cm_t *cm)
 		break;
 	case MCM_DISCONNECTED:
 		mlog(2," state = %s already disconnected\n",  mcm_state_str(cm->state) );
+		if (cm->m_qp) /* MIC client disc, force flush */
+			mcm_flush_qp(cm->m_qp);
 		mpxy_unlock(&cm->lock);
 		MCNTR(cm->md, MCM_CM_DREQ_DUP);
 		return;
@@ -1053,7 +1104,6 @@ static void mcm_process_recv(mcm_ib_dev_t *md, dat_mcm_msg_t *msg, mcm_cm_t *cm,
 	case MCM_RTU_PENDING: /* passive */
 		mlog(2, "RTU_PENDING: cm %p, my_id %d, cm_id %d\n",
 			cm, cm->entry.tid, cm->cm_id);
-		cm->state = MCM_CONNECTED;
 		mpxy_unlock(&cm->lock);
 		MCNTR(md, MCM_CM_RTU_IN);
 		mix_cm_rtu_in(cm, msg, len);
@@ -1456,7 +1506,7 @@ int mcm_cm_rtu_out(mcm_cm_t *m_cm)
 		m_cm, htons(m_cm->msg.saddr2.lid), htonl(m_cm->msg.saddr2.qpn),
 		htons(m_cm->msg.sport),	system_guid, mcm_map_str(m_cm->msg.saddr2.ep_map),
 		htons(m_cm->msg.daddr1.lid),
-		MXS_EP(&m_cm->msg.saddr1) && MXS_EP(&m_cm->msg.daddr1) ?
+		MXF_EP(&m_cm->msg.saddr1) && MXF_EP(&m_cm->msg.daddr1) ?
 				htonl(m_cm->msg.daddr2.qpn):htonl(m_cm->msg.daddr1.qpn),
 		htons(m_cm->msg.dport), ntohll(r_guid), mcm_map_str(m_cm->msg.daddr1.ep_map));
 
diff --git a/dapl/svc/mix.c b/dapl/svc/mix.c
old mode 100644
new mode 100755
index 31b24a0..ec715f3
--- a/dapl/svc/mix.c
+++ b/dapl/svc/mix.c
@@ -219,6 +219,110 @@ out_close_op_ep:
 	scif_close(op_ep);
 }
 
+static int mix_query_device(mcm_scif_dev_t *smd, dat_mix_device_attr_t *pmsg) /* MIX_QUERY_DEVICE handler: query smd->md->ibctx and send the attributes (or MIX_EFAULT) on smd->scif_op_ep; pmsg is not read here; returns scif_send_msg()'s result */
+{
+	int ret, len;
+	dat_mix_device_attr_t msg = {0}; /* zeroed: the whole struct is sent, even on the error path */
+	struct ibv_device_attr dev_attr;
+
+	len = sizeof(dat_mix_device_attr_t);
+	msg.hdr.ver = DAT_MIX_VER;
+	msg.hdr.op = MIX_QUERY_DEVICE;
+	msg.hdr.flags = MIX_OP_RSP;
+
+	ret = ibv_query_device(smd->md->ibctx, &dev_attr);
+	if (ret) {
+	        msg.hdr.status = MIX_EFAULT;
+	        goto done;
+	}
+	msg.hdr.status = MIX_SUCCESS;
+
+	strncpy(msg.fw_ver, dev_attr.fw_ver, sizeof(msg.fw_ver) - 1); /* last byte stays 0 from the initializer */
+	msg.node_guid = dev_attr.node_guid;
+	msg.sys_image_guid = dev_attr.sys_image_guid;
+	msg.max_mr_size = dev_attr.max_mr_size;
+	msg.page_size_cap = dev_attr.page_size_cap;
+	msg.vendor_id = dev_attr.vendor_id;
+	msg.vendor_part_id = dev_attr.vendor_part_id;
+	msg.hw_ver = dev_attr.hw_ver;
+	msg.max_qp = dev_attr.max_qp;
+	msg.max_qp_wr = dev_attr.max_qp_wr;
+	msg.device_cap_flags = dev_attr.device_cap_flags;
+	msg.max_sge = dev_attr.max_sge;
+	msg.max_sge_rd = dev_attr.max_sge_rd;
+	msg.max_cq = dev_attr.max_cq;
+	msg.max_cqe = dev_attr.max_cqe;
+	msg.max_mr = dev_attr.max_mr;
+	msg.max_pd = dev_attr.max_pd;
+	msg.max_qp_rd_atom = dev_attr.max_qp_rd_atom;
+	msg.max_ee_rd_atom = dev_attr.max_ee_rd_atom;
+	msg.max_ee_rd_atom = dev_attr.max_res_rd_atom; /* NOTE(review): overwrites the line above, so max_ee_rd_atom carries max_res_rd_atom; the receiver must agree - confirm before changing */
+	msg.max_qp_init_rd_atom = dev_attr.max_qp_init_rd_atom;
+	msg.max_ee_init_rd_atom = dev_attr.max_ee_init_rd_atom;
+	msg.atomic_cap = dev_attr.atomic_cap;
+	msg.max_ee = dev_attr.max_ee;
+	msg.max_rdd = dev_attr.max_rdd;
+	msg.max_mw = dev_attr.max_mw;
+	msg.max_raw_ipv6_qp = dev_attr.max_raw_ipv6_qp;
+	msg.max_raw_ethy_qp = dev_attr.max_raw_ethy_qp;
+	msg.max_mcast_grp = dev_attr.max_mcast_grp;
+	msg.max_mcast_qp_attach = dev_attr.max_mcast_qp_attach;
+	msg.max_total_mcast_qp_attach = dev_attr.max_total_mcast_qp_attach;
+	msg.max_ah = dev_attr.max_ah;
+	msg.max_fmr = dev_attr.max_fmr;
+	msg.max_map_per_fmr = dev_attr.max_map_per_fmr;
+	msg.max_srq = dev_attr.max_srq;
+	msg.max_srq_wr = dev_attr.max_srq_wr;
+	msg.max_srq_sge = dev_attr.max_srq_sge;
+	msg.max_pkeys = dev_attr.max_pkeys;
+	msg.local_ca_ack_delay = dev_attr.local_ca_ack_delay;
+	msg.phys_port_cnt = dev_attr.phys_port_cnt;
+done:
+	return (scif_send_msg(smd->scif_op_ep, &msg, len));
+}
+
+static int mix_query_port(mcm_scif_dev_t *smd, dat_mix_port_attr_t *pmsg) /* MIX_QUERY_PORT handler: query the port named by pmsg->hdr.req_id and send the attributes (or MIX_EFAULT) on smd->scif_op_ep; returns scif_send_msg()'s result */
+{
+	int ret, len;
+	dat_mix_port_attr_t msg = {0}; /* zeroed: the whole struct is sent, even on the error path */
+	struct ibv_port_attr port_attr;
+
+	len = sizeof(dat_mix_port_attr_t);
+	msg.hdr.ver = DAT_MIX_VER;
+	msg.hdr.op = MIX_QUERY_PORT;
+	msg.hdr.flags = MIX_OP_RSP;
+
+	ret = ibv_query_port(smd->md->ibctx, pmsg->hdr.req_id, &port_attr); /* req_id carries the port number */
+	if (ret) {
+	        msg.hdr.status = MIX_EFAULT;
+	        goto done;
+	}
+	msg.hdr.status = MIX_SUCCESS;
+
+	msg.gid_tbl_len = port_attr.gid_tbl_len;
+	msg.port_cap_flags = port_attr.port_cap_flags;
+	msg.max_msg_sz = port_attr.max_msg_sz;
+	msg.bad_pkey_cntr = port_attr.bad_pkey_cntr;
+	msg.qkey_viol_cntr = port_attr.qkey_viol_cntr;
+	msg.pkey_tbl_len = port_attr.pkey_tbl_len;
+	msg.lid = port_attr.lid;
+	msg.sm_lid = port_attr.sm_lid;
+	msg.lmc = port_attr.lmc;
+	msg.max_vl_num = port_attr.max_vl_num;
+	msg.sm_sl = port_attr.sm_sl;
+	msg.subnet_timeout = port_attr.subnet_timeout;
+	msg.init_type_reply = port_attr.init_type_reply;
+	msg.active_width = port_attr.active_width;
+	msg.active_speed = port_attr.active_speed;
+	msg.phys_state = port_attr.phys_state;
+	msg.link_layer = port_attr.link_layer;
+	msg.state = port_attr.state;
+	msg.max_mtu = port_attr.max_mtu;
+	msg.active_mtu = port_attr.active_mtu;
+done:
+	return (scif_send_msg(smd->scif_op_ep, &msg, len));
+}
+
 static int mix_prov_attr(mcm_scif_dev_t *smd, dat_mix_attr_t *pmsg)
 {
 	int len, ret;
@@ -555,6 +659,114 @@ resp:
 	return (scif_send_msg(smd->scif_op_ep, (void*)pmsg, len));
 }
 
+/* create new proxy-out PZ: read the rest of the request, allocate an IB PD
+ * on smd->md->ibctx, queue the new mcm_pz on smd->pzlist and reply with the
+ * mcm_pz address as the handle in pmsg->ib_pd. Returns the scif_recv result
+ * on a short read, otherwise the scif_send_msg result. */
+static int mix_pz_create(mcm_scif_dev_t *smd, dat_mix_pz_t *pmsg)
+{
+	int len, ret;
+	mcm_pz_t *new_pz;
+
+	/* hdr already read, get operation data */
+	len = sizeof(dat_mix_pz_t) - sizeof(dat_mix_hdr_t);
+	ret = scif_recv(smd->scif_op_ep, ((char*)pmsg + sizeof(dat_mix_hdr_t)), len, SCIF_RECV_BLOCK);
+	if (ret != len) {
+		mlog(0, " ERR: ret %d, exp %d\n", ret, len);
+		return ret;
+	}
+
+	new_pz = calloc(1, sizeof(*new_pz)); /* zero-filled, as malloc+memset was */
+	if (!new_pz) {
+		mlog(0, " ERR: Could not alloc mcm_pz_t\n");
+		goto err;
+	}
+
+	init_list(&new_pz->entry);
+	new_pz->smd = smd;
+	new_pz->pd_ctx = pmsg->ctx;
+
+	new_pz->ib_pd = ibv_alloc_pd(smd->md->ibctx);
+	if (!new_pz->ib_pd) {
+		mlog(0, " ERR: Could not alloc pd\n");
+		goto err;
+	}
+
+	mpxy_lock(&smd->pzlock);
+	insert_tail(&new_pz->entry, &smd->pzlist, new_pz);
+	mpxy_unlock(&smd->pzlock);
+	mlog(8, " new mcm_pz %p and ib_pd %p created\n", new_pz, new_pz->ib_pd);
+
+	/* client handle is the mcm_pz address; mix_pz_destroy matches it on pzlist */
+	pmsg->ib_pd = (uint64_t)(uintptr_t)new_pz;
+	pmsg->hdr.status = MIX_SUCCESS;
+	goto resp;
+err:
+	/* log errno before free() has a chance to disturb it */
+	mlog(0, " ERR: %s\n", strerror(errno));
+	pmsg->hdr.status = MIX_EINVAL;
+	free(new_pz); /* NULL when the allocation itself failed; free(NULL) is a no-op */
+resp:
+	/* send back response */
+	pmsg->hdr.flags = MIX_OP_RSP;
+	len = sizeof(dat_mix_pz_t);
+	return (scif_send_msg(smd->scif_op_ep, (void*)pmsg, len));
+}
+
+/* destroy proxy-out PZ: find the pmsg->ib_pd handle on smd->pzlist, dealloc
+ * its IB PD, unlink and free the entry, send the response. Lookup and removal
+ * share one pzlock section so the entry cannot be freed by others in between. */
+static int mix_pz_destroy(mcm_scif_dev_t *smd, dat_mix_pz_t *pmsg)
+{
+	mcm_pz_t *mcm_pz;
+	int len, ret;
+
+	/* hdr already read, get operation data */
+	len = sizeof(dat_mix_pz_t) - sizeof(dat_mix_hdr_t);
+	ret = scif_recv(smd->scif_op_ep, ((char*)pmsg + sizeof(dat_mix_hdr_t)), len, SCIF_RECV_BLOCK);
+	if (ret != len) {
+		mlog(0, " ERR: ret %d, exp %d\n", ret, len);
+		return ret;
+	}
+
+	mlog(8, " MIX_PZ_DESTROY: msg ctx 0x%lx, msg ib_pd 0x%lx\n", pmsg->ctx, pmsg->ib_pd);
+
+	/* find the PD in list */
+	mpxy_lock(&smd->pzlock);
+	mcm_pz = get_head_entry(&smd->pzlist);
+	while (mcm_pz) {
+		if ((uint64_t)(uintptr_t)mcm_pz == pmsg->ib_pd) {
+			mlog(8, " Matched mcm_pz %p (ib_pd %p)\n", mcm_pz, mcm_pz->ib_pd);
+			break;
+		}
+		mcm_pz = get_next_entry(&mcm_pz->entry, &smd->pzlist);
+	}
+	if (!mcm_pz) {
+		mpxy_unlock(&smd->pzlock);
+		mlog(0, " ERR: ib_pd 0x%lx was not found\n", pmsg->ib_pd);
+		goto err;
+	}
+	/* still holding pzlock taken for the lookup */
+	ret = ibv_dealloc_pd(mcm_pz->ib_pd);
+	remove_entry(&mcm_pz->entry);
+	free(mcm_pz);
+	mpxy_unlock(&smd->pzlock);
+
+	if (ret)
+		goto err;
+
+	pmsg->hdr.status = MIX_SUCCESS;
+	goto resp;
+err:
+	mlog(0, " ERR: %s\n", strerror(errno));
+	pmsg->hdr.status = MIX_EINVAL;
+resp:
+	/* send back response */
+	pmsg->hdr.flags = MIX_OP_RSP;
+	len = sizeof(dat_mix_pz_t);
+	return (scif_send_msg(smd->scif_op_ep, (void*)pmsg, len));
+}
+
 /* called with smd->qptlist lock held */
 void m_qp_free(struct mcm_qp *m_qp)
 {
@@ -580,7 +792,16 @@ void m_qp_free(struct mcm_qp *m_qp)
 		mpxy_unlock(&cm->lock);
 		mcm_dqconn_free(m_qp->smd, cm);
 	}
-	mcm_flush_qp(m_qp); /* move QP to error, flush & destroy */
+	mcm_flush_qp(m_qp); /* move QP to error, flush */
+
+	if (m_qp->ib_qp1) {
+		ibv_destroy_qp(m_qp->ib_qp1);
+		m_qp->ib_qp1 = NULL;
+	}
+	if (m_qp->ib_qp2) {
+		ibv_destroy_qp(m_qp->ib_qp2);
+		m_qp->ib_qp2 = NULL;
+	}
 
 #ifdef MCM_PROFILE
 	if (mcm_profile)
@@ -737,7 +958,7 @@ static int m_qp_create(mcm_scif_dev_t *smd,
 		goto err;
 
 	/* plus proxy-in: create WR and WC pools, rx_cq */
-	if (smd->md->addr.ep_map == MIC_XSOCK_DEV) {
+	if (MXF_EP(&smd->md->addr)) {
 		if (m_pi_create_bpool(m_qp, attr->cap.max_recv_wr))
 			goto err;
 
@@ -799,7 +1020,7 @@ static int m_qp_create(mcm_scif_dev_t *smd,
 		m_qp->qp_attr2.max_inline_data = attr->cap.max_inline_data;
 
 	/* don't post recv msgs until RTU, if local MXS or local MSS to remote MXS */
-	if (smd->md->addr.ep_map == MIC_XSOCK_DEV)
+	if (MXF_EP(&smd->md->addr))
 		memcpy(&m_qp->qp_attr1, &m_qp->qp_attr2, sizeof(dat_mix_qp_attr_t));
 
 	/* TX proxy-out thread queue */
@@ -877,7 +1098,7 @@ static int mix_qp_create(mcm_scif_dev_t *smd, dat_mix_qp_t *pmsg)
 	/* return QPt, QPr info to MIC client, insert on QP list */
 	memcpy(&pmsg->qp_t, &new_mqp->qp_attr2, sizeof(dat_mix_qp_attr_t));
 
-	if (smd->md->addr.ep_map == MIC_XSOCK_DEV)
+	if (MXF_EP(&smd->md->addr))
 		memcpy(&pmsg->qp_r, &new_mqp->qp_attr1, sizeof(dat_mix_qp_attr_t));
 	else {
 		new_mqp->qp_attr1.qp_num = pmsg->qp_r.qp_num; /* QP1 == MIC QPr */
@@ -954,6 +1175,10 @@ static int mix_mr_create(mcm_scif_dev_t *smd, dat_mix_mr_t *pmsg)
 
 	pmsg->mr_id = m_mr->entry.tid;
 	smd->ref_cnt++;
+	if (MFO_EP(&smd->md->addr)) {
+		m_mr->mre.ib_lkey = m_mr->entry.tid;
+		m_mr->mre.ib_rkey = m_mr->entry.tid;
+	}
 	pmsg->hdr.status = MIX_SUCCESS;
 
 	mlog(8, " mr[%d] - len %d lmr_ctx %p, scif_addr %Lx, scif_off 0x%x, ib addr %Lx ib_rkey 0x%x\n",
@@ -1042,8 +1267,8 @@ void mix_dto_event(struct mcm_cq *m_cq, struct dat_mix_wc *wc, int nc)
 
 		if (msg.wc[i].status != IBV_WC_SUCCESS) {
 			if (msg.wc[i].status  != IBV_WC_WR_FLUSH_ERR) {
-				mlog(0, " ERROR (ep=%d): cq %p id %d ctx %p stat %d"
-					" [%d:%d] op 0x%x ln %d wr_id %p wc's %d verr 0x%x errno=%d,%s\n",
+				mlog(0, " [%d:%d] ERROR (ep=%d): cq %p id %d ctx %p stat %d"
+					"  op 0x%x ln %d wr_id %p wc's %d verr 0x%x errno=%d,%s\n",
 					m_cq->smd->md->mc->scif_id, m_cq->smd->entry.tid,
 					m_cq->smd->scif_op_ep, m_cq, msg.cq_id, msg.cq_ctx,
 					msg.wc[i].status, msg.wc[i].opcode, msg.wc[i].byte_len,
@@ -1083,8 +1308,9 @@ void mix_cm_event(mcm_cm_t *m_cm, uint32_t event)
 		mpxy_unlock(&m_cm->lock);
 	}
 
-	mlog(2, " MIX_CM_EVENT: cm %p cm_id %d, ctx %p, event 0x%x dev_id %d\n",
-	     m_cm, m_cm->entry.tid, msg.cm_ctx, event, m_cm->smd->entry.tid);
+	mlog(2, " MIX_CM_EVENT: (ep=%d) cm %p id %d ctx %p event 0x%x dev_id %d\n",
+		m_cm->smd->scif_ev_ep, m_cm, m_cm->entry.tid,
+		msg.cm_ctx, event, m_cm->smd->entry.tid);
 
 	len = sizeof(dat_mix_cm_event_t);
 	mpxy_lock(&m_cm->smd->evlock);
@@ -1155,10 +1381,16 @@ static int mix_cm_req_out(mcm_scif_dev_t *smd, dat_mix_cm_t *pmsg, scif_epd_t sc
 	mcm_hton_wrc((mcm_wrc_info_t *)m_cm->msg.p_proxy, &m_qp->wrc); /* PI WR/WC raddr,rkey info */
 	m_cm->msg.seg_sz = mix_buffer_sg_po2;
 
+	/* intra-node; set QPt to HST lid */
+	if (smd->md->m_lid) {
+		m_cm->msg.saddr2.lid = smd->md->addr.lid;
+		memcpy(&m_cm->msg.saddr2.gid[0], &smd->md->addr.gid, 16);
+	}
+
 	mlog(2," QP2 0x%x QP1 0x%x:"
 	       " CM sPORT 0x%x sQPN 0x%x sLID 0x%x - dPORT 0x%x dQPN 0x%x dLID 0x%x, psz %d %s\n",
 		m_cm->msg.saddr2.qpn, m_cm->msg.saddr1.qpn,
-		ntohs(m_cm->msg.sport), ntohl(m_cm->msg.sqpn), ntohs(m_cm->msg.saddr1.lid),
+		ntohs(m_cm->msg.sport), ntohl(m_cm->msg.sqpn), ntohs(m_cm->msg.saddr2.lid),
 		ntohs(m_cm->msg.dport), ntohl(m_cm->msg.dqpn), ntohs(m_cm->msg.daddr1.lid),
 		ntohs(m_cm->msg.p_size), mcm_map_str(m_cm->msg.daddr1.ep_map));
 
@@ -1222,8 +1454,10 @@ static int mix_cm_disc_out(mcm_scif_dev_t *smd, dat_mix_cm_t *pmsg, scif_epd_t s
 		return 0;
 	}
 
-	/* process DREQ */
+	/* process DREQ, flush QP */
 	mcm_cm_disc(m_cm);
+	if (m_cm->m_qp)
+		mcm_flush_qp(m_cm->m_qp); /* move QP to error, flush */
 	return 0;
 }
 
@@ -1378,7 +1612,7 @@ int mix_cm_rep_in(mcm_cm_t *m_cm, dat_mcm_msg_t *pkt, int pkt_len)
 	     m_cm->m_qp->wrc_rem.wc_sz, m_cm->m_qp->wrc_rem.wc_end);
 
 	/* MXS <- MSS or HOST, fabric: TX: QP2->QP1 direct, RX: QP1<-QP2 proxy */
-	if ((MXS_EP(&m_cm->md->addr) && !MXS_EP(&m_cm->msg.daddr1)) &&
+	if ((MXF_EP(&m_cm->md->addr) && !MXF_EP(&m_cm->msg.daddr1)) &&
 	    system_guid != m_cm->msg.sys_guid) {
 		mlog(2, " MXS <- %s remote \n", mcm_map_str(m_cm->msg.daddr1.ep_map));
 
@@ -1409,7 +1643,7 @@ int mix_cm_rep_in(mcm_cm_t *m_cm, dat_mcm_msg_t *pkt, int pkt_len)
 		mpxy_unlock(&m_cm->smd->qprlock);
 
 	/* MXS <- MXS, proxy-in both sides, fabric, 1 QP only  */
-	} else if ((MXS_EP(&m_cm->md->addr) && MXS_EP(&m_cm->msg.daddr1)) &&
+	} else if ((MXF_EP(&m_cm->md->addr) && MXF_EP(&m_cm->msg.daddr1)) &&
 		   system_guid != m_cm->msg.sys_guid) {
 		mlog(2, " MXS <- MXS remote \n");
 
@@ -1430,7 +1664,7 @@ int mix_cm_rep_in(mcm_cm_t *m_cm, dat_mcm_msg_t *pkt, int pkt_len)
 		m_qp_destroy_pi(m_cm->m_qp);
 
 	/* MXS <- MXS, proxy-in both sides, inside system, no QP's, SCIF services only */
-	} else if ((MXS_EP(&m_cm->md->addr) && MXS_EP(&m_cm->msg.daddr1)) &&
+	} else if ((MXF_EP(&m_cm->md->addr) && MXF_EP(&m_cm->msg.daddr1)) &&
 		   system_guid == m_cm->msg.sys_guid) {
 		mlog(0, " MXS <- MXS local NOT SUPPORTED \n");
 		qp = NULL;
@@ -1439,10 +1673,10 @@ int mix_cm_rep_in(mcm_cm_t *m_cm, dat_mcm_msg_t *pkt, int pkt_len)
 	} else {
 		mlog(2, " MSS <- %s remote \n", mcm_map_str(m_cm->msg.daddr1.ep_map));
 
-		if (MXS_EP(&m_cm->msg.daddr1) && m_pi_prep_rcv_q(m_cm->m_qp))
+		if (MXF_EP(&m_cm->msg.daddr1) && m_pi_prep_rcv_q(m_cm->m_qp))
 				goto err;
 
-		if (!MXS_EP(&m_cm->msg.daddr1))
+		if (!MXF_EP(&m_cm->msg.daddr1))
 			m_pi_destroy_wc_q(m_cm->m_qp); /* created if ep_map was unknown */
 
 		qp = m_cm->m_qp->ib_qp2;
@@ -1572,14 +1806,14 @@ int mix_cm_rtu_in(mcm_cm_t *m_cm, dat_mcm_msg_t *pkt, int pkt_len)
 		m_cm->md->mc->scif_id, m_cm->smd->entry.tid,
 		m_cm->md->cntrs ? (uint32_t)((uint64_t *)m_cm->md->cntrs)[MCM_CM_RTU_IN]:0,
 		m_cm, htons(pkt->daddr1.lid),
-		MXS_EP(&m_cm->msg.daddr1) && MXS_EP(&m_cm->msg.saddr1) ?
+		MXF_EP(&m_cm->msg.daddr1) && MXF_EP(&m_cm->msg.saddr1) ?
 			htonl(m_cm->msg.daddr2.qpn):htonl(m_cm->msg.daddr1.qpn),
 		htons(pkt->dport), system_guid, mcm_map_str(pkt->daddr1.ep_map),
 		htons(pkt->saddr2.lid), htonl(pkt->saddr2.qpn),
 		htons(pkt->sport), ntohll(pkt->sys_guid), mcm_map_str(pkt->saddr2.ep_map));
 
-	/* MXS_EP <- HST_EP, host sends WC on RTU, save WRC info */
-	if (MXS_EP(&pkt->daddr1) && HST_EP(&pkt->saddr2)) {
+	/* MXF_EP <- HST_EP, host sends WC on RTU, save WRC info */
+	if (MXF_EP(&pkt->daddr1) && HST_EP(&pkt->saddr2)) {
 		mcm_ntoh_wrc(&m_cm->m_qp->wrc_rem, (mcm_wrc_info_t *)pkt->p_proxy);
 		mlog(2, " WRC_rem: m_qp %p - addr 0x%Lx rkey 0x%x len %d, sz %d end %d\n",
 		     m_cm->m_qp, m_cm->m_qp->wrc_rem.wc_addr, m_cm->m_qp->wrc_rem.wc_rkey,
@@ -1587,6 +1821,10 @@ int mix_cm_rtu_in(mcm_cm_t *m_cm, dat_mcm_msg_t *pkt, int pkt_len)
 		     m_cm->m_qp->wrc_rem.wc_end);
 	}
 
+	mpxy_lock(&m_cm->lock);
+	m_cm->state = MCM_CONNECTED;
+	mpxy_unlock(&m_cm->lock);
+
 	/* Forward, as is, conn_reply message to MIC client, with remote QP info */
 	msg.hdr.ver = m_cm->md->mc->ver;
 	msg.hdr.flags = MIX_OP_REQ;
@@ -1683,7 +1921,7 @@ static int mix_cm_rep_out(mcm_scif_dev_t *smd, dat_mix_cm_t *pmsg, scif_epd_t sc
 	     m_cm->m_qp->wrc_rem.wc_sz, m_cm->m_qp->wrc_rem.wc_end);
 
 	/* MXS -> MSS or HOST, remote: need QPr1, saddr1 on mpxyd */
-	if ((MXS_EP(&m_cm->md->addr) && !MXS_EP(&m_cm->msg.daddr1)) &&
+	if ((MXF_EP(&m_cm->md->addr) && !MXF_EP(&m_cm->msg.daddr1)) &&
 	    (system_guid != m_cm->msg.sys_guid) ) {
 		mlog(2, " MXS -> %s remote \n", mcm_map_str(m_cm->msg.daddr1.ep_map));
 
@@ -1695,13 +1933,13 @@ static int mix_cm_rep_out(mcm_scif_dev_t *smd, dat_mix_cm_t *pmsg, scif_epd_t sc
 
 		/* KR to KL or XEON, QP1<-QP2 and QP2->QP1 */
 		/* update the src information in CM msg */
-		m_cm->msg.saddr1.ep_map = MIC_XSOCK_DEV;
+		m_cm->msg.saddr1.ep_map = smd->md->addr.ep_map;
 		m_cm->msg.saddr1.qpn = htonl(m_cm->m_qp->ib_qp1->qp_num);
 		m_cm->msg.saddr1.qp_type = m_cm->m_qp->qp_attr1.qp_type;
 	        m_cm->msg.saddr1.lid = m_cm->smd->md->addr.lid;
 		memcpy(&m_cm->msg.saddr1.gid[0], &m_cm->smd->md->addr.gid, 16);
 
-		m_cm->msg.saddr2.ep_map = MIC_XSOCK_DEV;
+		m_cm->msg.saddr2.ep_map = smd->md->addr.ep_map;
 		m_cm->msg.saddr2.qpn = htonl(m_cm->m_qp->ib_qp2->qp_num);
 		m_cm->msg.saddr2.qp_type = m_cm->m_qp->qp_attr2.qp_type;
 	        m_cm->msg.saddr2.lid = m_cm->smd->md->addr.lid;
@@ -1725,7 +1963,7 @@ static int mix_cm_rep_out(mcm_scif_dev_t *smd, dat_mix_cm_t *pmsg, scif_epd_t sc
 		mpxy_unlock(&smd->qprlock);
 
 	/* MXS -> MXS, proxy-in both sides, remote, 1 QP - already setup */
-	} else if ((MXS_EP(&m_cm->md->addr) && MXS_EP(&m_cm->msg.daddr1)) &&
+	} else if ((MXF_EP(&m_cm->md->addr) && MXF_EP(&m_cm->msg.daddr1)) &&
 		   (system_guid != m_cm->msg.sys_guid)) {
 		mlog(2, " MXS -> MXS remote \n");
 
@@ -1733,8 +1971,8 @@ static int mix_cm_rep_out(mcm_scif_dev_t *smd, dat_mix_cm_t *pmsg, scif_epd_t sc
 			goto err;
 
 		/* update the QPt src information in CM msg */
-		m_cm->msg.saddr1.ep_map = MIC_XSOCK_DEV;
-		m_cm->msg.saddr2.ep_map = MIC_XSOCK_DEV;
+		m_cm->msg.saddr1.ep_map = smd->md->addr.ep_map;
+		m_cm->msg.saddr2.ep_map = smd->md->addr.ep_map;
 		m_cm->msg.saddr2.qpn = htonl(m_cm->m_qp->ib_qp2->qp_num);
 		m_cm->msg.saddr2.qp_type = m_cm->m_qp->qp_attr2.qp_type;
 		m_cm->msg.saddr2.lid = m_cm->smd->md->addr.lid;
@@ -1752,7 +1990,7 @@ static int mix_cm_rep_out(mcm_scif_dev_t *smd, dat_mix_cm_t *pmsg, scif_epd_t sc
 		mpxy_unlock(&smd->qprlock);
 
 	/* MXS -> MXS, proxy-in both sides, local, no QP's, SCIF services only */
-	} else if ((MXS_EP(&m_cm->md->addr) && MXS_EP(&m_cm->msg.daddr1)) &&
+	} else if ((MXF_EP(&m_cm->md->addr) && MXF_EP(&m_cm->msg.daddr1)) &&
 		   (system_guid == m_cm->msg.sys_guid)) {
 		mlog(2, " MXS -> MXS local - MODE NOT SUPPORTED, running MXS -> MXS remote mode\n");
 
@@ -1760,8 +1998,8 @@ static int mix_cm_rep_out(mcm_scif_dev_t *smd, dat_mix_cm_t *pmsg, scif_epd_t sc
 		dgid = (union ibv_gid *)m_cm->msg.daddr2.gid;
 		dqpn = m_cm->msg.daddr2.qpn;
 		dlid = m_cm->msg.daddr2.lid;
-		m_cm->msg.saddr1.ep_map = MIC_XSOCK_DEV;
-		m_cm->msg.saddr2.ep_map = MIC_XSOCK_DEV;
+		m_cm->msg.saddr1.ep_map = smd->md->addr.ep_map;
+		m_cm->msg.saddr2.ep_map = smd->md->addr.ep_map;
 
 	/* MSS -> MSS,MXS,HOST - fabric, TX: QP2->QP1 on mpxyd and RX: QP1->QP2 on MIC */
 	} else {
@@ -1769,8 +2007,12 @@ static int mix_cm_rep_out(mcm_scif_dev_t *smd, dat_mix_cm_t *pmsg, scif_epd_t sc
 
 		/* KL to KL, QP1->QP2 and QP1<-QP2 */
 		/* update the QPt src information in CM msg, QPr updated on MIC */
-		m_cm->msg.saddr1.ep_map = MIC_SSOCK_DEV;
-		m_cm->msg.saddr2.ep_map = MIC_SSOCK_DEV;
+		m_cm->msg.saddr1.ep_map = smd->md->addr.ep_map;
+		if (smd->md->m_lid) {
+			m_cm->msg.saddr1.lid = smd->md->m_lid;
+			memcpy(&m_cm->msg.saddr1.gid[0], &m_cm->smd->md->m_gid, 16);
+		}
+		m_cm->msg.saddr2.ep_map = smd->md->addr.ep_map;
 		m_cm->msg.saddr2.qpn = htonl(m_cm->m_qp->ib_qp2->qp_num);
 		m_cm->msg.saddr2.qp_type = m_cm->m_qp->qp_attr2.qp_type;
 		m_cm->msg.saddr2.lid = m_cm->smd->md->addr.lid;
@@ -1781,7 +2023,7 @@ static int mix_cm_rep_out(mcm_scif_dev_t *smd, dat_mix_cm_t *pmsg, scif_epd_t sc
 		dqpn = m_cm->msg.daddr1.qpn;
 		dlid = m_cm->msg.daddr1.lid;
 
-		if (MXS_EP(&m_cm->msg.daddr1)) {
+		if (MXF_EP(&m_cm->msg.daddr1)) {
 			if (m_pi_create_wc_q(m_cm->m_qp, mcm_rx_entries))
 				goto err;
 
@@ -2031,7 +2273,7 @@ retry_mr:
 			" pcnt %d sg_rate %d hd %d tl %d sz %d m_idx %x\n",
 			m_qp->smd->md->mc->scif_id, m_qp->smd->entry.tid,
 			m_qp->r_entry.tid,
-			(MXS_EP(&m_qp->cm->msg.daddr1)) ? "po_pi":"po_direct",
+			(MXF_EP(&m_qp->cm->msg.daddr1)) ? "po_pi":"po_direct",
 			m_qp, m_wr, m_wr->wr.wr_id, m_wr->wr.send_flags,
 			m_qp->post_cnt,	mcm_rw_signal, m_qp->wr_hd, m_qp->wr_tl,
 			m_wr->wr.sg_list->length, m_wr->m_idx);
@@ -2219,6 +2461,12 @@ int mix_scif_recv(mcm_scif_dev_t *smd, scif_epd_t scif_ep)
 	case MIX_QP_FREE:
 		ret = mix_qp_destroy(smd, phdr);
 		break;
+	case MIX_PZ_CREATE:
+		ret = mix_pz_create(smd, (dat_mix_pz_t *)phdr);
+		break;
+	case MIX_PZ_FREE:
+		ret = mix_pz_destroy(smd, (dat_mix_pz_t *)phdr);
+		break;
 	case MIX_CQ_CREATE:
 		ret = mix_cq_create(smd, (dat_mix_cq_t *)phdr);
 		break;
@@ -2243,6 +2491,12 @@ int mix_scif_recv(mcm_scif_dev_t *smd, scif_epd_t scif_ep)
 	case MIX_PROV_ATTR:
 		ret = mix_prov_attr(smd, (dat_mix_attr_t *)phdr);
 		break;
+	case MIX_QUERY_DEVICE:
+		ret = mix_query_device(smd, (dat_mix_device_attr_t *)phdr);
+		break;
+	case MIX_QUERY_PORT:
+		ret = mix_query_port(smd, (dat_mix_port_attr_t *)phdr);
+		break;
 	case MIX_CM_REQ:
 		ret = mix_cm_req_out(smd, (dat_mix_cm_t *)phdr, scif_ep);
 		break;
diff --git a/dapl/svc/mpxy_in.c b/dapl/svc/mpxy_in.c
old mode 100644
new mode 100755
index cdfab35..adf5021
--- a/dapl/svc/mpxy_in.c
+++ b/dapl/svc/mpxy_in.c
@@ -416,7 +416,7 @@ int m_pi_prep_rcv_q(struct mcm_qp *m_qp)
 	int i;
 
 	/* MXS -> MSS or HST, PI service will be on QP1 */
-	if (MXS_EP(&m_qp->smd->md->addr) &&
+	if (MXF_EP(&m_qp->smd->md->addr) &&
 	   (MSS_EP(&m_qp->cm->msg.daddr1) || HST_EP(&m_qp->cm->msg.daddr1)))
 	        ib_qp = m_qp->ib_qp1;
 	else
@@ -535,7 +535,7 @@ static int m_pi_send_wc(struct mcm_qp *m_qp, struct mcm_wr_rx *wr_rx, int status
 		wr.wr.rdma.rkey, sge.length, wr_rx->wr.opcode);
 
 	/* MXS -> MSS or HST, PI service will be on QP1 */
-	if (MXS_EP(&m_qp->smd->md->addr) &&
+	if (MXF_EP(&m_qp->smd->md->addr) &&
 	   (MSS_EP(&m_qp->cm->msg.daddr1) || HST_EP(&m_qp->cm->msg.daddr1)))
 	        ib_qp = m_qp->ib_qp1;
 	else
@@ -722,25 +722,23 @@ void m_pi_req_event(struct mcm_qp *m_qp, struct mcm_wr_rx *wr_rx, struct ibv_wc
 {
 	mlog(4, " WR_rx[%d] %p %s complete po-addr=%p ln=%d, key=%x ctx=%Lx\n",
 		wr_rx->w_idx, wr_rx,
-		wr_rx->wr.opcode == IBV_WR_RDMA_READ ? "RR":"RW_IMM WC",
+		type == WRID_RX_RR ? "RR":"RW_IMM WC",
 		wr_rx->sg[0].addr, wr_rx->sg[0].length,
 		wr_rx->sg[0].lkey, wr_rx->context);
 
 	if (wc->status && (wc->status != IBV_WC_WR_FLUSH_ERR)) {
-		char *sbuf = (char*)wr_rx->sg[1].addr;
-
-		mlog(0," WR ERR: st %d, vn %x pst %d cmp %d qstate 0x%x\n",
-			wc->status, wc->vendor_err, m_qp->post_cnt,
-			m_qp->comp_cnt, m_qp->ib_qp2->state);
-		mlog(0, " WR ERR: wr_rx %p laddr %p=0x%x - %p=0x%x, len=%d, lkey=%x\n",
-			wr_rx,  sbuf, sbuf[0], &sbuf[wr_rx->sg[1].length],
-			sbuf[wr_rx->sg[1].length], wr_rx->sg[1].length, wr_rx->sg[1].lkey);
-		mlog(0, " WR ERR: wr_id %Lx sglist %p sge %d op %d flgs"
-			" %d idata 0x%x raddr %p rkey %x saddr %p key %x ln %d\n",
-		     wr_rx->org_id, wr_rx->sg, wr_rx->wr.num_sge,
-		     wr_rx->wr.opcode, wr_rx->wr.send_flags, wr_rx->wr.imm_data,
-		     wr_rx->wr.wr.rdma.remote_addr, wr_rx->wr.wr.rdma.rkey,
-		     wr_rx->sg[0].addr, wr_rx->sg[0]. lkey,wr_rx->sg[0].length);
+		mlog(0," WR ERR: %s st %d, vn %x rr %d wt %d WC[%d] %p %x %d\n",
+			type == WRID_RX_RR ? "RR":"RW_IMM WC",
+			wc->status, wc->vendor_err, m_qp->post_cnt_rr,
+			m_qp->post_cnt_wt, m_qp->wc_hd_rem,
+			m_qp->wrc_rem.wc_addr, m_qp->wrc_rem.wc_rkey,
+			m_qp->wrc_rem.wc_sz);
+		mlog(0, " WR ERR: wr_rx[%d] oid %Lx sge %d op %d flgs %d"
+			" imm 0x%x SRC %p %x ln %d, DST %p %x ln %d\n",
+			wr_rx->w_idx, wr_rx->org_id, wr_rx->wr.num_sge,
+		        wr_rx->wr.opcode, wr_rx->wr.send_flags, wr_rx->wr.imm_data,
+		        wr_rx->sg[0].addr, wr_rx->sg[0].lkey, wr_rx->sg[0].length,
+		        wr_rx->sg[1].addr, wr_rx->sg[1].lkey, wr_rx->sg[1].length);
 
 		/* send WC with ERR to RW initiator, hold rxlock */
 		mpxy_lock(&m_qp->rxlock);
@@ -787,6 +785,20 @@ static void m_pi_post_read(struct mcm_qp *m_qp, struct mcm_wr_rx *wr_rx)
 	/* shared proxy-in buffer, device level serialization */
 	mpxy_lock(&smd->rblock);
 
+	/* Stall if data request arrives before RTU */
+	if (m_qp->cm && m_qp->cm->state != MCM_CONNECTED && !m_qp->post_cnt_rr) {
+		mlog(1, " [%d:%d:%d] WARN: WR[%d] %p PAUSED !CONN: po-addr=%p"
+			" ln=%d, key=%x ctx=%Lx\n",
+			m_qp->smd->md->mc->scif_id, m_qp->smd->entry.tid,
+			m_qp->r_entry.tid, wr_rx->w_idx, wr_rx,
+			wr_rx->sg[0].addr, wr_rx->sg[0].length,
+			wr_rx->sg[0].lkey, wr_rx->context);
+		wr_rx->flags |= M_READ_PAUSED;
+		m_qp->stall_cnt_rr++;
+		mpxy_unlock(&smd->rblock);
+		return;
+	}
+
 	/* slice out proxy buffer for this segment */
 	l_start = ALIGN_64(smd->m_hd_r);
 	if ((l_start + l_len) > smd->m_len_r)
@@ -901,7 +913,7 @@ static void m_pi_post_read(struct mcm_qp *m_qp, struct mcm_wr_rx *wr_rx)
 	mpxy_unlock(&smd->rblock);
 
 	/* MXS -> MSS or HST, PI service will be on QP1 */
-	if (MXS_EP(&m_qp->smd->md->addr) &&
+	if (MXF_EP(&m_qp->smd->md->addr) &&
 	   (MSS_EP(&m_qp->cm->msg.daddr1) || HST_EP(&m_qp->cm->msg.daddr1)))
 	        ib_qp = m_qp->ib_qp1;
 	else
@@ -961,7 +973,7 @@ buf_err:
 
 void m_pi_rcv_event(struct mcm_qp *m_qp, wrc_idata_t *wrc)
 {
-	mlog(8," WRC id %x, type %x, flags %x\n", wrc->id, wrc->type, wrc->flags);
+	mlog(8," WRC id %d, type %x, flags %x\n", wrc->id, wrc->type, wrc->flags);
 	if (wrc->type == M_WR_TYPE) {
 		struct mcm_wr_rx *wr_rx;
 
@@ -974,9 +986,10 @@ void m_pi_rcv_event(struct mcm_qp *m_qp, wrc_idata_t *wrc)
 		mcm_ntoh_wr_rx(wr_rx); /* received in network order, convert */
 		wr_rx->context = (uint64_t)(uintptr_t)m_qp;  /* local side QP context */
 
-		mlog(8," WR_rx[%d] %p org_id %Lx wc_tl_rem %d flgs %x wr.wr_id %Lx imm 0x%x\n",
+		mlog(8," WR_rx[%d] %p id %Lx wc_tl_rem %d flgs %x wr.wr_id %Lx imm 0x%x raddr %Lx,%x,%d\n",
 			wrc->id, wr_rx, wr_rx->org_id, wr_rx->w_idx, wr_rx->flags,
-			wr_rx->wr.wr_id, wr_rx->wr.imm_data);
+			wr_rx->wr.wr_id, wr_rx->wr.imm_data,
+			wr_rx->sg[0].addr, wr_rx->sg[0].lkey, wr_rx->sg[0].length);
 
 		mpxy_lock(&m_qp->rxlock);
 		m_qp->wc_tl_rem = wr_rx->w_idx; /* remote WC tail update in WR */
@@ -1089,7 +1102,7 @@ retry:
 			r_wr.wr_id = (uint64_t)(uintptr_t) m_qp;
 
 			/* MXS -> MSS or HST, PI service will be on QP1 */
-			if (MXS_EP(&m_qp->smd->md->addr) &&
+			if (MXF_EP(&m_qp->smd->md->addr) &&
 			   (MSS_EP(&m_qp->cm->msg.daddr1) || HST_EP(&m_qp->cm->msg.daddr1)))
 			        ib_qp = m_qp->ib_qp1;
 			else
@@ -1100,7 +1113,7 @@ retry:
 				err = ibv_post_recv(ib_qp, &r_wr, &r_err);
 				if (err) {
 					mlog(0,"ERR: qp %p (%s) qpn %x ibv_post_recv ret = %d %s\n",
-						m_qp, (MXS_EP(&m_qp->smd->md->addr) &&
+						m_qp, (MXF_EP(&m_qp->smd->md->addr) &&
 						MSS_EP(&m_qp->cm->msg.daddr1)) ? "QP1":"QP2",
 						m_qp->ib_qp2 ?
 						m_qp->ib_qp2->qp_num:m_qp->ib_qp1->qp_num,
@@ -1129,18 +1142,10 @@ void m_pi_pending_wr(struct mcm_qp *m_qp, int *data)
 	struct mcm_wr_rx *wr_rx;
 	int wr_idx, wr_max, wr_cnt;
 
-	if (m_qp->cm && m_qp->cm->state != MCM_CONNECTED) {
-		if (m_qp->post_cnt_wt) {
-			mlog(8," !CONN: qp %p cm %p %s tl_r %d wt_tl_r %d hd_r %d pp %d st %d data %d\n",
-				m_qp, m_qp->cm, m_qp->cm ? mcm_state_str(m_qp->cm->state):"",
-				m_qp->wr_tl_r, m_qp->wr_tl_r_wt,
-				m_qp->wr_hd_r, m_qp->post_cnt_wt,
-				m_qp->stall_cnt_rr, *data);
-		}
-		return;
-	}
-
 	mpxy_lock(&m_qp->rxlock);
+	if (m_qp->cm && m_qp->cm->state != MCM_CONNECTED && !m_qp->post_cnt_rr)
+		goto done;
+
 	wr_max = mcm_wr_max;
 	wr_idx = m_qp->wr_tl_r_wt; /* last write_to marker */
 	wr_cnt = 0;
diff --git a/dapl/svc/mpxy_out.c b/dapl/svc/mpxy_out.c
old mode 100644
new mode 100755
index 8df32f7..eff81fc
--- a/dapl/svc/mpxy_out.c
+++ b/dapl/svc/mpxy_out.c
@@ -103,7 +103,7 @@ int m_po_create_bpool(struct mcm_qp *m_qp, int entries)
 				 m_qp, entries * sizeof(mcm_wr_rx_t), entries);
 			goto err;
 		}
-		memset(m_qp->wr_buf, 0, entries * sizeof(mcm_wr_rx_t));
+		memset(m_qp->wr_buf_rx, 0, entries * sizeof(mcm_wr_rx_t));
 
 		m_qp->wr_buf_rx_mr = ibv_reg_mr(m_qp->smd->md->pd, (void*)m_qp->wr_buf_rx,
 						entries * sizeof(mcm_wr_rx_t),
@@ -313,7 +313,7 @@ static int m_po_send_pi(struct mcm_qp *m_qp, struct mcm_wr *m_wr, int wr_idx)
 
 
 	/* MXS -> MSS or HST, PI service will be on QP1 */
-	if (MXS_EP(&m_qp->smd->md->addr) &&
+	if (MXF_EP(&m_qp->smd->md->addr) &&
 	   (MSS_EP(&m_qp->cm->msg.daddr1) || HST_EP(&m_qp->cm->msg.daddr1)))
 	        ib_qp = m_qp->ib_qp1;
 	else
@@ -443,15 +443,15 @@ void m_po_pending_wr(struct mcm_qp *m_qp, int *data)
 					m_wr->wr.send_flags &= ~IBV_SEND_SIGNALED;
 			}
 
-			if (!(MXS_EP(&m_qp->cm->msg.daddr1)) &&
+			if (!(MXF_EP(&m_qp->cm->msg.daddr1)) &&
 			     (m_wr->wr.send_flags & IBV_SEND_SIGNALED)) {
 				m_qp->post_sig_cnt++;
 
-				mlog(0x10, "[%d:%d:%d] %s_RW_post_sig: qp %p wr %p wr_id %p flgs 0x%x,"
+				mlog(0x10, "[%d:%d:%d] %s_RW_post_sig: WR[%d] qp %p wr_id %p flgs 0x%x,"
 					" pcnt %d sg_rate %d hd %d tl %d sz %d m_idx %x\n",
 					m_qp->smd->md->mc->scif_id, m_qp->smd->entry.tid, m_qp->r_entry.tid,
-					(MXS_EP(&m_qp->cm->msg.daddr1)) ? "po_pi":"po_direct",
-					m_qp, m_wr, m_wr->wr.wr_id, m_wr->wr.send_flags,
+					(MXF_EP(&m_qp->cm->msg.daddr1)) ? "po_pi":"po_direct",
+					m_wr->w_idx, m_qp, m_wr->wr.wr_id, m_wr->wr.send_flags,
 					m_qp->post_cnt,	mcm_rw_signal, m_qp->wr_hd, m_qp->wr_tl,
 					m_wr->wr.sg_list->length, m_wr->m_idx);
 #if MCM_PROFILE
@@ -463,7 +463,7 @@ void m_po_pending_wr(struct mcm_qp *m_qp, int *data)
 			m_wr->wr.wr_id = WRID_SET(m_wr, WRID_TX_RW);
 
 			errno = 0;
-			if (MXS_EP(&m_qp->cm->msg.daddr1)) /* remote PI */
+			if (MXF_EP(&m_qp->cm->msg.daddr1)) /* remote PI */
 				ret = m_po_send_pi(m_qp, m_wr, wr_idx);
 			else
 				ret = ibv_post_send(m_qp->ib_qp2, &m_wr->wr, &bad_wr);
@@ -475,6 +475,10 @@ void m_po_pending_wr(struct mcm_qp *m_qp, int *data)
 				wc.byte_len = m_wr->sg->length;
 				wc.status = ret ? IBV_WC_GENERAL_ERR : IBV_WC_SUCCESS;
 				wc.opcode = m_wr->wr.opcode == IBV_WR_SEND ? IBV_WC_SEND:IBV_WC_RDMA_WRITE;
+				if (m_wr->wr.opcode == IBV_WR_RDMA_WRITE_WITH_IMM)
+					wc.wc_flags = IBV_WC_WITH_IMM;
+				else
+					wc.wc_flags = 0;
 				wc.vendor_err = ret;
 				mix_dto_event(m_qp->ib_qp2->send_cq->cq_context, &wc, 1);
 			}
@@ -789,13 +793,14 @@ retry_mr:
 						mpxy_unlock(&smd->tblock);
 						goto bail;
 					}
-					mlog(0x10, "[%d:%d:%d] %s_RF_post_sig: qp %p wr %p wr_id %p flgs 0x%x,"
-						" pcnt %d sg_rate %d hd %d tl %d sz %d m_idx %x\n",
-						m_qp->smd->md->mc->scif_id, m_qp->smd->entry.tid,
-						m_qp->r_entry.tid,
-						(MXS_EP(&m_qp->cm->msg.daddr1)) ? "po_pi":"po_direct",
+					mlog(0x10, "[%d:%d:%d] %s_RF_post_sig: WR[%d] qp %p wr_id %p flgs 0x%x,"
+						" sg_rate %d hd %d tl %d sz %d m_idx %x\n",
+						m_qp->smd->md->mc->scif_id, m_qp->smd->entry.tid, m_qp->r_entry.tid,
 						m_qp, m_wr, pmsg->wr.wr_id, m_wr->wr.send_flags,
 						m_qp->post_cnt,	mcm_rw_signal, m_qp->wr_hd, m_qp->wr_tl,
+						(MXF_EP(&m_qp->cm->msg.daddr1)) ? "po_pi":"po_direct",
+						m_wr->w_idx, m_qp, pmsg->wr.wr_id, m_wr->wr.send_flags,
+						mcm_rw_signal, m_qp->wr_hd, m_qp->wr_tl,
 						m_wr->wr.sg_list->length, m_wr->m_idx);
 				}
 
@@ -899,9 +904,13 @@ bail:
 		struct dat_mix_wc wc;
 
 		wc.wr_id = pmsg->wr.wr_id;
-		wc.byte_len = 0;
+		wc.byte_len = pmsg->sge[0].length;
 		wc.status = wc_err;
 		wc.opcode = pmsg->wr.opcode == IBV_WR_SEND ? IBV_WC_SEND:IBV_WC_RDMA_WRITE;
+		if (pmsg->wr.opcode == IBV_WR_RDMA_WRITE_WITH_IMM)
+			wc.wc_flags = IBV_WC_WITH_IMM;
+		else
+			wc.wc_flags = 0;
 		wc.vendor_err = ret;
 		mix_dto_event(m_qp->ib_qp2->send_cq->cq_context, &wc, 1);
 	}
@@ -929,15 +938,18 @@ void m_po_wc_event(struct mcm_qp *m_qp, struct mcm_wc_rx *wc_rx, int wc_idx)
 		     wc_rx->wr_idx, m_qp->wr_end);
 		return;
 	}
-	m_cq = m_qp->m_cq_rx;
+	m_cq = m_qp->m_cq_tx;
 
 	if (wc_rx->wc.status == IBV_WC_SUCCESS) {
 		mlog(8," WC_RX: SUCCESS m_wr %p idx %d=%d flags 0x%x \n",
 			m_wr, m_wr->w_idx, wc_rx->wr_idx, m_wr->flags);
 		if (m_wr->flags & M_SEND_CN_SIG) {
+			m_wr->flags &= ~M_SEND_CN_SIG;
 			wc_ev.wr_id = m_wr->org_id;
 			wc_ev.status = IBV_WC_SUCCESS;
 			wc_ev.byte_len = wc_rx->wc.byte_len;
+			wc_ev.opcode = wc_rx->wc.opcode;
+			wc_ev.wc_flags = wc_rx->wc.wc_flags;
 			event++;
 		}
 	}
@@ -1053,6 +1065,7 @@ retry:
 		       i+1, ret, wc[i].wr_id, wc[i].status, wc[i].opcode,
 		       wc[i].vendor_err, wc[i].byte_len, wr_type);
 
+		/* Proxy_in -> */
 		if ((wr_type == WRID_RX_RR) || (wr_type == WRID_RX_RW_IMM)) {
 			m_wr_rx = (struct mcm_wr_rx *)WRID_ADDR(wc[i].wr_id);
 			assert(m_wr_rx);
@@ -1070,19 +1083,26 @@ retry:
 			       wc[i].vendor_err, wc[i].byte_len, wr_type);
 			continue;
 		}
+		/* Proxy_out ->  */
 		m_wr = (struct mcm_wr *)WRID_ADDR(wc[i].wr_id);
 		m_qp = (struct mcm_qp *)m_wr->context;
-		if (MSS_EP(&m_qp->cm->msg.daddr1))
+		if (!MXF_EP(&m_qp->cm->msg.daddr1))
 			m_qp->comp_cnt++;
 		MCNTR(m_qp->smd->md, MCM_QP_WRITE_DONE);
 
-		mlog(8," wr_id[%d of %d] m_wr %p m_qp %p\n", i, ret, m_wr, m_qp);
+		mlog(8," wr_id[%d of %d] m_wr %p m_qp %p\n", i+1, ret, m_wr, m_qp);
 
 		if (wc[i].status == IBV_WC_SUCCESS) {
 			if (m_wr->flags & M_SEND_CN_SIG) {
+				m_wr->flags &= ~M_SEND_CN_SIG;
 				wc_ev[num].wr_id = m_wr->org_id;
 				wc_ev[num].status = IBV_WC_SUCCESS;
 				wc_ev[num].byte_len = wc[i].byte_len;
+				wc_ev[num].opcode = m_wr->wr.opcode == IBV_WR_SEND ? IBV_WC_SEND:IBV_WC_RDMA_WRITE;
+				if (m_wr->wr.opcode == IBV_WR_RDMA_WRITE_WITH_IMM)
+					wc_ev[num].wc_flags = IBV_WC_WITH_IMM;
+				else
+					wc_ev[num].wc_flags = 0;
 				num++;
 			}
 		}
@@ -1112,7 +1132,7 @@ retry:
 		}
 
 		/* Can move PO buffer tail if no peer PI service */
-		if (!MXS_EP(&m_qp->cm->msg.daddr1)) {
+		if (!MXF_EP(&m_qp->cm->msg.daddr1)) {
 #if MCM_PROFILE
 			mcm_qp_prof_ts( m_qp, MCM_QP_IB_RW,
 					m_wr->wr.wr.atomic.swap,
diff --git a/dapl/svc/mpxyd.c b/dapl/svc/mpxyd.c
old mode 100644
new mode 100755
index 80f597c..b04d823
--- a/dapl/svc/mpxyd.c
+++ b/dapl/svc/mpxyd.c
@@ -221,11 +221,23 @@ static struct ibv_context *open_ib_device(struct mcm_ib_dev *md, char *name, int
 			}
 			else {
 				char val[64];
+				struct ibv_device_attr device_attr;
 
+				if (ibv_query_device(ibctx, &device_attr)) {
+					mlog(0,"ERR ibv_device, %s\n", strerror(errno));
+					ibv_close_device(ibctx);
+					ibctx = NULL;
+					goto bail;
+				}
+
+				md->dev_attr.rd_atom_in = device_attr.max_qp_rd_atom;
+				md->dev_attr.rd_atom_out = device_attr.max_qp_init_rd_atom;
 				md->ibdev = iblist[i];
 				if (!rd_dev_file(md->ibdev->ibdev_path,
 				    "device/numa_node", val, sizeof val))
 					md->numa_node = atoi(val);
+				else if (!strncmp(name, "scif", 4))
+					md->numa_node = md->mc->numa_node; /* intra-node, MSS */
 				else
 					mlog(0," ERR ibdev %s numa_node at "
 					     "%s/device/numa_node unreadable\n",
@@ -444,6 +456,7 @@ void mpxy_destroy_smd(mcm_scif_dev_t *smd)
 	mpxy_lock_destroy(&smd->cqlock);
 	mpxy_lock_destroy(&smd->cqrlock);
 	mpxy_lock_destroy(&smd->mrlock);
+	mpxy_lock_destroy(&smd->pzlock);
 	mpxy_lock_destroy(&smd->evlock);
 	mpxy_lock_destroy(&smd->tblock);
 	mpxy_lock_destroy(&smd->rblock);
@@ -616,6 +629,7 @@ static mcm_scif_dev_t *mcm_create_smd(mcm_ib_dev_t *md, scif_epd_t op_ep, scif_e
 	mpxy_lock_init(&smd->cqlock, NULL);  /* cq list */
 	mpxy_lock_init(&smd->cqrlock, NULL);  /* cq rx list */
 	mpxy_lock_init(&smd->mrlock, NULL);  /* mr list */
+	mpxy_lock_init(&smd->pzlock, NULL);  /* pz list */
 	mpxy_lock_init(&smd->evlock, NULL);  /* DTO event, multi-threads */
 	mpxy_lock_init(&smd->tblock, NULL);  /* tx proxy buffer, shared across all QP's */
 	mpxy_lock_init(&smd->rblock, NULL);  /* rx proxy buffer, shared across all QP's */
@@ -628,6 +642,7 @@ static mcm_scif_dev_t *mcm_create_smd(mcm_ib_dev_t *md, scif_epd_t op_ep, scif_e
 	init_list(&smd->cqlist);
 	init_list(&smd->cqrlist);
 	init_list(&smd->mrlist);
+	init_list(&smd->pzlist);
 
 	return smd;
 err:
@@ -657,8 +672,8 @@ mcm_scif_dev_t *mix_open_device(dat_mix_open_t *msg, scif_epd_t op_ep, scif_epd_
 	mcm_scif_dev_t *smd = NULL;
 	int i, ret;
 
-	mlog(8, " Open IB device - %s, IB port %d, scif_node %d EPs %d %d %d op_msg %p\n",
-		msg->name, msg->port, node, op_ep, tx_ep, ev_ep, msg);
+	mlog(8, " IB device - %s, IB port %d, scif_node %d EPs %d %d %d op_msg %p lid %x\n",
+		msg->name, msg->port, node, op_ep, tx_ep, ev_ep, msg, ntohs(msg->dev_addr.lid));
 
 	mc = &mcm_client_list[node];
 
@@ -748,7 +763,9 @@ mcm_scif_dev_t *mix_open_device(dat_mix_open_t *msg, scif_epd_t op_ep, scif_epd_
 	memset(md->cntrs, 0, sizeof(uint64_t) * MCM_ALL_COUNTERS);
 	md->mc = mc;
 	md->port = msg->port;
+	memcpy(&md->addr, &msg->dev_addr, sizeof(dat_mcm_addr_t));
 	md->ibctx = open_ib_device(md, msg->name, msg->port);
+	md->addr.ep_map = msg->dev_addr.ep_map;
 
 	if ((!md->ibctx) || mcm_init_cm_service(md)) {
 		mcm_destroy_md(md);
@@ -769,8 +786,19 @@ found:
 
 	msg->hdr.req_id = smd->entry.tid;
 	msg->hdr.status = MIX_SUCCESS;
+	msg->dev_attr.rd_atom_in = md->dev_attr.rd_atom_in;
+	msg->dev_attr.rd_atom_out = md->dev_attr.rd_atom_out;
+	if (!(mcm_ib_inline_data(md->ibctx))  || !mcm_ib_inline)
+		msg->dev_attr.max_inline = 0;
+
 	memcpy(&md->dev_attr, &msg->dev_attr, sizeof(dat_mix_dev_attr_t));
-	memcpy(&msg->dev_addr, &md->addr, sizeof(dat_mcm_addr_t));
+	memcpy(&msg->dev_addr, &md->addr, sizeof(dat_mcm_addr_t)); /* proxy CM lid */
+
+	/* intra-node: restore MIC lid, gid */
+	if (md->m_lid) {
+		msg->dev_addr.lid = md->m_lid;
+		memcpy(msg->dev_addr.gid, md->m_gid, 16);
+	}
 err:
 	if (!smd) {
 		mlog(0, " ERR: mix_open_device failed for %s - %d\n", msg->name, msg->port);
@@ -789,9 +817,9 @@ err:
 		goto bail;
 	}
 
-	mlog(1, " MIC client: open mdev[%d] %p smd %p mic%d[%d] -> %s[%d] port %d - %s\n",
+	mlog(1, " MIC client: mdev[%d] %p smd %p mic%d[%d] -> %s[%d] port %d lid %x %s\n",
 		md->smd_list.tid, md, smd, mc->scif_id-1, mc->numa_node, msg->name,
-		md->numa_node, msg->port, md->addr.ep_map == MIC_SSOCK_DEV ? "MSS":"MXS");
+		md->numa_node, msg->port, ntohs(msg->dev_addr.lid), mcm_map_str(md->addr.ep_map));
 bail:
 	mpxy_unlock(&mc->oplock);
 	mpxy_unlock(&mc->cmlock);
@@ -1218,7 +1246,7 @@ static void mpxy_server(void)
 		set.revents = 0;
 		mlog(0x8, "Server sleep\n");
 		poll(&set, 1, -1); /* sleep */
-		mlog(0x8, "Server wake, cpu_id=%d\n");
+		mlog(0x8, "Server wake, cpu_id=%d\n", cpu_id);
 		/* process listens */
 		if (mcm_poll(scif_listen_ep, POLLIN) == POLLIN)
 			mix_scif_accept(scif_listen_ep);
@@ -1481,7 +1509,7 @@ void mcm_ib_dev_log(struct mcm_ib_dev *md)
 		md->mc->scif_id, md, ntohs(md->addr.lid), md->port,
 		inet_ntop(AF_INET6, md->addr.gid, gid_str, sizeof(gid_str)),
 		md->qp->qp_num, md->mc->scif_id - 1, md->ibdev->name,
-		md->addr.ep_map == MIC_SSOCK_DEV ? "MSS":"MXS", md->mc->ver);
+		mcm_map_str(md->addr.ep_map), md->mc->ver);
 }
 
 static int check_io_run;
diff --git a/dapl/svc/mpxyd.h b/dapl/svc/mpxyd.h
old mode 100644
new mode 100755
index 58312e4..e444f5f
--- a/dapl/svc/mpxyd.h
+++ b/dapl/svc/mpxyd.h
@@ -117,6 +117,8 @@ typedef struct mcm_ib_dev {
 	uint64_t		*ports;	/* SCIF device open clients, cm_id*/
 	struct dat_mcm_addr	addr;
 	uint16_t		lid;
+	uint16_t		m_lid; /* intra-node, ibscif HST lid != MICx lids */
+	uint8_t			m_gid[16];
 	struct dat_mix_dev_attr	dev_attr; /* provided with mix_open */
 	int			s_hd;
 	int			s_tl;
@@ -265,6 +267,14 @@ typedef struct mcm_cq {
 
 } mcm_cq_t;
 
+/*  DAPL MCM PZ object */
+typedef struct mcm_pz {
+	LLIST_ENTRY		entry;
+	struct mcm_scif_dev	*smd;
+	struct ibv_pd		*ib_pd;
+	uint64_t		pd_ctx; /* MIC client */
+} mcm_pz_t;
+
 /*  DAPL MCM MR object, id in entry */
 typedef struct mcm_mr {
 	LLIST_ENTRY		entry;
@@ -308,6 +318,7 @@ typedef struct mcm_scif_dev {
 	LLIST_ENTRY		cqlist;		/* client cq create list */
 	LLIST_ENTRY		cqrlist;	/* mpxyd cq list for proxy in service */
 	LLIST_ENTRY		mrlist;		/* mr list */
+	LLIST_ENTRY		pzlist;		/* pz list */
 	mpxy_lock_t		clock;		/* LOCKS: cm lock */
 	mpxy_lock_t		llock;		/* listen lock */
 	mpxy_lock_t		plock;		/* port space lock */
@@ -316,6 +327,7 @@ typedef struct mcm_scif_dev {
 	mpxy_lock_t		cqlock;		/* cq tx lock */
 	mpxy_lock_t		cqrlock;	/* cq rx lock */
 	mpxy_lock_t		mrlock;		/* mr lock */
+	mpxy_lock_t		pzlock;		/* pz lock */
 	mpxy_lock_t		evlock;		/* DTO event lock, multi-threads on ev_ep */
 	int			destroy;	/* destroying device, all resources */
 	int			ref_cnt;	/* child references */
@@ -807,7 +819,7 @@ static inline void const_ib_rr(struct ibv_send_wr *iwr, struct dat_mix_wr *mwr,
 static inline void mcm_pr_addrs(int lvl, struct dat_mcm_msg *msg, int state, int in)
 {
 	if (in) {
-		if (MXS_EP(&msg->daddr1) && MXS_EP(&msg->saddr1)) {
+		if (MXF_EP(&msg->daddr1) && MXF_EP(&msg->saddr1)) {
 			mlog(lvl, " QPr_t addr2: %s 0x%x %x 0x%x %s <- QPt_r addr2: 0x%x %x 0x%x %s\n",
 				mcm_state_str(state), htons(msg->daddr2.lid),
 				htonl(msg->daddr2.qpn), htons(msg->dport),
@@ -829,7 +841,7 @@ static inline void mcm_pr_addrs(int lvl, struct dat_mcm_msg *msg, int state, int
 				htons(msg->sport), mcm_map_str(msg->saddr1.ep_map));
 		}
 	} else {
-		if (MXS_EP(&msg->saddr1) && MXS_EP(&msg->daddr1)) {
+		if (MXF_EP(&msg->saddr1) && MXF_EP(&msg->daddr1)) {
 			mlog(lvl, " QPr_t addr2: %s 0x%x %x 0x%x %s -> QPt_r addr2: 0x%x %x 0x%x %s\n",
 				mcm_state_str(state), htons(msg->saddr2.lid),
 				htonl(msg->saddr2.qpn), htons(msg->sport),
diff --git a/dapl/svc/util.c b/dapl/svc/util.c
old mode 100644
new mode 100755
index e82c1a9..8b5db68
--- a/dapl/svc/util.c
+++ b/dapl/svc/util.c
@@ -422,10 +422,11 @@ void mpxy_set_options( int debug_mode )
 			mcm_affinity_base_hca = atoi(value);
 		else if (!strcasecmp("mcm_affinity_base_mic", opt))
 			mcm_affinity_base_mic = atoi(value);
-		else if (!strcasecmp("mcm_ib_inline", opt))
+		else if (!strcasecmp("mcm_ib_inline", opt)) {
 			mcm_ib_inline = atoi(value);
 			if (mcm_ib_inline)
 				mcm_ib_inline = max(atoi(value), MCM_IB_INLINE);
+		}
 		else if (!strcasecmp("mcm_perf_profile", opt))
 			mcm_profile = atoi(value);
 		else if (!strcasecmp("mcm_eager_completion", opt))
diff --git a/dapl/udapl/dapl_cno_create.c b/dapl/udapl/dapl_cno_create.c
old mode 100644
new mode 100755
diff --git a/dapl/udapl/dapl_cno_free.c b/dapl/udapl/dapl_cno_free.c
old mode 100644
new mode 100755
diff --git a/dapl/udapl/dapl_cno_modify_agent.c b/dapl/udapl/dapl_cno_modify_agent.c
old mode 100644
new mode 100755
diff --git a/dapl/udapl/dapl_cno_query.c b/dapl/udapl/dapl_cno_query.c
old mode 100644
new mode 100755
diff --git a/dapl/udapl/dapl_cno_wait.c b/dapl/udapl/dapl_cno_wait.c
old mode 100644
new mode 100755
diff --git a/dapl/udapl/dapl_evd_clear_unwaitable.c b/dapl/udapl/dapl_evd_clear_unwaitable.c
old mode 100644
new mode 100755
diff --git a/dapl/udapl/dapl_evd_create.c b/dapl/udapl/dapl_evd_create.c
old mode 100644
new mode 100755
diff --git a/dapl/udapl/dapl_evd_disable.c b/dapl/udapl/dapl_evd_disable.c
old mode 100644
new mode 100755
diff --git a/dapl/udapl/dapl_evd_enable.c b/dapl/udapl/dapl_evd_enable.c
old mode 100644
new mode 100755
diff --git a/dapl/udapl/dapl_evd_modify_cno.c b/dapl/udapl/dapl_evd_modify_cno.c
old mode 100644
new mode 100755
diff --git a/dapl/udapl/dapl_evd_query.c b/dapl/udapl/dapl_evd_query.c
old mode 100644
new mode 100755
diff --git a/dapl/udapl/dapl_evd_set_unwaitable.c b/dapl/udapl/dapl_evd_set_unwaitable.c
old mode 100644
new mode 100755
diff --git a/dapl/udapl/dapl_evd_wait.c b/dapl/udapl/dapl_evd_wait.c
old mode 100644
new mode 100755
diff --git a/dapl/udapl/dapl_init.c b/dapl/udapl/dapl_init.c
old mode 100644
new mode 100755
diff --git a/dapl/udapl/dapl_lmr_create.c b/dapl/udapl/dapl_lmr_create.c
old mode 100644
new mode 100755
diff --git a/dapl/udapl/libdaplomcm.map b/dapl/udapl/libdaplomcm.map
old mode 100644
new mode 100755
diff --git a/dapl/udapl/linux/dapl_osd.c b/dapl/udapl/linux/dapl_osd.c
old mode 100644
new mode 100755
diff --git a/dapl/udapl/linux/dapl_osd.h b/dapl/udapl/linux/dapl_osd.h
old mode 100644
new mode 100755
diff --git a/dat/common/dat_dictionary.c b/dat/common/dat_dictionary.c
old mode 100644
new mode 100755
diff --git a/dat/common/dat_dictionary.h b/dat/common/dat_dictionary.h
old mode 100644
new mode 100755
diff --git a/dat/common/dat_dr.c b/dat/common/dat_dr.c
old mode 100644
new mode 100755
diff --git a/dat/common/dat_dr.h b/dat/common/dat_dr.h
old mode 100644
new mode 100755
diff --git a/dat/common/dat_init.c b/dat/common/dat_init.c
old mode 100644
new mode 100755
diff --git a/dat/common/dat_init.h b/dat/common/dat_init.h
old mode 100644
new mode 100755
diff --git a/dat/common/dat_sr.h b/dat/common/dat_sr.h
old mode 100644
new mode 100755
diff --git a/dat/common/dat_strerror.c b/dat/common/dat_strerror.c
old mode 100644
new mode 100755
diff --git a/dat/include/dat2/dat_error.h b/dat/include/dat2/dat_error.h
old mode 100644
new mode 100755
diff --git a/dat/include/dat2/dat_platform_specific.h b/dat/include/dat2/dat_platform_specific.h
old mode 100644
new mode 100755
diff --git a/dat/include/dat2/dat_registry.h b/dat/include/dat2/dat_registry.h
old mode 100644
new mode 100755
diff --git a/dat/include/dat2/dat_vendor_specific.h b/dat/include/dat2/dat_vendor_specific.h
old mode 100644
new mode 100755
diff --git a/dat/include/dat2/udat_config.h b/dat/include/dat2/udat_config.h
old mode 100644
new mode 100755
diff --git a/dat/include/dat2/udat_vendor_specific.h b/dat/include/dat2/udat_vendor_specific.h
old mode 100644
new mode 100755
diff --git a/dat/udat/linux/dat_osd.c b/dat/udat/linux/dat_osd.c
old mode 100644
new mode 100755
diff --git a/dat/udat/linux/dat_osd.h b/dat/udat/linux/dat_osd.h
old mode 100644
new mode 100755
diff --git a/dat/udat/udat_api.c b/dat/udat/udat_api.c
old mode 100644
new mode 100755
diff --git a/dat/udat/udat_sr_parser.c b/dat/udat/udat_sr_parser.c
old mode 100644
new mode 100755
diff --git a/dat/udat/udat_sr_parser.h b/dat/udat/udat_sr_parser.h
old mode 100644
new mode 100755
diff --git a/doc/dat.conf b/doc/dat.conf
old mode 100644
new mode 100755
index a0ab015..c3794e7
--- a/doc/dat.conf
+++ b/doc/dat.conf
@@ -38,6 +38,7 @@ ofa-v2-mcm-1 u2.0 nonthreadsafe default libdaplomcm.so.2 dapl.2.0 "mlx4_0 1" ""
 ofa-v2-mcm-2 u2.0 nonthreadsafe default libdaplomcm.so.2 dapl.2.0 "mlx4_0 2" ""
 ofa-v2-scif0 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 "scif0 1" ""
 ofa-v2-scif0-u u2.0 nonthreadsafe default libdaploucm.so.2 dapl.2.0 "scif0 1" ""
+ofa-v2-scif0-m u2.0 nonthreadsafe default libdaplomcm.so.2 dapl.2.0 "scif0 1" ""
 ofa-v2-mic0 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 "mic0:ib 1" ""
 ofa-v2-mlx4_0-1s u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 "mlx4_0 1" ""
 ofa-v2-mlx4_0-2s u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 "mlx4_0 2" ""
@@ -65,4 +66,7 @@ ofa-v2-qib0-1s u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 "qib0 1" ""
 ofa-v2-qib0-2s u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 "qib0 2" ""
 ofa-v2-qib1-1s u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 "qib1 1" ""
 ofa-v2-qib1-2s u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 "qib1 2" ""
-
+ofa-v2-qib0-1m u2.0 nonthreadsafe default libdaplomcm.so.2 dapl.2.0 "qib0 1" ""
+ofa-v2-qib0-2m u2.0 nonthreadsafe default libdaplomcm.so.2 dapl.2.0 "qib0 2" ""
+ofa-v2-qib1-1m u2.0 nonthreadsafe default libdaplomcm.so.2 dapl.2.0 "qib1 1" ""
+ofa-v2-qib1-2m u2.0 nonthreadsafe default libdaplomcm.so.2 dapl.2.0 "qib1 2" ""
diff --git a/doc/mpxyd.conf b/doc/mpxyd.conf
old mode 100644
new mode 100755
diff --git a/m4/libtool.m4 b/m4/libtool.m4
old mode 100644
new mode 100755
diff --git a/m4/ltoptions.m4 b/m4/ltoptions.m4
old mode 100644
new mode 100755
diff --git a/m4/ltsugar.m4 b/m4/ltsugar.m4
old mode 100644
new mode 100755
diff --git a/m4/ltversion.m4 b/m4/ltversion.m4
old mode 100644
new mode 100755
diff --git a/m4/lt~obsolete.m4 b/m4/lt~obsolete.m4
old mode 100644
new mode 100755
diff --git a/man/dapltest.1 b/man/dapltest.1
old mode 100644
new mode 100755
diff --git a/man/dat.conf.5 b/man/dat.conf.5
old mode 100644
new mode 100755
diff --git a/mpxyd.init.in b/mpxyd.init.in
old mode 100644
new mode 100755
diff --git a/test/dapltest/README b/test/dapltest/README
old mode 100644
new mode 100755
diff --git a/test/dapltest/cmd/dapl_fft_cmd.c b/test/dapltest/cmd/dapl_fft_cmd.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/cmd/dapl_getopt.c b/test/dapltest/cmd/dapl_getopt.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/cmd/dapl_limit_cmd.c b/test/dapltest/cmd/dapl_limit_cmd.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/cmd/dapl_main.c b/test/dapltest/cmd/dapl_main.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/cmd/dapl_netaddr.c b/test/dapltest/cmd/dapl_netaddr.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/cmd/dapl_params.c b/test/dapltest/cmd/dapl_params.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/cmd/dapl_performance_cmd.c b/test/dapltest/cmd/dapl_performance_cmd.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/cmd/dapl_qos_util.c b/test/dapltest/cmd/dapl_qos_util.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/cmd/dapl_quit_cmd.c b/test/dapltest/cmd/dapl_quit_cmd.c
old mode 100644
new mode 100755
index d8930e4..ba8e708
--- a/test/dapltest/cmd/dapl_quit_cmd.c
+++ b/test/dapltest/cmd/dapl_quit_cmd.c
@@ -46,7 +46,7 @@ DT_Quit_Cmd_Parse(Quit_Cmd_t * cmd,
 	int c;
 
 	for (;;) {
-		c = DT_mygetopt_r(my_argc, my_argv, "ds:D:R:n", opts);
+		c = DT_mygetopt_r(my_argc, my_argv, "ds:D:R:n:", opts);
 		if (c == EOF) {
 			break;
 		}
diff --git a/test/dapltest/cmd/dapl_server_cmd.c b/test/dapltest/cmd/dapl_server_cmd.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/cmd/dapl_transaction_cmd.c b/test/dapltest/cmd/dapl_transaction_cmd.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/common/dapl_endian.c b/test/dapltest/common/dapl_endian.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/common/dapl_global.c b/test/dapltest/common/dapl_global.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/common/dapl_performance_cmd_util.c b/test/dapltest/common/dapl_performance_cmd_util.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/common/dapl_quit_cmd_util.c b/test/dapltest/common/dapl_quit_cmd_util.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/common/dapl_transaction_cmd_util.c b/test/dapltest/common/dapl_transaction_cmd_util.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/include/dapl_bpool.h b/test/dapltest/include/dapl_bpool.h
old mode 100644
new mode 100755
diff --git a/test/dapltest/include/dapl_client_info.h b/test/dapltest/include/dapl_client_info.h
old mode 100644
new mode 100755
diff --git a/test/dapltest/include/dapl_common.h b/test/dapltest/include/dapl_common.h
old mode 100644
new mode 100755
diff --git a/test/dapltest/include/dapl_execute.h b/test/dapltest/include/dapl_execute.h
old mode 100644
new mode 100755
diff --git a/test/dapltest/include/dapl_fft_cmd.h b/test/dapltest/include/dapl_fft_cmd.h
old mode 100644
new mode 100755
diff --git a/test/dapltest/include/dapl_fft_util.h b/test/dapltest/include/dapl_fft_util.h
old mode 100644
new mode 100755
diff --git a/test/dapltest/include/dapl_getopt.h b/test/dapltest/include/dapl_getopt.h
old mode 100644
new mode 100755
diff --git a/test/dapltest/include/dapl_global.h b/test/dapltest/include/dapl_global.h
old mode 100644
new mode 100755
diff --git a/test/dapltest/include/dapl_limit_cmd.h b/test/dapltest/include/dapl_limit_cmd.h
old mode 100644
new mode 100755
diff --git a/test/dapltest/include/dapl_mdep.h b/test/dapltest/include/dapl_mdep.h
old mode 100644
new mode 100755
diff --git a/test/dapltest/include/dapl_memlist.h b/test/dapltest/include/dapl_memlist.h
old mode 100644
new mode 100755
diff --git a/test/dapltest/include/dapl_params.h b/test/dapltest/include/dapl_params.h
old mode 100644
new mode 100755
diff --git a/test/dapltest/include/dapl_performance_cmd.h b/test/dapltest/include/dapl_performance_cmd.h
old mode 100644
new mode 100755
diff --git a/test/dapltest/include/dapl_performance_stats.h b/test/dapltest/include/dapl_performance_stats.h
old mode 100644
new mode 100755
diff --git a/test/dapltest/include/dapl_performance_test.h b/test/dapltest/include/dapl_performance_test.h
old mode 100644
new mode 100755
diff --git a/test/dapltest/include/dapl_proto.h b/test/dapltest/include/dapl_proto.h
old mode 100644
new mode 100755
diff --git a/test/dapltest/include/dapl_quit_cmd.h b/test/dapltest/include/dapl_quit_cmd.h
old mode 100644
new mode 100755
diff --git a/test/dapltest/include/dapl_server_cmd.h b/test/dapltest/include/dapl_server_cmd.h
old mode 100644
new mode 100755
diff --git a/test/dapltest/include/dapl_server_info.h b/test/dapltest/include/dapl_server_info.h
old mode 100644
new mode 100755
diff --git a/test/dapltest/include/dapl_tdep.h b/test/dapltest/include/dapl_tdep.h
old mode 100644
new mode 100755
diff --git a/test/dapltest/include/dapl_tdep_print.h b/test/dapltest/include/dapl_tdep_print.h
old mode 100644
new mode 100755
diff --git a/test/dapltest/include/dapl_test_data.h b/test/dapltest/include/dapl_test_data.h
old mode 100644
new mode 100755
diff --git a/test/dapltest/include/dapl_transaction_cmd.h b/test/dapltest/include/dapl_transaction_cmd.h
old mode 100644
new mode 100755
diff --git a/test/dapltest/include/dapl_transaction_stats.h b/test/dapltest/include/dapl_transaction_stats.h
old mode 100644
new mode 100755
diff --git a/test/dapltest/include/dapl_transaction_test.h b/test/dapltest/include/dapl_transaction_test.h
old mode 100644
new mode 100755
diff --git a/test/dapltest/include/dapl_version.h b/test/dapltest/include/dapl_version.h
old mode 100644
new mode 100755
diff --git a/test/dapltest/mdep/linux/dapl_mdep_user.c b/test/dapltest/mdep/linux/dapl_mdep_user.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/test/dapl_bpool.c b/test/dapltest/test/dapl_bpool.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/test/dapl_client.c b/test/dapltest/test/dapl_client.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/test/dapl_client_info.c b/test/dapltest/test/dapl_client_info.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/test/dapl_cnxn.c b/test/dapltest/test/dapl_cnxn.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/test/dapl_execute.c b/test/dapltest/test/dapl_execute.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/test/dapl_fft_connmgt.c b/test/dapltest/test/dapl_fft_connmgt.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/test/dapl_fft_endpoint.c b/test/dapltest/test/dapl_fft_endpoint.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/test/dapl_fft_hwconn.c b/test/dapltest/test/dapl_fft_hwconn.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/test/dapl_fft_mem.c b/test/dapltest/test/dapl_fft_mem.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/test/dapl_fft_pz.c b/test/dapltest/test/dapl_fft_pz.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/test/dapl_fft_queryinfo.c b/test/dapltest/test/dapl_fft_queryinfo.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/test/dapl_fft_test.c b/test/dapltest/test/dapl_fft_test.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/test/dapl_fft_util.c b/test/dapltest/test/dapl_fft_util.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/test/dapl_limit.c b/test/dapltest/test/dapl_limit.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/test/dapl_memlist.c b/test/dapltest/test/dapl_memlist.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/test/dapl_performance_client.c b/test/dapltest/test/dapl_performance_client.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/test/dapl_performance_server.c b/test/dapltest/test/dapl_performance_server.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/test/dapl_performance_stats.c b/test/dapltest/test/dapl_performance_stats.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/test/dapl_performance_util.c b/test/dapltest/test/dapl_performance_util.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/test/dapl_quit_util.c b/test/dapltest/test/dapl_quit_util.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/test/dapl_server.c b/test/dapltest/test/dapl_server.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/test/dapl_server_info.c b/test/dapltest/test/dapl_server_info.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/test/dapl_test_data.c b/test/dapltest/test/dapl_test_data.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/test/dapl_test_util.c b/test/dapltest/test/dapl_test_util.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/test/dapl_thread.c b/test/dapltest/test/dapl_thread.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/test/dapl_transaction_stats.c b/test/dapltest/test/dapl_transaction_stats.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/test/dapl_transaction_test.c b/test/dapltest/test/dapl_transaction_test.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/test/dapl_transaction_util.c b/test/dapltest/test/dapl_transaction_util.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/test/dapl_util.c b/test/dapltest/test/dapl_util.c
old mode 100644
new mode 100755
diff --git a/test/dapltest/udapl/udapl_tdep.c b/test/dapltest/udapl/udapl_tdep.c
old mode 100644
new mode 100755
diff --git a/test/dtest/README b/test/dtest/README
old mode 100644
new mode 100755
diff --git a/test/dtest/dtest.c b/test/dtest/dtest.c
index bd30207..c0c82ee 100755
--- a/test/dtest/dtest.c
+++ b/test/dtest/dtest.c
@@ -99,13 +99,16 @@
 #define MAX_RDMA_RD    4
 #define MAX_PROCS      1000
 
+#define min(a, b) ((a < b) ? (a) : (b))
+#define max(a, b) ((a > b) ? (a) : (b))
+
 /* Header files needed for DAT/uDAPL */
 #include "dat2/udat.h"
 #include "dat2/dat_ib_extensions.h"
 
 /* definitions */
 #define SERVER_CONN_QUAL  45248
-#define DTO_TIMEOUT       (1000*1000*5)
+#define DTO_TIMEOUT       DAT_TIMEOUT_INFINITE
 #define CNO_TIMEOUT       (1000*1000*1)
 #define DTO_FLUSH_TIMEOUT (1000*1000*2)
 #define CONN_TIMEOUT      (1000*1000*100)
@@ -257,14 +260,14 @@ void flush_evds(void)
 	DAT_EVENT event;
 
 	/* Flush async error queue */
-	printf("%d ERR: Checking ASYNC EVD...\n", getpid());
+	printf("%d: Checking ASYNC EVD...\n", getpid());
 	while (dat_evd_dequeue(h_async_evd, &event) == DAT_SUCCESS) {
-		printf(" ASYNC EVD ENTRY: handle=%p reason=%d\n",
+		printf("%d ERR: ASYNC EVD ENTRY: handle=%p reason=%d\n", getpid(),
 			event.event_data.asynch_error_event_data.dat_handle,
 			event.event_data.asynch_error_event_data.reason);
 	}
 	/* Flush receive queue */
-	printf("%d ERR: Checking RECEIVE EVD...\n", getpid());
+	printf("%d: Checking RECEIVE EVD...\n", getpid());
 	while (dat_evd_dequeue(h_dto_rcv_evd, &event) == DAT_SUCCESS) {
 		printf(" RCV EVD ENTRY: op=%d stat=%d ln=%d ck="F64x"\n",
 			event.event_data.dto_completion_event_data.operation,
@@ -273,7 +276,7 @@ void flush_evds(void)
 			event.event_data.dto_completion_event_data.user_cookie.as_64);
 	}
 	/* Flush request queue */
-	printf("%d ERR: Checking REQUEST EVD...\n", getpid());
+	printf("%d: Checking REQUEST EVD...\n", getpid());
 	while (dat_evd_dequeue(h_dto_req_evd, &event) == DAT_SUCCESS) {
 		printf(" REQ EVD ENTRY: op=%d stat=%d ln=%d ck="F64x"\n",
 			event.event_data.dto_completion_event_data.operation,
@@ -657,15 +660,15 @@ int main(int argc, char **argv)
 
 	if (align_data) {
 		/* allocate send and receive buffers */
-		if (posix_memalign((void**)&rbuf, 4096, buf_len * (burst+1)) ||
-		    posix_memalign((void**)&sbuf, 4096, buf_len * (burst+1))) {
+		if (posix_memalign((void**)&rbuf, 4096, max(64, buf_len * (burst+1))) ||
+		    posix_memalign((void**)&sbuf, 4096, max(64, buf_len * (burst+1)))) {
 			perror("malloc");
 			exit(1);
 		}
 	} else {
 		/* allocate send and receive buffers */
-		if (((rbuf = malloc(buf_len * (burst+1))) == NULL) ||
-		    ((sbuf = malloc(buf_len * (burst+1))) == NULL)) {
+		if (((rbuf = malloc(max(64, buf_len * (burst+1)))) == NULL) ||
+		    ((sbuf = malloc(max(64, buf_len * (burst+1)))) == NULL)) {
 			perror("malloc");
 			exit(1);
 		}
@@ -848,11 +851,12 @@ int main(int argc, char **argv)
 
 	if (ret != DAT_SUCCESS) {
 		fprintf(stderr, "%d Error do_rdma_write_%swith_msg: %s\n",
-			getpid(), write_immed ? "imm_":"", DT_RetToStr(ret));
+			getpid(), write_immed && write_only ? "imm_":"",
+			DT_RetToStr(ret));
 		goto cleanup;
 	} else
 		LOGPRINTF("%d do_rdma_write_%swith_msg complete\n",
-			  getpid(), write_immed ? "imm_":"");
+			  getpid(), write_immed && write_only ? "imm_":"");
 
 	if (write_only || !rdma_read)
 		goto complete;
@@ -878,7 +882,6 @@ int main(int argc, char **argv)
 	}
 
 cleanup:
-	flush_evds();
 	failed++;
 complete:
 
@@ -903,6 +906,8 @@ complete:
 			h_ep = DAT_HANDLE_NULL;
 		}
 	}
+	if (connected)
+		flush_evds();
 
 	/* free EVDs */
 	LOGPRINTF("%d destroy events\n", getpid());
@@ -1193,6 +1198,7 @@ DAT_RETURN connect_ep(char *hostname,
 		printf("%d Server is waiting for client connection to send"
 			" server info\n",
 			getpid());
+		fflush(stdout);
 		if (send_server_params(ser_sa)) {
 			printf("%d Failed to send server params\n", getpid());
 			return -1;
@@ -1408,6 +1414,7 @@ no_resolution:
 
 	printf("\n%d CONNECTED!\n\n", getpid());
 	connected = 1;
+	fflush(stdout);
 
 #if CONNECT_ONLY
 	return 0;
@@ -1416,16 +1423,16 @@ no_resolution:
 	 *  Setup our remote memory and tell the other side about it
 	 */
 	p_rmr_snd->virtual_address = htonll((DAT_VADDR) (uintptr_t) rbuf);
-	p_rmr_snd->segment_length = htonl(RDMA_BUFFER_SIZE);
+	p_rmr_snd->segment_length = htonl(buf_len);
 	p_rmr_snd->rmr_context = htonl(rmr_context_recv);
 
 	printf("%d Send RMR msg to remote: r_key_ctx=0x%x,va=%p,len=0x%x\n",
-	       getpid(), rmr_context_recv, rbuf, RDMA_BUFFER_SIZE);
+	       getpid(), rmr_context_recv, rbuf, buf_len);
 
 	ret = send_msg(p_rmr_snd,
 		       sizeof(DAT_RMR_TRIPLET),
 		       lmr_context_send_msg,
-		       cookie, DAT_COMPLETION_SUPPRESS_FLAG);
+		       cookie, DAT_COMPLETION_DEFAULT_FLAG);
 
 	if (ret != DAT_SUCCESS) {
 		fprintf(stderr, "%d Error send_msg: %s\n",
@@ -1571,15 +1578,17 @@ void disconnect_ep(void)
 DAT_RETURN do_rdma_write_with_msg(void)
 {
 	DAT_EVENT event;
+	DAT_DTO_COMPLETION_EVENT_DATA *dto_event;
 	DAT_LMR_TRIPLET l_iov[MSG_IOV_COUNT];
 	DAT_RMR_TRIPLET r_iov;
 	DAT_DTO_COOKIE cookie;
 	DAT_RETURN ret;
+	DAT_COMPLETION_FLAGS flags;
 	int i;
 
 	printf("\n %d RDMA WRITE DATA with SEND MSG\n\n", getpid());
 
-	cookie.as_64 = 0x5555;
+	dto_event = &event.event_data.dto_completion_event_data;
 
 	if (recv_msg_index >= MSG_BUF_COUNT)
 		return (DAT_ABORT);
@@ -1592,6 +1601,9 @@ DAT_RETURN do_rdma_write_with_msg(void)
 	else
 		strcpy((char *)sbuf, "client RDMA write data...");
 
+	if  (uni_direction && server)
+		goto rmsg;
+
 	for (i = 0; i < MSG_IOV_COUNT; i++) {
 		l_iov[i].lmr_context = lmr_context_send;
 		l_iov[i].segment_length = buf_len / MSG_IOV_COUNT;
@@ -1605,26 +1617,50 @@ DAT_RETURN do_rdma_write_with_msg(void)
 
 	start = get_time();
 	for (i = 0; i < burst; i++) {
-		cookie.as_64 = 0x9999;
+		if (!((i+1) % signal_rate))
+			flags =  DAT_COMPLETION_DEFAULT_FLAG;
+		else
+			flags = DAT_COMPLETION_SUPPRESS_FLAG;
+
+		cookie.as_64 = i;
+		LOGPRINTF("%d rdma_write # %d %s\n",
+			  getpid(), i + 1, flags ? "SUPPRESS":"SIGNAL");
 		ret = dat_ep_post_rdma_write(h_ep,	// ep_handle
 					     MSG_IOV_COUNT,	// num_segments
 					     l_iov,	// LMR
 					     cookie,	// user_cookie
 					     &r_iov,	// RMR
-					     DAT_COMPLETION_SUPPRESS_FLAG);
+					     flags);
 		if (ret != DAT_SUCCESS) {
 			fprintf(stderr,
 				"%d: ERROR: dat_ep_post_rdma_write() %s\n",
 				getpid(), DT_RetToStr(ret));
 			return (DAT_ABORT);
 		}
+		if (flags == DAT_COMPLETION_DEFAULT_FLAG) {
+			if (collect_event(h_dto_req_evd,
+					  &event,
+					  DTO_TIMEOUT,
+					  &rdma_wr_poll_count) != DAT_SUCCESS) {
+				printf("%d %s RDMA write buffer contains: %s\n",
+				       getpid(), server ? "SERVER:" : "CLIENT:", rbuf);
+				return (DAT_ABORT);
+			}
+			if (dto_event->status ||
+			    dto_event->user_cookie.as_64 != i) {
+				fprintf(stderr,	"ERROR rdma_write: cookie="
+						" "F64x " exp 0x%x st 0x%x\n",
+					dto_event->user_cookie.as_64, i,
+					dto_event->status);
+				return (DAT_ABORT);
+			}
+		}
 		LOGPRINTF("%d rdma_write # %d completed\n", getpid(), i + 1);
 	}
 
-	/*
-	 *  Send RMR information a 2nd time to indicate completion
-	 *  NOTE: already swapped to network order in connect_ep
-	 */
+	if (server)
+		goto rmsg;
+smsg:
 	printf("%d Sending RDMA WRITE completion message\n", getpid());
 
 	ret = send_msg(p_rmr_snd,
@@ -1639,14 +1675,18 @@ DAT_RETURN do_rdma_write_with_msg(void)
 	} else {
 		LOGPRINTF("%d send_msg completed\n", getpid());
 	}
-
+	if (server)
+		goto acked;
+rmsg:
 	/* inbound recv event, send completion's suppressed */
 	if (collect_event(h_dto_rcv_evd,
 			  &event,
 			  DTO_TIMEOUT,
-			  &rdma_wr_poll_count) != DAT_SUCCESS)
+			  &rdma_wr_poll_count) != DAT_SUCCESS) {
+		printf("%d %s RDMA write buffer contains: %s\n",
+			getpid(), server ? "SERVER:" : "CLIENT:", rbuf);
 		return (DAT_ABORT);
-
+	}
 	stop = get_time();
 	ts.rdma_wr = ((stop - start) * 1.0e6);
 
@@ -1663,15 +1703,18 @@ DAT_RETURN do_rdma_write_with_msg(void)
 	    || (event.event_data.dto_completion_event_data.user_cookie.as_64 !=
 		recv_msg_index)) {
 		fprintf(stderr,
-			"unexpected event data for receive: len=%d cookie=" F64x
-			" exp %d/%d\n",
-			(int)event.event_data.dto_completion_event_data.
-			transfered_length,
+			"unexpected event data for receive: st=%d len=%d"
+			"cookie=" F64x " exp %d/%d\n",
+			event.event_data.dto_completion_event_data.status,
+			(int)event.event_data.dto_completion_event_data.transfered_length,
 			event.event_data.dto_completion_event_data.user_cookie.
 			as_64, (int)sizeof(DAT_RMR_TRIPLET), recv_msg_index);
 
 		return (DAT_ABORT);
 	}
+	if (server)
+		goto smsg;
+acked:
 
 	/* swap received RMR msg: network order to host order */
 	r_iov = p_rmr_rcv[recv_msg_index];
@@ -1787,73 +1830,80 @@ DAT_RETURN do_rdma_write_imm_with_msg(void)
 		}
 	}
 
+	if (uni_direction && !server)
+		goto smsg;
 done:
-	if ((!uni_direction) || (uni_direction && server)) {
-		/* Wait to RECEIVE the LAST message, immediate data expected */
-		LOGPRINTF("%d Waiting for final inbound RW_imm from peer\n", getpid());
-		if (collect_event(h_dto_rcv_evd,
-				  &event,
-				  DTO_TIMEOUT,
-				  &rdma_wr_poll_count) != DAT_SUCCESS)
-			return (DAT_ABORT);
-
-		if (event.event_number != (int)DAT_IB_DTO_EVENT ||
-		    ext_event->type != DAT_IB_RDMA_WRITE_IMMED_DATA ||
-		    ext_event->val.immed.data != 0x7777) {
-			printf("unexpected event 0x%x type 0x%x or idata 0x%x"
-			       ", waiting for RW-IMMED #0x%x\n",
-			       event.event_number, ext_event->type,
-			       ext_event->val.immed.data, DAT_IB_DTO_EVENT);
-			return (DAT_ABORT);
-		}
-		recv_msg_index++;
+	/* Wait to RECEIVE the LAST message, immediate data expected */
+	LOGPRINTF("%d Waiting for final inbound RW_imm from peer\n", getpid());
+	if (collect_event(h_dto_rcv_evd,
+			  &event,
+			  DTO_TIMEOUT,
+			  &rdma_wr_poll_count) != DAT_SUCCESS)
+		return (DAT_ABORT);
 
-		/* Send last message received ACK message back */
-		cookie.as_64 = 0x9999;
-		ret = send_msg(p_rmr_snd,
-			       sizeof(DAT_RMR_TRIPLET),
-			       lmr_context_send_msg,
-			       cookie, DAT_COMPLETION_SUPPRESS_FLAG);
+	if (event.event_number != (int)DAT_IB_DTO_EVENT ||
+	    ext_event->type != DAT_IB_RDMA_WRITE_IMMED_DATA ||
+	    ext_event->val.immed.data != 0x7777) {
+		printf("unexpected event 0x%x type 0x%x or idata 0x%x"
+		       ", waiting for RW-IMMED #0x%x\n",
+		       event.event_number, ext_event->type,
+		       ext_event->val.immed.data, DAT_IB_DTO_EVENT);
+		return (DAT_ABORT);
+	}
+	recv_msg_index++;
 
-		if (ret != DAT_SUCCESS) {
-			fprintf(stderr, "%d Error send_msg: %s\n",
-				getpid(), DT_RetToStr(ret));
-			return (ret);
-		} else {
-			LOGPRINTF("%d send_msg completed\n", getpid());
-		}
+	if (server)
+		goto rmsg;
+smsg:
+	printf("%d sending LAST msg ACK to remote\n", getpid());
+	/* Send last message received ACK message back */
+	cookie.as_64 = 0x9999;
+	ret = send_msg(p_rmr_snd,
+		       sizeof(DAT_RMR_TRIPLET),
+		       lmr_context_send_msg,
+		       cookie, DAT_COMPLETION_SUPPRESS_FLAG);
 
+	if (ret != DAT_SUCCESS) {
+		fprintf(stderr, "%d Error send_msg: %s\n",
+			getpid(), DT_RetToStr(ret));
+		return (ret);
+	} else {
+		LOGPRINTF("%d send_msg completed\n", getpid());
 	}
 
-	if (!uni_direction || !server) {
-		/* Wait for my LAST message ACK from remote side */
-		printf("%d waiting for LAST msg ACK from remote\n", getpid());
-		if (collect_event(h_dto_rcv_evd,
-				  &event,
-				  DTO_TIMEOUT,
-				  &rdma_wr_poll_count) != DAT_SUCCESS)
-			return (DAT_ABORT);
+	if (server)
+		goto acked;
+rmsg:
+	/* Wait for my LAST message ACK from remote side */
+	printf("%d waiting for LAST msg ACK from remote\n", getpid());
+	if (collect_event(h_dto_rcv_evd,
+			  &event,
+			  DTO_TIMEOUT,
+			  &rdma_wr_poll_count) != DAT_SUCCESS)
+		return (DAT_ABORT);
 
-		printf("%d LAST rdma write ACK message arrived!\n", getpid());
-		if (event.event_number != DAT_DTO_COMPLETION_EVENT) {
-			fprintf(stderr, "%d Error unexpected DTO event : %s\n",
-				getpid(), DT_EventToStr(event.event_number));
-			return (DAT_ABORT);
-		}
+	printf("%d LAST rdma write ACK message arrived!\n", getpid());
+	if (event.event_number != DAT_DTO_COMPLETION_EVENT) {
+		fprintf(stderr, "%d Error unexpected DTO event : %s\n",
+			getpid(), DT_EventToStr(event.event_number));
+		return (DAT_ABORT);
+	}
 
-		if ((dto_event->transfered_length != sizeof(DAT_RMR_TRIPLET))
-		    || (dto_event->user_cookie.as_64 != recv_msg_index)) {
-			fprintf(stderr,
-				"unexpected event data for receive: len=%d "
-				"cookie=" F64x" exp %d/%d\n",
-				(int)dto_event->transfered_length,
-				dto_event->user_cookie.as_64,
-				(int)sizeof(DAT_RMR_TRIPLET), recv_msg_index);
-			return (DAT_ABORT);
-		}
-		printf("%d LAST RDMA_WRITE ACK from remote \n", getpid());
+	if ((dto_event->transfered_length != sizeof(DAT_RMR_TRIPLET))
+	    || (dto_event->user_cookie.as_64 != recv_msg_index)) {
+		fprintf(stderr,
+			"unexpected event data for receive: len=%d "
+			"cookie=" F64x" exp %d/%d\n",
+			(int)dto_event->transfered_length,
+			dto_event->user_cookie.as_64,
+			(int)sizeof(DAT_RMR_TRIPLET), recv_msg_index);
+		return (DAT_ABORT);
 	}
+	printf("%d LAST RDMA_WRITE ACK from remote \n", getpid());
 
+	if (server)
+		goto smsg;
+acked:
 	stop = get_time();
 	ts.rdma_wr = ((stop - start) * 1.0e6);
 
diff --git a/test/dtest/dtestcm.c b/test/dtest/dtestcm.c
old mode 100644
new mode 100755
diff --git a/test/dtest/dtestsrq.c b/test/dtest/dtestsrq.c
old mode 100644
new mode 100755

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-ofed/dapl.git



More information about the Pkg-ofed-commits mailing list