# NCCL settings: values are assigned first, then exported together below.

# Select the aws-ofi-nccl libfabric plugin as NCCL's network backend.
NCCL_NET='AWS Libfabric'
# Restrict NCCL to the high-speed network interfaces (names starting "hsn").
NCCL_SOCKET_IFNAME='hsn'
# Log only the NCCL version at startup.
NCCL_DEBUG='VERSION'
# Use GPUDirect RDMA (NIC <-> GPU) when the GPU and the NIC are on the same
# NUMA node. This governs network transfers, not GPU-to-GPU P2P.
NCCL_NET_GDR_LEVEL='PHB'
# Allow rings/trees to use different NICs across nodes.
NCCL_CROSS_NIC='1'
# Lower bound on the number of channels NCCL uses.
NCCL_MIN_NCHANNELS='4'
# Enable GDRCopy support in NCCL.
NCCL_GDRCOPY_ENABLE='1'
# NOTE(review): presumably disables the forced flush after GPUDirect
# receives -- confirm against the NCCL documentation.
NCCL_NET_FORCE_FLUSH='0'

export NCCL_NET NCCL_SOCKET_IFNAME NCCL_DEBUG NCCL_NET_GDR_LEVEL \
  NCCL_CROSS_NIC NCCL_MIN_NCHANNELS NCCL_GDRCOPY_ENABLE NCCL_NET_FORCE_FLUSH

# Libfabric (FI) tuning for Slingshot: values are assigned first, then
# exported together below.

# Default completion-queue and transmit-queue sizes for the CXI provider.
FI_CXI_DEFAULT_CQ_SIZE='131072'
FI_CXI_DEFAULT_TX_SIZE='2048'
FI_CXI_DISABLE_NON_INJECT_MSG_IDC='1'
# Let libfabric use GDRCopy for CUDA device memory.
FI_HMEM_CUDA_USE_GDRCOPY='1'
FI_CXI_DISABLE_HOST_REGISTER='1'
# Memory-registration cache monitor backend.
FI_MR_CACHE_MONITOR='userfaultfd'
# Rendezvous protocol selection and its size thresholds.
# NOTE(review): exact semantics of these values should be confirmed
# against the fi_cxi(7) man page.
FI_CXI_RDZV_PROTO='alt_read'
FI_CXI_RDZV_THRESHOLD='0'
FI_CXI_RDZV_GET_MIN='0'
FI_CXI_RDZV_EAGER_SIZE='0'
# Receive-side message matching mode.
FI_CXI_RX_MATCH_MODE='hybrid'

export FI_CXI_DEFAULT_CQ_SIZE FI_CXI_DEFAULT_TX_SIZE \
  FI_CXI_DISABLE_NON_INJECT_MSG_IDC FI_HMEM_CUDA_USE_GDRCOPY \
  FI_CXI_DISABLE_HOST_REGISTER FI_MR_CACHE_MONITOR \
  FI_CXI_RDZV_PROTO FI_CXI_RDZV_THRESHOLD FI_CXI_RDZV_GET_MIN \
  FI_CXI_RDZV_EAGER_SIZE FI_CXI_RX_MATCH_MODE
