From fb55890c1ced2ff7e2ee0adcd0403c1fa3ca8346 Mon Sep 17 00:00:00 2001
From: Marty Kandes <mkandes@sdsc.edu>
Date: Tue, 19 Oct 2021 18:46:41 +0000
Subject: [PATCH] Add
 Singularity.pytorch-1.8.2-ubuntu-18.04-cuda-11.2-mlnx-ofed-4.7-3.2.9.0-openmpi-4.0.5

Updated PyTorch to v1.8.2, which is expected to be the first version
available on Voyager. The container now includes PyTorch Lightning,
Horovod, and ONNX. Note the small fix/patch applied to PyTorch
Lightning in order to support TorchText integration.

Also corrected source build instructions of Horovod for TensorFlow in
the latest definition file.
---
 ...a-11.2-mlnx-ofed-4.7-3.2.9.0-openmpi-4.0.5 | 350 ++++++++++++++++++
 ...a-11.2-mlnx-ofed-4.7-3.2.9.0-openmpi-4.0.5 |   9 +-
 2 files changed, 354 insertions(+), 5 deletions(-)
 create mode 100644 definition-files/pytorch/Singularity.pytorch-1.8.2-ubuntu-18.04-cuda-11.2-mlnx-ofed-4.7-3.2.9.0-openmpi-4.0.5

diff --git a/definition-files/pytorch/Singularity.pytorch-1.8.2-ubuntu-18.04-cuda-11.2-mlnx-ofed-4.7-3.2.9.0-openmpi-4.0.5 b/definition-files/pytorch/Singularity.pytorch-1.8.2-ubuntu-18.04-cuda-11.2-mlnx-ofed-4.7-3.2.9.0-openmpi-4.0.5
new file mode 100644
index 0000000..9ab6387
--- /dev/null
+++ b/definition-files/pytorch/Singularity.pytorch-1.8.2-ubuntu-18.04-cuda-11.2-mlnx-ofed-4.7-3.2.9.0-openmpi-4.0.5
@@ -0,0 +1,350 @@
+Bootstrap: oras
+From: ghcr.io/mkandes/naked-singularity:ubuntu-18.04-cuda-11.2-mlnx-ofed-4.7-3.2.9.0-openmpi-4.0.5
+
+%labels
+
+    APPLICATION_NAME pytorch
+    APPLICATION_VERSION 1.8.2
+    APPLICATION_URL https://pytorch.org
+
+    AUTHOR_NAME Marty Kandes
+    AUTHOR_EMAIL mkandes@sdsc.edu
+
+    LAST_UPDATED 20211019
+
+%setup
+
+%environment
+
+    # Set default miniconda environment
+    export PATH="/opt/miniconda3/bin:${PATH}"
+
+%post -c /bin/bash
+
+    # Set operating system mirror URL
+    export MIRRORURL='http://us.archive.ubuntu.com/ubuntu'
+
+    # Set operating system version
+    export OSVERSION='bionic'
+
+    # Set system locale
+    export LC_ALL='C'
+
+    # Set debian frontend interface
+    export DEBIAN_FRONTEND='noninteractive'
+
+    # Set NVIDIA driver, CUDA, cuDNN, NCCL, and TensorRT version numbers
+    export CUDA_DRIVER_MAJOR='460'
+    export CUDA_DRIVER_MINOR='32'
+    export CUDA_DRIVER_REVISION='03'
+    export CUDA_DRIVER_VERSION="${CUDA_DRIVER_MAJOR}.${CUDA_DRIVER_MINOR}.${CUDA_DRIVER_REVISION}"  # 460.32.03
+
+    export CUDA_MAJOR='11'
+    export CUDA_MINOR='2'
+    export CUDA_REVISION='2'
+    export CUDA_VERSION="${CUDA_MAJOR}.${CUDA_MINOR}.${CUDA_REVISION}"  # 11.2.2
+
+    export CUDNN_MAJOR='8'
+    export CUDNN_MINOR='1'
+    export CUDNN_REVISION='1.33'
+    export CUDNN_VERSION="${CUDNN_MAJOR}.${CUDNN_MINOR}.${CUDNN_REVISION}"  # 8.1.1.33
+
+    export NCCL_MAJOR='2'
+    export NCCL_MINOR='8'
+    export NCCL_REVISION='4'
+    export NCCL_VERSION="${NCCL_MAJOR}.${NCCL_MINOR}.${NCCL_REVISION}"  # 2.8.4
+
+    export TENSORRT_MAJOR='8'
+    export TENSORRT_MINOR='0'
+    export TENSORRT_REVISION='3'
+    export TENSORRT_VERSION="${TENSORRT_MAJOR}.${TENSORRT_MINOR}.${TENSORRT_REVISION}"  # 8.0.3
+
+    # Set the CUDA install prefix and the cuDNN/NCCL/TensorRT header and library directories
+    export CUDA_HOME="/usr/local/cuda-${CUDA_MAJOR}.${CUDA_MINOR}"  # /usr/local/cuda-11.2
+
+    export CUDNN_INCLUDE='/usr/include'
+    export CUDNN_LIB='/usr/lib/x86_64-linux-gnu'
+
+    export NCCL_INCLUDE='/usr/include'
+    export NCCL_LIB='/usr/lib/x86_64-linux-gnu'
+
+    export TENSORRT_INCLUDE='/usr/include'
+    export TENSORRT_LIB='/usr/lib/x86_64-linux-gnu'
+
+    export PATH="${CUDA_HOME}/bin${PATH:+:${PATH}}"  # ${VAR:+:...} adds the ':' only when VAR is non-empty
+    export LD_LIBRARY_PATH="${CUDA_HOME}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
+    export LD_LIBRARY_PATH="${CUDNN_LIB}:${LD_LIBRARY_PATH}"
+    export LD_LIBRARY_PATH="${NCCL_LIB}:${LD_LIBRARY_PATH}"
+    export LD_LIBRARY_PATH="${TENSORRT_LIB}:${LD_LIBRARY_PATH}"
+
+    # Record the Mellanox OFED release, OS, and platform (no later step in this file reads them)
+    export MLNX_ROOT_URL='http://www.mellanox.com/downloads/ofed'
+    export MLNX_OFED_VERSION='4.7-3.2.9.0'
+    export MLNX_OS_VERSION='ubuntu18.04'
+    export MLNX_PLATFORM='x86_64'
+
+    # Set OpenMPI major, minor, and revision numbers, plus the root and
+    # installation directories used for the PATH entries below
+    export OMPI_ROOT_URL='https://download.open-mpi.org/release/open-mpi'
+    export OMPI_MAJOR='4'
+    export OMPI_MINOR='0'
+    export OMPI_REVISION='5'
+    export OMPI_VERSION="${OMPI_MAJOR}.${OMPI_MINOR}.${OMPI_REVISION}"  # 4.0.5
+    export OMPI_ROOT_DIR='/opt/openmpi'
+    export OMPI_INSTALL_DIR="${OMPI_ROOT_DIR}/${OMPI_VERSION}"  # /opt/openmpi/4.0.5
+
+    # Put the OpenMPI binaries and libraries ahead of everything already on the search paths
+    export PATH="${OMPI_INSTALL_DIR}/bin:${PATH}"
+    export LD_LIBRARY_PATH="${OMPI_INSTALL_DIR}/lib:${LD_LIBRARY_PATH}"
+
+    # DO NOT Upgrade all packages to their latest versions
+    # apt-get -y update && apt-get -y upgrade
+
+    cd /tmp
+
+    # Fetch the Miniconda3 installer and run it unattended (-b) into /opt/miniconda3 (-p)
+    wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
+    chmod +x ./Miniconda3-latest-Linux-x86_64.sh
+    ./Miniconda3-latest-Linux-x86_64.sh -b -p /opt/miniconda3
+
+    # Put conda first on PATH; fall back to the parent of conda's bin directory when CONDA_PREFIX is unset
+    export PATH="/opt/miniconda3/bin:${PATH}"
+    export CMAKE_PREFIX_PATH="${CONDA_PREFIX:-$(dirname "$(which conda)")/../}"
+
+    # Install common python packages 
+    conda install -y python=3.8.5
+    conda install -y jupyterlab
+    conda install -y numpy
+    conda install -y matplotlib
+    conda install -y pandas
+    conda install -y plotly
+    conda install -y scipy
+    conda install -y scikit-learn
+    conda install -y scikit-image
+    conda install -y seaborn
+    conda install -y statsmodels
+    #conda install -y opencv
+
+    # Install GPU-accelerated python packages
+    conda install -y numba
+
+    # Install additional python packages
+    conda install -y astropy
+    conda install -y biopython
+    conda install -y h5py
+    conda install -y nltk
+    conda install -y sympy
+    conda install -y netcdf4
+    conda install -y spacy
+
+    # Install mpi4py
+    pip3 install mpi4py==3.0.3
+
+    # Install PyTorch dependencies 
+    # https://github.com/pytorch/pytorch#install-dependencies
+    conda install -y astunparse
+    conda install -y numpy
+    conda install -y ninja
+    conda install -y pyyaml
+    conda install -y mkl
+    conda install -y mkl-include
+    conda install -y setuptools
+    conda install -y cmake
+    conda install -y cffi
+    conda install -y typing_extensions
+    conda install -y future
+    conda install -y six
+    conda install -y requests
+    conda install -y dataclasses
+
+    # Add LAPACK support for the GPU
+    conda install -y magma-cuda112 -c pytorch
+
+    cd /tmp
+
+    # Set PyTorch version number (used below to select the git tag)
+    export PYTORCH_VERSION='1.8.2'
+
+    # Download PyTorch, switch to the release tag, then build and install from source
+    git clone --recursive https://github.com/pytorch/pytorch
+    cd pytorch
+    git checkout "v${PYTORCH_VERSION}"
+    git submodule sync  # re-sync submodule URLs after moving off the default branch
+    git submodule update --init --recursive --jobs 0
+
+    export CC='mpicc'  # presumably the OpenMPI wrappers placed on PATH earlier -- confirm
+    export CXX='mpicxx'
+    export CFLAGS='-mtune=generic'
+
+    export BLAS='MKL'
+    export USE_FBGEMM=1
+    export USE_MKLDNN=1
+    export USE_NNPACK=1
+    export USE_NUMPY=1
+    export USE_QNNPACK=1
+
+    export USE_DISTRIBUTED=1
+    export USE_IBVERBS=1
+    export USE_GLOO=1
+    export USE_MPI=1
+    export USE_C10D_MPI=1
+    export USE_NCCL=1
+    export USE_SYSTEM_NCCL=1
+    #sed -i '77,80 s/^/#/' cmake/Modules/FindNCCL.cmake
+
+    export USE_OPENMP=1
+    export ATEN_THREADING='OMP'
+    export MKL_THREADING='OMP'
+
+    export TORCH_CUDA_ARCH_LIST='7.0'  # single GPU target; the Horovod build uses the matching value 70
+    export USE_CUDA=1
+    export USE_CUDNN=1
+    export USE_TENSORRT=1
+
+    export USE_FFMPEG=0
+    export USE_OPENCV=0
+
+    python setup.py install  # picks up the exported settings above from the environment
+
+    cd /tmp
+
+    # Set TorchVision version number
+    export PYTORCH_VISION_VERSION='0.9.1'
+
+    # Download, build and install TorchVision
+    git clone https://github.com/pytorch/vision.git
+    cd vision
+    git checkout "v${PYTORCH_VISION_VERSION}"
+    python setup.py install
+
+    cd /tmp
+
+    # Set TorchText version number
+    export PYTORCH_TEXT_VERSION='0.9.2-rc1'
+
+    # Download, build and install TorchText
+    git clone https://github.com/pytorch/text.git
+    cd text
+    git checkout "v${PYTORCH_TEXT_VERSION}"
+    git submodule update --init --recursive
+    python setup.py clean install
+
+    cd /tmp
+
+    # Set TorchAudio version number
+    export PYTORCH_AUDIO_VERSION='0.8.1'
+
+    # Download, build and install TorchAudio
+    git clone https://github.com/pytorch/audio.git
+    cd audio
+    git checkout "v${PYTORCH_AUDIO_VERSION}"
+    git submodule update --init --recursive
+    BUILD_SOX=1 python setup.py clean install
+
+    cd /tmp
+
+    # Set PyTorch Ignite version number
+    export PYTORCH_IGNITE_VERSION='0.4.5'
+
+    # Install PyTorch Ignite
+    # https://github.com/pytorch/ignite
+    pip install pytorch-ignite=="${PYTORCH_IGNITE_VERSION}"
+
+    cd /tmp
+
+    # Set PyTorch Lightning version number
+    export PYTORCH_LIGHTNING_VERSION='1.4.9'
+
+    # Download, build and install PyTorch Lightning
+    # https://github.com/PyTorchLightning/pytorch-lightning
+    git clone https://github.com/PyTorchLightning/pytorch-lightning.git
+    cd pytorch-lightning
+    git checkout "${PYTORCH_LIGHTNING_VERSION}"
+    git submodule sync
+    git submodule update --init --recursive
+
+    # Patch Lightning's apply_func.py to always import Batch from torchtext.legacy; see
+    # https://github.com/PyTorchLightning/pytorch-lightning/issues/6210
+    # https://github.com/PyTorchLightning/pytorch-lightning/pull/6211
+    cd /tmp/pytorch-lightning/pytorch_lightning/utilities
+    # Quoted delimiter keeps the patch text literal; tee overwrites instead of appending
+    tee apply_func.patch << 'EOF'
+--- apply_func.py       2021-10-19 00:04:42.685745070 +0000
++++ apply_func.py       2021-10-19 01:57:26.057751093 +0000
+@@ -27,10 +27,10 @@
+ from pytorch_lightning.utilities.imports import _compare_version, _TORCHTEXT_AVAILABLE
+
+ if _TORCHTEXT_AVAILABLE:
+-    if _compare_version("torchtext", operator.ge, "0.9.0"):
+-        from torchtext.legacy.data import Batch
+-    else:
+-        from torchtext.data import Batch
++    #if _compare_version("torchtext", operator.ge, "0.9.0"):
++    from torchtext.legacy.data import Batch
++    #else:
++    #    from torchtext.data import Batch
+ else:
+     Batch = type(None)
+EOF
+    patch -u apply_func.py -i apply_func.patch || exit 1  # abort the build if the hunk no longer applies
+
+    cd /tmp/pytorch-lightning
+
+    python setup.py clean install
+
+    cd /tmp
+
+    # Set Horovod version number (used below to select the git tag)
+    export HOROVOD_VERSION=0.22.1
+
+    # Configure, build and install Horovod for PyTorch with CUDA/NCCL support
+    export HOROVOD_WITH_PYTORCH=1
+    export HOROVOD_WITHOUT_MXNET=1
+    export HOROVOD_WITHOUT_TENSORFLOW=1
+    export HOROVOD_WITH_MPI=1
+    export HOROVOD_WITH_GLOO=1
+    export HOROVOD_GPU=CUDA
+    export HOROVOD_CUDA_HOME="${CUDA_HOME}"  # same CUDA prefix exported earlier in %post
+    export HOROVOD_BUILD_CUDA_CC_LIST=70  # same GPU target as TORCH_CUDA_ARCH_LIST='7.0'
+    export HOROVOD_GPU_OPERATIONS=NCCL
+    export HOROVOD_NCCL_INCLUDE="${NCCL_INCLUDE}"  # /usr/include
+    export HOROVOD_NCCL_LIB="${NCCL_LIB}"  # /usr/lib/x86_64-linux-gnu
+
+    git clone --recursive https://github.com/horovod/horovod.git
+    cd horovod
+    git checkout "v${HOROVOD_VERSION}"
+    python3 setup.py clean
+    python3 setup.py sdist  # writes the source tarball that pip installs two lines down
+    ldconfig /usr/local/cuda/targets/x86_64-linux/lib/stubs  # presumably exposes the CUDA stub libs to the build -- confirm
+    pip3 install dist/horovod-*.tar.gz
+    ldconfig  # rerun without the extra directory once the install is done
+
+    cd /tmp
+
+    # Set ONNX version number (used below to select the git tag)
+    export ONNX_VERSION='1.10.1'
+
+    # Install the system protobuf headers and compiler needed to build ONNX
+    apt-get -y install libprotobuf-dev
+    apt-get -y install protobuf-compiler
+
+    # Build ONNX with shared, lite protobuf; export (not cmd-style `set`) so the build sees both flags
+    export CMAKE_ARGS="-DONNX_USE_PROTOBUF_SHARED_LIBS=ON -DONNX_USE_LITE_PROTO=ON"
+    git clone --recursive https://github.com/onnx/onnx.git
+    cd onnx
+    git checkout "v${ONNX_VERSION}"
+    # Regular install: an editable (-e) install would keep importing from the /tmp/onnx checkout
+    pip install .
+
+    # Cleanup
+    apt-get -y autoremove --purge
+    apt-get -y clean
+
+    # Update database for mlocate
+    updatedb
+
+%files
+
+%runscript
+
+%test
diff --git a/definition-files/tensorflow/Singularity.tensorflow-2.5.1-ubuntu-18.04-cuda-11.2-mlnx-ofed-4.7-3.2.9.0-openmpi-4.0.5 b/definition-files/tensorflow/Singularity.tensorflow-2.5.1-ubuntu-18.04-cuda-11.2-mlnx-ofed-4.7-3.2.9.0-openmpi-4.0.5
index 5a6b0dd..315607b 100644
--- a/definition-files/tensorflow/Singularity.tensorflow-2.5.1-ubuntu-18.04-cuda-11.2-mlnx-ofed-4.7-3.2.9.0-openmpi-4.0.5
+++ b/definition-files/tensorflow/Singularity.tensorflow-2.5.1-ubuntu-18.04-cuda-11.2-mlnx-ofed-4.7-3.2.9.0-openmpi-4.0.5
@@ -10,7 +10,7 @@ From: ghcr.io/mkandes/naked-singularity:ubuntu-18.04-cuda-11.2-mlnx-ofed-4.7-3.2
     AUTHOR_NAME Marty Kandes
     AUTHOR_EMAIL mkandes@sdsc.edu
 
-    LAST_UPDATED 20211002
+    LAST_UPDATED 20211019
 
 %setup
 
@@ -206,13 +206,12 @@ From: ghcr.io/mkandes/naked-singularity:ubuntu-18.04-cuda-11.2-mlnx-ofed-4.7-3.2
     git clone --recursive https://github.com/horovod/horovod.git
     cd horovod
     git checkout "v${HOROVOD_VERSION}"
-    pip3 setup.py sdist
+    python3 setup.py clean
+    python3 setup.py sdist
     ldconfig /usr/local/cuda/targets/x86_64-linux/lib/stubs 
-    pip3 install horovod
+    pip3 install dist/horovod-*.tar.gz
     ldconfig
 
-    #pip3 install horovod=="${HOROVOD_VERSION}"
-
     # Install TensorFlow tools, extentions, and related packages.
     pip3 install tensorflow-datasets==1.2.0
     pip3 install tensorflow-metadata==0.12.1
-- 
GitLab