Something went wrong on our end
-
Marty Kandes authored
Updated PyTorch to v1.8.2, which is expected to be the first version available on Voyager. The container now includes PyTorch Lightning, Horovod, and ONNX. Note the small fix/patch applied to PyTorch Lightning in order to support TorchText integration. Also corrected source build instructions of Horovod for TensorFlow in the latest definition file.
Marty Kandes authored: Updated PyTorch to v1.8.2, which is expected to be the first version available on Voyager. The container now includes PyTorch Lightning, Horovod, and ONNX. Note the small fix/patch applied to PyTorch Lightning in order to support TorchText integration. Also corrected source build instructions of Horovod for TensorFlow in the latest definition file.
Singularity.tensorflow-2.5.1-ubuntu-18.04-cuda-11.2-mlnx-ofed-4.7-3.2.9.0-openmpi-4.0.5 7.94 KiB
Bootstrap: oras
From: ghcr.io/mkandes/naked-singularity:ubuntu-18.04-cuda-11.2-mlnx-ofed-4.7-3.2.9.0-openmpi-4.0.5
%labels
APPLICATION_NAME tensorflow
APPLICATION_VERSION 2.5.1
APPLICATION_URL https://www.tensorflow.org
AUTHOR_NAME Marty Kandes
AUTHOR_EMAIL mkandes@sdsc.edu
LAST_UPDATED 20211019
%setup
%environment
%post -c /bin/bash
# Set operating system mirror URL
export MIRRORURL='http://us.archive.ubuntu.com/ubuntu'
# Set operating system version
export OSVERSION='bionic'
# Set system locale
export LC_ALL='C'
# Set debian frontend interface
export DEBIAN_FRONTEND='noninteractive'
# Set NVIDIA driver and CUDA versions
export CUDA_DRIVER_MAJOR='460'
export CUDA_DRIVER_MINOR='32'
export CUDA_DRIVER_REVISION='03'
export CUDA_DRIVER_VERSION="${CUDA_DRIVER_MAJOR}.${CUDA_DRIVER_MINOR}.${CUDA_DRIVER_REVISION}"
export CUDA_MAJOR='11'
export CUDA_MINOR='2'
export CUDA_REVISION='2'
export CUDA_VERSION="${CUDA_MAJOR}.${CUDA_MINOR}.${CUDA_REVISION}"
export CUDNN_MAJOR='8'
export CUDNN_MINOR='1'
export CUDNN_REVISION='1.33'
export CUDNN_VERSION="${CUDNN_MAJOR}.${CUDNN_MINOR}.${CUDNN_REVISION}"
export NCCL_MAJOR='2'
export NCCL_MINOR='8'
export NCCL_REVISION='4'
export NCCL_VERSION="${NCCL_MAJOR}.${NCCL_MINOR}.${NCCL_REVISION}"
export TENSORRT_MAJOR='8'
export TENSORRT_MINOR='0'
export TENSORRT_REVISION='3'
export TENSORRT_VERSION="${TENSORRT_MAJOR}.${TENSORRT_MINOR}.${TENSORRT_REVISION}"
# Set paths to CUDA binaries and libraries
export CUDA_HOME="/usr/local/cuda-${CUDA_MAJOR}.${CUDA_MINOR}"
export CUDNN_INCLUDE='/usr/include'
export CUDNN_LIB='/usr/lib/x86_64-linux-gnu'
export NCCL_INCLUDE='/usr/include'
export NCCL_LIB='/usr/lib/x86_64-linux-gnu'
export TENSORRT_INCLUDE='/usr/include'
export TENSORRT_LIB='/usr/lib/x86_64-linux-gnu'
export PATH="${CUDA_HOME}/bin${PATH:+:${PATH}}"
export LD_LIBRARY_PATH="${CUDA_HOME}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
export LD_LIBRARY_PATH="${CUDNN_LIB}:${LD_LIBRARY_PATH}"
export LD_LIBRARY_PATH="${NCCL_LIB}:${LD_LIBRARY_PATH}"
export LD_LIBRARY_PATH="${TENSORRT_LIB}:${LD_LIBRARY_PATH}"
# Set Mellanox OFED version, operating system, and hardware platform
export MLNX_ROOT_URL='http://www.mellanox.com/downloads/ofed'
export MLNX_OFED_VERSION='4.7-3.2.9.0'
export MLNX_OS_VERSION='ubuntu18.04'
export MLNX_PLATFORM='x86_64'
# Set OpenMPI major, minor, and revision numbers, root and
# installation directories
export OMPI_ROOT_URL='https://download.open-mpi.org/release/open-mpi'
export OMPI_MAJOR='4'
export OMPI_MINOR='0'
export OMPI_REVISION='5'
export OMPI_VERSION="${OMPI_MAJOR}.${OMPI_MINOR}.${OMPI_REVISION}"
export OMPI_ROOT_DIR='/opt/openmpi'
export OMPI_INSTALL_DIR="${OMPI_ROOT_DIR}/${OMPI_VERSION}"
# Set paths to OpenMPI binaries and libraries
export PATH="${OMPI_INSTALL_DIR}/bin:${PATH}"
export LD_LIBRARY_PATH="${OMPI_INSTALL_DIR}/lib:${LD_LIBRARY_PATH}"
# DO NOT Upgrade all packages to their latest versions
# apt-get -y update && apt-get -y upgrade
# Install python3 and some common core packages
apt-get -y install python3-dev
apt-get -y install python3-pip
apt-get -y install python3-setuptools
apt-get -y install python3-virtualenv
# Install Bazel dependencies and prerequisites
apt-get -y install g++
apt-get -y install unzip
apt-get -y install zip
apt-get -y install git
apt-get -y install openjdk-11-jdk
cd /tmp
# Set Bazel version number, root and installation directories
export BAZEL_VERSION='3.7.2'
export BAZEL_ROOT_DIR='/tmp/bazel'
export BAZEL_INSTALL_DIR="${BAZEL_ROOT_DIR}/${BAZEL_VERSION}"
# Download, build, and install Bazel
mkdir -p "${BAZEL_INSTALL_DIR}"
cd "${BAZEL_INSTALL_DIR}"
wget "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-dist.zip"
unzip -o "bazel-${BAZEL_VERSION}-dist.zip"
export EXTRA_BAZEL_ARGS="--host_javabase=@local_jdk//:jdk"
rm /usr/bin/python
ln -s /usr/bin/python3 /usr/bin/python
./compile.sh
# Set path to Bazel
export PATH="${BAZEL_INSTALL_DIR}/output:${PATH}"
# Install common mathematical and scientific python3 packages
apt-get -y install python3-numpy
apt-get -y install python3-scipy
apt-get -y install python3-pandas
apt-get -y install python3-sklearn
apt-get -y install python3-skimage
apt-get -y install python3-statsmodels
apt-get -y install python3-matplotlib
apt-get -y install python3-plotly
apt-get -y install python3-seaborn
apt-get -y install python3-opencv
apt-get -y install python3-astropy
apt-get -y install python3-biopython
apt-get -y install python3-h5py
apt-get -y install python3-nltk
apt-get -y install python3-sympy
apt-get -y install python3-netcdf4
# Install mpi4py
pip3 install mpi4py==3.0.3
# Install additioanl dependencies for TensorFlow
apt-get -y install python3-six
apt-get -y install python3-wheel
apt-get -y install python3-mock
apt-get -y install python3-future
pip3 install keras_preprocessing --no-deps
cd /tmp
# Set TensorFlow version number, root and install directories
export TENSORFLOW_VERSION='2.5.1'
# Download, build, and install TensorFlow
git clone https://github.com/tensorflow/tensorflow.git
cd tensorflow
git checkout "v${TENSORFLOW_VERSION}"
export PYTHON_BIN_PATH='/usr/bin/python3'
export PYTHON_LIB_PATH='/usr/local/lib/python3.6/dist-packages'
export TF_ENABLE_XLA=1
export TF_NEED_OPENCL_SYCL=0
export TF_NEED_ROCM=0
export TF_NEED_CUDA=1
export TF_NEED_TENSORRT=0
export TF_CUDA_COMPUTE_CAPABILITIES='7.0'
export TF_CUDA_CLANG=0
export TF_NEED_MPI=1
export MPI_HOME="${OPENMPI_INSTALL_DIR}"
export CC_OPT_FLAGS='-mtune=generic'
export TF_SET_ANDROID_WORKSPACE=0
bazel build --local_ram_resources 8192 --local_cpu_resources 4 -c opt --config=cuda --config=numa --config=v2 --verbose_failures //tensorflow/tools/pip_package:build_pip_package
bazel-bin/tensorflow/tools/pip_package/build_pip_package tensorflow_pkg
pip3 install "tensorflow_pkg/tensorflow-${TENSORFLOW_VERSION}-cp36-cp36m-linux_x86_64.whl"
cd /tmp
# Install Horovod with TensorFlow + NVIDIA GPU support
export HOROVOD_VERSION=0.22.1
export HOROVOD_WITHOUT_PYTORCH=1
export HOROVOD_WITHOUT_MXNET=1
export HOROVOD_WITH_TENSORFLOW=1
export HOROVOD_WITH_MPI=1
export HOROVOD_WITH_GLOO=1
export HOROVOD_GPU=CUDA
export HOROVOD_CUDA_HOME="${CUDA_HOME}"
export HOROVOD_BUILD_CUDA_CC_LIST=70
export HOROVOD_GPU_OPERATIONS=NCCL
export HOROVOD_NCCL_INCLUDE="${NCCL_INCLUDE}"
export HOROVOD_NCCL_LIB="${NCCL_LIB}"
git clone --recursive https://github.com/horovod/horovod.git
cd horovod
git checkout "v${HOROVOD_VERSION}"
python3 setup.py clean
python3 setup.py sdist
ldconfig /usr/local/cuda/targets/x86_64-linux/lib/stubs
pip3 install dist/horovod-*.tar.gz
ldconfig
# Install TensorFlow tools, extentions, and related packages.
pip3 install tensorflow-datasets==1.2.0
pip3 install tensorflow-metadata==0.12.1
pip3 install tensorflow-model-optimization==0.5.0
pip3 install tensorflow-probability==0.7.0
pip3 install tensorflow-hub==0.11.0
pip3 install tensorflow-gan==2.0.0
# Install TensorFlow Profiler
pip3 install tensorboard-plugin-profile==2.5.0
cd /tmp
# Install tf2onnx
git clone https://github.com/onnx/tensorflow-onnx
cd tensorflow-onnx
git checkout v1.9.2
python3 setup.py install
# Install JupyterLab
pip3 install jupyter
pip3 install jupyterlab
pip3 install --upgrade --force jupyter-console
# Cleanup
apt-get -y autoremove --purge
apt-get -y clean
# Update database for mlocate
updatedb
%files
%runscript
%test