Skip to content
Snippets Groups Projects
Commit 6674cc65 authored by Marty Kandes's avatar Marty Kandes
Browse files

Add Singularity.tensorflow-2.5.1-ubuntu-18.04-cuda-11.2-mlnx-ofed-4.7-3.2.9.0-openmpi-4.0.5

This TensorFlow container is intended to minic the Habana v1.0 software
stack [1] that will be available when Voyager is deployed. It will be
used to prepare for Voyager acceptance testing and early user access
code development.

[1]

https://docs.habana.ai/en/latest/Installation_Guide/GAUDI_Installation_Guide.html#tensorflow-installation
parent 2260899c
No related branches found
No related tags found
No related merge requests found
...@@ -60,7 +60,7 @@ University of California, San Diego ...@@ -60,7 +60,7 @@ University of California, San Diego
## Version ## Version
1.9.7 1.9.8
## Last Updated ## Last Updated
......
Bootstrap: oras
From: ghcr.io/mkandes/naked-singularity:ubuntu-18.04-cuda-11.2-mlnx-ofed-4.7-3.2.9.0-openmpi-4.0.5
%labels
APPLICATION_NAME tensorflow
APPLICATION_VERSION 2.5.1
APPLICATION_URL https://www.tensorflow.org
AUTHOR_NAME Marty Kandes
AUTHOR_EMAIL mkandes@sdsc.edu
LAST_UPDATED 20211002
%setup
%environment
%post -c /bin/bash
# Set operating system mirror URL
export MIRRORURL='http://us.archive.ubuntu.com/ubuntu'
# Set operating system version
export OSVERSION='bionic'
# Set system locale
export LC_ALL='C'
# Set debian frontend interface
export DEBIAN_FRONTEND='noninteractive'
# Set NVIDIA driver and CUDA versions
export CUDA_DRIVER_MAJOR='460'
export CUDA_DRIVER_MINOR='32'
export CUDA_DRIVER_REVISION='03'
export CUDA_DRIVER_VERSION="${CUDA_DRIVER_MAJOR}.${CUDA_DRIVER_MINOR}.${CUDA_DRIVER_REVISION}"
export CUDA_MAJOR='11'
export CUDA_MINOR='2'
export CUDA_REVISION='2'
export CUDA_VERSION="${CUDA_MAJOR}.${CUDA_MINOR}.${CUDA_REVISION}"
export CUDNN_MAJOR='8'
export CUDNN_MINOR='1'
export CUDNN_REVISION='1.33'
export CUDNN_VERSION="${CUDNN_MAJOR}.${CUDNN_MINOR}.${CUDNN_REVISION}"
export NCCL_MAJOR='2'
export NCCL_MINOR='8'
export NCCL_REVISION='4'
export NCCL_VERSION="${NCCL_MAJOR}.${NCCL_MINOR}.${NCCL_REVISION}"
export TENSORRT_MAJOR='8'
export TENSORRT_MINOR='0'
export TENSORRT_REVISION='3'
export TENSORRT_VERSION="${TENSORRT_MAJOR}.${TENSORRT_MINOR}.${TENSORRT_REVISION}"
# Set paths to CUDA binaries and libraries
export CUDA_HOME="/usr/local/cuda-${CUDA_MAJOR}.${CUDA_MINOR}"
export CUDNN_INCLUDE='/usr/include'
export CUDNN_LIB='/usr/lib/x86_64-linux-gnu'
export NCCL_INCLUDE='/usr/include'
export NCCL_LIB='/usr/lib/x86_64-linux-gnu'
export TENSORRT_INCLUDE='/usr/include'
export TENSORRT_LIB='/usr/lib/x86_64-linux-gnu'
export PATH="${CUDA_HOME}/bin${PATH:+:${PATH}}"
export LD_LIBRARY_PATH="${CUDA_HOME}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
export LD_LIBRARY_PATH="${CUDNN_LIB}:${LD_LIBRARY_PATH}"
export LD_LIBRARY_PATH="${NCCL_LIB}:${LD_LIBRARY_PATH}"
export LD_LIBRARY_PATH="${TENSORRT_LIB}:${LD_LIBRARY_PATH}"
# Set Mellanox OFED version, operating system, and hardware platform
export MLNX_ROOT_URL='http://www.mellanox.com/downloads/ofed'
export MLNX_OFED_VERSION='4.7-3.2.9.0'
export MLNX_OS_VERSION='ubuntu18.04'
export MLNX_PLATFORM='x86_64'
# Set OpenMPI major, minor, and revision numbers, root and
# installation directories
export OMPI_ROOT_URL='https://download.open-mpi.org/release/open-mpi'
export OMPI_MAJOR='4'
export OMPI_MINOR='0'
export OMPI_REVISION='5'
export OMPI_VERSION="${OMPI_MAJOR}.${OMPI_MINOR}.${OMPI_REVISION}"
export OMPI_ROOT_DIR='/opt/openmpi'
export OMPI_INSTALL_DIR="${OMPI_ROOT_DIR}/${OMPI_VERSION}"
# Set paths to OpenMPI binaries and libraries
export PATH="${OMPI_INSTALL_DIR}/bin:${PATH}"
export LD_LIBRARY_PATH="${OMPI_INSTALL_DIR}/lib:${LD_LIBRARY_PATH}"
# DO NOT Upgrade all packages to their latest versions
# apt-get -y update && apt-get -y upgrade
# Install python3 and some common core packages
apt-get -y install python3-dev
apt-get -y install python3-pip
apt-get -y install python3-setuptools
apt-get -y install python3-virtualenv
# Install Bazel dependencies and prerequisites
apt-get -y install g++
apt-get -y install unzip
apt-get -y install zip
apt-get -y install git
apt-get -y install openjdk-11-jdk
cd /tmp
# Set Bazel version number, root and installation directories
export BAZEL_VERSION='3.7.2'
export BAZEL_ROOT_DIR='/tmp/bazel'
export BAZEL_INSTALL_DIR="${BAZEL_ROOT_DIR}/${BAZEL_VERSION}"
# Download, build, and install Bazel
mkdir -p "${BAZEL_INSTALL_DIR}"
cd "${BAZEL_INSTALL_DIR}"
wget "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-dist.zip"
unzip -o "bazel-${BAZEL_VERSION}-dist.zip"
export EXTRA_BAZEL_ARGS="--host_javabase=@local_jdk//:jdk"
rm /usr/bin/python
ln -s /usr/bin/python3 /usr/bin/python
./compile.sh
# Set path to Bazel
export PATH="${BAZEL_INSTALL_DIR}/output:${PATH}"
# Install common mathematical and scientific python3 packages
apt-get -y install python3-numpy
apt-get -y install python3-scipy
apt-get -y install python3-pandas
apt-get -y install python3-sklearn
apt-get -y install python3-skimage
apt-get -y install python3-statsmodels
apt-get -y install python3-matplotlib
apt-get -y install python3-plotly
apt-get -y install python3-seaborn
apt-get -y install python3-opencv
apt-get -y install python3-astropy
apt-get -y install python3-biopython
apt-get -y install python3-h5py
apt-get -y install python3-nltk
apt-get -y install python3-sympy
apt-get -y install python3-netcdf4
# Install mpi4py
pip3 install mpi4py==3.0.3
# Install additioanl dependencies for TensorFlow
apt-get -y install python3-six
apt-get -y install python3-wheel
apt-get -y install python3-mock
apt-get -y install python3-future
pip3 install keras_preprocessing --no-deps
cd /tmp
# Set TensorFlow version number, root and install directories
export TENSORFLOW_VERSION='2.5.1'
# Download, build, and install TensorFlow
git clone https://github.com/tensorflow/tensorflow.git
cd tensorflow
git checkout "v${TENSORFLOW_VERSION}"
export PYTHON_BIN_PATH='/usr/bin/python3'
export PYTHON_LIB_PATH='/usr/local/lib/python3.6/dist-packages'
export TF_ENABLE_XLA=1
export TF_NEED_OPENCL_SYCL=0
export TF_NEED_ROCM=0
export TF_NEED_CUDA=1
export TF_NEED_TENSORRT=0
export TF_CUDA_COMPUTE_CAPABILITIES='7.0'
export TF_CUDA_CLANG=0
export TF_NEED_MPI=1
export MPI_HOME="${OPENMPI_INSTALL_DIR}"
export CC_OPT_FLAGS='-mtune=generic'
export TF_SET_ANDROID_WORKSPACE=0
bazel build --local_ram_resources 8192 --local_cpu_resources 4 -c opt --config=cuda --config=numa --config=v2 --verbose_failures //tensorflow/tools/pip_package:build_pip_package
bazel-bin/tensorflow/tools/pip_package/build_pip_package tensorflow_pkg
pip3 install "tensorflow_pkg/tensorflow-${TENSORFLOW_VERSION}-cp36-cp36m-linux_x86_64.whl"
cd /tmp
# Install Horovod with TensorFlow + NVIDIA GPU support
export HOROVOD_VERSION=0.22.1
export HOROVOD_WITHOUT_PYTORCH=1
export HOROVOD_WITHOUT_MXNET=1
export HOROVOD_WITH_TENSORFLOW=1
export HOROVOD_WITH_MPI=1
export HOROVOD_WITH_GLOO=1
export HOROVOD_GPU=CUDA
export HOROVOD_CUDA_HOME="${CUDA_HOME}"
export HOROVOD_BUILD_CUDA_CC_LIST=70
export HOROVOD_GPU_OPERATIONS=NCCL
export HOROVOD_NCCL_INCLUDE="${NCCL_INCLUDE}"
export HOROVOD_NCCL_LIB="${NCCL_LIB}"
git clone --recursive https://github.com/horovod/horovod.git
cd horovod
git checkout "v${HOROVOD_VERSION}"
pip3 setup.py sdist
ldconfig /usr/local/cuda/targets/x86_64-linux/lib/stubs
pip3 install horovod
ldconfig
#pip3 install horovod=="${HOROVOD_VERSION}"
# Install TensorFlow tools, extentions, and related packages.
pip3 install tensorflow-datasets==1.2.0
pip3 install tensorflow-metadata==0.12.1
pip3 install tensorflow-model-optimization==0.5.0
pip3 install tensorflow-probability==0.7.0
pip3 install tensorflow-hub==0.11.0
pip3 install tensorflow-gan==2.0.0
# Install TensorFlow Profiler
pip3 install tensorboard-plugin-profile==2.5.0
cd /tmp
# Install tf2onnx
git clone https://github.com/onnx/tensorflow-onnx
cd tensorflow-onnx
git checkout v1.9.2
python3 setup.py install
# Install JupyterLab
pip3 install jupyter
pip3 install jupyterlab
pip3 install --upgrade --force jupyter-console
# Cleanup
apt-get -y autoremove --purge
apt-get -y clean
# Update database for mlocate
updatedb
%files
%runscript
%test
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment