Bootstrap: oras
From: ghcr.io/mkandes/naked-singularity:ubuntu-18.04-cuda-11.2-mlnx-ofed-4.7-3.2.9.0-openmpi-4.0.5

%labels

    APPLICATION_NAME tensorflow
    APPLICATION_VERSION 2.5.1
    APPLICATION_URL https://www.tensorflow.org

    AUTHOR_NAME Marty Kandes
    AUTHOR_EMAIL mkandes@sdsc.edu

    LAST_UPDATED 20211019

%setup

%environment

%post -c /bin/bash

    # Overview: on top of the CUDA + Mellanox OFED + OpenMPI base image named
    # in the header, this section builds Bazel and TensorFlow from source,
    # then installs Horovod, a set of TensorFlow add-on packages, tf2onnx,
    # and JupyterLab.

    # Set operating system mirror URL
    export MIRRORURL='http://us.archive.ubuntu.com/ubuntu'

    # Set operating system version
    export OSVERSION='bionic'

    # Set system locale
    export LC_ALL='C'

    # Set debian frontend interface
    export DEBIAN_FRONTEND='noninteractive'

    # Set NVIDIA driver and CUDA versions
    export CUDA_DRIVER_MAJOR='460'
    export CUDA_DRIVER_MINOR='32'
    export CUDA_DRIVER_REVISION='03'
    export CUDA_DRIVER_VERSION="${CUDA_DRIVER_MAJOR}.${CUDA_DRIVER_MINOR}.${CUDA_DRIVER_REVISION}"

    export CUDA_MAJOR='11'
    export CUDA_MINOR='2'
    export CUDA_REVISION='2'
    export CUDA_VERSION="${CUDA_MAJOR}.${CUDA_MINOR}.${CUDA_REVISION}"

    export CUDNN_MAJOR='8'
    export CUDNN_MINOR='1'
    export CUDNN_REVISION='1.33'
    export CUDNN_VERSION="${CUDNN_MAJOR}.${CUDNN_MINOR}.${CUDNN_REVISION}"

    export NCCL_MAJOR='2'
    export NCCL_MINOR='8'
    export NCCL_REVISION='4'
    export NCCL_VERSION="${NCCL_MAJOR}.${NCCL_MINOR}.${NCCL_REVISION}"

    export TENSORRT_MAJOR='8'
    export TENSORRT_MINOR='0'
    export TENSORRT_REVISION='3'
    export TENSORRT_VERSION="${TENSORRT_MAJOR}.${TENSORRT_MINOR}.${TENSORRT_REVISION}"

    # Set paths to CUDA binaries and libraries
    export CUDA_HOME="/usr/local/cuda-${CUDA_MAJOR}.${CUDA_MINOR}"
    export CUDNN_INCLUDE='/usr/include'
    export CUDNN_LIB='/usr/lib/x86_64-linux-gnu'
    export NCCL_INCLUDE='/usr/include'
    export NCCL_LIB='/usr/lib/x86_64-linux-gnu'
    export TENSORRT_INCLUDE='/usr/include'
    export TENSORRT_LIB='/usr/lib/x86_64-linux-gnu'
    # The ${VAR:+:${VAR}} form appends the previous value only when it is
    # non-empty, so no stray leading/trailing ':' ends up in the path.
    export PATH="${CUDA_HOME}/bin${PATH:+:${PATH}}"
    export LD_LIBRARY_PATH="${CUDA_HOME}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
    export LD_LIBRARY_PATH="${CUDNN_LIB}:${LD_LIBRARY_PATH}"
    export LD_LIBRARY_PATH="${NCCL_LIB}:${LD_LIBRARY_PATH}"
    export LD_LIBRARY_PATH="${TENSORRT_LIB}:${LD_LIBRARY_PATH}"

    # Set Mellanox OFED version, operating system, and hardware platform
    export MLNX_ROOT_URL='http://www.mellanox.com/downloads/ofed'
    export MLNX_OFED_VERSION='4.7-3.2.9.0'
    export MLNX_OS_VERSION='ubuntu18.04'
    export MLNX_PLATFORM='x86_64'

    # Set OpenMPI major, minor, and revision numbers, root and
    # installation directories
    export OMPI_ROOT_URL='https://download.open-mpi.org/release/open-mpi'
    export OMPI_MAJOR='4'
    export OMPI_MINOR='0'
    export OMPI_REVISION='5'
    export OMPI_VERSION="${OMPI_MAJOR}.${OMPI_MINOR}.${OMPI_REVISION}"
    export OMPI_ROOT_DIR='/opt/openmpi'
    export OMPI_INSTALL_DIR="${OMPI_ROOT_DIR}/${OMPI_VERSION}"

    # Set paths to OpenMPI binaries and libraries
    export PATH="${OMPI_INSTALL_DIR}/bin:${PATH}"
    export LD_LIBRARY_PATH="${OMPI_INSTALL_DIR}/lib:${LD_LIBRARY_PATH}"

    # DO NOT Upgrade all packages to their latest versions
    # apt-get -y update && apt-get -y upgrade

    # Install python3 and some common core packages
    apt-get -y install python3-dev
    apt-get -y install python3-pip
    apt-get -y install python3-setuptools
    apt-get -y install python3-virtualenv

    # Install Bazel dependencies and prerequisites
    apt-get -y install g++
    apt-get -y install unzip
    apt-get -y install zip
    apt-get -y install git
    apt-get -y install openjdk-11-jdk

    cd /tmp

    # Set Bazel version number, root and installation directories
    export BAZEL_VERSION='3.7.2'
    export BAZEL_ROOT_DIR='/tmp/bazel'
    export BAZEL_INSTALL_DIR="${BAZEL_ROOT_DIR}/${BAZEL_VERSION}"

    # Download, build, and install Bazel
    mkdir -p "${BAZEL_INSTALL_DIR}"
    cd "${BAZEL_INSTALL_DIR}"
    wget "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-dist.zip"
    unzip -o "bazel-${BAZEL_VERSION}-dist.zip"
    export EXTRA_BAZEL_ARGS="--host_javabase=@local_jdk//:jdk"
    # Point the unversioned 'python' command at python3 before bootstrapping
    # Bazel (presumably because the build expects a 'python' executable --
    # confirm). -f keeps this step from erroring when /usr/bin/python is
    # absent or the link already exists.
    rm -f /usr/bin/python
    ln -sf /usr/bin/python3 /usr/bin/python
    ./compile.sh

    # Set path to Bazel
    export PATH="${BAZEL_INSTALL_DIR}/output:${PATH}"

    # Install common mathematical and scientific python3 packages
    apt-get -y install python3-numpy
    apt-get -y install python3-scipy
    apt-get -y install python3-pandas
    apt-get -y install python3-sklearn
    apt-get -y install python3-skimage
    apt-get -y install python3-statsmodels
    apt-get -y install python3-matplotlib
    apt-get -y install python3-plotly
    apt-get -y install python3-seaborn
    apt-get -y install python3-opencv
    apt-get -y install python3-astropy
    apt-get -y install python3-biopython
    apt-get -y install python3-h5py
    apt-get -y install python3-nltk
    apt-get -y install python3-sympy
    apt-get -y install python3-netcdf4

    # Install mpi4py
    pip3 install mpi4py==3.0.3

    # Install additional dependencies for TensorFlow
    apt-get -y install python3-six
    apt-get -y install python3-wheel
    apt-get -y install python3-mock
    apt-get -y install python3-future
    pip3 install keras_preprocessing --no-deps

    cd /tmp

    # Set TensorFlow version number
    export TENSORFLOW_VERSION='2.5.1'

    # Download, build, and install TensorFlow
    git clone https://github.com/tensorflow/tensorflow.git
    cd tensorflow
    git checkout "v${TENSORFLOW_VERSION}"
    # Build options are supplied through environment variables.
    export PYTHON_BIN_PATH='/usr/bin/python3'
    export PYTHON_LIB_PATH='/usr/local/lib/python3.6/dist-packages'
    export TF_ENABLE_XLA=1
    export TF_NEED_OPENCL_SYCL=0
    export TF_NEED_ROCM=0
    export TF_NEED_CUDA=1
    export TF_NEED_TENSORRT=0
    export TF_CUDA_COMPUTE_CAPABILITIES='7.0'
    export TF_CUDA_CLANG=0
    export TF_NEED_MPI=1
    # Use the OpenMPI installation directory defined above
    # (OMPI_INSTALL_DIR); no OPENMPI_INSTALL_DIR variable is set in this file.
    export MPI_HOME="${OMPI_INSTALL_DIR}"
    export CC_OPT_FLAGS='-mtune=generic'
    export TF_SET_ANDROID_WORKSPACE=0
    bazel build \
        --local_ram_resources 8192 \
        --local_cpu_resources 4 \
        -c opt \
        --config=cuda \
        --config=numa \
        --config=v2 \
        --verbose_failures \
        //tensorflow/tools/pip_package:build_pip_package
    bazel-bin/tensorflow/tools/pip_package/build_pip_package tensorflow_pkg
    # The wheel file name hard-codes the CPython 3.6 tag (cp36), matching the
    # python3.6 path used in PYTHON_LIB_PATH above.
    pip3 install "tensorflow_pkg/tensorflow-${TENSORFLOW_VERSION}-cp36-cp36m-linux_x86_64.whl"

    cd /tmp

    # Install Horovod with TensorFlow + NVIDIA GPU support
    export HOROVOD_VERSION=0.22.1
    export HOROVOD_WITHOUT_PYTORCH=1
    export HOROVOD_WITHOUT_MXNET=1
    export HOROVOD_WITH_TENSORFLOW=1
    export HOROVOD_WITH_MPI=1
    export HOROVOD_WITH_GLOO=1
    export HOROVOD_GPU=CUDA
    export HOROVOD_CUDA_HOME="${CUDA_HOME}"
    export HOROVOD_BUILD_CUDA_CC_LIST=70
    export HOROVOD_GPU_OPERATIONS=NCCL
    export HOROVOD_NCCL_INCLUDE="${NCCL_INCLUDE}"
    export HOROVOD_NCCL_LIB="${NCCL_LIB}"
    git clone --recursive https://github.com/horovod/horovod.git
    cd horovod
    git checkout "v${HOROVOD_VERSION}"
    python3 setup.py clean
    python3 setup.py sdist
    # Add the CUDA stub library directory to the linker cache for the
    # duration of the Horovod build (presumably because no NVIDIA driver
    # library is available at image build time -- confirm), then rebuild the
    # cache without it afterwards.
    ldconfig /usr/local/cuda/targets/x86_64-linux/lib/stubs
    pip3 install dist/horovod-*.tar.gz
    ldconfig

    # Install TensorFlow tools, extensions, and related packages.
    pip3 install tensorflow-datasets==1.2.0
    pip3 install tensorflow-metadata==0.12.1
    pip3 install tensorflow-model-optimization==0.5.0
    pip3 install tensorflow-probability==0.7.0
    pip3 install tensorflow-hub==0.11.0
    pip3 install tensorflow-gan==2.0.0

    # Install TensorFlow Profiler
    pip3 install tensorboard-plugin-profile==2.5.0

    cd /tmp

    # Install tf2onnx
    git clone https://github.com/onnx/tensorflow-onnx
    cd tensorflow-onnx
    git checkout v1.9.2
    python3 setup.py install

    # Install JupyterLab
    pip3 install jupyter
    pip3 install jupyterlab
    pip3 install --upgrade --force jupyter-console

    # Cleanup
    apt-get -y autoremove --purge
    apt-get -y clean

    # Update database for mlocate
    updatedb

%files

%runscript

%test