diff --git a/README.md b/README.md
index ba52dbe8d58691a3d129f37ce632577d0e33263c..1eceb4ac12f379078d1127d6c2a68cab1ff96a6c 100644
--- a/README.md
+++ b/README.md
@@ -60,7 +60,7 @@ University of California, San Diego
 
 ## Version
 
-1.9.7
+1.9.8
 
 ## Last Updated
 
diff --git a/definition-files/tensorflow/Singularity.tensorflow-2.5.1-ubuntu-18.04-cuda-11.2-mlnx-ofed-4.7-3.2.9.0-openmpi-4.0.5 b/definition-files/tensorflow/Singularity.tensorflow-2.5.1-ubuntu-18.04-cuda-11.2-mlnx-ofed-4.7-3.2.9.0-openmpi-4.0.5
new file mode 100644
index 0000000000000000000000000000000000000000..5a6b0dddf92bcfe30641fecf92a94712e4a2c4d4
--- /dev/null
+++ b/definition-files/tensorflow/Singularity.tensorflow-2.5.1-ubuntu-18.04-cuda-11.2-mlnx-ofed-4.7-3.2.9.0-openmpi-4.0.5
@@ -0,0 +1,251 @@
+Bootstrap: oras
+From: ghcr.io/mkandes/naked-singularity:ubuntu-18.04-cuda-11.2-mlnx-ofed-4.7-3.2.9.0-openmpi-4.0.5
+
+%labels
+
+    APPLICATION_NAME tensorflow
+    APPLICATION_VERSION 2.5.1
+    APPLICATION_URL https://www.tensorflow.org
+
+    AUTHOR_NAME Marty Kandes
+    AUTHOR_EMAIL mkandes@sdsc.edu
+
+    LAST_UPDATED 20211002
+
+%setup
+
+%environment
+
+%post -c /bin/bash
+
+    # Set operating system mirror URL
+    export MIRRORURL='http://us.archive.ubuntu.com/ubuntu'
+
+    # Set operating system version
+    export OSVERSION='bionic'
+
+    # Set system locale
+    export LC_ALL='C'
+
+    # Set debian frontend interface
+    export DEBIAN_FRONTEND='noninteractive'
+
+    # Set NVIDIA driver, CUDA, cuDNN, NCCL, and TensorRT versions
+    export CUDA_DRIVER_MAJOR='460'
+    export CUDA_DRIVER_MINOR='32'
+    export CUDA_DRIVER_REVISION='03'
+    export CUDA_DRIVER_VERSION="${CUDA_DRIVER_MAJOR}.${CUDA_DRIVER_MINOR}.${CUDA_DRIVER_REVISION}"
+
+    export CUDA_MAJOR='11'
+    export CUDA_MINOR='2'
+    export CUDA_REVISION='2'
+    export CUDA_VERSION="${CUDA_MAJOR}.${CUDA_MINOR}.${CUDA_REVISION}"
+
+    export CUDNN_MAJOR='8'
+    export CUDNN_MINOR='1'
+    export CUDNN_REVISION='1.33'
+    export CUDNN_VERSION="${CUDNN_MAJOR}.${CUDNN_MINOR}.${CUDNN_REVISION}"
+
+    export NCCL_MAJOR='2'
+    export NCCL_MINOR='8'
+    export NCCL_REVISION='4'
+    export NCCL_VERSION="${NCCL_MAJOR}.${NCCL_MINOR}.${NCCL_REVISION}"
+
+    export TENSORRT_MAJOR='8'
+    export TENSORRT_MINOR='0'
+    export TENSORRT_REVISION='3'
+    export TENSORRT_VERSION="${TENSORRT_MAJOR}.${TENSORRT_MINOR}.${TENSORRT_REVISION}"
+
+    # Set paths to CUDA, cuDNN, NCCL, and TensorRT binaries and libraries
+    export CUDA_HOME="/usr/local/cuda-${CUDA_MAJOR}.${CUDA_MINOR}"
+
+    export CUDNN_INCLUDE='/usr/include'
+    export CUDNN_LIB='/usr/lib/x86_64-linux-gnu'
+
+    export NCCL_INCLUDE='/usr/include'
+    export NCCL_LIB='/usr/lib/x86_64-linux-gnu'
+
+    export TENSORRT_INCLUDE='/usr/include'
+    export TENSORRT_LIB='/usr/lib/x86_64-linux-gnu'
+
+    export PATH="${CUDA_HOME}/bin${PATH:+:${PATH}}"
+    export LD_LIBRARY_PATH="${CUDA_HOME}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
+    export LD_LIBRARY_PATH="${CUDNN_LIB}:${LD_LIBRARY_PATH}"
+    export LD_LIBRARY_PATH="${NCCL_LIB}:${LD_LIBRARY_PATH}"
+    export LD_LIBRARY_PATH="${TENSORRT_LIB}:${LD_LIBRARY_PATH}"
+
+    # Set Mellanox OFED version, operating system, and hardware platform
+    export MLNX_ROOT_URL='http://www.mellanox.com/downloads/ofed'
+    export MLNX_OFED_VERSION='4.7-3.2.9.0'
+    export MLNX_OS_VERSION='ubuntu18.04'
+    export MLNX_PLATFORM='x86_64'
+
+    # Set OpenMPI major, minor, and revision numbers, root and
+    # installation directories
+    export OMPI_ROOT_URL='https://download.open-mpi.org/release/open-mpi'
+    export OMPI_MAJOR='4'
+    export OMPI_MINOR='0'
+    export OMPI_REVISION='5'
+    export OMPI_VERSION="${OMPI_MAJOR}.${OMPI_MINOR}.${OMPI_REVISION}"
+    export OMPI_ROOT_DIR='/opt/openmpi'
+    export OMPI_INSTALL_DIR="${OMPI_ROOT_DIR}/${OMPI_VERSION}"
+
+    # Set paths to OpenMPI binaries and libraries
+    export PATH="${OMPI_INSTALL_DIR}/bin:${PATH}"
+    export LD_LIBRARY_PATH="${OMPI_INSTALL_DIR}/lib:${LD_LIBRARY_PATH}"
+
+    # DO NOT Upgrade all packages to their latest versions
+    # apt-get -y update && apt-get -y upgrade
+
+    # Install python3 and some common core packages
+    apt-get -y install python3-dev
+    apt-get -y install python3-pip
+    apt-get -y install python3-setuptools
+    apt-get -y install python3-virtualenv
+
+    # Install Bazel dependencies and prerequisites
+    apt-get -y install g++
+    apt-get -y install unzip
+    apt-get -y install zip
+    apt-get -y install git
+    apt-get -y install openjdk-11-jdk
+
+    cd /tmp
+
+    # Set Bazel version number, root and installation directories
+    export BAZEL_VERSION='3.7.2'
+    export BAZEL_ROOT_DIR='/tmp/bazel'
+    export BAZEL_INSTALL_DIR="${BAZEL_ROOT_DIR}/${BAZEL_VERSION}"
+
+    # Download, build, and install Bazel from its source distribution
+    mkdir -p "${BAZEL_INSTALL_DIR}"
+    cd "${BAZEL_INSTALL_DIR}"
+    wget "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-dist.zip"
+    unzip -o "bazel-${BAZEL_VERSION}-dist.zip"
+    export EXTRA_BAZEL_ARGS="--host_javabase=@local_jdk//:jdk"
+    rm -f /usr/bin/python  # -f: do not fail when no 'python' exists yet
+    ln -s /usr/bin/python3 /usr/bin/python
+    ./compile.sh
+
+    # Set path to Bazel
+    export PATH="${BAZEL_INSTALL_DIR}/output:${PATH}"
+
+    # Install common mathematical and scientific python3 packages
+    apt-get -y install python3-numpy
+    apt-get -y install python3-scipy
+    apt-get -y install python3-pandas
+    apt-get -y install python3-sklearn
+    apt-get -y install python3-skimage
+    apt-get -y install python3-statsmodels
+    apt-get -y install python3-matplotlib
+    apt-get -y install python3-plotly
+    apt-get -y install python3-seaborn
+    apt-get -y install python3-opencv
+    apt-get -y install python3-astropy
+    apt-get -y install python3-biopython
+    apt-get -y install python3-h5py
+    apt-get -y install python3-nltk
+    apt-get -y install python3-sympy
+    apt-get -y install python3-netcdf4
+
+    # Install mpi4py
+    pip3 install mpi4py==3.0.3
+
+    # Install additional dependencies for TensorFlow
+    apt-get -y install python3-six
+    apt-get -y install python3-wheel
+    apt-get -y install python3-mock
+    apt-get -y install python3-future
+
+    pip3 install keras_preprocessing --no-deps
+
+    cd /tmp
+
+    # Set TensorFlow version number, root and install directories
+    export TENSORFLOW_VERSION='2.5.1'
+
+    # Download, build, and install TensorFlow from the tagged release
+    git clone https://github.com/tensorflow/tensorflow.git
+    cd tensorflow
+    git checkout "v${TENSORFLOW_VERSION}"
+
+    export PYTHON_BIN_PATH='/usr/bin/python3'
+    export PYTHON_LIB_PATH='/usr/local/lib/python3.6/dist-packages'
+    export TF_ENABLE_XLA=1
+    export TF_NEED_OPENCL_SYCL=0
+    export TF_NEED_ROCM=0
+    export TF_NEED_CUDA=1
+    export TF_NEED_TENSORRT=0
+    export TF_CUDA_COMPUTE_CAPABILITIES='7.0'
+    export TF_CUDA_CLANG=0
+    export TF_NEED_MPI=1
+    export MPI_HOME="${OMPI_INSTALL_DIR}"  # must match the OMPI_INSTALL_DIR exported above
+    export CC_OPT_FLAGS='-mtune=generic'
+    export TF_SET_ANDROID_WORKSPACE=0
+
+    bazel build --local_ram_resources 8192 --local_cpu_resources 4 -c opt --config=cuda --config=numa --config=v2 --verbose_failures //tensorflow/tools/pip_package:build_pip_package
+    bazel-bin/tensorflow/tools/pip_package/build_pip_package tensorflow_pkg
+    pip3 install "tensorflow_pkg/tensorflow-${TENSORFLOW_VERSION}-cp36-cp36m-linux_x86_64.whl"
+
+    cd /tmp
+
+    # Install Horovod with TensorFlow + NVIDIA GPU support
+    export HOROVOD_VERSION=0.22.1
+    export HOROVOD_WITHOUT_PYTORCH=1
+    export HOROVOD_WITHOUT_MXNET=1
+    export HOROVOD_WITH_TENSORFLOW=1
+    export HOROVOD_WITH_MPI=1
+    export HOROVOD_WITH_GLOO=1
+    export HOROVOD_GPU=CUDA
+    export HOROVOD_CUDA_HOME="${CUDA_HOME}"
+    export HOROVOD_BUILD_CUDA_CC_LIST=70
+    export HOROVOD_GPU_OPERATIONS=NCCL
+    export HOROVOD_NCCL_INCLUDE="${NCCL_INCLUDE}"
+    export HOROVOD_NCCL_LIB="${NCCL_LIB}"
+
+    git clone --recursive https://github.com/horovod/horovod.git
+    cd horovod
+    git checkout "v${HOROVOD_VERSION}"
+    python3 setup.py sdist  # build the source distribution from this checkout
+    ldconfig /usr/local/cuda/targets/x86_64-linux/lib/stubs  # add the CUDA stubs directory to the linker cache for the build
+    pip3 install "dist/horovod-${HOROVOD_VERSION}.tar.gz"  # install the sdist built above, not the latest PyPI release
+    ldconfig
+
+    #pip3 install horovod=="${HOROVOD_VERSION}"
+
+    # Install TensorFlow tools, extensions, and related packages.
+    pip3 install tensorflow-datasets==1.2.0
+    pip3 install tensorflow-metadata==0.12.1
+    pip3 install tensorflow-model-optimization==0.5.0
+    pip3 install tensorflow-probability==0.7.0
+    pip3 install tensorflow-hub==0.11.0
+    pip3 install tensorflow-gan==2.0.0
+
+    # Install TensorFlow Profiler
+    pip3 install tensorboard-plugin-profile==2.5.0
+
+    cd /tmp
+
+    # Install tf2onnx
+    git clone https://github.com/onnx/tensorflow-onnx
+    cd tensorflow-onnx
+    git checkout v1.9.2
+    python3 setup.py install
+
+    # Install JupyterLab
+    pip3 install jupyter
+    pip3 install jupyterlab
+    pip3 install --upgrade --force-reinstall jupyter-console
+
+    # Cleanup
+    apt-get -y autoremove --purge
+    apt-get -y clean
+
+    # Update database for mlocate
+    updatedb
+
+%files
+
+%runscript
+
+%test