Skip to content
Snippets Groups Projects
Commit ca1f6aed authored by Marty Kandes's avatar Marty Kandes
Browse files

Update TensorFlow to v2.5.0

parent 80d3ca9c
No related branches found
No related tags found
No related merge requests found
......@@ -10,7 +10,7 @@ From: mkandes/naked-singularity:ubuntu-18.04-cuda-11.2-openmpi-4.0.5
AUTHOR_NAME Marty Kandes
AUTHOR_EMAIL mkandes@sdsc.edu
LAST_UPDATED 20200605
LAST_UPDATED 20200617
%setup
......@@ -43,10 +43,19 @@ From: mkandes/naked-singularity:ubuntu-18.04-cuda-11.2-openmpi-4.0.5
# Set debian frontend interface
export DEBIAN_FRONTEND='noninteractive'
# Set CUDA version
# Set NVIDIA driver and CUDA versions
export CUDA_DRIVER_VERSION='460'
export CUDA_MAJOR='11'
export CUDA_MINOR='2'
export CUDA_VERSION="${CUDA_MAJOR}.${CUDA_MINOR}"
export CUDNN_MAJOR='8'
export CUDNN_MINOR='1'
export CUDNN_REVISION='0.77'
export CUDNN_VERSION="${CUDNN_MAJOR}.${CUDNN_MINOR}.${CUDNN_REVISION}"
export NCCL_MAJOR='2'
export NCCL_MINOR='8'
export NCCL_REVISION='4'
export NCCL_VERSION="${NCCL_MAJOR}.${NCCL_MINOR}.${NCCL_REVISION}"
# Set paths to CUDA binaries and libraries
export CUDA_HOME="/usr/local/cuda-${CUDA_VERSION}"
......@@ -139,107 +148,67 @@ From: mkandes/naked-singularity:ubuntu-18.04-cuda-11.2-openmpi-4.0.5
pip3 install keras_preprocessing --no-deps
# Set TensorFlow version number, root and install directories
# export TENSORFLOW_VERSION='2.5.0'
# export TENSORFLOW_ROOT_DIR='/opt/tensorflow'
# export TENSORFLOW_INSTALL_DIR="${TENSORFLOW_ROOT_DIR}/${TENSORFLOW_VERSION}"
#
# # Download, build, and install TensorFlow
# mkdir -p "${TENSORFLOW_INSTALL_DIR}"
# cd "${TENSORFLOW_INSTALL_DIR}"
# git clone https://github.com/tensorflow/tensorflow.git
# cd tensorflow
# git checkout "v${TENSORFLOW_VERSION}"
#
# export PYTHON_BIN_PATH='/usr/bin/python3'
# export PYTHON_LIB_PATH='/usr/local/lib/python3.6/dist-packages'
# export TF_ENABLE_XLA=1
# export TF_NEED_OPENCL_SYCL=0
# export TF_NEED_ROCM=0
# export TF_NEED_CUDA=1
# export TF_NEED_TENSORRT=0
# #export TF_CUDA_COMPUTE_CAPABILITIES='3.5,3.7,5.2,6.0,6.1,7.0,7.5'
# export TF_CUDA_COMPUTE_CAPABILITIES='3.7,6.0,7.0'
# export TF_CUDA_CLANG=0
# export TF_NEED_MPI=1
# export MPI_HOME="${OPENMPI_INSTALL_DIR}"
# export CC_OPT_FLAGS='-mtune=generic'
# export TF_SET_ANDROID_WORKSPACE=0
#
# #bazel build --local_ram_resources 2048 --local_cpu_resources 1 -c opt --config=cuda --config=numa --config=v2 --verbose_failures //tensorflow/tools/pip_package:build_pip_package
# bazel build -c opt --config=cuda --config=numa --config=v2 --verbose_failures //tensorflow/tools/pip_package:build_pip_package
# bazel-bin/tensorflow/tools/pip_package/build_pip_package tensorflow_pkg
# pip3 install tensorflow_pkg/tensorflow-2.5.0-cp36-cp36m-linux_x86_64.whl
# Build is currently failing with following error.
#Starting local Bazel server and connecting to it...
#WARNING: The following configs were expanded more than once: [v2]. For repeatable flags, repeats are counted twice and may lead to unexpected behavior.
#INFO: Options provided by the client:
# Inherited 'common' options: --isatty=1 --terminal_columns=80
#INFO: Reading rc options for 'build' from /opt/tensorflow/2.5.0/tensorflow/.bazelrc:
# Inherited 'common' options: --experimental_repo_remote_exec
#INFO: Reading rc options for 'build' from /opt/tensorflow/2.5.0/tensorflow/.bazelrc:
# 'build' options: --define framework_shared_object=true --java_toolchain=@tf_toolchains//toolchains/java:tf_java_toolchain --host_java_toolchain=@tf_toolchains//toolchains/java:tf_java_toolchain --define=use_fast_cpp_protos=true --define=allow_oversize_protos=true --spawn_strategy=standalone -c opt --announce_rc --define=grpc_no_ares=true --noincompatible_remove_legacy_whole_archive --noincompatible_prohibit_aapt1 --enable_platform_specific_config --define=with_xla_support=true --config=short_logs --config=v2
#INFO: Found applicable config definition build:short_logs in file /opt/tensorflow/2.5.0/tensorflow/.bazelrc: --output_filter=DONT_MATCH_ANYTHING
#INFO: Found applicable config definition build:v2 in file /opt/tensorflow/2.5.0/tensorflow/.bazelrc: --define=tf_api_version=2 --action_env=TF2_BEHAVIOR=1
#INFO: Found applicable config definition build:cuda in file /opt/tensorflow/2.5.0/tensorflow/.bazelrc: --repo_env TF_NEED_CUDA=1 --crosstool_top=@local_config_cuda//crosstool:toolchain --@local_config_cuda//:enable_cuda
#INFO: Found applicable config definition build:numa in file /opt/tensorflow/2.5.0/tensorflow/.bazelrc: --define=with_numa_support=true
#INFO: Found applicable config definition build:v2 in file /opt/tensorflow/2.5.0/tensorflow/.bazelrc: --define=tf_api_version=2 --action_env=TF2_BEHAVIOR=1
#INFO: Found applicable config definition build:linux in file /opt/tensorflow/2.5.0/tensorflow/.bazelrc: --copt=-w --host_copt=-w --define=PREFIX=/usr --define=LIBDIR=$(PREFIX)/lib --define=INCLUDEDIR=$(PREFIX)/include --define=PROTOBUF_INCLUDE_PATH=$(PREFIX)/include --cxxopt=-std=c++14 --host_cxxopt=-std=c++14 --config=dynamic_kernels
#INFO: Found applicable config definition build:dynamic_kernels in file /opt/tensorflow/2.5.0/tensorflow/.bazelrc: --define=dynamic_loaded_kernels=true --copt=-DAUTOLOAD_DYNAMIC_KERNELS
#DEBUG: /root/.cache/bazel/_bazel_root/62c1e32d266c8fdbd78a7dac0f001dcf/external/bazel_tools/tools/cpp/lib_cc_configure.bzl:118:10:
#Auto-Configuration Warning: 'TMP' environment variable is not set, using 'C:\Windows\Temp' as default
#DEBUG: Rule 'io_bazel_rules_docker' indicated that a canonical reproducible form can be obtained by modifying arguments shallow_since = "1556410077 -0400"
#DEBUG: Repository io_bazel_rules_docker instantiated at:
# /opt/tensorflow/2.5.0/tensorflow/WORKSPACE:23:14: in <toplevel>
# /opt/tensorflow/2.5.0/tensorflow/tensorflow/workspace0.bzl:105:34: in workspace
# /root/.cache/bazel/_bazel_root/62c1e32d266c8fdbd78a7dac0f001dcf/external/bazel_toolchains/repositories/repositories.bzl:37:23: in repositories
#Repository rule git_repository defined at:
# /root/.cache/bazel/_bazel_root/62c1e32d266c8fdbd78a7dac0f001dcf/external/bazel_tools/tools/build_defs/repo/git.bzl:199:33: in <toplevel>
#INFO: Analyzed target //tensorflow/tools/pip_package:build_pip_package (428 packages loaded, 36893 targets configured).
#INFO: Found 1 target...
#ERROR: /opt/tensorflow/2.5.0/tensorflow/tensorflow/compiler/mlir/tensorflow/BUILD:390:15: C++ compilation of rule '//tensorflow/compiler/mlir/tensorflow:tensorflow_ops_n_z' failed (Exit 4): crosstool_wrapper_driver_is_not_gcc failed: error executing command
# (cd /root/.cache/bazel/_bazel_root/62c1e32d266c8fdbd78a7dac0f001dcf/execroot/org_tensorflow && \
# exec env - \
# LD_LIBRARY_PATH=/opt/openmpi/4.0.5/lib:/usr/local/cuda-11.2/lib64:/.singularity.d/libs \
# PATH=/opt/bazel/3.7.2/output:/opt/openmpi/4.0.5/bin:/usr/local/cuda-11.2/bin:/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin \
# PWD=/proc/self/cwd \
#
# cd "${TENSORFLOW_INSTALL_DIR}"
#
# # Download, build and install TensorFlow Addons
# git clone https://github.com/tensorflow/addons.git
# cd addons
# git checkout v0.11.2
#
# export TF_NEED_CUDA=1
# export TF_CUDA_VERSION="${CUDA_VERSION}"
# export TF_CUDNN_VERSION=7
# export CUDA_TOOLKIT_PATH="${CUDA_HOME}"
# export CUDNN_INSTALL_PATH='/usr/lib/x86_64-linux-gnu'
#
# python3 ./configure.py
#
# rm /usr/bin/python
# ln -s /usr/bin/python3 /usr/bin/python
#
# bazel build --enable_runfiles build_pip_pkg
# bazel-bin/build_pip_pkg artifacts
# pip3 install artifacts/tensorflow_addons-*.whl
#
# cd "${TENSORFLOW_INSTALL_DIR}"
#
# # Download and install TensorFlow Models
# git clone https://github.com/tensorflow/models.git
# cd models
# git checkout "v${TENSORFLOW_VERSION}"
# export PYTHONPATH="${TENSORFLOW_INSTALL_DIR}/models:${PYTHONPATH}"
export TENSORFLOW_VERSION='2.5.0'
export TENSORFLOW_ROOT_DIR='/opt/tensorflow'
export TENSORFLOW_INSTALL_DIR="${TENSORFLOW_ROOT_DIR}/${TENSORFLOW_VERSION}"
# Download, build, and install TensorFlow
mkdir -p "${TENSORFLOW_INSTALL_DIR}"
cd "${TENSORFLOW_INSTALL_DIR}"
git clone https://github.com/tensorflow/tensorflow.git
cd tensorflow
git checkout "v${TENSORFLOW_VERSION}"
export PYTHON_BIN_PATH='/usr/bin/python3'
export PYTHON_LIB_PATH='/usr/local/lib/python3.6/dist-packages'
export TF_ENABLE_XLA=1
export TF_NEED_OPENCL_SYCL=0
export TF_NEED_ROCM=0
export TF_NEED_CUDA=1
export TF_NEED_TENSORRT=0
export TF_CUDA_COMPUTE_CAPABILITIES='3.5,3.7,5.2,6.0,6.1,7.0,7.5,8.0,8.6'
export TF_CUDA_CLANG=0
export TF_NEED_MPI=1
export MPI_HOME="${OPENMPI_INSTALL_DIR}"
export CC_OPT_FLAGS='-mtune=generic'
export TF_SET_ANDROID_WORKSPACE=0
bazel build --local_ram_resources 2048 --local_cpu_resources 1 -c opt --config=cuda --config=numa --config=v2 --verbose_failures //tensorflow/tools/pip_package:build_pip_package
bazel-bin/tensorflow/tools/pip_package/build_pip_package tensorflow_pkg
pip3 install tensorflow_pkg/tensorflow-2.5.0-cp36-cp36m-linux_x86_64.whl
cd "${TENSORFLOW_INSTALL_DIR}"
# Download, build and install TensorFlow Addons
git clone https://github.com/tensorflow/addons.git
cd addons
git checkout v0.13.0
export TF_NEED_CUDA=1
export TF_CUDA_VERSION="${CUDA_VERSION}"
export TF_CUDNN_VERSION="${CUDNN_MAJOR}"
export CUDA_TOOLKIT_PATH="${CUDA_HOME}"
export CUDNN_INSTALL_PATH='/usr/lib/x86_64-linux-gnu'
python3 ./configure.py
rm /usr/bin/python
ln -s /usr/bin/python3 /usr/bin/python
bazel build --enable_runfiles build_pip_pkg
bazel-bin/build_pip_pkg artifacts
pip3 install artifacts/tensorflow_addons-*.whl
cd "${TENSORFLOW_INSTALL_DIR}"
# Download and install TensorFlow Models
git clone https://github.com/tensorflow/models.git
cd models
git checkout "v${TENSORFLOW_VERSION}"
export PYTHONPATH="${TENSORFLOW_INSTALL_DIR}/models:${PYTHONPATH}"
#sed -i 's/kaggle>=1.3.9/#kaggle>=1.3.9/' official/requirements.txt
# pip3 install scikit-build
# pip3 install -r official/requirements.txt
pip3 install scikit-build
pip3 install -r official/requirements.txt
# Cleanup
apt-get -y autoremove --purge
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment