diff --git a/definition-files/MPI/Singularity.miniconda3-py39-4.9.2-ubuntu-18.04-OMPI b/definition-files/MPI/Singularity.miniconda3-py39-4.9.2-ubuntu-18.04-OMPI new file mode 100644 index 0000000000000000000000000000000000000000..f8557eda412382d992ba17cd277c396a71436af5 --- /dev/null +++ b/definition-files/MPI/Singularity.miniconda3-py39-4.9.2-ubuntu-18.04-OMPI @@ -0,0 +1,91 @@ +# Bootstrap: library +# From: mblaschek/imgw/ubuntu:18.04 +Bootstrap: localimage +From: ubuntu.sif + +%labels + + APPLICATION_NAME miniconda3 + APPLICATION_VERSION py39-4.9.2-Linux-x86_64 + APPLICATION_URL https://docs.conda.io + + AUTHOR_NAME Michael Blaschek + AUTHOR_EMAIL michael.blaschek@univie.ac.at + + LAST_UPDATED 20211118 + +%setup + +%environment + + # Set the conda distribution type, its version number, the python + # version it utilizes, the root and installation directories where + # the distribution will be installed within the container, and the + # root URL to the installer + export CONDA_DISTRIBUTION='miniconda' + export CONDA_VERSION='3' + export CONDA_PYTHON_VERSION='py39' + export CONDA_INSTALLER_VERSION='4.9.2' + export CONDA_ARCH='Linux-x86_64' + export CONDA_INSTALL_DIR="/opt/${CONDA_DISTRIBUTION}${CONDA_VERSION}" + + # Set PATH to conda distribution + export PATH="${CONDA_INSTALL_DIR}/bin:${PATH}" + +%post -c /bin/bash + + # Set operating system mirror URL + export MIRRORURL='http://at.archive.ubuntu.com/ubuntu' + + # Set operating system version + export OSVERSION='bionic' + + # Set system locale + export LC_ALL='C' + + # Set debian frontend interface + export DEBIAN_FRONTEND='noninteractive' + + # Upgrade all software packages to their latest versions + apt-get -y update && apt-get -y upgrade + + cd /tmp + + # Set the conda distribution type, its version number, the python + # version it utilizes, the root and installation directories where + # the distribution will be installed within the container, and the + # root URL to the installer + export 
CONDA_DISTRIBUTION='miniconda' + export CONDA_VERSION='3' + export CONDA_PYTHON_VERSION='py39' + export CONDA_INSTALLER_VERSION='4.9.2' + export CONDA_ARCH='Linux-x86_64' + export CONDA_INSTALLER="${CONDA_DISTRIBUTION^}${CONDA_VERSION}-${CONDA_PYTHON_VERSION}_${CONDA_INSTALLER_VERSION}-${CONDA_ARCH}.sh" + export CONDA_INSTALL_DIR="/opt/${CONDA_DISTRIBUTION}${CONDA_VERSION}" + export CONDA_ROOT_URL='https://repo.anaconda.com' + + # Download and install conda distribution + wget "${CONDA_ROOT_URL}/${CONDA_DISTRIBUTION}/${CONDA_INSTALLER}" + chmod +x "${CONDA_INSTALLER}" + "./${CONDA_INSTALLER}" -b -p "${CONDA_INSTALL_DIR}" + + # Remove conda installer + rm "${CONDA_INSTALLER}" + + # Add MPI Package from conda-forge + # ucx + # openmpi + $CONDA_INSTALL_DIR/bin/conda install -y -c conda-forge ucx openmpi mpi4py + + # Cleanup + apt-get -y autoremove --purge + apt-get -y clean + + # Update database for mlocate + updatedb + +%files + +%runscript + +%test diff --git a/workshop/MPI/README.md b/workshop/MPI/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2671c0d5ec6e5cfebc24522613b51008983061d8 --- /dev/null +++ b/workshop/MPI/README.md @@ -0,0 +1,64 @@ +# MPI Tests with Python + +Based on a course from Ivan Kondov in [VSC Trainings](https://gitlab.phaidra.org/imgw/trainings-course/-/blob/master/HPC%20with%20Python/docs/03_parallel_part_1.md#exercise-3-installation-and-setup-of-the-mpi4py-package) there are some easy tests that can be run with singularity containers and MPI. + +This requires using the container built by the definition file: +[`definition-files/MPI/Singularity.miniconda3-py39-4.9.2-ubuntu-18.04-OMPI`](../../definition-files/MPI/Singularity.miniconda3-py39-4.9.2-ubuntu-18.04-OMPI) + + +## Example mpi4py + +There are some built-in tests in the mpi4py package to test its functionality, e.g. 
the ringtest + +```bash +mpirun -np 4 singularity exec miniconda3-ompi.sif python -m mpi4py.bench ringtest -n 1024 -l 1000 +time for 1000 loops = 0.00760765 seconds (4 processes, 1024 bytes) +``` +or hello world +```bash +mpirun -np 4 singularity exec miniconda3-ompi.sif python -m mpi4py.bench helloworld +Hello, World! I am process 0 of 4 on manjaro. +Hello, World! I am process 1 of 4 on manjaro. +Hello, World! I am process 2 of 4 on manjaro. +Hello, World! I am process 3 of 4 on manjaro. +``` + + +## Example Pi Statistics +based on an example from [Cornell University](https://cvw.cac.cornell.edu/python/exercise) - Monte Carlo with mpi4py + + +Randomly thrown darts: red dots are those darts that land in the unit circle, and blue dots are those that do not. + +Fortunately, even though you are not a very good dart thrower, you are a good random number generator, and you can put those skills to work to estimate the numerical value of pi — the ratio of the circumference of a circle to its diameter. + +```bash +# Install additional python packages not installed in our container +# Will be installed to .local/lib/python3.9/site-packages/ +singularity exec miniconda3-ompi.sif python -m pip install numpy matplotlib +Defaulting to user installation because normal site-packages is not writeable +Collecting matplotlib +... 
+Successfully installed cycler-0.11.0 fonttools-4.28.2 kiwisolver-1.3.2 matplotlib-3.5.0 numpy-1.21.4 packaging-21.3 pillow-8.4.0 pyparsing-3.0.6 setuptools-scm-6.3.2 tomli-1.2.2 +# Now run the script with MPI +mpirun -np 4 singularity exec miniconda3-ompi.sif python parallel_pi.py +MPI size = 4 +1024 3.1505126953125 0.06090403395917971 +4096 3.1389007568359375 0.025047770509458944 +16384 3.1422119140625 0.012053709369960034 +65536 3.141084671020508 0.006269848947240971 +262144 3.142267942428589 0.0035848687877291755 +1048576 3.1418583393096924 0.0019135112002101906 +4194304 3.1416348814964294 0.0008085763712888695 +16777216 3.1416458263993263 0.0004231117067632255 +67108864 3.141561470925808 0.00018093953356869197 +268435456 3.14160884777084 9.47003093073785e-05 +``` + +Which produces two figures: + +Numerical estimate of pi as a function of how many darts are thrown (log2 scale). Error bars reflect the standard deviation in the estimate over multiple independent runs. + + + +Scaling of the fluctuations in the estimate of pi (log2(standard deviation)) as a function of log2(N) diff --git a/workshop/MPI/log2_std_vs_log2_N.png b/workshop/MPI/log2_std_vs_log2_N.png new file mode 100644 index 0000000000000000000000000000000000000000..f24910d5cea41916a5ce3b5c5fdd30197eb28afe Binary files /dev/null and b/workshop/MPI/log2_std_vs_log2_N.png differ diff --git a/workshop/MPI/parallel_pi.py b/workshop/MPI/parallel_pi.py new file mode 100644 index 0000000000000000000000000000000000000000..c243465b995a5bbf7539b61369e6e6a6c876d76e --- /dev/null +++ b/workshop/MPI/parallel_pi.py @@ -0,0 +1,123 @@ +from __future__ import print_function, division +""" +An estimate of the numerical value of pi via Monte Carlo integration. +Computation is distributed across processors via MPI. 
import sys

import numpy as np

# NOTE: mpi4py and matplotlib are imported lazily (in main() and
# _plot_estimates()) so that the numerical helpers below can be imported and
# exercised without an MPI runtime, and so that only the rank that actually
# plots has to load matplotlib.


def throw_darts(n):
    """
    Return an array of shape (n, 2) holding n uniformly random (x, y) pairs
    lying within the square that circumscribes the unit circle centered at
    the origin, i.e., the square with corners at (-1,-1), (-1,1), (1,1),
    (1,-1).
    """
    # np.random.random samples [0, 1); rescale to [-1, 1).
    return 2 * np.random.random((n, 2)) - 1


def in_unit_circle(p):
    """
    Return a boolean array whose elements are True if the corresponding
    point in the array p is within (or on) the unit circle centered at the
    origin, and False otherwise.  The last axis of p holds the coordinates.
    """
    return np.linalg.norm(p, axis=-1) <= 1.0


def estimate_pi(n, block=100000):
    """
    Return an estimate of pi obtained by drawing n random points in the
    square [[-1,1], [-1,1]] and calculating what fraction land within the
    unit circle.

    Points are drawn in batches of at most `block`, keeping a running total
    of the hits, so that no array larger than (block, 2) is ever allocated
    while still processing large arrays of random numbers at once.

    Raises ValueError if n or block is not positive (n == 0 would divide by
    zero, block <= 0 would never make progress).
    """
    if n <= 0:
        raise ValueError("n must be positive, got {!r}".format(n))
    if block <= 0:
        raise ValueError("block must be positive, got {!r}".format(block))
    hits = 0
    thrown = 0
    while thrown < n:
        # The final batch may be smaller than `block`.
        batch = min(block, n - thrown)
        hits += int(np.count_nonzero(in_unit_circle(throw_darts(batch))))
        thrown += batch
    return (4.0 * hits) / n


def estimate_pi_in_parallel(comm, N):
    """
    On each of the processes in `comm`, calculate an estimate of pi by
    drawing N random points; the manager process (rank 0) assembles the
    estimates produced by all workers.

    This is a collective operation: every rank of `comm` must call it.

    Returns the list of per-rank estimates on rank 0 and None on every
    other rank.
    """
    # Query the communicator instead of relying on module-level globals, so
    # the function also works when this module is imported.
    rank = comm.Get_rank()
    size = comm.Get_size()

    work = [N] * size if rank == 0 else None
    n_local = comm.scatter(work, root=0)
    # Use the scattered value: it is what the manager assigned to this rank.
    pi_est = estimate_pi(n_local)
    pi_estimates = comm.gather(pi_est, root=0)
    return pi_estimates if rank == 0 else None


def estimate_pi_statistics(comm, Ndarts, Nruns_per_worker):
    """
    Repeat estimate_pi_in_parallel Nruns_per_worker times with Ndarts darts
    per estimate and compute the mean and standard deviation across all
    independent runs (Nruns_per_worker * number of ranks in total).

    This is a collective operation: every rank of `comm` must call it.

    Returns (mean, std) on rank 0 and None on every other rank.
    Raises ValueError if Nruns_per_worker < 1, because the statistics of an
    empty sample are undefined.
    """
    if Nruns_per_worker < 1:
        raise ValueError(
            "Nruns_per_worker must be at least 1, got {!r}".format(
                Nruns_per_worker))
    rank = comm.Get_rank()
    results = []
    for _ in range(Nruns_per_worker):
        result = estimate_pi_in_parallel(comm, Ndarts)
        if rank == 0:
            results.append(result)
    if rank != 0:
        return None
    return np.mean(results), np.std(results)


def _plot_estimates(estimates):
    """
    Write the two summary figures (pi_vs_log2_N.png and
    log2_std_vs_log2_N.png) for rows of (N, mean, standard deviation).
    """
    import matplotlib
    matplotlib.use('Agg')  # file-only backend; select before importing pyplot
    import matplotlib.pyplot as plt

    estimates = np.array(estimates)
    log2_n = np.log2(estimates[:, 0])

    plt.figure()
    plt.errorbar(log2_n, estimates[:, 1], yerr=estimates[:, 2])
    plt.ylabel('estimate of pi')
    plt.xlabel('log2(number of darts N)')
    plt.savefig('pi_vs_log2_N.png')

    plt.figure()
    plt.ylabel('log2(standard deviation)')
    plt.xlabel('log2(number of darts N)')
    plt.plot(log2_n, np.log2(estimates[:, 2]))
    plt.savefig('log2_std_vs_log2_N.png')


def main():
    """
    For N from 4**5 to 4**14 (integer powers of 4), compute the mean and
    standard deviation of estimates of pi by throwing N darts multiple
    times (Nruns_total times, distributed across workers), print one line
    per N on rank 0 and finally write the two figures.
    """
    from mpi4py import MPI

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    if rank == 0:
        print("MPI size = {}".format(size))
        sys.stdout.flush()
    Nruns_total = 64
    # Integer division yields 0 when more than Nruns_total ranks are
    # launched; make every rank contribute at least one run.
    Nruns_per_worker = max(1, Nruns_total // size)

    estimates = []
    for log4N in range(5, 15):
        N = int(4**log4N)
        result = estimate_pi_statistics(comm, N, Nruns_per_worker)
        if rank == 0:
            pi_est_mean, pi_est_std = result
            estimates.append((N, pi_est_mean, pi_est_std))
            print(N, pi_est_mean, pi_est_std)
            sys.stdout.flush()
    if rank == 0:
        _plot_estimates(estimates)
    MPI.Finalize()


if __name__ == '__main__':
    main()