Parallel programming in Python¶

Python offers both built-in features and external libraries for parallel programming.

Today we will look at three high-level approaches to writing parallel code:

  1. Asynchronous programming
  2. Lightweight pipelining with joblib
  3. The Message Passing Interface (MPI)

Asynchronous programming¶

In this example we compute the sum $\sum_{i=1}^n v_i$ over a vector $V=\{v_i \mid i=1,\ldots,n\}$ using multiple cores of our computer. We use a divide-and-conquer approach and split the computation of the sum into smaller problems:

  • $ \text{sum}_1 = \sum_{i=1}^{n/2} v_i $
  • $ \text{sum}_2 = \sum_{i=n/2+1}^{n} v_i $
  • $ \text{sum} = \text{sum}_1 + \text{sum}_2 $
In [31]:
values = [1,2,3,4,5,6,7,8,9]

Define a function to compute the partial sum¶

In [34]:
def part_sum(values, begin, end):
    # Sum the slice values[begin:end].
    total = 0
    for i in range(begin, end):
        total += values[i]
    return total

Launch concurrent tasks to compute the partial sums¶

In [33]:
import concurrent.futures as f

# A pool of worker threads; submitted tasks are scheduled onto the workers.
executor = f.ThreadPoolExecutor(max_workers=4)

# Each submit call returns a future for one half of the vector.
mid = len(values) // 2
f1 = executor.submit(part_sum, values, 0, mid)
f2 = executor.submit(part_sum, values, mid, len(values))

# result() blocks until the corresponding task has finished.
result = f1.result() + f2.result()
print(result)
45
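Note that CPython's global interpreter lock (GIL) prevents threads from executing Python bytecode in parallel, so a ThreadPoolExecutor mainly helps with I/O-bound tasks. For CPU-bound work, concurrent.futures also offers ProcessPoolExecutor with the same interface. A minimal sketch, assuming part_sum and values as defined above (when run from a notebook on platforms that spawn processes, part_sum may need to live in an importable module or in a script with an if __name__ == "__main__": guard):

import concurrent.futures as f

# Same submit/result interface, but tasks run in separate worker
# processes, so they are not serialized by the GIL.
with f.ProcessPoolExecutor(max_workers=4) as executor:
    mid = len(values) // 2
    f1 = executor.submit(part_sum, values, 0, mid)
    f2 = executor.submit(part_sum, values, mid, len(values))
    print(f1.result() + f2.result())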

Check the result using a serial sum¶

In [36]:
total = 0
for value in values:
    total += value
print(total)
45

Lightweight pipelining with Python functions (joblib)¶

joblib is an external library for parallel computations in Python.

The library can be installed via pip:

pip3 install joblib
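Before the stiffness matrix example, here is a minimal sketch of the Parallel/delayed pattern: delayed(sqrt)(i) records the call sqrt(i) without executing it, and Parallel dispatches the recorded calls to the workers.

from math import sqrt
from joblib import Parallel, delayed

# Evaluate sqrt on ten inputs using two workers.
results = Parallel(n_jobs=2)(delayed(sqrt)(i) for i in range(10))
print(results)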

Here, we look into the assembly of the stiffness matrix for the finite difference example from the earlier Python lecture.

In [45]:
from joblib import Parallel, delayed
import numpy as np

Assemble the stiffness matrix using finite differences¶

  1. Dirichlet boundary condition at $x=0$: $$ u_1 = 0. $$
  2. Finite difference scheme in the interior of $\Omega$, $\forall i=2,\ldots,n-1$:
$$ - E \frac{u_{i-1}-2u_i+u_{i+1}}{h^2} = f_b(x_i). $$
  3. Neumann boundary condition at $x=1$:
$$ E \frac{u_{n-2}-4u_{n-1}+3u_n}{2h} = g. $$

For simplicity we assume $E=1$.

In [41]:
def assign(i):
    # Interior row i: central difference stencil; the common
    # factor 1/(2*h*h) is applied after the assembly.
    matrix[i,i-1] = -2
    matrix[i,i] = 4
    matrix[i,i+1] = -2

In [51]:
n = 10
matrix = np.zeros([n, n])
h = 0.1

# Dirichlet condition: first row.
matrix[0,0] = 1

# Assemble the interior rows in parallel; the thread backend shares
# memory, so all workers write into the same matrix.
Parallel(n_jobs=2,prefer="threads")(delayed(assign)(i) for i in range(1,len(matrix)-1))

# Neumann condition: one-sided second-order difference in the last row.
matrix[n-1,n-1] = 3*h
matrix[n-1,n-2] = -4*h
matrix[n-1,n-3] = h

matrix *= 1./(2*h*h)
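To check the parallel assembly, we can compare it against a serial assembly of the same matrix (a sketch; reference is just a local name for the comparison copy):

# Serial assembly with the same stencil, no joblib.
reference = np.zeros([n, n])
reference[0,0] = 1
for i in range(1, n-1):
    reference[i,i-1], reference[i,i], reference[i,i+1] = -2, 4, -2
reference[n-1,n-1] = 3*h
reference[n-1,n-2] = -4*h
reference[n-1,n-3] = h
reference *= 1./(2*h*h)
print(np.allclose(matrix, reference))  # True if both assemblies agree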

Distributed programming in Python using MPI¶

For distributed programming a very common tool is the Message Passing Interface (MPI).

We install the Python package on the cluster as follows:

module load openmpi
pip3 install mpi4py

MPI itself is a C library; the mpi4py package provides Python bindings for it.

Distributed hello world¶

In [2]:
from mpi4py import MPI
import sys
In [3]:
def print_hello(rank, size, name):
    msg = "Hello World! I am process {0} of {1} on {2}.\n"
    sys.stdout.write(msg.format(rank, size, name))
In [4]:
if __name__ == "__main__":
    size = MPI.COMM_WORLD.Get_size()
    rank = MPI.COMM_WORLD.Get_rank()
    name = MPI.Get_processor_name()

    print_hello(rank, size, name)
Hello World! I am process 0 of 1 on Patricks-Air.attlocal.net.
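The output lists only one process because the notebook was not started through an MPI launcher. To actually run on multiple processes, save the code to a script (the file name hello_mpi.py is just an example) and launch it with mpirun:

mpirun -np 4 python3 hello_mpi.py

Each of the four processes then prints its own line.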

Compute the sum on multiple compute nodes¶

In [6]:
values = [1,2,3,4,5,6,7,8,9]
In [7]:
comm = MPI.COMM_WORLD
size = comm.Get_size()  # total number of processes
rank = comm.Get_rank()  # id of this process

Partitioning of the data¶

In [9]:
# Each rank works on a contiguous chunk of the values.
partition = round(len(values)/size)

begin = rank * partition
if rank == size-1:
    # The last rank also takes any remainder.
    end = len(values)
else:
    end = (rank+1) * partition
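For example, with our nine values and size = 3, partition = 3 and the ranks work on the index ranges [0, 3), [3, 6), and [6, 9).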

Computation on each node¶

In [10]:
# Partial sum over this rank's chunk of the data.
local_sum = 0

for i in range(begin, end):
    local_sum += values[i]

Sending the partial sum to rank 0¶

In [11]:
if rank != 0:
    # Non-blocking send of the partial sum to rank 0.
    req = comm.isend(local_sum, dest=0, tag=rank)
    req.wait()
else:
    # Rank 0 posts one non-blocking receive per remote rank ...
    total_sum = 0
    data = []
    for i in range(1, size):
        data.append(comm.irecv(source=i, tag=i))
    # ... then waits for each request and accumulates the results.
    for d in data:
        total_sum += d.wait()
    total_sum += local_sum
    print(total_sum)
45
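The explicit isend/irecv exchange is instructive, but mpi4py also exposes MPI's collective operations. A minimal sketch of the same reduction using comm.reduce, which combines the per-rank values on the root process:

# Every rank contributes local_sum; only rank 0 receives the result.
total_sum = comm.reduce(local_sum, op=MPI.SUM, root=0)
if rank == 0:
    print(total_sum)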