Laureηt 2023-06-23 19:34:09 +02:00
commit f0f362eeee
Signed by: Laurent
SSH key fingerprint: SHA256:kZEpW8cMJ54PDeCvOhzreNr4FSh6R13CMGH/POoO8DI
66 changed files with 12547 additions and 0 deletions

BE/01_RingD/Makefile Normal file
@@ -0,0 +1,17 @@
MPICC=smpicc
CFLAGS=-g -O4
DIR=01_RingD
SRC=ringd
all: ${SRC}
%.o: %.c
echo $@
$(MPICC) -c -Wall -o $@ $<
${SRC}: ${SRC}.o
$(MPICC) -o $@ $^
clean:
rm -rf *.o ${SRC}

BE/01_RingD/ringd.c Normal file
@@ -0,0 +1,136 @@
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
int main(int argc, char *argv[])
{
MPI_Init(&argc, &argv);
int comm_size;
MPI_Comm_size(MPI_COMM_WORLD, &comm_size);
if (comm_size % 2 != 0)
{
printf("This application is meant to be run with an even number of MPI processes, not %d.\n", comm_size);
MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
}
// Get my rank in the global communicator
int my_rank;
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
// Determine the colour and key based on whether my rank is even.
char subcommunicator;
int colour;
int key;
if (my_rank % 2 == 0)
{
subcommunicator = 'E';
colour = 0;
key = my_rank;
}
else
{
subcommunicator = 'O';
colour = 1;
key = comm_size - my_rank;
}
// Split the global communicator
MPI_Comm new_comm;
MPI_Comm_split(MPI_COMM_WORLD, colour, key, &new_comm);
int my_new_comm_rank, new_comm_size;
// Get my rank in the new communicator
MPI_Comm_rank(new_comm, &my_new_comm_rank);
// Get the size of the new communicator
MPI_Comm_size(new_comm, &new_comm_size);
// Print my new rank and new communicator
printf("[MPI process %d] I am now MPI process %d in subcommunicator %c.\n", my_rank, my_new_comm_rank, subcommunicator);
// barrier to tidy up stdout a bit
// MPI_Barrier(MPI_COMM_WORLD);
int previous, next;
// determine my neighbours according to my rank in my subcommunicator
if (my_new_comm_rank == 0)
{
previous = new_comm_size - 1;
next = my_new_comm_rank + 1;
}
else if (my_new_comm_rank == new_comm_size - 1)
{
previous = my_new_comm_rank - 1;
next = 0;
}
else
{
previous = my_new_comm_rank - 1;
next = my_new_comm_rank + 1;
}
// printf("[MPI process %d] new %d previous %d next %d in subcommunicator %c.\n", my_rank, my_new_comm_rank, previous, next, subcommunicator);
float value = 1.0;
MPI_Status status;
// Even: clockwise + multiplication
if (subcommunicator == 'E')
{
// receive value from previous node
if (my_new_comm_rank != 0)
{
MPI_Recv(&value, 1, MPI_FLOAT, previous, 0, new_comm, &status);
printf("[MPI process %d_%c] RECEIVED from process %d of %d, value = %f\n", my_rank, subcommunicator, my_new_comm_rank, new_comm_size, value);
value = value * 2.0;
printf("[MPI process %d_%c] UPDATE, value = %f\n", my_rank, subcommunicator, value);
}
else
{
printf("[MPI process %d_%c] START, value = %f\n", my_rank, subcommunicator, value);
}
// send value to next node
if (my_new_comm_rank != new_comm_size - 1)
{
MPI_Send(&value, 1, MPI_FLOAT, next, 0, new_comm);
printf("[MPI process %d_%c] SENT to process %d of %d, value = %f\n", my_rank, subcommunicator, my_new_comm_rank, new_comm_size, value);
}
}
// Odd: counter-clockwise + division
if (subcommunicator == 'O')
{
// receive value from next node
if (my_new_comm_rank != 0)
{
MPI_Recv(&value, 1, MPI_FLOAT, next, 0, new_comm, &status);
printf("[MPI process %d_%c] RECEIVED from process %d of %d, value = %f\n", my_rank, subcommunicator, my_new_comm_rank, new_comm_size, value);
value = value / 2.0;
printf("[MPI process %d_%c] UPDATE, value = %f\n", my_rank, subcommunicator, value);
}
else
{
printf("[MPI process %d_%c] START, value = %f\n", my_rank, subcommunicator, value);
}
// send value to previous node
if (my_new_comm_rank != 1)
{
MPI_Send(&value, 1, MPI_FLOAT, previous, 0, new_comm);
printf("[MPI process %d_%c] SENT to process %d of %d, value = %f\n", my_rank, subcommunicator, my_new_comm_rank, new_comm_size, value);
}
}
// barrier to tidy up stdout a bit
// MPI_Barrier(MPI_COMM_WORLD);
// the end
printf("[MPI process %d_%c] The End\n", my_rank, subcommunicator);
// Free the communicator, then finalize
MPI_Comm_free(&new_comm);
MPI_Finalize();
return EXIT_SUCCESS;
}

BE/02_normA/Makefile Normal file
@@ -0,0 +1,17 @@
MPICC=smpicc
CFLAGS=-g -O4
DIR=02_normA
SRC=normA
all: ${SRC}
%.o: %.c
echo $@
$(MPICC) -c -Wall -o $@ $<
${SRC}: ${SRC}.o
$(MPICC) -o $@ $^
clean:
rm -rf *.o ${SRC} ${DIR}

BE/02_normA/normA.c Normal file
@@ -0,0 +1,159 @@
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mpi.h>
void multAv(double x[], double *A, double y[], int m, int n);
void init0(double x[], int n);
double dot(double x[], double y[], int n);
int main(int argc, char *argv[])
{
int size;
int const n = 12;
int my_rank;
double local_dot, global_dot, normA, reference;
MPI_Init(&argc, &argv);
// Get number of processes and check that 4 processes are used
MPI_Comm_size(MPI_COMM_WORLD, &size);
if (size != 4)
{
printf("This application is meant to be run with 4 MPI processes.\n");
MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
}
// Get my rank
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
// Declaration and Initialization of A (one for all components)
// the blocking on rows, b, is the same for all nodes
// (if you don't change the constants)
int b = n / size;
double *A;
A = (double *)malloc(b * n * sizeof(double));
for (int i = 0; i < b; i++)
{
for (int j = 0; j < n; j++)
{
A[i * n + j] = 1.0;
reference = 66.000000; // = sum_{i=1}^{11} i = 66 (A full of ones)
// A[i*n + j] = (double) my_rank;
// reference = 97.488461;
// A[i*n + j] = (double) my_rank*(i+1)+(j+1);
// reference = 239.899979;
// printf("Process [%d], A[%d][%d] = %f\n", my_rank, i, j, A[i*n+j]);
}
}
// reference vector to verify that the global vector is correct
double v_ref[n];
for (int i = 0; i < n; i++)
{
v_ref[i] = (double)i;
}
// local vector
double x_local[b];
for (int i = 0; i < b; i++)
{
x_local[i] = (double)b * my_rank + i;
// printf("Process [%d], v_local[%d] = %f\n", my_rank, i, v_local[i]);
}
// global vector
double x_global[n];
init0(x_global, n);
// Use a collective communication in order to gather on ALL the nodes the
// part of the local vector into the global vector
MPI_Allgather(x_local, b, MPI_DOUBLE, x_global, b, MPI_DOUBLE, MPI_COMM_WORLD);
// the node 2 checks if the global vector is correct (should be 0 for all components)
if (my_rank == 2)
{
for (int i = 0; i < n; i++)
{
printf("Process [%d], vérif[%d] = %f\n", my_rank, i, x_global[i] - v_ref[i]);
}
}
MPI_Barrier(MPI_COMM_WORLD);
// vector y_local = A * x_global
double y_local[b];
init0(y_local, b);
// Perform the multiplication
multAv(y_local, A, x_global, b, n);
// each node displays y (with A, full of ones, all the components of x
// should be the same)
for (int i = 0; i < b; i++)
{
printf("Process [%d] y_local[%d] = %f\n", my_rank, i, y_local[i]);
}
// Perform the dot product on the local x
local_dot = dot(x_local, y_local, b);
printf("Process [%d] local dot %f\n", my_rank, local_dot);
// Use one single collective communication to perform the reduction into
// global_dot
MPI_Allreduce(&local_dot, &global_dot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
// the norm is the square root of the global_dot
normA = sqrt(global_dot);
// Another node displays the norm
if (my_rank == 2)
{
printf("Process [%d] normA = %f, reference = %f\n", my_rank, normA, reference);
}
MPI_Finalize();
return EXIT_SUCCESS;
}
void multAv(double x[], double *A, double y[], int m, int n)
{
for (int i = 0; i < m; i++)
{
x[i] = 0.0;
for (int j = 0; j < n; j++)
{
x[i] += A[i * n + j] * y[j];
}
}
return;
}
void init0(double x[], int n)
{
for (int i = 0; i < n; i++)
{
x[i] = 0.0;
}
return;
}
double dot(double x[], double y[], int n)
{
double res = 0.0;
for (int i = 0; i < n; i++)
{
res += x[i] * y[i];
}
return res;
}

BE/03_overmean/Makefile Normal file
@@ -0,0 +1,18 @@
MPICC=smpicc
CFLAGS=-g -O4
DIR=03_overmean
SRC=overmean
all: ${SRC}
%.o: %.c
echo $@
$(MPICC) -c -Wall -o $@ $<
${SRC}: ${SRC}.o
$(MPICC) -o $@ $^
clean:
rm -rf *.o ${SRC} ${DIR}

BE/03_overmean/overmean.c Normal file
@@ -0,0 +1,120 @@
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <mpi.h>
int main(int argc, char *argv[])
{
// comment this line, if you want the same vector for each run
srand(time(NULL));
MPI_Init(&argc, &argv);
// Get number of processes
int nb_process;
MPI_Comm_size(MPI_COMM_WORLD, &nb_process);
// Fix root's rank
int root_rank = 0;
// Get my rank
int my_rank;
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
// global size (only the root knows its value)
int global_size = 0;
// local size (we fix this value in order to be regular)
int local_size = 3;
// local vector
int *local_vector = NULL;
int *global_vector = NULL;
// root process
if (my_rank == root_rank)
{
global_size = nb_process * local_size; // to be able to split
// the global vector into sub-vectors
// with the same size
printf("global_size = %d\n", global_size);
global_vector = (int *)malloc(sizeof(int) * global_size);
for (int i = 0; i < global_size; i++)
{
// global_vector[i] = i;
global_vector[i] = rand() % 101;
printf("global_vector[%d] = %d\n", i, global_vector[i]);
}
}
// Each process gets its part of the global vector
local_vector = (int *)malloc(sizeof(int) * local_size);
MPI_Scatter(global_vector, local_size, MPI_INT, local_vector, local_size, MPI_INT, root_rank, MPI_COMM_WORLD);
// print the local vector
for(int i = 0; i < local_size; i++)
{
printf("[%d] local_vector[%d] = %d\n", my_rank, i, local_vector[i]);
}
// barrier to tidy up stdout a bit
// MPI_Barrier(MPI_COMM_WORLD);
// compute the local sum
int local_sum = 0;
for (int i = 0; i < local_size; i++)
{
local_sum += local_vector[i];
}
printf("Process %d computed its local sum = %d.\n", my_rank, local_sum);
// compute the global sum by a reduction
int global_sum;
MPI_Reduce(&local_sum, &global_sum, 1, MPI_INT, MPI_SUM, root_rank, MPI_COMM_WORLD);
// print the global sum
if (my_rank == root_rank) {
printf("Process %d got the global sum = %d.\n", my_rank, global_sum);
}
// barrier to tidy up stdout a bit
// MPI_Barrier(MPI_COMM_WORLD);
float mean; // float!!
// the root computes the mean (only one to know the global size)
if (my_rank == root_rank)
{
mean = ((float)global_sum) / global_size;
printf("Process %d computed the mean = %f.\n", my_rank, mean);
}
// broadcast the mean to all processes
MPI_Bcast(&mean, 1, MPI_FLOAT, root_rank, MPI_COMM_WORLD);
// print the mean
printf("Process %d got the mean = %f.\n", my_rank, mean);
// barrier to tidy up stdout a bit
// MPI_Barrier(MPI_COMM_WORLD);
// compute the number of values (from the local vector) over the mean
int local_number = 0;
for (int i = 0; i < local_size; i++)
{
if (local_vector[i] >= mean)
local_number++;
}
printf("Process %d has %d values over the mean.\n", my_rank, local_number);
// reduce these numbers on root process
int over_the_mean;
MPI_Reduce(&local_number, &over_the_mean, 1, MPI_INT, MPI_SUM, root_rank, MPI_COMM_WORLD);
// print the total number of values over the mean
if (my_rank == root_rank) {
printf("the total number of values over the mean is %d.\n", over_the_mean);
}
MPI_Finalize();
return EXIT_SUCCESS;
}

BE/04_n-corps/n-corps.md Normal file
@@ -0,0 +1,170 @@
# Exercise 4: the N-body problem
This file is part of the graded deliverable for the Calcul parallèle BE.
## Question 1
Determine which computations can be parallelised and which communications need to be set up in the following sequential code. Propose a parallel rewrite of this code using message passing.
```
variables : force[1,...,N], data[1,...,N]
for t in 1, nb_steps do
    for i in 1, N do
        force[i] = 0
        for j in 1, N do
            force[i] = force[i] + interaction(data[i], data[j])
        end for
    end for
    for i in 1, N do
        data[i] = update(data[i], force[i])
    end for
end for
```
### Answer Q1
Assume we have K processes such that K divides N, for instance N = 2K, so that each process owns `ratio = N / K` bodies (an illustrative MPI sketch is given after the pseudocode).
```C
global variables : K, N, ratio
local variables  : ik, force[1,...,ratio]
variables        : data[1,...,N]
// data is both global and local, since it is communicated between the processes
ratio = N / K
// Each process k handles `ratio` bodies,
// e.g. if `ratio` = 2:
//   process 0 -> body 0 + body 1
//   process 1 -> body 2 + body 3
//   ...
// Every process must know `data`
// (only process 0 knows `data` at the start)
// -> Broadcast data from 0 to all
// this outer loop is not parallelisable:
// step t-1 is needed to compute step t
for t in 1, nb_steps do
    ik = 0
    // this loop is parallelisable;
    // in the code we "split" the N bodies into packets of `ratio`
    for i in 1, N do
        if je_mocuppe_de_ce_corps(i, N, K) // one possible way to split
            // reset the force
            force[ik] = 0
            // accumulate the total force on the bodies we own
            for j in 1, N do
                force[ik] = force[ik] + interaction(data[i], data[j])
            end for
            // update our local `data`
            data[i] = update(data[i], force[ik])
            ik++
        end if
    end for
    // once every `data` entry has been updated locally (in each process),
    // all this information must be gathered
    // -> All_Gather of the local data
    // we obtain a `data` synchronised across all processes,
    // as after the initial broadcast
end for
```
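The following is a minimal, illustrative MPI sketch of this scheme in C (it is not the graded code): it assumes a contiguous block distribution with K dividing N, and the `interaction()`/`update()` functions are placeholders added only to make the program self-contained and runnable.
```C
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

#define N 8            /* total number of bodies (assumed divisible by K) */
#define NB_STEPS 4

/* placeholder physics, only so the sketch compiles and runs */
static double interaction(double a, double b) { return b - a; }
static double update(double d, double f)      { return d + 0.1 * f; }

int main(int argc, char *argv[])
{
    MPI_Init(&argc, &argv);
    int K, my_rank;
    MPI_Comm_size(MPI_COMM_WORLD, &K);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    if (N % K != 0)
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);

    int ratio = N / K;           /* number of bodies owned by each process */
    int first = my_rank * ratio; /* index of my first body */

    double data[N];
    double force[ratio];         /* one force slot per owned body */

    /* only process 0 knows the initial data -> broadcast it to everybody */
    if (my_rank == 0)
        for (int i = 0; i < N; i++)
            data[i] = (double)i;
    MPI_Bcast(data, N, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    for (int t = 0; t < NB_STEPS; t++)
    {
        /* each process computes the forces on its own bodies only */
        for (int ik = 0; ik < ratio; ik++)
        {
            force[ik] = 0.0;
            for (int j = 0; j < N; j++)
                force[ik] += interaction(data[first + ik], data[j]);
        }
        /* update the owned block, then gather all blocks on every process */
        for (int ik = 0; ik < ratio; ik++)
            data[first + ik] = update(data[first + ik], force[ik]);
        MPI_Allgather(MPI_IN_PLACE, ratio, MPI_DOUBLE,
                      data, ratio, MPI_DOUBLE, MPI_COMM_WORLD);
    }

    if (my_rank == 0)
        printf("data[0] after %d steps = %f\n", NB_STEPS, data[0]);
    MPI_Finalize();
    return EXIT_SUCCESS;
}
```
Compared with the pseudocode above, the sketch computes all the owned forces before updating, so every interaction of a time step uses the positions of step t-1.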
## Question 2
Propose a parallel version of the following code.
```
variables : force[1,...,N], data[1,...,N]
for t in 1, nb_steps do
    for i in 1, N do
        force[i] = 0
    end for
    for i in 1, N do
        for j in 1, i-1 do
            f = interaction(data[i],data[j])
            force[i] = force[i] + f
            force[j] = force[j] - f
        end for
    end for
    for i in 1, N do
        data[i] = update(data[i], force[i])
    end for
end for
```
### Answer Q2
A force-reduction sketch in MPI C is given after the pseudocode.
```C
global variables : force[1,...,N], data[1,...,N]
// Every process must know `data`
// (only process 0 knows `data` at the start)
// -> Broadcast data from 0 to all
// this outer loop is not parallelisable:
// step t-1 is needed to compute step t
for t in 1, nb_steps do
    // compute the forces (more efficiently):
    // only N(N-1)/2 calls to `interaction` are made
    for i in 1, N do
        if je_mocuppe_de_ce_corps(i, N, K) // I handle this "column"
            // reset the force
            force[i] = 0
            // accumulate the total force on the bodies we own
            for j in 1, i-1 do
                f = interaction(data[i],data[j])
                force[i] = force[i] + f
                force[j] = force[j] - f
            end for
            // reduce the forces computed for each body
            // -> All_reduce
            // update our local `data`
            data[i] = update(data[i], force[i])
        end if
    end for
    // once every `data` entry has been updated locally (in each process),
    // all this information must be gathered
    // -> All_Gather of the local data
    // we obtain a `data` synchronised across all processes,
    // as after the initial broadcast
end for
```
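As an illustration of the `All_reduce` step above, here is a minimal sketch (an assumption, not the graded code) of the force-reduction pattern. It reuses the names `N`, `K`, `ratio`, `first`, `data`, `interaction` and `update` from the sketch after Question 1 and would replace the body of the time-step loop there.
```C
double force_local[N], force_global[N];
for (int i = 0; i < N; i++)
    force_local[i] = 0.0;

/* each process handles only the "columns" i of the triangle it owns */
for (int i = first; i < first + ratio; i++)
    for (int j = 0; j < i; j++)
    {
        double f = interaction(data[i], data[j]);
        force_local[i] += f;  /* action ...       */
        force_local[j] -= f;  /* ... and reaction */
    }

/* sum the partial forces of all processes; every process gets the total */
MPI_Allreduce(force_local, force_global, N, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);

/* each process then updates its own block; an MPI_Allgather (as in
   Question 1) resynchronises `data` across the processes */
for (int i = first; i < first + ratio; i++)
    data[i] = update(data[i], force_global[i]);
MPI_Allgather(MPI_IN_PLACE, ratio, MPI_DOUBLE,
              data, ratio, MPI_DOUBLE, MPI_COMM_WORLD);
```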
## Question 3
What are the drawbacks of this version?
Propose a solution to mitigate them.
### Answer Q3
The drawback of this version is that the work now has to be distributed "in a triangle". Indeed, since no redundant call to `interaction` is made, the computations performed are the following:
| | 0 | 1 | 2 | 3 |
|:-:|:-:|:-:|:-:|:-:|
| 0 | | x | x | x |
| 1 | | | x | x |
| 2 | | | | x |
| 3 | | | | |
We therefore have to perform $\frac{N(N-1)}{2}$ computations, which is harder to distribute over $K = \frac{N}{ratio}$ processes. The naive way I used to parallelise the code of Question 2 is suboptimal, since the computational load is not equal across the processes.
A more efficient way would be, somewhat as in OpenMP, to create tasks for each call to `interaction` and to distribute these tasks evenly across the processes (a lighter alternative is sketched below).
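Short of a full task system, a classical mitigation is a cyclic (round-robin) distribution of the columns, which mixes short and long columns of the triangle on every process. A hypothetical ownership test (`owns_column` is an illustrative name, not part of the original code; `K` and `my_rank` are as in the earlier sketch) could look like this:
```C
/* Cyclic ownership: column i belongs to process i mod K.           */
/* Mixing short and long columns evens out the triangular workload. */
static int owns_column(int i, int K, int my_rank)
{
    return (i % K) == my_rank;
}
```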

BE/Makefile Normal file
@@ -0,0 +1,11 @@
SOURCES=01_RingD 02_normA 03_overmean 04_n-corps
all: collect
collect:
echo ${USER}
(cd 01_RingD; make clean)
(cd 02_normA; make clean)
(cd 03_overmean; make clean)
tar cvf Calcul_${USER}_`hostname | cut -d'.' -f1`.tar ${SOURCES}

BE/init.sh Normal file
@@ -0,0 +1,6 @@
#!/bin/bash
SIMGRID=/mnt/n7fs/ens/tp_guivarch/opt2021/simgrid-3.31
export PATH=${SIMGRID}/bin:${PATH}
alias smpirun="smpirun -hostfile ${SIMGRID}/archis/cluster_hostfile.txt -platform ${SIMGRID}/archis/cluster_crossbar.xml"

TP1/00_Who_am_i/Makefile Normal file
@@ -0,0 +1,15 @@
MPICC=smpicc
CFLAGS=-g -O4
all: who_am_i
clean:
rm -f *.o who_am_i
%.o: %.c
echo $@
$(MPICC) -c -Wall -o $@ $<
who_am_i: who_am_i.o
$(MPICC) -o $@ $^

TP1/00_Who_am_i/who_am_i.c Normal file
@@ -0,0 +1,26 @@
#include <stdio.h>
#include <mpi.h>
int main( int argc, char *argv[] ) {
int rank, size;
int l;
char name[MPI_MAX_PROCESSOR_NAME];
MPI_Init( &argc, &argv );
// Get rank
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
// Get size
MPI_Comm_size(MPI_COMM_WORLD, &size);
// Get name
MPI_Get_processor_name (name , &l);
printf("Hello world from process %d of %d on processor named %s\n", rank, size, name);
MPI_Finalize();
return 0;
}

TP1/01_Ring/Makefile Normal file
@@ -0,0 +1,14 @@
MPICC=smpicc
CFLAGS=-g -O4
all: ring
clean:
rm -rf *.o ring
%.o: %.c
echo $@
$(MPICC) -c -Wall -o $@ $<
ring: ring.o
$(MPICC) -o $@ $^

TP1/01_Ring/ring.c Normal file
@@ -0,0 +1,74 @@
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
int main(int argc, char *argv[])
{
int value;
int my_rank, size;
int previous, next;
MPI_Status status;
MPI_Init(NULL, NULL);
// Get number of processes
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
// determine my neighbours according to my rank
if (my_rank == 0)
{
previous = size - 1;
next = my_rank + 1;
}
else if (my_rank == size - 1)
{
previous = my_rank - 1;
next = 0;
}
else
{
previous = my_rank - 1;
next = my_rank + 1;
}
value = 1;
// The nodes, starting with node 0, transmit the value to each other,
// each time multiplying it by 2.
// At the end of the transmission, node size-1 holds the value 2^(size-1)
//
// Instruction: before each send and after each receive, each node displays
// - its rank
// - the type communication (send, recv)
// - the value
// receive value from previous node
if (my_rank != 0)
{
MPI_Recv(&value, 1, MPI_INT, previous, 0, MPI_COMM_WORLD, &status);
printf("RECEIVED from process %d of %d, value = %d\n", my_rank, size, value);
value = value * 2;
}
else
{
printf("START, value = %d\n", value);
}
printf("SENDING from process %d of %d, value = %d\n", my_rank, size, value);
// send value to next node
if (my_rank != size - 1)
{
MPI_Send(&value, 1, MPI_INT, next, 0, MPI_COMM_WORLD);
}
else
{
printf("The End, value = %d\n", value);
}
MPI_Finalize();
return EXIT_SUCCESS;
}

TP1/02_Limite/Makefile Normal file
@@ -0,0 +1,14 @@
MPICC=smpicc
CFLAGS=-g -O4
all: limite
clean:
rm -rf *.o limite
%.o: %.c
echo $@
$(MPICC) -c -Wall -o $@ $<
limite: limite.o
$(MPICC) -Dhave_mpi -o $@ $^

TP1/02_Limite/limite.c Normal file
@@ -0,0 +1,86 @@
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
int main(int argc, char *argv[])
{
int size;
int my_rank;
int data_size = -100;
int *buffer_send, *buffer_recv;
int tag;
MPI_Status status;
int l;
char name[MPI_MAX_PROCESSOR_NAME];
// Make sure that the command line has one argument (the size of the data)
if (argc != 2)
{
printf("usage : limite <data size>\n");
return EXIT_FAILURE;
}
MPI_Init(&argc, &argv);
// Make sure exactly 2 MPI processes are used
MPI_Comm_size(MPI_COMM_WORLD, &size);
if (size != 2)
{
printf("%d MPI processes used, please use 2.\n", size);
MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
}
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Get_processor_name(name, &l);
printf("process %d of %d on processor named %s\n", my_rank, size, name);
// Prepare parameters
data_size = atoi(argv[1]);
printf("The size of the data is %d\n", data_size);
buffer_send = (int *)malloc(data_size * sizeof(int));
buffer_recv = (int *)malloc(data_size * sizeof(int));
buffer_send[0] = (my_rank == 0) ? 12345 : 67890;
tag = 0;
if (my_rank == 0)
{
// node 0 sends its buffer buffer_send of size data_size to node 1
MPI_Send(buffer_send, data_size, MPI_INT, 1, tag, MPI_COMM_WORLD);
// node 0 receives in its buffer buffer_recv data from node 1
MPI_Recv(buffer_recv, data_size, MPI_INT, 1, tag, MPI_COMM_WORLD, &status);
printf("MPI process %d received value %d from MPI process %d.\n", my_rank, buffer_recv[0], 1);
}
else
{
// node 1 sends its buffer buffer_send of size data_size to node 0
MPI_Send(buffer_send, data_size, MPI_INT, 0, tag, MPI_COMM_WORLD);
// node 1 receives in its buffer buffer_recv data from node 0
MPI_Recv(buffer_recv, data_size, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
printf("MPI process %d received value %d from MPI process %d.\n", my_rank, buffer_recv[0], 0);
}
free(buffer_send);
free(buffer_recv);
MPI_Finalize();
return EXIT_SUCCESS;
}
// (a) recall for which message size (small, large) MPI_Send behaves asynchronously (resp. synchronously)
// -> small messages are sent asynchronously (eager, buffered); large messages synchronously (rendezvous)
// (b) what will happen when your program, completed as indicated, is called with a message size that makes MPI_Send synchronous?
// -> deadlock: both sends become synchronous and block waiting for a matching receive
// (c) estimate, to within 10 integers, the size limit between two nodes of the same machine
// -> 16383
// (d) propose a solution so that the exchange between the two nodes can still take place beyond this limit (several answers possible). You may test them outside the session.
// -> split the buffer so that only small chunks, which stay asynchronous, are sent
// -> swap the send/recv order on the second node
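// Illustrative sketch (not part of the original file) of the second fix above:
// rank 1 receives first and sends afterwards, so one side is always ready to
// receive and the exchange works whatever the message size. The buffers, tag
// and status are the ones declared in limite.c above.
//
// if (my_rank == 0)
// {
//     MPI_Send(buffer_send, data_size, MPI_INT, 1, tag, MPI_COMM_WORLD);
//     MPI_Recv(buffer_recv, data_size, MPI_INT, 1, tag, MPI_COMM_WORLD, &status);
// }
// else
// {
//     MPI_Recv(buffer_recv, data_size, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
//     MPI_Send(buffer_send, data_size, MPI_INT, 0, tag, MPI_COMM_WORLD);
// }
//
// MPI_Sendrecv, or non-blocking MPI_Isend/MPI_Irecv followed by MPI_Waitall,
// achieve the same deadlock-free exchange in a single call pattern.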

TP1/03_Dot/Makefile Normal file
@@ -0,0 +1,14 @@
MPICC=smpicc
CFLAGS=-g -O4
all: dotp
clean:
rm -rf *.o dotp
%.o: %.c
echo $@
$(MPICC) -c -Wall -o $@ $<
dotp: dotp.o
$(MPICC) -o $@ $^ -lm

TP1/03_Dot/dotp.c Normal file
@@ -0,0 +1,91 @@
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mpi.h>
// perform the dot product between the two vectors x and y of size n
float dot(float x[], float y[], int n);
int main(int argc, char *argv[])
{
int const local_data_size = 5;
float local_x[local_data_size], local_y[local_data_size];
float local_dot, global_dot1, global_dot2, reference;
int borne;
int my_rank, size;
MPI_Init(NULL, NULL);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
borne = size * local_data_size - 1;
reference = (float)(borne * (borne + 1) * (2 * borne + 1) / 6);
// Initialization of both local vectors with the same values
// the global vector would be [0, 1, ..., size*local_data_size - 1]
for (int i = 0; i < local_data_size; i++)
{
local_x[i] = (float)(local_data_size * my_rank + i);
local_y[i] = (float)(local_data_size * my_rank + i);
// printf("[MPI process %d] value[%d]: %f\n", my_rank, i, local_x[i]);
}
local_dot = dot(local_x, local_y, local_data_size);
printf("[MPI process %d] my local dot product: %f\n", my_rank, local_dot);
/* Two-step operation */
global_dot1 = 0.0;
// Step 1
// Use a collective communication to compute the global dot product
// in such a way that the node 0 gets this value
MPI_Reduce(&local_dot, &global_dot1, 1, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);
// Node 0 displays the global value and the reference (sum of the squares of the first integers)
if (my_rank == 0)
{
printf("[MPI process %d] *Two-step collective operation* global dot product: %f == %f\n", my_rank, global_dot1, reference);
}
// Step 2
// Use a collective communication to broadcast the global value on each node
MPI_Bcast(&global_dot1, 1, MPI_FLOAT, 0, MPI_COMM_WORLD);
// A node i (i different from 0) displays the global value
if (my_rank != 0)
{
printf("[MPI process %d] *Two-step collective operation* global dot product: %f == %f\n", my_rank, global_dot1, reference);
}
/* One-step operation */
global_dot2 = 0;
// Step 3
// Now use one single collective communication to perform both steps 1 and 2
MPI_Allreduce(&local_dot, &global_dot2, 1, MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD);
// every node displays the global value
printf("[MPI process %d] *One-step collective operation* global dot product: %f == %f\n", my_rank, global_dot2, reference);
MPI_Finalize();
return EXIT_SUCCESS;
}
float dot(float x[], float y[], int n)
{
float res = 0.0;
for (int i = 0; i < n; i++)
{
res += x[i] * y[i];
}
return res;
}

TP1/04_Mult/Makefile Normal file
@@ -0,0 +1,14 @@
MPICC=smpicc
CFLAGS=-g -O4
all: MultAv
clean:
rm -rf *.o MultAv
%.o: %.c
echo $@
$(MPICC) -c -Wall -o $@ $<
MultAv: MultAv.o
$(MPICC) -o $@ $^

TP1/04_Mult/MultAv.c Normal file
@@ -0,0 +1,119 @@
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
void multAv(double x[], double *A, double y[], int m, int n);
void init0(double x[], int n);
int main(int argc, char *argv[])
{
int size;
int const n = 12;
int my_rank;
MPI_Init(&argc, &argv);
// Get number of processes and check that 4 processes are used
MPI_Comm_size(MPI_COMM_WORLD, &size);
if (size != 4)
{
printf("This application is meant to be run with 4 MPI processes.\n");
MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
}
// Get my rank
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
// Declaration and Initialization of A (one for all components)
// the number of rows per block, b, is the same for all nodes
// (if you don't change the constants)
int b = n / size;
double *A;
A = (double *)malloc(b * n * sizeof(double));
for (int i = 0; i < b; i++)
{
for (int j = 0; j < n; j++)
{
A[i * n + j] = 1.0;
// A[i*n + j] = (double) my_rank;
// A[i*n + j] = (double) my_rank*(i+1)+(j+1);
// printf("Process [%d], A[%d][%d] = %f\n", my_rank, i, j, A[i*n+j]);
}
}
// reference vector to verify that the global vector is correct
double v_ref[n];
for (int i = 0; i < n; i++)
{
v_ref[i] = (double)i;
}
// local vector
double v_local[b];
for (int i = 0; i < b; i++)
{
v_local[i] = (double)b * my_rank + i;
// printf("Process [%d], v_local[%d] = %f\n", my_rank, i, v_local[i]);
}
// global vector
double v_global[n];
init0(v_global, n);
// Use a collective communication in order to gather on ALL the nodes the
// part of the local vector into the global vector
MPI_Allgather(v_local, b, MPI_DOUBLE, v_global, b, MPI_DOUBLE, MPI_COMM_WORLD);
// the node 2 checks if the global vector is correct
if (my_rank == 2)
{
for (int i = 0; i < n; i++)
{
printf("Process [%d], vérif[%d] = %f\n", my_rank, i, v_global[i] - v_ref[i]);
}
}
MPI_Barrier(MPI_COMM_WORLD);
// vector x_loc = A * v_global
double x_loc[b];
init0(x_loc, b);
// Perform the multiplication
multAv(x_loc, A, v_global, b, n);
// each node displays x (with A, full of ones, all the components of x should be the same)
for (int i = 0; i < b; i++)
{
printf("Process [%d], x_loc[%d] = %f\n", my_rank, i, x_loc[i]);
}
MPI_Finalize();
return EXIT_SUCCESS;
}
void multAv(double x[], double *A, double y[], int m, int n)
{
for (int i = 0; i < m; i++)
{
x[i] = 0.0;
for (int j = 0; j < n; j++)
{
x[i] += A[i * n + j] * y[j];
}
}
return;
}
void init0(double x[], int n)
{
for (int i = 0; i < n; i++)
{
x[i] = 0.0;
}
return;
}

TP1/05_CG/CG_par Executable file (binary, not shown)

TP1/05_CG/CG_par.c Normal file
@@ -0,0 +1,121 @@
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <math.h>
#include "util.h"
void cg_par(double *A_local, double *rhs_local, int N, int b, float tol)
{
int size;
int my_rank;
// Get number of processes
MPI_Comm_size(MPI_COMM_WORLD, &size);
// Get my rank
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
//**************** Parallel CG (M == N)
int num_it, max_it;
double x[b], r[b], Ap[b];
double p_local[b], p_global[N];
double nr_global, nr_local;
double np2_global, np2_local;
double epsilon;
double alpha, beta;
max_it = 100;
// initialization of the solution (local vector)
for (int i = 0; i < b; i++)
{
x[i] = 0.0;
}
// compute the global norm of the rhs_local (dot product, then sqrt);
// all the nodes must have this value
nr_local = dot(rhs_local, rhs_local, b);
MPI_Allreduce(&nr_local, &nr_global, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
nr_global = sqrt(nr_global);
// if (my_rank == 0) printf("nr = %lg\n", nr_global);
// threshold of the CG
epsilon = tol * nr_global;
// Initialization of p_local and r (local vectors)
copy_v(p_local, rhs_local, b);
copy_v(r, rhs_local, b);
// number of iterations
num_it = 0;
printf("num_it %d -- epsilon %lg -- nr_global %lg\n", num_it, epsilon, nr_global);
while ((nr_global > epsilon) && (num_it < max_it))
{
// Compute the local vector Ap = A_local*p_global
// => gather p_local vectors to p_global
MPI_Allgather(p_local, b, MPI_DOUBLE, p_global, b, MPI_DOUBLE, MPI_COMM_WORLD);
// display the first entry of p_global (debug)
if (my_rank == 0)
printf("p_global[0] = %lg\n", p_global[0]);
// do the matrix-vector multiplication
multAv(Ap, A_local, p_global, b, N);
// compute the global dot product np2_global = (Ap_global, p_global)
// all the nodes must have this value
np2_local = dot(p_local, Ap, b);
MPI_Allreduce(&np2_local, &np2_global, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
if (my_rank == 0)
printf("np2 = %lg\n", np2_global);
// alpha
alpha = (nr_global * nr_global) / np2_global;
// if(my_rank == 0) printf("alpha = %lg\n", alpha);
// compute the new x and r (local vectors)
axpy(alpha, x, p_local, b);
axpy(-alpha, r, Ap, b);
// compute the global norm of the residual (dot product, then sqrt);
// all the nodes must have this value
nr_local = dot(r, r, b);
MPI_Allreduce(&nr_local, &nr_global, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
nr_global = sqrt(nr_global);
// if(my_rank == 0) printf("nr = %lg\n", nr_global);
// beta
beta = (nr_global * nr_global) / (alpha * np2_global);
// if(my_rank == 0) printf("beta = %lg\n", beta);
// compute the new p_local (local vector)
xpay(beta, r, p_local, b);
// increase the number of iterations
num_it++;
// if(my_rank == 0) printf("num_it %d -- nr_global %lg\n", num_it, nr_global);
}
free(A_local);
// gather the solution on the node 0
double x_global[N];
MPI_Gather(x, b, MPI_DOUBLE, x_global, b, MPI_DOUBLE, 0, MPI_COMM_WORLD);
// display the solution
if (my_rank == 0)
{
for (int i = 0; i < N; i++)
{
printf("x[%d] = %lg\n", i, x_global[i]);
}
}
return;
}

TP1/05_CG/CG_par.h Normal file
@@ -0,0 +1 @@
void cg_par(double *A_local, double *rhs, int N, int b, float tol);

TP1/05_CG/CG_sq.c Normal file
@@ -0,0 +1,85 @@
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "util.h"
void cg_sq(double *A, double *rhs, int N, double tol)
{
int num_it, max_it;
double x[N], p[N], r[N], Ap[N];
double nr;
double epsilon;
double np2, alpha, beta;
max_it = 100;
// initialization of the solution
for (int i = 0; i < N; i++)
{
// b[i] = (float) i;
x[i] = 0.0;
}
// compute the norm of the rhs (dot product, then sqrt)
nr = dot(rhs, rhs, N);
nr = sqrt(nr);
printf("nr = %lg\n", nr);
// threshold of the CG
epsilon = tol * nr;
// Initialization of p and r
copy_v(p, rhs, N);
copy_v(r, rhs, N);
// number of iterations
num_it = 0;
printf("num_it %d -- epsilon %lg -- nr %lg\n", num_it, epsilon, nr);
while ((nr > epsilon) && (num_it < max_it))
{
// Compute the vector Ap = A*p
multAv(Ap, A, p, N, N);
// compute the dot product np2 = (Ap, p)
np2 = dot(p, Ap, N);
printf("np2 = %lg\n", np2);
// alpha
alpha = (nr * nr) / np2;
// printf("alpha = %lg\n", alpha);
// compute the new x and r
axpy(alpha, x, p, N);
axpy(-alpha, r, Ap, N);
// compute the norm of the residual (dot product, then sqrt)
nr = dot(r, r, N);
nr = sqrt(nr);
// printf("nr = %lg\n", nr);
// beta
beta = (nr * nr) / (alpha * np2);
// printf("beta = %lg\n", beta);
// compute the new p
xpay(beta, r, p, N);
// increase the number of iterations
num_it++;
// printf("num_it %d -- nr %lg \n", num_it, nr);
}
// display the solution
for (int i = 0; i < N; i++)
{
printf("x[%d] = %lg\n", i, x[i]);
}
return;
}

TP1/05_CG/CG_sq.h Normal file
@@ -0,0 +1 @@
void cg_sq(double *A, double *rhs, int N, double tol);

TP1/05_CG/Laplacien.mtx Normal file
@@ -0,0 +1,21 @@
%%MatrixMarket matrix coordinate real symmetric
%-------------------------------------------------------------------------------
% UF Sparse Matrix Collection, Tim Davis
% http://www.cise.ufl.edu/research/sparse/matrices/HB/nos3
% name: HB/nos3
% [SYMMETRIC MATRIX, FE APPROXIMATION TO BIHARMONIC OPERATOR ON PLATE]
% id: 219
% date: 1982
% author: H. Simon
% ed: I. Duff, R. Grimes, J. Lewis
% fields: title A name id date author ed kind
% kind: structural problem
%-------------------------------------------------------------------------------
4 4 7
1 1 2.0
1 2 -1.0
2 2 2.0
2 3 -1.0
3 3 2.0
3 4 -1.0
4 4 2.0

TP1/05_CG/Makefile Normal file
@@ -0,0 +1,18 @@
CC=gcc
MPICC=smpicc
CFLAGS=-g -O4
all: CG_par CG_sq
clean:
rm -rf *.o CG_par CG_sq
%.o: %.c
echo $@
$(MPICC) -c -Wall -o $@ $<
CG_par: util.o CG_par.o main_par.o
$(MPICC) -o $@ $^ -lm
CG_sq: util.o CG_sq.o main_sq.o
$(MPICC) -o $@ $^ -lm

TP1/05_CG/main_par.c Normal file
@@ -0,0 +1,98 @@
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <math.h>
#include "util.h"
#include "CG_par.h"
int main(int argc, char* argv[]) {
int size;
int my_rank;
FILE *f;
int M, N, nz;
double *A = NULL;
double *rhs;
double tol = 1e-6;
// Make sure that the command line has one argument (name of the matrix file)
if(argc != 2){
printf("usage : CG_par <file>\n");
return EXIT_FAILURE;
}
//**************** MPI Initialization
MPI_Init(&argc, &argv);
// Get number of processes and check that 4 processes are used
MPI_Comm_size(MPI_COMM_WORLD, &size);
if(size != 4) {
printf("This application is meant to be run with 4 MPI processes.\n");
MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
}
// Get my rank
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
//**************** READING OF THE MATRIX AND DISTRIBUTION OF THE BLOCKS OF ROWS TO EACH NODE
// You can test with a small matrix ("Laplacien.mtx")
// or a larger one ("nos3.mtx")
f = fopen(argv[1], "r");
// All nodes get the sizes
mm_read_mtx_crd_size(f, &M, &N, &nz);
//printf("%d %d %d\n", M, N, nz);
// Reading the matrix by node 0
if(my_rank == 0) {
A = (double *) malloc(M*N*sizeof(double));
read_A(f, A, M, N, nz);
// increase diagonal to be sure to converge easily
for (int i = 0; i < M; i++) {
*(A+i*N+i) = *(A+i*N+i) + 10.0;
}
}
if (f != stdin) fclose(f);
// DISTRIBUTION OF BLOCKS => A_local(b, N)
int b = M / size;
double *A_local;
A_local = (double *) malloc(b*N*sizeof(double));
MPI_Scatter(A, b*N, MPI_DOUBLE, A_local, b*N, MPI_DOUBLE, 0, MPI_COMM_WORLD);
if(my_rank == 0) free(A);
//**************** END OF THE READING OF THE MATRIX AND THE DISTRIBUTION OF THE BLOCKS OF ROWS TO EACH NODE
//**************** PARALLEL CG (M == N)
rhs = (double *) malloc(b*sizeof(double));
// initialization of the right hand side (local vector)
for(int i = 0; i < b; i++){
rhs[i] = (float) (b*my_rank + i);
}
cg_par(A_local, rhs, N, b, tol);
//**************** END OF PARALLEL CG
MPI_Finalize();
printf("The End\n");
return EXIT_SUCCESS;
}

TP1/05_CG/main_sq.c Normal file
@@ -0,0 +1,79 @@
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mpi.h>
#include "util.h"
#include "CG_sq.h"
int main(int argc, char* argv[]) {
int size;
FILE *f;
int M, N, nz;
double *A = NULL;
double *rhs;
double tol = 1e-6;
// Make sure that the command line has one argument (name of the matrix file)
if(argc != 2){
printf("usage : CG_sq <file>\n");
return EXIT_FAILURE;
}
MPI_Init(&argc, &argv);
// Get number of processes and check that only 1 process is used
MPI_Comm_size(MPI_COMM_WORLD, &size);
if(size != 1) {
printf("This application is meant to be run with 1 MPI process.\n");
MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
}
//**************** READING THE MATRIX
// You can test with a small matrix ("Laplacien.mtx")
// or a larger one ("nos3.mtx")
f = fopen(argv[1], "r");
mm_read_mtx_crd_size(f, &M, &N, &nz);
//printf("%d %d %d\n", M, N, nz);
A = (double *) malloc(M*N*sizeof(double));
read_A(f, A, M, N, nz);
// increase diagonal to be sure to converge easily
for (int i = 0; i < M; i++) {
*(A+i*N+i) = *(A+i*N+i) + 10.0;
}
if (f !=stdin) fclose(f);
//**************** END OF READING THE MATRIX
//**************** SEQUENTIAL CG (M == N)
rhs = (double *) malloc(N*sizeof(double));
// initialization of the right-hand side
for(int i = 0; i < N; i++){
rhs[i] = (float) i;
}
cg_sq(A, rhs, N, tol);
//**************** END OF SEQUENTIAL CG
MPI_Finalize();
printf("The End\n");
return EXIT_SUCCESS;
}

TP1/05_CG/nos3.mtx Normal file (8416 lines, diff too large to show)

TP1/05_CG/util.c Normal file
@@ -0,0 +1,114 @@
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <ctype.h>
#include <string.h>
#include "util.h"
void multAv(double x[], double *A, double y[], int m, int n){
for(int i = 0; i < m; i++){
x[i] = 0.0;
for(int j = 0; j < n; j++){
x[i] += A[i*n + j] * y[j];
}
}
return;
}
void copy_v(double x[], double y[], int n){
for(int i = 0; i < n; i++){
x[i] = y[i];
}
return;
}
double dot(double x[], double y[], int n){
double res = 0.0;
for(int i = 0; i < n; i++){
res += x[i]*y[i];
}
return res;
}
void axpy(double a, double x[], double y[], int n){
for(int i = 0; i < n; i++){
x[i] = x[i] + a*y[i];
}
return;
}
void xpay(double a, double x[], double y[], int n){
for(int i = 0; i < n; i++){
y[i] = x[i] + a*y[i];
}
return;
}
int read_A(FILE *f, double *A, int M, int N, int nz){
int i, j, k;
double val;
int error;
for (i = 0; i < M; i++) {
for(j = 0; j < N; j++) {
*(A+i*N+j) = 0.0;
}
}
for (k = 0; k < nz; k++) {
error = fscanf(f, "%d %d %lg\n", &i, &j, &val);
if(!error) exit(0);
//printf("-- %d -- %d -- %lg\n", i, j, val);
*(A + (i-1)*N + (j-1)) = val;
// this is a symmetric matrix
*(A + (j-1)*N + (i-1)) = val;
}
/*
for (k = 0; k < nz; k++) {
printf("---- %lg\n", *(A+k));
}
*/
return 0;
}
int mm_read_mtx_crd_size(FILE *f, int *M, int *N, int *nz )
{
char line[MM_MAX_LINE_LENGTH];
int num_items_read;
/* set return null parameter values, in case we exit with errors */
*M = *N = *nz = 0;
/* now continue scanning until you reach the end-of-comments */
do
{
if (fgets(line,MM_MAX_LINE_LENGTH,f) == NULL)
return MM_PREMATURE_EOF;
}while (line[0] == '%');
/* line[] is either blank or has M,N, nz */
if (sscanf(line, "%d %d %d", M, N, nz) == 3)
return 0;
else
do
{
num_items_read = fscanf(f, "%d %d %d", M, N, nz);
if (num_items_read == EOF) return MM_PREMATURE_EOF;
}
while (num_items_read != 3);
return 0;
}

TP1/05_CG/util.h Normal file
@@ -0,0 +1,21 @@
#include <ctype.h>
#define MM_MAX_LINE_LENGTH 1025
#define MatrixMarketBanner "%%MatrixMarket"
#define MM_MAX_TOKEN_LENGTH 64
#define MM_PREMATURE_EOF 12
void multAv(double x[], double *A, double y[], int m, int n);
void copy_v(double x[], double y[], int n);
double dot(double x[], double y[], int n);
void axpy(double a, double x[], double y[], int n);
void xpay(double a, double x[], double y[], int n);
int read_A(FILE *f, double *A, int M, int N, int nz);
int mm_read_mtx_crd_size(FILE *f, int *M, int *N, int *nz);

TP1/init.sh Executable file
@@ -0,0 +1,6 @@
#!/bin/bash
SIMGRID=/mnt/n7fs/ens/tp_guivarch/opt2021/simgrid-3.31
export PATH=${SIMGRID}/bin:${PATH}
alias smpirun="smpirun -hostfile ${SIMGRID}/archis/cluster_hostfile.txt -platform ${SIMGRID}/archis/cluster_crossbar.xml"

TP1/tp_mpi.pdf Normal file (binary, not shown)

TP2/.vscode/settings.json vendored Normal file
@@ -0,0 +1,7 @@
{
"files.associations": {
"*.html": "html",
"*.toml": "toml",
"*.bak": "c"
}
}

TP2/Makefile Normal file
@@ -0,0 +1,26 @@
CC=gcc
MPICC=smpicc
LD=smpicc
LDFLAGS=
CFLAGS=-O4
CLIBS=-lblas -llapack
INCLUDES=
SOURCEDIR=src
BUILDDIR=build
all: dir main # test
test_env: dir who_am_i
dir:
mkdir -p $(BUILDDIR)/bin
clean:
rm -rf $(BUILDDIR)
%.o: $(SOURCEDIR)/%.c
echo $@
$(MPICC) -c -Wall -o $(BUILDDIR)/$@ $< $(CFLAGS) $(INCLUDES)
main: main.o gemms.o ex1.o ex2.o ex3.o utils.o dsmat.o
$(LD) -o $(BUILDDIR)/bin/$@ $(addprefix $(BUILDDIR)/,$^) $(CLIBS) $(LDFLAGS)

TP2/README Normal file
@@ -0,0 +1 @@
https://laurent.fainsin.bzh/assets/CalcPar/

TP2/bench.csv Normal file
@@ -0,0 +1,151 @@
m,n,k,b,p,q,algo,lookahead,gflops
1024,1024,1024,256,2,2,p2p,0,7.475035
1024,1024,1024,256,2,2,p2p,0,7.475035
1024,1024,1024,256,2,2,p2p,0,7.475036
1024,1024,1024,256,2,2,p2p,0,7.475036
1024,1024,1024,256,2,2,p2p,0,7.475036
1024,1024,1024,256,2,2,bcast,0,7.471268
1024,1024,1024,256,2,2,bcast,0,7.471269
1024,1024,1024,256,2,2,bcast,0,7.471268
1024,1024,1024,256,2,2,bcast,0,7.471268
1024,1024,1024,256,2,2,bcast,0,7.471269
1024,1024,1024,256,2,2,p2p-i-la,1,14.306685
1024,1024,1024,256,2,2,p2p-i-la,1,14.306689
1024,1024,1024,256,2,2,p2p-i-la,1,14.306691
1024,1024,1024,256,2,2,p2p-i-la,1,14.306689
1024,1024,1024,256,2,2,p2p-i-la,1,14.306691
1024,1024,1024,256,2,2,p2p-i-la,2,9.856253
1024,1024,1024,256,2,2,p2p-i-la,2,9.856253
1024,1024,1024,256,2,2,p2p-i-la,2,9.856254
1024,1024,1024,256,2,2,p2p-i-la,2,9.856254
1024,1024,1024,256,2,2,p2p-i-la,2,9.856254
1024,1024,1024,256,2,2,p2p-i-la,3,14.317787
1024,1024,1024,256,2,2,p2p-i-la,3,14.317789
1024,1024,1024,256,2,2,p2p-i-la,3,14.317793
1024,1024,1024,256,2,2,p2p-i-la,3,14.317793
1024,1024,1024,256,2,2,p2p-i-la,3,14.317793
1024,1024,1024,256,2,2,p2p-i-la,4,14.317787
1024,1024,1024,256,2,2,p2p-i-la,4,14.317787
1024,1024,1024,256,2,2,p2p-i-la,4,14.317793
1024,1024,1024,256,2,2,p2p-i-la,4,14.317793
1024,1024,1024,256,2,2,p2p-i-la,4,14.317793
2048,2048,2048,256,2,2,p2p,0,14.951931
2048,2048,2048,256,2,2,p2p,0,14.951932
2048,2048,2048,256,2,2,p2p,0,14.951932
2048,2048,2048,256,2,2,p2p,0,14.951929
2048,2048,2048,256,2,2,p2p,0,14.951932
2048,2048,2048,256,2,2,bcast,0,14.950045
2048,2048,2048,256,2,2,bcast,0,14.950048
2048,2048,2048,256,2,2,bcast,0,14.950048
2048,2048,2048,256,2,2,bcast,0,14.950046
2048,2048,2048,256,2,2,bcast,0,14.950046
2048,2048,2048,256,2,2,p2p-i-la,1,28.642430
2048,2048,2048,256,2,2,p2p-i-la,1,28.642433
2048,2048,2048,256,2,2,p2p-i-la,1,28.642433
2048,2048,2048,256,2,2,p2p-i-la,1,28.642433
2048,2048,2048,256,2,2,p2p-i-la,1,28.642436
2048,2048,2048,256,2,2,p2p-i-la,2,23.366289
2048,2048,2048,256,2,2,p2p-i-la,2,23.366289
2048,2048,2048,256,2,2,p2p-i-la,2,23.366289
2048,2048,2048,256,2,2,p2p-i-la,2,23.366289
2048,2048,2048,256,2,2,p2p-i-la,2,23.366289
2048,2048,2048,256,2,2,p2p-i-la,3,28.653563
2048,2048,2048,256,2,2,p2p-i-la,3,28.653569
2048,2048,2048,256,2,2,p2p-i-la,3,28.653569
2048,2048,2048,256,2,2,p2p-i-la,3,28.653566
2048,2048,2048,256,2,2,p2p-i-la,3,28.653569
2048,2048,2048,256,2,2,p2p-i-la,4,23.369989
2048,2048,2048,256,2,2,p2p-i-la,4,23.369989
2048,2048,2048,256,2,2,p2p-i-la,4,23.369991
2048,2048,2048,256,2,2,p2p-i-la,4,23.369991
2048,2048,2048,256,2,2,p2p-i-la,4,23.369991
2048,2048,2048,256,2,2,p2p-i-la,5,28.653569
2048,2048,2048,256,2,2,p2p-i-la,5,28.653575
2048,2048,2048,256,2,2,p2p-i-la,5,28.653575
2048,2048,2048,256,2,2,p2p-i-la,5,28.653575
2048,2048,2048,256,2,2,p2p-i-la,5,28.653575
2048,2048,2048,256,2,2,p2p-i-la,6,23.369991
2048,2048,2048,256,2,2,p2p-i-la,6,23.369991
2048,2048,2048,256,2,2,p2p-i-la,6,23.369991
2048,2048,2048,256,2,2,p2p-i-la,6,23.369991
2048,2048,2048,256,2,2,p2p-i-la,6,23.369991
2048,2048,2048,256,2,2,p2p-i-la,7,28.659105
2048,2048,2048,256,2,2,p2p-i-la,7,28.659105
2048,2048,2048,256,2,2,p2p-i-la,7,28.659105
2048,2048,2048,256,2,2,p2p-i-la,7,28.659105
2048,2048,2048,256,2,2,p2p-i-la,7,28.659105
2048,2048,2048,256,2,2,p2p-i-la,8,28.659102
2048,2048,2048,256,2,2,p2p-i-la,8,28.659105
2048,2048,2048,256,2,2,p2p-i-la,8,28.659105
2048,2048,2048,256,2,2,p2p-i-la,8,28.659105
2048,2048,2048,256,2,2,p2p-i-la,8,28.659105
3072,3072,3072,256,2,2,p2p,0,22.428405
3072,3072,3072,256,2,2,p2p,0,22.428407
3072,3072,3072,256,2,2,p2p,0,22.428407
3072,3072,3072,256,2,2,p2p,0,22.428407
3072,3072,3072,256,2,2,p2p,0,22.428407
3072,3072,3072,256,2,2,bcast,0,22.427149
3072,3072,3072,256,2,2,bcast,0,22.427149
3072,3072,3072,256,2,2,bcast,0,22.427152
3072,3072,3072,256,2,2,bcast,0,22.427149
3072,3072,3072,256,2,2,bcast,0,22.427152
3072,3072,3072,256,2,2,p2p-i-la,1,42.976658
3072,3072,3072,256,2,2,p2p-i-la,1,42.976662
3072,3072,3072,256,2,2,p2p-i-la,1,42.976658
3072,3072,3072,256,2,2,p2p-i-la,1,42.976662
3072,3072,3072,256,2,2,p2p-i-la,1,42.976662
3072,3072,3072,256,2,2,p2p-i-la,2,33.027327
3072,3072,3072,256,2,2,p2p-i-la,2,33.027327
3072,3072,3072,256,2,2,p2p-i-la,2,33.027327
3072,3072,3072,256,2,2,p2p-i-la,2,33.027330
3072,3072,3072,256,2,2,p2p-i-la,2,33.027327
3072,3072,3072,256,2,2,p2p-i-la,3,42.987825
3072,3072,3072,256,2,2,p2p-i-la,3,42.987825
3072,3072,3072,256,2,2,p2p-i-la,3,42.987829
3072,3072,3072,256,2,2,p2p-i-la,3,42.987818
3072,3072,3072,256,2,2,p2p-i-la,3,42.987822
3072,3072,3072,256,2,2,p2p-i-la,4,37.356416
3072,3072,3072,256,2,2,p2p-i-la,4,37.356414
3072,3072,3072,256,2,2,p2p-i-la,4,37.356422
3072,3072,3072,256,2,2,p2p-i-la,4,37.356416
3072,3072,3072,256,2,2,p2p-i-la,4,37.356416
3072,3072,3072,256,2,2,p2p-i-la,5,42.991522
3072,3072,3072,256,2,2,p2p-i-la,5,42.991526
3072,3072,3072,256,2,2,p2p-i-la,5,42.991526
3072,3072,3072,256,2,2,p2p-i-la,5,42.991526
3072,3072,3072,256,2,2,p2p-i-la,5,42.991522
3072,3072,3072,256,2,2,p2p-i-la,6,37.359194
3072,3072,3072,256,2,2,p2p-i-la,6,37.359194
3072,3072,3072,256,2,2,p2p-i-la,6,37.359194
3072,3072,3072,256,2,2,p2p-i-la,6,37.359194
3072,3072,3072,256,2,2,p2p-i-la,6,37.359197
3072,3072,3072,256,2,2,p2p-i-la,7,42.991526
3072,3072,3072,256,2,2,p2p-i-la,7,42.991538
3072,3072,3072,256,2,2,p2p-i-la,7,42.991534
3072,3072,3072,256,2,2,p2p-i-la,7,42.991534
3072,3072,3072,256,2,2,p2p-i-la,7,42.991534
3072,3072,3072,256,2,2,p2p-i-la,8,37.359200
3072,3072,3072,256,2,2,p2p-i-la,8,37.359202
3072,3072,3072,256,2,2,p2p-i-la,8,37.359202
3072,3072,3072,256,2,2,p2p-i-la,8,37.359205
3072,3072,3072,256,2,2,p2p-i-la,8,37.359202
3072,3072,3072,256,2,2,p2p-i-la,9,42.991549
3072,3072,3072,256,2,2,p2p-i-la,9,42.991549
3072,3072,3072,256,2,2,p2p-i-la,9,42.991549
3072,3072,3072,256,2,2,p2p-i-la,9,42.991545
3072,3072,3072,256,2,2,p2p-i-la,9,42.991545
3072,3072,3072,256,2,2,p2p-i-la,10,37.359205
3072,3072,3072,256,2,2,p2p-i-la,10,37.359202
3072,3072,3072,256,2,2,p2p-i-la,10,37.359202
3072,3072,3072,256,2,2,p2p-i-la,10,37.359214
3072,3072,3072,256,2,2,p2p-i-la,10,37.359202
3072,3072,3072,256,2,2,p2p-i-la,11,42.995159
3072,3072,3072,256,2,2,p2p-i-la,11,42.995159
3072,3072,3072,256,2,2,p2p-i-la,11,42.995144
3072,3072,3072,256,2,2,p2p-i-la,11,42.995167
3072,3072,3072,256,2,2,p2p-i-la,11,42.995152
3072,3072,3072,256,2,2,p2p-i-la,12,42.995159
3072,3072,3072,256,2,2,p2p-i-la,12,42.995159
3072,3072,3072,256,2,2,p2p-i-la,12,42.995152
3072,3072,3072,256,2,2,p2p-i-la,12,42.995171
3072,3072,3072,256,2,2,p2p-i-la,12,42.995159

TP2/bench.sh Executable file
@@ -0,0 +1,39 @@
source utils.sh
echo BENCHMARKING THE METHODS
# you can modify these values
p=2
q=2
P=$((p * q))
#generate_hostfile $P
export OMP_NUM_THREADS=1
export MKL_NUM_THREADS=1
# proper benchmark <--- this could be a TODO for students ? (as in, show weak scaling and/or strong scaling)
#mpi_options="-hostfile hostfiles/hostfile.$P.txt"
mpi_options="-platform platforms/cluster_crossbar.xml -hostfile hostfiles/cluster_hostfile.txt -np $P"
b=256
iter=5
traces="bench_traces"
out="bench_outputs"
csv="bench.csv"
echo m,n,k,b,p,q,algo,lookahead,gflops >$csv
for i in 4 8 12; do
n=$((i * b))
m=$n
k=$n
la=0
options="-c"
for algo in p2p bcast; do
run
done
for la in $(seq 1 $((n / b))); do
algo="p2p-i-la"
options="-c -l $la"
run
done
done

TP2/check.csv Normal file
@@ -0,0 +1,16 @@
m,n,k,b,p,q,algo,lookahead,gflops
2,2,2,2,2,2,p2p,0,0.000172
2,2,2,2,2,2,p2p,0,0.000172
2,2,2,2,2,2,p2p,0,0.000172
2,2,2,2,2,2,p2p,0,0.000172
2,2,2,2,2,2,p2p,0,0.000172
2,2,2,2,2,2,bcast,0,0.000075
2,2,2,2,2,2,bcast,0,0.000075
2,2,2,2,2,2,bcast,0,0.000075
2,2,2,2,2,2,bcast,0,0.000075
2,2,2,2,2,2,bcast,0,0.000075
2,2,2,2,2,2,p2p-i-la,1,0.000223
2,2,2,2,2,2,p2p-i-la,1,0.000223
2,2,2,2,2,2,p2p-i-la,1,0.000223
2,2,2,2,2,2,p2p-i-la,1,0.000223
2,2,2,2,2,2,p2p-i-la,1,0.000223

TP2/check.sh Executable file
@@ -0,0 +1,39 @@
source utils.sh
echo BENCHMARKING THE METHODS
# you can modify these values
p=2
q=2
P=$((p * q))
#generate_hostfile $P
export OMP_NUM_THREADS=1
export MKL_NUM_THREADS=1
# proper benchmark <--- this could be a TODO for students ? (as in, show weak scaling and/or strong scaling)
#mpi_options="-hostfile hostfiles/hostfile.$P.txt"
mpi_options="-platform platforms/cluster_crossbar.xml -hostfile hostfiles/cluster_hostfile.txt -np 4"
b=2
iter=5
traces="check_traces"
out="check_outputs"
csv="check.csv"
echo m,n,k,b,p,q,algo,lookahead,gflops >$csv
for i in 1; do
n=$((i * b))
m=$n
k=$n
la=0
options="-c"
for algo in p2p bcast; do
run
done
for la in $(seq 1 $((n / b))); do
algo="p2p-i-la"
options="-c -l $la"
run
done
done

TP2/hostfiles/cluster_hostfile.txt Normal file
@@ -0,0 +1,256 @@
host-0.hawaii.edu
host-1.hawaii.edu
host-2.hawaii.edu
host-3.hawaii.edu
host-4.hawaii.edu
host-5.hawaii.edu
host-6.hawaii.edu
host-7.hawaii.edu
host-8.hawaii.edu
host-9.hawaii.edu
host-10.hawaii.edu
host-11.hawaii.edu
host-12.hawaii.edu
host-13.hawaii.edu
host-14.hawaii.edu
host-15.hawaii.edu
host-16.hawaii.edu
host-17.hawaii.edu
host-18.hawaii.edu
host-19.hawaii.edu
host-20.hawaii.edu
host-21.hawaii.edu
host-22.hawaii.edu
host-23.hawaii.edu
host-24.hawaii.edu
host-25.hawaii.edu
host-26.hawaii.edu
host-27.hawaii.edu
host-28.hawaii.edu
host-29.hawaii.edu
host-30.hawaii.edu
host-31.hawaii.edu
host-32.hawaii.edu
host-33.hawaii.edu
host-34.hawaii.edu
host-35.hawaii.edu
host-36.hawaii.edu
host-37.hawaii.edu
host-38.hawaii.edu
host-39.hawaii.edu
host-40.hawaii.edu
host-41.hawaii.edu
host-42.hawaii.edu
host-43.hawaii.edu
host-44.hawaii.edu
host-45.hawaii.edu
host-46.hawaii.edu
host-47.hawaii.edu
host-48.hawaii.edu
host-49.hawaii.edu
host-50.hawaii.edu
host-51.hawaii.edu
host-52.hawaii.edu
host-53.hawaii.edu
host-54.hawaii.edu
host-55.hawaii.edu
host-56.hawaii.edu
host-57.hawaii.edu
host-58.hawaii.edu
host-59.hawaii.edu
host-60.hawaii.edu
host-61.hawaii.edu
host-62.hawaii.edu
host-63.hawaii.edu
host-64.hawaii.edu
host-65.hawaii.edu
host-66.hawaii.edu
host-67.hawaii.edu
host-68.hawaii.edu
host-69.hawaii.edu
host-70.hawaii.edu
host-71.hawaii.edu
host-72.hawaii.edu
host-73.hawaii.edu
host-74.hawaii.edu
host-75.hawaii.edu
host-76.hawaii.edu
host-77.hawaii.edu
host-78.hawaii.edu
host-79.hawaii.edu
host-80.hawaii.edu
host-81.hawaii.edu
host-82.hawaii.edu
host-83.hawaii.edu
host-84.hawaii.edu
host-85.hawaii.edu
host-86.hawaii.edu
host-87.hawaii.edu
host-88.hawaii.edu
host-89.hawaii.edu
host-90.hawaii.edu
host-91.hawaii.edu
host-92.hawaii.edu
host-93.hawaii.edu
host-94.hawaii.edu
host-95.hawaii.edu
host-96.hawaii.edu
host-97.hawaii.edu
host-98.hawaii.edu
host-99.hawaii.edu
host-100.hawaii.edu
host-101.hawaii.edu
host-102.hawaii.edu
host-103.hawaii.edu
host-104.hawaii.edu
host-105.hawaii.edu
host-106.hawaii.edu
host-107.hawaii.edu
host-108.hawaii.edu
host-109.hawaii.edu
host-110.hawaii.edu
host-111.hawaii.edu
host-112.hawaii.edu
host-113.hawaii.edu
host-114.hawaii.edu
host-115.hawaii.edu
host-116.hawaii.edu
host-117.hawaii.edu
host-118.hawaii.edu
host-119.hawaii.edu
host-120.hawaii.edu
host-121.hawaii.edu
host-122.hawaii.edu
host-123.hawaii.edu
host-124.hawaii.edu
host-125.hawaii.edu
host-126.hawaii.edu
host-127.hawaii.edu
host-128.hawaii.edu
host-129.hawaii.edu
host-130.hawaii.edu
host-131.hawaii.edu
host-132.hawaii.edu
host-133.hawaii.edu
host-134.hawaii.edu
host-135.hawaii.edu
host-136.hawaii.edu
host-137.hawaii.edu
host-138.hawaii.edu
host-139.hawaii.edu
host-140.hawaii.edu
host-141.hawaii.edu
host-142.hawaii.edu
host-143.hawaii.edu
host-144.hawaii.edu
host-145.hawaii.edu
host-146.hawaii.edu
host-147.hawaii.edu
host-148.hawaii.edu
host-149.hawaii.edu
host-150.hawaii.edu
host-151.hawaii.edu
host-152.hawaii.edu
host-153.hawaii.edu
host-154.hawaii.edu
host-155.hawaii.edu
host-156.hawaii.edu
host-157.hawaii.edu
host-158.hawaii.edu
host-159.hawaii.edu
host-160.hawaii.edu
host-161.hawaii.edu
host-162.hawaii.edu
host-163.hawaii.edu
host-164.hawaii.edu
host-165.hawaii.edu
host-166.hawaii.edu
host-167.hawaii.edu
host-168.hawaii.edu
host-169.hawaii.edu
host-170.hawaii.edu
host-171.hawaii.edu
host-172.hawaii.edu
host-173.hawaii.edu
host-174.hawaii.edu
host-175.hawaii.edu
host-176.hawaii.edu
host-177.hawaii.edu
host-178.hawaii.edu
host-179.hawaii.edu
host-180.hawaii.edu
host-181.hawaii.edu
host-182.hawaii.edu
host-183.hawaii.edu
host-184.hawaii.edu
host-185.hawaii.edu
host-186.hawaii.edu
host-187.hawaii.edu
host-188.hawaii.edu
host-189.hawaii.edu
host-190.hawaii.edu
host-191.hawaii.edu
host-192.hawaii.edu
host-193.hawaii.edu
host-194.hawaii.edu
host-195.hawaii.edu
host-196.hawaii.edu
host-197.hawaii.edu
host-198.hawaii.edu
host-199.hawaii.edu
host-200.hawaii.edu
host-201.hawaii.edu
host-202.hawaii.edu
host-203.hawaii.edu
host-204.hawaii.edu
host-205.hawaii.edu
host-206.hawaii.edu
host-207.hawaii.edu
host-208.hawaii.edu
host-209.hawaii.edu
host-210.hawaii.edu
host-211.hawaii.edu
host-212.hawaii.edu
host-213.hawaii.edu
host-214.hawaii.edu
host-215.hawaii.edu
host-216.hawaii.edu
host-217.hawaii.edu
host-218.hawaii.edu
host-219.hawaii.edu
host-220.hawaii.edu
host-221.hawaii.edu
host-222.hawaii.edu
host-223.hawaii.edu
host-224.hawaii.edu
host-225.hawaii.edu
host-226.hawaii.edu
host-227.hawaii.edu
host-228.hawaii.edu
host-229.hawaii.edu
host-230.hawaii.edu
host-231.hawaii.edu
host-232.hawaii.edu
host-233.hawaii.edu
host-234.hawaii.edu
host-235.hawaii.edu
host-236.hawaii.edu
host-237.hawaii.edu
host-238.hawaii.edu
host-239.hawaii.edu
host-240.hawaii.edu
host-241.hawaii.edu
host-242.hawaii.edu
host-243.hawaii.edu
host-244.hawaii.edu
host-245.hawaii.edu
host-246.hawaii.edu
host-247.hawaii.edu
host-248.hawaii.edu
host-249.hawaii.edu
host-250.hawaii.edu
host-251.hawaii.edu
host-252.hawaii.edu
host-253.hawaii.edu
host-254.hawaii.edu
host-255.hawaii.edu

View file

@ -0,0 +1,16 @@
node-0.simgrid.org
node-1.simgrid.org
node-2.simgrid.org
node-3.simgrid.org
node-4.simgrid.org
node-5.simgrid.org
node-6.simgrid.org
node-7.simgrid.org
node-8.simgrid.org
node-9.simgrid.org
node-10.simgrid.org
node-11.simgrid.org
node-12.simgrid.org
node-13.simgrid.org
node-14.simgrid.org
node-15.simgrid.org

4
TP2/init.sh Normal file
View file

@ -0,0 +1,4 @@
#!/bin/bash
SIMGRID=/mnt/n7fs/ens/tp_guivarch/opt2021/simgrid-3.31
export PATH=${SIMGRID}/bin:${PATH}

117
TP2/log.txt Normal file
View file

@ -0,0 +1,117 @@
File smpi_simgrid.trace
Errors :
150 : Unknown container: 0
153 : Unknown container: 0
156 : Unknown container: 0
165 : Unknown container: 0
168 : Unknown container: 0
171 : Unknown container: 0
185 : Unknown container: 0
191 : Unknown container: 0
199 : Unknown container: 0
205 : Unknown container: 0
207 : Unknown container: 0
213 : Unknown container: 0
216 : Unknown container: 0
221 : Unknown container: 0
223 : Unknown container: 0
231 : Unknown container: 0
236 : Unknown container: 0
243 : Unknown container: 0
275 : Unknown container: 0
283 : Unknown container: 0
285 : Unknown container: 0
287 : Unknown container: 0
294 : Unknown container: 0
303 : Unknown container: 0
362 : Unknown container: 0
364 : Unknown container: 0
366 : Unknown container: 0
371 : Unknown container: 0
373 : Unknown container: 0
375 : Unknown container: 0
380 : Unknown container: 0
382 : Unknown container: 0
384 : Unknown container: 0
389 : Unknown container: 0
391 : Unknown container: 0
393 : Unknown container: 0
398 : Unknown container: 0
400 : Unknown container: 0
402 : Unknown container: 0
407 : Unknown container: 0
409 : Unknown container: 0
411 : Unknown container: 0
416 : Unknown container: 0
418 : Unknown container: 0
420 : Unknown container: 0
425 : Unknown container: 0
427 : Unknown container: 0
429 : Unknown container: 0
434 : Unknown container: 0
436 : Unknown container: 0
438 : Unknown container: 0
443 : Unknown container: 0
445 : Unknown container: 0
447 : Unknown container: 0
570 : Unknown container: 0
573 : Unknown container: 0
576 : Unknown container: 0
585 : Unknown container: 0
588 : Unknown container: 0
591 : Unknown container: 0
604 : Unknown container: 0
612 : Unknown container: 0
619 : Unknown container: 0
625 : Unknown container: 0
627 : Unknown container: 0
633 : Unknown container: 0
635 : Unknown container: 0
641 : Unknown container: 0
643 : Unknown container: 0
650 : Unknown container: 0
656 : Unknown container: 0
663 : Unknown container: 0
695 : Unknown container: 0
703 : Unknown container: 0
705 : Unknown container: 0
707 : Unknown container: 0
713 : Unknown container: 0
723 : Unknown container: 0
782 : Unknown container: 0
784 : Unknown container: 0
786 : Unknown container: 0
791 : Unknown container: 0
793 : Unknown container: 0
795 : Unknown container: 0
800 : Unknown container: 0
802 : Unknown container: 0
804 : Unknown container: 0
809 : Unknown container: 0
811 : Unknown container: 0
813 : Unknown container: 0
818 : Unknown container: 0
820 : Unknown container: 0
822 : Unknown container: 0
827 : Unknown container: 0
829 : Unknown container: 0
831 : Unknown container: 0
836 : Unknown container: 0
838 : Unknown container: 0
840 : Unknown container: 0
845 : Unknown container: 0
847 : Unknown container: 0
849 : Unknown container: 0
854 : Unknown container: 0
856 : Unknown container: 0
858 : Unknown container: 0
863 : Unknown container: 0
865 : Unknown container: 0
867 : Unknown container: 0
Warnings :
1 : the definition is not identified
2 : the definition is not identified
Your trace has 108 errors and 2 warnings.

View file

@ -0,0 +1,7 @@
<?xml version='1.0'?>
<!DOCTYPE platform SYSTEM "http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd">
<platform version="4.1">
<zone id="AS0" routing="Full">
<cluster id="my_cluster" prefix="host-" suffix=".hawaii.edu" radical="0-255" speed="1Gf" bw="125Mbps" lat="5us"/>
</zone>
</platform>

View file

@ -0,0 +1,17 @@
<?xml version='1.0'?>
<!DOCTYPE platform SYSTEM "https://simgrid.org/simgrid.dtd">
<platform version="4.1">
<!-- This is an example for a fat tree cluster.
This is taken from figure 1(b) of the paper "D-Mod-K Routing Providing Non-Blocking Traffic for Shift Permutations on
Real Life Fat Trees" available at https://ece.technion.ac.il/wp-content/uploads/2021/01/publication_776.pdf
This defines a two levels fat-tree, with 4 leaf switches connected to 4 nodes each and 2 core switches connected to
each leaf switch by two cables -->
<zone id="world" routing="Full">
<cluster id="bob_cluster"
prefix="node-" radical="0-15" suffix=".simgrid.org"
speed="1Gf" bw="125MBps" lat="50us"
topology="FAT_TREE" topo_parameters="2;4,4;1,2;1,2"
loopback_bw="100MBps" loopback_lat="0" />
</zone>
</platform>

17
TP2/platforms/default.xml Normal file
View file

@ -0,0 +1,17 @@
<?xml version='1.0'?>
<!DOCTYPE platform SYSTEM "https://simgrid.org/simgrid.dtd">
<platform version="4.1">
<!-- This is an example for a fat tree cluster.
This is taken from figure 1(b) of the paper "D-Mod-K Routing Providing Non-Blocking Traffic for Shift Permutations on
Real Life Fat Trees" available at https://ece.technion.ac.il/wp-content/uploads/2021/01/publication_776.pdf
This defines a two levels fat-tree, with 4 leaf switches connected to 4 nodes each and 2 core switches connected to
each leaf switch by two cables -->
<zone id="world" routing="Full">
<cluster id="bob_cluster"
prefix="node-" radical="0-15" suffix=".simgrid.org"
speed="1Gf" bw="125MBps" lat="50us"
topology="FAT_TREE" topo_parameters="2;4,4;1,2;1,2"
loopback_bw="100MBps" loopback_lat="0" />
</zone>
</platform>

View file

@ -0,0 +1,277 @@
#! /usr/bin/env perl
eval 'exec perl -S $0 ${1+"$@"}'
if $running_under_some_shell;
# This script updates the simgrid XML file passed as argument (modification in place)
# It is built to do the conversion incrementally.
# Copyright (c) 2006-2022. The SimGrid Team.
# All rights reserved.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the license (GNU LGPL) which comes with this package.
=encoding UTF-8
=head1 NAME
simgrid_update_xml - updates simgrid XML files to latest version
=head1 SYNOPSIS
B<simgrid_update_xml> I<xml_file>
=head1 DESCRIPTION
simgrid_update_xml updates the simgrid XML file passed as argument. The file
is modified in place, without any kind of backup. You may want to save a copy
before running the script.
In SimGrid XML files, the standard version is indicated in the version
attribute of the platform tag. Current version is 4. Here is a list of major
changes in each version.
=over 4
=item B<Version 0:> Used before SimGrid 3.3
=item B<Version 1:> Introduced in SimGrid 3.3
=over 4
=item
The version attribute of the platform tag was added to allow file versioning.
=item
The link bandwidth changed from Mb/s to b/s, and the CPU power changed
from MFlop/s to Flop/s.
=back
=item B<Version 2:> Introduced in SimGrid 3.4
=over
=item
Several tags were renamed:
CPU -> HOST
NETWORK_LINK -> LINK
ROUTE_ELEMENT -> LINK_CTN
PLATFORM_DESCRIPTION -> PLATFORM
=back
=item B<Version 3:> Introduced in SimGrid 3.5
=over 4
=item
The AS tag was introduced. Every platform should now contain an enclosing AS
tag.
=item
Routes are now symmetric by default.
=item
Several tags were renamed (for sake of XML sanity):
LINK:CTN -> LINK_CTN
TRACE:CONNECT -> TRACE_CONNECT
=back
=item B<Version 4:> Introduced in SimGrid 3.13
=over 4
=item
Rename the attributes describing the amount of flop that a host / peer / cluster / cabinet can deliver per second.
<host power=...> -> <host speed=...>
=item
In <trace_connect>, attribute kind="POWER" is now kind="SPEED".
=item
The DOCTYPE points to the right URL.
=item
Units are now mandatory in attributes. USE THE SCRIPT sg_xml_unit_converter.py TO CONVERT THIS
- speed. Old default: 'f' or 'flops'. Also defined:
'Yf', 'Zf', 'Ef', 'Pf', 'Tf', 'Gf', 'Mf', 'kf'
'yottaflops', 'zettaflops', 'exaflops', 'petaflops', 'teraflops', 'gigaflops', 'megaflops', 'kiloflops'
- bandwidth. Old default: 'Bps' bytes per second (or 'bps' but 1 Bps = 8 bps)
Also defined in bytes: 'TiBps', 'GiBps', 'MiBps', 'KiBps', 'TBps', 'GBps', 'MBps', 'kBps', 'Bps'
And the same in bits: 'Tibps', 'Gibps', 'Mibps', 'Kibps', 'Tbps', 'Gbps', 'Mbps', 'kbps', 'bps'
- latency. Old default: 's' second. Also defined:
'w' week, 'd' day, 'h' hour, 'm' minute, 'ms' millisecond, 'us' microsecond, 'ns' nanosecond, 'ps' picosecond
=back
=item B<Version 4.1:> Introduced in SimGrid 3.16 (this is the current version).
=over 4
=item
Rename a few tags, but in a backward-compatible manner: the old names are still accepted.
AS -> zone
ASroute -> zoneRoute
bypassAsRoute -> bypassZoneRoute
process -> actor
=back
=item Other backward-compatible changes (old syntax is still accepted) for which we did not bump the DTD version:
=over 4
=item
Rename the FULLDUPLEX sharing into SPLITDUPLEX.
=item
In <host> and <peer>, rename the 'availability_file' attribute into 'speed_file'.
=back
=back
=head1 AUTHORS
The SimGrid team
=head1 COPYRIGHT AND LICENSE
Copyright (c) 2006-2022. The SimGrid Team. All rights reserved.
This program is free software; you may redistribute it and/or modify it
under the terms of GNU LGPL (v2.1) license.
=cut
use strict;
my $fromversion=-1;
my $toversion=4.1;
my $filename = $ARGV[0] or die "Usage: simgrid_update_xml.pl file_to_convert.xml\nPlease provide an XML to convert as a parameter.\n";
open INPUT, "$filename" or die "Cannot open input file $filename: $!\n";
my $output_string = "<?xml version='1.0'?>\n".
"<!DOCTYPE platform SYSTEM \"https://simgrid.org/simgrid.dtd\">\n".
"<platform version=\"$toversion\">\n";
my($AS_opened)=0;
my $line;
while (defined($line = <INPUT>)) {
chomp $line;
# eat the header, whatever form it has
next if ($line =~ s/<\?xml[^>]*>// && ! $line =~ /\S/); # just in case several tags are on the same line
next if ($line =~ s/<!DOCTYPE[^>]*>// && ! $line =~ /\S/);
if ($line =~ s/<platform(_description)? *>//) {
$fromversion = 0;
print "$filename was using version 0\n";
next if !$line =~ /\S/;
} elsif ($line =~ s/<platform.*version=["']*([0-9.]*)["']*>//) {
$fromversion = $1;
if ($fromversion == $toversion) {
warn "Input platform file $filename is already conformant to version $fromversion. This should be a no-op.\n";
}
if ($fromversion > $toversion) {
die "Input platform file $filename is more recent than this script (file version: $fromversion; script version: $toversion)\n";
}
next if !$line =~ /\S/;
print "$filename was using version $fromversion\n";
}
if ($fromversion == 0) {
while ($line =~ m|^(.*?)<cpu(.*?)power="([^"]*)"(.*)$|) {
$line = "$1TOTOTUTUTATA${2}TOTOTUTUTATA".($3*1000000)."TOTOTUTUTATA${4}";
}
while ($line =~ /^(.*?)TOTOTUTUTATA(.*?)TOTOTUTUTATA(.*?)TOTOTUTUTATA(.*)$/) {
$line = "$1<cpu${2}power=\"$3\"$4";
}
while ($line =~ m|^(.*?)<network_link(.*?)bandwidth="([^"]*)"(.*?)$|) {
$line = "$1TOTOTUTUTATA${2}TOTOTUTUTATA".($3*1000000)."TOTOTUTUTATA${4}";
}
while ($line =~ /^(.*?)TOTOTUTUTATA(.*?)TOTOTUTUTATA(.*?)TOTOTUTUTATA(.*?)$/) {
$line = "$1<network_link${2}bandwidth=\"$3\"$4";
}
}
if ($fromversion < 2) {
# The renamings (\b=zero-width word boundary check)
$line =~ s/\bplatform_description\b/platform/g;
$line =~ s/\bname\b/id/g;
$line =~ s/\bcpu\b/host/g;
$line =~ s/\bnetwork_link\b/link/g;
$line =~ s/\broute_element\b/link:ctn/g;
}
if ($fromversion < 3) {
$line =~ s/\blink:ctn\b/link_ctn/g;
$line =~ s/\btrace:connect\b/trace_connect/g;
if($AS_opened && (($line=~ /<\/platform>/) || ($line=~ /<process/))) {
$output_string .= "</AS>\n";
$AS_opened = 0;
}
if( (!$AS_opened) && (
($line =~ /<host/) ||
($line =~ /<link/) ||
($line =~ /<cluster/) ||
($line =~ /<router/)
)) {
$output_string .= " <AS id=\"AS0\" routing=\"Full\">\n";
$AS_opened=1;
}
if($line=~/<route /){$line =~ s/\<route/\<route symmetrical=\"NO\"/g;}
}
if ($fromversion < 4) {
$line =~ s/\bpower\b/speed/g;
$line =~ s/\bkind="POWER"/kind="SPEED"/g;
}
if ($fromversion < 4.1) {
$line =~ s/\bAS\b/zone/g;
$line =~ s/\bASroute\b/zoneRoute/g;
$line =~ s/\bbypassAsRoute\b/bypassZoneRoute/g;
$line =~ s/\bprocess\b/actor/g;
}
$line =~ s/\bFULLDUPLEX\b/SPLITDUPLEX/g;
$line =~ s/\bavailability_file\b/speed_file/g;
$output_string .= "$line\n";
}
close INPUT;
if ($fromversion == -1) {
die "Cannot retrieve the platform version of $filename\n";
}
open OUTPUT, "> $filename";
print OUTPUT $output_string;
close OUTPUT;

360
TP2/src/dsmat.c Normal file
View file

@ -0,0 +1,360 @@
#include <mpi.h>
#include <stdlib.h>
#include <stdio.h>
#include <cblas.h>
#include "simgrid/actor.h"
#include <simgrid/exec.h>
#include "utils.h"
#include "dsmat.h"
/* Tracing purposes */
static char* COMPUTE = "Computing";
static char* IDLE = "Idling";
void init_trace() {
// TRACE_host_state_declare(COMPUTE);
// TRACE_host_state_declare(IDLE);
}
int dsmat_fill(Matrix* a, int m, int n, int b, int p, int q, char* name) {
int me, node;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
int mb = m/b, nb = n/b;
int ii, jj;
int row, col;
a->mb = mb;
a->nb = nb;
a->b = b;
//printf("%d] %s : m x n (b) = %d x %d (%d)\n", me, name, mb, nb, b);
a->blocks = calloc(mb,sizeof(Block*));
for (ii = 0; ii < mb;ii++) {
a->blocks[ii] = calloc(nb,sizeof(Block));
for (jj = 0; jj < nb;jj++) {
node = get_node(p,q,ii,jj);
node_coordinates_2i(p,q,node,&row,&col);
a->blocks[ii][jj].owner = node;
a->blocks[ii][jj].row = row;
a->blocks[ii][jj].col = col;
a->blocks[ii][jj].request = MPI_REQUEST_NULL;
if (me == a->blocks[ii][jj].owner) {
//printf("%d]allocating x_%d,%d\n",me,ii,jj);
a->blocks[ii][jj].c = calloc(b*b,sizeof(float));
rand_mat(b,b,a->blocks[ii][jj].c,10);
} else {
a->blocks[ii][jj].c = NULL;
}
}
}
return 0;
}
int dsmat_fill_v(Matrix* a, int m, int n, int b, int p, int q, char* name, float value) {
int me, node;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
int mb = m/b, nb = n/b;
int ii, jj;
int row, col;
a->mb = mb;
a->nb = nb;
a->b = b;
a->blocks = calloc(mb,sizeof(Block*));
for (ii = 0; ii < mb;ii++) {
a->blocks[ii] = calloc(nb,sizeof(Block));
for (jj = 0; jj < nb;jj++) {
node = get_node(p,q,ii,jj);
node_coordinates_2i(p,q,node,&row,&col);
a->blocks[ii][jj].owner = node;
a->blocks[ii][jj].row = row;
a->blocks[ii][jj].col = col;
a->blocks[ii][jj].request = MPI_REQUEST_NULL;
if (me == a->blocks[ii][jj].owner) {
//printf("%d]allocating x_%d,%d to fill with %f\n",me,ii,jj, value);
a->blocks[ii][jj].c = calloc(b*b,sizeof(float));
val_mat(b,b,a->blocks[ii][jj].c,value);
} else {
a->blocks[ii][jj].c = NULL;
}
}
}
return 0;
}
int dsmat_fill_s(Matrix* a, int m, int n, int b, int p, int q, char* name) {
int me, node;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
int mb = m/b, nb = n/b;
int ii, jj;
int row, col;
a->mb = mb;
a->nb = nb;
a->b = b;
a->blocks = calloc(mb,sizeof(Block*));
for (ii = 0; ii < mb;ii++) {
a->blocks[ii] = calloc(nb,sizeof(Block));
for (jj = 0; jj < nb;jj++) {
node = get_node(p,q,ii,jj);
node_coordinates_2i(p,q,node,&row,&col);
a->blocks[ii][jj].owner = node;
a->blocks[ii][jj].row = row;
a->blocks[ii][jj].col = col;
a->blocks[ii][jj].request = MPI_REQUEST_NULL;
if (me == a->blocks[ii][jj].owner) {
//printf("%d] s_allocating %s_%d,%d to fill with %f\n",me,name,ii,jj,(float)nb*(ii+1)+(jj+1));
a->blocks[ii][jj].c = calloc(b*b,sizeof(float));
val_mat(b,b,a->blocks[ii][jj].c,(float) nb*(ii+1)+(jj+1));
} else {
a->blocks[ii][jj].c = NULL;
}
}
}
return 0;
}
int dsmat_destroy(Matrix* a, char* name) {
int me;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
int mb = a->mb, nb = a->nb;
//printf("[%d] destroying matrix %s (mb=%d,nb=%d,b=%d)\n",me, name, mb, nb, a->b);
int ii, jj;
Block * a_ij;
for (ii = 0; ii < mb ; ii++) {
for (jj = 0; jj < nb ; jj++) {
a_ij = & a->blocks[ii][jj];
//if (a_ij->c != NULL) { // && a_ij.owner == me) {
if (a_ij->c != NULL && a_ij->owner == me) {
free(a_ij->c);
}
}
free(a->blocks[ii]);
}
free(a->blocks);
return 0;
}
int dsmat_scal_check(Matrix* A, float alpha) {
int i,j;
int me;
if (alpha == 0.0) return 0;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
Block* Aij;
for(i = 0; i < A->mb; i++) {
for(j = 0; j < A->nb; j++) {
Aij = & A->blocks[i][j];
if (Aij->owner == me) {
double computation_amount = 2.0*A->b*A->b*A->b;
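// Note (added for clarity): the scaling is written as a GEMM, 0 * Aij * Aij + alpha * Aij,
// so the simulated run accounts for the 2*b^3 flops of a block update;
// computation_amount is currently unused (presumably left over for SMPI/TRACE instrumentation).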
cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, A->b, A->b, A->b,
0.0, Aij->c, A->b, Aij->c, A->b,
alpha, Aij->c, A->b);
}
}
}
return 0;
}
int dsmat_scal(Matrix* A, float alpha) {
int i,j;
int me;
if (alpha == 0.0) return 0;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
Block* Aij;
SMPI_SAMPLE_LOCAL(i = 0, i < A->mb, i++, 10, 0.005) {
SMPI_SAMPLE_LOCAL(j = 0, j < A->nb, j++, 10, 0.005) {
Aij = & A->blocks[i][j];
if (Aij->owner == me) {
double computation_amount = 2.0*A->b*A->b*A->b;
cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, A->b, A->b, A->b,
0.0, Aij->c, A->b, Aij->c, A->b,
alpha, Aij->c, A->b);
}
}
}
return 0;
}
// FIXME : remove alpha/beta
int local_outer_product_check(float alpha, Matrix* A, Matrix* B, Matrix* C, int l, int p, int q) {
int i, j, err;
for(i = 0; i < C->mb; i++) {
for(j = 0; j < C->nb; j++) {
err = compute_local_op(alpha, A, B, C, i, j, l);
if (err != 0) return 1;
}
}
/* free useless memory */
free_local_op(A, B, l, p, q);
return 0;
}
int local_outer_product(float alpha, Matrix* A, Matrix* B, Matrix* C, int l, int p, int q) {
int i, j, err;
SMPI_SAMPLE_LOCAL(i = 0, i < C->mb, i++, 10, 0.005) {
SMPI_SAMPLE_LOCAL(j = 0, j < C->nb, j++, 10, 0.005) {
err = compute_local_op(alpha, A, B, C, i, j, l);
if (err != 0) return 1;
}
}
/* free useless memory */
free_local_op(A, B, l, p, q);
return 0;
}
int compute_local_op(float alpha, Matrix* A, Matrix* B, Matrix* C, int i, int j, int l) {
int me;
int b;
Block *Ail, *Blj, *Cij;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
Cij = & C->blocks[i][j];
b = C->b;
if (Cij->owner == me) {
Ail = & A->blocks[i][l];
if (Ail->c == NULL) { return 1; }
Blj = & B->blocks[l][j];
if (Blj->c == NULL) { return 2; }
// TRACE_host_set_state(COMPUTE);
cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, b,b,b,
alpha, Ail->c, b, Blj->c, b,
1.0, Cij->c, b);
// TRACE_host_set_state(IDLE);
}
return 0;
}
int free_local_op(Matrix* A, Matrix* B, int l, int p, int q) {
int i,j;
int me, me_coord[2];
MPI_Comm_rank(MPI_COMM_WORLD, &me);
node_coordinates(p,q,me,me_coord);
Block *Ail, *Blj;
for (i = 0; i < A->mb; i++) {
Ail = & A->blocks[i][l];
if (Ail->owner != me && Ail->c != NULL) {
free(Ail->c);
Ail->c = NULL;
}
}
for (j = 0; j < B->nb; j++) {
Blj = & B->blocks[l][j];
if (Blj->owner != me && Blj->c != NULL) {
free(Blj->c);
Blj->c = NULL;
}
}
return 0;
}
int block_copy(float * a, float * b, int m, int n) {
int i, j;
for (i = 0; i < m ; i++) {
for (j = 0; j < n ; j++) {
a[n*i+j] = b[n*i+j];
}
}
return 0;
}
int block_print(float * a, int m, int n, char* name) {
int i, j;
printf("block %s\n", name);
for (i = 0; i < m ; i++) {
for (j = 0; j < n ; j++) {
printf("%9.2f\t", a[n*i+j]);
}
printf("\n");
}
printf("\n");
return 0;
}
// A <- B
int dsmat_copy(Matrix * A, Matrix * B) {
int i, j;
int me;
int mb, nb, b;
Block *Aij, *Bij;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
A->mb = B->mb;
A->nb = B->nb;
A->b = B->b;
mb = A->mb;
nb = A->nb;
b = A->b;
A->blocks = calloc(mb, sizeof(Block*));
for (i = 0; i<mb;i++){
A->blocks[i] = calloc(nb, sizeof(Block));
for (j = 0; j<nb;j++){
Aij = & A->blocks[i][j];
Bij = & B->blocks[i][j];
Aij->owner = Bij->owner;
Aij->row = Bij->row;
Aij->col = Bij->col;
Aij->request = MPI_REQUEST_NULL;
if (Bij->owner == me) {
Aij->c = calloc(b*b,sizeof(float));
block_copy(Aij->c, Bij->c, b, b);
}
}
}
return 0;
}
int dsmat_copy_to(Matrix * A, Matrix * B, int rcv, char* copy, char* copied) {
int i, j, l;
int me,tag;
int mb, nb, b;
Block *Aij, *Bij;
float* localA;
MPI_Status status;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
A->nb = 1;
A->mb = 1;
A->b = -1;
mb = B->mb;
nb = B->nb;
b = B->b;
tag = 0;
A->blocks = malloc(sizeof(Block*));
A->blocks[0] = malloc(sizeof(Block));
Aij = & A->blocks[0][0];
Aij->owner = rcv;
Aij->row = -1;
Aij->col = -1; // not on a grid ...
Aij->request = MPI_REQUEST_NULL;
if (me == rcv) {
Aij->c = malloc(mb*b*nb*b *sizeof(float));
}
for (i = 0; i<mb;i++){
for (j = 0; j<nb;j++){
Bij = & B->blocks[i][j];
if (Bij->owner == me) {
if (rcv != me) {
MPI_Send(Bij->c, b*b, MPI_FLOAT,
rcv, tag,
MPI_COMM_WORLD);
} else {
for (l = 0; l<b; l++) {
block_copy(&Aij->c[nb*i*b*b+j*b+l*nb*b], &Bij->c[l*b], 1, b); // copy row l of the b x b block into global row i*b+l
}
}
} else if (me == rcv) {
localA = malloc(b*b*sizeof(float));
MPI_Recv(localA, b*b, MPI_FLOAT,
Bij->owner, tag,
MPI_COMM_WORLD,&status);
for (l = 0; l<b; l++) {
block_copy(&Aij->c[nb*i*b*b+j*b+l*nb*b], &localA[l*b], 1, b); // copy row l of the received block
}
free(localA);
}
}
}
return 0;
}

62
TP2/src/dsmat.h Normal file
View file

@ -0,0 +1,62 @@
#ifndef DENSE_MAT_FNCT_H
#define DENSE_MAT_FNCT_H
typedef struct Blocks {
float* c; // The content of the block, stored as a row-major array.
// This pointer is only meaningful to the owner;
// otherwise it is NULL.
// Element x_i,j of a given block of size b
// can be accessed as x->c[b*i+j].
int owner; // The MPI rank of the owner of this block.
// This information is available to all the nodes.
int row, col; // owner = row * q + col in a p x q grid.
MPI_Request request; // The request can be used when transferring the block
// with nonblocking (immediate) MPI routines such as MPI_Isend/MPI_Irecv.
} Block;
typedef struct Matrices {
int mb, nb, b; // A given Matrix is of size mb*b x nb*b, b being the
// dimension of each of its square blocks, i.e.
// mb is the number of block rows and nb the number of block columns.
Block** blocks; // This 2D array describes each block of a given Matrix.
// This is meaningful to all the nodes : information on a block A_i,j
// from a matrix A can be accessed through the block A->blocks[i][j] from every MPI rank.
} Matrix;
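// Usage sketch (illustration only, not part of the original API), where A is a
// Matrix, (I,J) indexes one of its blocks and (i,j) an element inside that block.
// Reading the element is only valid on the owner rank, since blocks[I][J].c is
// NULL everywhere else:
//
//   int me;
//   MPI_Comm_rank(MPI_COMM_WORLD, &me);
//   Block *blk = &A.blocks[I][J];
//   if (blk->owner == me) {
//     float x = blk->c[A.b * i + j]; // element (i,j) of the b x b block
//   }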
// tracing
void init_trace();
/* dense matrices routines */
// fill matrix a with values matching the position of the block in the matrix
// i.e. block a_i,j is full of n*(i+1)+(j+1) with a of size m x n
int dsmat_fill_s(Matrix* a, int m, int n, int b, int p, int q, char* name);
// destroy matrix a
int dsmat_destroy(Matrix* a, char* name);
// scale matrix a by alpha
int dsmat_scal_check(Matrix* a, float alpha);
int dsmat_scal(Matrix* a, float alpha);
int dsmat_fill_v(Matrix* a, int m, int n, int b, int p, int q, char* name, float value);
/* dense matrices copy */
// copy a[0:m-1,0:n-1] into b[0:m-1,0:n-1]
int block_copy(float * a, float * b, int m, int n);
// print a[0:m-1,0:n-1]
int block_print(float * a, int m, int n, char* name);
// copy matrix B into matrix A
int dsmat_copy(Matrix * A, Matrix * B);
// copy matrix B into matrix A owned only by rank rcv
int dsmat_copy_to(Matrix * A, Matrix * B, int rcv, char* copy, char* copied);
/* gemm generic routines */
// computing C += A:l * Bl: for all blocks of C I own using compute_local_op
// matrices A and B that I do not own are freed from memory using free_local_op
int local_outer_product_check(float alpha, Matrix* A, Matrix* B, Matrix* C, int l, int p, int q);
int local_outer_product(float alpha, Matrix* A, Matrix* B, Matrix* C, int l, int p, int q);
// compute C_i,j += A_i,l * B_l,j
// if a given block is missing, the corresponding computation is skipped
int compute_local_op(float alpha, Matrix* A, Matrix* B, Matrix* C, int i, int j, int l);
// free A:l and Bl: from memory if I do not own them
int free_local_op(Matrix* A, Matrix* B, int l, int p, int q);
#endif

88
TP2/src/ex1.c Normal file
View file

@ -0,0 +1,88 @@
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <cblas.h>
#include "utils.h"
#include "dsmat.h"
#include "gemms.h"
void p2p_transmit_A(int p, int q, Matrix *A, int i, int l)
{
int j;
int me, my_row, my_col;
MPI_Status status;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
node_coordinates_2i(p, q, me, &my_row, &my_col);
Block *Ail;
int node, tag, b;
tag = 0;
Ail = &A->blocks[i][l];
b = A->b;
/* TODO : transmit A[i,l] using MPI_Ssend & MPI_Recv */
if (Ail->owner == me)
{ // I own A[i,l]
/* MPI_Ssend A[i,l] to my row */
for (j = 0; j < q; j++)
{
node = get_node(p, q, my_row, j);
if (node != me)
{
// printf("%d Sending A[%d,%d] to node %d\n", my_rank, i, l, node);
MPI_Ssend(Ail->c, b * b, MPI_FLOAT, node, tag, MPI_COMM_WORLD);
// printf("%d Sent A[%d,%d] to node %d\n", my_rank, i, l, node);
}
}
}
else if (Ail->row == my_row)
{ // A[i,l] is stored on my row
Ail->c = malloc(b * b * sizeof(float));
/* MPI_Recv A[i,l] */
// printf("%d Receiving A[%d,%d] from node %d\n", my_rank, i, l, node);
MPI_Recv(Ail->c, b * b, MPI_FLOAT, Ail->owner, tag, MPI_COMM_WORLD, &status);
// printf("%d Received A[%d,%d] from node %d\n", my_rank, i, l, node);
}
/* end TODO */
}
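/* Note (added for clarity): MPI_Ssend is a synchronous send, which only completes
   once the matching receive has started; unlike MPI_Send, it never relies on MPI
   buffering, so a mismatched send/receive pattern shows up as a deadlock in the
   simulation rather than working by accident. */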
void p2p_transmit_B(int p, int q, Matrix *B, int l, int j)
{
int i;
int me, my_row, my_col;
MPI_Status status;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
node_coordinates_2i(p, q, me, &my_row, &my_col);
int node, tag, b;
tag = 1;
Block *Blj;
Blj = &B->blocks[l][j];
b = B->b;
/* TODO : transmit B[l,j] using MPI_Ssend & MPI_Recv */
if (Blj->owner == me)
{ // I own B[l,j]
/* MPI_Ssend B[l,j] to my column */
for (i = 0; i < p; i++)
{
node = get_node(p, q, i, my_col);
if (node != me)
{
// printf("%d Sending B[%d,%d] to node %d\n", me, l, j, node);
MPI_Ssend(Blj->c, b * b, MPI_FLOAT, node, tag, MPI_COMM_WORLD);
// printf("%d Sent B[%d,%d] to node %d\n", me, l, j, node);
}
}
}
else if (Blj->col == my_col)
{ // B[l,j] is stored on my column
Blj->c = malloc(b * b * sizeof(float));
/* MPI_Recv B[l,j] */
// printf("%d Receiving B[%d,%d] from node %d\n", me, l, j, node);
MPI_Recv(Blj->c, b * b, MPI_FLOAT, Blj->owner, tag, MPI_COMM_WORLD, &status);
// printf("%d Received B[%d,%d] from node %d\n", me, l, j, node);
}
/* end TODO */
}

63
TP2/src/ex1.c.clem Normal file
View file

@ -0,0 +1,63 @@
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <cblas.h>
#include "utils.h"
#include "dsmat.h"
#include "gemms.h"
void p2p_transmit_A(int p, int q, Matrix *A, int i, int l) {
int j;
int me, my_row, my_col;
MPI_Status status;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
node_coordinates_2i(p,q,me,&my_row,&my_col);
Block *Ail;
int node, tag, b;
Ail = & A->blocks[i][l];
b = A->b;
/* TODO : transmit A[i,l] using MPI_Ssend & MPI_Recv */
if (Ail->owner == me /* I own A[i,l]*/) {
/* MPI_Ssend A[i,l] to my row */
for (j = 0; j < q; j++) {
node = get_node(p, q, my_row, j);
if (node != me)
MPI_Ssend(Ail->c, b*b, MPI_FLOAT, node, 0, MPI_COMM_WORLD);
}
} else if (Ail->row == my_row /* A[i,l] is stored on my row */) {
Ail->c = malloc(b*b*sizeof(float));
/* MPI_Recv A[i,l] */
MPI_Recv(Ail->c, b*b, MPI_FLOAT, Ail->owner, 0, MPI_COMM_WORLD, &status);
}
/* end TODO */
}
void p2p_transmit_B(int p, int q, Matrix *B, int l, int j) {
int i;
int me, my_row, my_col;
MPI_Status status;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
node_coordinates_2i(p,q,me,&my_row,&my_col);
int node, tag, b;
Block *Blj;
Blj = & B->blocks[l][j];
b = B->b;
/* TODO : transmit B[l,j] using MPI_Ssend & MPI_Recv */
if (Blj->owner == me /* I own B[l,j] */) {
/* MPI_Ssend B[l,j] to my column */
for (i = 0; i < p; i++) {
node = get_node(p, q, i, my_col);
if (node != me)
MPI_Ssend(Blj->c, b*b, MPI_FLOAT, node, 1, MPI_COMM_WORLD);
}
} else if (Blj->col == my_col /* B[l,j] is stored on my column */) {
Blj->c = malloc(b*b*sizeof(float));
/* MPI_Recv B[l,j] */
MPI_Recv(Blj->c, b*b, MPI_FLOAT, Blj->owner, 1, MPI_COMM_WORLD, &status);
}
/* end TODO */
}

5
TP2/src/ex1.h Normal file
View file

@ -0,0 +1,5 @@
#ifndef EXO_1_H
#define EXO_1_H
void p2p_transmit_A(int p, int q, Matrix *A, int i, int l);
void p2p_transmit_B(int p, int q, Matrix *B, int l, int j);
#endif

53
TP2/src/ex2.c Normal file
View file

@ -0,0 +1,53 @@
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <cblas.h>
#include "utils.h"
#include "dsmat.h"
#include "gemms.h"
void bcast_A(int p, int q, Matrix *A, int i, int l, MPI_Comm row_comm)
{
int me, my_row, my_col;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
node_coordinates_2i(p, q, me, &my_row, &my_col);
Block *Ail;
int b = A->b;
Ail = &A->blocks[i][l];
/* TODO : transmit A[i,l] using MPI_Bcast */
if (q > 1 && Ail->row == my_row)
{ /* Ail is stored on my row */
if (Ail->owner != me)
{
Ail->c = calloc(b * b, sizeof(float));
}
// MPI_Bcast
MPI_Bcast(Ail->c, b * b, MPI_FLOAT, Ail->col, row_comm);
}
/* end TODO */
}
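/* Note (added for clarity): the broadcast roots are Ail->col in row_comm and
   Blj->row in col_comm. This works because pgemm_bcast creates both communicators
   with MPI_Comm_split keyed by the global rank: within a row the new ranks follow
   the column index (global rank = q*row + col), and within a column they follow
   the row index. */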
void bcast_B(int p, int q, Matrix *B, int l, int j, MPI_Comm col_comm)
{
int me, my_row, my_col;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
node_coordinates_2i(p, q, me, &my_row, &my_col);
Block *Blj;
int b = B->b;
Blj = &B->blocks[l][j];
/* TODO : transmit B[l,j] using MPI_Bcast */
if (p > 1 && Blj->col == my_col)
{ /* Blj is stored on my column */
if (Blj->owner != me)
{
Blj->c = calloc(b * b, sizeof(float));
}
// MPI_Bcast
MPI_Bcast(Blj->c, b * b, MPI_FLOAT, Blj->row, col_comm);
}
/* end TODO */
}

5
TP2/src/ex2.h Normal file
View file

@ -0,0 +1,5 @@
#ifndef EXO_2_H
#define EXO_2_H
void bcast_A(int p, int q, Matrix *A, int i, int l, MPI_Comm row_comm);
void bcast_B(int p, int q, Matrix *B, int l, int j, MPI_Comm col_comm);
#endif

108
TP2/src/ex3.c Normal file
View file

@ -0,0 +1,108 @@
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <cblas.h>
#include "utils.h"
#include "dsmat.h"
#include "gemms.h"
void p2p_i_transmit_A(int p, int q, Matrix *A, int i, int l)
{
int j, b;
int me, my_row, my_col;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
node_coordinates_2i(p, q, me, &my_row, &my_col);
int node, tag;
tag = 0;
Block *Ail;
Ail = &A->blocks[i][l];
b = A->b;
/* TODO : transmit A[i,l] using MPI_Isend/recv */
if (Ail->owner == me)
{
// MPI_Isend Ail to my row
for (j = 0; j < q; j++)
{
node = get_node(p, q, my_row, j);
if (node != me)
{
MPI_Isend(Ail->c, b * b, MPI_FLOAT, node, tag, MPI_COMM_WORLD, &Ail->request);
}
}
}
else if (Ail->row == my_row)
{
Ail->c = calloc(b * b, sizeof(float));
// MPI_Irecv Ail
MPI_Irecv(Ail->c, b * b, MPI_FLOAT, Ail->owner, tag, MPI_COMM_WORLD, &Ail->request);
}
/* end TODO */
}
void p2p_i_transmit_B(int p, int q, Matrix *B, int l, int j)
{
int i, b;
int me, my_row, my_col;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
node_coordinates_2i(p, q, me, &my_row, &my_col);
int node, tag;
tag = 1;
Block *Blj;
Blj = &B->blocks[l][j];
b = B->b;
/* TODO : transmit B[l,j] using MPI_Isend/recv */
if (Blj->owner == me)
{
// MPI_Isend Blj to my col
for (i = 0; i < p; i++)
{
node = get_node(p, q, i, my_col);
if (node != me)
{
MPI_Isend(Blj->c, b * b, MPI_FLOAT, node, tag, MPI_COMM_WORLD, &Blj->request);
}
}
}
else if (Blj->col == my_col)
{
Blj->c = calloc(b * b, sizeof(float));
// MPI_Irecv Blj
MPI_Irecv(Blj->c, b * b, MPI_FLOAT, Blj->owner, tag, MPI_COMM_WORLD, &Blj->request);
}
/* end TODO */
}
void p2p_i_wait_AB(int p, int q, Matrix *A, Matrix *B, Matrix *C, int l)
{
int me, my_row, my_col;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
node_coordinates_2i(p, q, me, &my_row, &my_col);
int i, j;
Block *Ail, *Blj;
/* TODO : wait for A[i,l] and B[l,j] if I need them */
for (i = 0; i < A->mb; i++)
{
Ail = &A->blocks[i][l];
if (Ail->owner != me && Ail->row == my_row)
{
// MPI_Wait Ail
MPI_Wait(&Ail->request, MPI_STATUS_IGNORE);
}
}
for (j = 0; j < B->nb; j++)
{
Blj = &B->blocks[l][j];
if (Blj->owner != me && Blj->col == my_col)
{
// MPI_Wait Blj
MPI_Wait(&Blj->request, MPI_STATUS_IGNORE);
}
}
/* Alternative suggestion : iterate over blocks of C */
/* end TODO */
}
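/* Note (added for clarity): the buffers filled by MPI_Irecv in p2p_i_transmit_A/B
   may only be read after their request has completed, hence these MPI_Wait calls
   right before local_outer_product consumes A[i,l] and B[l,j]. */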

6
TP2/src/ex3.h Normal file
View file

@ -0,0 +1,6 @@
#ifndef EXO_3_H
#define EXO_3_H
void p2p_i_transmit_A(int p, int q, Matrix *A, int i, int l);
void p2p_i_transmit_B(int p, int q, Matrix *B, int l, int j);
void p2p_i_wait_AB(int p, int q, Matrix *A, Matrix* B, Matrix* C,int l);
#endif

157
TP2/src/gemms.c Normal file
View file

@ -0,0 +1,157 @@
#include <stdlib.h>
#include <stdio.h>
#include <mpi.h>
#include <cblas.h>
#include "utils.h"
#include "dsmat.h"
#include "gemms.h"
#include "ex1.h"
#include "ex2.h"
#include "ex3.h"
int pgemm_p2p(int check, int p, int q, int m, int n, int k, Matrix* A, Matrix* B, Matrix* C) {
int mb, nb, kb;
int i, j, l;
int me, me_coord[2], my_row, my_col;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
node_coordinates(p,q,me,me_coord);
node_coordinates_2i(p,q,me,&my_row,&my_col);
if (A->nb != B->mb || A->mb != C-> mb || B->nb != C->nb) {
if (me == 0) {
printf(" A B C\n");
printf(" mb %d %d %d\n", A->mb, B->mb, C->mb);
printf(" nb %d %d %d\n", A->nb, B->nb, C->nb);
}
return 1;
}
if (B->b != A->b || A->b != C-> b) return 2;
mb = C->mb;
nb = C->nb;
kb = A->nb;
for (l = 0; l < kb; l++) {
for (i = 0; i < mb; i++) {
p2p_transmit_A(p,q,A,i,l);
}
for (j = 0; j < nb; j++) {
p2p_transmit_B(p,q,B,l,j);
}
if (check) {
local_outer_product_check(1.0f, A, B, C, l, p, q);
} else {
local_outer_product(1.0f, A, B, C, l, p, q);
}
}
// printf("FINI\n");
return 0;
}
int pgemm_bcast(int check, int p, int q, int m, int n, int k, Matrix* A, Matrix* B, Matrix* C) {
int mb, nb, kb;
int i, j, l;
int me, me_row_comm, me_col_comm, me_coord[2];
int my_row, my_col;
MPI_Comm row_comm, col_comm;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
if (A->nb != B->mb || A->mb != C-> mb || B->nb != C->nb) {
if (me == 0) {
printf(" A B C\n");
printf(" mb %d %d %d\n", A->mb, B->mb, C->mb);
printf(" nb %d %d %d\n", A->nb, B->nb, C->nb);
}
return 1;
}
if (B->b != A->b || A->b != C-> b) return 2;
mb = C->mb;
nb = C->nb;
kb = A->nb;
node_coordinates(p,q,me,me_coord);
node_coordinates_2i(p,q,me,&my_row, &my_col);
if (q > 1) {
MPI_Comm_split(MPI_COMM_WORLD, my_row, me, &row_comm);
MPI_Comm_rank(row_comm, &me_row_comm);
} else {
me_row_comm = -1;
}
if (p > 1) {
MPI_Comm_split(MPI_COMM_WORLD, my_col, me, &col_comm);
MPI_Comm_rank(col_comm, &me_col_comm);
} else {
me_col_comm = -1;
}
for (l = 0; l < kb ; l++) {
for (i = 0; i < mb; i++) {
bcast_A(p,q,A,i,l,row_comm);
}
for (j = 0; j < nb; j++) {
bcast_B(p,q,B,l,j,col_comm);
}
if (check) {
local_outer_product_check(1.0f, A, B, C, l, p, q);
} else {
local_outer_product(1.0f, A, B, C, l, p, q);
}
}
if (q > 1)
MPI_Comm_free(&row_comm);
if (p > 1)
MPI_Comm_free(&col_comm);
return 0;
}
int pgemm_p2p_i_la(int check, int p, int q, int lookahead, int m, int n, int k, Matrix* A, Matrix* B, Matrix* C) {
int mb, nb, kb;
int i, j, l;
int me, me_coord[2],my_row, my_col;
MPI_Comm_rank(MPI_COMM_WORLD, &me);
node_coordinates(p,q,me,me_coord);
node_coordinates_2i(p,q,me,&my_row,&my_col);
if (A->nb != B->mb || A->mb != C-> mb || B->nb != C->nb) {
if (me == 0) {
printf(" A B C\n");
printf(" mb %d %d %d\n", A->mb, B->mb, C->mb);
printf(" nb %d %d %d\n", A->nb, B->nb, C->nb);
}
return 1;
}
if (B->b != A->b || A->b != C-> b) return 2;
mb = C->mb;
nb = C->nb;
kb = A->nb;
if (lookahead <= 0) return 3;
if (lookahead >= kb) lookahead = kb;
//printf("LA = %d, KB = %d\n",lookahead, kb);
for (l = 0; l < lookahead ; l++) {
for (i = 0; i < mb; i++) {
p2p_i_transmit_A(p,q,A,i,l);
}
for (j = 0; j < nb; j++) {
p2p_i_transmit_B(p,q,B,l,j);
}
}
for (l = 0; l < kb ; l++) {
if (l < kb - lookahead) { // prefetch: post the communications for panel l+lookahead while panel l is processed
for (i= 0; i < mb; i++) {
p2p_i_transmit_A(p,q,A,i,l+lookahead);
}
for (j= 0; j < nb; j++) {
p2p_i_transmit_B(p,q,B,l+lookahead,j);
}
}
p2p_i_wait_AB(p,q,A,B,C,l);
if (check) {
local_outer_product_check(1.0f, A, B, C, l, p, q);
} else {
local_outer_product(1.0f, A, B, C, l, p, q);
}
}
return 0;
}
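/* Illustration (added for clarity): with kb = 4 and lookahead = 2, pgemm_p2p_i_la
   schedules the panels as follows:
     before the loop : post panels 0 and 1
     l = 0 : post panel 2, wait for panel 0, compute with panel 0
     l = 1 : post panel 3, wait for panel 1, compute with panel 1
     l = 2 :               wait for panel 2, compute with panel 2
     l = 3 :               wait for panel 3, compute with panel 3
   so the communication of panel l+lookahead overlaps the computation on panel l. */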

9
TP2/src/gemms.h Normal file
View file

@ -0,0 +1,9 @@
#ifndef PROGPARALLEL_GEMMS_H
#define PROGPARALLEL_GEMMS_H
int pgemm_p2p(int check, int p, int q, int m, int n, int k, Matrix* A, Matrix* B, Matrix* C);
int pgemm_bcast(int check, int p, int q, int m, int n, int k, Matrix* A, Matrix* B, Matrix* C);
//int pgemm_p2p_i(int p, int q, int m, int n, int k, Matrix* A, Matrix* B, Matrix* C);
int pgemm_p2p_i_la(int check, int p, int q, int lookahead, int m, int n, int k, Matrix* A, Matrix* B, Matrix* C);
#endif

270
TP2/src/main.c Normal file
View file

@ -0,0 +1,270 @@
#include <mpi.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <float.h>
#include <cblas.h>
#include <time.h>
#include <argp.h>
#include "utils.h"
#include "dsmat.h"
#include "gemms.h"
static char doc[] =
"TP Prog Parallèle -- Ligne de commande";
static char args_doc[] = "-m [m] -n [n] -k [k] -b [b] -p [p] -q [q] --algorithm [p2p|p2p-i-la|bcast] --lookahead [la] --niter [i]";
static struct argp_option options[] = {
{"m", 'm', "int", 0, "Number of rows in A and C (deprecated)" },
{"n", 'n', "int", 0, "Dimension of A B and C" },
{"k", 'k', "int", 0, "Shared dimension of A and B (deprecated)" },
{"blocking", 'b', "int", 0, "Size of the square block of A, B and C (must divide m,n and k" },
{"p", 'p', "int", 0, "Length of the logical grid"},
{"q", 'q', "int", 0, "Width of the logical grid"},
{"algorithm",'a', "string", 0, "GEMM distributed algorithm to use"},
{"lookahead",'l', "int", 0, "Parameter for p2p-i-la algorithm"},
{"verbose", 'v', 0, 0, "If the program print more"},
{"checking", 'c', 0, 0, "If the program checks gemm results"},
{"niter", 'i', "int", 0, "Number of iterations"},
{ 0 }
};
struct arguments
{
int m, n, k, b;
int p, q;
int la;
char* algo;
int verbose, check;
int iter;
};
static error_t
parse_opt (int key, char *arg, struct argp_state *state)
{
/* Get the input argument from argp_parse, which we
know is a pointer to our arguments structure. */
struct arguments *arguments = state->input;
switch (key)
{
case 'm':
arguments->m = atoi(arg);
break;
case 'n':
arguments->n = atoi(arg);
break;
case 'k':
arguments->k = atoi(arg);
break;
case 'b':
arguments->b = atoi(arg);
break;
case 'p':
arguments->p = atoi(arg);
break;
case 'q':
arguments->q = atoi(arg);
break;
case 'l':
arguments->la = atoi(arg);
break;
case 'a':
arguments->algo = arg;
break;
case 'v':
arguments->verbose = 1;
break;
case 'c':
arguments->check = 1;
break;
case 'i':
arguments->iter = atoi(arg);
break;
default:
return ARGP_ERR_UNKNOWN;
}
return 0;
}
static struct argp argp = { options, parse_opt, args_doc, doc };
// void print_res(Matrix C, char* algo) {
// int i,j;
// int size, rank;
// MPI_Comm_size(MPI_COMM_WORLD, &size);
// MPI_Comm_rank(MPI_COMM_WORLD, &rank);
// char name[100];
// for (i=0;i<C.mb;i++) {
// for (j=0;j<C.nb;j++) {
// sprintf(name,"resC[%d,%d](%s)",i,j,algo);
// if (C.blocks[i][j].owner == rank)
// block_print(C.blocks[i][j].c, C.b, C.b, name);
// }
// }
// }
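// A GEMM of size m x n x k performs 2*m*n*k floating-point operations; dividing
// by the execution time in seconds and by 10^9 gives Gflop/s.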
void gflops_gemm(int m, int n, int k, float exec_time, double* gflops) {
(*gflops) = 2.0*m*n*k/(exec_time*pow(10,9));
}
int main(int argc, char* argv[]) {
struct arguments arguments;
arguments.m = 20;
arguments.n = 20;
arguments.k = 20;
arguments.b = 10;
arguments.p = 2;
arguments.q = 2;
arguments.algo = "p2p";
arguments.la = 0;
arguments.verbose = 0;
arguments.check = 0;
arguments.iter = 1;
int p, q;
int m,n,k,b;
int la;
int err, iter, niter;
double d_start, d_stop; // on multiple nodes
clock_t t; // on one node
double time_taken, gflops;
char hostname[1024];
char * algo;
int vbose, check;
argp_parse (&argp, argc, argv, 0, 0, &arguments);
m = arguments.m;
n = arguments.n;
k = arguments.k;
b = arguments.b;
p = arguments.p;
q = arguments.q;
algo = arguments.algo;
la = arguments.la;
vbose = arguments.verbose;
check = arguments.check;
niter = arguments.iter;
if (strcmp(algo,"p2p")*strcmp(algo,"p2p-i-la")*strcmp(algo,"bcast") != 0) {
printf("Wrong value for algo, only p2p, p2p-i-la and p2p-bcast authorized\n");
return 1;
}
if (b <= 0) { printf("Wrong value for b, should be positive\n"); return 1; }
if (m%b != 0) { printf("M should be divisible by B\n"); return 1; }
if (n%b != 0) { printf("N should be divisible by B\n"); return 1; }
if (k%b != 0) { printf("K should be divisible by B\n"); return 1; }
if (niter < 0) { printf("Wrong value for niter, should be non-negative\n"); return 1; }
get_host_name(hostname,1024);
init_trace();
// openblas_set_num_threads(1);
srand(time(NULL));
MPI_Init(NULL,NULL);
int size, rank;
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
if (vbose)
//printf("I am the %d-th node in a world of size %d\n", rank, size);
printf("%s is the %d-th node in a world of size %d\n", hostname, rank, size);
if (p*q != size) {
printf("bad world size (p*q != size)\n");
return 1;
}
// this initialization could probably be improved
Matrix A = (Matrix){0}, B = (Matrix){0}, C = (Matrix){0};
Matrix wA = (Matrix){0},wB = (Matrix){0},wC = (Matrix){0}, bwC = (Matrix){0};
Matrix bA = (Matrix){0},bB = (Matrix){0},bC = (Matrix){0};
if (vbose)
printf("[%s] m,n,k = %d,%d,%d | b = %d | pxq = %dx%d | la = %d \n",
hostname, m,n,k, b, p,q, la);
// printf("[%d] m,n,k = %d,%d,%d | b = %d | pxq = %dx%d | la = %d \n",
// rank, m,n,k, b, p,q, la);
//err = dsmat_fill_v(&A, m, k, b, p, q, "A", 1.0f);
//err = dsmat_fill_v(&B, k, n, b, p, q, "B", 1.0f);
err = dsmat_fill_s(&A, m, k, b, p, q, "A");
err = dsmat_fill_s(&B, k, n, b, p, q, "B");
//err = dsmat_fill(&A, m, k, b, p, q, "A");
//err = dsmat_fill(&B, k, n, b, p, q, "B");
err = dsmat_fill_v(&C, m, n, b, p, q, "C", 0.0f);
err = MPI_Barrier(MPI_COMM_WORLD);
if (err != MPI_SUCCESS) return 1;
for (iter = 0; iter < niter; iter++) {
err = dsmat_copy(&wA,&A);
err = dsmat_copy(&wB,&B);
err = dsmat_copy(&wC,&C);
MPI_Barrier(MPI_COMM_WORLD);
d_start = MPI_Wtime();
if (strcmp(algo,"p2p") == 0) {
err = pgemm_p2p(check,p,q,m,n,k,&wA,&wB,&wC);
// } else if (strcmp(algo,"p2p-i") == 0) {
// err = pgemm_p2p_i(p,q,m,n,k,&wA,&wB,&wC);
} else if (strcmp(algo,"p2p-i-la") == 0) {
err = pgemm_p2p_i_la(check,p,q,la,m,n,k,&wA,&wB,&wC);
} else if (strcmp(algo,"bcast") == 0) {
err = pgemm_bcast(check,p,q,m,n,k,&wA,&wB,&wC);
}
MPI_Barrier(MPI_COMM_WORLD);
d_stop = MPI_Wtime();
gflops_gemm(m,n,k, d_stop - d_start, &gflops);
if (rank == 0) {
//printf("[%d] (%s) measured_wtime = %fs (la=%d) | %f Gflop/s\n", rank, algo, d_stop - d_start, la, gflops);
printf("[%s] (%s) measured_wtime = %fs (la=%d) | %f Gflop/s\n", hostname, algo, d_stop - d_start, la, gflops);
}
MPI_Barrier(MPI_COMM_WORLD);
if (check) {
err = dsmat_copy_to(&bwC,&wC,0,"bwC","wC");
err = dsmat_copy_to( &bA, &A,0, "bA", "A");
err = dsmat_copy_to( &bB, &B,0, "bB", "B");
err = dsmat_copy_to( &bC, &C,0, "bC", "C");
MPI_Barrier(MPI_COMM_WORLD);
if (rank == 0) {
if (vbose) {
block_print(bwC.blocks[0][0].c, m, n, algo);
block_print( bA.blocks[0][0].c, m, k, "gA");
block_print( bB.blocks[0][0].c, k, n, "gB");
block_print( bC.blocks[0][0].c, m, n, "gC");
}
t = clock();
cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m,n,k,
1.0f, bA.blocks[0][0].c, k, bB.blocks[0][0].c, n,
0.0f, bC.blocks[0][0].c, n);
t = clock() - t;
time_taken = ((double)t/CLOCKS_PER_SEC);
gflops_gemm(m,n,k, time_taken, &gflops);
//printf("[%d] (g) measured_wtime = %fs | %f Gflop/s\n", rank, time_taken, gflops);
printf("[%s] (g) measured_wtime = %fs | %f Gflop/s\n", hostname, time_taken, gflops);
if (vbose)
block_print(bC.blocks[0][0].c, m, n, "gresC");
myblas_sgepxy(-1.0,bC.blocks[0][0].c,bwC.blocks[0][0].c, m,n);
float nrm = cblas_snrm2(m*n,bwC.blocks[0][0].c,1);
if (nrm < DBL_EPSILON) printf("GEMM is correct (%12.5e)\n",nrm);
else printf("algorithm is not GEMM by %12.5e\n", nrm);
}
err = MPI_Barrier(MPI_COMM_WORLD);
err = dsmat_destroy(&bwC,"bwC");
err = dsmat_destroy( &bA,"bA");
err = dsmat_destroy( &bB,"bB");
err = dsmat_destroy( &bC,"bC");
}
MPI_Barrier(MPI_COMM_WORLD);
err = dsmat_destroy(&wA,"wA");
err = dsmat_destroy(&wB,"wB");
err = dsmat_destroy(&wC,"wC");
}
err = MPI_Barrier(MPI_COMM_WORLD);
err = dsmat_destroy(&A,"A");
err = dsmat_destroy(&B,"B");
err = dsmat_destroy(&C,"C");
if (vbose)
printf("[%s] matrices destroyed (%d) \n", hostname, err);
//printf("[%d] matrices destroyed (%d) \n", rank, err);
return MPI_Finalize();
}

74
TP2/src/test.c Normal file
View file

@ -0,0 +1,74 @@
#include <mpi.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <float.h>
#include <cblas.h>
#include <time.h>
#include "utils.h"
#include "dsmat.h"
#include "gemms.h"
int main(int argc, char* argv[]) {
int p, q;
int m,n,k,b;
int i,j,l,la = 0; // la is only printed below; initialize it to avoid reading an indeterminate value
int err, iter, niter;
double d_start, d_stop; // on multiple nodes
clock_t t; // on one node
double time_taken, gflops;
int node,tag;
long unsigned int total_us;
char name[100];
char * algo;
int vbose, check;
MPI_Status status;
m = 2;
n = 4;
k = 4;
b = 2;
p = 1;
q = 2;
// openblas_set_num_threads(1);
srand(time(NULL));
MPI_Init(NULL,NULL);
int world_size, world_rank;
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
printf("I am the %d-th node in a world of size %d\n", world_rank, world_size);
if (p*q != world_size) {
printf("bad world size\n");
return 1;
}
err = MPI_Barrier(MPI_COMM_WORLD);
if (err != MPI_SUCCESS) return 1;
// this initialization could probably be improved
Matrix A = (Matrix){0},B = (Matrix){0},C = (Matrix){0};
Matrix bA = (Matrix){0},bB = (Matrix){0},bC= (Matrix){0};
Matrix wA = (Matrix){0},wB = (Matrix){0},wC= (Matrix){0}, bwC= (Matrix){0};
printf("[%d] m,n,k = %d,%d,%d | b = %d | pxq = %dx%d | la = %d | test %f \n",
world_rank, m,n,k, b, p,q, la, 1.0f);
err = dsmat_fill_s(&A, m, k, b, p, q, "A");
err = MPI_Barrier(MPI_COMM_WORLD);
if (err != MPI_SUCCESS) return 1;
err = dsmat_copy(&wA,&A);
MPI_Barrier(MPI_COMM_WORLD);
err = dsmat_copy_to(&wC,&A,0,"wC","A");
printf("%d ] dsmat_copy_to.err = %d\n", world_rank, err);
err = dsmat_destroy(&wA,"wA");
err = dsmat_copy(&wA,&A);
err = dsmat_destroy(&wA,"wA");
err = dsmat_copy(&wA,&A);
err = dsmat_destroy(&wC,"wC");
err = dsmat_destroy(&A,"A");
err = MPI_Barrier(MPI_COMM_WORLD);
printf("[%d] matrices destroyed (%d) \n", world_rank, err);
return MPI_Finalize();
}

85
TP2/src/utils.c Normal file
View file

@ -0,0 +1,85 @@
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <cblas.h>
//#include <time.h>
#include <sys/time.h>
#include <unistd.h> // for gethostname()
#include "utils.h"
void val_mat(int m, int n, float* mat, float val) {
int i,j;
for(i = 0; i<m; i++) {
for(j = 0; j<n; j++) {
mat[i*n+j] = val;
}
}
}
void rand_mat(int m, int n, float* mat, float max) {
int i,j;
for(i = 0; i<m; i++)
for(j = 0; j<n; j++)
mat[i*n+j] = ((float)rand()/RAND_MAX) * max;
}
// unused, more as a reminder
int item_2d(int i, int j, int m, int n) {
return i*n + j;
}
long unsigned int time_interval(struct timeval start,struct timeval stop) {
return labs( (stop.tv_sec - start.tv_sec) * 1000000 + stop.tv_usec - start.tv_usec );
}
void print_gflops(struct timeval stop, struct timeval start, int m, int n, int k, int b, int kernel, char* name, float * c) {
long unsigned int total_us = time_interval(start, stop);
printf("flops%f\n",2.0*m*n*k);
printf("s = %f\n",total_us*pow(10,3));
float gflops = fabs( 2.0*m*n*k/(total_us*pow(10,3)) );
printf("gflops = %f\n",gflops);
float nrm;
if (c == NULL) { nrm = -1.0; } else { nrm = cblas_snrm2(m*n, c, 1); }
printf("%s took %lu µs => %f Gflop/s check: %f (block:%d, kernel:%d)\n", name, total_us, gflops, nrm, b, kernel);
printf("CSV %d,%d,%d,%d,%d,%s,%ld,%f\n", m,n,k,b,kernel,name,total_us,gflops);
}
void print_mat(float* a, int m, int n, char* name) {
int i,j;
for (i = 0; i < m ; i++) {
for (j = 0; j < n ; j++) {
printf("%s[%d,%d] = %f,",name,i,j,a[n*i+j]);
}
printf("\n");
}
printf("\n");
}
// b = alpha*a + b
void myblas_sgepxy(float alpha, float* a, float* b, int m, int n) {
int i;
for (i = 0; i < m ; i++) {
cblas_saxpy(n,alpha,&a[n*i],1,&b[n*i],1);
}
}
void node_coordinates(int p, int q, int node, int* coordinates) {
// node = q * c[0] + c[1]
coordinates[1] = node % q;
coordinates[0] = (node - coordinates[1])/q;
}
void node_coordinates_2i(int p, int q, int node, int* my_row, int* my_col) {
// node = q * my_row + my_col
*my_col = node % q;
*my_row = (node - *my_col)/q;
}
int get_node(int p, int q, int i, int j) {
return q*(i%p) + (j%q);
}
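/* Worked example (added for clarity), with p = q = 2:
   get_node(2,2,i,j) = 2*(i%2) + (j%2), so blocks map to ranks
     (0,0)->0  (0,1)->1
     (1,0)->2  (1,1)->3
   and node_coordinates_2i(2,2,3,&r,&c) gives r = 1, c = 1,
   consistent with node = q*row + col. */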
// cf stackoverflow (https://stackoverflow.com/questions/504810/how-do-i-find-the-current-machines-full-hostname-in-c-hostname-and-domain-info)
void get_host_name(char* hostname, int buffer_size) {
hostname[buffer_size - 1] = '\0';
gethostname(hostname, buffer_size - 1);
}

34
TP2/src/utils.h Normal file
View file

@ -0,0 +1,34 @@
#ifndef PROGPARALLEL_UTILS_H
#define PROGPARALLEL_UTILS_H
#define max(a,b) \
({ __typeof__ (a) _a = (a); \
__typeof__ (b) _b = (b); \
_a > _b ? _a : _b; })
// fill the content of mat with values val
void val_mat(int m, int n, float* mat, float val);
// fill the content of mat with random values from 0.0 to max
void rand_mat(int m, int n, float* mat, float max);
// b = alpha*a + b
void myblas_sgepxy(float alpha, float* a, float* b, int m, int n);
// return the time between start and stop in µs
long unsigned int time_interval(struct timeval start,struct timeval stop);
// deprecated
void print_gflops(struct timeval stop, struct timeval start, int m, int n, int k, int b, int kernel, char* name, float * c);
// print the content of a[0:m-1,0;n-1] with given name
void print_mat(float* a, int m, int n, char* name);
// fill coordinates according to node = q * coordinates[0] + coordinates[1]
void node_coordinates(int p, int q, int node, int* coordinates);
// fill my_row/col according to node = q * my_row + my_col
void node_coordinates_2i(int p, int q, int node, int* my_row, int* my_col);
// return the owner node of a block A_i,j on a p x q grid.
int get_node(int p, int q, int i, int j);
// return i*n +j;
int item_2d(int i, int j, int m, int n);
// get the name of the machine
void get_host_name(char* hostname, int buffer_size);
#endif

23
TP2/src/who_am_i.c Normal file
View file

@ -0,0 +1,23 @@
#include <stdio.h>
#include <mpi.h>
int main( int argc, char *argv[] ) {
int rank, size;
int l;
char name[MPI_MAX_PROCESSOR_NAME];
//MPI_Init (&argc, &argv); /* starts MPI */
MPI_Init (NULL, NULL); /* starts MPI */
MPI_Comm_rank (MPI_COMM_WORLD, &rank); /* get current process id */
MPI_Comm_size (MPI_COMM_WORLD, &size); /* get number of processes */
MPI_Get_processor_name(name, &l); /* get processor name */
printf("Hello world from process %d of %d on processor named %s\n", rank, size, name);
MPI_Finalize();
return 0;
}

BIN
TP2/subject_mpi.pdf Normal file

Binary file not shown.

53
TP2/utils.sh Executable file
View file

@ -0,0 +1,53 @@
TOOLS_DIR=/mnt/n7fs/ens/tp_guivarch/opt2021
SIMGRID_DIR=$TOOLS_DIR/simgrid-3.31
VITE_DIR=$TOOLS_DIR/vite
export PATH=${SIMGRID_DIR}/bin:${PATH}
# for check and bench
tmp=$HOME/tmp_simgrid
mkdir -p $tmp
my_mpirun="$SIMGRID_DIR/bin/smpirun -trace --cfg=smpi/tmpdir:$tmp"
traces="traces"
exec=build/bin/main
generate_hostfile() {
N=${1:-4}
mkdir -p hostfiles
rm -f hostfiles/hostfile.$N.txt
for i in $(seq 1 $N); do
echo node-${i}.simgrid.org >>hostfiles/hostfile.$N.txt
done
}
run() {
human=${1:-0}
mkdir -p $out
echo $my_mpirun $mpi_options ${exec:-build/bin/main} -m $m -k $k -n $n -b $b -a $algo -p $p -q $q -i $iter $options
$my_mpirun $mpi_options ${exec:-build/bin/main} -m $m -k $k -n $n -b $b -a $algo -p $p -q $q -i $iter $options &>$out/$algo.out
echo reading $out/$algo.out
correct=$(grep -i "gemm is correct" "$out/$algo.out" | wc -l)
trial=$(grep "Gflop/s" $out/$algo.out | grep $algo | wc -l)
echo Found $correct correct GEMM out of $trial
while read line; do
# [0] (p2p) measured_wtime = 0.000058s (la=0) | 0.002195 Gflop/s
gflops=$(echo $line | grep -o "| .* Gflop/s" | grep -o "[0-9]\\+.[0-9]\\+")
if [ $human -eq 0 ]; then
echo "$m,$k,$n,$b,$p,$q,$algo,$la,$gflops"
else
echo "mxnxk=${m}x${n}x${k},b=$b,p x q = $p x $q | using $algo, (lookahead:$la) => $gflops Gflop/s"
fi
echo "$m,$k,$n,$b,$p,$q,$algo,$la,$gflops" >>$csv
done < <(grep "Gflop/s" $out/$algo.out | grep $algo)
if [ $la -gt 0 ]; then
algo=$algo-$la
fi
mkdir -p $traces
mv -f smpi_simgrid.trace $traces/$algo.trace
echo You can open $traces/$algo.trace with $VITE_DIR/build/bin/vite
echo
}
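# Usage sketch (illustrative values only; nothing below runs when sourcing this file):
# run() expects its parameters in shell variables, as bench.sh and check.sh set them, e.g.
#   m=512; n=512; k=512; b=128; p=2; q=2; iter=3; la=0
#   algo="bcast"; options="-c"; out="outputs"; csv="out.csv"
#   mpi_options="-platform platforms/cluster_crossbar.xml -hostfile hostfiles/cluster_hostfile.txt -np $((p*q))"
#   echo m,n,k,b,p,q,algo,lookahead,gflops >$csv
#   run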