commit f0f362eeee
init
BE/01_RingD/Makefile (new file)
@@ -0,0 +1,17 @@
MPICC=smpicc
CFLAGS=-g -O4

DIR=01_RingD
SRC=ringd

all: ${SRC}

%.o: %.c
	echo $@
	$(MPICC) -c -Wall -o $@ $<

${SRC}: ${SRC}.o
	$(MPICC) -o $@ $^

clean:
	rm -rf *.o ${SRC}
BE/01_RingD/ringd.c (new file)
@@ -0,0 +1,136 @@
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

int main(int argc, char *argv[])
{

    MPI_Init(&argc, &argv);

    int comm_size;
    MPI_Comm_size(MPI_COMM_WORLD, &comm_size);
    if (comm_size % 2 != 0)
    {
        printf("This application is meant to be run with an even number of MPI processes, not %d.\n", comm_size);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }

    // Get my rank in the global communicator
    int my_rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    // Determine the colour and key based on whether my rank is even.
    char subcommunicator;
    int colour;
    int key;
    if (my_rank % 2 == 0)
    {
        subcommunicator = 'E';
        colour = 0;
        key = my_rank;
    }
    else
    {
        subcommunicator = 'O';
        colour = 1;
        key = comm_size - my_rank;
    }

    // Split the global communicator
    MPI_Comm new_comm;
    MPI_Comm_split(MPI_COMM_WORLD, colour, key, &new_comm);

    int my_new_comm_rank, new_comm_size;
    // Get my rank in the new communicator
    MPI_Comm_rank(new_comm, &my_new_comm_rank);
    // Get the size of the new communicator
    MPI_Comm_size(new_comm, &new_comm_size);

    // Print my new rank and new communicator
    printf("[MPI process %d] I am now MPI process %d in subcommunicator %c.\n", my_rank, my_new_comm_rank, subcommunicator);

    // barrier to tidy up stdout a bit
    // MPI_Barrier(MPI_COMM_WORLD);

    int previous, next;
    // determine my neighbours according to my rank in my subcommunicator
    if (my_new_comm_rank == 0)
    {
        previous = new_comm_size - 1;
        next = my_new_comm_rank + 1;
    }
    else if (my_new_comm_rank == new_comm_size - 1)
    {
        previous = my_new_comm_rank - 1;
        next = 0;
    }
    else
    {
        previous = my_new_comm_rank - 1;
        next = my_new_comm_rank + 1;
    }

    // printf("[MPI process %d] new %d previous %d next %d in subcommunicator %c.\n", my_rank, my_new_comm_rank, previous, next, subcommunicator);

    float value = 1.0;
    MPI_Status status;

    // Even: clockwise + multiplication
    if (subcommunicator == 'E')
    {
        // receive value from previous node
        if (my_new_comm_rank != 0)
        {
            MPI_Recv(&value, 1, MPI_FLOAT, previous, 0, new_comm, &status);
            printf("[MPI process %d_%c] RECEIVED from process %d of %d, value = %f\n", my_rank, subcommunicator, my_new_comm_rank, new_comm_size, value);
            value = value * 2.0;
            printf("[MPI process %d_%c] UPDATE, value = %f\n", my_rank, subcommunicator, value);
        }
        else
        {
            printf("[MPI process %d_%c] START, value = %f\n", my_rank, subcommunicator, value);
        }

        // send value to next node
        if (my_new_comm_rank != new_comm_size - 1)
        {
            MPI_Send(&value, 1, MPI_FLOAT, next, 0, new_comm);
            printf("[MPI process %d_%c] SENT to process %d of %d, value = %f\n", my_rank, subcommunicator, my_new_comm_rank, new_comm_size, value);
        }
    }

    // Odd: counter-clockwise + division
    if (subcommunicator == 'O')
    {
        // receive value from next node
        if (my_new_comm_rank != 0)
        {
            MPI_Recv(&value, 1, MPI_FLOAT, next, 0, new_comm, &status);
            printf("[MPI process %d_%c] RECEIVED from process %d of %d, value = %f\n", my_rank, subcommunicator, my_new_comm_rank, new_comm_size, value);
            value = value / 2.0;
            printf("[MPI process %d_%c] UPDATE, value = %f\n", my_rank, subcommunicator, value);
        }
        else
        {
            printf("[MPI process %d_%c] START, value = %f\n", my_rank, subcommunicator, value);
        }

        // send value to previous node
        if (my_new_comm_rank != 1)
        {
            MPI_Send(&value, 1, MPI_FLOAT, previous, 0, new_comm);
            printf("[MPI process %d_%c] SENT to process %d of %d, value = %f\n", my_rank, subcommunicator, my_new_comm_rank, new_comm_size, value);
        }
    }

    // barrier to tidy up stdout a bit
    // MPI_Barrier(MPI_COMM_WORLD);

    // the end
    printf("[MPI process %d_%c] The End\n", my_rank, subcommunicator);

    // Free the communicator
    MPI_Comm_free(&new_comm);
    MPI_Finalize();

    return EXIT_SUCCESS;
}
BE/02_normA/Makefile (new file)
@@ -0,0 +1,17 @@
MPICC=smpicc
CFLAGS=-g -O4

DIR=02_normA
SRC=normA

all: ${SRC}

%.o: %.c
	echo $@
	$(MPICC) -c -Wall -o $@ $<

${SRC}: ${SRC}.o
	$(MPICC) -o $@ $^

clean:
	rm -rf *.o ${SRC} ${DIR}
BE/02_normA/normA.c (new file)
@@ -0,0 +1,159 @@
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mpi.h>

void multAv(double x[], double *A, double y[], int m, int n);

void init0(double x[], int n);

double dot(double x[], double y[], int n);

int main(int argc, char *argv[])
{
    int size;
    int const n = 12;
    int my_rank;
    double local_dot, global_dot, normA, reference;

    MPI_Init(&argc, &argv);

    // Get number of processes and check that 4 processes are used
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if (size != 4)
    {
        printf("This application is meant to be run with 4 MPI processes.\n");
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }

    // Get my rank
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    // Declaration and initialization of A (ones for all components)
    // the row-block size, b, is the same for all nodes
    // (if you don't change the constants)
    int b = n / size;
    double *A;

    A = (double *)malloc(b * n * sizeof(double));

    for (int i = 0; i < b; i++)
    {
        for (int j = 0; j < n; j++)
        {
            A[i * n + j] = 1.0;
            reference = 66.000000; // = sum_{i=0}^{n-1} i = 66 for the all-ones A

            // A[i*n + j] = (double) my_rank;
            // reference = 97.488461;

            // A[i*n + j] = (double) my_rank*(i+1)+(j+1);
            // reference = 239.899979;

            // printf("Process [%d], A[%d][%d] = %f\n", my_rank, i, j, A[i*n+j]);
        }
    }

    // reference vector to verify that the global vector is correct
    double v_ref[n];
    for (int i = 0; i < n; i++)
    {
        v_ref[i] = (double)i;
    }

    // local vector
    double x_local[b];
    for (int i = 0; i < b; i++)
    {
        x_local[i] = (double)b * my_rank + i;
        // printf("Process [%d], v_local[%d] = %f\n", my_rank, i, v_local[i]);
    }

    // global vector
    double x_global[n];
    init0(x_global, n);

    // Use a collective communication in order to gather on ALL the nodes the
    // part of the local vector into the global vector
    MPI_Allgather(x_local, b, MPI_DOUBLE, x_global, b, MPI_DOUBLE, MPI_COMM_WORLD);

    // node 2 checks that the global vector is correct (the difference should be 0 for all components)
    if (my_rank == 2)
    {
        for (int i = 0; i < n; i++)
        {
            printf("Process [%d], check[%d] = %f\n", my_rank, i, x_global[i] - v_ref[i]);
        }
    }

    MPI_Barrier(MPI_COMM_WORLD);

    // vector y_local = A * x_global
    double y_local[b];
    init0(y_local, b);

    // Perform the multiplication
    multAv(y_local, A, x_global, b, n);

    // each node displays y (with A full of ones, all the components of y
    // should be the same)
    for (int i = 0; i < b; i++)
    {
        printf("Process [%d] y_local[%d] = %f\n", my_rank, i, y_local[i]);
    }

    // Perform the dot product on the local x
    local_dot = dot(x_local, y_local, b);
    printf("Process [%d] local dot %f\n", my_rank, local_dot);

    // Use one single collective communication to perform the reduction into
    // global_dot
    MPI_Allreduce(&local_dot, &global_dot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);

    // the norm is the square root of global_dot
    normA = sqrt(global_dot);

    // Another node displays the norm
    if (my_rank == 2)
    {
        printf("Process [%d] normA = %f, reference = %f\n", my_rank, normA, reference);
    }

    MPI_Finalize();

    return EXIT_SUCCESS;
}

void multAv(double x[], double *A, double y[], int m, int n)
{
    for (int i = 0; i < m; i++)
    {
        x[i] = 0.0;
        for (int j = 0; j < n; j++)
        {
            x[i] += A[i * n + j] * y[j];
        }
    }
    return;
}

void init0(double x[], int n)
{
    for (int i = 0; i < n; i++)
    {
        x[i] = 0.0;
    }
    return;
}

double dot(double x[], double y[], int n)
{
    double res = 0.0;

    for (int i = 0; i < n; i++)
    {
        res += x[i] * y[i];
    }

    return res;
}
BE/03_overmean/Makefile (new file)
@@ -0,0 +1,18 @@
MPICC=smpicc
CFLAGS=-g -O4


DIR=03_overmean
SRC=overmean

all: ${SRC}

%.o: %.c
	echo $@
	$(MPICC) -c -Wall -o $@ $<

${SRC}: ${SRC}.o
	$(MPICC) -o $@ $^

clean:
	rm -rf *.o ${SRC} ${DIR}
BE/03_overmean/overmean.c (new file)
@@ -0,0 +1,120 @@
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <mpi.h>

int main(int argc, char *argv[])
{
    // comment this line out if you want the same vector for each run
    srand(time(NULL));

    MPI_Init(&argc, &argv);

    // Get number of processes
    int nb_process;
    MPI_Comm_size(MPI_COMM_WORLD, &nb_process);

    // Fix root's rank
    int root_rank = 0;

    // Get my rank
    int my_rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    // global size (only the root knows its value)
    int global_size = 0;
    // local size (fixed so that the split is regular)
    int local_size = 3;
    // local vector
    int *local_vector = NULL;
    int *global_vector = NULL;

    // root process
    if (my_rank == root_rank)
    {
        global_size = nb_process * local_size; // so that the global vector
                                               // can be split into sub-vectors
                                               // of the same size
        printf("global_size = %d\n", global_size);
        global_vector = (int *)malloc(sizeof(int) * global_size);
        for (int i = 0; i < global_size; i++)
        {
            // global_vector[i] = i;
            global_vector[i] = rand() % 101;
            printf("global_vector[%d] = %d\n", i, global_vector[i]);
        }
    }

    // Each process gets its part of the global vector
    local_vector = (int *)malloc(sizeof(int) * local_size);
    MPI_Scatter(global_vector, local_size, MPI_INT, local_vector, local_size, MPI_INT, root_rank, MPI_COMM_WORLD);

    // print the local vector
    for (int i = 0; i < local_size; i++)
    {
        printf("[%d] local_vector[%d] = %d\n", my_rank, i, local_vector[i]);
    }

    // barrier to tidy up stdout a bit
    // MPI_Barrier(MPI_COMM_WORLD);

    // compute the local sum
    int local_sum = 0;
    for (int i = 0; i < local_size; i++)
    {
        local_sum += local_vector[i];
    }
    printf("Process %d computed its local sum = %d.\n", my_rank, local_sum);

    // compute the global sum by a reduction
    int global_sum;
    MPI_Reduce(&local_sum, &global_sum, 1, MPI_INT, MPI_SUM, root_rank, MPI_COMM_WORLD);

    // print the global sum
    if (my_rank == root_rank) {
        printf("Process %d got the global sum = %d.\n", my_rank, global_sum);
    }

    // barrier to tidy up stdout a bit
    // MPI_Barrier(MPI_COMM_WORLD);

    float mean; // float!!

    // the root computes the mean (it is the only one that knows the global size)
    if (my_rank == root_rank)
    {
        mean = ((float)global_sum) / global_size;
        printf("Process %d computed the mean = %f.\n", my_rank, mean);
    }

    // broadcast the mean to all processes
    MPI_Bcast(&mean, 1, MPI_FLOAT, root_rank, MPI_COMM_WORLD);

    // print the mean
    printf("Process %d got the mean = %f.\n", my_rank, mean);

    // barrier to tidy up stdout a bit
    // MPI_Barrier(MPI_COMM_WORLD);

    // count the values (from the local vector) over the mean
    int local_number = 0;
    for (int i = 0; i < local_size; i++)
    {
        if (local_vector[i] >= mean)
            local_number++;
    }
    printf("Process %d has %d values over the mean.\n", my_rank, local_number);

    // reduce these counts on the root process
    int over_the_mean;
    MPI_Reduce(&local_number, &over_the_mean, 1, MPI_INT, MPI_SUM, root_rank, MPI_COMM_WORLD);

    // print the total number of values over the mean
    if (my_rank == root_rank) {
        printf("The total number of values over the mean is %d.\n", over_the_mean);
    }

    MPI_Finalize();

    return EXIT_SUCCESS;
}
BE/04_n-corps/n-corps.md (new file)
@@ -0,0 +1,170 @@
# Exercise 4: the N-body problem

This file is part of the graded deliverable for the parallel computing BE.

## Question 1

Determine which computations can be parallelised and which communications must be set up in the following sequential code. Propose a parallel, message-passing rewrite of this code.

```
variables : force[1,...,N], data[1,...,N]
for t in 1, nb_steps do
    for i in 1, N do
        force[i] = 0
        for j in 1, N do
            force[i] = force[i] + interaction(data[i], data[j])
        end for
    end for
    for i in 1, N do
        data[i] = update(data[i], force[i])
    end for
end for
```

### Answer to Question 1

We assume K processes, with K dividing N.
For example N = 2K.

```C
variables (global) : K, N, ratio
variables (local)  : ik, force[1,...,ratio]
variables          : data[1,...,N]

// data is both global and local, since it is communicated between processes

ratio = N / K

// Each process k handles `ratio` bodies,
// for example with `ratio` = 2:
// process 0 -> body 0 + body 1
// process 1 -> body 2 + body 3
// ...

// Every process must know `data`
// (only process 0 knows `data` at the start)
// -> Broadcast data from 0 to all

// this loop cannot be parallelised:
// step t-1 is needed to compute step t
for t in 1, nb_steps do

    ik = 0

    // this loop can be parallelised;
    // in the code we "split" `N` into packets of `ratio`
    for i in 1, N do
        if je_mocuppe_de_ce_corps(i, N, K) // one possible way to split

            // reset the forces
            force[ik] = 0

            // compute the total force on the bodies we are in charge of
            for j in 1, N do
                force[ik] = force[ik] + interaction(data[i], data[j])
            end for

            // update our local `data`
            data[i] = update(data[i], force[ik])
            ik++

        end if
    end for

    // once every `data` entry has been updated locally (in each process),
    // all this information must be gathered
    // -> All_Gather of the local data
    // this yields a `data` that is synchronised across all processes,
    // just as after the initial broadcast

end for
```

## Question 2

Propose a parallel version of the following code.

```
variables : force[1,...,N], data[1,...,N]
for t in 1, nb_steps do
    for i in 1, N do
        force[i] = 0
    end for
    for i in 1, N do
        for j in 1, i-1 do
            f = interaction(data[i],data[j])
            force[i] = force[i] + f
            force[j] = force[j] - f
        end for
    end for
    for i in 1, N do
        data[i] = update(data[i], force[i])
    end for
end for
```

### Answer to Question 2

```C
variables (global) : force[1,...,N], data[1,...,N]

// Every process must know `data`
// (only process 0 knows `data` at the start)
// -> Broadcast data from 0 to all

// this loop cannot be parallelised:
// step t-1 is needed to compute step t
for t in 1, nb_steps do

    // compute the forces (more efficiently):
    // only N(N-1)/2 calls to `interaction` are performed
    for i in 1, N do
        if je_mocuppe_de_ce_corps(i, N, K) // I am in charge of this "column"

            // reset the forces
            force[i] = 0

            // compute the total force on the bodies we are in charge of
            for j in 1, i-1 do
                f = interaction(data[i],data[j])
                force[i] = force[i] + f
                force[j] = force[j] - f
            end for

            // reduce the force contributions computed for each body
            // -> All_reduce

            // update our local `data`
            data[i] = update(data[i], force[i])
        end if
    end for

    // once every `data` entry has been updated locally (in each process),
    // all this information must be gathered
    // -> All_Gather of the local data
    // this yields a `data` that is synchronised across all processes,
    // just as after the initial broadcast

end for
```

## Question 3

What are the drawbacks of this version?
Propose a solution to mitigate them.

### Answer to Question 3

The drawback of this version is that the computations now have to be distributed "in a triangle". Since no interaction is computed redundantly, the following computations are performed:

| | 0 | 1 | 2 | 3 |
|:-:|:-:|:-:|:-:|:-:|
| 0 | | x | x | x |
| 1 | | | x | x |
| 2 | | | | x |
| 3 | | | | |

We therefore perform $\frac{N(N-1)}{2}$ computations, which is harder to spread over $K = \frac{N}{ratio}$ processes. The naive scheme I used to parallelise the Question 2 code is sub-optimal, because the computational load is not equal across the processes.

A more efficient approach would be, somewhat as in OpenMP, to create a task for each `interaction` computation and to distribute these tasks evenly among the processes.
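For illustration, a minimal C/MPI sketch of one time step of the scheme from the answer to Question 1. The helper `i_own_body`, the function `n_body_step`, the use of one `double` per body, and the function pointers for `interaction`/`update` are assumptions made here for the sketch; they are not part of the submitted code.

```C
#include <mpi.h>
#include <stdlib.h>

/* Hypothetical ownership test: body i belongs to the rank whose
   contiguous block of size ratio = N / K contains it (K divides N). */
static int i_own_body(int i, int N, int K, int my_rank)
{
    int ratio = N / K;
    return (i / ratio) == my_rank;
}

/* One time step: each rank updates its own block of `data`, then the
   updated blocks are re-assembled on every rank with MPI_Allgather. */
void n_body_step(double *data, double *force, int N, int K, int my_rank,
                 double (*interaction)(double, double),
                 double (*update)(double, double))
{
    int ratio = N / K;
    double *local = malloc(ratio * sizeof(double));

    int ik = 0;
    for (int i = 0; i < N; i++) {
        if (i_own_body(i, N, K, my_rank)) {
            force[ik] = 0.0;
            for (int j = 0; j < N; j++)
                force[ik] += interaction(data[i], data[j]);
            local[ik] = update(data[i], force[ik]);
            ik++;
        }
    }

    /* blocks arrive in rank order, matching the block ownership above */
    MPI_Allgather(local, ratio, MPI_DOUBLE, data, ratio, MPI_DOUBLE,
                  MPI_COMM_WORLD);
    free(local);
}
```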
BE/Makefile (new file)
@@ -0,0 +1,11 @@
SOURCES=01_RingD 02_normA 03_overmean 04_n-corps

all: collect


collect:
	echo ${USER}
	(cd 01_RingD; make clean)
	(cd 02_normA; make clean)
	(cd 03_overmean; make clean)
	tar cvf Calcul_${USER}_`hostname | cut -d'.' -f1`.tar ${SOURCES}
BE/init.sh (new file)
@@ -0,0 +1,6 @@
#!/bin/bash
SIMGRID=/mnt/n7fs/ens/tp_guivarch/opt2021/simgrid-3.31

export PATH=${SIMGRID}/bin:${PATH}

alias smpirun="smpirun -hostfile ${SIMGRID}/archis/cluster_hostfile.txt -platform ${SIMGRID}/archis/cluster_crossbar.xml"
TP1/00_Who_am_i/Makefile (new file)
@@ -0,0 +1,15 @@
MPICC=smpicc
CFLAGS=-g -O4

all: who_am_i

clean:
	rm -f *.o who_am_i

%.o: %.c
	echo $@
	$(MPICC) -c -Wall -o $@ $<

who_am_i: who_am_i.o
	$(MPICC) -o $@ $^

TP1/00_Who_am_i/who_am_i.c (new file)
@@ -0,0 +1,26 @@
#include <stdio.h>
#include <mpi.h>

int main(int argc, char *argv[]) {

    int rank, size;
    int l;
    char name[MPI_MAX_PROCESSOR_NAME];

    MPI_Init(&argc, &argv);

    // Get rank
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    // Get size
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    // Get name
    MPI_Get_processor_name(name, &l);

    printf("Hello world from process %d of %d on processor named %s\n", rank, size, name);

    MPI_Finalize();

    return 0;
}
TP1/01_Ring/Makefile (new file)
@@ -0,0 +1,14 @@
MPICC=smpicc
CFLAGS=-g -O4

all: ring

clean:
	rm -rf *.o ring

%.o: %.c
	echo $@
	$(MPICC) -c -Wall -o $@ $<

ring: ring.o
	$(MPICC) -o $@ $^
TP1/01_Ring/ring.c (new file)
@@ -0,0 +1,74 @@
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

int main(int argc, char *argv[])
{

    int value;
    int my_rank, size;
    int previous, next;
    MPI_Status status;

    MPI_Init(NULL, NULL);

    // Get my rank and the number of processes
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    // determine my neighbours according to my rank
    if (my_rank == 0)
    {
        previous = size - 1;
        next = my_rank + 1;
    }
    else if (my_rank == size - 1)
    {
        previous = my_rank - 1;
        next = 0;
    }
    else
    {
        previous = my_rank - 1;
        next = my_rank + 1;
    }

    value = 1;

    // The nodes, starting with node 0, transmit the value to each other,
    // each time multiplying it by 2.
    // At the end of the transmission, the last node holds the value 2^(size-1).
    //
    // Instruction: before each send and after each receive, each node displays
    // - its rank
    // - the type of communication (send, recv)
    // - the value

    // receive value from previous node
    if (my_rank != 0)
    {
        MPI_Recv(&value, 1, MPI_INT, previous, 0, MPI_COMM_WORLD, &status);
        printf("RECEIVED from process %d of %d, value = %d\n", my_rank, size, value);
        value = value * 2;
    }
    else
    {
        printf("START, value = %d\n", value);
    }

    printf("SENDING from process %d of %d, value = %d\n", my_rank, size, value);

    // send value to next node
    if (my_rank != size - 1)
    {
        MPI_Send(&value, 1, MPI_INT, next, 0, MPI_COMM_WORLD);
    }
    else
    {
        printf("The End, value = %d\n", value);
    }

    MPI_Finalize();

    return EXIT_SUCCESS;
}
TP1/02_Limite/Makefile (new file)
@@ -0,0 +1,14 @@
MPICC=smpicc
CFLAGS=-g -O4

all: limite

clean:
	rm -rf *.o limite

%.o: %.c
	echo $@
	$(MPICC) -c -Wall -o $@ $<

limite: limite.o
	$(MPICC) -Dhave_mpi -o $@ $^
TP1/02_Limite/limite.c (new file)
@@ -0,0 +1,86 @@
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

int main(int argc, char *argv[])
{

    int size;
    int my_rank;
    int data_size = -100;
    int *buffer_send, *buffer_recv;
    int tag;
    MPI_Status status;
    int l;
    char name[MPI_MAX_PROCESSOR_NAME];

    // Make sure that the command line has one argument (the size of the data)

    if (argc != 2)
    {
        printf("usage : limite <data size>\n");
        return EXIT_FAILURE;
    }

    MPI_Init(&argc, &argv);

    // Make sure exactly 2 MPI processes are used
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if (size != 2)
    {
        printf("%d MPI processes used, please use 2.\n", size);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }

    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    MPI_Get_processor_name(name, &l);
    printf("process %d of %d on processor named %s\n", my_rank, size, name);

    // Prepare parameters

    data_size = atoi(argv[1]);
    printf("The size of the data is %d\n", data_size);

    buffer_send = (int *)malloc(data_size * sizeof(int));
    buffer_recv = (int *)malloc(data_size * sizeof(int));
    buffer_send[0] = (my_rank == 0) ? 12345 : 67890;

    tag = 0;

    if (my_rank == 0)
    {
        // node 0 sends its buffer buffer_send of size data_size to node 1
        MPI_Send(buffer_send, data_size, MPI_INT, 1, tag, MPI_COMM_WORLD);
        // node 0 receives data from node 1 in its buffer buffer_recv
        MPI_Recv(buffer_recv, data_size, MPI_INT, 1, tag, MPI_COMM_WORLD, &status);
        printf("MPI process %d received value %d from MPI process %d.\n", my_rank, buffer_recv[0], 1);
    }
    else
    {
        // node 1 sends its buffer buffer_send of size data_size to node 0
        MPI_Send(buffer_send, data_size, MPI_INT, 0, tag, MPI_COMM_WORLD);
        // node 1 receives data from node 0 in its buffer buffer_recv
        MPI_Recv(buffer_recv, data_size, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
        printf("MPI process %d received value %d from MPI process %d.\n", my_rank, buffer_recv[0], 0);
    }

    free(buffer_send);
    free(buffer_recv);

    MPI_Finalize();

    return EXIT_SUCCESS;
}

// (a) recall for which message size (small, large) MPI_Send behaves asynchronously (resp. synchronously)
// ->

// (b) what will happen when your program, completed as indicated, is called with a message size that makes MPI_Send synchronous?
// -> deadlock, since both sends become synchronous

// (c) estimate, to within 10 integers, the size limit on two nodes of the same computer
// -> 16383

// (d) propose a solution so that the exchange between the two nodes can go beyond this limit (several answers are possible). You may test them outside the lab session.
// -> split the buffer so that only small chunks are sent, which remain asynchronous
// -> swap the send/recv order on the second node
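For illustration, a minimal sketch of the second option listed under (d): rank 1 posts its receive before its send, so the two sends can never wait on each other, whatever the message size. It reuses the variables declared in limite.c above and would replace the if/else exchange block; it is a sketch, not part of the submitted file.

```C
/* Deadlock-free variant of the exchange in limite.c:
   rank 0 sends then receives, rank 1 receives then sends. */
if (my_rank == 0)
{
    MPI_Send(buffer_send, data_size, MPI_INT, 1, tag, MPI_COMM_WORLD);
    MPI_Recv(buffer_recv, data_size, MPI_INT, 1, tag, MPI_COMM_WORLD, &status);
}
else
{
    MPI_Recv(buffer_recv, data_size, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
    MPI_Send(buffer_send, data_size, MPI_INT, 0, tag, MPI_COMM_WORLD);
}
```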
TP1/03_Dot/Makefile (new file)
@@ -0,0 +1,14 @@
MPICC=smpicc
CFLAGS=-g -O4

all: dotp

clean:
	rm -rf *.o dotp

%.o: %.c
	echo $@
	$(MPICC) -c -Wall -o $@ $<

dotp: dotp.o
	$(MPICC) -o $@ $^ -lm
TP1/03_Dot/dotp.c (new file)
@@ -0,0 +1,91 @@
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mpi.h>

// perform the dot product between the two vectors x and y of size n
float dot(float x[], float y[], int n);

int main(int argc, char *argv[])
{

    int const local_data_size = 5;
    float local_x[local_data_size], local_y[local_data_size];
    float local_dot, global_dot1, global_dot2, reference;
    int borne;

    int my_rank, size;

    MPI_Init(NULL, NULL);

    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    borne = size * local_data_size - 1;
    reference = (float)(borne * (borne + 1) * (2 * borne + 1) / 6);

    // Initialization of both local vectors with the same values;
    // the global vectors would be [0, 1, ..., size*local_data_size - 1]
    for (int i = 0; i < local_data_size; i++)
    {
        local_x[i] = (float)(local_data_size * my_rank + i);
        local_y[i] = (float)(local_data_size * my_rank + i);
        // printf("[MPI process %d] value[%d]: %f\n", my_rank, i, local_x[i]);
    }

    local_dot = dot(local_x, local_y, local_data_size);

    printf("[MPI process %d] my local dot product: %f\n", my_rank, local_dot);

    /* Two-step operation */

    global_dot1 = 0.0;

    // Step 1
    // Use a collective communication to compute the global dot product
    // in such a way that node 0 gets this value
    MPI_Reduce(&local_dot, &global_dot1, 1, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);

    // Node 0 displays the global value and the reference (sum of the squares of the first integers)
    if (my_rank == 0)
    {
        printf("[MPI process %d] *Two-step collective operation* global dot product: %f == %f\n", my_rank, global_dot1, reference);
    }

    // Step 2
    // Use a collective communication to broadcast the global value to each node
    MPI_Bcast(&global_dot1, 1, MPI_FLOAT, 0, MPI_COMM_WORLD);

    // A node i (i different from 0) displays the global value
    if (my_rank != 0)
    {
        printf("[MPI process %d] *Two-step collective operation* global dot product: %f == %f\n", my_rank, global_dot1, reference);
    }

    /* One-step operation */

    global_dot2 = 0;

    // Step 3
    // Now use one single collective communication to perform both steps 1 and 2
    MPI_Allreduce(&local_dot, &global_dot2, 1, MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD);

    // Every node displays the global value
    printf("[MPI process %d] *One-step collective operation* global dot product: %f == %f\n", my_rank, global_dot2, reference);

    MPI_Finalize();

    return EXIT_SUCCESS;
}

float dot(float x[], float y[], int n)
{
    float res = 0.0;

    for (int i = 0; i < n; i++)
    {
        res += x[i] * y[i];
    }

    return res;
}
TP1/04_Mult/Makefile (new file)
@@ -0,0 +1,14 @@
MPICC=smpicc
CFLAGS=-g -O4

all: MultAv

clean:
	rm -rf *.o MultAv

%.o: %.c
	echo $@
	$(MPICC) -c -Wall -o $@ $<

MultAv: MultAv.o
	$(MPICC) -o $@ $^
TP1/04_Mult/MultAv.c (new file)
@@ -0,0 +1,119 @@
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

void multAv(double x[], double *A, double y[], int m, int n);

void init0(double x[], int n);

int main(int argc, char *argv[])
{
    int size;
    int const n = 12;
    int my_rank;
    MPI_Init(&argc, &argv);

    // Get number of processes and check that 4 processes are used
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if (size != 4)
    {
        printf("This application is meant to be run with 4 MPI processes.\n");
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }

    // Get my rank
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    // Declaration and initialization of A (ones for all components)
    // the row-block size, b, is the same for all nodes
    // (if you don't change the constants)
    int b = n / size;
    double *A;

    A = (double *)malloc(b * n * sizeof(double));

    for (int i = 0; i < b; i++)
    {
        for (int j = 0; j < n; j++)
        {
            A[i * n + j] = 1.0;
            // A[i*n + j] = (double) my_rank;
            // A[i*n + j] = (double) my_rank*(i+1)+(j+1);
            // printf("Process [%d], A[%d][%d] = %f\n", my_rank, i, j, A[i*n+j]);
        }
    }

    // reference vector to verify that the global vector is correct
    double v_ref[n];
    for (int i = 0; i < n; i++)
    {
        v_ref[i] = (double)i;
    }

    // local vector
    double v_local[b];
    for (int i = 0; i < b; i++)
    {
        v_local[i] = (double)b * my_rank + i;
        // printf("Process [%d], v_local[%d] = %f\n", my_rank, i, v_local[i]);
    }

    // global vector
    double v_global[n];
    init0(v_global, n);

    // Use a collective communication in order to gather on ALL the nodes the
    // part of the local vector into the global vector

    MPI_Allgather(v_local, b, MPI_DOUBLE, v_global, b, MPI_DOUBLE, MPI_COMM_WORLD);

    // node 2 checks that the global vector is correct
    if (my_rank == 2)
    {
        for (int i = 0; i < n; i++)
        {
            printf("Process [%d], check[%d] = %f\n", my_rank, i, v_global[i] - v_ref[i]);
        }
    }

    MPI_Barrier(MPI_COMM_WORLD);

    // vector x_loc = A * v_global
    double x_loc[b];
    init0(x_loc, b);

    // Perform the multiplication
    multAv(x_loc, A, v_global, b, n);

    // each node displays x (with A full of ones, all the components of x should be the same)
    for (int i = 0; i < b; i++)
    {
        printf("Process [%d], x_loc[%d] = %f\n", my_rank, i, x_loc[i]);
    }

    MPI_Finalize();

    return EXIT_SUCCESS;
}

void multAv(double x[], double *A, double y[], int m, int n)
{
    for (int i = 0; i < m; i++)
    {
        x[i] = 0.0;
        for (int j = 0; j < n; j++)
        {
            x[i] += A[i * n + j] * y[j];
        }
    }
    return;
}

void init0(double x[], int n)
{
    for (int i = 0; i < n; i++)
    {
        x[i] = 0.0;
    }
    return;
}
TP1/05_CG/CG_par (new executable file, binary not shown)
TP1/05_CG/CG_par.c (new file)
@@ -0,0 +1,121 @@
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <math.h>

#include "util.h"

void cg_par(double *A_local, double *rhs_local, int N, int b, float tol)
{

    int size;
    int my_rank;

    // Get number of processes
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    // Get my rank
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    //**************** Parallel CG (M == N)
    int num_it, max_it;
    double x[b], r[b], Ap[b];
    double p_local[b], p_global[N];
    double nr_global, nr_local;
    double np2_global, np2_local;
    double epsilon;
    double alpha, beta;

    max_it = 100;

    // initialization of the solution (local vector)
    for (int i = 0; i < b; i++)
    {
        x[i] = 0.0;
    }

    // compute the global norm of rhs_local (dot product, then sqrt);
    // all the nodes must have this value
    nr_local = dot(rhs_local, rhs_local, b);
    MPI_Allreduce(&nr_local, &nr_global, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    nr_global = sqrt(nr_global);
    // if (my_rank == 0) printf("nr = %lg\n", nr_global);

    // threshold of the CG
    epsilon = tol * nr_global;

    // Initialization of p_local and r (local vectors)
    copy_v(p_local, rhs_local, b);
    copy_v(r, rhs_local, b);

    // number of iterations
    num_it = 0;

    printf("num_it %d -- epsilon %lg -- nr_global %lg\n", num_it, epsilon, nr_global);

    while ((nr_global > epsilon) && (num_it < max_it))
    {

        // Compute the local vector Ap = A_local*p_global
        // => gather the p_local vectors into p_global
        MPI_Allgather(p_local, b, MPI_DOUBLE, p_global, b, MPI_DOUBLE, MPI_COMM_WORLD);

        // display the first component of p_global
        if (my_rank == 0)
            printf("p_global[0] = %lg\n", p_global[0]);

        // do the matrix-vector multiplication
        multAv(Ap, A_local, p_global, b, N);

        // compute the global dot product np2_global = (Ap_global, p_global);
        // all the nodes must have this value
        np2_local = dot(p_local, Ap, b);
        MPI_Allreduce(&np2_local, &np2_global, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
        if (my_rank == 0)
            printf("np2 = %lg\n", np2_global);

        // alpha
        alpha = (nr_global * nr_global) / np2_global;
        // if(my_rank == 0) printf("alpha = %lg\n", alpha);

        // compute the new x and r (local vectors)
        axpy(alpha, x, p_local, b);
        axpy(-alpha, r, Ap, b);

        // compute the global norm of the residual (dot product, then sqrt);
        // all the nodes must have this value
        nr_local = dot(r, r, b);
        MPI_Allreduce(&nr_local, &nr_global, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
        nr_global = sqrt(nr_global);
        // if(my_rank == 0) printf("nr = %lg\n", nr_global);

        // beta
        beta = (nr_global * nr_global) / (alpha * np2_global);
        // if(my_rank == 0) printf("beta = %lg\n", beta);

        // compute the new p_local (local vector)
        xpay(beta, r, p_local, b);

        // increase the number of iterations
        num_it++;

        // if(my_rank == 0) printf("num_it %d -- nr_global %lg\n", num_it, nr_global);
    }

    free(A_local);

    // gather the solution on node 0
    double x_global[N];
    MPI_Gather(x, b, MPI_DOUBLE, x_global, b, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    // display the solution
    if (my_rank == 0)
    {
        for (int i = 0; i < N; i++)
        {
            printf("x[%d] = %lg\n", i, x_global[i]);
        }
    }

    return;
}
TP1/05_CG/CG_par.h (new file)
@@ -0,0 +1 @@
void cg_par(double *A_local, double *rhs, int N, int b, float tol);
TP1/05_CG/CG_sq.c (new file)
@@ -0,0 +1,85 @@
#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#include "util.h"

void cg_sq(double *A, double *rhs, int N, double tol)
{

    int num_it, max_it;
    double x[N], p[N], r[N], Ap[N];
    double nr;
    double epsilon;
    double np2, alpha, beta;

    max_it = 100;

    // initialization of the solution
    for (int i = 0; i < N; i++)
    {
        // b[i] = (float) i;
        x[i] = 0.0;
    }

    // compute the norm of the rhs (dot product, then sqrt)
    nr = dot(rhs, rhs, N);
    nr = sqrt(nr);
    printf("nr = %lg\n", nr);

    // threshold of the CG
    epsilon = tol * nr;

    // Initialization of p and r
    copy_v(p, rhs, N);
    copy_v(r, rhs, N);

    // number of iterations
    num_it = 0;

    printf("num_it %d -- epsilon %lg -- nr %lg\n", num_it, epsilon, nr);

    while ((nr > epsilon) && (num_it < max_it))
    {

        // Compute the vector Ap = A*p
        multAv(Ap, A, p, N, N);

        // compute the dot product np2 = (Ap, p)
        np2 = dot(p, Ap, N);
        printf("np2 = %lg\n", np2);

        // alpha
        alpha = (nr * nr) / np2;
        // printf("alpha = %lg\n", alpha);

        // compute the new x and r
        axpy(alpha, x, p, N);
        axpy(-alpha, r, Ap, N);

        // compute the norm of the residual (dot product, then sqrt)
        nr = dot(r, r, N);
        nr = sqrt(nr);
        // printf("nr = %lg\n", nr);

        // beta
        beta = (nr * nr) / (alpha * np2);
        // printf("beta = %lg\n", beta);

        // compute the new p
        xpay(beta, r, p, N);

        // increase the number of iterations
        num_it++;

        // printf("num_it %d -- nr %lg \n", num_it, nr);
    }

    // display the solution
    for (int i = 0; i < N; i++)
    {
        printf("x[%d] = %lg\n", i, x[i]);
    }

    return;
}
TP1/05_CG/CG_sq.h (new file)
@@ -0,0 +1 @@
void cg_sq(double *A, double *rhs, int N, double tol);
TP1/05_CG/Laplacien.mtx (new file)
@@ -0,0 +1,21 @@
%%MatrixMarket matrix coordinate real symmetric
%-------------------------------------------------------------------------------
% UF Sparse Matrix Collection, Tim Davis
% http://www.cise.ufl.edu/research/sparse/matrices/HB/nos3
% name: HB/nos3
% [SYMMETRIC MATRIX, FE APPROXIMATION TO BIHARMONIC OPERATOR ON PLATE]
% id: 219
% date: 1982
% author: H. Simon
% ed: I. Duff, R. Grimes, J. Lewis
% fields: title A name id date author ed kind
% kind: structural problem
%-------------------------------------------------------------------------------
4 4 7
1 1 2.0
1 2 -1.0
2 2 2.0
2 3 -1.0
3 3 2.0
3 4 -1.0
4 4 2.0
TP1/05_CG/Makefile (new file)
@@ -0,0 +1,18 @@
CC=gcc
MPICC=smpicc
CFLAGS=-g -O4

all: CG_par CG_sq

clean:
	rm -rf *.o CG_par CG_sq

%.o: %.c
	echo $@
	$(MPICC) -c -Wall -o $@ $<

CG_par: util.o CG_par.o main_par.o
	$(MPICC) -o $@ $^ -lm

CG_sq: util.o CG_sq.o main_sq.o
	$(MPICC) -o $@ $^ -lm
TP1/05_CG/main_par.c (new file)
@@ -0,0 +1,98 @@
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <math.h>

#include "util.h"
#include "CG_par.h"

int main(int argc, char* argv[]) {

    int size;
    int my_rank;

    FILE *f;
    int M, N, nz;

    double *A = NULL;
    double *rhs;

    double tol = 1e-6;

    // Make sure that the command line has one argument (name of the matrix file)

    if (argc != 2) {
        printf("usage : CG_par <file>\n");
        return EXIT_FAILURE;
    }

    //**************** MPI Initialization

    MPI_Init(&argc, &argv);

    // Get number of processes and check that 4 processes are used
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    if (size != 4) {
        printf("This application is meant to be run with 4 MPI processes.\n");
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }

    // Get my rank
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    //**************** READING OF THE MATRIX AND DISTRIBUTION OF THE BLOCKS OF ROWS TO EACH NODE

    // You can test with a small matrix ("Laplacien.mtx")
    // or a larger one ("nos3.mtx")

    f = fopen(argv[1], "r");

    // All nodes get the sizes
    mm_read_mtx_crd_size(f, &M, &N, &nz);
    //printf("%d %d %d\n", M, N, nz);

    // Reading of the matrix by node 0
    if (my_rank == 0) {

        A = (double *) malloc(M*N*sizeof(double));
        read_A(f, A, M, N, nz);

        // increase the diagonal to be sure to converge easily
        for (int i = 0; i < M; i++) {
            *(A+i*N+i) = *(A+i*N+i) + 10.0;
        }

    }

    if (f != stdin) fclose(f);

    // DISTRIBUTION OF THE BLOCKS => A_local(b, N)
    int b = M / size;
    double *A_local;

    A_local = (double *) malloc(b*N*sizeof(double));
    MPI_Scatter(A, b*N, MPI_DOUBLE, A_local, b*N, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    if (my_rank == 0) free(A);

    //**************** END OF THE READING OF THE MATRIX AND THE DISTRIBUTION OF THE BLOCKS OF ROWS TO EACH NODE

    //**************** PARALLEL CG (M == N)

    rhs = (double *) malloc(b*sizeof(double));

    // initialization of the right-hand side (local vector)
    for (int i = 0; i < b; i++) {
        rhs[i] = (float) (b*my_rank + i);
    }

    cg_par(A_local, rhs, N, b, tol);

    //**************** END OF PARALLEL CG

    MPI_Finalize();
    printf("The End\n");

    return EXIT_SUCCESS;
}
TP1/05_CG/main_sq.c (new file)
@@ -0,0 +1,79 @@
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mpi.h>

#include "util.h"
#include "CG_sq.h"


int main(int argc, char* argv[]) {

    int size;

    FILE *f;
    int M, N, nz;

    double *A = NULL;

    double *rhs;

    double tol = 1e-6;

    // Make sure that the command line has one argument (name of the matrix file)

    if (argc != 2) {
        printf("usage : CG_sq <file>\n");
        return EXIT_FAILURE;
    }

    MPI_Init(&argc, &argv);

    // Get number of processes and check that only 1 process is used
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    if (size != 1) {
        printf("This application is meant to be run with 1 MPI process.\n");
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }


    //**************** READING THE MATRIX

    // You can test with a small matrix ("Laplacien.mtx")
    // or a larger one ("nos3.mtx")

    f = fopen(argv[1], "r");

    mm_read_mtx_crd_size(f, &M, &N, &nz);
    //printf("%d %d %d\n", M, N, nz);

    A = (double *) malloc(M*N*sizeof(double));
    read_A(f, A, M, N, nz);

    // increase the diagonal to be sure to converge easily
    for (int i = 0; i < M; i++) {
        *(A+i*N+i) = *(A+i*N+i) + 10.0;
    }

    if (f != stdin) fclose(f);

    //**************** END OF READING THE MATRIX

    //**************** SEQUENTIAL CG (M == N)

    rhs = (double *) malloc(N*sizeof(double));

    // initialization of the right-hand side
    for (int i = 0; i < N; i++) {
        rhs[i] = (float) i;
    }
    cg_sq(A, rhs, N, tol);

    //**************** END OF SEQUENTIAL CG

    MPI_Finalize();
    printf("The End\n");

    return EXIT_SUCCESS;
}
TP1/05_CG/nos3.mtx (new file, 8416 lines; file diff suppressed because it is too large)
TP1/05_CG/util.c (new file)
@@ -0,0 +1,114 @@
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <ctype.h>
#include <string.h>

#include "util.h"

void multAv(double x[], double *A, double y[], int m, int n){

    for(int i = 0; i < m; i++){
        x[i] = 0.0;
        for(int j = 0; j < n; j++){
            x[i] += A[i*n + j] * y[j];
        }
    }
    return;
}

void copy_v(double x[], double y[], int n){

    for(int i = 0; i < n; i++){
        x[i] = y[i];
    }

    return;
}

double dot(double x[], double y[], int n){
    double res = 0.0;

    for(int i = 0; i < n; i++){
        res += x[i]*y[i];
    }

    return res;
}

void axpy(double a, double x[], double y[], int n){

    for(int i = 0; i < n; i++){
        x[i] = x[i] + a*y[i];
    }

    return;
}

void xpay(double a, double x[], double y[], int n){

    for(int i = 0; i < n; i++){
        y[i] = x[i] + a*y[i];
    }

    return;
}

int read_A(FILE *f, double *A, int M, int N, int nz){
    int i, j, k;
    double val;
    int error;

    for (i = 0; i < M; i++) {
        for(j = 0; j < N; j++) {
            *(A+i*N+j) = 0.0;
        }
    }

    for (k = 0; k < nz; k++) {
        error = fscanf(f, "%d %d %lg\n", &i, &j, &val);
        if(!error) exit(0);
        //printf("-- %d -- %d -- %lg\n", i, j, val);
        *(A + (i-1)*N + (j-1)) = val;
        // this is a symmetric matrix
        *(A + (j-1)*N + (i-1)) = val;
    }

    /*
    for (k = 0; k < nz; k++) {
        printf("---- %lg\n", *(A+k));
    }
    */

    return 0;
}

int mm_read_mtx_crd_size(FILE *f, int *M, int *N, int *nz)
{
    char line[MM_MAX_LINE_LENGTH];
    int num_items_read;

    /* set return null parameter values, in case we exit with errors */
    *M = *N = *nz = 0;

    /* now continue scanning until you reach the end-of-comments */
    do
    {
        if (fgets(line, MM_MAX_LINE_LENGTH, f) == NULL)
            return MM_PREMATURE_EOF;
    } while (line[0] == '%');

    /* line[] is either blank or has M, N, nz */
    if (sscanf(line, "%d %d %d", M, N, nz) == 3)
        return 0;

    else
        do
        {
            num_items_read = fscanf(f, "%d %d %d", M, N, nz);
            if (num_items_read == EOF) return MM_PREMATURE_EOF;
        }
        while (num_items_read != 3);

    return 0;
}
TP1/05_CG/util.h (new file)
@@ -0,0 +1,21 @@
#include <ctype.h>

#define MM_MAX_LINE_LENGTH 1025
#define MatrixMarketBanner "%%MatrixMarket"
#define MM_MAX_TOKEN_LENGTH 64
#define MM_PREMATURE_EOF 12

void multAv(double x[], double *A, double y[], int m, int n);

void copy_v(double x[], double y[], int n);

double dot(double x[], double y[], int n);

void axpy(double a, double x[], double y[], int n);

void xpay(double a, double x[], double y[], int n);

int read_A(FILE *f, double *A, int M, int N, int nz);

int mm_read_mtx_crd_size(FILE *f, int *M, int *N, int *nz);

TP1/init.sh (new executable file)
@@ -0,0 +1,6 @@
#!/bin/bash
SIMGRID=/mnt/n7fs/ens/tp_guivarch/opt2021/simgrid-3.31

export PATH=${SIMGRID}/bin:${PATH}

alias smpirun="smpirun -hostfile ${SIMGRID}/archis/cluster_hostfile.txt -platform ${SIMGRID}/archis/cluster_crossbar.xml"
TP1/tp_mpi.pdf (new binary file, not shown)
TP2/.vscode/settings.json (new file, vendored)
@@ -0,0 +1,7 @@
{
    "files.associations": {
        "*.html": "html",
        "*.toml": "toml",
        "*.bak": "c"
    }
}
TP2/Makefile (new file)
@@ -0,0 +1,26 @@
CC=gcc
MPICC=smpicc
LD=smpicc
LDFLAGS=
CFLAGS=-O4
CLIBS=-lblas -llapack
INCLUDES=
SOURCEDIR=src
BUILDDIR=build

all: dir main # test

test_env: dir who_am_i

dir:
	mkdir -p $(BUILDDIR)/bin

clean:
	rm -rf $(BUILDDIR)

%.o: $(SOURCEDIR)/%.c
	echo $@
	$(MPICC) -c -Wall -o $(BUILDDIR)/$@ $< $(CFLAGS) $(INCLUDES)

main: main.o gemms.o ex1.o ex2.o ex3.o utils.o dsmat.o
	$(LD) -o $(BUILDDIR)/bin/$@ $(addprefix $(BUILDDIR)/,$^) $(CLIBS) $(LDFLAGS)
TP2/README (new file)
@@ -0,0 +1 @@
https://laurent.fainsin.bzh/assets/CalcPar/
TP2/bench.csv (new file)
@@ -0,0 +1,151 @@
m,n,k,b,p,q,algo,lookahead,gflops
1024,1024,1024,256,2,2,p2p,0,7.475035
1024,1024,1024,256,2,2,p2p,0,7.475035
1024,1024,1024,256,2,2,p2p,0,7.475036
1024,1024,1024,256,2,2,p2p,0,7.475036
1024,1024,1024,256,2,2,p2p,0,7.475036
1024,1024,1024,256,2,2,bcast,0,7.471268
1024,1024,1024,256,2,2,bcast,0,7.471269
1024,1024,1024,256,2,2,bcast,0,7.471268
1024,1024,1024,256,2,2,bcast,0,7.471268
1024,1024,1024,256,2,2,bcast,0,7.471269
1024,1024,1024,256,2,2,p2p-i-la,1,14.306685
1024,1024,1024,256,2,2,p2p-i-la,1,14.306689
1024,1024,1024,256,2,2,p2p-i-la,1,14.306691
1024,1024,1024,256,2,2,p2p-i-la,1,14.306689
1024,1024,1024,256,2,2,p2p-i-la,1,14.306691
1024,1024,1024,256,2,2,p2p-i-la,2,9.856253
1024,1024,1024,256,2,2,p2p-i-la,2,9.856253
1024,1024,1024,256,2,2,p2p-i-la,2,9.856254
1024,1024,1024,256,2,2,p2p-i-la,2,9.856254
1024,1024,1024,256,2,2,p2p-i-la,2,9.856254
1024,1024,1024,256,2,2,p2p-i-la,3,14.317787
1024,1024,1024,256,2,2,p2p-i-la,3,14.317789
1024,1024,1024,256,2,2,p2p-i-la,3,14.317793
1024,1024,1024,256,2,2,p2p-i-la,3,14.317793
1024,1024,1024,256,2,2,p2p-i-la,3,14.317793
1024,1024,1024,256,2,2,p2p-i-la,4,14.317787
1024,1024,1024,256,2,2,p2p-i-la,4,14.317787
1024,1024,1024,256,2,2,p2p-i-la,4,14.317793
1024,1024,1024,256,2,2,p2p-i-la,4,14.317793
1024,1024,1024,256,2,2,p2p-i-la,4,14.317793
2048,2048,2048,256,2,2,p2p,0,14.951931
2048,2048,2048,256,2,2,p2p,0,14.951932
2048,2048,2048,256,2,2,p2p,0,14.951932
2048,2048,2048,256,2,2,p2p,0,14.951929
2048,2048,2048,256,2,2,p2p,0,14.951932
2048,2048,2048,256,2,2,bcast,0,14.950045
2048,2048,2048,256,2,2,bcast,0,14.950048
2048,2048,2048,256,2,2,bcast,0,14.950048
2048,2048,2048,256,2,2,bcast,0,14.950046
2048,2048,2048,256,2,2,bcast,0,14.950046
2048,2048,2048,256,2,2,p2p-i-la,1,28.642430
2048,2048,2048,256,2,2,p2p-i-la,1,28.642433
2048,2048,2048,256,2,2,p2p-i-la,1,28.642433
2048,2048,2048,256,2,2,p2p-i-la,1,28.642433
2048,2048,2048,256,2,2,p2p-i-la,1,28.642436
2048,2048,2048,256,2,2,p2p-i-la,2,23.366289
2048,2048,2048,256,2,2,p2p-i-la,2,23.366289
2048,2048,2048,256,2,2,p2p-i-la,2,23.366289
2048,2048,2048,256,2,2,p2p-i-la,2,23.366289
2048,2048,2048,256,2,2,p2p-i-la,2,23.366289
2048,2048,2048,256,2,2,p2p-i-la,3,28.653563
2048,2048,2048,256,2,2,p2p-i-la,3,28.653569
2048,2048,2048,256,2,2,p2p-i-la,3,28.653569
2048,2048,2048,256,2,2,p2p-i-la,3,28.653566
2048,2048,2048,256,2,2,p2p-i-la,3,28.653569
2048,2048,2048,256,2,2,p2p-i-la,4,23.369989
2048,2048,2048,256,2,2,p2p-i-la,4,23.369989
2048,2048,2048,256,2,2,p2p-i-la,4,23.369991
2048,2048,2048,256,2,2,p2p-i-la,4,23.369991
2048,2048,2048,256,2,2,p2p-i-la,4,23.369991
2048,2048,2048,256,2,2,p2p-i-la,5,28.653569
2048,2048,2048,256,2,2,p2p-i-la,5,28.653575
2048,2048,2048,256,2,2,p2p-i-la,5,28.653575
2048,2048,2048,256,2,2,p2p-i-la,5,28.653575
2048,2048,2048,256,2,2,p2p-i-la,5,28.653575
2048,2048,2048,256,2,2,p2p-i-la,6,23.369991
2048,2048,2048,256,2,2,p2p-i-la,6,23.369991
2048,2048,2048,256,2,2,p2p-i-la,6,23.369991
2048,2048,2048,256,2,2,p2p-i-la,6,23.369991
2048,2048,2048,256,2,2,p2p-i-la,6,23.369991
2048,2048,2048,256,2,2,p2p-i-la,7,28.659105
2048,2048,2048,256,2,2,p2p-i-la,7,28.659105
2048,2048,2048,256,2,2,p2p-i-la,7,28.659105
2048,2048,2048,256,2,2,p2p-i-la,7,28.659105
2048,2048,2048,256,2,2,p2p-i-la,7,28.659105
2048,2048,2048,256,2,2,p2p-i-la,8,28.659102
2048,2048,2048,256,2,2,p2p-i-la,8,28.659105
2048,2048,2048,256,2,2,p2p-i-la,8,28.659105
2048,2048,2048,256,2,2,p2p-i-la,8,28.659105
2048,2048,2048,256,2,2,p2p-i-la,8,28.659105
3072,3072,3072,256,2,2,p2p,0,22.428405
3072,3072,3072,256,2,2,p2p,0,22.428407
3072,3072,3072,256,2,2,p2p,0,22.428407
3072,3072,3072,256,2,2,p2p,0,22.428407
3072,3072,3072,256,2,2,p2p,0,22.428407
3072,3072,3072,256,2,2,bcast,0,22.427149
3072,3072,3072,256,2,2,bcast,0,22.427149
3072,3072,3072,256,2,2,bcast,0,22.427152
3072,3072,3072,256,2,2,bcast,0,22.427149
3072,3072,3072,256,2,2,bcast,0,22.427152
3072,3072,3072,256,2,2,p2p-i-la,1,42.976658
3072,3072,3072,256,2,2,p2p-i-la,1,42.976662
3072,3072,3072,256,2,2,p2p-i-la,1,42.976658
3072,3072,3072,256,2,2,p2p-i-la,1,42.976662
3072,3072,3072,256,2,2,p2p-i-la,1,42.976662
3072,3072,3072,256,2,2,p2p-i-la,2,33.027327
3072,3072,3072,256,2,2,p2p-i-la,2,33.027327
3072,3072,3072,256,2,2,p2p-i-la,2,33.027327
3072,3072,3072,256,2,2,p2p-i-la,2,33.027330
3072,3072,3072,256,2,2,p2p-i-la,2,33.027327
3072,3072,3072,256,2,2,p2p-i-la,3,42.987825
3072,3072,3072,256,2,2,p2p-i-la,3,42.987825
3072,3072,3072,256,2,2,p2p-i-la,3,42.987829
3072,3072,3072,256,2,2,p2p-i-la,3,42.987818
3072,3072,3072,256,2,2,p2p-i-la,3,42.987822
3072,3072,3072,256,2,2,p2p-i-la,4,37.356416
3072,3072,3072,256,2,2,p2p-i-la,4,37.356414
3072,3072,3072,256,2,2,p2p-i-la,4,37.356422
3072,3072,3072,256,2,2,p2p-i-la,4,37.356416
3072,3072,3072,256,2,2,p2p-i-la,4,37.356416
3072,3072,3072,256,2,2,p2p-i-la,5,42.991522
3072,3072,3072,256,2,2,p2p-i-la,5,42.991526
3072,3072,3072,256,2,2,p2p-i-la,5,42.991526
3072,3072,3072,256,2,2,p2p-i-la,5,42.991526
3072,3072,3072,256,2,2,p2p-i-la,5,42.991522
3072,3072,3072,256,2,2,p2p-i-la,6,37.359194
3072,3072,3072,256,2,2,p2p-i-la,6,37.359194
3072,3072,3072,256,2,2,p2p-i-la,6,37.359194
3072,3072,3072,256,2,2,p2p-i-la,6,37.359194
3072,3072,3072,256,2,2,p2p-i-la,6,37.359197
3072,3072,3072,256,2,2,p2p-i-la,7,42.991526
3072,3072,3072,256,2,2,p2p-i-la,7,42.991538
3072,3072,3072,256,2,2,p2p-i-la,7,42.991534
3072,3072,3072,256,2,2,p2p-i-la,7,42.991534
3072,3072,3072,256,2,2,p2p-i-la,7,42.991534
3072,3072,3072,256,2,2,p2p-i-la,8,37.359200
3072,3072,3072,256,2,2,p2p-i-la,8,37.359202
3072,3072,3072,256,2,2,p2p-i-la,8,37.359202
3072,3072,3072,256,2,2,p2p-i-la,8,37.359205
3072,3072,3072,256,2,2,p2p-i-la,8,37.359202
3072,3072,3072,256,2,2,p2p-i-la,9,42.991549
3072,3072,3072,256,2,2,p2p-i-la,9,42.991549
3072,3072,3072,256,2,2,p2p-i-la,9,42.991549
3072,3072,3072,256,2,2,p2p-i-la,9,42.991545
3072,3072,3072,256,2,2,p2p-i-la,9,42.991545
3072,3072,3072,256,2,2,p2p-i-la,10,37.359205
3072,3072,3072,256,2,2,p2p-i-la,10,37.359202
3072,3072,3072,256,2,2,p2p-i-la,10,37.359202
3072,3072,3072,256,2,2,p2p-i-la,10,37.359214
3072,3072,3072,256,2,2,p2p-i-la,10,37.359202
3072,3072,3072,256,2,2,p2p-i-la,11,42.995159
3072,3072,3072,256,2,2,p2p-i-la,11,42.995159
3072,3072,3072,256,2,2,p2p-i-la,11,42.995144
3072,3072,3072,256,2,2,p2p-i-la,11,42.995167
3072,3072,3072,256,2,2,p2p-i-la,11,42.995152
3072,3072,3072,256,2,2,p2p-i-la,12,42.995159
3072,3072,3072,256,2,2,p2p-i-la,12,42.995159
3072,3072,3072,256,2,2,p2p-i-la,12,42.995152
3072,3072,3072,256,2,2,p2p-i-la,12,42.995171
3072,3072,3072,256,2,2,p2p-i-la,12,42.995159
39
TP2/bench.sh
Executable file

@@ -0,0 +1,39 @@
source utils.sh
echo BENCHMARKING THE METHODS
# you can modify these values
p=2
q=2
P=$((p * q))
#generate_hostfile $P

export OMP_NUM_THREADS=1
export MKL_NUM_THREADS=1

# proper benchmark <--- this could be a TODO for students ? (as in, show weak scaling and/or strong scaling)
#mpi_options="-hostfile hostfiles/hostfile.$P.txt"
mpi_options="-platform platforms/cluster_crossbar.xml -hostfile hostfiles/cluster_hostfile.txt -np $P"
b=256
iter=5
traces="bench_traces"
out="bench_outputs"
csv="bench.csv"
echo m,n,k,b,p,q,algo,lookahead,gflops >$csv
for i in 4 8 12; do

  n=$((i * b))
  m=$n
  k=$n
  la=0
  options="-c"

  for algo in p2p bcast; do
    run
  done

  for la in $(seq 1 $((n / b))); do
    algo="p2p-i-la"
    options="-c -l $la"
    run
  done

done
16
TP2/check.csv
Normal file

@@ -0,0 +1,16 @@
m,n,k,b,p,q,algo,lookahead,gflops
2,2,2,2,2,2,p2p,0,0.000172
2,2,2,2,2,2,p2p,0,0.000172
2,2,2,2,2,2,p2p,0,0.000172
2,2,2,2,2,2,p2p,0,0.000172
2,2,2,2,2,2,p2p,0,0.000172
2,2,2,2,2,2,bcast,0,0.000075
2,2,2,2,2,2,bcast,0,0.000075
2,2,2,2,2,2,bcast,0,0.000075
2,2,2,2,2,2,bcast,0,0.000075
2,2,2,2,2,2,bcast,0,0.000075
2,2,2,2,2,2,p2p-i-la,1,0.000223
2,2,2,2,2,2,p2p-i-la,1,0.000223
2,2,2,2,2,2,p2p-i-la,1,0.000223
2,2,2,2,2,2,p2p-i-la,1,0.000223
2,2,2,2,2,2,p2p-i-la,1,0.000223
39
TP2/check.sh
Executable file

@@ -0,0 +1,39 @@
source utils.sh
echo BENCHMARKING THE METHODS
# you can modify these values
p=2
q=2
P=$((p * q))
#generate_hostfile $P

export OMP_NUM_THREADS=1
export MKL_NUM_THREADS=1

# proper benchmark <--- this could be a TODO for students ? (as in, show weak scaling and/or strong scaling)
#mpi_options="-hostfile hostfiles/hostfile.$P.txt"
mpi_options="-platform platforms/cluster_crossbar.xml -hostfile hostfiles/cluster_hostfile.txt -np 4"
b=2
iter=5
traces="check_traces"
out="check_outputs"
csv="check.csv"
echo m,n,k,b,p,q,algo,lookahead,gflops >$csv
for i in 1; do

  n=$((i * b))
  m=$n
  k=$n
  la=0
  options="-c"

  for algo in p2p bcast; do
    run
  done

  for la in $(seq 1 $((n / b))); do
    algo="p2p-i-la"
    options="-c -l $la"
    run
  done

done
256
TP2/hostfiles/cluster_hostfile.txt
Normal file

@@ -0,0 +1,256 @@
host-0.hawaii.edu
|
||||
host-1.hawaii.edu
|
||||
host-2.hawaii.edu
|
||||
host-3.hawaii.edu
|
||||
host-4.hawaii.edu
|
||||
host-5.hawaii.edu
|
||||
host-6.hawaii.edu
|
||||
host-7.hawaii.edu
|
||||
host-8.hawaii.edu
|
||||
host-9.hawaii.edu
|
||||
host-10.hawaii.edu
|
||||
host-11.hawaii.edu
|
||||
host-12.hawaii.edu
|
||||
host-13.hawaii.edu
|
||||
host-14.hawaii.edu
|
||||
host-15.hawaii.edu
|
||||
host-16.hawaii.edu
|
||||
host-17.hawaii.edu
|
||||
host-18.hawaii.edu
|
||||
host-19.hawaii.edu
|
||||
host-20.hawaii.edu
|
||||
host-21.hawaii.edu
|
||||
host-22.hawaii.edu
|
||||
host-23.hawaii.edu
|
||||
host-24.hawaii.edu
|
||||
host-25.hawaii.edu
|
||||
host-26.hawaii.edu
|
||||
host-27.hawaii.edu
|
||||
host-28.hawaii.edu
|
||||
host-29.hawaii.edu
|
||||
host-30.hawaii.edu
|
||||
host-31.hawaii.edu
|
||||
host-32.hawaii.edu
|
||||
host-33.hawaii.edu
|
||||
host-34.hawaii.edu
|
||||
host-35.hawaii.edu
|
||||
host-36.hawaii.edu
|
||||
host-37.hawaii.edu
|
||||
host-38.hawaii.edu
|
||||
host-39.hawaii.edu
|
||||
host-40.hawaii.edu
|
||||
host-41.hawaii.edu
|
||||
host-42.hawaii.edu
|
||||
host-43.hawaii.edu
|
||||
host-44.hawaii.edu
|
||||
host-45.hawaii.edu
|
||||
host-46.hawaii.edu
|
||||
host-47.hawaii.edu
|
||||
host-48.hawaii.edu
|
||||
host-49.hawaii.edu
|
||||
host-50.hawaii.edu
|
||||
host-51.hawaii.edu
|
||||
host-52.hawaii.edu
|
||||
host-53.hawaii.edu
|
||||
host-54.hawaii.edu
|
||||
host-55.hawaii.edu
|
||||
host-56.hawaii.edu
|
||||
host-57.hawaii.edu
|
||||
host-58.hawaii.edu
|
||||
host-59.hawaii.edu
|
||||
host-60.hawaii.edu
|
||||
host-61.hawaii.edu
|
||||
host-62.hawaii.edu
|
||||
host-63.hawaii.edu
|
||||
host-64.hawaii.edu
|
||||
host-65.hawaii.edu
|
||||
host-66.hawaii.edu
|
||||
host-67.hawaii.edu
|
||||
host-68.hawaii.edu
|
||||
host-69.hawaii.edu
|
||||
host-70.hawaii.edu
|
||||
host-71.hawaii.edu
|
||||
host-72.hawaii.edu
|
||||
host-73.hawaii.edu
|
||||
host-74.hawaii.edu
|
||||
host-75.hawaii.edu
|
||||
host-76.hawaii.edu
|
||||
host-77.hawaii.edu
|
||||
host-78.hawaii.edu
|
||||
host-79.hawaii.edu
|
||||
host-80.hawaii.edu
|
||||
host-81.hawaii.edu
|
||||
host-82.hawaii.edu
|
||||
host-83.hawaii.edu
|
||||
host-84.hawaii.edu
|
||||
host-85.hawaii.edu
|
||||
host-86.hawaii.edu
|
||||
host-87.hawaii.edu
|
||||
host-88.hawaii.edu
|
||||
host-89.hawaii.edu
|
||||
host-90.hawaii.edu
|
||||
host-91.hawaii.edu
|
||||
host-92.hawaii.edu
|
||||
host-93.hawaii.edu
|
||||
host-94.hawaii.edu
|
||||
host-95.hawaii.edu
|
||||
host-96.hawaii.edu
|
||||
host-97.hawaii.edu
|
||||
host-98.hawaii.edu
|
||||
host-99.hawaii.edu
|
||||
host-100.hawaii.edu
|
||||
host-101.hawaii.edu
|
||||
host-102.hawaii.edu
|
||||
host-103.hawaii.edu
|
||||
host-104.hawaii.edu
|
||||
host-105.hawaii.edu
|
||||
host-106.hawaii.edu
|
||||
host-107.hawaii.edu
|
||||
host-108.hawaii.edu
|
||||
host-109.hawaii.edu
|
||||
host-110.hawaii.edu
|
||||
host-111.hawaii.edu
|
||||
host-112.hawaii.edu
|
||||
host-113.hawaii.edu
|
||||
host-114.hawaii.edu
|
||||
host-115.hawaii.edu
|
||||
host-116.hawaii.edu
|
||||
host-117.hawaii.edu
|
||||
host-118.hawaii.edu
|
||||
host-119.hawaii.edu
|
||||
host-120.hawaii.edu
|
||||
host-121.hawaii.edu
|
||||
host-122.hawaii.edu
|
||||
host-123.hawaii.edu
|
||||
host-124.hawaii.edu
|
||||
host-125.hawaii.edu
|
||||
host-126.hawaii.edu
|
||||
host-127.hawaii.edu
|
||||
host-128.hawaii.edu
|
||||
host-129.hawaii.edu
|
||||
host-130.hawaii.edu
|
||||
host-131.hawaii.edu
|
||||
host-132.hawaii.edu
|
||||
host-133.hawaii.edu
|
||||
host-134.hawaii.edu
|
||||
host-135.hawaii.edu
|
||||
host-136.hawaii.edu
|
||||
host-137.hawaii.edu
|
||||
host-138.hawaii.edu
|
||||
host-139.hawaii.edu
|
||||
host-140.hawaii.edu
|
||||
host-141.hawaii.edu
|
||||
host-142.hawaii.edu
|
||||
host-143.hawaii.edu
|
||||
host-144.hawaii.edu
|
||||
host-145.hawaii.edu
|
||||
host-146.hawaii.edu
|
||||
host-147.hawaii.edu
|
||||
host-148.hawaii.edu
|
||||
host-149.hawaii.edu
|
||||
host-150.hawaii.edu
|
||||
host-151.hawaii.edu
|
||||
host-152.hawaii.edu
|
||||
host-153.hawaii.edu
|
||||
host-154.hawaii.edu
|
||||
host-155.hawaii.edu
|
||||
host-156.hawaii.edu
|
||||
host-157.hawaii.edu
|
||||
host-158.hawaii.edu
|
||||
host-159.hawaii.edu
|
||||
host-160.hawaii.edu
|
||||
host-161.hawaii.edu
|
||||
host-162.hawaii.edu
|
||||
host-163.hawaii.edu
|
||||
host-164.hawaii.edu
|
||||
host-165.hawaii.edu
|
||||
host-166.hawaii.edu
|
||||
host-167.hawaii.edu
|
||||
host-168.hawaii.edu
|
||||
host-169.hawaii.edu
|
||||
host-170.hawaii.edu
|
||||
host-171.hawaii.edu
|
||||
host-172.hawaii.edu
|
||||
host-173.hawaii.edu
|
||||
host-174.hawaii.edu
|
||||
host-175.hawaii.edu
|
||||
host-176.hawaii.edu
|
||||
host-177.hawaii.edu
|
||||
host-178.hawaii.edu
|
||||
host-179.hawaii.edu
|
||||
host-180.hawaii.edu
|
||||
host-181.hawaii.edu
|
||||
host-182.hawaii.edu
|
||||
host-183.hawaii.edu
|
||||
host-184.hawaii.edu
|
||||
host-185.hawaii.edu
|
||||
host-186.hawaii.edu
|
||||
host-187.hawaii.edu
|
||||
host-188.hawaii.edu
|
||||
host-189.hawaii.edu
|
||||
host-190.hawaii.edu
|
||||
host-191.hawaii.edu
|
||||
host-192.hawaii.edu
|
||||
host-193.hawaii.edu
|
||||
host-194.hawaii.edu
|
||||
host-195.hawaii.edu
|
||||
host-196.hawaii.edu
|
||||
host-197.hawaii.edu
|
||||
host-198.hawaii.edu
|
||||
host-199.hawaii.edu
|
||||
host-200.hawaii.edu
|
||||
host-201.hawaii.edu
|
||||
host-202.hawaii.edu
|
||||
host-203.hawaii.edu
|
||||
host-204.hawaii.edu
|
||||
host-205.hawaii.edu
|
||||
host-206.hawaii.edu
|
||||
host-207.hawaii.edu
|
||||
host-208.hawaii.edu
|
||||
host-209.hawaii.edu
|
||||
host-210.hawaii.edu
|
||||
host-211.hawaii.edu
|
||||
host-212.hawaii.edu
|
||||
host-213.hawaii.edu
|
||||
host-214.hawaii.edu
|
||||
host-215.hawaii.edu
|
||||
host-216.hawaii.edu
|
||||
host-217.hawaii.edu
|
||||
host-218.hawaii.edu
|
||||
host-219.hawaii.edu
|
||||
host-220.hawaii.edu
|
||||
host-221.hawaii.edu
|
||||
host-222.hawaii.edu
|
||||
host-223.hawaii.edu
|
||||
host-224.hawaii.edu
|
||||
host-225.hawaii.edu
|
||||
host-226.hawaii.edu
|
||||
host-227.hawaii.edu
|
||||
host-228.hawaii.edu
|
||||
host-229.hawaii.edu
|
||||
host-230.hawaii.edu
|
||||
host-231.hawaii.edu
|
||||
host-232.hawaii.edu
|
||||
host-233.hawaii.edu
|
||||
host-234.hawaii.edu
|
||||
host-235.hawaii.edu
|
||||
host-236.hawaii.edu
|
||||
host-237.hawaii.edu
|
||||
host-238.hawaii.edu
|
||||
host-239.hawaii.edu
|
||||
host-240.hawaii.edu
|
||||
host-241.hawaii.edu
|
||||
host-242.hawaii.edu
|
||||
host-243.hawaii.edu
|
||||
host-244.hawaii.edu
|
||||
host-245.hawaii.edu
|
||||
host-246.hawaii.edu
|
||||
host-247.hawaii.edu
|
||||
host-248.hawaii.edu
|
||||
host-249.hawaii.edu
|
||||
host-250.hawaii.edu
|
||||
host-251.hawaii.edu
|
||||
host-252.hawaii.edu
|
||||
host-253.hawaii.edu
|
||||
host-254.hawaii.edu
|
||||
host-255.hawaii.edu
|
16
TP2/hostfiles/hostfile.txt
Normal file

@@ -0,0 +1,16 @@
node-0.simgrid.org
node-1.simgrid.org
node-2.simgrid.org
node-3.simgrid.org
node-4.simgrid.org
node-5.simgrid.org
node-6.simgrid.org
node-7.simgrid.org
node-8.simgrid.org
node-9.simgrid.org
node-10.simgrid.org
node-11.simgrid.org
node-12.simgrid.org
node-13.simgrid.org
node-14.simgrid.org
node-15.simgrid.org
4
TP2/init.sh
Normal file

@@ -0,0 +1,4 @@
#!/bin/bash
SIMGRID=/mnt/n7fs/ens/tp_guivarch/opt2021/simgrid-3.31

export PATH=${SIMGRID}/bin:${PATH}
117
TP2/log.txt
Normal file

@@ -0,0 +1,117 @@
File smpi_simgrid.trace
|
||||
|
||||
Errors :
|
||||
150 : Unknown container: 0
|
||||
153 : Unknown container: 0
|
||||
156 : Unknown container: 0
|
||||
165 : Unknown container: 0
|
||||
168 : Unknown container: 0
|
||||
171 : Unknown container: 0
|
||||
185 : Unknown container: 0
|
||||
191 : Unknown container: 0
|
||||
199 : Unknown container: 0
|
||||
205 : Unknown container: 0
|
||||
207 : Unknown container: 0
|
||||
213 : Unknown container: 0
|
||||
216 : Unknown container: 0
|
||||
221 : Unknown container: 0
|
||||
223 : Unknown container: 0
|
||||
231 : Unknown container: 0
|
||||
236 : Unknown container: 0
|
||||
243 : Unknown container: 0
|
||||
275 : Unknown container: 0
|
||||
283 : Unknown container: 0
|
||||
285 : Unknown container: 0
|
||||
287 : Unknown container: 0
|
||||
294 : Unknown container: 0
|
||||
303 : Unknown container: 0
|
||||
362 : Unknown container: 0
|
||||
364 : Unknown container: 0
|
||||
366 : Unknown container: 0
|
||||
371 : Unknown container: 0
|
||||
373 : Unknown container: 0
|
||||
375 : Unknown container: 0
|
||||
380 : Unknown container: 0
|
||||
382 : Unknown container: 0
|
||||
384 : Unknown container: 0
|
||||
389 : Unknown container: 0
|
||||
391 : Unknown container: 0
|
||||
393 : Unknown container: 0
|
||||
398 : Unknown container: 0
|
||||
400 : Unknown container: 0
|
||||
402 : Unknown container: 0
|
||||
407 : Unknown container: 0
|
||||
409 : Unknown container: 0
|
||||
411 : Unknown container: 0
|
||||
416 : Unknown container: 0
|
||||
418 : Unknown container: 0
|
||||
420 : Unknown container: 0
|
||||
425 : Unknown container: 0
|
||||
427 : Unknown container: 0
|
||||
429 : Unknown container: 0
|
||||
434 : Unknown container: 0
|
||||
436 : Unknown container: 0
|
||||
438 : Unknown container: 0
|
||||
443 : Unknown container: 0
|
||||
445 : Unknown container: 0
|
||||
447 : Unknown container: 0
|
||||
570 : Unknown container: 0
|
||||
573 : Unknown container: 0
|
||||
576 : Unknown container: 0
|
||||
585 : Unknown container: 0
|
||||
588 : Unknown container: 0
|
||||
591 : Unknown container: 0
|
||||
604 : Unknown container: 0
|
||||
612 : Unknown container: 0
|
||||
619 : Unknown container: 0
|
||||
625 : Unknown container: 0
|
||||
627 : Unknown container: 0
|
||||
633 : Unknown container: 0
|
||||
635 : Unknown container: 0
|
||||
641 : Unknown container: 0
|
||||
643 : Unknown container: 0
|
||||
650 : Unknown container: 0
|
||||
656 : Unknown container: 0
|
||||
663 : Unknown container: 0
|
||||
695 : Unknown container: 0
|
||||
703 : Unknown container: 0
|
||||
705 : Unknown container: 0
|
||||
707 : Unknown container: 0
|
||||
713 : Unknown container: 0
|
||||
723 : Unknown container: 0
|
||||
782 : Unknown container: 0
|
||||
784 : Unknown container: 0
|
||||
786 : Unknown container: 0
|
||||
791 : Unknown container: 0
|
||||
793 : Unknown container: 0
|
||||
795 : Unknown container: 0
|
||||
800 : Unknown container: 0
|
||||
802 : Unknown container: 0
|
||||
804 : Unknown container: 0
|
||||
809 : Unknown container: 0
|
||||
811 : Unknown container: 0
|
||||
813 : Unknown container: 0
|
||||
818 : Unknown container: 0
|
||||
820 : Unknown container: 0
|
||||
822 : Unknown container: 0
|
||||
827 : Unknown container: 0
|
||||
829 : Unknown container: 0
|
||||
831 : Unknown container: 0
|
||||
836 : Unknown container: 0
|
||||
838 : Unknown container: 0
|
||||
840 : Unknown container: 0
|
||||
845 : Unknown container: 0
|
||||
847 : Unknown container: 0
|
||||
849 : Unknown container: 0
|
||||
854 : Unknown container: 0
|
||||
856 : Unknown container: 0
|
||||
858 : Unknown container: 0
|
||||
863 : Unknown container: 0
|
||||
865 : Unknown container: 0
|
||||
867 : Unknown container: 0
|
||||
|
||||
Warnings :
|
||||
1 : the definition is not identified
|
||||
2 : the definition is not identified
|
||||
|
||||
Your trace has 108 errors and 2 warnings.
|
7
TP2/platforms/cluster_crossbar.xml
Normal file

@@ -0,0 +1,7 @@
<?xml version='1.0'?>
<!DOCTYPE platform SYSTEM "http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd">
<platform version="4.1">
  <zone id="AS0" routing="Full">
    <cluster id="my_cluster" prefix="host-" suffix=".hawaii.edu" radical="0-255" speed="1Gf" bw="125Mbps" lat="5us"/>
  </zone>
</platform>
17
TP2/platforms/cluster_fat_tree.xml
Normal file

@@ -0,0 +1,17 @@
<?xml version='1.0'?>
<!DOCTYPE platform SYSTEM "https://simgrid.org/simgrid.dtd">
<platform version="4.1">
  <!-- This is an example of a fat-tree cluster.
       It is taken from figure 1(b) of the paper "D-Mod-K Routing Providing Non-Blocking Traffic for Shift Permutations on
       Real Life Fat Trees", available at https://ece.technion.ac.il/wp-content/uploads/2021/01/publication_776.pdf
       This defines a two-level fat tree, with 4 leaf switches connected to 4 nodes each and 2 core switches connected to
       each leaf switch by two cables -->

  <zone id="world" routing="Full">
    <cluster id="bob_cluster"
             prefix="node-" radical="0-15" suffix=".simgrid.org"
             speed="1Gf" bw="125MBps" lat="50us"
             topology="FAT_TREE" topo_parameters="2;4,4;1,2;1,2"
             loopback_bw="100MBps" loopback_lat="0" />
  </zone>
</platform>
17
TP2/platforms/default.xml
Normal file

@@ -0,0 +1,17 @@
<?xml version='1.0'?>
<!DOCTYPE platform SYSTEM "https://simgrid.org/simgrid.dtd">
<platform version="4.1">
  <!-- This is an example of a fat-tree cluster.
       It is taken from figure 1(b) of the paper "D-Mod-K Routing Providing Non-Blocking Traffic for Shift Permutations on
       Real Life Fat Trees", available at https://ece.technion.ac.il/wp-content/uploads/2021/01/publication_776.pdf
       This defines a two-level fat tree, with 4 leaf switches connected to 4 nodes each and 2 core switches connected to
       each leaf switch by two cables -->

  <zone id="world" routing="Full">
    <cluster id="bob_cluster"
             prefix="node-" radical="0-15" suffix=".simgrid.org"
             speed="1Gf" bw="125MBps" lat="50us"
             topology="FAT_TREE" topo_parameters="2;4,4;1,2;1,2"
             loopback_bw="100MBps" loopback_lat="0" />
  </zone>
</platform>
277
TP2/platforms/simgrid_update_xml.pl
Executable file

@@ -0,0 +1,277 @@
#! /usr/bin/env perl
|
||||
eval 'exec perl -S $0 ${1+"$@"}'
|
||||
if $running_under_some_shell;
|
||||
|
||||
# This script updates the simgrid XML file passed as argument (modification in place)
|
||||
# It is built to do the conversion incrementally.
|
||||
|
||||
# Copyright (c) 2006-2022. The SimGrid Team.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify it
|
||||
# under the terms of the license (GNU LGPL) which comes with this package.
|
||||
|
||||
=encoding UTF-8
|
||||
|
||||
=head1 NAME
|
||||
|
||||
simgrid_update_xml - updates simgrid XML files to latest version
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
B<simgrid_update_xml> I<xml_file>
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
simgrid_update_xml updates the simgrid XML file passed as argument. The file
|
||||
is modified in place, without any kind of backup. You may want to save a copy
|
||||
before running the script.
|
||||
|
||||
In SimGrid XML files, the standard version is indicated in the version
|
||||
attribute of the platform tag. Current version is 4. Here is a list of major
|
||||
changes in each version.
|
||||
|
||||
=over 4
|
||||
|
||||
=item B<Version 0:> Used before SimGrid 3.3
|
||||
|
||||
=item B<Version 1:> Introduced in SimGrid 3.3
|
||||
|
||||
=over 4
|
||||
|
||||
=item
|
||||
|
||||
The version attribute of platform were added to allow file versioning.
|
||||
|
||||
=item
|
||||
|
||||
The link bandwidth changed from Mb/s to b/s; and the CPU power were changed
|
||||
from MFlop/s to Flop/s
|
||||
|
||||
=back
|
||||
|
||||
=item B<Version 2:> Introduced in SimGrid 3.4
|
||||
|
||||
=over
|
||||
|
||||
=item
|
||||
|
||||
Several tags were renamed:
|
||||
|
||||
CPU -> HOST
|
||||
NETWORK_LINK -> LINK
|
||||
ROUTE_ELEMENT -> LINK_CTN
|
||||
PLATFORM_DESCRIPTION -> PLATFORM
|
||||
|
||||
=back
|
||||
|
||||
=item B<Version 3:> Introduced in SimGrid 3.5
|
||||
|
||||
=over 4
|
||||
|
||||
=item
|
||||
|
||||
The AS tag were introduced. Every platform should now contain an englobing AS
|
||||
tag.
|
||||
|
||||
=item
|
||||
|
||||
Routes are now symmetric by default.
|
||||
|
||||
=item
|
||||
|
||||
Several tags were renamed (for sake of XML sanity):
|
||||
|
||||
LINK:CTN -> LINK_CTN
|
||||
TRACE:CONNECT -> TRACE_CONNECT
|
||||
|
||||
=back
|
||||
|
||||
=item B<Version 4:> Introduced in SimGrid 3.13
|
||||
|
||||
=over 4
|
||||
|
||||
=item
|
||||
|
||||
Rename the attributes describing the amount of flop that a host / peer / cluster / cabinet can deliver per second.
|
||||
|
||||
<host power=...> -> <host speed=...>
|
||||
|
||||
=item
|
||||
|
||||
In <trace_connect>, attribute kind="POWER" is now kind="SPEED".
|
||||
|
||||
=item
|
||||
|
||||
The DOCTYPE points to the right URL.
|
||||
|
||||
=item
|
||||
|
||||
Units are now mandatory in attributes. USE THE SCRIPT sg_xml_unit_converter.py TO CONVERT THIS
|
||||
|
||||
- speed. Old default: 'f' or 'flops'. Also defined:
|
||||
'Yf', 'Zf', 'Ef', 'Pf', 'Tf', 'Gf', 'Mf', 'kf'
|
||||
'yottaflops', 'zettaflops', 'exaflops', 'petaflops', 'teraflops', 'gigaflops', 'megaflops', 'kiloflops'
|
||||
|
||||
- bandwidth. Old default: 'Bps' bytes per second (or 'bps' but 1 Bps = 8 bps)
|
||||
Also defined in bytes: 'TiBps', 'GiBps', 'MiBps', 'KiBps', 'TBps', 'GBps', 'MBps', 'kBps', 'Bps'
|
||||
And the same in bits: 'Tibps', 'Gibps', 'Mibps', 'Kibps', 'Tbps', 'Gbps', 'Mbps', 'kbps', 'bps'
|
||||
|
||||
- latency. Old default: 's' second. Also defined:
|
||||
'w' week, 'd' day, 'h' hour, 'm' minute, 'ms' millisecond, 'us' microsecond, 'ns' nanosecond, 'ps' picosecond
|
||||
|
||||
|
||||
=back
|
||||
|
||||
=item B<Version 4.1:> Introduced in SimGrid 3.16 (this is the current version).
|
||||
|
||||
=over 4
|
||||
|
||||
=item
|
||||
|
||||
Rename a few tags, but in a backward-compatible manner: the old names are still accepted.
|
||||
|
||||
AS -> zone
|
||||
ASroute -> zoneRoute
|
||||
bypassAsRoute -> bypassZoneRoute
|
||||
process -> actor
|
||||
|
||||
=back
|
||||
|
||||
=item Other backward-compatible changes (old syntax is still accepted) for which we did not bump the DTD version:
|
||||
|
||||
=over 4
|
||||
|
||||
=item
|
||||
|
||||
Rename the FULLDUPLEX sharing into SPLITDUPLEX.
|
||||
|
||||
=item
|
||||
|
||||
In <host> and <peer>, rename the 'availability_file' attribute into 'speed_file'.
|
||||
|
||||
=back
|
||||
|
||||
=back
|
||||
|
||||
=head1 AUTHORS
|
||||
|
||||
The SimGrid team
|
||||
|
||||
=head1 COPYRIGHT AND LICENSE
|
||||
|
||||
Copyright (c) 2006-2022. The SimGrid Team. All rights reserved.
|
||||
|
||||
This program is free software; you may redistribute it and/or modify it
|
||||
under the terms of GNU LGPL (v2.1) license.
|
||||
|
||||
=cut
|
||||
|
||||
|
||||
use strict;
|
||||
|
||||
my $fromversion=-1;
|
||||
my $toversion=4.1;
|
||||
|
||||
my $filename = $ARGV[0] or die "Usage: simgrid_update_xml.pl file_to_convert.xml\nPlease provide an XML to convert as a parameter.\n";
|
||||
open INPUT, "$filename" or die "Cannot open input file $filename: $!\n";
|
||||
|
||||
my $output_string = "<?xml version='1.0'?>\n".
|
||||
"<!DOCTYPE platform SYSTEM \"https://simgrid.org/simgrid.dtd\">\n".
|
||||
"<platform version=\"$toversion\">\n";
|
||||
|
||||
my($AS_opened)=0;
|
||||
|
||||
my $line;
|
||||
while (defined($line = <INPUT>)) {
|
||||
chomp $line;
|
||||
# eat the header, whatever form it has
|
||||
next if ($line =~ s/<\?xml[^>]*>// && ! $line =~ /\S/); # just in case several tags are on the same line
|
||||
next if ($line =~ s/<!DOCTYPE[^>]*>// && ! $line =~ /\S/);
|
||||
|
||||
if ($line =~ s/<platform(_description)? *>//) {
|
||||
$fromversion = 0;
|
||||
print "$filename was using version 0\n";
|
||||
next if !$line =~ /\S/;
|
||||
} elsif ($line =~ s/<platform.*version=["']*([0-9.]*)["']*>//) {
|
||||
$fromversion = $1;
|
||||
if ($fromversion == $toversion) {
|
||||
warn "Input platform file $filename is already conformant to version $fromversion. This should be a no-op.\n";
|
||||
}
|
||||
if ($fromversion > $toversion) {
|
||||
die "Input platform file $filename is more recent than this script (file version: $fromversion; script version: $toversion)\n";
|
||||
}
|
||||
next if !$line =~ /\S/;
|
||||
print "$filename was using version $fromversion\n";
|
||||
}
|
||||
|
||||
if ($fromversion == 0) {
|
||||
while ($line =~ m|^(.*?)<cpu(.*?)power="([^"]*)"(.*)$|) {
|
||||
$line = "$1TOTOTUTUTATA${2}TOTOTUTUTATA".($3*1000000)."TOTOTUTUTATA${4}";
|
||||
}
|
||||
while ($line =~ /^(.*?)TOTOTUTUTATA(.*?)TOTOTUTUTATA(.*?)TOTOTUTUTATA(.*)$/) {
|
||||
$line = "$1<cpu${2}power=\"$3\"$4";
|
||||
}
|
||||
while ($line =~ m|^(.*?)<network_link(.*?)bandwidth="([^"]*)"(.*?)$|) {
|
||||
$line = "$1TOTOTUTUTATA${2}TOTOTUTUTATA".($3*1000000)."TOTOTUTUTATA${4}";
|
||||
}
|
||||
while ($line =~ /^(.*?)TOTOTUTUTATA(.*?)TOTOTUTUTATA(.*?)TOTOTUTUTATA(.*?)$/) {
|
||||
$line = "$1<network_link${2}bandwidth=\"$3\"$4";
|
||||
}
|
||||
}
|
||||
|
||||
if ($fromversion < 2) {
|
||||
# The renamings (\b=zero-width word boundary check)
|
||||
$line =~ s/\bplatform_description\b/platform/g;
|
||||
$line =~ s/\bname\b/id/g;
|
||||
$line =~ s/\bcpu\b/host/g;
|
||||
$line =~ s/\bnetwork_link\b/link/g;
|
||||
$line =~ s/\broute_element\b/link:ctn/g;
|
||||
}
|
||||
|
||||
if ($fromversion < 3) {
|
||||
$line =~ s/\blink:ctn\b/link_ctn/g;
|
||||
$line =~ s/\btrace:connect\b/trace_connect/g;
|
||||
|
||||
if($AS_opened && (($line=~ /<\/platform>/) || ($line=~ /<process/))) {
|
||||
$output_string .= "</AS>\n";
|
||||
$AS_opened = 0;
|
||||
}
|
||||
|
||||
if( (!$AS_opened) && (
|
||||
($line =~ /<host/) ||
|
||||
($line =~ /<link/) ||
|
||||
($line =~ /<cluster/) ||
|
||||
($line =~ /<router/)
|
||||
)) {
|
||||
$output_string .= " <AS id=\"AS0\" routing=\"Full\">\n";
|
||||
$AS_opened=1;
|
||||
}
|
||||
|
||||
if($line=~/<route /){$line =~ s/\<route/\<route symmetrical=\"NO\"/g;}
|
||||
}
|
||||
if ($fromversion < 4) {
|
||||
$line =~ s/\bpower\b/speed/g;
|
||||
$line =~ s/\bkind="POWER"/kind="SPEED"/g;
|
||||
}
|
||||
if ($fromversion < 4.1) {
|
||||
$line =~ s/\bAS\b/zone/g;
|
||||
$line =~ s/\bASroute\b/zoneRoute/g;
|
||||
$line =~ s/\bbypassAsRoute\b/bypassZoneRoute/g;
|
||||
$line =~ s/\bprocess\b/actor/g;
|
||||
}
|
||||
$line =~ s/\bFULLDUPLEX\b/SPLITDUPLEX/g;
|
||||
$line =~ s/\bavailability_file\b/speed_file/g;
|
||||
|
||||
$output_string .= "$line\n";
|
||||
}
|
||||
|
||||
close INPUT;
|
||||
|
||||
if ($fromversion == -1) {
|
||||
die "Cannot retrieve the platform version of $filename\n";
|
||||
}
|
||||
|
||||
open OUTPUT, "> $filename";
|
||||
print OUTPUT $output_string;
|
||||
close OUTPUT;
|
360
TP2/src/dsmat.c
Normal file

@@ -0,0 +1,360 @@
#include <mpi.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <cblas.h>
|
||||
|
||||
#include "simgrid/actor.h"
|
||||
#include <simgrid/exec.h>
|
||||
|
||||
#include "utils.h"
|
||||
#include "dsmat.h"
|
||||
|
||||
/* Tracing purposes */
|
||||
static char* COMPUTE = "Computing";
|
||||
static char* IDLE = "Idling";
|
||||
|
||||
void init_trace() {
|
||||
// TRACE_host_state_declare(COMPUTE);
|
||||
// TRACE_host_state_declare(IDLE);
|
||||
}
|
||||
|
||||
int dsmat_fill(Matrix* a, int m, int n, int b, int p, int q, char* name) {
|
||||
int me, node;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
int mb = m/b, nb = n/b;
|
||||
int ii, jj;
|
||||
int row, col;
|
||||
a->mb = mb;
|
||||
a->nb = nb;
|
||||
a->b = b;
|
||||
//printf("%d] %s : m x n (b) = %d x %d (%d)\n", me, name, mb, nb, b);
|
||||
a->blocks = calloc(mb,sizeof(Block*));
|
||||
for (ii = 0; ii < mb;ii++) {
|
||||
a->blocks[ii] = calloc(nb,sizeof(Block));
|
||||
for (jj = 0; jj < nb;jj++) {
|
||||
node = get_node(p,q,ii,jj);
|
||||
node_coordinates_2i(p,q,node,&row,&col);
|
||||
a->blocks[ii][jj].owner = node;
|
||||
a->blocks[ii][jj].row = row;
|
||||
a->blocks[ii][jj].col = col;
|
||||
a->blocks[ii][jj].request = MPI_REQUEST_NULL;
|
||||
if (me == a->blocks[ii][jj].owner) {
|
||||
//printf("%d]allocating x_%d,%d\n",me,ii,jj);
|
||||
a->blocks[ii][jj].c = calloc(b*b,sizeof(float));
|
||||
rand_mat(b,b,a->blocks[ii][jj].c,10);
|
||||
} else {
|
||||
a->blocks[ii][jj].c = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int dsmat_fill_v(Matrix* a, int m, int n, int b, int p, int q, char* name, float value) {
|
||||
int me, node;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
int mb = m/b, nb = n/b;
|
||||
int ii, jj;
|
||||
int row, col;
|
||||
a->mb = mb;
|
||||
a->nb = nb;
|
||||
a->b = b;
|
||||
a->blocks = calloc(mb,sizeof(Block*));
|
||||
for (ii = 0; ii < mb;ii++) {
|
||||
a->blocks[ii] = calloc(nb,sizeof(Block));
|
||||
for (jj = 0; jj < nb;jj++) {
|
||||
node = get_node(p,q,ii,jj);
|
||||
node_coordinates_2i(p,q,node,&row,&col);
|
||||
a->blocks[ii][jj].owner = node;
|
||||
a->blocks[ii][jj].row = row;
|
||||
a->blocks[ii][jj].col = col;
|
||||
a->blocks[ii][jj].request = MPI_REQUEST_NULL;
|
||||
if (me == a->blocks[ii][jj].owner) {
|
||||
//printf("%d]allocating x_%d,%d to fill with %f\n",me,ii,jj, value);
|
||||
a->blocks[ii][jj].c = calloc(b*b,sizeof(float));
|
||||
val_mat(b,b,a->blocks[ii][jj].c,value);
|
||||
} else {
|
||||
a->blocks[ii][jj].c = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int dsmat_fill_s(Matrix* a, int m, int n, int b, int p, int q, char* name) {
|
||||
int me, node;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
int mb = m/b, nb = n/b;
|
||||
int ii, jj;
|
||||
int row, col;
|
||||
a->mb = mb;
|
||||
a->nb = nb;
|
||||
a->b = b;
|
||||
a->blocks = calloc(mb,sizeof(Block*));
|
||||
for (ii = 0; ii < mb;ii++) {
|
||||
a->blocks[ii] = calloc(nb,sizeof(Block));
|
||||
for (jj = 0; jj < nb;jj++) {
|
||||
node = get_node(p,q,ii,jj);
|
||||
node_coordinates_2i(p,q,node,&row,&col);
|
||||
a->blocks[ii][jj].owner = node;
|
||||
a->blocks[ii][jj].row = row;
|
||||
a->blocks[ii][jj].col = col;
|
||||
a->blocks[ii][jj].request = MPI_REQUEST_NULL;
|
||||
if (me == a->blocks[ii][jj].owner) {
|
||||
//printf("%d] s_allocating %s_%d,%d to fill with %f\n",me,name,ii,jj,(float)nb*(ii+1)+(jj+1));
|
||||
a->blocks[ii][jj].c = calloc(b*b,sizeof(float));
|
||||
val_mat(b,b,a->blocks[ii][jj].c,(float) nb*(ii+1)+(jj+1));
|
||||
} else {
|
||||
a->blocks[ii][jj].c = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int dsmat_destroy(Matrix* a, char* name) {
|
||||
int me;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
int mb = a->mb, nb = a->nb;
|
||||
//printf("[%d] destroying matrix %s (mb=%d,nb=%d,b=%d)\n",me, name, mb, nb, a->b);
|
||||
int ii, jj;
|
||||
Block * a_ij;
|
||||
for (ii = 0; ii < mb ; ii++) {
|
||||
for (jj = 0; jj < nb ; jj++) {
|
||||
a_ij = & a->blocks[ii][jj];
|
||||
//if (a_ij->c != NULL) { // && a_ij.owner == me) {
|
||||
if (a_ij->c != NULL && a_ij->owner == me) {
|
||||
free(a_ij->c);
|
||||
}
|
||||
}
|
||||
free(a->blocks[ii]);
|
||||
}
|
||||
free(a->blocks);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int dsmat_scal_check(Matrix* A, float alpha) {
|
||||
int i,j;
|
||||
int me;
|
||||
if (alpha == 0.0) return 0;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
Block* Aij;
|
||||
for(i = 0; i < A->mb; i++) {
|
||||
for(j = 0; j < A->nb; j++) {
|
||||
Aij = & A->blocks[i][j];
|
||||
if (Aij->owner == me) {
|
||||
double computation_amount = 2.0*A->b*A->b*A->b;
|
||||
cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, A->b, A->b, A->b,
|
||||
0.0, Aij->c, A->b, Aij->c, A->b,
|
||||
alpha, Aij->c, A->b);
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int dsmat_scal(Matrix* A, float alpha) {
|
||||
int i,j;
|
||||
int me;
|
||||
if (alpha == 0.0) return 0;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
Block* Aij;
|
||||
SMPI_SAMPLE_LOCAL(i = 0, i < A->mb, i++, 10, 0.005) {
|
||||
SMPI_SAMPLE_LOCAL(j = 0, j < A->nb, j++, 10, 0.005) {
|
||||
Aij = & A->blocks[i][j];
|
||||
if (Aij->owner == me) {
|
||||
double computation_amount = 2.0*A->b*A->b*A->b;
|
||||
cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, A->b, A->b, A->b,
|
||||
0.0, Aij->c, A->b, Aij->c, A->b,
|
||||
alpha, Aij->c, A->b);
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// FIXME : remove alpha/beta
|
||||
int local_outer_product_check(float alpha, Matrix* A, Matrix* B, Matrix* C, int l, int p, int q) {
|
||||
int i, j, err;
|
||||
for(i = 0; i < C->mb; i++) {
|
||||
for(j = 0; j < C->nb; j++) {
|
||||
err = compute_local_op(alpha, A, B, C, i, j, l);
|
||||
if (err != 0) return 1;
|
||||
}
|
||||
}
|
||||
/* free useless memory */
|
||||
free_local_op(A, B, l, p, q);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int local_outer_product(float alpha, Matrix* A, Matrix* B, Matrix* C, int l, int p, int q) {
|
||||
int i, j, err;
|
||||
SMPI_SAMPLE_LOCAL(i = 0, i < C->mb, i++, 10, 0.005) {
|
||||
SMPI_SAMPLE_LOCAL(j = 0, j < C->nb, j++, 10, 0.005) {
|
||||
err = compute_local_op(alpha, A, B, C, i, j, l);
|
||||
if (err != 0) return 1;
|
||||
}
|
||||
}
|
||||
/* free useless memory */
|
||||
free_local_op(A, B, l, p, q);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int compute_local_op(float alpha, Matrix* A, Matrix* B, Matrix* C, int i, int j, int l) {
|
||||
int me;
|
||||
int b;
|
||||
Block *Ail, *Blj, *Cij;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
Cij = & C->blocks[i][j];
|
||||
b = C->b;
|
||||
if (Cij->owner == me) {
|
||||
Ail = & A->blocks[i][l];
|
||||
if (Ail->c == NULL) { return 1; }
|
||||
Blj = & B->blocks[l][j];
|
||||
if (Blj->c == NULL) { return 2; }
|
||||
// TRACE_host_set_state(COMPUTE);
|
||||
cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, b,b,b,
|
||||
alpha, Ail->c, b, Blj->c, b,
|
||||
1.0, Cij->c, b);
|
||||
// TRACE_host_set_state(IDLE);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int free_local_op(Matrix* A, Matrix* B, int l, int p, int q) {
|
||||
int i,j;
|
||||
int me, me_coord[2];
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
node_coordinates(p,q,me,me_coord);
|
||||
Block *Ail, *Blj;
|
||||
for (i = 0; i < A->mb; i++) {
|
||||
Ail = & A->blocks[i][l];
|
||||
if (Ail->owner != me && Ail->c != NULL) {
|
||||
free(Ail->c);
|
||||
Ail->c = NULL;
|
||||
}
|
||||
}
|
||||
for (j = 0; j < B->nb; j++) {
|
||||
Blj = & B->blocks[l][j];
|
||||
if (Blj->owner != me && Blj->c != NULL) {
|
||||
free(Blj->c);
|
||||
Blj->c = NULL;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int block_copy(float * a, float * b, int m, int n) {
|
||||
int i, j;
|
||||
for (i = 0; i < m ; i++) {
|
||||
for (j = 0; j < n ; j++) {
|
||||
a[n*i+j] = b[n*i+j];
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int block_print(float * a, int m, int n, char* name) {
|
||||
int i, j;
|
||||
printf("block %s\n", name);
|
||||
for (i = 0; i < m ; i++) {
|
||||
for (j = 0; j < n ; j++) {
|
||||
printf("%9.2f\t", a[n*i+j]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
// A <- B
|
||||
int dsmat_copy(Matrix * A, Matrix * B) {
|
||||
int i, j;
|
||||
int me;
|
||||
int mb, nb, b;
|
||||
Block *Aij, *Bij;
|
||||
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
|
||||
A->mb = B->mb;
|
||||
A->nb = B->nb;
|
||||
A->b = B->b;
|
||||
|
||||
mb = A->mb;
|
||||
nb = A->nb;
|
||||
b = A->b;
|
||||
|
||||
A->blocks = calloc(mb, sizeof(Block*));
|
||||
for (i = 0; i<mb;i++){
|
||||
A->blocks[i] = calloc(nb, sizeof(Block));
|
||||
for (j = 0; j<nb;j++){
|
||||
Aij = & A->blocks[i][j];
|
||||
Bij = & B->blocks[i][j];
|
||||
Aij->owner = Bij->owner;
|
||||
Aij->row = Bij->row;
|
||||
Aij->col = Bij->col;
|
||||
Aij->request = MPI_REQUEST_NULL;
|
||||
if (Bij->owner == me) {
|
||||
Aij->c = calloc(b*b,sizeof(float));
|
||||
block_copy(Aij->c, Bij->c, b, b);
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int dsmat_copy_to(Matrix * A, Matrix * B, int rcv, char* copy, char* copied) {
|
||||
int i, j, l;
|
||||
int me,tag;
|
||||
int mb, nb, b;
|
||||
Block *Aij, *Bij;
|
||||
float* localA;
|
||||
MPI_Status status;
|
||||
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
A->nb = 1;
|
||||
A->mb = 1;
|
||||
A->b = -1;
|
||||
|
||||
mb = B->mb;
|
||||
nb = B->nb;
|
||||
b = B->b;
|
||||
|
||||
tag = 0;
|
||||
A->blocks = malloc(sizeof(Block*));
|
||||
A->blocks[0] = malloc(sizeof(Block));
|
||||
Aij = & A->blocks[0][0];
|
||||
Aij->owner = rcv;
|
||||
Aij->row = -1;
|
||||
Aij->col = -1; // not on a grid ...
|
||||
Aij->request = MPI_REQUEST_NULL;
|
||||
if (me == rcv) {
|
||||
Aij->c = malloc(mb*b*nb*b *sizeof(float));
|
||||
}
|
||||
for (i = 0; i<mb;i++){
|
||||
for (j = 0; j<nb;j++){
|
||||
Bij = & B->blocks[i][j];
|
||||
if (Bij->owner == me) {
|
||||
if (rcv != me) {
|
||||
MPI_Send(Bij->c, b*b, MPI_FLOAT,
|
||||
rcv, tag,
|
||||
MPI_COMM_WORLD);
|
||||
} else {
|
||||
for (l = 0; l<b; l++) {
|
||||
block_copy(&Aij->c[nb*i*b*b+j*b+l*nb*b], Bij->c, 1, b);
|
||||
}
|
||||
}
|
||||
} else if (me == rcv) {
|
||||
localA = malloc(b*b*sizeof(float));
|
||||
MPI_Recv(localA, b*b, MPI_FLOAT,
|
||||
Bij->owner, tag,
|
||||
MPI_COMM_WORLD,&status);
|
||||
for (l = 0; l<b; l++) {
|
||||
block_copy(&Aij->c[nb*i*b*b+j*b+l*nb*b], localA, 1, b);
|
||||
}
|
||||
free(localA);
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
62
TP2/src/dsmat.h
Normal file

@@ -0,0 +1,62 @@
#ifndef DENSE_MAT_FNCT_H
#define DENSE_MAT_FNCT_H

typedef struct Blocks {
  float* c;            // The content of the block, stored in an array.
                       // This pointer is only meaningful to the owner;
                       // otherwise it is NULL.
                       // Element x_i,j of a given block of size b
                       // can be accessed as x->c[b*i+j].
  int owner;           // The MPI rank of the owner of this block.
                       // This information is available to all the nodes.
  int row, col;        // owner = row * q + col in a p x q grid.
  MPI_Request request; // The request can be used when sending the block
                       // through immediate-return routines of MPI such as MPI_Irecv.
} Block;

typedef struct Matrices {
  int mb, nb, b;  // A given Matrix is of size mb*b x nb*b, b being the
                  // dimension of each of its square blocks, i.e.
                  // nb is the number of column blocks and mb the number of row blocks.
  Block** blocks; // This 2D array describes each block of a given Matrix.
                  // This is meaningful to all the nodes: information on a block A_i,j
                  // of a matrix A can be accessed through A->blocks[i][j] from every MPI rank.
} Matrix;

// tracing
void init_trace();

/* dense matrices routines */
// fill matrix a with values matching the position of the block in the matrix
// i.e. block a_i,j is full of n*(i+1)+(j+1) with a of size m x n
int dsmat_fill_s(Matrix* a, int m, int n, int b, int p, int q, char* name);
// destroy matrix a
int dsmat_destroy(Matrix* a, char* name);
// scale matrix a by alpha
int dsmat_scal_check(Matrix* a, float alpha);
int dsmat_scal(Matrix* a, float alpha);

int dsmat_fill_v(Matrix* a, int m, int n, int b, int p, int q, char* name, float value);

/* dense matrices copy */
// copy b[0:m-1,0:n-1] into a[0:m-1,0:n-1]
int block_copy(float * a, float * b, int m, int n);
// print a[0:m-1,0:n-1]
int block_print(float * a, int m, int n, char* name);
// copy matrix B into matrix A
int dsmat_copy(Matrix * A, Matrix * B);
// copy matrix B into matrix A owned only by rank rcv
int dsmat_copy_to(Matrix * A, Matrix * B, int rcv, char* copy, char* copied);

/* gemm generic routines */
// compute C += A:l * Bl: for all blocks of C I own, using compute_local_op
// blocks of A and B that I do not own are freed from memory using free_local_op
int local_outer_product_check(float alpha, Matrix* A, Matrix* B, Matrix* C, int l, int p, int q);
int local_outer_product(float alpha, Matrix* A, Matrix* B, Matrix* C, int l, int p, int q);
// compute C_i,j += A_i,l * B_l,j
// if a given block is missing, the corresponding computation is skipped
int compute_local_op(float alpha, Matrix* A, Matrix* B, Matrix* C, int i, int j, int l);
// free A:l and Bl: from memory if I do not own them
int free_local_op(Matrix* A, Matrix* B, int l, int p, int q);

#endif
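The block layout described in the comments above can be summarised with a small standalone sketch (illustrative only, not part of the TP sources, and not using the actual dsmat.h types): element (i,j) of a b x b block is stored row-major at c[b*i+j], and only the owning rank keeps a non-NULL pointer.

#include <stdio.h>
#include <stdlib.h>

int main(void) {
    int b = 4;                                  /* block size, playing the role of Matrix.b */
    float *c = calloc((size_t)(b * b), sizeof(float));
    if (c == NULL) return 1;
    for (int i = 0; i < b; i++)                 /* fill entry (i,j) with b*i + j            */
        for (int j = 0; j < b; j++)
            c[b * i + j] = (float)(b * i + j);  /* same x->c[b*i+j] convention as above     */
    printf("entry (1,2) of the block = %.1f\n", c[b * 1 + 2]);
    free(c);
    return 0;
}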
88
TP2/src/ex1.c
Normal file

@@ -0,0 +1,88 @@
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <cblas.h>
#include "utils.h"
#include "dsmat.h"
#include "gemms.h"

void p2p_transmit_A(int p, int q, Matrix *A, int i, int l)
{
    int j;
    int me, my_row, my_col;
    MPI_Status status;

    MPI_Comm_rank(MPI_COMM_WORLD, &me);
    node_coordinates_2i(p, q, me, &my_row, &my_col);

    Block *Ail;
    int node, tag, b;
    tag = 0;
    Ail = &A->blocks[i][l];
    b = A->b;

    /* TODO : transmit A[i,l] using MPI_Ssend & MPI_Recv */
    if (Ail->owner == me)
    { // I own A[i,l]
        /* MPI_Ssend A[i,l] to my row */
        for (j = 0; j < q; j++)
        {
            node = get_node(p, q, my_row, j);
            if (node != me)
            {
                // printf("%d Sending A[%d,%d] to node %d\n", me, i, l, node);
                MPI_Ssend(Ail->c, b * b, MPI_FLOAT, node, tag, MPI_COMM_WORLD);
                // printf("%d Sent A[%d,%d] to node %d\n", me, i, l, node);
            }
        }
    }
    else if (Ail->row == my_row)
    { // A[i,l] is stored on my row
        Ail->c = malloc(b * b * sizeof(float));
        /* MPI_Recv A[i,l] */
        // printf("%d Receiving A[%d,%d] from node %d\n", me, i, l, Ail->owner);
        MPI_Recv(Ail->c, b * b, MPI_FLOAT, Ail->owner, tag, MPI_COMM_WORLD, &status);
        // printf("%d Received A[%d,%d] from node %d\n", me, i, l, Ail->owner);
    }
    /* end TODO */
}

void p2p_transmit_B(int p, int q, Matrix *B, int l, int j)
{
    int i;
    int me, my_row, my_col;
    MPI_Status status;

    MPI_Comm_rank(MPI_COMM_WORLD, &me);
    node_coordinates_2i(p, q, me, &my_row, &my_col);

    int node, tag, b;
    tag = 1;
    Block *Blj;
    Blj = &B->blocks[l][j];
    b = B->b;
    /* TODO : transmit B[l,j] using MPI_Ssend & MPI_Recv */
    if (Blj->owner == me)
    { // I own B[l,j]
        /* MPI_Ssend B[l,j] to my column */
        for (i = 0; i < p; i++)
        {
            node = get_node(p, q, i, my_col);
            if (node != me)
            {
                // printf("%d Sending B[%d,%d] to node %d\n", me, l, j, node);
                MPI_Ssend(Blj->c, b * b, MPI_FLOAT, node, tag, MPI_COMM_WORLD);
                // printf("%d Sent B[%d,%d] to node %d\n", me, l, j, node);
            }
        }
    }
    else if (Blj->col == my_col)
    { // B[l,j] is stored on my column
        Blj->c = malloc(b * b * sizeof(float));
        /* MPI_Recv B[l,j] */
        // printf("%d Receiving B[%d,%d] from node %d\n", me, l, j, Blj->owner);
        MPI_Recv(Blj->c, b * b, MPI_FLOAT, Blj->owner, tag, MPI_COMM_WORLD, &status);
        // printf("%d Received B[%d,%d] from node %d\n", me, l, j, Blj->owner);
    }
    /* end TODO */
}
63
TP2/src/ex1.c.clem
Normal file

@@ -0,0 +1,63 @@
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <mpi.h>
|
||||
#include <cblas.h>
|
||||
#include "utils.h"
|
||||
#include "dsmat.h"
|
||||
#include "gemms.h"
|
||||
|
||||
void p2p_transmit_A(int p, int q, Matrix *A, int i, int l) {
|
||||
int j;
|
||||
int me, my_row, my_col;
|
||||
MPI_Status status;
|
||||
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
node_coordinates_2i(p,q,me,&my_row,&my_col);
|
||||
|
||||
Block *Ail;
|
||||
int node, tag, b;
|
||||
Ail = & A->blocks[i][l];
|
||||
b = A->b;
|
||||
/* TODO : transmit A[i,l] using MPI_Ssend & MPI_Recv */
|
||||
if (Ail->owner == me /* I own A[i,l]*/) {
|
||||
/* MPI_Ssend A[i,l] to my row */
|
||||
for (j = 0; j < q; j++) {
|
||||
node = get_node(p, q, my_row, j);
|
||||
if (node != me)
|
||||
MPI_Ssend(Ail->c, b*b, MPI_FLOAT, node, 0, MPI_COMM_WORLD);
|
||||
}
|
||||
} else if (Ail->row == my_row /* A[i,l] is stored on my row */) {
|
||||
Ail->c = malloc(b*b*sizeof(float));
|
||||
/* MPI_Recv A[i,l] */
|
||||
MPI_Recv(Ail->c, b*b, MPI_FLOAT, Ail->owner, 0, MPI_COMM_WORLD, &status);
|
||||
}
|
||||
/* end TODO */
|
||||
}
|
||||
|
||||
void p2p_transmit_B(int p, int q, Matrix *B, int l, int j) {
|
||||
int i;
|
||||
int me, my_row, my_col;
|
||||
MPI_Status status;
|
||||
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
node_coordinates_2i(p,q,me,&my_row,&my_col);
|
||||
|
||||
int node, tag, b;
|
||||
Block *Blj;
|
||||
Blj = & B->blocks[l][j];
|
||||
b = B->b;
|
||||
/* TODO : transmit B[l,j] using MPI_Ssend & MPI_Recv */
|
||||
if (Blj->owner == me /* I owned B[l,j]*/) {
|
||||
/* MPI_Ssend B[l,j] to my column */
|
||||
for (i = 0; i < p; i++) {
|
||||
node = get_node(p, q, i, my_col);
|
||||
if (node != me)
|
||||
MPI_Ssend(Blj->c, b*b, MPI_FLOAT, node, 1, MPI_COMM_WORLD);
|
||||
}
|
||||
} else if (Blj->col == my_col /* B[l,j] is stored on my column */) {
|
||||
Blj->c = malloc(b*b*sizeof(float));
|
||||
/* MPI_Recv B[l,j] */
|
||||
MPI_Recv(Blj->c, b*b, MPI_FLOAT, Blj->owner, 1, MPI_COMM_WORLD, &status);
|
||||
}
|
||||
/* end TODO */
|
||||
}
|
5
TP2/src/ex1.h
Normal file

@@ -0,0 +1,5 @@
#ifndef EXO_1_H
#define EXO_1_H
void p2p_transmit_A(int p, int q, Matrix *A, int i, int l);
void p2p_transmit_B(int p, int q, Matrix *B, int l, int j);
#endif
53
TP2/src/ex2.c
Normal file

@@ -0,0 +1,53 @@
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <cblas.h>
#include "utils.h"
#include "dsmat.h"
#include "gemms.h"

void bcast_A(int p, int q, Matrix *A, int i, int l, MPI_Comm row_comm)
{
    int me, my_row, my_col;

    MPI_Comm_rank(MPI_COMM_WORLD, &me);
    node_coordinates_2i(p, q, me, &my_row, &my_col);

    Block *Ail;
    int b = A->b;
    Ail = &A->blocks[i][l];
    /* TODO : transmit A[i,l] using MPI_Bcast */
    if (q > 1 && Ail->row == my_row)
    { /* Ail is stored on my row */
        if (Ail->owner != me)
        {
            Ail->c = calloc(b * b, sizeof(float));
        }
        // MPI_Bcast
        MPI_Bcast(Ail->c, b * b, MPI_FLOAT, Ail->col, row_comm);
    }
    /* end TODO */
}

void bcast_B(int p, int q, Matrix *B, int l, int j, MPI_Comm col_comm)
{
    int me, my_row, my_col;

    MPI_Comm_rank(MPI_COMM_WORLD, &me);
    node_coordinates_2i(p, q, me, &my_row, &my_col);

    Block *Blj;
    int b = B->b;
    Blj = &B->blocks[l][j];
    /* TODO : transmit B[l,j] using MPI_Bcast */
    if (p > 1 && Blj->col == my_col)
    { /* Blj is stored on my column */
        if (Blj->owner != me)
        {
            Blj->c = calloc(b * b, sizeof(float));
        }
        // MPI_Bcast
        MPI_Bcast(Blj->c, b * b, MPI_FLOAT, Blj->row, col_comm);
    }
    /* end TODO */
}
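bcast_A and bcast_B above assume that row and column communicators already exist (they are created with MPI_Comm_split in pgemm_bcast). A self-contained sketch of that split-then-broadcast pattern, with an assumed grid width q = 2 and the convention rank = row * q + col (illustrative only, not part of the TP sources):

#include <mpi.h>
#include <stdio.h>

int main(int argc, char *argv[]) {
    MPI_Init(&argc, &argv);
    int me;
    MPI_Comm_rank(MPI_COMM_WORLD, &me);

    int q = 2;                 /* assumed grid width                             */
    int my_row = me / q;       /* colour: ranks of the same row end up together  */
    int my_col = me % q;       /* key: orders ranks inside the row communicator  */

    MPI_Comm row_comm;
    MPI_Comm_split(MPI_COMM_WORLD, my_row, my_col, &row_comm);

    float block = (float)me;   /* stand-in for a b*b block                       */
    MPI_Bcast(&block, 1, MPI_FLOAT, 0, row_comm);   /* column 0 of each row broadcasts */

    printf("[%d] (row %d, col %d) now holds %f\n", me, my_row, my_col, block);
    MPI_Comm_free(&row_comm);
    MPI_Finalize();
    return 0;
}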
5
TP2/src/ex2.h
Normal file

@@ -0,0 +1,5 @@
#ifndef EXO_2_H
#define EXO_2_H
void bcast_A(int p, int q, Matrix *A, int i, int l, MPI_Comm row_comm);
void bcast_B(int p, int q, Matrix *B, int l, int j, MPI_Comm col_comm);
#endif
108
TP2/src/ex3.c
Normal file

@@ -0,0 +1,108 @@
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <cblas.h>
#include "utils.h"
#include "dsmat.h"
#include "gemms.h"

void p2p_i_transmit_A(int p, int q, Matrix *A, int i, int l)
{
    int j, b;
    int me, my_row, my_col;
    MPI_Comm_rank(MPI_COMM_WORLD, &me);
    node_coordinates_2i(p, q, me, &my_row, &my_col);

    int node, tag;
    tag = 0;
    Block *Ail;
    Ail = &A->blocks[i][l];
    b = A->b;

    /* TODO : transmit A[i,l] using MPI_Isend/recv */
    if (Ail->owner == me)
    {
        // MPI_Isend Ail to my row
        for (j = 0; j < q; j++)
        {
            node = get_node(p, q, my_row, j);
            if (node != me)
            {
                MPI_Isend(Ail->c, b * b, MPI_FLOAT, node, tag, MPI_COMM_WORLD, &Ail->request);
            }
        }
    }
    else if (Ail->row == my_row)
    {
        Ail->c = calloc(b * b, sizeof(float));
        // MPI_Irecv Ail
        MPI_Irecv(Ail->c, b * b, MPI_FLOAT, Ail->owner, tag, MPI_COMM_WORLD, &Ail->request);
    }
    /* end TODO */
}

void p2p_i_transmit_B(int p, int q, Matrix *B, int l, int j)
{
    int i, b;
    int me, my_row, my_col;
    MPI_Comm_rank(MPI_COMM_WORLD, &me);
    node_coordinates_2i(p, q, me, &my_row, &my_col);

    int node, tag;
    tag = 1;
    Block *Blj;
    Blj = &B->blocks[l][j];
    b = B->b;

    /* TODO : transmit B[l,j] using MPI_Isend/recv */
    if (Blj->owner == me)
    {
        // MPI_Isend Blj to my col
        for (i = 0; i < p; i++)
        {
            node = get_node(p, q, i, my_col);
            if (node != me)
            {
                MPI_Isend(Blj->c, b * b, MPI_FLOAT, node, tag, MPI_COMM_WORLD, &Blj->request);
            }
        }
    }
    else if (Blj->col == my_col)
    {
        Blj->c = calloc(b * b, sizeof(float));
        // MPI_Irecv Blj
        MPI_Irecv(Blj->c, b * b, MPI_FLOAT, Blj->owner, tag, MPI_COMM_WORLD, &Blj->request);
    }
    /* end TODO */
}

void p2p_i_wait_AB(int p, int q, Matrix *A, Matrix *B, Matrix *C, int l)
{
    int me, my_row, my_col;
    MPI_Comm_rank(MPI_COMM_WORLD, &me);
    node_coordinates_2i(p, q, me, &my_row, &my_col);

    int i, j;
    Block *Ail, *Blj;
    /* TODO : wait for A[i,l] and B[l,j] if I need them */
    for (i = 0; i < A->mb; i++)
    {
        Ail = &A->blocks[i][l];
        if (Ail->owner != me && Ail->row == my_row)
        {
            // MPI_Wait Ail
            MPI_Wait(&Ail->request, MPI_STATUS_IGNORE);
        }
    }
    for (j = 0; j < B->nb; j++)
    {
        Blj = &B->blocks[l][j];
        if (Blj->owner != me && Blj->col == my_col)
        {
            // MPI_Wait Blj
            MPI_Wait(&Blj->request, MPI_STATUS_IGNORE);
        }
    }
    /* Alternative suggestion : iterate over blocks of C */
    /* end TODO */
}
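The non-blocking pattern that ex3 splits across p2p_i_transmit_A/B and p2p_i_wait_AB can be reduced to a self-contained sketch (illustrative only, not part of the TP sources): post MPI_Isend/MPI_Irecv early, keep the MPI_Request, do other work, and call MPI_Wait only when the data is actually needed. Run with at least 2 MPI processes; ranks and the single float payload are assumptions for the example.

#include <mpi.h>
#include <stdio.h>

int main(int argc, char *argv[]) {
    MPI_Init(&argc, &argv);
    int me;
    MPI_Comm_rank(MPI_COMM_WORLD, &me);

    float value = 0.0f;
    MPI_Request request = MPI_REQUEST_NULL;

    if (me == 0) {
        value = 42.0f;
        MPI_Isend(&value, 1, MPI_FLOAT, 1, 0, MPI_COMM_WORLD, &request);
    } else if (me == 1) {
        MPI_Irecv(&value, 1, MPI_FLOAT, 0, 0, MPI_COMM_WORLD, &request);
    }

    /* ... communication/computation overlap would happen here ... */

    MPI_Wait(&request, MPI_STATUS_IGNORE);  /* no-op for ranks whose request stayed MPI_REQUEST_NULL */
    if (me == 1)
        printf("[1] received %f\n", value);

    MPI_Finalize();
    return 0;
}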
6
TP2/src/ex3.h
Normal file

@@ -0,0 +1,6 @@
#ifndef EXO_3_H
#define EXO_3_H
void p2p_i_transmit_A(int p, int q, Matrix *A, int i, int l);
void p2p_i_transmit_B(int p, int q, Matrix *B, int l, int j);
void p2p_i_wait_AB(int p, int q, Matrix *A, Matrix *B, Matrix *C, int l);
#endif
157
TP2/src/gemms.c
Normal file

@@ -0,0 +1,157 @@
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <mpi.h>
|
||||
#include <cblas.h>
|
||||
#include "utils.h"
|
||||
#include "dsmat.h"
|
||||
#include "gemms.h"
|
||||
|
||||
#include "ex1.h"
|
||||
#include "ex2.h"
|
||||
#include "ex3.h"
|
||||
|
||||
int pgemm_p2p(int check, int p, int q, int m, int n, int k, Matrix* A, Matrix* B, Matrix* C) {
|
||||
int mb, nb, kb;
|
||||
int i, j, l;
|
||||
int me, me_coord[2], my_row, my_col;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
node_coordinates(p,q,me,me_coord);
|
||||
node_coordinates_2i(p,q,me,&my_row,&my_col);
|
||||
|
||||
if (A->nb != B->mb || A->mb != C-> mb || B->nb != C->nb) {
|
||||
if (me == 0) {
|
||||
printf(" A B C\n");
|
||||
printf(" mb %d %d %d\n", A->mb, B->mb, C->mb);
|
||||
printf(" nb %d %d %d\n", A->nb, B->nb, C->nb);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
if (B->b != A->b || A->b != C-> b) return 2;
|
||||
mb = C->mb;
|
||||
nb = C->nb;
|
||||
kb = A->nb;
|
||||
|
||||
for (l = 0; l < kb; l++) {
|
||||
for (i = 0; i < mb; i++) {
|
||||
p2p_transmit_A(p,q,A,i,l);
|
||||
}
|
||||
for (j = 0; j < nb; j++) {
|
||||
p2p_transmit_B(p,q,B,l,j);
|
||||
}
|
||||
if (check) {
|
||||
local_outer_product_check(1.0f, A, B, C, l, p, q);
|
||||
} else {
|
||||
local_outer_product(1.0f, A, B, C, l, p, q);
|
||||
}
|
||||
}
|
||||
// printf("FINI\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
int pgemm_bcast(int check, int p, int q, int m, int n, int k, Matrix* A, Matrix* B, Matrix* C) {
|
||||
int mb, nb, kb;
|
||||
int i, j, l;
|
||||
int me, me_row_comm, me_col_comm, me_coord[2];
|
||||
int my_row, my_col;
|
||||
MPI_Comm row_comm, col_comm;
|
||||
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &me);
|
||||
|
||||
if (A->nb != B->mb || A->mb != C-> mb || B->nb != C->nb) {
|
||||
if (me == 0) {
|
||||
printf(" A B C\n");
|
||||
printf(" mb %d %d %d\n", A->mb, B->mb, C->mb);
|
||||
printf(" nb %d %d %d\n", A->nb, B->nb, C->nb);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
if (B->b != A->b || A->b != C-> b) return 2;
|
||||
mb = C->mb;
|
||||
nb = C->nb;
|
||||
kb = A->nb;
|
||||
|
||||
node_coordinates(p,q,me,me_coord);
|
||||
node_coordinates_2i(p,q,me,&my_row, &my_col);
|
||||
if (q > 1) {
|
||||
MPI_Comm_split(MPI_COMM_WORLD, my_row, me, &row_comm);
|
||||
MPI_Comm_rank(row_comm, &me_row_comm);
|
||||
} else {
|
||||
me_row_comm = -1;
|
||||
}
|
||||
if (p > 1) {
|
||||
MPI_Comm_split(MPI_COMM_WORLD, my_col, me, &col_comm);
|
||||
MPI_Comm_rank(col_comm, &me_col_comm);
|
||||
} else {
|
||||
me_col_comm = -1;
|
||||
}
|
||||
|
||||
for (l = 0; l < kb ; l++) {
|
||||
for (i = 0; i < mb; i++) {
|
||||
bcast_A(p,q,A,i,l,row_comm);
|
||||
}
|
||||
for (j = 0; j < nb; j++) {
|
||||
bcast_B(p,q,B,l,j,col_comm);
|
||||
}
|
||||
if (check) {
|
||||
local_outer_product_check(1.0f, A, B, C, l, p, q);
|
||||
} else {
|
||||
local_outer_product(1.0f, A, B, C, l, p, q);
|
||||
}
|
||||
}
|
||||
if (q > 1)
|
||||
MPI_Comm_free(&row_comm);
|
||||
if (p > 1)
|
||||
MPI_Comm_free(&col_comm);
|
||||
return 0;
|
||||
}
|
||||

int pgemm_p2p_i_la(int check, int p, int q, int lookahead, int m, int n, int k, Matrix* A, Matrix* B, Matrix* C) {
  int mb, nb, kb;
  int i, j, l;
  int me, me_coord[2], my_row, my_col;
  MPI_Comm_rank(MPI_COMM_WORLD, &me);
  node_coordinates(p,q,me,me_coord);
  node_coordinates_2i(p,q,me,&my_row,&my_col);

  if (A->nb != B->mb || A->mb != C->mb || B->nb != C->nb) {
    if (me == 0) {
      printf(" A B C\n");
      printf(" mb %d %d %d\n", A->mb, B->mb, C->mb);
      printf(" nb %d %d %d\n", A->nb, B->nb, C->nb);
    }
    return 1;
  }
  if (B->b != A->b || A->b != C->b) return 2;
  mb = C->mb;
  nb = C->nb;
  kb = A->nb;
  if (lookahead <= 0) return 3;
  if (lookahead >= kb) lookahead = kb;
  //printf("LA = %d, KB = %d\n",lookahead, kb);
  for (l = 0; l < lookahead ; l++) {
    for (i = 0; i < mb; i++) {
      p2p_i_transmit_A(p,q,A,i,l);
    }
    for (j = 0; j < nb; j++) {
      p2p_i_transmit_B(p,q,B,l,j);
    }
  }
  for (l = 0; l < kb ; l++) {
    if (l < kb - lookahead) { // prefetch the step that is 'lookahead' iterations ahead (l + lookahead < kb)
      for (i = 0; i < mb; i++) {
        p2p_i_transmit_A(p,q,A,i,l+lookahead);
      }
      for (j = 0; j < nb; j++) {
        p2p_i_transmit_B(p,q,B,l+lookahead,j);
      }
    }
    p2p_i_wait_AB(p,q,A,B,C,l);
    if (check) {
      local_outer_product_check(1.0f, A, B, C, l, p, q);
    } else {
      local_outer_product(1.0f, A, B, C, l, p, q);
    }
  }
  return 0;
}
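The p2p_i_* helpers used by the lookahead variant come from the exercise headers and presumably wrap non-blocking transfers; the underlying MPI pattern such helpers rely on is the usual pairing of MPI_Isend/MPI_Irecv with a deferred wait, shown here only as a generic illustration (not the project's actual helpers).

#include <mpi.h>

// Generic non-blocking exchange sketch: post the communication early, overlap
// it with computation, and block only when the data is actually needed --
// the same idea the lookahead loop applies to whole outer-product steps.
void prefetch_then_use(float *recv_buf, float *send_buf, int count,
                       int src, int dst, MPI_Comm comm) {
  MPI_Request reqs[2];
  MPI_Irecv(recv_buf, count, MPI_FLOAT, src, 0, comm, &reqs[0]);
  MPI_Isend(send_buf, count, MPI_FLOAT, dst, 0, comm, &reqs[1]);
  /* ... compute on previously received blocks here ... */
  MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);  // wait only once the data is required
}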
9
TP2/src/gemms.h
Normal file
@@ -0,0 +1,9 @@
#ifndef PROGPARALLEL_GEMMS_H
#define PROGPARALLEL_GEMMS_H

int pgemm_p2p(int check, int p, int q, int m, int n, int k, Matrix* A, Matrix* B, Matrix* C);
int pgemm_bcast(int check, int p, int q, int m, int n, int k, Matrix* A, Matrix* B, Matrix* C);
//int pgemm_p2p_i(int p, int q, int m, int n, int k, Matrix* A, Matrix* B, Matrix* C);
int pgemm_p2p_i_la(int check, int p, int q, int lookahead, int m, int n, int k, Matrix* A, Matrix* B, Matrix* C);

#endif
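All three entry points assume A, B and C have already been distributed with the dsmat_* helpers; they are selected in main.c through the --algorithm flag (p2p, bcast or p2p-i-la) and return 0 on success, 1 when the block counts of A, B and C are inconsistent, and 2 when their block sizes differ.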
270
TP2/src/main.c
Normal file
@@ -0,0 +1,270 @@
#include <mpi.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <float.h>
#include <cblas.h>
#include <time.h>
#include <argp.h>

#include "utils.h"
#include "dsmat.h"
#include "gemms.h"

static char doc[] =
  "TP Prog Parallèle -- Command line";

static char args_doc[] = "-m [m] -n [n] -k [k] -b [b] -p [p] -q [q] --algorithm [p2p|p2p-i-la|bcast] --lookahead [la] --niter [i]";

static struct argp_option options[] = {
  {"m", 'm', "int", 0, "Number of rows in A and C (deprecated)" },
  {"n", 'n', "int", 0, "Dimension of A, B and C" },
  {"k", 'k', "int", 0, "Shared dimension of A and B (deprecated)" },
  {"blocking", 'b', "int", 0, "Size of the square block of A, B and C (must divide m, n and k)" },
  {"p", 'p', "int", 0, "Length of the logical grid"},
  {"q", 'q', "int", 0, "Width of the logical grid"},
  {"algorithm",'a', "string", 0, "GEMM distributed algorithm to use"},
  {"lookahead",'l', "int", 0, "Parameter for p2p-i-la algorithm"},
  {"verbose", 'v', 0, 0, "If the program prints more"},
  {"checking", 'c', 0, 0, "If the program checks gemm results"},
  {"niter", 'i', "int", 0, "Number of iterations"},
  { 0 }
};

struct arguments
{
  int m, n, k, b;
  int p, q;
  int la;
  char* algo;
  int verbose, check;
  int iter;
};

static error_t
parse_opt (int key, char *arg, struct argp_state *state)
{
  /* Get the input argument from argp_parse, which we
     know is a pointer to our arguments structure. */
  struct arguments *arguments = state->input;

  switch (key)
    {
    case 'm':
      arguments->m = atoi(arg);
      break;
    case 'n':
      arguments->n = atoi(arg);
      break;
    case 'k':
      arguments->k = atoi(arg);
      break;
    case 'b':
      arguments->b = atoi(arg);
      break;
    case 'p':
      arguments->p = atoi(arg);
      break;
    case 'q':
      arguments->q = atoi(arg);
      break;
    case 'l':
      arguments->la = atoi(arg);
      break;
    case 'a':
      arguments->algo = arg;
      break;
    case 'v':
      arguments->verbose = 1;
      break;
    case 'c':
      arguments->check = 1;
      break;
    case 'i':
      arguments->iter = atoi(arg);
      break;
    default:
      return ARGP_ERR_UNKNOWN;
    }
  return 0;
}

static struct argp argp = { options, parse_opt, args_doc, doc };

// void print_res(Matrix C, char* algo) {
//   int i,j;
//   int size, rank;
//   MPI_Comm_size(MPI_COMM_WORLD, &size);
//   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
//   char name[100];
//   for (i=0;i<C.mb;i++) {
//     for (j=0;j<C.nb;j++) {
//       sprintf(name,"resC[%d,%d](%s)",i,j,algo);
//       if (C.blocks[i][j].owner == rank)
//         block_print(C.blocks[i][j].c, C.b, C.b, name);
//     }
//   }
// }

void gflops_gemm(int m, int n, int k, float exec_time, double* gflops) {
  (*gflops) = 2.0*m*n*k/(exec_time*pow(10,9));
}

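// For example, m = n = k = 1024 with exec_time = 0.5 s gives 2*1024^3 / (0.5*1e9) ≈ 4.3 Gflop/s.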
int main(int argc, char* argv[]) {
  struct arguments arguments;
  arguments.m = 20;
  arguments.n = 20;
  arguments.k = 20;
  arguments.b = 10;
  arguments.p = 2;
  arguments.q = 2;
  arguments.algo = "p2p";
  arguments.la = 0;
  arguments.verbose = 0;
  arguments.check = 0;
  arguments.iter = 1;

  int p, q;
  int m,n,k,b;
  int la;
  int err, iter, niter;
  double d_start, d_stop; // on multiple nodes
  clock_t t; // on one node
  double time_taken, gflops;
  char hostname[1024];
  char * algo;
  int vbose, check;

  argp_parse (&argp, argc, argv, 0, 0, &arguments);
  m = arguments.m;
  n = arguments.n;
  k = arguments.k;
  b = arguments.b;
  p = arguments.p;
  q = arguments.q;
  algo = arguments.algo;
  la = arguments.la;
  vbose = arguments.verbose;
  check = arguments.check;
  niter = arguments.iter;
  if (strcmp(algo,"p2p")*strcmp(algo,"p2p-i-la")*strcmp(algo,"bcast") != 0) {
    printf("Wrong value for algo, only p2p, p2p-i-la and bcast are allowed\n");
    return 1;
  }
  if (b <= 0) { printf("Wrong value for B, should be positive\n"); return 1; }
  if (m%b != 0) { printf("M should be divisible by B\n"); return 1; }
  if (n%b != 0) { printf("N should be divisible by B\n"); return 1; }
  if (k%b != 0) { printf("K should be divisible by B\n"); return 1; }
  if (niter < 0) { printf("Wrong value for niter, should be non-negative\n"); return 1; }

  get_host_name(hostname,1024);
  init_trace();
  // openblas_set_num_threads(1);
  srand(time(NULL));

  MPI_Init(NULL,NULL);
  int size, rank;
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  if (vbose)
    //printf("I am the %d-th node in a world of size %d\n", rank, size);
    printf("%s is the %d-th node in a world of size %d\n", hostname, rank, size);
  if (p*q != size) {
    printf("bad world size (p*q != size)\n");
    return 1;
  }

  // this initialization could probably get better
  Matrix A = (Matrix){0}, B = (Matrix){0}, C = (Matrix){0};
  Matrix wA = (Matrix){0}, wB = (Matrix){0}, wC = (Matrix){0}, bwC = (Matrix){0};
  Matrix bA = (Matrix){0}, bB = (Matrix){0}, bC = (Matrix){0};
  if (vbose)
    printf("[%s] m,n,k = %d,%d,%d | b = %d | pxq = %dx%d | la = %d \n",
           hostname, m,n,k, b, p,q, la);
  // printf("[%d] m,n,k = %d,%d,%d | b = %d | pxq = %dx%d | la = %d \n",
  //        rank, m,n,k, b, p,q, la);
  //err = dsmat_fill_v(&A, m, k, b, p, q, "A", 1.0f);
  //err = dsmat_fill_v(&B, k, n, b, p, q, "B", 1.0f);
  err = dsmat_fill_s(&A, m, k, b, p, q, "A");
  err = dsmat_fill_s(&B, k, n, b, p, q, "B");
  //err = dsmat_fill(&A, m, k, b, p, q, "A");
  //err = dsmat_fill(&B, k, n, b, p, q, "B");
  err = dsmat_fill_v(&C, m, n, b, p, q, "C", 0.0f);
  err = MPI_Barrier(MPI_COMM_WORLD);
  if (err != MPI_SUCCESS) return 1;

  for (iter = 0; iter < niter; iter++) {
    err = dsmat_copy(&wA,&A);
    err = dsmat_copy(&wB,&B);
    err = dsmat_copy(&wC,&C);
    MPI_Barrier(MPI_COMM_WORLD);
    d_start = MPI_Wtime();
    if (strcmp(algo,"p2p") == 0) {
      err = pgemm_p2p(check,p,q,m,n,k,&wA,&wB,&wC);
    // } else if (strcmp(algo,"p2p-i") == 0) {
    //   err = pgemm_p2p_i(p,q,m,n,k,&wA,&wB,&wC);
    } else if (strcmp(algo,"p2p-i-la") == 0) {
      err = pgemm_p2p_i_la(check,p,q,la,m,n,k,&wA,&wB,&wC);
    } else if (strcmp(algo,"bcast") == 0) {
      err = pgemm_bcast(check,p,q,m,n,k,&wA,&wB,&wC);
    }
    MPI_Barrier(MPI_COMM_WORLD);
    d_stop = MPI_Wtime();
    gflops_gemm(m,n,k, d_stop - d_start, &gflops);
    if (rank == 0) {
      //printf("[%d] (%s) measured_wtime = %fs (la=%d) | %f Gflop/s\n", rank, algo, d_stop - d_start, la, gflops);
      printf("[%s] (%s) measured_wtime = %fs (la=%d) | %f Gflop/s\n", hostname, algo, d_stop - d_start, la, gflops);
    }
    MPI_Barrier(MPI_COMM_WORLD);

    if (check) {
      err = dsmat_copy_to(&bwC,&wC,0,"bwC","wC");
      err = dsmat_copy_to( &bA, &A,0, "bA", "A");
      err = dsmat_copy_to( &bB, &B,0, "bB", "B");
      err = dsmat_copy_to( &bC, &C,0, "bC", "C");
      MPI_Barrier(MPI_COMM_WORLD);
      if (rank == 0) {
        if (vbose) {
          block_print(bwC.blocks[0][0].c, m, n, algo);
          block_print( bA.blocks[0][0].c, m, k, "gA");
          block_print( bB.blocks[0][0].c, k, n, "gB");
          block_print( bC.blocks[0][0].c, m, n, "gC");
        }
        t = clock();
        cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m,n,k,
                    1.0f, bA.blocks[0][0].c, k, bB.blocks[0][0].c, n,
                    0.0f, bC.blocks[0][0].c, n);
        t = clock() - t;
        time_taken = ((double)t/CLOCKS_PER_SEC);
        gflops_gemm(m,n,k, time_taken, &gflops);
        //printf("[%d] (g) measured_wtime = %fs | %f Gflop/s\n", rank, time_taken, gflops);
        printf("[%s] (g) measured_wtime = %fs | %f Gflop/s\n", hostname, time_taken, gflops);
        if (vbose)
          block_print(bC.blocks[0][0].c, m, n, "gresC");
        myblas_sgepxy(-1.0,bC.blocks[0][0].c,bwC.blocks[0][0].c, m,n);
        float nrm = cblas_snrm2(m*n,bwC.blocks[0][0].c,1);
        if (nrm < DBL_EPSILON) printf("GEMM is correct (%12.5e)\n",nrm);
        else printf("algorithm is not GEMM by %12.5e\n", nrm);
      }
      err = MPI_Barrier(MPI_COMM_WORLD);
      err = dsmat_destroy(&bwC,"bwC");
      err = dsmat_destroy( &bA,"bA");
      err = dsmat_destroy( &bB,"bB");
      err = dsmat_destroy( &bC,"bC");
    }
    MPI_Barrier(MPI_COMM_WORLD);
    err = dsmat_destroy(&wA,"wA");
    err = dsmat_destroy(&wB,"wB");
    err = dsmat_destroy(&wC,"wC");
  }
  err = MPI_Barrier(MPI_COMM_WORLD);
  err = dsmat_destroy(&A,"A");
  err = dsmat_destroy(&B,"B");
  err = dsmat_destroy(&C,"C");
  if (vbose)
    printf("[%s] matrices destroyed (%d) \n", hostname, err);
  //printf("[%d] matrices destroyed (%d) \n", rank, err);
  return MPI_Finalize();
}
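With the defaults above, a run on 4 MPI processes computes a 20x20x20 product on a 2x2 grid of 10x10 blocks. A more representative invocation might pass, for example, -m 512 -n 512 -k 512 -b 64 -p 2 -q 2 -a bcast -i 5 -c, where -c additionally compares the distributed result against a single cblas_sgemm performed on rank 0.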
74
TP2/src/test.c
Normal file
@@ -0,0 +1,74 @@
#include <mpi.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <float.h>
#include <cblas.h>
#include <time.h>
#include "utils.h"
#include "dsmat.h"
#include "gemms.h"

int main(int argc, char* argv[]) {

  int p, q;
  int m,n,k,b;
  int i,j,l,la = 0;
  int err, iter, niter;
  double d_start, d_stop; // on multiple nodes
  clock_t t; // on one node
  double time_taken, gflops;
  int node,tag;
  long unsigned int total_us;
  char name[100];
  char * algo;
  int vbose, check;
  MPI_Status status;

  m = 2;
  n = 4;
  k = 4;
  b = 2;
  p = 1;
  q = 2;

  // openblas_set_num_threads(1);
  srand(time(NULL));

  MPI_Init(NULL,NULL);
  int world_size, world_rank;
  MPI_Comm_size(MPI_COMM_WORLD, &world_size);
  MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
  printf("I am the %d-th node in a world of size %d\n", world_rank, world_size);
  if (p*q != world_size) {
    printf("bad world size\n");
    return 1;
  }

  err = MPI_Barrier(MPI_COMM_WORLD);
  if (err != MPI_SUCCESS) return 1;
  // this initialization could probably get better
  Matrix A = (Matrix){0}, B = (Matrix){0}, C = (Matrix){0};
  Matrix bA = (Matrix){0}, bB = (Matrix){0}, bC = (Matrix){0};
  Matrix wA = (Matrix){0}, wB = (Matrix){0}, wC = (Matrix){0}, bwC = (Matrix){0};
  printf("[%d] m,n,k = %d,%d,%d | b = %d | pxq = %dx%d | la = %d | test %f \n",
         world_rank, m,n,k, b, p,q, la, 1.0f);
  err = dsmat_fill_s(&A, m, k, b, p, q, "A");
  err = MPI_Barrier(MPI_COMM_WORLD);
  if (err != MPI_SUCCESS) return 1;

  err = dsmat_copy(&wA,&A);
  MPI_Barrier(MPI_COMM_WORLD);
  err = dsmat_copy_to(&wC,&A,0,"wC","A");
  printf("[%d] dsmat_copy_to.err = %d\n", world_rank, err);
  err = dsmat_destroy(&wA,"wA");
  err = dsmat_copy(&wA,&A);
  err = dsmat_destroy(&wA,"wA");
  err = dsmat_copy(&wA,&A);
  err = dsmat_destroy(&wC,"wC");
  err = dsmat_destroy(&A,"A");
  err = MPI_Barrier(MPI_COMM_WORLD);
  printf("[%d] matrices destroyed (%d) \n", world_rank, err);
  return MPI_Finalize();
}
85
TP2/src/utils.c
Normal file
@@ -0,0 +1,85 @@
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <cblas.h>
//#include <time.h>
#include <sys/time.h>
#include <unistd.h>
#include "utils.h"

void val_mat(int m, int n, float* mat, float val) {
  int i,j;
  for(i = 0; i<m; i++) {
    for(j = 0; j<n; j++) {
      mat[i*n+j] = val;
    }
  }
}

void rand_mat(int m, int n, float* mat, float max) {
  int i,j;
  for(i = 0; i<m; i++)
    for(j = 0; j<n; j++)
      mat[i*n+j] = ((float)rand()/RAND_MAX) * max;
}

// unused, more as a reminder
int item_2d(int i, int j, int m, int n) {
  return i*n + j;
}

long unsigned int time_interval(struct timeval start, struct timeval stop) {
  return labs( (stop.tv_sec - start.tv_sec) * 1000000 + stop.tv_usec - start.tv_usec );
}

void print_gflops(struct timeval stop, struct timeval start, int m, int n, int k, int b, int kernel, char* name, float * c) {
  long unsigned int total_us = time_interval(start, stop);
  printf("flops = %f\n", 2.0*m*n*k);
  printf("s = %f\n", total_us*pow(10,-6));
  float gflops = fabs( 2.0*m*n*k/(total_us*pow(10,3)) );
  printf("gflops = %f\n", gflops);
  float nrm;
  if (c == NULL) { nrm = -1.0; } else { nrm = cblas_snrm2(m*n, c, 1); }
  printf("%s took %lu µs => %f Gflop/s check: %f (block:%d, kernel:%d)\n", name, total_us, gflops, nrm, b, kernel);
  printf("CSV %d,%d,%d,%d,%d,%s,%lu,%f\n", m,n,k,b,kernel,name,total_us,gflops);
}

void print_mat(float* a, int m, int n, char* name) {
  int i,j;
  for (i = 0; i < m ; i++) {
    for (j = 0; j < n ; j++) {
      printf("%s[%d,%d] = %f,",name,i,j,a[n*i+j]);
    }
    printf("\n");
  }
  printf("\n");
}

// b = alpha*a + b
void myblas_sgepxy(float alpha, float* a, float* b, int m, int n) {
  int i;
  for (i = 0; i < m ; i++) {
    cblas_saxpy(n,alpha,&a[n*i],1,&b[n*i],1);
  }
}

void node_coordinates(int p, int q, int node, int* coordinates) {
  // node = q * c[0] + c[1]
  coordinates[1] = node % q;
  coordinates[0] = (node - coordinates[1])/q;
}

void node_coordinates_2i(int p, int q, int node, int* my_row, int* my_col) {
  // node = q * my_row + my_col
  *my_col = node % q;
  *my_row = (node - *my_col)/q;
}

int get_node(int p, int q, int i, int j) {
  return q*(i%p) + (j%q);
}

// cf stackoverflow (https://stackoverflow.com/questions/504810/how-do-i-find-the-current-machines-full-hostname-in-c-hostname-and-domain-info)
void get_host_name(char* hostname, int buffer_size) {
  hostname[buffer_size - 1] = '\0';
  gethostname(hostname, buffer_size - 1);
}
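The two coordinate helpers and get_node encode the same row-major mapping, node = q*row + col, with blocks distributed cyclically over the p x q grid. A small standalone illustration follows (a hypothetical driver, assuming the utils.h above is on the include path).

#include <stdio.h>
#include "utils.h"

// On a 2 x 3 grid, rank 4 sits at row 1, column 1, so block (1,1) is owned by rank 4;
// block (2,4) wraps around cyclically to rank 3*(2%2) + (4%3) = 1.
int main(void) {
  int p = 2, q = 3, row, col;
  node_coordinates_2i(p, q, 4, &row, &col);
  printf("rank 4 -> (%d,%d)\n", row, col);                     // (1,1)
  printf("owner of block (1,1): %d\n", get_node(p, q, 1, 1));  // 4
  printf("owner of block (2,4): %d\n", get_node(p, q, 2, 4));  // 1
  return 0;
}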
34
TP2/src/utils.h
Normal file
@@ -0,0 +1,34 @@
#ifndef PROGPARALLEL_UTILS_H
#define PROGPARALLEL_UTILS_H

#include <sys/time.h>

#define max(a,b) \
  ({ __typeof__ (a) _a = (a); \
     __typeof__ (b) _b = (b); \
     _a > _b ? _a : _b; })

// fill the content of mat with the value val
void val_mat(int m, int n, float* mat, float val);
// fill the content of mat with random values from 0.0 to max
void rand_mat(int m, int n, float* mat, float max);
// b = alpha*a + b
void myblas_sgepxy(float alpha, float* a, float* b, int m, int n);

// return the time between start and stop in µs
long unsigned int time_interval(struct timeval start, struct timeval stop);
// deprecated
void print_gflops(struct timeval stop, struct timeval start, int m, int n, int k, int b, int kernel, char* name, float * c);
// print the content of a[0:m-1,0:n-1] with the given name
void print_mat(float* a, int m, int n, char* name);

// fill coordinates according to node = q * coordinates[0] + coordinates[1]
void node_coordinates(int p, int q, int node, int* coordinates);
// fill my_row/my_col according to node = q * my_row + my_col
void node_coordinates_2i(int p, int q, int node, int* my_row, int* my_col);
// return the owner node of a block A_i,j on a p x q grid
int get_node(int p, int q, int i, int j);
// return i*n + j
int item_2d(int i, int j, int m, int n);

// get the name of the machine
void get_host_name(char* hostname, int buffer_size);
#endif
23
TP2/src/who_am_i.c
Normal file
@@ -0,0 +1,23 @@
#include <stdio.h>
#include <mpi.h>

int main( int argc, char *argv[] ) {

  int rank, size;
  int l;
  char name[MPI_MAX_PROCESSOR_NAME];

  //MPI_Init (&argc, &argv); /* starts MPI */
  MPI_Init (NULL, NULL); /* starts MPI */

  MPI_Comm_rank (MPI_COMM_WORLD, &rank); /* get current process id */
  MPI_Comm_size (MPI_COMM_WORLD, &size); /* get number of processes */

  MPI_Get_processor_name(name, &l); /* get processor name */

  printf("Hello world from process %d of %d on processor named %s\n", rank, size, name);

  MPI_Finalize();

  return 0;
}
BIN
TP2/subject_mpi.pdf
Normal file
Binary file not shown.
53
TP2/utils.sh
Executable file
@@ -0,0 +1,53 @@
TOOLS_DIR=/mnt/n7fs/ens/tp_guivarch/opt2021
SIMGRID_DIR=$TOOLS_DIR/simgrid-3.31
VITE_DIR=$TOOLS_DIR/vite

export PATH=${SIMGRID_DIR}/bin:${PATH}

# for check and bench

tmp=$HOME/tmp_simgrid
mkdir -p $tmp
my_mpirun="$SIMGRID_DIR/bin/smpirun -trace --cfg=smpi/tmpdir:$tmp"
traces="traces"
exec=build/bin/main

generate_hostfile() {
  N=${1:-4}
  mkdir -p hostfiles
  rm -f hostfiles/hostfile.$N.txt
  for i in $(seq 1 $N); do
    echo node-${i}.simgrid.org >>hostfiles/hostfile.$N.txt
  done
}

run() {
  human=${1:-0}
  mkdir -p $out
  echo $my_mpirun $mpi_options ${exec:-build/bin/main} -m $m -k $k -n $n -b $b -a $algo -p $p -q $q -i $iter $options
  $my_mpirun $mpi_options ${exec:-build/bin/main} -m $m -k $k -n $n -b $b -a $algo -p $p -q $q -i $iter $options &>$out/$algo.out
  echo reading $out/$algo.out
  correct=$(grep -i "gemm is correct" "$out/$algo.out" | wc -l)
  trial=$(grep "Gflop/s" $out/$algo.out | grep $algo | wc -l)
  echo Found $correct correct GEMM out of $trial

  while read line; do
    # [0] (p2p) measured_wtime = 0.000058s (la=0) | 0.002195 Gflop/s
    gflops=$(echo $line | grep -o "| .* Gflop/s" | grep -o "[0-9]\\+.[0-9]\\+")
    if [ $human -eq 0 ]; then
      echo "$m,$k,$n,$b,$p,$q,$algo,$la,$gflops"
    else
      echo "mxnxk=${m}x${n}x${k},b=$b,p x q = $p x $q | using $algo, (lookahead:$la) => $gflops Gflop/s"
    fi
    echo "$m,$k,$n,$b,$p,$q,$algo,$la,$gflops" >>$csv
  done < <(grep "Gflop/s" $out/$algo.out | grep $algo)

  if [ $la -gt 0 ]; then
    algo=$algo-$la
  fi

  mkdir -p $traces
  mv -f smpi_simgrid.trace $traces/$algo.trace
  echo You can open $traces/$algo.trace with $VITE_DIR/build/bin/vite
  echo
}
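utils.sh is meant to be sourced by the check and bench scripts: generate_hostfile N writes hostfiles/hostfile.N.txt, while run expects the caller to have set m, n, k, b, p, q, algo, la, iter, out and csv (plus the optional mpi_options and options variables) before invoking it; passing 1 as its first argument switches the echoed summary to the human-readable form.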