#include #include #include "trace.h" #include "common.h" #include /* This struct defines a task; it can be a panel (Task.type==PNL), an update (Task.type==UPD) or a termination message (Task.type==END). If Task.type==PNL, then the panel operation has to be executed on column Task.p. If Task.type==UPD the the update operation has to be executed on column Task.u using column Task.p.*/ typedef struct taskstruct{ Type type; int p; int u; } Task; /* This struct defines a progress table (see details below). */ typedef struct prog_table_struct{ int NB; int *table; } ProgTable; Task fetch_task( ProgTable ptable ); /* This routine performs a parallel LU factorization based on the dependencies among operations. These dependencies are represented in a progress table of the type ProgTable above. This progress table is nothing more than an array of size NB (number of block-columns in the matrix) where each coefficient tracks the status of a block-column. Precisely, ProgTabl.table[j]=i means that block-column j is up-to-date with respect to the panel operation i, i.e., the operation update(i,j) has bee executed. Equivalently ProgTable[i]=i means that the operation panel(i) has been already executed. Two main rules define the order in which operations have to be executed: 1) panel(i) can only be executed after update(0,i), update(1,i),...,update(i-1,i), i.e., only if ProgTable[i]=i-1. 2) update(i,j) can only be executed after panel(i) and after update(1,i),...,update(i-1,i), i.e., only if ProgTable[j]=i-1 and ProgTable[i]=i. This code works as follows: each thread enters an "endless" loop where at each iteration it performs the following steps: 1) calls the fetch_task. This routine analyses the progress table looking for operations to perform according to the rules above 2) if one operation is found, the corresponding action is executed and the progress table is updated accordingly. Note that in a sequential execution there will always be an operation ready for being executed whereas in parallel this may not be the case. Threads exit from the loop when all operations are performed, i.e., wher ProgTable[NB]=NB (i.e., panel(NB) has been performed). The initial code is sequential. OpenMP directives have to be added in the two routines below to parallelize it. */ void lu_par_dag(Matrix A, info_type info){ ProgTable ptable; Task task; int i; ptable.table = (int*)malloc(info.NB*sizeof(int)); ptable.NB = info.NB; for(i=0; i