Multiplicação Matriz-Vector
Solução II
$Y_i = \sum_{j=1}^{N} A_{ij} X_j$
> A dividida por colunas
( N/P multiplicações + (N/P-1) somas )
Soma Colectiva
> X dividido por blocos
N = k*P
Y calculado em
$\sim 2N^2 \, T_{vf} / P$
=
0
1
2
3
4
+ comunicações
Exercício
Implemente a multiplicação de uma matriz por um vector usando a
decomposição discutida no slide anterior.
int MPI_Allreduce ( void *sendbuf, void *recvbuf, int count,
MPI_Datatype datatype,
MPI_Op op,
MPI_Comm comm )
Tipos Básicos pré-definidos
Derived Data Types
MPI_BYTE
MPI_CHAR ................ signed char
MPI_DOUBLE .............. double
MPI_FLOAT ............... float
MPI_INT ................. int
MPI_LONG ................ long
MPI_LONG_LONG_INT ....... long long
MPI_LONG_DOUBLE ......... long double
MPI_PACKED
MPI_SHORT ............... short
MPI_UNSIGNED_CHAR ....... unsigned char
MPI_UNSIGNED ............ unsigned int
MPI_UNSIGNED_LONG ....... unsigned long
MPI_UNSIGNED_SHORT ...... unsigned short
• Dados não estão em endereços de memória sucessivos
• Dados não são todos do mesmo tipo
Recurso ao envio de várias mensagens e/ou buffers temporários
maiores overheads
Envio de uma Submatriz
for (i=0; i<n; ++i)
MPI_Send( &a[shift_linha + i][shift_coluna], m, MPI_DOUBLE,
dest, tag, MPI_COMM_WORLD);
PACKING utilização de um buffer contíguo definido pelo utilizador
MPI_PACKED = tipo de dados a enviar
MPI_Pack(void *inbuf, int incount, MPI_Datatype datatype,
void *outbuf, int outcount, int *position, MPI_Comm comm)
MPI_Pack_size(int incount, MPI_Datatype datatype, MPI_Comm comm,
int *size)
MPI_Pack_size( n*m, MPI_DOUBLE, MPI_COMM_WORLD, &buffersize);
buffer = malloc( (unsigned) buffersize );
conta = 0;
for (i=0; i < n; i++){
    /* MPI_Pack actualiza 'conta' (posição no buffer) automaticamente */
    MPI_Pack(&A[shift_linha+i][shift_coluna], m, MPI_DOUBLE,
             buffer, buffersize, &conta, MPI_COMM_WORLD);
}
MPI_Send(buffer, conta, MPI_PACKED, dest, tag, MPI_COMM_WORLD);
MPI_Recv(buffer, length, MPI_PACKED, source, tag,
         MPI_COMM_WORLD, &status);
posicao = 0;
for (i=0; i < n; i++){
    /* MPI_Unpack actualiza 'posicao' automaticamente */
    MPI_Unpack( buffer, length, &posicao,
                &A[shift_linha+i][shift_coluna], m, MPI_DOUBLE,
                MPI_COMM_WORLD);
}
MPI_Unpack(void *inbuf, int insize, int *position,
void *outbuf, int outcount, MPI_Datatype datatype,
MPI_Comm comm)
Packing on the fly
elimina a operação de packing e a necessidade de
utilização de um buffer intermédio
i) Construção do derived datatype: MPI_Type_contiguous, MPI_Type_vector,
MPI_Type_hvector, MPI_Type_indexed, MPI_Type_hindexed,
MPI_Type_struct
ii) commit o datatype: MPI_Type_commit
MPI_Type_free(datatype)
datatype = MPI_DATATYPE_NULL
MPI_Type_contiguous( int count, MPI_Datatype oldtype,
MPI_Datatype *newtype)
MPI_Type_commit( MPI_Datatype *datatype)
MPI_Type_free(MPI_Datatype *datatype)
double a[size][size], b[size];
MPI_Datatype linha;
...
MPI_Type_contiguous( size, MPI_DOUBLE, &linha);
MPI_Type_commit(&linha);
...
MPI_Send(&a[i][0], 1, linha, ... );
...
MPI_Recv( b, size, MPI_DOUBLE, ... );
...
MPI_Type_free(&linha);
MPI_Type_vector( int count, int blocklength, int stride,
MPI_Datatype oldtype, MPI_Datatype *newtype )
stride
Count = 3
blocklength
double a[size][size], b[size];
MPI_Datatype coluna;
...
MPI_Type_vector( size, 1, size, MPI_DOUBLE, &coluna);
MPI_Type_commit(&coluna);
...
MPI_Send( &a[0][i], 1, coluna, ... );
MPI_Recv( b, size, MPI_DOUBLE, ... );
...
MPI_Type_free(&coluna);
MPI_Type_indexed( int count, int *array_of_blocklengths,
int *array_of_displacements,
MPI_Datatype oldtype, MPI_Datatype *newtype )
int blocklengths[2], displacements[2];
double a[16], b[6];
MPI_Datatype indexado;
blocklengths[0] = 4;
blocklengths[1] = 2;
displacements[0] = 5;
displacements[1] = 12;
...
MPI_Type_indexed(2, blocklengths, displacements, MPI_DOUBLE,
&indexado);
MPI_Type_commit(&indexado);
...
MPI_Send(a, 1, indexado, ...); /* envia a[5]-a[8] + a[12]-a[13] */
...
MPI_Recv( b, 6, MPI_DOUBLE, ... );
MPI_Type_struct( int count, int *array_of_blocklengths,
MPI_Aint *array_of_displacements,
MPI_Datatype *array_of_types,
MPI_Datatype *newtype);
MPI_Type_get_extent( MPI_Datatype datatype, MPI_Aint *lb,
MPI_Aint *extent );
lb = lower bound = mínimo dos displacements
typedef long MPI_Aint;
extent = onde está o endereço do próximo bloco de dados
typedef struct{ double x, y, z; double velocidade; int n, tipo; } particula;
particula p[NElementos], particulas[NElementos];
MPI_Datatype tipoparticula, oldtypes[2];
int blockcounts[2];
MPI_Aint offsets[2], extent, lb;
/* coordenadas e velocidade */
offsets[0] = 0; oldtypes[0] = MPI_DOUBLE; blockcounts[0] = 4;
MPI_Type_get_extent( MPI_DOUBLE, &lb, &extent);
/* incluir o número e o tipo */
offsets[1] = 4*extent; oldtypes[1] = MPI_INT; blockcounts[1] = 2;
MPI_Type_struct( 2, blockcounts, offsets, oldtypes, &tipoparticula );
MPI_Type_commit( &tipoparticula );
MPI_Send(particulas, NElementos, tipoparticula, ... );
MPI_Recv(p, NElementos, tipoparticula, ... );