矩阵相乘的并行算法的设计与实现.docx

资源描述

矩阵相乘的并行算法的设计与实现.docx

《矩阵相乘的并行算法的设计与实现.docx》由会员分享，可在线阅读，更多相关《矩阵相乘的并行算法的设计与实现.docx（11页珍藏版）》请在冰豆网上搜索。

矩阵相乘的并行算法的设计与实现.docx

矩阵相乘的并行算法的设计与实现

仲恺农业工程学院实验报告纸

计算机科学与工程学院（院、系） 网络工程专业 083班 组 并行计算应用试验课

学号：200810224311 姓名：李志冬 实验日期：2011-05-19 教师评定：

实验三矩阵相乘的并行算法的设计与实现

一、实验目的

理解和掌握矩阵相乘的并行算法的设计思想以及实现原理

二、实验内容

编译和运行一个两矩阵相乘算法的并行程序

三、实验步骤

1使用vi编辑器输入并行计算的代码，保存在multi.c中

/*
 * multi.c - master/worker matrix multiplication C = A * B using MPI.
 *
 * Rank MASTER fills A and B, hands each worker a contiguous band of rows
 * of A together with the whole of B, then collects the matching bands of
 * C and prints the result. Every other rank multiplies its band and sends
 * it back.
 *
 * NOTE(review): the message literals below contain no spaces. That looks
 * like whitespace lost when the listing was extracted, but they are kept
 * byte-for-byte so the program output still matches the recorded runs.
 */
#include <stdio.h>

#include "mpi.h"

#define NRA 62        /* rows of A and of C */
#define NCA 15        /* columns of A, rows of B */
#define NCB 7         /* columns of B and of C */
#define MASTER 0      /* rank that distributes work and gathers results */
#define FROM_MASTER 1 /* message tag for master -> worker traffic */
#define FROM_WORKER 2 /* message tag for worker -> master traffic */

MPI_Status status;

/*
 * Program entry point.
 *
 * argc, argv are forwarded to MPI_Init.
 * Returns 0 on success, 1 when started with fewer than two MPI tasks
 * (there would be no worker to receive any rows).
 */
int main(int argc, char *argv[])
{
    int numtasks,   /* total number of MPI tasks */
        taskid,     /* rank of this task */
        numworkers, /* numtasks minus the master */
        source,     /* rank a message is received from */
        dest,       /* rank a message is sent to */
        mtype,      /* message tag currently in use */
        rows,       /* number of rows of A in one band */
        averow,     /* NRA / numworkers */
        extra,      /* NRA % numworkers: bands that get one extra row */
        offset,     /* index of the first row of a band within A and C */
        i, j, k,
        count;      /* element count of the message being sent/received */
    double a[NRA][NCA], b[NCA][NCB], c[NRA][NCB];

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &taskid);
    MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
    numworkers = numtasks - 1;

    /*
     * With a single task numworkers is 0 and the row distribution below
     * would divide by zero, so stop cleanly instead.
     */
    if (numworkers < 1) {
        fprintf(stderr, "Need at least two MPI tasks (one master and one worker)\n");
        MPI_Finalize();
        return 1;
    }

    if (taskid == MASTER) {
        printf("Numberofworkertasks=%d\n", numworkers);

        for (i = 0; i < NRA; i++)
            for (j = 0; j < NCA; j++)
                a[i][j] = i + j;
        for (i = 0; i < NCA; i++)
            for (j = 0; j < NCB; j++)
                b[i][j] = i * j;

        /* The first `extra` workers receive averow + 1 rows, the rest averow. */
        averow = NRA / numworkers;
        extra = NRA % numworkers;
        offset = 0;
        mtype = FROM_MASTER;
        for (dest = 1; dest <= numworkers; dest++) {
            rows = (dest <= extra) ? averow + 1 : averow;
            printf("sending%drowstotask%d\n", rows, dest);
            MPI_Send(&offset, 1, MPI_INT, dest, mtype, MPI_COMM_WORLD);
            MPI_Send(&rows, 1, MPI_INT, dest, mtype, MPI_COMM_WORLD);
            count = rows * NCA;
            MPI_Send(&a[offset][0], count, MPI_DOUBLE, dest, mtype, MPI_COMM_WORLD);
            count = NCA * NCB;
            MPI_Send(&b[0][0], count, MPI_DOUBLE, dest, mtype, MPI_COMM_WORLD);
            offset = offset + rows;
        }

        /* Collect each worker's band and store it at its offset in C. */
        mtype = FROM_WORKER;
        for (i = 1; i <= numworkers; i++) {
            source = i;
            MPI_Recv(&offset, 1, MPI_INT, source, mtype, MPI_COMM_WORLD, &status);
            MPI_Recv(&rows, 1, MPI_INT, source, mtype, MPI_COMM_WORLD, &status);
            count = rows * NCB;
            MPI_Recv(&c[offset][0], count, MPI_DOUBLE, source, mtype, MPI_COMM_WORLD, &status);
        }

        printf("Hereistheresultmatrix\n");
        for (i = 0; i < NRA; i++) {
            printf("\n");
            for (j = 0; j < NCB; j++)
                printf("%6.2f", c[i][j]);
        }
        printf("\n");
    }

    if (taskid > MASTER) {
        mtype = FROM_MASTER;
        source = MASTER;
        printf("Master=%d,mtype=%d\n", source, mtype);
        MPI_Recv(&offset, 1, MPI_INT, source, mtype, MPI_COMM_WORLD, &status);
        printf("offset=%d\n", offset);
        MPI_Recv(&rows, 1, MPI_INT, source, mtype, MPI_COMM_WORLD, &status);
        printf("rows=%d\n", rows);
        /* The band is stored from row 0 of the local a, not at `offset`. */
        count = rows * NCA;
        MPI_Recv(&a[0][0], count, MPI_DOUBLE, source, mtype, MPI_COMM_WORLD, &status);
        printf("a[0][0]=%e\n", a[0][0]);
        count = NCA * NCB;
        MPI_Recv(&b[0][0], count, MPI_DOUBLE, source, mtype, MPI_COMM_WORLD, &status);
        printf("b=\n");

        /* Multiply the local band: c[i][k] = sum over j of a[i][j] * b[j][k]. */
        for (k = 0; k < NCB; k++)
            for (i = 0; i < rows; i++) {
                c[i][k] = 0.0;
                for (j = 0; j < NCA; j++)
                    c[i][k] = c[i][k] + a[i][j] * b[j][k];
            }

        /* Echo offset and rows back so the master knows where the band goes. */
        mtype = FROM_WORKER;
        printf("aftercomputer\n");
        MPI_Send(&offset, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD);
        MPI_Send(&rows, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD);
        MPI_Send(&c[0][0], rows * NCB, MPI_DOUBLE, MASTER, mtype, MPI_COMM_WORLD);
        printf("aftersend\n");
    }

    MPI_Finalize();
    return 0;
}

2编译multi.c

mpicc -o multi.o multi.c

3启动mpd后台程序

mpd&

4在单机上运行multi.o

mpirun -np 10 ./multi.o

5在多台计算机上运行multi.o

（1）编辑并行计算的主机文件nodelist

node1:

node2:

node3:

（2）运行并行计算程序

mpirun -machinefile nodelist -np 2 ./multi.o

四、实验结果

单机运行结果：

Master=0,mtype=1

Numberofworkertasks=9

sending7rowstotask1

sending7rowstotask2

sending7rowstotask3

sending7rowstotask4

sending7rowstotask5

sending7rowstotask6

sending7rowstotask7

sending7rowstotask8

sending6rowstotask9

Master=0,mtype=1

offset=0

rows=7

a[0][0]=0.000000e+00

aftercomputer

aftersend

Master=0,mtype=1

offset=7

rows=7

a[0][0]=7.000000e+00

aftercomputer

aftersend

Master=0,mtype=1

offset=14

rows=7

a[0][0]=1.400000e+01

aftercomputer

aftersend

Master=0,mtype=1

offset=28

rows=7

a[0][0]=2.800000e+01

aftercomputer

aftersend

Master=0,mtype=1

offset=56

rows=6

a[0][0]=5.600000e+01

aftercomputer

aftersend

Master=0,mtype=1

offset=21

rows=7

a[0][0]=2.100000e+01

aftercomputer

aftersend

Master=0,mtype=1

offset=49

rows=7

a[0][0]=4.900000e+01

aftercomputer

aftersend

offset=35

rows=7

a[0][0]=3.500000e+01

aftercomputer

aftersend

offset=42

rows=7

a[0][0]=4.200000e+01

aftercomputer

aftersend

Hereistheresultmatrix

双机运行的结果：

Numberofworkertasks=1

sending62rowstotask1

Master=0,mtype=1

offset=0

rows=62

a[0][0]=0.000000e+00

aftercomputer

aftersend

Hereistheresultmatrix

计算出来的矩阵与单机的相同

五、实验心得

本次实验是通过程序生成两个静态的矩阵，然后计算出两个矩阵的乘积。

通过查看程序可知，通过数组生成矩阵之后采用两台主机同时并行计算出矩阵的乘积，这涉及到了机群之间的通信，通过通信才能协作进行海量数据的计算。

展开阅读全文