完整版SAS程序汇总.docx
《完整版SAS程序汇总.docx》由会员分享,可在线阅读,更多相关《完整版SAS程序汇总.docx(42页珍藏版)》请在冰豆网上搜索。
完整版SAS程序汇总
SAS基础
创建数据集
其他软件产生的标准格式文件与SAS数据集之间的互相转换。
/* Assign libref C to a permanent library folder. */
libname c "F:\郑亚_32620151151279";

/* Import an Excel workbook into a permanent SAS data set in library C.
   REPLACE overwrites the output data set if it already exists. */
proc import datafile='E:\data1_1.xls'
            dbms=excel
            replace
            out=c.data1_1_32620151151279;
    /* optional statement to pick a worksheet: sheet="sheet1$"; */
run;
从外部文件(文本文件)读取数据
/* Read five list-input fields per record from an external text file.
   A trailing $ marks the preceding variable as character. */
DATA b;
    INFILE 'E:\SYS.TXT';
    INPUT ID GENDER $ AGE BLOOD $ SURT;
RUN;
/* Build data set A from in-stream data lines. */
DATA A;
    INPUT ID GENDER $ AGE BLOOD $ SURT;
    /* ID etc. are variable names; $ marks the preceding variable as character */
    DATALINES; /* start of the in-stream data lines */
1 M 41 A 368
2 M 26 B 745
3 F 35 B 401
4 M 47 AB 552
5 F 37 A 478
6 F 39 O 628
7 M 28 O 549
8 M 31 B 128
9 M 43 AB 463
10 M 29 A 512
;

/* Route the following SAS output to a file that Excel can open. */
ODS HTML FILE='E:\RESULT.xls';
PROC PRINT DATA=A;
RUN;
ODS HTML CLOSE; /* close the ODS HTML destination */
/* Define value labels (numeric formats) for the coded variables. */
PROC FORMAT;
    VALUE GE 1='M' 2='F';               /* gender codes */
    VALUE BL 1='A' 2='B' 3='AB' 4='O';  /* blood-type codes */

/* Same records as before, with GENDER and BLOOD stored as numeric codes. */
DATA A;
    INPUT ID GENDER AGE BLOOD SURT;
    DATALINES;
1 1 41 1 368
2 1 26 2 745
3 2 35 2 401
4 1 47 3 552
5 2 37 1 478
6 2 39 4 628
7 1 28 4 549
8 1 31 2 128
9 1 43 3 463
10 1 29 1 512
;
RUN;

/* Print with the value labels applied, routed to an Excel-readable file. */
ODS HTML FILE='E:\RESULT.xls';
PROC PRINT DATA=A;
    FORMAT GENDER GE.; /* apply the value labels defined above */
    FORMAT BLOOD BL.;
RUN;
ODS HTML CLOSE;
排序
/* Sort A by AGE ascending into B; A itself is left unchanged. */
proc sort data=a out=b;
    by age;
run;

/* Sort A by AGE descending into C. */
proc sort data=a out=c;
    by descending age;
run;
计算产生新变量
/* Create data set B and read every observation from data set A into it. */
DATA B;
    SET A;
打开数据集B,并从数据集A读入数据。
/* Derive new variables from A:
   SURT_Y = SURT / 365, displayed with format 5.3;
   AGROUP = 1 when AGE > 40, otherwise 2. */
DATA D;
    FORMAT SURT_Y 5.3;
    SET A;
    SURT_Y = SURT / 365;
    IF AGE > 40 THEN AGROUP = 1;
    ELSE AGROUP = 2;
RUN;
数据集的拆分
/* Source data set for the splitting examples. */
DATA A;
    INPUT ID GENDER $ AGE BLOOD $ SURT;
    DATALINES; /* start of the in-stream data lines */
1 M 41 A 368
2 M 26 B 745
3 F 35 B 401
4 M 47 AB 552
5 F 37 A 478
6 F 39 O 628
7 M 28 O 549
8 M 31 B 128
9 M 43 AB 463
10 M 29 A 512
;

/* Split by rows: observations with GENDER='F' go to B, all others to C. */
DATA B C;
    SET A;
    IF GENDER = 'F' THEN OUTPUT B;
    ELSE OUTPUT C;
RUN;

/* Split by columns: B keeps everything except GENDER,
   C keeps everything except BLOOD. */
DATA B(DROP=GENDER) C(DROP=BLOOD);
    SET A;
RUN;
数据集的合并
纵向连接
/* Vertical concatenation: stack the observations of st1 and st2. */
data st1;
    input x y @@; /* @@ holds the line so several observations are read from it */
    cards;
1 20 2 40
;
proc print;

data st2;
    input x y @@;
    cards;
3 80 4 160
;
proc print;

/* SET with two data sets appends st2 after st1. */
data st;
    set st1 st2;
proc print;
run;
横向合并
/* Horizontal (match) merge of two data sets on the key variable NUM. */
data a;
    input num x @@;
    cards;
101 12 102 14 103 15
;
/* MERGE ... BY requires both inputs to be sorted by the key. */
proc sort out=aa;
    by num;

data b;
    input num y @@;
    cards;
101 100 102 134 103 145
;
proc sort out=bb;
    by num;

/* Join the sorted copies side by side, matching on NUM. */
data c;
    merge aa bb;
    by num;
proc print;
run;
计量资料的描述
计算几何均数
/* Geometric mean of x from a frequency table (x = value, f = frequency):
   take the frequency-weighted mean of log10(x), then back-transform. */
data ex2_5;
    input x f @@;
    y = log10(x);
    cards;
10 4
20 3
40 10
80 10
160 11
320 15
640 14
1280 2
;
/* Mean of y using f as the frequency; result saved to B, nothing printed. */
proc means noprint;
    var y;
    freq f;
    output out=b mean=logmean;
run;

/* Back-transform the mean of the logs: g = 10 ** logmean. */
data c;
    set b;
    g = 10**logmean;
proc print data=c;
    var g;
run;
制作频数表
/* Build a grouped frequency table for x.
   low is the lower limit of the first class and dis the class width;
   z = x - mod(x - low, dis) is the lower limit of the class containing x. */
data ex2_1;
    input x @@;
    low = 3.07;
    dis = 0.2;
    z = x - mod(x - low, dis);
    cards;
3.96 4.23 4.42 3.59 5.12 4.02 4.32 3.72 4.76 4.16 4.61 4.26
3.77 4.20 4.36 3.07 4.89 3.97 4.28 3.64 4.66 4.04 4.55 4.25
4.63 3.91 4.41 3.52 5.03 4.01 4.30 4.19 4.75 4.14 4.57 4.26
4.56 3.79 3.89 4.21 4.95 3.98 4.29 3.67 4.69 4.12 4.56 4.26
4.66 4.28 3.83 4.20 5.24 4.02 4.33 3.76 4.81 4.17 3.96 3.27
4.61 4.26 3.96 4.23 3.76 4.01 4.29 3.67 3.39 4.12 4.27 3.61
4.98 4.24 3.83 4.20 3.71 4.03 4.34 4.69 3.62 4.18 4.26 4.36
5.28 4.21 4.42 4.36 3.66 4.02 4.31 4.83 3.59 3.97 3.96 4.49
5.11 4.20 4.36 4.54 3.72 3.97 4.28 4.76 3.21 4.04 4.56 4.25
4.92 4.23 4.47 3.60 5.23 4.02 4.32 4.68 4.76 3.69 4.61 4.26
3.89 4.21 4.36 3.42 5.01 4.01 4.29 3.68 4.71 4.13 4.57 4.26
4.03 5.46 4.16 3.64 4.16 3.76
;
/* Count the observations falling in each class. */
proc freq;
    tables z;
run;
单变量描述
/* PROC MEANS with no statistic keywords: prints the default set
   (N, mean, standard deviation, minimum, maximum). */
proc means data=ex2_1;
    var x;
run;

/* PROC MEANS requesting N, mean, standard deviation, standard error,
   coefficient of variation and the 95% confidence limits of the mean. */
proc means data=ex2_1 n mean std stderr cv clm;
    var x;
run;
/* PROC MEANS on a frequency table: N, mean, standard deviation,
   minimum and maximum, with f giving the frequency of each x value. */
data prg4_4;
    input x f @@;
    cards;
3.17 2
3.37 3
3.57 9
3.77 14
3.97 22
4.17 30
4.37 21
4.57 15
4.77 10
4.97 6
5.17 4
5.37 2
;
proc means;
    freq f;
    var x;
run;

/* Same analysis with the printed statistics limited to 2 decimal places. */
proc means maxdec=2 data=prg4_4;
    freq f;
    var x;
run;
多变量描述
/* Descriptive statistics with PROC UNIVARIATE.
   The FREQ statement needs the frequency variable f, which exists in the
   frequency-table data set prg4_4 (ex2_1 holds only raw x values and has
   no f), so prg4_4 is the input here. */
proc univariate data=prg4_4;
    freq f;
    var x;
run;

/* Compute the 2.5% and 97.5% percentiles and save them to data set PCT.
   PCTLPRE=p with PCTLPTS=2.5 97.5 names the output variables p2_5 and p97_5. */
proc univariate data=prg4_4;
    freq f;
    var x;
    output out=pct pctlpre=p pctlpts=2.5 97.5;
run;
proc print data=pct;
run;
/* Normality tests (NORMAL) plus stem-and-leaf plot, box plot and
   normal probability plot (PLOT) for the raw values in ex2_1. */
proc univariate data=ex2_1 normal plot;
    var x;
run;
两样本均数的比较
单一总体均数的可信区间
/* 95% confidence interval for a single population mean from summary
   statistics: mean +/- t(0.975, n-1) * std / sqrt(n). */
data prg5_1;
    n = 10;
    mean = 166.95;
    std = 3.64;
    t = tinv(0.975, n-1);    /* two-sided 95% critical value of t */
    in = t*std/sqrt(n);      /* half-width of the interval */
    lclm = mean - in;
    uclm = mean + in;
proc print;
    var lclm uclm;
run;
两总体均数相差的可信区间
/* 95% confidence interval for the difference between two population
   means, using the pooled variance of the two samples. */
data prg5_2;
    n1 = 29;
    n2 = 32;
    m1 = 20.10;
    m2 = 16.89;
    s1 = 7.02;
    s2 = 8.46;
    ss1 = s1**2*(n1-1);                /* sum of squares, sample 1 */
    ss2 = s2**2*(n2-1);                /* sum of squares, sample 2 */
    sc2 = (ss1+ss2)/(n1+n2-2);         /* pooled variance */
    se = sqrt(sc2*(1/n1+1/n2));        /* standard error of m1 - m2 */
    t = tinv(0.975, n1+n2-2);          /* two-sided 95% critical value of t */
    lclm = (m1-m2) - t*se;
    uclm = (m1-m2) + t*se;
proc print;
    var t se lclm uclm;
run;
单样本均数的t检验
原始数据已知-ttest
/* One-sample t test from raw data against the null value H0 = 140.
   NOTE: no observations are listed between DATALINES and the terminating
   semicolon; supply the raw x values there before running. */
data prg5_1;
    input x @@;
    datalines;
;
run;

proc ttest h0=140;
    var x;   /* the semicolon is required, or RUN is read as a variable name */
run;
原始数据未知
某医生测量了36名从事铅作业男性工人的血红蛋白含量,算得其均数为130.83g/L,标准差为25.74g/L。
问从事铅作业工人的血红蛋白是否不同于正常成年男性平均值140g/L?
/* One-sample t test from summary statistics only (raw data unavailable):
   t = |sample mean - population mean| / (std / sqrt(n)), two-sided p value. */
data prg5_3;
    n = 36;
    s_m = 130.83;    /* sample mean */
    std = 25.74;     /* sample standard deviation */
    p_m = 140;       /* hypothesised population mean */
    df = n - 1;
    t = abs(s_m - p_m)/(std/sqrt(n));
    p = (1 - probt(t, df))*2;   /* two-sided p value */
proc print;
    var t p;
run;
配对资料两样本均数比较的t检验
dataprg5_2;
n1=29;
n2=32;
m1=20.10