R软件期末考试复习提纲说课材料.docx
《R软件期末考试复习提纲说课材料.docx》由会员分享,可在线阅读,更多相关《R软件期末考试复习提纲说课材料.docx(14页珍藏版)》请在冰豆网上搜索。
![R软件期末考试复习提纲说课材料.docx](https://file1.bdocx.com/fileroot1/2022-10/10/2cf11122-ca17-4878-8f3e-f61fb0fecfe9/2cf11122-ca17-4878-8f3e-f61fb0fecfe91.gif)
R软件期末考试复习提纲说课材料
R软件期末考试复习提纲
# Final-exam review
# Part 1: matrices and data frames
# 1. Building specific matrices and data frames
# Matrices
# Method 1: build the matrix with array(), then attach row/column names
a <- array(1:10, dim = c(2, 5))
rownames(a) <- 1:2
colnames(a) <- c("one", "two", "three", "four", "five")
a
# Same labels, set in one step through dimnames()
dimnames(a) <- list(1:2, c("one", "two", "three", "four", "five"))
# These create variables named nrow/ncol; calls such as nrow(a) still
# resolve to the functions because R skips non-function objects when
# looking up a name in call position.
nrow <- nrow(a)
ncol <- ncol(a)
dim(a)
# Method 2: build the matrix with matrix(), filled column by column
a <- matrix(1:10, nrow = 2, byrow = FALSE)
rownames(a) <- 1:2
colnames(a) <- c("one", "two", "three", "four", "five")
# Same result with the names supplied directly in the constructor
a <- matrix(
  1:10,
  nrow = 2,
  byrow = FALSE,
  dimnames = list(1:2, c("one", "two", "three", "four", "five"))
)
# Creating data frames
# Directly from named column vectors
df <- data.frame(
  Name = c("Alice", "Becka", "James", "Jeffrey", "John"),
  Sex = c("F", "F", "M", "M", "M"),
  Age = c(13, 13, 12, 13, 12),
  Height = c(56.5, 65.3, 57.3, 62.5, 59.0),
  Weight = c(84.0, 98.0, 83.0, 84.0, 99.5)
)
df
# The same data held as a list of equal-length components
Lst <- list(
  Name = c("Alice", "Becka", "James", "Jeffrey", "John"),
  Sex = c("F", "F", "M", "M", "M"),
  Age = c(13, 13, 12, 13, 12),
  Height = c(56.5, 65.3, 57.3, 62.5, 59.0),
  Weight = c(84.0, 98.0, 83.0, 84.0, 99.5)
)
Lst
# [[ ]] and $ extract the component itself; [ ] returns a sub-list
Lst[["Name"]]
Lst["Name"]
Lst[1]
Lst[[1]]
Lst$Name
# A list of equal-length vectors converts to a data frame
df <- as.data.frame(Lst)
df
# So does a matrix/array
x <- array(1:6, dim = c(2, 3))
as.data.frame(x)
# Referencing parts of a data frame
df[1:2, 3:5]
df[["Height"]]
df$Weight
names(df) # this attribute is never empty for a data frame
rownames(df) <- c("one", "two", "three", "four", "five")
df
# Evaluate an expression with the columns in scope. with() replaces the
# attach()/detach() pair: nothing is left on the search path, and the
# columns are never referenced after they have been detached (the original
# final line failed with "object 'Height' not found").
r <- with(df, Height / Weight)
r
df$r <- r
names(df)
# Outside with(), columns must be reached through the data frame itself
r <- df$Height / df$Weight
# 2. Matrix operations
a <- diag(1:3)
# Set the element in row 2, column 1 (linear index 2) to 1
a[2, 1] <- 1
a
# (1) Transpose
t(a)
# (2) Determinant
det(a)
# (3) Inner product of two vectors (three equivalent forms)
x <- 1:5
y <- 2 * 1:5
x %*% y
t(x) %*% y
crossprod(x, y)
# (4) Outer product of two vectors (four equivalent forms)
x %*% t(y)
tcrossprod(x, y)
outer(x, y)
x %o% y
# Matrix multiplication
a <- array(1:9, dim = c(3, 3))
b <- array(9:1, dim = c(3, 3))
x <- 1:3
a * b # element-wise product
a %*% b # matrix product
x %*% a %*% x # quadratic form
crossprod(a, b) # t(a) %*% b
tcrossprod(a, b) # a %*% t(b)
# Matrix inverse
# array(1:9, dim = c(3, 3)) has determinant 0, so solve() stops with a
# "singular" error. Use an invertible matrix for the inverse/solve demo.
a <- array(c(1:8, 10), dim = c(3, 3))
solve(a)
b <- 1:3
solve(a, b) # solution of a %*% x = b
# Eigenvalues and eigenvectors
sm <- eigen(a)
sm
# Singular value decomposition
e <- diag(1:3)
svde <- svd(e)
svde
# Rebuild e from its factors; with() keeps u, d, v off the search path
# (the earlier attach(svde) was never detached).
with(svde, u %*% diag(d) %*% t(v))
# Functions related to matrix computations
# Dimensions
a <- diag(1:4)
nrow(a)
ncol(a)
# Combining matrices by columns (cbind) and by rows (rbind)
x1 <- rbind(c(1, 2), c(3, 4))
x2 <- x1 + 10
x3 <- cbind(x1, x2)
x3
x4 <- rbind(x1, x2)
x4
# A scalar is recycled into a full column
cbind(1, x1)
# Flattening a matrix into a vector (column-major order)
a <- matrix(
  1:6,
  ncol = 2,
  dimnames = list(
    c("one", "two", "three"),
    c("first", "second")
  ),
  byrow = TRUE
)
as.vector(a)
# apply(): margin 1 = rows, margin 2 = columns
apply(a, 1, mean)
apply(a, 2, sum)
# tapply(): mean of the values within each factor level
tapply(1:5, factor(c("f", "f", "m", "m", "m")), mean)
# Question 2
# Draw a standard-normal sample
x <- rnorm(100, 0, 1)
x
# Histogram of the sample on the density scale
hist(x, freq = FALSE)
# Kernel density estimate, printed and then overlaid on the histogram
density(x)
lines(density(x), col = "blue")
# Overlay the normal density using the sample mean and sd
y <- seq(-4, 3, 0.2)
lines(y, dnorm(y, mean(x), sd(x)), col = "red")
# Empirical distribution function of a second normal sample,
# with the fitted normal CDF on top
z <- rnorm(50, 0, 1)
plot(ecdf(z), do.points = FALSE, verticals = TRUE)
d <- seq(-3, 2, 0.2)
lines(d, pnorm(d, mean(z), sd(z)), col = "red")
# Same comparison for a Poisson sample
y <- rpois(100, 2)
plot(ecdf(y), col = "red", verticals = TRUE, do.points = FALSE)
x <- 0:8
lines(x, ppois(x, mean(y)), col = "blue")
# Fixed sample of 15 observations. The original was missing the comma
# between 69.0 and 56.9 at the line break, which is a syntax error.
w <- c(
  75, 64, 47.4, 66.9, 62.2, 62.2, 58.7, 63.5, 66.6, 64.0, 57.0, 69.0,
  56.9, 50.0, 72.0
)
# Histogram on the density scale with a kernel density estimate
hist(w, freq = FALSE)
lines(density(w), col = "blue")
# Overlay the normal density using the sample mean and sd
x <- 44:76
lines(x, dnorm(x, mean(w), sd(w)), col = "red")
# Empirical distribution function with the fitted normal CDF on top
plot(ecdf(w), do.points = FALSE, verticals = TRUE)
lines(x, pnorm(x, mean(w), sd(w)), col = "red")
# Descriptive statistics of a numeric sample.
#
# Args:
#   x: numeric vector holding the sample.
#
# Returns:
#   A one-row data.frame with columns
#     N         sample size
#     Mean      sample mean
#     Var       sample variance
#     std_dev   sample standard deviation
#     Median    sample median
#     std_mean  standard error of the mean, s / sqrt(n)
#     CV        coefficient of variation in percent, 100 * s / mean
#     CSS       corrected sum of squares, sum((x - mean)^2)
#     USS       uncorrected sum of squares, sum(x^2)
#     R         range, max - min
#     R1        interquartile range
#     Skewness  sample skewness
#     Kurtosis  sample excess kurtosis
#
# The skewness/kurtosis formulas divide by (n - 2) and (n - 3), so they are
# not finite for samples with fewer than 4 observations.
data_outline <- function(x) {
  n <- length(x)
  m <- mean(x)
  v <- var(x)
  s <- sd(x)
  me <- median(x)
  cv <- 100 * s / m
  css <- sum((x - m)^2)
  uss <- sum(x^2)
  R <- max(x) - min(x) # sample range
  R1 <- quantile(x, 3 / 4) - quantile(x, 1 / 4) # interquartile range
  sm <- s / sqrt(n) # standard error of the mean
  g1 <- n / ((n - 1) * (n - 2)) * sum((x - m)^3) / s^3
  # The correction term must be part of the same expression. The trailing
  # "-" keeps the statement open across the line break; previously the
  # second line was parsed as a separate statement and silently discarded.
  g2 <- n * (n + 1) / ((n - 1) * (n - 2) * (n - 3)) * sum((x - m)^4) / s^4 -
    3 * (n - 1)^2 / ((n - 2) * (n - 3))
  # row.names = 1 labels the single row "1" (otherwise the quantile name
  # carried by R1 can end up as the row name).
  data.frame(
    N = n, Mean = m, Var = v, std_dev = s,
    Median = me, std_mean = sm, CV = cv, CSS = css, USS = uss,
    R = R, R1 = R1, Skewness = g1, Kurtosis = g2, row.names = 1
  )
}
x <- rnorm(100)
data_outline(x)
# Question 3
# The r/p/q/d prefixes: random draws, distribution function (CDF),
# quantile function, and density / probability mass function
# Normal
rnorm(100, 0, 1)
pnorm(1:5, 0, 1)
dnorm(-3:3, 0, 1)
qnorm(seq(0, 1, 0.25), 0, 1)
# Beta
rbeta(100, 2, 2)
# Binomial
rbinom(100, 100, 0.5)
pbinom(1:100, 100, 0.5)
dbinom(1:5, 100, 0.5)
qbinom(seq(0, 1, 0.1), 100, 0.5)
# Chi-squared
rchisq(100, 1)
qchisq(seq(0, 1, 0.2), 10)
pchisq(1:10, 10)
dchisq(1:10, 10)
# Exponential
rexp(100, 0.5)
# Poisson
rpois(100, 2)
ppois(1:1000, 2)
dpois(1:100, 2)
# Uniform
runif(100, 0, 1)
qunif(c(0, 0.2, 0.8), 0, 1)
punif(seq(0, 1, 0.2), 0, 1)
dunif(seq(0, 1, 0.01), 0, 1)
# Student t
rt(100, 2)
qt(0.8, 2)
pt(-3:3, 2)
dt(-3:3, 2)
# F
rf(100, 1, 2)
qf(0.8, 1, 2)
#四置信区间
#1
#(1)sigma已知
# Confidence interval for a normal mean when sigma is known (z interval).
#
# Args:
#   x:     numeric vector holding the sample.
#   side:  side < 0 gives the upper-bounded interval (-Inf, b];
#          side > 0 gives the lower-bounded interval [a, Inf);
#          side == 0 (default) gives the two-sided interval [a, b].
#   sigma: known population standard deviation (default 1).
#   alpha: 1 - confidence level (default 0.05).
#
# Returns:
#   A one-row data.frame with the sample mean (`mean`) and the interval
#   end points `a` and `b`.
interval_estimate1 <- function(x, side = 0, sigma = 1, alpha = 0.05) {
  xb <- mean(x)
  n <- length(x)
  se <- sigma / sqrt(n)
  # "else if" must be two words in R; the fused "elseif" did not parse.
  if (side < 0) {
    tmp <- se * qnorm(1 - alpha)
    a <- -Inf
    b <- xb + tmp
  } else if (side > 0) {
    tmp <- se * qnorm(1 - alpha)
    a <- xb - tmp
    b <- Inf
  } else {
    # Two-sided: split alpha evenly between the tails
    tmp <- se * qnorm(1 - alpha / 2)
    a <- xb - tmp
    b <- xb + tmp
  }
  data.frame(mean = xb, a = a, b = b)
}
x <- rnorm(100, 0, 4)
interval_estimate1(x, sigma = 4, side = 0)
interval_estimate1(x, sigma = 4, side = -1)
interval_estimate1(x, sigma = 4, side = 1)
#(2)sigma未知
# Confidence interval for a normal mean when sigma is unknown (t interval).
#
# Args:
#   x:     numeric vector holding the sample.
#   side:  side < 0 gives the upper-bounded interval (-Inf, b];
#          side > 0 gives the lower-bounded interval [a, Inf);
#          side == 0 (default) gives the two-sided interval [a, b].
#   alpha: 1 - confidence level (default 0.05).
#
# Returns:
#   A one-row data.frame with the sample mean (`mean`) and the interval
#   end points `a` and `b`.
interval_estimate2 <- function(x, side = 0, alpha = 0.05) {
  xb <- mean(x)
  n <- length(x)
  se <- sd(x) / sqrt(n)
  # "else if" must be two words in R; the fused "elseif" did not parse.
  if (side < 0) {
    tmp <- se * qt(1 - alpha, n - 1)
    a <- -Inf
    b <- xb + tmp
  } else if (side > 0) {
    tmp <- se * qt(1 - alpha, n - 1)
    a <- xb - tmp
    b <- Inf
  } else {
    # Two-sided: split alpha evenly between the tails
    tmp <- se * qt(1 - alpha / 2, n - 1)
    a <- xb - tmp
    b <- xb + tmp
  }
  data.frame(mean = xb, a = a, b = b)
}
x <- rnorm(100, 0, 1)
interval_estimate2(x, side = -1)
interval_estimate2(x, side = 0)
interval_estimate2(x, side = 1)
# Cross-check against t.test(). It has no `side` argument (an unknown
# argument is swallowed by `...`, so every call returned the two-sided
# interval); the direction is selected with `alternative`.
t.test(x, alternative = "less")
t.test(x, alternative = "two.sided")
t.test(x, alternative = "greater")
# Two-sample confidence interval for the difference of means when the two
# population variances are equal but unknown (pooled-variance t interval).
#
# Args:
#   x, y:  numeric vectors holding the two samples.
#   alpha: 1 - confidence level (default 0.05).
#
# Returns:
#   A one-row data.frame with the difference of sample means (`mean`) and
#   the two-sided interval end points `a` and `b`.
interval_estimate3 <- function(x, y, alpha = 0.05) {
  xb <- mean(x)
  yb <- mean(y)
  n1 <- length(x)
  n2 <- length(y)
  # Pooled degrees of freedom are n1 + n2 - 2. The original divided by
  # n1 + n1 - 2, which is only correct when both samples have equal size.
  dof <- n1 + n2 - 2
  sw <- ((n1 - 1) * var(x) + (n2 - 1) * var(y)) / dof
  tmp <- sqrt((1 / n1 + 1 / n2) * sw) * qt(1 - alpha / 2, dof)
  a <- xb - yb - tmp
  b <- xb - yb + tmp
  data.frame(mean = xb - yb, a = a, b = b)
}
x <- rnorm(100, 0, 1)
y <- rnorm(100, 1, 1)
interval_estimate3(x, y)
# var.equal = TRUE selects the pooled-variance test, matching the function
# above; the default is the Welch (unequal-variance) test.
t.test(x, y, var.equal = TRUE)
-0.0364