本內容為【科研私家菜】R語言機器學習與臨床預測模型系列課程
R小鹽準備介紹R語言機器學習與預測模型的學習筆記
你想要的R語言學習資料都在這里, 快來收藏關注【科研私家菜】
01 Logistic 回歸列線圖
library(foreign)
library(rms)
mydata<-read.spss("lweight.sav")
mydata<-as.data.frame(mydata)
head(mydata)
mydata$low <- ifelse(mydata$low =="低出生體重",1,0)
mydata$race1 <- ifelse(mydata$race =="白種人",1,0)
mydata$race2 <- ifelse(mydata$race =="黑種人",1,0)
mydata$race3 <- ifelse(mydata$race =="其他種族",1,0)
attach(mydata)
#f<-glm(low ~ age+ftv+ht+lwt+ptl+smoke+ui+race2+race3,data=mydata,family = binomial())
#summary(f)
dd<-datadist(mydata)
options(datadist='dd')
fit1<-lrm(low~age+ftv+ht+lwt+ptl+smoke+ui+race2+race3,data=mydata,x=T,y=T)
fit1
summary(fit1)
nom1 <- nomogram(fit1, fun=plogis,fun.at=c(.001, .01, .05, seq(.1,.9, by=.1), .95, .99, .999),lp=F, funlabel="Low weight rate")
plot(nom1)
fit3<-lrm(low~ht+lwt+ptl+smoke+race,data=mydata,x=T,y=T)
fit3
summary(fit3)
nom3 <- nomogram(fit3, fun=plogis,fun.at=c(.001, .01, .05, seq(.1,.9, by=.1), .95, .99, .999),lp=F, funlabel="Low weight rate")
plot(nom3)
cal3 <- calibrate(fit3, cmethod='hare',method='boot', B=1000)
plot(cal3,xlim=c(0,1.0),ylim=c(0,1.0))
效果如下:
image.png
02 Cox 回歸列線圖
##
library(foreign)
library(survival)
library(rms)
pancer <- read.spss('R語言進階-第9-13章/ch09/pancer.sav')
pancer <- as.data.frame(pancer)
head(pancer)
pancer$censor <- ifelse(pancer$censor=='死亡',1,0)
pancer$Gender <- as.factor(ifelse(pancer$sex=='男',"Male","Female"))
pancer$ch <- as.factor(ifelse(pancer$ch=='CH3', "ch","nonch"))
dd<-datadist(pancer)
options(datadist='dd')
coxm1 <- cph(Surv(time,censor==1)~age+Gender+trt+bui+ch+p+stage,x=T,y=T,data=pancer,surv=T)
coxm1
summary(coxm1)
surv <- Survival(coxm1)
surv1 <- function(x)surv(1*3,lp=x)
surv2 <- function(x)surv(1*6,lp=x)
surv3 <- function(x)surv(1*12,lp=x)
nom1<-nomogram(coxm1,fun=list(surv1,surv2,surv3),lp = F,
funlabel=c('3-Month Survival probability',
'6-Month survival probability',
'12-Month survival probability'),
maxscale=100,
fun.at=c('0.9','0.85','0.80','0.70','0.6','0.5','0.4','0.3','0.2','0.1'))
plot(nom1)
#plot(nomogram(coxm1,fun=list(surv1,surv2,surv3),lp= F,funlabel=c('3-Month Survival probability','6-Month survival probability','12-Month survival probability'),maxscale=100,fun.at=c('0.9','0.85','0.80','0.70','0.6','0.5','0.4','0.3','0.2','0.1')),xfrac=.30)
coxm2 <- cph(Surv(time,censor==1)~age+trt+bui+p+stage,x=T,y=T,data=pancer,surv=T)
coxm2
summary(coxm2)
surv <- Survival(coxm2)
surv1 <- function(x)surv(1*3,lp=x)
surv2 <- function(x)surv(1*6,lp=x)
surv3 <- function(x)surv(1*12,lp=x)
nom2<-nomogram(coxm2,fun=list(surv1,surv2,surv3),lp = F,
funlabel=c('3-Month Survival probability',
'6-Month survival probability',
'12-Month survival probability'),
maxscale=100,
fun.at=c('0.9','0.85','0.80','0.70','0.6','0.5','0.4','0.3','0.2','0.1'))
plot(nom2)
##
library(survival)
f<-coxph(Surv(time,censor==1)~age+Gender+trt+bui+ch+p+stage,data=pancer)
summary(f)
sum.surv<-summary(f)
c_index<-sum.surv$concordance
cal <- calibrate(coxm2, cmethod='KM', method='boot', u=3, m=20, B=1000)
plot(cal,lwd=2,lty=1,errbar.col=c(rgb(0,118,192,maxColorValue=255)),xlim=c(0,1),ylim=c(0,1),xlab="Nomogram-Predicted Probabilityof 3 months OS",ylab="Actual 3 months OS (proportion)",col=c(rgb(192,98,83,maxColorValue=255)))
#lines(cal[,c("mean.predicted","KM")],type="b",lwd=2,col=c(rgb(192,98,83,maxColorValue=255)),pch=16)
#abline(0,1,lty=3,lwd=2,col=c(rgb(0,118,192,maxColorValue=255)))
效果如下:
列線圖
image.png
重采樣模型校正
calibrate(fit, ...)
重采樣模型校正使用自舉法或自舉法來獲得預測值與觀測值的偏差校正(過度擬合校正)估計值,這些估計值基于將預測值細分為區間(用于生存模型)或非參數模型(用于其他模型)。有 Cox (cph)、參數生存模型(psm)、二元和順序邏輯模型(lrm)和一般最小平方法模型(ols)的校正函數。對于生存模型,”預測”是指在單一時間點預測生存概率,”觀察”是指相應的 Kaplan-Meier 生存估計,按預測生存時間間隔分層,或者,如果安裝了 polspline 軟件包,則預測生存概率是使用靈活的危險回歸方法(詳見 val.surv 函數)轉換后的預測生存概率的函數。對于邏輯和線性模型,一個非參數校正曲線估計超過一系列的預測值。配合必須指定 x = TRUE,y = TRUE。Lrm 和 ols 模型的打印和繪圖方法(使用 calibrate.default)打印預測中的平均絕對誤差、均方差和絕對誤差的0.9分位數。在這里,誤差指的是預測值和相應的偏差校正值之間的差異。
calibrate(fit, ...)
## Default S3 method:
calibrate(fit, predy,
method=c("boot","crossvalidation",".632","randomization"),
B=40, bw=FALSE, rule=c("aic","p"),
type=c("residual","individual"),
sls=.05, aics=0, force=NULL, estimates=TRUE, pr=FALSE, kint,
smoother="lowess", digits=NULL, ...)
## S3 method for class 'cph'
calibrate(fit, cmethod=c('hare', 'KM'),
method="boot", u, m=150, pred, cuts, B=40,
bw=FALSE, rule="aic", type="residual", sls=0.05, aics=0, force=NULL,
estimates=TRUE,
pr=FALSE, what="observed-predicted", tol=1e-12, maxdim=5, ...)
## S3 method for class 'psm'
calibrate(fit, cmethod=c('hare', 'KM'),
method="boot", u, m=150, pred, cuts, B=40,
bw=FALSE,rule="aic",
type="residual", sls=.05, aics=0, force=NULL, estimates=TRUE,
pr=FALSE, what="observed-predicted", tol=1e-12, maxiter=15,
rel.tolerance=1e-5, maxdim=5, ...)
## S3 method for class 'calibrate'
print(x, B=Inf, ...)
## S3 method for class 'calibrate.default'
print(x, B=Inf, ...)
## S3 method for class 'calibrate'
plot(x, xlab, ylab, subtitles=TRUE, conf.int=TRUE,
cex.subtitles=.75, riskdist=TRUE, add=FALSE,
scat1d.opts=list(nhistSpike=200), par.corrected=NULL, ...)
## S3 method for class 'calibrate.default'
plot(x, xlab, ylab, xlim, ylim,
legend=TRUE, subtitles=TRUE, cex.subtitles=.75, riskdist=TRUE,
scat1d.opts=list(nhistSpike=200), ...)
關注R小鹽,關注科研私家菜(VX_GZH: SciPrivate),有問題請聯系R小鹽。讓我們一起來學習 R語言機器學習與臨床預測模型