survival/0000755000176200001440000000000014730356362012136 5ustar liggesuserssurvival/tests/0000755000176200001440000000000014730324275013276 5ustar liggesuserssurvival/tests/r_tdist.R0000644000176200001440000000163314607006645015075 0ustar liggesusersoptions(na.action=na.exclude) # preserve missings options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type library(survival) # # Test out the t-distribution # # First, a t-dist with 500 df should be nearly identical to the Gaussian fitig <- survreg(Surv(time, status)~voltage, dist = "gaussian", data = capacitor) fit1 <- survreg(Surv(time, status) ~ voltage, dist='t', parms=500, capacitor) fitig summary(fit1, corr=F) # A more realistic fit fit2 <- survreg(Surv(time, status) ~ voltage, dist='t', parms=5, capacitor) print(fit2) if (FALSE) { resid(fit2, type='response') resid(fit2, type='deviance') resid(fit2, type='working') resid(fit2, type='dfbeta') resid(fit2, type='dfbetas') resid(fit2, type='ldresp') resid(fit2, type='ldshape') resid(fit2, type='ldcase') resid(fit2, type='matrix') predict(fit2, type='link') predict(fit2, type='terms') predict(fit2, type='quantile') } survival/tests/doweight.R0000644000176200001440000001755514613770353015252 0ustar liggesusersoptions(na.action=na.exclude) # preserve missings options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type library(survival) # Tests of the weighted Cox model # # Similar data set to test1, but add weights, # a double-death/censor tied time # a censored last subject # The latter two are cases covered only feebly elsewhere. # # The data set testw2 has the same data, but done via replication # aeq <- function(x,y) all.equal(as.vector(x), as.vector(y)) testw1 <- data.frame(time= c(1,1,2,2,2,2,3,4,5), status= c(1,0,1,1,1,0,0,1,0), x= c(2,0,1,1,0,1,0,1,0), wt = c(1,2,3,4,3,2,1,2,1)) xx <- c(1,2,3,4,3,2,1,2,1) testw2 <- data.frame(time= rep(c(1,1,2,2,2,2,3,4,5), xx), status= rep(c(1,0,1,1,1,0,0,1,0), xx), x= rep(c(2,0,1,1,0,1,0,1,0), xx), id= rep(1:9, xx)) indx <- match(1:9, testw2$id) testw2 <- data.frame(time= rep(c(1,1,2,2,2,2,3,4,5), xx), status= rep(c(1,0,1,1,1,0,0,1,0), xx), x= rep(c(2,0,1,1,0,1,0,1,0), xx), id= rep(1:9, xx)) indx <- match(1:9, testw2$id) fit0 <- coxph(Surv(time, status) ~x, testw1, weights=wt, method='breslow', iter=0) fit0b <- coxph(Surv(time, status) ~x, testw2, ties='breslow', iter=0) fit <- coxph(Surv(time, status) ~x, testw1, weights=wt, ties='breslow') fitb <- coxph(Surv(time, status) ~x, testw2, ties='breslow') texp <- function(beta) { # expected, Breslow estimate r <- exp(beta) temp <- cumsum(c(1/(r^2 + 11*r +7), 10/(11*r +5), 2/(2*r+1))) c(r^2, 1,r,r,1,r,1,r,1)* temp[c(1,1,2,2,2,2,2,3,3)] } aeq(texp(0), c(1/19, 1/19, rep(103/152, 5), rep(613/456,2))) #verify texp() xbar <- function(beta) { # xbar, Breslow estimate r <- exp(beta) temp <- r* rep(c(2*r + 11, 11/10, 1), c(2, 5, 2)) temp * texp(beta) } fit0 summary(fit) aeq(resid(fit0), testw1$status - texp(0)) resid(fit0, type='score') resid(fit0, type='scho') aeq(resid(fit0, type='mart'), (resid(fit0b, type='mart'))[indx]) aeq(resid(fit0, type='scor'), (resid(fit0b, type='scor'))[indx]) aeq(unique(resid(fit0, type='scho')), unique(resid(fit0b, type='scho'))) aeq(resid(fit, type='mart'), testw1$status - texp(fit$coef)) resid(fit, type='score') resid(fit, type='scho') aeq(resid(fit, type='mart'), (resid(fitb, type='mart'))[indx]) aeq(resid(fit, type='scor'), (resid(fitb, type='scor'))[indx]) aeq(unique(resid(fit, type='scho')), unique(resid(fitb, type='scho'))) rr1 <- resid(fit, type='mart') rr2 <- resid(fit, type='mart', weighted=T) aeq(rr2/rr1, testw1$wt) rr1 <- resid(fit, type='score') rr2 <- resid(fit, type='score', weighted=T) aeq(rr2/rr1, testw1$wt) fit <- coxph(Surv(time, status) ~x, testw1, weights=wt, ties='efron') fit resid(fit, type='mart') resid(fit, type='score') resid(fit, type='scho') # Tests of the weighted Cox model, AG form of the data # Same solution as doweight1.s # testw3 <- data.frame(id = c( 1, 1, 2, 3, 3, 3, 4, 5, 5, 6, 7, 8, 8, 9), begin= c( 0, 5, 0, 0,10,15, 0, 0,14, 0, 0, 0,23, 0), time= c( 5,10,10,10,15,20,20,14,20,20,30,23,40,50), status= c( 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0), x= c( 2, 2, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0), wt = c( 1, 1, 2, 3, 3, 3, 4, 3, 3, 2, 1, 2, 2, 1)) fit0 <- coxph(Surv(begin,time, status) ~x, testw3, weights=wt, ties='breslow', iter=0) fit <- coxph(Surv(begin,time, status) ~x, testw3, weights=wt, ties='breslow') fit0 summary(fit) resid(fit0, type='mart', collapse=testw3$id) resid(fit0, type='score', collapse=testw3$id) resid(fit0, type='scho') resid(fit, type='mart', collapse=testw3$id) resid(fit, type='score', collapse=testw3$id) resid(fit, type='scho') fit0 <- coxph(Surv(begin, time, status) ~x,testw3, weights=wt, iter=0) resid(fit0, 'mart', collapse=testw3$id) resid(coxph(Surv(begin, time, status) ~1, testw3, weights=wt) , collapse=testw3$id) #Null model fit <- coxph(Surv(begin,time, status) ~x, testw3, weights=wt, ties='efron') fit resid(fit, type='mart', collapse=testw3$id) resid(fit, type='score', collapse=testw3$id) resid(fit, type='scho') # # Check out the impact of weights on the dfbetas # Am I computing them correctly? # wtemp <- rep(1,26) wtemp[c(5,10,15)] <- 2:4 fit <- coxph(Surv(futime, fustat) ~ age + ecog.ps, ovarian, weights=wtemp) rr <- resid(fit, 'dfbeta') fit1 <- coxph(Surv(futime, fustat) ~ age + ecog.ps, ovarian, weights=wtemp, subset=(-5)) fit2 <- coxph(Surv(futime, fustat) ~ age + ecog.ps, ovarian, weights=wtemp, subset=(-10)) fit3 <- coxph(Surv(futime, fustat) ~ age + ecog.ps, ovarian, weights=wtemp, subset=(-15)) # # Effect of case weights on expected survival curves post Cox model # fit0 <- coxph(Surv(time, status) ~x, testw1, weights=wt, ties='breslow', iter=0) fit0b <- coxph(Surv(time, status) ~x, testw2, ties='breslow', iter=0) surv1 <- survfit(fit0, newdata=list(x=0)) surv2 <- survfit(fit0b, newdata=list(x=0)) aeq(surv1$surv, surv2$surv) # # Check out the Efron approx. # fit0 <- coxph(Surv(time, status) ~x,testw1, weights=wt, iter=0) fit <- coxph(Surv(time, status) ~x,testw1, weights=wt) resid(fit0, 'mart') resid(coxph(Surv(time, status) ~1, testw1, weights=wt)) #Null model # lfun is the known log-likelihood for this data set, worked out in the # appendix of Therneau and Grambsch # ufun is the score vector and ifun the information matrix lfun <- function(beta) { r <- exp(beta) a <- 7*r +3 b <- 4*r +2 11*beta - ( log(r^2 + 11*r +7) + (10/3)*(log(a+b) + log(2*a/3 +b) + log(a/3 +b)) + 2*log(2*r +1)) } aeq(fit0$log[1], lfun(0)) aeq(fit$log[2], lfun(fit$coef)) ufun <- function(beta, efron=T) { #score statistic r <- exp(beta) xbar1 <- (2*r^2+11*r)/(r^2+11*r +7) xbar2 <- 11*r/(11*r +5) xbar3 <- 2*r/(2*r +1) xbar2b<- 26*r/(26*r+12) xbar2c<- 19*r/(19*r + 9) temp <- 11 - (xbar1 + 2*xbar3) if (efron) temp - (10/3)*(xbar2 + xbar2b + xbar2c) else temp - 10*xbar2 } print(ufun(fit$coef) < 1e-4) # Should be true ifun <- function(beta, efron=T) { # information matrix r <- exp(beta) xbar1 <- (2*r^2+11*r)/(r^2+11*r +7) xbar2 <- 11*r/(11*r +5) xbar3 <- 2*r/(2*r +1) xbar2b<- 26*r/(26*r+12) xbar2c<- 19*r/(19*r + 9) temp <- ((4*r^2 + 11*r)/(r^2+11*r +7) - xbar1^2) + 2*(xbar3 - xbar3^2) if (efron) temp + (10/3)*((xbar2- xbar2^2) + (xbar2b - xbar2b^2) + (xbar2c -xbar2c^2)) else temp + 10 * (xbar2- xbar2^2) } aeq(fit0$var, 1/ifun(0)) aeq(fit$var, 1/ifun(fit$coef)) # Make sure that the weights pass through the residuals correctly rr1 <- resid(fit, type='mart') rr2 <- resid(fit, type='mart', weighted=T) aeq(rr2/rr1, testw1$wt) rr1 <- resid(fit, type='score') rr2 <- resid(fit, type='score', weighted=T) aeq(rr2/rr1, testw1$wt) # # Look at the individual components # dt0 <- coxph.detail(fit0) dt <- coxph.detail(fit) aeq(sum(dt$score), ufun(fit$coef)) #score statistic aeq(sum(dt0$score), ufun(0)) aeq(dt0$hazard, c(1/19, (10/3)*(1/16 + 1/(6+20/3) + 1/(6+10/3)), 2/3)) rm(fit, fit0, rr1, rr2, dt, dt0) # # Effect of weights on the robust variance # test1 <- data.frame(time= c(9, 3,1,1,6,6,8), status=c(1,NA,1,0,1,1,0), x= c(0, 2,1,1,1,0,0), wt= c(3,0,1,1,1,1,1), id= 1:7) testx <- data.frame(time= c(4,4,4,1,1,2,2,3), status=c(1,1,1,1,0,1,1,0), x= c(0,0,0,1,1,1,0,0), wt= c(1,1,1,1,1,1,1,1), id= 1:8) fit1 <- coxph(Surv(time, status) ~x, cluster=id, test1, ties='breslow', weights=wt) fit2 <- coxph(Surv(time, status) ~x, cluster=id, testx, ties='breslow') db1 <- resid(fit1, 'dfbeta', weighted=F) db1 <- db1[-2] #toss the missing db2 <- resid(fit2, 'dfbeta') aeq(db1, db2[3:8]) W <- c(3,1,1,1,1,1) #Weights, after removal of the missing value aeq(fit2$var, sum(db1*db1*W)) aeq(fit1$var, sum(db1*db1*W*W)) survival/tests/neardate.R0000644000176200001440000000317414607006645015212 0ustar liggesuserslibrary(survival) # the second data set is not sorted by id/date, on purpose df1 <- data.frame(id= 1:10, y1= as.Date(c("1992-01-01", "1996-01-01", "1997-03-20", "2000-01-01", "2001-01-01", "2004-01-01", "2014-03-27", "2014-01-30", "2000-08-01", "1997-04-29"))) df2 <- data.frame(id= c(1, 1, 2, 3, 4, 4, 5, 6, 7, 7, 8, 9, 9, 9, 10, 3, 3, 6, 6, 8), y2= as.Date(c("1998-04-30", "2004-07-01", "1999-04-14", "2001-02-22", "2003-11-19", "2005-02-15", "2006-06-22", "2007-09-20", "2013-08-02", "2015-01-09", "2014-01-15", "2006-12-06", "1999-10-20", "2010-06-30", "1997-04-28", "1995-04-20", "1997-03-20", "1998-04-30", "1995-04-20", "2006-12-06"))) if (FALSE) { # plot for visual check plot(y2 ~ id, df2, ylim=range(c(df1$y1, df2$y2)), type='n') text(df2$id, df2$y2, as.numeric(1:nrow(df2))) points(y1~id, df1, col=2, pch='+') } i1 <- neardate(df1$id, df2$id, df1$y1, df2$y2) all.equal(i1, c(1, 3, 17, 5, 7, 8, 10, NA, 12, NA)) i2 <- neardate(df1$id, df2$id, df1$y1, df2$y2, best="prior") all.equal(i2, c(NA, NA, 17, NA, NA, 18, 9, 11, 13, 15)) indx <- order(df2$id, df2$y2) df3 <- df2[indx,] i3 <- neardate(df1$id, df3$id, df1$y1, df3$y2) all.equal(indx[i3], i1) i4 <- neardate(df1$id, df3$id, df1$y1, df3$y2, best="prior") all.equal(indx[i4], i2) indx <- c(2,3,10,9, 4,5, 7,8,1,6) df4 <- df1[indx,] i5 <- neardate(df4$id, df2$id, df4$y1, df2$y2) all.equal(i1[indx], i5) survival/tests/difftest.Rout.save0000644000176200001440000000605414607006645016724 0ustar liggesusers R Under development (unstable) (2021-02-16 r80015) -- "Unsuffered Consequences" Copyright (C) 2021 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > options(na.action=na.exclude) # preserve missings > options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type > library(survival) > > # > # Test some more features of surv.diff > # > # First, what happens when one group is a dummy > # > > > # > # The AML data, with a third group of early censorings "tacked on" > # > aml3 <- list(time= c( 9, 13, 13, 18, 23, 28, 31, 34, 45, 48, 161, + 5, 5, 8, 8, 12, 16, 23, 27, 30, 33, 43, 45, + 1, 2, 2, 3, 3, 3, 4), + status= c( 1,1,0,1,1,0,1,1,0,1,0, 1,1,1,1,1,0,1,1,1,1,1,1, + 0,0,0,0,0,0,0), + x = as.factor(c(rep("Maintained", 11), + rep("Nonmaintained", 12), rep("Dummy",7) ))) > > aml3 <- data.frame(aml3) > > # These should give the same result (chisq, df), but the second has an > # extra group > survdiff(Surv(time, status) ~x, aml) Call: survdiff(formula = Surv(time, status) ~ x, data = aml) N Observed Expected (O-E)^2/E (O-E)^2/V x=Maintained 11 7 10.69 1.27 3.4 x=Nonmaintained 12 11 7.31 1.86 3.4 Chisq= 3.4 on 1 degrees of freedom, p= 0.07 > survdiff(Surv(time, status) ~x, aml3) Call: survdiff(formula = Surv(time, status) ~ x, data = aml3) N Observed Expected (O-E)^2/E (O-E)^2/V x=Dummy 7 0 0.00 NaN NaN x=Maintained 11 7 10.69 1.27 3.4 x=Nonmaintained 12 11 7.31 1.86 3.4 Chisq= 3.4 on 1 degrees of freedom, p= 0.07 > > > # > # Now a test of the stratified log-rank > # There are no tied times within institution, so the coxph program > # can be used to give a complete test > # > fit <- survdiff(Surv(time, status) ~ pat.karno + strata(inst), lung) > > cfit <- coxph(Surv(time, status) ~ factor(pat.karno) + strata(inst), + lung, iter=0) > > tdata <- na.omit(lung[,c('time', 'status', 'pat.karno', 'inst')]) > > temp1 <- tapply(tdata$status-1, list(tdata$pat.karno, tdata$inst), sum) > temp1 <- ifelse(is.na(temp1), 0, temp1) > temp2 <- tapply(cfit$resid, list(tdata$pat.karno, tdata$inst), sum) > temp2 <- ifelse(is.na(temp2), 0, temp2) > > temp2 <- temp1 - temp2 > > #Now temp1=observed, temp2=expected > all.equal(c(temp1), c(fit$obs)) [1] TRUE > all.equal(c(temp2), c(fit$exp)) [1] TRUE > > all.equal(fit$var[-1,-1], solve(cfit$var)) [1] TRUE > > rm(tdata, temp1, temp2) > > proc.time() user system elapsed 0.827 0.047 0.867 survival/tests/r_lung.Rout.save0000644000176200001440000001214214670377460016403 0ustar liggesusers R Under development (unstable) (2024-08-21 r87038) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > options(na.action=na.exclude) # preserve missings > options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type > library(survival) > > aeq <- function(x,y, ...) all.equal(as.vector(x), as.vector(y), ...) > > lfit2 <- survreg(Surv(time, status) ~ age + ph.ecog + strata(sex), lung) > lfit3 <- survreg(Surv(time, status) ~ sex + (age+ph.ecog)*strata(sex), lung) > > lfit4 <- survreg(Surv(time, status) ~ age + ph.ecog , lung, + subset=(sex==1)) > lfit5 <- survreg(Surv(time, status) ~ age + ph.ecog , lung, + subset=(sex==2)) > > if (exists('censorReg')) { + lfit1 <- censorReg(censor(time, status) ~ age + ph.ecog + strata(sex),lung) + aeq(lfit4$coef, lfit1[[1]]$coef) + aeq(lfit4$scale, lfit1[[1]]$scale) + aeq(c(lfit4$scale, lfit5$scale), sapply(lfit1, function(x) x$scale)) + } > aeq(c(lfit4$scale, lfit5$scale), lfit3$scale ) [1] TRUE > > # > # Test out ridge regression and splines > # > lfit0 <- survreg(Surv(time, status) ~1, lung) > lfit1 <- survreg(Surv(time, status) ~ age + ridge(ph.ecog, theta=5), lung) > lfit2 <- survreg(Surv(time, status) ~ sex + ridge(age, ph.ecog, theta=1), lung) > lfit3 <- survreg(Surv(time, status) ~ sex + age + ph.ecog, lung) > > lfit0 Call: survreg(formula = Surv(time, status) ~ 1, data = lung) Coefficients: (Intercept) 6.034904 Scale= 0.7593936 Loglik(model)= -1153.9 Loglik(intercept only)= -1153.9 n= 228 > lfit1 Call: survreg(formula = Surv(time, status) ~ age + ridge(ph.ecog, theta = 5), data = lung) coef se(coef) se2 Chisq DF p (Intercept) 6.83082 0.42860 0.42860 254.0 1 3.5e-57 age -0.00783 0.00687 0.00687 1.3 1 2.5e-01 ridge(ph.ecog) -0.32032 0.08484 0.08405 14.2 1 1.6e-04 Scale= 0.738 Iterations: 1 outer, 5 Newton-Raphson Degrees of freedom for terms= 1 1 1 1 Likelihood ratio test=18.6 on 2 df, p=9e-05 n=227 (1 observation deleted due to missingness) > lfit2 Call: survreg(formula = Surv(time, status) ~ sex + ridge(age, ph.ecog, theta = 1), data = lung) coef se(coef) se2 Chisq DF p (Intercept) 6.27163 0.45280 0.45210 191.84 1 1.3e-43 sex 0.40096 0.12371 0.12371 10.50 1 1.2e-03 ridge(age) -0.00746 0.00675 0.00674 1.22 1 2.7e-01 ridge(ph.ecog) -0.33848 0.08329 0.08314 16.51 1 4.8e-05 Scale= 0.731 Iterations: 1 outer, 6 Newton-Raphson Degrees of freedom for terms= 1 1 2 1 Likelihood ratio test=30 on 3 df, p=1e-06 n=227 (1 observation deleted due to missingness) > lfit3 Call: survreg(formula = Surv(time, status) ~ sex + age + ph.ecog, data = lung) Coefficients: (Intercept) sex age ph.ecog 6.273435252 0.401090541 -0.007475439 -0.339638098 Scale= 0.731109 Loglik(model)= -1132.4 Loglik(intercept only)= -1147.4 Chisq= 29.98 on 3 degrees of freedom, p= 1.39e-06 n=227 (1 observation deleted due to missingness) > > > xx <- pspline(lung$age, nterm=3, theta=.3) > xx <- matrix(unclass(xx), ncol=ncol(xx)) # the raw matrix > lfit4 <- survreg(Surv(time, status) ~xx, lung) > lfit5 <- survreg(Surv(time, status) ~age, lung) > > lfit6 <- survreg(Surv(time, status)~pspline(age, df=2), lung) > > lfit7 <- survreg(Surv(time, status) ~ offset(lfit6$lin), lung) > > lfit4 Call: survreg(formula = Surv(time, status) ~ xx, data = lung) Coefficients: (Intercept) xx1 xx2 xx3 xx4 xx5 13.551290 -7.615741 -7.424565 -7.533378 -7.571272 -14.527489 Scale= 0.755741 Loglik(model)= -1150.1 Loglik(intercept only)= -1153.9 Chisq= 7.52 on 5 degrees of freedom, p= 0.185 n= 228 > lfit5 Call: survreg(formula = Surv(time, status) ~ age, data = lung) Coefficients: (Intercept) age 6.88712062 -0.01360829 Scale= 0.7587515 Loglik(model)= -1151.9 Loglik(intercept only)= -1153.9 Chisq= 3.91 on 1 degrees of freedom, p= 0.0479 n= 228 > lfit6 Call: survreg(formula = Surv(time, status) ~ pspline(age, df = 2), data = lung) coef se(coef) se2 Chisq DF p (Intercept) 6.5918 0.63681 0.41853 107.15 1.00 4.1e-25 pspline(age, df = 2), lin -0.0136 0.00687 0.00687 3.94 1.00 4.7e-02 pspline(age, df = 2), non 0.78 1.06 4.0e-01 Scale= 0.756 Iterations: 4 outer, 12 Newton-Raphson Theta= 0.926 Degrees of freedom for terms= 0.4 2.1 1.0 Likelihood ratio test=5.2 on 1.5 df, p=0.04 n= 228 > signif(lfit7$coef,6) (Intercept) 1.44318e-09 > > proc.time() user system elapsed 0.945 0.076 1.012 survival/tests/royston.R0000644000176200001440000000111714607006645015137 0ustar liggesusers# Verify the values found in the Royston paper library(survival) pbc2 <- na.omit(pbc[,-1]) # no id variable, no missings pfit1 <- coxph(Surv(time, status==2) ~ . + log(bili) - bili, pbc2, ties="breslow") # backwards elimination was used to eliminate all but 8 pfit2 <- coxph(Surv(time, status==2) ~ age + log(bili) + edema + albumin + stage + copper, data=pbc2, ties="breslow") temp <- rbind(royston(pfit1), royston(pfit1, adjust=TRUE), royston(pfit2), royston(pfit2, adjust=TRUE)) all.equal(round(temp[,1], 2), c(2.86, 2.56, 2.69, 2.59)) survival/tests/ties.rda0000644000176200001440000001365414607006645014744 0ustar liggesusersmZyNeRHٲe s<3<{;EHB ePH! HZ(l-H%}uo>}grݮ[euhTC *PW~[OAWtH~5ߔYN#,: _=|wFY4Q/W/n]>o],^7aIӺ'>HQAثH:zoQ[}b'XDbYJճ )wYԚ3JI_{Ʈ;8Af˦#"wy?+JSr$[֊M?nR9R1|OU'|훚˾ _Vg] R-Q[WHe~^?B+֬ۘ/6g%Y/zowL 7zb~j2_L%QƐy{ש6ڍu~y˖+O}r`WgԆk;]A7^5,"iٴ#9v#σW%]yǷ sk_.%@ORg?@y/mFo_ٳO8DG-LfJYl׹]=U(ê;K`u+>[v mt#۰S$YЎDke"5 ۨ_n)j^#fӖd)ۯmlYm>lg$]yfMbG ̮L;mFF򂅁ϖnOںNxoƏFoDY o(5ƾ$wWqխXTwj8d ܛJ},>_f!=@Wf=3- ZNe uquw5޹lIa+ǾޒwZ>;#(=^ |{-Ѵ`HWD&-[׀uzm>g 'Y C0a߃T!v\nuLʫKue:Z)a oÁ Stj# _I |}$4#kWw46PG_}|pa ga R\Y=텾$ιi$j $?pS<.%I|Oy$Dv04ig]8(OBK?΢{~Oa~2e_祀#)[wvQDZƾB=Huh^oU΢oHgZon8Z=PD?2nRִ5Z${wRڂN\G(b+pJF~nZ8xQu.'Rn#IpBTk[.ߒ>VI?"E272ytz?4[T5-\D=PPr[Db+)؍$,;q|&}29IGuTֹ Jn2mv׏ }%3 W[~5٥9ɢk3Im }EJ}>K;x[{}߹Hٟ/ UN[*FQW%t^k(q|.K fvQ⻕dA+Ə&Cj739ʔ=0W(pt׬+~DCTOZ4Dژ$b}$擩ni}t+ OnicttDIN0P/ D6vn@aKKkɶ}lX,Av %\-ZZ"cmQoj=.7i2U>>,u۾INWݶ )#]ԜDG_$׿@jK__;H:pˑq5bg:98C}~|%;?Ǻ>7vIt6 RQQ}|FݴΛ )O[~|]Uɂ H;X?os:x-벥??p=*EI_*2I^Uos/B?'N_gRX/GG|\ G QKЇYսƖ+!{;;  ?PokmG&uE^{ܲCAYc[ 9뷀Oz俯&lw5z5yMN"DIA&σso~.2_?;YЋu[ R Xg?vjA3tpzOUgewŝ8h.W;b[E8x^7E}JS: `f*E-wág]_tFU쿗{\v1ތu /AWאJV8Vx Jk'v05U.Or.Q +`ϣՍ`דʱwKV@dz֧AUhWr뻵QUdv@wD+}ttOc}ҙT#C_}|E*8qo'J\W]&&)"rFw%WIc>s3i%79ajb=D$_tbRQU|^u:N:m29{>@sBֹ9jP k凵PcOEQ)6" W|)=$}dZe wSy8l҂Ֆ R nxyP__EŝE;[׾|GŒ\W S]gmHh`F@Hag-6l<גͺEo |۸鸩T‡(DXHA8ȏzD^D*6E DuExctcGsG-H+ B~*e6kb=:PH,HjXtrKnbDž (Iv0%oyFZXW X'ڥD_&)X{lD&Bm$]x?ICĻmƓ:fa40NzC$`Xrn&FՆ(w cDaU a^d}#>G=x8v7W_@b2|PYFh2dj溺kp0o{ ).M)C>igq" ů4m!oĵaS;y,hJA~s4ǭ>n݅gM б4lwr,lsxS Z~4ݰ"{?@!|PD=~ UI>6aQ/5ׁCf.y_v Y'>]Hf/Bp]z*7 jbgx,^/i!rئ>~j3 Eo)}= Hm CIpH)?Bl^Z& E'\6w %9u/\~5޲.5 3L pmb-ZF:y y+̸Y.cRaQ8tf\5#3d7ZS\Opfx>8,aDK SN!.mb~ w#Ռduep:WCpGecدhZpGC|x)U}9\4[4cr\a>/)|)=CP!溈٨+]w|btggXO2Ǻ bY,>/i/*ҏ |6?||d`zc 4׻o(M'HOJ#ɹF*Aa~51<5)ޯQcV6p$|//@oGuzZŨ8+{<бzk!ghSG?Mp-Вa\_@}Ṏpd~4t92oľ38:|9z|Y<̇88،C = 7(Ԣ+9x* XG+>T%Π00n]#×è4Oz4wFxXxgpq"M#+\Dy/$I" !$׿!PMp;S.g7'$;0lahCć֠!qXTP$v..M8'.\Rq+survival/tests/nested.Rout.save0000644000176200001440000000227514607006645016377 0ustar liggesusers R version 2.15.2 (2012-10-26) -- "Trick or Treat" Copyright (C) 2012 The R Foundation for Statistical Computing ISBN 3-900051-07-0 Platform: i686-pc-linux-gnu (32-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(survival) Loading required package: splines > # > # A test of nesting. It makes sure the model.frame is built correctly > # > tfun <- function(fit, mydata) { + survfit(fit, newdata=mydata) + } > > myfit <- coxph(Surv(time, status) ~ age + factor(sex), lung) > > temp1 <- tfun(myfit, lung[1:5,]) > temp2 <- survfit(myfit, lung[1:5,]) > indx <- match('call', names(temp1)) #the call components won't match > > all.equal(unclass(temp1)[-indx], unclass(temp2)[-indx]) [1] TRUE > > > proc.time() user system elapsed 0.196 0.032 0.225 survival/tests/yates1.Rout.save0000644000176200001440000001175514613770353016327 0ustar liggesusers R Under development (unstable) (2024-04-17 r86441) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: aarch64-unknown-linux-gnu R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(survival) > aeq <- function(x, y, ...) all.equal(as.vector(x), as.vector(y), ...) > > fit1 <- lm(skips ~ Opening + Solder + Mask + PadType + Panel, + data=solder) > y1 <- yates(fit1, "Opening") > > temp <- levels(solder$Opening) > tpred <- matrix(0., nrow(solder), 3) > for (i in 1:3) { + tdata <- solder + tdata$Opening <- temp[i] + tpred[,i] <- predict(fit1, newdata=tdata) + } > all.equal(y1$estimate[,"pmm"], colMeans(tpred)) [1] TRUE > > # This fit is deficient: there are no Opening=L and Mask=A6 obs > # The MPV for Mask=A6 and Opening L will therefore be NA, as well > # as for all levels of Solder, but we can compute the others. > # Solder will be NA for all levels > fit2 <- lm(skips ~ Opening*Mask + Solder, + data=solder) > y2a <- yates(fit2, "Mask", population="factorial") > y2b <- yates(fit2, "Opening", population="factorial") > y2c <- yates(fit2, "Solder", population="factorial") > > # The predict.lm function gives correct predictions for estimable > # functions (all but L,A6) and nonsense for others. It knows that > # some are not estimable due to the NA coefficients, but not which ones, > # so always prints a warning. Hence the suppressWarnings call. > tdata <- do.call(expand.grid, fit2$xlevels[1:3]) > temp <- levels(solder$Mask) > tpreda <- matrix(0., nrow(tdata), length(temp), + dimnames=list(NULL, temp)) > for (i in seq_along(temp)) { + tdata$Mask <- temp[i] + suppressWarnings(tpreda[,i] <- predict(fit2, newdata=tdata)) + } > tpreda[,"A6"] <- NA # the A6 estimate is deficient > aeq(y2a$estimate[,"pmm"], colMeans(tpreda)) [1] TRUE > > tdata <- do.call(expand.grid, fit2$xlevels[1:3]) > temp <- levels(solder$Opening) > tpredb <- matrix(0., nrow(tdata), length(temp), + dimnames=list(NULL, temp)) > for (i in seq_along(temp)) { + tdata$Opening <- temp[i] + suppressWarnings(tpredb[,i] <- predict(fit2, newdata=tdata)) + } > tpredb[,"L"] <- NA > aeq(y2b$estimate[,"pmm"], colMeans(tpredb)) [1] TRUE > > # Solder should be all NA > all(is.na(y2c$estimate[,"pmm"])) [1] TRUE > > # Tests for Solder are defined for a non-factorial population, however. > # the [] below retains the factor structure of the variable, where the > # runs above did not. R gets prediction correct both ways. > y2d <- yates(fit2, ~Solder) > temp <- levels(solder$Solder) > tdata <- solder > tpredd <- matrix(0, nrow(tdata), length(temp), + dimnames=list(NULL, temp)) > for (i in seq_along(temp)) { + tdata$Solder[] <- temp[i] + suppressWarnings(tpredd[,i] <- predict(fit2, newdata=tdata)) + } > aeq(y2d$estimate$pmm, colMeans(tpredd)) [1] TRUE > > # > # Verify that the result is unchanged by how dummies are coded > # The coefs move all over the map, but predictions are unchanged > fit3 <- lm(skips ~ C(Opening, contr.helmert)*Mask + C(Solder, contr.SAS), + data=solder) > y3a <- yates(fit3, ~Mask, population='yates') > equal <- c("estimate", "test", "mvar") > all.equal(y3a[equal], y2a[equal]) [1] TRUE > > tdata <- do.call(expand.grid, fit2$xlevels[1:3]) # use orignal variable names > temp <- levels(solder$Mask) > cpred <- matrix(0., nrow(tdata), length(temp), + dimnames=list(NULL, temp)) > for (i in seq_along(temp)) { + tdata$Mask <- temp[i] + suppressWarnings(cpred[,i] <- predict(fit3, newdata=tdata)) + } > aeq(cpred[, temp!="A6"], tpreda[, temp!= "A6"]) # same predictions [1] TRUE > all.equal(y3a$estimate, y2a$estimate) [1] TRUE > > y3b <- yates(fit3, ~Opening, population='yates') > # column names will differ > all.equal(y3b$estimate, y2b$estimate, check.attributes=FALSE) [1] TRUE > > y3d <- yates(fit3, ~Solder) > for (i in 1:3) { + print(all.equal(y3d[[i]], y2d[[i]], check.attributes=FALSE)) + } [1] TRUE [1] TRUE [1] TRUE > > # Reprise this with a character variable in the model > sdata <- solder > sdata$Mask <- as.character(sdata$Mask) > fit4 <- lm(skips ~ Opening*Mask + Solder, data=sdata) > y4a <- yates(fit4, ~ Mask, population= "yates") > y4b <- yates(fit4, ~ Opening, population= "yates") > y4d <- yates(fit4, ~ Solder) > equal <- c("estimate", "tests", "mvar", "cmat") > all.equal(y2a[equal], y4a[equal]) # the "call" component differs [1] TRUE > all.equal(y2b[equal], y4b[equal]) [1] TRUE > all.equal(y2d[equal], y4d[equal]) [1] TRUE > > proc.time() user system elapsed 0.490 0.008 0.495 survival/tests/coxsurv3.R0000644000176200001440000001036614723661736015233 0ustar liggesuserslibrary(survival) aeq <- function(x,y) all.equal(as.vector(x), as.vector(y)) # One more test on coxph survival curves, to test out the individual # option. First fit a model with a time dependent covariate. # This is test data 2 of section 4 of the validation vignette (appendix E2 of # Therneau and Grambsch), i.e. all the results are known in closed form. # test2 <- data.frame(start=c(1, 2, 5, 2, 1, 7, 3, 4, 8, 8), stop =c(2, 3, 6, 7, 8, 9, 9, 9,14,17), event=c(1, 1, 1, 1, 1, 1, 1, 0, 0, 0), x =c(1, 0, 0, 1, 0, 1, 1, 1, 0, 0) ) # True hazard function, and components of the variance lambda <- function(beta, x=0, method="efron") { time <- c(2,3,6,7,8,9) r <- exp(beta) lambda <- c(1/(r+1), 1/(r+2), 1/(3*r +2), 1/(3*r+1), 1/(3*r+1), 1/(3*r+2) + 1/(2*r +2)) xbar <- c(r/(r+1), r/(r+2), 3*r/(3*r +2), 3*r/(3*r+1), 3*r/(3*r+1), (1.5*r)/(3*r +2) + r/(2*r+2)) if (method == "breslow") { lambda[6] <- 2/(3*r +2) xbar[6] <- 3*r/(3*r+2) } list(time=time, lambda=lambda, xbar=xbar) } fit <- coxph(Surv(start, stop, event) ~x, test2) # A curve for someone who never changes surv1 <-survfit(fit, newdata=list(x=0), censor=FALSE) true <- lambda(fit$coefficients, 0) aeq(true$time, surv1$time) aeq(-log(surv1$surv), cumsum(true$lambda)) # Reprise it with a time dependent subject who doesn't change data2 <- data.frame(start=c(0, 4, 9, 11), stop=c(4, 9, 11, 17), event=c(0,0,0,0), x=c(0,0,0,0), patn=c(1,1,1,1)) surv2 <- survfit(fit, newdata=data2, id=patn, censor=FALSE) aeq(surv2$surv, surv1$surv) aeq(surv2$std.err, surv1$std.err) # # Now a more complex data set with multiple strata # test3 <- data.frame(start=c(1, 2, 5, 2, 1, 7, 3, 4, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), stop =c(2, 3, 6, 7, 8, 9, 9, 9,14,17, 1:11), event=c(1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0,1), x =c(1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 2, 3, 2, 1, 1, 1, 0, 2, 1,0), grp = c(rep('a', 10), rep('b', 11))) fit2 <- coxph(Surv(start, stop, event) ~ x + strata(grp), test3) # The fit1 tests show the program works for a simple case, use it to # get a true baseline for strata 2 fit2b <- coxph(Surv(start, stop, event) ~x, test3, subset=(grp=='b'), init=fit2$coefficients, iter=0) temp <- survfit(fit2b, newdata=list(x=0), censor=F) true2 <- list(time=temp$time, lambda=diff(c(0, temp$cumhaz))) true1 <- lambda(fit2$coefficients, x=0) # Separate strata, one value surv3 <- survfit(fit2, list(x=0), censor=FALSE) aeq(true1$time, (surv3[1])$time) aeq(-log(surv3[1]$surv), cumsum(true1$lambda)) data4 <- data.frame(start=c(0, 4, 9, 11), stop=c(4, 9, 11, 17), event=c(0,0,0,0), x=c(0,0,0,0), grp=rep('a', 4), patid= rep("Jones", 4)) surv4a <- survfit(fit2, newdata=data4, id=patid, censor=FALSE) aeq(-log(surv4a$surv), cumsum(true1$lambda)) data4$grp <- rep('b',4) surv4b <- survfit(fit2, newdata=data4, id=patid, censor=FALSE) aeq(-log(surv4b$surv), cumsum(true2$lambda)) # Now for something more complex # Subject 1 skips day 4. Since there were no events that day the survival # will be the same, but the times will be different. # Subject 2 spends some time in strata 1, some in strata 2, with # moving covariates # data5 <- data.frame(start=c(0,5,9,11, 0, 4, 3), stop =c(4,9,11,17, 4,8,7), event=rep(0,7), x=c(1,1,1,1, 0,1,2), grp=c('a', 'a', 'a', 'a', 'a', 'a', 'b'), subject=c(1,1,1,1, 2,2,2)) surv5 <- survfit(fit2, newdata=data5, censor=FALSE, id=subject) aeq(surv5[1]$time, c(2,3,5,6,7,8)) #surv1 has 2, 3, 6, 7, 8, 9 aeq(surv5[1]$surv, surv3[1]$surv ^ exp(fit2$coefficients)) tlam <- c(true1$lambda[1:2]* exp(fit2$coefficients * data5$x[5]), true1$lambda[3:5]* exp(fit2$coefficients * data5$x[6]), true2$lambda[3:4]* exp(fit2$coefficients * data5$x[7])) aeq(-log(surv5[2]$surv), cumsum(tlam)) survival/tests/doublecolon.R0000644000176200001440000000513214727576546015747 0ustar liggesusers# # Check that my updates to a. remove survival:: out of formulas and # b: ensure that Surv(), cluster(), strata(), pspline(), and tt() use # these functions from the survival namespace, when called in a coxph, # survfit, survreg, etc formula library(survival) aeq <- function(x,y, ...) all.equal(as.vector(x), as.vector(y)) c1 <- coxph(Surv(time, status) ~ age + strata(inst), lung) # a local Surv that gives the wrong answer but won't error out (makes it # simpler to write the tests) Surv <- function(x, ...) survival::Surv(x, rep(1, length(x))) c2 <- coxph(Surv(time, status) ~ age + strata(inst), lung) c3 <- coxph(survival::Surv(time, status) ~ age + survival::strata(inst), lung) # in prior releases the above fits a different model, stata is not recognized # as a special and becomes a factor all.equal(coef(c1), coef(c2)) all.equal(coef(c1), coef(c3)) !(c2$call$formula == c3$call$formula) # c3$call will have 2 survival::, c2 none deparse1(c3$formula) == "survival::Surv(time, status) ~ age + strata(inst)" nocall <- function(x, omit="call") { z <- unclass(x) # needed for any object with a [ method z[-match(omit, names(z))] # all but $call } y2 <- with(lung, survival::Surv(time, status)) # outside a formula fit1a <- coxph(Surv(time, status) ~ age + strata(sex) + cluster(inst), lung) fit1b <- coxph(Surv(time, status) ~ age + survival::strata(sex) + survival::cluster(inst), lung) fit1c <- coxph(y2 ~ age + strata(sex) + survival::cluster(inst), lung) all.equal(nocall(fit1a), nocall(fit1b)) aeq(coef(fit1a), coef(fit1c)) fit2a <- survdiff(Surv(time, status) ~ sex + strata(inst), lung) fit2b <- survdiff(Surv(time, status) ~ sex + survival::strata(inst), data= lung) all.equal(nocall(fit2a), nocall(fit2b)) aeq(rowSums(fit2a$obs), c(111, 53)) # make sure it use the correct Surv fit3a <- survreg(Surv(time, status) ~ ph.ecog + strata(sex), lung) fit3b <- survreg(Surv(time, status) ~ ph.ecog + survival::strata(sex), data= survival::lung) all.equal(nocall(fit3a), nocall(fit3b)) fit4a <- concordance(Surv(time, status) ~ ph.ecog + strata(sex), lung) fit4b <- concordance(Surv(time, status) ~ ph.ecog + survival::strata(sex), lung) all.equal(nocall(fit4a), nocall(fit4b)) fit5a <- survfit(Surv(time, status) ~ sex, lung) fit5b <- survfit(Surv(time, status) ~ strata(sex), lung) fit5c <- survfit(Surv(time, status) ~ survival::strata(sex), lung) fit5d <- survfit(y2 ~ survival::strata(sex), lung) all.equal(nocall(fit5a, c("call", "strata")), nocall(fit5b, c("call", "strata"))) all.equal(nocall(fit5b), nocall(fit5c)) aeq(fit5a$surv, fit5d$surv) survival/tests/survreg1.Rout.save0000644000176200001440000003342014670377551016676 0ustar liggesusers R Under development (unstable) (2024-08-21 r87038) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > options(na.action=na.exclude) # preserve missings > options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type > library(survival) > > aeq <- function(x,y, ...) all.equal(as.vector(x), as.vector(y), ...) > > # fit1 and fit4 should follow identical iteration paths > fit1 <- survreg(Surv(futime, fustat) ~ age + ecog.ps, ovarian, x=TRUE) > fit4 <- survreg(Surv(log(futime), fustat) ~age + ecog.ps, ovarian, + dist='extreme') > aeq(fit1$coef, fit4$coef) [1] TRUE > aeq(fit1$var, fit4$var) [1] TRUE > > resid(fit1, type='working') 1 2 3 4 5 6 -4.5081778 -0.5909810 -2.4878519 0.6032744 -5.8993431 0.6032744 7 8 9 10 11 12 -1.7462937 -0.8102883 0.6032744 -1.6593962 -0.8235265 0.6032744 13 14 15 16 17 18 0.6032744 0.6032744 0.6032744 0.6032744 0.6032744 0.6032744 19 20 21 22 23 24 0.6032744 0.6032744 0.6032744 0.2572623 -31.8006867 -0.7426277 25 26 -0.2857597 0.6032744 > resid(fit1, type='response') 1 2 3 4 5 6 -155.14523 -58.62744 -262.03173 -927.79842 -1377.84908 -658.86626 7 8 9 10 11 12 -589.74449 -318.93436 4.50671 -686.83338 -434.39281 -1105.68733 13 14 15 16 17 18 -42.43371 -173.09223 -4491.29974 -3170.49394 -5028.31053 -2050.91373 19 20 21 22 23 24 -150.65033 -2074.09345 412.32400 76.35826 -3309.40331 -219.81579 25 26 -96.19691 -457.76731 > resid(fit1, type='deviance') 1 2 3 4 5 6 7 -1.5842290 -0.6132746 -1.2876971 0.5387840 -1.7148539 0.6682580 -1.1102921 8 9 10 11 12 13 14 -0.7460191 1.4253843 -1.0849419 -0.7531720 0.6648130 1.3526380 1.1954382 15 16 17 18 19 20 21 0.2962391 0.3916044 0.3278067 0.5929057 1.2747643 0.6171130 1.9857606 22 23 24 25 26 0.6125492 -2.4504208 -0.7080652 -0.3642424 0.7317955 > resid(fit1, type='dfbeta') [,1] [,2] [,3] [,4] 1 0.43370970 -1.087867e-02 0.126322520 0.048379059 2 0.14426449 -5.144770e-03 0.088768478 -0.033939677 3 0.25768057 -3.066698e-03 -0.066578834 0.021817646 4 0.05772598 -5.068044e-04 -0.013121427 -0.007762466 5 -0.58773456 6.676156e-03 0.084189274 0.008064026 6 0.01499533 -7.881949e-04 0.026570173 -0.013513160 7 -0.17869321 4.126121e-03 -0.072760519 -0.015006956 8 -0.11851540 2.520303e-03 -0.045549628 -0.035686269 9 0.08327656 3.206404e-03 -0.141835350 0.024490806 10 -0.25083921 5.321702e-03 -0.073986269 -0.020648720 11 -0.21333934 4.155746e-03 -0.049832434 -0.040215681 12 0.13889770 -1.586136e-03 -0.019701151 -0.004686340 13 0.07892133 -2.706713e-03 0.085242459 0.007847879 14 0.29690157 -1.987141e-03 -0.085553120 0.017447343 15 0.04344618 -6.319243e-04 -0.001944285 -0.003533279 16 0.04866809 -1.068317e-03 0.012398602 -0.006340983 17 0.04368104 -9.248316e-04 0.009428718 -0.004869178 18 0.15684611 -2.081485e-03 -0.013068320 -0.003265399 19 0.48839511 -4.775829e-03 -0.093258090 0.032703354 20 0.17598922 -2.349254e-03 -0.014202966 -0.002486428 21 0.37869758 -8.442011e-03 0.163476417 0.100850775 22 -0.59761427 8.803638e-03 0.052784598 -0.053085234 23 -0.79017984 1.092304e-02 0.053690092 0.080780399 24 -0.02348526 8.331002e-04 -0.039028433 -0.032765737 25 -0.13948485 3.687927e-04 0.056781884 -0.055647859 26 0.05778937 3.766350e-06 -0.029232389 -0.008927920 > resid(fit1, type='dfbetas') [,1] [,2] [,3] [,4] 1 0.288846658 -0.4627232074 0.345395116 0.20574292 2 0.096078819 -0.2188323823 0.242713641 -0.14433617 3 0.171612884 -0.1304417700 -0.182041999 0.09278449 4 0.038444974 -0.0215568869 -0.035877029 -0.03301165 5 -0.391425795 0.2839697749 0.230193032 0.03429410 6 0.009986751 -0.0335258093 0.072649027 -0.05746778 7 -0.119008027 0.1755042532 -0.198944162 -0.06382048 8 -0.078930164 0.1072008799 -0.124543264 -0.15176395 9 0.055461420 0.1363841532 -0.387810796 0.10415271 10 -0.167056601 0.2263581990 -0.202295647 -0.08781336 11 -0.142082031 0.1767643342 -0.136253451 -0.17102630 12 0.092504589 -0.0674661531 -0.053867524 -0.01992972 13 0.052560878 -0.1151298322 0.233072686 0.03337488 14 0.197733705 -0.0845228882 -0.233922105 0.07419878 15 0.028934753 -0.0268788526 -0.005316126 -0.01502607 16 0.032412497 -0.0454407662 0.033900659 -0.02696647 17 0.029091172 -0.0393376416 0.025780305 -0.02070728 18 0.104458066 -0.0885357994 -0.035731824 -0.01388685 19 0.325266641 -0.2031395176 -0.254989284 0.13907843 20 0.117207199 -0.0999253459 -0.038834208 -0.01057410 21 0.252209096 -0.3590802699 0.446982501 0.42889079 22 -0.398005596 0.3744620571 0.144325354 -0.22575700 23 -0.526252483 0.4646108448 0.146801184 0.34353696 24 -0.015640965 0.0354358527 -0.106712804 -0.13934372 25 -0.092895624 0.0156865706 0.155254862 -0.23665514 26 0.038487186 0.0001602014 -0.079928144 -0.03796800 > resid(fit1, type='ldcase') 1 2 3 4 5 6 0.374432175 0.145690278 0.112678800 0.006399163 0.261176992 0.013280058 7 8 9 10 11 12 0.109842490 0.074103234 0.248285282 0.128482147 0.094038203 0.016111951 13 14 15 16 17 18 0.132812463 0.111857574 0.001698300 0.004730718 0.003131173 0.015840667 19 20 21 22 23 24 0.179925399 0.019071941 0.797119488 0.233096445 0.666613755 0.062959708 25 26 0.080117437 0.015922378 > resid(fit1, type='ldresp') 1 2 3 4 5 6 0.076910173 0.173810883 0.078356928 0.005310644 0.060742612 0.010002154 7 8 9 10 11 12 0.067356838 0.067065693 0.355103899 0.067043195 0.068142828 0.016740944 13 14 15 16 17 18 0.193444572 0.165021262 0.001494685 0.004083386 0.002767560 0.016400993 19 20 21 22 23 24 0.269571809 0.020129806 1.409736499 1.040266083 0.058637282 0.071819025 25 26 0.112702844 0.015105534 > resid(fit1, type='ldshape') 1 2 3 4 5 6 0.870628250 0.383362440 0.412503605 0.005534970 0.513991064 0.003310847 7 8 9 10 11 12 0.291860593 0.154910362 0.256160646 0.312329770 0.183191309 0.004184904 13 14 15 16 17 18 0.110215710 0.049299495 0.007678445 0.011633336 0.011588605 0.008641251 19 20 21 22 23 24 0.112967758 0.008271358 2.246729275 0.966929220 1.022043272 0.143857170 25 26 0.079754096 0.001606647 > resid(fit1, type='matrix') g dg ddg ds dds dsg 1 -1.74950763 -1.46198129 -0.32429540 0.88466493 -2.42358635 1.8800360 2 -0.68266980 -0.82027857 -1.38799493 -0.66206188 -0.57351872 1.3921043 3 -1.32369884 -1.33411374 -0.53625126 0.31503768 -1.83606321 1.8626973 4 -0.14514412 0.24059386 -0.39881329 -0.28013223 -0.26053084 0.2237590 5 -1.96497889 -1.50383619 -0.25491587 1.15700933 -2.68145423 1.8694717 6 -0.22328436 0.37012071 -0.61351964 -0.33477229 -0.16715487 0.1848047 7 -1.11099124 -1.23201028 -0.70550005 0.01052036 -1.48515401 1.8106760 8 -0.77288913 -0.95018808 -1.17265428 -0.51190170 -0.79753045 1.5525642 9 -1.01586016 1.68391053 -2.79128447 0.01598527 -0.01623681 -1.7104080 10 -1.08316634 -1.21566480 -0.73259465 -0.03052447 -1.43539383 1.7998987 11 -0.77825093 -0.95675178 -1.16177415 -0.50314979 -0.81016011 1.5600720 12 -0.22098818 0.36631452 -0.60721042 -0.33361394 -0.17002503 0.1866908 13 -0.91481479 1.51641567 -2.51364157 -0.08144930 0.07419757 -1.3814037 14 -0.71453621 1.18442981 -1.96333502 -0.24017106 0.15944438 -0.7863174 15 -0.04387880 0.07273440 -0.12056602 -0.13717935 -0.29168773 0.1546569 16 -0.07667699 0.12710134 -0.21068577 -0.19691828 -0.30879813 0.1993144 17 -0.05372862 0.08906165 -0.14763041 -0.15709224 -0.30221555 0.1713377 18 -0.17576861 0.29135764 -0.48296037 -0.30558900 -0.22570402 0.2151929 19 -0.81251205 1.34683655 -2.23254376 -0.16869744 0.13367171 -1.0672002 20 -0.19041424 0.31563454 -0.52320225 -0.31581218 -0.20797917 0.2078622 21 -1.97162252 3.26820173 -5.41743790 1.33844939 -2.24706488 -5.4868428 22 -0.68222519 1.23245193 -4.79064290 -0.58668577 -0.95209805 -2.8390386 23 -3.49689798 -1.62675999 -0.05115487 2.90949868 -4.20494743 1.7496975 24 -0.74529506 -0.91462436 -1.23160543 -0.55723389 -0.73139169 1.5108398 25 -0.56095318 -0.53280415 -1.86451840 -0.87536233 -0.22666819 0.9689667 26 -0.26776235 0.44384834 -0.73573207 -0.35281852 -0.11207472 0.1409908 > > aeq(resid(fit1, type='working'),resid(fit4, type='working')) [1] TRUE > #aeq(resid(fit1, type='response'), resid(fit4, type='response'))#should differ > aeq(resid(fit1, type='deviance'), resid(fit4, type='deviance')) [1] TRUE > aeq(resid(fit1, type='dfbeta'), resid(fit4, type='dfbeta')) [1] TRUE > aeq(resid(fit1, type='dfbetas'), resid(fit4, type='dfbetas')) [1] TRUE > aeq(resid(fit1, type='ldcase'), resid(fit4, type='ldcase')) [1] TRUE > aeq(resid(fit1, type='ldresp'), resid(fit4, type='ldresp')) [1] TRUE > aeq(resid(fit1, type='ldshape'), resid(fit4, type='ldshape')) [1] TRUE > aeq(resid(fit1, type='matrix'), resid(fit4, type='matrix')) [1] TRUE > > # Test suggested by Achim Zieleis: residuals should give a score vector > r1 <-residuals(fit1, type='matrix') > score <- c(as.vector(r1[,c("dg")]) %*% model.matrix(fit1), + "log(scale)" = sum(r1[,"ds"])) > all(abs(score) < 1e-6) [1] TRUE > > # repeat this with Gaussian (no transform = different code path) > tfit <- survreg(Surv(durable, durable>0, type='left') ~age + quant, + data=tobin, dist='gaussian') > r2 <- residuals(tfit, type='matrix') > score <- c(as.vector(r2[, "dg"]) %*% model.matrix(tfit), + "log(scale)" = sum(r2[,"ds"])) > all(score < 1e-6) [1] TRUE > > # > # Some tests of the quantile residuals > # > # These should agree exactly with Ripley and Venables' book > fit1 <- survreg(Surv(time, status) ~ temp, data= imotor) > summary(fit1) Call: survreg(formula = Surv(time, status) ~ temp, data = imotor) Value Std. Error z p (Intercept) 16.31852 0.62296 26.2 < 2e-16 temp -0.04531 0.00319 -14.2 < 2e-16 Log(scale) -1.09564 0.21480 -5.1 3.4e-07 Scale= 0.334 Weibull distribution Loglik(model)= -147.4 Loglik(intercept only)= -169.5 Chisq= 44.32 on 1 degrees of freedom, p= 2.8e-11 Number of Newton-Raphson Iterations: 8 n= 40 > > # > # The first prediction has the SE that I think is correct > # The third is the se found in an early draft of Ripley; fit1 ignoring > # the variation in scale estimate, except via it's impact on the > # upper left corner of the inverse information matrix. > # Numbers 1 and 3 differ little for this dataset > # > predict(fit1, data.frame(temp=130), type='uquantile', p=c(.5, .1), se=T) $fit [1] 10.306068 9.676248 $se.fit [1] 0.2135247 0.2202088 > > fit2 <- survreg(Surv(time, status) ~ temp, data=imotor, scale=fit1$scale) > predict(fit2, data.frame(temp=130), type='uquantile', p=c(.5, .1), se=T) $fit [1] 10.306068 9.676248 $se.fit 1 1 0.2057964 0.2057964 > > fit3 <- fit2 > fit3$var <- fit1$var[1:2,1:2] > predict(fit3, data.frame(temp=130), type='uquantile', p=c(.5, .1), se=T) $fit [1] 10.306068 9.676248 $se.fit 1 1 0.2219959 0.2219959 > > pp <- seq(.05, .7, length=40) > xx <- predict(fit1, data.frame(temp=130), type='uquantile', se=T, + p=pp) > #matplot(pp, cbind(xx$fit, xx$fit+2*xx$se, xx$fit - 2*xx$se), type='l') > > > # > # Now try out the various combinations of strata, #predicted, and > # number of quantiles desired > # > fit1 <- survreg(Surv(time, status) ~ inst + strata(inst) + age + sex, lung) > qq1 <- predict(fit1, type='quantile', p=.3, se=T) > qq2 <- predict(fit1, type='quantile', p=c(.2, .3, .4), se=T) > aeq <- function(x,y) all.equal(as.vector(x), as.vector(y)) > aeq(qq1$fit, qq2$fit[,2]) [1] TRUE > aeq(qq1$se.fit, qq2$se.fit[,2]) [1] TRUE > > qq3 <- predict(fit1, type='quantile', p=c(.2, .3, .4), se=T, + newdata= lung[1:5,]) > aeq(qq3$fit, qq2$fit[1:5,]) [1] TRUE > > qq4 <- predict(fit1, type='quantile', p=c(.2, .3, .4), se=T, newdata=lung[7,]) > aeq(qq4$fit, qq2$fit[7,]) [1] TRUE > > qq5 <- predict(fit1, type='quantile', p=c(.2, .3, .4), se=T, newdata=lung) > aeq(qq2$fit, qq5$fit) [1] TRUE > aeq(qq2$se.fit, qq5$se.fit) [1] TRUE > > proc.time() user system elapsed 0.944 0.073 1.011 survival/tests/testnull.R0000644000176200001440000000113314613770353015273 0ustar liggesusersoptions(na.action=na.exclude) # preserve missings options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type library(survival) # # A test of NULL models # fit1 <- coxph(Surv(stop, event) ~ rx + strata(number), bladder, iter=0) fit2 <- coxph(Surv(stop, event) ~ strata(number), bladder) all.equal(fit1$loglik[2], fit2$loglik) all.equal(fit1$resid, fit2$resid) fit1 <- coxph(Surv(start, stop, event) ~ rx + strata(number), bladder2, iter=0) fit2 <- coxph(Surv(start, stop, event) ~ strata(number), bladder2) all.equal(fit1$loglik[2], fit2$loglik) all.equal(fit1$resid, fit2$resid) survival/tests/update.Rout.save0000644000176200001440000000263714607006645016401 0ustar liggesusers R Under development (unstable) (2019-08-23 r77061) -- "Unsuffered Consequences" Copyright (C) 2019 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(survival) > > # the way a +cluster() term is handled in coxph has implications for update. > > fit1 <- coxph(Surv(time, status) ~ age, cluster= inst, lung) > fit2 <- coxph(Surv(time, status) ~ age + cluster(inst), lung) > all.equal(fit1, fit2) [1] TRUE > > fit3 <- coxph(Surv(time, status) ~ age + sex + cluster(inst), lung) > > test1 <- update(fit1, .~ .+ sex) > all.equal(test1, fit3) [1] TRUE > > # Gives a spurious warning message > test2 <- update(fit1, . ~ age + sex + cluster(inst), lung) Warning message: In coxph(formula = Surv(time, status) ~ age + sex + cluster(inst), : cluster appears both in a formula and as an argument, formula term ignored > all.equal(test2, fit3) [1] TRUE > > > proc.time() user system elapsed 0.751 0.039 0.791 survival/tests/aareg.R0000644000176200001440000001572714613770353014516 0ustar liggesusersoptions(na.action=na.exclude) # preserve missings options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type library(survival) # # Test aareg, for some simple data where the answers can be computed # in closed form # aeq <- function(x,y, ...) all.equal(as.vector(x), as.vector(y), ...) test1 <- data.frame(time= c(4, 3,1,1,2,2,3), status=c(1,NA,1,0,1,1,0), x= c(0, 2,1,1,1,0,0), wt= c(1, 1:6)) tfit <- aareg(Surv(time, status) ~ x, test1) aeq(tfit$times, c(1,2,2)) aeq(tfit$nrisk, c(6,4,4)) aeq(tfit$coefficient, matrix(c(0,0,1/3, 1/3, 1, -1/3), ncol=2)) aeq(tfit$tweight, matrix(c(3,3,3, 3/2, 3/4, 3/4), ncol=2)) aeq(tfit$test.statistic, c(1,1)) aeq(tfit$test.var, c(1, -1/4, -1/4, 1/4 + 9/16 + 1/16)) tfit <- aareg(Surv(time, status) ~ x, test1, test='nrisk') aeq(tfit$tweight, matrix(c(3,3,3, 3/2, 3/4, 3/4), ncol=2)) #should be as before aeq(tfit$test.statistic, c(4/3, 6/3+ 4 - 4/3)) aeq(tfit$test.var, c(16/9, -16/9, -16/9, 36/9 + 16 + 16/9)) # In the 1-variable case, this is the same as the default Aalen weight tfit <- aareg(Surv(time, status) ~ x, test1, test='variance') aeq(tfit$test.statistic, c(1,1)) aeq(tfit$test.var, c(1, -1/4, -1/4, 1/4 + 9/16 + 1/16)) # # Repeat the above, with case weights # tfit <- aareg(Surv(time, status) ~x, test1, weights=wt) aeq(tfit$times, c(1,2,2)) aeq(tfit$nrisk, c(21,16,16)) aeq(tfit$coefficient, matrix(c(0,0,5/12, 2/9, 1, -5/12), ncol=2)) aeq(tfit$tweight, matrix(c(12,12,12, 36/7, 3,3), ncol=2)) aeq(tfit$test.statistic, c(5, 72/63 + 3 - 15/12)) aeq(tfit$test.var, c(25, -25/4, -25/4, (72/63)^2 + 9 + (5/4)^2)) tfit <- aareg(Surv(time, status) ~x, test1, weights=wt, test='nrisk') aeq(tfit$test.statistic, c(20/3, 42/9 + 16 - 16*5/12)) aeq(tfit$test.var, c(400/9, -400/9, -400/9, (42/9)^2 + 16^2 + (16*5/12)^2)) # # Make a test data set with no NAs, in sorted order, no ties, # 15 observations tdata <- lung[15:29, c('time', 'status', 'age', 'sex', 'ph.ecog')] tdata$status <- tdata$status -1 tdata <- tdata[order(tdata$time, tdata$status),] row.names(tdata) <- 1:15 tdata$status[8] <- 0 #for some variety afit <- aareg(Surv(time, status) ~ age + sex + ph.ecog, tdata, nmin=6) # # Now, do it "by hand" cfit <- coxph(Surv(time, status) ~ age + sex + ph.ecog, tdata, iter=0, method='breslow') dt1 <- coxph.detail(cfit) sch1 <- resid(cfit, type='schoen') # First estimate of Aalen: from the Cox computations, first 9 # The first and last cols of the ninth are somewhat unstable (approx =0) mine <- rbind(solve(dt1$imat[,,1], sch1[1,]), solve(dt1$imat[,,2], sch1[2,]), solve(dt1$imat[,,3], sch1[3,]), solve(dt1$imat[,,4], sch1[4,]), solve(dt1$imat[,,5], sch1[5,]), solve(dt1$imat[,,6], sch1[6,]), solve(dt1$imat[,,7], sch1[7,]), solve(dt1$imat[,,8], sch1[8,]), solve(dt1$imat[,,9], sch1[9,])) mine <- diag(1/dt1$nrisk[1:9]) %*% mine aeq(mine, afit$coefficient[1:9, -1]) # # Check out the dfbeta matrix from aareg # Note that it is kept internally in time order, not data set order # Those who want residuals should use the resid function! # # First, the simple test case where I know the anwers # afit <- aareg(Surv(time, status) ~ x, test1, dfbeta=T) temp <- c(rep(0,6), #intercepts at time 1 c(2,-1,-1,0,0,0)/9, #alpha at time 1 c(0,0,0,2, -1, -1)/9, #intercepts at time 2 c(0,0,0,-2,1,1)/9) #alpha at time 2 aeq(afit$dfbeta, temp) # #Now a multivariate data set # afit <- aareg(Surv(time, status) ~ age + sex + ph.ecog, lung, dfbeta=T) ord <- order(lung$time, -lung$status) cfit <- coxph(Surv(time, status) ~ age + sex + ph.ecog, lung[ord,], method='breslow', iter=0, x=T) cdt <- coxph.detail(cfit, riskmat=T) # an arbitrary list of times acoef <- rowsum(afit$coefficient, afit$times) #per death time coefs indx <- match(cdt$time, afit$times) for (i in c(2,5,27,54,101, 135)) { lwho <- (cdt$riskmat[,i]==1) lmx <- cfit$x[lwho,] lmy <- 1*( cfit$y[lwho,2]==1 & cfit$y[lwho,1] == cdt$time[i]) fit <- lm(lmy~ lmx) cat("i=", i, "coef=", aeq(fit$coefficients, acoef[i,])) rr <- diag(resid(fit)) zz <- cbind(1,lmx) zzinv <- solve(t(zz) %*% zz) cat(" twt=", aeq(1/(diag(zzinv)), afit$tweight[indx[i],])) df <- t(zzinv %*% t(zz) %*% rr) cat(" dfbeta=", aeq(df, afit$dfbeta[lwho,,i]), "\n") } # Repeat it with case weights ww <- rep(1:5, length.out=nrow(lung))/ 3.0 afit <- aareg(Surv(time, status) ~ age + sex + ph.ecog, lung, dfbeta=T, weights=ww) cfit <- coxph(Surv(time, status) ~ age + sex + ph.ecog, lung[ord,], method='breslow', iter=0, x=T, weights=ww[ord]) cdt <- coxph.detail(cfit, riskmat=T) acoef <- rowsum(afit$coefficient, afit$times) #per death time coefs for (i in c(2,5,27,54,101, 135)) { who <- (cdt$riskmat[,i]==1) x <- cfit$x[who,] y <- 1*( cfit$y[who,2]==1 & cfit$y[who,1] == cdt$time[i]) w <- cfit$weights[who] fit <- lm(y~x, weights=w) cat("i=", i, "coef=", aeq(fit$coefficients, acoef[i,])) rr <- diag(resid(fit)) zz <- cbind(1,x) zzinv <- solve(t(zz)%*% (w*zz)) cat(" twt=", aeq(1/(diag(zzinv)), afit$tweight[indx[i],])) df <- t(zzinv %*% t(zz) %*% (w*rr)) cat(" dfbeta=", aeq(df, afit$dfbeta[who,,i]), "\n") } # # Check that the test statistic computed within aareg and # the one recomputed within summary.aareg are the same. # Of course, they could both be wrong, but at least they'll agree! # If the maxtime argument is used in summary, it recomputes the test, # even if we know that it wouldn't have had to. # # Because the 1-variable and >1 variable case have different code, test # them both. # afit <- aareg(Surv(time, status) ~ age, lung, dfbeta=T) asum <- summary(afit, maxtime=max(afit$times)) aeq(afit$test.statistic, asum$test.statistic) aeq(afit$test.var, asum$test.var) aeq(afit$test.var2, asum$test.var2) print(afit) afit <- aareg(Surv(time, status) ~ age, lung, dfbeta=T, test='nrisk') asum <- summary(afit, maxtime=max(afit$times)) aeq(afit$test.statistic, asum$test.statistic) aeq(afit$test.var, asum$test.var) aeq(afit$test.var2, asum$test.var2) summary(afit) # # Mulitvariate # afit <- aareg(Surv(time, status) ~ age + sex + ph.karno + pat.karno, lung, dfbeta=T) asum <- summary(afit, maxtime=max(afit$times)) aeq(afit$test.statistic, asum$test.statistic) aeq(afit$test.var, asum$test.var) aeq(afit$test.var2, asum$test.var2) print(afit) afit <- aareg(Surv(time, status) ~ age + sex + ph.karno + pat.karno, lung, dfbeta=T, test='nrisk') asum <- summary(afit, maxtime=max(afit$times)) aeq(afit$test.statistic, asum$test.statistic) aeq(afit$test.var, asum$test.var) aeq(afit$test.var2, asum$test.var2) summary(afit) # Weights play no role in the final computation of the test statistic, given # the coefficient matrix, nrisk, and dfbeta as inputs. (Weights do # change the inputs). So there is no need to reprise the above with # case weights. survival/tests/fr_kidney.Rout.save0000644000176200001440000003236214607006645017067 0ustar liggesusers R Under development (unstable) (2020-06-10 r78681) -- "Unsuffered Consequences" Copyright (C) 2020 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > options(na.action=na.exclude) # preserve missings > options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type > library(survival) > > # From: McGilchrist and Aisbett, Biometrics 47, 461-66, 1991 > # Data on the recurrence times to infection, at the point of insertion of > # the catheter, for kidney patients using portable dialysis equipment. > # Catheters may be removed for reasons other than infection, in which case > # the observation is censored. Each patient has exactly 2 observations. > > # Variables: patient, time, status, age, > # sex (1=male, 2=female), > # disease type (0=GN, 1=AN, 2=PKD, 3=Other) > # author's estimate of the frailty > aeq <- function(x,y, ...) all.equal(as.vector(x), as.vector(y), ...) > > # I don't match their answers, and I think that I'm right > kfit <- coxph(Surv(time, status)~ age + sex + disease + frailty(id), kidney) > kfit1<- coxph(Surv(time, status) ~age + sex + disease + + frailty(id, theta=1), kidney, iter=20) > kfit0 <- coxph(Surv(time, status)~ age + sex + disease, kidney) > temp <- coxph(Surv(time, status) ~age + sex + disease + + frailty(id, theta=1, sparse=F), kidney) > > > # Check out the EM based score equations > # temp1 and kfit1 should have essentially the same coefficients > # temp2 should equal kfit1$frail > # equality won't be exact because of the different iteration paths > temp1 <- coxph(Surv(time, status) ~ age + sex + disease + + offset(kfit1$frail[id]), kidney) > rr <- tapply(resid(temp1), kidney$id, sum) > temp2 <- log(rr/1 +1) > aeq(temp1$coef, kfit1$coef, tolerance=.005) [1] TRUE > aeq(temp2, kfit1$frail, tolerance=.005) [1] TRUE > > > > kfit Call: coxph(formula = Surv(time, status) ~ age + sex + disease + frailty(id), data = kidney) coef se(coef) se2 Chisq DF p age 3.18e-03 1.11e-02 1.11e-02 8.14e-02 1 0.775 sex -1.48e+00 3.58e-01 3.58e-01 1.71e+01 1 3.5e-05 diseaseGN 8.80e-02 4.06e-01 4.06e-01 4.68e-02 1 0.829 diseaseAN 3.51e-01 4.00e-01 4.00e-01 7.70e-01 1 0.380 diseasePKD -1.43e+00 6.31e-01 6.31e-01 5.14e+00 1 0.023 frailty(id) 2.71e-05 0 0.933 Iterations: 6 outer, 35 Newton-Raphson Variance of random effect= 5e-07 I-likelihood = -179.1 Degrees of freedom for terms= 1 1 3 0 Likelihood ratio test=17.6 on 5 df, p=0.003 n= 76, number of events= 58 > kfit1 Call: coxph(formula = Surv(time, status) ~ age + sex + disease + frailty(id, theta = 1), data = kidney, iter = 20) coef se(coef) se2 Chisq DF p age 0.00389 0.01959 0.00943 0.03933 1.0 0.84280 sex -2.00764 0.59104 0.41061 11.53834 1.0 0.00068 diseaseGN 0.35335 0.71653 0.38015 0.24319 1.0 0.62191 diseaseAN 0.52341 0.72298 0.40463 0.52413 1.0 0.46909 diseasePKD -0.45938 1.08977 0.66088 0.17770 1.0 0.67336 frailty(id, theta = 1) 28.50571 18.8 0.06909 Iterations: 1 outer, 14 Newton-Raphson Variance of random effect= 1 I-likelihood = -182.5 Degrees of freedom for terms= 0.2 0.5 1.1 18.8 Likelihood ratio test=63.8 on 20.6 df, p=3e-06 n= 76, number of events= 58 > kfit0 Call: coxph(formula = Surv(time, status) ~ age + sex + disease, data = kidney) coef exp(coef) se(coef) z p age 0.003181 1.003186 0.011146 0.285 0.7754 sex -1.483137 0.226925 0.358230 -4.140 3.47e-05 diseaseGN 0.087957 1.091941 0.406369 0.216 0.8286 diseaseAN 0.350794 1.420195 0.399717 0.878 0.3802 diseasePKD -1.431108 0.239044 0.631109 -2.268 0.0234 Likelihood ratio test=17.65 on 5 df, p=0.003423 n= 76, number of events= 58 > temp Call: coxph(formula = Surv(time, status) ~ age + sex + disease + frailty(id, theta = 1, sparse = F), data = kidney) coef se(coef) se2 Chisq DF p age 0.00389 0.01865 0.01120 0.04342 1.0 0.83494 sex -2.00763 0.57624 0.40799 12.13849 1.0 0.00049 diseaseGN 0.35335 0.67865 0.43154 0.27109 1.0 0.60260 diseaseAN 0.52340 0.68910 0.44038 0.57690 1.0 0.44753 diseasePKD -0.45934 1.01394 0.71297 0.20523 1.0 0.65053 frailty(id, theta = 1, sp 26.23016 18.7 0.11573 Iterations: 1 outer, 5 Newton-Raphson Variance of random effect= 1 I-likelihood = -182.5 Degrees of freedom for terms= 0.4 0.5 1.4 18.7 Likelihood ratio test=63.8 on 21 df, p=3e-06 n= 76, number of events= 58 > > # > # Now fit the data using REML > # > kfitm1 <- coxph(Surv(time,status) ~ age + sex + disease + + frailty(id, dist='gauss'), kidney) > kfitm2 <- coxph(Surv(time,status) ~ age + sex + disease + + frailty(id, dist='gauss', sparse=F), kidney) > kfitm1 Call: coxph(formula = Surv(time, status) ~ age + sex + disease + frailty(id, dist = "gauss"), data = kidney) coef se(coef) se2 Chisq DF p age 0.00489 0.01497 0.01059 0.10678 1.0 0.74384 sex -1.69728 0.46101 0.36170 13.55454 1.0 0.00023 diseaseGN 0.17986 0.54485 0.39273 0.10897 1.0 0.74131 diseaseAN 0.39294 0.54482 0.39816 0.52016 1.0 0.47077 diseasePKD -1.13631 0.82519 0.61728 1.89621 1.0 0.16850 frailty(id, dist = "gauss 17.89195 12.1 0.12376 Iterations: 7 outer, 42 Newton-Raphson Variance of random effect= 0.493 Degrees of freedom for terms= 0.5 0.6 1.7 12.1 Likelihood ratio test=47.5 on 14.9 df, p=3e-05 n= 76, number of events= 58 > summary(kfitm2) Call: coxph(formula = Surv(time, status) ~ age + sex + disease + frailty(id, dist = "gauss", sparse = F), data = kidney) n= 76, number of events= 58 coef se(coef) se2 Chisq DF p age 0.004924 0.0149 0.01084 0.11 1.00 0.74000 sex -1.702037 0.4631 0.36134 13.51 1.00 0.00024 diseaseGN 0.181733 0.5413 0.40169 0.11 1.00 0.74000 diseaseAN 0.394416 0.5428 0.40520 0.53 1.00 0.47000 diseasePKD -1.131602 0.8175 0.62981 1.92 1.00 0.17000 frailty(id, dist = "gauss 18.13 12.27 0.12000 exp(coef) exp(-coef) lower .95 upper .95 age 1.0049 0.9951 0.97601 1.0347 sex 0.1823 5.4851 0.07355 0.4519 diseaseGN 1.1993 0.8338 0.41515 3.4646 diseaseAN 1.4835 0.6741 0.51196 4.2988 diseasePKD 0.3225 3.1006 0.06497 1.6010 gauss:1 1.7011 0.5879 0.51805 5.5856 gauss:2 1.4241 0.7022 0.38513 5.2662 gauss:3 1.1593 0.8626 0.38282 3.5108 gauss:4 0.6226 1.6063 0.23397 1.6566 gauss:5 1.2543 0.7972 0.39806 3.9526 gauss:6 1.1350 0.8811 0.38339 3.3599 gauss:7 1.9726 0.5069 0.56938 6.8342 gauss:8 0.6196 1.6140 0.21662 1.7721 gauss:9 0.8231 1.2149 0.28884 2.3456 gauss:10 0.5030 1.9882 0.17468 1.4482 gauss:11 0.7565 1.3218 0.27081 2.1134 gauss:12 1.1048 0.9052 0.33430 3.6510 gauss:13 1.3022 0.7679 0.42746 3.9673 gauss:14 0.5912 1.6915 0.18537 1.8855 gauss:15 0.5449 1.8352 0.18580 1.5980 gauss:16 1.0443 0.9576 0.31424 3.4702 gauss:17 0.9136 1.0945 0.30004 2.7820 gauss:18 0.9184 1.0889 0.32476 2.5970 gauss:19 0.6426 1.5562 0.19509 2.1166 gauss:20 1.1698 0.8549 0.34528 3.9631 gauss:21 0.3336 2.9974 0.10202 1.0910 gauss:22 0.6871 1.4554 0.23531 2.0064 gauss:23 1.4778 0.6767 0.47560 4.5918 gauss:24 1.0170 0.9832 0.31555 3.2779 gauss:25 0.8096 1.2352 0.27491 2.3843 gauss:26 0.6145 1.6274 0.21491 1.7570 gauss:27 1.0885 0.9187 0.32819 3.6101 gauss:28 1.5419 0.6485 0.49231 4.8292 gauss:29 1.3785 0.7254 0.43766 4.3421 gauss:30 1.3748 0.7274 0.44444 4.2530 gauss:31 1.4447 0.6922 0.47031 4.4380 gauss:32 1.1993 0.8339 0.35207 4.0850 gauss:33 1.9449 0.5142 0.55229 6.8491 gauss:34 0.8617 1.1605 0.27685 2.6820 gauss:35 1.7031 0.5872 0.52657 5.5084 gauss:36 0.8275 1.2085 0.22811 3.0015 gauss:37 1.4707 0.6800 0.38936 5.5549 gauss:38 1.0479 0.9543 0.30685 3.5789 Iterations: 6 outer, 21 Newton-Raphson Variance of random effect= 0.5090956 Degrees of freedom for terms= 0.5 0.6 1.7 12.3 Concordance= 0.796 (se = 0.032 ) Likelihood ratio test= 117.9 on 15.14 df, p=<2e-16 > # > # Fit the kidney data using AIC > # > > # gamma, corrected aic > coxph(Surv(time, status) ~ age + sex + frailty(id, method='aic', caic=T), + kidney) Call: coxph(formula = Surv(time, status) ~ age + sex + frailty(id, method = "aic", caic = T), data = kidney) coef se(coef) se2 Chisq DF p age 0.00364 0.01048 0.00891 0.12053 1.00 0.72846 sex -1.31953 0.39556 0.32497 11.12781 1.00 0.00085 frailty(id, method = "aic 13.55258 7.81 0.08692 Iterations: 9 outer, 63 Newton-Raphson Variance of random effect= 0.203 I-likelihood = -182.1 Degrees of freedom for terms= 0.7 0.7 7.8 Likelihood ratio test=33.3 on 9.21 df, p=1e-04 n= 76, number of events= 58 > > coxph(Surv(time, status) ~ age + sex + frailty(id, dist='t'), kidney) Call: coxph(formula = Surv(time, status) ~ age + sex + frailty(id, dist = "t"), data = kidney) coef se(coef) se2 Chisq DF p age 0.00561 0.01203 0.00872 0.21774 1.0 0.64077 sex -1.65487 0.48294 0.38527 11.74180 1.0 0.00061 frailty(id, dist = "t") 20.33462 13.9 0.11752 Iterations: 8 outer, 58 Newton-Raphson Variance of random effect= 0.825 Degrees of freedom for terms= 0.5 0.6 13.9 Likelihood ratio test=48.6 on 15.1 df, p=2e-05 n= 76, number of events= 58 > coxph(Surv(time, status) ~ age + sex + frailty(id, dist='gauss', method='aic', + caic=T), kidney) Call: coxph(formula = Surv(time, status) ~ age + sex + frailty(id, dist = "gauss", method = "aic", caic = T), data = kidney) coef se(coef) se2 Chisq DF p age 0.00303 0.01031 0.00895 0.08646 1.00 0.7687 sex -1.15152 0.36368 0.30556 10.02558 1.00 0.0015 frailty(id, dist = "gauss 12.35238 6.76 0.0800 Iterations: 7 outer, 41 Newton-Raphson Variance of random effect= 0.185 Degrees of freedom for terms= 0.8 0.7 6.8 Likelihood ratio test=28.4 on 8.22 df, p=5e-04 n= 76, number of events= 58 > > > # uncorrected aic > coxph(Surv(time, status) ~ age + sex + frailty(id, method='aic', caic=F), + kidney) Call: coxph(formula = Surv(time, status) ~ age + sex + frailty(id, method = "aic", caic = F), data = kidney) coef se(coef) se2 Chisq DF p age 0.00785 0.01503 0.00823 0.27284 1.0 0.60143 sex -1.88990 0.56114 0.39941 11.34311 1.0 0.00076 frailty(id, method = "aic 37.45897 19.7 0.00918 Iterations: 8 outer, 87 Newton-Raphson Variance of random effect= 0.886 I-likelihood = -182.8 Degrees of freedom for terms= 0.3 0.5 19.7 Likelihood ratio test=61.2 on 20.5 df, p=6e-06 n= 76, number of events= 58 Warning message: In coxpenal.fit(X, Y, istrat, offset, init = init, control, weights = weights, : Inner loop failed to coverge for iterations 4 > > coxph(Surv(time, status) ~ age + sex + frailty(id, dist='t', caic=F), kidney) Call: coxph(formula = Surv(time, status) ~ age + sex + frailty(id, dist = "t", caic = F), data = kidney) coef se(coef) se2 Chisq DF p age 0.00561 0.01203 0.00872 0.21774 1.0 0.64077 sex -1.65487 0.48294 0.38527 11.74180 1.0 0.00061 frailty(id, dist = "t", c 20.33462 13.9 0.11752 Iterations: 8 outer, 58 Newton-Raphson Variance of random effect= 0.825 Degrees of freedom for terms= 0.5 0.6 13.9 Likelihood ratio test=48.6 on 15.1 df, p=2e-05 n= 76, number of events= 58 > > proc.time() user system elapsed 0.930 0.067 0.989 survival/tests/testnull.Rout.save0000644000176200001440000000242714613770353016767 0ustar liggesusers R version 2.7.1 (2008-06-23) Copyright (C) 2008 The R Foundation for Statistical Computing ISBN 3-900051-07-0 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > options(na.action=na.exclude) # preserve missings > options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type > library(survival) Loading required package: splines > > # > # A test of NULL models > # > fit1 <- coxph(Surv(stop, event) ~ rx + strata(number), bladder, iter=0) > fit2 <- coxph(Surv(stop, event) ~ strata(number), bladder) > > all.equal(fit1$loglik[2], fit2$loglik) [1] TRUE > all.equal(fit1$resid, fit2$resid) [1] TRUE > > > fit1 <- coxph(Surv(start, stop, event) ~ rx + strata(number), bladder2, iter=0) > fit2 <- coxph(Surv(start, stop, event) ~ strata(number), bladder2) > > all.equal(fit1$loglik[2], fit2$loglik) [1] TRUE > all.equal(fit1$resid, fit2$resid) [1] TRUE > survival/tests/book3.Rout.save0000644000176200001440000002400714613770353016130 0ustar liggesusers R Under development (unstable) (2024-04-17 r86441) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: aarch64-unknown-linux-gnu R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(survival) > options(na.action=na.exclude) # preserve missings > options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type > > # > # Tests from the appendix of Therneau and Grambsch > # c. Data set 2 and Breslow estimate > # > test2 <- data.frame(start=c(1, 2, 5, 2, 1, 7, 3, 4, 8, 8), + stop =c(2, 3, 6, 7, 8, 9, 9, 9,14,17), + event=c(1, 1, 1, 1, 1, 1, 1, 0, 0, 0), + x =c(1, 0, 0, 1, 0, 1, 1, 1, 0, 0)) > > byhand <- function(beta, newx=0) { + r <- exp(beta) + loglik <- 4*beta - log(r+1) - log(r+2) - 3*log(3*r+2) - 2*log(3*r+1) + u <- 1/(r+1) + 1/(3*r+1) + 4/(3*r+2) - + ( r/(r+2) +3*r/(3*r+2) + 3*r/(3*r+1)) + imat <- r/(r+1)^2 + 2*r/(r+2)^2 + 6*r/(3*r+2)^2 + + 3*r/(3*r+1)^2 + 3*r/(3*r+1)^2 + 12*r/(3*r+2)^2 + + hazard <-c( 1/(r+1), 1/(r+2), 1/(3*r+2), 1/(3*r+1), 1/(3*r+1), 2/(3*r+2) ) + xbar <- c(r/(r+1), r/(r+2), 3*r/(3*r+2), 3*r/(3*r+1), 3*r/(3*r+1), + 3*r/(3*r+2)) + + # The matrix of weights, one row per obs, one col per time + # deaths at 2,3,6,7,8,9 + wtmat <- matrix(c(1,0,0,0,1,0,0,0,0,0, + 0,1,0,1,1,0,0,0,0,0, + 0,0,1,1,1,0,1,1,0,0, + 0,0,0,1,1,0,1,1,0,0, + 0,0,0,0,1,1,1,1,0,0, + 0,0,0,0,0,1,1,1,1,1), ncol=6) + wtmat <- diag(c(r,1,1,r,1,r,r,r,1,1)) %*% wtmat + + x <- c(1,0,0,1,0,1,1,1,0,0) + status <- c(1,1,1,1,1,1,1,0,0,0) + xbar <- colSums(wtmat*x)/ colSums(wtmat) + n <- length(x) + + # Table of sums for score and Schoenfeld resids + hazmat <- wtmat %*% diag(hazard) #each subject's hazard over time + dM <- -hazmat #Expected part + for (i in 1:6) dM[i,i] <- dM[i,i] +1 #observed + dM[7,6] <- dM[7,6] +1 # observed + mart <- rowSums(dM) + + # Table of sums for score and Schoenfeld resids + # Looks like the last table of appendix E.2.1 of the book + resid <- dM * outer(x, xbar, '-') + score <- rowSums(resid) + scho <- colSums(resid) + # We need to split the two tied times up, to match coxph + scho <- c(scho[1:5], scho[6]/2, scho[6]/2) + var.g <- cumsum(hazard*hazard /c(1,1,1,1,1,2)) + var.d <- cumsum( (xbar-newx)*hazard) + + surv <- exp(-cumsum(hazard) * exp(beta*newx)) + varhaz <- (var.g + var.d^2/imat)* exp(2*beta*newx) + + list(loglik=loglik, u=u, imat=imat, xbar=xbar, haz=hazard, + mart=mart, score=score, rmat=resid, + scho=scho, surv=surv, var=varhaz) + } > > > aeq <- function(x,y) all.equal(as.vector(x), as.vector(y)) > > fit0 <-coxph(Surv(start, stop, event) ~x, test2, iter=0, method='breslow') > truth0 <- byhand(0,0) > aeq(truth0$loglik, fit0$loglik[1]) [1] TRUE > aeq(1/truth0$imat, fit0$var) [1] TRUE > aeq(truth0$mart, fit0$residuals) [1] TRUE > aeq(truth0$scho, resid(fit0, 'schoen')) [1] TRUE > aeq(truth0$score, resid(fit0, 'score')) [1] TRUE > sfit <- survfit(fit0, list(x=0), censor=FALSE) > aeq(sfit$std.err^2, truth0$var) [1] TRUE > aeq(sfit$surv, truth0$surv) [1] TRUE > aeq(fit0$score, truth0$u^2/truth0$imat) [1] TRUE > > beta1 <- truth0$u/truth0$imat > fit1 <- coxph(Surv(start, stop, event) ~x, test2, iter=1, ties="breslow") > aeq(beta1, coef(fit1)) [1] TRUE > > truth <- byhand(-0.084526081, 0) > fit <- coxph(Surv(start, stop, event) ~x, test2, eps=1e-8, method='breslow', + nocenter= NULL) > aeq(truth$loglik, fit$loglik[2]) [1] TRUE > aeq(1/truth$imat, fit$var) [1] TRUE > aeq(truth$mart, fit$residuals) [1] TRUE > aeq(truth$scho, resid(fit, 'schoen')) [1] TRUE > aeq(truth$score, resid(fit, 'score')) [1] TRUE > expect <- predict(fit, type='expected', newdata=test2) #force recalc > aeq(test2$event -fit$residuals, expect) #tests the predict function [1] TRUE > > sfit <- survfit(fit, list(x=0), censor=FALSE) > aeq(sfit$std.err^2, truth$var) [1] TRUE > aeq(-log(sfit$surv), (cumsum(truth$haz))) [1] TRUE > > # Reprise the test, with strata > # offseting the times ensures that we will get the wrong risk sets > # if strata were not kept separate > test2b <- rbind(test2, test2, test2) > test2b$group <- rep(1:3, each= nrow(test2)) > test2b$start <- test2b$start + test2b$group > test2b$stop <- test2b$stop + test2b$group > fit0 <- coxph(Surv(start, stop, event) ~ x + strata(group), test2b, + iter=0, method="breslow") > aeq(3*truth0$loglik, fit0$loglik[1]) [1] TRUE > aeq(3*truth0$imat, 1/fit0$var) [1] TRUE > aeq(rep(truth0$mart,3), fit0$residuals) [1] TRUE > aeq(rep(truth0$scho,3), resid(fit0, 'schoen')) [1] TRUE > aeq(rep(truth0$score,3), resid(fit0, 'score')) [1] TRUE > > fit1 <- coxph(Surv(start, stop, event) ~ x + strata(group), test2b, + iter=1, method="breslow") > aeq(fit1$coefficients, beta1) [1] TRUE > > fit3 <- coxph(Surv(start, stop, event) ~x + strata(group), + test2b, eps=1e-8, method='breslow') > aeq(3*truth$loglik, fit3$loglik[2]) [1] TRUE > aeq(3*truth$imat, 1/fit3$var) [1] TRUE > aeq(rep(truth$mart,3), fit3$residuals) [1] TRUE > aeq(rep(truth$scho,3), resid(fit3, 'schoen')) [1] TRUE > aeq(rep(truth$score,3), resid(fit3, 'score')) [1] TRUE > > # > # Done with the formal test, now print out lots of bits > # > resid(fit) 1 2 3 4 5 6 0.52111895 0.65741078 0.78977654 0.24738772 -0.60629349 0.36902492 7 8 9 10 -0.06876579 -1.06876579 -0.42044692 -0.42044692 > resid(fit, 'scor') 1 2 3 4 5 6 0.27156496 -0.20696709 -0.45771743 -0.09586133 0.13608234 0.19288983 7 8 9 10 0.04655651 -0.37389040 0.24367131 0.24367131 > resid(fit, 'scho') 2 3 6 7 8 9 9 0.5211189 -0.3148216 -0.5795531 0.2661809 -0.7338191 0.4204469 0.4204469 > > predict(fit, type='lp') [1] -0.04226304 0.04226304 0.04226304 -0.04226304 0.04226304 -0.04226304 [7] -0.04226304 -0.04226304 0.04226304 0.04226304 > predict(fit, type='risk') [1] 0.9586176 1.0431688 1.0431688 0.9586176 1.0431688 0.9586176 0.9586176 [8] 0.9586176 1.0431688 1.0431688 > predict(fit, type='expected') 1 2 3 4 5 6 7 8 0.4788811 0.3425892 0.2102235 0.7526123 1.6062935 0.6309751 1.0687658 1.0687658 9 10 0.4204469 0.4204469 > predict(fit, type='terms') x 1 -0.04226304 2 0.04226304 3 0.04226304 4 -0.04226304 5 0.04226304 6 -0.04226304 7 -0.04226304 8 -0.04226304 9 0.04226304 10 0.04226304 attr(,"constant") [1] -0.04226304 > predict(fit, type='lp', se.fit=T) $fit 1 2 3 4 5 6 -0.04226304 0.04226304 0.04226304 -0.04226304 0.04226304 -0.04226304 7 8 9 10 -0.04226304 -0.04226304 0.04226304 0.04226304 $se.fit 1 2 3 4 5 6 7 8 0.3969086 0.3969086 0.3969086 0.3969086 0.3969086 0.3969086 0.3969086 0.3969086 9 10 0.3969086 0.3969086 > predict(fit, type='risk', se.fit=T) $fit 1 2 3 4 5 6 7 8 0.9586176 1.0431688 1.0431688 0.9586176 1.0431688 0.9586176 0.9586176 0.9586176 9 10 1.0431688 1.0431688 $se.fit 1 2 3 4 5 6 7 8 0.3886094 0.4053852 0.4053852 0.3886094 0.4053852 0.3886094 0.3886094 0.3886094 9 10 0.4053852 0.4053852 > predict(fit, type='expected', se.fit=T) $fit 1 2 3 4 5 6 7 8 0.4788811 0.3425892 0.2102235 0.7526123 1.6062935 0.6309751 1.0687658 1.0687658 9 10 0.4204469 0.4204469 $se.fit [1] 0.5182381 0.3982700 0.3292830 0.6266797 1.0255146 0.5852364 0.7341340 [8] 0.7341340 0.6268550 0.6268550 > predict(fit, type='terms', se.fit=T) $fit x 1 -0.04226304 2 0.04226304 3 0.04226304 4 -0.04226304 5 0.04226304 6 -0.04226304 7 -0.04226304 8 -0.04226304 9 0.04226304 10 0.04226304 attr(,"constant") [1] -0.04226304 $se.fit x 1 0.3969086 2 0.3969086 3 0.3969086 4 0.3969086 5 0.3969086 6 0.3969086 7 0.3969086 8 0.3969086 9 0.3969086 10 0.3969086 > > summary(survfit(fit)) Call: survfit(formula = fit) time n.risk n.event survival std.err lower 95% CI upper 95% CI 2 2 1 0.607 0.303 0.2279 1.000 3 3 1 0.437 0.262 0.1347 1.000 6 5 1 0.357 0.226 0.1034 1.000 7 4 1 0.277 0.188 0.0729 1.000 8 4 1 0.214 0.156 0.0514 0.894 9 5 2 0.143 0.112 0.0308 0.667 > summary(survfit(fit, list(x=2))) Call: survfit(formula = fit, newdata = list(x = 2)) time n.risk n.event survival std.err lower 95% CI upper 95% CI 2 2 1 0.644 0.444 0.16657 1 3 3 1 0.482 0.511 0.06055 1 6 5 1 0.404 0.504 0.03491 1 7 4 1 0.322 0.475 0.01801 1 8 4 1 0.258 0.437 0.00928 1 9 5 2 0.181 0.377 0.00302 1 > > proc.time() user system elapsed 0.467 0.020 0.484 survival/tests/r_scale.Rout.save0000644000176200001440000000502214607006645016516 0ustar liggesusers R version 3.0.0 (2013-04-03) -- "Masked Marvel" Copyright (C) 2013 The R Foundation for Statistical Computing Platform: i686-pc-linux-gnu (32-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > options(na.action=na.exclude) # preserve missings > options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type > library(survival) Loading required package: splines > > # > # Verify that scale can be fixed at a value > # coefs will differ slightly due to different iteration paths > tol <- .001 > > # Intercept only models > fit1 <- survreg(Surv(time,status) ~ 1, lung) > fit2 <- survreg(Surv(time,status) ~ 1, lung, scale=fit1$scale) > all.equal(fit1$coef, fit2$coef, tolerance= tol) [1] TRUE > all.equal(fit1$loglik, fit2$loglik, tolerance= tol) [1] TRUE > > # The two robust variance matrices are not the same, since removing > # an obs has a different effect on the two models. This just > # checks for failure, not for correctness > fit3 <- survreg(Surv(time,status) ~ 1, lung, robust=TRUE) > fit4 <- survreg(Surv(time,status) ~ 1, lung, scale=fit1$scale, robust=TRUE) > > > # multiple covariates > fit1 <- survreg(Surv(time,status) ~ age + ph.karno, lung) > fit2 <- survreg(Surv(time,status) ~ age + ph.karno, lung, + scale=fit1$scale) > all.equal(fit1$coef, fit2$coef, tolerance=tol) [1] TRUE > all.equal(fit1$loglik[2], fit2$loglik[2], tolerance=tol) [1] TRUE > > fit3 <- survreg(Surv(time,status) ~ age + ph.karno, lung, robust=TRUE) > fit4 <- survreg(Surv(time,status) ~ age + ph.karno, lung, + scale=fit1$scale, robust=TRUE) > > # penalized models > fit1 <- survreg(Surv(time, status) ~ pspline(age), lung) > fit2 <- survreg(Surv(time, status) ~ pspline(age), lung, scale=fit1$scale) > all.equal(fit1$coef, fit2$coef, tolerance=tol) [1] TRUE > all.equal(fit1$loglik[2], fit2$loglik[2], tolerance=tol) [1] TRUE > > fit3 <- survreg(Surv(time,status) ~ pspline(age) + ph.karno, lung, robust=TRUE) > fit4 <- survreg(Surv(time,status) ~ pspline(age) + ph.karno, lung, + scale=fit1$scale, robust=TRUE) > > > > proc.time() user system elapsed 0.304 0.044 0.344 survival/tests/fr_ovarian.Rout.save0000644000176200001440000000402614607006645017237 0ustar liggesusers R Under development (unstable) (2018-05-10 r74706) -- "Unsuffered Consequences" Copyright (C) 2018 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > options(na.action=na.exclude) # preserve missings > options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type > library(survival) > > # > # Test on the ovarian data > > fit1 <- coxph(Surv(futime, fustat) ~ rx + age, ovarian) > fit2 <- coxph(Surv(futime, fustat) ~ rx + pspline(age, df=2), + data=ovarian) > fit2$iter [1] 2 8 > > fit2$df [1] 0.9426611 1.9293051 > > fit2$history $`pspline(age, df = 2)` $`pspline(age, df = 2)`$theta [1] 0.4468868 $`pspline(age, df = 2)`$done [1] TRUE $`pspline(age, df = 2)`$history thetas dfs [1,] 1.0000000 1.000000 [2,] 0.0000000 5.000000 [3,] 0.6000000 1.734267 [4,] 0.4845205 1.929305 $`pspline(age, df = 2)`$half [1] 0 > > fit4 <- coxph(Surv(futime, fustat) ~ rx + pspline(age, df=4), + data=ovarian) > fit4 Call: coxph(formula = Surv(futime, fustat) ~ rx + pspline(age, df = 4), data = ovarian) coef se(coef) se2 Chisq DF p rx -0.373 0.761 0.749 0.241 1.00 0.6238 pspline(age, df = 4), lin 0.139 0.044 0.044 9.978 1.00 0.0016 pspline(age, df = 4), non 2.592 2.93 0.4457 Iterations: 3 outer, 14 Newton-Raphson Theta= 0.242 Degrees of freedom for terms= 1.0 3.9 Likelihood ratio test=19.4 on 4.9 df, p=0.001 n= 26, number of events= 12 > > > > proc.time() user system elapsed 0.808 0.040 0.847 survival/tests/concordance2.R0000644000176200001440000002174614613770353015775 0ustar liggesuserslibrary(survival) options(na.action=na.exclude) # preserve missings options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type # # Tests for the condordance function. # aeq <- function(x,y, ...) all.equal(as.vector(x), as.vector(y), ...) grank <- function(x, time, grp, wt) { if (all(wt==1)) unlist(tapply(x, grp, rank)) else unlist(tapply(1:length(x), grp, function(i) { xx <- x[i] # x and wts for this subset of the data ww <- wt[i] temp <- outer(xx, xx, function(a, b) sign(b-a)) colSums(ww*temp)/2 })) } # a Cox model using iter=0, ties='breslow' and the above function has a score # statistic which is U=(C-D)/2 and score test U^2/H, where H is the Cox model # information matrix, with fit$var=1/H. The concordance is U+ 1/2. # Pull out the Somers' d and its variance phget <- function(fit) { c(d = 2*sqrt(fit$score/fit$var), v= 4/fit$var) } fscale <- function(fit) { if (is.matrix(fit$count)) temp <- colSums(fit$count) else temp <- fit$count npair <- sum(temp[1:3]) c(d = abs(temp[1]-temp[2]), v=4*fit$cvar*npair^2) } # Concordance by brute force. O(n^2) algorithm, but ok for n<500 or so allpair <- function(time, status, x, wt, all=FALSE) { n <- length(time) if (missing(wt)) wt <- rep(1, length(x)) count <- sapply(which(status==1), function(i) { atrisk <- (time > time[i]) | (time==time[i] & status==0) temp <- tapply(wt[atrisk], factor(sign(x[i] -x[atrisk]), c(-1, 1, 0)), sum) tiedtime <- (time==time[i] & status ==1 & (1:n)>i) ties <- tapply(wt[tiedtime], factor(x[tiedtime]==x[i], c(FALSE, TRUE)),sum) wt[i]* c(ifelse(is.na(temp), 0, temp), ifelse(is.na(ties), 0, ties)) }) rownames(count) <- c("concordant", "discordant", "tied.x", "tied.y", "tied.xy") if (all) { colnames(count) <- time[status==1] t(count) } else rowSums(count) } # leverage by brute force leverage <- function(time, status, x, wt, eps=1e-5) { if (missing(wt)) wt <- rep(1, length(x)) toss <- is.na(time + status + x +wt) if (any(toss)) { time <- time[!toss] status <- status[!toss] x <- x[!toss] wt <- wt[!toss] } n <- length(time) influence <- matrix(0, n, 5) t2 <- time + eps*(status==0) for (i in 1:n) { if (status[i] ==0) comparable <- (time<=time[i] & status==1) else comparable <- ifelse(status==0, time >= time[i], time!=time[i]) temp <- sign((x[i]-x[comparable])*(t2[i] - t2[comparable])) influence[i,1:3] <-tapply(wt[comparable],factor(temp, c(1,-1,0)), sum) if (status[i]==1) { tied <- (time==time[i] & status==1 & (1:n)!= i) if (any(tied)) { itemp<- tapply(wt[tied], factor(x[tied]==x[i], c(FALSE, TRUE)), sum) influence[i,4:5] <- itemp } } } dimnames(influence) <- list(as.character(Surv(time, status)), c("concord", "discord", "tie.x", "tie.y", "tie.xy")) ifelse(is.na(influence), 0, influence) } tdata <- aml[aml$x=='Maintained', c("time", "status")] tdata$x <- c(1,6,2,7,3,7,3,8,4,4,5) tdata$wt <- c(1,2,3,2,1,2,3,4,3,2,1) fit <- concordance(Surv(time, status) ~x, tdata, influence=2) aeq(fit$count, with(tdata, allpair(time, status, x))) aeq(fit$influence, with(tdata, leverage(time, status, x))) cfit <- coxph(Surv(time, status) ~ tt(x), tdata, tt=grank, ties='breslow', iter=0, x=T) aeq(phget(cfit), fscale(fit)) # agree with Cox model # Test 2: Lots of ties tempy <- Surv(c(1,2,2,2,3,4,4,4,5,2), c(1,0,1,0,1,0,1,1,0,1)) tempx <- c(5,5,4,4,3,3,7,6,5,4) fit2 <- concordance(tempy ~ tempx, influence=2) aeq(fit2$count, allpair(tempy[,1], tempy[,2], tempx)) aeq(fit2$influence, leverage(tempy[,1], tempy[,2], tempx)) cfit2 <- coxph(tempy ~ tt(tempx), tt=grank, ties="breslow", iter=0) aeq(phget(cfit2), fscale(fit2)) # agree with Cox model # Bigger data cox3 <- coxph(Surv(time, status) ~ age + sex + ph.ecog, lung) fit3 <- concordance(Surv(time, status) ~ predict(cox3), lung, influence=2) aeq(fit3$count, allpair(lung$time, lung$status-1,predict(cox3))) aeq(fit3$influence, leverage(lung$time, lung$status-1,predict(cox3))) cfit3 <- coxph(Surv(time, status) ~ tt(predict(cox3)), tt=grank, ties="breslow", iter=0, data=lung) aeq(phget(cfit3), fscale(fit3)) # agree with Cox model # More ties fit4 <- concordance(Surv(time, status) ~ ph.ecog, lung, influence=2) fit4b <- concordance(Surv(time, status) ~ ph.ecog, lung, reverse=TRUE) aeq(fit4$count, allpair(lung$time, lung$status-1, lung$ph.ecog)) aeq(fit4b$count, c(8392, 4258, 7137, 21, 7)) cfit4 <- coxph(Surv(time, status) ~ tt(ph.ecog), lung, iter=0, method='breslow', tt=grank) aeq(phget(cfit4), fscale(fit4)) # agree with Cox model # Case weights fit5 <- concordance(Surv(time, status) ~ x, tdata, weights=wt, influence=2) fit6 <- concordance(Surv(time, status) ~x, tdata[rep(1:11,tdata$wt),]) aeq(fit5$count, with(tdata, allpair(time, status, x, wt))) aeq(fit5$count, c(91, 70, 7, 0, 0)) # checked by hand aeq(fit5$count[1:3], fit6$count[1:3]) #spurious "tied.xy" values, ignore aeq(fit5$var[2], fit6$var[2]) aeq(fit5$influence, with(tdata, leverage(time, status, x, wt))) cfit5 <- coxph(Surv(time, status) ~ tt(x), tdata, weights=wt, iter=0, method='breslow', tt=grank) aeq(phget(cfit5), fscale(fit5)) # agree with Cox model # Start, stop simplest cases fit6 <- concordance(Surv(rep(0,11), time, status) ~ x, tdata) aeq(fit6$count, fit$count) aeq(fit6$var, fit$var) fit7 <- concordance(Surv(rep(0,11), time, status) ~ x, tdata, weights=wt) aeq(fit7$count, fit5$count) aeq(fit7$var, fit5$var) # Multiple intervals for some, but same risk sets as tdata tdata2 <- data.frame(time1=c(0,3, 5, 6,7, 0, 4,17, 7, 0,16, 2, 0, 0,9, 5), time2=c(3,9, 13, 7,13, 18, 17,23, 28, 16,31, 34, 45, 9,48, 60), status=c(0,1, 1, 0,0, 1, 0,1, 0, 0,1, 1, 0, 0,1, 0), x = c(1,1, 6, 2,2, 7, 3,3, 7, 3,3, 8, 4, 4,4, 5), wt= c(1,1, 2, 3,3, 2, 1,1, 2, 3,3, 4, 3, 2,2, 1), id= c(1,1, 2, 3,3, 4, 5,5, 6, 7,7, 8, 9, 10,10, 11)) fit8 <- concordance(Surv(time1, time2, status) ~x, cluster=id, tdata2, weights=wt, influence=2) aeq(fit5$count, fit8$count) # influence has one row per obs, so the next line is false: mismatched lengths # aeq(fit5$influence, fit8$influence) aeq(fit5$var, fit8$var) cfit8 <- coxph(Surv(time1, time2, status) ~ tt(x), tdata2, weights=wt, iter=0, method='breslow', tt=grank) aeq(phget(cfit8), fscale(fit8)) # agree with Cox model # Stratified tdata3 <- data.frame(time1=c(tdata2$time1, rep(0, nrow(lung))), time2=c(tdata2$time2, lung$time), status = c(tdata2$status, lung$status -1), x = c(tdata2$x, lung$ph.ecog), wt= c(tdata2$wt, rep(1, nrow(lung))), grp=rep(1:2, c(nrow(tdata2), nrow(lung))), id = c(tdata2$id, 100+ 1:nrow(lung))) fit9 <- concordance(Surv(time1, time2, status) ~x + strata(grp), cluster=id, data=tdata3, weights=wt, influence=2) aeq(fit9$count, rbind(fit8$count, fit4$count)) # check out case weights, strata, and grouped jackknife; # force several ties in x, y, and xy (with missing values too for good measure). tdata <- subset(lung, select=-c(meal.cal, wt.loss, sex, age)) tdata$wt <- rep(1:25, length.out=nrow(tdata))/10 tdata$time <- ceiling(tdata$time/30) # force ties in y tfit <- coxph(Surv(time, status) ~ ph.ecog + pat.karno + strata(inst) + cluster(inst), tdata, weights=wt) tdata$tpred <- predict(tfit) cm4 <- concordance(tfit, influence=3, keepstrata=TRUE) cm5 <- concordance(Surv(time, status) ~ tpred + strata(inst) + cluster(inst), data=tdata, weights=wt, reverse=TRUE, influence=3, keepstrata=TRUE) all.equal(cm4[1:6], cm5[1:6]) # call and na.action won't match u.inst <- sort(unique(tdata$inst)) temp <- matrix(0, length(u.inst), 5) for (i in 1:length(u.inst)) { temp[i,] <- with(subset(tdata, inst==u.inst[i]), allpair(time, status-1, -tpred, wt)) } aeq(temp, cm4$count) eps <- 1e-6 keep <- (1:nrow(tdata))[-tfit$na.action] # the obs that are not tossed lmat <- matrix(0., length(keep), 5) for (i in 1:length(keep)) { wt2 <- tdata$wt wt2[keep[i]] <- wt2[keep[i]] + eps test <- concordance(Surv(time, status) ~ predict(tfit) + strata(inst), data=tdata, weights=wt2, group=group, reverse=TRUE, keepstrata=TRUE) lmat[i,] <- colSums(test$count - cm4$count)/eps } aeq(lmat, cm4$influence, tolerance=eps) # Check that keepstrata gives the correct sum cm4b <- concordance(tfit, keepstrata=FALSE) aeq(cm4b$count, colSums(cm4$count)) survival/tests/strata2.Rout.save0000644000176200001440000000272014607006645016470 0ustar liggesusers R Under development (unstable) (2018-04-09 r74565) -- "Unsuffered Consequences" Copyright (C) 2018 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > # > # New tests 4/2010 to validate strata by covariate interactions > # > library(survival) > options(na.action=na.exclude) # preserve missings > options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type > aeq <- function(x,y) all.equal(as.vector(x), as.vector(y)) > > tdata <- lung > tdata$sex <- lung$sex +3 > > # Both of these should produce warning messages about singular X, since there > # are ph.ecog=3 subjects in only 1 of the strata. > # Does not affect the test > fit1 <- coxph(Surv(time, status) ~ age + sex:strata(ph.ecog), lung) > fit2 <- coxph(Surv(time, status) ~ age + sex:strata(ph.ecog), tdata) > > aeq(fit1$coef, fit2$coef) [1] TRUE > aeq(fit1$var, fit2$var) [1] TRUE > aeq(predict(fit1), predict(fit2)) [1] TRUE > > proc.time() user system elapsed 0.692 0.036 0.727 survival/tests/ekm.R0000644000176200001440000001325414654462361014206 0ustar liggesuserslibrary(survival) aeq <- function(x, y, ...) all.equal(as.vector(x), as.vector(y), ...) # test for the "extended KM", where subjects change arms midstream # (I don't like it statistically, but some use it). tdata <- aml tdata$id <- 1:nrow(tdata) tdata <- survSplit(Surv(time, status) ~ ., tdata, cut= c(9, 17, 30)) tdata$trt <- rep(c(1,1,2,2,2), length=nrow(tdata)) # different weights for different rows of the same subject = hardest case tdata$wt <- rep(1:6, length= nrow(tdata)) tdata$status[tdata$time==13] <- 1 # force at least 1 tied event # not exported, but used in byhand if (!exists("survflag")) survflag <- survival:::survflag byhand <- function(t1, t2, status, grp, id, wt, debug=FALSE) { if (missing(wt)) wt <- rep(1, length(t1)) ugrp <- unique(grp) ngrp <- length(ugrp) out <- vector("list", ngrp) names(out) <- ugrp pos <- survflag(Surv(t1, t2, status), id, grp) for (i in ugrp) { # create this curve keep <- (grp ==i) n <- sum(keep) utime <- sort(unique(c(t1[keep], t2[keep]))) ntime <- length(utime) nrisk <- ncensor <- nevent <- entry <- double(ntime) surv <- cumhaz <- double(ntime) U <- C <- matrix(0, n, ntime) # influence U2 <- dN <- U # portions of U, useful for debugging the AUC utemp <- ctemp <- double(n) km <- 1.0; chaz <- 0 for (j in 1:ntime) { atrisk <- (keep & t1 < utime[j] & t2 >= utime[j]) nrisk[j] <- sum(wt[atrisk]) nevent[j] <- sum(wt[keep & t2== utime[j] & status ==1]) ncensor[j] <- sum(wt[keep & t2==utime[j] & status==0 & pos >1]) entry[j] <- sum(wt[keep & t1== utime[j] & pos%%2 ==1]) if (nrisk[j] >0) { km <- km * (nrisk[j]- nevent[j])/ nrisk[j] chaz <- chaz + nevent[j]/nrisk[j] } surv[j] =km cumhaz[j] =chaz # influence if (nrisk[j] > 0) { haz <- nevent[j]/nrisk[j] death <- (t2[keep]== utime[j] & status[keep] ==1) temp <- double(n) temp[death] <- 1/nrisk[j] temp[atrisk[keep]] <- temp[atrisk[keep]] - haz/nrisk[j] ctemp <- ctemp + temp if (haz <1) utemp <- utemp - temp/(1-haz) else utemp <- 0 dN[death,j] <- 1/(nrisk[j]* (1-haz)) U2[atrisk[keep],j] <- haz/(nrisk[j] * (1-haz)) } U[,j] <- utemp*km C[,j] <- ctemp } out[[i]] <- list(n.id = length(unique(id[keep])), n= n, time= utime, n.enter=entry, n.risk=nrisk, n.event=nevent, n.censor= ncensor, surv= surv, cumhaz = cumhaz, U=U, C=C, U2=U2, dN=dN) } out } true <- with(tdata, byhand(tstart, time, status, trt, id, wt)) ekm <- survfit(Surv(tstart, time, status) ~ trt, tdata, id=id, entry=TRUE, influence= TRUE, weights=wt) aeq(ekm$n.id, unlist(lapply(true, function(x) x$n.id))) aeq(ekm$n, unlist(lapply(true, function(x) x$n))) aeq(ekm$time, unlist(lapply(true, function(x) x$time))) aeq(ekm$n.risk, unlist(lapply(true, function(x) x$n.risk))) aeq(ekm$n.enter, unlist(lapply(true, function(x) x$n.enter))) aeq(ekm$n.event, unlist(lapply(true, function(x) x$n.event))) aeq(ekm$n.censor,unlist(lapply(true, function(x) x$n.censor))) aeq(ekm$surv ,unlist(lapply(true, function(x) x$surv))) # The byhand function gives per-observation influence, ekm has per-subject, # residuals can do either, but will fail with an error for this data when # collapse=TRUE with a message "same id appears in multiple curves " rr <- residuals(ekm, times= c(9, 17, 30, 45), type="cumhaz") aeq(rr[tdata$trt==1,], true[[1]]$C[, match(c(9,17,30,45), true[[1]]$time)]) aeq(rr[tdata$trt==2,], true[[2]]$C[, match(c(9,17,30,45), true[[2]]$time)]) rr <- residuals(ekm, times= c(9, 17, 30, 45), type= "pstate") aeq(rr[tdata$trt==1,], true[[1]]$U[, match(c(9,17,30,45), true[[1]]$time)]) aeq(rr[tdata$trt==2,], true[[2]]$U[, match(c(9,17,30,45), true[[2]]$time)]) # Check influence returned by survfit tdata1 <- subset(tdata, trt==1) tdata2 <- subset(tdata, trt==2) inf1 <- rowsum(tdata1$wt* true[[1]]$U, tdata1$id, reorder=FALSE) inf2 <- rowsum(tdata2$wt* true[[2]]$U, tdata2$id, reorder=FALSE) aeq(inf1, ekm$influence.surv[[1]]) aeq(inf2, ekm$influence.surv[[2]]) c1 <- rowsum(tdata1$wt* true[[1]]$C, tdata$id[tdata$trt==1], reorder=FALSE) c2 <- rowsum(tdata2$wt* true[[2]]$C, tdata$id[tdata$trt==2], reorder=FALSE) aeq(c1, ekm$influence.chaz[[1]]) aeq(c2, ekm$influence.chaz[[2]]) # Look at the AUC t1 <- true[[1]] width <- diff(c(t1$time, 50)) aucr <- width* t1$surv # the rectangles that make up auc(50) ainf <- t1$U %*% diag(width) # row i= influence of obs i on each rectangle aucinf <- t(apply(ainf,1,cumsum)) rr <- residuals(ekm, times= c(t1$time[-1], 50), type= "auc") aeq(rr[tdata$trt==1,], aucinf) # use factor(status) to force the multi-state code ekm2 <- survfit(Surv(tstart, time, factor(status)) ~ trt, tdata, id=id, entry=TRUE, influence= TRUE, weights=wt) aeq(ekm$n, ekm2$n) aeq(ekm$time, ekm2$time) aeq(ekm$n.risk, ekm2$n.risk[,1]) aeq(ekm$n.event, ekm2$n.event[,2]) aeq(ekm$n.censor, ekm2$n.censor[,1]) aeq(ekm$n.enter, ekm2$n.enter[,1]) aeq(ekm$surv, ekm2$pstate[,1]) aeq(ekm$std.err, ekm2$std.err[,1]) aeq(ekm$cumhaz, ekm2$cumhaz[,1]) aeq(ekm$std.chaz, ekm2$std.chaz[,1]) aeq(ekm$strata, ekm2$strata) aeq(ekm$n.id, ekm2$n.id) aeq(ekm$counts, ekm2$counts[,c("nrisk 1", "ntrans 1:2", "ncensor 1", "nenter 1")]) aeq(ekm$influence.surv[[1]], ekm2$influence.pstate[[1]][,,1]) aeq(ekm$influence.surv[[2]], ekm2$influence.pstate[[2]][,,1]) survival/tests/factor2.R0000644000176200001440000000163714607006645014771 0ustar liggesuserslibrary(survival) aeq <- function(x,y) all.equal(as.vector(x), as.vector(y)) options(na.action=na.exclude) # # More tests of factors in prediction, using a new data set # fit <- coxph(Surv(time, status) ~ factor(ph.ecog), lung) tdata <- data.frame(ph.ecog = factor(0:3)) p1 <- predict(fit, newdata=tdata, type='lp') p2 <- predict(fit, type='lp') aeq(p1, p2[match(0:3, lung$ph.ecog)]) fit2 <- coxph(Surv(time, status) ~ factor(ph.ecog) + factor(sex), lung) tdata <- expand.grid(ph.ecog = factor(0:3), sex=factor(1:2)) p1 <- predict(fit2, newdata=tdata, type='risk') xdata <- expand.grid(ph.ecog=factor(1:3), sex=factor(1:2)) p2 <- predict(fit2, newdata=xdata, type='risk') all.equal(p2, p1[c(2:4, 6:8)], check.attributes=FALSE) fit3 <- survreg(Surv(time, status) ~ factor(ph.ecog) + age, lung) tdata <- data.frame(ph.ecog=factor(0:3), age=50) predict(fit, type='lp', newdata=tdata) predict(fit3, type='lp', newdata=tdata) survival/tests/survcheck.Rout.save0000644000176200001440000000633214607006645017110 0ustar liggesusers R Under development (unstable) (2020-03-27 r78086) -- "Unsuffered Consequences" Copyright (C) 2020 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(survival) > aeq <- function(x, y, ...) all.equal(as.vector(x), as.vector(y), ...) > > # dummy data 1, simple survival > data1 <- data.frame(id=c(1,2,2,2,3,4), time=c(10, 20, 25, 30, 30, 10), + status=factor(c(0,0,1,1, 2,3))) > fit1 <- survcheck(Surv(time, status) ~ 1, data1, id=id) > aeq(fit1$flag, c(2,0,0,0,0)) [1] TRUE > aeq(fit1$overlap$row, 3:4) [1] TRUE > aeq(fit1$overlap$id, 2) [1] TRUE > aeq(fit1$transitions[1,], c(2,1,1,1)) [1] TRUE > > > # dummy data 2, no initial values, start stop data > # A: (0, 10, 0), (10, 20, 1), (20, 30, 2) # no issues > # B: (0, 20, 1), (15, 24, 2), (25,26, 0) > # C: (10,13, 1), (15, 18, 0), (18,25,3) > > data2 <- data.frame(id=rep(LETTERS[1:3], each=3), + t1 = c(0, 10, 20, 0, 15,25, 10, 15, 18), + t2 = c(10, 20,30, 20, 24, 26, 13, 18, 25), + status= factor(c(0, 1, 2, 1,2, 0, 1, 0, 3)), + x = c(1:5, NA, 7:9), + stringsAsFactors = FALSE) > fit2 <- survcheck(Surv(t1, t2, status) ~ 1, data2, id=id) > > aeq(fit2$flag , c(1,2,0,0,0)) [1] TRUE > aeq(fit2$transition, rbind(c(3,0,0,0), c(0,2,1,0), c(0,0,0,1), 0)) [1] TRUE > (fit2$overlap$id == 'B') [1] TRUE > (fit2$overlap$row ==5) [1] TRUE > all(fit2$gap$id == c("B", "C")) [1] TRUE > aeq(fit2$gap$row, c(6,8)) [1] TRUE > > # scramble > reord <- c(9,2,1,4,3,5,6,8,7) > tfit <- survcheck(Surv(t1, t2, status) ~ 1, data2[reord,], id=id) > all.equal(fit2[1:4], tfit[1:4]) [1] TRUE > > # let a missing value in > fit2b <- survcheck(Surv(t1, t2, status) ~ x, data2, id=id) > aeq(fit2b$flag , c(1,1,0,0,0)) [1] TRUE > aeq(fit2b$transition, rbind(c(3,0,0), c(0,2,1), 0,0)) [1] TRUE > (fit2b$overlap$id == 'B') [1] TRUE > (fit2b$overlap$row ==5) [1] TRUE > all(fit2b$gap$id == "C") [1] TRUE > aeq(fit2b$gap$row, 8) [1] TRUE > > # designed to trigger all 4 error types > data3 <- data2 > levels(data3$status) <- c("cens", "mgus", "recur", "death") > data3$istate <- c("entry", "entry", "recur", "entry", "recur", "recur", + "entry", "recur", "recur") > fit3 <- survcheck(Surv(t1, t2, status) ~ 1, data3, id=id, istate=istate) > > aeq(fit3$flag, c(1, 1, 1, 2, 0)) [1] TRUE > aeq(fit3$transitions, rbind(c(3,0,0,0), 0, c(0,2,1,1), 0)) [1] TRUE > all.equal(fit3$overlap, fit2$overlap) [1] TRUE > all(fit3$teleport$id == c("A", "C")) [1] TRUE > all(fit3$teleport$row == c(3,9)) [1] TRUE > all(fit3$jump$id == "C") [1] TRUE > all(fit3$jump$row == 8) [1] TRUE > all.equal(fit3$gap, list(row=6L, id= "B")) [1] TRUE > > > > proc.time() user system elapsed 0.879 0.044 0.914 survival/tests/cancer.Rout.save0000644000176200001440000001770214613770353016352 0ustar liggesusers R Under development (unstable) (2024-04-17 r86441) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: aarch64-unknown-linux-gnu R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(survival) > > options(na.action=na.exclude) # preserve missings > options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type > aeq <- function(x, y, ...) all.equal(as.vector(x), as.vector(y), ...) > > # > # Test out all of the routines on a more complex data set > # > temp <- survfit(Surv(time, status) ~ ph.ecog, lung) > summary(temp, times=c(30*1:11, 365*1:3)) Call: survfit(formula = Surv(time, status) ~ ph.ecog, data = lung) 1 observation deleted due to missingness ph.ecog=0 time n.risk n.event survival std.err lower 95% CI upper 95% CI 30 60 3 0.952 0.0268 0.9012 1.000 60 58 2 0.921 0.0341 0.8562 0.990 90 56 2 0.889 0.0396 0.8146 0.970 120 56 0 0.889 0.0396 0.8146 0.970 150 55 1 0.873 0.0419 0.7946 0.959 180 52 2 0.841 0.0461 0.7553 0.936 210 48 2 0.808 0.0498 0.7164 0.912 240 45 0 0.808 0.0498 0.7164 0.912 270 38 2 0.770 0.0543 0.6709 0.884 300 33 2 0.727 0.0591 0.6203 0.853 330 29 2 0.681 0.0637 0.5670 0.818 365 22 6 0.535 0.0728 0.4100 0.699 730 5 11 0.193 0.0707 0.0943 0.396 ph.ecog=1 time n.risk n.event survival std.err lower 95% CI upper 95% CI 30 111 2 0.982 0.0124 0.9583 1.000 60 110 3 0.956 0.0193 0.9186 0.994 90 104 4 0.920 0.0255 0.8718 0.972 120 99 5 0.876 0.0310 0.8174 0.939 150 93 6 0.823 0.0359 0.7556 0.896 180 82 8 0.751 0.0407 0.6756 0.836 210 68 9 0.666 0.0450 0.5831 0.760 240 57 6 0.604 0.0474 0.5176 0.704 270 53 4 0.561 0.0487 0.4729 0.665 300 46 3 0.527 0.0495 0.4384 0.633 330 40 4 0.480 0.0504 0.3903 0.589 365 34 4 0.431 0.0509 0.3417 0.543 730 7 21 0.114 0.0388 0.0582 0.222 ph.ecog=2 time n.risk n.event survival std.err lower 95% CI upper 95% CI 30 46 5 0.9000 0.0424 0.82057 0.987 60 43 2 0.8600 0.0491 0.76900 0.962 90 40 3 0.8000 0.0566 0.69647 0.919 120 34 4 0.7174 0.0641 0.60216 0.855 150 31 3 0.6541 0.0680 0.53342 0.802 180 26 6 0.5275 0.0719 0.40385 0.689 210 21 4 0.4431 0.0717 0.32266 0.608 240 17 3 0.3766 0.0705 0.26100 0.543 270 17 0 0.3766 0.0705 0.26100 0.543 300 13 3 0.3102 0.0677 0.20223 0.476 330 11 2 0.2624 0.0651 0.16135 0.427 365 9 2 0.2147 0.0614 0.12258 0.376 730 1 6 0.0371 0.0345 0.00601 0.229 ph.ecog=3 time n.risk n.event survival std.err lower 95% CI upper 95% CI 30 1 0 1 0 1 1 60 1 0 1 0 1 1 90 1 0 1 0 1 1 > print(temp[2:3]) Call: survfit(formula = Surv(time, status) ~ ph.ecog, data = lung) n events median 0.95LCL 0.95UCL ph.ecog=1 113 82 306 268 429 ph.ecog=2 50 44 199 156 288 > > temp <- survfit(Surv(time, status)~1, lung, type='fleming', + conf.int=.9, conf.type='log-log', error='tsiatis') > summary(temp, times=30 *1:5) Call: survfit(formula = Surv(time, status) ~ 1, data = lung, error = "tsiatis", type = "fleming", conf.int = 0.9, conf.type = "log-log") time n.risk n.event survival std.err lower 90% CI upper 90% CI 30 219 10 0.956 0.0135 0.928 0.974 60 213 7 0.926 0.0173 0.891 0.950 90 201 10 0.882 0.0213 0.842 0.913 120 189 10 0.838 0.0244 0.793 0.874 150 179 10 0.794 0.0268 0.745 0.834 > > temp <- survdiff(Surv(time, status) ~ inst, lung, rho=.5) > print(temp, digits=6) Call: survdiff(formula = Surv(time, status) ~ inst, data = lung, rho = 0.5) n=227, 1 observation deleted due to missingness. N Observed Expected (O-E)^2/E (O-E)^2/V inst=1 36 21.190058 17.455181 0.799149708 1.171232977 inst=2 5 3.173330 1.964395 0.744007932 0.860140808 inst=3 19 10.663476 11.958755 0.140294489 0.200472362 inst=4 4 2.245347 3.559344 0.485085848 0.677874608 inst=5 9 5.010883 4.500982 0.057765161 0.077128402 inst=6 14 8.862602 7.078516 0.449665221 0.582743947 inst=7 8 4.445647 4.416133 0.000197254 0.000253632 inst=10 4 2.901923 2.223283 0.207150016 0.249077097 inst=11 18 7.807867 9.525163 0.309611863 0.422142221 inst=12 23 14.009656 12.216768 0.263117640 0.365712493 inst=13 20 9.140983 11.863298 0.624699853 0.874238212 inst=15 6 3.170744 3.558447 0.042241456 0.057938955 inst=16 16 8.870360 9.992612 0.126038005 0.175170113 inst=21 13 9.263733 4.460746 5.171484268 6.149354145 inst=22 17 8.278566 11.971473 1.139171459 1.645863937 inst=26 6 1.627074 3.542694 1.035821659 1.286365543 inst=32 7 1.792468 2.679904 0.293869782 0.343966668 inst=33 2 0.929177 0.416202 0.632249272 0.676682390 Chisq= 15.1 on 17 degrees of freedom, p= 0.5904 > > # verify that the zph routine does the actual score test > dtime <- lung$time[lung$status==2] > lung2 <- survSplit(Surv(time, status) ~ ., lung, cut=dtime) > > cfit1 <-coxph(Surv(time, status) ~ ph.ecog + ph.karno + pat.karno + wt.loss + + sex + age + strata(inst), lung) > cfit2 <-coxph(Surv(tstart, time, status) ~ ph.ecog + ph.karno + pat.karno + + wt.loss + sex + age + strata(inst), lung2) > all.equal(cfit1$loglik, cfit2$loglik) [1] TRUE > all.equal(coef(cfit1), coef(cfit2)) [1] TRUE > > # the above verifies that the data set is correct > zp1 <- cox.zph(cfit1, transform="log") > zp2 <- cox.zph(cfit2, transform="log") > # everything should match but the call > icall <- match("Call", names(zp1)) > all.equal(unclass(zp2)[-icall], unclass(zp1)[-icall]) [1] TRUE > > # now compute score tests one variable at a time > ncoef <- length(coef(cfit2)) > check <- double(ncoef) > cname <- names(coef(cfit2)) > for (i in 1:ncoef) { + temp <- log(lung2$time) * lung2[[cname[i]]] + # score test for this new variable + tfit <- coxph(Surv(tstart, time, status) ~ ph.ecog + ph.karno + pat.karno + + wt.loss + sex + age + strata(inst) + + temp, lung2, init=c(cfit2$coefficients, 0), iter=0) + check[i] <- tfit$score + } > aeq(check, zp1$table[1:ncoef,1]) # skip the 'global' test [1] TRUE > > # > # Tests of using "." > # > fit1 <- coxph(Surv(time, status) ~ . - meal.cal - wt.loss - inst, lung) > fit2 <- update(fit1, .~. - ph.karno) > fit3 <- coxph(Surv(time, status) ~ age + sex + ph.ecog + pat.karno, lung) > all.equal(fit2, fit3) [1] TRUE > > proc.time() user system elapsed 0.732 0.036 0.765 survival/tests/mstate.Rout.save0000644000176200001440000003407114654470675016423 0ustar liggesusers R Under development (unstable) (2024-06-14 r86747) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > # > # A tiny multi-state example > # > library(survival) > aeq <- function(x,y) all.equal(as.vector(x), as.vector(y)) > mtest <- data.frame(id= c(1, 1, 1, 2, 3, 4, 4, 4, 5, 5), + t1= c(0, 4, 9, 0, 2, 0, 2, 8, 1, 3), + t2= c(4, 9, 10, 5, 9, 2, 8, 9, 3, 11), + st= c(1, 2, 1, 2, 3, 1, 3, 0, 2, 0)) > > mtest$state <- factor(mtest$st, 0:3, c("censor", "a", "b", "c")) > > if (FALSE) { + # this graph is very useful when debugging + temp <- survcheck(Surv(t1, t2, state) ~1, mtest, id=id) + plot(c(0,11), c(1,5.1), type='n', xlab="Time", ylab= "Subject") + with(mtest, segments(t1+.1, id, t2, id, col=as.numeric(temp$istate))) + event <- subset(mtest, state!='censor') + text(event$t2, event$id+.2, as.character(event$state)) + } > > mtest <- mtest[c(1,3,2,4,5,7,6,10, 9, 8),] #not in time order > > mfit <- survfit(Surv(t1, t2, state) ~ 1, mtest, id=id, time0=FALSE) > > # True results > # > #time state probabilities > # entry a b c entry a b c > # > #0 124 1 0 0 0 > #1+ 1245 > #2+ 1235 4 3/4 1/4 0 0 4 -> a, add 3 > #3+ 123 4 5 9/16 1/4 3/16 0 5 -> b > #4+ 23 14 5 6/16 7/16 3/16 0 1 -> a > #5+ 3 14 5 3/16 7/16 6/16 0 2 -> b, exits > #8+ 3 1 5 4 3/16 7/32 6/16 7/32 4 -> c > #9+ 15 0 0 19/32 13/32 1->b, 3->c & exit > # 10+ 1 5 19/64 19/64 13/32 1->a > > aeq(mfit$n.risk, matrix(c(4,4,3,2,1,1,0,0, + 0,1,1,2,2,1,0,0, + 0,0,1,1,1,1,2,1, + 0,0,0,0,0,1,0,0), ncol=4)) [1] TRUE > aeq(mfit$pstate, matrix(c(24, 18, 12, 6, 6, 0, 0, 0, + 8, 8, 14, 14, 7, 0, 9.5, 9.5, + 0, 6, 6, 12, 12,19,9.5, 9.5, + 0, 0, 0, 0, 7, 13, 13, 13)/32, ncol=4)) [1] TRUE > aeq(mfit$n.transition, matrix(c(1,0,1,0,0,0,0,0, + 0,0,0,0,0,0,1,0, + 0,1,0,1,0,0,0,0, + 0,0,0,0,0,1,0,0, + 0,0,0,0,0,1,0,0, + 0,0,0,0,1,0,0,0), ncol=6)) [1] TRUE > all.equal(mfit$time, c(2, 3, 4, 5, 8, 9, 10, 11)) [1] TRUE > > # Somewhat more complex. > # Scramble the input data > # Not everyone starts at the same time or in the same state > # Case weights > # > tdata <- data.frame(id= c(1, 1, 1, 2, 3, 4, 4, 4, 5, 5), + t1= c(0, 4, 9, 1, 2, 0, 2, 8, 1, 3), + t2= c(4, 9, 10, 5, 9, 2, 8, 9, 3, 11), + st= c(1, 2, 1, 2, 3, 1, 3, 0, 3, 0), + i0= c(4, 1, 2, 1, 4, 4, 1, 3, 2, 3), + wt= 1:10) > > tdata$st <- factor(tdata$st, c(0:3), + labels=c("censor", "a", "b", "c")) > tdata$i0 <- factor(tdata$i0, c(4, 1:3), + labels=c("entry", "a", "b", "c")) > if (FALSE) { + #useful picture + temp <- survcheck(Surv(t1, t2, st) ~1, tdata, id=id, istate=i0) + plot(c(0,11), c(1,5.5), type='n', xlab="Time", ylab= "Subject") + with(tdata, segments(t1+.1, id, t2, id, col=as.numeric(temp$istate))) + with(subset(tdata, st!= "censor"), + text(t2, id+.15, as.character(st))) + with(tdata, text((t1+t2)/2, id+.25, wt)) + with(subset(tdata, !duplicated(id)), + text(t1, id+.15, as.character(i0))) + #abline(v=c(2:5, 8:11), lty=3, col='gray') + } > > tfun <- function(data=tdata) { + reorder <- c(10, 9, 1, 2, 5, 4, 3, 7, 8, 6) + new <- data[reorder,] + new + } > > # These weight vectors are in the order of tdata > # w[9] is the weight for subject 5 at time 1.5, for instance > # p0 is defined as all those at risk just before the first event, which in > # this data set is entry:a at time 2 for id=4; id 1,2,4,5 at risk > > # When the functions below were written, the entry state was listed last. > # Currently the entry state is first, so "[swap]" was added to the aj routines > # rather than rearranging the formulas > swap <- c(4,1,2,3) > p0 <- function(w) c( w[1]+ w[6], w[4], w[9], 0)/ (w[1]+ w[4] + w[6] + w[9]) > > # aj2 = Aalen-Johansen H matrix at time 2, etc. > aj2 <- function(w) { + #subject 4 moves from entry to 'a' + rbind(c(1, 0, 0, 0), + c(0, 1, 0, 0), + c(0, 0, 1, 0), + c(w[6], 0, 0, w[1])/(w[1] + w[6]))[swap, swap] + } > aj3 <- function(w) rbind(c(1, 0, 0, 0), + c(0, 0, 1, 0), # 5 moves from b to c + c(0, 0, 1, 0), + c(0, 0, 0, 1))[swap,swap] > aj4 <- function(w) { + # subject 1 moves from entry to a + rbind(c(1, 0, 0, 0), + c(0, 1, 0, 0), + c(0, 0, 1, 0), + c(w[1], 0, 0, w[5])/(w[1] + w[5])) [swap, swap] + } > aj5 <- function(w) { + # subject 2 from a to b + rbind(c(w[2]+w[7], w[4], 0, 0)/(w[2]+ w[4] + w[7]), + c(0, 1, 0, 0), + c(0, 0, 1, 0), + c(0, 0, 0, 1))[swap, swap] + } > aj8 <- function(w) rbind(c(w[2], 0, w[7], 0)/(w[2]+ w[7]), # 4 a to c + c(0, 1, 0, 0), + c(0, 0, 1, 0), + c(0, 0, 0, 1))[swap, swap] > aj9 <- function(w) rbind(c(0, 1, 0, 0), # 1 a to b + c(0, 1, 0, 0), + c(0, 0, 1, 0), + c(0, 0, 1 ,0)) [swap, swap] # 3 entry to c > aj10 <- function(w)rbind(c(1, 0, 0, 0), + c(1, 0, 0, 0), #1 b to a + c(0, 0, 1, 0), + c(0, 0, 0, 1))[swap, swap] > > #time state > # a b c entry > # > #1 2 5 14 initial distribution > #2 24 5 1 4 -> a, add 3 > #3 24 5 13 5 from b to c > #4 124 5 3 1 -> a > #5 14 5 3 2 -> b, exits > #8 1 45 3 4 -> c > #9 1 45 1->b, 3->c & exit > #10 1 45 1->a > > # P is a product of matrices > dopstate <- function(w) { + p1 <- p0(w) + p2 <- p1 %*% aj2(w) + p3 <- p2 %*% aj3(w) + p4 <- p3 %*% aj4(w) + p5 <- p4 %*% aj5(w) + p8 <- p5 %*% aj8(w) + p9 <- p8 %*% aj9(w) + p10<- p9 %*% aj10(w) + rbind(p2, p3, p4, p5, p8, p9, p10, p10) + } > > # Check the pstate estimate > w1 <- rep(1,10) > mtest2 <- tfun(tdata) # scrambled order > mfit2 <- survfit(Surv(t1, t2, st) ~ 1, tdata, id=id, istate=i0, + time0=FALSE) # ordered > aeq(mfit2$pstate, dopstate(w1)) [1] TRUE > aeq(mfit2$p0, p0(w1)) [1] TRUE > > mfit2b <- survfit(Surv(t1, t2, st) ~ 1, mtest2, id=id, istate=i0, time0=FALSE) > aeq(mfit2b$pstate, dopstate(w1)) [1] TRUE > aeq(mfit2b$p0, p0(w1)) [1] TRUE > > mfit2b$call <- mfit2$call <- NULL > all.equal(mfit2b, mfit2) [1] TRUE > aeq(mfit2$transitions, c(2,0,1,0, 0,2,0,0, 1,1,1,0, 0,0,0,2)) [1] TRUE > > # Now the harder one, where subjects change weights > mfit3 <- survfit(Surv(t1, t2, st) ~ 1, tdata, id=id, istate=i0, + weights=wt, influence=TRUE, time0=FALSE) > aeq(mfit3$p0, p0(1:10)) [1] TRUE > aeq(mfit3$pstate, dopstate(1:10)) [1] TRUE > > > # The derivative of a matrix product AB is (dA)B + A(dB) where dA is the > # elementwise derivative of A and etc for B. > # dp0 creates the derivatives of p0 with respect to each subject, a 5 by 4 > # matrix > # All the functions below are hand coded for a weight vector that is in > # exactly the same order as the rows of mtest. > # Since p0 = (w[1]+ w[6], w[4], w[9], 0)/ (w[1]+ w[4] + w[6] + w[9]) > # and subject id is 1,1,1, 2, 3, 4,4,4, 5,5 > # we get the derivative below > # > > dp0 <- function(w) { # influence just before the first event + p <- p0(w) + w0 <- w[c(1,4,6,9)] # the 4 obs at the start, subjects 1, 2, 4, 5 + rbind(c(1,0, 0, 0) - p, # subject 1 affects p[entry] + c(0,1, 0, 0) - p, # subject 2 affects p[a] + 0, # subject 3 affects none + c(1, 0, 0, 0) - p, # subject 4 affect p[entry] + c(0, 0, 1, 0) - p)/ # subject 5 affects p[b] + sum(w0) + } > > > dp2 <- function(w) { + h2 <- aj2(w) # H matrix at time 2 + part1 <- dp0(w) %*% h2 + + # 1 and 4 in entry, obs 4 moves from entry to a + mult <- p0(w)[1]/(w[1] + w[6]) #p(t-) / weights in state + part2 <- rbind((c(1,0,0,0)- h2[1,]) * mult, + 0, + 0, + (c(0,1,0,0) - h2[1,]) * mult, + 0) + part1 + part2 + } > > dp3 <- function(w) { + dp2(w) %*% aj3(w) + } > > dp4 <- function(w) { + h4 <- aj4(w) # H matrix at time 4 + part1 <- dp3(w) %*% h4 + + # subjects 1 and 3 in state entry (obs 1 and 5) 1 moves to a + mult <- dopstate(w)[2,1]/ (w[1] + w[5]) # p_1(time 4-0) / wt + part2 <- rbind((c(0,1,0,0)- h4[1,]) * mult, + 0, + (c(1,0,0,0)- h4[1,]) * mult, + 0, + 0) + part1 + part2 + } > dp5 <- function(w) { + h5 <- aj5(w) # H matrix at time 5 + part1 <- dp4(w) %*% h5 + + # subjects 124 in state a (obs 2,4,7), 2 goes to b + mult <- dopstate(w)[3,2]/ (w[2] + w[4] + w[7]) + part2 <- rbind((c(0,1,0,0)- h5[2,]) * mult, + (c(0,0,1,0)- h5[2,]) * mult, + 0, + (c(0,1,0,0)- h5[2,]) * mult, + 0) + part1 + part2 + } > dp8 <- function(w) { + h8 <- aj8(w) # H matrix at time 8 + part1 <- dp5(w) %*% h8 + + # subjects 14 in state a (obs 2 &7), 4 goes to c + mult <- dopstate(w)[4, 2]/ (w[2] + w[7]) + part2 <- rbind((c(0,1,0,0)- h8[2,]) * mult, + 0, + 0, + (c(0,0,0,1)- h8[2,]) * mult, + 0) + part1 + part2 + } > dp9 <- function(w) dp8(w) %*% aj9(w) > dp10<- function(w) dp9(w) %*% aj10(w) > > # > # Feb 4 2024: discovered that the variance computation above is incorrect. > # Let U = influence for phat, with one row per observation in the data > # The weighted per subject influence is Z(t)= BDU(t) where > # B= rbind(c(1,1,1,0,0,0,0,0,0,0), > # c(0,0,0,1,0,0,0,0,0,0), > # c(0,0,0,0,1,0,0,0,0,0), > # c(0,0,0,0,0,1,1,1,0,0), > # c(0,0,0,0,0,0,0,0,1,1)) > # and D= diag(1:10) > # which can be summarized as "weight each row, then add over subjects". > # The variance at time t is the column sums of Z^2(t) (elementwise squares) > # > # The code above for dp0, dp2, etc returns BU, which matches the computation of > # the influence in survfitci.c. If the weight for a given subject is constant > # over time, then BDU= WBU where W is the diagonal matrix of per-subject > # weights: survfitci.c implicitly made this assumption, and was correct > # in this case. It returned U as the influence, which matches dp0 etc. > # > # survfitci.c has been replaced by survfitaj.c, which uses the careful > # derivations in the methods vignette, and returns BDU. > # The checks below have been changed to a case with constant weights per > # subject. R code to test for general weights is in mstate2.R > # > w1 <- tdata$id > mfit4 <- survfit(Surv(t1, t2, st) ~1, tdata, id=id, weights=id, istate=i0, + influence=TRUE, time0= FALSE) > aeq(mfit4$influence[,1,], 1:5*dp2(w1)) #time 2 [1] TRUE > aeq(mfit4$influence[,2,], 1:5*dp3(w1)) [1] TRUE > aeq(mfit4$influence[,3,], 1:5*dp4(w1)) [1] TRUE > aeq(mfit4$influence[,4,], 1:5*dp5(w1)) [1] TRUE > aeq(mfit4$influence[,5,], 1:5*dp8(w1)) # time 8 [1] TRUE > aeq(mfit4$influence[,6,], 1:5* dp9(w1)) [1] TRUE > aeq(mfit4$influence[,7,], 1:5* dp10(w1)) [1] TRUE > aeq(mfit4$influence[,8,], 1:5* dp10(w1)) # no changes at time 11 [1] TRUE > > ssq <- function(x) sqrt(sum(x^2)) > temp2 <- apply(mfit4$influence.pstate, 2:3, ssq) > aeq(temp2, mfit4$std.err) [1] TRUE > > if (FALSE) { # old test, survfitci returned the time 0 influence as well + w1 <- 1:10 + aeq(mfit3$influence[,1,], dp0(w1)) + aeq(mfit3$influence[,2,], dp2(w1)) + aeq(mfit3$influence[,3,], dp3(w1)) + aeq(mfit3$influence[,4,], dp4(w1)) + aeq(mfit3$influence[,5,], dp5(w1)) + aeq(mfit3$influence[,6,], dp8(w1)) + aeq(mfit3$influence[,7,], dp9(w1)) + aeq(mfit3$influence[,8,], dp10(w1)) + aeq(mfit3$influence[,9,], dp10(w1)) # no changes at time 11 + } # end of if (FALSE) > > # The cumulative hazard at each time point is remapped from a matrix > # into a vector (in survfit) > # First check out the names > nstate <- length(mfit4$states) > temp <- matrix(0, nstate, nstate) > indx1 <- match(rownames(mfit4$transitions), mfit4$states) > indx2 <- match(colnames(mfit4$transitions), mfit4$states, nomatch=0) > temp[indx1, indx2] <- mfit4$transitions[, indx2>0] > # temp is an nstate by nstate version of the transitions matrix > from <- row(temp)[temp>0] > to <- col(temp)[temp>0] > all.equal(colnames(mfit4$cumhaz), paste(from, to, sep=':')) [1] TRUE > > # check the cumulative hazard > temp <- mfit4$n.risk[,from] > hazard <- mfit4$n.transition/ifelse(temp==0, 1, temp) > aeq(apply(hazard, 2, cumsum), mfit4$cumhaz) [1] TRUE > > > proc.time() user system elapsed 1.736 0.162 1.885 survival/tests/mstate2.R0000644000176200001440000002231614607325257015010 0ustar liggesuserslibrary(survival) aeq <- function(x, y, ...) all.equal(as.vector(x), as.vector(y), ...) # This is a test of the influence matrix for an Andersen-Gill fit, using the # formulas found in the methods document, and implemented in the survfitaj.c # code. As much as anything it was a help in debugging -- both the mathematics # and the program. # The test case below has tied events, tied event/censoring, entry in mutiple # states, staggered entry, repeated events for a subject, varying case weights # within a subject, ... on purpose tdata <- data.frame(id= c(1, 1, 1, 2, 2, 3, 4, 4, 4, 4, 5, 5, 6, 6), t1= c(0, 4, 9, 1, 5, 2, 0, 2, 5, 8, 1, 3, 3, 5), t2= c(4, 9, 10, 5, 7, 9, 2, 5, 8, 9, 3, 11, 5, 8), st= c(2, 3, 2, 3, 1, 2, 2, 4, 4, 1, 3, 1, 3, 2), i0= c(1, 2, 3, 2, 3, 1, 1, 2, 4, 4, 4, 3, 2, 3), wt= c(1:8, 8:3)) tdata$st <- factor(tdata$st, c(1:4), labels=c("censor", "a", "b", "c")) tdata$i0 <- factor(tdata$i0, 1:4, labels=c("entry","a", "b", "c")) check <- survcheck(Surv(t1, t2,st) ~1, tdata, id=id, istate=i0) if (FALSE) { #useful picture plot(c(0,11), c(1,6.5), type='n', xlab="Time", ylab= "Subject") with(tdata, segments(t1+.1, id, t2, id, col=as.numeric(check$istate))) with(subset(tdata, st!= "censor"), text(t2, id+.15, as.character(st))) with(tdata, text((t1+t2)/2, id+.25, wt)) with(subset(tdata, !duplicated(id)), text(t1, id+.15, as.character(i0))) #segments are colored by current state, case weight in center, events at ends abline(v=c(2:5, 8:11), lty=3, col='gray') } # Compute the unweighted per observation leverages, using the approach in # the methods document, as a check of both it and the C code. # These IJ residuals can be directly verified using emprical derivatives, # and collapsed to test the weighted+collapsed results from survfitAJ. # survfitaj <- function(t1, t2, state, istate=NULL, wt, id, p0, start.time=NULL, debug = FALSE) { check <- survcheck(Surv(t1, t2, state) ~ 1, id=id, istate=istate) if (any(check$flag >0)) stop("failed survcheck") states <- check$states nstate <- length(states) istate <- check$istate # will have the correct levels isn <- as.numeric(istate) n <- length(t1) if (length(t2) !=n || length(state) !=n || length(istate) !=n || length(wt) !=n || length(id) !=n) stop("input error") newstate <- factor(state, unique(c(levels(state)[1], states))) Y <- Surv(t1, t2, newstate) # makes the levels match up position <- survival:::survflag(Y, id) uid <- unique(id) nid <- length(uid) id <- match(id, uid) # turn it into 1,2,... event <- (Y[,3] >0) U <- A <- matrix(0, n, nstate) # per observation influence, unweighted if (missing(p0)) { if (!missing(start.time)) t0 <- start.time else { if (all(Y[, 3] ==0)) t0 <- min(Y[, 2]) # no events! else t0 <- min(Y[event, 2]) } atrisk <- (Y[,1] < t0 & Y[,2] >= t0) wtsum <- sum(wt[atrisk]) # weights at that time p0 <- tapply(wt[atrisk], istate[atrisk], sum) / wtsum p0 <- ifelse(is.na(p0), 0, p0) #if a state has no one, tapply =NA if (all(p0 <1)) { # compute intitial leverage for (j in 1:nstate) { U[atrisk,j] <- (ifelse(istate[atrisk]==states[j], 1, 0) - p0[j])/wtsum } } } else { if (missing(start.time)) t0 <- 0 else t0 <- start.time } utime <- sort(unique(c(0, Y[event | position>1, 2]))) ntime <- length(utime) phat <- matrix(0, ntime, nstate) phat[1,] <- p0 n.risk <- matrix(0, ntime, nstate) n.risk[1,] <- table(istate[Y[,1]< start.time & Y[,2] > start.time]) # count the number of transitions, and make an index to them temp <- table(istate[event], factor(Y[event,3], 1:nstate, states)) trmat <- cbind(from= row(temp)[temp>0], to= col(temp)[temp>0]) nhaz <- nrow(trmat) n.event <- matrix(0, ntime, nhaz) C <- matrix(0, n, nhaz) chaz <- matrix(0, ntime, nhaz) hash <- trmat %*% c(1,10) tindx <- match(isn + 10*Y[,3], hash, nomatch=0) #index to transitions # at this point I have the initial inflence matrices (U= pstate, # C= cumhaz, A= auc). The auc and cumhaz are 0 at the starting point # so their influence is 0. Usave <- array(0, dim=c(dim(U), ntime)) Usave[,,1] <- U Csave <- array(0, dim= c(dim(C), ntime)) #chaz and AUC are 0 at start.time Asave <- array(0, dim= c(dim(A), ntime)) for (it in 2:ntime) { # AUC if (it==2) delta <- utime[it]- t0 else delta <- utime[it] - utime[it-1] A <- A + delta* U # count noses atrisk <- (t1 < utime[it] & t2 >= utime[it]) temp <- tapply(wt[atrisk], istate[atrisk], sum) n.risk[it,] <- ifelse(is.na(temp), 0, temp) event <- (Y[,2]== utime[it] & Y[,3]>0) temp <- tapply(wt[event], factor(tindx[event], 1:nhaz), sum) n.event[it,] <- ifelse(is.na(temp), 0, temp) # Add events to C and create the H matrix H <- diag(nstate) for (i in which(event)) { j <- isn[i] # from, to, and transition indices k <- Y[i,3] jk <- match(j+10*k, hash) C[i, jk] <- C[i, jk] + 1/n.risk[it,j] if (j!=k) { H[j,j] <- H[j,j] - wt[i]/n.risk[it,j] H[j,k] <- H[j,k] + wt[i]/n.risk[it,j] } } U <- U %*% H phat[it,] <- phat[it-1,] %*% H if (debug) browser() # Add events to U for (i in which(event)) { j <- isn[i] # from, to, and transition indices k <- Y[i,3] if (j != k) { U[i,j] <- U[i,j] - phat[it-1,j]/n.risk[it,j] U[i,k] <- U[i,k] + phat[it-1,j]/n.risk[it,j] } } if (debug) browser() # now the hazard part for (h in which(n.event[it,] >0)) { j <- trmat[h,1] k <- trmat[h,2] haz <- n.event[it,h]/n.risk[it, j] h2 <- haz/n.risk[it,j] who <- (atrisk & isn ==j) # at risk, currently in state j C[who,h] <- C[who,h] - h2 if (j != k) { U[who,j] <- U[who,j] + h2 * phat[it-1,j] U[who,k] <- U[who,k] - h2 * phat[it-1,j] } } if (debug) browser() Usave[,,it] <- U Csave[,,it] <- C Asave[,,it] <- A } colnames(n.event) <- paste(trmat[,1], trmat[,2], sep=':') colnames(n.risk) <- check$states colnames(phat) <- check$states list(time = utime, n.risk= n.risk, n.event=n.event, pstate= phat, C=Csave, U=Usave, A=Asave) } mfit <- survfit(Surv(t1, t2, st) ~ 1, tdata, id=id, istate=i0, weights=wt, influence=TRUE) mtest <- with(tdata, survfitaj(t1, t2, st, i0, wt, id)) # mtest <- with(tdata, survfitaj(t1, t2, st, i0, wt, id, debug=TRUE)) # p0 and U0 from the methods document p0 <- c(8, 4,0,6)/ 18 U0 <- rbind(c(1,0,0,0) - p0, 0, 0, c(0,1,0,0) - p0, 0, 0, c(1,0,0,0) - p0, 0, 0, 0, c(0,0,0,1) - p0, 0, 0, 0) /18 aeq(mtest$pstate[1,], p0) aeq(mtest$U[,,1], U0) aeq(mtest$time[-1], mfit$time) # mtest includes U(2-eps) as 'time 0' aeq(mtest$pstate[-1,], mfit$pstate) aeq(mfit$p0, p0) aeq(mfit$i0, rowsum(U0*tdata$wt, tdata$id)) # direct check that mtest has the correct answer eps <- 1e-6 delta <- array(0, dim= c(nrow(tdata), dim(mfit$pstate))) deltaC<- array(0, dim= c(nrow(tdata), dim(mfit$cumhaz))) for (i in 1:nrow(tdata)) { twt <- tdata$wt twt[i] <- twt[i] + eps tfit <- survfit(Surv(t1, t2, st) ~1, tdata, id=id, istate=i0, weights= twt) delta[i,,] <- (tfit$pstate - mfit$pstate)/eps deltaC[i,,] <-(tfit$cumhaz - mfit$cumhaz)/eps } temp <- aperm(mtest$U, c(1,3,2)) # drop time 0, put state last all.equal(temp[,-1,], delta, tol=eps/2) tempC <-aperm(mtest$C, c(1,3,2)) all.equal(tempC[,-1,], deltaC, tol= eps/2) # Now check mfit, which returns the weighted collapsed values BD <- t(model.matrix(~ factor(id) -1, tdata)) %*% diag(tdata$wt) rownames(BD) <- 1:6 collapse <- function(U, cmat=BD) { # for each time point, replace the inflence matrix U with BDU if (is.matrix(U)) BD %*% U else { dd <- dim(U) temp <- cmat %*% matrix(U, nrow = dd[1]) #fake out matrix multiply array(temp, dim= c(nrow(temp), dd[2:3])) } } sqsum <- function(x) sqrt(sum(x^2)) temp <- collapse(mtest$U[,,-1]) # mtest has time 0, mfit does not # mfit$influence is in id, time, state order aeq(aperm(temp, c(1,3,2)), mfit$influence) # mtest has time 0, mfit does not setemp <- apply(collapse(mtest$U[,,-1]), 2:3, sqsum) aeq(t(setemp), mfit$std.err) ctemp <- apply(collapse(mtest$C[,,-1]), 2:3, sqsum) aeq(t(ctemp), mfit$std.chaz) atemp <- apply(collapse(mtest$A[,,-1]), 2:3, sqsum) aeq(t(atemp), mfit$std.auc) # check residuals rr1 <- resid(mfit, times=mfit$time, type='pstate') aeq(rr1, mtest$U[,,-1]) rr2 <- resid(mfit, times=mfit$time, type='auc') aeq(rr2, mtest$A[,,-1]) survival/tests/survcheck.R0000644000176200001440000000424214607006645015421 0ustar liggesuserslibrary(survival) aeq <- function(x, y, ...) all.equal(as.vector(x), as.vector(y), ...) # dummy data 1, simple survival data1 <- data.frame(id=c(1,2,2,2,3,4), time=c(10, 20, 25, 30, 30, 10), status=factor(c(0,0,1,1, 2,3))) fit1 <- survcheck(Surv(time, status) ~ 1, data1, id=id) aeq(fit1$flag, c(2,0,0,0,0)) aeq(fit1$overlap$row, 3:4) aeq(fit1$overlap$id, 2) aeq(fit1$transitions[1,], c(2,1,1,1)) # dummy data 2, no initial values, start stop data # A: (0, 10, 0), (10, 20, 1), (20, 30, 2) # no issues # B: (0, 20, 1), (15, 24, 2), (25,26, 0) # C: (10,13, 1), (15, 18, 0), (18,25,3) data2 <- data.frame(id=rep(LETTERS[1:3], each=3), t1 = c(0, 10, 20, 0, 15,25, 10, 15, 18), t2 = c(10, 20,30, 20, 24, 26, 13, 18, 25), status= factor(c(0, 1, 2, 1,2, 0, 1, 0, 3)), x = c(1:5, NA, 7:9), stringsAsFactors = FALSE) fit2 <- survcheck(Surv(t1, t2, status) ~ 1, data2, id=id) aeq(fit2$flag , c(1,2,0,0,0)) aeq(fit2$transition, rbind(c(3,0,0,0), c(0,2,1,0), c(0,0,0,1), 0)) (fit2$overlap$id == 'B') (fit2$overlap$row ==5) all(fit2$gap$id == c("B", "C")) aeq(fit2$gap$row, c(6,8)) # scramble reord <- c(9,2,1,4,3,5,6,8,7) tfit <- survcheck(Surv(t1, t2, status) ~ 1, data2[reord,], id=id) all.equal(fit2[1:4], tfit[1:4]) # let a missing value in fit2b <- survcheck(Surv(t1, t2, status) ~ x, data2, id=id) aeq(fit2b$flag , c(1,1,0,0,0)) aeq(fit2b$transition, rbind(c(3,0,0), c(0,2,1), 0,0)) (fit2b$overlap$id == 'B') (fit2b$overlap$row ==5) all(fit2b$gap$id == "C") aeq(fit2b$gap$row, 8) # designed to trigger all 4 error types data3 <- data2 levels(data3$status) <- c("cens", "mgus", "recur", "death") data3$istate <- c("entry", "entry", "recur", "entry", "recur", "recur", "entry", "recur", "recur") fit3 <- survcheck(Surv(t1, t2, status) ~ 1, data3, id=id, istate=istate) aeq(fit3$flag, c(1, 1, 1, 2, 0)) aeq(fit3$transitions, rbind(c(3,0,0,0), 0, c(0,2,1,1), 0)) all.equal(fit3$overlap, fit2$overlap) all(fit3$teleport$id == c("A", "C")) all(fit3$teleport$row == c(3,9)) all(fit3$jump$id == "C") all(fit3$jump$row == 8) all.equal(fit3$gap, list(row=6L, id= "B")) survival/tests/tmerge3.Rout.save0000644000176200001440000001047214607006645016461 0ustar liggesusers R Under development (unstable) (2022-08-09 r82699) -- "Unsuffered Consequences" Copyright (C) 2022 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > # Understanding edge cases > library(survival) > > # > # this is from a user report of a problem with cumevents. When there is > # a row merged in that is a censor, don't mark it as a cumevent. > # > base <- data.frame( + id = 1:2, tstart = c(0, 0), tstop = c(10, 10), got_flu = c(0, 0), + has_flu = factor(c("no", "no"), levels = c("no", "yes"))) > base <- tmerge(base, base, id = id, got_flu = event(tstop, got_flu)) > > # add time-varying covariates > vars <- data.frame(id = c(1, rep(2, 5)), time = c(0, (0:4) * 2), x = rnorm(6)) > base <- tmerge(base, vars, id = id, x = tdc(time, x)) > > # add cumevents, using a covariate > events <- data.frame( + id = c(2, 2, 2), + # notice the zero -- the second row should not add an event + got_flu = c(1,0,2), + has_flu = c("yes", "no", "yes"), + time = c(3, 5, 8)) > b2 <- tmerge(base, events, id = id, got_flu = cumevent(time, got_flu), + has_flu = tdc(time, has_flu)) Warning message: In tmerge(base, events, id = id, got_flu = cumevent(time, got_flu), : replacement of variable 'has_flu' > > all.equal(b2$got_flu, c(0,0,1,0,0,0,3,0)) [1] TRUE > > > # Tied times in the merger data set > # for all of them missings are essentially ignored > # last obs wins for tdc and event > tiedat <- data.frame(id=c(1, 1, 1, 2,2,2), time=c(3,4, 4, 3, 5, 5), + x=c(1, NA,0, 2,3,4)) > b3 <- tmerge(base, tiedat, id=id, x1= tdc(time, x), x2=cumtdc(time, x), + x3= event(time, x), x4 = cumevent(time, x)) > all.equal(b3$x1, c(NA, 1, 0, NA, NA, 2,2, 4,4,4)) [1] TRUE > all.equal(b3$x2, c(NA, 1, 1, NA, NA, 2,2, 9,9,9)) [1] TRUE > all.equal(b3$x3, c(1,0,0,0,2,0,4,0,0,0)) [1] TRUE > all.equal(b3$x4, c(1,0,0,0,2,0,9,0,0,0)) [1] TRUE > > # Multiple overlapping time windows in the first step. > # Should generate an error message > test <- tryCatch( + {tmerge(pbcseq[, c("id", "trt", "age", "sex")], pbcseq, id, + death = event(futime, status==2))}, + error= function(cond) { + if (grepl("duplicate identifiers", cond)) + cat("successful tmerge error test\n") + } + ) successful tmerge error test > > # Using a tdc that depends on more than one variable. If they are not > # exactly the same class, tmerge should fail. > # Happens with wide data sets > > tdata <- data.frame(id= 1:3, age=c(40,44,38), dtime=c(700, 600, 500), + t1 = c(111, 211, 311), x1= as.integer(c(4, 5, 6)), + t2 = c(120, 240, 400.3), x2=c( 9, 8, 7), + t3 = c(400, 500, 450), x3=c(12,2, 0)) > # This works > wide1 <- tmerge(tdata[,1:2], tdata, id=id, death= event(dtime), + x = tdc(t1, x1), x= tdc(t2, x2), x= tdc(t3, x3)) > > r1 <- data.frame(id=rep(1:3, each=4), + age= tdata$age[rep(1:3, each=4)], + tstart=c(0,111, 120, 400, 0, 211, 240, 500, 0, 311,400.3, 450), + tstop =c(111, 120, 400, 700, 211, 240, 500, 600, + 311, 400.3, 450, 500), + death= rep(c(0,0,0,1), 3), + x= c(NA,4, 9,12, NA, 5, 8, 2, NA, 6,7, 0)) > all.equal(r1, wide1, check.attributes=FALSE) [1] TRUE > > tdata$x2[2] <- 'c' # different data type > test <- tryCatch( + {tmerge(tdata[,1:2], tdata, id=id, death= event(dtime), + x = tdc(t1, x1), x= tdc(t2, x2), x= tdc(t3, x3))}, + error= function(cond) { + if (grepl("tdc update does not match prior variable type: x", cond)) + cat("successful tmerge error test\n") + } + ) successful tmerge error test > > proc.time() user system elapsed 1.142 0.080 1.219 survival/tests/data.turbine0000644000176200001440000000217314607006645015605 0ustar liggesusersNA 4 0 4 NA 39 NA 10 4 10 NA 49 NA 14 2 14 NA 31 NA 18 7 18 NA 66 NA 22 5 22 NA 25 NA 26 9 26 NA 30 NA 30 9 30 NA 33 NA 34 6 34 NA 7 NA 38 22 38 NA 12 NA 42 21 42 NA 19 NA 46 21 46 NA 15 survival/tests/turnbull.R0000644000176200001440000001371114612274303015266 0ustar liggesusersoptions(na.action=na.exclude) # preserve missings options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type library(survival) # # The test data set from Turnbull, JASA 1974, 169-73. # # status 0=right censored # 1=exact # 2=left censored # aeq <- function(x,y, ...) all.equal(as.vector(x), as.vector(y), ...) turnbull <- data.frame( time =c( 1,1,1, 2,2,2, 3,3,3, 4,4,4), status=c( 1,0,2, 1,0,2, 1,0,2, 1,0,2), n =c(12,3,2, 6,2,4, 2,0,2, 3,3,5)) # # Compute the K-M for the Turnbull data # via a slow EM calculation # emsurv <- function(time, status, wt, verbose=T) { left.cen <- (status==2) if (!any(left.cen)) stop("No left censored data!") if (!any(status==1))stop("Must have some exact death times") tempy <- Surv(time[!left.cen], status[!left.cen]) ww <- wt[!left.cen] tempx <- factor(rep(1, sum(!left.cen))) tfit <- survfit(tempy~tempx, weights=ww) if (verbose) cat("Iteration 0, survival=", format(round(tfit$surv[tfit$n.event>0],3)), "\n") stimes <- tfit$time[tfit$n.event>0] ltime <- time[left.cen] lwt <- wt[left.cen] tempx <- factor(rep(1, length(stimes) + sum(!left.cen))) tempy <- Surv(c(time[!left.cen], stimes), c(status[!left.cen], rep(1, length(stimes)))) for (iter in 1:4) { wt2 <- stimes*0 ssurv <- tfit$surv[tfit$n.event>0] sjump <- diff(c(1, ssurv)) for (j in 1:(length(ltime))) { k <- sum(ltime[j]>=stimes) #index of the death time if (k==0) stop("Left censored observation before the first death") wt2[1:k] <- wt2[1:k] + lwt[j]*sjump[1:k] /(ssurv[k]-1) } tfit <- survfit(tempy~tempx, weights=c(ww, wt2)) if (verbose) { cat("Iteration", iter, "survival=", format(round(tfit$surv[tfit$n.event>0],3)), "\n") cat(" weights=", format(round(wt2,3)), "\n") } } survfit(tempy ~ tempx, weights=c(ww, wt2), robust=FALSE) } temp <-emsurv(turnbull$time, turnbull$status, turnbull$n) print(summary(temp)) # First check, use the data from Turnbull, JASA 1974, 169-173. tdata <- data.frame(time =c(1,1,1,2,2,2,3,3,3,4,4,4), status=rep(c(1,0,2),4), n =c(12,3,2,6,2,4,2,0,2,3,3,5)) tfit <- survfit(Surv(time, time, status, type='interval') ~1, tdata, weights=n) all.equal(round(tfit$surv,3), c(.538, .295, .210, .095)) # Second check, compare to a reversed survival curve # This is not as simple a test as one might think, because left and right # censored observations are not treated symmetrically by the routine: # time <= y for left and time> y for right (this is to make the routine # correct for the common situation of panel data). # To get equivalence, make the left censoreds happen just a little bit # earlier. The left-continuous/right-continuous shift is also a bother. # test1 <- data.frame(time= c(9, 3,1,1,6,6,8), status=c(1,NA,1,0,1,1,0), x= c(0, 2,1,1,1,0,0)) fit1 <- survfit(Surv(time, status) ~1, test1) temp <- ifelse(test1$status==0, 4.99,5) - test1$time fit2 <- survfit(Surv(temp, status, type='left') ~1, test1) all.equal(round(fit1$surv[1:2],5), round(1-fit2$surv[3:2],5)) rm(tdata, tfit, fit1, temp, fit2) # # Create a data set similar to the one provided by Al Zinsmeister # It is a hard test case for survfit.turnbull # time1 <- c(rep(0,100), rep(1,200), 100, 200, 210, 220, rep(365,100), rep(366,5), 731:741) time2 <- c((1:100)*3, 10+1:100, rep(365:366, c(60,40)), NA, 500, NA, 450, rep(730,90), rep(NA,10), c(528,571,691,730,731), NA, 1095:1099, NA, 1400, 1200, 772, 1461) zfit <- survfit(Surv(time1, time2, type='interval2') ~1) # # There are 100 intervals of the form (0,x) where x is from 3 to 300, # and 200 more of the form (1,x) where x is from 11 to 366. These # lead to a mass point in the interval (1,3), which is placed at 2. # The starting estimate has far too little mass placed here, and it takes # the EM a long time to realize that most of the weight for the first 300 # subjects goes here. With acceleration, it takes 16 iterations, without # it takes >40. (On Al's orginal data, without accel still wasn't there after # 165 iters!) # # The next 4 obs give rise to potential jumps at 100.5, 200.5, 211.5, and # 221. However, the final estimate has no mass at all on any of these. # Assume mass of a,b, and c at 2, 100.5 and 365.5, and consider the # contributions: # 123 obs that overlap a only # 137 obs that overlap a and b # 40 obs that overlap a, b, c # 1 obs that overlap b, c # 108 obs that overlap c (200, 210,200, 365, and 366 starting points) # For some trial values of a,b,c, compare the loglik to that of (a+b),0,c # First one: a^123 (a+b)^137 (a+b+c)^40 (b+c) c^108 # Second: (a+b)^123 (a+b)^137 (a+b+c)^40 c c^108 # Likelhood improves if (1 + b/a)^123 > 1+ b/c, which is true for almost # all a and c. In particular, at the solution a and c are approx .7 and # .18, respectively. # # The program can't see this coming, of course, and so iterates towards a # KM with epsilon sized jumps at 100.5, 200.5, and 211.5. Whether these # intervals should be removed during iteration, as detected, is an open # question for me. # # # True solution: mass points at 2, 365.5, 408, and 756.5, of sizes a, b, c, d # Likelihood: a^260 (a+b)^40 (b+c)^92 (b+c+d)^12 c^5 d^11 # Solution: a=0.6958, b=0.1674, c=0.1079, d=0.0289 tfun <- function(x) { if (length(x) ==3) x <- c(x, .03) x <- x/sum(x) #make probabilities sum to 1 loglik <- 260*log(x[1]) + 40*log(x[1]+x[2]) + 92*log(x[2] + x[3]) + 12*log(x[2]+x[3]+x[4]) + 5*log(x[3]) + 11*log(x[4]) -loglik #find the max, not the min } nfit <- nlminb(start=c(.7,.15, .1), tfun, lower=0, upper=1) nparm <- c(nfit$par, .03) nparm <- nparm / sum(nparm) zparm <- -diff(c(1, zfit$surv[match(c(2, 365.5, 408, 756.5), zfit$time)])) aeq(round(tfun(nparm),4), round(tfun(zparm),4)) # .0001 is the tolerance in survfit.turnbull rm(tfun, nfit, nparm, zparm, time1, time2, zfit) survival/tests/survfit2.R0000644000176200001440000000070614607006645015211 0ustar liggesuserslibrary(survival) # # Check out the Dory&Korn confidence interval option # tdata <- data.frame(time= 1:10, status=c(1,0,1,0,1,0,0,0,1,0)) fit1 <- survfit(Surv(time, status) ~1, tdata, conf.lower='modified') fit2 <- survfit(Surv(time, status) ~1, tdata) stdlow <- fit2$std.err * sqrt(c(1, 10/9, 1, 8/7, 1, 6/5, 6/4, 6/3, 1, 2/1)) lower <- exp(log(fit2$surv) - qnorm(.975)*stdlow) all.equal(fit1$lower, lower, check.attributes=FALSE) survival/tests/jasa.R0000644000176200001440000000710014725745300014335 0ustar liggesusersoptions(na.action=na.exclude) # preserve missings options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type library(survival) expect <- survexp(futime ~ 1, rmap = list(age=(accept.dt - birth.dt), sex=1, year=accept.dt, race='white'), jasa, cohort=F, ratetable=survexp.usr) survdiff(Surv(jasa$futime, jasa$fustat) ~ offset(expect)) # Now fit the 6 models found in Kalbfleisch and Prentice, p139 sfit.1 <- coxph(Surv(start, stop, event)~ (age + surgery)*transplant, jasa1, method='breslow') sfit.2 <- coxph(Surv(start, stop, event)~ year*transplant, jasa1, method='breslow') sfit.3 <- coxph(Surv(start, stop, event)~ (age + year)*transplant, jasa1, method='breslow') sfit.4 <- coxph(Surv(start, stop, event)~ (year +surgery) *transplant, jasa1, method='breslow') sfit.5 <- coxph(Surv(start, stop, event)~ (age + surgery)*transplant + year , jasa1, method='breslow') sfit.6 <- coxph(Surv(start, stop, event)~ age*transplant + surgery + year, jasa1, method='breslow') summary(sfit.1) sfit.2 summary(sfit.3) sfit.4 sfit.5 sfit.6 # Survival curve for an "average" subject, # done once as overall, once via individual method surv1 <- survfit(sfit.1, newdata=list(age=-2, surgery=0, transplant=0)) newdata <- data.frame(start=c(0,50,100), stop=c(50,100, max(jasa1$stop)), event=c(1,1,1), age=rep(-2,3), surgery=rep(0,3), transplant=rep(0,3), name=c("Smith", "Smith", "Smith")) surv2 <- survfit(sfit.1, newdata, id=name) # Have to use unclass to avoid [.survfit trying to pick curves, # remove the final element "call" because it won't match, nor will newdata ii <- match(c("newdata", "call"), names(surv1)) all.equal(unclass(surv1)[-ii], unclass(surv2)[-ii]) # Survival curve for a subject of age 50, with prior surgery, tx at 6 months # Remember that 'age' in jasa 1 was centered at 48 data <- data.frame(start=c(0,183), stop=c(183,3*365), event=c(1,1), age=c(2,2), surgery=c(1,1), transplant=c(0,1), id=c(1,1)) # This output changed in version 3.8-0; the drop in std(surv) at 183 was # incorrect summary(survfit(sfit.1, data, id=id)) # These should all give the same answer # When there are offsets, the default curve is always for someone with # the mean offset. j.age <- jasa$age -48 fit1 <- coxph(Surv(futime, fustat) ~ j.age, data=jasa) fit2 <- coxph(Surv(futime, fustat) ~ j.age, jasa, init=fit1$coef, iter=0) fit3 <- coxph(Surv(start, stop, event) ~ age, jasa1) fit4 <- coxph(Surv(start, stop, event) ~ offset(age*fit1$coef), jasa1) s1 <- survfit(fit1, list(j.age=fit3$means), censor=FALSE) s2 <- survfit(fit2, list(j.age=fit3$means), censor=FALSE) s3 <- survfit(fit3, censor=FALSE) s4 <- survfit(fit4, censor=FALSE) all.equal(s1$surv, s2$surv) all.equal(s1$surv, s3$surv) all.equal(s1$surv, s4$surv) # Still the same answer, fit multiple strata at once # Strata 1 has independent coefs of strata 2, so putting in # the other data should not affect it ll <- nrow(jasa1) ss <- rep(0:1, c(ll,ll)) tdata <- with(jasa1, data.frame(start=rep(start,2), stop=rep(stop,2), event=rep(event,2), ss=ss, age=rep(age,2), age2 = (rep(age,2))^2 * ss)) fit <- coxph(Surv(start, stop, event) ~ age*strata(ss) + age2, tdata) # Above replaced these 2 lines, which kill Splus5 as of 8/98 # Something with data frames, I expect. #fit <- coxph(Surv(rep(start,2), rep(stop,2), rep(event,2)) ~ # rep(age,2)*strata(ss) + I(rep(age,2)^2*ss) ) all.equal(fit$coef[1], fit3$coef) s5 <- survfit(fit, data.frame(age=fit3$means, age2=0, ss=0), censor=FALSE) all.equal(s5$surv[1:(s5$strata[1])], s3$surv) survival/tests/mstate.R0000644000176200001440000003075614654465430014735 0ustar liggesusers# # A tiny multi-state example # library(survival) aeq <- function(x,y) all.equal(as.vector(x), as.vector(y)) mtest <- data.frame(id= c(1, 1, 1, 2, 3, 4, 4, 4, 5, 5), t1= c(0, 4, 9, 0, 2, 0, 2, 8, 1, 3), t2= c(4, 9, 10, 5, 9, 2, 8, 9, 3, 11), st= c(1, 2, 1, 2, 3, 1, 3, 0, 2, 0)) mtest$state <- factor(mtest$st, 0:3, c("censor", "a", "b", "c")) if (FALSE) { # this graph is very useful when debugging temp <- survcheck(Surv(t1, t2, state) ~1, mtest, id=id) plot(c(0,11), c(1,5.1), type='n', xlab="Time", ylab= "Subject") with(mtest, segments(t1+.1, id, t2, id, col=as.numeric(temp$istate))) event <- subset(mtest, state!='censor') text(event$t2, event$id+.2, as.character(event$state)) } mtest <- mtest[c(1,3,2,4,5,7,6,10, 9, 8),] #not in time order mfit <- survfit(Surv(t1, t2, state) ~ 1, mtest, id=id, time0=FALSE) # True results # #time state probabilities # entry a b c entry a b c # #0 124 1 0 0 0 #1+ 1245 #2+ 1235 4 3/4 1/4 0 0 4 -> a, add 3 #3+ 123 4 5 9/16 1/4 3/16 0 5 -> b #4+ 23 14 5 6/16 7/16 3/16 0 1 -> a #5+ 3 14 5 3/16 7/16 6/16 0 2 -> b, exits #8+ 3 1 5 4 3/16 7/32 6/16 7/32 4 -> c #9+ 15 0 0 19/32 13/32 1->b, 3->c & exit # 10+ 1 5 19/64 19/64 13/32 1->a aeq(mfit$n.risk, matrix(c(4,4,3,2,1,1,0,0, 0,1,1,2,2,1,0,0, 0,0,1,1,1,1,2,1, 0,0,0,0,0,1,0,0), ncol=4)) aeq(mfit$pstate, matrix(c(24, 18, 12, 6, 6, 0, 0, 0, 8, 8, 14, 14, 7, 0, 9.5, 9.5, 0, 6, 6, 12, 12,19,9.5, 9.5, 0, 0, 0, 0, 7, 13, 13, 13)/32, ncol=4)) aeq(mfit$n.transition, matrix(c(1,0,1,0,0,0,0,0, 0,0,0,0,0,0,1,0, 0,1,0,1,0,0,0,0, 0,0,0,0,0,1,0,0, 0,0,0,0,0,1,0,0, 0,0,0,0,1,0,0,0), ncol=6)) all.equal(mfit$time, c(2, 3, 4, 5, 8, 9, 10, 11)) # Somewhat more complex. # Scramble the input data # Not everyone starts at the same time or in the same state # Case weights # tdata <- data.frame(id= c(1, 1, 1, 2, 3, 4, 4, 4, 5, 5), t1= c(0, 4, 9, 1, 2, 0, 2, 8, 1, 3), t2= c(4, 9, 10, 5, 9, 2, 8, 9, 3, 11), st= c(1, 2, 1, 2, 3, 1, 3, 0, 3, 0), i0= c(4, 1, 2, 1, 4, 4, 1, 3, 2, 3), wt= 1:10) tdata$st <- factor(tdata$st, c(0:3), labels=c("censor", "a", "b", "c")) tdata$i0 <- factor(tdata$i0, c(4, 1:3), labels=c("entry", "a", "b", "c")) if (FALSE) { #useful picture temp <- survcheck(Surv(t1, t2, st) ~1, tdata, id=id, istate=i0) plot(c(0,11), c(1,5.5), type='n', xlab="Time", ylab= "Subject") with(tdata, segments(t1+.1, id, t2, id, col=as.numeric(temp$istate))) with(subset(tdata, st!= "censor"), text(t2, id+.15, as.character(st))) with(tdata, text((t1+t2)/2, id+.25, wt)) with(subset(tdata, !duplicated(id)), text(t1, id+.15, as.character(i0))) #abline(v=c(2:5, 8:11), lty=3, col='gray') } tfun <- function(data=tdata) { reorder <- c(10, 9, 1, 2, 5, 4, 3, 7, 8, 6) new <- data[reorder,] new } # These weight vectors are in the order of tdata # w[9] is the weight for subject 5 at time 1.5, for instance # p0 is defined as all those at risk just before the first event, which in # this data set is entry:a at time 2 for id=4; id 1,2,4,5 at risk # When the functions below were written, the entry state was listed last. # Currently the entry state is first, so "[swap]" was added to the aj routines # rather than rearranging the formulas swap <- c(4,1,2,3) p0 <- function(w) c( w[1]+ w[6], w[4], w[9], 0)/ (w[1]+ w[4] + w[6] + w[9]) # aj2 = Aalen-Johansen H matrix at time 2, etc. aj2 <- function(w) { #subject 4 moves from entry to 'a' rbind(c(1, 0, 0, 0), c(0, 1, 0, 0), c(0, 0, 1, 0), c(w[6], 0, 0, w[1])/(w[1] + w[6]))[swap, swap] } aj3 <- function(w) rbind(c(1, 0, 0, 0), c(0, 0, 1, 0), # 5 moves from b to c c(0, 0, 1, 0), c(0, 0, 0, 1))[swap,swap] aj4 <- function(w) { # subject 1 moves from entry to a rbind(c(1, 0, 0, 0), c(0, 1, 0, 0), c(0, 0, 1, 0), c(w[1], 0, 0, w[5])/(w[1] + w[5])) [swap, swap] } aj5 <- function(w) { # subject 2 from a to b rbind(c(w[2]+w[7], w[4], 0, 0)/(w[2]+ w[4] + w[7]), c(0, 1, 0, 0), c(0, 0, 1, 0), c(0, 0, 0, 1))[swap, swap] } aj8 <- function(w) rbind(c(w[2], 0, w[7], 0)/(w[2]+ w[7]), # 4 a to c c(0, 1, 0, 0), c(0, 0, 1, 0), c(0, 0, 0, 1))[swap, swap] aj9 <- function(w) rbind(c(0, 1, 0, 0), # 1 a to b c(0, 1, 0, 0), c(0, 0, 1, 0), c(0, 0, 1 ,0)) [swap, swap] # 3 entry to c aj10 <- function(w)rbind(c(1, 0, 0, 0), c(1, 0, 0, 0), #1 b to a c(0, 0, 1, 0), c(0, 0, 0, 1))[swap, swap] #time state # a b c entry # #1 2 5 14 initial distribution #2 24 5 1 4 -> a, add 3 #3 24 5 13 5 from b to c #4 124 5 3 1 -> a #5 14 5 3 2 -> b, exits #8 1 45 3 4 -> c #9 1 45 1->b, 3->c & exit #10 1 45 1->a # P is a product of matrices dopstate <- function(w) { p1 <- p0(w) p2 <- p1 %*% aj2(w) p3 <- p2 %*% aj3(w) p4 <- p3 %*% aj4(w) p5 <- p4 %*% aj5(w) p8 <- p5 %*% aj8(w) p9 <- p8 %*% aj9(w) p10<- p9 %*% aj10(w) rbind(p2, p3, p4, p5, p8, p9, p10, p10) } # Check the pstate estimate w1 <- rep(1,10) mtest2 <- tfun(tdata) # scrambled order mfit2 <- survfit(Surv(t1, t2, st) ~ 1, tdata, id=id, istate=i0, time0=FALSE) # ordered aeq(mfit2$pstate, dopstate(w1)) aeq(mfit2$p0, p0(w1)) mfit2b <- survfit(Surv(t1, t2, st) ~ 1, mtest2, id=id, istate=i0, time0=FALSE) aeq(mfit2b$pstate, dopstate(w1)) aeq(mfit2b$p0, p0(w1)) mfit2b$call <- mfit2$call <- NULL all.equal(mfit2b, mfit2) aeq(mfit2$transitions, c(2,0,1,0, 0,2,0,0, 1,1,1,0, 0,0,0,2)) # Now the harder one, where subjects change weights mfit3 <- survfit(Surv(t1, t2, st) ~ 1, tdata, id=id, istate=i0, weights=wt, influence=TRUE, time0=FALSE) aeq(mfit3$p0, p0(1:10)) aeq(mfit3$pstate, dopstate(1:10)) # The derivative of a matrix product AB is (dA)B + A(dB) where dA is the # elementwise derivative of A and etc for B. # dp0 creates the derivatives of p0 with respect to each subject, a 5 by 4 # matrix # All the functions below are hand coded for a weight vector that is in # exactly the same order as the rows of mtest. # Since p0 = (w[1]+ w[6], w[4], w[9], 0)/ (w[1]+ w[4] + w[6] + w[9]) # and subject id is 1,1,1, 2, 3, 4,4,4, 5,5 # we get the derivative below # dp0 <- function(w) { # influence just before the first event p <- p0(w) w0 <- w[c(1,4,6,9)] # the 4 obs at the start, subjects 1, 2, 4, 5 rbind(c(1,0, 0, 0) - p, # subject 1 affects p[entry] c(0,1, 0, 0) - p, # subject 2 affects p[a] 0, # subject 3 affects none c(1, 0, 0, 0) - p, # subject 4 affect p[entry] c(0, 0, 1, 0) - p)/ # subject 5 affects p[b] sum(w0) } dp2 <- function(w) { h2 <- aj2(w) # H matrix at time 2 part1 <- dp0(w) %*% h2 # 1 and 4 in entry, obs 4 moves from entry to a mult <- p0(w)[1]/(w[1] + w[6]) #p(t-) / weights in state part2 <- rbind((c(1,0,0,0)- h2[1,]) * mult, 0, 0, (c(0,1,0,0) - h2[1,]) * mult, 0) part1 + part2 } dp3 <- function(w) { dp2(w) %*% aj3(w) } dp4 <- function(w) { h4 <- aj4(w) # H matrix at time 4 part1 <- dp3(w) %*% h4 # subjects 1 and 3 in state entry (obs 1 and 5) 1 moves to a mult <- dopstate(w)[2,1]/ (w[1] + w[5]) # p_1(time 4-0) / wt part2 <- rbind((c(0,1,0,0)- h4[1,]) * mult, 0, (c(1,0,0,0)- h4[1,]) * mult, 0, 0) part1 + part2 } dp5 <- function(w) { h5 <- aj5(w) # H matrix at time 5 part1 <- dp4(w) %*% h5 # subjects 124 in state a (obs 2,4,7), 2 goes to b mult <- dopstate(w)[3,2]/ (w[2] + w[4] + w[7]) part2 <- rbind((c(0,1,0,0)- h5[2,]) * mult, (c(0,0,1,0)- h5[2,]) * mult, 0, (c(0,1,0,0)- h5[2,]) * mult, 0) part1 + part2 } dp8 <- function(w) { h8 <- aj8(w) # H matrix at time 8 part1 <- dp5(w) %*% h8 # subjects 14 in state a (obs 2 &7), 4 goes to c mult <- dopstate(w)[4, 2]/ (w[2] + w[7]) part2 <- rbind((c(0,1,0,0)- h8[2,]) * mult, 0, 0, (c(0,0,0,1)- h8[2,]) * mult, 0) part1 + part2 } dp9 <- function(w) dp8(w) %*% aj9(w) dp10<- function(w) dp9(w) %*% aj10(w) # # Feb 4 2024: discovered that the variance computation above is incorrect. # Let U = influence for phat, with one row per observation in the data # The weighted per subject influence is Z(t)= BDU(t) where # B= rbind(c(1,1,1,0,0,0,0,0,0,0), # c(0,0,0,1,0,0,0,0,0,0), # c(0,0,0,0,1,0,0,0,0,0), # c(0,0,0,0,0,1,1,1,0,0), # c(0,0,0,0,0,0,0,0,1,1)) # and D= diag(1:10) # which can be summarized as "weight each row, then add over subjects". # The variance at time t is the column sums of Z^2(t) (elementwise squares) # # The code above for dp0, dp2, etc returns BU, which matches the computation of # the influence in survfitci.c. If the weight for a given subject is constant # over time, then BDU= WBU where W is the diagonal matrix of per-subject # weights: survfitci.c implicitly made this assumption, and was correct # in this case. It returned U as the influence, which matches dp0 etc. # # survfitci.c has been replaced by survfitaj.c, which uses the careful # derivations in the methods vignette, and returns BDU. # The checks below have been changed to a case with constant weights per # subject. R code to test for general weights is in mstate2.R # w1 <- tdata$id mfit4 <- survfit(Surv(t1, t2, st) ~1, tdata, id=id, weights=id, istate=i0, influence=TRUE, time0= FALSE) aeq(mfit4$influence[,1,], 1:5*dp2(w1)) #time 2 aeq(mfit4$influence[,2,], 1:5*dp3(w1)) aeq(mfit4$influence[,3,], 1:5*dp4(w1)) aeq(mfit4$influence[,4,], 1:5*dp5(w1)) aeq(mfit4$influence[,5,], 1:5*dp8(w1)) # time 8 aeq(mfit4$influence[,6,], 1:5* dp9(w1)) aeq(mfit4$influence[,7,], 1:5* dp10(w1)) aeq(mfit4$influence[,8,], 1:5* dp10(w1)) # no changes at time 11 ssq <- function(x) sqrt(sum(x^2)) temp2 <- apply(mfit4$influence.pstate, 2:3, ssq) aeq(temp2, mfit4$std.err) if (FALSE) { # old test, survfitci returned the time 0 influence as well w1 <- 1:10 aeq(mfit3$influence[,1,], dp0(w1)) aeq(mfit3$influence[,2,], dp2(w1)) aeq(mfit3$influence[,3,], dp3(w1)) aeq(mfit3$influence[,4,], dp4(w1)) aeq(mfit3$influence[,5,], dp5(w1)) aeq(mfit3$influence[,6,], dp8(w1)) aeq(mfit3$influence[,7,], dp9(w1)) aeq(mfit3$influence[,8,], dp10(w1)) aeq(mfit3$influence[,9,], dp10(w1)) # no changes at time 11 } # end of if (FALSE) # The cumulative hazard at each time point is remapped from a matrix # into a vector (in survfit) # First check out the names nstate <- length(mfit4$states) temp <- matrix(0, nstate, nstate) indx1 <- match(rownames(mfit4$transitions), mfit4$states) indx2 <- match(colnames(mfit4$transitions), mfit4$states, nomatch=0) temp[indx1, indx2] <- mfit4$transitions[, indx2>0] # temp is an nstate by nstate version of the transitions matrix from <- row(temp)[temp>0] to <- col(temp)[temp>0] all.equal(colnames(mfit4$cumhaz), paste(from, to, sep=':')) # check the cumulative hazard temp <- mfit4$n.risk[,from] hazard <- mfit4$n.transition/ifelse(temp==0, 1, temp) aeq(apply(hazard, 2, cumsum), mfit4$cumhaz) survival/tests/multi2.R0000644000176200001440000001534714654222147014647 0ustar liggesuserslibrary(survival) aeq <- function(x, y, ...) all.equal(as.vector(x), as.vector(y), ...) # Check that estimates from a multi-state model agree with single state models # Use a simplified version of the myeloid data set tdata <- tmerge(myeloid[,1:3], myeloid, id=id, death=event(futime,death), priortx = tdc(txtime), sct= event(txtime)) tdata$event <- factor(with(tdata, sct + 2*death), 0:2, c("censor", "sct", "death")) fit <- coxph(Surv(tstart, tstop, event) ~ trt + sex, tdata, id=id, iter=4, x=TRUE, robust=FALSE) # Multi-state now defaults to breslow rather than efron fit12 <- coxph(Surv(tstart, tstop, event=='sct') ~ trt + sex, tdata, subset=(priortx==0), iter=4, x=TRUE, method='breslow') fit13 <- coxph(Surv(tstart, tstop, event=='death') ~ trt + sex, tdata, subset=(priortx==0), iter=4, x=TRUE, method= 'breslow') fit23 <- coxph(Surv(tstart, tstop, event=='death') ~ trt + sex, tdata, subset=(priortx==1), iter=4, x=TRUE, method="breslow") aeq(coef(fit), c(coef(fit12), coef(fit13), coef(fit23))) aeq(fit$loglik, fit12$loglik + fit13$loglik + fit23$loglik) temp <- matrix(0, 6,6) temp[1:2, 1:2] <- fit12$var temp[3:4, 3:4] <- fit13$var temp[5:6, 5:6] <- fit23$var aeq(fit$var, temp) # check out model.frame fita <- coxph(Surv(tstart, tstop, event) ~ trt, tdata, id=id) fitb <- coxph(Surv(tstart, tstop, event) ~ trt, tdata, id=id, model=TRUE) all.equal(model.frame(fita), fitb$model) # model.frame fails due to an interal rule in R, factors vs characters # result when the xlev arg is in the call. So model.frame(fita) has trt # as a factor, not character. #check residuals indx1 <- which(fit$rmap[,2] ==1) indx2 <- which(fit$rmap[,2] ==2) indx3 <- which(fit$rmap[,2] ==3) aeq(residuals(fit), c(residuals(fit12), residuals(fit13), residuals(fit23))) aeq(residuals(fit)[indx1], residuals(fit12)) aeq(residuals(fit)[indx2], residuals(fit13)) aeq(residuals(fit)[indx3], residuals(fit23)) # score residuals temp <- residuals(fit, type='score') aeq(temp[indx1, 1:2], residuals(fit12, type='score')) aeq(temp[indx2, 3:4], residuals(fit13, type='score')) aeq(temp[indx3, 5:6], residuals(fit23, type='score')) all(temp[indx1, 3:6] ==0) all(temp[indx2, c(1,2,5,6)] ==0) all(temp[indx3, 1:4]==0) temp <- residuals(fit, type="dfbeta") all(temp[indx1, 3:6] ==0) all(temp[indx2, c(1,2,5,6)] ==0) all(temp[indx3, 1:4]==0) aeq(temp[indx1, 1:2], residuals(fit12, type='dfbeta')) aeq(temp[indx2, 3:4], residuals(fit13, type='dfbeta')) aeq(temp[indx3, 5:6], residuals(fit23, type='dfbeta')) temp <- residuals(fit, type="dfbetas") all(temp[indx1, 3:6] ==0) all(temp[indx2, c(1,2,5,6)] ==0) all(temp[indx3, 1:4]==0) aeq(temp[indx1, 1:2], residuals(fit12, type='dfbetas')) aeq(temp[indx2, 3:4], residuals(fit13, type='dfbetas')) aeq(temp[indx3, 5:6], residuals(fit23, type='dfbetas')) # Schoenfeld and scaled shoenfeld have one row per event sr1 <- residuals(fit12, type="schoenfeld") sr2 <- residuals(fit13, type="schoenfeld") sr3 <- residuals(fit23, type="schoenfeld") end <- rep(1:3, c(nrow(sr1), nrow(sr2), nrow(sr3))) temp <- residuals(fit, type="schoenfeld") aeq(temp[end==1, 1:2], sr1) aeq(temp[end==2, 3:4], sr2) aeq(temp[end==3, 5:6], sr3) all(temp[end==1, 3:6] ==0) all(temp[end==2, c(1,2,5,6)] ==0) all(temp[end==3, 1:4] ==0) #The scaled Schoenfeld don't agree, due to the use of a robust # variance in fit, regular variance in fit12, fit13 and fit23 #Along with being scaled by different event counts xfit <- fit xfit$var <- xfit$naive.var if (FALSE) { xfit <- fit xfit$var <- xfit$naive.var # fixes the first issue temp <- residuals(xfit, type="scaledsch") aeq(d1* temp[sindx1, 1:2], residuals(fit12, type='scaledsch')) aeq(temp[sindx2, 3:4], residuals(fit13, type='scaledsch')) aeq(temp[sindx3, 5:6], residuals(fit23, type='scaledsch')) } if (FALSE) { # the predicted values are a work in progress # predicted values differ because of different centering c0 <- sum(fit$mean * coef(fit)) c12 <- sum(fit12$mean * coef(fit12)) c13 <- sum(fit13$mean* coef(fit13)) c23 <- sum(fit23$mean * coef(fit23)) aeq(predict(fit)+c0, c(predict(fit12)+c12, predict(fit13)+c13, predict(fit23)+c23)) aeq(exp(predict(fit)), predict(fit, type='risk')) # expected survival is independent of centering aeq(predict(fit, type="expected"), c(predict(fit12, type="expected"), predict(fit13, type="expected"), predict(fit23, type="expected"))) } # predict(type='terms') is a matrix, centering changes as well if (FALSE) { temp <- predict(fit, type='terms') all(temp[indx1, 3:6] ==0) all(temp[indx2, c(1,2,5,6)] ==0) all(temp[indx3, 1:4]==0) aeq(temp[indx1, 1:2], predict(fit12, type='terms')) aeq(temp[indx2, 3:4], predict(fit13, type='terms')) aeq(temp[indx3, 5:6], predict(fit23, type='terms')) } # end of prediction section # The global and per strata zph tests will differ for the KM or rank # transform, because the overall and subset will have a different list # of event times, which changes the transformed value for all of them. # But identity and log are testable. test_a <- cox.zph(fit, transform="log",global=FALSE) test_a12 <- cox.zph(fit12, transform="log",global=FALSE) test_a13 <- cox.zph(fit13, transform="log", global=FALSE) test_a23 <- cox.zph(fit23, transform="log", global=FALSE) aeq(test_a$y[test_a$strata==1, 1:2], test_a12$y) aeq(test_a$table[1:2,], test_a12$table) aeq(test_a$table[3:4,], test_a13$table) aeq(test_a$table[5:6,], test_a23$table) # check cox.zph fit - transform = 'identity' test_b <- cox.zph(fit, transform="identity",global=FALSE) test_b12 <- cox.zph(fit12, transform="identity",global=FALSE) test_b13 <- cox.zph(fit13, transform="identity", global=FALSE) test_b23 <- cox.zph(fit23, transform="identity", global=FALSE) aeq(test_b$table[1:2,], test_b12$table) aeq(test_b$table[3:4,], test_b13$table) aeq(test_b$table[5:6,], test_b23$table) # check out subscripting of a multi-state zph cname <- c("table", "x", "time", "y", "var") sapply(cname, function(x) aeq(test_b[1:2]$x, test_b12$x)) sapply(cname, function(x) aeq(test_b[3:4]$x, test_b13$x)) sapply(cname, function(x) aeq(test_b[5:6]$x, test_b23$x)) # check model.matrix mat1 <- model.matrix(fit) all.equal(mat1, fit$x) # Check that the internal matix agrees (uses stacker, which is not exported) mat2 <- model.matrix(fit12) mat3 <- model.matrix(fit13) mat4 <- model.matrix(fit23) # first reconstruct istate tcheck <- survcheck(Surv(tstart, tstop, event) ~ 1, tdata, id=id) temp <- survival:::stacker(fit$cmap, fit$smap, as.numeric(tcheck$istate), fit$x, fit$y, NULL, fit$states) aeq(temp$X[temp$transition==1, 1:2], mat2) aeq(temp$X[temp$transition==2, 3:4], mat3) aeq(temp$X[temp$transition==3, 5:6], mat4) survival/tests/expected2.R0000644000176200001440000000467114607006645015315 0ustar liggesuserslibrary(survival) # # A Cox model with a factor, followed by survexp. # pfit2 <- coxph(Surv(time, status > 0) ~ trt + log(bili) + log(protime) + age + platelet + sex, data = pbc) esurv <- survexp(~ trt, ratetable = pfit2, data = pbc) temp <- pbc temp$sex2 <- factor(as.numeric(pbc$sex), levels=2:0, labels=c("f", "m", "unknown")) esurv2 <- survexp(~ trt, ratetable = pfit2, data = temp, rmap=list(sex=sex2)) # The call components won't match, which happen to be first all.equal(unclass(esurv)[-1], unclass(esurv2)[-1]) # Check that the ratetableDate function is okay # Datedate <- function(x) { # Dates have an origin of 1/1/1970, dates of 1/1/1960 offset <- as.numeric(as.Date("1970-01-01") - as.Date("1960-01-01")) y <- as.numeric(x) + offset class(y) <- "date" y } as.data.frame.date <- as.data.frame.vector # needed to make the functions work n <- nrow(lung) tdata <- data.frame(age=lung$age + (1:n)/365.25, sex = c('male', 'female')[lung$sex], ph.ecog = lung$ph.ecog, time = lung$time*3, status = lung$status, entry = as.Date("1940/01/01") + (n:1)*50) tdata$entry2 <- as.POSIXct(tdata$entry) tdata$entry3 <- Datedate(tdata$entry) p1 <- pyears(Surv(time, status) ~ ph.ecog, data=tdata, ratetable=survexp.us, rmap= list(age=age*365.25, sex=sex, year=entry)) p2 <- pyears(Surv(time, status) ~ ph.ecog, data=tdata, ratetable=survexp.us, rmap= list(age=age*365.25, sex=sex, year=entry2)) p3 <- pyears(Surv(time, status) ~ ph.ecog, data=tdata, ratetable=survexp.us, rmap= list(age=age*365.25, sex=sex, year=entry3)) all.equal(p1$expected, p2$expected) all.equal(p1$expected, p3$expected) # Now a ratetable with ordinary dates rather than US census style year trate <- survexp.us attr(trate, 'type') <- c(2,1,3) p4 <- pyears(Surv(time, status) ~ ph.ecog, data=tdata, ratetable=trate, rmap= list(age=age*365.25, sex=sex, year=entry)) p5 <- pyears(Surv(time, status) ~ ph.ecog, data=tdata, ratetable=trate, rmap= list(age=age*365.25, sex=sex, year=entry2)) p6 <- pyears(Surv(time, status) ~ ph.ecog, data=tdata, ratetable=trate, rmap= list(age=age*365.25, sex=sex, year=entry3)) #all.equal(p1$expected, p4$expected) # this won't be true, US special is special all.equal(p4$expected, p5$expected) all.equal(p5$expected, p6$expected) survival/tests/pspline.Rout.save0000644000176200001440000000552414613770353016570 0ustar liggesusers R Under development (unstable) (2020-06-10 r78681) -- "Unsuffered Consequences" Copyright (C) 2020 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(survival) > # > # Tests with the pspline function, to verify the prediction aspects > # > options(na.action=na.exclude) > aeq <- function(x,y, ...) all.equal(as.vector(x), as.vector(y), ...) > > spfit <- coxph(Surv(time, status) ~ pspline(age) + ph.ecog, lung) > > spfit2 <- coxph(Surv(time, status) ~ pspline(age) + ph.ecog, lung, x=TRUE) > x2 <- model.matrix(spfit) > all.equal(spfit2$x, x2) [1] TRUE > > keep <- (lung$age < 60) > x3 <- model.matrix(spfit, data=lung[keep,]) > attr(x3, 'assign') <- NULL #subscripting loses the assign attr below > all.equal(napredict(spfit$na.action,x2)[keep,], x3) [1] TRUE > > p2 <- predict(spfit, newdata=lung[keep,]) > aeq(p2, predict(spfit)[keep]) [1] TRUE > > > p3 <- survfit(spfit) > p4 <- survfit(spfit, newdata=lung[1:2,]) > temp <- scale(x2[1:2,], center=spfit$means, scale=FALSE)%*% coef(spfit) > aeq(p3$time, p4$time) [1] TRUE > aeq(outer(-log(p3$surv), exp(temp), '*'), -log(p4$surv)) [1] TRUE > > # Check out model.frame > spfit3 <- coxph(Surv(time, status) ~ pspline(age) + sex, lung, + model=TRUE) #avoid the missing value > m2 <- model.frame(spfit3, data=lung[keep,]) > all.equal(m2, spfit3$model[keep,], check.attributes=FALSE) [1] TRUE > > # > # Test of residuals, in response to a reported bug. > # These are three progam paths that should all lead to the same C routine > fit <- coxph(Surv(tstart, tstop, status) ~ sex + treat + pspline(age), cgd) > fit2 <- coxph(Surv(tstart, tstop, status) ~ fit$linear, cgd, iter=0, init=1) > fit3 <- coxph(Surv(tstart, tstop, status) ~ offset(fit$linear), cgd) > all.equal(fit$resid, fit2$resid) [1] TRUE > all.equal(fit$resid, fit3$resid) [1] TRUE > > # > # Check using coxph.detail. The matrix multiply below only is > # valid for the breslow approximation. > fit4 <- coxph(Surv(tstart, tstop, status) ~ sex + treat + pspline(age), + cgd, ties='breslow') > dt <- coxph.detail(fit4, riskmat=TRUE) > > # the results of coxph.detail used to be in time order, now are in data set > # order > rscore <- exp(fit4$linear) > exp4 <- (rscore *dt$riskmat) %*% dt$hazard > r4 <- cgd$status - exp4 > aeq(r4, fit4$resid) [1] TRUE > > proc.time() user system elapsed 0.853 0.088 0.934 survival/tests/coxsurv2.Rout.save0000644000176200001440000000554214613770353016711 0ustar liggesusers R Under development (unstable) (2024-04-17 r86441) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: aarch64-unknown-linux-gnu R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(survival) > # > # Check that the survival curves from a Cox model with beta=0 > # match ordinary survival > # > # Aalen > surv1 <- survfit(Surv(time,status) ~ sex, data=lung, stype=2) > fit1 <- coxph(Surv(time, status) ~ age + strata(sex), data=lung, iter=0, + ties='breslow') > fit1$var <- 0*fit1$var #sneaky, causes the extra term in the Cox variance > # calculation to be zero > surv2 <- survfit(fit1, stype=2) > surv3 <- survfit(fit1) > > arglist <- c('n', 'time', 'n.risk','n.event', 'n.censor', 'surv', 'strata', + 'std.err', 'upper', 'lower') > all.equal(unclass(surv1)[arglist], unclass(surv2)[arglist]) [1] TRUE > all.equal(unclass(surv1)[arglist], unclass(surv3)[arglist]) [1] TRUE > > > # Efron method > surv1 <- survfit(Surv(time,status) ~ sex, data=lung, stype=2, ctype=2) > surv2 <- survfit(fit1, ctype=2) > all.equal(unclass(surv1)[arglist], unclass(surv2)[arglist]) [1] TRUE > > # Kaplan-Meier > surv1 <- survfit(Surv(time,status) ~ sex, data=lung) > surv2 <- survfit(fit1, stype=1) > all.equal(unclass(surv1)[arglist], unclass(surv2)[arglist]) [1] TRUE > > > # Now add some random weights > rwt <- runif(nrow(lung), .5, 3) > surv1 <- survfit(Surv(time,status) ~ sex, data=lung, stype=2, weights=rwt, + robust=FALSE) > fit1 <- coxph(Surv(time, status) ~ age + strata(sex), data=lung, iter=0, + ties='breslow', weights=rwt, robust=FALSE) > fit1$var <- 0*fit1$var #sneaky > surv2 <- survfit(fit1, stype=2, ctype=1) > surv3 <- survfit(fit1) > > all.equal(unclass(surv1)[arglist], unclass(surv2)[arglist]) [1] TRUE > all.equal(unclass(surv1)[arglist], unclass(surv3)[arglist]) [1] TRUE > > > # Efron method > surv1 <- survfit(Surv(time,status) ~ sex, data=lung, stype=2, ctype=2, + weights=rwt, robust=FALSE) > surv2 <- survfit(fit1, ctype=2, robust=FALSE) > all.equal(unclass(surv1)[arglist], unclass(surv2)[arglist]) [1] TRUE > > # Kaplan-Meier > surv1 <- survfit(Surv(time,status) ~ sex, data=lung, weights=rwt, robust=FALSE) > surv2 <- survfit(fit1, stype=1, robust=FALSE) > all.equal(unclass(surv1)[arglist], unclass(surv2)[arglist]) [1] TRUE > > > proc.time() user system elapsed 0.412 0.023 0.433 survival/tests/overlap.Rout.save0000644000176200001440000000425114607006645016561 0ustar liggesusers R Under development (unstable) (2019-08-23 r77061) -- "Unsuffered Consequences" Copyright (C) 2019 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > # > # Make sure that useless intervals do not cause issues, i.e., any that do > # not overlap at least one event time > # > library(survival) > test2 <- data.frame(time1 =c(1, 2, 5, 2, 1, 7, 3, 4, 8, 8, 3), + time2 =c(2, 3, 6, 7, 8, 9, 9, 9,14,17, 5), + event =c(1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0), + x =c(1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 500) ) > > # The data set is the same as book3.R, except for the wild observation > # with x=500 whose time interval of (4,5) overlaps no events. > > fit1 <- coxph(Surv(time1, time2, event) ~ x, test2, subset=(x<100)) > fit2 <- coxph(Surv(time1, time2, event) ~ x, test2) > > ii <- match(c("coefficients", "var", "loglik", "score", "iter", + "wald.test", "concordance"), names(fit1)) > all.equal(fit1[ii], fit2[ii]) [1] TRUE > all.equal(c(fit1$residuals,0), fit2$residuals, check.attributes=FALSE) [1] TRUE > > # The mean differs condiderably, and so to the linear predictors > > # Now the same with a penalized model > fit3 <- coxph(Surv(time1, time2, event) ~ ridge(x, theta=.1), test2, + subset= (x< 100)) > fit4 <- coxph(Surv(time1, time2, event) ~ ridge(x, theta=.1), test2) > fit5 <- coxph(Surv(time1,time2, event) ~ x, test2, + iter=0, init=fit4$coef) > > all.equal(fit3[ii], fit4[ii]) [1] TRUE > all.equal(c(fit3$residuals,0), fit4$residuals, check.attributes=FALSE) [1] TRUE > all.equal(fit4$residuals, fit5$residuals) [1] TRUE > > proc.time() user system elapsed 0.997 0.056 1.336 survival/tests/fr_simple.R0000644000176200001440000000432414607006645015405 0ustar liggesusersoptions(na.action=na.exclude) # preserve missings options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type library(survival) # # Test the logic of the penalized code by fitting some no-frailty models # (theta=0). It should give exactly the same answers as 'ordinary' coxph. # test1 <- data.frame(time= c(4, 3,1,1,2,2,3), status=c(1,NA,1,0,1,1,0), x= c(0, 2,1,1,1,0,0)) test2 <- data.frame(start=c(1, 2, 5, 2, 1, 7, 3, 4, 8, 8), stop =c(2, 3, 6, 7, 8, 9, 9, 9,14,17), event=c(1, 1, 1, 1, 1, 1, 1, 0, 0, 0), x =c(1, 0, 0, 1, 0, 1, 1, 1, 0, 0) ) zz <- zz2 <- rep(0, nrow(test1)) tfit1 <- coxph(Surv(time,status) ~x, test1, eps=1e-7) tfit2 <- coxph(Surv(time,status) ~x + frailty(zz, theta=0, sparse=T), test1) tfit3 <- coxph(Surv(zz,time,status) ~x + frailty(zz2, theta=0, sparse=T), test1) temp <- c('coefficients', 'var', 'loglik', 'linear.predictors', 'means', 'n', 'concordance') all.equal(tfit1[temp], tfit2[temp]) all.equal(tfit2[temp], tfit3[temp]) zz <- rep(0, nrow(test2)) tfit1 <- coxph(Surv(start, stop, event) ~x, test2, eps=1e-7) tfit2 <- coxph(Surv(start, stop, event) ~ x + frailty(zz, theta=0, sparse=T), test2) all.equal(tfit1[temp], tfit2[temp]) # # Repeat the above tests, but with a strata added # Because the data set is simply doubled, the loglik will double, # beta is the same, variance is halved. # test3 <- rbind(test1, test1) test3$x2 <- rep(1:2, rep(nrow(test1),2)) zz <- zz2 <- rep(0, nrow(test3)) tfit1 <- coxph(Surv(time,status) ~x + strata(x2), test3, eps=1e-7) tfit2 <- coxph(Surv(time,status) ~x + frailty(zz, theta=0, sparse=T) + strata(x2), test3) tfit3 <- coxph(Surv(zz,time,status) ~x + frailty(zz2, theta=0, sparse=T) + strata(x2), test3) all.equal(tfit1[temp], tfit2[temp]) all.equal(tfit2[temp], tfit3[temp]) test4 <- rbind(test2, test2) test4$x2 <- rep(1:2, rep(nrow(test2),2)) zz <- rep(0, nrow(test4)) tfit1 <- coxph(Surv(start, stop, event) ~x, test4, eps=1e-7) tfit2 <- coxph(Surv(start, stop, event) ~ x + frailty(zz, theta=0, sparse=T), test4) all.equal(tfit1[temp], tfit2[temp]) rm(test3, test4, tfit1, tfit2, tfit3, temp, zz, zz2) survival/tests/survreg1.R0000644000176200001440000000706514607006645015210 0ustar liggesusersoptions(na.action=na.exclude) # preserve missings options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type library(survival) aeq <- function(x,y, ...) all.equal(as.vector(x), as.vector(y), ...) # fit1 and fit4 should follow identical iteration paths fit1 <- survreg(Surv(futime, fustat) ~ age + ecog.ps, ovarian, x=TRUE) fit4 <- survreg(Surv(log(futime), fustat) ~age + ecog.ps, ovarian, dist='extreme') aeq(fit1$coef, fit4$coef) aeq(fit1$var, fit4$var) resid(fit1, type='working') resid(fit1, type='response') resid(fit1, type='deviance') resid(fit1, type='dfbeta') resid(fit1, type='dfbetas') resid(fit1, type='ldcase') resid(fit1, type='ldresp') resid(fit1, type='ldshape') resid(fit1, type='matrix') aeq(resid(fit1, type='working'),resid(fit4, type='working')) #aeq(resid(fit1, type='response'), resid(fit4, type='response'))#should differ aeq(resid(fit1, type='deviance'), resid(fit4, type='deviance')) aeq(resid(fit1, type='dfbeta'), resid(fit4, type='dfbeta')) aeq(resid(fit1, type='dfbetas'), resid(fit4, type='dfbetas')) aeq(resid(fit1, type='ldcase'), resid(fit4, type='ldcase')) aeq(resid(fit1, type='ldresp'), resid(fit4, type='ldresp')) aeq(resid(fit1, type='ldshape'), resid(fit4, type='ldshape')) aeq(resid(fit1, type='matrix'), resid(fit4, type='matrix')) # Test suggested by Achim Zieleis: residuals should give a score vector r1 <-residuals(fit1, type='matrix') score <- c(as.vector(r1[,c("dg")]) %*% model.matrix(fit1), "log(scale)" = sum(r1[,"ds"])) all(abs(score) < 1e-6) # repeat this with Gaussian (no transform = different code path) tfit <- survreg(Surv(durable, durable>0, type='left') ~age + quant, data=tobin, dist='gaussian') r2 <- residuals(tfit, type='matrix') score <- c(as.vector(r2[, "dg"]) %*% model.matrix(tfit), "log(scale)" = sum(r2[,"ds"])) all(score < 1e-6) # # Some tests of the quantile residuals # # These should agree exactly with Ripley and Venables' book fit1 <- survreg(Surv(time, status) ~ temp, data= imotor) summary(fit1) # # The first prediction has the SE that I think is correct # The third is the se found in an early draft of Ripley; fit1 ignoring # the variation in scale estimate, except via it's impact on the # upper left corner of the inverse information matrix. # Numbers 1 and 3 differ little for this dataset # predict(fit1, data.frame(temp=130), type='uquantile', p=c(.5, .1), se=T) fit2 <- survreg(Surv(time, status) ~ temp, data=imotor, scale=fit1$scale) predict(fit2, data.frame(temp=130), type='uquantile', p=c(.5, .1), se=T) fit3 <- fit2 fit3$var <- fit1$var[1:2,1:2] predict(fit3, data.frame(temp=130), type='uquantile', p=c(.5, .1), se=T) pp <- seq(.05, .7, length=40) xx <- predict(fit1, data.frame(temp=130), type='uquantile', se=T, p=pp) #matplot(pp, cbind(xx$fit, xx$fit+2*xx$se, xx$fit - 2*xx$se), type='l') # # Now try out the various combinations of strata, #predicted, and # number of quantiles desired # fit1 <- survreg(Surv(time, status) ~ inst + strata(inst) + age + sex, lung) qq1 <- predict(fit1, type='quantile', p=.3, se=T) qq2 <- predict(fit1, type='quantile', p=c(.2, .3, .4), se=T) aeq <- function(x,y) all.equal(as.vector(x), as.vector(y)) aeq(qq1$fit, qq2$fit[,2]) aeq(qq1$se.fit, qq2$se.fit[,2]) qq3 <- predict(fit1, type='quantile', p=c(.2, .3, .4), se=T, newdata= lung[1:5,]) aeq(qq3$fit, qq2$fit[1:5,]) qq4 <- predict(fit1, type='quantile', p=c(.2, .3, .4), se=T, newdata=lung[7,]) aeq(qq4$fit, qq2$fit[7,]) qq5 <- predict(fit1, type='quantile', p=c(.2, .3, .4), se=T, newdata=lung) aeq(qq2$fit, qq5$fit) aeq(qq2$se.fit, qq5$se.fit) survival/tests/multistate.Rout.save0000644000176200001440000001146214654222147017305 0ustar liggesusers R Under development (unstable) (2024-06-14 r86747) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: aarch64-unknown-linux-gnu R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > # > # Tests for multi-state Cox models > # The default for multi-state is now ties='breslow' > bb <- "breslow" # I'm a lazy typist > library(survival) > > aeq <- function(x,y, ...) all.equal(as.vector(x), as.vector(y), ...) > > # There are a few subjects with progression and death on the same day. In the > # usual multi-state data set only one will count. > data1 <- mgus2 > data1$etime <- with(data1, ifelse(pstat==1, ptime, futime)) > data1$event <- factor(ifelse(data1$pstat==1, 1, 2L*data1$death), + 0:2, c("censor", "PCM", "death")) > > # direct data set with 2 rows per subject, much like mstate package would do > data2 <- mgus2[rep(1:nrow(mgus2) ,2), c("id", "age", "sex", "mspike")] > data2$time <- rep(data1$etime, 2) > data2$status <- 1* c(data1$event=="PCM", data1$event=="death") > data2$type <- rep(c(2:3), each=nrow(mgus2)) > > fit1 <- coxph(Surv(etime, event) ~ age + sex + mspike, data1, id=id, x=TRUE, + robust=FALSE) > fit1a <- coxph(Surv(etime, event=="PCM") ~ age + sex + mspike, ties=bb, data1) > fit1b <- coxph(Surv(etime, event=='death') ~ age + sex + mspike, ties=bb, data1) > fit1c <- coxph(Surv(time, status) ~ strata(type)/(age + sex+ mspike), + data2, x=TRUE, ties=bb) > > aeq(fit1$loglik, fit1a$loglik + fit1b$loglik) [1] TRUE > aeq(fit1$coef, c(fit1a$coef, fit1b$coef)) [1] TRUE > aeq(fit1$var[1:3, 1:3], fit1a$var) [1] TRUE > aeq(fit1$var[4:6, 4:6], fit1b$var) [1] TRUE > aeq(fit1$coef[c(1,4,2,5,3,6)], fit1c$coef) [1] TRUE > > # force a common age effect across all states > fit2 <- coxph(list(Surv(etime, event) ~ sex, + 1:0 ~ age / common), + data1, id=id) > > data2 <- rbind(cbind(data1, status= (data1$event=="PCM"), etype=1), + cbind(data1, status= (data1$event=='death'), etype=2)) > fit2a <- coxph(Surv(etime, status) ~ age + strata(etype)/sex, data2, ties=bb) > > aeq(coef(fit2), coef(fit2a)[c(2,1,3)]) # not in the same order [1] TRUE > aeq(fit2$loglik, fit2a$loglik) [1] TRUE > > #same fit in more complex ways > data1$entry <- "Entry" > fit2b <- coxph(list(Surv(etime, event) ~ sex, + "Entry":"PCM" + "Entry":"death" ~ age / common), + istate=entry, data1, id=id) > fit2c <- coxph(list(Surv(etime, event) ~ sex, + "Entry":state(c("PCM", "death")) ~ age / common), + istate=entry, data1, id=id) > > aeq(fit2b$loglik, fit2$loglik) [1] TRUE > aeq(fit2c$coef, fit2$coef) [1] TRUE > > # mspike size as a covariate for PCM only > # first, 4 different ways to write the same > fit3 <- coxph(list(Surv(etime, event) ~ age + sex, + 1:state("PCM") ~ mspike), + data1, id=id) > fit3b <- coxph(list(Surv(etime, event) ~ age + sex, + 1:"PCM" ~ mspike), + data1, id=id) > fit3c <- coxph(list(Surv(etime, event) ~ age + sex, + 1:c("PCM") ~ mspike), + data1, id=id) > fit3d <- coxph(list(Surv(etime, event) ~ age + sex + mspike, + 1:3 ~ -mspike), data1, id=id) > > aeq(fit3b$coef, fit3$coef) [1] TRUE > aeq(fit3c$coef, fit3$coef) [1] TRUE > aeq(fit3d$coef, fit3$coef) [1] TRUE > > data3 <- data2 > data3$mspike[data3$etype==2] <- 0 > fit3a <- coxph(Surv(etime, status) ~ strata(etype)/(age + sex + mspike), + data3, ties=bb) > aeq(fit3$loglik, fit3a$loglik) [1] TRUE > aeq(fit3$coef, fit3a$coef[c(1,3,5,2,4)]) [1] TRUE > > # models with strata > test1 <- coxph(Surv(etime, event=="PCM") ~ age + mspike + strata(sex), + data1, ties=bb) > test2 <- coxph(Surv(etime, event=="death") ~ age + strata(sex), data1, ties=bb) > > sfit1 <- coxph(list(Surv(etime, event) ~ age + strata(sex), + 1:state("PCM") ~ mspike), + data1, id=id, ties=bb) > aeq(coef(sfit1), c(coef(test1), coef(test2))) [1] TRUE > > test3 <- coxph(Surv(etime, event=="death") ~ age +sex, data1, ties=bb) > sfit2 <- coxph(list(Surv(etime, event) ~ age + sex, + 1:2 ~ mspike + strata(sex) - sex), data1, id=id) > aeq(coef(sfit2), c(coef(test1), coef(test3))) [1] TRUE > > > proc.time() user system elapsed 0.581 0.035 0.614 survival/tests/nsk.Rout.save0000644000176200001440000000262114607006645015703 0ustar liggesusers R Under development (unstable) (2020-12-17 r79644) -- "Unsuffered Consequences" Copyright (C) 2020 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(survival) > library(splines) > > # the nsk function should give the same solution as ns, but with a different > # parameterization > # > xx <- runif(500, 1, 100) > yy <- 10*log(xx) + rnorm(500, 0, 2) > tdata <- data.frame(xx=xx, yy=yy) > fit1 <- lm(yy ~ ns(xx, df=4), tdata, model=TRUE) > fit2 <- lm(yy ~ nsk(xx, df=4, b=0), tdata) > all.equal(predict(fit1), predict(fit2)) # same solution [1] TRUE > > xattr <- attributes(fit1$model[[2]]) > allknots <- sort(c(xattr$knots, xattr$Boundary.knots)) # knots that were used > pred.knot <- predict(fit1, newdata=list(xx=allknots)) > all.equal(pred.knot[-1] - pred.knot[1], coef(fit2)[-1], + check.attributes = FALSE) [1] TRUE > > > proc.time() user system elapsed 0.857 0.036 0.887 survival/tests/r_lung.R0000644000176200001440000000300714607006645014710 0ustar liggesusersoptions(na.action=na.exclude) # preserve missings options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type library(survival) aeq <- function(x,y, ...) all.equal(as.vector(x), as.vector(y), ...) lfit2 <- survreg(Surv(time, status) ~ age + ph.ecog + strata(sex), lung) lfit3 <- survreg(Surv(time, status) ~ sex + (age+ph.ecog)*strata(sex), lung) lfit4 <- survreg(Surv(time, status) ~ age + ph.ecog , lung, subset=(sex==1)) lfit5 <- survreg(Surv(time, status) ~ age + ph.ecog , lung, subset=(sex==2)) if (exists('censorReg')) { lfit1 <- censorReg(censor(time, status) ~ age + ph.ecog + strata(sex),lung) aeq(lfit4$coef, lfit1[[1]]$coef) aeq(lfit4$scale, lfit1[[1]]$scale) aeq(c(lfit4$scale, lfit5$scale), sapply(lfit1, function(x) x$scale)) } aeq(c(lfit4$scale, lfit5$scale), lfit3$scale ) # # Test out ridge regression and splines # lfit0 <- survreg(Surv(time, status) ~1, lung) lfit1 <- survreg(Surv(time, status) ~ age + ridge(ph.ecog, theta=5), lung) lfit2 <- survreg(Surv(time, status) ~ sex + ridge(age, ph.ecog, theta=1), lung) lfit3 <- survreg(Surv(time, status) ~ sex + age + ph.ecog, lung) lfit0 lfit1 lfit2 lfit3 xx <- pspline(lung$age, nterm=3, theta=.3) xx <- matrix(unclass(xx), ncol=ncol(xx)) # the raw matrix lfit4 <- survreg(Surv(time, status) ~xx, lung) lfit5 <- survreg(Surv(time, status) ~age, lung) lfit6 <- survreg(Surv(time, status)~pspline(age, df=2), lung) lfit7 <- survreg(Surv(time, status) ~ offset(lfit6$lin), lung) lfit4 lfit5 lfit6 signif(lfit7$coef,6) survival/tests/coxsurv6.R0000644000176200001440000002232514654222147015224 0ustar liggesuserslibrary(survival) aeq <- function(x, y, ...) all.equal(as.vector(x), as.vector(y), ...) # Test the survival curve for a fit with shared hazards. # Use the pbcseq data set, and turn bilirubin into a time-dependent state with # 4 levels, and a shared baseline hazard for the 4 transitions to death. # The subtlety is that coefficients for a shared (proportional) baseline hazard # are attached to a state, not to an observation. # (A bilirubin value of <1 is normal.) pbc1 <- pbcseq pbc1$bili4 <- cut(pbc1$bili, c(0,1, 2,4, 100), c("normal", "1-2", "2-4", ">4")) ptemp <- subset(pbc1, !duplicated(id)) # first row of each pbc2 <- tmerge(ptemp[, c("id", "age", "sex")], ptemp, id, death= event(futime, status==2)) pbc2 <- tmerge(pbc2, pbc1, id=id, bili = tdc(day, bili), bili4 = tdc(day, bili4), bstat = event(day, as.numeric(bili4))) btemp <- with(pbc2, ifelse(death, 5, bstat)) # a row with the same starting and ending bili4 level is not an event b2 <- ifelse(((as.numeric(pbc2$bili4)) == btemp), 0, btemp) pbc2$bstat <- factor(b2, 0:5, c("censor", "normal", "1-2", "2-4", ">4", "death")) check1 <- survcheck(Surv(tstart, tstop, bstat) ~ 1, istate= bili4, id = id, data=pbc2) check1$transitions fit2 <- coxph(list(Surv(tstart, tstop, bstat) ~ 1, c(1:4):5 ~ age / common + shared), id= id, istate=bili4, data=pbc2) # Before we tackle fit2, start small with just 9 subjects, coefs fixed to # simple values to make hand computation easier. There are no transitions # from state 3 to death in this subset. Since it is a shared hazard the # subjects in state 3 ARE at risk and so are found in the denominator of the # hazard, but since none of the progress the MLE for that ph coef is -infinity. # We set it to -1. pbc3 <- subset(pbc2, id < 10) pbc3$age <- round(pbc3$age) # easier to do "by hand" sums fit3 <- coxph(list(Surv(tstart, tstop, bstat) ~ 1, c(1:4):5 ~ age / common + shared), x=TRUE, id= id, istate=bili4, data=pbc3, init= c(.05, .6, -1, 1.1), iter=0) # a mixed p0 gives a stronger test than our usual (1, 0,0,0,0) surv3 <- survfit(fit3, newdata=list(age=50), p0=c(.4, .3, .2, .1, 0)) etime <- sort(unique(pbc3$tstop[pbc3$bstat != "censor"])) # At event time 1 (182), all 9 are at risk, (3,3,2,1) in initial states 1-4 atrisk <- pbc3$tstart < etime[1] & pbc3$tstop >= etime[1] # all 9 at risk table(pbc3$bili4[atrisk]) # One event occurs at 182, a 2:1 transition (1-2 to normal) # Risk scores for the non-death transitions are all exp(0) =1, # so the hazard matrix H will have second row of (1/3, -1/3, 0,0,0) and all # other rows are 0. with(subset(pbc3, tstop== 182), table(istate= bili4, state=bstat)) # The next four events are from 3:4, 3:2, 2:3, and 1:2, so also have # simple transtions, i.e., no covariates so all risk scores are exp(0) =1 # hmat <- array(0, dim=c(5,5,6)) # first 6 hazard matrices, start with 3,3,2,1 hmat[2,1,1] <- 1/3; hmat[2,2,1] <- -1/3 # new count= 4,2,2,1 hmat[3,4,2] <- 1/2; hmat[3,3,2] <- -1/2 # new count= 4,2,1,2 hmat[3,2,3] <- 1 ; hmat[3,3,3] <- -1 # new count= 4,3,0,2 hmat[2,3,4] <- 1/3; hmat[2,2,4] <- -1/3 # new count= 4,2,1,2 hmat[1,2,5] <- 1/4; hmat[1,1,5] <- -1/4 # new count= 3,3,1,2 # Event 6 is a transition from state 4 to death, at day 400 # For the shared hazard, the denominator is all those in states 1,2,3, or 4. atrisk <- with(pbc3, tstart < etime[6] & tstop >= etime[6]) table(pbc3$bili4[atrisk]) # current states just before time 6 adata <- subset(pbc3, atrisk) eta <- with(adata, .05*(age-50) + .6*(bili4=="1-2") + 1.1*(bili4 == ">4") - 1*(bili4=="2-4")) cbind(adata[,c('id', 'age', 'tstop', 'bili4', 'bstat')], eta, risk=exp(eta)) basehaz <- 1/sum(exp(eta)) hmat[1,5,6] <- basehaz; hmat[1,1,6] <- -basehaz hmat[2,5,6] <- basehaz * exp(.6); hmat[2,2,6] <- -basehaz*exp(.6) hmat[3,5,6] <- basehaz * exp(-1); hmat[3,3,6] <- -basehaz*exp(-1) hmat[4,5,6] <- basehaz * exp(1.1); hmat[4,4,6] <- -basehaz*exp(1.1) # double check: sum of per-subject hazards at this time point = number of # events at this time point sum(basehaz * exp(eta)) ==1 tmat <- array(0., dim= dim(hmat)) # transition matrices pstate <- matrix((4:0)/10, nrow=1) for (i in 1:6) { tmat[,,i] <- as.matrix(Matrix::expm(hmat[,,i])) pstate <- rbind(pstate, pstate[i,]%*% tmat[,,i]) } dtime <- which(surv3$time %in% etime) # skip censored rows aeq(surv3$pstate[dtime[1:6],1,], pstate[-1,]) # # A function to do the above "by hand" calculations, over all time points # It is verified for the particular fit we did, but written for # more generality. # fit: a multi-state fit, with shared baselines # istate: the inital state for each row of data # p0: starting dist for compuation # x0: curve for this set of covariates # mysurv <- function(fit, istate, p0, x0, debug=0) { if (!inherits(fit, 'coxphms')) stop("invalid fit") smap <- fit$smap from <- as.numeric(sub(":.*$", "", colnames(smap))) to <- as.numeric(sub("^.*:", "", colnames(smap))) shared <- duplicated(smap[1,]) nshare <- sum(shared) bcoef <- rep(1, ncol(smap)) # coefficients for shared baseline beta <- coef(fit, matrix=TRUE) if (nshare >0) { # coefficients for shared baseline will be the last nshare of them i <- seq(length=nshare, to=length(fit$coefficients)) bcoef[shared] <- exp(fit$coefficients[i]) # remove shared coef rows from beta phrow <- apply(fit$cmap, 1, function(x) any(x %in% i)) beta <- beta[!phrow,, drop=FALSE] } # Make the values for istate and state match the 1:2, etc of the fit, # i.e., the order of fit$states # istate and state are used in tables, using factors makes sure the result # is always the right size nstate <- length(fit$states) state <- factor(fit$y[,3], 1:nstate) # endpoint of a transition if (length(istate) != nrow(fit$y)) stop ("mismatched istate") istate <- factor(as.character(istate), fit$states) # set up output ntran <- ncol(smap) # number of transitions utime <- sort(unique(fit$y[!is.na(state), 2])) # unique event times ntime <- length(utime) tmat <- matrix(0, nstate, nstate) # transtion matrix at this time point pmat <- diag(nstate) # product of transitions nrisk <- matrix(0., ntime, nstate) #number at risk wtrisk<- matrix(0., ntime, ntran) # weighted number per transtion nevent <- matrix(0L, ntime, nstate) # number of events of each type pstate <- matrix(0L, ntime, nstate) # probability in state hmat <- matrix(0., nstate, nstate) # working matrix of hazards # eta is a matrix of (x for subject - x0) %*% coef, one row per subject, # one column per transition eta <- (fit$x - rep(x0, each= nrow(fit$y))) %*% beta rwt <- exp(eta) # the risk weight for each obs t1 <- fit$y[,1] t2 <- fit$y[,2] for (i in 1:ntime) { atrisk <- (t1 < utime[i] & utime[i] <= t2) # risk set at this time event <- which(utime[i] == t2) # potential events, at this time nrisk[i,] <- c(table(istate[atrisk])) # number at risk in each state nevent[i,] <- c(table(state[event])) # The linear predictor and hence the number at risk is different for # every transition. Also, some will not be at risk for the transition. # for (k in 1:ntran) { atrisk2 <- (atrisk & (as.numeric(istate) == from[k])) wtrisk[i,k] <- sum(rwt[atrisk2,k]) } dtemp <- table(istate[event], state[event]) #censors don't count # fill in hmat, one hazard at a time hmat <- 0*hmat for (j in unique(smap)) { # for each baseline hazard k <- which(smap == j) # transitons that share this hazard deaths <- sum(dtemp[cbind(from[k], to[k])]) # total events if (deaths==0) hmat[cbind(from[k], to[k])] <- 0 # avoid 0/0 else { hazard <- deaths/ sum(wtrisk[i, k] * bcoef[k]) #shared baseline hmat[cbind(from[k], to[k])] <- hazard * bcoef[k] # PH } } diag(hmat) <- diag(hmat) - rowSums(hmat) # rows sum to zero tmat <- as.matrix(Matrix::expm(hmat)) # transtion matrix # if (i >= debug) browser() pmat <- pmat %*% tmat pstate[i,] <- drop(p0 %*% pmat) } list(time=utime, nrisk=nrisk, nevent=nevent, pstate=pstate, wtrisk= wtrisk, P=pmat) } test3 <- mysurv(fit3, pbc3$bili4, p0= 4:0/10, x0 =50) aeq(test3$pstate, surv3$pstate[match(test3$time, surv3$time),1,]) # Now with the full data set fit2 <- coxph(list(Surv(tstart, tstop, bstat) ~ 1, c(1:4):5 ~ age / common + shared), id= id, istate=bili4, data=pbc2, ties='breslow', x=TRUE) surv2 <- survfit(fit2, newdata=list(age=50), p0=c(.4, .3, .2, .1, 0)) test2 <- mysurv(fit2, pbc2$bili4, p0= 4:0/10, fit2, x0 =50) aeq(test2$pstate, surv2$pstate[match(test2$time, surv2$time),1,]) if (FALSE){ # for testing, make a plot xfun <- function(i) { j <- match(test2$time[i], surv2$time) all.equal(test2$pstate[i,], surv2$pstate[j,1,]) } plot(surv2, col=1:5, lwd=2) matpoints(test2$time, test2$pstate, col=1:5, pch='o') } survival/tests/book5.Rout.save0000644000176200001440000001571014613770353016133 0ustar liggesusers R Under development (unstable) (2024-04-17 r86441) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: aarch64-unknown-linux-gnu R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(survival) > options(na.action=na.exclude) # preserve missings > options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type > > # Tests of the weighted Cox model > # This is section 1.3 of my appendix -- not yet found in the book > # though, it awaits the next edition > # > # Similar data set to test1, but add weights, > # a double-death/censor tied time > # a censored last subject > # The latter two are cases covered only feebly elsewhere. > # > # The data set testw2 has the same data, but done via replication > # > aeq <- function(x,y) all.equal(as.vector(x), as.vector(y)) > > testw1 <- data.frame(time= c(1,1,2,2,2,2,3,4,5), + status= c(1,0,1,1,1,0,0,1,0), + x= c(2,0,1,1,0,1,0,1,0), + wt = c(1,2,3,4,3,2,1,2,1), + id = 1:9) > # Expanded data set > testw2 <- testw1[rep(1:9, testw1$wt), -4] > row.names(testw2) <- NULL > indx <- match(1:9, testw2$id) > > # Breslow estimate > byhand <- function(beta, newx=0) { + r <- exp(beta) + loglik <- 11*beta - (log(r^2 + 11*r +7) + 10*log(11*r +5) +2*log(2*r+1)) + hazard <- c(1/(r^2 + 11*r +7), 10/(11*r +5), 2/(2*r+1)) + xbar <- c((2*r^2 + 11*r)*hazard[1], 11*r/(11*r +5), r*hazard[3]) + U <- 11- (xbar[1] + 10*xbar[2] + 2*xbar[3]) + imat <- (4*r^2 + 11*r)*hazard[1] - xbar[1]^2 + + 10*(xbar[2] - xbar[2]^2) + 2*(xbar[3] - xbar[3]^2) + + temp <- cumsum(hazard) + risk <- c(r^2, 1,r,r,1,r,1,r,1) + expected <- risk* temp[c(1,1,2,2,2,2,2,3,3)] + + # The matrix of weights, one row per obs, one col per death + # deaths at 1,2,2,2, and 4 + riskmat <- matrix(c(1,1,1,1,1,1,1,1,1, + 0,0,1,1,1,1,1,1,1, + 0,0,1,1,1,1,1,1,1, + 0,0,1,1,1,1,1,1,1, + 0,0,0,0,0,0,0,1,1), ncol=5) + wtmat <- diag(c(r^2, 2, 3*r, 4*r, 3, 2*r, 1, 2*r, 1)) %*% riskmat + + x <- c(2,0,1,1,0,1,0,1,0) + status <- c(1,0,1,1,1,0,0,1,0) + wt <- c(1,2,3,4,3,2,1,2,1) + # Table of sums for score and Schoenfeld resids + hazmat <- riskmat %*% diag(c(1,3,4,3,2)/colSums(wtmat)) + dM <- -risk*hazmat #Expected part + dM[1,1] <- dM[1,1] +1 # deaths at time 1 + for (i in 2:4) dM[i+1, i] <- dM[i+1,i] +1 + dM[8,5] <- dM[8,5] +1 + mart <- rowSums(dM) + resid <-dM * outer(x, xbar[c(1,2,2,2,3)] ,'-') + + # Increments to the variance of the hazard + var.g <- cumsum(hazard^2/ c(1,10,2)) + var.d <- cumsum((xbar-newx)*hazard) + + list(loglik=loglik, U=U, imat=imat, hazard=hazard, xbar=xbar, + mart=c(1,0,1,1,1,0,0,1,0)-expected, expected=expected, + score=rowSums(resid), schoen=c(2,1,1,0,1) - xbar[c(1,2,2,2,3)], + varhaz=(var.g + var.d^2/imat)* exp(2*beta*newx)) + } > > aeq(byhand(0)$expected, c(1/19, 1/19, rep(103/152, 5), rep(613/456,2))) #verify [1] TRUE > > fit0 <- coxph(Surv(time, status) ~x, testw1, weights=wt, + method='breslow', iter=0) > fit0b <- coxph(Surv(time, status) ~x, testw2, method='breslow', iter=0) > fit <- coxph(Surv(time, status) ~x, testw1, weights=wt, method='breslow') > fitb <- coxph(Surv(time, status) ~x, testw2, method='breslow') > > aeq(resid(fit0, type='mart'), (resid(fit0b, type='mart'))[indx]) [1] TRUE > aeq(resid(fit0, type='scor'), (resid(fit0b, type='scor'))[indx]) [1] TRUE > aeq(unique(resid(fit0, type='scho')), unique(resid(fit0b, type='scho'))) [1] TRUE > > truth0 <- byhand(0,pi) > aeq(fit0$loglik[1], truth0$loglik) [1] TRUE > aeq(1/truth0$imat, fit0$var) [1] TRUE > aeq(truth0$mart, fit0$residuals) [1] TRUE > aeq(truth0$schoen, resid(fit0, 'schoen')) [1] TRUE > aeq(truth0$score, resid(fit0, 'score')) [1] TRUE > sfit <- survfit(fit0, list(x=pi), censor=FALSE) > aeq(sfit$std.err^2, truth0$varhaz) [1] TRUE > aeq(-log(sfit$surv), cumsum(truth0$hazard)) [1] TRUE > > truth <- byhand(0.85955744, .3) > aeq(truth$loglik, fit$loglik[2]) [1] TRUE > aeq(1/truth$imat, fit$var) [1] TRUE > aeq(truth$mart, fit$residuals) [1] TRUE > aeq(truth$schoen, resid(fit, 'schoen')) [1] TRUE > aeq(truth$score, resid(fit, 'score')) [1] TRUE > > sfit <- survfit(fit, list(x=.3), censor=FALSE) > aeq(sfit$std.err^2, truth$varhaz) [1] TRUE > aeq(-log(sfit$surv), (cumsum(truth$hazard)* exp(fit$coefficients*.3))) [1] TRUE > > > fit0 Call: coxph(formula = Surv(time, status) ~ x, data = testw1, weights = wt, method = "breslow", iter = 0) coef exp(coef) se(coef) z p x 0.0000 1.0000 0.5858 0 1 Likelihood ratio test=0 on 1 df, p=1 n= 9, number of events= 5 > summary(fit) Call: coxph(formula = Surv(time, status) ~ x, data = testw1, weights = wt, method = "breslow") n= 9, number of events= 5 coef exp(coef) se(coef) z Pr(>|z|) x 0.8596 2.3621 0.7131 1.205 0.228 exp(coef) exp(-coef) lower .95 upper .95 x 2.362 0.4233 0.5839 9.556 Concordance= 0.637 (se = 0.161 ) Likelihood ratio test= 1.69 on 1 df, p=0.2 Wald test = 1.45 on 1 df, p=0.2 Score (logrank) test = 1.52 on 1 df, p=0.2 > resid(fit0, type='score') 1 2 3 4 5 6 1.24653740 0.03601108 0.10056700 0.10056700 -0.22180142 -0.21193300 7 8 9 0.46569858 -0.10082189 0.91014302 > resid(fit0, type='scho') 1 2 2 2 4 1.3157895 0.3125000 0.3125000 -0.6875000 0.3333333 > > resid(fit, type='score') 1 2 3 4 5 6 0.88681615 0.02497653 0.03608964 0.03608964 -0.54297652 -0.12528780 7 8 9 0.29564605 -0.09476911 0.58400064 > resid(fit, type='scho') 1 2 2 2 4 1.0368337 0.1613774 0.1613774 -0.8386226 0.1746960 > aeq(resid(fit, type='mart'), (resid(fitb, type='mart'))[indx]) [1] TRUE > aeq(resid(fit, type='scor'), (resid(fitb, type='scor'))[indx]) [1] TRUE > aeq(unique(resid(fit, type='scho')), unique(resid(fitb, type='scho'))) [1] TRUE > rr1 <- resid(fit, type='mart') > rr2 <- resid(fit, type='mart', weighted=T) > aeq(rr2/rr1, testw1$wt) [1] TRUE > > rr1 <- resid(fit, type='score') > rr2 <- resid(fit, type='score', weighted=T) > aeq(rr2/rr1, testw1$wt) [1] TRUE > > > proc.time() user system elapsed 0.423 0.031 0.452 survival/tests/anova.R0000644000176200001440000000204214607006645014524 0ustar liggesusers# # Test out anova, with strata terms # options(na.action=na.omit) library(survival) fit1 <- coxph(Surv(time, status) ~ ph.ecog + wt.loss + strata(sex) + poly(age,3), lung) ztemp <- anova(fit1) tdata <- na.omit(lung[, c('time', 'status', 'ph.ecog', 'wt.loss', 'sex', 'age')]) fit2 <- coxph(Surv(time, status)~ ph.ecog + wt.loss + poly(age,3) + strata(sex), data=tdata) ztemp2 <- anova(fit2) all.equal(ztemp, ztemp2) fit2 <- coxph(Surv(time, status) ~ ph.ecog + wt.loss + strata(sex), tdata) fit3 <- coxph(Surv(time, status) ~ ph.ecog + strata(sex), tdata) all.equal(ztemp$loglik, c(fit1$loglik[1], fit3$loglik[2], fit2$loglik[2], fit1$loglik[2])) all.equal(ztemp$Chisq[-1], 2* diff(ztemp$loglik)) all.equal(ztemp$Df[-1], c(1,1,3)) ztemp2 <- anova(fit3, fit2, fit1) all.equal(ztemp2$loglik, ztemp$loglik[-1]) all.equal(ztemp2$Chisq[2:3], ztemp$Chisq[3:4]) # Change from ztemp2$P; it's a data frame and in R 3.0.2 abbreviated names # give a warning all.equal(ztemp2[[4]][2:3], ztemp[[4]][3:4]) survival/tests/singtest.Rout.save0000644000176200001440000000343714607006645016756 0ustar liggesusers R Under development (unstable) (2018-04-09 r74565) -- "Unsuffered Consequences" Copyright (C) 2018 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > options(na.action=na.exclude) # preserve missings > options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type > library(survival) > > # > # A simple test of an overdetermined system > # Should give a set of NA coefficients > # > test1 <- data.frame(time= c(4, 3,1,1,2,2,3), + status=c(1,NA,1,0,1,1,0), + x= c(0, 2,1,1,1,0,0)) > > temp <- rep(0:3, rep(7,4)) > > stest <- data.frame(start = 10*temp, + stop = 10*temp + test1$time, + status = rep(test1$status,4), + x = c(test1$x+ 1:7, rep(test1$x,3)), + epoch = rep(1:4, rep(7,4))) > > # Will create a warning about a singular X matrix > fit1 <- coxph(Surv(start, stop, status) ~ x * factor(epoch), stest) > fit1$coef # elements 2:4 should be NA x factor(epoch)2 factor(epoch)3 factor(epoch)4 0.1041579 NA NA NA x:factor(epoch)2 x:factor(epoch)3 x:factor(epoch)4 1.5726996 1.5726996 1.5726996 > all.equal(is.na(fit1$coef), c(F,T,T,T,F,F,F), check.attributes=FALSE) [1] TRUE > > proc.time() user system elapsed 0.668 0.040 0.704 survival/tests/doaml.R0000644000176200001440000000356514612274303014521 0ustar liggesusersoptions(na.action=na.exclude) # preserve missings options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type library(survival) aeq <- function(x,y) all.equal(as.vector(x), as.vector(y)) # # These results can be found in Miller # fit <- coxph(Surv(aml$time, aml$status) ~ aml$x, method='breslow') fit resid(fit, type='mart') resid(fit, type='score') resid(fit, type='scho') # Test the drop of an itercept: should have no effect fit2 <- coxph(Surv(time, status) ~ x -1, method='breslow', data=aml) aeq(fit$loglik, fit2$loglik) aeq(coef(fit), coef(fit2)) aeq(fit$var, fit2$var) fit <- survfit(Surv(aml$time, aml$status) ~ aml$x) fit summary(fit) survdiff(Surv(aml$time, aml$status)~ aml$x) # # Test out the weighted K-M # # First, equal case weights- shouldn't change the survival, but will # halve the variance temp2 <-survfit(Surv(aml$time, aml$status)~1, weights=rep(2,23)) temp <-survfit(Surv(time, status)~1, aml) aeq(temp$surv, temp2$surv) aeq(temp$std.err^2, 2*temp2$std.err^2) # Risk weights-- use a null Cox model tfit <- coxph(Surv(aml$time, aml$status) ~ offset(log(1:23))) sfit <- survfit(tfit, stype=2, ctype=1, censor=FALSE) # Now compute it by hand. The survfit program will produce a curve # corresponding to the mean offset. # Ties are a nuisance, the line above forced the Nelson rather than Efron # to make it easier rscore <- exp(log(1:23) - mean(log(1:23)))[order(aml$time)] atime <- sort(aml$time) denom <- rev(cumsum(rev(rscore))) denom <- denom[match(unique(atime), atime)] deaths <- tapply(aml$status, aml$time, sum) chaz <- cumsum(deaths/denom) all.equal(sfit$surv, as.vector(exp(-chaz[deaths>0]))) # And the Efron result summary(survfit(tfit)) # Lots of ties, so its a good test case x1 <- coxph(Surv(time, status)~x, aml, method='efron') x1 x2 <- coxph(Surv(rep(0,23),time, status) ~x, aml, method='efron') aeq(x1$coef, x2$coef) survival/tests/pyear.Rout.save0000644000176200001440000004046514607006645016240 0ustar liggesusers R Under development (unstable) (2024-02-07 r85873) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > options(na.action=na.exclude) # preserve missings > options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type > library(survival) > mdy.date <- function(m, d, y) { + y <- ifelse(y<100, y+1900, y) + as.Date(paste(m,d,y, sep='/'), "%m/%d/%Y") + } > > # > # Simple case: a single male subject, born 6/6/36 and entered on study 6/6/55. > # > > temp1 <- mdy.date(6,6,36) > temp2 <- mdy.date(6,6,55)# Now compare the results from person-years > # > temp.age <- tcut(temp2-temp1, floor(c(-1, (18:31 * 365.24))), + labels=c('0-18', paste(18:30, 19:31, sep='-'))) > temp.yr <- tcut(temp2, mdy.date(1,1,1954:1965), labels=1954:1964) > temp.time <- 3700 #total days of fu > py1 <- pyears(temp.time ~ temp.age + temp.yr, scale=1) #output in days > > # The subject should appear in 20 cells > # 6/6/55 - 12/31/55, 209 days, age 19-20, 1955 > # 1/1/56 - 6/ 4/56, 156 days, age 19-20, 1956 > # 6/5/56 - 12/31/56, 210 days, age 20-21, 1956 (a leap year, and his > # birthday computes one day earlier) > # 1/1/57 - 6/ 5/57, 156 days, age 20-21, 1957 > # 6/6/57 - 12/31/57, 209 days, age 21-22, 1957 > # and etc > # with 203 days "off table", ie, beyond the last cell of the table > # > # It is a nuisance, but tcut follows 'cut' in that we give the ENDS of > # the intervals, whereas the survival tables use the starts of intervals. > # Thus this breakdown does not match that in doexpect.s > # > xx <- matrix(0, nrow=14, ncol=11) > xx[cbind(3:11, 3:11)] <- 156 > xx[cbind(3:12, 2:11)] <- c(209, 210, rep(c(209, 209, 209, 210),2)) > dimnames(xx) <- list(temp.age= c('0-18', paste(18:30, 19:31, sep='-')), + temp.yr = 1954:1964) > all.equal(xx, py1$pyears) [1] TRUE > all.equal(203, py1$offtable) [1] TRUE > all.equal(1*(xx>0), py1$n) [1] TRUE > > # > # Now with expecteds > # > py2 <- pyears(temp.time ~ temp.age + temp.yr, + rmap=list(age=temp2-temp1, year=temp2, sex=1), + scale=1, ratetable=survexp.us ) #output in days > all.equal(xx, py2$pyears) [1] TRUE > all.equal(203, py2$offtable) [1] TRUE > all.equal(1*(xx>0), py2$n) [1] TRUE > > > py3 <- pyears(temp.time ~ temp.age + temp.yr, + rmap=list(age=temp2-temp1, year=temp2, sex=1), + scale=1, ratetable=survexp.us , expect='pyears') > all.equal(py2$n, py3$n) [1] TRUE > all.equal(py2$pyear, py3$pyear) [1] TRUE > all.equal(py3$n, 1*(py3$expect>0)) [1] TRUE > > # Now, compute the py3 result "by hand". Since there is only one person > # it can be derived from py2. > # > xx1 <- py2$expect[py2$n>0] # the hazard over each interval > cumhaz <- cumsum(c(0, xx1[-length(xx1)])) # the cumulative hazard > xx2 <- py3$expect[py3$n>0] # the expected number of person days > xx3 <- py3$pyears[py3$n>0] # the potential number of person days > > # This is the integral of the curve "exp(-haz *t)" over the interval > integral <- xx3 * exp(-cumhaz)* (1- exp(-xx1))/ xx1 > # They might not be exactly equal, since the C code tracks changes in the > # rate tables that occur -within- an interval. So try for 6 digits > all.equal(round(integral,3), round(xx2,3)) [1] TRUE > > # Cut off the bottom of the table, instead of the side > temp.age <- tcut(temp2-temp1, floor(c(-1, (18:27 * 365.24))), + labels=c('0-18', paste(18:26, 19:27, sep='-'))) > > py4 <- eval(py3$call) > all.equal(py4$pyear, py3$pyear[1:10,]) [1] TRUE > all.equal(py4$expect, py3$expect[1:10,]) [1] TRUE > > > rm(temp.age, integral, xx1, xx2, xx3, cumhaz, py1, py2, py3, py4) > rm(temp1, temp2, temp.yr, temp.time, xx) > > > > > # > # Simple case: a single male subject, born 6/6/36 and entered on study 6/6/55. > # > > temp1 <- mdy.date(6,6,36) > temp2 <- mdy.date(6,6,55)# Now compare the results from person-years > # > temp.age <- tcut(temp2-temp1, floor(c(-1, (18:31 * 365.24))), + labels=c('0-18', paste(18:30, 19:31, sep='-'))) > temp.yr <- tcut(temp2, mdy.date(1,1,1954:1965), labels=1954:1964) > temp.time <- 3700 #total days of fu > py1 <- pyears(temp.time ~ temp.age + temp.yr, scale=1) #output in days > > # The subject should appear in 20 cells > # 6/6/55 - 12/31/55, 209 days, age 19-20, 1955 > # 1/1/56 - 6/ 4/56, 156 days, age 19-20, 1956 > # 6/5/56 - 12/31/56, 210 days, age 20-21, 1956 (a leap year, and his > # birthday computes one day earlier) > # 1/1/57 - 6/ 5/57, 156 days, age 20-21, 1957 > # 6/6/57 - 12/31/57, 209 days, age 21-22, 1957 > # and etc > # with 203 days "off table", ie, beyond the last cell of the table > # > # It is a nuisance, but tcut follows 'cut' in that we give the ENDS of > # the intervals, whereas the survival tables use the starts of intervals. > # > xx <- matrix(0, nrow=14, ncol=11) > xx[cbind(3:11, 3:11)] <- 156 > xx[cbind(3:12, 2:11)] <- c(209, 210, rep(c(209, 209, 209, 210),2)) > dimnames(xx) <- list(temp.age= c('0-18', paste(18:30, 19:31, sep='-')), + temp.yr = 1954:1964) > all.equal(xx, py1$pyears) [1] TRUE > all.equal(203, py1$offtable) [1] TRUE > all.equal(1*(xx>0), py1$n) [1] TRUE > > # > # Now with expecteds > # > py2 <- pyears(temp.time ~ temp.age + temp.yr, + rmap= list(age=temp2-temp1, year=temp2, sex=1), + scale=1, ratetable=survexp.us ) #output in days > all.equal(xx, py2$pyears) [1] TRUE > all.equal(203, py2$offtable) [1] TRUE > all.equal(1*(xx>0), py2$n) [1] TRUE > > > py3 <- pyears(temp.time ~ temp.age + temp.yr, + rmap= list(age=temp2-temp1, year=temp2, sex=1), + scale=1, ratetable=survexp.us , expect='pyears') > all.equal(py2$n, py3$n) [1] TRUE > all.equal(py2$pyear, py3$pyear) [1] TRUE > all.equal(py3$n, 1*(py3$expect>0)) [1] TRUE > > # Now, compute the py3 result "by hand". Since there is only one person > # it can be derived from py2. > # > xx1 <- py2$expect[py2$n>0] # the hazard over each interval > cumhaz <- cumsum(c(0, xx1[-length(xx1)])) # the cumulative hazard > xx2 <- py3$expect[py3$n>0] # the expected number of person days > xx3 <- py3$pyears[py3$n>0] # the potential number of person days > > # This is the integral of the curve "exp(-haz *t)" over the interval > integral <- xx3 * exp(-cumhaz)* (1- exp(-xx1))/ xx1 > # They might not be exactly equal, since the C code tracks changes in the > # rate tables that occur -within- an interval. So try for 6 digits > all.equal(round(integral,3), round(xx2,3)) [1] TRUE > > # Cut off the bottom of the table, instead of the side > temp.age <- tcut(temp2-temp1, floor(c(-1, (18:27 * 365.24))), + labels=c('0-18', paste(18:26, 19:27, sep='-'))) > > py4 <- eval(py3$call) > all.equal(py4$pyear, py3$pyear[1:10,]) [1] TRUE > all.equal(py4$expect, py3$expect[1:10,]) [1] TRUE > > > rm(temp.age, integral, xx1, xx2, xx3, cumhaz, py1, py2, py3, py4) > rm(temp1, temp2, temp.yr, temp.time, xx) > > > > > # > # Create a "user defined" rate table, using the smoking data > # > temp <- scan("data.smoke")/100000 Read 224 items > temp <- matrix(temp, ncol=8, byrow=T) > smoke.rate <- c(rep(temp[,1],6), rep(temp[,2],6), temp[,3:8]) > attributes(smoke.rate) <- list( + dim=c(7,2,2,6,3), + dimnames=list(c("45-49","50-54","55-59","60-64","65-69","70-74","75-79"), + c("1-20", "21+"), + c("Male","Female"), + c("<1", "1-2", "3-5", "6-10", "11-15", ">=16"), + c("Never", "Current", "Former")), + dimid=c("age", "amount", "sex", "duration", "status"), + factor=c(0,1,1,0,1), + cutpoints=list(c(45,50,55,60,65,70,75),NULL, NULL, + c(0,1,3,6,11,16),NULL), + class='ratetable' + ) > rm(temp) > > is.ratetable(smoke.rate) [1] TRUE > summary(smoke.rate) Rate table with 5 dimensions: age ranges from 45 to 75; with 7 categories amount has levels of: 1-20 21+ sex has levels of: Male Female duration ranges from 0 to 16; with 6 categories status has levels of: Never Current Former > print(smoke.rate) Rate table with dimension(s): age amount sex duration status , , Male, <1, Never 1-20 21+ 45-49 0.001860 0.001860 50-54 0.002556 0.002556 55-59 0.004489 0.004489 60-64 0.007337 0.007337 65-69 0.011194 0.011194 70-74 0.020705 0.020705 75-79 0.036753 0.036753 , , Female, <1, Never 1-20 21+ 45-49 0.001257 0.001257 50-54 0.001773 0.001773 55-59 0.002448 0.002448 60-64 0.003977 0.003977 65-69 0.006921 0.006921 70-74 0.011600 0.011600 75-79 0.020708 0.020708 , , Male, 1-2, Never 1-20 21+ 45-49 0.001860 0.001860 50-54 0.002556 0.002556 55-59 0.004489 0.004489 60-64 0.007337 0.007337 65-69 0.011194 0.011194 70-74 0.020705 0.020705 75-79 0.036753 0.036753 , , Female, 1-2, Never 1-20 21+ 45-49 0.001257 0.001257 50-54 0.001773 0.001773 55-59 0.002448 0.002448 60-64 0.003977 0.003977 65-69 0.006921 0.006921 70-74 0.011600 0.011600 75-79 0.020708 0.020708 , , Male, 3-5, Never 1-20 21+ 45-49 0.001860 0.001860 50-54 0.002556 0.002556 55-59 0.004489 0.004489 60-64 0.007337 0.007337 65-69 0.011194 0.011194 70-74 0.020705 0.020705 75-79 0.036753 0.036753 , , Female, 3-5, Never 1-20 21+ 45-49 0.001257 0.001257 50-54 0.001773 0.001773 55-59 0.002448 0.002448 60-64 0.003977 0.003977 65-69 0.006921 0.006921 70-74 0.011600 0.011600 75-79 0.020708 0.020708 , , Male, 6-10, Never 1-20 21+ 45-49 0.001860 0.001860 50-54 0.002556 0.002556 55-59 0.004489 0.004489 60-64 0.007337 0.007337 65-69 0.011194 0.011194 70-74 0.020705 0.020705 75-79 0.036753 0.036753 , , Female, 6-10, Never 1-20 21+ 45-49 0.001257 0.001257 50-54 0.001773 0.001773 55-59 0.002448 0.002448 60-64 0.003977 0.003977 65-69 0.006921 0.006921 70-74 0.011600 0.011600 75-79 0.020708 0.020708 , , Male, 11-15, Never 1-20 21+ 45-49 0.001860 0.001860 50-54 0.002556 0.002556 55-59 0.004489 0.004489 60-64 0.007337 0.007337 65-69 0.011194 0.011194 70-74 0.020705 0.020705 75-79 0.036753 0.036753 , , Female, 11-15, Never 1-20 21+ 45-49 0.001257 0.001257 50-54 0.001773 0.001773 55-59 0.002448 0.002448 60-64 0.003977 0.003977 65-69 0.006921 0.006921 70-74 0.011600 0.011600 75-79 0.020708 0.020708 , , Male, >=16, Never 1-20 21+ 45-49 0.001860 0.001860 50-54 0.002556 0.002556 55-59 0.004489 0.004489 60-64 0.007337 0.007337 65-69 0.011194 0.011194 70-74 0.020705 0.020705 75-79 0.036753 0.036753 , , Female, >=16, Never 1-20 21+ 45-49 0.001257 0.001257 50-54 0.001773 0.001773 55-59 0.002448 0.002448 60-64 0.003977 0.003977 65-69 0.006921 0.006921 70-74 0.011600 0.011600 75-79 0.020708 0.020708 , , Male, <1, Current 1-20 21+ 45-49 0.004392 0.006100 50-54 0.007027 0.009156 55-59 0.011324 0.013910 60-64 0.019811 0.023934 65-69 0.030030 0.034979 70-74 0.046975 0.058613 75-79 0.073406 0.062500 , , Female, <1, Current 1-20 21+ 45-49 0.002256 0.002779 50-54 0.003538 0.005179 55-59 0.005428 0.008235 60-64 0.008580 0.013029 65-69 0.014962 0.019349 70-74 0.020848 0.028270 75-79 0.033195 0.042731 , , Male, 1-2, Current 1-20 21+ 45-49 0.004392 0.006100 50-54 0.007027 0.009156 55-59 0.011324 0.013910 60-64 0.019811 0.023934 65-69 0.030030 0.034979 70-74 0.046975 0.058613 75-79 0.073406 0.062500 , , Female, 1-2, Current 1-20 21+ 45-49 0.002256 0.002779 50-54 0.003538 0.005179 55-59 0.005428 0.008235 60-64 0.008580 0.013029 65-69 0.014962 0.019349 70-74 0.020848 0.028270 75-79 0.033195 0.042731 , , Male, 3-5, Current 1-20 21+ 45-49 0.004392 0.006100 50-54 0.007027 0.009156 55-59 0.011324 0.013910 60-64 0.019811 0.023934 65-69 0.030030 0.034979 70-74 0.046975 0.058613 75-79 0.073406 0.062500 , , Female, 3-5, Current 1-20 21+ 45-49 0.002256 0.002779 50-54 0.003538 0.005179 55-59 0.005428 0.008235 60-64 0.008580 0.013029 65-69 0.014962 0.019349 70-74 0.020848 0.028270 75-79 0.033195 0.042731 , , Male, 6-10, Current 1-20 21+ 45-49 0.004392 0.006100 50-54 0.007027 0.009156 55-59 0.011324 0.013910 60-64 0.019811 0.023934 65-69 0.030030 0.034979 70-74 0.046975 0.058613 75-79 0.073406 0.062500 , , Female, 6-10, Current 1-20 21+ 45-49 0.002256 0.002779 50-54 0.003538 0.005179 55-59 0.005428 0.008235 60-64 0.008580 0.013029 65-69 0.014962 0.019349 70-74 0.020848 0.028270 75-79 0.033195 0.042731 , , Male, 11-15, Current 1-20 21+ 45-49 0.004392 0.006100 50-54 0.007027 0.009156 55-59 0.011324 0.013910 60-64 0.019811 0.023934 65-69 0.030030 0.034979 70-74 0.046975 0.058613 75-79 0.073406 0.062500 , , Female, 11-15, Current 1-20 21+ 45-49 0.002256 0.002779 50-54 0.003538 0.005179 55-59 0.005428 0.008235 60-64 0.008580 0.013029 65-69 0.014962 0.019349 70-74 0.020848 0.028270 75-79 0.033195 0.042731 , , Male, >=16, Current 1-20 21+ 45-49 0.004392 0.006100 50-54 0.007027 0.009156 55-59 0.011324 0.013910 60-64 0.019811 0.023934 65-69 0.030030 0.034979 70-74 0.046975 0.058613 75-79 0.073406 0.062500 , , Female, >=16, Current 1-20 21+ 45-49 0.002256 0.002779 50-54 0.003538 0.005179 55-59 0.005428 0.008235 60-64 0.008580 0.013029 65-69 0.014962 0.019349 70-74 0.020848 0.028270 75-79 0.033195 0.042731 , , Male, <1, Former 1-20 21+ 45-49 0.002344 0.004975 50-54 0.005447 0.004828 55-59 0.009452 0.017571 60-64 0.011777 0.015784 65-69 0.022449 0.023018 70-74 0.042553 0.031746 75-79 0.058824 0.040000 , , Female, <1, Former 1-20 21+ 45-49 0.000000 0.002667 50-54 0.001168 0.001387 55-59 0.002874 0.004736 60-64 0.010163 0.011148 65-69 0.011080 0.023196 70-74 0.006452 0.046358 75-79 0.000000 0.024096 , , Male, 1-2, Former 1-20 21+ 45-49 0.003658 0.002517 50-54 0.004310 0.005007 55-59 0.007288 0.009535 60-64 0.015892 0.018472 65-69 0.033803 0.037766 70-74 0.050830 0.029740 75-79 0.065972 0.044248 , , Female, 1-2, Former 1-20 21+ 45-49 0.004339 0.001027 50-54 0.000921 0.004668 55-59 0.002595 0.006020 60-64 0.003650 0.008621 65-69 0.013485 0.012500 70-74 0.014831 0.025172 75-79 0.025806 0.057692 , , Male, 3-5, Former 1-20 21+ 45-49 0.001596 0.004175 50-54 0.004548 0.004889 55-59 0.007294 0.010258 60-64 0.013165 0.017901 65-69 0.023749 0.020810 70-74 0.044850 0.037129 75-79 0.077075 0.073298 , , Female, 3-5, Former 1-20 21+ 45-49 0.002120 0.001786 50-54 0.002895 0.002701 55-59 0.003759 0.003610 60-64 0.006509 0.006996 65-69 0.012632 0.016880 70-74 0.012500 0.016873 75-79 0.025907 0.031250 , , Male, 6-10, Former 1-20 21+ 45-49 0.002169 0.001226 50-54 0.003497 0.004029 55-59 0.005902 0.007440 60-64 0.012669 0.012207 65-69 0.018202 0.027664 70-74 0.038887 0.039888 75-79 0.049451 0.063830 , , Female, 6-10, Former 1-20 21+ 45-49 0.001072 0.002247 50-54 0.002009 0.001902 55-59 0.001658 0.004545 60-64 0.004708 0.005417 65-69 0.008648 0.008287 70-74 0.011263 0.028487 75-79 0.039604 0.029787 , , Male, 11-15, Former 1-20 21+ 45-49 0.001674 0.001983 50-54 0.002140 0.003939 55-59 0.004473 0.006685 60-64 0.008756 0.011000 65-69 0.016691 0.022681 70-74 0.031843 0.032686 75-79 0.056180 0.076661 , , Female, 11-15, Former 1-20 21+ 45-49 0.001359 0.001421 50-54 0.001213 0.001168 55-59 0.002022 0.004122 60-64 0.005706 0.003731 65-69 0.005866 0.007979 70-74 0.010705 0.016212 75-79 0.016667 0.028037 , , Male, >=16, Former 1-20 21+ 45-49 0.001595 0.001934 50-54 0.002504 0.003543 55-59 0.004366 0.005378 60-64 0.007030 0.009933 65-69 0.011592 0.012307 70-74 0.021949 0.024689 75-79 0.041289 0.050481 , , Female, >=16, Former 1-20 21+ 45-49 0.000910 0.001388 50-54 0.001721 0.000830 55-59 0.002472 0.001821 60-64 0.003197 0.003564 65-69 0.006180 0.005815 70-74 0.012721 0.013634 75-79 0.018615 0.021954 > > summary(smoke.rate[1:3,,1,,]) #test subscripting Rate table with 4 dimensions: age ranges from 45 to 55; with 3 categories amount has levels of: 1-20 21+ duration ranges from 0 to 16; with 6 categories status has levels of: Never Current Former > > proc.time() user system elapsed 0.881 0.097 0.969 survival/tests/stratatest.Rout.save0000644000176200001440000000453714607006645017316 0ustar liggesusers R version 2.7.1 (2008-06-23) Copyright (C) 2008 The R Foundation for Statistical Computing ISBN 3-900051-07-0 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > options(na.action=na.exclude) # preserve missings > options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type > library(survival) Loading required package: splines > > # > # Trivial test of stratified residuals > # Make a second strata = replicate of the first, and I should get the > # exact same answers > test1 <- data.frame(time= c(9, 3,1,1,6,6,8), + status=c(1,NA,1,0,1,1,0), + x= c(0, 2,1,1,1,0,0)) > test2 <- data.frame(start=c(1, 2, 5, 2, 1, 7, 3, 4, 8, 8), + stop =c(2, 3, 6, 7, 8, 9, 9, 9,14,17), + event=c(1, 1, 1, 1, 1, 1, 1, 0, 0, 0), + x =c(1, 0, 0, 1, 0, 1, 1, 1, 0, 0) ) > > temp <- as.matrix(test1) > n <- nrow(temp) > ndead<- sum(test1$status[!is.na(test1$status)]) > temp <- data.frame(rbind(temp, temp)) #later releases of S have rbind.data.frame > tstrat <- rep(1:2, c(n,n)) > > fit1 <- coxph(Surv(time, status) ~x, test1) > fit2 <- coxph(Surv(time, status) ~x + strata(tstrat), temp) > > all.equal(resid(fit1) , (resid(fit2))[1:n]) [1] TRUE > all.equal(resid(fit1, type='score') , (resid(fit2, type='score'))[1:n]) [1] TRUE > all.equal(resid(fit1, type='schoe') , (resid(fit2, type='schoe'))[1:ndead]) [1] TRUE > > > #AG model > temp <- as.matrix(test2) > n <- nrow(temp) > ndead<- sum(test2$event[!is.na(test2$event)]) > temp <- data.frame(rbind(temp, temp)) > tstrat <- rep(1:2, c(n,n)) > > fit1 <- coxph(Surv(start, stop, event) ~x, test2) > fit2 <- coxph(Surv(start, stop, event) ~x + strata(tstrat), temp) > > all.equal(resid(fit1) , (resid(fit2))[1:n]) [1] TRUE > all.equal(resid(fit1, type='score') , (resid(fit2, type='score'))[1:n]) [1] TRUE > all.equal(resid(fit1, type='schoe') , (resid(fit2, type='schoe'))[1:ndead]) [1] TRUE > survival/tests/multistate.R0000644000176200001440000000741214654222147015620 0ustar liggesusers# # Tests for multi-state Cox models # The default for multi-state is now ties='breslow' bb <- "breslow" # I'm a lazy typist library(survival) aeq <- function(x,y, ...) all.equal(as.vector(x), as.vector(y), ...) # There are a few subjects with progression and death on the same day. In the # usual multi-state data set only one will count. data1 <- mgus2 data1$etime <- with(data1, ifelse(pstat==1, ptime, futime)) data1$event <- factor(ifelse(data1$pstat==1, 1, 2L*data1$death), 0:2, c("censor", "PCM", "death")) # direct data set with 2 rows per subject, much like mstate package would do data2 <- mgus2[rep(1:nrow(mgus2) ,2), c("id", "age", "sex", "mspike")] data2$time <- rep(data1$etime, 2) data2$status <- 1* c(data1$event=="PCM", data1$event=="death") data2$type <- rep(c(2:3), each=nrow(mgus2)) fit1 <- coxph(Surv(etime, event) ~ age + sex + mspike, data1, id=id, x=TRUE, robust=FALSE) fit1a <- coxph(Surv(etime, event=="PCM") ~ age + sex + mspike, ties=bb, data1) fit1b <- coxph(Surv(etime, event=='death') ~ age + sex + mspike, ties=bb, data1) fit1c <- coxph(Surv(time, status) ~ strata(type)/(age + sex+ mspike), data2, x=TRUE, ties=bb) aeq(fit1$loglik, fit1a$loglik + fit1b$loglik) aeq(fit1$coef, c(fit1a$coef, fit1b$coef)) aeq(fit1$var[1:3, 1:3], fit1a$var) aeq(fit1$var[4:6, 4:6], fit1b$var) aeq(fit1$coef[c(1,4,2,5,3,6)], fit1c$coef) # force a common age effect across all states fit2 <- coxph(list(Surv(etime, event) ~ sex, 1:0 ~ age / common), data1, id=id) data2 <- rbind(cbind(data1, status= (data1$event=="PCM"), etype=1), cbind(data1, status= (data1$event=='death'), etype=2)) fit2a <- coxph(Surv(etime, status) ~ age + strata(etype)/sex, data2, ties=bb) aeq(coef(fit2), coef(fit2a)[c(2,1,3)]) # not in the same order aeq(fit2$loglik, fit2a$loglik) #same fit in more complex ways data1$entry <- "Entry" fit2b <- coxph(list(Surv(etime, event) ~ sex, "Entry":"PCM" + "Entry":"death" ~ age / common), istate=entry, data1, id=id) fit2c <- coxph(list(Surv(etime, event) ~ sex, "Entry":state(c("PCM", "death")) ~ age / common), istate=entry, data1, id=id) aeq(fit2b$loglik, fit2$loglik) aeq(fit2c$coef, fit2$coef) # mspike size as a covariate for PCM only # first, 4 different ways to write the same fit3 <- coxph(list(Surv(etime, event) ~ age + sex, 1:state("PCM") ~ mspike), data1, id=id) fit3b <- coxph(list(Surv(etime, event) ~ age + sex, 1:"PCM" ~ mspike), data1, id=id) fit3c <- coxph(list(Surv(etime, event) ~ age + sex, 1:c("PCM") ~ mspike), data1, id=id) fit3d <- coxph(list(Surv(etime, event) ~ age + sex + mspike, 1:3 ~ -mspike), data1, id=id) aeq(fit3b$coef, fit3$coef) aeq(fit3c$coef, fit3$coef) aeq(fit3d$coef, fit3$coef) data3 <- data2 data3$mspike[data3$etype==2] <- 0 fit3a <- coxph(Surv(etime, status) ~ strata(etype)/(age + sex + mspike), data3, ties=bb) aeq(fit3$loglik, fit3a$loglik) aeq(fit3$coef, fit3a$coef[c(1,3,5,2,4)]) # models with strata test1 <- coxph(Surv(etime, event=="PCM") ~ age + mspike + strata(sex), data1, ties=bb) test2 <- coxph(Surv(etime, event=="death") ~ age + strata(sex), data1, ties=bb) sfit1 <- coxph(list(Surv(etime, event) ~ age + strata(sex), 1:state("PCM") ~ mspike), data1, id=id, ties=bb) aeq(coef(sfit1), c(coef(test1), coef(test2))) test3 <- coxph(Surv(etime, event=="death") ~ age +sex, data1, ties=bb) sfit2 <- coxph(list(Surv(etime, event) ~ age + sex, 1:2 ~ mspike + strata(sex) - sex), data1, id=id) aeq(coef(sfit2), c(coef(test1), coef(test3))) survival/tests/expected.Rout.save0000644000176200001440000002635014607006645016716 0ustar liggesusers R Under development (unstable) (2023-01-09 r83585) -- "Unsuffered Consequences" Copyright (C) 2023 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > options(na.action=na.exclude) # preserve missings > options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type > library(survival) > > # Tests of expected survival > aeq <- function(x,y) all.equal(as.vector(x), as.vector(y)) > # > # This makes several scripts easier > # > mdy.Date <- function(m, d, y) { + y <- ifelse(y<100, y+1900, y) + as.Date(paste(m,d,y, sep='/'), "%m/%d/%Y") + } > > # This function takes a single subject and walks down the rate table > # Input: the vector of starting points, futime, and a ratetable > # Output: the full history of walking through said table. Let n= #unique > # rates that were used > # cell = n by #dims of the table: index of the table cell > # days = time spent in cell > # hazard= accumulated hazard = days * rate > # This does not do date or factor conversions -- start has to be numeric > # > ratewalk <- function(start, futime, ratetable=survexp.us) { + if (!is.ratetable(ratetable)) stop("Bad rate table") + ratedim <- dim(ratetable) + nvar <- length(ratedim) + if (length(start) != nvar) stop("Wrong length for start") + if (futime <=0) stop("Invalid futime") + + attR <- attributes(ratetable) + discrete <- (attR$type ==1) #discrete categories + + maxn <- sum(!discrete)*prod(ratedim[!discrete]) #most cells you can hit + cell <- matrix(0, nrow=maxn, ncol=nvar) + days <- hazard <- double(maxn) + + eps <- 1e-8 #Avoid round off error + n <- 0 + while (futime >0) { + n <- n+1 + #what cell am I in? + # Note that at the edges of the rate table, we use the edge: if + # it only goes up the the year 2000, year 2000 is used for any + # dates beyond. This effectively eliminates one boundary + cell[n,discrete] <- start[discrete] + edge <- futime #time to nearest edge, or finish + for (j in which(!discrete)) { + indx <- sum(start[j] >= attR$cutpoints[[j]]-eps) + cell[n, j] <- max(1, indx) + if (indx < ratedim[j]) + edge <- min(edge, (attR$cutpoints[[j]])[indx+1] - start[j]) + } + days[n] <- edge #this many days in the cell + # using a matrix as a subscript is so handy sometimes + hazard[n] <- edge * (as.matrix(ratetable))[cell[n,,drop=F]] + futime <- futime - edge #amount of time yet to account for + start[!discrete] <- start[!discrete] + edge #walk forward in time + } + list(cell=cell[1:n,], days=days[1:n], hazard=hazard[1:n]) + } > > # Simple test of ratewalk: 20 years old, start on 7Sep 1960 > # 116 days at the 1960, 20 year old male rate, through the end of the day > # on 12/31/1960, then 84 days at the 1961 rate. > # The decennial q for 1960 males is .00169. > zz <- ratewalk(c(20.4*365.25, 1, as.Date("1960/09/07")), 200) > all.equal(zz$hazard[1], -(116/365.25)*log(1-.00169)) [1] TRUE > all.equal(zz$days, c(116,84)) [1] TRUE > > > # > # Simple case 1: a single male subject, born 1/1/36 and entered on study 1/2/55 > # > # Compute the 1, 5, 10 and 12 year expected survival > > temp1 <- mdy.Date(1,1,36) > temp2 <- mdy.Date(1,2,55) > exp1 <- survexp(~1, ratetable=survexp.usr,times=c(366, 1827, 3653, 4383), + rmap= list(year=temp2, age=(temp2-temp1), sex=1, race='white')) > > t12 <- as.numeric(temp2-temp1) # difftimes are a PITA > h1 <- ratewalk(c(t12, 1, 1, temp2), 366, survexp.usr) > h2 <- ratewalk(c(t12, 1, 1, temp2), 1827, survexp.usr) > h3 <- ratewalk(c(t12, 1, 1, temp2), 3653, survexp.usr) > h4 <- ratewalk(c(t12, 1, 1, temp2), 4383, survexp.usr) > > aeq(-log(exp1$surv), c(sum(h1$hazard), sum(h2$hazard), sum(h3$hazard), + sum(h4$hazard))) [1] TRUE > > # pyears should give the same result > dummy <- data.frame(time = 4383, + year=temp2, sex = 1, age= temp2-temp1, race="white") > cuts <- tcut(0, c(0, 366, 1827, 3653, 4383)) > exp1c <- pyears(time ~ cuts, data=dummy, ratetable=survexp.usr) > aeq(exp1$surv, exp(-cumsum(exp1c$expected))) [1] TRUE > > > # Just a little harder: > # Born 3/1/25 and entered the study on 6/10/55. The code creates shifted > # dates to align with US rate tables - entry is 59 days earlier (days from > # 1/1/1925 to 3/1/1925). > # > temp1 <- mdy.Date(3,1,25) > temp2 <- mdy.Date(6,10,55) > exp1 <- survexp(~1, ratetable=survexp.usr,times=c(366, 1827, 3653, 4383), + rmap= list(year=temp2, age=(temp2-temp1), sex=2, race='black')) > > tyear <- temp2 - 59 > t12 <- as.numeric(temp2-temp1) > h1 <- ratewalk(c(t12, 2, 2, tyear), 366, survexp.usr) > h2 <- ratewalk(c(t12, 2, 2, tyear), 1827, survexp.usr) > h3 <- ratewalk(c(t12, 2, 2, tyear), 3653, survexp.usr) > h4 <- ratewalk(c(t12, 2, 2, tyear), 4383, survexp.usr) > > aeq(-log(exp1$surv), c(sum(h1$hazard), sum(h2$hazard), sum(h3$hazard), + sum(h4$hazard))) [1] TRUE > > # > # Simple case 2: make sure that the averages are correct, for Ederer method > # > # Compute the 1, 5, 10 and 12 year expected survival > > temp1 <- mdy.Date(1:6,6:11,1890:1895) > temp2 <- mdy.Date(6:1,11:6,c(55:50)) > temp3 <- c(1,2,1,2,1,2) > age <- temp2 - temp1 > > exp1 <- survexp(~1, rmap= list(year=temp2, age=(temp2-temp1), sex=temp3), + times=c(366, 1827, 3653, 4383)) > exp2 <- survexp(~ I(1:6), + rmap= list(year=temp2, age=(temp2-temp1), sex=temp3), + times=c(366, 1827, 3653, 4383)) > exp3 <- exp2$surv > for (i in 1:length(temp1)){ + exp3[,i] <- survexp(~ 1, + rmap = list(year=temp2, age=(temp2-temp1), sex=temp3), + times=c(366, 1827, 3653, 4383), subset=i)$surv + } > > > print(aeq(exp2$surv, exp3)) [1] TRUE > print(all.equal(exp1$surv, apply(exp2$surv, 1, mean))) [1] TRUE > > # They agree, but are they right? > # > for (i in 1:length(temp1)) { + offset <- as.numeric(temp1[i] - mdy.Date(1,1, 1889+i)) + tyear = temp2[i] - offset + haz1 <- ratewalk(c(as.numeric(temp2-temp1)[i], temp3[i], tyear), 366) + haz2 <- ratewalk(c(as.numeric(temp2-temp1)[i], temp3[i], tyear), 1827) + haz3 <- ratewalk(c(as.numeric(temp2-temp1)[i], temp3[i], tyear), 3653) + haz4 <- ratewalk(c(as.numeric(temp2-temp1)[i], temp3[i], tyear), 4383) + print(aeq(-log(exp2$surv[,i]), c(sum(haz1$hazard), sum(haz2$hazard), + sum(haz3$hazard), sum(haz4$hazard)))) + } [1] TRUE [1] TRUE [1] TRUE [1] TRUE [1] TRUE [1] TRUE > > # > # Check that adding more time points doesn't change things > # > exp4 <- survexp(~ I(1:6), + rmap= list(year=temp2, age=(temp2-temp1), sex=temp3), + times=sort(c(366, 1827, 3653, 4383, 30*(1:100)))) > aeq(exp4$surv[match(exp2$time, exp4$time),], exp2$surv) [1] TRUE > > exp4 <- survexp(~1, + rmap = list(year=temp2, age=(temp2-temp1), sex=temp3), + times=sort(c(366, 1827, 3653, 4383, 30*(1:100)))) > aeq(exp1$surv, exp4$surv[match(exp1$time, exp4$time, nomatch=0)]) [1] TRUE > > > # > # Now test Hakulinen's method, assuming an analysis date of 3/1/57 > # > futime <- mdy.Date(3,1,57) - temp2 > xtime <- sort(c(futime, 30, 60, 185, 365)) > > exp1 <- survexp(futime ~ 1, rmap= list(year=temp2, age=(temp2-temp1), sex=1), + times=xtime, conditional=F) > exp2 <- survexp(~ I(1:6), times=futime, + rmap= list(year=temp2, age=(temp2-temp1), sex=1)) > > wt <- rep(1,6) > con <- double(6) > for (i in 1:6) { + con[i] <- sum(exp2$surv[i,i:6])/sum(wt[i:6]) + wt <- exp2$surv[i,] + } > > exp1$surv[match(futime, xtime)] [1] 0.9557362 0.9285840 0.9025661 0.8774220 0.8532489 0.8297416 > aeq(exp1$surv[match(futime, xtime)], cumprod(con)) [1] TRUE > > > # > # Now for the conditional method > # > exp1 <- survexp(futime ~ 1, rmap= list(year=temp2, age=(temp2-temp1), sex=1), + times=xtime, conditional=T) > > cond <- exp2$surv > for (i in 6:2) cond[i,] <- (cond[i,]/cond[i-1,]) #conditional survival > for (i in 1:6) con[i] <- exp(mean(log(cond[i, i:6]))) > > all.equal(exp1$surv[match(futime, xtime)], cumprod(con)) [1] TRUE > cumprod(con) [1] 0.9556656 0.9284398 0.9023612 0.8771798 0.8529944 0.8294940 > > # > # Test out expected survival, when the parent pop is another Cox model > # > test1 <- data.frame(time= c(4, 3,1,1,2,2,3), + status=c(1,NA,1,0,1,1,0), + x= c(0, 2,1,1,1,0,0)) > > fit <- coxph(Surv(time, status) ~x, test1, method='breslow') > > dummy <- data.frame(time=c(.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5), + status=c(1,0,1,0,1,0,1,1,1), x=(-4:4)/2) > > efit <- survexp(time ~ 1, rmap= list(x=x), dummy, ratetable=fit, cohort=F) > > # > # Now, compare to the true answer, which is known to us > # > ss <- exp(fit$coef) > haz <- c( 1/(3*ss+3), 2/(ss+3), 1) #truth at time 0,1,2,4+ > chaz <- cumsum(c(0,haz)) > chaz2 <- chaz[c(1,2,2,3,3,3,3,4,4)] > > risk <- exp(fit$coef*dummy$x) > efit2 <- exp(-risk*chaz2) > > all.equal(as.vector(efit), as.vector(efit2)) #ignore mismatched name attrib [1] TRUE > > # > # Now test the direct-adjusted curve (Ederer) > # > efit <- survexp( ~ 1, dummy, ratetable=fit, se=F) > direct <- survfit(fit, newdata=dummy, censor=FALSE)$surv > > chaz <- chaz[-1] #drop time 0 > d2 <- exp(outer(-chaz, risk)) > all.equal(as.vector(direct), as.vector(d2)) #this tests survfit [1] TRUE > > all.equal(as.vector(efit$surv), as.vector(apply(direct,1,mean))) #direct [1] TRUE > > # Check out the "times" arg of survexp > efit2 <- survexp( ~1, dummy, ratetable=fit, se=F, + times=c(.5, 2, 3.5,6)) > aeq(efit2$surv, c(1, efit$surv[c(2,2,3)])) [1] TRUE > > # > # Now test out the Hakulinen method (Bonsel's method) > # By construction, we have a large correlation between x and censoring > # > # In theory, hak1 and hak2 would be the same. In practice, like a KM and > # F-H, they differ when n is small. > # > efit <- survexp( time ~1, dummy, ratetable=fit, se=F) > > surv <- wt <- rep(1,9) > tt <- c(1,2,4) > hak1 <- hak2 <- NULL > for (i in 1:3) { + wt[dummy$time < tt[i]] <- 0 + hak1 <- c(hak1, exp(-sum(haz[i]*risk*surv*wt)/sum(surv*wt))) + hak2 <- c(hak2, sum(exp(-haz[i]*risk)*surv*wt)/sum(surv*wt)) + surv <- surv * exp(-haz[i]*risk) + } > > all.equal(as.vector(efit$surv), as.vector(cumprod(hak1))) [1] TRUE > > # > # Now do the conditional estimate > # > efit <- survexp( time ~ 1, dummy, ratetable=fit, se=F, + conditional=T) > wt <- rep(1,9) > cond <- NULL > for (i in 1:3) { + wt[dummy$time < tt[i]] <- 0 + cond <- c(cond, exp(-sum(haz[i]*risk*wt)/sum(wt))) + } > > all.equal(as.vector(efit$surv), as.vector(cumprod(cond))) [1] TRUE > > proc.time() user system elapsed 1.083 0.077 1.150 survival/tests/survfit2.Rout.save0000644000176200001440000000230014607006645016666 0ustar liggesusers R Under development (unstable) (2019-01-23 r76006) -- "Unsuffered Consequences" Copyright (C) 2019 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(survival) > # > # Check out the Dory&Korn confidence interval option > # > tdata <- data.frame(time= 1:10, + status=c(1,0,1,0,1,0,0,0,1,0)) > > fit1 <- survfit(Surv(time, status) ~1, tdata, conf.lower='modified') > fit2 <- survfit(Surv(time, status) ~1, tdata) > > stdlow <- fit2$std.err * sqrt(c(1, 10/9, 1, 8/7, 1, 6/5, 6/4, 6/3, 1, 2/1)) > lower <- exp(log(fit2$surv) - qnorm(.975)*stdlow) > all.equal(fit1$lower, lower, check.attributes=FALSE) [1] TRUE > > proc.time() user system elapsed 0.748 0.036 0.786 survival/tests/nested.R0000644000176200001440000000065014607006645014705 0ustar liggesuserslibrary(survival) # # A test of nesting. It makes sure the model.frame is built correctly # tfun <- function(fit, mydata) { survfit(fit, newdata=mydata) } myfit <- coxph(Surv(time, status) ~ age + factor(sex), lung) temp1 <- tfun(myfit, lung[1:5,]) temp2 <- survfit(myfit, lung[1:5,]) indx <- match('call', names(temp1)) #the call components won't match all.equal(unclass(temp1)[-indx], unclass(temp2)[-indx]) survival/tests/anova.Rout.save0000644000176200001440000000357214607006645016222 0ustar liggesusers R version 3.0.0 (2013-04-03) -- "Masked Marvel" Copyright (C) 2013 The R Foundation for Statistical Computing Platform: i686-pc-linux-gnu (32-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > # > # Test out anova, with strata terms > # > options(na.action=na.omit) > library(survival) Loading required package: splines > > fit1 <- coxph(Surv(time, status) ~ ph.ecog + wt.loss + strata(sex) + + poly(age,3), lung) > ztemp <- anova(fit1) > > tdata <- na.omit(lung[, c('time', 'status', 'ph.ecog', 'wt.loss', 'sex', 'age')]) > fit2 <- coxph(Surv(time, status)~ ph.ecog + wt.loss + poly(age,3) + strata(sex), + data=tdata) > ztemp2 <- anova(fit2) > all.equal(ztemp, ztemp2) [1] TRUE > > > fit2 <- coxph(Surv(time, status) ~ ph.ecog + wt.loss + strata(sex), tdata) > fit3 <- coxph(Surv(time, status) ~ ph.ecog + strata(sex), tdata) > > all.equal(ztemp$loglik, c(fit1$loglik[1], fit3$loglik[2], fit2$loglik[2], + fit1$loglik[2])) [1] TRUE > all.equal(ztemp$Chisq[-1], 2* diff(ztemp$loglik)) [1] TRUE > all.equal(ztemp$Df[-1], c(1,1,3)) [1] TRUE > > ztemp2 <- anova(fit3, fit2, fit1) > all.equal(ztemp2$loglik, ztemp$loglik[-1]) [1] TRUE > all.equal(ztemp2$Chisq[2:3], ztemp$Chisq[3:4]) [1] TRUE > # Change from ztemp2$P; it's a data frame and in R 3.0.2 abbreviated names > # give a warning > all.equal(ztemp2[[4]][2:3], ztemp[[4]][3:4]) [1] TRUE > > > > proc.time() user system elapsed 0.284 0.020 0.301 survival/tests/yates0.Rout.save0000644000176200001440000000627414607006645016325 0ustar liggesusers R Under development (unstable) (2022-07-22 r82614) -- "Unsuffered Consequences" Copyright (C) 2022 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > # > # This code verified some oddities about model frames > # > options(contrasts=c("contr.treatment", "contr.poly")) # clean slate > > tdata <- data.frame(y=1:10, x1=letters[c(1,2,3,1,2,3,1,2,3,1)], + x2=LETTERS[c(1,2,3,4,1,2,3,4,1,2)], + stringsAsFactors=TRUE) > tdata$x3 <- as.character(tdata$x1) > > fit1 <- lm(y ~ x1 + x2, tdata, x=TRUE) > m1 <- fit1$model > t1 <- terms(fit1) > > # Lesson 1: xlev is ignored when the variable already has levels > # as an attribute > > temp <- list(x1=c("a", "c", "b"), x2=LETTERS[4:1]) > x2 <- model.matrix(t1, m1, xlev=temp) > x3 <- model.matrix(t1, tdata, xlev=temp) > > all.equal(x2, fit1$x) [1] TRUE > !is.logical(all.equal(x2, x3)) # x2 and x3 do not agree [1] TRUE > attributes(m1$x1) $levels [1] "a" "b" "c" $class [1] "factor" > > > # Lesson 2: character variables do not have their levels > # remembered as attributes in the model frame, but these are > # found in fit$xlevels and fit$contrasts. > # However, the xlev argument is still ignored for a model frame! > fit2 <- lm(y ~ x3 + x2, tdata, x=TRUE) > m2 <- fit2$model > x3 <- model.matrix(terms(fit2), m2, xlev=fit2$xlevels) > x4 <- model.matrix(terms(fit2), m2, xlev=list(x3=letters[3:1])) > x5 <- model.matrix(terms(fit2), tdata, xlev=list(x3=letters[3:1])) > all.equal(fit2$x, x3) [1] TRUE > all.equal(x3, x4) [1] TRUE > all.equal(x3, x5) # FALSE [1] "Attributes: < Component \"dimnames\": Component 2: 1 string mismatch >" [2] "Mean relative difference: 2.333333" > > > # Lesson 3: contrasts.arg is relevant, even when the model frame > # has a saved contrast > ctemp <- list(x1="contr.SAS", x2= contr.helmert(LETTERS[1:4])) > x4 <- model.matrix(t1, m1, contrasts.arg=ctemp) # no saved contrast > > fit3 <- lm(y ~ x1 + C(x2, contr.SAS), tdata) > m3 <- fit3$model > attr(m3[[3]], 'contr') # the contrast is saved A B C A 1 0 0 B 0 1 0 C 0 0 1 D 0 0 0 > c2 <- ctemp > names(c2) <- names(fit3$contrasts) > x5 <- model.matrix(terms(fit3), m3, contrasts.arg=c2) > all.equal(x4, x5, check.attributes=FALSE) [1] TRUE > > # Lesson 4: and this holds for a character variable as well > fit4 <- lm(y ~ x3 + C(x2, contr.SAS), tdata, x=TRUE) > c4 <- ctemp > names(c4) <- names(fit4$contrasts) > x6 <- model.matrix(terms(fit4), fit4$model, xlev=fit4$xlevels, contrasts.arg=c4) > x7 <- model.matrix(terms(fit4), fit4$model, contrasts.arg=c4) > all.equal(x7, x6) [1] TRUE > all.equal(x7, x5, check.attributes=FALSE) [1] TRUE > > proc.time() user system elapsed 0.128 0.017 0.138 survival/tests/book2.R0000644000176200001440000000771114613770353014445 0ustar liggesuserslibrary(survival) options(na.action=na.exclude) # preserve missings options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type # # Tests from the appendix of Therneau and Grambsch # b. Data set 1 and Efron estimate # test1 <- data.frame(time= c(9, 3,1,1,6,6,8), status=c(1,NA,1,0,1,1,0), x= c(0, 2,1,1,1,0,0)) byhand <- function(beta, newx=0) { r <- exp(beta) loglik <- 2*beta - (log(3*r +3) + log((r+5)/2) + log(r+3)) u <- (30 + 23*r - r^3)/ ((r+1)*(r+3)*(r+5)) tfun <- function(x) x - x^2 imat <- tfun(r/(r+1)) + tfun(r/(r+5)) + tfun(r/(r+3)) # The matrix of weights, one row per obs, one col per time # Time of 1, 6, 6+0 (second death), and 9 wtmat <- matrix(c(1,1,1,1,1,1, 0,0,1,1,1,1, 0,0,.5, .5, 1,1, 0,0,0,0,0,1), ncol=4) wtmat <- diag(c(r,r,r,1,1,1)) %*% wtmat x <- c(1,1,1,0,0,0) status <- c(1,0,1,1,0,1) xbar <- colSums(wtmat*x)/ colSums(wtmat) haz <- 1/ colSums(wtmat) # one death at each of the times hazmat <- wtmat %*% diag(haz) #each subject's hazard over time mart <- status - rowSums(hazmat) a <- r+1; b<- r+3; d<- r+5 # 'c' in the book, 'd' here score <- c((2*r + 3)/ (3*a^2), -r/ (3*a^2), (675+ r*(1305 +r*(756 + r*(-4 +r*(-79 -13*r)))))/(3*(a*b*d)^2), r*(1/(3*a^2) - a/(2*b^2) - b/(2*d^2)), 2*r*(177 + r*(282 +r*(182 + r*(50 + 5*r)))) /(3*(a*b*d)^2), 2*r*(177 + r*(282 +r*(182 + r*(50 + 5*r)))) /(3*(a*b*d)^2)) # Schoenfeld residual d <- mean(xbar[2:3]) scho <- c(1/(r+1), 1- d, 0- d , 0) surv <- exp(-cumsum(haz)* exp(beta*newx))[c(1,3,4)] varhaz.g <- cumsum(haz^2) # since all numerators are 1 varhaz.d <- cumsum((newx-xbar) * haz) varhaz <- (varhaz.g + varhaz.d^2/ imat) * exp(2*beta*newx) list(loglik=loglik, u=u, imat=imat, xbar=xbar, haz=haz, mart=mart, score=score, var.g=varhaz.g, var.d=varhaz.d, scho=scho, surv=surv, var=varhaz[c(1,3,4)]) } aeq <- function(x,y) all.equal(as.vector(x), as.vector(y)) fit0 <-coxph(Surv(time, status) ~x, test1, iter=0) truth0 <- byhand(0,0) aeq(truth0$loglik, fit0$loglik[1]) aeq(1/truth0$imat, fit0$var) aeq(truth0$mart, fit0$residuals[c(2:6,1)]) aeq(resid(fit0), c(-3/4, NA, 5/6, -1/6, 5/12, 5/12, -3/4)) aeq(truth0$scho, resid(fit0, 'schoen')) aeq(truth0$score, resid(fit0, 'score')[c(3:7,1)]) sfit <- survfit(fit0, list(x=0), censor=FALSE) aeq(sfit$std.err^2, truth0$var) aeq(sfit$surv, truth0$surv) fit <- coxph(Surv(time, status) ~x, test1, eps=1e-8, nocenter=NULL) aeq(round(fit$coefficients,6), 1.676857) truebeta <- log(cos(acos((45/23)*sqrt(3/23))/3) * 2* sqrt(23/3)) truth <- byhand(truebeta, 0) aeq(truth$loglik, fit$loglik[2]) aeq(1/truth$imat, fit$var) aeq(truth$mart, fit$residuals[c(2:6,1)]) aeq(truth$scho, resid(fit, 'schoen')) aeq(truth$score, resid(fit, 'score')[c(3:7,1)]) # Per comments in the source code, the below is expected to fail for Efron # at the tied death times. (When predicting for new data, predict # treats a time in the new data set that exactly matches one in the original # as being just after the original, i.e., experiences the full hazard # jump there, in the same way that censors do.) expect <- predict(fit, type='expected', newdata=test1) #force recalc use <- !(test1$time==6 | is.na(test1$status)) aeq(test1$status[use] - resid(fit)[use], expect[use]) sfit <- survfit(fit, list(x=0), censor=FALSE) aeq(sfit$surv, truth$surv) aeq(sfit$std.err^2, truth$var) # # Done with the formal test, now print out lots of bits # resid(fit) resid(fit, 'scor') resid(fit, 'scho') predict(fit, type='lp') predict(fit, type='risk') predict(fit, type='expected') predict(fit, type='terms') predict(fit, type='lp', se.fit=T) predict(fit, type='risk', se.fit=T) predict(fit, type='expected', se.fit=T) predict(fit, type='terms', se.fit=T) summary(survfit(fit)) summary(survfit(fit, list(x=2))) survival/tests/checkSurv2.R0000644000176200001440000000627714607006645015455 0ustar liggesuserslibrary(survival) # # check of the Surv2 function # # Build a flat form of the mgus2 data set. Mix up the data set order, to test # out that part of the underlying code. set.seed(1953) m2 <- mgus2[sample(1:nrow(mgus2), nrow(mgus2),replace=FALSE),] temp1 <- data.frame(m2[,1:7], ftime=0) temp2 <- with(subset(m2, pstat==1), data.frame(id=id, ftime=ptime, event="progression")) # competing risks: use only the first of death and progression temp3 <- with(subset(m2, pstat==0), data.frame(id=id, ftime=futime, event=ifelse(death==0, "censor", "death"))) mflat <- merge(temp1, rbind(temp2, temp3), all=TRUE) mflat$event <- factor(mflat$event, c("censor", "progression", "death")) sfit1 <- survfit(Surv2(ftime, event) ~ sex, mflat, id=id) # now compare it to the usual way etime <- with(mgus2, ifelse(pstat==1, ptime, futime)) estat <- with(mgus2, ifelse(pstat==1, 1, 2*death)) estat <- factor(estat, 0:2, c("censor", "progression", "death")) sfit2 <- survfit(Surv(etime, estat) ~ sex, mgus2) all.equal(sfit1$pstate, sfit2$pstate) # Cox model cfit1 <- coxph(Surv2(ftime, event) ~ sex + age, data=mflat, id=id) cfit2 <- coxph(Surv(etime, estat) ~ sex + age, data=mgus2, id=id) all.equal(cfit1[c("coefficients", "var", "loglik", "score")], cfit2[c("coefficients", "var", "loglik", "score")]) # And using the explicit call to build a data set sdata <- Surv2data(Surv2(ftime, event) ~ ., data=mflat, id=id) cfit3 <- coxph(Surv2.y ~ sex + age, data=sdata, id=id) all.equal(cfit1[c("coefficients", "var", "loglik", "score")], cfit3[c("coefficients", "var", "loglik", "score")]) # Create a data set with error = two events on the same day # A model with this data will generate an error. temp4 <- with(m2, data.frame(id=id, ftime=futime, event=ifelse(death==0, "censor", "death"))) mflat2 <- merge(temp1, rbind(temp2, temp4), all=TRUE) mflat2$event <- factor(mflat2$event, c("censor", "prog", "death")) stemp <- survcheck(Surv2(ftime, event) ~ sex, data=mflat2, id=id) all.equal(stemp$duplicate$row, which(duplicated(mflat2[,c("id", "ftime")]))) # Full 3 state model. We need to make progressions that are tied with # deaths be just a bit sooner. temp2b <- with(subset(m2, pstat==1), data.frame(id=id, ftime= ifelse(ptime==futime & death==1, ptime-.1, ptime), event="progression")) temp3b <- with(m2, data.frame(id=id, ftime=futime, event=ifelse(death==0, "censor", "death"))) mflat3 <- merge(temp1, rbind(temp2b, temp3b), all=TRUE) mflat3$event <- factor(mflat3$event, c("censor", "progression", "death")) cfit4 <- coxph(Surv2(ftime, event) ~ sex + age + mspike, mflat3, id=id) # For a standard start-stop data set use tmerge m3 <- tmerge(m2[,1:7], subset(m2,,c(id, futime, death)), id=id, event= event(futime, 2*death)) m3 <- tmerge(m3, temp2b, id=id, event= event(ftime)) m3$event <- factor(m3$event, 0:2, c("censor", "progression", "death")) cfit5 <- coxph(Surv(tstart, tstop, event) ~ sex + age + mspike, m3, id=id) all.equal(cfit4[c("coefficients", "var", "loglik", "score")], cfit5[c("coefficients", "var", "loglik", "score")]) survival/tests/multi2.Rout.save0000644000176200001440000002057414654222147016332 0ustar liggesusers R Under development (unstable) (2024-06-14 r86747) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: aarch64-unknown-linux-gnu R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(survival) > aeq <- function(x, y, ...) all.equal(as.vector(x), as.vector(y), ...) > > # Check that estimates from a multi-state model agree with single state models > # Use a simplified version of the myeloid data set > tdata <- tmerge(myeloid[,1:3], myeloid, id=id, death=event(futime,death), + priortx = tdc(txtime), sct= event(txtime)) > tdata$event <- factor(with(tdata, sct + 2*death), 0:2, + c("censor", "sct", "death")) > fit <- coxph(Surv(tstart, tstop, event) ~ trt + sex, tdata, id=id, + iter=4, x=TRUE, robust=FALSE) > > # Multi-state now defaults to breslow rather than efron > fit12 <- coxph(Surv(tstart, tstop, event=='sct') ~ trt + sex, tdata, + subset=(priortx==0), iter=4, x=TRUE, method='breslow') > fit13 <- coxph(Surv(tstart, tstop, event=='death') ~ trt + sex, tdata, + subset=(priortx==0), iter=4, x=TRUE, method= 'breslow') > fit23 <- coxph(Surv(tstart, tstop, event=='death') ~ trt + sex, tdata, + subset=(priortx==1), iter=4, x=TRUE, method="breslow") > aeq(coef(fit), c(coef(fit12), coef(fit13), coef(fit23))) [1] TRUE > aeq(fit$loglik, fit12$loglik + fit13$loglik + fit23$loglik) [1] TRUE > temp <- matrix(0, 6,6) > temp[1:2, 1:2] <- fit12$var > temp[3:4, 3:4] <- fit13$var > temp[5:6, 5:6] <- fit23$var > aeq(fit$var, temp) [1] TRUE > > # check out model.frame > fita <- coxph(Surv(tstart, tstop, event) ~ trt, tdata, id=id) > fitb <- coxph(Surv(tstart, tstop, event) ~ trt, tdata, id=id, model=TRUE) > all.equal(model.frame(fita), fitb$model) [1] "Component \"trt\": 'current' is not a factor" > # model.frame fails due to an interal rule in R, factors vs characters > # result when the xlev arg is in the call. So model.frame(fita) has trt > # as a factor, not character. > > #check residuals > indx1 <- which(fit$rmap[,2] ==1) > indx2 <- which(fit$rmap[,2] ==2) > indx3 <- which(fit$rmap[,2] ==3) > aeq(residuals(fit), c(residuals(fit12), residuals(fit13), residuals(fit23))) [1] TRUE > aeq(residuals(fit)[indx1], residuals(fit12)) [1] TRUE > aeq(residuals(fit)[indx2], residuals(fit13)) [1] TRUE > aeq(residuals(fit)[indx3], residuals(fit23)) [1] TRUE > > # score residuals > temp <- residuals(fit, type='score') > aeq(temp[indx1, 1:2], residuals(fit12, type='score')) [1] TRUE > aeq(temp[indx2, 3:4], residuals(fit13, type='score')) [1] TRUE > aeq(temp[indx3, 5:6], residuals(fit23, type='score')) [1] TRUE > > all(temp[indx1, 3:6] ==0) [1] TRUE > all(temp[indx2, c(1,2,5,6)] ==0) [1] TRUE > all(temp[indx3, 1:4]==0) [1] TRUE > > temp <- residuals(fit, type="dfbeta") > all(temp[indx1, 3:6] ==0) [1] TRUE > all(temp[indx2, c(1,2,5,6)] ==0) [1] TRUE > all(temp[indx3, 1:4]==0) [1] TRUE > aeq(temp[indx1, 1:2], residuals(fit12, type='dfbeta')) [1] TRUE > aeq(temp[indx2, 3:4], residuals(fit13, type='dfbeta')) [1] TRUE > aeq(temp[indx3, 5:6], residuals(fit23, type='dfbeta')) [1] TRUE > > temp <- residuals(fit, type="dfbetas") > all(temp[indx1, 3:6] ==0) [1] TRUE > all(temp[indx2, c(1,2,5,6)] ==0) [1] TRUE > all(temp[indx3, 1:4]==0) [1] TRUE > aeq(temp[indx1, 1:2], residuals(fit12, type='dfbetas')) [1] TRUE > aeq(temp[indx2, 3:4], residuals(fit13, type='dfbetas')) [1] TRUE > aeq(temp[indx3, 5:6], residuals(fit23, type='dfbetas')) [1] TRUE > > # Schoenfeld and scaled shoenfeld have one row per event > sr1 <- residuals(fit12, type="schoenfeld") > sr2 <- residuals(fit13, type="schoenfeld") > sr3 <- residuals(fit23, type="schoenfeld") > end <- rep(1:3, c(nrow(sr1), nrow(sr2), nrow(sr3))) > temp <- residuals(fit, type="schoenfeld") > aeq(temp[end==1, 1:2], sr1) [1] TRUE > aeq(temp[end==2, 3:4], sr2) [1] TRUE > aeq(temp[end==3, 5:6], sr3) [1] TRUE > all(temp[end==1, 3:6] ==0) [1] TRUE > all(temp[end==2, c(1,2,5,6)] ==0) [1] TRUE > all(temp[end==3, 1:4] ==0) [1] TRUE > > > #The scaled Schoenfeld don't agree, due to the use of a robust > # variance in fit, regular variance in fit12, fit13 and fit23 > #Along with being scaled by different event counts > xfit <- fit > xfit$var <- xfit$naive.var > if (FALSE) { + xfit <- fit + xfit$var <- xfit$naive.var # fixes the first issue + temp <- residuals(xfit, type="scaledsch") + aeq(d1* temp[sindx1, 1:2], residuals(fit12, type='scaledsch')) + aeq(temp[sindx2, 3:4], residuals(fit13, type='scaledsch')) + aeq(temp[sindx3, 5:6], residuals(fit23, type='scaledsch')) + } > > if (FALSE) { # the predicted values are a work in progress + # predicted values differ because of different centering + c0 <- sum(fit$mean * coef(fit)) + c12 <- sum(fit12$mean * coef(fit12)) + c13 <- sum(fit13$mean* coef(fit13)) + c23 <- sum(fit23$mean * coef(fit23)) + + aeq(predict(fit)+c0, c(predict(fit12)+c12, predict(fit13)+c13, + predict(fit23)+c23)) + aeq(exp(predict(fit)), predict(fit, type='risk')) + + # expected survival is independent of centering + aeq(predict(fit, type="expected"), c(predict(fit12, type="expected"), + predict(fit13, type="expected"), + predict(fit23, type="expected"))) + } > # predict(type='terms') is a matrix, centering changes as well > if (FALSE) { + temp <- predict(fit, type='terms') + all(temp[indx1, 3:6] ==0) + all(temp[indx2, c(1,2,5,6)] ==0) + all(temp[indx3, 1:4]==0) + aeq(temp[indx1, 1:2], predict(fit12, type='terms')) + aeq(temp[indx2, 3:4], predict(fit13, type='terms')) + aeq(temp[indx3, 5:6], predict(fit23, type='terms')) + } # end of prediction section > > # The global and per strata zph tests will differ for the KM or rank > # transform, because the overall and subset will have a different list > # of event times, which changes the transformed value for all of them. > # But identity and log are testable. > test_a <- cox.zph(fit, transform="log",global=FALSE) > test_a12 <- cox.zph(fit12, transform="log",global=FALSE) > test_a13 <- cox.zph(fit13, transform="log", global=FALSE) > test_a23 <- cox.zph(fit23, transform="log", global=FALSE) > aeq(test_a$y[test_a$strata==1, 1:2], test_a12$y) [1] TRUE > > aeq(test_a$table[1:2,], test_a12$table) [1] TRUE > aeq(test_a$table[3:4,], test_a13$table) [1] TRUE > aeq(test_a$table[5:6,], test_a23$table) [1] TRUE > > # check cox.zph fit - transform = 'identity' > test_b <- cox.zph(fit, transform="identity",global=FALSE) > test_b12 <- cox.zph(fit12, transform="identity",global=FALSE) > test_b13 <- cox.zph(fit13, transform="identity", global=FALSE) > test_b23 <- cox.zph(fit23, transform="identity", global=FALSE) > > aeq(test_b$table[1:2,], test_b12$table) [1] TRUE > aeq(test_b$table[3:4,], test_b13$table) [1] TRUE > aeq(test_b$table[5:6,], test_b23$table) [1] TRUE > > # check out subscripting of a multi-state zph > cname <- c("table", "x", "time", "y", "var") > sapply(cname, function(x) aeq(test_b[1:2]$x, test_b12$x)) table x time y var TRUE TRUE TRUE TRUE TRUE > sapply(cname, function(x) aeq(test_b[3:4]$x, test_b13$x)) table x time y var TRUE TRUE TRUE TRUE TRUE > sapply(cname, function(x) aeq(test_b[5:6]$x, test_b23$x)) table x time y var TRUE TRUE TRUE TRUE TRUE > > # check model.matrix > mat1 <- model.matrix(fit) > all.equal(mat1, fit$x) [1] TRUE > > # Check that the internal matix agrees (uses stacker, which is not exported) > mat2 <- model.matrix(fit12) > mat3 <- model.matrix(fit13) > mat4 <- model.matrix(fit23) > > # first reconstruct istate > tcheck <- survcheck(Surv(tstart, tstop, event) ~ 1, tdata, id=id) > temp <- survival:::stacker(fit$cmap, fit$smap, as.numeric(tcheck$istate), fit$x, + fit$y, NULL, fit$states) > aeq(temp$X[temp$transition==1, 1:2], mat2) [1] TRUE > aeq(temp$X[temp$transition==2, 3:4], mat3) [1] TRUE > aeq(temp$X[temp$transition==3, 5:6], mat4) [1] TRUE > > > > proc.time() user system elapsed 0.481 0.040 0.518 survival/tests/royston.Rout.save0000644000176200001440000000251314607006645016625 0ustar liggesusers R Under development (unstable) (2020-06-10 r78681) -- "Unsuffered Consequences" Copyright (C) 2020 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > # Verify the values found in the Royston paper > library(survival) > > pbc2 <- na.omit(pbc[,-1]) # no id variable, no missings > > pfit1 <- coxph(Surv(time, status==2) ~ . + log(bili) - bili, pbc2, + ties="breslow") > # backwards elimination was used to eliminate all but 8 > pfit2 <- coxph(Surv(time, status==2) ~ age + log(bili) + edema + albumin + + stage + copper, data=pbc2, ties="breslow") > > temp <- rbind(royston(pfit1), royston(pfit1, adjust=TRUE), + royston(pfit2), royston(pfit2, adjust=TRUE)) > all.equal(round(temp[,1], 2), c(2.86, 2.56, 2.69, 2.59)) [1] TRUE > > proc.time() user system elapsed 0.824 0.052 0.868 survival/tests/nsk.R0000644000176200001440000000120214607006645014210 0ustar liggesuserslibrary(survival) library(splines) # the nsk function should give the same solution as ns, but with a different # parameterization # xx <- runif(500, 1, 100) yy <- 10*log(xx) + rnorm(500, 0, 2) tdata <- data.frame(xx=xx, yy=yy) fit1 <- lm(yy ~ ns(xx, df=4), tdata, model=TRUE) fit2 <- lm(yy ~ nsk(xx, df=4, b=0), tdata) all.equal(predict(fit1), predict(fit2)) # same solution xattr <- attributes(fit1$model[[2]]) allknots <- sort(c(xattr$knots, xattr$Boundary.knots)) # knots that were used pred.knot <- predict(fit1, newdata=list(xx=allknots)) all.equal(pred.knot[-1] - pred.knot[1], coef(fit2)[-1], check.attributes = FALSE) survival/tests/model.matrix.Rout.save0000644000176200001440000000624614613770353017523 0ustar liggesusers R Under development (unstable) (2019-08-23 r77061) -- "Unsuffered Consequences" Copyright (C) 2019 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > options(na.action=na.exclude) # preserve missings > options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type > library(survival) > > # > # Test out the revised model.matrix code > # > test1 <- data.frame(time= c(9, 3,1,1,6,6,8), + status=c(1,NA,1,0,1,1,0), + x= c(0, 2,1,1,1,0,0), + z= factor(c('a', 'a', 'b', 'b', 'c', 'c', 'a')), + stringsAsFactors=FALSE) > > fit1 <- coxph(Surv(time, status) ~ z, test1, iter=1) > fit2 <- coxph(Surv(time, status) ~z, test1, x=T, iter=1) > all.equal(model.matrix(fit1), fit2$x) [1] TRUE > > # This has no level 'b', make sure dummies recode properly > test2 <- data.frame(time= c(9, 3,1,1,6,6,8), + status=c(1,NA,1,0,1,1,0), + x= c(0, 2,1,1,1,0,0), + z= factor(c('a', 'a', 'a', 'a', 'c', 'c', 'a')), + stringsAsFactors=FALSE) > > ftest <- model.frame(fit1, data=test2) > all.equal(levels(ftest$z), levels(test1$z)) [1] TRUE > > # xtest will have one more row than the others, since it does not delete > # the observation with a missing value for status > xtest <- model.matrix(fit1, data=test2) > dummy <- fit2$x > dummy[,1] <- 0 > all.equal(xtest[-2,], dummy, check.attributes=FALSE) [1] TRUE > > # The case of a strata by factor interaction > # Use iter=0 since there are too many covariates and it won't converge > test1$x2 <- factor(rep(1:2, length=7)) > fit3 <- coxph(Surv(time, status) ~ strata(x2)*z, test1, iter=0) > xx <- model.matrix(fit3) > all.equal(attr(xx, "assign"), c(2,2,3,3)) [1] TRUE > all.equal(colnames(xx), c("zb", "zc", "strata(x2)2:zb", + "strata(x2)2:zc")) [1] TRUE > all.equal(attr(xx, "contrasts"), + list("strata(x2)"= "contr.treatment", z="contr.treatment")) [1] TRUE > > fit3b <- coxph(Surv(time, status) ~ strata(x2)*z, test1, iter=0, x=TRUE) > all.equal(fit3b$x, xx) [1] TRUE > > > # A model with a tt term > fit4 <- coxph(Surv(time, status) ~ tt(x) + x, test1, iter=0, + tt = function(x, t, ...) x*t) > ff <- model.frame(fit4) > # There is 1 subject in the final risk set, 4 at risk at time 6, 6 at time 1 > # The .strata. variable numbers from last time point to first > all.equal(ff$.strata., rep(1:3, c(1, 4,6))) [1] TRUE > all.equal(ff[["tt(x)"]], ff$x* c(9,6,1)[ff$.strata.]) [1] TRUE > > xx <- model.matrix(fit4) > all.equal(xx[,1], ff[[2]], check.attributes=FALSE) [1] TRUE > > > proc.time() user system elapsed 0.746 0.041 0.781 survival/tests/predsurv.Rout.save0000644000176200001440000001262714613773236016775 0ustar liggesusers R Under development (unstable) (2024-04-29 r86493) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: aarch64-unknown-linux-gnu R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > # > # Verify that predict(coxfit) agrees with survfit for type= expected and survival > # This also acts as a check on summary.survfit > # > library(survival) > aeq <- function(x,y, ...) all.equal(as.vector(x), as.vector(y), ...) > > fit1 <- coxph(Surv(time, status) ~ age + ph.ecog, lung) > > d1 <- data.frame(age= 40 * 1:8*5, ph.ecog= rep(0:2, length=8)) > curves <- survfit(fit1, newdata= d1) > > # the status variable isn't used, but has to be there > d1a <- cbind(time= 1:8 * 60, status=1, d1) # unique time per subject > d1b <- cbind(time= 365, status=1, d1) # same time for each > > p1 <- predict(fit1, newdata= d1a, type="expected", se.fit=TRUE) > p2 <- predict(fit1, newdata= d1a, type="survival", se.fit=TRUE) > p3 <- predict(fit1, newdata= d1b, type="survival", se.fit=TRUE) > > csum1 <- summary(curves, time= 1:8 * 60) > aeq(p1$fit, diag(csum1$cumhaz)) [1] TRUE > aeq(p1$se.fit , diag(csum1$std.chaz)) [1] TRUE > aeq(p2$fit, diag(csum1$surv)) [1] TRUE > aeq(p2$se.fit, diag(csum1$std.err)) [1] TRUE > > csum2 <- summary(curves, time=365) > aeq(p3$fit,csum2$surv) [1] TRUE > aeq(p3$se.fit, csum2$std.err) [1] TRUE > > # Harder, add a strata > fit2 <- coxph(Surv(time, status) ~ age + ph.ecog + strata(sex), lung) > d2 <- data.frame(age= 40 + 1:8*5, ph.ecog= rep(0:2, length=8), sex=rep(1:2,4)) > curve2 <- survfit(fit2, newdata= d2[,1:2]) > > d2a <- cbind(time= 1:8 * 60, status=1, d2) # unique time per subject > d2b <- cbind(time= 365, status=1, d2) # same time for each > > p1 <- predict(fit2, newdata= d2a, type="expected", se.fit=TRUE) > p2 <- predict(fit2, newdata= d2a, type="survival", se.fit=TRUE) > p3 <- predict(fit2, newdata= d2b, type="survival", se.fit=TRUE) > > csum1 <- summary(curve2, time= 1:8 * 60, data.frame=TRUE) > dummy <- data.frame(data=1:8, time= 1:8*60, strata=paste0("sex=", rep(1:2,4))) > temp <- merge(dummy, csum1, all.x=TRUE) # select the correct rows from csum1 > aeq(p1$fit, temp$cumhaz) [1] TRUE > aeq(p1$se.fit , temp$std.chaz) [1] TRUE > aeq(p2$fit, temp$surv) [1] TRUE > aeq(p2$se.fit, temp$std.err) [1] TRUE > > csum2 <- summary(curve2, time=365) > indx <- cbind(rep(1:2, 4), 1:8) > aeq(p3$fit,csum2$surv[indx]) [1] TRUE > aeq(p3$se.fit, csum2$std.err[indx]) [1] TRUE > > # Repeat for (time1, time2) data > fit3 <- coxph(Surv(tstart, tstop, status) ~ age + treat, cgd) > d3 <- data.frame(age= c(2,12, 20,30, 40), + treat = rep(levels(cgd$treat), length=5)) > > curve3 <- survfit(fit3, newdata=d3) > > d3a <- cbind(tstart= c(0, 50, 100, 200, 250), + tstop = c(400, 140, 150, 260, 310), status=1, d3) > d3b <- cbind(tstart=0, tstop=365, status=1, d3) > > p4 <- predict(fit3, newdata=d3a, type='expected', se.fit=TRUE) > p5 <- predict(fit3, newdata=d3b, type='survival', se.fit=TRUE) > # type survival is only valid from the start of the curve forward, so no d3a > > alltime <- sort(unique(c(d3a$tstart, d3a$tstop))) > csum3 <-summary(curve3, times= alltime) > temp1 <- csum3$cumhaz[cbind(match(d3a$tstart, alltime), 1:nrow(d3a))] > temp2 <- csum3$cumhaz[cbind(match(d3a$tstop, alltime), 1:nrow(d3a))] > aeq(p4$fit, temp2- temp1) [1] TRUE > > temp3 <- csum3$std.chaz[cbind(match(d3a$tstart, alltime), 1:nrow(d3a))] > temp4 <- csum3$std.chaz[cbind(match(d3a$tstop, alltime), 1:nrow(d3a))] > aeq(p4$se.fit, sqrt(temp4^2 - temp3^2)) [1] TRUE > > csum4 <- summary(curve3, times=365) > aeq(p5$fit, csum4$surv) [1] TRUE > aeq(p5$se.fit, csum4$std.err) [1] TRUE > > > # Harder case: add a strata to the problem > fit4 <- coxph(Surv(tstart, tstop, status) ~ age + treat + strata(hos.cat), cgd) > d4 <- data.frame(age= c(2,12, 20,30, 40), + treat = rep(levels(cgd$treat), length=5), + hos.cat= levels(cgd$hos.cat)[c(1,2,4,3,2)]) > > curve4 <- survfit(fit4, newdata=d4) > # by including hos.cat in the above curve4 has 5 strata, one per subject, > # and no data dimension > > d4a <- cbind(tstart= c(0, 50, 100, 200, 250), + tstop = c(400, 140, 150, 260, 310), status=1, d4) > d4b <- cbind(tstart=0, tstop=365, status=1, d4) > p4 <- predict(fit4, newdata=d4a, type='expected', se.fit=TRUE) > p5 <- predict(fit4, newdata=d4b, type='survival', se.fit=TRUE) > > # sort() skipped on purpose, summary should handle it > alltime <- unique(c(d4a$tstart, d4a$tstop)) > csum5 <- summary(curve4, times=alltime, extend=TRUE) > indx1 <- match(d4a$tstart, csum5$time) + 0:4*length(alltime) > indx2 <- match(d4a$tstop, csum5$time) + 0:4*length(alltime) > aeq(p4$fit, csum5$cumhaz[indx2] - csum5$cumhaz[indx1]) [1] TRUE > aeq(p4$se.fit, sqrt(csum5$std.chaz[indx2]^2 - csum5$std.chaz[indx1]^2)) [1] TRUE > > csum6 <- summary(curve4, times= 365, extend=TRUE) > aeq(p5$fit, csum6$surv) [1] TRUE > aeq(p5$se.fit, csum6$std.err) [1] TRUE > > proc.time() user system elapsed 0.455 0.020 0.473 survival/tests/pseudo.Rout.save0000644000176200001440000002751014611216353016405 0ustar liggesusers R Under development (unstable) (2024-04-17 r86441) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: aarch64-unknown-linux-gnu R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > # Tests of pseudovalues, by calculating directly from survfit and residuals > # this assumes that residuals.survfit is correct > library(survival) > aeq <- function(x, y, ...) all.equal(as.vector(x), as.vector(y), ...) > > mdata <- mgus2 > temp <- ifelse(mdata$pstat==1, 1, 2*mdata$death) > mdata$event <- factor(temp, 0:2, c("censor", "pcm", "death")) > mdata$etime <- ifelse(mdata$pstat==1, mdata$ptime, mdata$futime) > mdata <- subset(mdata, etime > 12) # remove first year > tvec <- c(10, 100, 200, 365) > > # Single endpoint, one curve > fit1 <- survfit(Surv(ptime, pstat) ~1, mdata) > # a time point before first event, after last event, at an event time, > # and between event times > rr1 <- resid(fit1, tvec) > aeq(colSums(rr1), rep(0,4)) [1] TRUE > sv1 <- summary(fit1, time=tvec, extend=TRUE)$surv > > # one time point > ps1a <- pseudo(fit1, time=100) > aeq(ps1a, sv1[2] + fit1$n*rr1[,2]) [1] TRUE > # multiple > ps1b <- pseudo(fit1, time=tvec) > aeq(ps1b, sv1[col(rr1)] + fit1$n * rr1) [1] TRUE > > # Single endpoint, multiple curves > fit2 <- survfit(Surv(futime, death) ~ sex, mdata) > rr2 <- resid(fit2, time=tvec) > aeq(colSums(rr2), rep(0,4)) [1] TRUE > sv2 <- summary(fit2, time=tvec, extend=TRUE)$surv > sv2 <- t(matrix(sv2, ncol=2)) # row 1= female, row2 = male > > # residuals are the same as for separate models > fit2a <- survfit(Surv(futime, death) ~1, mdata, subset=( sex=='F')) > fit2b <- survfit(Surv(futime, death) ~1, mdata, subset= (sex=='M')) > fem <- (mdata$sex=='F') > rr2a <- resid(fit2a, times=tvec) > rr2b <- resid(fit2b, times=tvec) > aeq(rr2a, rr2[fem,]) # row names won't be equal [1] TRUE > aeq(rr2b, rr2[!fem,]) [1] TRUE > > # one time point > ps2a <- pseudo(fit2a, time=100) > aeq(ps2a, sv2[1,2] + fit2a$n[1]* rr2a[,2]) [1] TRUE > ps2b <- pseudo(fit2b, time=100) > aeq(ps2b, sv2[2,2] + fit2b$n[1]* rr2b[,2]) [1] TRUE > > # overall psuedo are the same as for separate models > # (each row of mdata belongs to a single curve) > ps2c <- pseudo(fit2, time=100) > aeq(ps2c[ fem], ps2a) [1] TRUE > aeq(ps2c[!fem], ps2b) [1] TRUE > > # multiple time points > ps2d <- pseudo(fit2a, times=tvec) > aeq(ps2d, sv2[1, col(rr2a)] + fit2$n[1]* rr2a) [1] TRUE > ps2e <- pseudo(fit2b, times=tvec) > aeq(ps2e, sv2[2, col(rr2b)] + fit2$n[2]* rr2b) [1] TRUE > > ps2f <- pseudo(fit2, times=tvec) > aeq(ps2d, ps2f[ fem,]) [1] TRUE > aeq(ps2e, ps2f[!fem,]) [1] TRUE > > # Repeat the process for a multi-state model > fit3 <- survfit(Surv(etime, event) ~ sex, mdata) > fit3a <- survfit(Surv(etime, event) ~1, mdata, subset= (sex=='F')) > fit3b <- survfit(Surv(etime, event) ~1, mdata, subset= (sex=='M')) > rr3 <- resid(fit3, times=tvec) > aeq(apply(rr3, 2:3, sum), matrix(0,3,4)) # resids sum to 0 for each state & time [1] TRUE > rr3a <- resid(fit3a, times=tvec) > rr3b <- resid(fit3b, times=tvec) > aeq(rr3[fem,,], rr3a) [1] TRUE > aeq(rr3[!fem,,], rr3b) [1] TRUE > > ps3 <- pseudo(fit3, times=tvec) > ps3a <- pseudo(fit3a, times=tvec) > ps3b <- pseudo(fit3b, times=tvec) > aeq(ps3[ fem,,], ps3a) [1] TRUE > aeq(ps3[!fem,,], ps3b) [1] TRUE > > sv3 <- summary(fit3, times=tvec, extend=TRUE)$pstate > sv3 <- array(sv3, dim=c(4,2,3)) #times, curve, order > # ps3a has dimensions (number obs in fit3a, 3 states, 4 timepoints) > # to each of the 3x4 combinations we need to add the value of the > # survival curve at that time. A loop is easiest > temp1 <- array(0, dim= dim(rr3a)) > temp2 <- array(0, dim= dim(rr3b)) > for (i in 1:3) { # each of the 3 states + for (j in 1:4) { # each of the 4 times + temp1[, i,j] <- sv3[j,1,i] + fit3$n[1]*rr3a[,i,j] + temp2[, i,j] <- sv3[j,2,i] + fit3$n[2]*rr3b[,i,j] + } + } > aeq(temp1, ps3a) [1] TRUE > aeq(temp2, ps3b) [1] TRUE > > ########################### > # All again, just the same, for cumulative hazards > # Though there are 2 of them, vs 3 states. > # > rr1 <- resid(fit1, tvec, type="cumhaz") > aeq(colSums(rr1), rep(0,4)) [1] TRUE > sv1 <- summary(fit1, time=tvec, extend=TRUE)$cumhaz > > # one time point > ps1a <- pseudo(fit1, time=100, type="cumhaz") > aeq(ps1a, sv1[2] + fit1$n*rr1[,2]) [1] TRUE > # multiple > ps1b <- pseudo(fit1, time=tvec, type="cumhaz") > aeq(ps1b, sv1[col(rr1)] + fit1$n * rr1) [1] TRUE > > # Single endpoint, multiple curves > fit2 <- survfit(Surv(futime, death) ~ sex, mdata) > rr2 <- resid(fit2, time=tvec, type="cumhaz") > aeq(colSums(rr2), rep(0,4)) [1] TRUE > sv2 <- summary(fit2, time=tvec, extend=TRUE)$cumhaz > sv2 <- t(matrix(sv2, ncol=2)) # row 1= female, row2 = male > > # residuals are the same as for separate models > rr2a <- resid(fit2a, times=tvec, type= "cumhaz") > rr2b <- resid(fit2b, times=tvec, type= "cumhaz") > aeq(rr2a, rr2[fem,]) [1] TRUE > aeq(rr2b, rr2[!fem,]) [1] TRUE > > # one time point > ps2a <- pseudo(fit2a, time=100, type="cumhaz") > aeq(ps2a, sv2[1,2] + fit2a$n[1]* rr2a[,2]) [1] TRUE > ps2b <- pseudo(fit2b, time=100, type="cumhaz") > aeq(ps2b, sv2[2,2] + fit2b$n[1]* rr2b[,2]) [1] TRUE > > # overall psuedo are the same as for separate models > # (each row of mdata belongs to a single curve) > ps2c <- pseudo(fit2, time=100, type="cumhaz") > aeq(ps2c[ fem], ps2a) [1] TRUE > aeq(ps2c[!fem], ps2b) [1] TRUE > > # multiple time points > ps2d <- pseudo(fit2a, times=tvec, type="cumhaz") > aeq(ps2d, sv2[1, col(rr2a)] + fit2$n[1]* rr2a) [1] TRUE > ps2e <- pseudo(fit2b, times=tvec, type= "cumhaz") > aeq(ps2e, sv2[2, col(rr2b)] + fit2$n[2]* rr2b) [1] TRUE > > ps2f <- pseudo(fit2, times=tvec, type="cumhaz") > aeq(ps2d, ps2f[ fem,]) [1] TRUE > aeq(ps2e, ps2f[!fem,]) [1] TRUE > > # Repeat the process for a multi-state model > rr3 <- resid(fit3, times=tvec, type="cumhaz") > aeq(apply(rr3, 2:3, sum), matrix(0, 2,4)) [1] TRUE > rr3a <- resid(fit3a, times=tvec, type="cumhaz") > rr3b <- resid(fit3b, times=tvec, type="cumhaz") > aeq(rr3[fem,,], rr3a) [1] TRUE > aeq(rr3[!fem,,], rr3b) [1] TRUE > > ps3 <- pseudo(fit3, times=tvec, type="cumhaz") > ps3a <- pseudo(fit3a, times=tvec, type="cumhaz") > ps3b <- pseudo(fit3b, times=tvec, type="cumhaz") > aeq(ps3[ fem,,], ps3a) [1] TRUE > aeq(ps3[!fem,,], ps3b) [1] TRUE > > sv3 <- summary(fit3, times=tvec, extend=TRUE)$cumhaz > sv3 <- array(sv3, dim=c(4,2,2)) #times, curve, hazard > # ps3a has dimensions (number obs in fit3a, 4 timepoints, 3 states) > # to each of the 4x3 combinations we need to add the value of the > # survival curve at that time. A loop is easiest > temp1 <- array(0, dim= dim(rr3a)) > temp2 <- array(0, dim= dim(rr3b)) > for (i in 1:2) { # each of the 2 hazard + for (j in 1:4) { # each of the 4 timepoints + temp1[, i,j] <- sv3[j,1,i] + fit3$n[1]*rr3a[,i,j] + temp2[, i,j] <- sv3[j,2,i] + fit3$n[2]*rr3b[,i,j] + } + } > aeq(temp1, ps3a) [1] TRUE > aeq(temp2, ps3b) [1] TRUE > > ################################################# > # Last, one more time with AUC > # A bit more bother, since summary.survfit only returns AUC for one time > # value at a time. It also does not like times before the first event > # > tvec <- tvec[2:4] > > rr1 <- resid(fit1, tvec, type="auc") > aeq(colSums(rr1), rep(0,3)) [1] TRUE > afun <- function(fit, times) { + ntime <- length(times) + if (length(fit$strata)) xfun <- function(x) x$table[, "rmean"] + else xfun <- function(x) x$table["rmean"] + + temp <- xfun(summary(fit, rmean=times[1])) + if (ntime==1) return(temp) + + result <- matrix(0, ntime, length(temp)) + result[1,] <- temp + for (i in 2:ntime) + result[i,] <- xfun(summary(fit, rmean=times[i])) + drop(result) + } > > sv1 <- afun(fit1, tvec) > > # one time point > ps1a <- pseudo(fit1, time=tvec[1], type="auc") > aeq(ps1a, sv1[1] + fit1$n*rr1[,1]) [1] TRUE > # multiple > ps1b <- pseudo(fit1, time=tvec, type="auc") > aeq(ps1b, sv1[col(rr1)] + fit1$n * rr1) [1] TRUE > > # Single endpoint, multiple curves > rr2 <- resid(fit2, time=tvec, type="auc") > sv2 <- t(afun(fit2, tvec)) > aeq(colSums(rr2), rep(0,3)) [1] TRUE > > # residuals are the same as for separate models > rr2a <- resid(fit2a, times=tvec, type= "auc") > rr2b <- resid(fit2b, times=tvec, type= "auc") > aeq(rr2a, rr2[fem,]) [1] TRUE > aeq(rr2b, rr2[!fem,]) [1] TRUE > > # one time point > ps2a <- pseudo(fit2a, time=100, type="auc") > aeq(ps2a, sv2[1,1] + fit2a$n[1]* rr2a[,1]) [1] TRUE > ps2b <- pseudo(fit2b, time=100, type="auc") > aeq(ps2b, sv2[2,1] + fit2b$n[1]* rr2b[,1]) [1] TRUE > > # overall psuedo are the same as for separate models > # (each row of mdata belongs to a single curve) > ps2c <- pseudo(fit2, time=100, type="auc") > aeq(ps2c[ fem], ps2a) [1] TRUE > aeq(ps2c[!fem], ps2b) [1] TRUE > > # multiple time points > ps2d <- pseudo(fit2a, times=tvec, type="auc") > aeq(ps2d, sv2[1, col(rr2a)] + fit2$n[1]* rr2a) [1] TRUE > ps2e <- pseudo(fit2b, times=tvec, type= "auc") > aeq(ps2e, sv2[2, col(rr2b)] + fit2$n[2]* rr2b) [1] TRUE > > ps2f <- pseudo(fit2, times=tvec, type="auc") > aeq(ps2d, ps2f[ fem,]) [1] TRUE > aeq(ps2e, ps2f[!fem,]) [1] TRUE > > # Repeat the process for a multi-state model > rr3 <- resid(fit3, times=tvec, type="auc") > aeq(apply(rr3, 2:3, sum), matrix(0, 3,3)) [1] TRUE > rr3a <- resid(fit3a, times=tvec, type="auc") > rr3b <- resid(fit3b, times=tvec, type="auc") > aeq(rr3[fem,,], rr3a) [1] TRUE > aeq(rr3[!fem,,], rr3b) [1] TRUE > > ps3 <- pseudo(fit3, times=tvec, type="auc") > ps3a <- pseudo(fit3a, times=tvec, type="auc") > ps3b <- pseudo(fit3b, times=tvec, type="auc") > aeq(ps3[ fem,,], ps3a) [1] TRUE > aeq(ps3[!fem,,], ps3b) [1] TRUE > > sv3 <- rbind(summary(fit3, rmean=tvec[1])$table[,"rmean"], + summary(fit3, rmean=tvec[2])$table[,"rmean"], + summary(fit3, rmean=tvec[3])$table[,"rmean"]) > sv3 <- array(sv3, dim=c(3,2,3)) #times, curve, state > # ps3a has dimensions (number obs in fit3a, 4 timepoints, 3 states) > # to each of the 4x3 combinations we need to add the value of the > # survival curve at that time. A loop is easiest > temp1 <- array(0, dim= dim(rr3a)) > temp2 <- array(0, dim= dim(rr3b)) > for (i in 1:3) { # each of the 3 states + for (j in 1:3) { # each of the 3 times + temp1[, i,j] <- sv3[j,1,i] + fit3$n[1]*rr3a[,i,j] + temp2[, i,j] <- sv3[j,2,i] + fit3$n[2]*rr3b[,i,j] + } + } > aeq(temp1, ps3a) [1] TRUE > aeq(temp2, ps3b) [1] TRUE > > # > # a data set with a missing value, and with a group that has only one obs > # a good test of edge cases > # > lfit1 <- survfit(Surv(time, status) ~ ph.ecog, lung) > # This will warn about points beyond the curve; ph.ecog==3 has a single point > # at time=118, and it will have one fewer obs than the data > p1 <- pseudo(lfit1, times=c(100, 200)) Warning message: In pseudo(lfit1, times = c(100, 200)) : requested time points are beyond the end of one or more curves > aeq(dim(p1), c(nrow(lung)-1, 2)) [1] TRUE > > > # This will have rows that match the data > lfit2 <- survfit(Surv(time, status) ~ ph.ecog, lung, na.action= na.exclude) > p2 <- pseudo(lfit2, time=c(100, 200)) Warning message: In pseudo(lfit2, time = c(100, 200)) : requested time points are beyond the end of one or more curves > aeq(dim(p2), c(nrow(lung), 2)) [1] TRUE > all(is.na(p2[is.na(lung$ph.ecog)])) # a row of missing was inserted [1] TRUE > > row3 <- which(!is.na(lung$ph.ecog) & lung$ph.ecog ==3) # the singleton row > all(p2[row3,] == c(1, 0)) [1] TRUE > > > proc.time() user system elapsed 0.622 0.024 0.643 survival/tests/update.R0000644000176200001440000000074714607006645014714 0ustar liggesuserslibrary(survival) # the way a +cluster() term is handled in coxph has implications for update. fit1 <- coxph(Surv(time, status) ~ age, cluster= inst, lung) fit2 <- coxph(Surv(time, status) ~ age + cluster(inst), lung) all.equal(fit1, fit2) fit3 <- coxph(Surv(time, status) ~ age + sex + cluster(inst), lung) test1 <- update(fit1, .~ .+ sex) all.equal(test1, fit3) # Gives a spurious warning message test2 <- update(fit1, . ~ age + sex + cluster(inst), lung) all.equal(test2, fit3) survival/tests/mstrata.Rout.save0000644000176200001440000001033214607325257016564 0ustar liggesusers R Under development (unstable) (2024-02-07 r85873) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > # > # Verify that using multiple states + proportional baselines > # will mimic a factor covariate > # > library(survival) > aeq <- function(x, y, ...) all.equal(as.vector(x), as.vector(y), ...) > > tdata <- subset(lung, ph.ecog < 3) # there is only one row with ph.ecog=3 > tdata$state <- factor(tdata$status, 1:2, c("censor", "death")) > tdata$cstate<- factor(tdata$ph.ecog, 0:2, c("ph0", "ph1", "ph2")) > tdata$id <- 1:nrow(tdata) > survcheck(Surv(time, state) ~ 1, id=id, istate=cstate, tdata) Call: survcheck(formula = Surv(time, state) ~ 1, data = tdata, id = id, istate = cstate) Unique identifiers Observations Transitions 226 226 163 Transitions table: to from death (censored) ph0 37 26 ph1 82 31 ph2 44 6 death 0 0 Number of subjects with 0, 1, ... transitions to each state: count state 0 1 death 63 163 (any) 63 163 > > # standard coxph fit, stratified by the ph0/1/2 groups > fit1 <- coxph(Surv(time, status) ~ age + sex + factor(ph.ecog), tdata, + ties="breslow") > # multi-state fit, where ph0/1/2 are states with a shared hazard > fit2 <- coxph(list(Surv(time, state) ~1, + 1:4 + 2:4 + 3:4~ age + sex/ common + shared), + id=id, istate=cstate, data= tdata, ties="breslow") > > aeq(coef(fit1), coef(fit2)) # the names are quite different, values the same [1] TRUE > all.equal(fit1$loglik, fit2$loglik) [1] TRUE > > # Three curves in the usual way: ph0, 1, or 2 for all time, common baseline > csurv1 <- survfit(fit1, newdata=expand.grid(age=65, sex=1, ph.ecog=0:2)) > > # Multistate: start in p0, p1, or p2 (the only place to go is death) > csurv2a <- survfit(fit2, newdata= list(age=65, sex=1), p0=c(1,0,0,0)) > csurv2b <- survfit(fit2, newdata= list(age=65, sex=1), p0=c(0,1,0,0)) > csurv2c <- survfit(fit2, newdata= list(age=65, sex=1), p0=c(0,0,1,0)) > > aeq(csurv1[1]$surv, csurv2a$pstate[,1,1]) [1] TRUE > aeq(csurv1[2]$surv, csurv2b$pstate[,1,2]) [1] TRUE > aeq(csurv1[3]$surv, csurv2c$pstate[,1,3]) [1] TRUE > > # Note that multi-state defaults to the Breslow, as it implements the Efron > # only imperfectly. > > # part 2: predicted survival for a multistate model that has a strata > mgus2$etime <- with(mgus2, ifelse(pstat==0, futime, ptime)) > temp <- with(mgus2, ifelse(pstat==0, 2*death, 1)) > mgus2$event <- factor(temp, 0:2, labels=c("censor", "pcm", "death")) > > dummy <- expand.grid(age=c(60, 80), mspike=1.2) > > cfit1 <- coxph(Surv(etime, event) ~ age + mspike +strata(sex), mgus2, id=id) > > csurv1 <- survfit(cfit1, newdata=dummy) > > cfit2 <- coxph(Surv(etime, event) ~ age + mspike, id=id, + init= coef(cfit1), iter=0, data=mgus2, subset=(sex=='F')) > csurv3 <- survfit(cfit2, newdata= expand.grid(age=c(60, 80), mspike=1.2)) > test <- c('n', 'time', 'n.risk', 'n.event', 'n.censor', 'pstate', 'cumhaz') > all.equal(unclass(csurv1[1,,])[test], unclass(csurv3)[test]) [1] TRUE > > > # Part 3: compare a shared baseline to identical baseline > if (FALSE) { + # not yet completed + fit3 <- coxph(list(Surv(time, state) ~1, + 1:4 + 2:4 + 3:4~ age + sex/ common + 1), + id=id, istate=cstate, data= tdata) + fit4 <- coxph(list(Surv(time, state) ~1, + 1:4 + 2:4 + 3:4~ age + sex/ 1), + id=id, istate=cstate, data= tdata) + + fit0 <- coxph(Surv(time, status) ~ age + sex, tdata, ties="breslow") + + survfit(fit3, newdata= list(age=65, sex=1)) + } > > proc.time() user system elapsed 1.085 0.106 1.183 survival/tests/r_sas.R0000644000176200001440000002302514612274303014525 0ustar liggesusersoptions(na.action=na.exclude) # preserve missings options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type library(survival) # # Reproduce example 1 in the SAS lifereg documentation # # this fit doesn't give the same log-lik that they claim fit1 <- survreg(Surv(time, status) ~ I(1000/(273.2+temp)), imotor, subset=(temp>150), dist='lognormal') summary(fit1) # This one, with the loglik on the transformed scale (the inappropriate # scale, Ripley & Venables would argue) does agree. # All coefs are of course identical. fit2 <- survreg(Surv(log(time), status) ~ I(1000/(273.2+temp)), imotor, subset=(temp>150), dist='gaussian') # Give the quantile estimates, which is the lower half of "output 48.1.5" # in the SAS 9.2 manual pp1 <- predict(fit1, newdata=list(temp=c(130,150)), p=c(.1, .5, .9), type='quantile', se=T) pp2 <- predict(fit1, newdata=list(temp=c(130,150)), p=c(.1, .5, .9), type='uquantile', se=T) pp1 temp130 <- matrix(0, nrow=3, ncol=6) temp130[,1] <- pp1$fit[1,] temp130[,2] <- pp1$se.fit[1,] temp130[,3] <- pp2$fit[1,] temp130[,4] <- pp2$se.fit[1,] temp130[,5] <- exp(pp2$fit[1,] - 1.64*pp2$se.fit[1,]) temp130[,6] <- exp(pp2$fit[1,] + 1.64*pp2$se.fit[1,]) dimnames(temp130) <- list(c("p=.1", "p=.2", "p=.3"), c("Time", "se(time)", "log(time)", "se[log(time)]", "lower 90", "upper 90")) print(temp130) # A set of examples, copied from the manual pages of SAS procedure # "reliability", which is part of their QC product. # color <- c("black", "red", "green", "blue", "magenta", "red4", "orange", "DarkGreen", "cyan2", "DarkViolet") palette(color) pdf(file='reliability.pdf') # # Insulating fluids example # # Adding a -1 to the fit just causes the each group to have it's own # intercept, rather than a global intercept + constrasts. The strata # statement allows each to have a separate scale ffit <- survreg(Surv(time) ~ factor(voltage) + strata(voltage) -1, ifluid) # Get predicted quantiles at each of the voltages # By default predict() would give a line of results for each observation, # I only want the unique set of x's, i.e., only 4 cases uvolt <- sort(unique(ifluid$voltage)) #the unique levels plist <- c(1, 2, 5, 1:9 *10, 95, 99)/100 pred <- predict(ffit, type='quantile', p=plist, newdata=data.frame(voltage=uvolt)) tfun <- function(x) log(-log(1-x)) matplot(t(pred), tfun(plist), type='l', log='x', lty=1, col=1:4, yaxt='n', xlab="Predicted", ylab="Quantile") axis(2, tfun(plist), format(100*plist), adj=1) kfit <- survfit(Surv(time) ~ voltage, ifluid, type='fleming') #KM fit for (i in 1:4) { temp <- kfit[i] points(temp$time, tfun(1-temp$surv), col=i, pch=i) } # Now a table temp <- array(0, dim=c(4,4,4)) #4 groups by 4 parameters by 4 stats temp[,1,1] <- ffit$coef # "EV Location" in SAS manual temp[,2,1] <- ffit$scale # "EV scale" temp[,3,1] <- exp(ffit$coef) # "Weibull Scale" temp[,4,1] <- 1/ffit$scale # "Weibull Shape" temp[,1,2] <- sqrt(diag(ffit$var))[1:4] #standard error temp[,2,2] <- sqrt(diag(ffit$var))[5:8] * ffit$scale temp[,3,2] <- temp[,1,2] * temp[,3,1] temp[,4,2] <- temp[,2,2] / (temp[,2,1])^2 temp[,1,3] <- temp[,1,1] - 1.96*temp[,1,2] #lower conf limits temp[,1,4] <- temp[,1,1] + 1.96*temp[,1,2] # upper # log(scale) is the natural parameter, in which the routine did its fitting # and on which the std errors were computed temp[,2, 3] <- exp(log(ffit$scale) - 1.96*sqrt(diag(ffit$var))[5:8]) temp[,2, 4] <- exp(log(ffit$scale) + 1.96*sqrt(diag(ffit$var))[5:8]) temp[,3, 3:4] <- exp(temp[,1,3:4]) temp[,4, 3:4] <- 1/temp[,2,4:3] dimnames(temp) <- list(uvolt, c("EV Location", "EV Scale", "Weibull scale", "Weibull shape"), c("Estimate", "SE", "lower 95% CI", "uppper 95% CI")) print(aperm(temp, c(2,3,1)), digits=5) rm(temp, uvolt, plist, pred, ffit, kfit) ##################################################################### # Turbine cracks data crack2 <- with(cracks, data.frame(day1=c(NA, days), day2=c(days, NA), n=c(fail, 167-sum(fail)))) cfit <- survreg(Surv(day1, day2, type='interval2') ~1, dist='weibull', data=crack2, weights=n) summary(cfit) #Their output also has Wiebull scale = exp(cfit$coef), shape = 1/(cfit$scale) # Draw the SAS plot # The "type=fleming" argument reflects that they estimate hazards rather than # survival, and forces a Nelson-Aalen hazard estimate # plist <- c(1, 2, 5, 1:8 *10)/100 plot(qsurvreg(plist, cfit$coef, cfit$scale), tfun(plist), log='x', yaxt='n', type='l', xlab="Weibull Plot for Time", ylab="Percent") axis(2, tfun(plist), format(100*plist), adj=1) kfit <- survfit(Surv(day1, day2, type='interval2') ~1, data=crack2, weight=n, type='fleming') # Only plot point where n.event > 0 # Why? I'm trying to match them. Personally, all should be plotted. who <- (kfit$n.event > 0) points(kfit$time[who], tfun(1-kfit$surv[who]), pch='+') points(kfit$time[who], tfun(1-kfit$upper[who]), pch='-') points(kfit$time[who], tfun(1-kfit$lower[who]), pch='-') text(rep(3,6), seq(.5, -1.0, length=6), c("Scale", "Shape", "Right Censored", "Left Censored", "Interval Censored", "Fit"), adj=0) text(rep(9,6), seq(.5, -1.0, length=6), c(format(round(exp(cfit$coef), 2)), format(round(1/cfit$scale, 2)), format(tapply(crack2$n, cfit$y[,3], sum)), "ML"), adj=1) # Now a portion of his percentiles table # I don't get the same SE as SAS, I haven't checked out why. The # estimates and se for the underlying Weibull model are the same. temp <- predict(cfit, type='quantile', p=plist, se=T) tempse <- sqrt(temp$se[1,]) mat <- cbind(temp$fit[1,], tempse, temp$fit[1,] -1.96*tempse, temp$fit[1,] + 1.96*tempse) dimnames(mat) <- list(plist*100, c("Estimate", "SE", "Lower .95", "Upper .95")) print(mat) # # The cracks data has a particularly easy estimate, so use # it to double check code time <- c(crack2$day2[1], (crack2$day1 + crack2$day2)[2:8]/2, crack2$day1[9]) cdf <- cumsum(crack2$n)/sum(crack2$n) all.equal(kfit$time, time) all.equal(kfit$surv, 1-cdf[c(1:8,8)]) rm(time, cdf, kfit) ####################################################### # # Replacement of valve seats in diesel engines # The input data has id, time, and an indicator of whether there was an # event at that time: 0=no, 1=yes. No one has an event at their last time. # The input data has two engines with dual failures: 328 loses 2 valves at # time 653, and number 402 loses 2 at time 139. For each, fudge the first # time to be .1 days earlier. # ties <- which(diff(valveSeat$time)==0 & diff(valveSeat$id)==0) temp <- valveSeat$time temp[ties] <- temp[ties] - .1 n <- length(temp) first <- !duplicated(valveSeat$id) vtemp <- with(valveSeat, data.frame(id =id, time1= ifelse(first, 0, c(0, temp[-n])), time2= temp, status=status)) kfit <- survfit(Surv(time1, time2, status) ~1, vtemp, id=id) plot(kfit, fun='cumhaz', ylab="Sample Mean Cumulative Failures", xlab='Time') title("Valve replacement data") # The summary.survfit function doesn't have an option for printing out # cumulative hazards instead of survival --- need to add that # so I just reprise the central code of print.summary.survfit xx <- summary(kfit) temp <- cbind(xx$time, xx$n.risk, xx$n.event, xx$cumhaz, xx$std.chaz, -log(xx$upper), -log(xx$lower)) dimnames(temp) <- list(rep("", nrow(temp)), c("time", "n.risk", "n.event", "Cum haz", "std.err", "lower 95%", "upper 95%")) print(temp, digits=2) # Note that I have the same estimates but different SE's than SAS. We are using # a different estimator. It's a statistical argument as to which is # better (one could defend both sides): SAS the more standard estimate found # in the reliability literature, mine the estimate from statistics literature rm(temp, kfit, xx) ###################################################### # Turbine data, lognormal fit turbine <- read.table('data.turbine', col.names=c("time1", "time2", "n")) tfit <- survreg(Surv(time1, time2, type='interval2') ~1, turbine, dist='lognormal', weights=n, subset=(n>0)) summary(tfit) # Now, do his plot, but put bootstrap confidence bands on it! # First, make a simple data set without weights tdata <- turbine[rep(1:nrow(turbine), turbine$n),] qstat <- function(data) { temp <- survreg(Surv(time1, time2, type='interval2') ~1, data=data, dist='lognormal') qsurvreg(plist, temp$coef, temp$scale, dist='lognormal') } {if (exists('bootstrap')) { set.seed(1953) # a good year :-) bfit <- bootstrap(tdata, qstat, B=1000) bci <- limits.bca(bfit, probs=c(.025, .975)) } else { values <- matrix(0, nrow=1000, ncol=length(plist)) n <- nrow(tdata) for (i in 1:1000) { subset <- sample(1:n, n, replace=T) values[i,] <- qstat(tdata[subset,]) } bci <- t(apply(values,2, quantile, c(.05, .95))) } } xmat <- cbind(qsurvreg(plist, tfit$coef, tfit$scale, dist='lognormal'), bci) matplot(xmat, qnorm(plist), type='l', lty=c(1,2,2), col=c(1,1,1), log='x', yaxt='n', ylab='Percent', xlab='Time of Cracking (Hours x 100)') axis(2, qnorm(plist), format(100*plist), adj=1) title("Turbine Data") kfit <- survfit(Surv(time1, time2, type='interval2') ~1, data=tdata) points(kfit$time, qnorm(1-kfit$surv), pch='+') dev.off() #close the plot file survival/tests/residsfx.Rtemp0000644000176200001440000000612214654222147016137 0ustar liggesusers# # Residuals from a coxph survival curve # # This functionality is currently in development, and this test fails. # library(survival) aeq <- function(x,y, ...) all.equal(as.vector(x), as.vector(y), ...) # A test using validation set 1 book1 <- data.frame(time= c(9, 3,1,1,6,6,8), status=c(1,NA,1,0,1,1,0), x= c(0, 2,1,1,1,0,0)) fit1 <- coxph(Surv(time, status) ~x, book1, ties='breslow') s1 <- survfit(fit1, newdata = list(x=c(0, 1.5)), censor=FALSE) test1 <- resid(s1, times=c(2,6,10), type="cumhaz") aeq(dim(test1), c(6,3,2)) # 6 subjects, 3 time points, 2 Cox model curves r <- (3 + sqrt(33))/2 # true risk score aeq(coef(fit1), log(r)) denom <- c(3*r+3, r+3, 1) haz <- c(1,2,1)/denom chaz <- cumsum(haz) xscale <- exp(c(0, 1.5) * log(r)) aeq(outer(chaz, xscale, '*'), s1$cumhaz) # hazards for the two curves # the dM part of the residual risk <- c(1,r,r,r, 1, 1) # per subject risk dN <- cbind(c(0,1,0,0,0,0), c(0, 0 ,0, 1, 1, 0), c(1, 0,0,0,0, 0)) ytime <- c(9, 1,1,6,6, 8) # time values in the data dH <- cbind(haz[1], ifelse(ytime <6, 0, haz[2]), ifelse(ytime <9, 0, haz[3])) dM <- dN - risk*dH # increments to the martingale residuals aeq(rowSums(dM), fit1$resid) # double check # The hazard has increments at time 1, 6, and 9. # Mterm1[i,j] has the influence of observation i on the hazard increment at the # jth event time, for curve 1, first portion. Mterm2 = second curve. # Since there is only 1 subject at risk at time 9 the increment at time 9 is # guarranteed to be 1.0, hence all the derivatives are 0 Mterm1 <- dM %*% diag(1/denom) * xscale[1] Mterm2 <- dM %*% diag(1/denom) * xscale[2] # the influence of beta sresid <- resid(fit1, 'score') vmat <- vcov(fit1) aeq(s1$newdata$x, c(0, 1.5)) # verify xbar <- c(r/(r+1), r/(r+3), 0) Bterm1 <- sresid %*% vmat %*% ((xbar -0)* haz) *xscale[1] Bterm2 <- sresid %*% vmat %*% ((xbar -1.5)*haz)* xscale[2] zz <- sresid %*% vmat %*% cumsum((xbar -0)* haz) *xscale[1] IJ.haz <- array(c(Mterm1- Bterm1, Mterm2- Bterm2), dim=c(6,3,2)) IJ.cumhaz <- IJ.haz IJ.cumhaz[,2,] <- IJ.cumhaz[,1,] + IJ.haz[,2,] IJ.cumhaz[,3,] <- IJ.cumhaz[,2,] + IJ.haz[,3,] aeq(test1, IJ.cumhaz) test2 <- resid(s1, times=c(2,6,10), type="pstate") aeq(test2, -IJ.cumhaz* rep(s1$surv, each=6)) # The AUC is a weighted sum. # If A(0, t) is the AUC from 0 to t, and d_i is the set of event times < t # deriv = sum( -A(d_i, t) IJ.haz(d_i)) # = sum((A(0,d_i)- A(0,t)) IJ.haz(d_i)) # AUC at the event times, and the reporting times: 1,6, 9, 10 delta <- c(1, 5, 3, 1) #widths of intervals surv <- summary(s1, time=c(0, 1, 6, 9))$surv auc <- apply(delta*surv, 2, cumsum) # two column matrix wt1 <- pmin(auc[1:3,] - rep(auc[1,], each=3), 0) wt6 <- pmin(auc[1:3,] - rep(auc[2,], each=3), 0) wt10<- pmin(auc[1:3,] - rep(auc[4,], each=3), 0) # wt1 is all 0, no surprise as AUC(1)= 1 for any case weight t1 <- IJ.haz[,,1] %*% cbind(wt1[,1], wt6[,1], wt10[,1]) #curve1 t2 <- IJ.haz[,,2] %*% cbind(wt1[,2], wt6[,2], wt10[,2]) #curve2 IJ.auc <- array(c(t1, t2), dim=c(6,3,2)) test3 <- resid(s1, times=c(2,6,10), type='pstate') aeq(test3, IJ.auc) survival/tests/multi3.Rout.save0000644000176200001440000001123114654222147016321 0ustar liggesusers R Under development (unstable) (2024-06-14 r86747) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: aarch64-unknown-linux-gnu R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(survival) > aeq <- function(x, y, ...) all.equal(as.vector(x), as.vector(y), ...) > > # Check that a multi-state model, correctly set up, gives the same > # solution as a time-dependent covariate. > # This is a stronger test than mstrata: there the covariate which was mapped > # into a state was constant, here it is time-dependent. > # > # First build the TD data set from pbcseq, with a categorical bilirubin > pbc1 <- pbcseq > pbc1$bili4 <- cut(pbc1$bili, c(0,1, 2,4, 100), + c("normal", "1-2x", "2-4x", ">4")) > ptemp <- subset(pbc1, !duplicated(id)) # first row of each > > pbc2 <- tmerge(ptemp[, c("id", "age", "sex")], ptemp, id, + death= event(futime, status==2)) > > pbc2 <- tmerge(pbc2, pbc1, id=id, bili = tdc(day, bili), + bili4 = tdc(day, bili4), bstat = event(day, as.numeric(bili4))) > btemp <- with(pbc2, ifelse(death, 5, bstat)) > > # a row with the same starting and ending bili4 level is not an event > b2 <- ifelse(((as.numeric(pbc2$bili4)) == btemp), 0, btemp) > pbc2$bstat <- factor(b2, 0:5, + c("censor", "normal", "1-2x", "2-4x", ">4", "death")) > check1 <- survcheck(Surv(tstart, tstop, bstat) ~ 1, istate= bili4, + id = id, data=pbc2) > check1$transitions to from normal 1-2x 2-4x >4 death (censored) normal 0 81 10 3 9 77 1-2x 61 0 68 15 9 36 2-4x 2 33 0 94 12 24 >4 1 3 28 0 110 35 death 0 0 0 0 0 0 > all.equal(as.character(pbc2$bili4), as.character(check1$istate)) [1] TRUE > # the above verifies that I created the data set correctly > > # Standard coxph fit with a time dependent bili4 variable. > fit1 <- coxph(Surv(tstart, tstop, death) ~ age + bili4, pbc2, ties='breslow') > > # An additive multi-state fit, where bili4 is a state > # The three forms below should all give identical models > fit2 <- coxph(list(Surv(tstart, tstop, bstat) ~ 1, + c(1:4):5 ~ age / common + shared), id= id, istate=bili4, + data=pbc2) > fit2b <- coxph(list(Surv(tstart, tstop, bstat) ~ 1, + 1:5 + 2:5 + 3:5 + 4:5 ~ age / common + shared), + id= id, istate=bili4, data=pbc2) > fit2c <- coxph(list(Surv(tstart, tstop, bstat) ~ 1, + 0:5 ~ age / common + shared), + id= id, istate=bili4, data=pbc2) > > # Make sure the names are correct and the coefficients match > aeq(coef(fit1), coef(fit2)) [1] TRUE > aeq(names(coef(fit2)), c("age", "ph(2:5/1:5)", "ph(3:5/1:5)", "ph(4:5/1:5)")) [1] TRUE > all.equal(coef(fit2), coef(fit2b)) [1] TRUE > all.equal(coef(fit2), coef(fit2c)) [1] TRUE > > # Now a model with a separate age effect for each bilirubin group > fit3 <- coxph(Surv(tstart, tstop, death) ~ age*bili4, pbc2, ties='breslow') > fit3b <- coxph(Surv(tstart, tstop, death) ~ bili4/age, pbc2, ties='breslow') > fit4 <- coxph(list(Surv(tstart, tstop, bstat) ~ 1, + c(1:4):5 ~ age / shared), id= id, istate=bili4, + data=pbc2) > all.equal(fit3$loglik, fit3b$loglik) [1] TRUE > all.equal(fit3$loglik, fit4$loglik) [1] TRUE > > # The coefficients are quite different due to different codings for dummy vars > # Unpack the interaction, first 4 coefs will be the age effect within each > # bilirubin group > temp <- c(coef(fit3)[1] + c(0, coef(fit3)[5:7]), coef(fit3)[2:4]) > names(temp)[1:4] <- c("age1", "age2", "age3", "age4") > aeq(temp, coef(fit3b)[c(4:7, 1:3)]) [1] TRUE > aeq(temp, coef(fit4)) [1] TRUE > > # Third, a model with separate baseline hazards for each bili group > fit5 <- coxph(Surv(tstart, tstop, death) ~ strata(bili4)/age, pbc2, + cluster=id, ties='breslow') > fit6 <- coxph(list(Surv(tstart, tstop, bstat) ~ 1, 0:5 ~ age), + id=id, istate=bili4, pbc2) > aeq(coef(fit5), coef(fit6)) [1] TRUE > aeq(fit5$var, fit6$var) [1] TRUE > aeq(fit5$naive.var, fit6$naive.var) [1] TRUE > > proc.time() user system elapsed 0.536 0.024 0.557 survival/tests/book7.R0000644000176200001440000000343214613770353014446 0ustar liggesuserslibrary(survival) options(na.action=na.exclude) options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type # # Tests from the appendix of Therneau and Grambsch # Data set 1 + exact method test1 <- data.frame(time= c(9, 3,1,1,6,6,8), status=c(1,NA,1,0,1,1,0), x= c(0, 2,1,1,1,0,0)) byhand7 <- function(beta) { r <- exp(beta) loglik <- 2*(beta - log(3*r + 3)) u <- 2/(r+1) imat <- 2*r/(r+1)^2 haz <- c(1/(3*r+3), 2/(r+3), 0, 1 ) ties <- c(1,1,2,2,3,4) wt <- c(r,r,r,1,1,1) mart <- c(1,0,1,1,0,1) - wt* (cumsum(haz))[ties] #martingale residual list(loglik=loglik, u=u, imat=imat, mart=mart) } aeq <- function(x,y) all.equal(as.vector(x), as.vector(y)) fit0 <-coxph(Surv(time, status) ~x, test1, iter=0, method='exact') truth0 <- byhand7(0) aeq(truth0$loglik, fit0$loglik[1]) aeq(1/truth0$imat, fit0$var) aeq(truth0$mart, fit0$residuals[c(2:6,1)]) fit1 <- coxph(Surv(time, status) ~x, test1, iter=1, method='exact') aeq(fit1$coefficients, truth0$u*fit0$var) truth1 <- byhand7(fit1$coefficients) aeq(fit1$loglik[2], truth1$loglik) aeq(1/truth1$imat, fit1$var) aeq(truth1$mart, resid(fit1)[c(3:7,1)]) # Beta is infinite for this model, so we will get a warning message fit2 <- coxph(Surv(time, status) ~x, test1, method='exact') aeq(resid(fit2)[-2], c(0, 2/3, -1/3, -4/3, 1, 0)) #values from the book # # Now a multivariate case: start/stop data uses a different C routine # zz <- rep(0, nrow(lung)) fit1 <- coxph(Surv(time, status) ~ age + ph.ecog + sex, lung, method="exact") fit2 <- coxph(Surv(zz, time, status) ~ age + ph.ecog + sex, lung, method="exact") aeq(fit1$loglik, fit2$loglik) aeq(fit1$var, fit2$var) aeq(fit1$score, fit2$score) aeq(fit1$residuals, fit2$residuals) survival/tests/ovarian.R0000644000176200001440000000372514607006645015070 0ustar liggesusersoptions(na.action=na.exclude) # preserve missings options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type library(survival) # # Test the coxph program on the Ovarian data summary(survfit(Surv(futime, fustat)~1, data=ovarian), censor=TRUE) # Various models coxph(Surv(futime, fustat)~ age, data=ovarian) coxph(Surv(futime, fustat)~ resid.ds, data=ovarian) coxph(Surv(futime, fustat)~ rx, data=ovarian) coxph(Surv(futime, fustat)~ ecog.ps, data=ovarian) coxph(Surv(futime, fustat)~ resid.ds + rx + ecog.ps, data=ovarian) coxph(Surv(futime, fustat)~ age + rx + ecog.ps, data=ovarian) coxph(Surv(futime, fustat)~ age + resid.ds + ecog.ps, data=ovarian) coxph(Surv(futime, fustat)~ age + resid.ds + rx, data=ovarian) # Residuals fit <- coxph(Surv(futime, fustat)~ age + resid.ds + rx + ecog.ps, data=ovarian) resid(fit) resid(fit, 'dev') resid(fit, 'scor') resid(fit, 'scho') fit <- coxph(Surv(futime, fustat) ~ age + ecog.ps + strata(rx), data=ovarian) summary(fit) summary(survfit(fit)) sfit <- survfit(fit, list(age=c(30,70), ecog.ps=c(2,3))) #two columns sfit summary(sfit) # Check of offset + surv, added 7/2000 fit1 <- coxph(Surv(futime, fustat) ~ age + rx, ovarian, control=coxph.control(eps=1e-8)) fit2 <- coxph(Surv(futime, fustat) ~ age + offset(rx*fit1$coef[2]), ovarian, control=coxph.control(eps=1e-8)) all.equal(fit1$coef[1], fit2$coef[1]) fit <- coxph(Surv(futime, fustat) ~ age + offset(rx), ovarian) survfit(fit, censor=FALSE)$surv^exp(-1.5) # Check it by hand -- there are no tied times # Remember that offsets from survfit are centered, which is 1.5 for # this data set. eta <- fit$coef*(ovarian$age - fit$mean) + (ovarian$rx - 1.5) ord <- order(ovarian$futime) risk <- exp(eta[ord]) rsum <- rev(cumsum(rev(risk))) # cumulative risk at each time point dead <- (ovarian$fustat[ord]==1) baseline <- cumsum(1/rsum[dead]) all.equal(survfit(fit, censor=FALSE)$surv, exp(-baseline)) rm(fit, fit1, fit2, ord, eta, risk, rsum, dead, baseline, sfit) survival/tests/doublecolon.Rout.save0000644000176200001440000000702514727704020017413 0ustar liggesusers R Under development (unstable) (2024-12-10 r87437) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > # > # Check that my updates to a. remove survival:: out of formulas and > # b: ensure that Surv(), cluster(), strata(), pspline(), and tt() use > # these functions from the survival namespace, when called in a coxph, > # survfit, survreg, etc formula > library(survival) > aeq <- function(x,y, ...) all.equal(as.vector(x), as.vector(y)) > > c1 <- coxph(Surv(time, status) ~ age + strata(inst), lung) > > # a local Surv that gives the wrong answer but won't error out (makes it > # simpler to write the tests) > Surv <- function(x, ...) survival::Surv(x, rep(1, length(x))) > c2 <- coxph(Surv(time, status) ~ age + strata(inst), lung) > > c3 <- coxph(survival::Surv(time, status) ~ age + survival::strata(inst), lung) > # in prior releases the above fits a different model, stata is not recognized > # as a special and becomes a factor > > all.equal(coef(c1), coef(c2)) [1] TRUE > all.equal(coef(c1), coef(c3)) [1] TRUE > > !(c2$call$formula == c3$call$formula) # c3$call will have 2 survival::, c2 none [1] TRUE > deparse1(c3$formula) == "survival::Surv(time, status) ~ age + strata(inst)" [1] TRUE > > nocall <- function(x, omit="call") { + z <- unclass(x) # needed for any object with a [ method + z[-match(omit, names(z))] # all but $call + } > y2 <- with(lung, survival::Surv(time, status)) # outside a formula > > fit1a <- coxph(Surv(time, status) ~ age + strata(sex) + cluster(inst), lung) > fit1b <- coxph(Surv(time, status) ~ age + survival::strata(sex) + + survival::cluster(inst), lung) > fit1c <- coxph(y2 ~ age + strata(sex) + survival::cluster(inst), lung) > all.equal(nocall(fit1a), nocall(fit1b)) [1] TRUE > aeq(coef(fit1a), coef(fit1c)) [1] TRUE > > fit2a <- survdiff(Surv(time, status) ~ sex + strata(inst), lung) > fit2b <- survdiff(Surv(time, status) ~ sex + survival::strata(inst), + data= lung) > all.equal(nocall(fit2a), nocall(fit2b)) [1] TRUE > aeq(rowSums(fit2a$obs), c(111, 53)) # make sure it use the correct Surv [1] TRUE > > fit3a <- survreg(Surv(time, status) ~ ph.ecog + strata(sex), lung) > fit3b <- survreg(Surv(time, status) ~ ph.ecog + survival::strata(sex), + data= survival::lung) > all.equal(nocall(fit3a), nocall(fit3b)) [1] TRUE > > fit4a <- concordance(Surv(time, status) ~ ph.ecog + strata(sex), lung) > fit4b <- concordance(Surv(time, status) ~ ph.ecog + survival::strata(sex), lung) > all.equal(nocall(fit4a), nocall(fit4b)) [1] TRUE > > fit5a <- survfit(Surv(time, status) ~ sex, lung) > fit5b <- survfit(Surv(time, status) ~ strata(sex), lung) > fit5c <- survfit(Surv(time, status) ~ survival::strata(sex), lung) > fit5d <- survfit(y2 ~ survival::strata(sex), lung) > all.equal(nocall(fit5a, c("call", "strata")), nocall(fit5b, c("call", "strata"))) [1] TRUE > all.equal(nocall(fit5b), nocall(fit5c)) [1] TRUE > aeq(fit5a$surv, fit5d$surv) [1] TRUE > > proc.time() user system elapsed 1.231 0.072 1.293 survival/tests/concordance.Rout.save0000644000176200001440000001534514613770353017376 0ustar liggesusers R Under development (unstable) (2024-04-17 r86441) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: aarch64-unknown-linux-gnu R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(survival) > options(na.action=na.exclude) # preserve missings > options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type > > # > # Simple tests of concordance > # > aeq <- function(x,y, ...) all.equal(as.vector(x), as.vector(y), ...) > > grank <- function(x, time, grp, wt) + unlist(tapply(x, grp, rank)) > grank2 <- function(x, time, grp, wt) { #for case weights + if (length(wt)==0) wt <- rep(1, length(x)) + z <- double(length(x)) + for (i in unique(grp)) { + indx <- which(grp==i) + temp <- tapply(wt[indx], x[indx], sum) + temp <- temp/2 + c(0, cumsum(temp)[-length(temp)]) + z[indx] <- temp[match(x[indx], names(temp))] + } + z + } > > # Concordance by brute force. O(n^2) algorithm, but ok for n<500 or so > allpair <- function(x, time, status, wt, all=FALSE) { + if (missing(wt)) wt <- rep(1, length(x)) + count <- sapply(which(status==1), function(i) { + atrisk <- (time > time[i]) | (time==time[i] & status==0) + temp <- tapply(wt[atrisk], factor(sign(x[i] -x[atrisk]), c(1, -1, 0)), + sum) + wt[i]* c(ifelse(is.na(temp), 0, temp), + (sum(wt[time==time[i] & status==1]) - wt[i])/2) + }) + rownames(count) <- c("concordant", "discordant", "tied.x", "tied.y") + if (all) { + colnames(count) <- time[status==1] + t(count) + } + else rowSums(count) + } > > > # The std of C = std(numerator)/(number of comparable pairs) > # The information matrix of a Cox model is = to the var(C-D) > cfun <- function(fit) fit$cvar * sum(fit$count[1:3])^2 > > tdata <- aml[aml$x=='Maintained', c("time", "status")] > tdata$x <- c(1,6,2,7,3,7,3,8,4,4,5) > tdata$wt <- c(1,2,3,2,1,2,3,4,3,2,1) > fit <- concordance(Surv(time, status) ~x, tdata) > > aeq(fit$count[1:4], c(24,14,2,0)) [1] TRUE > cfit <- coxph(Surv(time, status) ~ tt(x), tdata, tt=grank, method='breslow', + iter=0, x=T) > cdt <- coxph.detail(cfit) > aeq(sum(cdt$imat), cfun(fit)) [1] TRUE > aeq(sum(2*cdt$score), diff(fit$count[1:2])) [1] TRUE > aeq(with(tdata, allpair(x, time, status)), c(14,24,2,0)) [1] TRUE > > # Lots of ties > tempy <- Surv(c(1,2,2,2,3,4,4,4,5,2), c(1,0,1,0,1,0,1,1,0,1)) > tempx <- c(5,5,4,4,3,3,7,6,5,4) > fit2 <- concordance(tempy ~ tempx) > addxy <- function(x) c(x[1:3], sum(x[4:5])) > aeq(addxy(fit2$count), allpair(tempx, tempy[,1], tempy[,2])) [1] TRUE > cfit2 <- coxph(tempy ~ tt(tempx), tt=grank, method='breslow', iter=0) > aeq(cfit2$var, 1/cfun(fit2)) [1] TRUE > > # Direct call > fit2b <- concordancefit(tempy, tempx) > fit2c <- concordancefit(tempy, tempx, std.err=FALSE) > all.equal(fit2[1:5], fit2b) [1] TRUE > all.equal(fit2b[1:3], fit2c) [1] TRUE > > # Bigger data > fit3 <- concordance(Surv(time, status) ~ age, lung, reverse=TRUE) > aeq(addxy(fit3$count), allpair(lung$age, lung$time, lung$status-1)) [1] TRUE > cfit3 <- coxph(Surv(time, status) ~ tt(age), lung, + iter=0, method='breslow', tt=grank, x=T) > cdt <- coxph.detail(cfit3) > aeq(sum(cdt$imat), cfun(fit3)) [1] TRUE > aeq(2*sum(cdt$score), diff(fit3$count[2:1])) [1] TRUE > > > # More ties > fit4 <- concordance(Surv(time, status) ~ ph.ecog, lung, reverse=TRUE) > aeq(addxy(fit4$count), allpair(lung$ph.ecog, lung$time, lung$status-1)) [1] TRUE > aeq(fit4$count[1:5], c(8392, 4258, 7137, 21, 7)) [1] TRUE > cfit4 <- coxph(Surv(time, status) ~ tt(ph.ecog), lung, + iter=0, method='breslow', tt=grank) > aeq(1/cfit4$var, cfun(fit4)) [1] TRUE > > # Case weights > fit5 <- concordance(Surv(time, status) ~ x, tdata, weights=wt, reverse=TRUE) > fit6 <- concordance(Surv(time, status) ~x, tdata[rep(1:11,tdata$wt),]) > aeq(addxy(fit5$count), with(tdata, allpair(x, time, status, wt))) [1] TRUE > aeq(fit5$count[1:4], c(70, 91, 7, 0)) # checked by hand [1] TRUE > aeq(fit5$count[1:3], fit6$count[c(2,1,3)]) #spurious "tied on time" values, ignore [1] TRUE > aeq(fit5$std, fit6$std) [1] TRUE > cfit5 <- coxph(Surv(time, status) ~ tt(x), tdata, weights=wt, + iter=0, method='breslow', tt=grank2) > cfit6 <- coxph(Surv(time, status) ~ tt(x), tdata[rep(1:11,tdata$wt),], + iter=0, method='breslow', tt=grank) > aeq(1/cfit6$var, cfun(fit6)) [1] TRUE > aeq(cfit5$var, cfit6$var) [1] TRUE > > # Start, stop simplest cases > fit7 <- concordance(Surv(rep(0,11), time, status) ~ x, tdata) > aeq(fit7$count, fit$count) [1] TRUE > aeq(fit7$std.err, fit$std.err) [1] TRUE > fit7 <- concordance(Surv(rep(0,11), time, status) ~ x, tdata, weights=wt) > aeq(fit5$count, fit7$count[c(2,1,3:5)]) #one reversed, one not [1] TRUE > > # Multiple intervals for some, but same risk sets as tdata > tdata2 <- data.frame(time1=c(0,3, 5, 6,7, 0, 4,17, 7, 0,16, 2, 0, + 0,9, 5), + time2=c(3,9, 13, 7,13, 18, 17,23, 28, 16,31, 34, 45, + 9,48, 60), + status=c(0,1, 1, 0,0, 1, 0,1, 0, 0,1, 1, 0, 0,1, 0), + x = c(1,1, 6, 2,2, 7, 3,3, 7, 3,3, 8, 4, 4,4, 5), + wt= c(1,1, 2, 3,3, 2, 1,1, 2, 3,3, 4, 3, 2,2, 1)) > fit8 <- concordance(Surv(time1, time2, status) ~x, tdata2, weights=wt, + reverse=TRUE) > aeq(fit5$count, fit8$count) [1] TRUE > aeq(fit5$std.err, fit8$std.err) [1] TRUE > cfit8 <- coxph(Surv(time1, time2, status) ~ tt(x), tdata2, weights=wt, + iter=0, method='breslow', tt=grank2) > aeq(1/cfit8$var, cfun(fit8)) [1] TRUE > > # Stratified > tdata3 <- data.frame(time1=c(tdata2$time1, rep(0, nrow(lung))), + time2=c(tdata2$time2, lung$time), + status = c(tdata2$status, lung$status -1), + x = c(tdata2$x, lung$ph.ecog), + wt= c(tdata2$wt, rep(1, nrow(lung))), + grp=rep(1:2, c(nrow(tdata2), nrow(lung)))) > fit9 <- concordance(Surv(time1, time2, status) ~x + strata(grp), + data=tdata3, weights=wt, reverse=TRUE) > aeq(fit9$count[1,], fit5$count) [1] TRUE > aeq(fit9$count[2,], fit4$count) [1] TRUE > > proc.time() user system elapsed 0.506 0.016 0.519 survival/tests/factor.Rout.save0000644000176200001440000000342014607006645016364 0ustar liggesusers R version 2.14.0 (2011-10-31) Copyright (C) 2011 The R Foundation for Statistical Computing ISBN 3-900051-07-0 Platform: i686-pc-linux-gnu (32-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > # > # Ensure that factors work in prediction > # > library(survival) Loading required package: splines > > options(na.action="na.exclude") # preserve missings > options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type > aeq <- function(x,y, ...) all.equal(as.vector(x), as.vector(y), ...) > > tfit <- coxph(Surv(time, status) ~ age + factor(ph.ecog), lung) > p1 <- predict(tfit, type='risk') > > # Testing NA handling is important too > keep <- (is.na(lung$ph.ecog) | lung$ph.ecog !=1) > lung2 <- lung[keep,] > p2 <- predict(tfit, type='risk', newdata=lung[keep,]) > aeq(p1[keep], p2) [1] TRUE > > # Same, for survreg > tfit <- survreg(Surv(time, status) ~ age + factor(ph.ecog), lung) > p1 <- predict(tfit, type='response') > p2 <- predict(tfit, type='response', newdata=lung2) > aeq(p1[keep], p2) [1] TRUE > > > # Now repeat it tossing the missings > options(na.action=na.omit) > keep2 <- (lung$ph.ecog[!is.na(lung$ph.ecog)] !=1) > > tfit2 <- survreg(Surv(time, status) ~ age + factor(ph.ecog), lung) > p3 <- predict(tfit2, type='response') > p4 <- predict(tfit2, type='response', newdata=lung2, na.action=na.omit) > aeq(p3[keep2] , p4) [1] TRUE > survival/tests/book1.Rout.save0000644000176200001440000001636414613770353016135 0ustar liggesusers R Under development (unstable) (2024-04-17 r86441) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: aarch64-unknown-linux-gnu R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(survival) > options(na.action=na.exclude) # preserve missings > options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type > aeq <- function(x,y) all.equal(as.vector(x), as.vector(y)) > > # > # Tests from the appendix of Therneau and Grambsch > # a. Data set 1 and Breslow estimate > # The data below is not in time order, to also test sorting, and has 1 NA > # > test1 <- data.frame(time= c(9, 3,1,1,6,6,8), + status=c(1,NA,1,0,1,1,0), + x= c(0, 2,1,1,1,0,0)) > > # Nelson-Aalen influence > s1 <- survfit(Surv(time, status) ~1, test1, id=1:7, influence=TRUE) > inf1 <- matrix(c(10, rep(-2,5), 10, -2, 7,7, -11, -11)/72, + ncol=2) > indx <- order(test1$time[!is.na(test1$status)]) > aeq(s1$influence.chaz[indx,], inf1[,c(1,2,2,2)]) [1] TRUE > > # KM influence > inf2 <- matrix(c(-20, rep(4,5), -10, 2, -13, -13, 17, 17, + rep(0,6))/144, ncol=3) > aeq(s1$influence.surv[indx,], inf2[, c(1,2,2,3)]) [1] TRUE > > # Fleming-Harrington influence > s2 <- survfit(Surv(time, status) ~ 1, test1, id=1:7, ctype=2, influence=2) > inf3 <- matrix(c( rep(c(5, -1), c(1, 5))/36, c(5,-1)/36, + c(21,21,-29, -29)/144), ncol=2) > aeq(s2$influence.chaz[indx,], inf3[,c(1,2,2,2)]) [1] TRUE > > > # Breslow estimate > byhand1 <- function(beta, newx=0) { + r <- exp(beta) + loglik <- 2*beta - (log(3*r+3) + 2*log(r+3)) + u <- (6 + 3*r - r^2) / ((r+1)*(r+3)) + imat <- r/(r+1)^2 + 6*r/(r+3)^2 + + x <- c(1,1,1,0,0,0) + status <- c(1,0,1,1,0,1) + xbar <- c(r/(r+1), r/(r+3), 0, 0) # at times 1, 6, 8 and 9 + haz <- c(1/(3*r+3), 2/(r+3), 0, 1 ) + ties <- c(1,1,2,2,3,4) + wt <- c(r,r,r,1,1,1) + mart <- c(1,0,1,1,0,1) - wt* (cumsum(haz))[ties] #martingale residual + + a <- 3*(r+1)^2; b<- (r+3)^2 + score <- c((2*r+3)/a, -r/a, -r/a + 3*(3-r)/b, r/a - r*(r+1)/b, + r/a + 2*r/b, r/a + 2*r/b) + + # Schoenfeld residual + scho <- c(1/(r+1), 1- (r/(3+r)), 0-(r/(3+r)) , 0) + + surv <- exp(-cumsum(haz)* exp(beta*newx)) + varhaz.g <- cumsum(c(1/(3*r+3)^2, 2/(r+3)^2, 0, 1 )) + + varhaz.d <- cumsum((newx-xbar) * haz) + + varhaz <- (varhaz.g + varhaz.d^2/ imat) * exp(2*beta*newx) + + names(xbar) <- names(haz) <- 1:4 + names(surv) <- names(varhaz) <- 1:4 + list(loglik=loglik, u=u, imat=imat, xbar=xbar, haz=haz, + mart=mart, score=score, + scho=scho, surv=surv, var=varhaz, + varhaz.g=varhaz.g, varhaz.d=varhaz.d) + } > > > > fit0 <-coxph(Surv(time, status) ~x, test1, iter=0, method='breslow') > truth0 <- byhand1(0,0) > aeq(truth0$loglik, fit0$loglik[1]) [1] TRUE > aeq(1/truth0$imat, fit0$var) [1] TRUE > aeq(truth0$mart, fit0$residuals[c(2:6,1)]) [1] TRUE > aeq(truth0$scho, resid(fit0, 'schoen')) [1] TRUE > aeq(truth0$score, resid(fit0, 'score')[c(3:7,1)]) [1] TRUE > sfit <- survfit(fit0, list(x=0)) > aeq(sfit$cumhaz, cumsum(truth0$haz)) [1] TRUE > aeq(sfit$surv, exp(-cumsum(truth0$haz))) [1] TRUE > aeq(sfit$std.err^2, c(7/180, 2/9, 2/9, 11/9)) [1] TRUE > aeq(resid(fit0, 'score'), c(5/24, NA, 5/12, -1/12, 7/24, -1/24, 5/24)) [1] TRUE > > fit1 <- coxph(Surv(time, status) ~x, test1, iter=1, method='breslow') > aeq(fit1$coefficients, 8/5) [1] TRUE > > # This next gives an ignorable warning message > fit2 <- coxph(Surv(time, status) ~x, test1, method='breslow', iter=2) Warning message: In coxph.fit(X, Y, istrat, offset, init, control, weights = weights, : Ran out of iterations and did not converge > aeq(round(fit2$coefficients, 6), 1.472724) [1] TRUE > > fit <- coxph(Surv(time, status) ~x, test1, method='breslow', eps=1e-8, + nocenter=NULL) > aeq(fit$coefficients, log(1.5 + sqrt(33)/2)) # the true solution [1] TRUE > truth <- byhand1(fit$coefficients, 0) > aeq(truth$loglik, fit$loglik[2]) [1] TRUE > aeq(1/truth$imat, fit$var) [1] TRUE > aeq(truth$mart, fit$residuals[c(2:6,1)]) [1] TRUE > aeq(truth$scho, resid(fit, 'schoen')) [1] TRUE > aeq(truth$score, resid(fit, 'score')[c(3:7,1)]) [1] TRUE > expect <- predict(fit, type='expected', newdata=test1) #force recalc > aeq(test1$status[-2] -fit$residuals, expect[-2]) #tests the predict function [1] TRUE > > sfit <- survfit(fit, list(x=0), censor=FALSE) > aeq(sfit$std.err^2, truth$var[c(1,2,4)]) # sfit skips time 8 (no events there) [1] TRUE > aeq(-log(sfit$surv), (cumsum(truth$haz))[c(1,2,4)]) [1] TRUE > sfit <- survfit(fit, list(x=0), censor=TRUE) > aeq(sfit$std.err^2, truth$var) [1] TRUE > aeq(-log(sfit$surv), (cumsum(truth$haz))) [1] TRUE > > # > # Done with the formal test, now print out lots of bits > # > resid(fit) 1 2 3 4 5 6 7 -0.3333333 NA 0.7287136 -0.2712864 -0.4574271 0.6666667 -0.3333333 > resid(fit, 'scor') 1 2 3 4 5 6 0.21138938 NA 0.13564322 -0.05049744 -0.12624360 -0.38168095 7 0.21138938 > resid(fit, 'scho') 1 6 6 9 0.1861407 0.4069297 -0.5930703 0.0000000 > > predict(fit, type='lp', se.fit=T) $fit 1 2 3 4 5 6 7 -0.7376425 NA 0.7376425 0.7376425 0.7376425 -0.7376425 -0.7376425 $se.fit 1 2 3 4 5 6 7 0.6278672 NA 0.6278672 0.6278672 0.6278672 0.6278672 0.6278672 > predict(fit, type='risk', se.fit=T) $fit 1 2 3 4 5 6 7 0.4782401 NA 2.0910001 2.0910001 2.0910001 0.4782401 0.4782401 $se.fit 1 2 3 4 5 6 7 0.4342009 NA 0.9079142 0.9079142 0.9079142 0.4342009 0.4342009 > predict(fit, type='expected', se.fit=T) $fit 1 2 3 4 5 6 7 1.3333333 NA 0.2712864 0.2712864 1.4574271 0.3333333 0.3333333 $se.fit [1] 1.0540926 NA 0.2785989 0.2785989 1.1069433 0.3333333 0.3333333 > predict(fit, type='terms', se.fit=T) $fit x 1 -0.7376425 2 NA 3 0.7376425 4 0.7376425 5 0.7376425 6 -0.7376425 7 -0.7376425 $se.fit x 1 0.6278672 2 NA 3 0.6278672 4 0.6278672 5 0.6278672 6 0.6278672 7 0.6278672 > > summary(survfit(fit, list(x=2))) Call: survfit(formula = fit, newdata = list(x = 2)) time n.risk n.event survival std.err lower 95% CI upper 95% CI 1 6 1 3.05e-01 6.50e-01 4.72e-03 1 6 4 2 1.71e-03 1.98e-02 2.33e-13 1 9 1 1 8.52e-12 5.29e-10 1.22e-64 1 > > proc.time() user system elapsed 0.454 0.004 0.455 survival/tests/book5.R0000644000176200001440000001070614613770353014446 0ustar liggesuserslibrary(survival) options(na.action=na.exclude) # preserve missings options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type # Tests of the weighted Cox model # This is section 1.3 of my appendix -- not yet found in the book # though, it awaits the next edition # # Similar data set to test1, but add weights, # a double-death/censor tied time # a censored last subject # The latter two are cases covered only feebly elsewhere. # # The data set testw2 has the same data, but done via replication # aeq <- function(x,y) all.equal(as.vector(x), as.vector(y)) testw1 <- data.frame(time= c(1,1,2,2,2,2,3,4,5), status= c(1,0,1,1,1,0,0,1,0), x= c(2,0,1,1,0,1,0,1,0), wt = c(1,2,3,4,3,2,1,2,1), id = 1:9) # Expanded data set testw2 <- testw1[rep(1:9, testw1$wt), -4] row.names(testw2) <- NULL indx <- match(1:9, testw2$id) # Breslow estimate byhand <- function(beta, newx=0) { r <- exp(beta) loglik <- 11*beta - (log(r^2 + 11*r +7) + 10*log(11*r +5) +2*log(2*r+1)) hazard <- c(1/(r^2 + 11*r +7), 10/(11*r +5), 2/(2*r+1)) xbar <- c((2*r^2 + 11*r)*hazard[1], 11*r/(11*r +5), r*hazard[3]) U <- 11- (xbar[1] + 10*xbar[2] + 2*xbar[3]) imat <- (4*r^2 + 11*r)*hazard[1] - xbar[1]^2 + 10*(xbar[2] - xbar[2]^2) + 2*(xbar[3] - xbar[3]^2) temp <- cumsum(hazard) risk <- c(r^2, 1,r,r,1,r,1,r,1) expected <- risk* temp[c(1,1,2,2,2,2,2,3,3)] # The matrix of weights, one row per obs, one col per death # deaths at 1,2,2,2, and 4 riskmat <- matrix(c(1,1,1,1,1,1,1,1,1, 0,0,1,1,1,1,1,1,1, 0,0,1,1,1,1,1,1,1, 0,0,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,1,1), ncol=5) wtmat <- diag(c(r^2, 2, 3*r, 4*r, 3, 2*r, 1, 2*r, 1)) %*% riskmat x <- c(2,0,1,1,0,1,0,1,0) status <- c(1,0,1,1,1,0,0,1,0) wt <- c(1,2,3,4,3,2,1,2,1) # Table of sums for score and Schoenfeld resids hazmat <- riskmat %*% diag(c(1,3,4,3,2)/colSums(wtmat)) dM <- -risk*hazmat #Expected part dM[1,1] <- dM[1,1] +1 # deaths at time 1 for (i in 2:4) dM[i+1, i] <- dM[i+1,i] +1 dM[8,5] <- dM[8,5] +1 mart <- rowSums(dM) resid <-dM * outer(x, xbar[c(1,2,2,2,3)] ,'-') # Increments to the variance of the hazard var.g <- cumsum(hazard^2/ c(1,10,2)) var.d <- cumsum((xbar-newx)*hazard) list(loglik=loglik, U=U, imat=imat, hazard=hazard, xbar=xbar, mart=c(1,0,1,1,1,0,0,1,0)-expected, expected=expected, score=rowSums(resid), schoen=c(2,1,1,0,1) - xbar[c(1,2,2,2,3)], varhaz=(var.g + var.d^2/imat)* exp(2*beta*newx)) } aeq(byhand(0)$expected, c(1/19, 1/19, rep(103/152, 5), rep(613/456,2))) #verify fit0 <- coxph(Surv(time, status) ~x, testw1, weights=wt, method='breslow', iter=0) fit0b <- coxph(Surv(time, status) ~x, testw2, method='breslow', iter=0) fit <- coxph(Surv(time, status) ~x, testw1, weights=wt, method='breslow') fitb <- coxph(Surv(time, status) ~x, testw2, method='breslow') aeq(resid(fit0, type='mart'), (resid(fit0b, type='mart'))[indx]) aeq(resid(fit0, type='scor'), (resid(fit0b, type='scor'))[indx]) aeq(unique(resid(fit0, type='scho')), unique(resid(fit0b, type='scho'))) truth0 <- byhand(0,pi) aeq(fit0$loglik[1], truth0$loglik) aeq(1/truth0$imat, fit0$var) aeq(truth0$mart, fit0$residuals) aeq(truth0$schoen, resid(fit0, 'schoen')) aeq(truth0$score, resid(fit0, 'score')) sfit <- survfit(fit0, list(x=pi), censor=FALSE) aeq(sfit$std.err^2, truth0$varhaz) aeq(-log(sfit$surv), cumsum(truth0$hazard)) truth <- byhand(0.85955744, .3) aeq(truth$loglik, fit$loglik[2]) aeq(1/truth$imat, fit$var) aeq(truth$mart, fit$residuals) aeq(truth$schoen, resid(fit, 'schoen')) aeq(truth$score, resid(fit, 'score')) sfit <- survfit(fit, list(x=.3), censor=FALSE) aeq(sfit$std.err^2, truth$varhaz) aeq(-log(sfit$surv), (cumsum(truth$hazard)* exp(fit$coefficients*.3))) fit0 summary(fit) resid(fit0, type='score') resid(fit0, type='scho') resid(fit, type='score') resid(fit, type='scho') aeq(resid(fit, type='mart'), (resid(fitb, type='mart'))[indx]) aeq(resid(fit, type='scor'), (resid(fitb, type='scor'))[indx]) aeq(unique(resid(fit, type='scho')), unique(resid(fitb, type='scho'))) rr1 <- resid(fit, type='mart') rr2 <- resid(fit, type='mart', weighted=T) aeq(rr2/rr1, testw1$wt) rr1 <- resid(fit, type='score') rr2 <- resid(fit, type='score', weighted=T) aeq(rr2/rr1, testw1$wt) survival/tests/tmerge2.Rout.save0000644000176200001440000001174114607006645016460 0ustar liggesusers R Under development (unstable) (2021-01-18 r79846) -- "Unsuffered Consequences" Copyright (C) 2021 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(survival) > > # This test is based on a user report that a 0/1 variable would not reset > # to zero. It turned out to be a bug when data2 was not sorted > > baseline <- data.frame(idd=1:5, futime=c(20, 30, 40, 30, 20), + status= c(0, 1, 0, 1, 0)) > tests <- data.frame(idd = c(2,3,3,3,4,4,5), + date = c(25, -1, 15, 23, 17, 19, 14), + onoff= c( 1, 1, 0, 1, 1, 0, 1)) > tests <- tests[c(7,2,6,3,4,1,5),] #scramble data2 > > mydata <- tmerge(baseline, baseline, id=idd, death=event(futime, status)) > mydata <- tmerge(mydata, tests, id=idd, ondrug=tdc(date, onoff)) > > all.equal(mydata$ondrug, c(NA, NA,1, 1,0,1, NA, 1,0, NA, 1)) [1] TRUE > > > # Check out addition of a factor, character, and logical > tests$ff <- factor(tests$onoff, 0:1, letters[4:5]) > tests$fchar <- as.character(tests$ff) > tests$logic <- as.logical(tests$onoff) > tests$num <- rep(1:3, length=nrow(tests)) > > mydata <- tmerge(mydata, tests, id=idd, fgrp= tdc(date, ff), + chgrp = tdc(date, fchar), + options=list(tdcstart="new")) > all.equal(mydata$fgrp, + factor(c(3,3,2,2,1,2,3,2,1,3,2), labels=c("d", "e", "new"))) [1] TRUE > all.equal(mydata$chgrp, + c("d", "e", "new")[c(3,3,2,2,1,2,3,2,1,3,2)]) [1] TRUE > > mydat2 <- tmerge(mydata, tests, id=idd, + logic1 = tdc(date, logic), logic2= event(date, logic)) > all.equal(mydat2$logic1, c(FALSE, TRUE, NA)[as.numeric(mydat2$fgrp)]) [1] TRUE > all.equal(mydat2$logic2, c(FALSE, TRUE, FALSE, FALSE, TRUE, FALSE, TRUE, + FALSE, FALSE, TRUE, FALSE)) [1] TRUE > > mydat3 <- tmerge(mydata, tests, id=idd, + xx = tdc(date, num), options=list(tdcstart=5)) > all.equal(mydat3$xx, c(5,5,3,2,1,2,5,1,3,5,1)) [1] TRUE > temp <- tmerge(mydata, tests, id=idd, xx=tdc(date, num, 5)) # alternate default > all.equal(mydat3$xx, temp$xx) [1] TRUE > > # Multiple chained calls. > temp <- outer(cgd0$id, 100*0:6, "+") > colnames(temp) <- paste0("x", 1:7) # add a time dependent covariate too > test <- cbind(cgd0, temp) > > newcgd <- tmerge(data1=cgd0[, 1:13], data2=cgd0, id=id, tstop=futime) > newcgd <- tmerge(newcgd, test, id=id, infect = event(etime1), xx=cumtdc(etime1, x1, 0)) > newcgd <- tmerge(newcgd, test, id=id, infect = event(etime2), xx=cumtdc(etime2, x2)) > newcgd <- tmerge(newcgd, test, id=id, infect = event(etime3), xx=cumtdc(etime3, x3)) > newcgd <- tmerge(newcgd, test, id=id, infect = event(etime4), xx=cumtdc(etime4, x4)) > newcgd <- tmerge(newcgd, test, id=id, infect = event(etime5), xx=cumtdc(etime5, x5)) > newcgd <- tmerge(newcgd, test, id=id, infect = event(etime6), xx=cumtdc(etime6, x6)) > newcgd <- tmerge(newcgd, test, id=id, infect = event(etime7), xx=cumtdc(etime7, x7)) > newcgd <- tmerge(newcgd, newcgd, id, enum=cumtdc(tstart)) > all.equal(dim(newcgd), c(203,18)) [1] TRUE > all.equal(as.vector(table(newcgd$infect)), c(127, 76)) [1] TRUE > temp <- with(newcgd, ifelse(enum==1, 0, id + (enum-2)*100)) > temp2 <- tapply(temp, newcgd$id, cumsum) > all.equal(newcgd$xx, unlist(temp2), check.attributes=FALSE) [1] TRUE > tcount <- attr(newcgd, "tcount") > all(tcount[,1:3] ==0) # no early, late, or gap [1] TRUE > > # table with number of subjects who have etime1 < futime (row 1) > # and etime1==futime (row 2) > # the table command ignores the missings > temp <- subset(cgd0, select=etime1:etime7) > counts <- sapply(temp, function(x) + as.vector(table(factor(x>= cgd0$futime, c(FALSE, TRUE))))) > > all(tcount[c(1,3,5,7,9,11,13), c("within", "trailing")] == t(counts)) [1] TRUE > > > # > # Merging with a date as the time variable. In this case tstart/tstop are required > # A default start of 0 has no meaning > # > base2 <- baseline > base2$date1 <- as.Date("1953-03-10") # everyone enrolled that day > base2$date2 <- as.Date("1953-03-10") + base2$futime > base2$futime <- NULL > test2 <- tests > test2$date <- as.Date("1953-03-10") + test2$date > > mydata2 <- tmerge(base2, base2, id=idd, death=event(date2, status), + tstart = date1, tstop= date2, + options=list(tstartname="date1", tstopname="date2")) > mydata2 <- tmerge(mydata2, test2, id=idd, ondrug=tdc(date, onoff)) > all.equal(mydata$ondrug, c(NA, NA,1, 1,0,1, NA, 1,0, NA, 1)) [1] TRUE > > proc.time() user system elapsed 1.050 0.105 1.149 survival/tests/concordance.R0000644000176200001440000001277514613770353015715 0ustar liggesuserslibrary(survival) options(na.action=na.exclude) # preserve missings options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type # # Simple tests of concordance # aeq <- function(x,y, ...) all.equal(as.vector(x), as.vector(y), ...) grank <- function(x, time, grp, wt) unlist(tapply(x, grp, rank)) grank2 <- function(x, time, grp, wt) { #for case weights if (length(wt)==0) wt <- rep(1, length(x)) z <- double(length(x)) for (i in unique(grp)) { indx <- which(grp==i) temp <- tapply(wt[indx], x[indx], sum) temp <- temp/2 + c(0, cumsum(temp)[-length(temp)]) z[indx] <- temp[match(x[indx], names(temp))] } z } # Concordance by brute force. O(n^2) algorithm, but ok for n<500 or so allpair <- function(x, time, status, wt, all=FALSE) { if (missing(wt)) wt <- rep(1, length(x)) count <- sapply(which(status==1), function(i) { atrisk <- (time > time[i]) | (time==time[i] & status==0) temp <- tapply(wt[atrisk], factor(sign(x[i] -x[atrisk]), c(1, -1, 0)), sum) wt[i]* c(ifelse(is.na(temp), 0, temp), (sum(wt[time==time[i] & status==1]) - wt[i])/2) }) rownames(count) <- c("concordant", "discordant", "tied.x", "tied.y") if (all) { colnames(count) <- time[status==1] t(count) } else rowSums(count) } # The std of C = std(numerator)/(number of comparable pairs) # The information matrix of a Cox model is = to the var(C-D) cfun <- function(fit) fit$cvar * sum(fit$count[1:3])^2 tdata <- aml[aml$x=='Maintained', c("time", "status")] tdata$x <- c(1,6,2,7,3,7,3,8,4,4,5) tdata$wt <- c(1,2,3,2,1,2,3,4,3,2,1) fit <- concordance(Surv(time, status) ~x, tdata) aeq(fit$count[1:4], c(24,14,2,0)) cfit <- coxph(Surv(time, status) ~ tt(x), tdata, tt=grank, method='breslow', iter=0, x=T) cdt <- coxph.detail(cfit) aeq(sum(cdt$imat), cfun(fit)) aeq(sum(2*cdt$score), diff(fit$count[1:2])) aeq(with(tdata, allpair(x, time, status)), c(14,24,2,0)) # Lots of ties tempy <- Surv(c(1,2,2,2,3,4,4,4,5,2), c(1,0,1,0,1,0,1,1,0,1)) tempx <- c(5,5,4,4,3,3,7,6,5,4) fit2 <- concordance(tempy ~ tempx) addxy <- function(x) c(x[1:3], sum(x[4:5])) aeq(addxy(fit2$count), allpair(tempx, tempy[,1], tempy[,2])) cfit2 <- coxph(tempy ~ tt(tempx), tt=grank, method='breslow', iter=0) aeq(cfit2$var, 1/cfun(fit2)) # Direct call fit2b <- concordancefit(tempy, tempx) fit2c <- concordancefit(tempy, tempx, std.err=FALSE) all.equal(fit2[1:5], fit2b) all.equal(fit2b[1:3], fit2c) # Bigger data fit3 <- concordance(Surv(time, status) ~ age, lung, reverse=TRUE) aeq(addxy(fit3$count), allpair(lung$age, lung$time, lung$status-1)) cfit3 <- coxph(Surv(time, status) ~ tt(age), lung, iter=0, method='breslow', tt=grank, x=T) cdt <- coxph.detail(cfit3) aeq(sum(cdt$imat), cfun(fit3)) aeq(2*sum(cdt$score), diff(fit3$count[2:1])) # More ties fit4 <- concordance(Surv(time, status) ~ ph.ecog, lung, reverse=TRUE) aeq(addxy(fit4$count), allpair(lung$ph.ecog, lung$time, lung$status-1)) aeq(fit4$count[1:5], c(8392, 4258, 7137, 21, 7)) cfit4 <- coxph(Surv(time, status) ~ tt(ph.ecog), lung, iter=0, method='breslow', tt=grank) aeq(1/cfit4$var, cfun(fit4)) # Case weights fit5 <- concordance(Surv(time, status) ~ x, tdata, weights=wt, reverse=TRUE) fit6 <- concordance(Surv(time, status) ~x, tdata[rep(1:11,tdata$wt),]) aeq(addxy(fit5$count), with(tdata, allpair(x, time, status, wt))) aeq(fit5$count[1:4], c(70, 91, 7, 0)) # checked by hand aeq(fit5$count[1:3], fit6$count[c(2,1,3)]) #spurious "tied on time" values, ignore aeq(fit5$std, fit6$std) cfit5 <- coxph(Surv(time, status) ~ tt(x), tdata, weights=wt, iter=0, method='breslow', tt=grank2) cfit6 <- coxph(Surv(time, status) ~ tt(x), tdata[rep(1:11,tdata$wt),], iter=0, method='breslow', tt=grank) aeq(1/cfit6$var, cfun(fit6)) aeq(cfit5$var, cfit6$var) # Start, stop simplest cases fit7 <- concordance(Surv(rep(0,11), time, status) ~ x, tdata) aeq(fit7$count, fit$count) aeq(fit7$std.err, fit$std.err) fit7 <- concordance(Surv(rep(0,11), time, status) ~ x, tdata, weights=wt) aeq(fit5$count, fit7$count[c(2,1,3:5)]) #one reversed, one not # Multiple intervals for some, but same risk sets as tdata tdata2 <- data.frame(time1=c(0,3, 5, 6,7, 0, 4,17, 7, 0,16, 2, 0, 0,9, 5), time2=c(3,9, 13, 7,13, 18, 17,23, 28, 16,31, 34, 45, 9,48, 60), status=c(0,1, 1, 0,0, 1, 0,1, 0, 0,1, 1, 0, 0,1, 0), x = c(1,1, 6, 2,2, 7, 3,3, 7, 3,3, 8, 4, 4,4, 5), wt= c(1,1, 2, 3,3, 2, 1,1, 2, 3,3, 4, 3, 2,2, 1)) fit8 <- concordance(Surv(time1, time2, status) ~x, tdata2, weights=wt, reverse=TRUE) aeq(fit5$count, fit8$count) aeq(fit5$std.err, fit8$std.err) cfit8 <- coxph(Surv(time1, time2, status) ~ tt(x), tdata2, weights=wt, iter=0, method='breslow', tt=grank2) aeq(1/cfit8$var, cfun(fit8)) # Stratified tdata3 <- data.frame(time1=c(tdata2$time1, rep(0, nrow(lung))), time2=c(tdata2$time2, lung$time), status = c(tdata2$status, lung$status -1), x = c(tdata2$x, lung$ph.ecog), wt= c(tdata2$wt, rep(1, nrow(lung))), grp=rep(1:2, c(nrow(tdata2), nrow(lung)))) fit9 <- concordance(Surv(time1, time2, status) ~x + strata(grp), data=tdata3, weights=wt, reverse=TRUE) aeq(fit9$count[1,], fit5$count) aeq(fit9$count[2,], fit4$count) survival/tests/infcox.R0000644000176200001440000000232114613770353014707 0ustar liggesusersoptions(na.action=na.exclude) # preserve missings options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type library(survival) # # A test to exercise the "infinity" check on 2 variables # test3 <- data.frame(futime=1:12, fustat=c(1,0,1,0,1,0,0,0,0,0,0,0), x1=rep(0:1,6), x2=c(rep(0,6), rep(1,6))) # This will produce a warning message, which is the point of the test. # The variance is close to singular and gives different answers # on different machines fit3 <- coxph(Surv(futime, fustat) ~ x1 + x2, test3, iter=25) all(fit3$coef < -22) all.equal(round(fit3$log, 4),c(-6.8669, -1.7918)) # # Actual solution # time 1, 12 at risk, 3 each of x1/x2 = 00, 01, 10, 11 # time 2, 10 at risk, 2, 3, 2 , 3 # time 5, 8 at risk, 1, 3, 1, 3 # Let r1 = exp(beta1), r2= exp(beta2) # loglik = -log(3 + 3r1 + 3r2 + 3 r1*r2) - log(2 + 2r1 + 3r2 + 3 r1*r2) - # log(1 + r1 + 3r2 + 3 r1*r2) true <- function(beta) { r1 <- exp(beta[1]) r2 <- exp(beta[2]) loglik <- -log(3*(1+ r1+ r2+ r1*r2)) - log(2+ 2*r1 + 3*r2 + 3*r1*r2) - log(1 + r1 + 3*r2 + 3*r1*r2) loglik } all.equal(fit3$loglik[2], true(fit3$coef), check.attributes=FALSE) survival/tests/counting.Rout.save0000644000176200001440000000551714613770353016746 0ustar liggesusers R Under development (unstable) (2024-04-17 r86441) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: aarch64-unknown-linux-gnu R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > options(na.action=na.exclude) # preserve missings > options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type > library(survival) > > # Create a "counting process" version of the simplest test data set > # > test1 <- data.frame(time= c(9, 3,1,1,6,6,8), + status=c(1,NA,1,0,1,1,0), + x= c(0, 2,1,1,1,0,0)) > > test1b<- list(start= c(0, 3, 0, 0, 5, 0, 6,14, 0, 0, 10,20,30, 0), + stop = c(3,10, 10, 5,20, 6,14,20, 30, 10,20,30,40, 10), + status=c(0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0), + x= c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, NA), + id = c(3, 3, 4, 5, 5, 6, 6, 6, 7, 1, 1, 1, 1, 2)) > > aeq <- function(x,y) all.equal(as.vector(x), as.vector(y)) > # > # Check out the various residuals under an Efron approximation > # > fit0 <- coxph(Surv(time, status)~ x, test1, iter=0) > fit <- coxph(Surv(time, status) ~x, test1) > fit0b <- coxph(Surv(start, stop, status) ~ x, test1b, iter=0) > fitb <- coxph(Surv(start, stop, status) ~x, test1b) > fitc <- coxph(Surv(time, status) ~ offset(fit$coefficients*x), test1) > fitd <- coxph(Surv(start, stop, status) ~ offset(fit$coefficients*x), test1b) > > aeq(fit0b$coefficients, fit0$coefficients) [1] TRUE > > aeq(resid(fit0), resid(fit0b, collapse=test1b$id)) [1] TRUE > aeq(resid(fit), resid(fitb, collapse=test1b$id)) [1] TRUE > aeq(resid(fitc), resid(fitd, collapse=test1b$id)) [1] TRUE > aeq(resid(fitc), resid(fit)) [1] TRUE > > aeq(resid(fit0, type='score'), resid(fit0b, type='score', collapse=test1b$id)) [1] TRUE > aeq(resid(fit, type='score'), resid(fitb, type='score', collapse=test1b$id)) [1] TRUE > > aeq(resid(fit0, type='scho'), resid(fit0b, type='scho', collapse=test1b$id)) [1] TRUE > aeq(resid(fit, type='scho'), resid(fitb, type='scho', collapse=test1b$id)) [1] TRUE > > # The two survivals will have different censoring times > # nrisk, nevent, surv, and std should be the same > temp1 <- survfit(fit, list(x=1), censor=FALSE) > temp2 <- survfit(fitb, list(x=1), censor=FALSE) > all.equal(unclass(temp1)[c(3,4,6,8)], unclass(temp2)[c(3,4,6,8)]) [1] TRUE > > > > proc.time() user system elapsed 0.436 0.012 0.445 survival/tests/r_user.R0000644000176200001440000000152514607006645014724 0ustar liggesusersoptions(na.action=na.exclude) #preserve length of missings library(survival) # # Check out using a "user specified" distribution # mydist <- c(survreg.distributions$extreme, survreg.distributions$weibull[-1]) mydist$name <- "Weibull2" mydist$dist <- NULL fit1 <- survreg(Surv(time, status) ~ age + ph.ecog, lung) fit2 <- survreg(Surv(time, status) ~ age + ph.ecog, lung, dist=mydist) all.equal(fit1$coef, fit2$coef) all.equal(fit1$var, fit2$var) # # And with an data set containing interval censoring # idat <- read.table('data.interval', skip=3, header=T, sep=',') fit1 <- survreg(Surv(ltime, rtime, type='interval2') ~ age + ecog.ps, idat) fit2 <- survreg(Surv(ltime, rtime, type='interval2') ~ age + ecog.ps, data=idat, dist=mydist) all.equal(fit1$coef, fit2$coef) all.equal(fit1$var, fit2$var) all.equal(fit1$log, fit2$log) survival/tests/coxsurv4.Rout.save0000644000176200001440000000514614613770353016713 0ustar liggesusers R Under development (unstable) (2024-04-17 r86441) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: aarch64-unknown-linux-gnu R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(survival) > > # Strata by covariate interactions, a case pointed out in early 2011 > # by Frank Harrell, which as it turns out had never been computed > # correctly by any version of the package. Which shows how often this > # case arises in practice. > # > aeq <- function(x, y, ...) all.equal(as.vector(x), as.vector(y)) > fit1 <- coxph(Surv(time, status) ~ wt.loss + age*strata(sex) + strata(ph.ecog), + data=lung) > tdata <- data.frame(wt.loss=c(10,5,0,10, 15,20,25), + age =c(50,60,50,60,70,40,21), + sex =c(1,1,2,2,1,1,1), + ph.ecog=c(0,0,1,1,2,2,2)) > surv1 <- survfit(fit1, newdata=tdata) > > fit2 <- coxph(Surv(time, status) ~ wt.loss + age + I(age*0), data=lung, + init=fit1$coefficients, iter=0, subset=(sex==1 & ph.ecog==0)) > fit2$var <- fit1$var > > surv2 <- survfit(fit2, newdata=list(wt.loss=c(10,5), age=c(50,60))) > s1 <- surv1[1:2] > aeq(s1$surv, surv2$surv) #first a vector, second a matrix [1] TRUE > aeq(s1$std.err, surv2$std.err) [1] TRUE > aeq(s1[1]$time, surv2$time) [1] TRUE > aeq(s1[1]$n.event, surv2$n.event) [1] TRUE > > fit3 <- coxph(Surv(time, status) ~ wt.loss + age + I(age*1), + data=lung, init=fit1$coefficients, iter=0, + subset=(sex==2 & ph.ecog==1)) > fit3$var <- fit1$var > surv3 <- survfit(fit3, newdata=list(wt.loss=c(0,10), age=c(50,60))) > aeq(surv1[3:4]$surv, surv3$surv) [1] TRUE > aeq(surv1[3:4]$std, surv3$std) [1] TRUE > > fit4 <- coxph(Surv(time, status) ~ wt.loss + age + I(age*0), + data=lung, init=fit1$coefficients, iter=0, + subset=(sex==1 & ph.ecog==2)) > fit4$var <- fit1$var > surv4 <- survfit(fit4, newdata=list(wt.loss=c(15,20,25), age=c(70,40,21))) > > aeq(surv1[5:7]$surv, surv4$surv) [1] TRUE > aeq(surv1[5:7]$std.err, surv4$std.err) [1] TRUE > aeq(surv1[5]$n.risk, surv4$n.risk) [1] TRUE > > > proc.time() user system elapsed 0.421 0.016 0.434 survival/tests/tt2.R0000644000176200001440000000720214607324372014134 0ustar liggesusers# A reprise of tt.R, using (time1, time2) data. library(survival) library(splines) aeq <- function(x, y) all.equal(as.vector(x), as.vector(y)) # A contrived example for the tt function # mkdata <- function(n, beta) { age <- round(runif(n, 20, 60)) x <- rbinom(n, 1, .5) futime <- rep(40, n) # everyone has 40 years of follow-up entry <- pmax(0, seq(-10, 30, length=n)) # 1/4 enter at 0 entry <- round(entry) status <- rep(0, n) dtime <- runif(n/2, 1, 40) # 1/2 of them die dtime <- sort(dtime) # The risk is set to beta[1]*x + beta[2]* f(current_age) # where f= 0 up to age 40, rises linear to age 70, flat after that for (i in 1:length(dtime)) { atrisk <- (futime >= dtime[i] & entry < dtime[i]) c.age <- age + dtime age2 <- pmin(30, pmax(0, c.age-40)) xbeta <- beta[1]*x + beta[2]*age2 # Select a death according to risk risk <- ifelse(atrisk, exp(xbeta), 0) dead <- sample(1:n, 1, prob=risk/sum(risk)) futime[dead] <- dtime[i] status[dead] <- 1 } out <- data.frame(time1= entry, time2=round(futime,1), status=status, age=age, x=x, risk=risk, casewt = sample(1:5, n, replace=TRUE), grp = sample(1:15, n, replace=TRUE), id= 1:n) subset(out, time1 < time2) } set.seed(1953) # a good year # Make n larger for the (time1, time2) case; more stress. tdata <- mkdata(250, c(log(1.5), 2/30)) # data set has many ties #tdata <- mkdata(100, c(log(1.5), 2/30)) # data set has many ties tdata$strat <- floor(tdata$grp/10) dtime <- sort(unique(tdata$time2[tdata$status==1])) data2 <- survSplit(Surv(time1, time2, status) ~., tdata, cut=dtime) data2$c.age <- data2$age + data2$time2 # current age # fit1 uses data at the event times, fit2$c.age might have a # wider range due to censorings. To make the two fits agree # fix the knots. I know a priori that 20 to 101 will cover it. ns2 <- function(x) ns(x, Boundary.knots=c(20, 101), knots=c(45, 60, 75)) fit1 <- coxph(Surv(time1, time2, status)~ x + tt(age), tdata, tt= function(x, t, ...) ns2(x+t)) fit2 <- coxph(Surv(time1, time2, status) ~ x + ns2(c.age), data2) aeq(coef(fit1), coef(fit2)) aeq(vcov(fit1), vcov(fit2)) # # Check that cluster, weight, and offset were correctly expanded # fit3a <- coxph(Surv(time1, time2, status)~ x + tt(age), tdata, weights=casewt, tt= function(x, t, ...) ns2(x+t), x=TRUE) fit3b <- coxph(Surv(time1, time2, status) ~ x + ns2(c.age), data2, weights=casewt) aeq(coef(fit3a), coef(fit3b)) aeq(vcov(fit3a), vcov(fit3b)) fit4a <- coxph(Surv(time1, time2, status)~ x + tt(age), tdata, tt= function(x, t, ...) ns2(x+t), cluster=grp) fit4b <- coxph(Surv(time1, time2, status) ~ x + ns2(c.age), data2, cluster=grp) fit4c <- coxph(Surv(time1, time2, status) ~ x + ns2(c.age) + cluster(grp), data2) aeq(coef(fit4a), coef(fit4b)) aeq(vcov(fit4a), vcov(fit4b)) aeq(coef(fit4a), coef(fit4c)) aeq(vcov(fit4a), vcov(fit4c)) fit5a <- coxph(Surv(time1, time2, status)~ x + tt(age) + offset(grp/10), tdata, tt= function(x, t, ...) ns2(x+t),) fit5b <- coxph(Surv(time1, time2, status) ~ x + ns2(c.age)+ offset(grp/10) , data=data2) aeq(coef(fit5a), coef(fit5b)) aeq(vcov(fit5a), vcov(fit5b)) # Check that strata is correct fit6a <- coxph(Surv(time1, time2, status) ~ x + tt(age) + strata(strat), tdata, tt = function(x, t, ...) (x+t)^2, x=TRUE) fit6b <- coxph(Surv(time1, time2, status) ~ x + I(c.age^2) +strata(strat), data2) aeq(coef(fit6a), coef(fit6b)) aeq(vcov(fit6a), vcov(fit6b)) survival/tests/tmerge.R0000644000176200001440000000654214607006645014714 0ustar liggesuserslibrary(survival) # Very simple tmerge example, for checking data1 <- data.frame(idd = c(1,5,4,3,2,6), x1=1:6, age=50:55) data2 <- data.frame(idd = c(2,5,1,2,1), x2=5:1, age=48:44) test1 <- tmerge(data1, data1, id=idd, death=event(age)) test2 <- tmerge(test1, data2, id=idd, zed=tdc(age, x2)) all.equal(test2$idd, c(1,1,1,5,5,4,3,2,2,2,6)) all.equal(test2$tstop, c(44, 46, 50, 47, 51, 52, 53, 45, 48, 54, 55)) all.equal(test2$death, c(0,0,1,0,1,1,1,0,0,1,1)) all.equal(test2$zed, c(NA, 1, 3,NA, 4, NA, NA, NA, 2, 5, NA)) #add in a cumtdc variable and cumevent variable data3 <- data.frame(idd=c(5,5,1,1,6,4,3,2), age=c(45, 50, 44, 48, 53,-5,0,20), x = c(1,5,2,3,7, 4,6,8)) test3 <- tmerge(test2, data3, id=idd, x=cumtdc(age, x), esum = cumevent(age)) all.equal(test3$x, c(NA,2,2,5,NA, 1,1,6,4,6, NA, 8,8,8, NA,7)) all.equal(test3$esum, c(1,0,2,0,1,0,2,0,0,0,1,0,0,0,1,0)) # An example from Brendan Caroll # It went wrong because the data is not sorted ages <- data.frame( id = c(1L, 2L, 5L, 6L, 9L, 10L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 20L, 21L, 24L, 26L, 27L, 28L, 29L, 30L, 31L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 42L, 45L, 46L, 43L, 48L, 49L, 50L, 51L, 52L, 54L, 55L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 8L, 19L, 22L, 23L, 33L, 41L), age = c(13668, 21550, 15249, 21550, 16045, 21550, 14976, 14976, 6574, 21550, 4463, 16927, 16927, 15706, 4567, 21306, 17235, 22158, 19692, 17632, 17597, 4383, 5811, 7704, 5063, 17351, 17015, 16801, 4383, 5080, 13185, 12604, 19784, 5310, 15369, 13239, 1638, 21323, 10914, 21262, 7297, 17214, 17508, 14199, 14062, 2227, 8434, 4593, 14429, 21323, 4782, 10813, 2667, 2853, 5709, 3140, 12237, 7882, 21550, 15553, 16466, 16621, 19534, 21842)) transitions <- data.frame(id=c(2,2, 8, 19, 22, 23, 24, 31, 33, 41, 43, 52, 55, 66, 6, 10, 43), transition = c(18993, 13668, 15706, 11609, 4023, 9316, 16193, 1461, 4584, 17824, 11261, 16818, 10670, 15479, 15249, 15887,3713)) # Unsorted tdata <- tmerge(ages, ages, id=id, tstop=age) newdata<- tmerge(tdata, transitions, id=id, enum=cumtdc(transition)) # sorted test1 <- ages[order(ages$id),] test2 <- tmerge(test1, test1, id=id, tstop=age) tran2 <- transitions[order(transitions$id, transitions$transition),] test3 <- tmerge(test2, tran2, id=id, enum=cumtdc(transition)) all.equal(attr(newdata,'tcount'), attr(test3, 'tcount')) test4 <- newdata[order(newdata$id, newdata$tstart),] all.equal(test3, test4, check.attributes=FALSE) #rownames differ # An extension of the first example, where the second data set has a surfeit # of rows: some before the start, some doubled up in the middle data1 <- data.frame(idd = c(1,5,4,3,2,6), x1=1:6, age=50:55) data3 <- data.frame(idd = c(2,5,1,2,1,2,2,1,1,7,3,3), x2=c(5:1, 10:4), age=c(48:44, -4, -3, -1, -2, 35, 62,61)) test1 <- tmerge(data1, data1, id=idd, death=event(age)) test3 <- tmerge(test1, data3, id= idd, xx = tdc(age, x2), cx=cumtdc(age, x2, 2)) all.equal(test3$idd, c(1,1,1,5,5,4,3,2,2,2,6)) all.equal(test3$xx, c(8, 1, 3, NA, 4, NA, NA, 9, 2, 5, NA)) all.equal(test3$cx, c(17, 18, 21, 2, 6, 2,2, 21, 23, 28, 2)) survival/tests/residsf.R0000644000176200001440000001556414607325257015077 0ustar liggesuserslibrary(survival) aeq <- function(x, y, ...) all.equal(as.vector(x), as.vector(y), ...) # # Tests of the residuals.survfit function # # The influence argument of survfit returns all the residuals at every time # point, but for large data sets the result will be huge. This function uses # a different algorithm which will be faster when the number of time # points being reported out is small. # Start with small data sets and work up. First simple survival. test1 <- data.frame(time= c(9, 3,1,1,6,6,8), status=c(1,NA,1,0,1,1,0), x= c(0, 2,1,1,1,0,0)) indx <- order(test1$time[!is.na(test1$status)]) s1 <- survfit(Surv(time, status) ~1, test1, influence=3) # true influence for survival and hazard, in time order inf1 <- matrix(c(-20, rep(4,5), -10, 2, -13, -13, 17, 17, rep(0,6))/144, ncol=3, dimnames=list(1:6, c(1,6,9))) inf2 <- matrix(c(10, rep(-2,5), 10, -2, 7,7, -11, -11)/72, ncol=2) aeq(s1$influence.surv[indx,], inf1[, c(1,2,2,3)]) aeq(s1$influence.chaz[indx,], inf2[,c(1,2,2,2)]) r1 <- resid(s1, times=c(0, 3, 5, 8, 10)) all(r1[,1] ==0) aeq(r1[indx,2:5], inf1[,c(1,1,2,3)]) r2 <- resid(s1, times=c(0, 3, 5, 8, 10), type="cumhaz") all(r2[,1] ==0) aeq(r2[indx,2:5], inf2[,c(1,1,2,2)]) # AUC is a sum of rectangles, height= S, width based on time points, # so the leverage is a weighted sum of dfbeta values for S r3 <- resid(s1, times=c(1,4, 8, 10), type="sojourn") inf3 <- inf1 %*% cbind(c(0,0,0), c(3,0,0), c(5,2,0), c(5,3,1)) aeq(r3[indx,], inf3) # exp(Nelson-Aalen) s2 <- survfit(Surv(time, status) ~1, test1, stype=2, influence=3) r4 <- resid(s2, times=c(0, 3, 5, 8, 10), type="pstate") inf4 <- -inf2[, c(1,2,2)] %*% diag(s2$surv[c(1,2,4)]) aeq(r4[indx,2:5], inf4[,c(1,1,2,3)]) aeq(s2$influence.surv[indx,], inf4[,c(1,2,2,3)]) r5 <- resid(s2, times=c(1,4, 8, 10), type="sojourn") inf5 <- inf4 %*% cbind(c(0,0,0), c(3,0,0), c(5,2,0), c(5,3,1)) aeq(r5[indx,], inf5) # Fleming-Harrington # This one is hard, the code still fails s3 <- survfit(Surv(time, status) ~1, test1, ctype=2, influence=2) inf6 <- matrix(c( rep(c(5, -1), c(1, 5))/36, c(5,-1)/36, c(21,21,-29, -29)/144), ncol=2) # r6 <- resid(s3, times =c(0, 3, 5, 8, 10), type="cumhaz") # Part 2: single state, with start/stop data, multiple curves, # second curve is identical to test1 # Then put it out of order. test2 <- data.frame(t1 =c(1, 2, 5, 2, 1, 7, 3, 4, 8, 8, 0,0,0,0,0,0), t2 =c(2, 3, 6, 7, 8, 9, 9, 9,14, 17, 9, 1, 1, 6, 6, 8), event=c(1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0), x = rep(1:2, c(10, 6)), id = 1:16) s4 <- survfit(Surv(t1, t2, event) ~ x, test2, influence=TRUE) r6 <- resid(s4, time=c(4, 8, 10), type="surv") aeq(r6[1:10,], s4$influence.surv[[1]][,c(2, 5, 6)]) aeq(r6[11:16,],s4$influence.surv[[2]][,c(1,3, 4)]) aeq(r6[11:16,2:3], r1[,4:5]) r7 <- resid(s4, time=c(4, 8, 10), type="cumhaz") aeq(r7[1:10,], s4$influence.chaz[[1]][,c(2, 5, 6)]) aeq(r7[11:16,],s4$influence.chaz[[2]][,c(1,3, 4)]) aeq(r7[11:16, 2:3], r2[,4:5]) # Compute the AUC at times 8 and 10, the first is a reporting time, the # second is in between r8 <- resid(s4, time= c(8, 10), type="auc") aeq(r8[11:16,], r3[,3:4]) # curve1: inf1 <- s4$influence.surv[[1]] d1 <- inf1[,1:4] %*% diff(s4$time[1:5]) d2 <- inf1[,1:6] %*% diff(c(s4$time[1:6], 10)) aeq(cbind(d1, d2), r8[1:10,]) # curve2: inf2 <- s4$influence.surv[[2]] d3 <- inf2[,1:2] %*% diff(s4$time[9:11]) d4 <- inf2[,1:4] %*% diff(c(s4$time[9:12], 10)) aeq(cbind(d3, d4), r8[11:16,]) # scramble the data reord <- c(1,3,5,7,9,11,13, 15,2,4,6,8,10,12,14,16) test2b <-test2[reord,] s5 <- survfit(Surv(t1, t2, event) ~x, test2b, influence=TRUE) r9 <- resid(s5, time=c(4, 8, 10), type="surv") aeq(r6[reord,], r9) # # For multistate use the same data set as mstate.R, where results have been # worked out by hand. Except, make it harder by adding an initial state. # tdata <- data.frame(id= LETTERS[3*c(1, 1, 1, 2, 3, 4, 4, 4, 5, 5)], t1= c(0, 4, 9, 1, 2, 0, 2, 8, 1, 3), t2= c(4, 9, 10, 5, 9, 2, 8, 9, 3, 11), st= c(1, 2, 1, 2, 3, 1, 3, 0, 3, 0), i0= c(1, 2, 3, 2, 1, 1, 2, 4, 3, 4), wt= 1:10) tdata$st <- factor(tdata$st, c(0:3), labels=c("censor", "a", "b", "c")) tdata$i0 <- factor(tdata$i0, 1:4, labels=c("entry", "a", "b", "c")) if (FALSE) { #useful picture check <- survcheck(Surv(t1, t2, st) ~1, tdata, istate=i0, id=id) plot(c(0,11), c(1,5.5), type='n', xlab="Time", ylab= "Subject") tdata$idx <- as.numeric(factor(tdata$id)) with(tdata, segments(t1+.1, idx, t2, idx, col=as.numeric(check$istate))) with(subset(tdata, st!= "censor"), text(t2, idx+.15, as.character(st))) with(tdata, text((t1+t2)/2, idx+.25, wt)) with(subset(tdata, !duplicated(id)), text(t1, idx+.15, as.character(i0))) #segments are colored by current state, case weight in center, events at ends abline(v=c(2:5, 8:11), lty=3, col='gray') } tfun <- function(data=tdata) { reorder <- c(10, 9, 1, 2, 5, 4, 3, 7, 8, 6) new <- data[reorder,] new } mtest2 <- tfun(tdata) # scrambled version mfit1 <- survfit(Surv(t1, t2, st) ~ 1, tdata, id=id, istate=i0, influence=1) test1 <- resid(mfit1, times= mfit1$time, collapse=TRUE) aeq(test1, aperm(mfit1$influence, c(1,3,2))) aeq(sqrt(apply(test1^2, 2:3, sum)), t(mfit1$std.err)) test1a <- resid(mfit1, times=c(3, 7, 9), method=1, collapse=TRUE) minf <- aperm(mfit1$influence, c(1,3,2)) # influence has time second, resid third aeq(test1a, minf[,,c(2,4,6)]) # interpolated times work test2 <- resid(mfit1, times= mfit1$time, collapse=TRUE, type="cumhaz") aeq(sqrt(apply(test2^2, 2:3, sum)), t(mfit1$std.chaz)) test3 <- resid(mfit1, times= mfit1$time, collapse=TRUE, type="auc") aeq(sqrt(apply(test3^2, 2:3, sum)), t(mfit1$std.auc)) # Do a couple AUC by hand atime <- c(1, 5.6, 8.1, 15) test4 <- resid(mfit1, times=atime, type="auc", collapse=TRUE) all(test4[,,1] ==0) # before the first time # 5.6 covers rectangles of widths 1,1,1, and .6 after times 2, 3,4 and 5 temp <- apply(test1, 1:2, function(x) sum(x*c(1,1,1, .6, 0,0,0,0))) aeq(temp, test4[,,2]) temp <- apply(test1, 1:2, function(x) sum(x*c(1,1,1, 3, .1, 0, 0, 0))) aeq(temp, test4[,,3]) temp <- apply(test1, 1:2, function(x) sum(x*c(1,1,1, 3, 1, 1, 1, 4))) aeq(temp, test4[,,4]) # # competing risks # mdata <- mgus2 mdata$etime <- with(mdata, ifelse(pstat==1, ptime, futime)) temp <- with(mdata, ifelse(pstat==1, 1, 2*death)) mdata$event <- factor(temp, 0:2, c("censor", "PCM", "Death")) mfit <- survfit(Surv(etime, event) ~1, mdata, influence=1) rr <- resid(mfit, time=360) index <- sum(mfit$time <= 360) aeq(mfit$influence.pstate[,index,], rr) survival/tests/concordance3.Rout.save0000644000176200001440000001426214613770353017456 0ustar liggesusers R Under development (unstable) (2024-04-17 r86441) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: aarch64-unknown-linux-gnu R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(survival) > options(na.action=na.exclude) > aeq <- function(x,y,...) all.equal(as.vector(x), as.vector(y), ...) > > # Make sure strata is retained, and that the overall variance is correct > fit1 <- coxph(Surv(time, status) ~ age + offset(ph.ecog*0) +strata(sex), lung) > fit2 <- coxph(Surv(time, status) ~ age + ph.ecog +strata(sex), lung) > > test <- concordance(fit1, fit2, influence=1) > > ksex <- model.frame(fit1)[["strata(sex)"]] > test1 <- concordance(fit1$y ~ fit1$linear.predictors + strata(ksex), + reverse=TRUE, influence=1) > test2 <- concordance(fit1$y ~ fit2$linear.predictors + strata(ksex), + reverse=TRUE, influence=1) > aeq(test$concordance, c(test1$concordance, test2$concordance)) [1] TRUE > aeq(diag(test$var), c(test1$var[1], test2$var[1])) [1] TRUE > aeq(test$dfbeta, cbind(test1$dfbeta, test2$dfbeta)) [1] TRUE > > cvec <- c(-1, 1) > aeq(cvec %*% test$var %*% cvec, sum((test1$dfbeta - test2$dfbeta)^2)) [1] TRUE > > # Time weights > # Start with a very small data set: aml has 23 subjects > # > atest1 <- concordance(Surv(time, status) ~ x, aml, ranks=TRUE) > atest2 <- concordance(Surv(time, status) ~ x, aml, ranks=TRUE, timewt='S') > atest3 <- concordance(Surv(time, status) ~ x, aml, ranks=TRUE, timewt='S/G') > atest4 <- concordance(Surv(time, status) ~ x, aml, ranks=TRUE, timewt='n/G2') > # The ranks data frame agrees for all but weights > all.equal(atest1$ranks[, -3], atest2$ranks[, -3]) [1] TRUE > all.equal(atest1$ranks[, -3], atest3$ranks[, -3]) [1] TRUE > all.equal(atest1$ranks[, -3], atest4$ranks[, -3]) [1] TRUE > > wt1 <- cbind(atest1$ranks[,"timewt"], atest2$ranks[,"timewt"], + atest3$ranks[,"timewt"], atest4$ranks[,"timewt"]) > > # survfit0 adds time 0 to the curves > # to break ties between censor/death for G, we need to add an offset to > # the censoring times. Since time is integer, .1 works nicely > s1 <- survfit0(survfit(Surv(time, status) ~ 1, aml)) > g1 <- survfit0(survfit(Surv(time + .1*(1-status), 1-status) ~1, aml)) > > # The ingredients of the weights > indx <- match(atest1$ranks[,"time"], s1$time) > nrisk <- s1$n.risk[indx] > sminus <- s1$surv[indx-1] > gminus <- g1$surv[findInterval(atest1$ranks[,"time"], g1$time)] > n <- nrow(aml) > > wt2 <- cbind(nrisk, n*sminus, n*sminus/gminus, nrisk/gminus^2) > aeq(wt1, wt2) [1] TRUE > > # The sum of weighted ranks should equal (C-D) for a Cox model fit > tfun <- function(cfit, reverse=FALSE) { + t1 <- sum(cfit$ranks$timewt * cfit$ranks$rank) + t2 <- cfit$count[1] - cfit$count[2] + all.equal(unname(t1), unname(t2)) + } > tfun(atest1) [1] TRUE > tfun(atest2) [1] TRUE > tfun(atest3) [1] TRUE > tfun(atest4) [1] TRUE > > # The nafld data set has strong and early censoring (one of the only ones > # in the package that does.) So it is a good check of time weights. > # > nfit <- coxph(Surv(futime, status) ~ male + pspline(age), nafld1) > cn1 <- concordance(nfit, timewt='n', ranks=TRUE) > cn2 <- concordance(nfit, timewt='S', ranks=TRUE) > cn3 <- concordance(nfit, timewt='S/G', ranks=TRUE) > cn4 <- concordance(nfit, timewt='n/G2', ranks=TRUE) > > sfit <- survfit0(survfit(Surv(futime, status) ~ 1, nafld1)) > gfit <- survfit0(survfit(Surv(futime + .1*(status==0), 1-status) ~0, nafld1)) > > # The ingredients of the weights > dtime <- cn1$ranks[, "time"] > indx <- match(dtime, sfit$time) > nrisk <- sfit$n.risk[indx] > sminus <- sfit$surv[indx-1] > gminus <- gfit$surv[findInterval(dtime, gfit$time)] > n <- nrow(nafld1) > > wt1 <- cbind(cn1$ranks[, "timewt"], cn2$ranks[,"timewt"], + cn3$ranks[, "timewt"], cn4$ranks[,"timewt"]) > wt2 <- cbind(nrisk, n*sminus, n*sminus/gminus, nrisk/gminus^2) > aeq(wt1, wt2) [1] TRUE > > rd1 <- cn1$ranks > rd2 <- cn2$ranks > rd3 <- cn3$ranks > all.equal(rd1[c('time', 'rank', 'casewt')], rd2[c('time', 'rank', 'casewt')]) [1] TRUE > all.equal(rd1[c('time', 'rank', 'casewt')], rd3[c('time', 'rank', 'casewt')]) [1] TRUE > > tfun(cn1) [1] TRUE > tfun(cn2) [1] TRUE > tfun(cn3) [1] TRUE > tfun(cn4) [1] TRUE > > # Simple check of (time1, time2) data > # First a check on the fastkm2 (internal) routine > test1 <- survfit(Surv(tstart, tstop, status) ~1, cgd, id=id) > nr <- nrow(cgd) > y <- with(cgd, Surv(tstart,tstop, status)) > sort1 <- order(-cgd$tstart); sort2 <- order(-cgd$tstop, cgd$status) > if (!exists("Cfastkm2")) Cfastkm2 <- survival:::Cfastkm2 # for my test env > test2 <- .Call(Cfastkm2, y, rep(1.0, nr), order(-cgd$tstart)-1L, + order(-cgd$tstop, cgd$status) -1L) > ii <- which(test1$n.event>0) > all.equal(test1$time[ii], test2$etime) [1] TRUE > all.equal(test1$n.risk[ii], test2$nrisk) [1] TRUE > all.equal(c(1, test1$surv[ii[-length(ii)]]), test2$S) # test 2 is lagged [1] TRUE > > zero <- rep(0, nrow(nafld1)) > test3 <- survfit(Surv(futime, status) ~1, nafld1, id=id) > test4 <- with(nafld1, .Call(Cfastkm2, Surv(zero, futime, status), zero+1, + seq.int(nrow(nafld1)) -1L, + order(-futime, status) -1L)) > ii <- which(test3$n.event >0) > all.equal(test3$time[ii], test4$etime) [1] TRUE > all.equal(test3$n.risk[ii], test4$nrisk) [1] TRUE > all.equal(c(1, test3$surv[ii[-length(ii)]]), test4$S) # test 2 is lagged [1] TRUE > > # Now a check of concordance > nfitx <- coxph(Surv(zero, futime, status) ~ male + pspline(age), nafld1) > cn1x <- concordance(nfitx, timewt='n', ranks=TRUE) > cn2x <- concordance(nfitx, timewt='S', ranks=TRUE) > all.equal(cn1x$count, cn1$count) [1] TRUE > all.equal(cn2x$count, cn2$count) [1] TRUE > > proc.time() user system elapsed 0.710 0.023 0.731 survival/tests/residsf.Rout.save0000644000176200001440000002030414607325257016550 0ustar liggesusers R Under development (unstable) (2024-03-04 r86048) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: aarch64-unknown-linux-gnu R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(survival) > aeq <- function(x, y, ...) all.equal(as.vector(x), as.vector(y), ...) > # > # Tests of the residuals.survfit function > # > # The influence argument of survfit returns all the residuals at every time > # point, but for large data sets the result will be huge. This function uses > # a different algorithm which will be faster when the number of time > # points being reported out is small. > > # Start with small data sets and work up. First simple survival. > test1 <- data.frame(time= c(9, 3,1,1,6,6,8), + status=c(1,NA,1,0,1,1,0), + x= c(0, 2,1,1,1,0,0)) > indx <- order(test1$time[!is.na(test1$status)]) > > s1 <- survfit(Surv(time, status) ~1, test1, influence=3) > # true influence for survival and hazard, in time order > inf1 <- matrix(c(-20, rep(4,5), -10, 2, -13, -13, 17, 17, + rep(0,6))/144, ncol=3, + dimnames=list(1:6, c(1,6,9))) > inf2 <- matrix(c(10, rep(-2,5), 10, -2, 7,7, -11, -11)/72, + ncol=2) > aeq(s1$influence.surv[indx,], inf1[, c(1,2,2,3)]) [1] TRUE > aeq(s1$influence.chaz[indx,], inf2[,c(1,2,2,2)]) [1] TRUE > > r1 <- resid(s1, times=c(0, 3, 5, 8, 10)) > all(r1[,1] ==0) [1] TRUE > aeq(r1[indx,2:5], inf1[,c(1,1,2,3)]) [1] TRUE > > r2 <- resid(s1, times=c(0, 3, 5, 8, 10), type="cumhaz") > all(r2[,1] ==0) [1] TRUE > aeq(r2[indx,2:5], inf2[,c(1,1,2,2)]) [1] TRUE > > # AUC is a sum of rectangles, height= S, width based on time points, > # so the leverage is a weighted sum of dfbeta values for S > r3 <- resid(s1, times=c(1,4, 8, 10), type="sojourn") > inf3 <- inf1 %*% cbind(c(0,0,0), c(3,0,0), c(5,2,0), c(5,3,1)) > aeq(r3[indx,], inf3) [1] TRUE > > # exp(Nelson-Aalen) > s2 <- survfit(Surv(time, status) ~1, test1, stype=2, influence=3) > r4 <- resid(s2, times=c(0, 3, 5, 8, 10), type="pstate") > inf4 <- -inf2[, c(1,2,2)] %*% diag(s2$surv[c(1,2,4)]) > aeq(r4[indx,2:5], inf4[,c(1,1,2,3)]) [1] TRUE > aeq(s2$influence.surv[indx,], inf4[,c(1,2,2,3)]) [1] TRUE > > r5 <- resid(s2, times=c(1,4, 8, 10), type="sojourn") > inf5 <- inf4 %*% cbind(c(0,0,0), c(3,0,0), c(5,2,0), c(5,3,1)) > aeq(r5[indx,], inf5) [1] TRUE > > # Fleming-Harrington > # This one is hard, the code still fails > s3 <- survfit(Surv(time, status) ~1, test1, ctype=2, influence=2) > inf6 <- matrix(c( rep(c(5, -1), c(1, 5))/36, c(5,-1)/36, + c(21,21,-29, -29)/144), ncol=2) > # r6 <- resid(s3, times =c(0, 3, 5, 8, 10), type="cumhaz") > > # Part 2: single state, with start/stop data, multiple curves, > # second curve is identical to test1 > # Then put it out of order. > > test2 <- data.frame(t1 =c(1, 2, 5, 2, 1, 7, 3, 4, 8, 8, + 0,0,0,0,0,0), + t2 =c(2, 3, 6, 7, 8, 9, 9, 9,14, 17, + 9, 1, 1, 6, 6, 8), + event=c(1, 1, 1, 1, 1, 1, 1, 0, 0, 0, + 1, 1, 0, 1, 1, 0), + x = rep(1:2, c(10, 6)), + id = 1:16) > > s4 <- survfit(Surv(t1, t2, event) ~ x, test2, influence=TRUE) > r6 <- resid(s4, time=c(4, 8, 10), type="surv") > aeq(r6[1:10,], s4$influence.surv[[1]][,c(2, 5, 6)]) [1] TRUE > aeq(r6[11:16,],s4$influence.surv[[2]][,c(1,3, 4)]) [1] TRUE > aeq(r6[11:16,2:3], r1[,4:5]) [1] TRUE > > r7 <- resid(s4, time=c(4, 8, 10), type="cumhaz") > aeq(r7[1:10,], s4$influence.chaz[[1]][,c(2, 5, 6)]) [1] TRUE > aeq(r7[11:16,],s4$influence.chaz[[2]][,c(1,3, 4)]) [1] TRUE > aeq(r7[11:16, 2:3], r2[,4:5]) [1] TRUE > > # Compute the AUC at times 8 and 10, the first is a reporting time, the > # second is in between > r8 <- resid(s4, time= c(8, 10), type="auc") > aeq(r8[11:16,], r3[,3:4]) [1] TRUE > > # curve1: > inf1 <- s4$influence.surv[[1]] > d1 <- inf1[,1:4] %*% diff(s4$time[1:5]) > d2 <- inf1[,1:6] %*% diff(c(s4$time[1:6], 10)) > aeq(cbind(d1, d2), r8[1:10,]) [1] TRUE > > # curve2: > inf2 <- s4$influence.surv[[2]] > d3 <- inf2[,1:2] %*% diff(s4$time[9:11]) > d4 <- inf2[,1:4] %*% diff(c(s4$time[9:12], 10)) > aeq(cbind(d3, d4), r8[11:16,]) [1] TRUE > > # scramble the data > reord <- c(1,3,5,7,9,11,13, 15,2,4,6,8,10,12,14,16) > test2b <-test2[reord,] > s5 <- survfit(Surv(t1, t2, event) ~x, test2b, influence=TRUE) > r9 <- resid(s5, time=c(4, 8, 10), type="surv") > aeq(r6[reord,], r9) [1] TRUE > > # > # For multistate use the same data set as mstate.R, where results have been > # worked out by hand. Except, make it harder by adding an initial state. > # > tdata <- data.frame(id= LETTERS[3*c(1, 1, 1, 2, 3, 4, 4, 4, 5, 5)], + t1= c(0, 4, 9, 1, 2, 0, 2, 8, 1, 3), + t2= c(4, 9, 10, 5, 9, 2, 8, 9, 3, 11), + st= c(1, 2, 1, 2, 3, 1, 3, 0, 3, 0), + i0= c(1, 2, 3, 2, 1, 1, 2, 4, 3, 4), + wt= 1:10) > > tdata$st <- factor(tdata$st, c(0:3), + labels=c("censor", "a", "b", "c")) > tdata$i0 <- factor(tdata$i0, 1:4, + labels=c("entry", "a", "b", "c")) > if (FALSE) { + #useful picture + check <- survcheck(Surv(t1, t2, st) ~1, tdata, istate=i0, id=id) + plot(c(0,11), c(1,5.5), type='n', xlab="Time", ylab= "Subject") + tdata$idx <- as.numeric(factor(tdata$id)) + with(tdata, segments(t1+.1, idx, t2, idx, col=as.numeric(check$istate))) + with(subset(tdata, st!= "censor"), + text(t2, idx+.15, as.character(st))) + with(tdata, text((t1+t2)/2, idx+.25, wt)) + with(subset(tdata, !duplicated(id)), + text(t1, idx+.15, as.character(i0))) + #segments are colored by current state, case weight in center, events at ends + abline(v=c(2:5, 8:11), lty=3, col='gray') + } > > tfun <- function(data=tdata) { + reorder <- c(10, 9, 1, 2, 5, 4, 3, 7, 8, 6) + new <- data[reorder,] + new + } > mtest2 <- tfun(tdata) # scrambled version > > mfit1 <- survfit(Surv(t1, t2, st) ~ 1, tdata, id=id, istate=i0, + influence=1) > > test1 <- resid(mfit1, times= mfit1$time, collapse=TRUE) > aeq(test1, aperm(mfit1$influence, c(1,3,2))) [1] TRUE > aeq(sqrt(apply(test1^2, 2:3, sum)), t(mfit1$std.err)) [1] TRUE > > test1a <- resid(mfit1, times=c(3, 7, 9), method=1, collapse=TRUE) > minf <- aperm(mfit1$influence, c(1,3,2)) # influence has time second, resid third > aeq(test1a, minf[,,c(2,4,6)]) # interpolated times work [1] TRUE > > test2 <- resid(mfit1, times= mfit1$time, collapse=TRUE, type="cumhaz") > aeq(sqrt(apply(test2^2, 2:3, sum)), t(mfit1$std.chaz)) [1] TRUE > test3 <- resid(mfit1, times= mfit1$time, collapse=TRUE, type="auc") > aeq(sqrt(apply(test3^2, 2:3, sum)), t(mfit1$std.auc)) [1] TRUE > > # Do a couple AUC by hand > atime <- c(1, 5.6, 8.1, 15) > test4 <- resid(mfit1, times=atime, type="auc", collapse=TRUE) > all(test4[,,1] ==0) # before the first time [1] TRUE > # 5.6 covers rectangles of widths 1,1,1, and .6 after times 2, 3,4 and 5 > temp <- apply(test1, 1:2, function(x) sum(x*c(1,1,1, .6, 0,0,0,0))) > aeq(temp, test4[,,2]) [1] TRUE > temp <- apply(test1, 1:2, function(x) sum(x*c(1,1,1, 3, .1, 0, 0, 0))) > aeq(temp, test4[,,3]) [1] TRUE > temp <- apply(test1, 1:2, function(x) sum(x*c(1,1,1, 3, 1, 1, 1, 4))) > aeq(temp, test4[,,4]) [1] TRUE > > # > # competing risks > # > mdata <- mgus2 > mdata$etime <- with(mdata, ifelse(pstat==1, ptime, futime)) > temp <- with(mdata, ifelse(pstat==1, 1, 2*death)) > mdata$event <- factor(temp, 0:2, c("censor", "PCM", "Death")) > mfit <- survfit(Surv(etime, event) ~1, mdata, influence=1) > rr <- resid(mfit, time=360) > index <- sum(mfit$time <= 360) > aeq(mfit$influence.pstate[,index,], rr) [1] TRUE > > > proc.time() user system elapsed 0.443 0.031 0.472 survival/tests/frailty.Rout.save0000644000176200001440000000317614607006645016570 0ustar liggesusers R Under development (unstable) (2019-05-15 r76504) -- "Unsuffered Consequences" Copyright (C) 2019 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(survival) > # > # The constuction of a survival curve with sparse frailties > # > # In this case the coefficient vector is kept in two parts, the > # fixed coefs and the (often very large) random effects coefficients > # The survfit function treats the second set of coefficients as fixed > # values, to avoid an unmanagable variance matrix, and behaves like > # the second fit below. > > fit1 <- coxph(Surv(time, status) ~ age + frailty(inst), lung) > sfit1 <- survfit(fit1) > > # A parallel model with the frailties treated as fixed offsets > offvar <- fit1$frail[as.numeric(factor(lung$inst))] > fit2 <- coxph(Surv(time, status) ~ age + offset(offvar),lung) > fit2$var <- fit1$var #force variances to match > > all.equal(fit1$coef, fit2$coef) [1] TRUE > sfit2 <- survfit(fit2, newdata=list(age=fit1$means, offvar=0)) > all.equal(sfit1$surv, sfit2$surv, tol=1e-7) [1] TRUE > all.equal(sfit1$var, sfit2$var) [1] TRUE > > proc.time() user system elapsed 0.768 0.040 0.807 survival/tests/stratatest.R0000644000176200001440000000314114607006645015617 0ustar liggesusersoptions(na.action=na.exclude) # preserve missings options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type library(survival) # # Trivial test of stratified residuals # Make a second strata = replicate of the first, and I should get the # exact same answers test1 <- data.frame(time= c(9, 3,1,1,6,6,8), status=c(1,NA,1,0,1,1,0), x= c(0, 2,1,1,1,0,0)) test2 <- data.frame(start=c(1, 2, 5, 2, 1, 7, 3, 4, 8, 8), stop =c(2, 3, 6, 7, 8, 9, 9, 9,14,17), event=c(1, 1, 1, 1, 1, 1, 1, 0, 0, 0), x =c(1, 0, 0, 1, 0, 1, 1, 1, 0, 0) ) temp <- as.matrix(test1) n <- nrow(temp) ndead<- sum(test1$status[!is.na(test1$status)]) temp <- data.frame(rbind(temp, temp)) #later releases of S have rbind.data.frame tstrat <- rep(1:2, c(n,n)) fit1 <- coxph(Surv(time, status) ~x, test1) fit2 <- coxph(Surv(time, status) ~x + strata(tstrat), temp) all.equal(resid(fit1) , (resid(fit2))[1:n]) all.equal(resid(fit1, type='score') , (resid(fit2, type='score'))[1:n]) all.equal(resid(fit1, type='schoe') , (resid(fit2, type='schoe'))[1:ndead]) #AG model temp <- as.matrix(test2) n <- nrow(temp) ndead<- sum(test2$event[!is.na(test2$event)]) temp <- data.frame(rbind(temp, temp)) tstrat <- rep(1:2, c(n,n)) fit1 <- coxph(Surv(start, stop, event) ~x, test2) fit2 <- coxph(Surv(start, stop, event) ~x + strata(tstrat), temp) all.equal(resid(fit1) , (resid(fit2))[1:n]) all.equal(resid(fit1, type='score') , (resid(fit2, type='score'))[1:n]) all.equal(resid(fit1, type='schoe') , (resid(fit2, type='schoe'))[1:ndead]) survival/tests/book3.R0000644000176200001440000001203114613770353014435 0ustar liggesuserslibrary(survival) options(na.action=na.exclude) # preserve missings options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type # # Tests from the appendix of Therneau and Grambsch # c. Data set 2 and Breslow estimate # test2 <- data.frame(start=c(1, 2, 5, 2, 1, 7, 3, 4, 8, 8), stop =c(2, 3, 6, 7, 8, 9, 9, 9,14,17), event=c(1, 1, 1, 1, 1, 1, 1, 0, 0, 0), x =c(1, 0, 0, 1, 0, 1, 1, 1, 0, 0)) byhand <- function(beta, newx=0) { r <- exp(beta) loglik <- 4*beta - log(r+1) - log(r+2) - 3*log(3*r+2) - 2*log(3*r+1) u <- 1/(r+1) + 1/(3*r+1) + 4/(3*r+2) - ( r/(r+2) +3*r/(3*r+2) + 3*r/(3*r+1)) imat <- r/(r+1)^2 + 2*r/(r+2)^2 + 6*r/(3*r+2)^2 + 3*r/(3*r+1)^2 + 3*r/(3*r+1)^2 + 12*r/(3*r+2)^2 hazard <-c( 1/(r+1), 1/(r+2), 1/(3*r+2), 1/(3*r+1), 1/(3*r+1), 2/(3*r+2) ) xbar <- c(r/(r+1), r/(r+2), 3*r/(3*r+2), 3*r/(3*r+1), 3*r/(3*r+1), 3*r/(3*r+2)) # The matrix of weights, one row per obs, one col per time # deaths at 2,3,6,7,8,9 wtmat <- matrix(c(1,0,0,0,1,0,0,0,0,0, 0,1,0,1,1,0,0,0,0,0, 0,0,1,1,1,0,1,1,0,0, 0,0,0,1,1,0,1,1,0,0, 0,0,0,0,1,1,1,1,0,0, 0,0,0,0,0,1,1,1,1,1), ncol=6) wtmat <- diag(c(r,1,1,r,1,r,r,r,1,1)) %*% wtmat x <- c(1,0,0,1,0,1,1,1,0,0) status <- c(1,1,1,1,1,1,1,0,0,0) xbar <- colSums(wtmat*x)/ colSums(wtmat) n <- length(x) # Table of sums for score and Schoenfeld resids hazmat <- wtmat %*% diag(hazard) #each subject's hazard over time dM <- -hazmat #Expected part for (i in 1:6) dM[i,i] <- dM[i,i] +1 #observed dM[7,6] <- dM[7,6] +1 # observed mart <- rowSums(dM) # Table of sums for score and Schoenfeld resids # Looks like the last table of appendix E.2.1 of the book resid <- dM * outer(x, xbar, '-') score <- rowSums(resid) scho <- colSums(resid) # We need to split the two tied times up, to match coxph scho <- c(scho[1:5], scho[6]/2, scho[6]/2) var.g <- cumsum(hazard*hazard /c(1,1,1,1,1,2)) var.d <- cumsum( (xbar-newx)*hazard) surv <- exp(-cumsum(hazard) * exp(beta*newx)) varhaz <- (var.g + var.d^2/imat)* exp(2*beta*newx) list(loglik=loglik, u=u, imat=imat, xbar=xbar, haz=hazard, mart=mart, score=score, rmat=resid, scho=scho, surv=surv, var=varhaz) } aeq <- function(x,y) all.equal(as.vector(x), as.vector(y)) fit0 <-coxph(Surv(start, stop, event) ~x, test2, iter=0, method='breslow') truth0 <- byhand(0,0) aeq(truth0$loglik, fit0$loglik[1]) aeq(1/truth0$imat, fit0$var) aeq(truth0$mart, fit0$residuals) aeq(truth0$scho, resid(fit0, 'schoen')) aeq(truth0$score, resid(fit0, 'score')) sfit <- survfit(fit0, list(x=0), censor=FALSE) aeq(sfit$std.err^2, truth0$var) aeq(sfit$surv, truth0$surv) aeq(fit0$score, truth0$u^2/truth0$imat) beta1 <- truth0$u/truth0$imat fit1 <- coxph(Surv(start, stop, event) ~x, test2, iter=1, ties="breslow") aeq(beta1, coef(fit1)) truth <- byhand(-0.084526081, 0) fit <- coxph(Surv(start, stop, event) ~x, test2, eps=1e-8, method='breslow', nocenter= NULL) aeq(truth$loglik, fit$loglik[2]) aeq(1/truth$imat, fit$var) aeq(truth$mart, fit$residuals) aeq(truth$scho, resid(fit, 'schoen')) aeq(truth$score, resid(fit, 'score')) expect <- predict(fit, type='expected', newdata=test2) #force recalc aeq(test2$event -fit$residuals, expect) #tests the predict function sfit <- survfit(fit, list(x=0), censor=FALSE) aeq(sfit$std.err^2, truth$var) aeq(-log(sfit$surv), (cumsum(truth$haz))) # Reprise the test, with strata # offseting the times ensures that we will get the wrong risk sets # if strata were not kept separate test2b <- rbind(test2, test2, test2) test2b$group <- rep(1:3, each= nrow(test2)) test2b$start <- test2b$start + test2b$group test2b$stop <- test2b$stop + test2b$group fit0 <- coxph(Surv(start, stop, event) ~ x + strata(group), test2b, iter=0, method="breslow") aeq(3*truth0$loglik, fit0$loglik[1]) aeq(3*truth0$imat, 1/fit0$var) aeq(rep(truth0$mart,3), fit0$residuals) aeq(rep(truth0$scho,3), resid(fit0, 'schoen')) aeq(rep(truth0$score,3), resid(fit0, 'score')) fit1 <- coxph(Surv(start, stop, event) ~ x + strata(group), test2b, iter=1, method="breslow") aeq(fit1$coefficients, beta1) fit3 <- coxph(Surv(start, stop, event) ~x + strata(group), test2b, eps=1e-8, method='breslow') aeq(3*truth$loglik, fit3$loglik[2]) aeq(3*truth$imat, 1/fit3$var) aeq(rep(truth$mart,3), fit3$residuals) aeq(rep(truth$scho,3), resid(fit3, 'schoen')) aeq(rep(truth$score,3), resid(fit3, 'score')) # # Done with the formal test, now print out lots of bits # resid(fit) resid(fit, 'scor') resid(fit, 'scho') predict(fit, type='lp') predict(fit, type='risk') predict(fit, type='expected') predict(fit, type='terms') predict(fit, type='lp', se.fit=T) predict(fit, type='risk', se.fit=T) predict(fit, type='expected', se.fit=T) predict(fit, type='terms', se.fit=T) summary(survfit(fit)) summary(survfit(fit, list(x=2))) survival/tests/survtest.Rout.save0000644000176200001440000001224314607006645017010 0ustar liggesusers R Under development (unstable) (2022-01-17 r81511) -- "Unsuffered Consequences" Copyright (C) 2022 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > options(na.action=na.exclude) # preserve missings > options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type > library(survival) > > # > # Simple test of (start, stop] Kaplan-Meier curves, using the test2 data > # set > # > test1 <- data.frame(time= c(9, 3,1,1,6,6,8), + status=c(1,NA,1,0,1,1,0), + x= c(0, 2,1,1,1,0,0)) > test2 <- data.frame(start=c(1, 2, 5, 2, 1, 7, 3, 4, 8, 8), + stop =c(2, 3, 6, 7, 8, 9, 9, 9,14,17), + event=c(1, 1, 1, 1, 1, 1, 1, 0, 0, 0), + x =c(1, 0, 0, 1, 0, 1, 1, 1, 0, 0), + wt = 1:10) > aeq <- function(x,y, ...) all.equal(as.vector(x), as.vector(y), ...) > test2 <- test2[c(1,6,2,7,3,8,4,9,5,10),] # unsorted data is a harder test > > fit1 <- survfit(Surv(start, stop, event) ~1, test2, type='fh2', + error='tsiatis') > fit2 <- survfit(Surv(start, stop, event) ~x, test2, start.time=3, + type='fh2') > > cfit1<- survfit(coxph(Surv(start, stop, event)~1, test2)) > cfit2<- survfit(coxph(Surv(start, stop, event) ~ strata(x), test2, subset=-1)) > > deaths <- (fit1$n.event + fit1$n.censor)>0 > aeq(fit1$time[deaths], cfit1$time) [1] TRUE > aeq(fit1$n.risk[deaths], cfit1$n.risk) [1] TRUE > aeq(fit1$n.event[deaths], cfit1$n.event) [1] TRUE > aeq(fit1$surv[deaths], cfit1$surv) [1] TRUE > aeq(fit1$std.err[deaths], cfit1$std.err) [1] TRUE > > deaths <- (fit2$n.event + fit2$n.censor)>0 > aeq(fit2$time[deaths], cfit2$time) [1] TRUE > aeq(fit2$n.risk[deaths], cfit2$n.risk) [1] TRUE > aeq(fit2$n.event[deaths], cfit2$n.event) [1] TRUE > aeq(fit2$surv[deaths], cfit2$surv) [1] TRUE > > fit3 <- survfit(Surv(start, stop, event) ~1, test2) #Kaplan-Meier > aeq(fit3$n, 10) [1] TRUE > aeq(fit3$time, sort(unique(test2$stop))) [1] TRUE > aeq(fit3$n.risk, c(2,3,5,4,4,5,2,1)) [1] TRUE > aeq(fit3$n.event,c(1,1,1,1,1,2,0,0)) [1] TRUE > aeq(fit3$surv[fit3$n.event>0], c(.5, 1/3, 4/15, 1/5, 3/20, 9/100)) [1] TRUE > temp <- with(fit3, n.event/(n.risk * (n.risk - n.event))) > aeq(fit3$std.err, sqrt(cumsum(temp))) [1] TRUE > > # > # Verify that both surv AND n.risk are right between time points. > # > fit <- survfit(Surv(time, status) ~1, test1) > temp <- summary(fit, time=c(.5,1, 1.5, 6, 7.5, 8, 8.9, 9, 10), extend=TRUE) > > aeq(temp$n.risk, c(6,6,4,4,2,2,1,1,0)) [1] TRUE > aeq(temp$surv, c(1, fit$surv[c(1,1,2,2,3,3,4,4)])) [1] TRUE > aeq(temp$n.event, c(0,1,0,2,0,0,0,1,0)) [1] TRUE > aeq(temp$std.err, c(0, (fit$surv*fit$std.err)[c(1,1,2,2,3,3,4,4)])) [1] TRUE > > > fit <- survfit(Surv(start, stop, event) ~1, test2) > temp <- summary(fit, times=c(.5, 1.5, 2.5, 3, 6.5, 14.5, 16.5)) > aeq(temp$surv, c(1, 1, fit$surv[c(1,2,3,6,6)])) [1] TRUE > > # This next fails. With start-stop data the number at risk at intermediate > # endpoints is not known precisely, since the underlying routine does not report > # time points at which only an addition occured. > if (FALSE) aeq(temp$n.risk, c(0, 2, 3, 3, 4, 1,1)) > > # compute conditional survival > fit1 <- survfit(Surv(start, stop, event)~1, test2, weights=wt) > fit2 <- survfit(Surv(start, stop, event)~1, test2, weights=wt, start.time=5) > > aeq(fit1$surv[2], summary(fit1, time=5)$surv) # verify my subscript [1] TRUE > aeq(fit2$surv, fit1$surv[3:8]/fit1$surv[2]) [1] TRUE > aeq(fit2$std.err^2, fit1$std.err[3:8]^2 - fit1$std.err[2]^2) [1] TRUE > aeq(fit2$cumhaz, fit1$cumhaz[3:8] - fit1$cumhaz[2]) [1] TRUE > aeq(fit2$std.chaz^2, fit1$std.chaz[3:8]^2 - fit1$std.chaz[2]^2) [1] TRUE > > # Now with a Cox model > cfit <- coxph(Surv(start, stop, event)~1, test2, weights=wt) > fit1 <- survfit(cfit) > fit2 <- survfit(cfit, start.time=5) > aeq(fit2$surv, fit1$surv[3:8]/fit1$surv[2]) [1] TRUE > aeq(fit2$std.err^2, fit1$std.err[3:8]^2 - fit1$std.err[2]^2) [1] TRUE > aeq(fit2$cumhaz, fit1$cumhaz[3:8] - fit1$cumhaz[2]) [1] TRUE > aeq(fit2$std.chaz^2, fit1$std.chaz[3:8]^2 - fit1$std.chaz[2]^2) [1] TRUE > > > # bigger data set, with covariates and some tied event times > mfit <- coxph(Surv(age, age+futime/12, death) ~ sex + mspike, mgus2) > dummy <- data.frame(sex='F', mspike=1.3) > > msurv1 <- survfit(mfit, newdata=dummy) > msurv2 <- survfit(mfit, newdata=dummy, start.time=80) > j <- max(which(msurv1$time < 80)) > k <- seq(j+1, length(msurv1$time)) > aeq(msurv2$surv, msurv1$surv[k] / msurv1$surv[j]) [1] TRUE > aeq(msurv2$cumhaz, msurv1$cumhaz[k] - msurv1$cumhaz[j]) [1] TRUE > # standard errors now have a term due to vmat(mfit), so don't factor > # into a simple sum > > proc.time() user system elapsed 0.942 0.048 0.982 survival/tests/coxsurv2.R0000644000176200001440000000372614613770353015226 0ustar liggesuserslibrary(survival) # # Check that the survival curves from a Cox model with beta=0 # match ordinary survival # # Aalen surv1 <- survfit(Surv(time,status) ~ sex, data=lung, stype=2) fit1 <- coxph(Surv(time, status) ~ age + strata(sex), data=lung, iter=0, ties='breslow') fit1$var <- 0*fit1$var #sneaky, causes the extra term in the Cox variance # calculation to be zero surv2 <- survfit(fit1, stype=2) surv3 <- survfit(fit1) arglist <- c('n', 'time', 'n.risk','n.event', 'n.censor', 'surv', 'strata', 'std.err', 'upper', 'lower') all.equal(unclass(surv1)[arglist], unclass(surv2)[arglist]) all.equal(unclass(surv1)[arglist], unclass(surv3)[arglist]) # Efron method surv1 <- survfit(Surv(time,status) ~ sex, data=lung, stype=2, ctype=2) surv2 <- survfit(fit1, ctype=2) all.equal(unclass(surv1)[arglist], unclass(surv2)[arglist]) # Kaplan-Meier surv1 <- survfit(Surv(time,status) ~ sex, data=lung) surv2 <- survfit(fit1, stype=1) all.equal(unclass(surv1)[arglist], unclass(surv2)[arglist]) # Now add some random weights rwt <- runif(nrow(lung), .5, 3) surv1 <- survfit(Surv(time,status) ~ sex, data=lung, stype=2, weights=rwt, robust=FALSE) fit1 <- coxph(Surv(time, status) ~ age + strata(sex), data=lung, iter=0, ties='breslow', weights=rwt, robust=FALSE) fit1$var <- 0*fit1$var #sneaky surv2 <- survfit(fit1, stype=2, ctype=1) surv3 <- survfit(fit1) all.equal(unclass(surv1)[arglist], unclass(surv2)[arglist]) all.equal(unclass(surv1)[arglist], unclass(surv3)[arglist]) # Efron method surv1 <- survfit(Surv(time,status) ~ sex, data=lung, stype=2, ctype=2, weights=rwt, robust=FALSE) surv2 <- survfit(fit1, ctype=2, robust=FALSE) all.equal(unclass(surv1)[arglist], unclass(surv2)[arglist]) # Kaplan-Meier surv1 <- survfit(Surv(time,status) ~ sex, data=lung, weights=rwt, robust=FALSE) surv2 <- survfit(fit1, stype=1, robust=FALSE) all.equal(unclass(surv1)[arglist], unclass(surv2)[arglist]) survival/tests/zph.Rout.save0000644000176200001440000002012114654222147015703 0ustar liggesusers R Under development (unstable) (2024-06-14 r86747) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: aarch64-unknown-linux-gnu R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(survival) > aeq <- function(x,y) all.equal(as.vector(x), as.vector(y)) > > test1 <- data.frame(time= c(9, 3,1,1,6,6,8), + status=c(1,NA,1,0,1,1,0), + x= c(0, 2,1,1,1,0,0)) > > # Verify that cox.zph computes a score test > # First for the Breslow estimate > r <- (3 + sqrt(33))/2 # actual MLE for log(beta) > U <- c(1/(r+1), 3/(r+3), -r/(r+3), 0) # score statistic > imat <- c(r/(r+1)^2, 3*r/(r+3)^2, 3*r/(r+3)^2, 0) # information matrix > g = c(1, 6, 6, 9) # death times > > u2 <- c(sum(U), sum(g*U)) # first derivative > i2 <- matrix(c(sum(imat), sum(g*imat), sum(g*imat), sum(g^2*imat)), + 2,2) # second derivative > sctest <- solve(i2, u2) %*% u2 > > # Verify that centering makes no difference for the test (though i2 changes) > g2 <- g - mean(g) > u2b <- c(sum(U), sum(g2*U)) > i2b <- matrix(c(sum(imat), sum(g2*imat), sum(g2*imat), sum(g2^2*imat)), + 2,2) > sctest2 <- solve(i2b, u2b) %*% u2b > all.equal(sctest, sctest2) [1] TRUE > > # Now check the program > fit1 <- coxph(Surv(time, status) ~ x, test1, ties='breslow') > aeq(fit1$coef, log(r)) [1] TRUE > zp1 <- cox.zph(fit1, transform='identity', global=FALSE) > aeq(zp1$table[,1], sctest) [1] TRUE > aeq(zp1$y, resid(fit1, type="scaledsch")) [1] TRUE > > dummy <- rep(0, nrow(test1)) > fit1b <- coxph(Surv(dummy, time, status) ~ x, test1, ties='breslow') > aeq(fit1b$coef, log(r)) [1] TRUE > zp1b <- cox.zph(fit1b, transform='identity', global=FALSE) > aeq(zp1b$table[,1], sctest) [1] TRUE > # the pair of tied times gets reversed in the zph result > # but since the 'y' values are only used to plot it doesn't matter > aeq(zp1b$y[c(1,3,2,4)], resid(fit1b, type="scaledsch")) [1] TRUE > > # log time check > g3 <- log(g) - mean(log(g)) > u3 <- c(sum(U), sum(g3*U)) # first derivative > i3 <- matrix(c(sum(imat), sum(g3*imat), sum(g3*imat), sum(g3^2*imat)), + 2,2) # second derivative > sctest3 <- solve(i3, u3) %*% u3 > zp3 <- cox.zph(fit1, transform='log', global=FALSE) > aeq(zp3$table[,1], sctest3) [1] TRUE > > # Efron approximation > phi <- acos((45/23)*sqrt(3/23)) > r <- 2*sqrt(23/3)* cos(phi/3) # actual MLE for log(beta) > U <- c(1/(r+1), 3/(r+3), -r/(r+5), 0) # score statistic > imat <- c(r/(r+1)^2, 3*r/(r+3)^2, 5*r/(r+5)^2, 0) # information matrix > > u4 <- c(sum(U), sum(g3*U)) # first derivative > i4 <- matrix(c(sum(imat), sum(g3*imat), sum(g3*imat), sum(g3^2*imat)), + 2,2) # second derivative > sctest4 <- solve(i4, u4) %*% u4 > > fit4 <- coxph(Surv(time, status) ~ x, test1, ties='efron') > aeq(fit4$coef, log(r)) [1] TRUE > zp4 <- cox.zph(fit4, transform='log', global=FALSE) > aeq(zp4$table[,1], sctest4) [1] TRUE > aeq(zp4$y, resid(fit4, type="scaledsch")) [1] TRUE > > fit5 <- coxph(Surv(dummy, time, status) ~ x, test1, ties="efron") > aeq(fit5$coef, log(r)) [1] TRUE > zp5 <- cox.zph(fit5, transform="log", global=FALSE) > aeq(zp5$table[,1], sctest4) [1] TRUE > > # Artificial stratification > test2 <- rbind(test1, test1) > test2$group <- rep(letters[1:2], each=nrow(test1)) > # U, imat, and sctest will all double > dummy <- c(dummy, dummy) > fit6 <- coxph(Surv(dummy, time, status) ~ x + strata(group), test2) > aeq(fit6$coef, log(r)) [1] TRUE > zp6 <- cox.zph(fit6, transform="log", globa=FALSE) > aeq(zp6$table[,1], 2*sctest4) [1] TRUE > > # A multi-state check, 2 covariates > # Verify that the multi-state result = the single state Cox models > etime <- with(mgus2, ifelse(pstat==0, futime, ptime)) > event <- with(mgus2, ifelse(pstat==0, 2*death, 1)) > event <- factor(event, 0:2, labels=c("censor", "pcm", "death")) > table(event) event censor pcm death 409 115 860 > > ct1 <- coxph(Surv(etime, event) ~ sex + age, mgus2, id=id, ties='efron') > ct2 <- coxph(Surv(etime, event=='pcm') ~ sex + age, mgus2) > ct3 <- coxph(Surv(etime, event=='death') ~ sex + age, mgus2) > > zp1 <- cox.zph(ct1, transform='identity') > zp2 <- cox.zph(ct2, transform='identity') > zp3 <- cox.zph(ct3, transform='identity') > aeq(zp1$table[1:2,], zp2$table[1:2,]) [1] TRUE > aeq(zp1$table[3:4,], zp3$table[1:2,]) [1] TRUE > > # Now add a starting time of zero > dummy <- rep(0, nrow(mgus2)) > ct4 <- coxph(Surv(dummy, etime, event) ~ sex + age, mgus2, id=id, ties='efron') > ct5 <- coxph(Surv(dummy, etime, event=='pcm') ~ sex + age, mgus2) > ct6 <- coxph(Surv(dummy, etime, event=='death') ~ sex + age, mgus2) > zp4 <- cox.zph(ct4, transform='identity') > zp5 <- cox.zph(ct5, transform='identity') > zp6 <- cox.zph(ct6, transform='identity') > aeq(zp4$table[1:2,], zp5$table[1:2,]) [1] TRUE > aeq(zp4$table[3:4,], zp6$table[1:2,]) [1] TRUE > > > # Direct check of a multivariate model with start, stop data > p1 <- pbcseq[!duplicated(pbcseq$id), 1:6] > pdata <- tmerge(p1[, c("id", "trt", "age", "sex")], p1, id=id, + death = event(futime, status==2)) > pdata <- tmerge(pdata, pbcseq, id=id, bili=tdc(day, bili), + edema = tdc(day, edema), albumin=tdc(day, albumin), + protime = tdc(day, protime)) > pfit <- coxph(Surv(tstart, tstop, death) ~ log(bili) + albumin + edema + + age + log(protime), data = pdata, ties='efron') > zp7 <- cox.zph(pfit, transform='log', global=FALSE) > > direct <- function(fit) { + nvar <- length(fit$coef) + dt <- coxph.detail(fit) + gtime <- log(dt$time) - mean(log(dt$time)) + # key idea: at any event time I have a first deriviative vector + # c(dt$score[i,], gtime[i]* dt$score[i,]) + # and second derivative matrix + # dt$imat[,,i] gtime[i] * dt$imat[,,i] + # gtime[i]*dt$imat[,,i] gtime[i]^2 * dt$imat[,,i] + # for the expanded model, where imat[,,i] is symmetric, + # and colSums(dt$score) =0 (since the model converged) + # + # Create score tests for adding one time-dependent variable + # gtime * x[,j] at a time: first derivative of this test is + # c(dt$score[i,], gtime[i]* dt$score[i,j]) + # and etc. + unew <- colSums(gtime * dt$score) + temp1 <- apply(dt$imat, 1:2, sum) + temp2 <- apply(dt$imat, 1:2, function(x) sum(x*gtime)) + temp3 <- apply(dt$imat, 1:2, function(x) sum(x * gtime^2)) + + score <- double(nvar) + smat <- matrix(0., nvar+1, nvar+1) # second deriv matrix for the test + smat[1:nvar, 1:nvar] <- temp1 + for (i in 1:nvar) { + smat[nvar+1,] <- c(temp2[i,], temp3[i,i]) + smat[,nvar+1] <- c(temp2[,i], temp3[i,i]) + utemp <- c(rep(0,nvar), unew[i]) + score[i] <- solve(smat, utemp) %*% utemp + } + list(sctest = score, u= c(colSums(dt$score), unew), + imat=cbind(rbind(temp1, temp2), rbind(temp2, temp3))) + } > > aeq(zp7$table[,1], direct(pfit)$sctest) [1] TRUE > > # Last, make sure that NA coefficients are ignored > d1 <- survSplit(Surv(time, status) ~ ., veteran, cut=150, episode="epoch") > fit <- coxph(Surv(tstart, time, status) ~ celltype:strata(epoch) + age, d1) > zz <- cox.zph(fit) > > fit2 <- coxph(Surv(tstart, time, status) ~ celltype:strata(epoch) + age, d1, + x=TRUE) > zz2 <- cox.zph(fit2) > > x2 <- fit2$x[, !is.na(fit$coefficients)][,-1] > fit3 <- coxph(Surv(tstart, time, status) ~ age + x2, d1) > all.equal(fit3$loglik, fit2$loglik) [1] TRUE > zz3 <- cox.zph(fit3) > > all.equal(unclass(zz)[1:7], unclass(zz2)[1:7]) #ignore the call component [1] TRUE > all.equal(as.vector(zz$table), as.vector(zz3$table)) # variable names change [1] TRUE > > proc.time() user system elapsed 0.515 0.028 0.540 survival/tests/survfit1.R0000644000176200001440000003524514612274303015210 0ustar liggesusers# # Check out the survfit routine on the simple AML data set. # The leverage validation makes use of the fact that when all # weights are 1 and there is 1 obs per subject, the IJ variance is # equal to the Greenwood. # There are 8 choices in the C code: Nelson-Aalen or Fleming-Harrington # estimate of cumulative hazard, KM or exp(cumhaz) estimate of survival, # regular or robust variance. This tries to exercise them all. library(survival) aeq <- function(x, y, ...) all.equal(as.vector(x), as.vector(y), ...) set.seed(1953) # used only to reorder the data adata <- aml adata$id <- sample(LETTERS, nrow(aml)) # labels are not in time or data order adata <- adata[sample(1:nrow(aml), nrow(aml)),] # data is unordered adata$wt <- sample((2:30)/10, nrow(aml)) # non-integer weights group <- rep("", nrow(adata)) temp <- table(adata$x) group[adata$x == "Maintained"] <- rep(letters[4:1], length=temp[1]) group[adata$x != "Maintained"] <- rep(letters[4:7], length=temp[2]) adata$group <- group adata2 <- survSplit(Surv(time, status) ~ ., adata, cut=c(10, 20, 40)) byhand <- function(time, status, weights, id) { # for a single curve utime <- sort(unique(time)) ntime <- length(utime) n <- length(time) if (missing(weights)) weights <- rep(1.0, n) if (missing(id)) id <- seq_along(time) uid <- unique(id) nid <- length(uid) id <- match(id, uid) # change it to 1:nid n.risk <- n.event <- surv <- cumhaz <- double(ntime) KM <- 1; nelson <-0; kvar <- 0; hvar<-0; U <- matrix(0, nid, 2) # the two robust influence estimates V <- matrix(0, ntime, 4) # variances usave <- array(0., dim=c(nid, 2, ntime)) estimate <- matrix(0, ntime, 2) for (i in 1:ntime) { atrisk <- (time >= utime[i]) n.risk[i] <- sum(weights[atrisk]) deaths <- (time==utime[i] & status==1) n.event[i] <- sum(weights[deaths]) haz <- n.event[i]/n.risk[i] dhaz <- (ifelse(deaths,1,0) - ifelse(atrisk, haz, 0))/n.risk[i] U[,1] <- U[,1]*(1-haz) - KM*tapply(dhaz*weights, id, sum) V[i,1] <- sum(U[,1]^2) U[,2] <- U[,2] + tapply(dhaz* weights, id, sum) #result in 'id' order V[i,2] <- sum(U[,2]^2) usave[,,i] <- U if (n.event[i] >0 ) { KM <- KM*(1-haz) nelson <- nelson + haz kvar <- kvar + n.event[i]/(n.risk[i] * (n.risk[i] - n.event[i])) hvar <- hvar + n.event[i]/(n.risk[i]^2) } V[i,3] <- kvar # var of log(S) V[i,4] <- hvar estimate[i,] <- c(KM, nelson) } dimnames(usave) <- list(uid, c("KM", "chaz"), utime) dimnames(V) <- list(time=utime, c("KM", "chaz", "Greenwood", "Aalen")) list(time=utime, n.risk=n.risk, n.event=n.event, estimate=estimate, std = sqrt(V), influence=usave) } # the byhand function can only handle one group at a time true1a <- with(subset(adata, x=="Maintained"), byhand(time, status, id=id)) true1b <- with(subset(adata, x!="Maintained"), byhand(time, status, id=id)) # The Greenwood and IJ estimates agree, except for a last point with # variance of zero. These next few lines verify the byhand() function aeq(true1a$std[,1], true1a$estimate[,1]*true1a$std[,3]) aeq(true1b$std[1:9,1], true1b$estimate[1:9,1]*true1b$std[1:9,3]) aeq(true1b$std[10,1], 0) # variance of zero for jackknife !is.finite(true1b$std[10,3]) # Inf for Greenwood temp <- with(subset(adata, x=="Maintained"), byhand(time, status, id=id, weights=rep(3,11))) aeq(temp$std[,1:2], true1a$std[,1:2]) # IJ estimates should be invariant # fit1 uses the standard formulas: NA hazard, KM survival fit1 <- survfit(Surv(time, status) ~ x, data=adata) aeq(fit1$surv, c(true1a$estimate[,1], true1b$estimate[,1])) aeq(fit1$cumhaz, c(true1a$estimate[,2], true1b$estimate[,2])) aeq(fit1$std.err, c(true1a$std[,3], true1b$std[,3])) aeq(fit1$std.chaz, c(true1a$std[,4], true1b$std[,4])) aeq(fit1$n.risk, c(true1a$n.risk, true1b$n.risk)) aeq(fit1$n.event, c(true1a$n.event, true1b$n.event)) fit1$logse # logse should be TRUE fit1b <- survfit(Surv(tstart, time, status) ~x, data=adata2, id=id) eqsurv <- function(x, y) { temp <- c("n.risk", "n.event", "n.censor", "surv", "std.err", "cumhaz", "std.chaz", "strata", "logse") if (!is.null(x$influence.surv)) temp <- c(temp, "influence.surv") if (!is.null(x$influence.chaz)) temp <- c(temp, "influence.chaz") # need unclass to avoid [.survfit all.equal(unclass(x)[temp], unclass(y)[temp]) } eqsurv(fit1, fit1b) fit1c <- survfit(Surv(tstart, time, status) ~x, data=adata2, id=id, entry=TRUE) aeq(fit1c$time[fit1c$time >0], fit1$time) aeq(fit1c$n.enter[fit1c$time==0], c(11, 12)) all(fit1c$n.enter[fit1c$time >0] ==0) # fit2 will use the IJ method fit2 <- survfit(Surv(time, status) ~ x, data=adata, id=id, influence=1) aeq(fit2$surv, c(true1a$estimate[,1], true1b$estimate[,1])) aeq(fit2$cumhaz, c(true1a$estimate[,2], true1b$estimate[,2])) aeq(fit2$std.err, c(true1a$std[,1], true1b$std[,1])) aeq(fit2$std.chaz, c(true1a$std[,2], true1b$std[,2])) aeq(fit2$n.risk, c(true1a$n.risk, true1b$n.risk)) aeq(fit2$n.event, c(true1a$n.event, true1b$n.event)) !fit2$logse # logse should be FALSE fit2b <- survfit(Surv(tstart, time, status) ~ x, data=adata2, id=id, influence=1) eqsurv(fit2, fit2b) fit2c <- survfit(Surv(tstart, time, status) ~ 1, data=adata2, id=id, subset=(x=="Maintained"), influence=1) aeq(fit2$influence.surv[[1]], fit2c$influence.surv) r2 <- resid(fit2c, times= fit2c$time, collapse=TRUE) aeq(r2, fit2c$influence.surv) fit2d <- survfit(Surv(time, factor(status)) ~ x, data=adata, id=id, influence=T) aeq(fit2d$influence[[1]][,,1], r2) r3 <- resid(fit2d, times= fit2c$time, collapse=TRUE) aeq(r3[adata$x =="Maintained",1,], r2) fit2e <- survfit(Surv(time, factor(status)) ~1, adata, id=id, influence=T, subset=(x=="Maintained")) aeq(fit2e$influence, fit2d$influence[[1]]) aeq(fit2e$influence[,,1], r2) # look at the leverage values fit3 <- survfit(Surv(time, status) ~ x, data=adata, id=id, influence=3) aeq(fit3$influence.surv[[1]], true1a$influence[,1,]) aeq(fit3$influence.surv[[2]], true1b$influence[,1,]) aeq(fit3$influence.chaz[[1]], true1a$influence[,2,]) aeq(fit3$influence.chaz[[2]], true1b$influence[,2,]) fit3b <- survfit(Surv(tstart, time, status) ~x, adata2, id=id, influence=3) eqsurv(fit3, fit3b) # compute the influence by brute force tdata <- subset(adata, x != "Maintained") eps <- 1e-8 imat1 <- imat2 <- matrix(0., 12, 10) t1 <- survfit(Surv(time, status) ~x, data=tdata) for (i in 1:12) { wtemp <- rep(1.0, 12) wtemp[i] <- 1 + eps tfit <-survfit(Surv(time, status) ~x, data=tdata, weights=wtemp) imat2[i,] <- (tfit$cumhaz - t1$cumhaz)/eps imat1[i,] <- (tfit$surv - t1$surv)/eps } aeq(imat1, true1b$influence[,1,], tol= sqrt(eps)) aeq(imat2, true1b$influence[,2,], tol= sqrt(eps)) # Repeat using the Nelson-Aalen hazard and exp(NA) for survival fit1 <- survfit(Surv(time, status) ~ x, adata, stype=2) aeq(fit1$surv, exp(-c(true1a$estimate[,2], true1b$estimate[,2]))) aeq(fit1$cumhaz, c(true1a$estimate[,2], true1b$estimate[,2])) aeq(fit1$std.err, c(true1a$std[,4], true1b$std[,4])) aeq(fit1$std.chaz, c(true1a$std[,4], true1b$std[,4])) aeq(fit1$n.risk, c(true1a$n.risk, true1b$n.risk)) fit1b <- survfit(Surv(tstart, time, status) ~x, adata2, stype=2, id=id) eqsurv(fit1, fit1b) # Nelson-Aalen + exp() surv, along with IJ variance fit2 <- survfit(Surv(time, status) ~ x, data=adata, id=id, stype=2, influence=3) aeq(fit2$surv, exp(-c(true1a$estimate[,2], true1b$estimate[,2]))) aeq(fit2$cumhaz, c(true1a$estimate[,2], true1b$estimate[,2])) aeq(fit2$std.err, c(true1a$std[,2], true1b$std[,2])) aeq(fit2$std.chaz, c(true1a$std[,2], true1b$std[,2])) aeq(fit2$n.risk, c(true1a$n.risk, true1b$n.risk)) aeq(fit2$influence.chaz[[1]], true1a$influence[,2,]) aeq(fit2$influence.chaz[[2]], true1b$influence[,2,]) aeq(fit2$influence.surv[[2]], -true1b$influence[,2,]%*% diag(fit2[2]$surv)) fit2b <- survfit(Surv(tstart, time, status) ~x, data=adata2, id=id, stype=2, influence=3) eqsurv(fit2, fit2b) # Cumulative hazard is the same for fit1 and fit2 all.equal(fit2$influence.chaz, fit2b$influence.chaz) # Weighted fits true2a <- with(subset(adata, x=="Maintained"), byhand(time, status, id=id, weights= wt)) true2b <- with(subset(adata, x!="Maintained"), byhand(time, status, id=id, weights=wt)) fit3 <- survfit(Surv(time, status) ~ x, data=adata, id=id, weights=wt, influence=TRUE) aeq(fit3$influence.surv[[1]], true2a$influence[,1,]) aeq(fit3$influence.surv[[2]], true2b$influence[,1,]) aeq(fit3$influence.chaz[[1]], true2a$influence[,2,]) aeq(fit3$influence.chaz[[2]], true2b$influence[,2,]) aeq(fit3$surv, c(true2a$estimate[,1], true2b$estimate[,1])) aeq(fit3$cumhaz, c(true2a$estimate[,2], true2b$estimate[,2])) aeq(fit3$std.err, c(true2a$std[,1], true2b$std[,1])) aeq(fit3$std.chaz, c(true2a$std[,2], true2b$std[,2])) aeq(fit3$n.risk, c(true2a$n.risk, true2b$n.risk)) aeq(fit3$n.event, c(true2a$n.event, true2b$n.event)) fit3b <- survfit(Surv(tstart, time, status) ~x, adata2, id=id, weights=wt, influence=TRUE) eqsurv(fit3, fit3b) # Different survival, same hazard fit3b <- survfit(Surv(time, status) ~ x, data=adata, id=id, weights=wt, influence=2, stype=2) temp <- c("n", "time", "cumhaz", "std.chaz", "influence.chaz", "n.risk", "n.event") aeq(unclass(fit3b)[temp], unclass(fit3)[temp]) # unclass avoids [.survfit aeq(fit3b$surv, exp(-c(true2a$estimate[,2], true2b$estimate[,2]))) aeq(fit3b$std.err, fit3b$std.chaz) aeq(fit3b$logse, FALSE) aeq(fit3b$n.risk, c(true2a$n.risk, true2b$n.risk)) aeq(fit3b$n.event, c(true2a$n.event, true2b$n.event)) # The grouped jackknife fit4 <- survfit(Surv(time, status) ~ x, data=adata, id=id, weights=wt, influence=TRUE, cluster=group) g1 <- adata$group[match(rownames(true2a$influence[,1,]), adata$id)] g2 <- adata$group[match(rownames(true2b$influence[,1,]), adata$id)] aeq(fit4$influence.surv[[1]], rowsum(true2a$influence[,1,], g1, reorder=FALSE)) aeq(fit4$influence.surv[[2]], rowsum(true2b$influence[,1,], g2, reorder=FALSE)) aeq(fit4$influence.chaz[[1]], rowsum(true2a$influence[,2,], g1, reorder=FALSE)) aeq(fit4$influence.chaz[[2]], rowsum(true2b$influence[,2,], g2, reorder=FALSE)) aeq(c(colSums(fit4$influence.surv[[1]]^2), colSums(fit4$influence.surv[[2]]^2)), fit4$std.err^2) aeq(c(colSums(fit4$influence.chaz[[1]]^2), colSums(fit4$influence.chaz[[2]]^2)), fit4$std.chaz^2) # The Fleming-Harrington is a more complex formula. Start with weights of # 1. fit5 <- survfit(Surv(time, status) ~x, adata, ctype=2) nrisk <- c(11,10,8,7, 5,4,2, 12, 11, 10, 9, 8, 6:1) chaz <- c(cumsum(1/nrisk[1:7])[c(1:4,4, 5,6,6,7,7)], cumsum(1/nrisk[8:18])[c(2,4,5,5,6:11)]) aeq(fit5$cumhaz, chaz) aeq(fit5$std.chaz, sqrt(c(cumsum(1/nrisk[1:7]^2)[c(1:4,4, 5,6,6,7,7)], cumsum(1/nrisk[8:18]^2)[c(2,4,5,5,6:11)]))) # We can compute the FH using a fake data set where each tie is spread out # over a set of fake times. # fh <- function(time, status, weights, id) { counts <- table(time, status) utime <- sort(unique(time)) tied <- counts[,2] > 1 if (missing(weights)) weights <- rep(1.0, length(time)) if (missing(id)) id <- 1:length(time) # build the expanded data set delta <- min(diff(utime))/(2*max(counts[,2])) efun <- function(x) { who <- which(time==x & status==1) ntie <- length(who) data.frame(time = rep(x - (1:ntie -1)*delta, each=ntie), id = rep(id[who], ntie), status = rep(1, ntie^2), weight = rep(weights[who]/ntie, ntie), stringsAsFactors=FALSE ) } temp <- do.call(rbind, lapply(utime[tied], efun)) notie <- (status==0 | !(time %in% utime[tied])) bfit <- byhand(time = c(time[notie], temp$time), status = c(status[notie], temp$status), id = c(id[notie], temp$id), weights = c(weights[notie], temp$weight) ) keep <- match(utime, bfit$time) # the real time points # The influence from survfit is in data order, which we have perturbed. # Fix that indx <- match(unique(id), dimnames(bfit$influence)[[1]]) list(time=bfit$time[keep], n.risk=bfit$n.risk[keep - pmax(0, counts[,2]-1)], n.event = bfit$n.event[keep]* counts[,2], estimate=bfit$estimate[keep,], std = bfit$std[keep,], influence=bfit$influence[indx,,keep]) } # Case weights true6a <- with(subset(adata, x=="Maintained"), fh(time, status, wt, id)) true6b <- with(subset(adata, x!="Maintained"), fh(time, status, wt, id)) fit6 <- survfit(Surv(time, status) ~ x, weights=wt, data=adata, stype=2, ctype=2, robust=FALSE) aeq(fit6$cumhaz, c(true6a$estimate[,2], true6b$estimate[,2])) aeq(fit6$surv, exp(-c(true6a$estimate[,2], true6b$estimate[,2]))) aeq(fit6$std.chaz, c(true6a$std[,4], true6b$std[,4])) aeq(fit6$n.risk, c(true6a$n.risk, true6b$n.risk)) aeq(fit6$n.event, c(true6a$n.event, true6b$n.event)) # Robust variance fit7 <- survfit(Surv(time, status) ~ x, weights=wt, data=adata, stype=2,ctype=2, id=id, influence=2, robust=TRUE) aeq(fit7$cumhaz, c(true6a$estimate[,2], true6b$estimate[,2])) aeq(fit7$surv, exp(-c(true6a$estimate[,2], true6b$estimate[,2]))) aeq(fit7$std.chaz, c(true6a$std[,2], true6b$std[,2])) aeq(fit7$n.risk, c(true6a$n.risk, true6b$n.risk)) aeq(fit7$n.event, c(true6a$n.event, true6b$n.event)) aeq(fit7$influence.chaz[[1]], true6a$influence[,2,]) aeq(fit7$influence.chaz[[2]], true6b$influence[,2,]) # compute the influence by brute force tdata <- subset(adata, x != "Maintained") eps <- 1e-8 imat <- matrix(0., 12, 10) t1 <- survfit(Surv(time, status) ~x, data=tdata, ctype=2, weights=wt) for (i in 1:12) { wtemp <- tdata$wt wtemp[i] <- wtemp[i] + eps tfit <-survfit(Surv(time, status) ~x, data=tdata, ctype=2, weights=wtemp) imat[i,] <- tdata$wt[i] * (tfit$cumhaz - t1$cumhaz)/eps } aeq(fit7$influence.chaz[[2]], imat, tol=sqrt(eps)) # # verify that the times and scale arguments work as expected. They # are in the summary and print.survfit functions. # s1 <- summary(fit1, scale=1) s2 <- summary(fit1, scale=2) aeq(s1$time/2, s2$time) #times change aeq(s1$surv, s2$surv) tscale <- rep(c(1,1,1,1, 2,2,2,2,2), each=2) aeq(s1$table, s2$table *tscale) s3 <- summary(fit1, scale=1, times=c(9, 18, 23, 33, 34)) s4 <- summary(fit1, scale=2, times=c(9, 18, 23, 33, 34)) aeq(s3$time, s4$time*2) aeq(s3$surv, s4$surv) print(fit1, rmean='common') print(fit1, rmean='common', scale=2) survival/tests/r_resid.Rout.save0000644000176200001440000003560114670377532016551 0ustar liggesusers R Under development (unstable) (2024-08-21 r87038) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > options(na.action=na.exclude) # preserve missings > options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type > library(survival) > > aeq <- function(x,y, ...) all.equal(as.vector(x), as.vector(y), ...) > > fit1 <- survreg(Surv(futime, fustat) ~ age + ecog.ps, ovarian) > fit4 <- survreg(Surv(log(futime), fustat) ~age + ecog.ps, ovarian, + dist='extreme') > > print(fit1) Call: survreg(formula = Surv(futime, fustat) ~ age + ecog.ps, data = ovarian) Coefficients: (Intercept) age ecog.ps 12.28496723 -0.09702669 0.09977342 Scale= 0.6032744 Loglik(model)= -90 Loglik(intercept only)= -98 Chisq= 15.98 on 2 degrees of freedom, p= 0.000339 n= 26 > summary(fit4) Call: survreg(formula = Surv(log(futime), fustat) ~ age + ecog.ps, data = ovarian, dist = "extreme") Value Std. Error z p (Intercept) 12.2850 1.5015 8.18 2.8e-16 age -0.0970 0.0235 -4.13 3.7e-05 ecog.ps 0.0998 0.3657 0.27 0.785 Log(scale) -0.5054 0.2351 -2.15 0.032 Scale= 0.603 Extreme value distribution Loglik(model)= -21.8 Loglik(intercept only)= -29.8 Chisq= 15.98 on 2 degrees of freedom, p= 0.00034 Number of Newton-Raphson Iterations: 5 n= 26 > > > # Hypothesis (and I'm fairly sure): censorReg shares the fault of many > # iterative codes -- it returns the loglik and variance for iteration k > # but the coef vector of iteration k+1. Hence the "all.equal" tests > # below don't come out perfect. > # > if (exists('censorReg')) { #true for Splus, not R + fit2 <- censorReg(censor(futime, fustat) ~ age + ecog.ps, ovarian) + fit3 <- survreg(Surv(futime, fustat) ~ age + ecog.ps, ovarian, + iter=0, init=c(fit2$coef, log(fit2$scale))) + + aeq(resid(fit2, type='working')[,1], resid(fit3, type='working')) + aeq(resid(fit2, type='response')[,1], resid(fit3, type='response')) + + temp <- sign(resid(fit3, type='working')) + aeq(resid(fit2, type='deviance')[,1], + temp*abs(resid(fit3, type='deviance'))) + aeq(resid(fit2, type='deviance')[,1], resid(fit3, type='deviance')) + } > # > # Now check fit1 and fit4, which should follow identical iteration paths > # These tests should all be true > # > aeq(fit1$coef, fit4$coef) [1] TRUE > > resid(fit1, type='working') 1 2 3 4 5 6 -4.5081778 -0.5909810 -2.4878519 0.6032744 -5.8993431 0.6032744 7 8 9 10 11 12 -1.7462937 -0.8102883 0.6032744 -1.6593962 -0.8235265 0.6032744 13 14 15 16 17 18 0.6032744 0.6032744 0.6032744 0.6032744 0.6032744 0.6032744 19 20 21 22 23 24 0.6032744 0.6032744 0.6032744 0.2572623 -31.8006867 -0.7426277 25 26 -0.2857597 0.6032744 > resid(fit1, type='response') 1 2 3 4 5 6 -155.14523 -58.62744 -262.03173 -927.79842 -1377.84908 -658.86626 7 8 9 10 11 12 -589.74449 -318.93436 4.50671 -686.83338 -434.39281 -1105.68733 13 14 15 16 17 18 -42.43371 -173.09223 -4491.29974 -3170.49394 -5028.31053 -2050.91373 19 20 21 22 23 24 -150.65033 -2074.09345 412.32400 76.35826 -3309.40331 -219.81579 25 26 -96.19691 -457.76731 > resid(fit1, type='deviance') 1 2 3 4 5 6 7 -1.5842290 -0.6132746 -1.2876971 0.5387840 -1.7148539 0.6682580 -1.1102921 8 9 10 11 12 13 14 -0.7460191 1.4253843 -1.0849419 -0.7531720 0.6648130 1.3526380 1.1954382 15 16 17 18 19 20 21 0.2962391 0.3916044 0.3278067 0.5929057 1.2747643 0.6171130 1.9857606 22 23 24 25 26 0.6125492 -2.4504208 -0.7080652 -0.3642424 0.7317955 > resid(fit1, type='dfbeta') [,1] [,2] [,3] [,4] 1 0.43370970 -1.087867e-02 0.126322520 0.048379059 2 0.14426449 -5.144770e-03 0.088768478 -0.033939677 3 0.25768057 -3.066698e-03 -0.066578834 0.021817646 4 0.05772598 -5.068044e-04 -0.013121427 -0.007762466 5 -0.58773456 6.676156e-03 0.084189274 0.008064026 6 0.01499533 -7.881949e-04 0.026570173 -0.013513160 7 -0.17869321 4.126121e-03 -0.072760519 -0.015006956 8 -0.11851540 2.520303e-03 -0.045549628 -0.035686269 9 0.08327656 3.206404e-03 -0.141835350 0.024490806 10 -0.25083921 5.321702e-03 -0.073986269 -0.020648720 11 -0.21333934 4.155746e-03 -0.049832434 -0.040215681 12 0.13889770 -1.586136e-03 -0.019701151 -0.004686340 13 0.07892133 -2.706713e-03 0.085242459 0.007847879 14 0.29690157 -1.987141e-03 -0.085553120 0.017447343 15 0.04344618 -6.319243e-04 -0.001944285 -0.003533279 16 0.04866809 -1.068317e-03 0.012398602 -0.006340983 17 0.04368104 -9.248316e-04 0.009428718 -0.004869178 18 0.15684611 -2.081485e-03 -0.013068320 -0.003265399 19 0.48839511 -4.775829e-03 -0.093258090 0.032703354 20 0.17598922 -2.349254e-03 -0.014202966 -0.002486428 21 0.37869758 -8.442011e-03 0.163476417 0.100850775 22 -0.59761427 8.803638e-03 0.052784598 -0.053085234 23 -0.79017984 1.092304e-02 0.053690092 0.080780399 24 -0.02348526 8.331002e-04 -0.039028433 -0.032765737 25 -0.13948485 3.687927e-04 0.056781884 -0.055647859 26 0.05778937 3.766350e-06 -0.029232389 -0.008927920 > resid(fit1, type='dfbetas') [,1] [,2] [,3] [,4] 1 0.288846658 -0.4627232074 0.345395116 0.20574292 2 0.096078819 -0.2188323823 0.242713641 -0.14433617 3 0.171612884 -0.1304417700 -0.182041999 0.09278449 4 0.038444974 -0.0215568869 -0.035877029 -0.03301165 5 -0.391425795 0.2839697749 0.230193032 0.03429410 6 0.009986751 -0.0335258093 0.072649027 -0.05746778 7 -0.119008027 0.1755042532 -0.198944162 -0.06382048 8 -0.078930164 0.1072008799 -0.124543264 -0.15176395 9 0.055461420 0.1363841532 -0.387810796 0.10415271 10 -0.167056601 0.2263581990 -0.202295647 -0.08781336 11 -0.142082031 0.1767643342 -0.136253451 -0.17102630 12 0.092504589 -0.0674661531 -0.053867524 -0.01992972 13 0.052560878 -0.1151298322 0.233072686 0.03337488 14 0.197733705 -0.0845228882 -0.233922105 0.07419878 15 0.028934753 -0.0268788526 -0.005316126 -0.01502607 16 0.032412497 -0.0454407662 0.033900659 -0.02696647 17 0.029091172 -0.0393376416 0.025780305 -0.02070728 18 0.104458066 -0.0885357994 -0.035731824 -0.01388685 19 0.325266641 -0.2031395176 -0.254989284 0.13907843 20 0.117207199 -0.0999253459 -0.038834208 -0.01057410 21 0.252209096 -0.3590802699 0.446982501 0.42889079 22 -0.398005596 0.3744620571 0.144325354 -0.22575700 23 -0.526252483 0.4646108448 0.146801184 0.34353696 24 -0.015640965 0.0354358527 -0.106712804 -0.13934372 25 -0.092895624 0.0156865706 0.155254862 -0.23665514 26 0.038487186 0.0001602014 -0.079928144 -0.03796800 > resid(fit1, type='ldcase') 1 2 3 4 5 6 0.374432175 0.145690278 0.112678800 0.006399163 0.261176992 0.013280058 7 8 9 10 11 12 0.109842490 0.074103234 0.248285282 0.128482147 0.094038203 0.016111951 13 14 15 16 17 18 0.132812463 0.111857574 0.001698300 0.004730718 0.003131173 0.015840667 19 20 21 22 23 24 0.179925399 0.019071941 0.797119488 0.233096445 0.666613755 0.062959708 25 26 0.080117437 0.015922378 > resid(fit1, type='ldresp') 1 2 3 4 5 6 0.076910173 0.173810883 0.078356928 0.005310644 0.060742612 0.010002154 7 8 9 10 11 12 0.067356838 0.067065693 0.355103899 0.067043195 0.068142828 0.016740944 13 14 15 16 17 18 0.193444572 0.165021262 0.001494685 0.004083386 0.002767560 0.016400993 19 20 21 22 23 24 0.269571809 0.020129806 1.409736499 1.040266083 0.058637282 0.071819025 25 26 0.112702844 0.015105534 > resid(fit1, type='ldshape') 1 2 3 4 5 6 0.870628250 0.383362440 0.412503605 0.005534970 0.513991064 0.003310847 7 8 9 10 11 12 0.291860593 0.154910362 0.256160646 0.312329770 0.183191309 0.004184904 13 14 15 16 17 18 0.110215710 0.049299495 0.007678445 0.011633336 0.011588605 0.008641251 19 20 21 22 23 24 0.112967758 0.008271358 2.246729275 0.966929220 1.022043272 0.143857170 25 26 0.079754096 0.001606647 > resid(fit1, type='matrix') g dg ddg ds dds dsg 1 -1.74950763 -1.46198129 -0.32429540 0.88466493 -2.42358635 1.8800360 2 -0.68266980 -0.82027857 -1.38799493 -0.66206188 -0.57351872 1.3921043 3 -1.32369884 -1.33411374 -0.53625126 0.31503768 -1.83606321 1.8626973 4 -0.14514412 0.24059386 -0.39881329 -0.28013223 -0.26053084 0.2237590 5 -1.96497889 -1.50383619 -0.25491587 1.15700933 -2.68145423 1.8694717 6 -0.22328436 0.37012071 -0.61351964 -0.33477229 -0.16715487 0.1848047 7 -1.11099124 -1.23201028 -0.70550005 0.01052036 -1.48515401 1.8106760 8 -0.77288913 -0.95018808 -1.17265428 -0.51190170 -0.79753045 1.5525642 9 -1.01586016 1.68391053 -2.79128447 0.01598527 -0.01623681 -1.7104080 10 -1.08316634 -1.21566480 -0.73259465 -0.03052447 -1.43539383 1.7998987 11 -0.77825093 -0.95675178 -1.16177415 -0.50314979 -0.81016011 1.5600720 12 -0.22098818 0.36631452 -0.60721042 -0.33361394 -0.17002503 0.1866908 13 -0.91481479 1.51641567 -2.51364157 -0.08144930 0.07419757 -1.3814037 14 -0.71453621 1.18442981 -1.96333502 -0.24017106 0.15944438 -0.7863174 15 -0.04387880 0.07273440 -0.12056602 -0.13717935 -0.29168773 0.1546569 16 -0.07667699 0.12710134 -0.21068577 -0.19691828 -0.30879813 0.1993144 17 -0.05372862 0.08906165 -0.14763041 -0.15709224 -0.30221555 0.1713377 18 -0.17576861 0.29135764 -0.48296037 -0.30558900 -0.22570402 0.2151929 19 -0.81251205 1.34683655 -2.23254376 -0.16869744 0.13367171 -1.0672002 20 -0.19041424 0.31563454 -0.52320225 -0.31581218 -0.20797917 0.2078622 21 -1.97162252 3.26820173 -5.41743790 1.33844939 -2.24706488 -5.4868428 22 -0.68222519 1.23245193 -4.79064290 -0.58668577 -0.95209805 -2.8390386 23 -3.49689798 -1.62675999 -0.05115487 2.90949868 -4.20494743 1.7496975 24 -0.74529506 -0.91462436 -1.23160543 -0.55723389 -0.73139169 1.5108398 25 -0.56095318 -0.53280415 -1.86451840 -0.87536233 -0.22666819 0.9689667 26 -0.26776235 0.44384834 -0.73573207 -0.35281852 -0.11207472 0.1409908 > > aeq(resid(fit1, type='working'),resid(fit4, type='working')) [1] TRUE > #aeq(resid(fit1, type='response'), resid(fit4, type='response'))#should differ > aeq(resid(fit1, type='deviance'), resid(fit4, type='deviance')) [1] TRUE > aeq(resid(fit1, type='dfbeta'), resid(fit4, type='dfbeta')) [1] TRUE > aeq(resid(fit1, type='dfbetas'), resid(fit4, type='dfbetas')) [1] TRUE > aeq(resid(fit1, type='ldcase'), resid(fit4, type='ldcase')) [1] TRUE > aeq(resid(fit1, type='ldresp'), resid(fit4, type='ldresp')) [1] TRUE > aeq(resid(fit1, type='ldshape'), resid(fit4, type='ldshape')) [1] TRUE > aeq(resid(fit1, type='matrix'), resid(fit4, type='matrix')) [1] TRUE > # > # Some tests of the quantile residuals > # > # These should agree exactly with Ripley and Venables' book > fit1 <- survreg(Surv(time, status) ~ temp, data=imotor) > summary(fit1) Call: survreg(formula = Surv(time, status) ~ temp, data = imotor) Value Std. Error z p (Intercept) 16.31852 0.62296 26.2 < 2e-16 temp -0.04531 0.00319 -14.2 < 2e-16 Log(scale) -1.09564 0.21480 -5.1 3.4e-07 Scale= 0.334 Weibull distribution Loglik(model)= -147.4 Loglik(intercept only)= -169.5 Chisq= 44.32 on 1 degrees of freedom, p= 2.8e-11 Number of Newton-Raphson Iterations: 8 n= 40 > > # > # The first prediction has the SE that I think is correct > # The third is the se found in an early draft of Ripley; fit1 ignoring > # the variation in scale estimate, except via it's impact on the > # upper left corner of the inverse information matrix. > # Numbers 1 and 3 differ little for this dataset > # > predict(fit1, data.frame(temp=130), type='uquantile', p=c(.5, .1), se=T) $fit [1] 10.306068 9.676248 $se.fit [1] 0.2135247 0.2202088 > > fit2 <- survreg(Surv(time, status) ~ temp, data=imotor, scale=fit1$scale) > predict(fit2, data.frame(temp=130), type='uquantile', p=c(.5, .1), se=T) $fit [1] 10.306068 9.676248 $se.fit 1 1 0.2057964 0.2057964 > > fit3 <- fit2 > fit3$var <- fit1$var[1:2,1:2] > predict(fit3, data.frame(temp=130), type='uquantile', p=c(.5, .1), se=T) $fit [1] 10.306068 9.676248 $se.fit 1 1 0.2219959 0.2219959 > > pp <- seq(.05, .7, length=40) > xx <- predict(fit1, data.frame(temp=130), type='uquantile', se=T, + p=pp) > #matplot(pp, cbind(xx$fit, xx$fit+2*xx$se, xx$fit - 2*xx$se), type='l') > > > # > # Now try out the various combinations of strata, #predicted, and > # number of quantiles desired > # > fit1 <- survreg(Surv(time, status) ~ inst + strata(inst) + age + sex, lung) > qq1 <- predict(fit1, type='quantile', p=.3, se=T) > qq2 <- predict(fit1, type='quantile', p=c(.2, .3, .4), se=T) > aeq <- function(x,y) all.equal(as.vector(x), as.vector(y)) > aeq(qq1$fit, qq2$fit[,2]) [1] TRUE > aeq(qq1$se.fit, qq2$se.fit[,2]) [1] TRUE > > qq3 <- predict(fit1, type='quantile', p=c(.2, .3, .4), se=T, + newdata= lung[1:5,]) > aeq(qq3$fit, qq2$fit[1:5,]) [1] TRUE > > qq4 <- predict(fit1, type='quantile', p=c(.2, .3, .4), se=T, newdata=lung[7,]) > aeq(qq4$fit, qq2$fit[7,]) [1] TRUE > > qq5 <- predict(fit1, type='quantile', p=c(.2, .3, .4), se=T, newdata=lung) > aeq(qq2$fit, qq5$fit) [1] TRUE > aeq(qq2$se.fit, qq5$se.fit) [1] TRUE > > proc.time() user system elapsed 1.002 0.074 1.072 survival/tests/expected2.Rout.save0000644000176200001440000000647314607006645017004 0ustar liggesusers R Under development (unstable) (2018-04-09 r74565) -- "Unsuffered Consequences" Copyright (C) 2018 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(survival) > # > # A Cox model with a factor, followed by survexp. > # > pfit2 <- coxph(Surv(time, status > 0) ~ trt + log(bili) + + log(protime) + age + platelet + sex, data = pbc) > esurv <- survexp(~ trt, ratetable = pfit2, data = pbc) > > temp <- pbc > temp$sex2 <- factor(as.numeric(pbc$sex), levels=2:0, + labels=c("f", "m", "unknown")) > esurv2 <- survexp(~ trt, ratetable = pfit2, data = temp, + rmap=list(sex=sex2)) > > # The call components won't match, which happen to be first > all.equal(unclass(esurv)[-1], unclass(esurv2)[-1]) [1] TRUE > > > # Check that the ratetableDate function is okay > # > Datedate <- function(x) { + # Dates have an origin of 1/1/1970, dates of 1/1/1960 + offset <- as.numeric(as.Date("1970-01-01") - as.Date("1960-01-01")) + y <- as.numeric(x) + offset + class(y) <- "date" + y + } > as.data.frame.date <- as.data.frame.vector # needed to make the functions work > > n <- nrow(lung) > tdata <- data.frame(age=lung$age + (1:n)/365.25, + sex = c('male', 'female')[lung$sex], + ph.ecog = lung$ph.ecog, + time = lung$time*3, + status = lung$status, + entry = as.Date("1940/01/01") + (n:1)*50) > tdata$entry2 <- as.POSIXct(tdata$entry) > tdata$entry3 <- Datedate(tdata$entry) > > p1 <- pyears(Surv(time, status) ~ ph.ecog, data=tdata, ratetable=survexp.us, + rmap= list(age=age*365.25, sex=sex, year=entry)) > p2 <- pyears(Surv(time, status) ~ ph.ecog, data=tdata, ratetable=survexp.us, + rmap= list(age=age*365.25, sex=sex, year=entry2)) > p3 <- pyears(Surv(time, status) ~ ph.ecog, data=tdata, ratetable=survexp.us, + rmap= list(age=age*365.25, sex=sex, year=entry3)) > > all.equal(p1$expected, p2$expected) [1] TRUE > all.equal(p1$expected, p3$expected) [1] TRUE > > > # Now a ratetable with ordinary dates rather than US census style year > trate <- survexp.us > attr(trate, 'type') <- c(2,1,3) > p4 <- pyears(Surv(time, status) ~ ph.ecog, data=tdata, ratetable=trate, + rmap= list(age=age*365.25, sex=sex, year=entry)) > p5 <- pyears(Surv(time, status) ~ ph.ecog, data=tdata, ratetable=trate, + rmap= list(age=age*365.25, sex=sex, year=entry2)) > p6 <- pyears(Surv(time, status) ~ ph.ecog, data=tdata, ratetable=trate, + rmap= list(age=age*365.25, sex=sex, year=entry3)) > > #all.equal(p1$expected, p4$expected) # this won't be true, US special is special > all.equal(p4$expected, p5$expected) [1] TRUE > all.equal(p5$expected, p6$expected) [1] TRUE > > proc.time() user system elapsed 0.756 0.036 0.790 survival/tests/r_stanford.R0000644000176200001440000000511614607006645015566 0ustar liggesusersoptions(na.action=na.exclude) # preserve missings options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type library(survival) # # The Stanford data from 1980 is used in Escobar and Meeker, Biometrics 1992. # t5 = T5 mismatch score # Their case numbers correspond to a data set sorted by age # aeq <- function(x,y, ...) all.equal(as.vector(x), as.vector(y), ...) stanford2$t5 <- ifelse(stanford2$t5 <0, NA, stanford2$t5) stanford2 <- stanford2[order(stanford2$age, stanford2$time),] stanford2$time <- ifelse(stanford2$time==0, .5, stanford2$time) cage <- stanford2$age - mean(stanford2$age) fit1 <- survreg(Surv(time, status) ~ cage + I(cage^2), stanford2, dist='lognormal') fit1 ldcase <- resid(fit1, type='ldcase') ldresp <- resid(fit1, type='ldresp') # The ldcase and ldresp should be compared to table 1 in Escobar and # Meeker, Biometrics 1992, p519; the colums they label as (1/2) A_{ii} # They give data for selected cases, entered below as mdata mdata <- cbind(c(1,2,4,5,12,16,23,61,66,72,172,182,183,184), c(.035, .244, .141, .159, .194, .402, 0,0, .143, .403, .178, .033, .005, .015), c(.138, .145, .073, .076, .104, .159, 0,0, .109, .184, .116, .063, .103, .144)) dimnames(mdata) <- list(NULL, c("case#", "ldcase", "ldresp")) aeq(round(ldcase[mdata[,1]],3), mdata[,2]) aeq(round(ldresp[mdata[,1]],3), mdata[,3]) plot1 <- function() { # make their figure 1, 2, and 6 temp <- predict(fit1, type='quantile', p=c(.1, .5, .9)) plot(stanford2$age, stanford2$time, log='y', xlab="Age", ylab="Days", ylim=range(stanford2$time, temp)) matlines(stanford2$age, temp, lty=c(1,2,2), col=1) n <- length(ldcase) plot(1:n, ldcase, xlab="Case Number", ylab="(1/2) A", type='l') title (main="Case weight pertubations") plot(1:n, ldresp, xlab="Case Number", ylab="(1/2) A", ylim=c(0, .2), type='l') title(main="Response pertubations") indx <- which(ldresp > .07) text(indx, ldresp[indx]+ .005, indx%%10, cex=.6) } postscript('meekerplot.ps') plot1() dev.off() # # Stanford predictions in other ways # fit2 <- survreg(Surv(time, status) ~ poly(age,2), stanford2, dist='lognormal') p1 <- predict(fit1, type='response') p2 <- predict(fit2, type='response') aeq(p1, p2) p3 <- predict(fit2, type='terms', se=T) p4 <- predict(fit2, type='lp', se=T) p5 <- predict(fit1, type='lp', se=T) # aeq(p3$fit + attr(p3$fit, 'constant'), p4$fit) #R is missing the attribute aeq(p4$fit, p5$fit) aeq(p3$se.fit, p4$se.fit) #this one should be false aeq(p4$se.fit, p5$se.fit) #this one true survival/tests/factor2.Rout.save0000644000176200001440000000353014607006645016450 0ustar liggesusers R Under development (unstable) (2021-01-28 r79896) -- "Unsuffered Consequences" Copyright (C) 2021 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(survival) > aeq <- function(x,y) all.equal(as.vector(x), as.vector(y)) > options(na.action=na.exclude) > # > # More tests of factors in prediction, using a new data set > # > fit <- coxph(Surv(time, status) ~ factor(ph.ecog), lung) > > tdata <- data.frame(ph.ecog = factor(0:3)) > p1 <- predict(fit, newdata=tdata, type='lp') > p2 <- predict(fit, type='lp') > aeq(p1, p2[match(0:3, lung$ph.ecog)]) [1] TRUE > > fit2 <- coxph(Surv(time, status) ~ factor(ph.ecog) + factor(sex), lung) > tdata <- expand.grid(ph.ecog = factor(0:3), sex=factor(1:2)) > p1 <- predict(fit2, newdata=tdata, type='risk') > > xdata <- expand.grid(ph.ecog=factor(1:3), sex=factor(1:2)) > p2 <- predict(fit2, newdata=xdata, type='risk') > all.equal(p2, p1[c(2:4, 6:8)], check.attributes=FALSE) [1] TRUE > > > fit3 <- survreg(Surv(time, status) ~ factor(ph.ecog) + age, lung) > tdata <- data.frame(ph.ecog=factor(0:3), age=50) > predict(fit, type='lp', newdata=tdata) 1 2 3 4 0.0000000 0.3688401 0.9163870 2.2079803 > predict(fit3, type='lp', newdata=tdata) 1 2 3 4 6.399571 6.142938 5.770523 4.916993 > > proc.time() user system elapsed 0.873 0.036 0.905 survival/tests/brier.Rout.save0000644000176200001440000000363314613770353016220 0ustar liggesusers R Under development (unstable) (2024-04-17 r86441) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: aarch64-unknown-linux-gnu R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > # Tests of the Brier score. > # Start with the example in the vignette > library(survival) > > rott2 <- rotterdam > ignore <- with(rott2, recur ==0 & death==1 & rtime < dtime) > rott2$rfs <- with(rott2, ifelse(recur==1 | ignore, recur, death)) > rott2$rfstime <- with(rott2, ifelse(recur==1 | ignore, rtime, dtime))/365.25 > > rsurv <- survfit(Surv(rfstime, rfs) ~1, rott2) #KM > rfit <- coxph(Surv(rfstime, rfs) ~ pspline(age) + meno + size + pmin(nodes,12), + rott2) > > tau <- c(2,4,6, 8) # four tau values > bfit <- brier(rfit, times=tau) > > # Now by hand > wtmat <- rttright(Surv(rfstime, rfs) ~ 1, rott2, times=tau) > psurv <- survfit(rfit, newdata= rott2) # one curve per subject > yhat <- 1- summary(psurv, times=tau)$surv > ybar <- 1- summary(rsurv, times=tau)$surv > > y <- with(rott2, cbind(rfstime <=tau[1] & rfs==1, + rfstime <=tau[2] & rfs==1, + rfstime <=tau[3] & rfs==1, + rfstime <=tau[4] & rfs==1)) * 1L > ss1 <- colSums(wtmat * (y - t(yhat))^2) > ss2 <- colSums(wtmat * (y - rep(ybar, each=nrow(y)))^2) > > all.equal(unname(1- ss1/ss2), bfit$rsquared) [1] TRUE > all.equal(unname(ss1), bfit$brier) [1] TRUE > > proc.time() user system elapsed 4.331 0.216 4.544 survival/tests/detail.Rout.save0000644000176200001440000000672314613770353016362 0ustar liggesusers R Under development (unstable) (2019-06-28 r76752) -- "Unsuffered Consequences" Copyright (C) 2019 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > # A short test on coxph.detail, to ensure that the computed hazard is > # equal to the theoretical value > library(survival) > aeq <- function(a,b) all.equal(as.vector(a), as.vector(b)) > > # taken from book4.R > test2 <- data.frame(start=c(1, 2, 5, 2, 1, 7, 3, 4, 8, 8), + stop =c(2, 3, 6, 7, 8, 9, 9, 9,14,17), + event=c(1, 1, 1, 1, 1, 1, 1, 0, 0, 0), + x =c(1, 0, 0, 1, 0, 1, 1, 1, 0, 0) ) > > byhand <- function(beta, newx=0) { + r <- exp(beta) + loglik <- 4*beta - (log(r+1) + log(r+2) + 2*log(3*r+2) + 2*log(3*r+1) + + log(2*r +2)) + u <- 1/(r+1) + 1/(3*r+1) + 2*(1/(3*r+2) + 1/(2*r+2)) - + ( r/(r+2) +3*r/(3*r+2) + 3*r/(3*r+1)) + imat <- r*(1/(r+1)^2 + 2/(r+2)^2 + 6/(3*r+2)^2 + + 6/(3*r+1)^2 + 6/(3*r+2)^2 + 4/(2*r +2)^2) + + hazard <-c( 1/(r+1), 1/(r+2), 1/(3*r+2), 1/(3*r+1), 1/(3*r+1), + 1/(3*r+2), 1/(2*r +2) ) + + + # The matrix of weights, one row per obs, one col per time + # deaths at 2,3,6,7,8,9 + wtmat <- matrix(c(1,0,0,0,1, 0, 0,0,0,0, + 0,1,0,1,1, 0, 0,0,0,0, + 0,0,1,1,1, 0, 1,1,0,0, + 0,0,0,1,1, 0, 1,1,0,0, + 0,0,0,0,1, 1, 1,1,0,0, + 0,0,0,0,0, 1, 1,1,1,1, + 0,0,0,0,0,.5,.5,1,1,1), ncol=7) + wtmat <- diag(c(r,1,1,r,1,r,r,r,1,1)) %*% wtmat + + x <- c(1,0,0,1,0,1,1,1,0,0) + status <- c(1,1,1,1,1,1,1,0,0,0) + xbar <- colSums(wtmat*x)/ colSums(wtmat) + n <- length(x) + + # Table of sums for score and Schoenfeld resids + hazmat <- wtmat %*% diag(hazard) #each subject's hazard over time + dM <- -hazmat #Expected part + for (i in 1:5) dM[i,i] <- dM[i,i] +1 #observed + dM[6:7,6:7] <- dM[6:7,6:7] +.5 # observed + mart <- rowSums(dM) + + # Table of sums for score and Schoenfeld resids + # Looks like the last table of appendix E.2.1 of the book + resid <- dM * outer(x, xbar, '-') + score <- rowSums(resid) + scho <- colSums(resid) + + # We need to add the ties back up (they are symmetric) + scho[6:7] <- rep(mean(scho[6:7]), 2) + + list(loglik=loglik, u=u, imat=imat, xbar=xbar, haz=hazard* exp(beta*newx), + mart=mart, score=score, rmat=resid, + scho=scho) + } > > # The actual coefficient of the fit is close to zero. Using a larger > # number pushes the test harder, but it should still work without > # the init and iter arguments, i.e., for any coefficient. > fit1 <- coxph(Surv(start, stop, event) ~x, test2,init=-1, iter=0) > temp <- coxph.detail(fit1) > temp2 <- byhand(fit1$coef, fit1$means) > aeq(temp$haz, c(temp2$haz[1:5], sum(temp2$haz[6:7]))) [1] TRUE > > > proc.time() user system elapsed 1.633 0.128 1.747 survival/tests/finegray.R0000644000176200001440000001650514612274303015227 0ustar liggesuserslibrary(survival) # Test data set 1 for Fine-Gray regression fdata <- data.frame(time =c(1,2,3,4,4,4,5,5,6,8,8, 9,10,12), status=factor(c(1,2,0,1,0,0,2,1,0,0,2, 0,1 ,0), 0:2, c("cen", "type1", "type2")), x =c(5,4,3,1,2,1,1,2,2,4,6,1,2, 0), id = 1:14) test1 <- finegray(Surv(time, status) ~., fdata, count="fgcount") test2 <- finegray(Surv(time, status) ~x, fdata, etype="type2") # When creating the censoring time distribution remember that # censors happen after deaths, so the distribution does not drop until # time 3+, 4+, 6+, 8+ and 9+ csurv <- list(time=c(0, 3, 4, 6, 8, 9), p = cumprod(c(1, 11/12, 8/10, 5/6, 3/4, 2/3))) # # For estimation of event type 1, the first subject of event type # 2 will have weights of curve$p over (0,3], (3,4], (4,6], (6,8], (8,9] # and (9,12]. All that really matters is the weight at times 1, 4, 5, # and 10, however, which are the points at which events of type 1 happen # # The next subject of event type 2 occurs at time 5, and will have a # weight of (9,12] /(4,5] = (5*4*2)/(7*5*3) = 8/21 at time 10. The last # censor at time 6 has a weight of 2/3 at time 10. all.equal(test1$id, c(1, 2,2,2,2, 3:6, 7, 7, 8:11, 11, 12:14)) twt <- c(1, csurv$p[c(1,2,3,6)], 1,1,1, 1, 1, 5/12, 1,1,1, 1, 1/2, 1,1,1) all.equal(test1$fgwt, twt) #extra obs will end at times found in csurv$time, or max(time)=12 all.equal(test1$fgstop[test1$fgcount>0], c(4,6,12, 12,12)) # # Verify the data reproduces a multi-state curve # censoring times may be different in the two setups so only # compare at the event times sfit <- survfit(Surv(time, status) ~1, fdata) sfit1<- survfit(Surv(fgstart, fgstop, fgstatus) ~1, test1, weights=fgwt) sfita<- sfit["type1"] i1 <- sfita$n.event > 0 i2 <- sfit1$n.event > 0 all.equal(sfita$pstate[i1], 1- sfit1$surv[i2]) sfitb <- sfit["type2"] sfit2 <- survfit(Surv(fgstart, fgstop, fgstatus) ~1, test2, weights=fgwt) i1 <- sfitb$n.event > 0 i2 <- sfit2$n.event > 0 all.equal(sfitb$pstate[i1], 1- sfit2$surv[i2]) # Test strata. Make a single data set that has fdata for the first 19 # rows, then fdata with outcomes switched for the second 19. It should # reprise test1 and test2 in a single call. fdata2 <- rbind(fdata, fdata) fdata2$group <- rep(1:2, each=nrow(fdata)) temp <- c(1,3,2)[as.numeric(fdata$status)] fdata2$status[fdata2$group==2] <- factor(temp, 1:3, levels(fdata$status)) test3 <- finegray(Surv(time, status) ~ .+ strata(group), fdata2) vtemp <- c("fgstart", "fgstop", "fgstatus", "fgwt") all.equal(test3[1:19, vtemp], test1[,vtemp]) all.equal(test3[20:38, vtemp], test2[,vtemp], check.attributes=FALSE) # # Test data set 2: use the larger MGUS data set # Time is in months which leads to lots of ties etime <- with(mgus2, ifelse(pstat==0, futime, ptime)) event <- with(mgus2, ifelse(pstat==0, 2*death, 1)) e2 <- factor(event, 0:2, c('censor', 'pcm', 'death')) edata <- finegray(Surv(etime, e2) ~ sex + id, mgus2, etype="pcm") # Build G(t) = the KM of the censoring distribution # An event at time x is not "at risk" for censoring at time x (Geskus 2011) tt <- sort(unique(etime)) # all the times ntime <- length(tt) nrisk <- nevent <- double(ntime) for (i in 1:ntime) { nrisk[i] <- sum((etime > tt[i] & event >0) | (etime >= tt[i] & event==0)) nevent[i] <- sum(etime == tt[i] & event==0) } G <- cumprod(1- nevent/nrisk) # The weight is defined as w(t)= G(t-)/G(s-) where s is the event time # for a subject who experiences an endpoint other then the one of interest type2 <- event[edata$id]==2 # the rows to be expanded # These rows are copied over as is: endpoint 1 and censors all(edata$fgstop[!type2] == etime[edata$id[!type2]]) all(edata$fgstart[!type2] ==0) all(edata$fgwt[!type2] ==1) tdata <- edata[type2,] #expanded rows first <- match(tdata$id, tdata$id) #points to the first row for each subject Gwt <- c(1, G)[match(tdata$fgstop, tt)] # G(t-) all.equal(tdata$fgwt, Gwt/Gwt[first]) # Test data 3, left truncation. # Ties are assumed to be ordered as event, censor, entry # H(t) = truncation distribution, and is calculated on a reverse time scale # Since there is only one row per subject every obs is a "start" event. # Per equation 5 and 6 of Geskus both G and H are right continuous functions # (the value at t- epsilon is different than the value at t). fdata <- data.frame(time1 = c(0,0,0,3,2,0,0,1,0,7,5, 0, 0, 0), time2 = c(1,2,3,4,4,4,5,5,6,8,8, 9,10,12), status= c(1,2,0,1,0,0,2,1,0,0,2, 0, 1 ,0), x = c(5,4,3,1,2,1,1,2,2,4,6, 1, 2, 0), id = 1:14) tt <- sort(unique(c(fdata$time1, fdata$time2))) ntime <- length(tt) Grisk <- Gevent <- double(ntime) Hrisk <- Hevent <- double(ntime) for (i in 1:ntime) { Grisk[i] <- with(fdata, sum((time2 > tt[i] & status >0 & time1 < tt[i]) | (time2 >= tt[i] & status ==0 & time1 < tt[i]))) Gevent[i]<- with(fdata, sum(time2 == tt[i] & status==0)) Hrisk[i] <- with(fdata, sum(time2 > tt[i] & time1 <= tt[i])) Hevent[i]<- with(fdata, sum(time1 == tt[i])) } G <- cumprod(1- Gevent/pmax(1,Grisk)) G2 <- survfit(Surv(time1, time2 - .1*(status !=0), status==0) ~1, fdata) all.equal(G2$surv[G2$n.event>0], G[Gevent>0]) H <- double(ntime) # The loop below uses the definition of equation 6 in Geskus for (i in 1:ntime) H[i] <- prod((1- Hevent/pmax(1, Hrisk))[-(i:1)]) H2 <- rev(cumprod(rev(1 - Hevent/pmax(1, Hrisk)))) #alternate form H3 <- survfit(Surv(-time2, -time1, rep(1,14)) ~1, fdata) # alternate 3 # c(0,H) = H(t-), H2 = H(t-) already due to the time reversal i2 <- sort(match(unique(fdata$time1), tt)) #time points in H3 all.equal(c(0, H), c(H2, 1)) all.equal(H2[i2], rev(H3$surv)) fg <- finegray(Surv(time1, time2, factor(status, 0:2)) ~ x, id=id, fdata) stat2 <- !is.na(match(fg$id, fdata$id[fdata$status==2])) #expanded ids all(fg$fgwt[!stat2] ==1) #ordinary rows are left alone all(fg$fgstart[!stat2] == fdata$time1[fdata$status !=2]) all(fg$fgstop[!stat2] == fdata$time2[fdata$status !=2]) tdata <- fg[stat2,] index <- match(tdata$id, tdata$id) # points to the first row for each Gwt <- c(1, G)[match(tdata$fgstop, tt)] # G(t-) Hwt <- c(0, H)[match(tdata$fgstop, tt)] # H(t-) all.equal(tdata$fgwt, Gwt*Hwt/(Gwt*Hwt)[index]) # # Test data 4: mgus2 data on age scale # The answer is incorrect due to roundoff, but consistent # start <- mgus2$age # age in years end <- start + etime/12 #etime in months tt <- sort(unique(c(start, end))) # all the times ntime <- length(tt) Grisk <- Gevent <- double(ntime) Hrisk <- Hevent <- double(ntime) for (i in 1:ntime) { Grisk[i] <- sum(((end > tt[i] & event >0) | (end >= tt[i] & event==0)) & (tt[i] > start)) Gevent[i] <- sum(end == tt[i] & event==0) Hrisk[i] <- sum(start <= tt[i] & end > tt[i]) Hevent[i] <- sum(start == tt[i]) } G <- cumprod(1 - Gevent/pmax(1, Grisk)) # pmax to avoid 0/0 H <- rev(cumprod(rev(1-Hevent/pmax(1,Hrisk)))) H <- c(H[-1], 1) #make it right continuous wdata <- finegray(Surv(start, end, e2) ~ ., id=id, mgus2, timefix=FALSE) type2 <- event[wdata$id]==2 # the rows to be expanded tdata <- wdata[type2,] first <- match(tdata$id, tdata$id) Gwt <- c(1, G)[match(tdata$fgstop, tt)] # G(t-) Hwt <- c(0, H)[match(tdata$fgstop, tt)] # H(t-) all.equal(tdata$fgwt, (Gwt/Gwt[first]) * (Hwt / Hwt[first])) survival/tests/surv.R0000644000176200001440000000336114607006645014424 0ustar liggesusers# library(survival) # Some simple tests of the Surv function # The first two are motivated by a bug, pointed out by Kevin Buhr, # where a mixture of NAs and invalid values didn't work right # Even for the simplest things a test case is good. # All but the third should produce warning messages aeq <- function(x,y) all.equal(as.vector(x), as.vector(y)) temp <- Surv(c(1, 10, 20, 30), c(2, NA, 0, 40), c(1,1,1,1)) aeq(temp, c(1,10,NA,30, 2,NA,0,40, 1,1,1,1)) temp <- Surv(c(1, 10, 20, 30), c(2, NA, 0, 40), type='interval2') aeq(temp, c(1,10,20,30, 2,1,1,40, 3,0,NA,3)) #No error temp <- Surv(1:5) aeq(temp, c(1:5, 1,1,1,1,1)) temp1 <- Surv(c(1,10,NA, 30, 30), c(1,NA,10,20, 40), type='interval2') temp2 <- Surv(c(1,10,10,30,30), c(9, NA, 5, 20,40), c(1, 0, 2,3,3), type='interval') aeq(temp1, temp2) aeq(temp1, c(1,10,10,30,30, 1,1,1,1, 40, 1,0,2,NA,3)) # Use of inf temp1 <- Surv(c(1,10,NA, 30, 30), c(1,NA,10,30, 40), type='interval2') temp2 <- Surv(c(1,10,-Inf, 30, 30), c(1,Inf,10,30, 40), type='interval2') aeq(temp1, temp2) # Verify sorting and order routines # These fail in 3.4, succeed in 3.5 due to a system change in how # xtfrm.Surv is used. x1 <- Surv(c(4, 6, 3, 2, 1, NA, 2), c(1,0, NA, 0,1,1,1)) all.equal(order(x1), c(5,7, 4, 1, 2, 3, 6)) all.equal(order(x1, decreasing=TRUE), c(2,1,4,7,5, 3, 6)) all.equal(sort(x1), x1[c(5,7,4,1,2)]) x2 <- Surv(c(4, 6, 3, 2, 1, NA, 2), c(1,0, NA, 0,1,1,1), type='left') all.equal(order(x2), c(5,4, 7, 1, 2, 3, 6)) x3 <- Surv(c(1,5,NA,7, 9), c(6, 6, 4, NA, 9), type="interval2") all.equal(sort(x3), x3[c(1,3,2,4,5)]) x4 <- Surv(c(1,5,6,5,2, 4), c(3, 7, 7, 6, 3, NA), factor(c(1, 2, 0, 1, 1, 0))) all.equal(sort(x4), x4[c(1, 5, 4, 2,3)]) all.equal(sort(x4, na.last=FALSE), x4[c(6,1,5,4,2,3)]) survival/tests/yates1.R0000644000176200001440000000767014612274303014634 0ustar liggesuserslibrary(survival) aeq <- function(x, y, ...) all.equal(as.vector(x), as.vector(y), ...) fit1 <- lm(skips ~ Opening + Solder + Mask + PadType + Panel, data=solder) y1 <- yates(fit1, "Opening") temp <- levels(solder$Opening) tpred <- matrix(0., nrow(solder), 3) for (i in 1:3) { tdata <- solder tdata$Opening <- temp[i] tpred[,i] <- predict(fit1, newdata=tdata) } all.equal(y1$estimate[,"pmm"], colMeans(tpred)) # This fit is deficient: there are no Opening=L and Mask=A6 obs # The MPV for Mask=A6 and Opening L will therefore be NA, as well # as for all levels of Solder, but we can compute the others. # Solder will be NA for all levels fit2 <- lm(skips ~ Opening*Mask + Solder, data=solder) y2a <- yates(fit2, "Mask", population="factorial") y2b <- yates(fit2, "Opening", population="factorial") y2c <- yates(fit2, "Solder", population="factorial") # The predict.lm function gives correct predictions for estimable # functions (all but L,A6) and nonsense for others. It knows that # some are not estimable due to the NA coefficients, but not which ones, # so always prints a warning. Hence the suppressWarnings call. tdata <- do.call(expand.grid, fit2$xlevels[1:3]) temp <- levels(solder$Mask) tpreda <- matrix(0., nrow(tdata), length(temp), dimnames=list(NULL, temp)) for (i in seq_along(temp)) { tdata$Mask <- temp[i] suppressWarnings(tpreda[,i] <- predict(fit2, newdata=tdata)) } tpreda[,"A6"] <- NA # the A6 estimate is deficient aeq(y2a$estimate[,"pmm"], colMeans(tpreda)) tdata <- do.call(expand.grid, fit2$xlevels[1:3]) temp <- levels(solder$Opening) tpredb <- matrix(0., nrow(tdata), length(temp), dimnames=list(NULL, temp)) for (i in seq_along(temp)) { tdata$Opening <- temp[i] suppressWarnings(tpredb[,i] <- predict(fit2, newdata=tdata)) } tpredb[,"L"] <- NA aeq(y2b$estimate[,"pmm"], colMeans(tpredb)) # Solder should be all NA all(is.na(y2c$estimate[,"pmm"])) # Tests for Solder are defined for a non-factorial population, however. # the [] below retains the factor structure of the variable, where the # runs above did not. R gets prediction correct both ways. y2d <- yates(fit2, ~Solder) temp <- levels(solder$Solder) tdata <- solder tpredd <- matrix(0, nrow(tdata), length(temp), dimnames=list(NULL, temp)) for (i in seq_along(temp)) { tdata$Solder[] <- temp[i] suppressWarnings(tpredd[,i] <- predict(fit2, newdata=tdata)) } aeq(y2d$estimate$pmm, colMeans(tpredd)) # # Verify that the result is unchanged by how dummies are coded # The coefs move all over the map, but predictions are unchanged fit3 <- lm(skips ~ C(Opening, contr.helmert)*Mask + C(Solder, contr.SAS), data=solder) y3a <- yates(fit3, ~Mask, population='yates') equal <- c("estimate", "test", "mvar") all.equal(y3a[equal], y2a[equal]) tdata <- do.call(expand.grid, fit2$xlevels[1:3]) # use orignal variable names temp <- levels(solder$Mask) cpred <- matrix(0., nrow(tdata), length(temp), dimnames=list(NULL, temp)) for (i in seq_along(temp)) { tdata$Mask <- temp[i] suppressWarnings(cpred[,i] <- predict(fit3, newdata=tdata)) } aeq(cpred[, temp!="A6"], tpreda[, temp!= "A6"]) # same predictions all.equal(y3a$estimate, y2a$estimate) y3b <- yates(fit3, ~Opening, population='yates') # column names will differ all.equal(y3b$estimate, y2b$estimate, check.attributes=FALSE) y3d <- yates(fit3, ~Solder) for (i in 1:3) { print(all.equal(y3d[[i]], y2d[[i]], check.attributes=FALSE)) } # Reprise this with a character variable in the model sdata <- solder sdata$Mask <- as.character(sdata$Mask) fit4 <- lm(skips ~ Opening*Mask + Solder, data=sdata) y4a <- yates(fit4, ~ Mask, population= "yates") y4b <- yates(fit4, ~ Opening, population= "yates") y4d <- yates(fit4, ~ Solder) equal <- c("estimate", "tests", "mvar", "cmat") all.equal(y2a[equal], y4a[equal]) # the "call" component differs all.equal(y2b[equal], y4b[equal]) all.equal(y2d[equal], y4d[equal]) survival/tests/frank.Rout.save0000644000176200001440000000304514607006645016212 0ustar liggesusers R Under development (unstable) (2023-05-10 r84417) -- "Unsuffered Consequences" Copyright (C) 2023 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(survival) > # > # Check out intercept/interaction for Frank H > # > age2 <- lung$age - 50 > fit1 <- coxph(Surv(time, status) ~ age * strata(sex), lung) > fit2 <- coxph(Surv(time, status) ~ age2*strata(sex), lung) > > tdata <- data.frame(age=50:60, age2=0:10, sex=c(1,2,1,2,1,2,1,2,1,2,1)) > > surv1 <- survfit(fit1, tdata) > surv2 <- survfit(fit2, tdata) > # The call won't match, nor the newdata data frame > icall <- match(c("newdata", "call"), names(surv1)) > all.equal(unclass(surv1)[-icall], unclass(surv2)[-icall]) [1] TRUE > > > # It should match what I get with a single strata fit > > fit3 <- coxph(Surv(time, status) ~ age, data=lung, + init=fit1$coef[1], subset=(sex==1), iter=0) > surv1b <- survfit(fit3, newdata=list(age=c(50,52, 54))) > all.equal(c(surv1b$surv), surv1[c(1,3,5)]$surv) [1] TRUE > > > > > proc.time() user system elapsed 0.920 0.070 0.981 survival/tests/frailty.R0000644000176200001440000000154014607006645015074 0ustar liggesuserslibrary(survival) # # The constuction of a survival curve with sparse frailties # # In this case the coefficient vector is kept in two parts, the # fixed coefs and the (often very large) random effects coefficients # The survfit function treats the second set of coefficients as fixed # values, to avoid an unmanagable variance matrix, and behaves like # the second fit below. fit1 <- coxph(Surv(time, status) ~ age + frailty(inst), lung) sfit1 <- survfit(fit1) # A parallel model with the frailties treated as fixed offsets offvar <- fit1$frail[as.numeric(factor(lung$inst))] fit2 <- coxph(Surv(time, status) ~ age + offset(offvar),lung) fit2$var <- fit1$var #force variances to match all.equal(fit1$coef, fit2$coef) sfit2 <- survfit(fit2, newdata=list(age=fit1$means, offvar=0)) all.equal(sfit1$surv, sfit2$surv, tol=1e-7) all.equal(sfit1$var, sfit2$var) survival/tests/rttright.Rout.save0000644000176200001440000001202114607325257016755 0ustar liggesusers R Under development (unstable) (2024-02-06 r85866) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: aarch64-unknown-linux-gnu R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(survival) > > # start with the example used in chapter 2 of the book > > bdata <- data.frame(time = c(1, 2, 2, 3, 4, 4, 5, 5, 8, 8, + 9, 10,11, 12,14, 15, 16, 16, 18, 20), + status = c(1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, + 0, 0, 1, 0, 0, 1, 0, 1, 0)) > > # First check: verify that the the RTTR reproduces the KM > kfit <- survfit(Surv(time, status) ~1, bdata) > bwt <- rttright(Surv(time, status) ~1, bdata, renorm= FALSE) > > cdf <- cumsum(bwt)/nrow(bdata) # weighted CDF > cdf <- cdf[!duplicated(bdata$time, fromLast=TRUE)] # remove duplicates > all.equal(kfit$surv, 1-cdf) [1] TRUE > > > # A covariate divides both survfit and rttr into disjoint groups, so repeat > # the above check on subsets of the aml data > afit <- survfit(Surv(time, status) ~x, aml) > awt <- rttright(Surv(time, status) ~x, aml, renorm=TRUE) > > igroup <- as.numeric(aml$x) > for (i in 1:2) { + atemp <- awt[igroup ==i] # subset for this curve + index <- order(aml$time[igroup ==i]) + acdf <- cumsum(atemp[index]) + acdf <- acdf[!duplicated(aml$time[igroup ==i], fromLast=TRUE)] + print(all.equal(afit[i]$surv, 1-acdf)) + } [1] TRUE [1] TRUE > > > ########### > # Alternate computation using inverse prob of censoring weights. > # First shift the censorings to avoid ties: if there is a death and a censor > # at time 10, say, the death was not at risk of censoring. Censoring weights > # happen "later". This also results in a left-continuous curve. > delta <- min(diff(sort(unique(bdata$time)))) /3 > offset <- ifelse(bdata$status==1, 0, delta) > cfit <- survfit(Surv(time+ offset, 1-status) ~ 1, bdata) > > # interpolate > indx <- findInterval(bdata$time, cfit$time) > cwt <- ifelse(bdata$status==0, 0, 1/cfit$surv[indx]) > all.equal(bwt, cwt) [1] TRUE > > # Multiple time points, this example is used in the vignette > tdata <- data.frame(time= c(1,2,2,3,4,4,5,5,8,9), + status= c(1,1,0,1,0,0,1,0,1,1)) > fit1 <- rttright(Surv(time, status) ~ 1, tdata, times=2:6, renorm=FALSE) > fit2 <- rttright(Surv(time, status) ~ 1, tdata, times=2:6, renorm=TRUE) > all.equal(fit1, 10*fit2) [1] TRUE > all.equal(fit1, cbind(7, c(7,7,0,8,8,8,8,8,8,8), + c(7,7,0,8,8,8,8,8,8,8), + c(7,7,0,8,0,0,12,12,12,12), + c(7,7,0,8,0,0,12, 0, 18,18))/7, check.attributes=FALSE) [1] TRUE > > # Now test with (start, stop] data, should get the same results > b2 <- survSplit(Surv(time, status) ~ 1, bdata, cut= c(3,5, 7, 14), + id = "subject") > indx <- c(seq(1, 65, by=2), seq(64, 2, by= -2)) > b2 <- b2[indx,] # not in time within subject order (stronger test) > > b2wt <- rttright(Surv(tstart, time, status) ~1, b2, id=subject) > indx2 <- order(b2$time) > cdf2 <- cumsum(b2wt[indx2]) > cdf2 <- cdf2[!duplicated(b2$time[indx2], fromLast=TRUE)] # remove duplicates > utime2 <- sort(unique(b2$time)) # will have an extra time 7 > utime1 <- sort(unique(bdata$time)) > all.equal(cdf2[match(utime1, utime2)], cdf) [1] TRUE > > > # Competing risks > mdata <- mgus2 > mdata$etime <- with(mgus2, ifelse(pstat==1, ptime, futime)) > mdata$estat <- with(mgus2, ifelse(pstat==1, 1, 2*death)) > mdata$estat <- factor(mdata$estat, 0:2, c('censor', 'pcm', 'death')) > mfit <- survfit(Surv(etime, estat) ~1, mdata, id=id, time0=FALSE) > mwt1 <- rttright(Surv(etime, estat) ~1, mdata, id=id) > > morder <- order(mdata$etime) > mdata2 <- mdata[morder,] > mwt2 <- rttright(Surv(etime,estat) ~1, mdata2, id=id) > all.equal(mwt1[morder], mwt2) [1] TRUE > > keep <- !duplicated(mdata2$etime, fromLast=TRUE) > csum1 <- cumsum(ifelse(mdata2$estat=="pcm", mwt2, 0)) > csum2 <- cumsum(ifelse(mdata2$estat=="death", mwt2, 0)) > > all.equal(mfit$pstate[,2], csum1[keep]) [1] TRUE > all.equal(mfit$pstate[,3], csum2[keep]) [1] TRUE > > # Case weights, at multiple times > bwt <- rep(1:2, length=nrow(bdata)) > tm <- c(2, 6, 10, 15, 18) > fit1 <- rttright(Surv(time, status) ~1, bdata, weights=bwt, times= tm) > casefit <- survfit(Surv(time, status) ~ 1, bdata, weights= bwt) > csum1 <- summary(casefit, censor=FALSE, times= tm) > for (i in 1:length(tm)) { + c1 <- sum(fit1[bdata$status==1 & bdata$time <= tm[i], i]) + print(all.equal(c1, 1-csum1$surv[i])) + } [1] TRUE [1] TRUE [1] TRUE [1] TRUE [1] TRUE > > > > proc.time() user system elapsed 0.448 0.032 0.478 survival/tests/rttright.R0000644000176200001440000000774414607325257015310 0ustar liggesuserslibrary(survival) # start with the example used in chapter 2 of the book bdata <- data.frame(time = c(1, 2, 2, 3, 4, 4, 5, 5, 8, 8, 9, 10,11, 12,14, 15, 16, 16, 18, 20), status = c(1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0)) # First check: verify that the the RTTR reproduces the KM kfit <- survfit(Surv(time, status) ~1, bdata) bwt <- rttright(Surv(time, status) ~1, bdata, renorm= FALSE) cdf <- cumsum(bwt)/nrow(bdata) # weighted CDF cdf <- cdf[!duplicated(bdata$time, fromLast=TRUE)] # remove duplicates all.equal(kfit$surv, 1-cdf) # A covariate divides both survfit and rttr into disjoint groups, so repeat # the above check on subsets of the aml data afit <- survfit(Surv(time, status) ~x, aml) awt <- rttright(Surv(time, status) ~x, aml, renorm=TRUE) igroup <- as.numeric(aml$x) for (i in 1:2) { atemp <- awt[igroup ==i] # subset for this curve index <- order(aml$time[igroup ==i]) acdf <- cumsum(atemp[index]) acdf <- acdf[!duplicated(aml$time[igroup ==i], fromLast=TRUE)] print(all.equal(afit[i]$surv, 1-acdf)) } ########### # Alternate computation using inverse prob of censoring weights. # First shift the censorings to avoid ties: if there is a death and a censor # at time 10, say, the death was not at risk of censoring. Censoring weights # happen "later". This also results in a left-continuous curve. delta <- min(diff(sort(unique(bdata$time)))) /3 offset <- ifelse(bdata$status==1, 0, delta) cfit <- survfit(Surv(time+ offset, 1-status) ~ 1, bdata) # interpolate indx <- findInterval(bdata$time, cfit$time) cwt <- ifelse(bdata$status==0, 0, 1/cfit$surv[indx]) all.equal(bwt, cwt) # Multiple time points, this example is used in the vignette tdata <- data.frame(time= c(1,2,2,3,4,4,5,5,8,9), status= c(1,1,0,1,0,0,1,0,1,1)) fit1 <- rttright(Surv(time, status) ~ 1, tdata, times=2:6, renorm=FALSE) fit2 <- rttright(Surv(time, status) ~ 1, tdata, times=2:6, renorm=TRUE) all.equal(fit1, 10*fit2) all.equal(fit1, cbind(7, c(7,7,0,8,8,8,8,8,8,8), c(7,7,0,8,8,8,8,8,8,8), c(7,7,0,8,0,0,12,12,12,12), c(7,7,0,8,0,0,12, 0, 18,18))/7, check.attributes=FALSE) # Now test with (start, stop] data, should get the same results b2 <- survSplit(Surv(time, status) ~ 1, bdata, cut= c(3,5, 7, 14), id = "subject") indx <- c(seq(1, 65, by=2), seq(64, 2, by= -2)) b2 <- b2[indx,] # not in time within subject order (stronger test) b2wt <- rttright(Surv(tstart, time, status) ~1, b2, id=subject) indx2 <- order(b2$time) cdf2 <- cumsum(b2wt[indx2]) cdf2 <- cdf2[!duplicated(b2$time[indx2], fromLast=TRUE)] # remove duplicates utime2 <- sort(unique(b2$time)) # will have an extra time 7 utime1 <- sort(unique(bdata$time)) all.equal(cdf2[match(utime1, utime2)], cdf) # Competing risks mdata <- mgus2 mdata$etime <- with(mgus2, ifelse(pstat==1, ptime, futime)) mdata$estat <- with(mgus2, ifelse(pstat==1, 1, 2*death)) mdata$estat <- factor(mdata$estat, 0:2, c('censor', 'pcm', 'death')) mfit <- survfit(Surv(etime, estat) ~1, mdata, id=id, time0=FALSE) mwt1 <- rttright(Surv(etime, estat) ~1, mdata, id=id) morder <- order(mdata$etime) mdata2 <- mdata[morder,] mwt2 <- rttright(Surv(etime,estat) ~1, mdata2, id=id) all.equal(mwt1[morder], mwt2) keep <- !duplicated(mdata2$etime, fromLast=TRUE) csum1 <- cumsum(ifelse(mdata2$estat=="pcm", mwt2, 0)) csum2 <- cumsum(ifelse(mdata2$estat=="death", mwt2, 0)) all.equal(mfit$pstate[,2], csum1[keep]) all.equal(mfit$pstate[,3], csum2[keep]) # Case weights, at multiple times bwt <- rep(1:2, length=nrow(bdata)) tm <- c(2, 6, 10, 15, 18) fit1 <- rttright(Surv(time, status) ~1, bdata, weights=bwt, times= tm) casefit <- survfit(Surv(time, status) ~ 1, bdata, weights= bwt) csum1 <- summary(casefit, censor=FALSE, times= tm) for (i in 1:length(tm)) { c1 <- sum(fit1[bdata$status==1 & bdata$time <= tm[i], i]) print(all.equal(c1, 1-csum1$surv[i])) } survival/tests/summary_survfit.Rout.save0000644000176200001440000001320214607325257020367 0ustar liggesusers R Under development (unstable) (2024-02-06 r85866) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: aarch64-unknown-linux-gnu R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > ## check that the scale option to summary.survfit works > ## Marc Schwartz reported this as a bug in 2.35-3. > library(survival) > fit <- survfit(Surv(futime, fustat) ~rx, data=ovarian) > temp1 <- summary(fit) > temp2 <- summary(fit, scale=365.25) > > all.equal(temp1$time/365.25, temp2$time) [1] TRUE > all.equal(temp1$rmean.endtime/365.25, temp2$rmean.endtime) [1] TRUE > all.equal(temp1$table[,5:6]/365.25, temp2$table[,5:6]) [1] TRUE > temp <- names(fit) > temp <- temp[!temp %in% c("time", "table", "rmean.endtime")] > all.equal(temp1[temp], temp2[temp]) [1] TRUE > > # Reprise, using the rmean option > temp1 <- summary(fit, rmean=300) > temp2 <- summary(fit, rmean=300, scale=365.25) > all.equal(temp1$time/365.25, temp2$time) [1] TRUE > all.equal(temp1$rmean.endtime/365.25, temp2$rmean.endtime) [1] TRUE > all.equal(temp1$table[,5:6]/365.25, temp2$table[,5:6]) [1] TRUE > all.equal(temp1[temp], temp2[temp]) [1] TRUE > > # Repeat using multi-state data. Time is in months for mgus2 > etime <- with(mgus2, ifelse(pstat==0, futime, ptime)) > event <- with(mgus2, ifelse(pstat==0, 2*death, 1)) > event <- factor(event, 0:2, labels=c("censor", "pcm", "death")) > mfit <- survfit(Surv(etime, event) ~ sex, mgus2) > temp1 <- summary(mfit) > temp2 <- summary(mfit, scale=12) > > all.equal(temp1$time/12, temp2$time) [1] TRUE > all.equal(temp1$rmean.endtime/12, temp2$rmean.endtime) [1] TRUE > all.equal(temp1$table[,3]/12, temp2$table[,3]) [1] TRUE > temp <- names(temp1) > temp <- temp[!temp %in% c("time", "table", "rmean.endtime")] > all.equal(temp1[temp], temp2[temp]) [1] TRUE > > # Reprise, using the rmean option > temp1 <- summary(mfit, rmean=240) > temp2 <- summary(mfit, rmean=240, scale=12) > all.equal(temp1$time/12, temp2$time) [1] TRUE > all.equal(temp1$rmean.endtime/12, temp2$rmean.endtime) [1] TRUE > all.equal(temp1$table[,3]/12, temp2$table[,3]) [1] TRUE > all.equal(temp1[temp], temp2[temp]) [1] TRUE > > > # The n.risk values from summary.survfit were off when there are multiple > # curves (version 2.39-2) > # Verify all components by subscripting > m1 <- mfit[1,] > m2 <- mfit[2,] > s1 <- summary(m1, times=c(0,100, 200, 300)) > s2 <- summary(m2, times=c(0,100, 200, 300)) > s3 <- summary(mfit, times=c(0,100, 200, 300)) > > tfun <- function(what) { + if (is.matrix(s3[[what]])) + all.equal(rbind(s1[[what]], s2[[what]]), s3[[what]]) + else all.equal(c(s1[[what]], s2[[what]]), s3[[what]]) + } > tfun('n') [1] TRUE > tfun("time") [1] TRUE > tfun("n.risk") [1] TRUE > tfun("n.event") [1] TRUE > tfun("n.censor") [1] TRUE > tfun("pstate") [1] TRUE > all.equal(rbind(s1$p0, s2$p0), s3$p0, check.attributes=FALSE) [1] TRUE > tfun("std.err") [1] TRUE > tfun("lower") [1] TRUE > tfun("upper") [1] TRUE > > # Check the cumulative sums > temp <- rbind(0, 0, + colSums(m1$n.event[m1$time <= 100,]), + colSums(m1$n.event[m1$time <= 200, ]), + colSums(m1$n.event[m1$time <= 300, ])) > all.equal(s1$n.event, apply(temp,2, diff)) [1] TRUE > > temp <- rbind(0, 0, + colSums(m2$n.event[m2$time <= 100,]), + colSums(m2$n.event[m2$time <= 200, ]), + colSums(m2$n.event[m2$time <= 300, ])) > all.equal(s2$n.event, apply(temp,2, diff)) [1] TRUE > > temp <- rbind(0, 0, + colSums(m1$n.censor[m1$time <= 100,]), + colSums(m1$n.censor[m1$time <= 200,]), + colSums(m1$n.censor[m1$time <= 300,])) > all.equal(s1$n.censor, apply(temp, 2, diff)) [1] TRUE > > # check the same with survfit objects > s1 <- summary(fit[1], times=c(0, 200, 400, 600)) > s2 <- summary(fit[2], times=c(0, 200, 400, 600)) > s3 <- summary(fit, times=c(0, 200, 400, 600)) > tfun('n') [1] TRUE > tfun("time") [1] TRUE > tfun("n.risk") [1] TRUE > tfun("n.event") [1] TRUE > tfun("n.censor") [1] TRUE > tfun("surv") [1] TRUE > tfun("std.err") [1] TRUE > tfun("lower") [1] TRUE > tfun("upper") [1] TRUE > > f2 <- fit[2] > temp <- c(0, 0, sum(f2$n.event[f2$time <= 200]), + sum(f2$n.event[f2$time <= 400]), + sum(f2$n.event[f2$time <= 600])) > all.equal(s2$n.event, diff(temp)) [1] TRUE > > f1 <- fit[1] > temp <- c(0, 0,sum(f1$n.censor[f1$time <= 200]), + sum(f1$n.censor[f1$time <= 400]), + sum(f1$n.censor[f1$time <= 600])) > all.equal(s1$n.censor, diff(temp)) [1] TRUE > > # > # A check on the censor option > # > s1 <- summary(fit[1]) > s2 <- summary(fit[2]) > s3 <- summary(fit) > tfun('n') [1] TRUE > tfun("time") [1] TRUE > tfun("n.risk") [1] TRUE > tfun("n.event") [1] TRUE > tfun("n.censor") [1] TRUE > tfun("surv") [1] TRUE > tfun("std.err") [1] TRUE > tfun("lower") [1] TRUE > tfun("upper") [1] TRUE > > s1 <- summary(mfit[1,]) > s2 <- summary(mfit[2,]) > s3 <- summary(mfit) > tfun('n') [1] TRUE > tfun("time") [1] TRUE > tfun("n.risk") [1] TRUE > tfun("n.event") [1] TRUE > tfun("n.censor") [1] TRUE > tfun("surv") [1] TRUE > tfun("std.err") [1] TRUE > tfun("lower") [1] TRUE > tfun("upper") [1] TRUE > > proc.time() user system elapsed 0.413 0.039 0.450 survival/tests/ratetable.Rout.save0000644000176200001440000001130114607325257017051 0ustar liggesusers R Under development (unstable) (2024-02-06 r85866) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: aarch64-unknown-linux-gnu R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > options(na.action=na.exclude) # preserve missings > options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type > library(survival) > > # > # Generate each of the messages from is.ratetable > # > mdy.date <- function(m, d, y) { + y <- ifelse(y<100, y+1900, y) + as.Date(paste(m,d,y, sep='/'), "%m/%d/%Y") + } > temp <- runif(21*2*4) > > # Good > attributes(temp) <- list(dim=c(21,2,4), + dimnames=list(c(as.character(75:95)), c("male","female"), + c(as.character(2000:2003))), + dimid=c("age","sex","year"), + type=c(2,1,4), + cutpoints=list(c(75:95), NULL, mdy.date(1,1,2000) +c(0:3)*366.25), + class='ratetable') > is.ratetable(temp) [1] TRUE > > # Factor problem + cutpoints length > attributes(temp) <- list(dim=c(21,2,4), + dimnames=list(c(as.character(75:95)), c("male","female"), + c(as.character(2000:2003))), + dimid=c("age","sex","year"), + type=c(1,1,2), + cutpoints=list(c(75:95), NULL, mdy.date(1,1,2000) +c(0:4)*366.25), + class='ratetable') > is.ratetable(temp, verbose=T) [1] "type[3] is numeric or factor but the cutpoint is a date" [2] "attribute type[1] is continuous; cutpoint should be null" [3] "wrong length for cutpoints 3" > > > # missing dimid attribute + unsorted cutpoint > attributes(temp) <- list(dim=c(21,2,4), + dimnames=list(c(as.character(75:95)), c("male","female"), + c(as.character(2000:2003))), + type=c(2,1,3), + cutpoints=list(c(75:95), NULL, mdy.date(1,1,2000) +c(4:1)*366.25), + class='ratetable') > is.ratetable(temp, verbose=T) [1] "wrong length for dimid, or dimnames do not have names" [2] "unsorted cutpoints for dimension 3" > > # wrong length for dimid and type, illegal type > attributes(temp) <- list(dim=c(21,2,4), + dimnames=list(c(as.character(75:95)), c("male","female"), + c(as.character(2000:2003))), + dimid=c("age","sex","year", "zed"), + type=c(2,1,3,6), + cutpoints=list(c(75:95), NULL, mdy.date(1,1,2000) +c(0:3)*366.25), + class='ratetable') > is.ratetable(temp, verbose=T) [1] "wrong length for dimid, or dimnames do not have names" [2] "type attribute must be 1, 2, 3, or 4" [3] "wrong length for type attribute" > > > # Print and summary > print(survexp.us[1:20,,c('1953', '1985')] ) Rate table with dimension(s): age sex year , , year = 1953 sex age male female 0 8.936600e-05 6.911204e-05 1 6.169963e-06 5.423669e-06 2 3.860391e-06 3.161334e-06 3 2.909162e-06 2.424089e-06 4 2.448747e-06 1.950051e-06 5 2.210350e-06 1.692520e-06 6 1.988411e-06 1.481583e-06 7 1.813065e-06 1.298053e-06 8 1.684303e-06 1.169315e-06 9 1.593900e-06 1.087146e-06 10 1.569249e-06 1.051541e-06 11 1.626780e-06 1.043325e-06 12 1.771975e-06 1.089887e-06 13 2.062389e-06 1.199447e-06 14 2.462443e-06 1.347361e-06 15 2.944779e-06 1.550072e-06 16 3.410754e-06 1.752797e-06 17 3.819231e-06 1.928140e-06 18 4.164702e-06 2.056914e-06 19 4.504735e-06 2.169256e-06 , , year = 1985 sex age male female 0 3.350073e-05 2.680036e-05 1 2.451492e-06 2.108968e-06 2 1.739100e-06 1.341882e-06 3 1.369277e-06 1.013196e-06 4 1.122754e-06 7.940941e-07 5 9.995021e-07 7.530142e-07 6 9.173378e-07 6.571643e-07 7 8.488687e-07 5.887021e-07 8 7.530153e-07 5.339338e-07 9 6.297793e-07 4.791661e-07 10 5.202416e-07 4.517830e-07 11 5.202416e-07 4.517830e-07 12 7.530134e-07 5.202412e-07 13 1.232311e-06 6.571636e-07 14 1.862374e-06 8.351727e-07 15 2.533686e-06 1.026887e-06 16 3.150341e-06 1.204921e-06 17 3.657474e-06 1.341877e-06 18 4.041315e-06 1.424054e-06 19 4.315527e-06 1.465144e-06 > summary(survexp.usr) Rate table with 4 dimensions: age ranges from 0 to 39812.25; with 110 categories sex has levels of: male female race has levels of: white black year ranges from 1940-01-01 to 2020-01-01; with 81 categories > > proc.time() user system elapsed 0.418 0.029 0.442 survival/tests/fr_cancer.R0000644000176200001440000000126114607006645015344 0ustar liggesusersoptions(na.action=na.exclude) # preserve missings options(contrasts=c('contr.treatment', 'contr.poly')) #ensure constrast type library(survival) # # Here is a test case with multiple smoothing terms # fit0 <- coxph(Surv(time, status) ~ ph.ecog + age, lung) fit1 <- coxph(Surv(time, status) ~ ph.ecog + pspline(age,3), lung) fit2 <- coxph(Surv(time, status) ~ ph.ecog + pspline(age,4), lung) fit3 <- coxph(Surv(time, status) ~ ph.ecog + pspline(age,8), lung) fit4 <- coxph(Surv(time, status) ~ ph.ecog + pspline(wt.loss,3), lung) fit5 <-coxph(Surv(time, status) ~ ph.ecog + pspline(age,3) + pspline(wt.loss,3), lung) fit1 fit2 fit3 fit4 fit5 rm(fit1, fit2, fit3, fit4, fit5) survival/tests/tt.Rout.save0000644000176200001440000001061514607006645015541 0ustar liggesusers R Under development (unstable) (2023-01-30 r83727) -- "Unsuffered Consequences" Copyright (C) 2023 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(survival) > library(splines) > aeq <- function(x, y) all.equal(as.vector(x), as.vector(y)) > > # A contrived example for the tt function > # > mkdata <- function(n, beta) { + age <- runif(n, 20, 60) + x <- rbinom(n, 1, .5) + + futime <- rep(40, n) # everyone has 40 years of follow-up + status <- rep(0, n) + dtime <- runif(n/2, 1, 40) # 1/2 of them die + dtime <- sort(dtime) + + # The risk is set to beta[1]*x + beta[2]* f(current_age) + # where f= 0 up to age 40, rises linear to age 70, flat after that + for (i in 1:length(dtime)) { + atrisk <- (futime >= dtime[i]) + c.age <- age + dtime + age2 <- pmin(30, pmax(0, c.age-40)) + xbeta <- beta[1]*x + beta[2]*age2 + + # Select a death according to risk + risk <- ifelse(atrisk, exp(xbeta), 0) + dead <- sample(1:n, 1, prob=risk/sum(risk)) + + futime[dead] <- dtime[i] + status[dead] <- 1 + } + data.frame(futime=round(futime,1), status=status, age=age, x=x, risk=risk, + casewt = sample(1:5, n, replace=TRUE), + + grp = sample(1:15, n, replace=TRUE)) + } > > set.seed(1953) # a good year > # The functional form won't be well estimated with n=100, but a large > # n makes the test slow, and as a validity test n=100 and n=1000 are equally > # good. > tdata <- mkdata(100, c(log(1.5), 2/30)) # data set has many ties > > dtime <- sort(unique(tdata$futime[tdata$status==1])) > data2 <- survSplit(Surv(futime, status) ~., tdata, cut=dtime) > data2$c.age <- data2$age + data2$futime # current age > > # fit1 uses data at the event times, fit2$c.age might have a > # wider range due to censorings. To make the two fits agree > # fix the knots. I know a priori that 20 to 101 will cover it. > ns2 <- function(x) ns(x, Boundary.knots=c(20, 101), knots=c(45, 60, 75)) > > fit1 <- coxph(Surv(futime, status)~ x + tt(age), tdata, + tt= function(x, t, ...) ns2(x+t)) > > fit2 <- coxph(Surv(tstart, futime, status) ~ x + ns2(c.age), data2) > > aeq(coef(fit1), coef(fit2)) [1] TRUE > aeq(vcov(fit1), vcov(fit2)) [1] TRUE > > # > # Check that cluster, weight, and offset were correctly expanded > # > fit3a <- coxph(Surv(futime, status)~ x + tt(age), tdata, weights=casewt, + tt= function(x, t, ...) ns2(x+t)) > fit3b <- coxph(Surv(tstart, futime, status) ~ x + ns2(c.age), data2, + weights=casewt) > aeq(coef(fit3a), coef(fit3b)) [1] TRUE > aeq(vcov(fit3a), vcov(fit3b)) [1] TRUE > > fit4a <- coxph(Surv(futime, status)~ x + tt(age), tdata, + tt= function(x, t, ...) ns2(x+t), cluster=grp) > fit4b <- coxph(Surv(tstart, futime, status) ~ x + ns2(c.age), data2, + cluster=grp) > fit4c <- coxph(Surv(tstart, futime, status) ~ x + ns2(c.age) + cluster(grp), + data2) > aeq(coef(fit4a), coef(fit4b)) [1] TRUE > aeq(vcov(fit4a), vcov(fit4b)) [1] TRUE > aeq(coef(fit4a), coef(fit4c)) [1] TRUE > aeq(vcov(fit4a), vcov(fit4c)) [1] TRUE > > fit5a <- coxph(Surv(futime, status)~ x + tt(age) + offset(grp/10), tdata, + tt= function(x, t, ...) ns2(x+t),) > fit5b <- coxph(Surv(tstart, futime, status) ~ x + ns2(c.age)+ offset(grp/10) + , data=data2) > aeq(coef(fit5a), coef(fit5b)) [1] TRUE > aeq(vcov(fit5a), vcov(fit5b)) [1] TRUE > > # Check that strata is correct > fit6a <- coxph(Surv(futime, status) ~ x + tt(age) + strata(grp), tdata, + tt = function(x, t, ...) (x+t)^2) > fit6b <- coxph(Surv(tstart, futime, status) ~ x + I(c.age^2) +strata(grp), data2) > aeq(coef(fit6a), coef(fit6b)) [1] TRUE > aeq(vcov(fit6a), vcov(fit6b)) [1] TRUE > > > > proc.time() user system elapsed 1.147 0.101 1.237 survival/tests/Examples/0000755000176200001440000000000014635422471015055 5ustar liggesuserssurvival/tests/Examples/survival-Ex.Rout.save0000644000176200001440000040162014712433754021117 0ustar liggesusers R Under development (unstable) (2024-08-21 r87038) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. Natural language support but running in an English locale R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > pkgname <- "survival" > source(file.path(R.home("share"), "R", "examples-header.R")) > options(warn = 1) > library('survival') > > base::assign(".oldSearch", base::search(), pos = 'CheckExEnv') > base::assign(".old_wd", base::getwd(), pos = 'CheckExEnv') > cleanEx() > nameEx("Surv") > ### * Surv > > flush(stderr()); flush(stdout()) > > ### Name: Surv > ### Title: Create a Survival Object > ### Aliases: Surv is.Surv [.Surv > ### Keywords: survival > > ### ** Examples > > with(aml, Surv(time, status)) [1] 9 13 13+ 18 23 28+ 31 34 45+ 48 161+ 5 5 8 8 [16] 12 16+ 23 27 30 33 43 45 > survfit(Surv(time, status) ~ ph.ecog, data=lung) Call: survfit(formula = Surv(time, status) ~ ph.ecog, data = lung) 1 observation deleted due to missingness n events median 0.95LCL 0.95UCL ph.ecog=0 63 37 394 348 574 ph.ecog=1 113 82 306 268 429 ph.ecog=2 50 44 199 156 288 ph.ecog=3 1 1 118 NA NA > Surv(heart$start, heart$stop, heart$event) [1] ( 0.0, 50.0] ( 0.0, 6.0] ( 0.0, 1.0+] ( 1.0, 16.0] [5] ( 0.0, 36.0+] ( 36.0, 39.0] ( 0.0, 18.0] ( 0.0, 3.0] [9] ( 0.0, 51.0+] ( 51.0, 675.0] ( 0.0, 40.0] ( 0.0, 85.0] [13] ( 0.0, 12.0+] ( 12.0, 58.0] ( 0.0, 26.0+] ( 26.0, 153.0] [17] ( 0.0, 8.0] ( 0.0, 17.0+] ( 17.0, 81.0] ( 0.0, 37.0+] [21] ( 37.0,1387.0] ( 0.0, 1.0] ( 0.0, 28.0+] ( 28.0, 308.0] [25] ( 0.0, 36.0] ( 0.0, 20.0+] ( 20.0, 43.0] ( 0.0, 37.0] [29] ( 0.0, 18.0+] ( 18.0, 28.0] ( 0.0, 8.0+] ( 8.0,1032.0] [33] ( 0.0, 12.0+] ( 12.0, 51.0] ( 0.0, 3.0+] ( 3.0, 733.0] [37] ( 0.0, 83.0+] ( 83.0, 219.0] ( 0.0, 25.0+] ( 25.0,1800.0+] [41] ( 0.0,1401.0+] ( 0.0, 263.0] ( 0.0, 71.0+] ( 71.0, 72.0] [45] ( 0.0, 35.0] ( 0.0, 16.0+] ( 16.0, 852.0] ( 0.0, 16.0] [49] ( 0.0, 17.0+] ( 17.0, 77.0] ( 0.0, 51.0+] ( 51.0,1587.0+] [53] ( 0.0, 23.0+] ( 23.0,1572.0+] ( 0.0, 12.0] ( 0.0, 46.0+] [57] ( 46.0, 100.0] ( 0.0, 19.0+] ( 19.0, 66.0] ( 0.0, 4.5+] [61] ( 4.5, 5.0] ( 0.0, 2.0+] ( 2.0, 53.0] ( 0.0, 41.0+] [65] ( 41.0,1408.0+] ( 0.0, 58.0+] ( 58.0,1322.0+] ( 0.0, 3.0] [69] ( 0.0, 2.0] ( 0.0, 40.0] ( 0.0, 1.0+] ( 1.0, 45.0] [73] ( 0.0, 2.0+] ( 2.0, 996.0] ( 0.0, 21.0+] ( 21.0, 72.0] [77] ( 0.0, 9.0] ( 0.0, 36.0+] ( 36.0,1142.0+] ( 0.0, 83.0+] [81] ( 83.0, 980.0] ( 0.0, 32.0+] ( 32.0, 285.0] ( 0.0, 102.0] [85] ( 0.0, 41.0+] ( 41.0, 188.0] ( 0.0, 3.0] ( 0.0, 10.0+] [89] ( 10.0, 61.0] ( 0.0, 67.0+] ( 67.0, 942.0+] ( 0.0, 149.0] [93] ( 0.0, 21.0+] ( 21.0, 343.0] ( 0.0, 78.0+] ( 78.0, 916.0+] [97] ( 0.0, 3.0+] ( 3.0, 68.0] ( 0.0, 2.0] ( 0.0, 69.0] [101] ( 0.0, 27.0+] ( 27.0, 842.0+] ( 0.0, 33.0+] ( 33.0, 584.0] [105] ( 0.0, 12.0+] ( 12.0, 78.0] ( 0.0, 32.0] ( 0.0, 57.0+] [109] ( 57.0, 285.0] ( 0.0, 3.0+] ( 3.0, 68.0] ( 0.0, 10.0+] [113] ( 10.0, 670.0+] ( 0.0, 5.0+] ( 5.0, 30.0] ( 0.0, 31.0+] [117] ( 31.0, 620.0+] ( 0.0, 4.0+] ( 4.0, 596.0+] ( 0.0, 27.0+] [121] ( 27.0, 90.0] ( 0.0, 5.0+] ( 5.0, 17.0] ( 0.0, 2.0] [125] ( 0.0, 46.0+] ( 46.0, 545.0+] ( 0.0, 21.0] ( 0.0, 210.0+] [129] (210.0, 515.0+] ( 0.0, 67.0+] ( 67.0, 96.0] ( 0.0, 26.0+] [133] ( 26.0, 482.0+] ( 0.0, 6.0+] ( 6.0, 445.0+] ( 0.0, 428.0+] [137] ( 0.0, 32.0+] ( 32.0, 80.0] ( 0.0, 37.0+] ( 37.0, 334.0] [141] ( 0.0, 5.0] ( 0.0, 8.0+] ( 8.0, 397.0+] ( 0.0, 60.0+] [145] ( 60.0, 110.0] ( 0.0, 31.0+] ( 31.0, 370.0+] ( 0.0, 139.0+] [149] (139.0, 207.0] ( 0.0, 160.0+] (160.0, 186.0] ( 0.0, 340.0] [153] ( 0.0, 310.0+] (310.0, 340.0+] ( 0.0, 28.0+] ( 28.0, 265.0+] [157] ( 0.0, 4.0+] ( 4.0, 165.0] ( 0.0, 2.0+] ( 2.0, 16.0] [161] ( 0.0, 13.0+] ( 13.0, 180.0+] ( 0.0, 21.0+] ( 21.0, 131.0+] [165] ( 0.0, 96.0+] ( 96.0, 109.0+] ( 0.0, 21.0] ( 0.0, 38.0+] [169] ( 38.0, 39.0+] ( 0.0, 31.0+] ( 0.0, 11.0+] ( 0.0, 6.0] > > > > cleanEx() > nameEx("aareg") > ### * aareg > > flush(stderr()); flush(stdout()) > > ### Name: aareg > ### Title: Aalen's additive regression model for censored data > ### Aliases: aareg > ### Keywords: survival > > ### ** Examples > > # Fit a model to the lung cancer data set > lfit <- aareg(Surv(time, status) ~ age + sex + ph.ecog, data=lung, + nmin=1) > ## Not run: > ##D lfit > ##D Call: > ##D aareg(formula = Surv(time, status) ~ age + sex + ph.ecog, data = lung, nmin = 1 > ##D ) > ##D > ##D n=227 (1 observations deleted due to missing values) > ##D 138 out of 138 unique event times used > ##D > ##D slope coef se(coef) z p > ##D Intercept 5.26e-03 5.99e-03 4.74e-03 1.26 0.207000 > ##D age 4.26e-05 7.02e-05 7.23e-05 0.97 0.332000 > ##D sex -3.29e-03 -4.02e-03 1.22e-03 -3.30 0.000976 > ##D ph.ecog 3.14e-03 3.80e-03 1.03e-03 3.70 0.000214 > ##D > ##D Chisq=26.73 on 3 df, p=6.7e-06; test weights=aalen > ##D > ##D plot(lfit[4], ylim=c(-4,4)) # Draw a plot of the function for ph.ecog > ## End(Not run) > lfit2 <- aareg(Surv(time, status) ~ age + sex + ph.ecog, data=lung, + nmin=1, taper=1:10) > ## Not run: lines(lfit2[4], col=2) # Nearly the same, until the last point > > # A fit to the mulitple-infection data set of children with > # Chronic Granuomatous Disease. See section 8.5 of Therneau and Grambsch. > fita2 <- aareg(Surv(tstart, tstop, status) ~ treat + age + inherit + + steroids + cluster(id), data=cgd) > ## Not run: > ##D n= 203 > ##D 69 out of 70 unique event times used > ##D > ##D slope coef se(coef) robust se z p > ##D Intercept 0.004670 0.017800 0.002780 0.003910 4.55 5.30e-06 > ##D treatrIFN-g -0.002520 -0.010100 0.002290 0.003020 -3.36 7.87e-04 > ##D age -0.000101 -0.000317 0.000115 0.000117 -2.70 6.84e-03 > ##D inheritautosomal 0.001330 0.003830 0.002800 0.002420 1.58 1.14e-01 > ##D steroids 0.004620 0.013200 0.010600 0.009700 1.36 1.73e-01 > ##D > ##D Chisq=16.74 on 4 df, p=0.0022; test weights=aalen > ## End(Not run) > > > > cleanEx() > nameEx("aggregate.survfit") > ### * aggregate.survfit > > flush(stderr()); flush(stdout()) > > ### Name: aggregate.survfit > ### Title: Average survival curves > ### Aliases: aggregate.survfit > ### Keywords: survival > > ### ** Examples > > cfit <- coxph(Surv(futime, death) ~ sex + age*hgb, data=mgus2) > # marginal effect of sex, after adjusting for the others > dummy <- rbind(mgus2, mgus2) > dummy$sex <- rep(c("F", "M"), each=nrow(mgus2)) # population data set > dummy <- na.omit(dummy) # don't count missing hgb in our "population > csurv <- survfit(cfit, newdata=dummy) > dim(csurv) # 2 * 1384 survival curves data 2676 > csurv2 <- aggregate(csurv, dummy$sex) > > > > cleanEx() > nameEx("anova.coxph") > ### * anova.coxph > > flush(stderr()); flush(stdout()) > > ### Name: anova.coxph > ### Title: Analysis of Deviance for a Cox model. > ### Aliases: anova.coxph anova.coxphlist > ### Keywords: models regression survival > > ### ** Examples > > fit <- coxph(Surv(futime, fustat) ~ resid.ds *rx + ecog.ps, data = ovarian) > anova(fit) Analysis of Deviance Table Cox model: response is Surv(futime, fustat) Terms added sequentially (first to last) loglik Chisq Df Pr(>|Chi|) NULL -34.985 resid.ds -33.105 3.7594 1 0.05251 . rx -32.269 1.6733 1 0.19582 ecog.ps -31.970 0.5980 1 0.43934 resid.ds:rx -30.946 2.0469 1 0.15251 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 > fit2 <- coxph(Surv(futime, fustat) ~ resid.ds +rx + ecog.ps, data=ovarian) > anova(fit2,fit) Analysis of Deviance Table Cox model: response is Surv(futime, fustat) Model 1: ~ resid.ds + rx + ecog.ps Model 2: ~ resid.ds * rx + ecog.ps loglik Chisq Df Pr(>|Chi|) 1 -31.970 2 -30.946 2.0469 1 0.1525 > > > > cleanEx() > nameEx("attrassign") > ### * attrassign > > flush(stderr()); flush(stdout()) > > ### Name: attrassign > ### Title: Create new-style "assign" attribute > ### Aliases: attrassign.default attrassign attrassign.lm > ### Keywords: models > > ### ** Examples > > formula <- Surv(time,status)~factor(ph.ecog) > tt <- terms(formula) > mf <- model.frame(tt,data=lung) > mm <- model.matrix(tt,mf) > ## a few rows of data > mm[1:3,] (Intercept) factor(ph.ecog)1 factor(ph.ecog)2 factor(ph.ecog)3 1 1 1 0 0 2 1 0 0 0 3 1 0 0 0 > ## old-style assign attribute > attr(mm,"assign") [1] 0 1 1 1 > ## alternate style assign attribute > attrassign(mm,tt) $`(Intercept)` [1] 1 $`factor(ph.ecog)` [1] 2 3 4 > > > > cleanEx() > nameEx("blogit") > ### * blogit > > flush(stderr()); flush(stdout()) > > ### Name: blogit > ### Title: Bounded link functions > ### Aliases: blogit bcloglog bprobit blog > ### Keywords: survival > > ### ** Examples > > py <- pseudo(survfit(Surv(time, status) ~1, lung), time=730) #2 year survival > range(py) [1] -0.335248 1.693831 > pfit <- glm(py ~ ph.ecog, data=lung, family=gaussian(link=blogit())) > # For each +1 change in performance score, the odds of 2 year survival > # are multiplied by 1/2 = exp of the coefficient. > > > > cleanEx() > nameEx("brier") > ### * brier > > flush(stderr()); flush(stdout()) > > ### Name: brier > ### Title: Compute the Brier score for a Cox model > ### Aliases: brier > ### Keywords: survival > > ### ** Examples > > cfit <- coxph(Surv(rtime, recur) ~ age + meno + size + pmin(nodes,11), + data= rotterdam) > round(cfit$concordance["concordance"], 3) # some predictive power concordance 0.675 > brier(cfit, times=c(4,6)*365.25) # values at 4 and 6 years $rsquared [1] 0.1501931 0.1514488 $brier [1] 0.1952731 0.2097106 $times [1] 1461.0 2191.5 > > > > cleanEx() > nameEx("cch") > ### * cch > > flush(stderr()); flush(stdout()) > > ### Name: cch > ### Title: Fits proportional hazards regression model to case-cohort data > ### Aliases: cch > ### Keywords: survival > > ### ** Examples > > ## The complete Wilms Tumor Data > ## (Breslow and Chatterjee, Applied Statistics, 1999) > ## subcohort selected by simple random sampling. > ## > > subcoh <- nwtco$in.subcohort > selccoh <- with(nwtco, rel==1|subcoh==1) > ccoh.data <- nwtco[selccoh,] > ccoh.data$subcohort <- subcoh[selccoh] > ## central-lab histology > ccoh.data$histol <- factor(ccoh.data$histol,labels=c("FH","UH")) > ## tumour stage > ccoh.data$stage <- factor(ccoh.data$stage,labels=c("I","II","III","IV")) > ccoh.data$age <- ccoh.data$age/12 # Age in years > > ## > ## Standard case-cohort analysis: simple random subcohort > ## > > fit.ccP <- cch(Surv(edrel, rel) ~ stage + histol + age, data =ccoh.data, + subcoh = ~subcohort, id=~seqno, cohort.size=4028) > > > fit.ccP Case-cohort analysis,x$method, Prentice with subcohort of 668 from cohort of 4028 Call: cch(formula = Surv(edrel, rel) ~ stage + histol + age, data = ccoh.data, subcoh = ~subcohort, id = ~seqno, cohort.size = 4028) Coefficients: Value SE Z p stageII 0.73457084 0.16849620 4.359569 1.303187e-05 stageIII 0.59708356 0.17345094 3.442377 5.766257e-04 stageIV 1.38413197 0.20481982 6.757803 1.400990e-11 histolUH 1.49806307 0.15970515 9.380180 0.000000e+00 age 0.04326787 0.02373086 1.823274 6.826184e-02 > > fit.ccSP <- cch(Surv(edrel, rel) ~ stage + histol + age, data =ccoh.data, + subcoh = ~subcohort, id=~seqno, cohort.size=4028, method="SelfPren") > > summary(fit.ccSP) Case-cohort analysis,x$method, SelfPrentice with subcohort of 668 from cohort of 4028 Call: cch(formula = Surv(edrel, rel) ~ stage + histol + age, data = ccoh.data, subcoh = ~subcohort, id = ~seqno, cohort.size = 4028, method = "SelfPren") Coefficients: Coef HR (95% CI) p stageII 0.736 2.088 1.501 2.905 0.000 stageIII 0.597 1.818 1.294 2.553 0.001 stageIV 1.392 4.021 2.692 6.008 0.000 histolUH 1.506 4.507 3.295 6.163 0.000 age 0.043 1.044 0.997 1.094 0.069 > > ## > ## (post-)stratified on instit > ## > stratsizes<-table(nwtco$instit) > fit.BI<- cch(Surv(edrel, rel) ~ stage + histol + age, data =ccoh.data, + subcoh = ~subcohort, id=~seqno, stratum=~instit, cohort.size=stratsizes, + method="I.Borgan") > > summary(fit.BI) Exposure-stratified case-cohort analysis, I.Borgan method. 1 2 subcohort 952 202 cohort 3622 406 Call: cch(formula = Surv(edrel, rel) ~ stage + histol + age, data = ccoh.data, subcoh = ~subcohort, id = ~seqno, stratum = ~instit, cohort.size = stratsizes, method = "I.Borgan") Coefficients: Coef HR (95% CI) p stageII 0.737 2.090 1.501 2.909 0.000 stageIII 0.602 1.825 1.301 2.561 0.000 stageIV 1.395 4.036 2.702 6.029 0.000 histolUH 1.522 4.580 3.450 6.080 0.000 age 0.043 1.044 0.996 1.093 0.072 > > > > cleanEx() > nameEx("cipoisson") > ### * cipoisson > > flush(stderr()); flush(stdout()) > > ### Name: cipoisson > ### Title: Confidence limits for the Poisson > ### Aliases: cipoisson > > ### ** Examples > > cipoisson(4) # 95% confidence limit lower upper 1.089865 10.241589 > # lower upper > # 1.089865 10.24153 > ppois(4, 10.24153) #chance of seeing 4 or fewer events with large rate [1] 0.02500096 > # [1] 0.02500096 > 1-ppois(3, 1.08986) #chance of seeing 4 or more, with a small rate [1] 0.02499961 > # [1] 0.02499961 > > > > > cleanEx() > nameEx("clogit") > ### * clogit > > flush(stderr()); flush(stdout()) > > ### Name: clogit > ### Title: Conditional logistic regression > ### Aliases: clogit > ### Keywords: survival models > > ### ** Examples > > ## Not run: clogit(case ~ spontaneous + induced + strata(stratum), data=infert) > > # A multinomial response recoded to use clogit > # The revised data set has one copy per possible outcome level, with new > # variable tocc = target occupation for this copy, and case = whether > # that is the actual outcome for each subject. > # See the reference below for the data. > resp <- levels(logan$occupation) > n <- nrow(logan) > indx <- rep(1:n, length(resp)) > logan2 <- data.frame(logan[indx,], + id = indx, + tocc = factor(rep(resp, each=n))) > logan2$case <- (logan2$occupation == logan2$tocc) > clogit(case ~ tocc + tocc:education + strata(id), logan2) Call: clogit(case ~ tocc + tocc:education + strata(id), logan2) coef exp(coef) se(coef) z p toccfarm -1.8964629 0.1500986 1.3807822 -1.373 0.16961 toccoperatives 1.1667502 3.2115388 0.5656465 2.063 0.03914 toccprofessional -8.1005492 0.0003034 0.6987244 -11.593 < 2e-16 toccsales -5.0292297 0.0065438 0.7700862 -6.531 6.54e-11 tocccraftsmen:education -0.3322842 0.7172835 0.0568682 -5.843 5.13e-09 toccfarm:education -0.3702858 0.6905370 0.1164100 -3.181 0.00147 toccoperatives:education -0.4222188 0.6555906 0.0584328 -7.226 4.98e-13 toccprofessional:education 0.2782469 1.3208122 0.0510212 5.454 4.94e-08 toccsales:education NA NA 0.0000000 NA NA Likelihood ratio test=665.5 on 8 df, p=< 2.2e-16 n= 4190, number of events= 838 > > > > cleanEx() > nameEx("cluster") > ### * cluster > > flush(stderr()); flush(stdout()) > > ### Name: cluster > ### Title: Identify clusters. > ### Aliases: cluster > ### Keywords: survival > > ### ** Examples > > marginal.model <- coxph(Surv(time, status) ~ rx, data= rats, cluster=litter, + subset=(sex=='f')) > frailty.model <- coxph(Surv(time, status) ~ rx + frailty(litter), rats, + subset=(sex=='f')) > > > > cleanEx() > nameEx("concordance") > ### * concordance > > flush(stderr()); flush(stdout()) > > ### Name: concordance > ### Title: Compute the concordance statistic for data or a model > ### Aliases: concordance concordance.coxph concordance.formula > ### concordance.lm concordance.survreg > ### Keywords: survival > > ### ** Examples > > fit1 <- coxph(Surv(ptime, pstat) ~ age + sex + mspike, mgus2) > concordance(fit1, timewt="n/G2") # Uno's weighting Call: concordance.coxph(object = fit1, timewt = "n/G2") n= 1373 Concordance= 0.6132 se= 0.1026 concordant discordant tied.x tied.y tied.xy 461425.07 290956.09 265.66 120.39 0.00 > > # logistic regression > fit2 <- glm(I(sex=='M') ~ age + log(creatinine), binomial, data= flchain) > concordance(fit2) # equal to the AUC Call: concordance.lm(object = fit2) n= 6524 Concordance= 0.8151 se= 0.005304 concordant discordant tied.x tied.y tied.xy 8568768 1931502 31474 10689870 56412 > > # compare multiple models > options(na.action = na.exclude) # predict all 1384 obs, including missing > fit3 <- glm(pstat ~ age + sex + mspike + offset(log(ptime)), + poisson, data= mgus2) > fit4 <- coxph(Surv(ptime, pstat) ~ age + sex + mspike, mgus2) > fit5 <- coxph(Surv(ptime, pstat) ~ age + sex + hgb + creat, mgus2) > > tdata <- mgus2; tdata$ptime <- 60 # prediction at 60 months > p3 <- -predict(fit3, newdata=tdata) > p4 <- -predict(fit4) # high risk scores predict shorter survival > p5 <- -predict(fit5) > options(na.action = na.omit) # return to the R default > > cfit <- concordance(Surv(ptime, pstat) ~p3 + p4 + p5, mgus2) > cfit Call: concordance.formula(object = Surv(ptime, pstat) ~ p3 + p4 + p5, data = mgus2) n=1338 (46 observations deleted due to missingness) concordance se p3 0.6598 0.0313 p4 0.6618 0.0310 p5 0.6000 0.0293 concordant discordant tied.x tied.y tied.xy p3 51105 26333 74 28 0 p4 51258 26180 74 28 0 p5 46507 31003 2 28 0 > round(coef(cfit), 3) p3 p4 p5 0.660 0.662 0.600 > round(cov2cor(vcov(cfit)), 3) # high correlation [,1] [,2] [,3] [1,] 1.000 0.994 0.236 [2,] 0.994 1.000 0.258 [3,] 0.236 0.258 1.000 > > test <- c(1, -1, 0) # contrast vector for model 1 - model 2 > round(c(difference = test %*% coef(cfit), + sd= sqrt(test %*% vcov(cfit) %*% test)), 3) difference sd -0.002 0.003 > > > > cleanEx() > nameEx("cox.zph") > ### * cox.zph > > flush(stderr()); flush(stdout()) > > ### Name: cox.zph > ### Title: Test the Proportional Hazards Assumption of a Cox Regression > ### Aliases: cox.zph [.cox.zph print.cox.zph > ### Keywords: survival > > ### ** Examples > > fit <- coxph(Surv(futime, fustat) ~ age + ecog.ps, + data=ovarian) > temp <- cox.zph(fit) > print(temp) # display the results chisq df p age 0.698 1 0.40 ecog.ps 2.371 1 0.12 GLOBAL 3.633 2 0.16 > plot(temp) # plot curves > > > > cleanEx() > nameEx("coxph") > ### * coxph > > flush(stderr()); flush(stdout()) > > ### Name: coxph > ### Title: Fit Proportional Hazards Regression Model > ### Aliases: coxph print.coxph.null print.coxph.penal coxph.penalty > ### coxph.getdata summary.coxph.penal > ### Keywords: survival > > ### ** Examples > > # Create the simplest test data set > test1 <- list(time=c(4,3,1,1,2,2,3), + status=c(1,1,1,0,1,1,0), + x=c(0,2,1,1,1,0,0), + sex=c(0,0,0,0,1,1,1)) > # Fit a stratified model > coxph(Surv(time, status) ~ x + strata(sex), test1) Call: coxph(formula = Surv(time, status) ~ x + strata(sex), data = test1) coef exp(coef) se(coef) z p x 0.8023 2.2307 0.8224 0.976 0.329 Likelihood ratio test=1.09 on 1 df, p=0.2971 n= 7, number of events= 5 > # Create a simple data set for a time-dependent model > test2 <- list(start=c(1,2,5,2,1,7,3,4,8,8), + stop=c(2,3,6,7,8,9,9,9,14,17), + event=c(1,1,1,1,1,1,1,0,0,0), + x=c(1,0,0,1,0,1,1,1,0,0)) > summary(coxph(Surv(start, stop, event) ~ x, test2)) Call: coxph(formula = Surv(start, stop, event) ~ x, data = test2) n= 10, number of events= 7 coef exp(coef) se(coef) z Pr(>|z|) x -0.02111 0.97912 0.79518 -0.027 0.979 exp(coef) exp(-coef) lower .95 upper .95 x 0.9791 1.021 0.2061 4.653 Concordance= 0.526 (se = 0.129 ) Likelihood ratio test= 0 on 1 df, p=1 Wald test = 0 on 1 df, p=1 Score (logrank) test = 0 on 1 df, p=1 > > # > # Create a simple data set for a time-dependent model > # > test2 <- list(start=c(1, 2, 5, 2, 1, 7, 3, 4, 8, 8), + stop =c(2, 3, 6, 7, 8, 9, 9, 9,14,17), + event=c(1, 1, 1, 1, 1, 1, 1, 0, 0, 0), + x =c(1, 0, 0, 1, 0, 1, 1, 1, 0, 0) ) > > > summary( coxph( Surv(start, stop, event) ~ x, test2)) Call: coxph(formula = Surv(start, stop, event) ~ x, data = test2) n= 10, number of events= 7 coef exp(coef) se(coef) z Pr(>|z|) x -0.02111 0.97912 0.79518 -0.027 0.979 exp(coef) exp(-coef) lower .95 upper .95 x 0.9791 1.021 0.2061 4.653 Concordance= 0.526 (se = 0.129 ) Likelihood ratio test= 0 on 1 df, p=1 Wald test = 0 on 1 df, p=1 Score (logrank) test = 0 on 1 df, p=1 > > # Fit a stratified model, clustered on patients > > bladder1 <- bladder[bladder$enum < 5, ] > coxph(Surv(stop, event) ~ (rx + size + number) * strata(enum), + cluster = id, bladder1) Call: coxph(formula = Surv(stop, event) ~ (rx + size + number) * strata(enum), data = bladder1, cluster = id) coef exp(coef) se(coef) robust se z p rx -0.52598 0.59097 0.31583 0.31524 -1.669 0.09521 size 0.06961 1.07209 0.10156 0.08863 0.785 0.43220 number 0.23818 1.26894 0.07588 0.07459 3.193 0.00141 rx:strata(enum)enum=2 -0.10633 0.89913 0.50424 0.33396 -0.318 0.75019 rx:strata(enum)enum=3 -0.17251 0.84155 0.55780 0.39868 -0.433 0.66523 rx:strata(enum)enum=4 -0.10945 0.89632 0.65730 0.50636 -0.216 0.82886 size:strata(enum)enum=2 -0.14737 0.86298 0.16803 0.11409 -1.292 0.19646 size:strata(enum)enum=3 -0.28345 0.75318 0.20894 0.15220 -1.862 0.06255 size:strata(enum)enum=4 -0.27607 0.75876 0.25222 0.18904 -1.460 0.14418 number:strata(enum)enum=2 -0.10125 0.90370 0.11904 0.11759 -0.861 0.38920 number:strata(enum)enum=3 -0.06467 0.93738 0.12925 0.12035 -0.537 0.59101 number:strata(enum)enum=4 0.09429 1.09888 0.14594 0.11973 0.788 0.43097 Likelihood ratio test=30.09 on 12 df, p=0.002708 n= 340, number of events= 112 > > # Fit a time transform model using current age > coxph(Surv(time, status) ~ ph.ecog + tt(age), data=lung, + tt=function(x,t,...) pspline(x + t/365.25)) Call: coxph(formula = Surv(time, status) ~ ph.ecog + tt(age), data = lung, tt = function(x, t, ...) pspline(x + t/365.25)) coef se(coef) se2 Chisq DF p ph.ecog 0.4528 0.1178 0.1174 14.7704 1.00 0.00012 tt(age), linear 0.0112 0.0093 0.0093 1.4414 1.00 0.22991 tt(age), nonlin 2.6992 3.08 0.45431 Iterations: 4 outer, 10 Newton-Raphson Theta= 0.796 Degrees of freedom for terms= 1.0 4.1 Likelihood ratio test=22.5 on 5.07 df, p=5e-04 n= 227, number of events= 164 (1 observation deleted due to missingness) > > > > cleanEx() > nameEx("coxph.detail") > ### * coxph.detail > > flush(stderr()); flush(stdout()) > > ### Name: coxph.detail > ### Title: Details of a Cox Model Fit > ### Aliases: coxph.detail > ### Keywords: survival > > ### ** Examples > > fit <- coxph(Surv(futime,fustat) ~ age + rx + ecog.ps, ovarian, x=TRUE) > fitd <- coxph.detail(fit) > # There is one Schoenfeld residual for each unique death. It is a > # vector (covariates for the subject who died) - (weighted mean covariate > # vector at that time). The weighted mean is defined over the subjects > # still at risk, with exp(X beta) as the weight. > > events <- fit$y[,2]==1 > etime <- fit$y[events,1] #the event times --- may have duplicates > indx <- match(etime, fitd$time) > schoen <- fit$x[events,] - fitd$means[indx,] > > > > cleanEx() > nameEx("diabetic") > ### * diabetic > > flush(stderr()); flush(stdout()) > > ### Name: diabetic > ### Title: Ddiabetic retinopathy > ### Aliases: diabetic > ### Keywords: datasets survival > > ### ** Examples > > # juvenile diabetes is defined as and age less than 20 > juvenile <- 1*(diabetic$age < 20) > coxph(Surv(time, status) ~ trt + juvenile, cluster= id, + data= diabetic) Call: coxph(formula = Surv(time, status) ~ trt + juvenile, data = diabetic, cluster = id) coef exp(coef) se(coef) robust se z p trt -0.77893 0.45890 0.16893 0.14851 -5.245 1.56e-07 juvenile -0.05388 0.94754 0.16211 0.17864 -0.302 0.763 Likelihood ratio test=22.48 on 2 df, p=1.312e-05 n= 394, number of events= 155 > > > > cleanEx() > nameEx("dsurvreg") > ### * dsurvreg > > flush(stderr()); flush(stdout()) > > ### Name: dsurvreg > ### Title: Distributions available in survreg. > ### Aliases: dsurvreg psurvreg qsurvreg rsurvreg > ### Keywords: distribution > > ### ** Examples > > # List of distributions available > names(survreg.distributions) [1] "extreme" "logistic" "gaussian" "weibull" "exponential" [6] "rayleigh" "loggaussian" "lognormal" "loglogistic" "t" > ## Not run: > ##D [1] "extreme" "logistic" "gaussian" "weibull" "exponential" > ##D [6] "rayleigh" "loggaussian" "lognormal" "loglogistic" "t" > ## End(Not run) > # Compare results > all.equal(dsurvreg(1:10, 2, 5, dist='lognormal'), dlnorm(1:10, 2, 5)) [1] TRUE > > # Hazard function for a Weibull distribution > x <- seq(.1, 3, length=30) > haz <- dsurvreg(x, 2, 3)/ (1-psurvreg(x, 2, 3)) > ## Not run: > ##D plot(x, haz, log='xy', ylab="Hazard") #line with slope (1/scale -1) > ## End(Not run) > > # Estimated CDF of a simple Weibull > fit <- survreg(Surv(time, status) ~ 1, data=lung) > pp <- 1:99/100 > q1 <- qsurvreg(pp, coef(fit), fit$scale) > q2 <- qweibull(pp, shape= 1/fit$scale, scale= exp(coef(fit))) > all.equal(q1, q2) [1] TRUE > ## Not run: > ##D plot(q1, pp, type='l', xlab="Months", ylab="CDF") > ## End(Not run) > # per the help page for dweibull, the mean is scale * gamma(1 + 1/shape) > c(mean = exp(coef(fit))* gamma(1 + fit$scale)) mean.(Intercept) 384.8529 > > > > > cleanEx() > nameEx("finegray") > ### * finegray > > flush(stderr()); flush(stdout()) > > ### Name: finegray > ### Title: Create data for a Fine-Gray model > ### Aliases: finegray > ### Keywords: survival > > ### ** Examples > > # Treat time to death and plasma cell malignancy as competing risks > etime <- with(mgus2, ifelse(pstat==0, futime, ptime)) > event <- with(mgus2, ifelse(pstat==0, 2*death, 1)) > event <- factor(event, 0:2, labels=c("censor", "pcm", "death")) > > # FG model for PCM > pdata <- finegray(Surv(etime, event) ~ ., data=mgus2) > fgfit <- coxph(Surv(fgstart, fgstop, fgstatus) ~ age + sex, + weight=fgwt, data=pdata) > > # Compute the weights separately by sex > adata <- finegray(Surv(etime, event) ~ . + strata(sex), + data=mgus2, na.action=na.pass) > > > > cleanEx() > nameEx("flchain") > ### * flchain > > flush(stderr()); flush(stdout()) > > ### Name: flchain > ### Title: Assay of serum free light chain for 7874 subjects. > ### Aliases: flchain > ### Keywords: datasets > > ### ** Examples > > data(flchain) > age.grp <- cut(flchain$age, c(49,54, 59,64, 69,74,79, 89, 110), + labels= paste(c(50,55,60,65,70,75,80,90), + c(54,59,64,69,74,79,89,109), sep='-')) > table(flchain$sex, age.grp) age.grp 50-54 55-59 60-64 65-69 70-74 75-79 80-89 90-109 F 881 766 625 589 541 408 459 81 M 796 714 591 524 405 269 202 23 > > > > cleanEx() > nameEx("frailty") > ### * frailty > > flush(stderr()); flush(stdout()) > > ### Name: frailty > ### Title: Random effects terms > ### Aliases: frailty frailty.gamma frailty.gaussian frailty.t > ### Keywords: survival > > ### ** Examples > > # Random institutional effect > coxph(Surv(time, status) ~ age + frailty(inst, df=4), lung) Call: coxph(formula = Surv(time, status) ~ age + frailty(inst, df = 4), data = lung) coef se(coef) se2 Chisq DF p age 0.01937 0.00933 0.00925 4.31149 1.00 0.038 frailty(inst, df = 4) 3.33459 3.99 0.501 Iterations: 3 outer, 10 Newton-Raphson Variance of random effect= 0.038 I-likelihood = -743.6 Degrees of freedom for terms= 1 4 Likelihood ratio test=9.96 on 4.97 df, p=0.08 n= 227, number of events= 164 (1 observation deleted due to missingness) > > # Litter effects for the rats data > rfit2a <- coxph(Surv(time, status) ~ rx + + frailty.gaussian(litter, df=13, sparse=FALSE), rats, + subset= (sex=='f')) > rfit2b <- coxph(Surv(time, status) ~ rx + + frailty.gaussian(litter, df=13, sparse=TRUE), rats, + subset= (sex=='f')) > > > > cleanEx() > nameEx("hoel") > ### * hoel > > flush(stderr()); flush(stdout()) > > ### Name: hoel > ### Title: Mouse cancer data > ### Aliases: hoel > ### Keywords: datasets > > ### ** Examples > > hsurv <- survfit(Surv(days, outcome) ~ trt, data = hoel, id= id) > plot(hsurv, lty=1:2, col=rep(1:3, each=2), lwd=2, xscale=30.5, + xlab="Months", ylab= "Death") > legend("topleft", c("Lymphoma control", "Lymphoma germ free", + "Sarcoma control", "Sarcoma germ free", + "Other control", "Other germ free"), + col=rep(1:3, each=2), lty=1:2, lwd=2, bty='n') > hfit <- coxph(Surv(days, outcome) ~ trt, data= hoel, id = id) > > > > cleanEx() > nameEx("is.ratetable") > ### * is.ratetable > > flush(stderr()); flush(stdout()) > > ### Name: is.ratetable > ### Title: Verify that an object is of class ratetable. > ### Aliases: is.ratetable Math.ratetable Ops.ratetable > ### Keywords: survival > > ### ** Examples > > is.ratetable(survexp.us) # True [1] TRUE > is.ratetable(lung) # False [1] FALSE > > > > cleanEx() > nameEx("kidney") > ### * kidney > > flush(stderr()); flush(stdout()) > > ### Name: kidney > ### Title: Kidney catheter data > ### Aliases: kidney > ### Keywords: survival > > ### ** Examples > > kfit <- coxph(Surv(time, status)~ age + sex + disease + frailty(id), kidney) > kfit0 <- coxph(Surv(time, status)~ age + sex + disease, kidney) > kfitm1 <- coxph(Surv(time,status) ~ age + sex + disease + + frailty(id, dist='gauss'), kidney) > > > > cleanEx() > nameEx("levels.Surv") > ### * levels.Surv > > flush(stderr()); flush(stdout()) > > ### Name: levels.Surv > ### Title: Return the states of a multi-state Surv object > ### Aliases: levels.Surv > ### Keywords: survival > > ### ** Examples > > y1 <- Surv(c(1,5, 9, 17,21, 30), + factor(c(0, 1, 2,1,0,2), 0:2, c("censored", "progression", "death"))) > levels(y1) [1] "progression" "death" > > y2 <- Surv(1:6, rep(0:1, 3)) > y2 [1] 1+ 2 3+ 4 5+ 6 > levels(y2) NULL > > > > cleanEx() > nameEx("lines.survfit") > ### * lines.survfit > > flush(stderr()); flush(stdout()) > > ### Name: lines.survfit > ### Title: Add Lines or Points to a Survival Plot > ### Aliases: lines.survfit points.survfit lines.survexp > ### Keywords: survival > > ### ** Examples > > fit <- survfit(Surv(time, status==2) ~ sex, pbc,subset=1:312) > plot(fit, mark.time=FALSE, xscale=365.25, + xlab='Years', ylab='Survival') > lines(fit[1], lwd=2) #darken the first curve and add marks > > > # Add expected survival curves for the two groups, > # based on the US census data > # The data set does not have entry date, use the midpoint of the study > efit <- survexp(~sex, data=pbc, times= (0:24)*182, ratetable=survexp.us, + rmap=list(sex=sex, age=age*365.35, year=as.Date('1979/01/01'))) > temp <- lines(efit, lty=2, lwd=2:1) > text(temp, c("Male", "Female"), adj= -.1) #labels just past the ends > title(main="Primary Biliary Cirrhosis, Observed and Expected") > > > > > cleanEx() > nameEx("mgus") > ### * mgus > > flush(stderr()); flush(stdout()) > > ### Name: mgus > ### Title: Monoclonal gammopathy data > ### Aliases: mgus mgus1 > ### Keywords: datasets survival > > ### ** Examples > > # Create the competing risk curves for time to first of death or PCM > sfit <- survfit(Surv(start, stop, event) ~ sex, mgus1, id=id, + subset=(enum==1)) > print(sfit) # the order of printout is the order in which they plot Call: survfit(formula = Surv(start, stop, event) ~ sex, data = mgus1, subset = (enum == 1), id = id) n nevent rmean se(rmean)* sex=female, (s0) 104 0 5762.379 372.6367 sex=male, (s0) 137 0 4543.293 315.1668 sex=female, pcm 104 33 2881.500 400.8899 sex=male, pcm 137 31 2478.026 397.9675 sex=female, death 104 63 5681.121 462.1850 sex=male, death 137 100 7303.682 452.9533 *restricted mean time in state (max time = 14325 ) > > plot(sfit, xscale=365.25, lty=c(2,2,1,1), col=c(1,2,1,2), + xlab="Years after MGUS detection", ylab="Proportion") > legend(0, .8, c("Death/male", "Death/female", "PCM/male", "PCM/female"), + lty=c(1,1,2,2), col=c(2,1,2,1), bty='n') > > title("Curves for the first of plasma cell malignancy or death") > # The plot shows that males have a higher death rate than females (no > # surprise) but their rates of conversion to PCM are essentially the same. > > > > cleanEx() > nameEx("model.matrix.coxph") > ### * model.matrix.coxph > > flush(stderr()); flush(stdout()) > > ### Name: model.matrix.coxph > ### Title: Model.matrix method for coxph models > ### Aliases: model.matrix.coxph > ### Keywords: survival > > ### ** Examples > > fit1 <- coxph(Surv(time, status) ~ age + factor(ph.ecog), data=lung) > xfit <- model.matrix(fit1) > > fit2 <- coxph(Surv(time, status) ~ age + factor(ph.ecog), data=lung, + x=TRUE) > all.equal(model.matrix(fit1), fit2$x) [1] TRUE > > > > cleanEx() > nameEx("myeloid") > ### * myeloid > > flush(stderr()); flush(stdout()) > > ### Name: myeloid > ### Title: Acute myeloid leukemia > ### Aliases: myeloid > ### Keywords: datasets > > ### ** Examples > > coxph(Surv(futime, death) ~ trt + flt3, data=myeloid) Call: coxph(formula = Surv(futime, death) ~ trt + flt3, data = myeloid) coef exp(coef) se(coef) z p trtB -0.3534 0.7023 0.1122 -3.149 0.00164 flt3B 0.4114 1.5089 0.1587 2.593 0.00952 flt3C 0.7878 2.1985 0.1656 4.758 1.96e-06 Likelihood ratio test=34.04 on 3 df, p=1.94e-07 n= 646, number of events= 320 > # See the mstate vignette for a more complete analysis > > > > cleanEx() > nameEx("myeloma") > ### * myeloma > > flush(stderr()); flush(stdout()) > > ### Name: myeloma > ### Title: Survival times of patients with multiple myeloma > ### Aliases: myeloma > ### Keywords: datasets > > ### ** Examples > > # Incorrect survival curve, which ignores left truncation > fit1 <- survfit(Surv(futime, death) ~ 1, myeloma) > # Correct curve > fit2 <- survfit(Surv(entry, futime, death) ~1, myeloma) > > > > cleanEx() > nameEx("neardate") > ### * neardate > > flush(stderr()); flush(stdout()) > > ### Name: neardate > ### Title: Find the index of the closest value in data set 2, for each > ### entry in data set one. > ### Aliases: neardate > ### Keywords: manip utilities > > ### ** Examples > > data1 <- data.frame(id = 1:10, + entry.dt = as.Date(paste("2011", 1:10, "5", sep='-'))) > temp1 <- c(1,4,5,1,3,6,9, 2,7,8,12,4,6,7,10,12,3) > data2 <- data.frame(id = c(1,1,1,2,2,4,4,5,5,5,6,8,8,9,10,10,12), + lab.dt = as.Date(paste("2011", temp1, "1", sep='-')), + chol = round(runif(17, 130, 280))) > > #first cholesterol on or after enrollment > indx1 <- neardate(data1$id, data2$id, data1$entry.dt, data2$lab.dt) > data2[indx1, "chol"] [1] 186 160 NA 265 224 161 NA NA NA 205 > > # Closest one, either before or after. > # > indx2 <- neardate(data1$id, data2$id, data1$entry.dt, data2$lab.dt, + best="prior") > ifelse(is.na(indx1), indx2, # none after, take before + ifelse(is.na(indx2), indx1, #none before + ifelse(abs(data2$lab.dt[indx2]- data1$entry.dt) < + abs(data2$lab.dt[indx1]- data1$entry.dt), indx2, indx1))) [1] 1 5 NA 6 9 11 NA 13 14 15 > > # closest date before or after, but no more than 21 days prior to index > indx2 <- ifelse((data1$entry.dt - data2$lab.dt[indx2]) >21, NA, indx2) > ifelse(is.na(indx1), indx2, # none after, take before + ifelse(is.na(indx2), indx1, #none before + ifelse(abs(data2$lab.dt[indx2]- data1$entry.dt) < + abs(data2$lab.dt[indx1]- data1$entry.dt), indx2, indx1))) [1] 1 5 NA 6 9 11 NA NA NA 15 > > > > cleanEx() > nameEx("nsk") > ### * nsk > > flush(stderr()); flush(stdout()) > > ### Name: nsk > ### Title: Natural splines with knot heights as the basis. > ### Aliases: nsk > ### Keywords: smooth > > ### ** Examples > > # make some dummy data > tdata <- data.frame(x= lung$age, y = 10*log(lung$age-35) + rnorm(228, 0, 2)) > fit1 <- lm(y ~ -1 + nsk(x, df=4, intercept=TRUE) , data=tdata) > fit2 <- lm(y ~ nsk(x, df=3), data=tdata) > > # the knots (same for both fits) > knots <- unlist(attributes(fit1$model[[2]])[c('Boundary.knots', 'knots')]) > sort(unname(knots)) [1] 45.35 59.00 67.00 75.00 > unname(coef(fit1)) # predictions at the knot points [1] 22.23565 32.18784 34.82994 36.93020 > > unname(coef(fit1)[-1] - coef(fit1)[1]) # differences: yhat[2:4] - yhat[1] [1] 9.952189 12.594290 14.694549 > unname(coef(fit2))[-1] # ditto [1] 9.952189 12.594290 14.694549 > > ## Not run: > ##D plot(y ~ x, data=tdata) > ##D points(sort(knots), coef(fit1), col=2, pch=19) > ##D coef(fit)[1] + c(0, coef(fit)[-1]) > ## End(Not run) > > > > cleanEx() > nameEx("nwtco") > ### * nwtco > > flush(stderr()); flush(stdout()) > > ### Name: nwtco > ### Title: Data from the National Wilm's Tumor Study > ### Aliases: nwtco > ### Keywords: datasets > > ### ** Examples > > with(nwtco, table(instit,histol)) histol instit 1 2 1 3493 129 2 76 330 > anova(coxph(Surv(edrel,rel)~histol+instit,data=nwtco)) Analysis of Deviance Table Cox model: response is Surv(edrel, rel) Terms added sequentially (first to last) loglik Chisq Df Pr(>|Chi|) NULL -4666.3 histol -4532.5 267.6667 1 < 2e-16 *** instit -4531.0 3.0397 1 0.08125 . --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 > anova(coxph(Surv(edrel,rel)~instit+histol,data=nwtco)) Analysis of Deviance Table Cox model: response is Surv(edrel, rel) Terms added sequentially (first to last) loglik Chisq Df Pr(>|Chi|) NULL -4666.3 instit -4577.5 177.714 1 < 2.2e-16 *** histol -4531.0 92.992 1 < 2.2e-16 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 > > > > cleanEx() > nameEx("pbcseq") > ### * pbcseq > > flush(stderr()); flush(stdout()) > > ### Name: pbcseq > ### Title: Mayo Clinic Primary Biliary Cirrhosis, sequential data > ### Aliases: pbcseq > ### Keywords: datasets > > ### ** Examples > > # Create the start-stop-event triplet needed for coxph > first <- with(pbcseq, c(TRUE, diff(id) !=0)) #first id for each subject > last <- c(first[-1], TRUE) #last id > > time1 <- with(pbcseq, ifelse(first, 0, day)) > time2 <- with(pbcseq, ifelse(last, futime, c(day[-1], 0))) > event <- with(pbcseq, ifelse(last, status, 0)) > > fit1 <- coxph(Surv(time1, time2, event) ~ age + sex + log(bili), pbcseq) Warning in Surv(time1, time2, event) : Invalid status value, converted to NA > > > > cleanEx() > nameEx("plot.cox.zph") > ### * plot.cox.zph > > flush(stderr()); flush(stdout()) > > ### Name: plot.cox.zph > ### Title: Graphical Test of Proportional Hazards > ### Aliases: plot.cox.zph > ### Keywords: survival > > ### ** Examples > > vfit <- coxph(Surv(time,status) ~ trt + factor(celltype) + + karno + age, data=veteran, x=TRUE) > temp <- cox.zph(vfit) > plot(temp, var=3) # Look at Karnofsy score, old way of doing plot > plot(temp[3]) # New way with subscripting > abline(0, 0, lty=3) > # Add the linear fit as well > abline(lm(temp$y[,3] ~ temp$x)$coefficients, lty=4, col=3) > title(main="VA Lung Study") > > > > cleanEx() > nameEx("plot.survfit") > ### * plot.survfit > > flush(stderr()); flush(stdout()) > > ### Name: plot.survfit > ### Title: Plot method for 'survfit' objects > ### Aliases: plot.survfit > ### Keywords: survival hplot > > ### ** Examples > > leukemia.surv <- survfit(Surv(time, status) ~ x, data = aml) > plot(leukemia.surv, lty = 2:3) > legend(100, .9, c("Maintenance", "No Maintenance"), lty = 2:3) > title("Kaplan-Meier Curves\nfor AML Maintenance Study") > lsurv2 <- survfit(Surv(time, status) ~ x, aml, type='fleming') > plot(lsurv2, lty=2:3, fun="cumhaz", + xlab="Months", ylab="Cumulative Hazard") > > > > cleanEx() > nameEx("predict.coxph") > ### * predict.coxph > > flush(stderr()); flush(stdout()) > > ### Name: predict.coxph > ### Title: Predictions for a Cox model > ### Aliases: predict.coxph predict.coxph.penal > ### Keywords: survival > > ### ** Examples > > options(na.action=na.exclude) # retain NA in predictions > fit <- coxph(Surv(time, status) ~ age + ph.ecog + strata(inst), lung) > #lung data set has status coded as 1/2 > mresid <- (lung$status-1) - predict(fit, type='expected') #Martingale resid > predict(fit,type="lp") 1 2 3 4 5 6 0.215495605 -0.423532231 -0.559265038 0.183469551 -0.539432878 0.248095483 7 8 9 10 11 12 0.406461814 0.489169379 -0.047448917 0.327284344 0.040389888 0.550315552 13 14 15 16 17 18 -0.115925255 NA 0.055807340 0.110906025 0.050567124 0.493760215 19 20 21 22 23 24 0.557645717 -0.004245606 -0.127236322 -0.621260082 -0.319524466 -0.575882288 25 26 27 28 29 30 -0.345688084 0.202851214 -0.428371074 1.313400384 -0.021210624 0.761244928 31 32 33 34 35 36 0.191540147 0.749933860 0.180240469 0.459827013 0.672213041 0.625512121 37 38 39 40 41 42 0.565173220 0.085767683 0.761244928 0.076972823 0.330513426 0.511791514 43 44 45 46 47 48 -0.439682141 0.660901974 -0.164699618 0.496950353 -0.381077937 0.091073865 49 50 51 52 53 54 -0.354839644 -0.175654221 0.192873470 -0.447487689 -0.450985298 -0.562055013 55 56 57 58 59 60 0.063012023 -0.516810744 -0.297203343 0.474684682 0.034518529 0.076972823 61 62 63 64 65 66 0.678283893 -0.045992266 0.176731471 -0.149858457 0.158940268 0.718790633 67 68 69 70 71 72 0.539004484 -0.308514410 -0.543216443 0.153500561 -0.479261384 -0.078592144 73 74 75 76 77 78 0.946919127 -0.073531430 -0.049489875 0.214162281 -0.641232484 0.029078821 79 80 81 82 83 84 -0.276488357 -0.392389004 -0.439682141 0.001411510 -0.410013004 -0.151289480 85 86 87 88 89 90 -0.292311495 0.198744830 -0.039921414 -0.530162769 -0.123010230 0.738622793 91 92 93 94 95 96 -0.743642023 0.050567124 0.285269157 0.108857156 -0.437633273 0.796634781 97 98 99 100 101 102 0.158940268 0.214162281 -0.161169524 -0.400910096 -0.562055013 0.176122695 103 104 105 106 107 108 0.012722577 0.108256292 0.617817211 0.157606945 -0.189452466 0.110906025 109 110 111 112 113 114 -0.026867740 0.797968104 -0.411394980 -0.149248522 0.369011703 -0.344354760 115 116 117 118 119 120 0.006456686 0.783867062 0.503880355 0.693378524 0.527693417 0.244122624 121 122 123 124 125 126 -0.464038972 0.449575370 0.158940268 0.500480446 -0.426322206 0.005322855 127 128 129 130 131 132 -0.368298829 0.134984810 0.652115157 -0.617153698 0.131479291 -0.190511890 133 134 135 136 137 138 -0.643882217 0.001411510 -0.460255408 0.666972826 0.067118407 0.583884010 139 140 141 142 143 144 -0.036137850 -0.399002948 0.747892903 0.215495605 0.630552446 0.088283890 145 146 147 148 149 150 -0.240346995 -0.200763533 -0.558074111 -0.179200822 -0.232577411 -0.524505653 151 152 153 154 155 156 0.171077519 -0.633704981 -0.331136545 -0.190511890 0.477441161 NA 157 158 159 160 161 162 -0.031097524 0.736573925 0.123673743 -0.013515715 -0.585704233 -0.038186718 163 164 165 166 167 168 0.466547245 0.108256292 -0.209943887 -0.716429053 -0.206413793 -0.699828778 169 170 171 172 173 174 0.085634157 -0.424865554 0.069277914 -0.441093652 0.107445646 -0.874783994 175 176 177 178 179 180 -0.047448917 0.046655779 0.557645717 0.001411510 -0.047448917 -0.667994646 181 182 183 184 185 186 -0.513194586 -0.776965291 -0.614629447 0.019390401 -0.583220496 -0.651086900 187 188 189 190 191 192 0.859584155 -0.536642904 0.063145548 -0.712882451 0.024398388 0.369338475 193 194 195 196 197 198 -0.023370131 0.076972823 0.061878192 -0.368310218 -0.003231734 0.074931865 199 200 201 202 203 204 -0.629921417 -0.037164935 0.063145548 0.084500326 -0.574393166 -0.627131442 205 206 207 208 209 210 -0.658814293 0.302547317 -0.410314015 0.516017606 0.131487202 -0.302547317 211 212 213 214 215 216 -0.539432878 0.153500561 0.119700884 0.409991908 -0.149858457 -0.149858457 217 218 219 220 221 222 -0.156943432 0.781826105 0.477858312 -0.452404719 0.016633922 -0.081992053 223 224 225 226 227 228 0.212705630 0.224016697 -0.750726998 0.703662506 0.142189494 -0.085165683 > predict(fit,type="expected") 1 2 3 4 5 6 7 0.74602570 0.57892506 1.28411487 0.65144995 2.53474317 2.59935704 0.94925558 8 9 10 11 12 13 14 1.07812821 0.63137435 0.55866807 0.31809979 1.96068120 2.96879741 NA 15 16 17 18 19 20 21 2.14464916 0.39248100 1.01652225 2.53985878 0.23734050 0.15454932 0.41781121 22 23 24 25 26 27 28 0.03725873 1.07425239 0.73304358 0.71922541 1.96068538 0.91425760 0.50868712 29 30 31 32 33 34 35 1.07651355 0.10727131 1.64348011 0.22335391 1.34246079 0.18355514 0.25427967 36 37 38 39 40 41 42 0.57948554 3.87217595 1.42062915 0.50341133 2.84274107 1.90670187 0.39302876 43 44 45 46 47 48 49 1.67374788 0.56009982 1.95081502 0.39930277 0.62185372 1.18384892 1.08920268 50 51 52 53 54 55 56 1.36922169 2.72429090 0.31557423 0.04821232 0.41960993 3.07164840 0.12000994 57 58 59 60 61 62 63 0.07406041 0.17908976 1.74520134 1.10195998 1.47697029 0.54523697 0.51461493 64 65 66 67 68 69 70 0.14292300 0.18117365 0.20227027 0.70028855 1.00636733 0.31133532 0.64126839 71 72 73 74 75 76 77 0.96177399 0.46743320 0.53451717 0.16345589 0.86294287 1.44797843 1.06953116 78 79 80 81 82 83 84 1.19014609 0.03668315 0.33061179 1.90397464 0.08944145 0.20857044 0.28585781 85 86 87 88 89 90 91 1.15723874 0.87295638 1.19851949 0.14216346 1.37338069 0.92021616 1.05096221 92 93 94 95 96 97 98 0.27465006 0.47403241 0.26750987 1.01622540 0.08901343 0.32456045 0.93961618 99 100 101 102 103 104 105 0.85179714 0.14362313 0.89733451 1.74403467 0.70225748 0.15754565 0.36065915 106 107 108 109 110 111 112 0.41227011 0.29089093 0.02759911 2.54485283 1.57705739 0.02915789 0.51482474 113 114 115 116 117 118 119 1.51254632 0.24392791 1.95773713 0.16855572 0.69132758 2.65613080 1.04014324 120 121 122 123 124 125 126 0.89157179 0.40187641 0.23829273 1.56065440 0.17535194 1.02778525 0.18442460 127 128 129 130 131 132 133 0.08051722 0.20596405 1.70473379 0.86354367 0.72017118 0.27146814 0.48487446 134 135 136 137 138 139 140 1.10114414 0.51567846 1.46035831 0.93950468 1.54314328 1.12143879 0.60372302 141 142 143 144 145 146 147 1.46022571 0.88081136 0.66047105 0.18347489 0.51981101 0.28761918 0.50825077 148 149 150 151 152 153 154 0.15268490 0.06671446 0.32571666 0.39746179 0.39772440 0.38939509 0.20940447 155 156 157 158 159 160 161 0.62171971 NA 0.34080256 0.46159657 0.47539058 1.00662370 0.21472196 162 163 164 165 166 167 168 0.54619593 0.50111574 0.24481910 0.51248548 0.19954882 0.25566706 0.78817717 169 170 171 172 173 174 175 0.44798249 0.43113659 0.44847984 1.48341994 0.46620310 0.37028208 0.86812344 176 177 178 179 180 181 182 0.43844817 0.94494334 0.25935783 0.37625255 0.20649507 0.25048304 0.37569346 183 184 185 186 187 188 189 0.40334526 0.39324727 0.36799524 0.39552828 1.77501387 0.24422514 0.38021709 190 191 192 193 194 195 196 0.21501843 0.51818689 0.08032921 0.22774986 0.71502728 0.36774267 0.39500663 197 198 199 200 201 202 203 0.38445105 0.97727710 0.43520510 0.16869554 0.17219830 0.05878035 0.21716448 204 205 206 207 208 209 210 0.18384556 0.18192355 0.64682101 0.35975276 0.70106697 1.03414013 0.35317899 211 212 213 214 215 216 217 0.42921059 0.47944086 0.40234009 0.25017393 0.04470913 0.27054309 0.22137404 218 219 220 221 222 223 224 1.18698635 0.50681607 0.11190719 0.11327702 0.28954125 0.33611081 0.74776723 225 226 227 228 0.12225025 0.00000000 0.35218786 0.10231300 > predict(fit,type="risk",se.fit=TRUE) $fit 1 2 3 4 5 6 7 8 1.2404765 0.6547301 0.5716290 1.2013784 0.5830788 1.2815823 1.5014958 1.6309609 9 10 11 12 13 14 15 16 0.9536592 1.3871959 1.0412167 1.7338000 0.8905418 NA 1.0573939 1.1172899 17 18 19 20 21 22 23 24 1.0518675 1.6384656 1.7465558 0.9957634 0.8805256 0.5372670 0.7264944 0.5622086 25 26 27 28 29 30 31 32 0.7077332 1.2248902 0.6515696 3.7187976 0.9790127 2.1409399 1.2111135 2.1168600 33 34 35 36 37 38 39 40 1.1975053 1.5838000 1.9585669 1.8692030 1.7597526 1.0895532 2.1409399 1.0800127 41 42 43 44 45 46 47 48 1.3916825 1.6682773 0.6442412 1.9365383 0.8481484 1.6437009 0.6831246 1.0953499 49 50 51 52 53 54 55 56 0.7012859 0.8389080 1.2127293 0.6392321 0.6370002 0.5700364 1.0650396 0.5964197 57 58 59 60 61 62 63 64 0.7428929 1.6075072 1.0351212 1.0800127 1.9704933 0.9550493 1.1933106 0.8608298 65 66 67 68 69 70 71 72 1.1722679 2.0519501 1.7142994 0.7345374 0.5808769 1.1659084 0.6192406 0.9244169 73 74 75 76 77 78 79 80 2.5777557 0.9291069 0.9517148 1.2388237 0.5266429 1.0295057 0.7584424 0.6754413 81 82 83 84 85 86 87 88 0.6442412 1.0014125 0.6636416 0.8595988 0.7465360 1.2198707 0.9608649 0.5885092 89 90 91 92 93 94 95 96 0.8842546 2.0930510 0.4753794 1.0518675 1.3301200 1.1150031 0.6455625 2.2180641 97 98 99 100 101 102 103 104 1.1722679 1.2388237 0.8511478 0.6697103 0.5700364 1.1925844 1.0128039 1.1143333 105 106 107 108 109 110 111 112 1.8548748 1.1707060 0.8274120 1.1172899 0.9734900 2.2210235 0.6627251 0.8613550 113 114 115 116 117 118 119 120 1.4463045 0.7086775 1.0064776 2.1899245 1.6551313 2.0004627 1.6950181 1.2765009 121 122 123 124 125 126 127 128 0.6287391 1.5676464 1.1722679 1.6495136 0.6529059 1.0053370 0.6919104 1.1445194 129 130 131 132 133 134 135 136 1.9195968 0.5394778 1.1405143 0.8265359 0.5252493 1.0014125 0.6311224 1.9483304 137 138 139 140 141 142 143 144 1.0694221 1.7929889 0.9645073 0.6709887 2.1125440 1.2404765 1.8786481 1.0922982 145 146 147 148 149 150 151 152 0.7863550 0.8181059 0.5723102 0.8359380 0.7924884 0.5918479 1.1865827 0.5306222 153 154 155 156 157 158 159 160 0.7181071 0.8265359 1.6119444 NA 0.9693810 2.0887670 1.1316466 0.9865752 161 162 163 164 165 166 167 168 0.5567137 0.9625332 1.5944793 1.1143333 0.8106297 0.4884935 0.8134964 0.4966703 169 170 171 172 173 174 175 176 1.0894077 0.6538577 1.0717340 0.6433325 1.1134303 0.4169521 0.9536592 1.0477613 177 178 179 180 181 182 183 184 1.7465558 1.0014125 0.9536592 0.5127358 0.5985803 0.4597993 0.5408413 1.0195796 185 186 187 188 189 190 191 192 0.5580981 0.5214787 2.3621782 0.5847079 1.0651819 0.4902291 1.0246985 1.4467772 193 194 195 196 197 198 199 200 0.9769008 1.0800127 1.0638328 0.6919025 0.9967735 1.0778107 0.5326337 0.9635172 201 202 203 204 205 206 207 208 1.0651819 1.0881732 0.5630464 0.5341218 0.5174645 1.3533017 0.6634419 1.6753425 209 210 211 212 213 214 215 216 1.1405233 0.7389335 0.5830788 1.1659084 1.1271597 1.5068056 0.8608298 0.8608298 217 218 219 220 221 222 223 224 0.8547524 2.1854595 1.6126170 0.6360967 1.0167730 0.9212793 1.2370205 1.2510919 225 226 227 228 0.4720233 2.0211416 1.1527951 0.9183601 $se.fit 1 2 3 4 5 6 0.094027169 0.096340319 0.096185061 0.110144705 0.091221886 0.124003567 7 8 9 10 11 12 0.106470052 0.135893441 0.104263809 0.115204660 0.048057506 0.157626321 13 14 15 16 17 18 0.058398830 NA 0.078593550 0.044525715 0.047523899 0.139753275 19 20 21 22 23 24 0.246130195 0.051683778 0.050651208 0.106747848 0.121191090 0.095563151 25 26 27 28 29 30 0.135232494 0.077970827 0.084316589 0.541641696 0.047411370 0.244541270 31 32 33 34 35 36 0.067316853 0.236761412 0.222247496 0.143779967 0.246770836 0.214866749 37 38 39 40 41 42 0.186808694 0.027994134 0.244541270 0.017746688 0.094899948 0.150429986 43 44 45 46 47 48 0.082038635 0.251128992 0.071539989 0.172653479 0.157627962 0.046664065 49 50 51 52 53 54 0.203630081 0.147427688 0.071868116 0.087051165 0.126710133 0.091078334 55 56 57 58 59 60 0.030346404 0.094111921 0.072518580 0.232795318 0.092391388 0.017746688 61 62 63 64 65 66 0.207337260 0.162712161 0.126511646 0.038549743 0.042876315 0.234595146 67 68 69 70 71 72 0.151669341 0.068462840 0.112880428 0.068678027 0.124246473 0.184637680 73 74 75 76 77 78 0.325442016 0.174862073 0.090441588 0.089040153 0.108376599 0.057550307 79 80 81 82 83 84 0.188633743 0.150191651 0.082038635 0.027564795 0.181878087 0.172125872 85 86 87 88 89 90 0.142365056 0.114741553 0.035859182 0.096819023 0.132484179 0.229864932 91 92 93 94 95 96 0.120689668 0.047523899 0.070339929 0.055381362 0.123547581 0.253870138 97 98 99 100 101 102 0.042876315 0.089040153 0.035190905 0.106227011 0.091078334 0.091298269 103 104 105 106 107 108 0.017787711 0.028641480 0.194430169 0.039989624 0.075782969 0.044525715 109 110 111 112 113 114 0.071209628 0.254965259 0.163546509 0.185211877 0.241649528 0.139074790 115 116 117 118 119 120 0.076796420 0.262556790 0.348185429 0.211911041 0.146845572 0.149423594 121 122 123 124 125 126 0.150969692 0.156065943 0.042876315 0.142648758 0.129688202 0.004890619 127 128 129 130 131 132 0.113985419 0.031310085 0.248637733 0.121183075 0.041502912 0.067248608 133 134 135 136 137 138 0.115359144 0.027564795 0.112511267 0.200585657 0.069255092 0.201817172 139 140 141 142 143 144 0.094786456 0.075667327 0.240338975 0.094027169 0.216098624 0.024974398 145 146 147 148 149 150 0.066191299 0.084423319 0.167625233 0.058808327 0.221289168 0.105873833 151 152 153 154 155 156 0.140449741 0.098993713 0.063583542 0.067248608 0.230942129 NA 157 158 159 160 161 162 0.067558237 0.245408761 0.032338223 0.075589234 0.101745759 0.174851413 163 164 165 166 167 168 0.125897325 0.028641480 0.048065722 0.111659253 0.045260623 0.125085448 169 170 171 172 173 174 0.020095538 0.093808006 0.037378627 0.093118562 0.031761359 0.135544076 175 176 177 178 179 180 0.104263809 0.016586035 0.246130195 0.027564795 0.104263809 0.174088607 181 182 183 184 185 186 0.109727836 0.166211707 0.139230772 0.017941579 0.106388490 0.137198131 187 188 189 190 191 192 0.304795981 0.089505183 0.043311645 0.114439474 0.131445121 0.192173147 193 194 195 196 197 198 0.144436340 0.017746688 0.058484070 0.121193159 0.002956631 0.025613128 199 200 201 202 203 204 0.104623286 0.033429233 0.043311645 0.080773833 0.103942128 0.124008736 205 206 207 208 209 210 0.118294076 0.078206752 0.080505144 0.235804861 0.079727031 0.057789591 211 212 213 214 215 216 0.091221886 0.068678027 0.029421496 0.124248857 0.038549743 0.038549743 217 218 219 220 221 222 0.158976598 0.269332667 0.130275218 0.089792820 0.015369862 0.085131550 223 224 225 226 227 228 0.148494109 0.160862263 0.138362860 0.225740927 0.057778343 0.074788433 > predict(fit,type="terms",se.fit=TRUE) $fit age ph.ecog 1 0.130878057 0.03032716 2 0.063011653 -0.54083428 3 -0.072721154 -0.54083428 4 -0.061410086 0.03032716 5 -0.027476885 -0.54083428 6 0.130878057 0.03032716 7 0.063011653 0.60148859 8 0.096944855 0.60148859 9 -0.106654355 0.03032716 10 -0.016165817 0.60148859 11 -0.061410086 0.03032716 12 0.063011653 0.60148859 13 0.063011653 0.03032716 14 NA NA 15 -0.061410086 0.03032716 16 0.051700586 0.03032716 17 0.085633788 0.03032716 18 0.006456317 0.60148859 19 -0.072721154 0.60148859 20 -0.061410086 0.03032716 21 0.051700586 0.03032716 22 -0.151898625 -0.54083428 23 -0.140587557 0.03032716 24 -0.050099019 -0.54083428 25 0.108255923 -0.54083428 26 0.085633788 0.03032716 27 -0.027476885 -0.54083428 28 0.085633788 1.17265002 29 -0.106654355 0.03032716 30 0.130878057 0.60148859 31 0.074322721 0.03032716 32 0.119566990 0.60148859 33 -0.163209692 0.60148859 34 -0.027476885 0.60148859 35 -0.016165817 0.60148859 36 -0.004854750 0.60148859 37 0.029078452 0.60148859 38 0.040389519 0.03032716 39 0.130878057 0.60148859 40 0.017767384 0.03032716 41 0.085633788 0.03032716 42 0.119566990 0.60148859 43 -0.038787952 -0.54083428 44 -0.027476885 0.60148859 45 0.063011653 0.03032716 46 0.153500192 0.60148859 47 0.130878057 -0.54083428 48 0.006456317 0.03032716 49 0.130878057 -0.54083428 50 -0.140587557 0.03032716 51 0.108255923 0.03032716 52 0.006456317 -0.54083428 53 0.063011653 -0.54083428 54 -0.050099019 -0.54083428 55 -0.038787952 0.03032716 56 -0.004854750 -0.54083428 57 0.029078452 -0.54083428 58 -0.061410086 0.60148859 59 -0.050099019 0.03032716 60 0.017767384 0.03032716 61 0.142189124 0.60148859 62 -0.163209692 0.03032716 63 0.119566990 0.03032716 64 0.029078452 0.03032716 65 0.074322721 0.03032716 66 0.063011653 0.60148859 67 0.051700586 0.60148859 68 0.017767384 -0.54083428 69 0.063011653 -0.54083428 70 0.051700586 0.03032716 71 0.006456317 -0.54083428 72 -0.163209692 0.03032716 73 0.130878057 0.60148859 74 -0.253698230 0.03032716 75 -0.106654355 0.03032716 76 0.096944855 0.03032716 77 -0.129276490 -0.54083428 78 -0.072721154 0.03032716 79 0.210055528 -0.54083428 80 0.119566990 -0.54083428 81 -0.038787952 -0.54083428 82 -0.084032221 0.03032716 83 -0.231076095 0.03032716 84 -0.208453961 0.03032716 85 -0.208453961 0.03032716 86 0.096944855 0.03032716 87 -0.004854750 0.03032716 88 -0.016165817 -0.54083428 89 -0.208453961 0.03032716 90 0.108255923 0.60148859 91 0.006456317 -0.54083428 92 0.085633788 0.03032716 93 0.040389519 0.03032716 94 -0.061410086 0.03032716 95 0.074322721 -0.54083428 96 0.108255923 0.60148859 97 0.074322721 0.03032716 98 0.096944855 0.03032716 99 0.017767384 0.03032716 100 0.085633788 -0.54083428 101 -0.050099019 -0.54083428 102 0.074322721 0.03032716 103 -0.072721154 0.03032716 104 0.006456317 0.03032716 105 -0.038787952 0.60148859 106 0.040389519 0.03032716 107 -0.095343288 0.03032716 108 0.051700586 0.03032716 109 -0.084032221 0.03032716 110 0.142189124 0.60148859 111 0.074322721 -0.54083428 112 -0.208453961 0.03032716 113 0.198744461 0.03032716 114 0.142189124 -0.54083428 115 -0.095343288 0.03032716 116 0.153500192 0.60148859 117 -0.151898625 0.60148859 118 0.063011653 0.60148859 119 0.040389519 0.60148859 120 0.198744461 0.03032716 121 0.142189124 -0.54083428 122 -0.027476885 0.60148859 123 0.074322721 0.03032716 124 0.108255923 0.60148859 125 0.085633788 -0.54083428 126 0.040389519 0.03032716 127 -0.140587557 0.03032716 128 0.017767384 0.03032716 129 0.164811259 0.60148859 130 -0.163209692 -0.54083428 131 -0.038787952 0.03032716 132 -0.106654355 0.03032716 133 -0.174520759 -0.54083428 134 -0.084032221 0.03032716 135 0.051700586 -0.54083428 136 0.130878057 0.60148859 137 -0.050099019 0.03032716 138 -0.072721154 0.60148859 139 -0.095343288 0.03032716 140 -0.072721154 -0.54083428 141 0.119566990 0.60148859 142 0.130878057 0.03032716 143 0.153500192 0.60148859 144 0.029078452 0.03032716 145 -0.061410086 0.03032716 146 -0.106654355 0.03032716 147 0.096944855 -0.54083428 148 -0.095343288 0.03032716 149 0.221366595 -0.54083428 150 -0.038787952 -0.54083428 151 0.085633788 0.03032716 152 -0.027476885 -0.54083428 153 -0.004854750 -0.54083428 154 -0.106654355 0.03032716 155 -0.084032221 0.60148859 156 NA NA 157 0.063011653 0.03032716 158 -0.004854750 0.60148859 159 0.006456317 0.03032716 160 -0.072721154 0.03032716 161 -0.004854750 -0.54083428 162 -0.208453961 0.03032716 163 0.074322721 0.60148859 164 0.006456317 0.03032716 165 0.017767384 0.03032716 166 -0.061410086 -0.54083428 167 -0.027476885 0.03032716 168 -0.185831826 -0.54083428 169 -0.016165817 0.03032716 170 0.029078452 -0.54083428 171 -0.016165817 0.03032716 172 -0.050099019 -0.54083428 173 -0.072721154 0.03032716 174 -0.219765028 -0.54083428 175 -0.106654355 0.03032716 176 -0.038787952 0.03032716 177 -0.072721154 0.60148859 178 -0.084032221 0.03032716 179 -0.106654355 0.03032716 180 0.130878057 -0.54083428 181 -0.027476885 -0.54083428 182 -0.265009297 -0.54083428 183 0.040389519 -0.54083428 184 0.029078452 0.03032716 185 -0.129276490 -0.54083428 186 -0.197142894 -0.54083428 187 0.108255923 0.60148859 188 -0.050099019 -0.54083428 189 0.017767384 0.03032716 190 -0.106654355 -0.54083428 191 0.108255923 0.03032716 192 -0.117965423 0.60148859 193 -0.140587557 0.03032716 194 0.017767384 0.03032716 195 0.096944855 0.03032716 196 0.085633788 -0.54083428 197 0.006456317 0.03032716 198 0.017767384 0.03032716 199 -0.117965423 -0.54083428 200 -0.027476885 0.03032716 201 0.017767384 0.03032716 202 0.119566990 0.03032716 203 0.006456317 -0.54083428 204 -0.140587557 -0.54083428 205 0.006456317 -0.54083428 206 -0.004854750 0.60148859 207 -0.084032221 -0.54083428 208 -0.140587557 0.60148859 209 0.074322721 0.03032716 210 -0.038787952 0.03032716 211 -0.027476885 -0.54083428 212 0.051700586 0.03032716 213 0.074322721 0.03032716 214 0.017767384 0.60148859 215 0.029078452 0.03032716 216 0.029078452 0.03032716 217 -0.242387163 0.03032716 218 0.153500192 0.60148859 219 0.085633788 0.60148859 220 -0.061410086 -0.54083428 221 0.051700586 0.03032716 222 0.096944855 0.03032716 223 0.153500192 0.03032716 224 0.164811259 0.03032716 225 -0.265009297 -0.54083428 226 0.142189124 0.60148859 227 0.040389519 0.03032716 228 -0.050099019 0.03032716 $se.fit age ph.ecog 1 0.119930635 0.007395102 2 0.057740983 0.131879319 3 0.066638322 0.131879319 4 0.056273380 0.007395102 5 0.025178554 0.131879319 6 0.119930635 0.007395102 7 0.057740983 0.146669523 8 0.088835809 0.146669523 9 0.097733148 0.007395102 10 0.014813612 0.146669523 11 0.056273380 0.007395102 12 0.057740983 0.146669523 13 0.057740983 0.007395102 14 NA NA 15 0.056273380 0.007395102 16 0.047376041 0.007395102 17 0.078470867 0.007395102 18 0.005916272 0.146669523 19 0.066638322 0.146669523 20 0.056273380 0.007395102 21 0.047376041 0.007395102 22 0.139192917 0.131879319 23 0.128827975 0.007395102 24 0.045908438 0.131879319 25 0.099200751 0.131879319 26 0.078470867 0.007395102 27 0.025178554 0.131879319 28 0.078470867 0.285943945 29 0.097733148 0.007395102 30 0.119930635 0.146669523 31 0.068105925 0.007395102 32 0.109565693 0.146669523 33 0.149557859 0.146669523 34 0.025178554 0.146669523 35 0.014813612 0.146669523 36 0.004448670 0.146669523 37 0.026646156 0.146669523 38 0.037011098 0.007395102 39 0.119930635 0.146669523 40 0.016281214 0.007395102 41 0.078470867 0.007395102 42 0.109565693 0.146669523 43 0.035543496 0.131879319 44 0.025178554 0.146669523 45 0.057740983 0.007395102 46 0.140660519 0.146669523 47 0.119930635 0.131879319 48 0.005916272 0.007395102 49 0.119930635 0.131879319 50 0.128827975 0.007395102 51 0.099200751 0.007395102 52 0.005916272 0.131879319 53 0.057740983 0.131879319 54 0.045908438 0.131879319 55 0.035543496 0.007395102 56 0.004448670 0.131879319 57 0.026646156 0.131879319 58 0.056273380 0.146669523 59 0.045908438 0.007395102 60 0.016281214 0.007395102 61 0.130295577 0.146669523 62 0.149557859 0.007395102 63 0.109565693 0.007395102 64 0.026646156 0.007395102 65 0.068105925 0.007395102 66 0.057740983 0.146669523 67 0.047376041 0.146669523 68 0.016281214 0.131879319 69 0.057740983 0.131879319 70 0.047376041 0.007395102 71 0.005916272 0.131879319 72 0.149557859 0.007395102 73 0.119930635 0.146669523 74 0.232477395 0.007395102 75 0.097733148 0.007395102 76 0.088835809 0.007395102 77 0.118463033 0.131879319 78 0.066638322 0.007395102 79 0.192485229 0.131879319 80 0.109565693 0.131879319 81 0.035543496 0.131879319 82 0.077003264 0.007395102 83 0.211747511 0.007395102 84 0.191017627 0.007395102 85 0.191017627 0.007395102 86 0.088835809 0.007395102 87 0.004448670 0.007395102 88 0.014813612 0.131879319 89 0.191017627 0.007395102 90 0.099200751 0.146669523 91 0.005916272 0.131879319 92 0.078470867 0.007395102 93 0.037011098 0.007395102 94 0.056273380 0.007395102 95 0.068105925 0.131879319 96 0.099200751 0.146669523 97 0.068105925 0.007395102 98 0.088835809 0.007395102 99 0.016281214 0.007395102 100 0.078470867 0.131879319 101 0.045908438 0.131879319 102 0.068105925 0.007395102 103 0.066638322 0.007395102 104 0.005916272 0.007395102 105 0.035543496 0.146669523 106 0.037011098 0.007395102 107 0.087368206 0.007395102 108 0.047376041 0.007395102 109 0.077003264 0.007395102 110 0.130295577 0.146669523 111 0.068105925 0.131879319 112 0.191017627 0.007395102 113 0.182120287 0.007395102 114 0.130295577 0.131879319 115 0.087368206 0.007395102 116 0.140660519 0.146669523 117 0.139192917 0.146669523 118 0.057740983 0.146669523 119 0.037011098 0.146669523 120 0.182120287 0.007395102 121 0.130295577 0.131879319 122 0.025178554 0.146669523 123 0.068105925 0.007395102 124 0.099200751 0.146669523 125 0.078470867 0.131879319 126 0.037011098 0.007395102 127 0.128827975 0.007395102 128 0.016281214 0.007395102 129 0.151025461 0.146669523 130 0.149557859 0.131879319 131 0.035543496 0.007395102 132 0.097733148 0.007395102 133 0.159922801 0.131879319 134 0.077003264 0.007395102 135 0.047376041 0.131879319 136 0.119930635 0.146669523 137 0.045908438 0.007395102 138 0.066638322 0.146669523 139 0.087368206 0.007395102 140 0.066638322 0.131879319 141 0.109565693 0.146669523 142 0.119930635 0.007395102 143 0.140660519 0.146669523 144 0.026646156 0.007395102 145 0.056273380 0.007395102 146 0.097733148 0.007395102 147 0.088835809 0.131879319 148 0.087368206 0.007395102 149 0.202850171 0.131879319 150 0.035543496 0.131879319 151 0.078470867 0.007395102 152 0.025178554 0.131879319 153 0.004448670 0.131879319 154 0.097733148 0.007395102 155 0.077003264 0.146669523 156 NA NA 157 0.057740983 0.007395102 158 0.004448670 0.146669523 159 0.005916272 0.007395102 160 0.066638322 0.007395102 161 0.004448670 0.131879319 162 0.191017627 0.007395102 163 0.068105925 0.146669523 164 0.005916272 0.007395102 165 0.016281214 0.007395102 166 0.056273380 0.131879319 167 0.025178554 0.007395102 168 0.170287743 0.131879319 169 0.014813612 0.007395102 170 0.026646156 0.131879319 171 0.014813612 0.007395102 172 0.045908438 0.131879319 173 0.066638322 0.007395102 174 0.201382569 0.131879319 175 0.097733148 0.007395102 176 0.035543496 0.007395102 177 0.066638322 0.146669523 178 0.077003264 0.007395102 179 0.097733148 0.007395102 180 0.119930635 0.131879319 181 0.025178554 0.131879319 182 0.242842337 0.131879319 183 0.037011098 0.131879319 184 0.026646156 0.007395102 185 0.118463033 0.131879319 186 0.180652685 0.131879319 187 0.099200751 0.146669523 188 0.045908438 0.131879319 189 0.016281214 0.007395102 190 0.097733148 0.131879319 191 0.099200751 0.007395102 192 0.108098090 0.146669523 193 0.128827975 0.007395102 194 0.016281214 0.007395102 195 0.088835809 0.007395102 196 0.078470867 0.131879319 197 0.005916272 0.007395102 198 0.016281214 0.007395102 199 0.108098090 0.131879319 200 0.025178554 0.007395102 201 0.016281214 0.007395102 202 0.109565693 0.007395102 203 0.005916272 0.131879319 204 0.128827975 0.131879319 205 0.005916272 0.131879319 206 0.004448670 0.146669523 207 0.077003264 0.131879319 208 0.128827975 0.146669523 209 0.068105925 0.007395102 210 0.035543496 0.007395102 211 0.025178554 0.131879319 212 0.047376041 0.007395102 213 0.068105925 0.007395102 214 0.016281214 0.146669523 215 0.026646156 0.007395102 216 0.026646156 0.007395102 217 0.222112453 0.007395102 218 0.140660519 0.146669523 219 0.078470867 0.146669523 220 0.056273380 0.131879319 221 0.047376041 0.007395102 222 0.088835809 0.007395102 223 0.140660519 0.007395102 224 0.151025461 0.007395102 225 0.242842337 0.131879319 226 0.130295577 0.146669523 227 0.037011098 0.007395102 228 0.045908438 0.007395102 > > # For someone who demands reference='zero' > pzero <- function(fit) + predict(fit, reference="sample") + sum(coef(fit) * fit$means, na.rm=TRUE) > > > > cleanEx() > nameEx("predict.survreg") > ### * predict.survreg > > flush(stderr()); flush(stdout()) > > ### Name: predict.survreg > ### Title: Predicted Values for a 'survreg' Object > ### Aliases: predict.survreg predict.survreg.penal > ### Keywords: survival > > ### ** Examples > > # Draw figure 1 from Escobar and Meeker, 1992. > fit <- survreg(Surv(time,status) ~ age + I(age^2), data=stanford2, + dist='lognormal') > with(stanford2, plot(age, time, xlab='Age', ylab='Days', + xlim=c(0,65), ylim=c(.1, 10^5), log='y', type='n')) > with(stanford2, points(age, time, pch=c(2,4)[status+1], cex=.7)) > pred <- predict(fit, newdata=list(age=1:65), type='quantile', + p=c(.1, .5, .9)) > matlines(1:65, pred, lty=c(2,1,2), col=1) > > # Predicted Weibull survival curve for a lung cancer subject with > # ECOG score of 2 > lfit <- survreg(Surv(time, status) ~ ph.ecog, data=lung) > pct <- 1:98/100 # The 100th percentile of predicted survival is at +infinity > ptime <- predict(lfit, newdata=data.frame(ph.ecog=2), type='quantile', + p=pct, se=TRUE) > matplot(cbind(ptime$fit, ptime$fit + 2*ptime$se.fit, + ptime$fit - 2*ptime$se.fit)/30.5, 1-pct, + xlab="Months", ylab="Survival", type='l', lty=c(1,2,2), col=1) > > > > cleanEx() > nameEx("pseudo") > ### * pseudo > > flush(stderr()); flush(stdout()) > > ### Name: pseudo > ### Title: Pseudo values for survival. > ### Aliases: pseudo > ### Keywords: survival > > ### ** Examples > > fit1 <- survfit(Surv(time, status) ~ 1, data=lung) > yhat <- pseudo(fit1, times=c(365, 730)) > dim(yhat) [1] 228 2 > lfit <- lm(yhat[,1] ~ ph.ecog + age + sex, data=lung) > > # Restricted Mean Time in State (RMST) > rms <- pseudo(fit1, times= 730, type='RMST') # 2 years > rfit <- lm(rms ~ ph.ecog + sex, data=lung) > rhat <- predict(rfit, newdata=expand.grid(ph.ecog=0:3, sex=1:2), se.fit=TRUE) > # print it out nicely > temp1 <- cbind(matrix(rhat$fit, 4,2)) > temp2 <- cbind(matrix(rhat$se.fit, 4, 2)) > temp3 <- cbind(temp1[,1], temp2[,1], temp1[,2], temp2[,2]) > dimnames(temp3) <- list(paste("ph.ecog", 0:3), + c("Male RMST", "(se)", "Female RMST", "(se)")) > > round(temp3, 1) Male RMST (se) Female RMST (se) ph.ecog 0 393.7 28.6 510.6 31.5 ph.ecog 1 307.8 19.8 424.7 24.4 ph.ecog 2 221.9 29.7 338.8 33.4 ph.ecog 3 136.1 47.9 253.0 50.5 > # compare this to the fully non-parametric estimate > fit2 <- survfit(Surv(time, status) ~ ph.ecog, data=lung) > print(fit2, rmean=730) Call: survfit(formula = Surv(time, status) ~ ph.ecog, data = lung) 1 observation deleted due to missingness n events rmean* se(rmean) median 0.95LCL 0.95UCL ph.ecog=0 63 37 429 32.2 394 348 574 ph.ecog=1 113 82 366 22.4 306 268 429 ph.ecog=2 50 44 256 30.0 199 156 288 ph.ecog=3 1 1 118 0.0 118 NA NA * restricted mean with upper limit = 730 > # the estimate for ph.ecog=3 is very unstable (n=1), pseudovalues smooth it. > # > # In all the above we should be using the robust variance, e.g., svyglm, but > # a recommended package can't depend on external libraries. > # See the vignette for a more complete exposition. > > > > cleanEx() > nameEx("pspline") > ### * pspline > > flush(stderr()); flush(stdout()) > > ### Name: pspline > ### Title: Smoothing splines using a pspline basis > ### Aliases: pspline psplineinverse > ### Keywords: survival > > ### ** Examples > > lfit6 <- survreg(Surv(time, status)~pspline(age, df=2), lung) > plot(lung$age, predict(lfit6), xlab='Age', ylab="Spline prediction") > title("Cancer Data") > fit0 <- coxph(Surv(time, status) ~ ph.ecog + age, lung) > fit1 <- coxph(Surv(time, status) ~ ph.ecog + pspline(age,3), lung) > fit3 <- coxph(Surv(time, status) ~ ph.ecog + pspline(age,8), lung) > fit0 Call: coxph(formula = Surv(time, status) ~ ph.ecog + age, data = lung) coef exp(coef) se(coef) z p ph.ecog 0.443485 1.558128 0.115831 3.829 0.000129 age 0.011281 1.011345 0.009319 1.211 0.226082 Likelihood ratio test=19.06 on 2 df, p=7.279e-05 n= 227, number of events= 164 (1 observation deleted due to missingness) > fit1 Call: coxph(formula = Surv(time, status) ~ ph.ecog + pspline(age, 3), data = lung) coef se(coef) se2 Chisq DF p ph.ecog 0.44802 0.11707 0.11678 14.64453 1.00 0.00013 pspline(age, 3), linear 0.01126 0.00928 0.00928 1.47231 1.00 0.22498 pspline(age, 3), nonlin 2.07924 2.08 0.37143 Iterations: 4 outer, 12 Newton-Raphson Theta= 0.861 Degrees of freedom for terms= 1.0 3.1 Likelihood ratio test=21.9 on 4.08 df, p=2e-04 n= 227, number of events= 164 (1 observation deleted due to missingness) > fit3 Call: coxph(formula = Surv(time, status) ~ ph.ecog + pspline(age, 8), data = lung) coef se(coef) se2 Chisq DF p ph.ecog 0.47640 0.12024 0.11925 15.69732 1.00 7.4e-05 pspline(age, 8), linear 0.01172 0.00923 0.00923 1.61161 1.00 0.20 pspline(age, 8), nonlin 6.93188 6.99 0.43 Iterations: 5 outer, 15 Newton-Raphson Theta= 0.691 Degrees of freedom for terms= 1 8 Likelihood ratio test=27.6 on 8.97 df, p=0.001 n= 227, number of events= 164 (1 observation deleted due to missingness) > > > > cleanEx() > nameEx("pyears") > ### * pyears > > flush(stderr()); flush(stdout()) > > ### Name: pyears > ### Title: Person Years > ### Aliases: pyears > ### Keywords: survival > > ### ** Examples > > # Look at progression rates jointly by calendar date and age > # > temp.yr <- tcut(mgus$dxyr, 55:92, labels=as.character(55:91)) > temp.age <- tcut(mgus$age, 34:101, labels=as.character(34:100)) > ptime <- ifelse(is.na(mgus$pctime), mgus$futime, mgus$pctime) > pstat <- ifelse(is.na(mgus$pctime), 0, 1) > pfit <- pyears(Surv(ptime/365.25, pstat) ~ temp.yr + temp.age + sex, mgus, + data.frame=TRUE) > # Turn the factor back into numerics for regression > tdata <- pfit$data > tdata$age <- as.numeric(as.character(tdata$temp.age)) > tdata$year<- as.numeric(as.character(tdata$temp.yr)) > fit1 <- glm(event ~ year + age+ sex +offset(log(pyears)), + data=tdata, family=poisson) > ## Not run: > ##D # fit a gam model > ##D gfit.m <- gam(y ~ s(age) + s(year) + offset(log(time)), > ##D family = poisson, data = tdata) > ## End(Not run) > > # Example #2 Create the hearta data frame: > hearta <- by(heart, heart$id, + function(x)x[x$stop == max(x$stop),]) > hearta <- do.call("rbind", hearta) > # Produce pyears table of death rates on the surgical arm > # The first is by age at randomization, the second by current age > fit1 <- pyears(Surv(stop/365.25, event) ~ cut(age + 48, c(0,50,60,70,100)) + + surgery, data = hearta, scale = 1) > fit2 <- pyears(Surv(stop/365.25, event) ~ tcut(age + 48, c(0,50,60,70,100)) + + surgery, data = hearta, scale = 1) > fit1$event/fit1$pyears #death rates on the surgery and non-surg arm surgery cut(age + 48, c(0, 50, 60, 70, 100)) 0 1 (0,50] 0.7615378 0.3036881 (50,60] 2.0068681 0.9979508 (60,70] 5.1083916 NaN (70,100] NaN NaN > > fit2$event/fit2$pyears #death rates on the surgery and non-surg arm surgery tcut(age + 48, c(0, 50, 60, 70, 100)) 0 1 0+ thru 50 0.8013285 0.2636994 50+ thru 60 1.6119238 0.6564817 60+ thru 70 3.9701087 NaN 70+ thru 100 NaN NaN > > > > cleanEx() > nameEx("quantile.survfit") > ### * quantile.survfit > > flush(stderr()); flush(stdout()) > > ### Name: quantile.survfit > ### Title: Quantiles from a survfit object > ### Aliases: quantile.survfit quantile.survfitms median.survfit > ### Keywords: survival > > ### ** Examples > > fit <- survfit(Surv(time, status) ~ ph.ecog, data=lung) > quantile(fit) $quantile 25 50 75 ph.ecog=0 285 394 655 ph.ecog=1 181 306 550 ph.ecog=2 105 199 351 ph.ecog=3 118 118 118 $lower 25 50 75 ph.ecog=0 189 348 558 ph.ecog=1 156 268 460 ph.ecog=2 61 156 285 ph.ecog=3 NA NA NA $upper 25 50 75 ph.ecog=0 350 574 NA ph.ecog=1 223 429 689 ph.ecog=2 163 288 654 ph.ecog=3 NA NA NA > > cfit <- coxph(Surv(time, status) ~ age + strata(ph.ecog), data=lung) > csurv<- survfit(cfit, newdata=data.frame(age=c(40, 60, 80)), + conf.type ="none") > temp <- quantile(csurv, 1:5/10) > temp[2,3,] # quantiles for second level of ph.ecog, age=80 10 20 30 40 50 92 144 181 218 270 > quantile(csurv[2,3], 1:5/10) # quantiles of a single curve, same result 10 20 30 40 50 92 144 181 218 270 > > > > cleanEx() > nameEx("reliability") > ### * reliability > > flush(stderr()); flush(stdout()) > > ### Name: reliability > ### Title: Reliability data sets > ### Aliases: reliability braking capacitor cracks genfan ifluid imotor > ### turbine valveSeat > ### Keywords: datasets > > ### ** Examples > > survreg(Surv(time, status) ~ temperature + voltage, capacitor) Call: survreg(formula = Surv(time, status) ~ temperature + voltage, data = capacitor) Coefficients: (Intercept) temperature voltage 13.40701688 -0.02890466 -0.00591082 Scale= 0.3638092 Loglik(model)= -244.2 Loglik(intercept only)= -254.5 Chisq= 20.57 on 2 degrees of freedom, p= 3.41e-05 n= 64 > > # Figure 16.7 of Meeker, cumulative replacement of locomotive braking > # grids > gfit <- survfit(Surv(day1, day2, status) ~ batch, braking, id= locomotive) > plot(gfit, cumhaz=TRUE, col=1:2, xscale=30.5, conf.time= c(6,12,18)*30.5, + xlab="Locomotive Age in Months", ylab="Mean cumulative number replacements") > > # Replacement of valve seats. In this case the cumulative hazard is the > # natural target, an estimate of the number of replacements by a given time > # (known as the cumulative mean function = CMF in relability). > # When two valve seats failed at the same inspection, we need to jitter one > # of the times, to avoid a (time1, time2) interval of length 0 > ties <- which(with(valveSeat, diff(id)==0 & diff(time)==0)) #first of a tie > temp <- valveSeat$time > temp[ties] <- temp[ties] - .1 # jittered time > vdata <- valveSeat > vdata$time1 <- ifelse(!duplicated(vdata$id), 0, c(0, temp[-length(temp)])) > vdata$time2 <- temp > fit2 <- survfit(Surv(time1, time2, status) ~1, vdata, id=id) > ## Not run: > ##D plot(fit2, cumhaz= TRUE, xscale= 365.25, > ##D xlab="Years in service", ylab = "Expected number of repairs") > ## End(Not run) > > > > cleanEx() > nameEx("residuals.coxph") > ### * residuals.coxph > > flush(stderr()); flush(stdout()) > > ### Name: residuals.coxph > ### Title: Calculate Residuals for a 'coxph' Fit > ### Aliases: residuals.coxph.penal residuals.coxph.null residuals.coxph > ### residuals.coxphms > ### Keywords: survival > > ### ** Examples > > > fit <- coxph(Surv(start, stop, event) ~ (age + surgery)* transplant, + data=heart) > mresid <- resid(fit, collapse=heart$id) > > > > cleanEx() > nameEx("residuals.survfit") > ### * residuals.survfit > > flush(stderr()); flush(stdout()) > > ### Name: residuals.survfit > ### Title: IJ residuals from a survfit object. > ### Aliases: residuals.survfit > > ### ** Examples > > fit <- survfit(Surv(time, status) ~ x, aml) > resid(fit, times=c(24, 48), type="RMTS") times 24 48 1 -1.0836777 -2.076652893 2 -0.7200413 -1.713016529 3 0.2004132 0.421074380 4 -0.3237345 -1.468414256 5 0.1876291 -0.957050620 6 0.2899019 0.965676653 7 0.2899019 -0.359777893 8 0.2899019 0.008403926 9 0.2899019 1.726585744 10 0.2899019 1.726585744 11 0.2899019 1.726585744 12 -1.0057870 -1.475694444 13 -1.0057870 -1.475694444 14 -0.7557870 -1.225694444 15 -0.7557870 -1.225694444 16 -0.4224537 -0.892361111 17 0.5636574 0.899305556 18 0.4826389 -0.121527778 19 0.5798611 0.267361111 20 0.5798611 0.559027778 21 0.5798611 0.850694444 22 0.5798611 1.822916667 23 0.5798611 2.017361111 > > > > cleanEx() > nameEx("residuals.survreg") > ### * residuals.survreg > > flush(stderr()); flush(stdout()) > > ### Name: residuals.survreg > ### Title: Compute Residuals for 'survreg' Objects > ### Aliases: residuals.survreg residuals.survreg.penal > ### Keywords: survival > > ### ** Examples > > fit <- survreg(Surv(futime, death) ~ age + sex, mgus2) > summary(fit) # age and sex are both important Call: survreg(formula = Surv(futime, death) ~ age + sex, data = mgus2) Value Std. Error z p (Intercept) 8.85979 0.23842 37.16 < 2e-16 age -0.05360 0.00312 -17.19 < 2e-16 sexM -0.31874 0.06357 -5.01 5.3e-07 Log(scale) -0.02840 0.02787 -1.02 0.31 Scale= 0.972 Weibull distribution Loglik(model)= -5528.3 Loglik(intercept only)= -5699 Chisq= 341.42 on 2 degrees of freedom, p= 7.3e-75 Number of Newton-Raphson Iterations: 5 n= 1384 > > rr <- residuals(fit, type='matrix') > sum(rr[,1]) - with(mgus2, sum(log(futime[death==1]))) # loglik [1] -5528.267 > > plot(mgus2$age, rr[,2], col= (1+mgus2$death)) # ldresp > > > > cleanEx() > nameEx("retinopathy") > ### * retinopathy > > flush(stderr()); flush(stdout()) > > ### Name: retinopathy > ### Title: Diabetic Retinopathy > ### Aliases: retinopathy > ### Keywords: datasets > > ### ** Examples > > coxph(Surv(futime, status) ~ type + trt, cluster= id, retinopathy) Call: coxph(formula = Surv(futime, status) ~ type + trt, data = retinopathy, cluster = id) coef exp(coef) se(coef) robust se z p typeadult 0.05388 1.05536 0.16211 0.17864 0.302 0.763 trt -0.77893 0.45890 0.16893 0.14851 -5.245 1.56e-07 Likelihood ratio test=22.48 on 2 df, p=1.312e-05 n= 394, number of events= 155 > > > > cleanEx() > nameEx("rhDNase") > ### * rhDNase > > flush(stderr()); flush(stdout()) > > ### Name: rhDNase > ### Title: rhDNASE data set > ### Aliases: rhDNase > ### Keywords: datasets > > ### ** Examples > > # Build the start-stop data set for analysis, and > # replicate line 2 of table 8.13 in the book > first <- subset(rhDNase, !duplicated(id)) #first row for each subject > dnase <- tmerge(first, first, id=id, tstop=as.numeric(end.dt -entry.dt)) > > # Subjects whose fu ended during the 6 day window are the reason for > # this next line > temp.end <- with(rhDNase, pmin(ivstop+6, end.dt-entry.dt)) > dnase <- tmerge(dnase, rhDNase, id=id, + infect=event(ivstart), + end= event(temp.end)) > # toss out the non-at-risk intervals, and extra variables > # 3 subjects had an event on their last day of fu, infect=1 and end=1 > dnase <- subset(dnase, (infect==1 | end==0), c(id:trt, fev:infect)) > agfit <- coxph(Surv(tstart, tstop, infect) ~ trt + fev, cluster=id, + data=dnase) > > > > cleanEx() > nameEx("ridge") > ### * ridge > > flush(stderr()); flush(stdout()) > > ### Name: ridge > ### Title: Ridge regression > ### Aliases: ridge > ### Keywords: survival > > ### ** Examples > > > coxph(Surv(futime, fustat) ~ rx + ridge(age, ecog.ps, theta=1), + ovarian) Call: coxph(formula = Surv(futime, fustat) ~ rx + ridge(age, ecog.ps, theta = 1), data = ovarian) coef se(coef) se2 Chisq DF p rx -0.8564 0.6161 0.6156 1.9323 1 0.1645 ridge(age) 0.1229 0.0385 0.0354 10.2127 1 0.0014 ridge(ecog.ps) 0.1093 0.5734 0.5484 0.0363 1 0.8488 Iterations: 1 outer, 5 Newton-Raphson Degrees of freedom for terms= 1.0 1.8 Likelihood ratio test=15.6 on 2.76 df, p=0.001 n= 26, number of events= 12 > > lfit0 <- survreg(Surv(time, status) ~1, lung) > lfit1 <- survreg(Surv(time, status) ~ age + ridge(ph.ecog, theta=5), lung) > lfit2 <- survreg(Surv(time, status) ~ sex + ridge(age, ph.ecog, theta=1), lung) > lfit3 <- survreg(Surv(time, status) ~ sex + age + ph.ecog, lung) > > > > > cleanEx() > nameEx("rotterdam") > ### * rotterdam > > flush(stderr()); flush(stdout()) > > ### Name: rotterdam > ### Title: Breast cancer data set used in Royston and Altman (2013) > ### Aliases: rotterdam > ### Keywords: datasets survival > > ### ** Examples > > # liberal definition of rfs (count later deaths) > rfs <- pmax(rotterdam$recur, rotterdam$death) > rfstime <- with(rotterdam, ifelse(recur==1, rtime, dtime)) > fit1 <- coxph(Surv(rfstime, rfs) ~ pspline(age) + meno + size + + pspline(nodes) + er, data = rotterdam) > > # conservative (no deaths after last fu for recurrence) > ignore <- with(rotterdam, recur ==0 & death==1 & rtime < dtime) > table(ignore) ignore FALSE TRUE 2939 43 > rfs2 <- with(rotterdam, ifelse(recur==1 | ignore, recur, death)) > rfstime2 <- with(rotterdam, ifelse(recur==1 | ignore, rtime, dtime)) > fit2 <- coxph(Surv(rfstime2, rfs2) ~ pspline(age) + meno + size + + pspline(nodes) + er, data = rotterdam) > > # Note: Both age and nodes show non-linear effects. > # Royston and Altman used fractional polynomials for the nonlinear terms > > > > cleanEx() > nameEx("royston") > ### * royston > > flush(stderr()); flush(stdout()) > > ### Name: royston > ### Title: Compute Royston's D for a Cox model > ### Aliases: royston > ### Keywords: survival > > ### ** Examples > > # An example used in Royston and Sauerbrei > pbc2 <- na.omit(pbc) # no missing values > cfit <- coxph(Surv(time, status==2) ~ age + log(bili) + edema + albumin + + stage + copper, data=pbc2, ties="breslow") > royston(cfit) D se(D) R.D R.KO R.N C.GH 2.6917766 0.2273352 0.6336693 0.5554885 0.4714442 0.7735923 > > > > cleanEx() > nameEx("rttright") > ### * rttright > > flush(stderr()); flush(stdout()) > > ### Name: rttright > ### Title: Compute redistribute-to-the-right weights > ### Aliases: rttright > ### Keywords: survival > > ### ** Examples > > afit <- survfit(Surv(time, status) ~1, data=aml) > rwt <- rttright(Surv(time, status) ~1, data=aml) > > # Reproduce a Kaplan-Meier > index <- order(aml$time) > cdf <- cumsum(rwt[index]) # weighted CDF > cdf <- cdf[!duplicated(aml$time[index], fromLast=TRUE)] # remove duplicate times > cbind(time=afit$time, KM= afit$surv, RTTR= 1-cdf) time KM RTTR [1,] 5 0.91304348 0.91304348 [2,] 8 0.82608696 0.82608696 [3,] 9 0.78260870 0.78260870 [4,] 12 0.73913043 0.73913043 [5,] 13 0.69565217 0.69565217 [6,] 16 0.69565217 0.69565217 [7,] 18 0.64596273 0.64596273 [8,] 23 0.54658385 0.54658385 [9,] 27 0.49689441 0.49689441 [10,] 28 0.49689441 0.49689441 [11,] 30 0.44168392 0.44168392 [12,] 31 0.38647343 0.38647343 [13,] 33 0.33126294 0.33126294 [14,] 34 0.27605245 0.27605245 [15,] 43 0.22084196 0.22084196 [16,] 45 0.16563147 0.16563147 [17,] 48 0.08281573 0.08281573 [18,] 161 0.08281573 0.08281573 > > # Hormonal patients have a diffent censoring pattern > wt2 <- rttright(Surv(dtime, death) ~ hormon, rotterdam, times= 365*c(3, 5)) > dim(wt2) [1] 2982 2 > > > > cleanEx() > nameEx("solder") > ### * solder > > flush(stderr()); flush(stdout()) > > ### Name: solder > ### Title: Data from a soldering experiment > ### Aliases: solder > ### Keywords: datasets > > ### ** Examples > > fit1 <- glm(skips ~ Opening * Solder, poisson, solder, + subset= (Mask != "A6")) > anova(fit1) # The interaction is important Analysis of Deviance Table Model: poisson, link: log Response: skips Terms added sequentially (first to last) Df Deviance Resid. Df Resid. Dev Pr(>Chi) NULL 809 7336.9 Opening 2 2638.44 807 4698.5 < 2.2e-16 *** Solder 1 1120.85 806 3577.7 < 2.2e-16 *** Opening:Solder 2 41.14 804 3536.5 1.166e-09 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 > dummy <- expand.grid(Opening= c("S", "M", "L"), Solder=c("Thin", "Thick")) > yhat <- matrix(predict(fit1, newdata=dummy), ncol=2, + dimnames=list(Opening= c("S", "M", "L"), Solder=c("Thin", "Thick"))) > yhat <- cbind(yhat, difference= yhat[,1]- yhat[,2]) > round(yhat, 1) # thin and thick have different patterns Thin Thick difference S 2.8 1.7 1.1 M 1.1 0.2 1.0 L 1.0 -0.9 1.9 > > # The balanced subset used by Chambers and Hastie > # contains the first 180 of each mask and deletes mask A6. > index <- 1 + (1:nrow(solder)) - match(solder$Mask, solder$Mask) > solder.balance <- droplevels(subset(solder, Mask != "A6" & index <= 180)) > > > > cleanEx() > nameEx("statefig") > ### * statefig > > flush(stderr()); flush(stdout()) > > ### Name: statefig > ### Title: Draw a state space figure. > ### Aliases: statefig > ### Keywords: survival hplot > > ### ** Examples > > # Draw a simple competing risks figure > states <- c("Entry", "Complete response", "Relapse", "Death") > connect <- matrix(0, 4, 4, dimnames=list(states, states)) > connect[1, -1] <- c(1.1, 1, 0.9) > statefig(c(1, 3), connect) > > > > cleanEx() > nameEx("strata") > ### * strata > > flush(stderr()); flush(stdout()) > > ### Name: strata > ### Title: Identify Stratification Variables > ### Aliases: strata > ### Keywords: survival > > ### ** Examples > > a <- factor(rep(1:3,4), labels=c("low", "medium", "high")) > b <- factor(rep(1:4,3)) > levels(strata(b)) [1] "1" "2" "3" "4" > levels(strata(a,b,shortlabel=TRUE)) [1] "low, 1" "low, 2" "low, 3" "low, 4" "medium, 1" "medium, 2" [7] "medium, 3" "medium, 4" "high, 1" "high, 2" "high, 3" "high, 4" > > coxph(Surv(futime, fustat) ~ age + strata(rx), data=ovarian) Call: coxph(formula = Surv(futime, fustat) ~ age + strata(rx), data = ovarian) coef exp(coef) se(coef) z p age 0.13735 1.14723 0.04741 2.897 0.00376 Likelihood ratio test=12.69 on 1 df, p=0.0003678 n= 26, number of events= 12 > > > > cleanEx() > nameEx("summary.aareg") > ### * summary.aareg > > flush(stderr()); flush(stdout()) > > ### Name: summary.aareg > ### Title: Summarize an aareg fit > ### Aliases: summary.aareg > ### Keywords: survival > > ### ** Examples > > afit <- aareg(Surv(time, status) ~ age + sex + ph.ecog, data=lung, + dfbeta=TRUE) > summary(afit) slope coef se(coef) robust se z p Intercept 5.05e-03 5.87e-03 4.74e-03 0.00477 1.23 0.219000 age 4.01e-05 7.15e-05 7.23e-05 0.00007 1.02 0.307000 sex -3.16e-03 -4.03e-03 1.22e-03 0.00123 -3.28 0.001030 ph.ecog 3.01e-03 3.67e-03 1.02e-03 0.00102 3.62 0.000299 Chisq=22.84 on 3 df, p=4.36e-05; test weights=aalen > ## Not run: > ##D slope test se(test) robust se z p > ##D Intercept 5.05e-03 1.9 1.54 1.55 1.23 0.219000 > ##D age 4.01e-05 108.0 109.00 106.00 1.02 0.307000 > ##D sex -3.16e-03 -19.5 5.90 5.95 -3.28 0.001030 > ##D ph.ecog 3.01e-03 33.2 9.18 9.17 3.62 0.000299 > ##D > ##D Chisq=22.84 on 3 df, p=4.4e-05; test weights=aalen > ## End(Not run) > > summary(afit, maxtime=600) slope coef se(coef) robust se z p Intercept 4.16e-03 6.67e-03 4.62e-03 0.004580 1.450 0.146000 age 2.82e-05 5.74e-05 7.07e-05 0.000067 0.857 0.392000 sex -2.54e-03 -4.30e-03 1.17e-03 0.001180 -3.660 0.000256 ph.ecog 2.47e-03 3.54e-03 9.99e-04 0.000972 3.640 0.000271 Chisq=27.08 on 3 df, p=5.66e-06; test weights=aalen > ## Not run: > ##D slope test se(test) robust se z p > ##D Intercept 4.16e-03 2.13 1.48 1.47 1.450 0.146000 > ##D age 2.82e-05 85.80 106.00 100.00 0.857 0.392000 > ##D sex -2.54e-03 -20.60 5.61 5.63 -3.660 0.000256 > ##D ph.ecog 2.47e-03 31.60 8.91 8.67 3.640 0.000271 > ##D > ##D Chisq=27.08 on 3 df, p=5.7e-06; test weights=aalen > ## End(Not run) > > > cleanEx() > nameEx("summary.coxph") > ### * summary.coxph > > flush(stderr()); flush(stdout()) > > ### Name: summary.coxph > ### Title: Summary method for Cox models > ### Aliases: summary.coxph > ### Keywords: survival > > ### ** Examples > > fit <- coxph(Surv(time, status) ~ age + sex, lung) > summary(fit) Call: coxph(formula = Surv(time, status) ~ age + sex, data = lung) n= 228, number of events= 165 coef exp(coef) se(coef) z Pr(>|z|) age 0.017045 1.017191 0.009223 1.848 0.06459 . sex -0.513219 0.598566 0.167458 -3.065 0.00218 ** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 exp(coef) exp(-coef) lower .95 upper .95 age 1.0172 0.9831 0.9990 1.0357 sex 0.5986 1.6707 0.4311 0.8311 Concordance= 0.603 (se = 0.025 ) Likelihood ratio test= 14.12 on 2 df, p=9e-04 Wald test = 13.47 on 2 df, p=0.001 Score (logrank) test = 13.72 on 2 df, p=0.001 > > > > cleanEx() > nameEx("summary.survfit") > ### * summary.survfit > > flush(stderr()); flush(stdout()) > > ### Name: summary.survfit > ### Title: Summary of a Survival Curve > ### Aliases: summary.survfit summary.survfitms > ### Keywords: survival > > ### ** Examples > > summary( survfit( Surv(futime, fustat)~1, data=ovarian)) Call: survfit(formula = Surv(futime, fustat) ~ 1, data = ovarian) time n.risk n.event survival std.err lower 95% CI upper 95% CI 59 26 1 0.962 0.0377 0.890 1.000 115 25 1 0.923 0.0523 0.826 1.000 156 24 1 0.885 0.0627 0.770 1.000 268 23 1 0.846 0.0708 0.718 0.997 329 22 1 0.808 0.0773 0.670 0.974 353 21 1 0.769 0.0826 0.623 0.949 365 20 1 0.731 0.0870 0.579 0.923 431 17 1 0.688 0.0919 0.529 0.894 464 15 1 0.642 0.0965 0.478 0.862 475 14 1 0.596 0.0999 0.429 0.828 563 12 1 0.546 0.1032 0.377 0.791 638 11 1 0.497 0.1051 0.328 0.752 > summary( survfit( Surv(futime, fustat)~rx, data=ovarian)) Call: survfit(formula = Surv(futime, fustat) ~ rx, data = ovarian) rx=1 time n.risk n.event survival std.err lower 95% CI upper 95% CI 59 13 1 0.923 0.0739 0.789 1.000 115 12 1 0.846 0.1001 0.671 1.000 156 11 1 0.769 0.1169 0.571 1.000 268 10 1 0.692 0.1280 0.482 0.995 329 9 1 0.615 0.1349 0.400 0.946 431 8 1 0.538 0.1383 0.326 0.891 638 5 1 0.431 0.1467 0.221 0.840 rx=2 time n.risk n.event survival std.err lower 95% CI upper 95% CI 353 13 1 0.923 0.0739 0.789 1.000 365 12 1 0.846 0.1001 0.671 1.000 464 9 1 0.752 0.1256 0.542 1.000 475 8 1 0.658 0.1407 0.433 1.000 563 7 1 0.564 0.1488 0.336 0.946 > > > > cleanEx() > nameEx("survSplit") > ### * survSplit > > flush(stderr()); flush(stdout()) > > ### Name: survSplit > ### Title: Split a survival data set at specified times > ### Aliases: survSplit > ### Keywords: survival utilities > > ### ** Examples > > fit1 <- coxph(Surv(time, status) ~ karno + age + trt, veteran) > plot(cox.zph(fit1)[1]) > # a cox.zph plot of the data suggests that the effect of Karnofsky score > # begins to diminish by 60 days and has faded away by 120 days. > # Fit a model with separate coefficients for the three intervals. > # > vet2 <- survSplit(Surv(time, status) ~., veteran, + cut=c(60, 120), episode ="timegroup") > fit2 <- coxph(Surv(tstart, time, status) ~ karno* strata(timegroup) + + age + trt, data= vet2) > c(overall= coef(fit1)[1], + t0_60 = coef(fit2)[1], + t60_120= sum(coef(fit2)[c(1,4)]), + t120 = sum(coef(fit2)[c(1,5)])) overall.karno t0_60.karno t60_120 t120 -0.034443897 -0.049176157 -0.011031558 -0.007629841 > > # Sometimes we want to split on one scale and analyse on another > # Add a "current age" variable to the mgus2 data set. > temp1 <- mgus2 > temp1$endage <- mgus2$age + mgus2$futime/12 # futime is in months > temp1$startage <- temp1$age > temp2 <- survSplit(Surv(age, endage, death) ~ ., temp1, cut=25:100, + start= "age1", end= "age2") > > # restore the time since enrollment scale > temp2$time1 <- (temp2$age1 - temp2$startage)*12 > temp2$time2 <- (temp2$age2 - temp2$startage)*12 > > # In this data set, initial age and current age have similar utility > mfit1 <- coxph(Surv(futime, death) ~ age + sex, data=mgus2) > mfit2 <- coxph(Surv(time1, time2, death) ~ age1 + sex, data=temp2) > > > > cleanEx() > nameEx("survcondense") > ### * survcondense > > flush(stderr()); flush(stdout()) > > ### Name: survcondense > ### Title: Shorten a (time1, time2) survival dataset > ### Aliases: survcondense > ### Keywords: survival > > ### ** Examples > > dim(aml) [1] 23 3 > test1 <- survSplit(Surv(time, status) ~ ., data=aml, + cut=c(10, 20, 30), id="newid") > dim(test1) [1] 62 5 > > # remove the added rows > test2 <- survcondense(Surv(tstart, time, status) ~ x, test1, id=newid) > dim(test2) [1] 23 5 > > > > cleanEx() > nameEx("survdiff") > ### * survdiff > > flush(stderr()); flush(stdout()) > > ### Name: survdiff > ### Title: Test Survival Curve Differences > ### Aliases: survdiff print.survdiff > ### Keywords: survival > > ### ** Examples > > ## Two-sample test > survdiff(Surv(futime, fustat) ~ rx,data=ovarian) Call: survdiff(formula = Surv(futime, fustat) ~ rx, data = ovarian) N Observed Expected (O-E)^2/E (O-E)^2/V rx=1 13 7 5.23 0.596 1.06 rx=2 13 5 6.77 0.461 1.06 Chisq= 1.1 on 1 degrees of freedom, p= 0.3 > check <- coxph(Surv(futime, fustat) ~ rx, data=ovarian, ties="breslow") > round(summary(check)$sctest, 3) test df pvalue 1.063 1.000 0.303 > > ## Stratified 8-sample test (7 df) > survdiff(Surv(time, status) ~ pat.karno + strata(inst), data=lung) Call: survdiff(formula = Surv(time, status) ~ pat.karno + strata(inst), data = lung) n=224, 4 observations deleted due to missingness. N Observed Expected (O-E)^2/E (O-E)^2/V pat.karno=30 2 1 0.692 0.13720 0.15752 pat.karno=40 2 1 1.099 0.00889 0.00973 pat.karno=50 4 4 1.166 6.88314 7.45359 pat.karno=60 30 27 16.298 7.02790 9.57333 pat.karno=70 41 31 26.358 0.81742 1.14774 pat.karno=80 50 38 41.938 0.36978 0.60032 pat.karno=90 60 38 47.242 1.80800 3.23078 pat.karno=100 35 21 26.207 1.03451 1.44067 Chisq= 21.4 on 7 degrees of freedom, p= 0.003 > check <- coxph(Surv(time, status) ~ factor(pat.karno) + strata(inst), lung) > round(summary(check)$sctest, 3) test df pvalue 21.368 7.000 0.003 > > ## Expected survival for heart transplant patients based on > ## US mortality tables > expect <- survexp(futime ~ 1, data=jasa, cohort=FALSE, + rmap= list(age=(accept.dt - birth.dt), sex=1, year=accept.dt), + ratetable=survexp.us) > ## actual survival is much worse (no surprise) > survdiff(Surv(jasa$futime, jasa$fustat) ~ offset(expect)) Call: survdiff(formula = Surv(jasa$futime, jasa$fustat) ~ offset(expect)) Observed Expected Z p 75.000 0.644 -92.681 0.000 > > # The free light chain data set is close to the population. > e2 <- survexp(futime ~ 1, data=flchain, cohort=FALSE, + rmap= list(age= age*365.25, sex=sex, + year=as.Date(paste0(sample.yr, "-07-01"))), + ratetable= survexp.mn) > survdiff(Surv(futime, death) ~ offset(e2), flchain) Call: survdiff(formula = Surv(futime, death) ~ offset(e2), data = flchain) Observed Expected Z p 2169.0000 2076.8776 -2.0214 0.0432 > > > > cleanEx() > nameEx("survexp") > ### * survexp > > flush(stderr()); flush(stdout()) > > ### Name: survexp > ### Title: Compute Expected Survival > ### Aliases: survexp print.survexp > ### Keywords: survival > > ### ** Examples > > # > # Stanford heart transplant data > # We don't have sex in the data set, but know it to be nearly all males. > # Estimate of conditional survival > fit1 <- survexp(futime ~ 1, rmap=list(sex="male", year=accept.dt, + age=(accept.dt-birth.dt)), method='conditional', data=jasa) > summary(fit1, times=1:10*182.5, scale=365) #expected survival by 1/2 years Call: survexp(formula = futime ~ 1, data = jasa, rmap = list(sex = "male", year = accept.dt, age = (accept.dt - birth.dt)), method = "conditional") time n.risk survival 0.5 41 0.996 1.0 28 0.993 1.5 21 0.989 2.0 16 0.986 2.5 13 0.983 3.0 8 0.980 3.5 7 0.977 4.0 3 0.972 4.5 1 0.969 > > # Estimate of expected survival stratified by prior surgery > survexp(~ surgery, rmap= list(sex="male", year=accept.dt, + age=(accept.dt-birth.dt)), method='ederer', data=jasa, + times=1:10 * 182.5) Call: survexp(formula = ~surgery, data = jasa, rmap = list(sex = "male", year = accept.dt, age = (accept.dt - birth.dt)), times = 1:10 * 182.5, method = "ederer") age ranges from 8.8 to 64.4 years male: 103 female: 0 date of entry from 1967-09-13 to 1974-03-22 time nrisk1 nrisk2 surgery=0 surgery=1 182 87 16 0.996 0.996 365 87 16 0.991 0.993 548 87 16 0.987 0.989 730 87 16 0.982 0.985 912 87 16 0.978 0.981 1095 87 16 0.973 0.977 1278 87 16 0.968 0.973 1460 87 16 0.963 0.969 1642 87 16 0.958 0.964 1825 87 16 0.952 0.960 > > ## Compare the survival curves for the Mayo PBC data to Cox model fit > ## > pfit <-coxph(Surv(time,status>0) ~ trt + log(bili) + log(protime) + age + + platelet, data=pbc) > plot(survfit(Surv(time, status>0) ~ trt, data=pbc), mark.time=FALSE) > lines(survexp( ~ trt, ratetable=pfit, data=pbc), col='purple') > > > > cleanEx() > nameEx("survexp.us") > ### * survexp.us > > flush(stderr()); flush(stdout()) > > ### Name: ratetables > ### Title: Census Data Sets for the Expected Survival and Person Years > ### Functions > ### Aliases: survexp.us survexp.usr survexp.mn > ### Keywords: survival datasets > > ### ** Examples > > survexp.uswhite <- survexp.usr[,,"white",] > > > > cleanEx() > nameEx("survfit.formula") > ### * survfit.formula > > flush(stderr()); flush(stdout()) > > ### Name: survfit.formula > ### Title: Compute a Survival Curve for Censored Data > ### Aliases: survfit.formula [.survfit > ### Keywords: survival > > ### ** Examples > > #fit a Kaplan-Meier and plot it > fit <- survfit(Surv(time, status) ~ x, data = aml) > plot(fit, lty = 2:3) > legend(100, .8, c("Maintained", "Nonmaintained"), lty = 2:3) > > #fit a Cox proportional hazards model and plot the > #predicted survival for a 60 year old > fit <- coxph(Surv(futime, fustat) ~ age, data = ovarian) > plot(survfit(fit, newdata=data.frame(age=60)), + xscale=365.25, xlab = "Years", ylab="Survival") > > # Here is the data set from Turnbull > # There are no interval censored subjects, only left-censored (status=3), > # right-censored (status 0) and observed events (status 1) > # > # Time > # 1 2 3 4 > # Type of observation > # death 12 6 2 3 > # losses 3 2 0 3 > # late entry 2 4 2 5 > # > tdata <- data.frame(time =c(1,1,1,2,2,2,3,3,3,4,4,4), + status=rep(c(1,0,2),4), + n =c(12,3,2,6,2,4,2,0,2,3,3,5)) > fit <- survfit(Surv(time, time, status, type='interval') ~1, + data=tdata, weight=n) > > # > # Three curves for patients with monoclonal gammopathy. > # 1. KM of time to PCM, ignoring death (statistically incorrect) > # 2. Competing risk curves (also known as "cumulative incidence") > # 3. Multi-state, showing Pr(in each state, at time t) > # > fitKM <- survfit(Surv(stop, event=='pcm') ~1, data=mgus1, + subset=(start==0)) > fitCR <- survfit(Surv(stop, event) ~1, + data=mgus1, subset=(start==0)) > fitMS <- survfit(Surv(start, stop, event) ~ 1, id=id, data=mgus1) > ## Not run: > ##D # CR curves show the competing risks > ##D plot(fitCR, xscale=365.25, xmax=7300, mark.time=FALSE, > ##D col=2:3, xlab="Years post diagnosis of MGUS", > ##D ylab="P(state)") > ##D lines(fitKM, fun='event', xmax=7300, mark.time=FALSE, > ##D conf.int=FALSE) > ##D text(3652, .4, "Competing risk: death", col=3) > ##D text(5840, .15,"Competing risk: progression", col=2) > ##D text(5480, .30,"KM:prog") > ## End(Not run) > > > > cleanEx() > nameEx("survfit.matrix") > ### * survfit.matrix > > flush(stderr()); flush(stdout()) > > ### Name: survfit.matrix > ### Title: Create Aalen-Johansen estimates of multi-state survival from a > ### matrix of hazards. > ### Aliases: survfit.matrix > ### Keywords: survival > > ### ** Examples > > etime <- with(mgus2, ifelse(pstat==0, futime, ptime)) > event <- with(mgus2, ifelse(pstat==0, 2*death, 1)) > event <- factor(event, 0:2, labels=c("censor", "pcm", "death")) > > cfit1 <- coxph(Surv(etime, event=="pcm") ~ age + sex, mgus2) > cfit2 <- coxph(Surv(etime, event=="death") ~ age + sex, mgus2) > > # predicted competing risk curves for a 72 year old with mspike of 1.2 > # (median values), male and female. > # The survfit call is a bit faster without standard errors. > newdata <- expand.grid(sex=c("F", "M"), age=72, mspike=1.2) > > AJmat <- matrix(list(), 3,3) > AJmat[1,2] <- list(survfit(cfit1, newdata, std.err=FALSE)) > AJmat[1,3] <- list(survfit(cfit2, newdata, std.err=FALSE)) > csurv <- survfit(AJmat, p0 =c(entry=1, PCM=0, death=0)) > > > > cleanEx() > nameEx("survobrien") > ### * survobrien > > flush(stderr()); flush(stdout()) > > ### Name: survobrien > ### Title: O'Brien's Test for Association of a Single Variable with > ### Survival > ### Aliases: survobrien > ### Keywords: survival > > ### ** Examples > > xx <- survobrien(Surv(futime, fustat) ~ age + factor(rx) + I(ecog.ps), + data=ovarian) > coxph(Surv(time, status) ~ age + strata(.strata.), data=xx) Call: coxph(formula = Surv(time, status) ~ age + strata(.strata.), data = xx) coef exp(coef) se(coef) z p age 0.5681 1.7649 0.1816 3.128 0.00176 Likelihood ratio test=10.55 on 1 df, p=0.001165 n= 230, number of events= 12 > > > > cleanEx() > nameEx("survreg") > ### * survreg > > flush(stderr()); flush(stdout()) > > ### Name: survreg > ### Title: Regression for a Parametric Survival Model > ### Aliases: survreg model.frame.survreg labels.survreg print.survreg.penal > ### print.summary.survreg survReg anova.survreg anova.survreglist > ### Keywords: survival > > ### ** Examples > > # Fit an exponential model: the two fits are the same > survreg(Surv(futime, fustat) ~ ecog.ps + rx, ovarian, dist='weibull', + scale=1) Call: survreg(formula = Surv(futime, fustat) ~ ecog.ps + rx, data = ovarian, dist = "weibull", scale = 1) Coefficients: (Intercept) ecog.ps rx 6.9618376 -0.4331347 0.5815027 Scale fixed at 1 Loglik(model)= -97.2 Loglik(intercept only)= -98 Chisq= 1.67 on 2 degrees of freedom, p= 0.434 n= 26 > survreg(Surv(futime, fustat) ~ ecog.ps + rx, ovarian, + dist="exponential") Call: survreg(formula = Surv(futime, fustat) ~ ecog.ps + rx, data = ovarian, dist = "exponential") Coefficients: (Intercept) ecog.ps rx 6.9618376 -0.4331347 0.5815027 Scale fixed at 1 Loglik(model)= -97.2 Loglik(intercept only)= -98 Chisq= 1.67 on 2 degrees of freedom, p= 0.434 n= 26 > > # > # A model with different baseline survival shapes for two groups, i.e., > # two different scale parameters > survreg(Surv(time, status) ~ ph.ecog + age + strata(sex), lung) Call: survreg(formula = Surv(time, status) ~ ph.ecog + age + strata(sex), data = lung) Coefficients: (Intercept) ph.ecog age 6.73234505 -0.32443043 -0.00580889 Scale: sex=1 sex=2 0.7834211 0.6547830 Loglik(model)= -1137.3 Loglik(intercept only)= -1146.2 Chisq= 17.8 on 2 degrees of freedom, p= 0.000137 n=227 (1 observation deleted due to missingness) > > # There are multiple ways to parameterize a Weibull distribution. The survreg > # function embeds it in a general location-scale family, which is a > # different parameterization than the rweibull function, and often leads > # to confusion. > # survreg's scale = 1/(rweibull shape) > # survreg's intercept = log(rweibull scale) > # For the log-likelihood all parameterizations lead to the same value. > y <- rweibull(1000, shape=2, scale=5) > survreg(Surv(y)~1, dist="weibull") Call: survreg(formula = Surv(y) ~ 1, dist = "weibull") Coefficients: (Intercept) 1.604435 Scale= 0.4965001 Loglik(model)= -2199.4 Loglik(intercept only)= -2199.4 n= 1000 > > # Economists fit a model called `tobit regression', which is a standard > # linear regression with Gaussian errors, and left censored data. > tobinfit <- survreg(Surv(durable, durable>0, type='left') ~ age + quant, + data=tobin, dist='gaussian') > > > > cleanEx() > nameEx("survreg.distributions") > ### * survreg.distributions > > flush(stderr()); flush(stdout()) > > ### Name: survreg.distributions > ### Title: Parametric Survival Distributions > ### Aliases: survreg.distributions > ### Keywords: survival > > ### ** Examples > > # time transformation > survreg(Surv(time, status) ~ ph.ecog + sex, dist='weibull', data=lung) Call: survreg(formula = Surv(time, status) ~ ph.ecog + sex, data = lung, dist = "weibull") Coefficients: (Intercept) ph.ecog sex 5.8195907 -0.3557319 0.4013684 Scale= 0.7310495 Loglik(model)= -1133.1 Loglik(intercept only)= -1147.4 Chisq= 28.73 on 2 degrees of freedom, p= 5.76e-07 n=227 (1 observation deleted due to missingness) > # change the transformation to work in years > # intercept changes by log(365), everything else stays the same > my.weibull <- survreg.distributions$weibull > my.weibull$trans <- function(y) log(y/365) > my.weibull$itrans <- function(y) 365*exp(y) > survreg(Surv(time, status) ~ ph.ecog + sex, lung, dist=my.weibull) Call: survreg(formula = Surv(time, status) ~ ph.ecog + sex, data = lung, dist = my.weibull) Coefficients: (Intercept) ph.ecog sex -0.08030664 -0.35573188 0.40136844 Scale= 0.7310495 Loglik(model)= -1133.1 Loglik(intercept only)= -1147.4 Chisq= 28.73 on 2 degrees of freedom, p= 5.76e-07 n=227 (1 observation deleted due to missingness) > > # Weibull parametrisation > y<-rweibull(1000, shape=2, scale=5) > survreg(Surv(y)~1, dist="weibull") Call: survreg(formula = Surv(y) ~ 1, dist = "weibull") Coefficients: (Intercept) 1.604435 Scale= 0.4965001 Loglik(model)= -2199.4 Loglik(intercept only)= -2199.4 n= 1000 > # survreg scale parameter maps to 1/shape, linear predictor to log(scale) > > # Cauchy fit > mycauchy <- list(name='Cauchy', + init= function(x, weights, ...) + c(median(x), mad(x)), + density= function(x, parms) { + temp <- 1/(1 + x^2) + cbind(.5 + atan(x)/pi, .5+ atan(-x)/pi, + temp/pi, -2 *x*temp, 2*temp*(4*x^2*temp -1)) + }, + quantile= function(p, parms) tan((p-.5)*pi), + deviance= function(...) stop('deviance residuals not defined') + ) > survreg(Surv(log(time), status) ~ ph.ecog + sex, lung, dist=mycauchy) Call: survreg(formula = Surv(log(time), status) ~ ph.ecog + sex, data = lung, dist = mycauchy) Coefficients: (Intercept) ph.ecog sex 5.4517240 -0.3979387 0.4692383 Scale= 0.4788955 Loglik(model)= -274.6 Loglik(intercept only)= -294.9 Chisq= 40.75 on 2 degrees of freedom, p= 1.42e-09 n=227 (1 observation deleted due to missingness) > > > > cleanEx() > nameEx("survregDtest") > ### * survregDtest > > flush(stderr()); flush(stdout()) > > ### Name: survregDtest > ### Title: Verify a survreg distribution > ### Aliases: survregDtest > ### Keywords: survival > > ### ** Examples > > # An invalid distribution (it should have "init =" on line 2) > # surveg would give an error message > mycauchy <- list(name='Cauchy', + init<- function(x, weights, ...) + c(median(x), mad(x)), + density= function(x, parms) { + temp <- 1/(1 + x^2) + cbind(.5 + atan(temp)/pi, .5+ atan(-temp)/pi, + temp/pi, -2 *x*temp, 2*temp^2*(4*x^2*temp -1)) + }, + quantile= function(p, parms) tan((p-.5)*pi), + deviance= function(...) stop('deviance residuals not defined') + ) > > survregDtest(mycauchy, TRUE) [1] "Missing or invalid init function" > > > > cleanEx() > nameEx("tcut") > ### * tcut > > flush(stderr()); flush(stdout()) > > ### Name: tcut > ### Title: Factors for person-year calculations > ### Aliases: tcut [.tcut levels.tcut > ### Keywords: survival > > ### ** Examples > > # For pyears, all time variable need to be on the same scale; but > # futime is in months and age is in years > test <- mgus2 > test$years <- test$futime/30.5 # follow-up in years > > # first grouping based on years from starting age (= current age) > # second based on years since enrollment (all start at 0) > test$agegrp <- tcut(test$age, c(0,60, 70, 80, 100), + c("<=60", "60-70", "70-80", ">80")) > test$fgrp <- tcut(rep(0, nrow(test)), c(0, 1, 5, 10, 100), + c("0-1yr", "1-5yr", "5-10yr", ">10yr")) > > # death rates per 1000, by age group > pfit1 <- pyears(Surv(years, death) ~ agegrp, scale =1000, data=test) > round(pfit1$event/ pfit1$pyears) agegrp <=60 60-70 70-80 >80 89 128 245 479 > > #death rates per 100, by follow-up year and age > # there are excess deaths in the first year, within each age stratum > pfit2 <- pyears(Surv(years, death) ~ fgrp + agegrp, scale =1000, data=test) > round(pfit2$event/ pfit2$pyears) agegrp fgrp <=60 60-70 70-80 >80 0-1yr 139 137 234 437 1-5yr 68 117 241 499 5-10yr 91 145 300 476 >10yr 0 424 0 2440 > > > > cleanEx() > nameEx("tmerge") > ### * tmerge > > flush(stderr()); flush(stdout()) > > ### Name: tmerge > ### Title: Time based merge for survival data > ### Aliases: tmerge > ### Keywords: survival > > ### ** Examples > > # The pbc data set contains baseline data and follow-up status > # for a set of subjects with primary biliary cirrhosis, while the > # pbcseq data set contains repeated laboratory values for those > # subjects. > # The first data set contains data on 312 subjects in a clinical trial plus > # 106 that agreed to be followed off protocol, the second data set has data > # only on the trial subjects. > temp <- subset(pbc, id <= 312, select=c(id:sex, stage)) # baseline data > pbc2 <- tmerge(temp, temp, id=id, endpt = event(time, status)) > pbc2 <- tmerge(pbc2, pbcseq, id=id, ascites = tdc(day, ascites), + bili = tdc(day, bili), albumin = tdc(day, albumin), + protime = tdc(day, protime), alk.phos = tdc(day, alk.phos)) > > fit <- coxph(Surv(tstart, tstop, endpt==2) ~ protime + log(bili), data=pbc2) > > > > cleanEx() > nameEx("tobin") > ### * tobin > > flush(stderr()); flush(stdout()) > > ### Name: tobin > ### Title: Tobin's Tobit data > ### Aliases: tobin > ### Keywords: datasets > > ### ** Examples > > tfit <- survreg(Surv(durable, durable>0, type='left') ~age + quant, + data=tobin, dist='gaussian') > > predict(tfit,type="response") [1] -3.04968679 -4.31254182 -0.54163315 -0.25607164 -1.85017727 -2.40987803 [7] -3.50629220 -0.74041486 -4.05145594 -3.55880518 -0.32223237 -3.68044619 [13] -3.65997456 -2.63254564 0.22382063 0.02177674 -0.09571284 -3.17696755 [19] -0.61521215 -3.13913903 > > > > > cleanEx() > nameEx("transplant") > ### * transplant > > flush(stderr()); flush(stdout()) > > ### Name: transplant > ### Title: Liver transplant waiting list > ### Aliases: transplant > ### Keywords: datasets > > ### ** Examples > > #since event is a factor, survfit creates competing risk curves > pfit <- survfit(Surv(futime, event) ~ abo, transplant) > pfit[,2] #time to liver transplant, by blood type Call: survfit(formula = Surv(futime, event) ~ abo, data = transplant) n nevent rmean se(rmean)* abo=A, death 325 21 164.9734 17.49968 abo=B, death 103 10 202.4902 59.61134 abo=AB, death 41 3 137.8293 19.65319 abo=O, death 346 32 182.1075 28.17050 *restricted mean time in state (max time = 2055 ) > plot(pfit[,2], mark.time=FALSE, col=1:4, lwd=2, xmax=735, + xscale=30.5, xlab="Months", ylab="Fraction transplanted", + xaxt = 'n') > temp <- c(0, 6, 12, 18, 24) > axis(1, temp*30.5, temp) > legend(450, .35, levels(transplant$abo), lty=1, col=1:4, lwd=2) > > # competing risks for type O > plot(pfit[4,], xscale=30.5, xmax=735, col=1:3, lwd=2) > legend(450, .4, c("Death", "Transpant", "Withdrawal"), col=1:3, lwd=2) > > > > cleanEx() > nameEx("udca") > ### * udca > > flush(stderr()); flush(stdout()) > > ### Name: udca > ### Title: Data from a trial of usrodeoxycholic acid > ### Aliases: udca udca1 udca2 > ### Keywords: datasets > > ### ** Examples > > # values found in table 8.3 of the book > fit1 <- coxph(Surv(futime, status) ~ trt + log(bili) + stage, + cluster =id , data=udca1) > fit2 <- coxph(Surv(futime, status) ~ trt + log(bili) + stage + + strata(endpoint), cluster=id, data=udca2) > > > > > cleanEx() > nameEx("untangle.specials") > ### * untangle.specials > > flush(stderr()); flush(stdout()) > > ### Name: untangle.specials > ### Title: Help Process the 'specials' Argument of the 'terms' Function. > ### Aliases: untangle.specials > ### Keywords: survival > > ### ** Examples > > formula <- Surv(tt,ss) ~ x + z*strata(id) > tms <- terms(formula, specials="strata") > ## the specials attribute > attr(tms, "specials") $strata [1] 4 > ## main effects > untangle.specials(tms, "strata") $vars [1] "strata(id)" $tvar [1] 3 $terms [1] 3 > ## and interactions > untangle.specials(tms, "strata", order=1:2) $vars [1] "strata(id)" $tvar [1] 3 $terms [1] 3 4 > > > > cleanEx() > nameEx("uspop2") > ### * uspop2 > > flush(stderr()); flush(stdout()) > > ### Name: uspop2 > ### Title: Projected US Population > ### Aliases: uspop2 > ### Keywords: datasets > > ### ** Examples > > us50 <- uspop2[51:101,, "2000"] #US 2000 population, 50 and over > age <- as.integer(dimnames(us50)[[1]]) > smat <- model.matrix( ~ factor(floor(age/5)) -1) > ustot <- t(smat) %*% us50 #totals by 5 year age groups > temp <- c(50,55, 60, 65, 70, 75, 80, 85, 90, 95) > dimnames(ustot) <- list(c(paste(temp, temp+4, sep="-"), "100+"), + c("male", "female")) > > > > cleanEx() > nameEx("xtfrm.Surv") > ### * xtfrm.Surv > > flush(stderr()); flush(stdout()) > > ### Name: xtfrm.Surv > ### Title: Sorting order for Surv objects > ### Aliases: xtfrm.Surv sort.Surv order.Surv > ### Keywords: survival > > ### ** Examples > > test <- c(Surv(c(10, 9,9, 8,8,8,7,5,5,4), rep(1:0, 5)), Surv(6.2, NA)) > test [1] 10.0 9.0+ 9.0 8.0+ 8.0 8.0+ 7.0 5.0+ 5.0 4.0+ 6.2? > sort(test) [1] 4+ 5 5+ 7 8 8+ 8+ 9 9+ 10 > > > > cleanEx() > nameEx("yates") > ### * yates > > flush(stderr()); flush(stdout()) > > ### Name: yates > ### Title: Population prediction > ### Aliases: yates > ### Keywords: models survival > > ### ** Examples > > fit1 <- lm(skips ~ Solder*Opening + Mask, data = solder) > yates(fit1, ~Opening, population = "factorial") Opening pmm std test chisq df ss Pr L 3.2638 0.33460 global 573.4 2 15980 < 1e-08 M 3.5700 0.30480 S 12.3519 0.31251 > > fit2 <- coxph(Surv(time, status) ~ factor(ph.ecog)*sex + age, lung) > yates(fit2, ~ ph.ecog, predict="risk") # hazard ratio factor(ph.ecog) pmm std test chisq df Pr 0 0.94238 0.46334 factor(ph.ecog) NA NA NA 1 1.42677 0.75697 2 1.74848 3.80017 3 NA 74.01221 > > > > ### *