Hmisc/ 0000755 0001760 0000144 00000000000 12267773234 011362 5 ustar ripley users Hmisc/COPYING 0000644 0001760 0000144 00000001362 12243661443 012407 0 ustar ripley users ## Copyright (C) 2001 Frank E Harrell Jr
##
## This program is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by the
## Free Software Foundation; either version 2, or (at your option) any
## later version.
##
## These functions are distributed in the hope that they will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## The text of the GNU General Public License, version 2, is available
## as http://www.gnu.org/copyleft or by writing to the Free Software
## Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
##
Hmisc/inst/ 0000755 0001760 0000144 00000000000 12243707761 012333 5 ustar ripley users Hmisc/inst/WISHLIST 0000644 0001760 0000144 00000000000 12243661443 013506 0 ustar ripley users Hmisc/inst/CHANGELOG 0000644 0001760 0000144 00000000065 12243707760 013545 0 ustar ripley users See https://github.com/harrelfe/Hmisc/commits/master
Hmisc/inst/NEWS 0000644 0001760 0000144 00000000000 12243661443 013014 0 ustar ripley users Hmisc/inst/todo 0000644 0001760 0000144 00000000250 12243661443 013214 0 ustar ripley users Make latex use options(latexcmd, dvipscmd)
See if R mailbox has generalization of var.inner in
model.frame.default.s
Check arguments to .C("loess_raw") in wtd.stats.s Hmisc/inst/THANKS 0000644 0001760 0000144 00000000144 12243661443 013241 0 ustar ripley users Greg Snow for providing the subplot function and documentations
Greg Snow, Ph.D.
greg.snow@ihc.com
Hmisc/tests/ 0000755 0001760 0000144 00000000000 12262167671 012521 5 ustar ripley users Hmisc/tests/testexportlib.r 0000644 0001760 0000144 00000000066 12243661443 015611 0 ustar ripley users library(Hmisc)
d <- sasxport.get('csv', method='csv')
Hmisc/tests/dataload.r 0000644 0001760 0000144 00000001133 12243661443 014446 0 ustar ripley users # Compare dataload with Stat/Transfer, the latter producing binary S+
# data frames and the former binary R data frames
library(Hmisc)
setwd('/tmp')
ds <- list.files('~/projects/consulting/gsk/REDUCE/oct03/data/sass')
for(i in 1:1) {
sys(paste('dataload fh10sep.xpt z.rda',ds[i]))
load('z.rda')
s <-
read.S(paste('~/projects/consulting/gsk/REDUCE/oct03/data/sass',
ds[i] sys(paste('dataload fh10sep.xpt z.rda',ds[i]))
load('z.rda')
s <-
read.S(paste('~/projects/consulting/gsk/REDUCE/oct03/data/sass',
ds[i],sep='/'))
,sep='/'))
}
Hmisc/tests/test.rda 0000644 0001760 0000144 00000000544 12243661443 014166 0 ustar ripley users RDX2
X
test @ @ @> @? Aìh AïH Aìk,"äPAïK;æ5Û A ¾8ãŽA ¾âÌ names RACE AGE D1 DT1 T1 class
data.frame row.names 1 2 þ þ Hmisc/tests/summary.formula.r 0000644 0001760 0000144 00000000613 12243661443 016040 0 ustar ripley users library(Hmisc)
getHdata(titanic3)
g <- function(x) c(Mean=mean(x,na.rm=TRUE), N=sum(!is.na(x)))
with(titanic3, tapply(age, llist(sex,pclass), g))
g <- function(x) c(Mean=apply(x, 2, mean, na.rm=TRUE),
N=apply(x, 2, function(w)sum(!is.na(w))))
options(digits=3)
summary(cbind(age,fare) ~ sex + pclass, method='cross', fun=g, data=titanic3)
with(titanic3, g(cbind(age,fare)))
Hmisc/tests/aregImpute2.r 0000644 0001760 0000144 00000006600 12243661443 015065 0 ustar ripley users library(rms)
source('/tmp/hmisc.s')
set.seed(4)
n <- c(20000,2000,200)[1]
x2 <- rnorm(n)
x1 <- sqrt(.5) * x2 + rnorm(n, sd=sqrt(1-.5))
y <- 1 * x1 + 1 * x2 + rnorm(n)
type <- c('mcar','mar.x2')[2]
x1m <- if(type=='mcar') ifelse(runif(n) < .5, x1, NA) else
ifelse(rnorm(n,sd=.8) < x2, x1, NA) # MAR on x2, R2 50%, 50% missing
coef(ols(y ~ x1+x2))
coef(ols(y ~ x1m + x2))
Ecdf(x1)
Ecdf(x1m, lty=2, add=TRUE)
Ecdf(x1[is.na(x1m)], lty=2, lwd=3, add=TRUE)
plot(x2, x1m)
plsmo(x2, is.na(x1m), datadensity=TRUE)
dd <- datadist(x2,x1m)
options(datadist='dd')
f <- lrm(is.na(x1m) ~ rcs(x2,4))
plot(f, x2=NA, fun=plogis)
d <- data.frame(x1,x1m,x2,y)
# Find best-validating (in terms of bootstrap R^2) value of nk
g <- aregImpute(~ y + x1m + x2, nk=c(0,3:5), data=d)
g
# nk=0 is best with respect to mean and median absolute deviations
# Another good model is one that forces the target variable (x1m) to
# be transformed linearly using tlinear=TRUE
g <- aregImpute(~y + x1m + x2, nk=0, n.impute=5, data=d, pr=F,
type=c('pmm','regression')[1], plotTrans=FALSE)
s <- is.na(x1m)
c(mean(g$imputed$x1), mean(x1[s]))
ix1 <- g$imputed$x1[,5]
x1i <- x1m
x1i[s] <- ix1
rcorr(cbind(x1,x2,y)[s,])
rcorr(cbind(x1i,x2,y)[s,])
# allowing x1 to be nonlinearly transformed seems to increase the
# correlation between imputed x1 and x2 and imputed x1 and y,
# in addition to variance of imputed values increasing
f <- fit.mult.impute(y ~ x1m + x2, ols, xtrans=g, data=d, pr=F)
coef(f)
g2 <- g
g1 <- g
Ecdf(g1)
Ecdf(g2, add=TRUE, col='blue')
# For MARx2, pmm works reasonably well when nk=3, regression doesn't
# both work well when nk=0
# For MCAR, pmm works well when nk=3, regression works moderately
# well but imputed values have higher variance than real x1 values
# when x1m is missing, and coefficient of x2 on y is 0.92 when n=20000
# Did not get worse by setting nk=6
# Regression imputation works fine with nk=6 with ~y+I(x1m)+x2
# Problem with I(y)+x1m+I(x2)
plot(g)
Ecdf(x1, add=TRUE, col='blue')
Ecdf(x1m, lty=2, add=TRUE)
Ecdf(x1[is.na(x1m)], lty=2, lwd=3, add=TRUE)
# Look at distribution of residuals from areg for various nk
s <- !is.na(x1m)
f <- lm.fit.qr.bare(cbind(y,x2)[s,],x1m[s])
Ecdf(resid(f), lwd=2, col='gray')
py <- f$fitted.values
ry <- resid(f)
g <- areg(cbind(y,x2), x1m, nk=6, xtype=rep('l',2))
p <- g$linear.predictors
r <- resid(g)
Ecdf(r, add=TRUE, col='blue')
plot(py, p)
coef(lm.fit.qr.bare(py,p))
plot(ry,r)
coef(lm.fit.qr.bare(ry,r))
cor(ry,r)
sd(ry)
sd(r)
pr <- predict(g,cbind(x1,x2))
pr2 <- g$linear.predictors
describe(pr-pr2)
Pr <- fitted(f)
plot(Pr,pr)
coef(lm.fit.qr.bare(Pr,pr))
obs.trans <- pr + r
plot(obs.trans, y)
w <- lm.fit.qr.bare(obs.trans,y)
coef(w)
w$rsquared
# Strip out aregImpute code for regression imputation, force linearity,
# no bootstrap, x1 is only variable with NAs
ai <- function(x1, x2, y) {
n <- length(x1)
na <- (1:n)[is.na(x1)]
nna <- length(na)
j <- (1:n)[-na]
f <- lm.fit.qr.bare(cbind(y,x2)[j,], x1[j])
prn(coef(f))
# Predicted mean x1 for only those that missing:
predx1 <- matxv(cbind(y,x2)[na,], coef(f))
Ecdf(predx1, add=TRUE, col='blue')
res <- f$residuals
rp <- length(na) > length(res)
px1 <- predx1 + sample(res, length(na), replace=rp)
px1e <- approxExtrap(f$fitted.values, f$fitted.values, xout=px1)$y
print(describe(abs(px1-px1e)))
Ecdf(px1e, add=TRUE, col='green')
x1[na] <- px1e
x1
}
x1i <- ai(x1m, x2, y)
ols(y ~ x1i + x2)
Hmisc/tests/wtd.r 0000644 0001760 0000144 00000000673 12243661443 013503 0 ustar ripley users # Jose.M.Pavia@uv.es
require(Hmisc)
PerCapita <- c(10, 20, 30, 20, 20, 40)
Group <- c( "A", "B", "B", "A", "A", "B")
W <- c(1.5, 2.3, 4.5, 2.6, 1.7, 3.9)
## Works
wtd.mean(PerCapita, weights=W)
wtd.quantile(PerCapita, weights=W)
wtd.mean(PerCapita[Group=="A"], weights=W[Group=="A"])
wtd.mean(PerCapita[Group=="B"], weights=W[Group=="B"])
g <- function(y) wtd.mean(y[,1],y[,2])
summarize(cbind(PerCapita, W), llist(Group), g, stat.name='y')
Hmisc/tests/largest.empty.r 0000644 0001760 0000144 00000011351 12243661443 015476 0 ustar ripley users library(Hmisc)
par(mfrow=c(2,2))
w <- 2
for(i in 1:4) {
if(w==1) {
y <- exp(rnorm(20))
} else {
x <- rnorm(20)
y <- rnorm(20)
plot(x, y)
z <- list(x=x, y=y)
}
for(m in c('maxdim','area'))
{
for(numbins in c(25,100))
{
u <- largest.empty(z$x, z$y, pl=TRUE,
height=.05*diff(range(z$x)),
width =.05*diff(range(z$y)),
method=m, numbins=numbins)
text(u, labels=m, adj=.5)
if(w==2) points(z)
}
}
}
par(mfrow=c(1,1))
set.seed(1)
x <- rnorm(1000); y <- rnorm(1000)
plot(x,y)
for(m in c('area', 'rexhaustive', 'exhaustive')) {
cat('Method:', m, '\n')
print(system.time(largest.empty(x, y,
width=1.5, height=.5,
method=m, pl=TRUE)))
}
comp <- function(a, b) {
i <- identical(a,b)
if(!i) print(cbind(a,b))
i
}
for(i in 1:70) {
cat(i,'\n')
set.seed(i)
n <- sample(8:800, 1)
x <- runif(n); y <- runif(n)
plot(x, y)
xl <- range(pretty(x)); yl <- range(pretty(y))
a <- largest.empty(x, y, xlim=xl, ylim=yl, width=.03, height=.03,
method='rexhaustive', pl=TRUE)
b <- largest.empty(x, y, xlim=xl, ylim=yl, width=.03, height=.03,
method='exhaustive', pl=TRUE)
comp(a[Cs(x,y,area)], b[Cs(x,y,area)])
comp(a$rect$x, b$rect$x)
comp(a$rect$y, b$rect$y)
}
par(mfrow=c(2,2))
N <- 100; set.seed(8237)
for(i in 1:4) {
x <- runif(N); y <- runif(100)
plot(x, y, pch="+", xlim=c(0,1), ylim=c(0,1), col="darkgray")
for(m in c('area', 'rexhaustive', 'exhaustive')) {
z <- largest.empty(x, y, 0.075, 0.075, pl=TRUE, numbins=100,
xlim=c(0,1), ylim=c(0,1), method=m)
cat(m, 'largest.empty Area:', z$area, '\n')
print(cbind(z$rect$x, z$rect$y))
}
}
if(FALSE) {
z <- Ecdf(y)
points(lr(z$x, z$y, width=1.5, height=.05, pl=0, numbins=20))
lr <- function(x, y, xlim=par('usr')[1:2], ylim=par('usr')[3:4],
width, height, numbins=25, pl=1)
{
area <- 0
xinc <- diff(xlim)/numbins
yinc <- diff(ylim)/numbins
i <- 1
j <- 0
for(xl in seq(xlim[1], xlim[2]-width, by=xinc))
{
for(yl in seq(ylim[1],ylim[2]-height, by=yinc))
{
j <- j + 1
if(j > 500) stop()
xr <- if(any(x >= xl & y >= yl)) min(x[x >= xl & y >= yl])
else xlim[2]
yu <- if(any(y >= yl & x >= xl)) min(y[y >= yl & x >= xl])
else ylim[2]
if(pl==1)
{
## Ecdf(Y)
i <- i + 1
if(i > 8) i <- 2
polygon(c(xl,xr,xr,xl),c(yl,yl,yu,yu), col=i)
}
ar <- (yu-yl)*(xr-xl)
if(ar > area)
{
area <- ar
x1 <- xl
x2 <- xr
y1 <- yl
y2 <- yu
if(pl==2)
{
i <- i + 1
if(i > 8) i <- 2
polygon(c(x1,x2,x2,x1),c(y1,y1,y2,y2), col=i)
}
}
}
}
list(x=mean(c(x1,x2)), y=mean(c(y1,y2)))
}
lr <- function(x, y, xlim=par('usr')[1:2], ylim=par('usr')[3:4],
width, height, numbins=25, pl=0)
{
area <- 0
xinc <- diff(xlim)/numbins
yinc <- diff(ylim)/numbins
i <- 1
for(xl in seq(xlim[1], xlim[2]-width, by=xinc))
{
for(yl in seq(ylim[1],ylim[2]-height, by=yinc))
{
for(xr in seq(xl+width,xlim[2],by=xinc))
{
for(yu in seq(yl+height,ylim[2],by=yinc))
{
if(any(x >= xl & x <= xr & y >= yl & y <= yu)) break
if(pl==1)
{
Ecdf(Y)
polygon(c(xl,xr,xr,xl),c(yl,yl,yu,yu), col=2)
}
## if(!any(x >= xl & x <= xr & y >= yl & y <= yu))
{
ar <- (yu-yl)*(xr-xl)
if(ar > area)
{
area <- ar
x1 <- xl
x2 <- xr
y1 <- yl
y2 <- yu
if(pl==2)
{
i <- i + 1
if(i > 8) i <- 2
polygon(c(x1,x2,x2,x1),c(y1,y1,y2,y2), col=i)
}
}
}
}
}
}
}
list(x=mean(c(x1,x2)), y=mean(c(y1,y2)))
}
}
Hmisc/tests/areg.s 0000644 0001760 0000144 00000003405 12243661443 013620 0 ustar ripley users # Tests for parametric version of ace in acepack
set.seed(1)
library(Hmisc)
source('~/R/test/parAce.s')
ns <- c(30,300,3000,10000)
for(n in ns) {
y <- sample(1:5,n,TRUE)
x <- abs(y-3) + runif(n)
par(mfrow=c(4,3))
for(k in c(0,3:5)) {
z <- parAce(x,y,xtype='spline',ytype='cat',k=k)
plot(x, z$tx)
title(paste('R2=',format(z$rsquared)))
tapply(z$ty, y, range)
a <- tapply(x,y,mean)
b <- tapply(z$ty,y,mean)
plot(a,b)
abline(lsfit(a,b))
# Should get same result to within linear transformation if reverse x and y
w <- parAce(y,x,xtype='cat',ytype='spline',k=k)
plot(z$ty, w$tx)
title(paste('R2=',format(w$rsquared)))
abline(lsfit(z$ty, w$tx))
}
if(n < max(ns)) {cat('Press enter to continue:');readline()}
}
# Example where one category in y differs from others but only in variance of x
n <- 50
y <- sample(1:5,n,TRUE)
x <- rnorm(n)
x[y==1] <- rnorm(sum(y==1), 0, 5)
z <- parAce(x,y,xtype='lin',ytype='cat')
summary(z)
plot(z)
z <- parAce(x,y,xtype='spline',ytype='cat',k=4)
summary(z)
plot(z)
par(mfrow=c(1,2))
for(n in c(200,2000)) {
x <- rnorm(n); y <- rnorm(n) + x
z <- parAce(x,y,xtype='spline',ytype='spline',k=5)
plot(x, z$x)
plot(y, z$y)
title(n)
readline()
}
n <- 200
x1 <- rnorm(n); x2 <- rnorm(n); y <- rnorm(n) + x1^2
z <-
parAce(cbind(x1,x2),y,xtype=c('spline','lin'),ytype='spline',k=3)
par(mfrow=c(2,2))
plot(x1, z$x[,1])
plot(x2, z$x[,2])
plot(y, z$y)
n <- 5000
x1 <- rnorm(n); x2 <- rnorm(n); y <- (x1 + rnorm(n))^2
z <-
parAce(cbind(x1,x2),y,xtype=c('spline','spline'),ytype='spline',k=5)
par(mfrow=c(2,2))
plot(x1, z$x[,1])
plot(x2, z$x[,2])
plot(y, z$y)
n <- 10000
x <- matrix(runif(n*20),n,20)
y <- rnorm(n)
z <- parAce(x,y,xtype=rep('spline',20),ytype='spline',k=5)
Hmisc/tests/readsasxml.r 0000644 0001760 0000144 00000001263 12243661443 015044 0 ustar ripley users w <- xmlTreeParse('demo.xml')
z <- w$doc$children$Transfer$children$Catalog$children$CatalogSchemas$children$Schema$children$SchemaTables$children
u <- z[[length(z)]]$children
v <- matrix(unlist(u), nrow=length(u), byrow=T)
v[,seq(3,ncol(v),by=4)][1,]
[1] "#_4" "#_5" "#_6" "#_7" "#_8" "#_9"
v[,seq(5,ncol(v),by=4)]
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] "111" "ABC" "27" "1" "2" "1976-04-22"
[2,] "222" "XYX" "35.2" "2" "1" "1968-02-10"
[3,] "333" "WHO" "19" "1" "1" "1984-04-20"
[4,] "444" "WHY" "45.7" "1" "3" "1957-08-14"
[5,] "555" "HUH" "82" "2" "3" "1921-05-06"
Process variables one column of v at a time, converting appropriate
ones to numeric.
Hmisc/tests/aregImpute.r 0000644 0001760 0000144 00000007200 12243661443 015000 0 ustar ripley users ## See Paul T von Hippel, The American Statistician 58:160-164, 2004
mvector <- c(0,0)
msigma <- matrix(c(1,0.5,0.5,1), nrow=2)
library(mvtnorm)
library(Hmisc)
# XZ <- rmvnorm(1000, mvector, msigma)
mvrnorm <- function(n, p = 1, u = rep(0, p), S = diag(p)) {
Z <- matrix(rnorm(n * p), p, n)
t(u + t(chol(S)) %*% Z)
}
XZ <- mvrnorm(1000, 2, mvector, msigma)
U <- rnorm(1000)
Y <- XZ[,1]+XZ[,2]+U
summary(lm(Y ~ XZ))
X <- XZ[,1]
Z <- XZ[,2]
Z.ni <- Z
type <- c('random','X<0','Y<0','Z<0')[3]
i <- switch(type,
random= runif(1000) < .5,
'X<0' = X<0,
'Y<0' = Y<0,
'Z<0' = Z<0)
Zna <- Z
Zna[i] <- NA
summary(lm(Y ~ X + Zna))
#w <- aregImpute(~monotone(Y)+monotone(X)+monotone(Zna))
#w <- aregImpute(~I(Y)+I(X)+I(Zna),fweight=.75)
w <- aregImpute(~monotone(Y)+monotone(X)+monotone(Zna), n.impute=5,
type='regression')
plot(w)
ecdf(Zna, add=T, col='red')
ecdf(Z, add=T, col='green')
# plot(w$imputed$Zna, Z[is.na(Zna)]) # use if n.impute=1
# abline(a=0,b=1,lty=2)
# lm(Z[is.na(Zna)] ~ w$imputed$Zna)
coef(fit.mult.impute(Y~X+Zna, lm, w, data=data.frame(X,Zna,Y),pr=F))
#--------------------------------------------------------------------
## From Ewout Steyerberg
# Missing values: illustrate MCAR, MAR, MNAR mechanism
# linear models
library(rms)
## 1. x1 and x2 with y1 outcome
## A) X only
## B) X+Y
#########################
### Test Imputation ###
### use aregImpute in default settings
#########################
n <- 20000 # arbitrary sample size
x2 <- rnorm(n=n, mean=0, sd=1) # x2 standard normal
x1 <- sqrt(.5) * x2 + rnorm(n=n, mean=0, sd=sqrt(1-.5)) # x2 correlated with x1
y1 <- 1 * x1 + 1 * x2 + rnorm(n=n, mean=0, sd=sqrt(1-0)) # generate y
# var of y1 larger with correlated x1 - x2
x1MCAR <- ifelse(runif(n) < .5, x1, NA) # MCAR mechanism for 50% of x1
x1MARx <- ifelse(rnorm(n=n,sd=.8) < x2, x1, NA) # MAR on x2, R2 50%, 50% missing (since mean x2==0)
x1MARy <- ifelse(rnorm(n=n,sd=(sqrt(3)*.8)) >y1, x1, NA) # MAR on y, R2 50%, 50% missing (since mean y1==0)
# x1MNAR <- ifelse(rnorm(n=n,sd=.8) < x1, x1, NA) # MNAR on x1, R2 50%, 50% missing (since mean x1==0)
x1MNAR <- ifelse(rnorm(n=n,sd=.8) < x1, x1, NA) # MNAR on x1, R2 50%, 50% missing (since mean x1==0)
plot(x2, x1MARx)
plsmo(x2, is.na(x1MARx), datadensity=TRUE)
dd <- datadist(x2,x1MARx)
options(datadist='dd')
f <- lrm(is.na(x1MARx) ~ rcs(x2,4))
plot(f, x2=NA, fun=plogis)
d <- data.frame(y1,x1,x2,x1MCAR, x1MARx,x1MARy,x1MNAR)
ols(y1~x1+x2)
ols(y1 ~ x1MARx + x2)
# MAR on x: 3 approaches; CC, MI with X, MI with X+Y
g <- aregImpute(~I(y1) + I(x1MARx) + I(x2), n.impute=5, data=d, pr=F,
type=c('pmm','regression')[1], match='closest', plotTrans=TRUE)
plot(g)
Ecdf(x1, add=TRUE, col='red',q=.5)
Ecdf(x1MARx, add=TRUE, col='blue',q=.5)
f <- fit.mult.impute(y1 ~ x1MARx + x2, ols, xtrans=g, data=d, pr=F)
g <- aregImpute(~y1 + x1MARx + x2, n.impute=5, data=d, pr=F, type='regression', plotTrans=TRUE)
f <- fit.mult.impute(y1 ~ x1MARx + x2, ols, xtrans=g, data=d, pr=F)
# MAR on y: 3 approaches; CC, MI with X, MI with X+Y
f <- ols(y1~x1MARy+x2)
Mat.imputation[i,29:32] <- c(coef(f)[2:3], sqrt(diag(Varcov(f)))[2:3])
g <- aregImpute(~x1MARy + x2, n.impute=5, data=d, pr=F, type='regression')
f <- fit.mult.impute(y1 ~ x1MARy + x2, ols, xtrans=g, data=d, pr=F)
Mat.imputation[i,33:36] <- c(coef(f)[2:3], sqrt(diag(Varcov(f)))[2:3])
g <- aregImpute(~y1 + x1MARy + x2, n.impute=5, data=d, pr=F, type='regression')
f <- fit.mult.impute(y1 ~ x1MARy + x2, ols, xtrans=g, data=d, pr=F)
Mat.imputation[i,37:40] <- c(coef(f)[2:3], sqrt(diag(Varcov(f)))[2:3])
Hmisc/tests/consolidate.R 0000644 0001760 0000144 00000004616 12243661443 015152 0 ustar ripley users library(Hmisc)
named.equal <- function(x,y) {
x.names <- sort(names(x))
y.names <- sort(names(y))
if(!identical(x.names, y.names)) {
cat("x names: ", paste(x.names, consolidate=', '), "\ny names: ", paste(y.names, consolidate=', '), sep='')
stop("x and y do not have the same element names")
}
if(any(x.names == "") || any(y.names == "")) {
cat("x names: ", paste(x.names, consolidate=', '), "\ny names: ", paste(y.names, consolidate=', '), sep='')
stop("x or y has unnamed elements")
}
if(!identical(x[x.names], y[x.names])) {
print(x)
print(y)
stop("x and y do not have identical element values")
}
return(TRUE)
}
a <- c(a = 5, b = 2, c = 4)
b <- c(c = 3, d = 4, e = 12)
c <- list(q = 5, h = 2, b = 14)
d <- list(w = 2, h = 3, e = 21)
a1 <- structure(c(5, 2, 3, 4, 12),
.Names = c("a", "b", "c", "d", "e"))
a2 <- structure(list(a = 5, b = 14, c = 4, q = 5, h = 2),
.Names = c("a", "b", "c", "q", "h"))
a3 <- structure(list(q = 5, h = 2, b = 2, a = 5, c = 4),
.Names = c("q", "h", "b", "a", "c"))
a4 <- structure(list(q = 5, h = 3, b = 14, w = 2, e = 21),
.Names = c("q", "h", "b", "w", "e"))
a5 <- structure(c(5,2,4,4,12),
.Names = c("a", "b", "c", "d", "e"))
a6 <- structure(list(a = 5, b = 2, c = 4, q = 5, h = 2),
.Names = c("a", "b", "c", "q", "h"))
a7 <- structure(list(q = 5, h = 2, b = 14, a = 5, c = 4),
.Names = c("q", "h", "b", "a", "c"))
a8 <- structure(list(q = 5, h = 2, b = 14, w = 2, e = 21),
.Names = c("q", "h", "b", "w", "e"))
r1 <- consolidate(a, b, protect=FALSE)
r2 <- consolidate(a, c, protect=FALSE)
r3 <- consolidate(c, a, protect=FALSE)
r4 <- consolidate(c, d, protect=FALSE)
is.vector(r1)
is.list(r2)
is.list(r3)
is.list(r4)
named.equal(r1, a1)
named.equal(r2, a2)
named.equal(r3, a3)
named.equal(r4, a4)
r5 <- consolidate(a, b, protect=TRUE)
r6 <- consolidate(a, c, protect=TRUE)
r7 <- consolidate(c, a, protect=TRUE)
r8 <- consolidate(c, d, protect=TRUE)
named.equal(r5, a5)
named.equal(r6, a6)
named.equal(r7, a7)
named.equal(r8, a8)
named.equal(r3, r6)
named.equal(r2, r7)
e <- a
consolidate(e) <- b
named.equal(e, r1)
e <- a
consolidate(e, protect = TRUE) <- b
named.equal(e, r5)
f <- c(1,2,3,5)
consolidate(attributes(f)) <- c
named.equal(attributes(f), c)
consolidate(attributes(f)) <- NULL
named.equal(attributes(f), c)
Hmisc/tests/summaryP.r 0000644 0001760 0000144 00000003167 12254600622 014515 0 ustar ripley users require(Hmisc)
n <- 100
f <- function(na=FALSE) {
x <- sample(c('N', 'Y'), n, TRUE)
if(na) x[runif(100) < .1] <- NA
x
}
set.seed(1)
d <- data.frame(x1=f(), x2=f(), x3=f(), x4=f(), x5=f(), x6=f(), x7=f(TRUE),
age=rnorm(n, 50, 10),
race=sample(c('Asian', 'Black/AA', 'White'), n, TRUE),
sex=sample(c('Female', 'Male'), n, TRUE),
treat=sample(c('A', 'B'), n, TRUE),
region=sample(c('North America','Europe'), n, TRUE))
d <- upData(d, labels=c(x1='MI', x2='Stroke', x3='AKI', x4='Migraines',
x5='Pregnant', x6='Other event', x7='MD withdrawal',
race='Race', sex='Sex'))
dasna <- subset(d, region=='North America')
with(dasna, table(race, treat))
png('/tmp/summaryP.png', width=550, height=550)
s <- summaryP(race + sex + yn(x1, x2, x3, x4, x5, x6, x7, label='Exclusions') ~
region + treat, data=d)
# add exclude1=FALSE to include female category
plot(s, val ~ freq | region * var, groups=treat) # best looking
dev.off()
plot(s, groups=treat)
# plot(s, groups=treat, outerlabels=FALSE) for standard lattice output
plot(s, groups=region, key=list(columns=2, space='bottom'))
plot(summaryP(race + sex ~ region, data=d, exclude1=FALSE), col='green')
# Make your own plot using data frame created by summaryP
dotplot(val ~ freq | region * var, groups=treat, data=s,
xlim=c(0,1), scales=list(y='free', rot=0), xlab='Fraction',
panel=function(x, y, subscripts, ...) {
denom <- s$denom[subscripts]
x <- x / denom
panel.dotplot(x=x, y=y, subscripts=subscripts, ...) })
Hmisc/tests/howto.html 0000644 0001760 0000144 00000002202 12243661443 014536 0 ustar ripley users
How to Create SAS Transport Files
How to Create SAS Transport Files
- If any of the datasets you are exporting are not already in the
WORK library, copy them to there:
PROC COPY IN=mylib OUT=WORK; SELECT test1 test2; RUN;
- If you have created value label formats using PROC FORMAT;
VALUE ..., output these value labels into a SAS
dataset:
PROC FORMAT CNTLOUT=format;RUN;
- Define a LIBNAME to reference the SAS Version 5 transport
file engine:
libname xp SASV5XPT "test.xpt";
- Copy all needed datasets to, e.g., test.xpt:
PROC COPY IN=work OUT=xp;SELECT test1 test2 format;RUN;
DO NOT use PROC CPORT to create the file.
Frank E Harrell Jr
Last modified: Fri Jun 6 15:47:58 EDT 2003
Hmisc/tests/ace.s 0000644 0001760 0000144 00000001256 12243661443 013434 0 ustar ripley users # Verify that ace works for categorical response variable, giving
# a y-transformation that is a linear translation of Fisher's optimum scores
# (y-specific mean of x) when there is one predictor that is forced to
# be linear. For now using aregImpute's override of ace
library(acepack)
set.seed(1)
y <- rep(1:3,100)
x <- -3*(y==1) -7*(y==2) + 30*(y==3) + runif(300) - .5
xbar <- tapply(as.matrix(x), y, mean)
xbar
1 2 3
-3.010843 -7.021050 30.002227
z <- ace(x, y, cat=0, lin=1)
table(y, z$ty)
-0.82366 -0.583755 1.40741
1 0 100 0
2 100 0 0
3 0 0 100
plot(xbar[y], z$ty)
cor(xbar[y], z$ty)
[1] 1
Hmisc/tests/test.xml 0000644 0001760 0000144 00000021374 12243661443 014224 0 ustar ripley users
111
ABC
27
1
2
1976-04-22
222
XYX
35.2
2
1
1968-02-10
333
WHO
19
1
1
1984-04-20
444
WHY
45.7
1
3
1957-08-14
555
HUH
82
2
3
1921-05-06
Hmisc/tests/latexTherm.r 0000644 0001760 0000144 00000001443 12243661443 015016 0 ustar ripley users # Usage: After running R, run latex on /tmp/z.tex
require(Hmisc)
source('~/R/Hmisc/R/latexTherm.s')
f <- '/tmp/lt.tex'
cat('', file='/tmp/z.tex'); cat('', file=f)
ct <- function(...) cat(..., sep='', file='/tmp/z.tex', append=TRUE)
ct('\\documentclass{report}\\begin{document}\n')
latexTherm(c(1, 1, 1, 1), name='lta', file=f)
latexTherm(c(.5, .7, .4, .2), name='ltb', file=f)
latexTherm(c(.5, NA, .75, 0), w=.3, h=1, name='ltc', extra=0, file=f)
latexTherm(c(.5, NA, .75, 0), w=.3, h=1, name='ltcc', file=f)
latexTherm(c(0, 0, 0, 0), name='ltd', file=f)
ct('\\input{/tmp/lt}\n')
ct('This is a the first:\\lta and the second:\\ltb\\\\ and the third without extra:\\ltc END\\\\\nThird with extra:\\ltcc END\\\\ \n\\vspace{2in}\\\\ \n')
ct('All data = zero, frame only:\\ltd\\\\')
ct('\\end{document}\n')
Hmisc/tests/summaryRc.r 0000644 0001760 0000144 00000001176 12243661443 014666 0 ustar ripley users require(Hmisc)
set.seed(177)
sex <- factor(sample(c("m","f"), 500, rep=TRUE))
age <- rnorm(500, 50, 5)
bp <- rnorm(500, 120, 7)
units(age) <- 'Years'; units(bp) <- 'mmHg'
label(bp) <- 'Systolic Blood Pressure'
L <- .5*(sex == 'm') + 0.1 * (age - 50)
y <- rbinom(500, 1, plogis(L))
png('/tmp/summaryRc.png', height=750)
spar(mfrow=c(3,2), top=2, cex.axis=1)
summaryRc(y ~ age + bp)
# For x limits use 1st and 99th percentiles to frame extended box plots
summaryRc(y ~ age + bp, bpplot='top', datadensity=FALSE, trim=.01)
summaryRc(y ~ age + bp + stratify(sex),
label.curves=list(keys='lines'), nloc=list(x=.1, y=.05))
dev.off()
Hmisc/tests/latex.s 0000644 0001760 0000144 00000000545 12243661443 014021 0 ustar ripley users require(Hmisc)
x <- cbind(x1=1:5, x2=2:6)
f <- '/tmp/z.tex'
cat('\\documentclass{article}\n\\usepackage{lscape,ctable,booktabs}\n\\begin{document}\n', file=f)
w <- latex(x, booktabs=TRUE, landscape=TRUE, file=f, append=TRUE)
w <- latex(x, ctable=TRUE, landscape=TRUE, file=f, append=TRUE)
cat('\\end{document}\n', file=f, append=TRUE)
# Run pdflatex /tmp/z
Hmisc/tests/test.xpt 0000644 0001760 0000144 00000002760 12243661443 014235 0 ustar ripley users HEADER RECORD*******LIBRARY HEADER RECORD!!!!!!!000000000000000000000000000000 SAS SAS SASLIB 8.2 AIX 20DEC02:12:34:2320DEC02:12:34:23 HEADER RECORD*******MEMBER HEADER RECORD!!!!!!!000000000000000001600000000140 HEADER RECORD*******DSCRPTR HEADER RECORD!!!!!!!000000000000000000000000000000 SAS TEST SASDATA 8.2 AIX 20DEC02:12:34:2320DEC02:12:34:23 HEADER RECORD*******NAMESTR HEADER RECORD!!!!!!!000000000500000000000000000000 RACE RACE AGE Age at Beginning of Study D1 MMDDYY
DT1 DATETIME T1 TIME HEADER RECORD*******OBS HEADER RECORD!!!!!!!000000000000000000000000000000 A B D<* HOQäÖ Dé A@ B D<† HOË1o Dž Hmisc/tests/latexTherm.Rnw 0000644 0001760 0000144 00000001272 12243661443 015323 0 ustar ripley users \documentclass{report}
\begin{document}
@
<>=
require(Hmisc)
knitrSet()
latexTherm(c(1, 1, 1, 1), name='lta')
latexTherm(c(.5, .7, .4, .2), name='ltb')
latexTherm(c(.5, NA, .75, 0), w=.3, h=1, name='ltc', extra=0)
latexTherm(c(.5, NA, .75, 0), w=.3, h=1, name='ltcc')
latexTherm(c(0, 0, 0, 0), name='ltd')
@
This is a the first:\lta and the second:\ltb\\ and the third without extra:\ltc END\\
Third with extra:\ltcc END\\
\vspace{2in}\\
All data = zero, frame only:\ltd
<>=
latexTherm(c(.5, .7, .4, .2), name='lte')
@
% Note that the period after figure is necessary
<>=
plot(runif(20))
@
\end{document}
Hmisc/tests/test.sas 0000644 0001760 0000144 00000001537 12243661443 014211 0 ustar ripley users libname x SASV5XPT "test.xpt";
libname y SASV5XPT "test2.xpt";
PROC FORMAT; VALUE race 1=green 2=blue 3=purple; RUN;
PROC FORMAT CNTLOUT=format;RUN;
data test;
LENGTH race 3 age 4;
age=30; label age="Age at Beginning of Study";
race=2;
d1='3mar2002'd ;
dt1='3mar2002 9:31:02'dt;
t1='11:13:45't;
output;
age=31;
race=4;
d1='3jun2002'd ;
dt1='3jun2002 9:42:07'dt;
t1='11:14:13't;
output;
format d1 mmddyy10. dt1 datetime. t1 time. race race.;
run;
data z; LENGTH x3 3 x4 4 x5 5 x6 6 x7 7 x8 8;
DO i=1 TO 100;
x3=ranuni(3);
x4=ranuni(5);
x5=ranuni(7);
x6=ranuni(9);
x7=ranuni(11);
x8=ranuni(13);
output;
END;
DROP i;
RUN;
PROC MEANS;RUN;
/* PROC CPORT LIB=work FILE='test.xpt';run; * no; */
PROC COPY IN=work OUT=x;SELECT test;RUN;
PROC COPY IN=work OUT=y;SELECT test format z;RUN;
Hmisc/tests/summaryS.r 0000644 0001760 0000144 00000011451 12262067342 014520 0 ustar ripley users require(Hmisc)
n <- 100
set.seed(1)
d <- data.frame(sbp=rnorm(n, 120, 10),
dbp=rnorm(n, 80, 10),
age=rnorm(n, 50, 10),
days=sample(1:n, n, TRUE),
S1=Surv(2*runif(n)), S2=Surv(runif(n)),
race=sample(c('Asian', 'Black/AA', 'White'), n, TRUE),
sex=sample(c('Female', 'Male'), n, TRUE),
treat=sample(c('A', 'B'), n, TRUE),
region=sample(c('North America','Europe'), n, TRUE),
meda=sample(0:1, n, TRUE), medb=sample(0:1, n, TRUE))
d <- upData(d, labels=c(sbp='Systolic BP', dbp='Diastolic BP',
race='Race', sex='Sex', treat='Treatment',
days='Time Since Randomization',
S1='Hospitalization', S2='Re-Operation',
meda='Medication A', medb='Medication B'),
units=c(sbp='mmHg', dbp='mmHg', age='years', days='days'))
Png <- function(z) png(paste('/tmp/summaryS', z, '.png', sep=''))
Png(1)
s <- summaryS(age + sbp + dbp ~ days + region + treat, data=d)
# plot(s) # 3 pages
plot(s, groups='treat', datadensity=TRUE,
scat1d.opts=list(lwd=.5, nhistSpike=0))
dev.off()
Png(2)
plot(s, groups='treat', panel=panel.loess,
key=list(space='bottom', columns=2),
datadensity=TRUE, scat1d.opts=list(lwd=.5))
dev.off()
# Show both points and smooth curves:
Png(3)
plot(s, groups='treat',
panel=function(...) {panel.xyplot(...); panel.loess(...)})
dev.off()
plot(s, y ~ days | yvar * region, groups='treat')
# Make your own plot using data frame created by summaryP
xyplot(y ~ days | yvar * region, groups=treat, data=s,
scales=list(y='free', rot=0))
# Use loess to estimate the probability of two different types of events as
# a function of time
s <- summaryS(meda + medb ~ days + treat + region, data=d)
pan <- function(...)
panel.plsmo(..., type='l', label.curves=max(which.packet()) == 1,
datadensity=TRUE)
Png(4)
plot(s, groups='treat', panel=pan, paneldoesgroups=TRUE,
scat1d.opts=list(lwd=.7), cex.strip=.8)
dev.off()
# Demonstrate dot charts of summary statistics
s <- summaryS(age + sbp + dbp ~ region + treat, data=d, fun=mean)
plot(s)
Png(5)
plot(s, groups='treat', funlabel=expression(bar(X)))
dev.off()
# Compute parametric confidence limits for mean, and include sample sizes
f <- function(x) {
x <- x[! is.na(x)]
c(smean.cl.normal(x, na.rm=FALSE), n=length(x))
}
s <- summaryS(age + sbp + dbp ~ region + treat, data=d, fun=f)
# Draw [ ] for lower and upper confidence limits in addition to thick line
Png(6)
plot(s, funlabel=expression(bar(X) %+-% t[0.975] %*% s),
pch.stats=c(Lower=91, Upper=93)) # type show.pch() to see defs.
dev.off()
Png(7)
plot(s, textonly='n', textplot='Mean', digits=1)
dev.off()
# Customize printing of statistics to use X bar symbol and smaller
# font for n=...
cust <- function(y) {
means <- format(round(y[, 'Mean'], 1))
ns <- format(y[, 'n'])
simplyformatted <- paste('X=', means, ' n=', ns, ' ', sep='')
s <- NULL
for(i in 1:length(ns)) {
w <- paste('paste(bar(X)==', means[i], ',~~scriptstyle(n==', ns[i],
'))', sep='')
s <- c(s, parse(text=w))
}
list(result=s,
longest=simplyformatted[which.max(nchar(simplyformatted))])
}
Png(8)
plot(s, groups='treat', cex.values=.65,
textplot='Mean', custom=cust,
key=list(space='bottom', columns=2,
text=c('Treatment A:','Treatment B:')))
dev.off()
## Stratifying by region and treat fit an exponential distribution to
## S1 and S2 and estimate the probability of an event within 0.5 years
f <- function(y) {
hazard <- sum(y[,2]) / sum(y[,1])
1. - exp(- hazard * 0.5)
}
s <- summaryS(S1 + S2 ~ region + treat, data=d, fun=f)
plot(s, groups='treat', funlabel='Prob[Event Within 6m]', xlim=c(.3, .7))
## Demonstrate simultaneous use of fun and panel
## First show the same quantile intervals used in panel.bppplot by
## default, stratified by region and day
d <- upData(d, days=round(days / 30) * 30)
g <- function(y) {
probs <- c(0.05, 0.125, 0.25, 0.375)
probs <- sort(c(probs, 1 - probs))
y <- y[! is.na(y)]
w <- hdquantile(y, probs)
m <- hdquantile(y, 0.5, se=TRUE)
se <- as.numeric(attr(m, 'se'))
c(Median=as.numeric(m), w, se=se, n=length(y))
}
s <- summaryS(sbp + dbp ~ days + region, fun=g, data=d)
Png(9)
plot(s, groups='region', panel=mbarclPanel, paneldoesgroups=TRUE)
dev.off()
## Show Wilson confidence intervals for proportions, and confidence
## intervals for difference in two proportions
g <- function(y) {
y <- y[!is.na(y)]
n <- length(y)
p <- mean(y)
se <- sqrt(p * (1. - p) / n)
structure(c(binconf(sum(y), n), se=se, n=n),
names=c('Proportion', 'Lower', 'Upper', 'se', 'n'))
}
s <- summaryS(meda + medb ~ days + region, fun=g, data=d)
Png(10)
plot(s, groups='region', panel=mbarclPanel, paneldoesgroups=TRUE)
dev.off()
Hmisc/tests/test.r 0000644 0001760 0000144 00000000045 12243661443 013655 0 ustar ripley users library(foreign)
w <- lookup.xport('
Hmisc/tests/hoeff.r 0000644 0001760 0000144 00000002156 12243661443 013772 0 ustar ripley users require(Hmisc)
.Fortran('jrank', as.double(1:5), as.double(1:5), 5L,
double(5), double(5), double(5))
hoeffd(1:6, c(1,3,2,4,5,6))
y <- 1:20; y[3] <- 17; y[17] <- 3
hoeffd(1:20, y)$D
set.seed(5)
x <- runif(800); y <- runif(800)
hoeffd(x,y)$D
for(n in c(50,100,200,400,1000)) {
set.seed(1)
x <- seq(-10,10,length=n)
y <- x*sign(runif(n,-1,1))
h <- hoeffd(x,y)
print(c(h$D[1,2], h$aad[1,2], h$maxad[1,2]))
}
#[1] 0.06812286 in old version (real*4 in places)
#[1] 0.04667929
#[1] 0.05657654
#[1] 0.07048487
#[1] 0.06323746
# From http://www.sciencemag.org/content/suppl/2011/12/14/334.6062.1518.DC1/Reshef.SOM.pdf
# Table S2: Definitions of functions used for Figure 2A in the Science article
w <- function(y) {
ylab <- deparse(substitute(y))
plot(x, y, ylab=substitute(y), type='l')
h <- hoeffd(x, y)
cat(ylab, '\n')
print(c(D=h$D[1,2],P=h$P[1,2],aDif=h$aad[1,2],mDif=h$maxad[1,2]))
}
x <- seq(0, 1, length=320)
par(mfrow=c(3,3))
w(x)
w(4*(x-.5)^2)
w(128*(x-1/3)^3 -48*(x-1/3)^2 - 12*(x-1/3) + 2)
w(10^(10*x) - 1)
w(sin(10*pi*x) + x)
w(sin(16*pi*x))
w(sin(13*pi*x))
w(sin(7*pi*x*(1+x)))
w(runif(320))
Hmisc/tests/csv/ 0000755 0001760 0000144 00000000000 12243661443 013307 5 ustar ripley users Hmisc/tests/csv/TEST.csv 0000755 0001760 0000144 00000000113 12243661443 014601 0 ustar ripley users race,age,d1,dt1,t1
2,30,15402,1330767062,40425
4,31,15494,1338716527,40453
Hmisc/tests/csv/_contents_.csv 0000755 0001760 0000144 00000001763 12243661443 016171 0 ustar ripley users MEMNAME,MEMLABEL,NAME,TYPE,LENGTH,LABEL,FORMAT,NOBS
FORMAT,,DATATYPE,2,8,Date/time/datetime?,,3
FORMAT,,DECSEP,2,1,Decimal separator,,3
FORMAT,,DEFAULT,1,3,Default length,,3
FORMAT,,DIG3SEP,2,1,Three-digit separator,,3
FORMAT,,EEXCL,2,1,End exclusion,,3
FORMAT,,END,2,16,Ending value for format,,3
FORMAT,,FILL,2,1,Fill character,,3
FORMAT,,FMTNAME,2,8,Format name,,3
FORMAT,,FUZZ,1,8,Fuzz value,,3
FORMAT,,HLO,2,11,Additional information,,3
FORMAT,,LABEL,2,6,Format value label,,3
FORMAT,,LANGUAGE,2,8,Language for date strings,,3
FORMAT,,LENGTH,1,3,Format length,,3
FORMAT,,MAX,1,3,Maximum length,,3
FORMAT,,MIN,1,3,Minimum length,,3
FORMAT,,MULT,1,8,Multiplier,,3
FORMAT,,NOEDIT,1,3,Is picture string noedit?,,3
FORMAT,,PREFIX,2,2,Prefix characters,,3
FORMAT,,SEXCL,2,1,Start exclusion,,3
FORMAT,,START,2,16,Starting value for format,,3
FORMAT,,TYPE,2,1,Type of format,,3
TEST,,age,1,4,Age at Beginning of Study,,2
TEST,,d1,1,8,,MMDDYY,2
TEST,,dt1,1,8,,DATETIME,2
TEST,,race,1,3,,RACE,2
TEST,,t1,1,8,,TIME,2
Hmisc/tests/csv/FORMAT.csv 0000755 0001760 0000144 00000000417 12243661443 015021 0 ustar ripley users FMTNAME,START,END,LABEL,MIN,MAX,DEFAULT,LENGTH,FUZZ,PREFIX,MULT,FILL,NOEDIT,TYPE,SEXCL,EEXCL,HLO,DECSEP,DIG3SEP,DATATYPE,LANGUAGE
RACE,1,1,green,1,40,6,6,1E-12,,0,,0,N,N,N,,,,,
RACE,2,2,blue,1,40,6,6,1E-12,,0,,0,N,N,N,,,,,
RACE,3,3,purple,1,40,6,6,1E-12,,0,,0,N,N,N,,,,,
Hmisc/tests/xYplotFilledBands.r 0000644 0001760 0000144 00000001151 12243661443 016264 0 ustar ripley users # This example uses the summarize function in Hmisc to
# compute the median and outer quartiles. The outer quartiles are
# displayed using "filled bands"
set.seed(111)
dfr <- expand.grid(month=1:12, year=c(1997,1998), reps=1:100)
month <- dfr$month; year <- dfr$year
y <- abs(month-6.5) + 2*runif(length(month)) + year-1997
s <- summarize(y, llist(month,year), smedian.hilow, conf.int=.5)
# filled bands: default fill = pastel colors matching solid colors
# in superpose.line (this works differently in R)
xYplot ( Cbind ( y, Lower, Upper ) ~ month, groups=year,
method="filled bands" , data=s, type="l")
Hmisc/tests/inverseFunction.r 0000644 0001760 0000144 00000002645 12243661443 016067 0 ustar ripley users library(Hmisc)
z <-
structure(list(x = c(-1.01157732356344, -0.844512148091014, -0.723389895873506,
-0.598091014269186, -0.518735055919784, -0.42684920940995, -0.347493251060548,
-0.263960663324335, -0.113602005399152, 0.195468569224836, 0.441889703046664,
0.746783648283841, 0.901318935595835, 0.947261858850752, 0.99738141149248
), y = c(-1.0034980323568, -0.861827721906428, -0.668211630957586,
-0.49820725841714, -0.309313511149978, -0.0920857017927416, 0.0637516397026673,
0.0920857017927417, 0.0212505465675558, -0.0826410144293835,
-0.0873633581110625, 0.0684739833843463, 0.517096633143857, 0.75321381722781,
0.894884127678181)), .Names = c("x", "y"))
library(rms)
dd <- datadist(as.data.frame(z)); options(datadist='dd')
f <- ols(y ~ rcs(x,5), data=z)
par(mfrow=c(1,2))
plot(f)
abline(v=c(-.1772,.31375))
points(z)
xx <- seq(-1,1,length=1000)
g <- Function(f)
h <- inverseFunction(xx, g(xx))
plot(xx[-1], diff(g(xx)))
abline(h=0)
par(mfrow=c(1,1))
plot(f)
turns <- formals(h)$turns
abline(v=turns)
a <- seq(-1.2,1.2,by=.001)
w <- h(a)
for(i in 1:ncol(w)) lines(w[,i], a, col=i+1)
w <- h(a, what='sample')
points(w, a, col='gray')
x <- seq(-1, 1, by=.01)
y <- x^2
h <- inverseFunction(x,y)
formals(h)$turns # vertex
a <- seq(0, 1, by=.01)
plot(0, 0, type='n', xlim=c(-.5,1.5))
lines(a, h(a)[,1]) ## first inverse
lines(a, h(a)[,2], col='red') ## second inverse
a <- c(-.1, 1.01, 1.1, 1.2)
points(a, h(a)[,1])
Hmisc/tests/procmeans.txt 0000644 0001760 0000144 00000001270 12243661443 015244 0 ustar ripley users The MEANS Procedure
Variable N Mean Std Dev Minimum Maximum
--------------------------------------------------------------------------
x3 100 0.5131445 0.2944341 0.0057602 0.9938965
x4 100 0.5119257 0.3100749 0.0263616 0.9826741
x5 100 0.4887739 0.3141976 0.0041338 0.9972528
x6 100 0.4986746 0.2710817 0.0100958 0.9951080
x7 100 0.5533156 0.2843679 0.0420104 0.9979081
x8 100 0.4809487 0.2892945 0.0072688 0.9596358
--------------------------------------------------------------------------
Hmisc/tests/aregImpute3.r 0000644 0001760 0000144 00000001103 12243661443 015057 0 ustar ripley users require(Hmisc)
n <- 100
set.seed(1)
y <- sample(0:8, n, TRUE)
x1 <- runif(n)
x2 <- runif(n)
x2[1:10] <- NA
z <- sample(1:20, n, TRUE)
d <- data.frame(y, x1, x2, z)
f1 <- glm(y ~ x1 + x2, family=poisson)
f2 <- glm(y ~ x1 + x2 + offset(log(z)), family=poisson)
a <- aregImpute(~ y + x1 + x2)
g1 <- fit.mult.impute(y ~ x1 + x2 , glm, a,
family=poisson, data=d)
g2 <- fit.mult.impute(y ~ x1 + x2 + offset(log(z)), glm, a,
family=poisson, data=d)
# g3 <- fit.mult.impute(y ~ x1 + x2 + offset(log(z)), Glm, a, family=poisson, data=d)
coef(g1)
coef(g2)
# coef(g3)
coef(f1)
coef(f2)
Hmisc/tests/Ecdf.r 0000644 0001760 0000144 00000000566 12243661443 013547 0 ustar ripley users ## From Bayazid Sarkar
require(Hmisc)
set.seed(1)
x <- exp(rnorm(100))
w <- sample(1:5, 100, TRUE)
g <- sample(c('a','b','c'), 100, TRUE)
Ecdf(log(x), weights=w, lty=1:3, col=1:3, group=g, label.curves=list(keys=1:3),
subtitles=FALSE)
Ecdf(x, weights=w, lty=1:3, col=1:3, group=g, label.curves=list(keys=1:3),
subtitles=FALSE, log='x')
Hmisc/tests/redun.r 0000644 0001760 0000144 00000000633 12243661443 014016 0 ustar ripley users set.seed(1)
n <- 100
x1 <- runif(n)
x2 <- runif(n)
x3 <- x1 + x2 + runif(n)/10
x4 <- x1 + x2 + x3 + runif(n)/10
x5 <- factor(sample(c('a','b','c'),n,replace=TRUE))
x6 <- 1*(x5=='a' | x5=='c')
redun(~x1+x2+x3+x4+x5+x6, r2=.8)
redun(~x1+x2+x3+x4+x5+x6, r2=.8, allcat=TRUE)
# redun(.., allcat=TRUE, minfreq=40) gives same result as allcat=FALSE
x0 <- c(rep(0,99),1)
redun(~x0+x1+x2+x3+x4+x5+x6, r2=.8, minfreq=2)
Hmisc/tests/summaryD.r 0000644 0001760 0000144 00000001554 12243661443 014505 0 ustar ripley users set.seed(135)
maj <- factor(c(rep('North',13),rep('South',13)))
g <- paste('Category',rep(letters[1:13],2))
n <- sample(1:15000, 26, replace=TRUE)
y1 <- runif(26)
y2 <- pmax(0, y1 - runif(26, 0, .1))
png('/tmp/summaryD.png', width=550, height=800)
spar(mfrow=c(3,2))
f <- function(x) sprintf('%4.2f', x)
summaryD(y1 ~ maj + g, xlab='Mean', auxtitle='', fmtvals=f)
summaryD(y1 ~ maj + g, groupsummary=FALSE)
summaryD(y1 ~ g, fmtvals=f, auxtitle='')
Y <- cbind(y1, y2)
summaryD(Y ~ maj + g, fun=function(y) y[1,], pch=c(1,17))
rlegend(.1, 26, c('y1','y2'), pch=c(1,17))
summaryD(y1 ~ maj, fun=function(y) c(mean(y), n=length(y)),
auxvar='n')
dev.off()
png('/tmp/summaryD2.png', width=300, height=100)
# Or: pdf('/tmp/z.pdf', width=3.5, height=1.25)
spar()
summaryD(y1 ~ maj, fmtvals=function(x) round(x,4),
xlab=labelPlotmath('Velocity', 'm/s'))
dev.off()
Hmisc/tests/test2.xpt 0000644 0001760 0000144 00000023540 12243661443 014316 0 ustar ripley users HEADER RECORD*******LIBRARY HEADER RECORD!!!!!!!000000000000000000000000000000 SAS SAS SASLIB 8.2 AIX 05JUN03:18:44:0605JUN03:18:44:06 HEADER RECORD*******MEMBER HEADER RECORD!!!!!!!000000000000000001600000000140 HEADER RECORD*******DSCRPTR HEADER RECORD!!!!!!!000000000000000000000000000000 SAS TEST SASDATA 8.2 AIX 05JUN03:18:44:0605JUN03:18:44:06 HEADER RECORD*******NAMESTR HEADER RECORD!!!!!!!000000000500000000000000000000 RACE RACE AGE Age at Beginning of Study D1 MMDDYY
DT1 DATETIME T1 TIME HEADER RECORD*******OBS HEADER RECORD!!!!!!!000000000000000000000000000000 A B D<* HOQäÖ Dé A@ B D<† HOË1o Dž HEADER RECORD*******MEMBER HEADER RECORD!!!!!!!000000000000000001600000000140 HEADER RECORD*******DSCRPTR HEADER RECORD!!!!!!!000000000000000000000000000000 SAS FORMAT SASDATA 8.2 AIX 05JUN03:18:44:0605JUN03:18:44:06 HEADER RECORD*******NAMESTR HEADER RECORD!!!!!!!000000002100000000000000000000 FMTNAME Format name START Starting value for format END Ending value for format LABEL Format value label ( MIN Minimum length . MAX Maximum length 1 DEFAULT Default length 4 LENGTH Format length 7 FUZZ Fuzz value :
PREFIX Prefix characters B MULT Multiplier D FILL Fill character L
NOEDIT Is picture string noedit? M TYPE Type of format P SEXCL Start exclusion Q EEXCL End exclusion R HLO Additional information S DECSEP Decimal separator ^ DIG3SEP Three-digit separator _ DATATYPEDate/time/datetime? ` LANGUAGELanguage for date strings h HEADER RECORD*******OBS HEADER RECORD!!!!!!!000000000000000000000000000000 RACE 1 1green A B( A` A` 7—™-ê NNN RACE 2 2blue A B( A` A` 7—™-ê NNN RACE 3 3purpleA B( A` A` 7—™-ê NNN HEADER RECORD*******MEMBER HEADER RECORD!!!!!!!000000000000000001600000000140 HEADER RECORD*******DSCRPTR HEADER RECORD!!!!!!!000000000000000000000000000000 SAS Z SASDATA 8.2 AIX 05JUN03:18:44:0605JUN03:18:44:06 HEADER RECORD*******NAMESTR HEADER RECORD!!!!!!!000000000600000000000000000000 X3 X4 X5 X6 X7 X8 HEADER RECORD*******OBS HEADER RECORD!!!!!!!000000000000000000000000000000 @Ž@é0@3¨<@Ç7ÙOˆ@ÃÊp‹‡@èg®aÐÏ`@¡ @˜VØ@&=çf@3„>f@u<•äêx@’U"¯$ªH@h@3™@Þþ‹è@ä>µýÈ@Ñ]¨›¢¸@ݯT#»_@î @õ0@ÏÊ@= z@¬ø4ýYð@Ü–ÚU¹-¸@md@ˆó0@çP„@Åfj¹ˆ@¿l0Q~Ø@!‹Ø0C°@/`@…H@Ë,
Ð@ÉoK@ÄÛ𑉰@äDcȈÈ@,R@jÓX?ž,@@ƒ· @ä]2ÿÈ@0‡:`@åx@ÍÎ@³Äà@´ÓaKh@&
œL@h!¼¤ÐCx@ˆ°@L³x@‡?Î@þ¿ekø@6æ–nmÌ@_§H¼¿N@\D@™A¸@¹y–@@7%ãpn@QÆ™
£Œ@%éôàKÓê@‘0?{EÜ@ql†@#u¤F@Ϫe¹ŸP@V…ÎJœ@×8@#”@OP?T@†ˆ9ñ@Î…;@Ø5µÝ°kh@C@7@RÁ|@"aÉüD@‘Îk#˜@q£^PãF¼@¯x@Ïq€@ùÀ¿X@µ!h@ ¶hlAl@QPPÞ¢ @À@“ذ@2ÞÜ@·61§h@º^ò/t¸@Wß ®8@þp@¼,@]\Hô@ŠÀX“@ÏÙÛŸ°@õª°¿ëU`@Cœ@*¸@Ïú®€@Hé.@uÖÔ,ë¬@‡Å¡›‹@@¡p@Í9@ëä@@0îàî`?Á&!A‚H@L<èÆ˜yÐ@¡P@ÖtÐ@¨£(@5-søj@–>À;,@ïÛV+ß¶°@«è@ `@¶¶x@E3@^ˆ@qþbÒãü@‰ˆ';@sœ@ùƒ0@¢Ž¾@gÔ=@«Ýú-W@!Ǥ”CJ@ˆ@"Å@@ @ðš@°HÉY`?ð2Ãaà`@¨D–iP‰0@¾Ð@†/Ð@#fɬ@îpqØ@ï´2åßh@GH¬@Ü8@<@l@Y•Ô@SÍC@Ü…#7¹@”•;)*@âÈ@äœ8@'‰ÈÌ?òë2à@yÔNšó¨@}n*úÝ @"@œBø@<«I2@osO|Ü@¬]ðX¸?®°JÁ]`˜@;@yo\@äà+ø@¸äÝp@醃
Ó@Ç>ÓŽ€@Rh@Ó=h@X„ZD@Gk[žŒ@`9`fÀp@8¨ŽàqQ@•˜@s>Ð@A¥+|@!Õ@ÿvç þè@âßL;Å¿@È`@ã¦P@¬j`@q
‰à@ÅœÀ“‹8@cÈ"jÇD@Ax@ÄØ€@°ü¹È@=´+ˆz@Nˆð0?+\
V¸@D¤@~¬À@ú,@ÌÑ7}˜@‰kÁÐ@ɥߓJ@!Ð@@@"ð@\(h¸@ÈWÍ@äp0íÈà`@5°@ÞÙ€@D Ø@W¦,ª¬@þ@ø›ü€@„,…ÇY@»H@Î’`@Þ8´x@i^iøÐ@ÐÒ™Õ¡ @«¶oWl(@ùˆ?ÜûX?°PG`@M–vn˜@Ó)ƦP@œú§9õP?÷x@ï€@äfPà@ЬÊ@Í¢ÿ›8@ëƒÖ%?Û0@uMh@ÿKõ˜@/Þ¸Ì^@¾çV›}È@c"<ÆD@F8@(y@tÙ¥€@Ñ¥ @î]ƒ}ܸ@›e36Ê @ç@ë×h@©íí(@¬n
X@ѯ±w£X@ͨû›Q @“@e`@(FôŽ@Xmî°@Ïó,}Ÿà@›X Á6° @C¬@ZÐô@Ù>@6¾íÞl@UÐ\« @éx÷Òð@·À@ëó`@ÜQ"8@C¿Ä„@‚Yeϰ@I5žP’k<@L|@øp˜@·jp@fqŽøÌ@‰6M7h@š# £4F@ÖÈ@n•Ô@ß[ÃÈ@îì-Ø@Žs4#à@fI®Ì“\@Œ(@ß±Ð@lâSÈ@Øšöw°@ë~$UÖø@sZæ0@ó @ŽÄh@³¡Û @²»(u`@®ò'@GèkÐØ@p(@¬æè@˜•¹À@Q% | @øòYñà?Ö¤¬H@\,@ØšX@5Z‰8@žçŠó8@˜D¨u0ˆ@W§Š>¯O?å@@Å @PE#ˆ@·ÛÔ;h@Ÿå¡ë?È@+²%ÆWdL@hô?kú(?îÀ@o¤öÜ@E”ù ˆ@¼iÖ™xÓ°@ÕÈ@Ú±@ïÉø@A,ÔŒ€@¯-æ—^X@Ö" ÷¬D@T@õ9@@”âÝà@õý|è@¤@¡H@¦bXçLİ@'h@çí@ܰİ@Ó—+ @צ@ѯH@¨ˆ5P;@\0@”¨@lüoð@¤S¶yH@©šò×S0@î4¢¯ÜiH@+f@ë€@C*¶à@y#(ð@ ˆÊ[A@§ÍšO›8@êp@´P@‹»q8@˜#ÊM0@7+Œn,@Þ?¼~@1Â@ÜX@®¿=@IŸE4@àa¸+À?µ*O¡jT @C8?wÚd@€ö…@¹¹ï;p@ÒZi¥@=`ÊÈzÁ–@ä@wÓ„@ãh@1Pªb@[Ããè·„?5m° jÛ`@ï0@ñØ@z–ò|@6Q®l@ÛãÉ·À?£tPaFè ?>l?vd|@ˆé0@ël@ Ð@ö¹²Ãíp@G™¼J3x@\ø@ûˆ@aÌ5ð@!d&.@‰†ã
@G;ì¶ŽwØ@ˆ€@_@Ý2X @5úÆj@¾‡§| @…R!m
¤@@Ð@ìíx@S…!X?}|<