**R code example for k-fold cross-validation:**

library(peperr)

# Simulate an original dataset with 10 variables and 500 observations.
dataset <- data.frame(matrix(rnorm(5000), ncol = 10, nrow = 500))

# Set the number of folds.
k <- 10

# Get the observation ids for each fold.
# resample.indices() with method = "cv" returns:
#   $sample.index  - training-row indices for each fold
#   $not.in.sample - held-out (test) row indices for each fold
ids <- resample.indices(n = nrow(dataset), sample.n = k, method = "cv")

# Preallocate a list for gathering the per-fold train/test splits
# (avoids growing the list inside the loop).
xvaldatasets <- vector("list", k)

# For each fold: split the data, fit on the training rows, and predict
# the held-out test rows.
for (i in seq_len(k)) {
  # Make datasets of training and test observations.
  xvaldatasets[[i]] <- list(
    train = dataset[ids$sample.index[[i]], ],
    test  = dataset[ids$not.in.sample[[i]], ]
  )

  # Fit the model on the training observations, and use it to make
  # predictions for the test observations.
  fit <- lm(X10 ~ ., data = xvaldatasets[[i]]$train)
  xvaldatasets[[i]]$test$xvalpred <- predict(fit, newdata = xvaldatasets[[i]]$test)
}

# Evaluate predictive accuracy in each fold — for example, by the
# correlation between the predicted and observed values of X10.
# vapply replaces the original grow-a-list loop and guarantees a
# numeric vector result.
xvalcors <- vapply(
  xvaldatasets,
  function(fold) cor(fold$test$X10, fold$test$xvalpred),
  numeric(1)
)

# Print the correlations between true and predicted values for each fold.
xvalcors

# Mean and SD of the per-fold correlations.
mean(xvalcors)

sd(xvalcors)

**R code example for leave-one-out cross-validation:**

# Simulate an original dataset with 10 variables and 500 observations.
dataset <- data.frame(matrix(rnorm(5000), ncol = 10, nrow = 500))

# Create objects for gathering results.
xvaldatasets <- vector("list", nrow(dataset))
# BUG FIX: preallocate one prediction per observation (nrow), not one
# per variable (ncol) as the original did — the original relied on R
# silently growing the vector from 10 to 500 elements inside the loop.
xvalpreds <- rep(NA_real_, times = nrow(dataset))

# Leave-one-out: each observation i is held out in turn.
for (i in seq_len(nrow(dataset))) {
  # Training set = all rows except i; test set = row i only.
  xvaldatasets[[i]] <- list(
    train = dataset[-i, ],
    test  = dataset[i, , drop = FALSE]
  )

  # Fit the model on the training observations, and use it to make a
  # prediction for the held-out observation.
  fit <- lm(X10 ~ ., data = xvaldatasets[[i]]$train)
  xvalpreds[i] <- predict(fit, newdata = xvaldatasets[[i]]$test)
}

# Evaluate predictive accuracy: correlation between the leave-one-out
# predictions and the observed values of X10.
cor(xvalpreds, dataset$X10)

## No comments:

## Post a Comment