R Code Snippets • oromendia

This is a collection of snippets I find myself repeating or searching. Change July 27.

Linear mixed models

Code from Patrick Heagerty’s class

summary(lme(like_test~uip_dx, method="ML",random=reStruct(~1|id, pdClass="pdSymm", REML=F)))

Remove underscores from column names

colnames(dat) = gsub("_",".",colnames(dat))
colnames(rawDat) = tolower(colnames(rawDat))

Export R dataset for SAS

library(foreign)
write.foreign(df=dat, datafile=paste0(dir,"/dat_03_02datafile.csv"), 
    codefile=paste0(dir,"/dat_03_02codeFile.sas"), package="SAS")

SAS code hints to save labels

proc contents data=TMP1.EXACTBASELINE_PEX11_160307 out=contents noprint;
run;

libname a "O:/Divisional/BIO/mco2004/DoM/Spiromics - EXACT";
data a.labels(keep=NAME LABEL);
    set contents; 
run;

Stop code until you click “continue”

library(tcltk)
waitForClick <- function() {
    tt <- tktoplevel()
    tkpack( tkbutton(tt, text='     Yes - SAS code has run     ', command=function()tkdestroy(tt)),
        side='bottom')
    tkbind(tt,'<Key>', function()tkdestroy(tt) )
    tkwait.window(tt)
}
waitForClick()

Load Times New Roman Font and use in plotting

Note that this is much easier with ggplot.

install.packages("extrafont")
library(extrafont)
# font_import() (takes a long time)
loadfonts(device="win")  #Register fonts for Windows bitmap output
fonts() # list of fonts 
plot(1,1,family="Times New Roman")

Overall plot title

par(mar=c(0,0,0,0),xpd=T)
plot(0,type="n",bty="n",axes=F,ylab="",xlab="")
text(-.8,"MEP Increase from Baseline Under PAS by Dose",cex=2,font=2,family="Times New Roman")

Overall legend

par(mar=c(0,0,0,0),xpd=T,family="Times New Roman")
plot(0,type="n",bty="n",axes=F,ylab="",xlab="")
legend("top",col=2,lwd=4,c("Mean change from baseline"),bty="n",cex=1.5)

Print long table into markdown

kable(., format="html") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "center") %>% 
  scroll_box(height = "500px")

Plot of regression coefficients

m1 = glm(onset ~ warl + gdpenl + lpopl1 + lmtnest + ncontig + Oil + nwstate + instab + polity2l + ethfrac + relfrac, data = df, family = "binomial")
library(coefplot)
coefplot(m1)

Markdown barplot with percents and counts

dat %>% 
  filter(!is.na(adt_s)) %>% 
  ggplot(aes(x=adt_s,  group=SPOPmut_m)) + 
  geom_bar(aes(y = ..prop.., fill = factor(..x..)), stat="count") +
  geom_text(aes(label = ..count.., y= ..prop..), stat= "count", vjust = -.5) +
  geom_text(aes( label = scales::percent(..prop..), y= ..prop.. ), stat= "count", vjust = 1.5) +
  labs(y = "Percent", fill="adt_s") +
  facet_grid(~SPOPmut_m) +
  scale_y_continuous(labels=percent) +
  labs(title="adt_s by SPOP mutation status",subtitle=paste0("Chisq p=",round(chisq.test(dat$SPOPmut_m,dat$adt_s)$p.val,3))) +
  guides(fill=FALSE)

Lasagna plot

# library(devtools); install_github("swihart/lasagnar")
library(lasagnar)
longDat %>%
  right_join(select(dat_xWeeks,subjid,longestEvent_28d56d)) %>%
  # filter(subjid %in% dat_xWeeks[["subjid"]]) %>%
  filter(studyday > 0 & studyday <= studyDays_here) %>%
  select(subjid,exacttot,studyday) %>%
  spread(studyday,exacttot) %>%
  rename_all(,.funs= function(x) paste0("time",x)) %>%
  # order by initial exact value
  arrange(time1,time2,time3,time4,time5) %>%
  as.data.frame() -> df

rownames(df) <- as.numeric(as.factor(df$timesubjid))
df$timesubjid = NULL
matrix = as.matrix(df)

lasagna(matrix)

Dataset with pairwise p values to plot

paris <- combn(levels(dat_all$GOLD_arabicNum_GOLDold), 2, simplify = FALSE)
pairs_pval <- tidy(with(dat_all[ dat_all$cohort %in% c("1st","2nd"),], pairwise.wilcox.test(mtDNA_log10, GOLD_arabicNum_GOLDold, p.adjust.method = "BH")))
dat_pairs <- do.call(rbind.data.frame, paris)
colnames(dat_pairs) <- colnames(pairs_pval)[-3]
pv_final <- merge(dat_pairs, pairs_pval, by.x = c("group2", "group1"), by.y = c("group1", "group2"))
pv_final <- pv_final[order(pv_final$group1), ] 
pv_final$map_signif <- ifelse(pv_final$p.value > 0.05, "", ifelse(pv_final$p.value > 0.01,"*", "**"))
pv_final$p.value_f <- format_pval(pv_final$p.value,equal = "")

dat_all %>%
  filter(cohort %in% c("1st","2nd")) %>%
  ggplot(aes(GOLD_arabicNum_GOLDold,mtDNA_log10,fill=GOLD_arabicNum_GOLDold)) + 
  geom_boxplot(outlier.colour = NA) +  
  geom_jitter(width = 0.1) + ggtitle("mtDNA by GOLD") + 
  stat_compare_means(label.y=9.6) +
  guides(fill=FALSE) + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title="mtDNA by GOLD in 1st and 2nd cohorts",y="Log 10 mtDNA",x="") +
  geom_signif(comparisons=paris,textsize = 3.5, vjust = 0.1, step_increase=0.15,
            annotation= pv_final$p.value_f)

REDCap to tibble using tidyverse

read_csv(here("Docs_2018_0403/ClinicalGenomicsOfCa_DATA_LABELS_2018-04-03_1419.csv")) %>% 
  clean_names() %>% 
  mutate_all(funs(checkedToYes)) %>% 
  mutate_all(funs(uncheckedToNo)) %>% 
  # remove irrelevant variables
  select(-contains("complete")) 
  filter(repeat_instrument == "Biopsies") %>% 
  remove_empty_cols() # from janitor package

REDCap to data frame using base R

# Format nicely categorical variables
makeCat <- function(varNm){
  cols = grep(varNm,colnames(wideDat))
    cats = gsub(".","",gsub(paste0(varNm,"..choice."),"",colnames(wideDat)[cols]),fixed=T)
    all = NA
    for (c in 1:length(cats)) {
      all[wideDat[,cols[c]] == "Checked"] = paste(all[wideDat[,cols[c]] == "Checked"],cats[c])
    }
    all = gsub("NA ","",all)
    return(all)
}
# Add more as needed for analysis
varsToCondense = c("Gastrointestinal.symptoms")
dat = subset(wideDat, select = c(colnames(wideDat)[-grep("choice..",colnames(wideDat))]))
dat = cbind(wideDat,mapply(makeCat,varsToCondense)) # keep multiple columns as well
dim(dat)

# Change all checked/unchecked columns to yes/no
for(col in 1:ncol(dat)) {
  if(length(table(dat[,col]))>1 & levels(as.factor(dat[,col]))[1] =="Checked"){
    levels(dat[,col]) = c("Yes","No")
  }
}
for(col in 1:ncol(dat)) {
  if(length(table(dat[,col]))==1 & !is.na(names(table(dat[,col]))[1]) & names(table(dat[,col]))[1] == "Unchecked") {
    dat[,col] = "No"
  }
}

R to Excel

I use this to create a sheet for each table and figure for a manucript

# Create Excel of select results
#https://trinkerrstuff.wordpress.com/2018/02/14/easily-make-multi-tabbed-xlsx-files-with-openxlsx/
library(openxlsx) 
filename <- paste0("manuscriptFigs_",format(Sys.Date(),"%Y_%m_%d"),".xlsx")
wb <- createWorkbook(type="xlsx")`