RCodeHints.Rmd
This is a collection of snippets I find myself repeating or searching. Change July 27.
Code from Patrick Heagerty’s class
summary(lme(like_test~uip_dx, method="ML",random=reStruct(~1|id, pdClass="pdSymm", REML=F)))
colnames(dat) = gsub("_",".",colnames(dat))
colnames(rawDat) = tolower(colnames(rawDat))
library(foreign)
write.foreign(df=dat, datafile=paste0(dir,"/dat_03_02datafile.csv"),
codefile=paste0(dir,"/dat_03_02codeFile.sas"), package="SAS")
proc contents data=TMP1.EXACTBASELINE_PEX11_160307 out=contents noprint;
run;
libname a "O:/Divisional/BIO/mco2004/DoM/Spiromics - EXACT";
data a.labels(keep=NAME LABEL);
set contents;
run;
library(tcltk)
waitForClick <- function() {
tt <- tktoplevel()
tkpack( tkbutton(tt, text=' Yes - SAS code has run ', command=function()tkdestroy(tt)),
side='bottom')
tkbind(tt,'<Key>', function()tkdestroy(tt) )
tkwait.window(tt)
}
waitForClick()
Note that this is much easier with ggplot.
install.packages("extrafont")
library(extrafont)
# font_import() (takes a long time)
loadfonts(device="win") #Register fonts for Windows bitmap output
fonts() # list of fonts
plot(1,1,family="Times New Roman")
par(mar=c(0,0,0,0),xpd=T)
plot(0,type="n",bty="n",axes=F,ylab="",xlab="")
text(-.8,"MEP Increase from Baseline Under PAS by Dose",cex=2,font=2,family="Times New Roman")
par(mar=c(0,0,0,0),xpd=T,family="Times New Roman")
plot(0,type="n",bty="n",axes=F,ylab="",xlab="")
legend("top",col=2,lwd=4,c("Mean change from baseline"),bty="n",cex=1.5)
kable(., format="html") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
full_width = F, position = "center") %>%
scroll_box(height = "500px")
m1 = glm(onset ~ warl + gdpenl + lpopl1 + lmtnest + ncontig + Oil + nwstate + instab + polity2l + ethfrac + relfrac, data = df, family = "binomial")
library(coefplot)
coefplot(m1)
dat %>%
filter(!is.na(adt_s)) %>%
ggplot(aes(x=adt_s, group=SPOPmut_m)) +
geom_bar(aes(y = ..prop.., fill = factor(..x..)), stat="count") +
geom_text(aes(label = ..count.., y= ..prop..), stat= "count", vjust = -.5) +
geom_text(aes( label = scales::percent(..prop..), y= ..prop.. ), stat= "count", vjust = 1.5) +
labs(y = "Percent", fill="adt_s") +
facet_grid(~SPOPmut_m) +
scale_y_continuous(labels=percent) +
labs(title="adt_s by SPOP mutation status",subtitle=paste0("Chisq p=",round(chisq.test(dat$SPOPmut_m,dat$adt_s)$p.val,3))) +
guides(fill=FALSE)
# library(devtools); install_github("swihart/lasagnar")
library(lasagnar)
longDat %>%
right_join(select(dat_xWeeks,subjid,longestEvent_28d56d)) %>%
# filter(subjid %in% dat_xWeeks[["subjid"]]) %>%
filter(studyday > 0 & studyday <= studyDays_here) %>%
select(subjid,exacttot,studyday) %>%
spread(studyday,exacttot) %>%
rename_all(,.funs= function(x) paste0("time",x)) %>%
# order by initial exact value
arrange(time1,time2,time3,time4,time5) %>%
as.data.frame() -> df
rownames(df) <- as.numeric(as.factor(df$timesubjid))
df$timesubjid = NULL
matrix = as.matrix(df)
lasagna(matrix)
paris <- combn(levels(dat_all$GOLD_arabicNum_GOLDold), 2, simplify = FALSE)
pairs_pval <- tidy(with(dat_all[ dat_all$cohort %in% c("1st","2nd"),], pairwise.wilcox.test(mtDNA_log10, GOLD_arabicNum_GOLDold, p.adjust.method = "BH")))
dat_pairs <- do.call(rbind.data.frame, paris)
colnames(dat_pairs) <- colnames(pairs_pval)[-3]
pv_final <- merge(dat_pairs, pairs_pval, by.x = c("group2", "group1"), by.y = c("group1", "group2"))
pv_final <- pv_final[order(pv_final$group1), ]
pv_final$map_signif <- ifelse(pv_final$p.value > 0.05, "", ifelse(pv_final$p.value > 0.01,"*", "**"))
pv_final$p.value_f <- format_pval(pv_final$p.value,equal = "")
dat_all %>%
filter(cohort %in% c("1st","2nd")) %>%
ggplot(aes(GOLD_arabicNum_GOLDold,mtDNA_log10,fill=GOLD_arabicNum_GOLDold)) +
geom_boxplot(outlier.colour = NA) +
geom_jitter(width = 0.1) + ggtitle("mtDNA by GOLD") +
stat_compare_means(label.y=9.6) +
guides(fill=FALSE) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(title="mtDNA by GOLD in 1st and 2nd cohorts",y="Log 10 mtDNA",x="") +
geom_signif(comparisons=paris,textsize = 3.5, vjust = 0.1, step_increase=0.15,
annotation= pv_final$p.value_f)
read_csv(here("Docs_2018_0403/ClinicalGenomicsOfCa_DATA_LABELS_2018-04-03_1419.csv")) %>%
clean_names() %>%
mutate_all(funs(checkedToYes)) %>%
mutate_all(funs(uncheckedToNo)) %>%
# remove irrelevant variables
select(-contains("complete"))
filter(repeat_instrument == "Biopsies") %>%
remove_empty_cols() # from janitor package
# Format nicely categorical variables
makeCat <- function(varNm){
cols = grep(varNm,colnames(wideDat))
cats = gsub(".","",gsub(paste0(varNm,"..choice."),"",colnames(wideDat)[cols]),fixed=T)
all = NA
for (c in 1:length(cats)) {
all[wideDat[,cols[c]] == "Checked"] = paste(all[wideDat[,cols[c]] == "Checked"],cats[c])
}
all = gsub("NA ","",all)
return(all)
}
# Add more as needed for analysis
varsToCondense = c("Gastrointestinal.symptoms")
dat = subset(wideDat, select = c(colnames(wideDat)[-grep("choice..",colnames(wideDat))]))
dat = cbind(wideDat,mapply(makeCat,varsToCondense)) # keep multiple columns as well
dim(dat)
# Change all checked/unchecked columns to yes/no
for(col in 1:ncol(dat)) {
if(length(table(dat[,col]))>1 & levels(as.factor(dat[,col]))[1] =="Checked"){
levels(dat[,col]) = c("Yes","No")
}
}
for(col in 1:ncol(dat)) {
if(length(table(dat[,col]))==1 & !is.na(names(table(dat[,col]))[1]) & names(table(dat[,col]))[1] == "Unchecked") {
dat[,col] = "No"
}
}
I use this to create a sheet for each table and figure for a manucript
# Create Excel of select results
#https://trinkerrstuff.wordpress.com/2018/02/14/easily-make-multi-tabbed-xlsx-files-with-openxlsx/
library(openxlsx)
filename <- paste0("manuscriptFigs_",format(Sys.Date(),"%Y_%m_%d"),".xlsx")
wb <- createWorkbook(type="xlsx")`