
### Prepare summary files (bash; run in a shell before the R section below)
# Build one summary file per P-value threshold: a header line followed by the
# rows of every per-trait / per-state GARFIELD output whose 2nd column equals
# the threshold, each row suffixed with the trait name and the state number.
r=r001
# Root of the GARFIELD output tree; every path below is derived from it.
base=/lustre/scratch119/humgen/projects/uk10k/users/vi1/enrichment_method_comparison/GARFIELD/output_v2
for thresh in 1e-8 1e-5
do
  OUT="$base/summary.$thresh.$r.garfield.M1N15T5.txt"
  # Header: first line of one representative output file plus the two extra
  # column names. The run id now follows $r instead of a hard-coded "r001".
  # The command substitution is deliberately left unquoted so the header is
  # re-joined with single spaces, as before.
  echo $(head -1 "$base/PCV.b37/garfield.perm.PCV.b37.segmentations.10.R.$r.m1,n15,t5,p1,k1.out") "disease" "state" > "$OUT"
  for nms in "cd-meta-uk10k" "ucmeta_gwas" "ICBP_DBP_gwas" "ICBP_SBP_gwas" "MAGIC_2hrGlucose_AdjustedForBMI.txt" "MAGIC_ln_HOMA-B.txt" "MPV.b37" "TC_ONE_Europeans.tbl" "MAGIC_FastingGlucose.txt" "MAGIC_ln_HOMA-IR.txt" "TG_ONE_Europeans.tbl" "HDL_ONE_Europeans.tbl" "MAGIC_HbA1C.txt" "MCH.b37" "PCV.b37" "HGB.b37" "MAGIC_ln_FastingInsulin.txt" "MCHC.b37" "PLT.b37" "LDL_ONE_Europeans.tbl" "MAGIC_ln_fastingProinsulin.txt" "MCV.b37" "RBC.b37" "DIAGRAMv3.2012DEC17.txt" "GIANT_BMI_Speliotes2010_publicrelease_HapMapCeuFreq.txt" "GIANT_HEIGHT_LangoAllen2010_publicrelease_HapMapCeuFreq.txt" "GIANT_WHRadjBMI_Heid2010_publicrelease_HapMapCeuFreq.txt" "SCZ"
  do
    for state in {1..25}
    do
      # Keep only rows at the current threshold; append trait and state.
      awk -v t="$thresh" -v n="$nms" -v state="$state" '$2==t {print $0,n,state}' "$base/$nms/garfield.perm.$nms.segmentations.$state.R.$r.m1,n15,t5,p1,k1.out" >> "$OUT"
    done
  done
done


# Append the rows of three further traits to the per-threshold summary files
# (same row format: original line, trait name, state number).
r=r001
garfield_dir=/lustre/scratch119/humgen/projects/uk10k/users/vi1/enrichment_method_comparison/GARFIELD/output_v2
extra_traits=(
  "EUR.CD.gwas_info03_filtered.assoc"
  "EUR.IBD.gwas_info03_filtered.assoc"
  "EUR.UC.gwas_info03_filtered.assoc"
)
for thresh in 1e-8 1e-5; do
  OUT=$garfield_dir/summary.$thresh.$r.garfield.M1N15T5.txt
  for nms in "${extra_traits[@]}"; do
    for ((state = 1; state <= 25; state++)); do
      src=$garfield_dir/$nms/garfield.perm.$nms.segmentations.$state.R.$r.m1,n15,t5,p1,k1.out
      # Rows whose 2nd column equals the threshold, tagged with trait and state.
      awk -v t="$thresh" -v n="$nms" -v state="$state" '$2==t {print $0,n,state}' "$src"
    done
  done >> "$OUT"
done


#### R: build the table and figures from the summary files

library(ggplot2)

# For each P-value threshold: read the GARFIELD summary file, annotate it with
# cell-type and segmentation-state labels, write the significant rows to
# ST8.txt, and produce three figures (a: mean OR per state, b: paired OR
# differences between states, c: unique vs shared enrichments per state).
# Summaries are print()ed explicitly because values are not auto-printed
# inside a for loop.

r <- "r001"

# ---- Locations --------------------------------------------------------------
out_dir <- "/lustre/scratch119/humgen/projects/uk10k/users/vi1/enrichment_method_comparison/GARFIELD/output_v2"
fig_dir <- file.path(out_dir, "figures")
seg_dir <- "/lustre/scratch119/humgen/projects/uk10k/users/vi1/segmentations"

# ---- Constants --------------------------------------------------------------
# Cut-off applied to the enrichment P-values throughout.
pt <- 3.34e-05

# Summary-file trait name -> short label. An explicit map is used instead of
# relabelling factor levels by position: level order depends on the collation
# locale, and strings are only factors when read.table converts them.
trait_labels <- c(
  "cd-meta-uk10k" = "CDold",
  "DIAGRAMv3.2012DEC17.txt" = "T2D",
  "EUR.CD.gwas_info03_filtered.assoc" = "CD",
  "EUR.IBD.gwas_info03_filtered.assoc" = "IBD",
  "EUR.UC.gwas_info03_filtered.assoc" = "UC",
  "GIANT_BMI_Speliotes2010_publicrelease_HapMapCeuFreq.txt" = "BMI",
  "GIANT_HEIGHT_LangoAllen2010_publicrelease_HapMapCeuFreq.txt" = "HGT",
  "GIANT_WHRadjBMI_Heid2010_publicrelease_HapMapCeuFreq.txt" = "WHR",
  "HDL_ONE_Europeans.tbl" = "HDL",
  "HGB.b37" = "HGB",
  "ICBP_DBP_gwas" = "DBP",
  "ICBP_SBP_gwas" = "SBP",
  "LDL_ONE_Europeans.tbl" = "LDL",
  "MAGIC_2hrGlucose_AdjustedForBMI.txt" = "2hrG",
  "MAGIC_FastingGlucose.txt" = "FG",
  "MAGIC_HbA1C.txt" = "HbA1C",
  "MAGIC_ln_FastingInsulin.txt" = "FI",
  "MAGIC_ln_fastingProinsulin.txt" = "FPI",
  "MAGIC_ln_HOMA-B.txt" = "HOMA-B",
  "MAGIC_ln_HOMA-IR.txt" = "HOMA-IR",
  "MCH.b37" = "MCH",
  "MCHC.b37" = "MCHC",
  "MCV.b37" = "MCV",
  "MPV.b37" = "MPV",
  "PCV.b37" = "PCV",
  "PLT.b37" = "PLT",
  "RBC.b37" = "RBC",
  "SCZ" = "SCZ",
  "TC_ONE_Europeans.tbl" = "TC",
  "TG_ONE_Europeans.tbl" = "TG",
  "ucmeta_gwas" = "UCold"
)
# Traits kept for analysis: everything except the two "old" data sets.
traits_kept <- setdiff(unname(trait_labels), c("CDold", "UCold"))

# One colour per segmentation state, indexed by state number (1..25).
cl_rgb <- rbind(
  c(255, 0, 0), c(255, 69, 0), c(255, 69, 0), c(255, 69, 0),
  c(0, 128, 0), c(0, 128, 0), c(0, 128, 0), c(0, 150, 0),
  c(194, 255, 5), c(194, 255, 5), c(194, 255, 5), c(194, 255, 5),
  c(255, 195, 77), c(255, 195, 77), c(255, 195, 77),
  c(255, 255, 0), c(255, 255, 0), c(255, 255, 0),
  c(255, 255, 102), c(102, 205, 170), c(138, 145, 208),
  c(230, 184, 183), c(112, 48, 160), c(128, 128, 128), c(255, 255, 255)
)
cl <- rgb(cl_rgb[, 1], cl_rgb[, 2], cl_rgb[, 3], maxColorValue = 255)

# Display order of the state names on the x axis. `cl` is paired with this
# order through the factor codes (see tblS2$Col below).
lvls <- c("TssA", "PromU", "PromD1", "PromD2", "Tx5'", "Tx", "Tx3'", "TxWk",
          "TxReg", "TxEnh5'", "TxEnh3'", "TxEnhW", "EnhA1", "EnhA2", "EnhAF",
          "EnhW1", "EnhW2", "EnhAc", "DNase", "ZNF/Rpts", "Het", "PromP",
          "PromBiv", "ReprPC", "Quies")

# Coarse group of each state, indexed by state number.
states <- c(rep("prom", 4), rep("trans", 4), rep("transenh", 4),
            rep("enh", 3), rep("other", 10))

# Dashed separators drawn between groups of states on the x axis.
# NOTE(review): positions assume all 25 states appear on the axis - confirm.
group_breaks <- c(4.5, 8.5, 12.5, 15.5, 19.5)

# Theme shared by all figures.
fig_theme <- theme_minimal() +
  theme(axis.ticks.x = element_blank(),
        axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
        axis.text = element_text(size = 14),
        legend.text = element_text(size = 14),
        axis.title = element_text(size = 16, face = "bold"),
        legend.title = element_text(size = 16, face = "bold"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())

# ---- Helpers ----------------------------------------------------------------
# Path of a figure panel: "F5<panel>" for the 1e-5 threshold, "SF5<panel>"
# otherwise.
fig_path <- function(panel, thresh) {
  prefix <- if (thresh == "1e-5") "F5" else "SF5"
  file.path(fig_dir, paste0(prefix, panel, "_newthresh.pdf"))
}

# Write plot `p` to a PDF of the given size, then open that same file in
# evince in the background. The device is closed even if printing fails.
save_figure <- function(p, path, width, height) {
  pdf(path, width, height)
  tryCatch(print(p), finally = dev.off())
  system(paste("evince", shQuote(path), "&"))
  invisible(path)
}

# Median over states of unique / (unique + shared) for one state group.
# NOTE(review): assumes every state in the group has both a "Unique" and a
# "Shared" row in `counts`, in the same order - otherwise vectors recycle.
unique_fraction <- function(counts, group) {
  n_unique <- counts$Number[which(counts$Group == group & counts$Sharing == "Unique")]
  n_shared <- counts$Number[which(counts$Group == group & counts$Sharing != "Unique")]
  median(n_unique / (n_shared + n_unique))
}

# ---- Annotation look-up tables (identical for every threshold) --------------
l <- read.table(file.path(seg_dir, "output_annotations", "link_file.txt"), header = TRUE)
# Cell-type id = file name of the annotation up to the first underscore.
# (Previously taken as the 11th "/"-separated component, first of 4 "_" parts.)
l$states <- sub("_.*$", "", basename(as.character(l$Annotation)))

l1 <- read.table(file.path(seg_dir, "data", "NIH_25Marks", "EIDlegend.txt"),
                 header = FALSE, sep = "\t")
l2 <- read.table(file.path(seg_dir, "data", "NIH_25Marks", "annotation_25_imputed12marks.txt"),
                 header = TRUE, comment.char = "", quote = "", check.names = FALSE, sep = "\t")

for (thresh in c("1e-8", "1e-5")) {

  # ---- Load and annotate ----------------------------------------------------
  tbl <- read.table(file.path(out_dir, paste0("summary.", thresh, ".", r, ".garfield.M1N15T5.txt")),
                    comment.char = "", header = TRUE)
  tbl$states <- l$states[match(tbl$linkID, l[, 1])]
  tbl$Seg <- l2[match(tbl$state, l2[, 1]), 2]
  tbl$Cell <- l1[match(tbl$states, l1[, 1]), 2]
  tbl$StateD <- l2[match(tbl$state, l2[, 1]), 3]

  unknown <- setdiff(unique(as.character(tbl$disease)), names(trait_labels))
  if (length(unknown) > 0) {
    warning("Traits without a label are dropped: ", paste(unknown, collapse = ", "),
            call. = FALSE)
  }
  tbl$disease <- unname(trait_labels[as.character(tbl$disease)])
  tbl <- tbl[which(tbl$disease %in% traits_kept), ]
  tbl$disease <- as.factor(as.character(tbl$disease))
  tbl$Col <- cl[tbl$state]

  # Rows passing the P-value cut-off; reused by the table and the figures.
  tblS <- tbl[which(tbl$Pvalue < pt), ]

  # ---- Supplementary table --------------------------------------------------
  # First file column, the annotation columns, the trait, then file columns
  # 2-8. Columns are picked by name so the labels below always match.
  tbl2f <- tblS[, c(names(tbl)[1], "states", "Cell", "state", "Seg", "StateD",
                    "disease", names(tbl)[2:8])]
  names(tbl2f)[1:7] <- c("Index", "Cell type", "Cell type name", "State",
                         "State name", "State description", "Trait")
  # The first threshold creates the file with a header; later ones append.
  first_thresh <- thresh == "1e-8"
  write.table(tbl2f, file = file.path(fig_dir, "ST8.txt"),
              col.names = first_thresh, row.names = FALSE,
              append = !first_thresh, quote = FALSE, sep = "\t")

  # OR summaries per family of states. The state name lives in the renamed
  # "State name" column; there is no `Seg` column in tbl2f any more.
  state_name <- tbl2f[["State name"]]
  print(summary(tbl2f$OR[state_name %in% c("EnhA1", "EnhA2", "EnhAc", "EnhAF", "EnhW1", "EnhW2",
                                           "TxEnh3'", "TxEnh5'", "TxEnhW", "TxReg")]))
  print(summary(tbl2f$OR[state_name %in% c("PromBiv", "PromD1", "PromD2", "PromP", "PromU", "TssA")]))
  print(summary(tbl2f$OR[state_name %in% c("Tx", "Tx3'", "Tx5'", "TxWk")]))

  # ---- Panel b: paired OR comparison between states -------------------------
  # For each (state 1, state 2) pair, match significant results on trait and
  # cell type and test whether state 1 ORs exceed state 2 ORs.
  pair_rows <- list()
  for (st2 in c("Tx", "Tx3'", "Tx5'")) {
    for (st1 in c("EnhA1", "EnhA2", "TssA", "TxEnh3'", "TxEnh5'", "TxReg",
                  "PromD1", "PromD2", "PromU")) {
      t0 <- merge(tblS[which(tblS$Seg == st1), ], tblS[which(tblS$Seg == st2), ],
                  by = c("disease", "Cell"))
      if (nrow(t0) > 2) {
        w <- wilcox.test(t0$OR.x, t0$OR.y, paired = TRUE, alternative = "greater")
        pair_rows[[length(pair_rows) + 1L]] <- data.frame(
          V1 = t0$OR.x, V2 = t0$OR.y, pval = w$p.value,
          state1 = st1, state2 = st2, stringsAsFactors = FALSE
        )
      }
    }
  }
  t2 <- do.call(rbind, pair_rows)

  if (is.null(t2)) {
    message("No state pair with more than 2 matched results at threshold ",
            thresh, "; skipping panel b.")
  } else {
    # Explicit factors: the fill colours are looked up from the level order.
    t2$state1 <- factor(t2$state1)
    t2$state2 <- factor(t2$state2)
    cl3 <- cl[match(levels(t2$state1), unique(tbl$Seg))]

    p <- ggplot(data = t2, mapping = aes(x = state1, y = V1 - V2, fill = state1, colour = state2)) +
      geom_point(position = position_jitterdodge(jitter.width = 0.7), aes(group = state2)) +
      geom_boxplot(aes(fill = state1, colour = state2), alpha = 0.8) +
      geom_hline(yintercept = 0, lty = 4, col = "black") +
      scale_fill_manual(values = setNames(cl3, levels(t2$state1)), name = "State 1", guide = "legend") +
      scale_colour_manual(values = c("black", "grey30", "grey60"), name = "State 2", guide = "legend") +
      xlab("Segmentation state") + ylab("State 1 OR - State 2 OR") +
      fig_theme
    save_figure(p, fig_path("b", thresh), 16, 6)

    # summary numbers
    print(table(t2[, c("state1", "state2")]))
  }

  # ---- Panel a: mean OR per trait and state ---------------------------------
  tblS2 <- aggregate(tblS$OR, list(tblS$disease, tblS$Seg), mean)
  names(tblS2) <- c("Trait", "Segmentation", "meanOR")
  tblS2[, 2] <- factor(tblS2[, 2], levels = lvls)
  tblS2$Col <- cl[tblS2[, 2]]

  p <- ggplot() +
    geom_point(data = tblS2, aes(x = Segmentation, y = log(meanOR), fill = Segmentation),
               position = "jitter", colour = "grey30") +
    geom_boxplot(data = tblS2, mapping = aes(x = Segmentation, y = log(meanOR), fill = Segmentation),
                 alpha = 0.8) +
    geom_hline(yintercept = 0, lty = 4, col = "grey") +
    geom_vline(xintercept = group_breaks, lty = 4) +
    # Named values keep each state on its own colour even if a state is absent.
    scale_fill_manual(values = setNames(cl, lvls)) +
    guides(fill = guide_legend(ncol = 2)) +
    xlab("Segmentation state") + ylab("log OR") +
    fig_theme
  save_figure(p, fig_path("a", thresh), 18, 6)

  # summaries
  print(table(tblS2$Segmentation))

  # ---- Panel c: unique vs shared enrichments --------------------------------
  # Count, per state and cell type, how many traits are significant (CD and UC
  # excluded); more than one trait counts as "Shared".
  tblS3 <- tblS[which(!(tblS$disease %in% c("CD", "UC"))), ]
  UN <- aggregate(tblS3$disease, list(tblS3$Seg, tblS3$states), length)
  colnames(UN) <- c("State", "Celltype", "Number")
  UN$Celltypes <- "Unique"
  UN$Celltypes[which(UN$Number > 1)] <- "Shared"
  UN$State <- factor(UN$State, levels = lvls)
  UN$Celltypes <- factor(UN$Celltypes, levels = c("Unique", "Shared"))

  p <- ggplot() +
    geom_bar(data = UN, aes(x = State, fill = Celltypes)) +
    geom_vline(xintercept = group_breaks, lty = 4) +
    scale_fill_manual(values = c(Unique = "lightgreen", Shared = "royalblue3")) +
    xlab("Segmentation state") + ylab("Number") +
    fig_theme
  save_figure(p, fig_path("c", thresh), 17, 6)

  # Per-state counts of unique / shared cell types, by coarse state group.
  UN$group <- states[tblS3$state[match(UN$State, tblS3$Seg)]]
  UN2 <- aggregate(UN$group, list(UN$Celltypes, UN$group, UN$State), length)
  names(UN2) <- c("Sharing", "Group", "State", "Number")
  for (grp in c("prom", "trans", "enh")) {
    print(UN2$Number[which(UN2$Group == grp)])
  }

  print(unique_fraction(UN2, "enh"))
  print(unique_fraction(UN2, "prom"))

  ### summaries
  # Number of significant depletions (OR < 1) and enrichments (OR > 1) per trait.
  print(aggregate(tbl$Pvalue < pt & tbl$OR < 1, list(tbl$disease), sum))
  print(aggregate(tbl$Pvalue < pt & tbl$OR > 1, list(tbl$disease), sum))

}