#!/bin/bash

# IIBDGC (Liu) association files: CD, UC and IBD

# Rewrite the three IIBDGC European association files (CD, IBD, UC) into the
# unified, tab-separated layout, one output directory per input file.
INDIR=/lustre/scratch119/humgen/projects/uk10k/users/vi1/GWASdatasets/IIBDGC
OUTDIR=/lustre/scratch119/humgen/projects/uk10k/users/vi1/GWASdatasets/Sept2016

# Value written to the Ncase column, listed in the same order as the files in
# the loop below. i is a 1-based counter used to pick the matching entry.
Ns=( 20550 38197 17647 )
i=0
for infile in EUR.CD.gwas_info03_filtered.assoc  EUR.IBD.gwas_info03_filtered.assoc  EUR.UC.gwas_info03_filtered.assoc; do
  i=$((i + 1))
  N=${Ns[i-1]}

  mkdir -p "$OUTDIR/$infile.dir"
  OUT="$OUTDIR/$infile.dir/$infile.hg19.clean.unified.formatting"

  # Header row; the spaces are turned into tabs by sed.
  echo "chr pos rsid allele1 allele2 beta se Zscore Pvalue Ncase Nctrl freq" | sed 's/ /\t/g' > "$OUT"

  # Drop the input header, then reorder the tab-separated columns.
  # beta is log($9) and Zscore is log($9)/$10 (presumably $9 is an odds ratio
  # and $10 its standard error -- confirm against the input header).
  # Nctrl is the constant 48495 for every file.
  sed 1d "$INDIR/$infile" \
    | awk -v FS="\t" -v OFS="\t" -v N="$N" '{print $1,$3,$2,$4,$5,log($9),$10,log($9)/$10, $11,N,48495, $7 }' >> "$OUT"

  echo "$infile done"
done

# Write a sorted copy ($OUT.2) of each unified IIBDGC file: the header line is
# kept first and the remaining rows are ordered numerically by column 1, then
# by column 2.
for infile in EUR.CD.gwas_info03_filtered.assoc  EUR.IBD.gwas_info03_filtered.assoc  EUR.UC.gwas_info03_filtered.assoc; do
  OUT="$OUTDIR/$infile.dir/$infile.hg19.clean.unified.formatting"
  {
    head -1 "$OUT"
    sed 1d "$OUT" | sort -k1,1n -k2,2n
  } > "$OUT.2"
  echo "$infile done"
done

# Put the sorted copy in place of each unified IIBDGC file; the previous
# (unsorted) version is kept next to it with a .0 suffix.
for infile in EUR.CD.gwas_info03_filtered.assoc  EUR.IBD.gwas_info03_filtered.assoc  EUR.UC.gwas_info03_filtered.assoc; do
  OUT="$OUTDIR/$infile.dir/$infile.hg19.clean.unified.formatting"
  mv "$OUT" "$OUT.0"
  mv "$OUT.2" "$OUT"
  echo "$infile done"
done


#################

# Submit one bsub job per dataset, each running
# ./gwas_rsid_to_hg19_coordinates.sh on that file. Job output goes to
# logs/log.<file>; each job requests 5000 (memory limit and reservation).
for file in \
  LDL_ONE_Europeans.tbl \
  HDL_ONE_Europeans.tbl \
  TC_ONE_Europeans.tbl \
  TG_ONE_Europeans.tbl \
  GIANT_BMI_Speliotes2010_publicrelease_HapMapCeuFreq.txt \
  GIANT_HEIGHT_LangoAllen2010_publicrelease_HapMapCeuFreq.txt \
  GIANT_WHRadjBMI_Heid2010_publicrelease_HapMapCeuFreq.txt \
  MAGIC_2hrGlucose_AdjustedForBMI.txt \
  MAGIC_FastingGlucose.txt \
  MAGIC_HbA1C.txt \
  MAGIC_ln_FastingInsulin.txt \
  MAGIC_ln_fastingProinsulin.txt \
  MAGIC_ln_HOMA-B.txt \
  MAGIC_ln_HOMA-IR.txt \
  DIAGRAMv3.2012DEC17.txt \
  cd-meta.txt \
  ucmeta-sumstats.txt
do
  bsub -q normal -o "logs/log.$file" -M 5000 -R'rusage[mem=5000] select[mem>5000]' -- ./gwas_rsid_to_hg19_coordinates.sh "$file"
done


# Lipid tables (HDL, TC, TG): build the unified file from each
# <file>.hg19.clean table and append a per-position value from the
# per-chromosome maftssd files as the last (freq) column.
INDIR=/lustre/scratch113/projects/uk10k/users/vi1/GWASdatasets/Sept2016
for infile in HDL_ONE_Europeans.tbl TC_ONE_Europeans.tbl TG_ONE_Europeans.tbl; do # LDL_ONE_Europeans.tbl
  OUT="$INDIR/$infile.dir/$infile"

  # Select and reorder columns (input header dropped); beta and se are
  # written as the literal NA.
  sed 1d "$OUT.hg19.clean" \
    | awk '{print $1,$2,$3,$10,$11,"NA","NA",$13,$14,$12}' > "$OUT.tmp"

  echo "chr pos rsid allele1 allele2 beta se Zscore Pvalue N freq" | sed 's/ /\t/g' > "$OUT.hg19.clean.unified.formatting"

  # Per chromosome 1..22: load column 1 -> column 2 of the maftssd file, then
  # append that value to every row whose second field matches (0 when there is
  # no match). Rows whose first field is not 1..22 are never written.
  for chr in {1..22}; do
    awk 'NR==FNR {a[$1]=$1;b[$1]=$2;next} {if ($2 in a) print $0,b[$2]; else print $0,0;}' \
      "/lustre/scratch113/projects/uk10k/users/vi1/garfield-data0/maftssd/chr$chr" \
      <(awk -v chr="$chr" '$1==chr' "$OUT.tmp") \
      | sed 's/ /\t/g' >> "$OUT.hg19.clean.unified.formatting"
    echo "$chr"
  done
  rm "$OUT.tmp"

  echo "$infile done"
done


# GIANT tables (BMI, height, WHRadjBMI): build the unified file from each
# <file>.hg19.clean table and append a per-position value from the
# per-chromosome maftssd files as the last (freq) column.
INDIR=/lustre/scratch113/projects/uk10k/users/vi1/GWASdatasets/Sept2016
for infile in GIANT_BMI_Speliotes2010_publicrelease_HapMapCeuFreq.txt GIANT_HEIGHT_LangoAllen2010_publicrelease_HapMapCeuFreq.txt GIANT_WHRadjBMI_Heid2010_publicrelease_HapMapCeuFreq.txt; do
  OUT="$INDIR/$infile.dir/$infile"

  # Select and reorder columns (input header dropped); beta, se and Zscore
  # are written as the literal NA.
  sed 1d "$OUT.hg19.clean" \
    | awk '{print $1,$2,$3,$10,$11,"NA","NA","NA",$13,$14}' > "$OUT.tmp"

  echo "chr pos rsid allele1 allele2 beta se Zscore Pvalue N freq" | sed 's/ /\t/g' > "$OUT.hg19.clean.unified.formatting"

  # Per chromosome 1..22: look up each row's second field in column 1 of the
  # maftssd file and append the matching column-2 value, or 0 if absent.
  for chr in {1..22}; do
    awk 'NR==FNR {a[$1]=$1;b[$1]=$2;next} {if ($2 in a) print $0,b[$2]; else print $0,0;}' \
      "/lustre/scratch113/projects/uk10k/users/vi1/garfield-data0/maftssd/chr$chr" \
      <(awk -v chr="$chr" '$1==chr' "$OUT.tmp") \
      | sed 's/ /\t/g' >> "$OUT.hg19.clean.unified.formatting"
    echo "$chr"
  done
  rm "$OUT.tmp"

  echo "$infile done"
done


########
# MAGIC tables: write the unified file straight from each <file>.hg19.clean
# table. The freq column is taken from the table itself ($12), so no
# maftssd lookup is done here.
INDIR=/lustre/scratch113/projects/uk10k/users/vi1/GWASdatasets/Sept2016

# infiles and Ns are parallel arrays: Ns[k] is the value written to the N
# column for infiles[k].
infiles=( "MAGIC_2hrGlucose_AdjustedForBMI.txt" "MAGIC_FastingGlucose.txt" "MAGIC_HbA1C.txt" "MAGIC_ln_FastingInsulin.txt" "MAGIC_ln_fastingProinsulin.txt" "MAGIC_ln_HOMA-B.txt" "MAGIC_ln_HOMA-IR.txt" )
Ns=( 15234 46186 46368 38238 10701 36466 37037 )

for i in {1..7}; do
  # i is 1-based; the arrays are 0-based.
  infile=${infiles[i-1]}
  N=${Ns[i-1]}
  OUT="$INDIR/$infile.dir/$infile"

  echo "chr pos rsid allele1 allele2 beta se Zscore Pvalue N freq" | sed 's/ /\t/g' > "$OUT.hg19.clean.unified.formatting"

  # Zscore is computed as $13/$14 (the columns written out as beta and se).
  sed 1d "$OUT.hg19.clean" \
    | awk -v N="$N" '{z=$13/$14; printf "%s %s %s %s %s %s %s %.6f %s %s %.4f\n" ,$1,$2,$3,$10,$11,$13,$14,z,$15,N,$12}' \
    | sed 's/ /\t/g' >> "$OUT.hg19.clean.unified.formatting"

  echo "$infile done"
done


### HaemGen data (red cell and platelet traits)

# HaemGen red-cell traits: join each trait's .txt.b37 table with the
# HaemGenRBC_<trait>.txt.gz table, then append a per-position value from the
# per-chromosome maftssd files as the last (freq) column.
for trait in MCHC MCH MCV RBC PCV; do
  INDIR=/nfs/team151_data03/PublicData/GWAS_summary_stats/HaemGen/Red_Cell_Traits_Nature2012/

  infile="$INDIR/$trait.txt.b37"
  infile2="$INDIR/HaemGenRBC_$trait.txt.gz"
  OUTDIR=/lustre/scratch113/projects/uk10k/users/vi1/GWASdatasets/Sept2016
  mkdir -p "$OUTDIR/$trait.b37.dir"
  OUT="$OUTDIR/$trait.b37.dir/$trait.b37"

  echo "chr pos rsid allele1 allele2 beta se Zscore Pvalue N freq" | sed 's/ /\t/g' > "$OUT.hg19.clean.unified.formatting"

  # First input (.txt.b37, header dropped): index columns 3 and 5 by column 2.
  # Second input (gz table, header dropped): for rows whose first field is in
  # that index, emit the looked-up columns followed by selected columns of
  # the row itself, with Zscore computed as $8/$9.
  awk 'NR==FNR {a[$2]=$2;b[$2]=$3;c[$2]=$5;next;} $1 in a {z=$8/$9; printf "%s %s %s %s %s %s %s %.6f %s %s\n" ,b[$1],c[$1],$1,$5,$6,$8,$9, z,$2,$7}' \
    <(sed 1d "$infile") \
    <(zcat "$infile2" | sed 1d) \
    | sed 's/ /\t/g' > "$OUT.tmp"

  # Per chromosome 1..22: append the maftssd column-2 value for rows whose
  # second field matches maftssd column 1, or 0 when there is no match.
  for chr in {1..22}; do
    awk 'NR==FNR {a[$1]=$1;b[$1]=$2;next} {if ($2 in a) print $0,b[$2]; else print $0,0;}' \
      "/lustre/scratch113/projects/uk10k/users/vi1/garfield-data0/maftssd/chr$chr" \
      <(awk -v chr="$chr" '$1==chr' "$OUT.tmp") \
      | sed 's/ /\t/g' >> "$OUT.hg19.clean.unified.formatting"
    echo "$chr"
  done
  rm "$OUT.tmp"
done

# HaemGen haemoglobin: same processing as the other red-cell traits, done
# separately because the two inputs use different names for the trait
# (HGB for the .txt.b37 table, Hb for the HaemGenRBC_*.txt.gz table).
trait=HGB
trait2=Hb
INDIR=/nfs/team151_data03/PublicData/GWAS_summary_stats/HaemGen/Red_Cell_Traits_Nature2012/

infile="$INDIR/$trait.txt.b37"
infile2="$INDIR/HaemGenRBC_$trait2.txt.gz"
OUTDIR=/lustre/scratch113/projects/uk10k/users/vi1/GWASdatasets/Sept2016
mkdir -p "$OUTDIR/$trait.b37.dir"
OUT="$OUTDIR/$trait.b37.dir/$trait.b37"

echo "chr pos rsid allele1 allele2 beta se Zscore Pvalue N freq" | sed 's/ /\t/g' > "$OUT.hg19.clean.unified.formatting"

# Index columns 3 and 5 of the .txt.b37 table by its column 2, then emit one
# row per gz-table row whose first field is in that index; Zscore is $8/$9.
awk 'NR==FNR {a[$2]=$2;b[$2]=$3;c[$2]=$5;next;} $1 in a {z=$8/$9; printf "%s %s %s %s %s %s %s %.6f %s %s\n" ,b[$1],c[$1],$1,$5,$6,$8,$9, z,$2,$7}' \
  <(sed 1d "$infile") \
  <(zcat "$infile2" | sed 1d) \
  | sed 's/ /\t/g' > "$OUT.tmp"

# Per chromosome 1..22: append the maftssd column-2 value for rows whose
# second field matches maftssd column 1, or 0 when there is no match.
for chr in {1..22}; do
  awk 'NR==FNR {a[$1]=$1;b[$1]=$2;next} {if ($2 in a) print $0,b[$2]; else print $0,0;}' \
    "/lustre/scratch113/projects/uk10k/users/vi1/garfield-data0/maftssd/chr$chr" \
    <(awk -v chr="$chr" '$1==chr' "$OUT.tmp") \
    | sed 's/ /\t/g' >> "$OUT.hg19.clean.unified.formatting"
  echo "$chr"
done
rm "$OUT.tmp"



# HaemGen platelet trait MPV: reorder the columns of MPV.txt.b37 and append a
# per-position value from the per-chromosome maftssd files as the freq column.
for trait in MPV; do
  INDIR=/nfs/team151_data03/PublicData/GWAS_summary_stats/HaemGen/Platelet_Traits_Nature2011/

  infile="$INDIR/$trait.txt.b37"
  OUTDIR=/lustre/scratch113/projects/uk10k/users/vi1/GWASdatasets/Sept2016
  mkdir -p "$OUTDIR/$trait.b37.dir"
  OUT="$OUTDIR/$trait.b37.dir/$trait.b37"

  echo "chr pos rsid allele1 allele2 beta se Zscore Pvalue N freq" | sed 's/ /\t/g' > "$OUT.hg19.clean.unified.formatting"

  # Input header dropped; Zscore is computed as $7/$8.
  awk '{z=$7/$8; printf "%s %s %s %s %s %s %s %.6f %s %s\n" ,$3,$4,$2,$5,$6,$7,$8, z,$9,$11}' \
    <(sed 1d "$infile") \
    | sed 's/ /\t/g' > "$OUT.tmp"

  # Per chromosome 1..22: append the maftssd column-2 value for rows whose
  # second field matches maftssd column 1, or 0 when there is no match.
  for chr in {1..22}; do
    awk 'NR==FNR {a[$1]=$1;b[$1]=$2;next} {if ($2 in a) print $0,b[$2]; else print $0,0;}' \
      "/lustre/scratch113/projects/uk10k/users/vi1/garfield-data0/maftssd/chr$chr" \
      <(awk -v chr="$chr" '$1==chr' "$OUT.tmp") \
      | sed 's/ /\t/g' >> "$OUT.hg19.clean.unified.formatting"
    echo "$chr"
  done
  rm "$OUT.tmp"
done


#######################################
# Reorder the columns of a PLT.txt.b37 table (header already removed).
# Input:   whitespace-separated rows on stdin.
# Outputs: one row per input row on stdout with 10 space-separated fields:
#          $3 $4 $2 $5 $6 $8 $9 ($8/$9 as %.6f) $10 $11
#          which line up with the header columns
#          chr pos rsid allele1 allele2 beta se Zscore Pvalue N.
# Note:    the format string must not end in a space. A trailing space would
#          become a trailing tab and, once the freq value is appended, an
#          empty extra column in every data row.
#######################################
plt_to_unified() {
  awk '{z=$8/$9; printf "%s %s %s %s %s %s %s %.6f %s %s\n" ,$3,$4,$2,$5,$6,$8,$9, z,$10,$11}'
}

# HaemGen platelet trait PLT: reorder the columns of PLT.txt.b37 and append a
# per-position value from the per-chromosome maftssd files as the freq column.
# NOTE(review): PLT reads beta/se/Pvalue from $8/$9/$10 whereas the MPV
# section reads $7/$8/$9 -- presumably the two tables have different layouts;
# confirm against the input headers.
for trait in PLT; do
  INDIR=/nfs/team151_data03/PublicData/GWAS_summary_stats/HaemGen/Platelet_Traits_Nature2011/

  infile="$INDIR/$trait.txt.b37"
  OUTDIR=/lustre/scratch113/projects/uk10k/users/vi1/GWASdatasets/Sept2016
  mkdir -p "$OUTDIR/$trait.b37.dir"
  OUT="$OUTDIR/$trait.b37.dir/$trait.b37"

  echo "chr pos rsid allele1 allele2 beta se Zscore Pvalue N freq" | sed 's/ /\t/g' > "$OUT.hg19.clean.unified.formatting"

  sed 1d "$infile" | plt_to_unified | sed 's/ /\t/g' > "$OUT.tmp"

  # Per chromosome 1..22: append the maftssd column-2 value for rows whose
  # second field matches maftssd column 1, or 0 when there is no match.
  for chr in {1..22}; do
    awk 'NR==FNR {a[$1]=$1;b[$1]=$2;next} {if ($2 in a) print $0,b[$2]; else print $0,0;}' \
      "/lustre/scratch113/projects/uk10k/users/vi1/garfield-data0/maftssd/chr$chr" \
      <(awk -v chr="$chr" '$1==chr' "$OUT.tmp") \
      | sed 's/ /\t/g' >> "$OUT.hg19.clean.unified.formatting"
    echo "$chr"
  done
  rm "$OUT.tmp"
done



# IIBDGC trans-ancestry European CD and UC files: write the unified file
# directly from EUR.<trait>.gwas.assoc.gz. The freq column comes from the
# input itself ($7), so no maftssd lookup is done here.
for trait in CD UC; do
  INDIR=/nfs/team151_data03/PublicData/GWAS_summary_stats/IBD_Genetics/iibdgc-trans-ancestry-summary-stats/

  infile="$INDIR/EUR.$trait.gwas.assoc.gz"
  OUTDIR=/lustre/scratch113/projects/uk10k/users/vi1/GWASdatasets/Sept2016
  mkdir -p "$OUTDIR/EUR.$trait.dir"
  OUT="$OUTDIR/EUR.$trait.dir/EUR.$trait"

  # The N column is filled with the literal NA for these files.
  N="NA"
  echo "chr pos rsid allele1 allele2 beta se Zscore Pvalue N freq" | sed 's/ /\t/g' > "$OUT.hg19.clean.unified.formatting"

  # Input header dropped; beta is log($9) and Zscore is log($9)/$10
  # (presumably $9 is an odds ratio -- confirm against the input header).
  zcat "$infile" \
    | sed 1d \
    | awk -v N="$N" '{z=log($9)/$10; printf "%s %s %s %s %s %.6f %s %.6f %s %s %s\n" ,$1,$3,$2,$4,$5,log($9),$10, z,$11,N,$7}' \
    | sed 's/ /\t/g' >> "$OUT.hg19.clean.unified.formatting"
done




# DIAGRAM table: build the unified file (with separate Ncase and Nctrl
# columns) from <file>.hg19.clean and append a per-position value from the
# per-chromosome maftssd files as the last (freq) column.
INDIR=/lustre/scratch113/projects/uk10k/users/vi1/GWASdatasets/Sept2016
for infile in DIAGRAMv3.2012DEC17.txt; do
  OUT="$INDIR/$infile.dir/$infile"

  # Input header dropped. beta is log($15); se is (log($17)-log($15))/2 and
  # Zscore is beta/se.
  # NOTE(review): presumably $15 is the odds ratio and $17 the upper
  # confidence bound, with 2 standing in for 1.96 -- confirm.
  sed 1d "$OUT.hg19.clean" \
    | awk '{print $1,$2,$3,$12,$13,log($15),(log($17)-log($15))/2,2*log($15)/(log($17)-log($15)),$14,$18,$19}' > "$OUT.tmp"

  echo "chr pos rsid allele1 allele2 beta se Zscore Pvalue Ncase Nctrl freq" | sed 's/ /\t/g' > "$OUT.hg19.clean.unified.formatting"

  # Per chromosome 1..22: append the maftssd column-2 value for rows whose
  # second field matches maftssd column 1, or 0 when there is no match.
  for chr in {1..22}; do
    awk 'NR==FNR {a[$1]=$1;b[$1]=$2;next} {if ($2 in a) print $0,b[$2]; else print $0,0;}' \
      "/lustre/scratch113/projects/uk10k/users/vi1/garfield-data0/maftssd/chr$chr" \
      <(awk -v chr="$chr" '$1==chr' "$OUT.tmp") \
      | sed 's/ /\t/g' >> "$OUT.hg19.clean.unified.formatting"
    echo "$chr"
  done
  rm "$OUT.tmp"

  echo "$infile done"
done


#######################################
# Reorder the columns of the schizophrenia results table (header already
# removed) into the first 11 unified columns.
# Input:   whitespace-separated rows on stdin; the first field carries a
#          three-character prefix that is stripped (substr($1,4)).
# Outputs: one row per input row on stdout with 11 space-separated fields:
#          chr pos rsid allele1 allele2 beta se Zscore Pvalue Ncase Nctrl
#          where beta is log($7), Zscore is log($7)/$8, and Ncase/Nctrl are
#          the constants 36989 and 113075.
# Note:    freq is NOT emitted here; it is appended afterwards from the
#          maftssd files. Printing an (unset) freq variable at this point
#          would add an empty field and shift the real freq into a 13th
#          column.
#######################################
scz_to_unified() {
  awk '{print substr($1,4), $5,$2,$3,$4,log($7),$8, log($7)/$8, $9, "36989", "113075"}'
}

# Schizophrenia (SCZ): build the unified file from ckqny.scz2snpres.gz and
# append a per-position value from the per-chromosome maftssd files as the
# last (freq) column.
INDIR=/lustre/scratch119/humgen/projects/uk10k/users/vi1/GWASdatasets/Sept2016
infile="SCZ"

# Make sure the output directory exists before anything is redirected into it.
mkdir -p "$INDIR/$infile.dir"
OUT="$INDIR/$infile.dir/$infile"

zcat /nfs/team151_data03/PublicData/GWAS_summary_stats/Schizophrenia/ckqny.scz2snpres.gz \
  | sed 1d \
  | scz_to_unified > "$OUT.tmp"

echo "chr pos rsid allele1 allele2 beta se Zscore Pvalue Ncase Nctrl freq" | sed 's/ /\t/g' > "$OUT.hg19.clean.unified.formatting"

# Per chromosome 1..22: append the maftssd column-2 value for rows whose
# second field matches maftssd column 1, or 0 when there is no match.
for chr in {1..22}; do
  awk 'NR==FNR {a[$1]=$1;b[$1]=$2;next} {if ($2 in a) print $0,b[$2]; else print $0,0;}' \
    "/lustre/scratch119/humgen/projects/uk10k/users/vi1/garfield-data0/maftssd/chr$chr" \
    <(awk -v chr="$chr" '$1==chr' "$OUT.tmp") \
    | sed 's/ /\t/g' >> "$OUT.hg19.clean.unified.formatting"
  echo "$chr"
done
rm "$OUT.tmp"

echo "$infile done"
