#!/bin/bash

## Chromosome label for this job, taken from the LSF job-array index.
## Index 23 is relabelled "X"; every other index is used as-is.
chr=${LSB_JOBINDEX:-}
if [[ -z "$chr" ]]
then
  ## The old unquoted `[ $chr -eq 23 ]` failed with "unary operator expected"
  ## here; report the actual problem instead.
  echo "warning: LSB_JOBINDEX is not set; chromosome label is empty" >&2
elif [[ "$chr" == "23" ]]
then
  chr="X"
fi
## Directory holding the gzipped narrowPeak annotation files.
ANNOTATION_DIR=/lustre/scratch119/humgen/projects/uk10k/users/vi1/histone_modifications/newdata/H3K4me3

## BED_FILES points to the locations of the .bed files you want to use.
## It stores the glob pattern itself (no expansion happens in an assignment);
## the pattern is expanded later wherever $BED_FILES is used unquoted.
BED_FILES="$ANNOTATION_DIR/*narrowPeak.gz"

## Per-chromosome output directory; stop here if it cannot be created,
## because every later step writes into it.
OUTPUT_DIR="/lustre/scratch114/teams/soranzo/users/vi1/ldsr/annotations_ldsr_h3k4me3/$chr"
mkdir -p -- "$OUTPUT_DIR" || { echo "error: cannot create $OUTPUT_DIR" >&2; exit 1; }

## Variant list for this chromosome: the first two columns of the
## CNS.$chr.annot.gz file, written space-separated to $OUTPUT_DIR/variants.
VARIANT_SOURCE="/lustre/scratch119/humgen/projects/uk10k/users/vi1/enrichment_method_comparison/LDSR/data/cell_type_groups/CNS.$chr.annot.gz"
zcat "$VARIANT_SOURCE" | awk '{print $1,$2}' > "$OUTPUT_DIR/variants"
INFO="$OUTPUT_DIR/variants"  #$VARIANTS_DIR/chr${chr}_CSQ_2cols.txt
## Annotate the variant list against every peak file, one file at a time.
## Reads:  BED_FILES (glob pattern), OUTPUT_DIR, chr, INFO.
## Writes: $OUTPUT_DIR/chr$chr.$i for the i-th peak file (i counts from 1).
i=0

## loop over annotation files
## ($BED_FILES is deliberately unquoted so the stored glob pattern expands)
for f in $BED_FILES
do
  ## A glob that matches nothing is handed over as the literal pattern;
  ## skip it rather than pointing zcat at a file that does not exist.
  [[ -e "$f" ]] || continue
  i=$((i + 1))

  ## number of columns in the .bed file (taken from its first line)
  NCOL=$(zcat "$f" | head -n 1 | awk '{print NF}')

  ## make a local copy removing possible headers:
  ## keep only rows whose first column is "chr<chr>", sorted numerically on column 2
  zcat "$f" | awk -v chr="$chr" '$1=="chr"chr {print}' | sort -k2n > "$OUTPUT_DIR/tmp.$chr.$i.bed"

  echo "Processing $f file"
  ## running annotation part
  /nfs/users/nfs_v/vi1/annotation_code/annotation_code_v4 --ncol "$NCOL" --o "$OUTPUT_DIR/tmp_chr${chr}.$i" --peaks "$OUTPUT_DIR/tmp.$chr.$i.bed" --norsid --chunk 1000 --info "$INFO"

  ## merge: keep only the 4th column of the tool's output
  #cp $OUTPUT_DIR/chr${chr} $OUTPUT_DIR/tmp0_chr${chr}
  awk '{print $4}' "$OUTPUT_DIR/tmp_chr${chr}.$i" > "$OUTPUT_DIR/chr$chr.$i"

  ## clean up the intermediate files for this peak file
  rm -- "$OUTPUT_DIR/tmp_chr${chr}.$i"
  rm -- "$OUTPUT_DIR/tmp.$chr.$i.bed"
done



## Only the chromosome-22 job writes link_file.txt, so it is produced once
## per job array. A string comparison is used because chr can be "X", which
## the integer test `-eq` rejects with "integer expression expected".
if [[ "$chr" == "22" ]]
then

  ### create link_file.txt (needed for running GARFIELD)
  LINK_FILE="$OUTPUT_DIR/link_file.txt"
  echo "Index Annotation Celltype Tissue Type Category" > "$LINK_FILE"

  ## One row per peak file: a 0-based index, the file path, then four "NA"
  ## placeholders for the remaining header columns.
  ## NOTE(review): these indices start at 0, while the per-file outputs
  ## chr$chr.$i written by the annotation loop are numbered from 1 - confirm
  ## this offset is what GARFIELD expects.
  i=-1
  for f in $BED_FILES
  do
    ## skip the literal pattern left behind by a glob that matched nothing
    [[ -e "$f" ]] || continue
    i=$((i + 1))
    printf '%s %s NA NA NA NA\n' "$i" "$f" >> "$LINK_FILE"
  done
fi

## reformat data for GARFIELD usage
#for f in $OUTPUT_DIR/chr*
#do
#       cat $f > $f.tmp
#       paste -d" " <(awk '{print $2}' $f.tmp | sed 1d) <(awk '{$1=$2=""; print $0}' $f.tmp | awk '{ gsub("\t",""); print;}' | awk '{ gsub(" ",""); print;}'| sed 1d) > $f
       #rm $f.tmp
#done

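## Example LSF submission: the job-array index (1-23) reaches the script as LSB_JOBINDEX; index 23 is treated as chromosome X.
#bsub -J"ldann2[1-23]" -P uk10k -q normal -M 200 -R'rusage[mem=200] select[mem>200]' -o logs/log.annotate.v2.%I -- ./annotate_script_ldsr_h3k4me3.sh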
