# Convert a mutect/sclust VCF and an iCN.seg segment file into two
# tab-separated text files: a per-variant .dat table and a copy-number table.
# Input and output paths are hard-coded further down in this script.
import re
import pandas as pd
import argparse
import math

# find chr
def find_chr_num(text):
    """Return the part of *text* that follows the first ``chr``.

    Args:
        text: A chromosome label such as ``"chr1"`` or ``"chrX"``.

    Returns:
        Everything after the first occurrence of ``chr`` up to the end of
        that line, as a string (``"1"`` for ``"chr1"``; ``""`` for a bare
        ``"chr"``).

    Raises:
        ValueError: If *text* does not contain ``chr``.
    """
    match = re.search(r'chr(.*)', text)
    if match is None:
        # Fail with the offending value instead of an AttributeError on None.
        raise ValueError("no 'chr' prefix found in {!r}".format(text))
    return match.group(1)


# compute rd, ad and af from the DP and AF fields
def find_RD_AD_AF(text):
    """Compute reference depth, alternate depth and allele frequency.

    Reads the ``DP`` and ``AF`` entries from a semicolon-separated
    ``KEY=VALUE`` string. The two keys may appear in any position and be
    followed by any number of further fields.

    Args:
        text: String such as ``"DP=100;AF=0.25;X=1;Y=2"``.

    Returns:
        A ``(rd, ad, af)`` tuple of strings, where ``ad`` is
        ``floor(DP * AF)``, ``rd`` is ``DP - ad`` and ``af`` is the AF value
        exactly as it appears in *text*.

    Raises:
        ValueError: If ``DP`` or ``AF`` is missing, or if their values
            cannot be converted by ``int()`` / ``float()`` respectively.
    """
    # Anchor each key on start-of-string or ';' so that e.g. "ADP=" is not
    # mistaken for "DP=", and stop each value at the next ';' so trailing
    # fields never leak into the captured value.
    dp_match = re.search(r'(?:^|;)DP=([^;]*)', text)
    af_match = re.search(r'(?:^|;)AF=([^;]*)', text)
    if dp_match is None or af_match is None:
        raise ValueError("DP= and AF= are both required, got {!r}".format(text))

    dp = int(dp_match.group(1))
    af = af_match.group(1)
    # NOTE(review): floor() truncates, so a product that falls just below an
    # integer because of float rounding loses one read -- confirm that floor
    # rather than round is intended.
    ad = math.floor(dp * float(af))
    rd = dp - ad
    return (str(rd), str(ad), af)


# header line written at the top of the .dat output
header = 'CHROM\tPOS\tREF\tVAR\tNRM_REF_READS\tNRM_VAR_READS\tNRM_VAF'

# load the mutect/sclust VCF: no header row, '#' starts a comment, so the
# columns are addressed by position. The `with` block closes the handle as
# soon as pandas has consumed the file.
with open('EGAF00000057353.mutect.sclust.vcf', 'r') as f_var:
    df_var = pd.read_csv(f_var, sep='\t', header=None, comment='#')
f_var_rows = df_var.shape[0]

# load iCN.seg file (also read without a header row)
with open('../EGAF00000057353/sclust/EGAF00000057353_iCN.seg', 'r') as f_seg:
    df_seg = pd.read_csv(f_seg, sep='\t', header=None)
f_seg_rows = df_seg.shape[0]

# write the .dat file: the header line, then one tab-separated row per
# variant row of df_var
with open('EGAF00000057353.dat', 'w') as t:
    t.write(header + '\n')
    # Columns read: 0 = chromosome label, 1 = position, 3 = reference,
    # 4 = variant, 7 = the string holding the DP= / AF= entries.
    # Iterating the columns in lockstep avoids a per-row index lookup and
    # avoids rebinding the builtin `chr`.
    for chrom, pos, ref, alt, info in zip(
            df_var[0], df_var[1], df_var[3], df_var[4], df_var[7]):
        chrom_num = find_chr_num(chrom)
        rd, ad, vaf = find_RD_AD_AF(info)
        t.write('\t'.join(
            (chrom_num, str(pos), str(ref), str(alt), rd, ad, vaf)) + '\n')

# write the copy-number file: one tab-separated row per row of df_seg, with
# no header line
with open('EGAF00000057353_copy_number', 'w') as t1:
    # Columns read: 1 = chromosome label, then columns 2-5 copied through as
    # text. NOTE(review): the original variable names suggest 2/3 are the
    # segment start/end and 5 is the copy number -- confirm against the
    # iCN.seg format.
    for chrom, start, end, col4, cn in zip(
            df_seg[1], df_seg[2], df_seg[3], df_seg[4], df_seg[5]):
        fields = (find_chr_num(chrom), str(start), str(end), str(col4), str(cn))
        t1.write('\t'.join(fields) + '\n')
# NOTE(review): looks like leftover debug output; kept so the script's stdout
# is unchanged.
print('he')
