Skip to content
Snippets Groups Projects
Commit 4d78f274 authored by Alex
Browse files

added matrix to notebook

parent 73251920
No related branches found
No related tags found
No related merge requests found
......@@ -16,7 +16,7 @@ output_file:
mwm_input_template: "/mwminputAnc"
mwm_output_template: "/mwmoutputAnc"
contig_template: "/contigAnc"
dcj_output_path: "/dcj_output.txt"
dcj_output_path: "/dcj_output"
dcj_summary_path: "/dcj_summary.txt"
# input path
......
......@@ -16,7 +16,7 @@ output_file:
mwm_input_template: "/mwminputAnc"
mwm_output_template: "/mwmoutputAnc"
contig_template: "/contigAnc"
dcj_output_path: "/dcj_output.txt"
dcj_output_path: "/dcj_output"
dcj_summary_path: "/dcj_summary.txt"
# input path
......
......@@ -93,11 +93,21 @@ class GeneFamily:
else:
# median_structure.append(line.rstrip().split("\t"))
newick_structure = line.rstrip()
median_structure = self.convert_newick(newick_structure, all_leaves)
conversion_input = newick_structure[:]
# print(newick_structure)
# print(type(conversion_input))
# If the Newick structure labels leaves as "genomeName_genomeID", the for-loop below strips the "genomeName_" prefix; comment out the for-loop if the labels are just genomeID.
for genome in all_leaves:
genome_string = (str(genome[1]) + "_")
conversion_input = conversion_input.replace(genome_string, "")
# print(conversion_input)
median_structure = self.convert_newick(conversion_input, all_leaves)
all_leaves.sort()
print(median_structure)
# print(median_structure)
return all_leaves, median_structure, newick_structure
......
This diff is collapsed.
......@@ -71,23 +71,30 @@ def main():
print("Start")
directory = parsed_config["output_path"] + parsed_config["project_name"] + "/"
directory = parsed_config["output_path"] + parsed_config["project_name"]
os.makedirs(directory, exist_ok=True)
# STEP 1: Reading in Genomes and Newick Tree Structure
print("STEP 1: Reading in Genomes and Newick Tree Structure")
gene_family = GeneFamily(parsed_config)
all_leaves, median_structure, newick_structure = gene_family.get_leaves_and_tree_info()
# print(all_leaves)
# print(median_structure)
print("Step 1 DONE")
# STEP 2: Creating Gene Families
print("STEP 2: Creating Gene Families")
gene_family.make_gene_family(all_leaves)
print("Step 2: DONE")
# STEP 3: Representing Genomes by Gene Family Labels
print("STEP 3: Representing Genomes by Gene Family Labels")
initialize_genome = Genome(gene_family.gene_list, all_leaves, parsed_config)
genome = initialize_genome.get_genome_in_string()
print("Step 4: DONE")
# STEP 4-6: Generating Maximum Weight Matching Input, Performing MWMatching, Constructing Ancestral Contigs
# STEPS 4-6: Generating Maximum Weight Matching Input, Performing MWMatching, Constructing Ancestral Contigs
print("STEPS 4-6: Generating Maximum Weight Matching Input, Performing MWMatching, Constructing Ancestral Contigs")
# Parameters for Generating Maximum Weight Matching Inputs
window_size = parsed_config['ws']
......@@ -98,56 +105,72 @@ def main():
# tree_node = num_genomes - 2
# print(num_genomes, tree_node, window_size, num_gene_families)
print("Start MWM Input")
input_tree_node = MWMInputTreeNode(genome, all_leaves)
anc = 1
dcj_files = ""
start_path = parsed_config['output_path'] + parsed_config['project_name']
dcj_files = []
for structure in median_structure:
print(structure)
# print(structure)
# output file paths for Maximum Weight Matching Input, Maximum Weight Matching Output and Ancestral Contigs
outfile_mwmin = start_path + parsed_config["output_file"]["mwm_input_template"] + str(anc) + ".txt"
outfile_mwmout = start_path + parsed_config["output_file"]["mwm_output_template"] + str(anc) + ".txt"
outfile_contig = start_path + parsed_config["output_file"]["contig_template"] + str(anc) + ".txt"
outfile_mwmin = directory + parsed_config["output_file"]["mwm_input_template"] + str(anc) + ".txt"
outfile_mwmout = directory + parsed_config["output_file"]["mwm_output_template"] + str(anc) + ".txt"
outfile_contig = directory + parsed_config["output_file"]["contig_template"] + str(anc) + ".txt"
# Adding File Paths for later DCJ Computations
dcj_files += (outfile_contig + " ")
dcj_files.append(outfile_contig)
# Generating Maximum Weight Matching Input
print("Starting MWM Input Generation")
mwm_input = input_tree_node.get_mwm_input(structure, num_gene_families, window_size, min_adj_weight, outfile_mwmin)
# print("MWM Input Generation DONE")
# Performing Maximum Weight Matching
# maxWeightMatching(mwm_input, outfile_mwmout)
print("MWM DONE")
print("Starting Maximum Weight Matching")
maxWeightMatching(mwm_input, outfile_mwmout)
# print("Maximum Weight Matching DONE")
# Constructing Ancestral Contigs
print("Starting Construction of Ancestral Contig")
contig = Contig(outfile_mwmout, num_gene_families)
contig.get_edge()
list_telomeres = contig.find_telomeres()
contig_list = contig.get_contigs(list_telomeres, outfile_contig, anc)
# print("Contig Construction DONE")
anc += 1
# STEP 7: Calculating DCJ Distance Between Ancestral Contigs
print("STEP 7: Calculating DCJ Distance Between Ancestral Contigs")
jar_path = parsed_config['input_file']['jar_path']
dcj_output_path = start_path + parsed_config['output_file']['dcj_output_path']
java_command = "java -jar " + jar_path + "/UniMoG-java11.jar " + dcj_files + "-m=1 -p > " + dcj_output_path
os.system(java_command)
with open(dcj_output_path, 'r') as f:
dcj_info = f.readlines()
dcj_summary = start_path + parsed_config['output_file']['dcj_summary_path']
with open(dcj_summary, 'w') as dcj_file:
for i in range(len(median_structure)):
dcj_file.write("median structure for Ancestor "+str((i + 1))+":"+ "%s" % median_structure[i] + "\n")
for line in dcj_info:
if line[0] == '>':
dcj_file.write(line)
dcj_output = []
for i in range(1, anc):
for j in range(i + 1, anc):
dcj_output_path = directory + parsed_config['output_file']['dcj_output_path'] + str(i) + "_" + str(j) + ".txt"
command = "java -jar " + jar_path + "/UniMoG-java11.jar " + str(dcj_files[i - 1]) + " " + str(dcj_files[j - 1]) + " -m=1 -p >" + dcj_output_path
dcj_output.append(dcj_output_path)
os.system(command)
print("Step 7 DONE")
dcj_summary = directory + parsed_config['output_file']['dcj_summary_path']
print("Generating Summary of DCJ Calculations")
with open(dcj_summary, 'w') as dcj_summary_file:
for i in range(len(median_structure)):
dcj_summary_file.write("median structure for Ancestor "+str((i + 1))+":"+ "%s" % median_structure[i] + "\n")
for file in dcj_output:
path = file
with open(path, 'r') as dcj_file:
dcj_info = dcj_file.readlines()
dcj_summary_file.write(dcj_info[0])
print("Summary Generated")
# print(java_command)
# print(dcj_files)
......
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment