Skip to content
Snippets Groups Projects
Commit 3572f3f4 authored by Hernandez-Courbevoie Yohan's avatar Hernandez-Courbevoie Yohan
Browse files

big progress

parent 12527a92
Branches handle_custom_taxonomy
No related tags found
No related merge requests found
......@@ -29,7 +29,7 @@ def main():
primary_taxonomy=ta.parse_taxonomy_simple_file(args.taxonomy)
if not (args.peptide_table or args.fasta or args.directory or args.limit):
print("Missing input")
# print("Missing input")
sys.exit()
# parsing models for peptide tables, sequences or limits
......@@ -47,8 +47,9 @@ def main():
secondary_taxonomy, lost_taxid=primary_taxonomy.intersection(set_of_taxid)
if len(lost_taxid)>0:
print("The following taxids were not found in "+args.taxonomy)
print(lost_taxid)
pass
# print("The following taxids were not found in "+args.taxonomy)
# print(lost_taxid)
ta.table_print(secondary_taxonomy)
ta.create_taxonomy_file(secondary_taxonomy, args.output)
......
......@@ -9,7 +9,10 @@ from config import load_config
import csv
import re
def create_table(table):
def create_table(table, verbose=False):
config = load_config()
sql = open(sys.path[0]+"/pampa.sql", "r").read().replace("\n", "").replace("taxonomy", table)
try:
......@@ -18,16 +21,17 @@ def create_table(table):
cur.execute(sql, (table))
conn.commit()
except (Exception, psycopg2.DatabaseError) as error:
if verbose:
print(error)
def load_file(path, run_id=None):
def load_file(path, run_id=None, verbose=False):
# idx=0
if run_id :
runid_pattern = re.compile("^[A-Z][a-z]{2}_[0-9]{2}_[0-9]{4}_[0-9]{2}_[0-9]{2}_[0-9]{2}_[0-9]{1,5}$|^all$|^reduced$") # Jul_05_2024_12_35_25_5220 or "reduced" or "all"
assert runid_pattern.match(run_id)
table = "taxonomy_" + run_id
create_table(table)
create_table(table, verbose=verbose)
else :
table = "taxonomy"
with open(path, 'r') as taxo_file:
......@@ -42,12 +46,12 @@ def load_file(path, run_id=None):
# parentId = line[3]
# rank = line[4]
# print(line)
insert_data(table, line)
insert_data(table, line, verbose=verbose)
#add_root()
#re_arrange()
#add_root(verbose=verbose)
#re_arrange(verbose=verbose)
def insert_data(table, raw):
def insert_data(table, raw, verbose=False):
config = load_config()
taxoId = raw[0]
commonName = raw[1]
......@@ -66,9 +70,10 @@ def insert_data(table, raw):
cur.execute(sql, (taxoId, commonName, scientificName, parentId, rank))
conn.commit()
except (Exception, psycopg2.DatabaseError) as error:
if verbose:
print(error)
def add_root():
def add_root(verbose=False):
config = load_config()
insert = "INSERT INTO taxonomy(taxonid, commonname) VALUES(%s,%s)" # update with table name as parameter
update = "UPDATE taxonomy set parent=-1 WHERE rank='subphylum'"
......@@ -79,9 +84,10 @@ def add_root():
cur.execute(update)
conn.commit()
except (Exception, psycopg2.DatabaseError) as error:
if verbose:
print(error)
def re_arrange():
def re_arrange(verbose=False):
config = load_config()
insert = "INSERT INTO taxonomy(taxonid, scientificname, parent) VALUES(%s,%s,%s)" # update with table name as parameter
update = "UPDATE taxonomy set parent=0 WHERE scientificname like '%unclassified %'"
......@@ -92,11 +98,14 @@ def re_arrange():
cur.execute(update)
conn.commit()
except (Exception, psycopg2.DatabaseError) as error:
if verbose:
print(error)
if __name__ == '__main__':
if len(sys.argv) == 2:
load_file(sys.argv[1]) # taxonomy_path
else:
elif len(sys.argv) == 3:
load_file(sys.argv[1], run_id=sys.argv[2]) # taxonomy_path, run_id
elif len(sys.argv) == 4:
load_file(sys.argv[1], run_id=sys.argv[2], verbose=True) # 3e params to pass verbose to True
\ No newline at end of file
......@@ -10,8 +10,11 @@ import json
import re
def execute(taxo_path, run_id=None):
print("Generating taxonomy JSON file...")
def execute(taxo_path, run_id=None, verbose=False):
# print("Generating taxonomy JSON file...")
config = load_config()
if run_id :
runid_pattern = re.compile("^[A-Z][a-z]{2}_[0-9]{2}_[0-9]{4}_[0-9]{2}_[0-9]{2}_[0-9]{2}_[0-9]{1,5}$|^all$|^reduced$") # Jul_05_2024_12_35_25_5220 or "reduced" or "all"
......@@ -23,16 +26,17 @@ def execute(taxo_path, run_id=None):
try:
with psycopg2.connect(**config) as conn:
with conn.cursor() as cur:
create_json(cur, table, taxo_path)
create_json(cur, table, taxo_path, verbose=verbose)
# cur.execute(sql, (table))
# rows = cur.fetchall()
# for row in rows:
# print("taxonId =", row[0])
except (Exception, psycopg2.DatabaseError) as error:
if verbose:
print(error)
print("Done.")
# print("Done.")
def create_json(cur, table, taxo_path):
def create_json(cur, table, taxo_path, verbose=False):
json_array = [ ]
json_root = { "id" : 0, "text" : "Taxonomy", "children" : []}
children = [] # 1 seul element a racine de l'arbre!
......@@ -40,7 +44,7 @@ def create_json(cur, table, taxo_path):
json_root["state"] = state
# on construit children en ajoutant l'element racine -> taxonId = -1 (root)
add_childs(cur, table, json_root, 89593)
add_childs(cur, table, json_root, 89593, verbose=verbose)
json_array.append(json_root)
#print(json_array)
......@@ -50,7 +54,7 @@ def create_json(cur, table, taxo_path):
#f = open("taxonomy_mammals.json")
#data = json.load(f)
def add_childs(cur, table, json, id):
def add_childs(cur, table, json, id, verbose=False):
sql = "SELECT * from "+table+" WHERE parent = " + str(id)
# print("Building childs for taxon", id)
# print(sql)
......@@ -73,11 +77,14 @@ def add_childs(cur, table, json, id):
json["children"] = childrens
except (Exception, psycopg2.DatabaseError) as error:
if verbose:
print(error)
# return json
if __name__ == '__main__':
if len(sys.argv) == 2:
execute(sys.argv[1]) # dir path for the taxo json created
else:
elif len(sys.argv) == 3:
execute(sys.argv[1], sys.argv[2]) # path, run_id
elif len(sys.argv) == 4:
execute(sys.argv[1], sys.argv[2], verbose=True) # 3e params to pass verbose to True
#!/usr/bin/env python3
import argparse
import sys
import os
# local import
from src import peptide_table as pt
from src import sequences as seq
from src import taxonomy as ta
from src import markers
from src import limit
from src import fasta_parsing as fa
#from src import message
def main():
    """Reduce a primary taxonomy to the taxids referenced by the input data.

    Parses the command line, collects every taxid mentioned in the peptide
    tables (-p), FASTA sequences (-f/-d) and/or limit file (-l), intersects
    the primary taxonomy (-t) with that set, prints the reduced taxonomy and
    writes it to the output file (-o).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-p", dest="peptide_table", nargs='+', help="Peptide table (TSV file).", type=str)
    parser.add_argument("-o", dest="output", help="Output path (should include the output file name)", type=str)
    parser.add_argument("-f", dest="fasta", help="FASTA file that contains new sequences.", type=str)
    parser.add_argument("-d", dest="directory", help="Directory that contains FASTA files.", type=str)
    parser.add_argument("-l", dest="limit", help="Limit file that contains a list of clades (OX=)", type=str)
    parser.add_argument("-t", dest="taxonomy", help="Taxonomy (TSV file)", type=str, required=True)
    args = parser.parse_args()

    # parsing taxonomy
    primary_taxonomy = ta.parse_taxonomy_simple_file(args.taxonomy)

    # at least one source of taxids is required
    if not (args.peptide_table or args.fasta or args.directory or args.limit):
        print("Missing input")
        sys.exit(1)  # fix: exit with a non-zero status on this error path (was sys.exit())

    # parsing models for peptide tables, sequences or limits
    set_of_taxid = set()
    if args.peptide_table:
        set_of_markers = pt.parse_peptide_tables(args.peptide_table, None, primary_taxonomy)
        set_of_taxid.update({m.taxid for m in set_of_markers})
    if args.fasta or args.directory:
        set_of_sequences = fa.build_set_of_sequences(args.fasta, args.directory, None, primary_taxonomy)
        set_of_taxid.update({s.taxid for s in set_of_sequences})
    if args.limit:
        list_of_constraints = limit.parse_limits(args.limit)
        # fix: the loop variable was named 'dict', shadowing the builtin
        set_of_taxid.update({t for constraint in list_of_constraints for t in constraint["OX"]})

    # keep only the part of the primary taxonomy covered by the collected taxids
    secondary_taxonomy, lost_taxid = primary_taxonomy.intersection(set_of_taxid)
    if len(lost_taxid) > 0:
        print("The following taxids were not found in " + args.taxonomy)
        print(lost_taxid)
    ta.table_print(secondary_taxonomy)
    ta.create_taxonomy_file(secondary_taxonomy, args.output)


if __name__ == "__main__":
    main()
......@@ -201,7 +201,7 @@ def extract_request(form):
i += 1
if len(req['limit_OX']) > 0:
tax_groups = ["mammals", "birds", "fishes", "reptiles"]
tax_groups = ["mammals"] # "birds", "fishes", "reptiles" seront dans cette liste aussi quand ils seront ajoutés aux données par défaut
elif "taxonomic_group_selection" in form:
items = form["taxonomic_group_selection"]
......@@ -419,8 +419,10 @@ def launch_software(run_id, req):
render_result_dict["data_tables"] = req['peptides_files']
else : # 'sequences'
render_result_dict["data_dir"] = req['sequences_dir']
if "limit_file" in req:
render_result_dict["limit_file"] = req['limit_file'] # TODO
if "limit_OX" in req and len(req["limit_OX"]) > 0 and "limit_file" in req:
render_result_dict["limit"] = True
else:
render_result_dict["limit"] = True
json.dump(render_result_dict, open(json_file, "w"))
os.system(f"python render_result.py {json_file}")
......
......@@ -161,8 +161,14 @@ def insert_spectrum(resdir, spectrum_name):
def write_details_page(spectrum_name, run_id, tree_created, job_name=None):
def write_details_page(spectrum_name, run_id, taxo_info, job_name=None):
resdir = f"{common.RESULT_DIR}{run_id}/"
if taxo_info == "custom":
url_json = f"/pampa/result/{run_id}/taxonomy_{run_id}.json"
elif taxo_info == "default":
url_json = "/pampa/data_pampa/taxonomy_reduced.json"
else:
pass # taxo_info=="no" pas d'affichage de l'arbre
content = json.load(open(resdir+"out_"+run_id+".json", "r"))
content_reduced = [dct for dct in content if dct['spectrum_name'] == spectrum_name]
......@@ -206,7 +212,7 @@ def write_details_page(spectrum_name, run_id, tree_created, job_name=None):
html += ('<br>')
# Taxonomy
if tree_created :
if taxo_info != "no" :
assignment_png = assignment["lca_name"].replace(' ', '_') + ".png"
assignment_tree = assignment["lca_name"].replace(' ', '_') + ".php"
html += '<p><iframe src="' + assignment_tree + '" height="800px" width="90%"></iframe></p>'
......@@ -256,6 +262,7 @@ def write_details_page(spectrum_name, run_id, tree_created, job_name=None):
content_reduced.sort(key=lambda x: x["pvalue"])
peak_info_dict = {}
if taxo_info != "no" :
assignment_png = spectrum_name + ".png"
assignment_tree = spectrum_name + ".php"
html += '<p><iframe src="' + assignment_tree + '" height="850px" width="90%"></iframe></p>'
......@@ -276,8 +283,8 @@ def write_details_page(spectrum_name, run_id, tree_created, job_name=None):
peak_info_dict[key].append(i)
# build semi-global taxonomic tree for multi-assignation
if tree_created:
gen_sub_tree(common.RESULT_DIR, run_id, spectrum_name, assignment, color)
if taxo_info != "no":
gen_sub_tree(common.RESULT_DIR, run_id, spectrum_name, assignment, color, url_json)
# General info
......@@ -396,21 +403,14 @@ def extract_request(form):
error = 1
error_messages.append("The script need the parameter 'job_name'.")
if "tree" in form:
tree_created = form["tree"].value
if tree_created == "True":
job_name = True
elif tree_created == "False":
job_name = False
else:
error = 1
error_messages.append("The script need the parameter 'tree'.")
if "taxo" in form:
taxo_info = form["taxo"].value
else:
error = 1
error_messages.append("The script need the parameter 'tree'.")
error_messages.append("The script need the parameter 'taxo'.")
if error == 0:
write_details_page(spectrum_name, run_id, job_name, tree_created)
write_details_page(spectrum_name, run_id, taxo_info, job_name)
return error, error_messages, run_id, spectrum_name
......
......@@ -28,9 +28,9 @@ def write_main_page(result_path, run_id, error_list):
main_page.write('<a href="/pampa/form.php" class="aLoad" >Back to PAMPA classify</a>')
# Insert page footer
html += '</div></div>'
html += open(f"{common.HTML_PATH}/footer.php", "r").read()
html +='</body></html>'
main_page.write('</div></div>')
main_page.write(open(f"{common.HTML_PATH}/footer.php", "r").read())
main_page.write('</body></html>')
main_page.close()
......
......@@ -27,9 +27,9 @@ def write_main_page(run_id):
main_page.write('<a href="/pampa/form.php" class="aLoad" >Back to PAMPA classify</a>')
# Insert page footer
html += '</div></div>' # center - main
html += open(f"{common.HTML_PATH}/footer.php", "r").read()
html +='</body></html>'
main_page.write('</div></div>') # center - main
main_page.write(open(f"{common.HTML_PATH}/footer.php", "r").read())
main_page.write('</body></html>')
main_page.close()
......
......@@ -7,12 +7,20 @@ import zipfile
import operator
from render_sub_assignment import *
"""
NOTES
- en ajoutant de nouvelles tables de peptides par défaut autres que mammals dans nos données, il faut adapter dans la partie de génération de l'arbre taxonomique section 'reduction by taxonomic groups' la vérification que toutes les tables (groupes taxonomiques) ont été inclues.
"""
def href_dl(link, name):
    """Return an HTML anchor that downloads *link* under the file name *name* in a new tab."""
    return f'<a href="{link}" download="{name}" target="_blank">{name}</a>'
def command_builder_taxoreducer(taxonomy, output, peptides_tables=None, sequence_dir=None):
def command_builder_taxoreducer(taxonomy, output, peptides_tables=None, sequence_dir=None, limit_file=None):
if os.path.splitext(output) == "":
output += ".tsv"
command = f"/usr/bin/python {common.PAMPA_DIR}main_taxonomy_filtering.py -t {taxonomy} -o {output}"
......@@ -20,6 +28,8 @@ def command_builder_taxoreducer(taxonomy, output, peptides_tables=None, sequence
command += f" -p {' '.join(peptides_tables)}"
if sequence_dir:
command += f" -d {sequence_dir}"
if limit_file:
command += f" -l {limit_file}"
return command
def zip_results(file_names, run_id, job_name=None):
......@@ -61,7 +71,7 @@ def results_output(run_id, job_name=None):
return html
def assignments_output(run_id, taxo_used, tree_created, job_name=""):
def assignments_output(run_id, taxo_used, taxo_info, job_name=""):
"""Produce a HTML table showing the assignments without detail."""
html = ''
......@@ -105,14 +115,14 @@ def assignments_output(run_id, taxo_used, tree_created, job_name=""):
else:
row[0] = "-"
row[-1] = "-"
td_row = "".join(f'<td class="link_img" data-spectrumname="{spectrum_name}" data-jobname="{job_name}" data-tree="{tree_created}">{e}</td>' for e in row)
td_row = "".join(f'<td class="link_img" data-spectrumname="{spectrum_name}" data-jobname="{job_name}" data-taxo="{taxo_info}">{e}</td>' for e in row)
html += f'<tr class="{row_class[i%2]}">{td_row}</tr>'
html += '</table>'
return html
def write_main_page(run_id, taxo_used, tree_created, job_name=None):
def write_main_page(run_id, taxo_used, taxo_info, job_name=None):
"""Write a HTML page. The result page shown first when the PAMPA analysis is done."""
html = ""
......@@ -144,7 +154,7 @@ def write_main_page(run_id, taxo_used, tree_created, job_name=None):
# Show the assignments in a table
html += ('<br>')
html += (assignments_output(run_id, taxo_used, tree_created, job_name=job_name))
html += (assignments_output(run_id, taxo_used, taxo_info, job_name=job_name))
html += ('<br>')
# Display assignment thumbnails
......@@ -171,11 +181,15 @@ def write_main_page(run_id, taxo_used, tree_created, job_name=None):
open(common.RESULT_DIR + run_id + "/results.php", "w").write(html)
def gen_trees(run_id, url_json):
def gen_trees(run_id, taxo_custom=False):
json_content = json.load(open(common.RESULT_DIR + run_id + "/out_" + run_id +".json"))
if taxo_custom:
url_json = f"/pampa/result/{run_id}/taxonomy_{run_id}.json"
else:
url_json = "/pampa/data_pampa/taxonomy_reduced.json"
for assignment in json_content:
assign = assignment["lca_name"]
gen_sub_tree(common.RESULT_DIR, run_id, assign.replace(" ", "_"), assignment, "#0000FF", url_json)
gen_sub_tree(common.RESULT_DIR, run_id, assign.replace(" ", "_"), assignment, "#0000FF", url_json=url_json)
# Main program
......@@ -191,10 +205,10 @@ def main():
job_name = None
taxo_source = params["taxo_source"]
taxo_file = params["taxo_file"]
if "limit_taxid" in params:
limit_taxid = params['limit_taxid']
if "limit" in params:
limit = params['limit']
else:
limit_taxid = None
limit = False
if "data_tables" in params:
data_tables = params['data_tables']
data_dir = None
......@@ -218,53 +232,73 @@ def main():
# Sous-arbres taxonomiques
log_tree = open("log.txt", "a")
tree_created = taxo_used
"""if taxo_used:
info = ""
if taxo_used:
log_tree.write("PAMPA is using a taxonomy. Preparing the tree(s)...\n")
try:
if taxo_source == "default_reduced":
log_tree.write("using the taxonomy reduced\n")
if f"{common.RESULT_DIR}{run_id}/{os.path.basename(common.PEPTIDES_MAMMALS_FILE)}" in data_tables: # if taxo selection by group (can't be tested now because we only have the mammals table)
if f"{common.RESULT_DIR}{run_id}/{os.path.basename(common.PEPTIDES_MAMMALS_FILE)}" not in data_tables: # if taxo selection by group
# reduction by taxonomic groups
log_tree.write("reduction by taxonomic groups\n")
taxo_reduced_file = f"{common.RESULT_DIR}{run_id}/taxonomy_{run_id}.tsv"
os.system(command_builder_taxoreducer(taxo_file, taxo_reduced_file, peptides_tables=data_tables))
log_tree.write(f"reduce passed\n")
os.system(f"/usr/bin/python {common.POSTGRESQL_DIR}fill_taxo.py {taxo_reduced_file} {run_id}")
log_tree.write(f"bdd passed\n")
os.system(f"/usr/bin/python {common.POSTGRESQL_DIR}json_taxo.py {common.RESULT_DIR+run_id} {run_id}")
log_tree.write(f"json passed\n")
url_json = f"{common.RESULT_DIR}{run_id}/taxonomy_{run_id}.json"
elif False: # TODO if limits taxo
command_reduce = command_builder_taxoreducer(taxo_file, taxo_reduced_file, peptides_tables=data_tables)
log_tree.write(f"reduce taxo: {command_reduce}\n")
os.system(command_reduce) # reduces the taxonomy from the taxonomy_reduced.tsv
command_bdd = f"/usr/bin/python {common.POSTGRESQL_DIR}fill_taxo.py {taxo_reduced_file} {run_id}"
log_tree.write(f"bdd: {command_bdd}\n")
os.system(command_bdd) # creates a BDD from the taxonomy reduced previously
command_json = f"/usr/bin/python {common.POSTGRESQL_DIR}json_taxo.py {common.RESULT_DIR+run_id} {run_id}"
log_tree.write(f"json creation: {command_json}\n")
os.system(command_json) # creates a json from the BDD
taxo_custom = True
elif limit: # if limits taxo from the limit_file - reduction by limit file
log_tree.write("reduction by limit file\n")
pass # TODO r(limits,taxo_reduced) / b / d
else : # le fichier taxo_reduced est utilisable tel quel
limit_file = f"{common.RESULT_DIR}{run_id}/limitfile.txt"
taxo_reduced_file = f"{common.RESULT_DIR}{run_id}/taxonomy_{run_id}.tsv"
command_reduce = command_builder_taxoreducer(taxo_file, taxo_reduced_file, limit_file=limit_file)
log_tree.write(f"reduce taxo: {command_reduce}\n")
os.system(command_reduce) # reduces the taxonomy from the taxonomy_reduced.tsv
command_bdd = f"/usr/bin/python {common.POSTGRESQL_DIR}fill_taxo.py {taxo_reduced_file} {run_id}"
log_tree.write(f"bdd: {command_bdd}\n")
os.system(command_bdd) # creates a BDD from the taxonomy reduced previously
command_json = f"/usr/bin/python {common.POSTGRESQL_DIR}json_taxo.py {common.RESULT_DIR+run_id} {run_id}"
log_tree.write(f"json creation: {command_json}\n")
os.system(command_json) # creates a json from the BDD
taxo_custom = True
else : # le fichier taxo_reduced est utilisable tel quel - no reduction
log_tree.write("no reduction\n")
url_json = common.DATA_PAMPA_DIR+"taxonomy_reduced.json"
taxo_custom = False
else:
pass
# TODO récupère le type de donnnées (table ou séquences)
# TODO Récupère les noms de fichiers
# TODO fait la r(data; taxo_all) / b / d
log_tree.write(f"gentree start\n")
url_json = common.DATA_PAMPA_DIR+"taxonomy_reduced.json"
gen_trees(run_id, url_json)
log_tree.write(f"gentree passed\n")
# TODO Attention gentrees va échouer pour toutes conditions impliquant une réduction taxonomique sur fichier limite (la réduction supprime les feuilles)
gen_trees(run_id, taxo_custom=taxo_custom)
except:
tree_created = False
log_tree.write(f"Tree generation failed.\n")
else:
tree_created = False
log_tree.write(f"\nNo tree was generated because PAMPA did not use any taxonomy for its analysis.\n")"""
log_tree.write(f"\nNo tree was generated because PAMPA did not use any taxonomy for its analysis.\n")
log_tree.close()
url_json = common.DATA_PAMPA_DIR+"taxonomy_reduced.json"
gen_trees(run_id, url_json)
#gen_thumbs(common.RESULT_DIR, run_id)
write_main_page(run_id, taxo_used, tree_created, job_name=job_name)
if tree_created:
if taxo_custom:
taxo_info = "custom"
else:
taxo_info = "default"
else:
taxo_info = info
write_main_page(run_id, taxo_used, taxo_info, job_name=job_name)
main()
......
......@@ -25,7 +25,8 @@ def gen_sub_tree(chemin_result, run_id, assign_name, assignment, color, url_json
assignment_file.write("<script src='/pampa/js/jstree/jstree.min.js'></script>")
assignment_file.write("<link style='text/css' rel='stylesheet' href='/Style/css/bioinfo.css'>")
# url_json = "/pampa/result/" + run_id + "/" + assign_name + ".json"
# url_json = "/pampa/result/" + run_id + "/" + taxo_corename + ".json"
# url_json = "/pampa/data_pampa/taxonomy_reduced.json"
jstree_function = """
<script type="text/javascript">
......
......@@ -26,7 +26,7 @@ $(document).on("click", ".link_img", function() {
formData.append("run_id", curitem[5]);
formData.append("spectrum_name", $(this).data("spectrumname"));
formData.append("job_name", $(this).data("jobname"));
formData.append("tree", $(this).data("tree"));
formData.append("taxo", $(this).data("taxo"));
$.ajax({
url: "/cgi-bin/pampa/render_details.py",
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment