Skip to content
Snippets Groups Projects
Commit b2bb93a9 authored by Hernandez-Courbevoie Yohan's avatar Hernandez-Courbevoie Yohan
Browse files

Progress so far

parent a844421e
No related branches found
No related tags found
No related merge requests found
......@@ -89,20 +89,12 @@ RUN chown -R www-data:www-data /var/www/cgi-bin/pampa
RUN chown -R www-data:www-data /var/www/html/pampa
# Copie de la bdd
COPY ./postgresql/pampa.sql /tmp/
RUN chown postgres:postgres /tmp/pampa.sql
COPY ./postgresql/create_pampa_db /tmp/
RUN chmod +x /tmp/create_pampa_db
COPY ./postgresql/config.py /tmp/
COPY ./postgresql/database.ini /tmp/
COPY ./postgresql/fill_taxo.py /tmp/
COPY ./json_taxo.py /tmp/
# A changer
COPY ./taxonomy_reduced.tsv /tmp/
COPY ./postgresql /var/www/cgi-bin/pampa/postgresql
RUN chown postgres:postgres /var/www/cgi-bin/pampa/postgresql
# Expose le port 80
EXPOSE 80
# Lancer les différents services au démarrage (nginx, php-fpm, fcgiwrap)
CMD service php7.4-fpm start && /etc/init.d/fcgiwrap start -f && /etc/init.d/postgresql start && /tmp/create_pampa_db && /usr/bin/python /tmp/fill_taxo.py /tmp/taxonomy_reduced.tsv && /usr/bin/python /tmp/json_taxo.py /var/www/html/pampa && chown www-data:www-data -R /var/run/fcgiwrap.socket && chmod 777 /var/run/fcgiwrap.socket && nginx -g "daemon off;"
CMD service php7.4-fpm start && /etc/init.d/fcgiwrap start -f && /etc/init.d/postgresql start && /var/www/cgi-bin/pampa/postgresql/create_pampa_db && /usr/bin/python /var/www/cgi-bin/pampa/postgresql/fill_taxo.py /var/www/html/pampa/data_pampa/taxonomy_reduced.tsv && /usr/bin/python /var/www/cgi-bin/pampa/postgresql/json_taxo.py /var/www/html/pampa/data_pampa && chown www-data:www-data -R /var/run/fcgiwrap.socket && chmod 777 /var/run/fcgiwrap.socket && nginx -g "daemon off;"
from configparser import ConfigParser
def load_config(filename='/tmp/database.ini', section='postgresql'):
def load_config(filename='/var/www/cgi-bin/pampa/postgresql/database.ini', section='postgresql'):
parser = ConfigParser()
parser.read(filename)
......
createdb -U postgres pampa;
psql -U postgres -d pampa -f /tmp/pampa.sql;
psql -U postgres -d pampa -f /var/www/cgi-bin/pampa/postgresql/pampa.sql;
psql -U postgres -d pampa -c "ALTER user postgres PASSWORD 'postgres'";
#python /tmp/fill_taxo.py /tmp/taxonomy_all.tsv
......
......@@ -7,27 +7,47 @@ from os import listdir
import psycopg2
from config import load_config
import csv
import re
def load_file(path):
idx=0
def create_table(table):
    """Create a table named *table* from the ``pampa.sql`` DDL template.

    The template is read from ``pampa.sql``, its newlines are removed and
    every occurrence of the word ``taxonomy`` is replaced by *table* before
    the statement is executed.

    Args:
        table: Name of the table to create. It is spliced into the SQL text
            verbatim, so it must already have been validated by the caller.

    Errors raised while connecting or executing are printed, not re-raised.
    """
    config = load_config()
    # NOTE(review): "pampa.sql" is resolved against the current working
    # directory -- confirm the script is always launched from the folder
    # that contains it.
    with open("pampa.sql", "r") as ddl_file:  # close the handle deterministically
        sql = ddl_file.read().replace("\n", "").replace("taxonomy", table)
    try:
        with psycopg2.connect(**config) as conn:
            with conn.cursor() as cur:
                # The statement has no placeholders, so no parameters are
                # passed (the former ``(table)`` was a bare string, not a tuple).
                cur.execute(sql)
            conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
def load_file(path, run_id=None):
# idx=0
if run_id :
runid_pattern = re.compile("[A-Z][a-z]+_[0-9][0-9]_[0-9][0-9][0-9][0-9]_[0-9][0-9]_[0-9][0-9]_[0-9][0-9]_[0-9][0-9][0-9][0-9]") # Jul_05_2024_12_35_25_5220
assert runid_pattern.match(run_id)
table = "taxonomy_" + run_id
create_table(table)
else :
table = "taxonomy"
with open(path, 'r') as taxo_file:
tsv_file = csv.reader(taxo_file, delimiter="\t")
next(tsv_file)
for line in tsv_file:
# if idx == 10: break
idx+=1
taxoId = line[0]
commonName = line[1]
scientificName = line[2]
parentId = line[3]
rank = line[4]
print(line)
insert_data(line)
# idx+=1
# taxoId = line[0]
# commonName = line[1]
# scientificName = line[2]
# parentId = line[3]
# rank = line[4]
# print(line)
insert_data(table, line)
#add_root()
#re_arrange()
def insert_data(raw):
def insert_data(table, raw):
config = load_config()
taxoId = raw[0]
commonName = raw[1]
......@@ -38,20 +58,19 @@ def insert_data(raw):
#else:
# parentId=raw[3]
parentId = raw[3]
sql = "INSERT INTO taxonomy(taxonid, commonname, scientificname, parent, rank) VALUES(%s,%s,%s,%s,%s)"
sql = "INSERT INTO "+table+" (taxonid, commonname, scientificname, parent, rank) VALUES(%s,%s,%s,%s,%s)"
#print(sql)
try:
with psycopg2.connect(**config) as conn:
with conn.cursor() as cur:
cur.execute(sql, (taxoId, commonName, scientificName, parentId, rank))
cur.execute(sql, (table, taxoId, commonName, scientificName, parentId, rank))
conn.commit()
except (Exception, psycopg2.DatabaseError) as error:
print(error)
def add_root():
config = load_config()
insert = "INSERT INTO taxonomy(taxonid, commonname) VALUES(%s,%s)"
insert = "INSERT INTO taxonomy(taxonid, commonname) VALUES(%s,%s)" # update with table name as parameter
update = "UPDATE taxonomy set parent=-1 WHERE rank='subphylum'"
try:
with psycopg2.connect(**config) as conn:
......@@ -64,7 +83,7 @@ def add_root():
def re_arrange():
config = load_config()
insert = "INSERT INTO taxonomy(taxonid, scientificname, parent) VALUES(%s,%s,%s)"
insert = "INSERT INTO taxonomy(taxonid, scientificname, parent) VALUES(%s,%s,%s)" # update with table name as parameter
update = "UPDATE taxonomy set parent=0 WHERE scientificname like '%unclassified %'"
try:
with psycopg2.connect(**config) as conn:
......@@ -77,4 +96,7 @@ def re_arrange():
if __name__ == '__main__':
load_file(sys.argv[1])
if len(sys.argv) == 2:
load_file(sys.argv[1]) # taxonomy_path
else:
load_file(sys.argv[1], run_id=sys.argv[2]) # taxonomy_path, run_id
\ No newline at end of file
......@@ -7,17 +7,24 @@ from os import listdir
import psycopg2
from config import load_config
import json
import re
def execute(taxo_path):
def execute(taxo_path, run_id=None):
print("Generating taxonomy JSON file...")
config = load_config()
sql = "SELECT * FROM taxonomy"
if run_id :
runid_pattern = re.compile("[A-Z][a-z]+_[0-9][0-9]_[0-9][0-9][0-9][0-9]_[0-9][0-9]_[0-9][0-9]_[0-9][0-9]_[0-9][0-9][0-9][0-9]") # Jul_05_2024_12_35_25_5220
assert runid_pattern.match(run_id)
table = "taxonomy_" + run_id
else :
table = "taxonomy"
# sql = "SELECT * FROM %s"
try:
with psycopg2.connect(**config) as conn:
with conn.cursor() as cur:
create_json(cur, taxo_path)
#cur.execute(sql)
create_json(cur, table, taxo_path)
# cur.execute(sql, (table))
# rows = cur.fetchall()
# for row in rows:
# print("taxonId =", row[0])
......@@ -25,7 +32,7 @@ def execute(taxo_path):
print(error)
print("Done.")
def create_json(cur, taxo_path):
def create_json(cur, table, taxo_path):
json_array = [ ]
json_root = { "id" : 0, "text" : "Taxonomy", "children" : []}
children = [] # 1 seul element a racine de l'arbre!
......@@ -33,24 +40,25 @@ def create_json(cur, taxo_path):
json_root["state"] = state
# on construit children en ajoutant l'element racine -> taxonId = -1 (root)
add_childs(cur, json_root, 89593)
add_childs(cur, table, json_root, 89593)
json_array.append(json_root)
#print(json_array)
with open(taxo_path+'/taxonomy_mammals.json', 'w') as file:
json_name = "taxonomy_reduced.json" if table=="taxonomy" else table+".json"
with open(taxo_path+'/'+json_name, 'w') as file:
json.dump(json_array, file, indent=4)
#f = open("taxonomy_mammals.json")
#data = json.load(f)
def add_childs(cur, json, id):
sql = "SELECT * from taxonomy WHERE parent = " + str(id)
def add_childs(cur, table, json, id):
sql = "SELECT * from "+table+" WHERE parent = " + str(id)
# print("Building childs for taxon", id)
# print(sql)
childrens = []
try:
cur.execute(sql)
cur.execute(sql, (table))
rows = cur.fetchall()
idx = 0
# idx = 0
# print("rowcount",cur.rowcount)
for row in rows:
# print(row)
......@@ -69,4 +77,7 @@ def add_childs(cur, json, id):
# return json
if __name__ == '__main__':
execute(sys.argv[1])
if len(sys.argv) == 2:
execute(sys.argv[1]) # dir path for the taxo json created
else:
execute(sys.argv[1], sys.argv[2]) # path, run_id
CREATE TABLE taxonomy (
taxonId integer PRIMARY KEY NOT NULL,
commonName character varying(200),
scientificName character varying(200),
parent integer, --REFERENCES taxonomy(taxonId),
parent integer,
rank character varying(100)
)
\ No newline at end of file
Taxon Id Common name Scientific name Parent Rank
48138 Psammomys 10045 genus
9765 Balaenopteridae 9761 family
27671 Pipistrellus 9431 genus
325165 Nomascus 9577 genus
68410 Moschus 30533 genus
9901 Bison bison 9900 species
3072906 Musteloidea 379584 superfamily
29077 Eptesicus 9431 genus
2715852 Cebus imitator 9513 species
13616 Monodelphis domestica 13615 species
39087 Arvicolinae 337677 subfamily
9263 Metatheria 32525 no rank
37180 Budorcas 9963 genus
29088 Neomonachus schauinslandi 1907176 species
9599 Pongo 607660 genus
9600 Pongo pygmaeus 9599 species
10128 Apodemus 39107 genus
9305 Sarcophilus harrisii 9304 species
121588 Grammomys 39107 genus
9895 Bovidae 35500 family
37031 Suricata 9697 genus
1437010 Boreoeutheria 9347 no rank
9822 Sus 9821 genus
9986 Oryctolagus cuniculus 9984 species
2653790 Ancodonta 2653789 infraorder
325166 Symphalangus 9577 genus
9592 Gorilla 207598 genus
43179 Ictidomys tridecemlineatus 1141640 species
9750 Physeteridae 9722 family
419612 Camelus ferus 9836 species
27621 Molossus 9436 genus
314295 Hominoidea 9526 superfamily
29073 Ursus maritimus 9639 species
9257 Ornithorhynchus 9256 genus
9469 Nycticebus 9461 genus
32535 Acinonyx 338151 genus
89399 Rhinolophus sinicus 49442 species
9513 Cebus 38070 genus
9646 Ailuropoda melanoleuca 9645 species
9818 Orycteropus afer 9817 species
9364 Erinaceus 30577 genus
34883 Callorhinus 9702 genus
9858 Capreolus capreolus 9857 species
9722 Odontoceti 9721 parvorder
27592 Bovinae 9895 subfamily
189058 Neophocaena asiaeorientalis 34891 species
9979 Leporidae 9975 family
46841 Leopardus 338152 genus
10049 Arvicola 39087 genus
58055 Rhinolophidae 30560 family
9705 Odobenidae 3072905 family
9277 Dasyuridae 38608 family
38663 Perognathinae 10015 subfamily
59463 Myotis lucifugus 9434 species
376915 Lemuriformes 376911 infraorder
192404 Sturnira hondurensis 27659 species
9606 Homo sapiens 9605 species
10181 Heterocephalus glaber 10180 species
9815 Tubulidentata 311790 order
169418 Mustelinae 9655 subfamily
186990 Hipposideros armiger 58068 species
35497 Suina 91561 suborder
9915 Bos indicus 9903 species
37181 Budorcas taxicolor 37180 species
9729 Globicephala 9726 genus
9771 Balaenoptera musculus 9766 species
30610 Otolemur 40297 genus
30577 Erinaceinae 9363 subfamily
37029 Prionailurus bengalensis 37028 species
9568 Mandrillus leucophaeus 9567 species
227508 Megalonychidae 948953 family
400053 Sylvaemus group 10128 species group
575201 Hippopotamus amphibius kiboko 9833 subspecies
9569 Colobinae 9527 subfamily
9749 Delphinapterus leucas 9748 species
9402 Pteropus alecto 9401 species
338152 Felinae 9681 subfamily
9784 Loxodonta 9780 genus
37032 Suricata suricatta 37031 species
9645 Ailuropoda 9632 genus
9447 Lemur catta 9446 species
10090 Mus musculus 862507 species
61622 Rhinopithecus roxellana 542827 species
34878 Cervinae 9850 subfamily
9255 Monotremata 9254 order
30640 Sciurus carolinensis 10001 species
81572 Hylobates moloch 9578 species
9702 Otariidae 3072905 family
9475 Tarsiidae 376912 family
9857 Capreolus 9881 genus
60711 Chlorocebus sabaeus 392815 species
9782 Elephas 9780 genus
126287 Didelphinae 9265 subfamily
29132 Castoridae 1963757 family
10066 Muridae 337687 family
9708 Odobenus rosmarus divergens 9707 subspecies
9625 Vulpes 9608 genus
1230840 Orycteropus afer afer 9818 subspecies
1907176 Neomonachus 9709 genus
89462 Bubalus bubalis 9918 species
9483 Callithrix jacchus 1965096 species
338153 Pantherinae 9681 subfamily
9676 Hyaenidae 379583 family
130825 Ochotona curzoniae 9977 species
9432 Miniopterus 981671 genus
9445 Lemuridae 376915 family
9593 Gorilla gorilla 9592 species
34839 Chinchilla lanigera 10151 species
9347 Eutheria 32525 no rank
9639 Ursus 9632 genus
9726 Delphinidae 9722 family
9430 Desmodus rotundus 9429 species
9480 Callitrichinae 9498 subfamily
36723 Mustela erminea 9665 species
591932 Piliocolobus 9569 genus
9971 Pholidota 314145 order
1141640 Ictidomys 337730 genus
48867 Jaculus 35737 genus
9935 Ovis 9963 genus
1026970 Nannospalax galili 30636 species
10029 Cricetulus griseus 10028 species
9918 Bubalus 27592 genus
379583 Feliformia 33554 suborder
10139 Caviidae 33550 family
109678 Phodopus roborovskii 10043 species
33553 Sciuromorpha 9989 suborder
1574408 Cervus canadensis 9859 species
9577 Hylobatidae 314295 family
89673 Phyllostomus discolor 9422 species
9260 Tachyglossus 9259 genus
9526 Catarrhini 314293 parvorder
1963757 Castorimorpha 9989 suborder
9974 Manis javanica 9973 species
948953 Folivora 948950 suborder
176113 Tenrecinae 9369 subfamily
9570 Colobus 9569 genus
30660 Microbiotheriidae 38607 family
186995 Rhinolophinae 58055 subfamily
38605 Didelphimorphia 9263 order
42520 Peromyscus californicus 10040 species
61384 Lynx rufus 13124 species
214514 Perognathus longimembris pacificus 38669 subspecies
10028 Cricetulus 10026 genus
9590 Symphalangus syndactylus 325166 species
9787 Perissodactyla 314145 order
9431 Vespertilionidae 30560 family
54180 Trachypithecus francoisi 54136 species
9655 Mustelidae 3072906 family
9461 Lorisidae 376917 family
9661 Meles 1008252 genus
9833 Hippopotamus amphibius 9832 species
9957 Oryx 9959 genus
9714 Mirounga 9709 genus
9788 Equidae 9787 family
9836 Camelus 9835 genus
30533 Moschidae 35500 family
9973 Manis 9972 genus
9578 Hylobates 9577 genus
38607 Microbiotheria 9263 order
38625 Phascolarctos 38624 genus
379532 Propithecus coquereli 30600 species
33550 Hystricomorpha 9989 suborder
1884717 Pteronotus parnellii mesoamericanus 59476 subspecies
27675 Choloepus didactylus 9357 species
9612 Canis lupus 9611 species
51298 Myotis myotis 9434 species
183663 Soricinae 9376 subfamily
9716 Mirounga angustirostris 9714 species
30560 Microchiroptera 9397 suborder
1338369 Dipnotetrapodomorpha 8287 no rank
9748 Delphinapterus 9747 genus
9831 Hippopotamidae 2653790 family
9416 Artibeus 40234 genus
9608 Canidae 379584 family
9564 Theropithecus 9528 genus
9604 Hominidae 314295 family
9504 Aotus 376918 genus
9992 Marmota 337730 genus
9717 Phoca 9709 genus
9925 Capra hircus 9922 species
379584 Caniformia 33554 suborder
143302 Condylura cristata 143301 species
9999 Urocitellus parryii 1141645 species
8287 Sarcopterygii 117571 superclass
37442 Perognathus 38663 genus
9796 Equus caballus 9789 species
337730 Marmotini 337726 tribe
9845 Ruminantia 91561 suborder
9335 Phalangeridae 38609 family
9747 Monodontidae 9722 family
38674 Onychomys torridus 38667 species
43346 Bison bison bison 9901 subspecies
30539 Vicugna 9835 genus
9978 Ochotona princeps 9977 species
54131 Colobus angolensis 9570 species
13615 Monodelphis 126287 genus
35500 Pecora 9845 infraorder
9479 Platyrrhini 314293 parvorder
61621 Rhinopithecus bieti 542827 species
30611 Otolemur garnettii 30610 species
376912 Tarsiiformes 376913 infraorder
10046 Meriones 10045 genus
30615 Cheirogaleidae 376915 family
10180 Heterocephalus 10167 genus
1963758 Myomorpha 9989 suborder
30559 Megachiroptera 9397 suborder
2902005 Neogale 169418 genus
10129 Apodemus sylvaticus 400053 species
9282 Antechinus 9277 genus
9527 Cercopithecidae 314294 family
9721 Cetacea 2653789 infraorder
49442 Rhinolophus 186995 genus
1965096 Callithrix 9481 subgenus
9595 Gorilla gorilla gorilla 9593 subspecies
9443 Primates 314146 order
30636 Nannospalax 10061 genus
9336 Trichosurus 9335 genus
9357 Choloepus 227508 genus
9643 Ursus americanus 9639 species
9644 Ursus arctos 9639 species
10026 Cricetinae 337677 subfamily
376918 Aotidae 9479 family
9767 Balaenoptera acutorostrata 9766 species
9256 Ornithorhynchidae 9255 family
482536 Galeopterus 30657 genus
9365 Erinaceus europaeus 9364 species
10141 Cavia porcellus 10140 species
32523 Tetrapoda 1338369 no rank
27609 Lagenorhynchus 9726 genus
37293 Aotus nancymaae 9504 species
9989 Rodentia 314147 order
10114 Rattus 39107 genus
51338 Castor canadensis 10184 species
9376 Soricidae 9362 family
9900 Bison 27592 genus
89593 Craniata 0 subphylum
41426 Phacochoerus africanus 41425 species
9738 Tursiops 9726 genus
54602 Macaca thibetana 9539 species
40674 Mammalia 32524 class
38667 Onychomys 337963 genus
9753 Physeter 9750 genus
337687 Muroidea 1963758 no rank
29078 Eptesicus fuscus 29077 species
607660 Ponginae 9604 subfamily
51337 Jaculus jaculus 48867 species
40150 Monodon 9747 genus
39432 Saimiri boliviensis boliviensis 27679 subspecies
9825 Sus scrofa domesticus 9823 subspecies
9963 Caprinae 9895 subfamily
9704 Zalophus californianus 9703 species
9832 Hippopotamus 9831 genus
109478 Myotis brandtii 9434 species
452646 Neovison vison 2902005 species
9740 Phocoenidae 9722 family
269649 Chionomys nivalis 269648 species
34880 Nyctereutes procyonoides 34879 species
9913 Bos taurus 9903 species
33561 Dromiciops 30660 genus
482537 Galeopterus variegatus 482536 species
33562 Dromiciops gliroides 33561 species
143301 Condylura 9373 genus
9697 Herpestidae 379583 family
35658 Mastomys coucha 30639 species
862507 Mus 10088 subgenus
95912 Hyaena hyaena 95911 species
9662 Meles meles 9661 species
91561 Artiodactyla 314145 order
376911 Strepsirrhini 9443 suborder
29064 Panthera uncia 9688 species
542827 Rhinopithecus 9569 genus
71112 Lontra 169417 genus
58068 Hipposideros 186994 genus
38070 Cebinae 9498 subfamily
30648 Dipodidae 1963761 family
76717 Lontra canadensis 71112 species
9254 Prototheria 40674 no rank
30608 Microcebus murinus 13149 species
9703 Zalophus 9702 genus
10043 Phodopus 10026 genus
3072905 Pinnipedia 379584 no rank
291302 Miniopterus natalensis 9432 species
9436 Molossidae 30560 family
59534 Oryx dammah 9957 species
35737 Dipodinae 30648 subfamily
9362 Eulipotyphla 314145 order
9373 Talpidae 9362 family
9520 Saimiri 378850 genus
337664 Spalacidae 337687 family
9991 Sciurinae 55153 subfamily
10040 Peromyscus 337963 genus
948950 Pilosa 9348 order
9258 Ornithorhynchus anatinus 9257 species
59475 Pteronotus 59445 genus
38662 Dipodomyinae 10015 subfamily
9565 Theropithecus gelada 9564 species
41425 Phacochoerus 9821 genus
346063 Bubalus carabanensis 9918 species
225400 Myotis davidii 9434 species
9567 Mandrillus 9528 genus
10116 Rattus norvegicus 10114 species
32524 Amniota 32523 no rank
9922 Capra 9963 genus
564181 Peromyscus californicus insignis 42520 subspecies
10159 Octodon 10158 genus
29139 Vombatus ursinus 29138 species
10036 Mesocricetus auratus 10035 species
9369 Tenrecidae 311790 family
2653789 Whippomorpha 91561 suborder
9434 Myotis 9431 genus
10053 Microtus 39087 genus
1532884 Sapajus 38070 genus
13124 Lynx 338152 genus
27622 Molossus molossus 27621 species
30600 Propithecus 30599 genus
32525 Theria 40674 no rank
9682 Felis 338152 genus
491861 Grammomys surdaster 121588 species
36803 Sorex cinereus 9379 species
30657 Cynocephalidae 30656 family
9371 Echinops telfairi 9370 species
34891 Neophocaena 9740 genus
33554 Carnivora 314145 order
9995 Marmota monax 9992 species
9668 Mustela putorius 9665 species
9377 Suncus 183662 genus
68408 Moschus berezovskii 68410 species
38608 Dasyuromorphia 9263 order
9681 Felidae 379583 family
376913 Haplorrhini 9443 suborder
9976 Ochotonidae 9975 family
10158 Octodontidae 33550 family
9798 Equus przewalskii 9789 species
30656 Dermoptera 314146 order
9823 Sus scrofa 9822 species
54136 Trachypithecus 9569 genus
9984 Oryctolagus 9979 genus
10167 Bathyergidae 33550 family
9993 Marmota marmota 9992 species
9975 Lagomorpha 314147 order
40151 Monodon monoceros 40150 species
9407 Rousettus aegyptiacus 9406 species
9338 Vombatidae 38609 family
981671 Miniopterinae 9431 subfamily
9417 Artibeus jamaicensis 9416 species
9398 Pteropodidae 30559 family
9720 Phoca vitulina 9717 species
37028 Prionailurus 338152 genus
1141645 Urocitellus 337730 genus
9817 Orycteropus 9816 genus
9528 Cercopithecinae 9527 subfamily
61153 Arvicanthis 39107 genus
9348 Xenarthra 9347 superorder
9555 Papio anubis 9554 species
1868481 Carlito 9475 genus
392815 Chlorocebus 9528 genus
9689 Panthera leo 9688 species
59476 Pteronotus parnellii 59475 species
7776 Gnathostomata 7742 no rank
311790 Afrotheria 9347 superorder
46844 Leopardus geoffroyi 46841 species
93162 Marmota flaviventris 9992 species
9656 Lutra 169417 genus
30639 Mastomys 39107 genus
9601 Pongo abelii 9599 species
9972 Manidae 9971 family
9541 Macaca fascicularis 9539 species
9598 Pan troglodytes 9596 species
10140 Cavia 10139 genus
95911 Hyaena 9676 genus
9903 Bos 27592 genus
9785 Loxodonta africana 9784 species
9515 Sapajus apella 1532884 species
9554 Papio 9528 genus
9838 Camelus dromedarius 9836 species
186994 Hipposideridae 30560 family
10015 Heteromyidae 1963757 family
10035 Mesocricetus 10026 genus
10150 Chinchillidae 33550 family
10047 Meriones unguiculatus 10046 species
9539 Macaca 9528 genus
40238 Phyllostominae 9415 subfamily
61383 Lynx canadensis 13124 species
9470 Nycticebus coucang 9469 species
9481 Callithrix 9480 genus
39107 Murinae 10066 subfamily
314147 Glires 314146 no rank
38626 Phascolarctos cinereus 38625 species
34885 Eumetopias 9702 genus
9498 Cebidae 9479 family
10016 Dipodomys 38662 genus
9429 Desmodus 40237 genus
9859 Cervus 34878 genus
10045 Gerbillinae 10066 subfamily
9834 Tylopoda 91561 suborder
337752 Sciurini 9991 tribe
9816 Orycteropodidae 9815 family
143291 Pteropus giganteus 9401 species
77225 Pteropodinae 9398 subfamily
9657 Lutra lutra 9656 species
314146 Euarchontoglires 1437010 superorder
9755 Physeter macrocephalus 9753 species
591936 Piliocolobus tephrosceles 591932 species
9731 Globicephala melas 9729 species
9531 Cercocebus atys 9529 species
447135 Myodes glareolus 447134 species
9529 Cercocebus 9528 genus
9959 Hippotraginae 9895 subfamily
169417 Lutrinae 9655 subfamily
9665 Mustela 169418 genus
42254 Sorex araneus 9379 species
423606 Fukomys 10167 genus
376917 Lorisiformes 376911 infraorder
9596 Pan 207598 genus
1706337 Neophocaena asiaeorientalis asiaeorientalis 189058 subspecies
269648 Chionomys 39087 genus
61388 Prionailurus viverrinus 37028 species
9415 Phyllostomidae 30560 family
10088 Mus 39107 genus
9363 Erinaceidae 9362 family
55153 Sciuridae 33553 family
378850 Saimiriinae 9498 subfamily
109475 Suncus etruscus 9377 species
9835 Camelidae 9834 family
1008252 Melinae 9655 subfamily
9397 Chiroptera 314145 order
337963 Neotominae 337677 subfamily
1047088 Arvicola amphibius 10049 species
10041 Peromyscus leucopus 10040 species
337677 Cricetidae 337687 family
9611 Canis 9608 genus
40237 Desmodontinae 9415 subfamily
9793 Equus asinus 9789 species
117570 Teleostomi 7776 no rank
9259 Tachyglossidae 9255 family
9370 Echinops 176113 genus
9446 Lemur 9445 genus
40297 Galagidae 376917 family
7742 Vertebrata 89593 no rank
1963761 Dipodoidea 1963758 superfamily
9685 Felis catus 9682 species
9337 Trichosurus vulpecula 9336 species
29138 Vombatus 9338 genus
9422 Phyllostomus 40238 genus
34886 Eumetopias jubatus 34885 species
246437 Tupaia chinensis 9394 species
9401 Pteropus 77225 genus
9994 Marmota marmota marmota 9993 subspecies
9393 Tupaiidae 9392 family
337726 Xerinae 55153 subfamily
9709 Phocidae 3072905 family
9707 Odobenus rosmarus 9706 species
314293 Simiiformes 376913 infraorder
9394 Tupaia 9393 genus
9761 Mysticeti 9721 parvorder
10020 Dipodomys ordii 10016 species
9379 Sorex 183663 genus
494514 Vulpes lagopus 9625 species
10001 Sciurus 337752 genus
34879 Nyctereutes 9608 genus
9694 Panthera tigris 9688 species
50954 Talpa occidentalis 9374 species
885580 Fukomys damarensis 423606 species
314294 Cercopithecoidea 9526 superfamily
34884 Callorhinus ursinus 34883 species
9789 Equus 9788 genus
72004 Bos mutus 9903 species
30538 Vicugna pacos 30539 species
9783 Elephas maximus 9782 species
9265 Didelphidae 38605 family
338151 Acinonychinae 9681 subfamily
9821 Suidae 35497 family
9860 Cervus elaphus 9859 species
9732 Orcinus 9726 genus
30599 Indriidae 376915 family
9850 Cervidae 35500 family
38609 Diprotodontia 9263 order
9544 Macaca mulatta 9539 species
61853 Nomascus leucogenys 325165 species
48139 Psammomys obesus 48138 species
286419 Canis lupus dingo 9612 subspecies
9688 Panthera 338153 genus
27659 Sturnira 40234 genus
38669 Perognathus longimembris 37442 species
230844 Peromyscus maniculatus bairdii 10042 subspecies
314145 Laurasiatheria 1437010 superorder
9741 Phocoena 9740 genus
10151 Chinchilla 10150 genus
99487 Elephas maximus indicus 9783 subspecies
9615 Canis lupus familiaris 9612 subspecies
9605 Homo 207598 genus
10042 Peromyscus maniculatus 10040 species
9977 Ochotona 9976 genus
9304 Sarcophilus 9277 genus
10184 Castor 29132 genus
27679 Saimiri boliviensis 9520 species
42100 Phocoena sinus 9741 species
143292 Manis pentadactyla 9973 species
10117 Rattus rattus 10114 species
9392 Scandentia 314146 order
9406 Rousettus 77225 genus
38775 Antechinus flavipes 9282 species
9669 Mustela putorius furo 9668 subspecies
105255 Dipodomys spectabilis 10016 species
59445 Mormoopidae 30560 family
40234 Stenodermatinae 9415 subfamily
10160 Octodon degus 10159 species
117571 Euteleostomi 117570 no rank
61156 Arvicanthis niloticus 61153 species
32536 Acinonyx jubatus 32535 species
79684 Microtus ochrogaster 10053 species
257877 Macaca thibetana thibetana 54602 subspecies
10089 Mus caroli 862507 species
9733 Orcinus orca 9732 species
9739 Tursiops truncatus 9738 species
183662 Crocidurinae 9376 subfamily
9779 Proboscidea 311790 order
9374 Talpa 9373 genus
10061 Spalacinae 337664 subfamily
9632 Ursidae 379584 family
207598 Homininae 9604 subfamily
336983 Colobus angolensis palliatus 54131 subspecies
13149 Microcebus 30615 genus
447134 Myodes 39087 genus
9261 Tachyglossus aculeatus 9260 species
9715 Mirounga leonina 9714 species
111838 Microtus oregoni 10053 species
9766 Balaenoptera 9765 genus
38624 Phascolarctidae 38609 family
9545 Macaca nemestrina 9539 species
9940 Ovis aries 9935 species
59472 Pipistrellus kuhlii 27671 species
9881 Odocoileinae 9850 subfamily
89248 Equus quagga 9789 species
100897 Microtus fortis 10053 species
1868482 Carlito syrichta 1868481 species
9706 Odobenus 9705 genus
9627 Vulpes vulpes 9625 species
90247 Lagenorhynchus obliquidens 27609 species
9780 Elephantidae 9779 family
#!/usr/bin/env python3
import argparse
import sys
import os
# local import
from src import peptide_table as pt
from src import sequences as seq
from src import taxonomy as ta
from src import markers
from src import limit
from src import fasta_parsing as fa
#from src import message
def main():
    """Reduce a taxonomy to the taxids referenced by the supplied inputs.

    Command-line options:
        -p  one or more peptide tables (TSV)
        -o  output path for the reduced taxonomy
        -f  FASTA file with new sequences
        -d  directory containing FASTA files
        -l  limit file listing clades (``OX=``)
        -t  full taxonomy (TSV, required)

    At least one of ``-p``, ``-f``, ``-d`` or ``-l`` must be given; otherwise
    "Missing input" is printed and the process exits with status 1.

    The taxids collected from the inputs are intersected with the primary
    taxonomy; taxids reported as lost by that intersection are printed, then
    the resulting taxonomy is printed and written to the ``-o`` path.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-p", dest="peptide_table", nargs='+', help="Peptide table (TSV file).", type=str)
    parser.add_argument("-o", dest="output", help="Output path (should include the output file name)", type=str)
    parser.add_argument("-f", dest="fasta", help="FASTA file that contains new sequences.", type=str)
    parser.add_argument("-d", dest="directory", help="Directory that contains FASTA files.", type=str)
    parser.add_argument("-l", dest="limit", help="Limit file that contains a list of clades (OX=)", type=str)
    parser.add_argument("-t", dest="taxonomy", help="Taxonomy (TSV file)", type=str, required=True)
    args = parser.parse_args()

    # Validate the inputs before the taxonomy is parsed, and report the
    # failure through a non-zero exit status so callers can detect it.
    if not (args.peptide_table or args.fasta or args.directory or args.limit):
        print("Missing input")
        sys.exit(1)

    # parsing taxonomy
    primary_taxonomy = ta.parse_taxonomy_simple_file(args.taxonomy)

    # parsing models for peptide tables, sequences or limits
    set_of_taxid = set()
    if args.peptide_table:
        set_of_markers = pt.parse_peptide_tables(args.peptide_table, None, primary_taxonomy)
        set_of_taxid.update({m.taxid for m in set_of_markers})
    if args.fasta or args.directory:
        set_of_sequences = fa.build_set_of_sequences(args.fasta, args.directory, None, primary_taxonomy)
        set_of_taxid.update({s.taxid for s in set_of_sequences})
    if args.limit:
        list_of_constraints = limit.parse_limits(args.limit)
        # each constraint is indexed by "OX" to obtain its taxids
        set_of_taxid.update({t for constraint in list_of_constraints for t in constraint["OX"]})

    secondary_taxonomy, lost_taxid = primary_taxonomy.intersection(set_of_taxid)
    if len(lost_taxid) > 0:
        print("The following taxids were not found in "+args.taxonomy)
        print(lost_taxid)

    ta.table_print(secondary_taxonomy)
    ta.create_taxonomy_file(secondary_taxonomy, args.output)


if __name__ == "__main__":
    main()
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Modules import
import os
import common
# Functions definition
def command_builder(script, taxonomy, output="taxonomy_reduced.tsv", python="python3", peptides_tables=None, sequence_dir=None):
    """Build the shell command line that runs the taxonomy filtering script.

    Args:
        script: Path of the script to run.
        taxonomy: Path passed to the script's ``-t`` option.
        output: Path passed to ``-o``; ``.tsv`` is appended when the path has
            no file extension.
        python: Interpreter used to launch the script.
        peptides_tables: Optional list of paths passed to ``-p``
            (space-separated); the option is omitted when empty or None.
        sequence_dir: Optional directory passed to ``-d``.

    Returns:
        The command as a single string. Arguments are inserted verbatim,
        without shell quoting.
    """
    # splitext() returns a (root, ext) tuple; only the extension part tells
    # whether the output path lacks a suffix.
    if os.path.splitext(output)[1] == "":
        output += ".tsv"
    command = f"{python} {script} -t {taxonomy} -o {output}"
    if peptides_tables:
        command += f" -p {' '.join(peptides_tables)}"
    if sequence_dir:
        command += f" -d {sequence_dir}"
    return command
# main
if __name__ == "__main__":
    # Peptide tables handed to the filtering script through its -p option.
    reference_tables = [
        common.PEPTIDES_MAMMALS_FILE,
        common.PEPTIDES_BIRDS_FILE,
        common.PEPTIDES_FISHES_FILE,
        common.PEPTIDES_REPTILES_FILE,
    ]
    filtering_script = common.CGI_PATH + "/reduce_taxo/main_taxonomy_filtering.py"
    # Build the command line and run it through the shell.
    os.system(
        command_builder(
            filtering_script,
            common.TAXONOMY_ALL_FILE,
            output=common.TAXONOMY_REDUCED_FILE,
            peptides_tables=reference_tables,
            python="python",
        )
    )
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment