Skip to content
Snippets Groups Projects
Commit 6fa40ed0 authored by Rohmer Coralie's avatar Rohmer Coralie
Browse files

python graph

parent 0d92cb04
Branches
No related tags found
No related merge requests found
......@@ -111,9 +111,9 @@ onerror:
#-------------------------------------------------------------------------------
rule all :
input :
expand(EXP + "/" + EXP_NAME + "/results/" + EXP_NAME + "_data_align_t{threshold}.csv", data_set=DATA_SETS, threshold=THRESHOLDS)
#expand('{data_set}/results/graph_{attribute}.pdf', data_set=DATA_SETS, attribute=ATTRIBUTES_DATA),
#expand(EXP + '/'+EXP_NAME + '/results/graph_{attribute}.pdf', data_set=DATA_SETS, attribute=ATTRIBUTES_DATA)
expand(EXP + "/" + EXP_NAME + "/results/" + EXP_NAME + "_data_align_t{threshold}.csv", data_set=DATA_SETS, threshold=THRESHOLDS),
expand('{data_set}/results/' + EXP_NAME + '_graph_{attribute}.pdf', data_set=DATA_SETS, attribute=ATTRIBUTES_DATA),
expand(EXP + '/'+EXP_NAME + '/results/' + EXP_NAME + '_graph_{attribute}.pdf', data_set=DATA_SETS, attribute=ATTRIBUTES_DATA)
#-------------------------------------------------------------------------------
# Data set preparation
......@@ -463,15 +463,15 @@ rule data_display :
input :
expand("{{data_set}}/results/"+EXP_NAME+"_data_align_t{threshold}.csv", threshold=THRESHOLDS)
output :
"{data_set}/results/graph_{attribute}.pdf",
"{data_set}/results/"+EXP_NAME+"_graph_{attribute}.pdf",
message:
"Data display for {wildcards.data_set} (Attribute={wildcards.attribute})"
log:
"{data_set}/logs/10_data_display_{attribute}.log"
conda:
"env_conda/r.yaml"
"env_conda/python3.yaml"
shell :
'ORDER="./src/graphical_seq_consensus_analysis.r {wildcards.attribute} {wildcards.data_set}/results {input}";'
'ORDER="./src/graphical_seq_consensus_analysis.py {wildcards.attribute} {output} {input}";'
'echo "ORDER: $ORDER" >{log};'
'$ORDER >{output} 2>>{log}'
......
name: python3
channels:
- conda-forge
- anaconda
- bioconda
- defaults
......@@ -8,29 +9,65 @@ dependencies:
- _openmp_mutex=4.5=1_gnu
- biopython=1.78=py37h7b6447c_0
- blas=1.0=mkl
- ca-certificates=2020.10.14=0
- certifi=2020.6.20=py37_0
- brotli=1.0.9=h9c3ff4c_4
- ca-certificates=2021.10.8=ha878542_0
- certifi=2021.10.8=py37h89c1867_1
- cycler=0.11.0=pyhd8ed1ab_0
- dbus=1.13.6=he372182_0
- expat=2.4.1=h9c3ff4c_0
- fontconfig=2.13.1=hba837de_1005
- fonttools=4.25.0=pyhd3eb1b0_0
- freetype=2.10.4=h0708190_1
- glib=2.69.1=h5202010_0
- gst-plugins-base=1.14.0=hbbd80ab_1
- gstreamer=1.14.0=h28cd5cc_2
- icu=58.2=hf484d3e_1000
- intel-openmp=2021.4.0=h06a4308_3561
- jpeg=9d=h36c2ea0_0
- kiwisolver=1.3.1=py37h2531618_0
- ld_impl_linux-64=2.35.1=h7274673_9
- libffi=3.3=he6710b0_2
- libgcc-ng=9.3.0=h5101ec6_17
- libgomp=9.3.0=h5101ec6_17
- libpng=1.6.37=h21135ba_2
- libstdcxx-ng=9.3.0=hd4cf53a_17
- libtiff=4.0.10=hc3755c2_1005
- libuuid=2.32.1=h7f98852_1000
- libxcb=1.13=h7f98852_1003
- libxml2=2.9.12=h03d6c58_0
- lz4-c=1.9.3=h9c3ff4c_1
- matplotlib=3.4.3=py37h89c1867_2
- matplotlib-base=3.4.3=py37hbbc1b5f_0
- mkl=2021.4.0=h06a4308_640
- mkl-service=2.4.0=py37h7f8727e_0
- mkl_fft=1.3.1=py37hd3c417c_0
- mkl_random=1.2.2=py37h51133e4_0
- munkres=1.1.4=pyh9f0ad1d_0
- ncurses=6.3=h7f8727e_0
- numpy=1.21.2=py37h20f2e39_0
- numpy-base=1.21.2=py37h79a1101_0
- olefile=0.46=pyh9f0ad1d_1
- openssl=1.1.1l=h7f8727e_0
- pcre=8.45=h9c3ff4c_0
- pillow=6.2.1=py37h6b7be26_0
- pip=21.0.1=py37h06a4308_0
- pthread-stubs=0.4=h36c2ea0_1001
- pyparsing=3.0.6=pyhd8ed1ab_0
- pyqt=5.9.2=py37hcca6a23_4
- python=3.7.11=h12debd9_0
- python-dateutil=2.8.2=pyhd8ed1ab_0
- python_abi=3.7=2_cp37m
- qt=5.9.7=h5867ecd_1
- readline=8.1=h27cfd23_0
- setuptools=58.0.4=py37h06a4308_0
- sip=4.19.8=py37hf484d3e_0
- six=1.15.0=py_0
- sqlite=3.36.0=hc218d9a_0
- tk=8.6.11=h1ccaba5_0
- tornado=6.1=py37h5e8e339_1
- wheel=0.37.0=pyhd3eb1b0_1
- xorg-libxau=1.0.9=h7f98852_0
- xorg-libxdmcp=1.1.3=h7f98852_0
- xz=5.2.5=h7b6447c_0
- zlib=1.2.11=h7b6447c_3
- zstd=1.4.9=ha95c52a_0
name: r
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- _libgcc_mutex=0.1=conda_forge
- _openmp_mutex=4.5=1_gnu
- _r-mutex=1.0.1=anacondar_1
- binutils_impl_linux-64=2.35.1=h27ae35d_9
- binutils_linux-64=2.35.1=h454624a_30
- bwidget=1.9.14=ha770c72_1
- bzip2=1.0.8=h7f98852_4
- c-ares=1.18.1=h7f98852_0
- ca-certificates=2021.10.8=ha878542_0
- cairo=1.16.0=h6cf1ce9_1008
- curl=7.79.1=h494985f_1
- font-ttf-dejavu-sans-mono=2.37=hab24e00_0
- font-ttf-inconsolata=3.000=h77eed37_0
- font-ttf-source-code-pro=2.038=h77eed37_0
- font-ttf-ubuntu=0.83=hab24e00_0
- fontconfig=2.13.1=hba837de_1005
- fonts-conda-ecosystem=1=0
- fonts-conda-forge=1=0
- freetype=2.10.4=h0708190_1
- fribidi=1.0.10=h36c2ea0_0
- gcc_impl_linux-64=9.3.0=h6df7d76_17
- gcc_linux-64=9.3.0=h1ee779e_30
- gettext=0.19.8.1=h73d1719_1008
- gfortran_impl_linux-64=9.3.0=h5abd6ed_17
- gfortran_linux-64=9.3.0=hf47db2c_30
- graphite2=1.3.13=h58526e2_1001
- gsl=2.6=he838d99_2
- gxx_impl_linux-64=9.3.0=hbdd7822_17
- gxx_linux-64=9.3.0=h7e70986_30
- harfbuzz=3.0.0=h83ec7ef_1
- icu=68.2=h9c3ff4c_0
- jbig=2.1=h7f98852_2003
- jpeg=9d=h36c2ea0_0
- kernel-headers_linux-64=2.6.32=he073ed8_15
- krb5=1.19.2=h48eae69_3
- ld_impl_linux-64=2.35.1=h7274673_9
- lerc=3.0=h9c3ff4c_0
- libblas=3.9.0=11_linux64_openblas
- libcblas=3.9.0=11_linux64_openblas
- libcurl=7.79.1=h494985f_1
- libdeflate=1.8=h7f98852_0
- libedit=3.1.20191231=he28a2e2_2
- libev=4.33=h516909a_1
- libffi=3.4.2=h9c3ff4c_4
- libgcc-devel_linux-64=9.3.0=hb95220a_17
- libgcc-ng=11.2.0=h1d223b6_11
- libgfortran-ng=9.3.0=ha5ec8a7_17
- libgfortran5=9.3.0=ha5ec8a7_17
- libglib=2.70.0=h174f98d_1
- libgomp=11.2.0=h1d223b6_11
- libiconv=1.16=h516909a_0
- liblapack=3.9.0=11_linux64_openblas
- libnghttp2=1.43.0=ha19adfc_1
- libopenblas=0.3.17=pthreads_h8fe5266_1
- libpng=1.6.37=h21135ba_2
- libssh2=1.10.0=ha35d2d1_2
- libstdcxx-devel_linux-64=9.3.0=hf0c5c8d_17
- libstdcxx-ng=11.2.0=he4da1e4_11
- libtiff=4.3.0=h6f004c6_2
- libuuid=2.32.1=h7f98852_1000
- libwebp-base=1.2.1=h7f98852_0
- libxcb=1.13=h7f98852_1003
- libxml2=2.9.12=h72842e0_0
- libzlib=1.2.11=h36c2ea0_1013
- lz4-c=1.9.3=h9c3ff4c_1
- make=4.3=hd18ef5c_1
- ncurses=6.2=h58526e2_4
- openssl=3.0.0=h7f98852_2
- pango=1.48.10=h54213e6_2
- pcre=8.45=h9c3ff4c_0
- pcre2=10.37=h032f7d1_0
- pixman=0.40.0=h36c2ea0_0
- pthread-stubs=0.4=h36c2ea0_1001
- r-base=4.0.5=hb67fd72_2
- readline=8.1=h46c0cb4_0
- sed=4.8=he412f7d_0
- sysroot_linux-64=2.12=he073ed8_15
- tk=8.6.11=h27826a3_1
- tktable=2.10=hb7b940f_3
- xorg-kbproto=1.0.7=h7f98852_1002
- xorg-libice=1.0.10=h7f98852_0
- xorg-libsm=1.2.3=hd9c2040_1000
- xorg-libx11=1.7.2=h7f98852_0
- xorg-libxau=1.0.9=h7f98852_0
- xorg-libxdmcp=1.1.3=h7f98852_0
- xorg-libxext=1.3.4=h7f98852_1
- xorg-libxrender=0.9.10=h7f98852_1003
- xorg-libxt=1.2.1=h7f98852_2
- xorg-renderproto=0.11.1=h7f98852_1002
- xorg-xextproto=7.3.0=h7f98852_1002
- xorg-xproto=7.0.31=h7f98852_1007
- xz=5.2.5=h516909a_1
- zlib=1.2.11=h36c2ea0_1013
- zstd=1.5.0=ha95c52a_0
prefix: /home/coralie/LogicielLocal/anaconda3/envs/r
#!/usr/bin/Rscript
library(ggplot2)
library(stringr)
library(tidyr)
args = commandArgs(trailingOnly=TRUE)
# test if there is at least one argument: if not, return an error
if (length(args)<3) {
stop("LACK OF ARGUMENTS: ./graphical_data_time.r attribute output_directory file ...", call.=FALSE)
}
attribute=args[1]
output_dir=args[2]
file=args[3]
Database <- read.table(file,header = TRUE, sep = ",")
group_MSA=unique(Database$MSA)
nb_base_pairs=unique(Database$base)
nb_reads=unique(Database$read)
nb_read_min=min(as.numeric(nb_reads))
nb_read_max=max(as.numeric(nb_reads))
attribute_max=max(Database[attribute])
selected_column = c('MSA',"base","read",attribute)
data_attribute=Database[selected_column]
pdf(paste(output_dir,"/graph_",attribute,".pdf",sep=""))
for (MSA in group_MSA){
id_MSA=grep(MSA, data_attribute$MSA, perl = TRUE,ignore.case = TRUE)
data_MSA=data_attribute[id_MSA,]
if(nrow(data_MSA)!=0){
data_MSA=subset(data_MSA, select=-c(MSA))
data_MSA_organize=structure(list(read = nb_reads), class="data.frame", row.names=1:length(nb_reads))
for (base in nb_base_pairs) {
data_MSA_organize[as.character(base)] <- NA
data_base=data_MSA[data_MSA$base==base,]
if(nrow(data_base)!=0){
for (read in nb_reads) {
data_read=data_base[data_base$read==read,]
if(nrow(data_read)!=0){
data_MSA_organize[data_MSA_organize$read==read,as.character(base)]=data_read[attribute]
}
}
}
}
data <- gather(
data = data_MSA_organize,
key = region_size,
value = VAL,
as.character(nb_base_pairs)
)
A=ggplot(
data,
aes(x = read, y = VAL, color = region_size)
) + geom_line() + ggtitle(paste(attribute,"pour",MSA)) + xlab("Read number") + ylab(attribute) + ylim(0,attribute_max)
print(A)
}
}
dev.off()
#!/usr/bin/env python3
import sys,csv
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from random import random, choice
from pylab import *
plt.style.use('seaborn-whitegrid')
import numpy as np
......@@ -9,11 +11,17 @@ def recovered_threshold(file):
return path
attribute=sys.argv[1]
dir_out=sys.argv[2]
files=[sys.argv[3]]#,sys.argv[4]]
files=sys.argv[3:]
out=sys.argv[2]
files=sys.argv[3:]
threshold_independent=1
if (attribute =="time" or attribute =="elapsed" or attribute =="memory"):
files=[files[0]]
threshold_independent=0
data={}
attribute_max=""
attribute_max="a"
depths=[]
region_sizes=[]
for file in files:
threshold=recovered_threshold(file)
with open(file, newline='') as csvfile:
......@@ -27,43 +35,57 @@ for file in files:
else:
msa=row[titles["MSA"]]
region_size=row[titles["region_size"]]
depth=row[titles["depth"]]
depth=int(row[titles["depth"]])
if msa not in data[threshold]:
data[threshold][msa]={}
if region_size not in data[threshold][msa]:
data[threshold][msa][region_size]={}
data[threshold][msa][region_size]["depth"]=[]
data[threshold][msa][region_size]["attribute"]=[]
if region_size not in region_sizes:
region_sizes.append(region_size)
if depth not in data[threshold][msa][region_size]:
data[threshold][msa][region_size]["depth"].append(int(depth))
val=row[titles[attribute]]
if val > attribute_max:
data[threshold][msa][region_size]["depth"].append(depth)
if depth not in depths:
depths.append(depth)
val=float(row[titles[attribute]])
if attribute_max=="a" or val > attribute_max:
attribute_max=val
data[threshold][msa][region_size]["attribute"].append(float(val))
data[threshold][msa][region_size]["attribute"].append(val)
depths=sort(depths)
region_sizes=sort(region_sizes)
min_depth=depths[0]
max_depth=depths[-1]
colors=['b','g','r','c','m','y']
len_regions=len(region_sizes)
if len_regions>len(colors):
all_colors=list(matplotlib.colors.cnames.keys())
random()
while len_regions>len(colors):
colors.append(choice(all_colors))
print(data)
print(attribute_max)
color=["b","r","g","p","y"]
pp = PdfPages(out)
for threshold in data:
for msa in data[threshold]:
fig = plt.figure()
ax = plt.axes()
ax = ax.set(xlabel='Depth', ylabel=attribute)
plt.title(attribute + " for " + msa)
if (threshold_independent==0):
threshold_display=""
else:
threshold_display=" (Threshold: " + threshold+")"
plt.title(attribute + " for " + msa + threshold_display)
i=0
plt.axis([0, 50,0, float(attribute_max)+10]);
plt.axis([min_depth, max_depth,0, float(attribute_max)+5]);
for region_size in data[threshold][msa]:
x = np.array(data[threshold][msa][region_size]["depth"])
y = np.array(data[threshold][msa][region_size]["attribute"])
plt.plot(x, y, color=color[i], linestyle='solid', label=" Region size: " + region_size)
plt.scatter(x,y,color=colors[i])
plt.plot(x, y, color=colors[i], linestyle='solid', label=" Region size: " + region_size)
i+=1
plt.legend(loc='lower left');
fig = plt.figure()
ax = plt.axes()
x = np.array([10,20,50])
y = np.array([10,20,50])
plt.plot(x, y, color='blue', linestyle='solid', label='bleu')
show()
plt.legend();
pp.savefig(fig)
plt.close()
pp.close()
#!/usr/bin/Rscript
library(ggplot2)
library(stringr)
library(tidyr)
recovered_threshold <- function(file){
path=str_split(file, "/",simplify=TRUE)
threshold=c(str_split(path[length(path)], "_t",simplify=TRUE)[2])
threshold=as.numeric(str_sub(threshold,1,length(threshold)-6))/100
return(threshold)
}
args = commandArgs(trailingOnly=TRUE)
# test if there is at least one argument: if not, return an error
if (length(args)<3) {
stop("LACK OF ARGUMENTS: ./graphical_seq_consensus_analysis.r attribute output_directory file1 file2 ...", call.=FALSE)
}
id_args=1
id_attribute=id_args
id_args=id_args + 1
id_output=id_args
id_args=id_args + 1
id_file=id_args
attribute=args[id_attribute]
output_dir=args[id_output]
files=c(args[id_file])
thresholds=c(recovered_threshold(files[1]))
if (length(args)!=id_file){
for (arg in args[(id_file+1):length(args)]){
files=c(files,arg)
thresholds=c(thresholds,recovered_threshold(arg))
}
}
creation_database <- function(files,thresholds){
Database_full=structure(list(), class = "data.frame")
for ( i in seq(1,length(files)) ) {
Database <- read.table(files[i],header = TRUE, sep = ",")
Database$threshold <- thresholds[i]
Database_full=rbind(Database_full,Database)
}
return(Database_full)
}
Database=creation_database(files,thresholds)
group_MSA=unique(Database$MSA)
nb_region_size=unique(Database$region_size)
nb_depths=unique(Database$depth)
nb_depth_min=min(as.numeric(nb_depths))
nb_depth_max=max(as.numeric(nb_depths))
threshold_independent=1
if (attribute =="user" || attribute =="system" || attribute =="time" || attribute =="elapsed" || attribute =="cpu" || attribute =="memory") {
thresholds=thresholds[1]
threshold_independent=0
}
selected_column = c("threshold",'MSA',"region_size","depth",attribute)
data_attribute=Database[selected_column]
attribute_max=max(unique(Database[attribute]))
pdf(paste(output_dir,"/graph_",attribute,".pdf",sep=""))
for (threshold in thresholds) {
data_threshold=data_attribute[data_attribute$threshold==threshold,]
if(nrow(data_threshold)!=0){
data_threshold=subset(data_threshold, select=-c(threshold))
for (MSA in group_MSA){
id_MSA=grep(MSA, data_threshold$MSA, perl = TRUE,ignore.case = TRUE)
data_MSA=data_threshold[id_MSA,]
data_MSA=subset(data_MSA, select=-c(MSA))
if(nrow(data_MSA)!=0){
data_MSA_organize=structure(list(depth = nb_depths), class="data.frame", row.names=1:length(nb_depths))
for (region_size in nb_region_size) {
data_MSA_organize[as.character(region_size)] <- NA
print(data_MSA_organize)
data_region_size=data_MSA[data_MSA$region_size==region_size,]
if(nrow(data_region_size)!=0){
for (depth in nb_depths) {
data_depth=data_region_size[data_region_size$depth==depth,]
if(nrow(data_depth)!=0){
data_MSA_organize[data_MSA_organize$depth==depth,as.character(region_size)]=data_depth[attribute]
print(data_MSA_organize)
}
}
}
}
print(data_MSA_organize)
data <- gather(
data = data_MSA_organize,
key = Region_size,
value = VAL,
as.character(nb_region_size)
)
A=ggplot(
data,
aes(x = depth, y = VAL, color = Region_size)
) + geom_line() + ggtitle(paste(attribute,"for",MSA,"(threshold =",threshold,")")) + xlab("Depth") + ylab(attribute) + ylim(0,attribute_max)
print(A)
}
}
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment