Skip to content
Snippets Groups Projects
Commit 81bfb8c1 authored by Salson Mikael's avatar Salson Mikael
Browse files

coverind.cpp: CLI to build and query index

parent 366e073d
Branches
No related tags found
No related merge requests found
#include "multicover.hpp"
#include "lib/CLI11.hpp"
#include <sdsl/sd_vector.hpp>
#include <iostream>
#include <string>
#define verbose(msg) if(is_verbose){ std::cerr << msg << std::endl; }
#define DEFAULT_WINDOW_SIZE 1000
bool explodeRange(std::string param, std::string &chromosome, size_t &start, size_t &end) {
size_t pos = param.find(":");
if (pos == std::string::npos)
return false;
chromosome = param.substr(0, pos);
size_t pos_comma = param.find("-", pos+1);
try {
if (pos_comma == std::string::npos)
// we just have a start pos
end = std::string::npos;
else {
end = std::stoi(param.substr(pos_comma+1));
}
start = std::stoi(param.substr(pos+1, pos_comma - pos));
} catch (std::invalid_argument& e) {
return false;
}
return true;
}
int main(int argc, char **argv) {
CLI::App app("Coverind: indexing read coverage information");
CLI::App *index = app.add_subcommand("index", "index an indexed BAM file");
CLI::App *query = app.add_subcommand("query", "query coverage information from an already computed Coverind index");
app.require_subcommand();
std::string basename;
std::string bam_file;
std::string index_file;
std::string bed_file;
std::string chr;
std::string range;
bool do_list = false;
bool is_verbose = false;
size_t start_pos, end_pos;
size_t window_size = DEFAULT_WINDOW_SIZE;
// Index options
index->add_option("-b,--basename", basename, "Basename of the index to be produced")
->required();
index->add_option("bam", bam_file, "Path to the BAM file (a .bai file must also exist with the same path")
->check(CLI::ExistingFile);
index->add_flag("-v,--verbose", is_verbose, "Verbose mode");
// Query options
query->add_option("--index,-i", index_file, "Basename of the index file (same value as the -b passed to the `index' command")->required();
CLI::Option *list = query->add_flag("--list,-l", do_list, "List the chromosomes stored in the index with their length");
CLI::Option *bed = query->add_option("--bed,-b", bed_file, "Create a bed file with the number of reads computed on the whole genome on "+std::to_string(window_size)+"-bp windows (see --range --bed-window for more customisation)");
query->add_option("--bed-window,-w", window_size, "Window size on which the number of reads is computed for the BED output")
->needs(bed);
CLI::Option *range_opt = query->add_option("--range,-r",
[&chr, &start_pos, &end_pos](CLI::results_t res) {
return explodeRange(res[0], chr, start_pos, end_pos);
}, "Gives the number of reads overlapping the specified range (with the format: chr:start-pos, start and pos are included). When used with --bed computes the number of reads in each window in the specified range.");
list->excludes(bed);
list->excludes(range_opt);
CLI11_PARSE(app, argc, argv);
if (*index) {
verbose("Indexing " << bam_file);
MultiCover<sd_vector<> > cover(bam_file);
cover.save(basename);
}
if (*query) {
verbose("Querying with index " << index_file)
MultiCover<sd_vector<> > cover(index_file, true);
verbose("Index loaded");
if (do_list) {
size_t nb_chr = cover.getNbChromosomes();
for (size_t i = 0; i < nb_chr; i++) {
std::cout << cover.getChrName(i) << "\t";
std::cout << cover.getChrCoverage(i).nb_genome_positions() << std::endl;
}
} else if (chr.size() > 0 && bed_file.size() == 0) {
Coverind<sd_vector<> > &cover_chr = cover.getChrCoverage(chr);
size_t max = cover_chr.nb_genome_positions();
if (end_pos > max)
end_pos = max;
std::cout << chr << "\t" << start_pos << "\t" << end_pos << "\t"
<< cover_chr.count_reads_between(start_pos, end_pos)
<< std::endl;
} else if (bed_file.size() > 0) {
if (chr.size() > 0) {
cover.generateBED(window_size, bed_file, std::make_tuple(chr, start_pos, end_pos));
} else {
cover.generateBED(window_size, bed_file);
}
}
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please to comment