Skip to content
Snippets Groups Projects
Commit 2863ae6f authored by Salson Mikael's avatar Salson Mikael
Browse files

Use MultiCover in benchmark.cpp instead of Coverind

parent 5da2c5e9
No related branches found
No related tags found
No related merge requests found
#include "coverind.hpp" #include "multicover.hpp"
#include <iostream> #include <iostream>
#include <fstream> #include <fstream>
#include <chrono> #include <chrono>
#include <sdsl/sd_vector.hpp> #include <sdsl/sd_vector.hpp>
#include <cstdlib> #include <cstdlib>
#include <ctime> #include <ctime>
#include <algorithm>
#include <sdsl/rrr_vector.hpp> #include <sdsl/rrr_vector.hpp>
#include "lib/gzstream.h" #include "lib/gzstream.h"
using namespace sdsl; using namespace sdsl;
...@@ -13,30 +14,38 @@ const int NB_QUERIES = 100000; ...@@ -13,30 +14,38 @@ const int NB_QUERIES = 100000;
const int RANGE_LENGTH = 100000; const int RANGE_LENGTH = 100000;
const int WINDOW_LENGTH = 1000; const int WINDOW_LENGTH = 1000;
template <class bv> // template <class bv>
float launch_random_queries(Coverind<bv> &cover) { // float launch_random_queries(MultiCover<bv> &cover) {
size_t queries[NB_QUERIES]; // size_t queries[NB_QUERIES];
size_t nb_pos = cover.nb_genome_positions(); // size_t lengths[NB_QUERIES];
for (size_t i = 0; i < NB_QUERIES; i++) { // for (size_t i = 0; i < NB_QUERIES; i++) {
queries[i] = rand() % (nb_pos - RANGE_LENGTH); // size_t chr = rand() % cover.getNbChromosomes();
} // size_t nb_pos = cover.getChrCoverage(chr).nb_genome_positions();
// size_t range_length = RANGE_LENGTH;
std::cout << "Running queries..." << std::endl; // if (RANGE_LENGTH > nb_pos) {
auto query_start = std::chrono::system_clock::now(); // range_length = nb_pos / 10;
size_t total = 0; // }
for (size_t i = 0; i < NB_QUERIES; i++) { // queries[i] = rand() % (nb_pos - range_length);
size_t count = cover.count_reads_between(queries[i], queries[i] + RANGE_LENGTH); // lengths[i] = min(range_length, nb_pos - queries[i]);
total += count; // }
// std::cerr << queries[i] << "\t" << queries[i] + RANGE_LENGTH << "\t" << count << std::endl;
} // std::cout << "Running queries..." << std::endl;
std::cout << total << std::endl; // auto query_start = std::chrono::system_clock::now();
auto query_end = std::chrono::system_clock::now(); // size_t total = 0;
// for (size_t i = 0; i < NB_QUERIES; i++) {
return std::chrono::duration_cast<std::chrono::microseconds>(query_end - query_start).count()*1./NB_QUERIES; // size_t count = cover.count_reads_between(queries[i], queries[i] + lengths[i] - 1);
} // total += count;
// // std::cerr << queries[i] << "\t" << queries[i] + RANGE_LENGTH << "\t" << count << std::endl;
// }
// std::cout << total << std::endl;
// auto query_end = std::chrono::system_clock::now();
// return std::chrono::duration_cast<std::chrono::microseconds>(query_end - query_start).count()*1./NB_QUERIES;
// }
template <class bv> template <class bv>
float launch_real_queries(Coverind<bv> cover, const std::string query_filename) { float launch_real_queries(MultiCover<bv> &cover,
const std::string query_filename) {
std::ifstream query_file(query_filename); std::ifstream query_file(query_filename);
size_t nb_lines = 0; size_t nb_lines = 0;
if (! query_file.is_open()) { if (! query_file.is_open()) {
...@@ -50,7 +59,7 @@ float launch_real_queries(Coverind<bv> cover, const std::string query_filename) ...@@ -50,7 +59,7 @@ float launch_real_queries(Coverind<bv> cover, const std::string query_filename)
query_file = std::ifstream(query_filename); query_file = std::ifstream(query_filename);
std::pair<size_t, size_t> queries[nb_lines]; std::tuple<size_t, size_t, size_t> queries[nb_lines];
size_t expected_counts[nb_lines]; size_t expected_counts[nb_lines];
size_t i = 0; size_t i = 0;
std::string chr, start_str, end_str, count_str; std::string chr, start_str, end_str, count_str;
...@@ -66,7 +75,7 @@ float launch_real_queries(Coverind<bv> cover, const std::string query_filename) ...@@ -66,7 +75,7 @@ float launch_real_queries(Coverind<bv> cover, const std::string query_filename)
start = std::stoi(start_str); start = std::stoi(start_str);
end = std::stoi(end_str); end = std::stoi(end_str);
count = std::stoi(count_str); count = std::stoi(count_str);
queries[i] = std::make_pair(start, end); queries[i] = std::make_tuple(cover.getChrIndex(chr), start, end);
expected_counts[i] = count; expected_counts[i] = count;
i++; i++;
} }
...@@ -74,48 +83,44 @@ float launch_real_queries(Coverind<bv> cover, const std::string query_filename) ...@@ -74,48 +83,44 @@ float launch_real_queries(Coverind<bv> cover, const std::string query_filename)
auto query_start = std::chrono::system_clock::now(); auto query_start = std::chrono::system_clock::now();
for (i = 0; i < nb_lines; i++) { for (i = 0; i < nb_lines; i++) {
size_t count = cover.count_reads_between(queries[i].first, queries[i].second - 1); size_t count = cover.getChrCoverage(std::get<0>(queries[i])).count_reads_between(std::get<1>(queries[i]), std::get<2>(queries[i]) - 1);
std::cout << queries[i].first << "\t" << queries[i].second << "\t" << count << "\t" << expected_counts[i] << std::endl; std::cout << std::get<0>(queries[i]) << "\t"
<< std::get<1>(queries[i]) << "\t"
<< std::get<2>(queries[i]) << "\t" << count
<< "\t" << expected_counts[i] << std::endl;
} }
auto query_end = std::chrono::system_clock::now(); auto query_end = std::chrono::system_clock::now();
return std::chrono::duration_cast<std::chrono::microseconds>(query_end - query_start).count()*1./nb_lines; return std::chrono::duration_cast<std::chrono::microseconds>(query_end - query_start).count()*1./nb_lines;
} }
template <class bv> template <class bv>
float launch_window_queries(Coverind<bv> cover, const std::string index_filename) { float launch_window_queries(MultiCover<bv> &cover, const std::string bed_filename) {
auto query_start = std::chrono::system_clock::now(); auto query_start = std::chrono::system_clock::now();
size_t size_chr = cover.nb_genome_positions(); cover.generateBED(WINDOW_LENGTH, bed_filename);
ogzstream output(std::string(index_filename+".answers.gz").c_str());
for (size_t i = 0; i < size_chr - WINDOW_LENGTH; i += WINDOW_LENGTH) {
output << i << "\t" << i+WINDOW_LENGTH << "\t"
<< cover.count_reads_between(i, i + WINDOW_LENGTH - 1)
<< std::endl;
}
auto query_end = std::chrono::system_clock::now(); auto query_end = std::chrono::system_clock::now();
return std::chrono::duration_cast<std::chrono::milliseconds>(query_end - query_start).count()*1.; return std::chrono::duration_cast<std::chrono::milliseconds>(query_end - query_start).count()*1.;
} }
template <class bv> template <class bv>
void launch_benchmark(char *start_file, char *end_file, const std::string query_filename="") { void launch_benchmark(char *bam_file, const std::string query_filename="") {
std::cout << "For file " << start_file << " and " << end_file << std::endl; std::cout << "For file " << bam_file << std::endl;
auto build_start = std::chrono::system_clock::now(); auto build_start = std::chrono::system_clock::now();
Coverind<bv> cover = Coverind<bv>(start_file, end_file); MultiCover<bv> cover = MultiCover<bv>(bam_file);
auto build_end = std::chrono::system_clock::now(); auto build_end = std::chrono::system_clock::now();
bool has_real_queries = (query_filename.size() > 0); bool has_real_queries = (query_filename.size() > 0);
float query_time; float query_time;
if (has_real_queries) { if (has_real_queries) {
query_time = launch_real_queries<bv>(cover, query_filename); query_time = launch_real_queries<bv>(cover, query_filename);
} else { } // else {
query_time = launch_random_queries<bv>(cover); // query_time = launch_random_queries<bv>(cover);
} // }
float window_time = launch_window_queries<bv>(cover, std::string(start_file) + ".anwsers"); float window_time = launch_window_queries<bv>(cover, std::string(bam_file) + ".anwsers.gz");
auto save_start = std::chrono::system_clock::now(); auto save_start = std::chrono::system_clock::now();
cover.save(start_file); cover.save(bam_file);
auto save_stop = std::chrono::system_clock::now(); auto save_stop = std::chrono::system_clock::now();
auto load_start = std::chrono::system_clock::now(); auto load_start = std::chrono::system_clock::now();
Coverind<bv> cover2(start_file); MultiCover<bv> cover2(bam_file, true);
auto load_stop = std::chrono::system_clock::now(); auto load_stop = std::chrono::system_clock::now();
assert (cover.nb_genome_positions() == cover2.nb_genome_positions()); assert (cover.nb_genome_positions() == cover2.nb_genome_positions());
assert (cover.count_reads_between(100, cover.nb_genome_positions() / 10) == cover2.count_reads_between(100, cover.nb_genome_positions() / 10)); assert (cover.count_reads_between(100, cover.nb_genome_positions() / 10) == cover2.count_reads_between(100, cover.nb_genome_positions() / 10));
...@@ -131,7 +136,7 @@ void launch_benchmark(char *start_file, char *end_file, const std::string query_ ...@@ -131,7 +136,7 @@ void launch_benchmark(char *start_file, char *end_file, const std::string query_
int main(int argc, char ** argv) { int main(int argc, char ** argv) {
if (argc <= 1) { if (argc <= 1) {
std::cerr << "Usage: " << argv[0] << " [-q file] bv1_start bv1_end [bv2_start bv2_end...]" << std::endl; std::cerr << "Usage: " << argv[0] << " [-q file] bam_file" << std::endl;
exit(1); exit(1);
} }
...@@ -142,14 +147,12 @@ int main(int argc, char ** argv) { ...@@ -142,14 +147,12 @@ int main(int argc, char ** argv) {
index_start += 2; index_start += 2;
} }
for (int i = index_start; i < argc; i += 2) {
auto seed = time(NULL); auto seed = time(NULL);
std::cout << "*** SD vector" << std::endl; std::cout << "*** SD vector" << std::endl;
srand(seed); srand(seed);
launch_benchmark<sd_vector<> >(argv[i], argv[i+1], query_filename); launch_benchmark<sd_vector<> >(argv[index_start] , query_filename);
std::cout << "*** RRR vector" << std::endl; std::cout << "*** RRR vector" << std::endl;
srand(seed); srand(seed);
launch_benchmark<rrr_vector<> >(argv[i], argv[i+1], query_filename); launch_benchmark<rrr_vector<> >(argv[index_start], query_filename);
}
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment