Skip to content
Snippets Groups Projects
Commit 6b79e62c authored by Salson Mikael's avatar Salson Mikael
Browse files

update benchmark.cpp

more benchs
parent 8372b34e
Branches
No related tags found
No related merge requests found
#include "coverind.hpp"

#include <cassert>
#include <chrono>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

#include <sdsl/rrr_vector.hpp>
#include <sdsl/sd_vector.hpp>

#include "lib/gzstream.h"
using namespace sdsl; using namespace sdsl;
// Number of random range queries issued by launch_random_queries().
const int NB_QUERIES = 100000;
// Length (in genome positions) of each random range query.
const int RANGE_LENGTH = 100000;
// Width (in genome positions) of the windows used by launch_window_queries().
const int WINDOW_LENGTH = 1000;
/**
 * Times NB_QUERIES range-count queries starting at random genome positions.
 *
 * Every query counts the reads between a random position p and
 * p + RANGE_LENGTH. Positions are drawn with rand(), so the sequence of
 * queries is determined by the seed the caller gave to srand().
 *
 * @param cover index to query
 * @return average time per query, in microseconds
 * @throws std::invalid_argument if the genome is not strictly longer than
 *         RANGE_LENGTH (no valid start position exists)
 */
template <class bv>
float launch_random_queries(Coverind<bv> &cover) {
  const size_t nb_pos = cover.nb_genome_positions();
  const size_t range_length = RANGE_LENGTH;

  // nb_pos - range_length is unsigned: without this guard it would wrap
  // around (or be a modulo by zero) on short genomes.
  if (nb_pos <= range_length) {
    throw std::invalid_argument("Genome is too short for random queries of RANGE_LENGTH positions");
  }

  // Heap storage: NB_QUERIES size_t values are too large to put safely on the stack.
  std::vector<size_t> queries(NB_QUERIES);
  for (size_t &start : queries) {
    start = rand() % (nb_pos - range_length);
  }

  std::cout << "Running queries..." << std::endl;

  // steady_clock is monotonic, unlike system_clock, so intervals are reliable.
  const auto query_start = std::chrono::steady_clock::now();
  size_t total = 0;
  for (const size_t start : queries) {
    total += cover.count_reads_between(start, start + range_length);
  }
  const auto query_end = std::chrono::steady_clock::now();

  // Printed after the clock is stopped so that console I/O is not measured;
  // using the sum also keeps the query loop from being optimised away.
  std::cout << total << std::endl;

  return std::chrono::duration_cast<std::chrono::microseconds>(query_end - query_start).count() * 1. / NB_QUERIES;
}
/**
 * Replays the range queries listed in a tab-separated file and times them.
 *
 * Each record of the file has four tab-separated fields:
 *   chromosome <TAB> start <TAB> end <TAB> expected_count
 * The chromosome field is read but not used. For every record the function
 * queries count_reads_between(start, end - 1) and prints
 * "start <TAB> end <TAB> count <TAB> expected_count" on standard output.
 *
 * NOTE(review): `cover` is taken by value, so the caller's index is copied
 * for the duration of the call; kept as is to leave the signature unchanged.
 *
 * @param cover          index to query
 * @param query_filename path of the query file
 * @return average time per query, in microseconds (0 if the file holds no
 *         complete record); the time includes printing the results
 * @throws std::runtime_error if the file cannot be opened
 * @throws std::invalid_argument, std::out_of_range if a numeric field cannot
 *         be parsed (propagated from std::stoull)
 */
template <class bv>
float launch_real_queries(Coverind<bv> cover, const std::string query_filename) {
  std::ifstream query_file(query_filename);
  if (!query_file.is_open()) {
    throw std::runtime_error("Cannot open query file: " + query_filename);
  }

  // Single pass over the file: the containers grow with the number of
  // records actually parsed, so no entry is ever left uninitialised.
  std::vector<std::pair<size_t, size_t> > queries;
  std::vector<size_t> expected_counts;
  std::string chr, start_str, end_str, count_str;
  while (std::getline(query_file, chr, '\t')
         && std::getline(query_file, start_str, '\t')
         && std::getline(query_file, end_str, '\t')
         && std::getline(query_file, count_str)) {
    // stoull rather than stoi: positions are stored as size_t and may not fit in an int.
    queries.emplace_back(std::stoull(start_str), std::stoull(end_str));
    expected_counts.push_back(std::stoull(count_str));
  }
  query_file.close();

  if (queries.empty()) {
    return 0;
  }

  // steady_clock is monotonic, unlike system_clock, so intervals are reliable.
  const auto query_start = std::chrono::steady_clock::now();
  for (size_t i = 0; i < queries.size(); i++) {
    // The `end` column is passed as end - 1 to count_reads_between.
    const size_t count = cover.count_reads_between(queries[i].first, queries[i].second - 1);
    std::cout << queries[i].first << "\t" << queries[i].second << "\t" << count << "\t" << expected_counts[i] << std::endl;
  }
  const auto query_end = std::chrono::steady_clock::now();

  return std::chrono::duration_cast<std::chrono::microseconds>(query_end - query_start).count() * 1. / queries.size();
}
/**
 * Traverses the genome in consecutive windows of WINDOW_LENGTH positions and
 * writes the read count of each window to "<index_filename>.answers.gz".
 *
 * One line "window_start <TAB> window_end <TAB> count" is written per window,
 * where count is count_reads_between(window_start, window_end - 1). A window
 * is only emitted while window_start + WINDOW_LENGTH is strictly smaller than
 * the number of genome positions.
 *
 * NOTE(review): `cover` is taken by value, so the caller's index is copied
 * for the duration of the call; kept as is to leave the signature unchanged.
 *
 * @param cover          index to query
 * @param index_filename prefix of the output file name
 * @return elapsed time of the whole traversal (output included), in milliseconds
 */
template <class bv>
float launch_window_queries(Coverind<bv> cover, const std::string index_filename) {
  // steady_clock is monotonic, unlike system_clock, so intervals are reliable.
  const auto query_start = std::chrono::steady_clock::now();
  const size_t size_chr = cover.nb_genome_positions();
  const size_t window = WINDOW_LENGTH;
  {
    // Scoped so that the stream is destroyed (and its pending data written)
    // before the clock is stopped.
    ogzstream output(std::string(index_filename + ".answers.gz").c_str());
    // `i + window < size_chr` instead of `i < size_chr - window`: the
    // subtraction is unsigned and would wrap around on a genome shorter
    // than one window.
    for (size_t i = 0; i + window < size_chr; i += window) {
      // '\n' rather than std::endl: no need to flush the stream on every line.
      output << i << "\t" << i + window << "\t"
             << cover.count_reads_between(i, i + window - 1)
             << '\n';
    }
  }
  const auto query_end = std::chrono::steady_clock::now();
  return std::chrono::duration_cast<std::chrono::milliseconds>(query_end - query_start).count() * 1.;
}
/**
 * Runs the full benchmark for one bit-vector type `bv` and prints a
 * tab-separated report on standard output.
 *
 * Steps: build the index from the two input files, run the point queries
 * (from `query_filename` when given, random ones otherwise), traverse the
 * genome window by window, save the index, load it back, and check that the
 * reloaded index answers like the original one.
 *
 * @param start_file     first input file given to the Coverind constructor
 * @param end_file       second input file given to the Coverind constructor
 * @param query_filename optional query file; empty means random queries
 */
template <class bv>
void launch_benchmark(char *start_file, char *end_file, const std::string query_filename="") {
  // steady_clock is monotonic, unlike system_clock, so intervals are reliable.
  using bench_clock = std::chrono::steady_clock;
  const auto to_ms = [](bench_clock::duration elapsed) {
    return std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count();
  };

  std::cout << "For file " << start_file << " and " << end_file << std::endl;

  const auto build_start = bench_clock::now();
  Coverind<bv> cover = Coverind<bv>(start_file, end_file);
  const auto build_end = bench_clock::now();

  const bool has_real_queries = (query_filename.size() > 0);
  float query_time;
  if (has_real_queries) {
    query_time = launch_real_queries<bv>(cover, query_filename);
  } else {
    query_time = launch_random_queries<bv>(cover);
  }

  // NOTE(review): launch_window_queries appends ".answers.gz" itself, so the
  // output lands in "<start_file>.anwsers.answers.gz"; the (misspelt) suffix
  // is kept to avoid changing the output path.
  const float window_time = launch_window_queries<bv>(cover, std::string(start_file) + ".anwsers");

  // NOTE(review): the index is saved under the same name as the first input
  // file; confirm that Coverind::save does not overwrite that input.
  const auto save_start = bench_clock::now();
  cover.save(start_file);
  const auto save_stop = bench_clock::now();

  const auto load_start = bench_clock::now();
  Coverind<bv> cover2(start_file);
  const auto load_stop = bench_clock::now();

  // Sanity check: the reloaded index must behave like the one built in memory.
  assert (cover.nb_genome_positions() == cover2.nb_genome_positions());
  assert (cover.count_reads_between(100, cover.nb_genome_positions() / 10) == cover2.count_reads_between(100, cover.nb_genome_positions() / 10));

  std::cout << "Build time (ms)\t" << to_ms(build_end - build_start) << std::endl;
  // Both query helpers return microseconds per query, hence "us" (not "ns").
  std::cout << "Query time (us per query)\t" << query_time << std::endl;
  std::cout << "Traversing the genome (ms)\t" << window_time << std::endl;
  std::cout << "Space (MB)\t" << cover.size_in_bytes()*1. / 1000000 << std::endl;
  std::cout << "Saving (ms)\t" << to_ms(save_stop - save_start) << std::endl;
  std::cout << "Loading (ms)\t" << to_ms(load_stop - load_start) << std::endl;
}
/**
 * Entry point: benchmark [-q file] bv1_start bv1_end [bv2_start bv2_end...]
 *
 * For every (start, end) pair of files the benchmark is run twice, once with
 * an sd_vector and once with an rrr_vector. The random generator is re-seeded
 * with the same value before each run so that both use the same random
 * queries. Returns 1 (after printing the usage) when the arguments are
 * missing or incomplete.
 */
int main(int argc, char ** argv) {
  int index_start = 1;
  std::string query_filename = "";
  bool valid_args = (argc > 1);

  if (valid_args && std::string(argv[1]) == std::string("-q")) {
    if (argc > 2) {
      query_filename = argv[2];
    } else {
      // "-q" was the last argument: argv[2] is a null pointer.
      valid_args = false;
    }
    index_start += 2;
  }

  // Input files come in (start, end) pairs: require at least one full pair
  // and no dangling file, otherwise argv[i+1] would be a null pointer.
  const int nb_files = argc - index_start;
  if (nb_files < 2 || nb_files % 2 != 0) {
    valid_args = false;
  }

  if (!valid_args) {
    std::cerr << "Usage: " << argv[0] << " [-q file] bv1_start bv1_end [bv2_start bv2_end...]" << std::endl;
    return 1;
  }

  for (int i = index_start; i + 1 < argc; i += 2) {
    const unsigned int seed = static_cast<unsigned int>(time(nullptr));

    std::cout << "*** SD vector" << std::endl;
    srand(seed);
    launch_benchmark<sd_vector<> >(argv[i], argv[i+1], query_filename);

    std::cout << "*** RRR vector" << std::endl;
    srand(seed);
    launch_benchmark<rrr_vector<> >(argv[i], argv[i+1], query_filename);
  }
  return 0;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment