Skip to content
Snippets Groups Projects
Commit bc3d61d8 authored by Salson Mikael's avatar Salson Mikael
Browse files

init

parents
No related branches found
No related tags found
No related merge requests found
Makefile 0 → 100644
CXX=g++
CXXFLAGS=-O2 -Wall -g
SRC=$(wildcard *.cpp)
OBJ=$(SRC:.cpp=.o)
EXE=$(SRC:.cpp=)
all: $(EXE)
$(EXE): %: %.o
$(CXX) -o $@ $^ -l sdsl
%.o: %.cpp
$(CXX) $(CXXFLAGS) -c $^
clean:
rm -f $(OBJ) $(EXE)
.PHONY: all clean
#include "coverind.hpp"
#include <iostream>
#include <chrono>
#include <sdsl/sd_vector.hpp>
using namespace sdsl;
const int nb_queries = 100000;
const int range_length = 100000;
int main(int argc, char ** argv) {
if (argc <= 1) {
std::cerr << "Usage: " << argv[0] << " bv1_start bv1_end [bv2_start bv2_end...]" << std::endl;
exit(1);
}
for (int i = 1; i < argc; i += 2) {
std::cout << "For file " << argv[i] << " and " << argv[i+1] << std::endl;
auto build_start = std::chrono::system_clock::now();
Coverind<sd_vector<> > cover = Coverind<sd_vector<> >(argv[i], argv[i+1]);
auto build_end = std::chrono::system_clock::now();
size_t nb_pos = cover.nb_genome_positions();
size_t queries[nb_queries];
for (size_t i = 0; i < nb_queries; i++) {
queries[i] = rand() % (nb_pos - range_length);
}
std::cout << "Running queries..." << std::endl;
auto query_start = std::chrono::system_clock::now();
size_t total = 0;
for (size_t i = 0; i < nb_queries; i++) {
total += cover.count_reads_between(queries[i], queries[i] + range_length);
}
auto query_end = std::chrono::system_clock::now();
std::cout << "Build time (ms)\t" << std::chrono::duration_cast<std::chrono::milliseconds>(build_end - build_start).count() << std::endl;
std::cout << "Query time (ns per query)\t" << std::chrono::duration_cast<std::chrono::microseconds>(query_end - query_start).count()*1./nb_queries << std::endl;
std::cout << "Space (MB)\t" << cover.size_in_bytes()*1. / 1000000 << std::endl;
}
}
#ifndef COVERIND_HPP
#define COVERIND_HPP
#include <iostream>
#include <string>
#include <sdsl/bit_vectors.hpp>
#include <sdsl/sd_vector.hpp>
#include <sdsl/select_support.hpp>
#define PRINT_VAR(v) std::cerr << #v << " = " << v << std::endl
using namespace sdsl;
template <class bv> class Coverind {
private:
bv start, end;
rank_support_sd<1> rank_start, rank_end;
select_support_sd<0> select_start, select_end;
public:
/**
* Builds from two files containing the bit vectors
*/
Coverind(const std::string start_file, const std::string end_file);
bv getStartBV() const;
bv getEndBV() const;
/**
* Count all the reads that end before a position
*/
size_t count_reads_before(size_t position) const;
/**
* Count all the reads that overlap a position
* (ie. they cover the position)
*/
size_t count_reads_overlapping(size_t position) const;
/**
* Count all the reads that cover a position between start and end
* (both included)
*/
size_t count_reads_between(size_t start, size_t end) const;
/**
* Return the number of genomic positions (ie. the number of 0s)
*/
size_t nb_genome_positions() const;
/**
* Size of the structure in bytes
*/
size_t size_in_bytes() const;
void save(const std::string) const;
};
bit_vector create_bitvector_from_file(const std::string filename);
//////////////////////////////////////////////////
//////////////////////////////////////////////////
//////////////////////////////////////////////////
template <class bv>
Coverind<bv>::Coverind(const std::string start_file, const std::string end_file) {
start = bv(create_bitvector_from_file(start_file));
end = bv(create_bitvector_from_file(end_file));
rank_start = rank_support_sd<1>(&start);
rank_end = rank_support_sd<1>(&end);
select_start = select_support_sd<0>(&start);
select_end = select_support_sd<0>(&end);
}
template <class bv>
bv Coverind<bv>::getStartBV() const {
return start;
}
template <class bv>
bv Coverind<bv>::getEndBV() const {
return end;
}
template <class bv>
size_t Coverind<bv>::count_reads_before(size_t position) const {
if (position == 0)
return 0;
size_t bv_pos = select_end(position);
return rank_end(bv_pos);
}
template <class bv>
size_t Coverind<bv>::count_reads_overlapping(size_t position) const {
size_t bv_pos = select_start(position + 1);
return rank_start(bv_pos) - count_reads_before(position);
}
template <class bv>
size_t Coverind<bv>::count_reads_between(size_t start, size_t end) const {
size_t nb_reads_until_end = count_reads_before(end + 1);
size_t nb_reads_before_start = count_reads_before(start);
return nb_reads_until_end - nb_reads_before_start;
}
template <class bv>
size_t Coverind<bv>::nb_genome_positions() const {
return start.size() - rank_start(start.size());
}
template <class bv>
size_t Coverind<bv>::size_in_bytes() const {
return ::size_in_bytes(start) + ::size_in_bytes(end) +
::size_in_bytes(rank_start) + ::size_in_bytes(rank_end) +
::size_in_bytes(select_start) + ::size_in_bytes(select_end);
}
template <class bv>
void Coverind<bv>::save(const std::string filename) const {
store_to_file(start, filename + "start.bv");
store_to_file(end, filename + "end.bv");
store_to_file(rank_start, filename + "start.rank");
store_to_file(rank_end, filename + "end.rank");
store_to_file(select_start, filename + "start.select");
store_to_file(select_end, filename + "end.select");
}
bit_vector create_bitvector_from_file(const std::string filename) {
std::ifstream input(filename);
input.seekg(0, input.end);
size_t length = 8*input.tellg();
input.seekg(0, input.beg);
bit_vector bv(length, 0);
char c;
size_t pos = 0;
while ((c = input.get()) != EOF) {
int mask = 1<<7;
while (mask > 0) {
if (c & mask)
bv[pos] = 1;
pos++;
mask >>= 1;
}
}
return bv;
}
#endif
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment