Skip to content
Snippets Groups Projects
Commit bc3d61d8 authored by Salson Mikael's avatar Salson Mikael
Browse files

init

parents
Branches
No related tags found
No related merge requests found
Makefile 0 → 100644
CXX=g++
CXXFLAGS=-O2 -Wall -g
SRC=$(wildcard *.cpp)
OBJ=$(SRC:.cpp=.o)
EXE=$(SRC:.cpp=)
all: $(EXE)
$(EXE): %: %.o
$(CXX) -o $@ $^ -l sdsl
%.o: %.cpp
$(CXX) $(CXXFLAGS) -c $^
clean:
rm -f $(OBJ) $(EXE)
.PHONY: all clean
#include "coverind.hpp"
#include <iostream>
#include <chrono>
#include <sdsl/sd_vector.hpp>
using namespace sdsl;
const int nb_queries = 100000;
const int range_length = 100000;
int main(int argc, char ** argv) {
if (argc <= 1) {
std::cerr << "Usage: " << argv[0] << " bv1_start bv1_end [bv2_start bv2_end...]" << std::endl;
exit(1);
}
for (int i = 1; i < argc; i += 2) {
std::cout << "For file " << argv[i] << " and " << argv[i+1] << std::endl;
auto build_start = std::chrono::system_clock::now();
Coverind<sd_vector<> > cover = Coverind<sd_vector<> >(argv[i], argv[i+1]);
auto build_end = std::chrono::system_clock::now();
size_t nb_pos = cover.nb_genome_positions();
size_t queries[nb_queries];
for (size_t i = 0; i < nb_queries; i++) {
queries[i] = rand() % (nb_pos - range_length);
}
std::cout << "Running queries..." << std::endl;
auto query_start = std::chrono::system_clock::now();
size_t total = 0;
for (size_t i = 0; i < nb_queries; i++) {
total += cover.count_reads_between(queries[i], queries[i] + range_length);
}
auto query_end = std::chrono::system_clock::now();
std::cout << "Build time (ms)\t" << std::chrono::duration_cast<std::chrono::milliseconds>(build_end - build_start).count() << std::endl;
std::cout << "Query time (ns per query)\t" << std::chrono::duration_cast<std::chrono::microseconds>(query_end - query_start).count()*1./nb_queries << std::endl;
std::cout << "Space (MB)\t" << cover.size_in_bytes()*1. / 1000000 << std::endl;
}
}
#ifndef COVERIND_HPP
#define COVERIND_HPP
#include <iostream>
#include <string>
#include <sdsl/bit_vectors.hpp>
#include <sdsl/sd_vector.hpp>
#include <sdsl/select_support.hpp>
#define PRINT_VAR(v) std::cerr << #v << " = " << v << std::endl
using namespace sdsl;
template <class bv> class Coverind {
private:
bv start, end;
rank_support_sd<1> rank_start, rank_end;
select_support_sd<0> select_start, select_end;
public:
/**
* Builds from two files containing the bit vectors
*/
Coverind(const std::string start_file, const std::string end_file);
bv getStartBV() const;
bv getEndBV() const;
/**
* Count all the reads that end before a position
*/
size_t count_reads_before(size_t position) const;
/**
* Count all the reads that overlap a position
* (ie. they cover the position)
*/
size_t count_reads_overlapping(size_t position) const;
/**
* Count all the reads that cover a position between start and end
* (both included)
*/
size_t count_reads_between(size_t start, size_t end) const;
/**
* Return the number of genomic positions (ie. the number of 0s)
*/
size_t nb_genome_positions() const;
/**
* Size of the structure in bytes
*/
size_t size_in_bytes() const;
void save(const std::string) const;
};
bit_vector create_bitvector_from_file(const std::string filename);
//////////////////////////////////////////////////
//////////////////////////////////////////////////
//////////////////////////////////////////////////
template <class bv>
Coverind<bv>::Coverind(const std::string start_file, const std::string end_file) {
start = bv(create_bitvector_from_file(start_file));
end = bv(create_bitvector_from_file(end_file));
rank_start = rank_support_sd<1>(&start);
rank_end = rank_support_sd<1>(&end);
select_start = select_support_sd<0>(&start);
select_end = select_support_sd<0>(&end);
}
template <class bv>
bv Coverind<bv>::getStartBV() const {
return start;
}
template <class bv>
bv Coverind<bv>::getEndBV() const {
return end;
}
template <class bv>
size_t Coverind<bv>::count_reads_before(size_t position) const {
if (position == 0)
return 0;
size_t bv_pos = select_end(position);
return rank_end(bv_pos);
}
template <class bv>
size_t Coverind<bv>::count_reads_overlapping(size_t position) const {
size_t bv_pos = select_start(position + 1);
return rank_start(bv_pos) - count_reads_before(position);
}
template <class bv>
size_t Coverind<bv>::count_reads_between(size_t start, size_t end) const {
size_t nb_reads_until_end = count_reads_before(end + 1);
size_t nb_reads_before_start = count_reads_before(start);
return nb_reads_until_end - nb_reads_before_start;
}
template <class bv>
size_t Coverind<bv>::nb_genome_positions() const {
return start.size() - rank_start(start.size());
}
template <class bv>
size_t Coverind<bv>::size_in_bytes() const {
return ::size_in_bytes(start) + ::size_in_bytes(end) +
::size_in_bytes(rank_start) + ::size_in_bytes(rank_end) +
::size_in_bytes(select_start) + ::size_in_bytes(select_end);
}
template <class bv>
void Coverind<bv>::save(const std::string filename) const {
store_to_file(start, filename + "start.bv");
store_to_file(end, filename + "end.bv");
store_to_file(rank_start, filename + "start.rank");
store_to_file(rank_end, filename + "end.rank");
store_to_file(select_start, filename + "start.select");
store_to_file(select_end, filename + "end.select");
}
bit_vector create_bitvector_from_file(const std::string filename) {
std::ifstream input(filename);
input.seekg(0, input.end);
size_t length = 8*input.tellg();
input.seekg(0, input.beg);
bit_vector bv(length, 0);
char c;
size_t pos = 0;
while ((c = input.get()) != EOF) {
int mask = 1<<7;
while (mask > 0) {
if (c & mask)
bv[pos] = 1;
pos++;
mask >>= 1;
}
}
return bv;
}
#endif
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please to comment