54 <<
"BitMagic DNA Index Build Sample (c) 2018" << std::endl
55 <<
"-fa file-name -- input FASTA file" << std::endl
56 <<
"-j number -- number of parallel jobs to run" << std::endl
57 <<
"-timing -- collect timings" << std::endl
73 for (
int i = 1; i < argc; ++i)
75 std::string arg = argv[i];
76 if ((arg ==
"-h") || (arg ==
"--help"))
81 if (arg ==
"-fa" || arg ==
"--fa")
89 std::cerr <<
"Error: -fa requires file name" << std::endl;
94 if (arg ==
"-j" || arg ==
"--j")
102 std::cerr <<
"Error: -j requires number of jobs" << std::endl;
108 if (arg ==
"-timing" || arg ==
"--timing" || arg ==
"-t" || arg ==
"--t")
123 int load_FASTA(
const std::string& fname, std::vector<char>& seq_vect)
128 std::ifstream fin(fname.c_str(), std::ios::in);
133 for (
unsigned i = 0; std::getline(fin, line); ++i)
139 for (std::string::iterator it = line.begin(); it != line.end(); ++it)
140 seq_vect.push_back(*it);
154 enum { eA = 0, eC, eG, eT, eN, eEnd };
160 void Build(
const vector<char>& sequence)
168 for (
size_t i = 0; i < sequence.size(); ++i)
170 unsigned pos = unsigned(i);
204 for (
size_t i = 0; i < sequence.size(); ++i)
206 unsigned pos = unsigned(i);
239 const std::vector<char>* src_sequence;
242 : target_idx(idx), src_sequence(&src) {}
244 void operator() (
size_t from,
size_t to)
246 const vector<char>& sequence = *src_sequence;
255 for (
size_t i = from; i < sequence.size() && (i < to); ++i)
257 unsigned pos = unsigned(i);
305 std::vector<std::future<void> > futures;
307 unsigned range = unsigned(sequence.size() / threads);
309 for (
unsigned k = 0; k < sequence.size(); k += range)
311 futures.emplace_back(std::async(std::launch::async,
312 Func(
this, sequence), k, k + range));
316 for (
auto& e : futures)
326 static std::mutex mtx_A;
327 static std::mutex mtx_T;
328 static std::mutex mtx_G;
329 static std::mutex mtx_C;
330 static std::mutex mtx_N;
336 std::lock_guard<std::mutex> guard(mtx_A);
337 m_FPrintBV[eA].merge(bv);
342 std::lock_guard<std::mutex> guard(mtx_C);
343 m_FPrintBV[eC].merge(bv);
348 std::lock_guard<std::mutex> guard(mtx_G);
349 m_FPrintBV[eG].merge(bv);
354 std::lock_guard<std::mutex> guard(mtx_T);
355 m_FPrintBV[eT].merge(bv);
360 std::lock_guard<std::mutex> guard(mtx_N);
361 m_FPrintBV[eN].merge(bv);
376 return m_FPrintBV[eA];
378 return m_FPrintBV[eC];
380 return m_FPrintBV[eG];
382 return m_FPrintBV[eT];
384 return m_FPrintBV[eN];
388 throw runtime_error(
"Error. Invalid letter!");
401 std::vector<char> letters {
'A',
'T',
'G',
'C'};
402 for (
char base : letters)
410 throw runtime_error(
string(
"Fingerprint mismatch for:") +
string(1, base));
417 int main(
int argc,
char *argv[])
425 std::vector<char> seq_vect;
441 std::cout <<
"FASTA sequence size=" << seq_vect.size() << std::endl;
445 idx1.
Build(seq_vect);
462 std::cout << std::endl <<
"Performance:" << std::endl;
466 catch (std::exception& ex)
468 std::cerr <<
"Error:" << ex.what() << std::endl;
Output iterator designed to set "ON" bits based on an input sequence of integers.
Compressed bit-vector bvector<> container, set algebraic methods, traversal iterators.
void Build(const vector< char > &sequence)
Build fingerprint bit-vectors from the original sequence.
void BuildParallel(const vector< char > &sequence, unsigned threads)
Build fingerprint bit-vectors using bulk insert iterator and parallel processing. ...
void BuildBulk(const vector< char > &sequence)
Build index using bulk insert iterator.
static int load_FASTA(const std::string &fname, std::vector< char > &seq_vect)
void MergeVector(char letter, bm::bvector<> &bv)
Thread sync bit-vector merge.
Timing utilities for benchmarking (internal)
static int parse_args(int argc, char *argv[])
Utility for keeping all DNA fingerprint vectors and searching them using various techniques.
int compare(const bvector< Alloc > &bvect) const
Lexicographical comparison with a bitvector.
bm::chrono_taker::duration_map_type timing_map
input set is sorted (ascending order)
int main(int argc, char *argv[])
std::map< std::string, statistics > duration_map_type
test name to duration map
static void fingerprint_compare(const DNA_FingerprintScanner &idx1, const DNA_FingerprintScanner &idx2)
Check correctness of indexes constructed using different methods.
static void print_duration_map(const duration_map_type &dmap, format fmt=ct_time)
Utility class to collect performance measurements and statistics.
Output iterator designed to set "ON" bits based on an input sequence of integers (bit indices)...
const bm::bvector & GetVector(char letter) const
Return fingerprint bit-vector.