BitMagic-C++
bmsparsevec_algo.h
Go to the documentation of this file.
1 #ifndef BMSPARSEVEC_ALGO__H__INCLUDED__
2 #define BMSPARSEVEC_ALGO__H__INCLUDED__
3 /*
4 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 For more information please visit: http://bitmagic.io
19 */
20 /*! \file bmsparsevec_algo.h
21  \brief Algorithms for sparse_vector<>
22 */
23 
24 #ifndef BM__H__INCLUDED__
25 // BitMagic utility headers do not include main "bm.h" declaration
26 // #include "bm.h" or "bm64.h" explicitly
27 # error missing include (bm.h or bm64.h)
28 #endif
29 
30 #include "bmdef.h"
31 #include "bmsparsevec.h"
32 #include "bmaggregator.h"
33 #include "bmbuffer.h"
34 #include "bmdef.h"
35 
36 #ifdef _MSC_VER
37 # pragma warning( disable: 4146 )
38 #endif
39 
40 
41 
42 /** \defgroup svalgo Sparse vector algorithms
43  Sparse vector algorithms
44  \ingroup svector
45  */
46 
47 
48 namespace bm
49 {
50 
51 
52 /*!
53  \brief Clip dynamic range for signal higher than specified
54 
55  \param svect - sparse vector to do clipping
56  \param high_bit - max bit (inclusive) allowed for this signal vector
57 
58 
59  \ingroup svalgo
60  \sa dynamic_range_clip_low
61 */
62 template<typename SV>
63 void dynamic_range_clip_high(SV& svect, unsigned high_bit)
64 {
65  unsigned sv_plains = svect.plains();
66 
67  BM_ASSERT(sv_plains > high_bit && high_bit > 0);
68 
69  typename SV::bvector_type bv_acc;
70  unsigned i;
71 
72  // combine all the high bits into accumulator vector
73  for (i = high_bit+1; i < sv_plains; ++i)
74  {
75  typename SV::bvector_type* bv_plain = svect.plain(i);
76  if (bv_plain)
77  {
78  bv_acc.bit_or(*bv_plain);
79  svect.free_plain(i);
80  }
81  } // for i
82 
83  // set all bits ON for all low vectors, which happen to be clipped
84  for (i = high_bit; true; --i)
85  {
86  typename SV::bvector_type* bv_plain = svect.get_plain(i);
87  bv_plain->bit_or(bv_acc);
88  if (i == 0)
89  break;
90  } // for i
91 }
92 
93 
94 /*!
95  \brief Clip dynamic range for signal lower than specified (boost)
96 
97  \param svect - sparse vector to do clipping
98  \param low_bit - low bit (inclusive) allowed for this signal vector
99 
100  \ingroup svalgo
101  \sa dynamic_range_clip_high
102 */
103 template<typename SV>
104 void dynamic_range_clip_low(SV& svect, unsigned low_bit)
105 {
106  if (low_bit == 0) return; // nothing to do
107  BM_ASSERT(svect.plains() > low_bit);
108 
109  unsigned sv_plains = svect.plains();
110  typename SV::bvector_type bv_acc1;
111  unsigned i;
112 
113  // combine all the high bits into accumulator vector
114  for (i = low_bit+1; i < sv_plains; ++i)
115  {
116  typename SV::bvector_type* bv_plain = svect.plain(i);
117  if (bv_plain)
118  bv_acc1.bit_or(*bv_plain);
119  } // for i
120 
121  // accumulate all vectors below the clipping point
122  typename SV::bvector_type bv_acc2;
123  typename SV::bvector_type* bv_low_plain = svect.get_plain(low_bit);
124 
125  for (i = low_bit-1; true; --i)
126  {
127  typename SV::bvector_type* bv_plain = svect.plain(i);
128  if (bv_plain)
129  {
130  bv_acc2.bit_or(*bv_plain);
131  svect.free_plain(i);
132  if (i == 0)
133  break;
134  }
135  } // for i
136 
137  // now we want to set 1 in the clipping low plain based on
138  // exclusive or (XOR) between upper and lower parts)
139  // as a result high signal (any bits in the upper plains) gets
140  // slightly lower value (due to clipping) low signal gets amplified
141  // (lower contrast algorithm)
142 
143  bv_acc1.bit_xor(bv_acc2);
144  bv_low_plain->bit_or(bv_acc1);
145 }
146 
147 
148 /**
149  \brief algorithms for sparse_vector scan/seach
150 
151  Scanner uses properties of bit-vector plains to answer questions
152  like "find all sparse vector elements equivalent to XYZ".
153 
154  Class uses fast algorithms based on properties of bit-plains.
155  This is NOT a brute force, direct scan.
156 
157  @ingroup svalgo
158  @ingroup setalgo
159 */
160 template<typename SV>
162 {
163 public:
164  typedef typename SV::bvector_type bvector_type;
165  typedef const bvector_type* bvector_type_const_ptr;
166  typedef bvector_type* bvector_type_ptr;
167  typedef typename SV::value_type value_type;
168  typedef typename SV::size_type size_type;
169 
171  typedef typename allocator_type::allocator_pool_type allocator_pool_type;
172 
173 public:
175 
176  /**
177  \brief bind sparse vector for all searches
178 
179  \param sv - input sparse vector to bind for searches
180  \param sorted - source index is sorted, build index for binary search
181  */
182  void bind(const SV& sv, bool sorted);
183 
184  /**
185  \brief reset sparse vector binding
186  */
187  void reset_binding();
188 
189  /**
190  \brief find all sparse vector elements EQ to search value
191 
192  Find all sparse vector elements equivalent to specified value
193 
194  \param sv - input sparse vector
195  \param value - value to search for
196  \param bv_out - output bit-vector (search result masks 1 elements)
197  */
198  void find_eq(const SV& sv,
199  typename SV::value_type value,
200  typename SV::bvector_type& bv_out);
201 
202  /**
203  \brief find first sparse vector element
204 
205  Find all sparse vector elements equivalent to specified value.
206  Works well if sperse vector represents unordered set
207 
208  \param sv - input sparse vector
209  \param value - value to search for
210  \param pos - output found sparse vector element index
211 
212  \return true if found
213  */
214  bool find_eq(const SV& sv,
215  typename SV::value_type value,
216  typename SV::size_type& pos);
217 
218  /**
219  \brief find first sparse vector element (string)
220  */
221  bool find_eq_str(const SV& sv,
222  const typename SV::value_type* str,
223  typename SV::size_type& pos);
224 
225  /**
226  \brief binary find first sparse vector element (string)
227  Sparse vector must be attached (bind())
228  @sa bind
229  */
230  bool find_eq_str(const typename SV::value_type* str,
231  typename SV::size_type& pos);
232 
233  /**
234  \brief binary find first sparse vector element (string)
235  Sparse vector must be sorted.
236  */
237  bool bfind_eq_str(const SV& sv,
238  const typename SV::value_type* str,
239  typename SV::size_type& pos);
240 
241  /**
242  \brief lower bound search for an array position
243 
244  Method assumes the sparse array is sorted
245 
246  \param sv - input sparse vector
247  \param val - value to search for
248  \param pos - output sparse vector element index
249 
250  \return true if value found
251  */
252  bool lower_bound(const SV& sv,
253  const typename SV::value_type val,
254  typename SV::size_type& pos);
255  /**
256  \brief lower bound search for an array position
257 
258  Method assumes the sparse array is sorted
259 
260  \param sv - input sparse vector
261  \param str - value to search for
262  \param pos - output sparse vector element index
263 
264  \return true if value found
265  */
266  bool lower_bound_str(const SV& sv,
267  const typename SV::value_type* str,
268  typename SV::size_type& pos);
269 
270  /**
271  \brief binary find first sparse vector element (string)
272  Sparse vector must be sorted and attached
273  @sa bind
274  */
275  bool bfind_eq_str(const typename SV::value_type* str,
276  typename SV::size_type& pos);
277 
278  /**
279  \brief find all sparse vector elements EQ to 0
280  \param sv - input sparse vector
281  \param bv_out - output bit-vector (search result masks 1 elements)
282  */
283  void find_zero(const SV& sv,
284  typename SV::bvector_type& bv_out);
285 
286  /*!
287  \brief Find non-zero elements
288  Output vector is computed as a logical OR (join) of all plains
289 
290  \param sv - input sparse vector
291  \param bv_out - output bit-bector of non-zero elements
292  */
293  void find_nonzero(const SV& sv, typename SV::bvector_type& bv_out);
294 
295  /**
296  \brief invert search result ("EQ" to "not EQ")
297 
298  \param sv - input sparse vector
299  \param bv_out - output bit-bector of non-zero elements
300  */
301  void invert(const SV& sv, typename SV::bvector_type& bv_out);
302 
303  /**
304  \brief find all values A IN (C, D, E, F)
305  \param sv - input sparse vector
306  \param start - start iterator (set to search)
307  \param end - end iterator (set to search)
308  \param bv_out - output bit-bector of non-zero elements
309  */
310  template<typename It>
311  void find_eq(const SV& sv,
312  It start,
313  It end,
314  typename SV::bvector_type& bv_out)
315  {
316  typename bvector_type::mem_pool_guard mp_guard;
317  mp_guard.assign_if_not_set(pool_, bv_out); // set algorithm-local memory pool to avoid heap contention
318 
319  bvector_type bv1;
320  typename bvector_type::mem_pool_guard mp_guard1(pool_, bv1);
321  bool any_zero = false;
322  for (; start < end; ++start)
323  {
324  value_type v = *start;
325  any_zero |= (v == 0);
326  bool found = find_eq_with_nulls(sv, v, bv1);
327  if (found)
328  bv_out.bit_or(bv1);
329  else
330  {
331  BM_ASSERT(!bv1.any());
332  }
333  } // for
334  if (any_zero)
335  correct_nulls(sv, bv_out);
336  }
337 
338  /// For testing purposes only
339  ///
340  /// @internal
341  void find_eq_with_nulls_horizontal(const SV& sv,
342  typename SV::value_type value,
343  typename SV::bvector_type& bv_out);
344 
345  /** Exclude possible NULL values from the result vector
346  \param sv - input sparse vector
347  \param bv_out - output bit-bector of non-zero elements
348  */
349  void correct_nulls(const SV& sv, typename SV::bvector_type& bv_out);
350 
351 protected:
352 
353  /// set search boundaries (hint for the aggregator)
354  void set_search_range(size_type from, size_type to);
355 
356  /// reset (disable) search range
357  void reset_search_range();
358 
359  /// find value (may include NULL indexes)
360  bool find_eq_with_nulls(const SV& sv,
361  typename SV::value_type value,
362  typename SV::bvector_type& bv_out,
363  typename SV::size_type search_limit = 0);
364 
365  /// find first value (may include NULL indexes)
366  bool find_first_eq(const SV& sv,
367  typename SV::value_type value,
368  size_type& idx);
369 
370  /// find first string value (may include NULL indexes)
371  bool find_first_eq(const SV& sv,
372  const typename SV::value_type* str,
373  size_type& idx,
374  bool remaped);
375 
376 
377  /// Prepare aggregator for AND-SUB (EQ) search
378  bool prepare_and_sub_aggregator(const SV& sv,
379  typename SV::value_type value);
380 
381  /// Prepare aggregator for AND-SUB (EQ) search (string)
382  bool prepare_and_sub_aggregator(const SV& sv,
383  const typename SV::value_type* str,
384  unsigned octet_start);
385 
386  /// Rank-Select decompression for RSC vectors
387  void decompress(const SV& sv, typename SV::bvector_type& bv_out);
388 
389  /// compare sv[idx] with input str
390  int compare_str(const SV& sv, size_type idx, const value_type* str);
391 
392  /// compare sv[idx] with input value
393  int compare(const SV& sv, size_type idx, const value_type val);
394 
395 protected:
397  void operator=(const sparse_vector_scanner&) = delete;
398 
399 protected:
400 
402  {
403  max_columns = SV::max_vector_size
404  };
405 
407  {
410  };
411 
412  typedef bm::dynamic_heap_matrix<value_type, allocator_type> heap_matrix_type;
413  typedef bm::heap_matrix<typename SV::value_type,
415  SV::sv_octet_plains,
416  allocator_type> matrix_search_buf_type;
417 
418 
419 private:
420  allocator_pool_type pool_;
421  bvector_type bv_tmp_;
424 
425  size_type mask_from_;
426  size_type mask_to_;
427  bool mask_set_;
428 
429  const SV* bound_sv_;
430  heap_matrix_type block0_elements_cache_; ///< cache for elements[0] of each block
431  heap_matrix_type block3_elements_cache_; ///< cache for elements[16384x] of each block
432  size_type effective_str_max_;
433 
434  value_type remap_value_vect_[SV::max_vector_size];
435  /// masks of allocated bit-plains (1 - means there is a bit-plain)
436  bm::id64_t vector_plain_masks_[SV::max_vector_size];
437  matrix_search_buf_type hmatr_; ///< heap matrix for string search linear stage
438 };
439 
440 
441 /*!
442  \brief Integer set to set transformation (functional image in groups theory)
443  https://en.wikipedia.org/wiki/Image_(mathematics)
444 
445  Input sets gets translated through the function, which is defined as
446  "one to one (or NULL)" binary relation object (sparse_vector).
447  Also works for M:1 relationships.
448 
449  \ingroup svalgo
450  \ingroup setalgo
451 */
452 template<typename SV>
454 {
455 public:
456  typedef typename SV::bvector_type bvector_type;
457  typedef typename SV::value_type value_type;
458  typedef typename SV::size_type size_type;
460 public:
461 
464 
465 
466  /** Get read access to zero-elements vector
467  Zero vector gets populated after attach_sv() is called
468  or as a side-effect of remap() with immediate sv param
469  */
470  const bvector_type& get_bv_zero() const { return bv_zero_; }
471 
472  /** Perform remapping (Image function) (based on attached translation table)
473 
474  \param bv_in - input set, defined as a bit-vector
475  \param bv_out - output set as a bit-vector
476 
477  @sa attach_sv
478  */
479  void remap(const bvector_type& bv_in,
480  bvector_type& bv_out);
481 
482  /** Perform remapping (Image function)
483 
484  \param bv_in - input set, defined as a bit-vector
485  \param sv_brel - binary relation (translation table) sparse vector
486  \param bv_out - output set as a bit-vector
487  */
488  void remap(const bvector_type& bv_in,
489  const SV& sv_brel,
490  bvector_type& bv_out);
491 
492  /** Remap single set element
493 
494  \param id_from - input value
495  \param sv_brel - translation table sparse vector
496  \param id_to - out value
497 
498  \return - true if value was found and remapped
499  */
500  bool remap(size_type id_from, const SV& sv_brel, size_type& id_to);
501 
502 
503  /** Run remap transformation
504 
505  \param bv_in - input set, defined as a bit-vector
506  \param sv_brel - translation table sparse vector
507  \param bv_out - output set as a bit-vector
508 
509  @sa remap
510  */
511  void run(const bvector_type& bv_in,
512  const SV& sv_brel,
513  bvector_type& bv_out)
514  {
515  remap(bv_in, sv_brel, bv_out);
516  }
517 
518  /** Attach a translation table vector for remapping (Image function)
519 
520  \param sv_brel - binary relation sparse vector pointer
521  (pass NULL to detach)
522  \param compute_stats - flag to perform computation of some statistics
523  later used in remapping. This only make sense
524  for series of remappings on the same translation
525  vector.
526  @sa remap
527  */
528  void attach_sv(const SV* sv_brel, bool compute_stats = false);
529 
530 
531 protected:
532  void one_pass_run(const bvector_type& bv_in,
533  const SV& sv_brel,
534  bvector_type& bv_out);
535 
536  /// @internal
537  template<unsigned BSIZE>
539  {
540  size_type BM_VECT_ALIGN gather_idx_[BSIZE] BM_VECT_ALIGN_ATTR;
541  value_type BM_VECT_ALIGN buffer_[BSIZE] BM_VECT_ALIGN_ATTR;
542  };
543 
545  {
546  sv_g_size = 1024 * 8
547  };
549 
550 
551 protected:
553  void operator=(const set2set_11_transform&) = delete;
554 
555 protected:
556  const SV* sv_ptr_; ///< current translation table vector
557  gather_buffer_type* gb_; ///< intermediate buffers
558  bvector_type bv_product_;///< temp vector
559 
560  bool have_stats_; ///< flag of statistics presense
561  bvector_type bv_zero_; ///< bit-vector for zero elements
562 
563  allocator_pool_type pool_;
564 };
565 
566 
567 
568 //----------------------------------------------------------------------------
569 //
570 //----------------------------------------------------------------------------
571 
572 template<typename SV>
574 : sv_ptr_(0), gb_(0), have_stats_(false)
575 {
576  gb_ = (gather_buffer_type*)::malloc(sizeof(gather_buffer_type));
577  if (!gb_)
578  {
579  SV::throw_bad_alloc();
580  }
581 }
582 
583 //----------------------------------------------------------------------------
584 
585 template<typename SV>
587 {
588  if (gb_)
589  ::free(gb_);
590 }
591 
592 
593 //----------------------------------------------------------------------------
594 
595 template<typename SV>
596 void set2set_11_transform<SV>::attach_sv(const SV* sv_brel, bool compute_stats)
597 {
598  sv_ptr_ = sv_brel;
599  if (!sv_ptr_)
600  {
601  have_stats_ = false;
602  }
603  else
604  {
605  if (sv_brel->empty() || !compute_stats)
606  return; // nothing to do
607  const bvector_type* bv_non_null = sv_brel->get_null_bvector();
608  if (bv_non_null)
609  return; // already have 0 elements vector
610 
611 
612  typename bvector_type::mem_pool_guard mp_g_z;
614 
616  scanner.find_zero(*sv_brel, bv_zero_);
617  have_stats_ = true;
618  }
619 }
620 
621 //----------------------------------------------------------------------------
622 
623 template<typename SV>
625  const SV& sv_brel,
626  size_type& id_to)
627 {
628  if (sv_brel.empty())
629  return false; // nothing to do
630 
631  const bvector_type* bv_non_null = sv_brel.get_null_bvector();
632  if (bv_non_null)
633  {
634  if (!bv_non_null->test(id_from))
635  return false;
636  }
637  size_type idx = sv_brel.translate_address(id_from);
638  id_to = sv_brel.get(idx);
639  return true;
640 }
641 
642 //----------------------------------------------------------------------------
643 
644 template<typename SV>
646  const SV& sv_brel,
647  bvector_type& bv_out)
648 {
649  typename bvector_type::mem_pool_guard mp_g_out, mp_g_p, mp_g_z;
650  mp_g_out.assign_if_not_set(pool_, bv_out);
653 
654  attach_sv(&sv_brel);
655 
656  remap(bv_in, bv_out);
657 
658  attach_sv(0); // detach translation table
659 }
660 
661 template<typename SV>
663  bvector_type& bv_out)
664 {
666 
667  bv_out.clear();
668 
669  if (sv_ptr_ == 0 || sv_ptr_->empty())
670  return; // nothing to do
671 
672  bv_out.init(); // just in case to "fast set" later
673 
674  typename bvector_type::mem_pool_guard mp_g_out, mp_g_p, mp_g_z;
675  mp_g_out.assign_if_not_set(pool_, bv_out);
678 
679 
680  const bvector_type* enum_bv;
681 
682  const bvector_type * bv_non_null = sv_ptr_->get_null_bvector();
683  if (bv_non_null)
684  {
685  // TODO: optimize with 2-way ops
686  bv_product_ = bv_in;
687  bv_product_.bit_and(*bv_non_null);
688  enum_bv = &bv_product_;
689  }
690  else // non-NULL vector is not available
691  {
692  enum_bv = &bv_in;
693  // if we have any elements mapping into "0" on the other end
694  // we map it once (chances are there are many duplicates)
695  //
696 
697  if (have_stats_) // pre-attached translation statistics
698  {
699  bv_product_ = bv_in;
700  size_type cnt1 = bv_product_.count();
701  bv_product_.bit_sub(bv_zero_);
702  size_type cnt2 = bv_product_.count();
703 
704  BM_ASSERT(cnt2 <= cnt1);
705 
706  if (cnt1 != cnt2) // mapping included 0 elements
707  bv_out.set_bit_no_check(0);
708 
709  enum_bv = &bv_product_;
710  }
711  }
712 
713 
714 
715  size_type buf_cnt, nb_old, nb;
716  buf_cnt = nb_old = 0;
717 
718  typename bvector_type::enumerator en(enum_bv->first());
719  for (; en.valid(); ++en)
720  {
721  typename SV::size_type idx = *en;
722  idx = sv_ptr_->translate_address(idx);
723 
724  nb = unsigned(idx >> bm::set_block_shift);
725  if (nb != nb_old) // new blocks
726  {
727  if (buf_cnt)
728  {
729  sv_ptr_->gather(&gb_->buffer_[0], &gb_->gather_idx_[0], buf_cnt, BM_SORTED_UNIFORM);
730  bv_out.set(&gb_->buffer_[0], buf_cnt, BM_SORTED);
731  buf_cnt ^= buf_cnt;
732  }
733  nb_old = nb;
734  gb_->gather_idx_[buf_cnt++] = idx;
735  }
736  else
737  {
738  gb_->gather_idx_[buf_cnt++] = idx;
739  }
740 
741  if (buf_cnt == sv_g_size)
742  {
743  sv_ptr_->gather(&gb_->buffer_[0], &gb_->gather_idx_[0], buf_cnt, BM_SORTED_UNIFORM);
744  bv_out.set(&gb_->buffer_[0], buf_cnt, bm::BM_SORTED);
745  buf_cnt ^= buf_cnt;
746  }
747  } // for en
748  if (buf_cnt)
749  {
750  sv_ptr_->gather(&gb_->buffer_[0], &gb_->gather_idx_[0], buf_cnt, BM_SORTED_UNIFORM);
751  bv_out.set(&gb_->buffer_[0], buf_cnt, bm::BM_SORTED);
752  }
753 }
754 
755 
756 //----------------------------------------------------------------------------
757 
758 template<typename SV>
760  const SV& sv_brel,
761  bvector_type& bv_out)
762 {
763  if (sv_brel.empty())
764  return; // nothing to do
765 
766  bv_out.init();
767 
768  typename SV::const_iterator it = sv_brel.begin();
769  for (; it.valid(); ++it)
770  {
771  typename SV::value_type t_id = *it;
772  size_type idx = it.pos();
773  if (bv_in.test(idx))
774  {
775  bv_out.set_bit_no_check(t_id);
776  }
777  } // for
778 }
779 
780 
781 //----------------------------------------------------------------------------
782 //
783 //----------------------------------------------------------------------------
784 
785 template<typename SV>
787 {
788  mask_set_ = false;
789  mask_from_ = mask_to_ = bm::id_max;
790 
791  bound_sv_ = 0;
792  effective_str_max_ = 0;
793 }
794 
795 //----------------------------------------------------------------------------
796 
797 template<typename SV>
798 void sparse_vector_scanner<SV>::bind(const SV& sv, bool sorted)
799 {
800  bound_sv_ = &sv;
801  if (sorted)
802  {
803  size_type sv_sz = sv.size();
804  BM_ASSERT(sv_sz);
805  size_type total_nb = sv_sz / bm::gap_max_bits + 1;
806  effective_str_max_ = sv.effective_vector_max();
807 
808  block0_elements_cache_.resize(total_nb, effective_str_max_+1);
809  block0_elements_cache_.set_zero();
810 
811  block3_elements_cache_.resize(total_nb * 3, effective_str_max_+1);
812  block3_elements_cache_.set_zero();
813 
814  // fill in elements cache
815  for (size_type i = 0; i < sv_sz; i+= bm::gap_max_bits)
816  {
817  size_type nb = (i >> bm::set_block_shift);
818  value_type* s0 = block0_elements_cache_.row(nb);
819  sv.get(i, s0, size_type(block0_elements_cache_.cols()));
820 
821  for (size_type k = 0; k < 3; ++k)
822  {
823  value_type* s1 = block3_elements_cache_.row(nb * 3 + k);
824  size_type idx = i + (k+1) * bm::sub_block3_size;
825  sv.get(idx, s1, size_type(block3_elements_cache_.cols()));
826  } // for k
827  } // for i
828  }
829  // pre-calculate vector plain masks
830  //
831  for (unsigned i = 0; i < SV::max_vector_size; ++i)
832  {
833  vector_plain_masks_[i] = sv.get_plains_mask(i);
834  } // for i
835 
836 }
837 
838 //----------------------------------------------------------------------------
839 
840 template<typename SV>
842 {
843  bound_sv_ = 0;
844  effective_str_max_ = 0;
845 }
846 
847 //----------------------------------------------------------------------------
848 
849 template<typename SV>
851  typename SV::bvector_type& bv_out)
852 {
853  if (sv.size() == 0)
854  {
855  bv_out.clear();
856  return;
857  }
858  find_nonzero(sv, bv_out);
859  if (sv.is_compressed())
860  {
861  bv_out.invert();
862  bv_out.set_range(sv.effective_size(), bm::id_max - 1, false);
863  decompress(sv, bv_out);
864  }
865  else
866  {
867  invert(sv, bv_out);
868  }
869  correct_nulls(sv, bv_out);
870 }
871 
872 //----------------------------------------------------------------------------
873 
874 template<typename SV>
875 void sparse_vector_scanner<SV>::invert(const SV& sv, typename SV::bvector_type& bv_out)
876 {
877  if (sv.size() == 0)
878  {
879  bv_out.clear();
880  return;
881  }
882  // TODO: find a better algorithm (NAND?)
883  bv_out.invert();
884  const bvector_type* bv_null = sv.get_null_bvector();
885  if (bv_null) // correct result to only use not NULL elements
886  bv_out &= *bv_null;
887  else
888  {
889  // TODO: use the shorter range to clear the tail
890  bv_out.set_range(sv.size(), bm::id_max - 1, false);
891  }
892 }
893 
894 //----------------------------------------------------------------------------
895 
896 template<typename SV>
898  typename SV::bvector_type& bv_out)
899 {
900  const bvector_type* bv_null = sv.get_null_bvector();
901  if (bv_null) // correct result to only use not NULL elements
902  bv_out.bit_and(*bv_null);
903 }
904 
905 //----------------------------------------------------------------------------
906 
907 template<typename SV>
909  typename SV::value_type value,
910  typename SV::bvector_type& bv_out,
911  typename SV::size_type search_limit)
912 {
913  if (sv.empty())
914  return false; // nothing to do
915 
916  if (!value)
917  {
918  find_zero(sv, bv_out);
919  return bv_out.any();
920  }
921  agg_.reset();
922 
923  bool found = prepare_and_sub_aggregator(sv, value);
924  if (!found)
925  {
926  bv_out.clear();
927  return found;
928  }
929 
930  bool any = (search_limit == 1);
931  found = agg_.combine_and_sub(bv_out, any);
932  agg_.reset();
933  return found;
934 }
935 
936 //----------------------------------------------------------------------------
937 
938 template<typename SV>
940  typename SV::value_type value,
941  size_type& idx)
942 {
943  if (sv.empty())
944  return false; // nothing to do
945 
946  BM_ASSERT(value); // cannot handle zero values
947  if (!value)
948  return false;
949 
950  agg_.reset();
951  bool found = prepare_and_sub_aggregator(sv, value);
952  if (!found)
953  return found;
954  found = agg_.find_first_and_sub(idx);
955  agg_.reset();
956  return found;
957 }
958 
959 
960 //----------------------------------------------------------------------------
961 
962 template<typename SV>
964  const typename SV::value_type* str,
965  size_type& idx,
966  bool remaped)
967 {
968  if (sv.empty())
969  return false; // nothing to do
970  BM_ASSERT(*str);
971 
972  if (!*str)
973  return false;
974 
975  agg_.reset();
976  unsigned common_prefix_len = 0;
977 
978  if (mask_set_)
979  {
980  agg_.set_range_hint(mask_from_, mask_to_);
981  common_prefix_len = sv.common_prefix_length(mask_from_, mask_to_);
982  }
983 
984  if (remaped)
985  {
986  str = remap_value_vect_;
987  }
988  else
989  {
990  if (sv.is_remap() && str != remap_value_vect_)
991  {
992  bool r = sv.remap_tosv(remap_value_vect_, SV::max_vector_size, str);
993  if (!r)
994  return r;
995  str = remap_value_vect_;
996  }
997  }
998 
999  bool found = prepare_and_sub_aggregator(sv, str, common_prefix_len);
1000  if (!found)
1001  return found;
1002 
1003  found = agg_.find_first_and_sub(idx);
1004  agg_.reset();
1005  return found;
1006 }
1007 
1008 //----------------------------------------------------------------------------
1009 
1010 template<typename SV>
1012  const typename SV::value_type* str,
1013  unsigned octet_start)
1014 {
1015  unsigned char bits[64];
1016 
1017  int len = 0;
1018  for (; str[len] != 0; ++len)
1019  {}
1020  BM_ASSERT(len);
1021 
1022  // use reverse order (faster for sorted arrays)
1023  for (int octet_idx = len-1; octet_idx >= 0; --octet_idx)
1024  {
1025  if (unsigned(octet_idx) < octet_start) // common prefix
1026  break;
1027 
1028  unsigned value = unsigned(str[octet_idx]) & 0xFF;
1029  BM_ASSERT(value != 0);
1030 
1031  bm::id64_t plains_mask;
1032  if (&sv == bound_sv_)
1033  plains_mask = vector_plain_masks_[octet_idx];
1034  else
1035  plains_mask = sv.get_plains_mask(unsigned(octet_idx));
1036 
1037  if ((value & plains_mask) != value) // pre-screen for impossible values
1038  return false; // found non-existing plain
1039 
1040  // prep the lists for combined AND-SUB aggregator
1041  //
1042  unsigned short bit_count_v = bm::bitscan(value, bits);
1043  for (unsigned i = 0; i < bit_count_v; ++i)
1044  {
1045  unsigned bit_idx = bits[i];
1046  BM_ASSERT(value & (value_type(1) << bit_idx));
1047  unsigned plain_idx = (unsigned(octet_idx) * 8) + bit_idx;
1048  const bvector_type* bv = sv.get_plain(plain_idx);
1049  agg_.add(bv);
1050  } // for i
1051 
1052  unsigned vinv = unsigned(value);
1053  if (bm::conditional<sizeof(value_type) == 1>::test())
1054  {
1055  vinv ^= 0xFF;
1056  }
1057  else // 2-byte char
1058  {
1059  BM_ASSERT(sizeof(value_type) == 2);
1060  vinv ^= 0xFFFF;
1061  }
1062  // exclude the octet bits which are all not set (no vectors)
1063  vinv &= unsigned(plains_mask);
1064  for (unsigned octet_plain = (unsigned(octet_idx) * 8); vinv; vinv &= vinv-1)
1065  {
1066  bm::id_t t = vinv & -vinv;
1067  unsigned bit_idx = bm::word_bitcount(t - 1);
1068  unsigned plain_idx = octet_plain + bit_idx;
1069  const bvector_type* bv = sv.get_plain(plain_idx);
1070  BM_ASSERT(bv);
1071  agg_.add(bv, 1); // agg to SUB group
1072  } // for
1073  } // for octet_idx
1074 
1075  // add all vectors above string len to the SUB operation group
1076  //
1077  unsigned plain_idx = unsigned(len * 8) + 1;
1078  typename SV::size_type plains;
1079  if (&sv == bound_sv_)
1080  plains = effective_str_max_ * unsigned(sizeof(value_type)) * 8;
1081  else
1082  plains = sv.plains();
1083  for (; plain_idx < plains; ++plain_idx)
1084  {
1085  bvector_type_const_ptr bv = sv.get_plain(plain_idx);
1086  if (bv)
1087  agg_.add(bv, 1); // agg to SUB group
1088  } // for
1089  return true;
1090 }
1091 
1092 //----------------------------------------------------------------------------
1093 
1094 template<typename SV>
1096  typename SV::value_type value)
1097 {
1098  unsigned char bits[sizeof(value) * 8];
1099  unsigned short bit_count_v = bm::bitscan(value, bits);
1100  BM_ASSERT(bit_count_v);
1101  const value_type mask1 = 1;
1102 
1103  // prep the lists for combined AND-SUB aggregator
1104  // (backward order has better chance for bit reduction on AND)
1105  //
1106  for (unsigned i = bit_count_v; i > 0; --i)
1107  {
1108  unsigned bit_idx = bits[i-1];
1109  BM_ASSERT(value & (mask1 << bit_idx));
1110  const bvector_type* bv = sv.get_plain(bit_idx);
1111  if (bv)
1112  agg_.add(bv);
1113  else
1114  return false;
1115  }
1116 
1117  unsigned sv_plains = sv.effective_plains();
1118  for (unsigned i = 0; i < sv_plains; ++i)
1119  {
1120  bvector_type_const_ptr bv = sv.get_plain(i);
1121  value_type mask = mask1 << i;
1122  if (bv && !(value & mask))
1123  agg_.add(bv, 1); // agg to SUB group
1124  } // for i
1125  return true;
1126 }
1127 
1128 
1129 //----------------------------------------------------------------------------
1130 
1131 template<typename SV>
1133  typename SV::value_type value,
1134  typename SV::bvector_type& bv_out)
1135 {
1136  if (sv.empty())
1137  return; // nothing to do
1138 
1139  if (!value)
1140  {
1141  find_zero(sv, bv_out);
1142  return;
1143  }
1144 
1145  unsigned char bits[sizeof(value) * 8];
1146  unsigned short bit_count_v = bm::bitscan(value, bits);
1147  BM_ASSERT(bit_count_v);
1148 
1149  // aggregate AND all matching vectors
1150  {
1151  const bvector_type* bv_plain = sv.get_plain(bits[--bit_count_v]);
1152  if (bv_plain)
1153  bv_out = *bv_plain;
1154  else // plain not found
1155  {
1156  bv_out.clear();
1157  return;
1158  }
1159  }
1160  for (unsigned i = 0; i < bit_count_v; ++i)
1161  {
1162  const bvector_type* bv_plain = sv.get_plain(bits[i]);
1163  if (bv_plain)
1164  bv_out &= *bv_plain;
1165  else // mandatory plain not found - empty result!
1166  {
1167  bv_out.clear();
1168  return;
1169  }
1170  } // for i
1171 
1172  // SUB all other plains
1173  unsigned sv_plains = sv.effective_plains();
1174  for (unsigned i = 0; (i < sv_plains) && value; ++i)
1175  {
1176  const bvector_type* bv_plain = sv.get_plain(i);
1177  if (bv_plain && !(value & (value_type(1) << i)))
1178  bv_out -= *bv_plain;
1179  }
1180 }
1181 
1182 //----------------------------------------------------------------------------
1183 
1184 template<typename SV>
1185 bool sparse_vector_scanner<SV>::find_eq_str(const typename SV::value_type* str,
1186  typename SV::size_type& pos)
1187 {
1188  BM_ASSERT(bound_sv_);
1189  return this->find_eq_str(*bound_sv_, str, pos);
1190 }
1191 
1192 //----------------------------------------------------------------------------
1193 
1194 template<typename SV>
1196  const typename SV::value_type* str,
1197  typename SV::size_type& pos)
1198 {
1199  bool found = false;
1200  if (sv.empty())
1201  return found;
1202  if (*str)
1203  {
1204  bool remaped = false;
1205  if (bm::conditional<SV::is_remap_support::value>::test()) // test remapping trait
1206  {
1207  if (sv.is_remap() && str != remap_value_vect_)
1208  {
1209  remaped = sv.remap_tosv(remap_value_vect_, SV::max_vector_size, str);
1210  if (!remaped)
1211  return remaped;
1212  str = remap_value_vect_;
1213  }
1214  }
1215 
1216  size_type found_pos;
1217  found = find_first_eq(sv, str, found_pos, remaped);
1218  if (found)
1219  {
1220  pos = found_pos;
1221  if (bm::conditional<SV::is_rsc_support::value>::test()) // test rank/select trait
1222  {
1223  if (sv.is_compressed()) // if compressed vector - need rank translation
1224  found = sv.find_rank(found_pos + 1, pos);
1225  }
1226  }
1227  }
1228  else // search for zero value
1229  {
1230  // TODO: implement optimized version which would work witout temp vector
1231  typename SV::bvector_type bv_zero;
1232  find_zero(sv, bv_zero);
1233  found = bv_zero.find(pos);
1234  }
1235  return found;
1236 }
1237 
1238 //----------------------------------------------------------------------------
1239 
1240 template<typename SV>
1242  const typename SV::value_type* str,
1243  typename SV::size_type& pos)
1244 {
1245  bool found = false;
1246  if (sv.empty())
1247  return found;
1248 
1249  if (*str)
1250  {
1251  bool remaped = false;
1252  // test search pre-condition based on remap tables
1254  {
1255  if (sv.is_remap() && str != remap_value_vect_)
1256  {
1257  remaped = sv.remap_tosv(
1258  remap_value_vect_, SV::max_vector_size, str);
1259  if (!remaped)
1260  return remaped;
1261  }
1262  }
1263 
1264  reset_search_range();
1265 
1266  // narrow down the search
1267  const unsigned min_distance_cutoff = bm::gap_max_bits + bm::gap_max_bits / 2;
1268  size_type l, r, dist;
1269  l = 0; r = sv.size()-1;
1270  size_type found_pos;
1271 
1272  // binary search to narrow down the search window
1273  while (l <= r)
1274  {
1275  dist = r - l;
1276  if (dist < min_distance_cutoff)
1277  {
1278  // we are in an narrow <2 blocks window, but still may be in two
1279  // different neighboring blocks, lets try to narrow
1280  // to exactly one block
1281 
1282  size_type nb_l = (l >> bm::set_block_shift);
1283  size_type nb_r = (r >> bm::set_block_shift);
1284  if (nb_l != nb_r)
1285  {
1286  size_type mid = nb_r * bm::gap_max_bits;
1287  if (mid < r)
1288  {
1289  int cmp = this->compare_str(sv, mid, str);
1290  if (cmp < 0) // mid < str
1291  l = mid;
1292  else
1293  r = mid-(cmp!=0); // branchless if (cmp==0) r=mid;
1294  BM_ASSERT(l < r);
1295  }
1296  nb_l = unsigned(l >> bm::set_block_shift);
1297  nb_r = unsigned(r >> bm::set_block_shift);
1298  }
1299 
1300  if (nb_l == nb_r)
1301  {
1302  size_type max_nb = sv.size() >> bm::set_block_shift;
1303  if (nb_l != max_nb)
1304  {
1305  // linear in-place fixed depth scan to identify the sub-range
1307  int cmp = this->compare_str(sv, mid, str);
1308  if (cmp < 0)
1309  {
1310  l = mid;
1311  mid = nb_r * bm::gap_max_bits + bm::sub_block3_size * 2;
1312  cmp = this->compare_str(sv, mid, str);
1313  if (cmp < 0)
1314  {
1315  l = mid;
1316  mid = nb_r * bm::gap_max_bits + bm::sub_block3_size * 3;
1317  cmp = this->compare_str(sv, mid, str);
1318  if (cmp < 0)
1319  l = mid;
1320  else
1321  r = mid;
1322  }
1323  else
1324  {
1325  r = mid;
1326  }
1327  }
1328  else
1329  {
1330  r = mid;
1331  }
1332  }
1333  }
1334 
1335  set_search_range(l, r);
1336  break;
1337  }
1338 
1339  typename SV::size_type mid = dist/2+l;
1340  size_type nb = (mid >> bm::set_block_shift);
1341  mid = nb * bm::gap_max_bits;
1342  if (mid <= l)
1343  {
1344  if (nb == 0 && r > bm::gap_max_bits)
1345  mid = bm::gap_max_bits;
1346  else
1347  mid = dist / 2 + l;
1348  }
1349  BM_ASSERT(mid > l);
1350 
1351  int cmp = this->compare_str(sv, mid, str);
1352  if (cmp == 0)
1353  {
1354  found_pos = mid;
1355  found = true;
1356  set_search_range(l, mid);
1357  break;
1358  }
1359  if (cmp < 0)
1360  l = mid+1;
1361  else
1362  r = mid-1;
1363  } // while
1364 
1365  // use linear search (range is set)
1366  found = find_first_eq(sv, str, found_pos, remaped);
1367  if (found)
1368  {
1369  pos = found_pos;
1370  if (bm::conditional<SV::is_rsc_support::value>::test()) // test rank/select trait
1371  {
1372  if (sv.is_compressed()) // if compressed vector - need rank translation
1373  found = sv.find_rank(found_pos + 1, pos);
1374  }
1375  }
1376  reset_search_range();
1377  }
1378  else // search for zero value
1379  {
1380  // TODO: implement optimized version which would work without temp vector
1381  typename SV::bvector_type bv_zero;
1382  find_zero(sv, bv_zero);
1383  found = bv_zero.find(pos);
1384  }
1385  return found;
1386 }
1387 
1388 //----------------------------------------------------------------------------
1389 
1390 template<typename SV>
1391 bool sparse_vector_scanner<SV>::bfind_eq_str(const typename SV::value_type* str,
1392  typename SV::size_type& pos)
1393 {
1394  BM_ASSERT(bound_sv_);
1395  return bfind_eq_str(*bound_sv_, str, pos);
1396 }
1397 
1398 //----------------------------------------------------------------------------
1399 
1400 template<typename SV>
1402  const typename SV::value_type val,
1403  typename SV::size_type& pos)
1404 {
1405  int cmp;
1406  size_type l = 0, r = sv.size();
1407  if (l == r) // empty vector
1408  {
1409  pos = 0;
1410  return false;
1411  }
1412  --r;
1413 
1414  // check initial boundary conditions if insert point is at tail/head
1415  cmp = this->compare(sv, l, val); // left (0) boundary check
1416  if (cmp > 0) // vect[x] > str
1417  {
1418  pos = 0;
1419  return false;
1420  }
1421  if (cmp == 0)
1422  {
1423  pos = 0;
1424  return true;
1425  }
1426  cmp = this->compare(sv, r, val); // right(size-1) boundary check
1427  if (cmp == 0)
1428  {
1429  pos = r;
1430  // back-scan to rewind all duplicates
1431  // TODO: adapt one-sided binary search to traverse large platos
1432  for (; r >= 0; --r)
1433  {
1434  cmp = this->compare(sv, r, val);
1435  if (cmp != 0)
1436  return true;
1437  pos = r;
1438  } // for i
1439  return true;
1440  }
1441  if (cmp < 0) // vect[x] < str
1442  {
1443  pos = r+1;
1444  return false;
1445  }
1446 
1447  size_type dist = r - l;
1448  if (dist < linear_cutoff1)
1449  {
1450  for (; l <= r; ++l)
1451  {
1452  cmp = this->compare(sv, l, val);
1453  if (cmp == 0)
1454  {
1455  pos = l;
1456  return true;
1457  }
1458  if (cmp > 0)
1459  {
1460  pos = l;
1461  return false;
1462  }
1463  } // for
1464  }
1465 
1466  while (l <= r)
1467  {
1468  size_type mid = (r-l)/2+l;
1469  cmp = this->compare(sv, mid, val);
1470  if (cmp == 0)
1471  {
1472  pos = mid;
1473  // back-scan to rewind all duplicates
1474  for (size_type i = mid-1; i >= 0; --i)
1475  {
1476  cmp = this->compare(sv, i, val);
1477  if (cmp != 0)
1478  return true;
1479  pos = i;
1480  } // for i
1481  pos = 0;
1482  return true;
1483  }
1484  if (cmp < 0)
1485  l = mid+1;
1486  else
1487  r = mid-1;
1488 
1489  dist = r - l;
1490  if (dist < linear_cutoff2) // do linear scan here
1491  {
1492  typename SV::const_iterator it(&sv, l);
1493  for (; it.valid(); ++it, ++l)
1494  {
1495  typename SV::value_type sv_value = it.value();
1496  if (sv_value == val)
1497  {
1498  pos = l;
1499  return true;
1500  }
1501  if (sv_value > val) // vect[x] > val
1502  {
1503  pos = l;
1504  return false;
1505  }
1506  } // for it
1507  BM_ASSERT(0);
1508  pos = l;
1509  return false;
1510  }
1511  } // while
1512 
1513  BM_ASSERT(0);
1514  return false;
1515 }
1516 
1517 
1518 //----------------------------------------------------------------------------
1519 
1520 template<typename SV>
1522  const SV& sv,
1523  const typename SV::value_type* str,
1524  typename SV::size_type& pos)
1525 {
1526  int cmp;
1527  size_type l = 0, r = sv.size();
1528 
1529  if (l == r) // empty vector
1530  {
1531  pos = 0;
1532  return false;
1533  }
1534  --r;
1535 
1536  // check initial boundary conditions if insert point is at tail/head
1537  cmp = this->compare_str(sv, l, str); // left (0) boundary check
1538  if (cmp > 0) // vect[x] > str
1539  {
1540  pos = 0;
1541  return false;
1542  }
1543  if (cmp == 0)
1544  {
1545  pos = 0;
1546  return true;
1547  }
1548  cmp = this->compare_str(sv, r, str); // right(size-1) boundary check
1549  if (cmp == 0)
1550  {
1551  pos = r;
1552  // back-scan to rewind all duplicates
1553  // TODO: adapt one-sided binary search to traverse large platos
1554  for (; r >= 0; --r)
1555  {
1556  cmp = this->compare_str(sv, r, str);
1557  if (cmp != 0)
1558  return true;
1559  pos = r;
1560  } // for i
1561  return true;
1562  }
1563  if (cmp < 0) // vect[x] < str
1564  {
1565  pos = r+1;
1566  return false;
1567  }
1568 
1569  size_type dist = r - l;
1570  if (dist < linear_cutoff1)
1571  {
1572  for (; l <= r; ++l)
1573  {
1574  cmp = this->compare_str(sv, l, str);
1575  if (cmp == 0)
1576  {
1577  pos = l;
1578  return true;
1579  }
1580  if (cmp > 0)
1581  {
1582  pos = l;
1583  return false;
1584  }
1585  } // for
1586  }
1587  while (l <= r)
1588  {
1589  size_type mid = (r-l)/2+l;
1590  cmp = this->compare_str(sv, mid, str);
1591  if (cmp == 0)
1592  {
1593  pos = mid;
1594  // back-scan to rewind all duplicates
1595  for (size_type i = mid-1; i >= 0; --i)
1596  {
1597  cmp = this->compare_str(sv, i, str);
1598  if (cmp != 0)
1599  return true;
1600  pos = i;
1601  } // for i
1602  pos = 0;
1603  return true;
1604  }
1605  if (cmp < 0)
1606  l = mid+1;
1607  else
1608  r = mid-1;
1609 
1610  dist = r - l;
1611  if (dist < linear_cutoff2) // do linear scan here
1612  {
1613  hmatr_.init();
1614 
1615  dist = sv.decode(hmatr_, l, dist+1);
1616  for (unsigned i = 0; i < dist; ++i, ++l)
1617  {
1618  const typename SV::value_type* hm_str = hmatr_.row(i);
1619  cmp = ::strcmp(hm_str, str);
1620  if (cmp == 0)
1621  {
1622  pos = l;
1623  return true;
1624  }
1625  if (cmp > 0) // vect[x] > str
1626  {
1627  pos = l;
1628  return false;
1629  }
1630  }
1631  cmp = this->compare_str(sv, l, str);
1632  if (cmp > 0) // vect[x] > str
1633  {
1634  pos = l;
1635  return false;
1636  }
1637  BM_ASSERT(0);
1638  pos = l;
1639  return false;
1640  }
1641  } // while
1642 
1643  BM_ASSERT(0);
1644  return false;
1645 }
1646 
1647 
1648 //----------------------------------------------------------------------------
1649 
1650 template<typename SV>
1652  size_type idx,
1653  const value_type* str)
1654 {
1655  if (bound_sv_ == &sv)
1656  {
1657  size_type nb = (idx >> bm::set_block_shift);
1658  size_type nbit = (idx & bm::set_block_mask);
1659  if (nbit == 0) // access to sentinel, first block element
1660  {
1661  value_type* s0 = block0_elements_cache_.row(nb);
1662  if (*s0 == 0) // uninitialized element
1663  {
1664  sv.get(idx, s0, size_type(block0_elements_cache_.cols()));
1665  }
1666  int res = 0;
1667  for (unsigned i = 0; i < block0_elements_cache_.cols(); ++i)
1668  {
1669  char octet = str[i]; char value = s0[i];
1670  res = (value > octet) - (value < octet);
1671  if (res || !octet)
1672  break;
1673  } // for i
1674  BM_ASSERT(res == sv.compare(idx, str));
1675  return res;
1676  }
1677  else
1678  {
1679  if (nbit % bm::sub_block3_size == 0) // TODO: use AND mask here
1680  {
1681  size_type k = nbit / bm::sub_block3_size - 1;
1682  value_type* s1 = block3_elements_cache_.row(nb * 3 + k);
1683  int res = 0;
1684  for (unsigned i = 0; i < block3_elements_cache_.cols(); ++i)
1685  {
1686  char octet = str[i]; char value = s1[i];
1687  res = (value > octet) - (value < octet);
1688  if (res || !octet)
1689  break;
1690  } // for i
1691  BM_ASSERT(res == sv.compare(idx, str));
1692  return res;
1693  }
1694  }
1695  }
1696  return sv.compare(idx, str);
1697 }
1698 
1699 //----------------------------------------------------------------------------
1700 
1701 template<typename SV>
1703  size_type idx,
1704  const value_type val)
1705 {
1706  // TODO: implement sentinel elements cache (similar to compare_str())
1707  return sv.compare(idx, val);
1708 }
1709 
1710 //----------------------------------------------------------------------------
1711 
1712 template<typename SV>
1714  typename SV::value_type value,
1715  typename SV::bvector_type& bv_out)
1716 {
1717  if (sv.empty())
1718  {
1719  bv_out.clear();
1720  return; // nothing to do
1721  }
1722  if (!value)
1723  {
1724  find_zero(sv, bv_out);
1725  return;
1726  }
1727 
1728  find_eq_with_nulls(sv, value, bv_out, 0);
1729 
1730  decompress(sv, bv_out);
1731  correct_nulls(sv, bv_out);
1732 }
1733 
1734 //----------------------------------------------------------------------------
1735 
1736 template<typename SV>
1738  typename SV::value_type value,
1739  typename SV::size_type& pos)
1740 {
1741  if (!value) // zero value - special case
1742  {
1743  bvector_type bv_zero;
1744  find_eq(sv, value, bv_zero);
1745  bool found = bv_zero.find(pos);
1746  return found;
1747  }
1748 
1749  size_type found_pos;
1750  bool found = find_first_eq(sv, value, found_pos);
1751  if (found)
1752  {
1753  pos = found_pos;
1754  if (bm::conditional<SV::is_rsc_support::value>::test()) // test rank/select trait
1755  {
1756  if (sv.is_compressed()) // if compressed vector - need rank translation
1757  found = sv.find_rank(found_pos + 1, pos);
1758  }
1759  }
1760  return found;
1761 }
1762 
1763 //----------------------------------------------------------------------------
1764 
1765 template<typename SV>
1767  typename SV::bvector_type& bv_out)
1768 {
1769  agg_.reset(); // in case if previous scan was interrupted
1770  for (unsigned i = 0; i < sv.plains(); ++i)
1771  agg_.add(sv.get_plain(i));
1772  agg_.combine_or(bv_out);
1773  agg_.reset();
1774 }
1775 
1776 //----------------------------------------------------------------------------
1777 
1778 template<typename SV>
1780  typename SV::bvector_type& bv_out)
1781 {
1782  if (!sv.is_compressed())
1783  return; // nothing to do
1784  const bvector_type* bv_non_null = sv.get_null_bvector();
1785  BM_ASSERT(bv_non_null);
1786 
1787  // TODO: implement faster decompressor for small result-sets
1788  rank_compr_.decompress(bv_tmp_, *bv_non_null, bv_out);
1789  bv_out.swap(bv_tmp_);
1790 }
1791 
1792 //----------------------------------------------------------------------------
1793 
1794 template<typename SV>
1796 {
1797  BM_ASSERT(from < to);
1798  mask_from_ = from;
1799  mask_to_ = to;
1800  mask_set_ = true;
1801 }
1802 
1803 //----------------------------------------------------------------------------
1804 
1805 template<typename SV>
1807 {
1808  mask_set_ = false;
1809  mask_from_ = mask_to_ = bm::id_max;
1810 }
1811 
1812 
1813 //----------------------------------------------------------------------------
1814 //
1815 //----------------------------------------------------------------------------
1816 
1817 
1818 } // namespace bm
1819 
1820 #include "bmundef.h"
1821 
1822 #endif
bm::heap_matrix< typename SV::value_type, linear_cutoff2, SV::sv_octet_plains, allocator_type > matrix_search_buf_type
SV::bvector_type bvector_type
const SV * sv_ptr_
current translation table vector
allocator_pool_type pool_
void find_eq(const SV &sv, typename SV::value_type value, typename SV::bvector_type &bv_out)
find all sparse vector elements EQ to search value
#define BM_VECT_ALIGN
Definition: bmdef.h:346
BMFORCEINLINE bm::id_t word_bitcount(bm::id_t w)
Definition: bmfunc.h:195
ad-hoc conditional expressions
Definition: bmfunc.h:139
gather_buffer_type * gb_
intermediate buffers
void dynamic_range_clip_low(SV &svect, unsigned low_bit)
Clip dynamic range for signal lower than specified (boost)
void bind(const SV &sv, bool sorted)
bind sparse vector for all searches
const bvector_type * bvector_type_const_ptr
void invert(const SV &sv, typename SV::bvector_type &bv_out)
invert search result ("EQ" to "not EQ")
unsigned long long int id64_t
Definition: bmconst.h:34
Sparse constainer sparse_vector<> for integer types using bit-transposition transform.
void assign_if_not_set(allocator_pool_type &pool, bvector< Alloc > &bv)
Definition: bm.h:1215
bvector_type bv_zero_
bit-vector for zero elements
void attach_sv(const SV *sv_brel, bool compute_stats=false)
Attach a translation table vector for remapping (Image function)
void find_eq(const SV &sv, It start, It end, typename SV::bvector_type &bv_out)
find all values A IN (C, D, E, F)
int compare(const SV &sv, size_type idx, const value_type val)
compare sv[idx] with input value
Definition: bm.h:76
bool find_eq_with_nulls(const SV &sv, typename SV::value_type value, typename SV::bvector_type &bv_out, typename SV::size_type search_limit=0)
find value (may include NULL indexes)
pre-processor un-defines to avoid global space pollution (internal)
algorithms for sparse_vector scan/seach
size_type BM_VECT_ALIGN gather_idx_ [BSIZE] BM_VECT_ALIGN_ATTR
void reset_binding()
reset sparse vector binding
gather_buffer< sv_g_size > gather_buffer_type
int compare_str(const SV &sv, size_type idx, const value_type *str)
compare sv[idx] with input str
const unsigned id_max
Definition: bmconst.h:105
void remap(const bvector_type &bv_in, bvector_type &bv_out)
Perform remapping (Image function) (based on attached translation table)
void reset_search_range()
reset (disable) search range
bvector_type bv_product_
temp vector
Algorithms for fast aggregation of N bvectors.
void operator=(const sparse_vector_scanner &)=delete
void set_search_range(size_type from, size_type to)
set search boundaries (hint for the aggregator)
Alloc allocator_type
Definition: bm.h:110
input set is sorted (ascending order)
Definition: bmconst.h:192
const unsigned sub_block3_size
Definition: bmconst.h:120
void one_pass_run(const bvector_type &bv_in, const SV &sv_brel, bvector_type &bv_out)
bool prepare_and_sub_aggregator(const SV &sv, typename SV::value_type value)
Prepare aggregator for AND-SUB (EQ) search.
bvector_type::allocator_type allocator_type
bool find_eq_str(const SV &sv, const typename SV::value_type *str, typename SV::size_type &pos)
find first sparse vector element (string)
allocator_type::allocator_pool_type allocator_pool_type
Definition: bm.h:111
Definitions(internal)
void find_zero(const SV &sv, typename SV::bvector_type &bv_out)
find all sparse vector elements EQ to 0
allocator_type::allocator_pool_type allocator_pool_type
void find_nonzero(const SV &sv, typename SV::bvector_type &bv_out)
Find non-zero elements Output vector is computed as a logical OR (join) of all plains.
const unsigned gap_max_bits
Definition: bmconst.h:79
value_type BM_VECT_ALIGN buffer_ [BSIZE] BM_VECT_ALIGN_ATTR
bvector_type::allocator_type::allocator_pool_type allocator_pool_type
bool lower_bound_str(const SV &sv, const typename SV::value_type *str, typename SV::size_type &pos)
lower bound search for an array position
bool find_first_eq(const SV &sv, typename SV::value_type value, size_type &idx)
find first value (may include NULL indexes)
const unsigned set_block_mask
Definition: bmconst.h:56
Constant iterator designed to enumerate "ON" bits.
Definition: bm.h:590
bool lower_bound(const SV &sv, const typename SV::value_type val, typename SV::size_type &pos)
lower bound search for an array position
void find_eq_with_nulls_horizontal(const SV &sv, typename SV::value_type value, typename SV::bvector_type &bv_out)
For testing purposes only.
void run(const bvector_type &bv_in, const SV &sv_brel, bvector_type &bv_out)
Run remap transformation.
unsigned int id_t
Definition: bmconst.h:37
unsigned short bitscan(V w, B *bits)
Definition: bmfunc.h:519
SV::bvector_type bvector_type
void decompress(const SV &sv, typename SV::bvector_type &bv_out)
Rank-Select decompression for RSC vectors.
#define BM_ASSERT
Definition: bmdef.h:117
const bvector_type & get_bv_zero() const
Get read access to zero-elements vector Zero vector gets populated after attach_sv() is called or as ...
bm::bvector bvector_type
const unsigned set_block_shift
Definition: bmconst.h:55
void correct_nulls(const SV &sv, typename SV::bvector_type &bv_out)
Exclude possible NULL values from the result vector.
void dynamic_range_clip_high(SV &svect, unsigned high_bit)
Clip dynamic range for signal higher than specified.
bm::bvector< Alloc > & bit_or(const bm::bvector< Alloc > &bv1, const bm::bvector< Alloc > &bv2, typename bm::bvector< Alloc >::optmode opt_mode)
3-operand OR : this := bv1 OR bv2
Definition: bm.h:4742
sorted and in one block (internal!)
Definition: bmconst.h:193
bool have_stats_
flag of statistics presense
bool bfind_eq_str(const SV &sv, const typename SV::value_type *str, typename SV::size_type &pos)
binary find first sparse vector element (string) Sparse vector must be sorted.
Integer set to set transformation (functional image in groups theory) https://en.wikipedia.org/wiki/Image_(mathematics)
bm::dynamic_heap_matrix< value_type, allocator_type > heap_matrix_type