BitMagic-C++
bmsparsevec_util.h
Go to the documentation of this file.
1 #ifndef BMSPARSEVEC_UTIL_H__INCLUDED__
2 #define BMSPARSEVEC_UTIL_H__INCLUDED__
3 /*
4 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 For more information please visit: http://bitmagic.io
19 */
20 
21 #include <memory.h>
22 #include <stdexcept>
23 
24 #ifndef BM__H__INCLUDED__
25 // BitMagic utility headers do not include main "bm.h" declaration
26 // #include "bm.h" or "bm64.h" explicitly
27 # error missing include (bm.h or bm64.h)
28 #endif
29 
30 #include "bmserial.h"
31 #include "bmdef.h"
32 
33 namespace bm
34 {
35 
36 
37 
38 /*!
39  \brief Bit-bector prefix sum address resolver using bit-vector prefix sum
40  as a space compactor.
41 
42  @internal
43 */
44 template<class BV>
46 {
47 public:
48  typedef BV bvector_type;
49  typedef typename BV::size_type size_type;
51 public:
54  bvps_addr_resolver(const bvps_addr_resolver& addr_res);
55 
57  {
58  if (this != &addr_res)
59  {
60  addr_bv_ = addr_res.addr_bv_;
61  in_sync_ = addr_res.in_sync_;
62  if (in_sync_)
63  {
64  rs_index_->copy_from(*addr_res.rs_index_);
65  }
66  }
67  return *this;
68  }
69 
70  /*!
71  \brief Move content from the argument address resolver
72  */
73  void move_from(bvps_addr_resolver& addr_res) BMNOEXEPT;
74 
75  /*!
76  \brief Resolve id to integer id (address)
77 
78  Resolve id to address with full sync and range checking
79 
80  \param id_from - input id to resolve
81  \param id_to - output id
82 
83  \return true if id is known and resolved successfully
84  */
85  bool resolve(size_type id_from, size_type* id_to) const;
86 
87  /*!
88  \brief Resolve id to integer id (address) without sync check
89 
90  If prefix sum table is not sync - unexpected behavior
91 
92  \param id_from - input id to resolve
93  \param id_to - output id
94 
95  \return true if id is known and resolved successfully
96  */
97  bool get(size_type id_from, size_type* id_to) const;
98 
99  /*!
100  \brief Set id (bit) to address resolver
101  */
102  void set(size_type id_from);
103 
104  /*!
105  \brief Re-calculate prefix sum table
106  \param force - force recalculation even if it is already recalculated
107  */
108  void calc_prefix_sum(bool force = false);
109 
110  /*!
111  \brief Re-calculate prefix sum table (same as calc_prefix_sum)
112  \param force - force recalculation even if it is already recalculated
113  @sa calc_prefix_sum
114  */
115  void sync(bool force = false) { calc_prefix_sum(force); }
116 
117  /*!
118  \brief returns true if prefix sum table is in sync with the vector
119  */
120  bool in_sync() const { return in_sync_; }
121 
122  /*!
123  \brief Unsync the prefix sum table
124  */
125  void unsync() { in_sync_ = false; }
126 
127  /*!
128  \brief Get const reference to the underlying bit-vector
129  */
130  const bvector_type& get_bvector() const { return addr_bv_; }
131 
132  /*!
133  \brief Get writable reference to the underlying bit-vector
134 
135  Access to underlying vector assumes modification and
136  loss of prefix sum acceleration structures.
137  Need to call sync() at the end of transaction.
138  */
139  bvector_type& get_bvector() { unsync(); return addr_bv_; }
140 
141  /*!
142  \brief optimize underlying bit-vector
143  */
144  void optimize(bm::word_t* temp_block = 0);
145 
146  /*!
147  \brief equality comparison
148  */
149  bool equal(const bvps_addr_resolver& addr_res) const;
150 
151 protected:
152  void construct_rs_index();
153  void free_rs_index();
154 
155 private:
156  bvector_type addr_bv_; ///< bit-vector for id translation
157  rs_index_type* rs_index_; ///< rank translation index
158  bool in_sync_; ///< flag if prefix sum is in-sync with vector
159 };
160 
161 
162 /*!
163  \brief sparse vector based address resolver
164  (no space compactor, just bit-plane compressors provided by sparse_vector)
165 
166  @internal
167 */
168 template<class SV>
170 {
171 public:
172  typedef SV sparse_vector_type;
173  typedef typename SV::bvector_type bvector_type;
175 public:
177  sv_addr_resolver(const sv_addr_resolver& addr_res);
178 
179  /*!
180  \brief Resolve id to integer id (address)
181 
182  \param id_from - input id to resolve
183  \param id_to - output id
184 
185  \return true if id is known and resolved successfully
186  */
187  bool resolve(size_type id_from, size_type* id_to) const;
188 
189  /*!
190  \brief Resolve id to integer id (address)
191 
192  \param id_from - input id to resolve
193  \param id_to - output id
194 
195  \return true if id is known and resolved successfully
196  */
197  bool get(size_type id_from, size_type* id_to) const;
198 
199  /*!
200  \brief Set id (bit) to address resolver
201  */
202  void set(size_type id_from);
203 
204  /*!
205  \brief optimize underlying sparse vectors
206  */
207  void optimize(bm::word_t* temp_block = 0);
208 
209  /*!
210  \brief Get const reference to the underlying bit-vector of set values
211  */
212  const bvector_type& get_bvector() const { return set_flags_bv_; }
213 
214 private:
215  bvector_type set_flags_bv_; ///< bit-vector of set flags
216  sparse_vector_type addr_sv_; ///< sparse vector for address map
217  size_type max_addr_; ///< maximum allocated address/index
218 };
219 
220 
221 /**
222  \brief Compressed (sparse collection of objects)
223  @internal
224 */
225 template<class Value, class BV>
227 {
228 public:
229  typedef BV bvector_type;
230  typedef typename BV::size_type size_type;
231  typedef size_type key_type;
232  typedef size_type address_type;
233  typedef Value value_type;
234  typedef Value mapped_type;
235  typedef std::vector<value_type> container_type;
237 
238 public:
240 
241  /**
242  Add new value to compressed collection.
243  Must be added in sorted key order (growing).
244 
245  Unsorted will not be added!
246 
247  \return true if value was added (does not violate sorted key order)
248  */
249  bool push_back(key_type key, const value_type& val);
250 
251  /**
252  find and return const associated value (with bounds/presense checking)
253  */
254  const value_type& at(key_type key) const;
255 
256  /**
257  find and return associated value (with bounds/presense checking)
258  */
259  value_type& at(key_type key);
260 
261  /** Checkpoint method to prepare collection for reading
262  */
263  void sync();
264 
265  /** perform memory optimizations/compression
266  */
267  void optimize(bm::word_t* temp_block=0);
268 
269  /** Resolve key address (index) in the dense vector
270  */
271  bool resolve(key_type key, address_type* addr) const;
272 
273  /** Get access to associated value by resolved address
274  */
275  const value_type& get(address_type addr) const;
276 
277  /** Get address resolver
278  */
279  const address_resolver_type& resolver() const { return addr_res_; }
280 
281  /** Get address resolver
282  */
283  address_resolver_type& resolver() { return addr_res_; }
284 
285  /** size of collection
286  */
287  size_t size() const { return dense_vect_.size(); }
288 
289  /** perform equality comparison with another collection
290  */
291  bool equal(const compressed_collection<Value, BV>& ccoll) const;
292 
293  /** return dense container for direct access
294  (this should be treated as an internal function designed for deserialization)
295  */
296  container_type& container() { return dense_vect_; }
297 
298 protected:
299  void throw_range_error(const char* err_msg) const;
300 
301 protected:
302  address_resolver_type addr_res_; ///< address resolver
303  container_type dense_vect_; ///< compressed space container
304  key_type last_add_; ///< last added element
305 };
306 
307 /**
308  \brief Compressed (sparse collection of objects)
309  @internal
310 */
311 template<class BV>
313  public compressed_collection<typename serializer<BV>::buffer, BV>
314 {
315 public:
316  typedef BV bvector_type;
319 
320  /// collection statistics
321  struct statistics
322  {
323  size_t memory_used; ///< total capacity
324  size_t max_serialize_mem; ///< memory needed for serialization
325  };
326 public:
327 
328  /// move external buffer into collection
329  ///
330  bool move_buffer(typename parent_type::key_type key, buffer_type& buffer)
331  {
332  bool added = this->push_back(key, buffer_type());
333  if (!added)
334  return added;
335  buffer_type& buf = this->at(key);
336  buf.swap(buffer);
337  return added;
338  }
339 
340  /// compute statistics on memory consumption
341  ///
342  void calc_stat(statistics* st) const
343  {
344  BM_ASSERT(st);
345 
346  // evaluate address resolver consumption
347  //
348  typename BV::statistics bv_st;
349  const BV& addr_bv = this->addr_res_.get_bvector();
350  addr_bv.calc_stat(&bv_st);
351  st->memory_used = bv_st.memory_used;
352  st->max_serialize_mem = bv_st.max_serialize_mem;
353 
354  // sum-up all buffers
355  for (size_t i = 0; i < this->dense_vect_.size(); ++i)
356  {
357  const buffer_type& buf = this->dense_vect_.at(i);
358  st->memory_used += buf.capacity();
359  st->max_serialize_mem += buf.size();
360  } // for i
361 
362  // header needs
363  size_t h_size = 2 + 1 + ((this->dense_vect_.size()+1) * 8);
364  st->max_serialize_mem += h_size;
365 
366  // 10% extra on top (safety) for serialization
367  size_t extra_mem = (st->max_serialize_mem / 10);
368  if (!extra_mem)
369  extra_mem = 4096;
370  st->max_serialize_mem += extra_mem;
371  }
372 };
373 
374 
375 
376 //---------------------------------------------------------------------
377 
378 template<class BV>
380 : rs_index_(0), in_sync_(false)
381 {
382  addr_bv_.init();
383  construct_rs_index();
384 }
385 
386 //---------------------------------------------------------------------
387 
388 template<class BV>
390 {
391  free_rs_index();
392 }
393 
394 //---------------------------------------------------------------------
395 
396 template<class BV>
398 {
399  if (rs_index_)
400  return;
401  rs_index_ =
402  (rs_index_type*) bm::aligned_new_malloc(sizeof(rs_index_type));
403  rs_index_ = new(rs_index_) rs_index_type(); // placement new
404 }
405 
406 //---------------------------------------------------------------------
407 
408 template<class BV>
410 {
411  if (rs_index_)
412  {
413  rs_index_->~rs_index();
414  bm::aligned_free(rs_index_);
415  rs_index_ = 0;
416  }
417 }
418 
419 //---------------------------------------------------------------------
420 
421 template<class BV>
423 : addr_bv_(addr_res.addr_bv_),
424  in_sync_(addr_res.in_sync_)
425 {
426  rs_index_ = 0;
427  construct_rs_index();
428 
429  if (in_sync_)
430  {
431  rs_index_->copy_from(*addr_res.rs_index_);
432  }
433  addr_bv_.init();
434 }
435 
436 //---------------------------------------------------------------------
437 
438 
439 template<class BV>
441 {
442  if (this != &addr_res)
443  {
444  addr_bv_.move_from(addr_res.addr_bv_);
445  in_sync_ = addr_res.in_sync_;
446  if (in_sync_)
447  {
448  free_rs_index();
449  rs_index_ = addr_res.rs_index_;
450  addr_res.rs_index_ = 0;
451  }
452  }
453  else
454  {
455  rs_index_ = 0; in_sync_ = false;
456  }
457 }
458 
459 //---------------------------------------------------------------------
460 
461 template<class BV>
463 {
464  BM_ASSERT(id_to);
465  if (in_sync_)
466  {
467  *id_to = addr_bv_.count_to_test(id_from, *rs_index_);
468  }
469  else // slow access
470  {
471  bool found = addr_bv_.test(id_from);
472  if (!found)
473  {
474  *id_to = 0;
475  }
476  else
477  {
478  *id_to = addr_bv_.count_range(0, id_from);
479  }
480  }
481  return (bool) *id_to;
482 }
483 
484 //---------------------------------------------------------------------
485 
486 template<class BV>
488 {
489  BM_ASSERT(id_to);
490  BM_ASSERT(in_sync_);
491 
492  *id_to = addr_bv_.count_to_test(id_from, *rs_index_);
493  return (bool)*id_to;
494 }
495 
496 //---------------------------------------------------------------------
497 
498 template<class BV>
500 {
501  BM_ASSERT(!addr_bv_.test(id_from));
502 
503  in_sync_ = false;
504  addr_bv_.set(id_from);
505 }
506 
507 
508 //---------------------------------------------------------------------
509 
510 
511 template<class BV>
513 {
514  if (in_sync_ && force == false)
515  return; // nothing to do
516 
517  addr_bv_.build_rs_index(rs_index_); // compute popcount prefix list
518  in_sync_ = true;
519 }
520 
521 //---------------------------------------------------------------------
522 
523 template<class BV>
525 {
526  addr_bv_.optimize(temp_block);
527 }
528 
529 //---------------------------------------------------------------------
530 
531 template<class BV>
533 {
534  int cmp = addr_bv_.compare(addr_res.addr_bv_);
535  return (cmp == 0);
536 }
537 
538 //---------------------------------------------------------------------
539 //
540 //---------------------------------------------------------------------
541 
542 
543 
544 template<class SV>
546 : max_addr_(0)
547 {
548  set_flags_bv_.init();
549 }
550 
551 //---------------------------------------------------------------------
552 
553 template<class SV>
555 : set_flags_bv_(addr_res.set_flags_bv_),
556  addr_sv_(addr_res.addr_sv_),
557  max_addr_(addr_res.max_addr_)
558 {
559 }
560 
561 //---------------------------------------------------------------------
562 
563 template<class SV>
565 {
566  BM_ASSERT(id_to);
567 
568  bool found = set_flags_bv_.test(id_from);
569  *id_to = found ? addr_sv_.at(id_from) : 0;
570  return found;
571 }
572 
573 //---------------------------------------------------------------------
574 
575 template<class SV>
577 {
578  bool found = set_flags_bv_.test(id_from);
579  if (!found)
580  {
581  set_flags_bv_.set(id_from);
582  ++max_addr_;
583  addr_sv_.set(id_from, max_addr_);
584  }
585 }
586 
587 //---------------------------------------------------------------------
588 
589 template<class SV>
591 {
592  set_flags_bv_.optimize(temp_block);
593  addr_sv_.optimize(temp_block);
594 }
595 
596 //---------------------------------------------------------------------
597 
598 
599 template<class Value, class BV>
601 : last_add_(0)
602 {
603 }
604 
605 //---------------------------------------------------------------------
606 
607 template<class Value, class BV>
609 {
610  if (dense_vect_.empty()) // adding first one
611  {
612  }
613  else
614  if (key <= last_add_)
615  {
616  BM_ASSERT(0);
617  return false;
618  }
619 
620  addr_res_.set(key);
621  dense_vect_.push_back(val);
622  last_add_ = key;
623  return true;
624 }
625 
626 //---------------------------------------------------------------------
627 
628 template<class Value, class BV>
630 {
631  addr_res_.sync();
632 }
633 
634 //---------------------------------------------------------------------
635 
636 template<class Value, class BV>
638 {
639  addr_res_.optimize(temp_block);
640 }
641 
642 //---------------------------------------------------------------------
643 
644 template<class Value, class BV>
646  address_type* addr) const
647 {
648  bool found = addr_res_.resolve(key, addr);
649  *addr -= found;
650  return found;
651 }
652 
653 //---------------------------------------------------------------------
654 
655 
656 template<class Value, class BV>
659 {
660  return dense_vect_.at(addr);
661 }
662 
663 //---------------------------------------------------------------------
664 
665 template<class Value, class BV>
668 {
669  address_type idx;
670  bool found = addr_res_.resolve(key, &idx);
671  if (!found)
672  {
673  throw_range_error("compressed collection item not found");
674  }
675  return get(idx-1);
676 }
677 
678 //---------------------------------------------------------------------
679 
680 template<class Value, class BV>
683 {
684  address_type idx;
685  bool found = addr_res_.resolve(key, &idx);
686  if (!found)
687  {
688  throw_range_error("compressed collection item not found");
689  }
690  return dense_vect_.at(idx-1);
691 }
692 
693 
694 //---------------------------------------------------------------------
695 
696 template<class Value, class BV>
698 {
699 #ifndef BM_NO_STL
700  throw std::range_error(err_msg);
701 #else
702  BM_ASSERT_THROW(false, BM_ERR_RANGE);
703 #endif
704 }
705 
706 //---------------------------------------------------------------------
707 
708 template<class Value, class BV>
710  const compressed_collection<Value, BV>& ccoll) const
711 {
712  const bvector_type& bv = addr_res_.get_bvector();
713  const bvector_type& bva = ccoll.addr_res_.get_bvector();
714 
715  int cmp = bv.compare(bva);
716  if (cmp != 0)
717  return false;
718  BM_ASSERT(dense_vect_.size() == ccoll.dense_vect_.size());
719  for (size_t i = 0; i < dense_vect_.size(); ++i)
720  {
721  const value_type& v = dense_vect_[i];
722  const value_type& va = ccoll.dense_vect_[i];
723  if (!(v == va))
724  return false;
725  }
726  return true;
727 }
728 
729 //---------------------------------------------------------------------
730 
731 
732 } // namespace bm
733 
734 #include "bmundef.h"
735 
736 
737 #endif
bool in_sync() const
returns true if prefix sum table is in sync with the vector
bvector_type::rs_index_type rs_index_type
void optimize(bm::word_t *temp_block=0)
optimize underlying bit-vector
void set(size_type id_from)
Set id (bit) to address resolver.
address_resolver_type & resolver()
Get address resolver.
rs_index< allocator_type > rs_index_type
Definition: bm.h:1255
void * aligned_new_malloc(size_t size)
Aligned malloc (unlike classic malloc it throws bad_alloc exception)
Definition: bmalloc.h:401
const bvector_type & get_bvector() const
Get const reference to the underlying bit-vector of set values.
bm::id_t size_type
Definition: bm.h:117
bool resolve(size_type id_from, size_type *id_to) const
Resolve id to integer id (address)
bvector_type & get_bvector()
Get writable reference to the underlying bit-vector.
void optimize(bm::word_t *temp_block=0)
optimize underlying sparse vectors
Definition: bm.h:76
Bit-bector prefix sum address resolver using bit-vector prefix sum as a space compactor.
compressed_collection< typename serializer< BV >::buffer, BV > parent_type
pre-processor un-defines to avoid global space pollution (internal)
container_type & container()
return dense container for direct access (this should be treated as an internal function designed for...
void unsync()
Unsync the prefix sum table.
#define BMNOEXEPT
Definition: bmdef.h:78
serializer< BV >::buffer buffer_type
const value_type & at(key_type key) const
find and return const associated value (with bounds/presense checking)
void aligned_free(void *ptr)
Aligned free.
Definition: bmalloc.h:429
bool resolve(size_type id_from, size_type *id_to) const
Resolve id to integer id (address)
size_t size() const
size of collection
void sync(bool force=false)
Re-calculate prefix sum table (same as calc_prefix_sum)
void calc_stat(statistics *st) const
compute statistics on memory consumption
unsigned int word_t
Definition: bmconst.h:38
std::vector< value_type > container_type
const address_resolver_type & resolver() const
Get address resolver.
container_type dense_vect_
compressed space container
SV::bvector_type bvector_type
const bvector_type & get_bvector() const
Get const reference to the underlying bit-vector.
Definitions(internal)
#define BM_ASSERT_THROW(x, xerrcode)
Definition: bmdef.h:388
bool resolve(key_type key, address_type *addr) const
Resolve key address (index) in the dense vector.
bool move_buffer(typename parent_type::key_type key, buffer_type &buffer)
move external buffer into collection
bvector_type::size_type size_type
Serialization / compression of bvector<>. Set theoretical operations on compressed BLOBs...
address_resolver_type addr_res_
address resolver
Compressed (sparse collection of objects)
void move_from(bvps_addr_resolver &addr_res) BMNOEXEPT
Move content from the argument address resolver.
size_t max_serialize_mem
memory needed for serialization
#define BM_ASSERT
Definition: bmdef.h:117
bool equal(const bvps_addr_resolver &addr_res) const
equality comparison
bm::bvector bvector_type
key_type last_add_
last added element
bvps_addr_resolver & operator=(const bvps_addr_resolver &addr_res)
Compressed (sparse collection of objects)
byte_buffer< allocator_type > buffer
Definition: bmserial.h:87
void sync()
Checkpoint method to prepare collection for reading.
void optimize(bm::word_t *temp_block=0)
perform memory optimizations/compression
void calc_prefix_sum(bool force=false)
Re-calculate prefix sum table.
bm::bvps_addr_resolver< bvector_type > address_resolver_type
sparse vector based address resolver (no space compactor, just bit-plane compressors provided by spar...