BitMagic-C++
encoding.h
Go to the documentation of this file.
1 #ifndef BMENCODING_H__INCLUDED__
2 #define BMENCODING_H__INCLUDED__
3 /*
4 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 For more information please visit: http://bitmagic.io
19 */
20 
21 /*! \file encoding.h
22  \brief Encoding utilities for serialization (internal)
23 */
24 
25 
26 #include <memory.h>
27 #include "bmutil.h"
28 
29 
30 #ifdef _MSVC_VER
31 #pragma warning (push)
32 #pragma warning (disable : 4702)
33 #endif
34 
35 namespace bm
36 {
37 
38 
39 // ----------------------------------------------------------------
40 
41 /*!
42  \brief Memory encoding.
43 
44  Class for encoding data into memory.
45  Class handles aligment issues with the integer data types.
46 
47  \ingroup gammacode
48 */
49 class encoder
50 {
51 public:
52  typedef unsigned char* position_type;
53 public:
54  encoder(unsigned char* buf, size_t size);
55  void put_8(unsigned char c);
56  void put_16(bm::short_t s);
57  void put_16(const bm::short_t* s, unsigned count);
58  void put_32(bm::word_t w);
59  void put_32(const bm::word_t* w, unsigned count);
60  void put_64(bm::id64_t w);
61  void put_prefixed_array_32(unsigned char c,
62  const bm::word_t* w, unsigned count);
63  void put_prefixed_array_16(unsigned char c,
64  const bm::short_t* s, unsigned count,
65  bool encode_count);
66  void memcpy(const unsigned char* src, size_t count);
67  size_t size() const;
68  unsigned char* get_pos() const;
69  void set_pos(unsigned char* buf_pos);
70 private:
71  unsigned char* buf_;
72  unsigned char* start_;
73  size_t size_;
74 };
75 
76 // ----------------------------------------------------------------
77 /**
78  Base class for all decoding functionality
79  \ingroup gammacode
80 */
82 {
83 public:
84  decoder_base(const unsigned char* buf) { buf_ = start_ = buf; }
85 
86  /// Reads character from the decoding buffer.
87  unsigned char get_8() { return *buf_++; }
88 
89  /// Returns size of the current decoding stream.
90  size_t size() const { return size_t(buf_ - start_); }
91 
92  /// change current position
93  void seek(int delta) { buf_ += delta; }
94 
95  /// read bytes from the decode buffer
96  void memcpy(unsigned char* dst, size_t count);
97 
98  /// Return current buffer pointer
99  const unsigned char* get_pos() const { return buf_; }
100 protected:
101  const unsigned char* buf_;
102  const unsigned char* start_;
103 };
104 
105 
106 // ----------------------------------------------------------------
107 /**
108  Class for decoding data from memory buffer.
109  Properly handles aligment issues with integer data types.
110  \ingroup gammacode
111 */
112 class decoder : public decoder_base
113 {
114 public:
115  decoder(const unsigned char* buf);
116  bm::short_t get_16();
117  bm::word_t get_32();
118  bm::id64_t get_64();
119  void get_32(bm::word_t* w, unsigned count);
120  bool get_32_OR(bm::word_t* w, unsigned count);
121  void get_32_AND(bm::word_t* w, unsigned count);
122  void get_16(bm::short_t* s, unsigned count);
123 };
124 
125 // ----------------------------------------------------------------
126 /**
127  Class for decoding data from memory buffer.
128  Properly handles aligment issues with integer data types.
129  Converts data to big endian architecture
130  (presumed it was encoded as little endian)
131  \ingroup gammacode
132 */
134 
135 
136 // ----------------------------------------------------------------
137 /**
138  Class for decoding data from memory buffer.
139  Properly handles aligment issues with integer data types.
140  Converts data to little endian architecture
141  (presumed it was encoded as big endian)
142  \ingroup gammacode
143 */
145 {
146 public:
147  decoder_little_endian(const unsigned char* buf);
148  bm::short_t get_16();
149  bm::word_t get_32();
150  bm::id64_t get_64();
151  void get_32(bm::word_t* w, unsigned count);
152  bool get_32_OR(bm::word_t* w, unsigned count);
153  void get_32_AND(bm::word_t* w, unsigned count);
154  void get_16(bm::short_t* s, unsigned count);
155 };
156 
157 
158 /**
159  Byte based writer for un-aligned bit streaming
160 
161  @ingroup gammacode
162  @sa encoder
163 */
164 template<class TEncoder>
165 class bit_out
166 {
167 public:
168  bit_out(TEncoder& dest)
169  : dest_(dest), used_bits_(0), accum_(0)
170  {}
171 
172  ~bit_out() { flush(); }
173 
174  /// issue single bit into encode bit-stream
175  void put_bit(unsigned value);
176 
177  /// issue count bits out of value
178  void put_bits(unsigned value, unsigned count);
179 
180  /// issue 0 into output stream
181  void put_zero_bit();
182 
183  /// issue specified number of 0s
184  void put_zero_bits(unsigned count);
185 
186  /// Elias Gamma encode the specified value
187  void gamma(unsigned value);
188 
189  /// Binary Interpolative array decode
190  void bic_encode_u16(const bm::gap_word_t* arr, unsigned sz,
192  {
193  bic_encode_u16_cm(arr, sz, lo, hi);
194  }
195 
196  /// Binary Interpolative encoding (array of 16-bit ints)
197  void bic_encode_u16_rg(const bm::gap_word_t* arr, unsigned sz,
198  bm::gap_word_t lo,
199  bm::gap_word_t hi);
200 
201  /// Binary Interpolative encoding (array of 16-bit ints)
202  /// cm - "center-minimal"
203  void bic_encode_u16_cm(const bm::gap_word_t* arr, unsigned sz,
204  bm::gap_word_t lo,
205  bm::gap_word_t hi);
206 
207  /// Binary Interpolative encoding (array of 32-bit ints)
208  /// cm - "center-minimal"
209  void bic_encode_u32_cm(const bm::word_t* arr, unsigned sz,
210  bm::word_t lo, bm::word_t hi);
211 
212  /// Flush the incomplete 32-bit accumulator word
213  void flush() { if (used_bits_) flush_accum(); }
214 
215 private:
216  void flush_accum()
217  {
218  dest_.put_32(accum_);
219  used_bits_ = accum_ = 0;
220  }
221 private:
222  bit_out(const bit_out&);
223  bit_out& operator=(const bit_out&);
224 
225 private:
226  TEncoder& dest_; ///< Bit stream target
227  unsigned used_bits_; ///< Bits used in the accumulator
228  unsigned accum_; ///< write bit accumulator
229 };
230 
231 
232 /**
233  Byte based reader for un-aligned bit streaming
234 
235  @ingroup gammacode
236  @sa encoder
237 */
238 template<class TDecoder>
239 class bit_in
240 {
241 public:
242  bit_in(TDecoder& decoder)
243  : src_(decoder),
244  used_bits_(unsigned(sizeof(accum_) * 8)),
245  accum_(0)
246  {}
247 
248  /// decode unsigned value using Elias Gamma coding
249  unsigned gamma();
250 
251  /// read number of bits out of the stream
252  unsigned get_bits(unsigned count);
253 
254  /// Binary Interpolative array decode
255  void bic_decode_u16(bm::gap_word_t* arr, unsigned sz,
257  {
258  bic_decode_u16_cm(arr, sz, lo, hi);
259  }
260 
261  void bic_decode_u16_bitset(bm::word_t* block, unsigned sz,
263  {
264  bic_decode_u16_cm_bitset(block, sz, lo, hi);
265  }
267  {
268  bic_decode_u16_cm_dry(sz, lo, hi);
269  }
270 
271 
272  /// Binary Interpolative array decode
273  void bic_decode_u16_rg(bm::gap_word_t* arr, unsigned sz,
275  /// Binary Interpolative array decode
276  void bic_decode_u16_cm(bm::gap_word_t* arr, unsigned sz,
278 
279  /// Binary Interpolative array decode (32-bit)
280  void bic_decode_u32_cm(bm::word_t* arr, unsigned sz,
281  bm::word_t lo, bm::word_t hi);
282 
283 
284  /// Binary Interpolative array decode into bitset (32-bit based)
285  void bic_decode_u16_rg_bitset(bm::word_t* block, unsigned sz,
287 
288  /// Binary Interpolative array decode into /dev/null
289  void bic_decode_u16_rg_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi);
290 
291  /// Binary Interpolative array decode into bitset (32-bit based)
292  void bic_decode_u16_cm_bitset(bm::word_t* block, unsigned sz,
294 
295  /// Binary Interpolative array decode into /dev/null
296  void bic_decode_u16_cm_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi);
297 
298 private:
299  bit_in(const bit_in&);
300  bit_in& operator=(const bit_in&);
301 private:
302  TDecoder& src_; ///< Source of bytes
303  unsigned used_bits_; ///< Bits used in the accumulator
304  unsigned accum_; ///< read bit accumulator
305 };
306 
307 
308 /**
309  Functor for Elias Gamma encoding
310  @ingroup gammacode
311 */
312 template<typename T, typename TBitIO>
314 {
315 public:
316  gamma_encoder(TBitIO& bout) : bout_(bout)
317  {}
318 
319  /** Encode word */
320  void operator()(T value) { bout_.gamma(value); }
321 private:
323  gamma_encoder& operator=(const gamma_encoder&);
324 private:
325  TBitIO& bout_;
326 };
327 
328 
329 /**
330  Elias Gamma decoder
331  @ingroup gammacode
332 */
333 template<typename T, typename TBitIO>
335 {
336 public:
337  gamma_decoder(TBitIO& bin) : bin_(bin) {}
338 
339  /**
340  Start encoding sequence
341  */
342  void start() {}
343 
344  /**
345  Stop decoding sequence
346  */
347  void stop() {}
348 
349  /**
350  Decode word
351  */
352  T operator()(void) { return (T)bin_.gamma(); }
353 private:
355  gamma_decoder& operator=(const gamma_decoder&);
356 private:
357  TBitIO& bin_;
358 };
359 
360 
361 // ----------------------------------------------------------------
362 // Implementation details.
363 // ----------------------------------------------------------------
364 
365 /*!
366  \fn encoder::encoder(unsigned char* buf, unsigned size)
367  \brief Construction.
368  \param buf - memory buffer pointer.
369  \param size - size of the buffer
370 */
371 inline encoder::encoder(unsigned char* buf, size_t a_size)
372 : buf_(buf), start_(buf)
373 {
374  size_ = a_size;
375 }
376 /*!
377  \brief Encode 8-bit prefix + an array
378 */
379 inline void encoder::put_prefixed_array_32(unsigned char c,
380  const bm::word_t* w,
381  unsigned count)
382 {
383  put_8(c);
384  put_32(w, count);
385 }
386 
387 /*!
388  \brief Encode 8-bit prefix + an array
389 */
390 inline void encoder::put_prefixed_array_16(unsigned char c,
391  const bm::short_t* s,
392  unsigned count,
393  bool encode_count)
394 {
395  put_8(c);
396  if (encode_count)
397  put_16((bm::short_t) count);
398  put_16(s, count);
399 }
400 
401 
402 /*!
403  \fn void encoder::put_8(unsigned char c)
404  \brief Puts one character into the encoding buffer.
405  \param c - character to encode
406 */
407 BMFORCEINLINE void encoder::put_8(unsigned char c)
408 {
409  *buf_++ = c;
410 }
411 
412 /*!
413  \fn encoder::put_16(bm::short_t s)
414  \brief Puts short word (16 bits) into the encoding buffer.
415  \param s - short word to encode
416 */
418 {
419 #if (BM_UNALIGNED_ACCESS_OK == 1)
420  ::memcpy(buf_, &s, sizeof(bm::short_t)); // optimizer takes care of it
421  buf_ += sizeof(bm::short_t);
422 #else
423  *buf_++ = (unsigned char) s;
424  s >>= 8;
425  *buf_++ = (unsigned char) s;
426 #endif
427 }
428 
429 /*!
430  \brief Method puts array of short words (16 bits) into the encoding buffer.
431 */
432 inline void encoder::put_16(const bm::short_t* s, unsigned count)
433 {
434 #if (BM_UNALIGNED_ACCESS_OK == 1)
435  ::memcpy(buf_, s, sizeof(bm::short_t)*count);
436  buf_ += sizeof(bm::short_t) * count;
437 #else
438  unsigned char* buf = buf_;
439  const bm::short_t* s_end = s + count;
440  do
441  {
442  bm::short_t w16 = *s++;
443  unsigned char a = (unsigned char) w16;
444  unsigned char b = (unsigned char) (w16 >> 8);
445 
446  *buf++ = a;
447  *buf++ = b;
448 
449  } while (s < s_end);
450 
451  buf_ = (unsigned char*)buf;
452 #endif
453 }
454 
455 /*!
456  \brief copy bytes into target buffer or just rewind if src is NULL
457 */
458 inline
459 void encoder::memcpy(const unsigned char* src, size_t count)
460 {
461  BM_ASSERT((buf_ + count) < (start_ + size_));
462  if (src)
463  ::memcpy(buf_, src, count);
464  buf_ += count;
465 }
466 
467 
468 /*!
469  \fn unsigned encoder::size() const
470  \brief Returns size of the current encoding stream.
471 */
472 inline size_t encoder::size() const
473 {
474  return size_t(buf_ - start_);
475 }
476 
477 /**
478  \brief Get current memory stream position
479 */
481 {
482  return buf_;
483 }
484 
485 /**
486  \brief Set current memory stream position
487 */
489 {
490  buf_ = buf_pos;
491 }
492 
493 
494 /*!
495  \fn void encoder::put_32(bm::word_t w)
496  \brief Puts 32 bits word into encoding buffer.
497  \param w - word to encode.
498 */
500 {
501 #if (BM_UNALIGNED_ACCESS_OK == 1)
502  ::memcpy(buf_, &w, sizeof(bm::word_t));
503  buf_ += sizeof(bm::word_t);
504 #else
505  *buf_++ = (unsigned char) w;
506  *buf_++ = (unsigned char) (w >> 8);
507  *buf_++ = (unsigned char) (w >> 16);
508  *buf_++ = (unsigned char) (w >> 24);
509 #endif
510 }
511 
512 /*!
513  \fn void encoder::put_64(bm::id64_t w)
514  \brief Puts 64 bits word into encoding buffer.
515  \param w - word to encode.
516 */
518 {
519 #if (BM_UNALIGNED_ACCESS_OK == 1)
520  ::memcpy(buf_, &w, sizeof(bm::id64_t));
521  buf_ += sizeof(bm::id64_t);
522 #else
523  *buf_++ = (unsigned char) w;
524  *buf_++ = (unsigned char) (w >> 8);
525  *buf_++ = (unsigned char) (w >> 16);
526  *buf_++ = (unsigned char) (w >> 24);
527  *buf_++ = (unsigned char) (w >> 32);
528  *buf_++ = (unsigned char) (w >> 40);
529  *buf_++ = (unsigned char) (w >> 48);
530  *buf_++ = (unsigned char) (w >> 56);
531 #endif
532 }
533 
534 
535 /*!
536  \brief Encodes array of 32-bit words
537 */
538 inline
539 void encoder::put_32(const bm::word_t* w, unsigned count)
540 {
541 #if (BM_UNALIGNED_ACCESS_OK == 1)
542  ::memcpy(buf_, w, sizeof(bm::word_t) * count);
543  buf_ += sizeof(bm::word_t) * count;
544 #else
545  unsigned char* buf = buf_;
546  const bm::word_t* w_end = w + count;
547  do
548  {
549  bm::word_t w32 = *w++;
550  unsigned char a = (unsigned char) w32;
551  unsigned char b = (unsigned char) (w32 >> 8);
552  unsigned char c = (unsigned char) (w32 >> 16);
553  unsigned char d = (unsigned char) (w32 >> 24);
554 
555  *buf++ = a;
556  *buf++ = b;
557  *buf++ = c;
558  *buf++ = d;
559  } while (w < w_end);
560 
561  buf_ = (unsigned char*)buf;
562 #endif
563 }
564 
565 
566 // ---------------------------------------------------------------------
567 
568 
569 /*!
570  Load bytes from the decode buffer
571 */
572 inline
573 void decoder_base::memcpy(unsigned char* dst, size_t count)
574 {
575  if (dst)
576  ::memcpy(dst, buf_, count);
577  buf_ += count;
578 }
579 
580 /*!
581  \fn decoder::decoder(const unsigned char* buf)
582  \brief Construction
583  \param buf - pointer to the decoding memory.
584 */
585 inline decoder::decoder(const unsigned char* buf)
586 : decoder_base(buf)
587 {
588 }
589 
590 /*!
591  \fn bm::short_t decoder::get_16()
592  \brief Reads 16-bit word from the decoding buffer.
593 */
595 {
596 #if (BM_UNALIGNED_ACCESS_OK == 1)
597  bm::short_t a;
598  ::memcpy(&a, buf_, sizeof(bm::short_t));
599 #else
600  bm::short_t a = (bm::short_t)(buf_[0] + ((bm::short_t)buf_[1] << 8));
601 #endif
602  buf_ += sizeof(a);
603  return a;
604 }
605 
606 /*!
607  \fn bm::word_t decoder::get_32()
608  \brief Reads 32-bit word from the decoding buffer.
609 */
611 {
612 #if (BM_UNALIGNED_ACCESS_OK == 1)
613  bm::word_t a;
614  ::memcpy(&a, buf_, sizeof(bm::word_t));
615 #else
616  bm::word_t a = buf_[0]+ ((unsigned)buf_[1] << 8) +
617  ((unsigned)buf_[2] << 16) + ((unsigned)buf_[3] << 24);
618 #endif
619  buf_+=sizeof(a);
620  return a;
621 }
622 
623 /*!
624  \fn bm::word_t decoder::get_64()
625  \brief Reads 64-bit word from the decoding buffer.
626 */
627 inline
629 {
630 #if (BM_UNALIGNED_ACCESS_OK == 1)
631  bm::id64_t a;
632  ::memcpy(&a, buf_, sizeof(bm::id64_t));
633 #else
634  bm::id64_t a = buf_[0]+
635  ((bm::id64_t)buf_[1] << 8) +
636  ((bm::id64_t)buf_[2] << 16) +
637  ((bm::id64_t)buf_[3] << 24) +
638  ((bm::id64_t)buf_[4] << 32) +
639  ((bm::id64_t)buf_[5] << 40) +
640  ((bm::id64_t)buf_[6] << 48) +
641  ((bm::id64_t)buf_[7] << 56);
642 #endif
643  buf_ += sizeof(a);
644  return a;
645 }
646 
647 
648 /*!
649  \fn void decoder::get_32(bm::word_t* w, unsigned count)
650  \brief Reads block of 32-bit words from the decoding buffer.
651  \param w - pointer on memory block to read into.
652  \param count - size of memory block in words.
653 */
654 inline void decoder::get_32(bm::word_t* w, unsigned count)
655 {
656  if (!w)
657  {
658  seek(int(count * sizeof(bm::word_t)));
659  return;
660  }
661 #if (BM_UNALIGNED_ACCESS_OK == 1)
662  ::memcpy(w, buf_, count * sizeof(bm::word_t));
663  seek(int(count * sizeof(bm::word_t)));
664  return;
665 #else
666  const unsigned char* buf = buf_;
667  const bm::word_t* w_end = w + count;
668  do
669  {
670  bm::word_t a = buf[0]+ ((unsigned)buf[1] << 8) +
671  ((unsigned)buf[2] << 16) + ((unsigned)buf[3] << 24);
672  *w++ = a;
673  buf += sizeof(a);
674  } while (w < w_end);
675  buf_ = (unsigned char*)buf;
676 #endif
677 }
678 
679 /*!
680  \brief Reads block of 32-bit words from the decoding buffer and ORs
681  to the destination
682  \param w - pointer on memory block to read into
683  \param count - should match bm::set_block_size
684 */
685 inline
686 bool decoder::get_32_OR(bm::word_t* w, unsigned count)
687 {
688  if (!w)
689  {
690  seek(int(count * sizeof(bm::word_t)));
691  return false;
692  }
693 #if defined(BMAVX2OPT)
694  __m256i* buf_start = (__m256i*)buf_;
695  seek(int(count * sizeof(bm::word_t)));
696  __m256i* buf_end = (__m256i*)buf_;
697 
698  return bm::avx2_or_arr_unal((__m256i*)w, buf_start, buf_end);
699 #elif defined(BMSSE42OPT) || defined(BMSSE2OPT)
700  __m128i* buf_start = (__m128i*)buf_;
701  seek(int(count * sizeof(bm::word_t)));
702  __m128i* buf_end = (__m128i*)buf_;
703 
704  return bm::sse2_or_arr_unal((__m128i*)w, buf_start, buf_end);
705 #else
706  bm::word_t acc = 0;
707  const bm::word_t not_acc = acc = ~acc;
708 
709  for (unsigned i = 0; i < count; i+=4)
710  {
711  acc &= (w[i+0] |= get_32());
712  acc &= (w[i+1] |= get_32());
713  acc &= (w[i+2] |= get_32());
714  acc &= (w[i+3] |= get_32());
715  }
716  return acc == not_acc;
717 #endif
718 }
719 
720 /*!
721  \brief Reads block of 32-bit words from the decoding buffer and ANDs
722  to the destination
723  \param w - pointer on memory block to read into
724  \param count - should match bm::set_block_size
725 */
726 inline
727 void decoder::get_32_AND(bm::word_t* w, unsigned count)
728 {
729  if (!w)
730  {
731  seek(int(count * sizeof(bm::word_t)));
732  return;
733  }
734 #if defined(BMAVX2OPT)
735  __m256i* buf_start = (__m256i*)buf_;
736  seek(int(count * sizeof(bm::word_t)));
737  __m256i* buf_end = (__m256i*)buf_;
738 
739  bm::avx2_and_arr_unal((__m256i*)w, buf_start, buf_end);
740 #elif defined(BMSSE42OPT) || defined(BMSSE2OPT)
741  __m128i* buf_start = (__m128i*)buf_;
742  seek(int(count * sizeof(bm::word_t)));
743  __m128i* buf_end = (__m128i*)buf_;
744 
745  bm::sse2_and_arr_unal((__m128i*)w, buf_start, buf_end);
746 #else
747  for (unsigned i = 0; i < count; i+=4)
748  {
749  w[i+0] &= get_32();
750  w[i+1] &= get_32();
751  w[i+2] &= get_32();
752  w[i+3] &= get_32();
753  }
754 #endif
755 }
756 
757 
758 
759 /*!
760  \fn void decoder::get_16(bm::short_t* s, unsigned count)
761  \brief Reads block of 32-bit words from the decoding buffer.
762  \param s - pointer on memory block to read into.
763  \param count - size of memory block in words.
764 */
765 inline void decoder::get_16(bm::short_t* s, unsigned count)
766 {
767  if (!s)
768  {
769  seek(int(count * sizeof(bm::short_t)));
770  return;
771  }
772 #if (BM_UNALIGNED_ACCESS_OK == 1)
773  ::memcpy(s, buf_, sizeof(bm::short_t) * count);
774  buf_ += sizeof(bm::short_t) * count;
775 #else
776  const unsigned char* buf = buf_;
777  const bm::short_t* s_end = s + count;
778  do
779  {
780  bm::short_t a = (bm::short_t)(buf[0] + ((bm::short_t)buf[1] << 8));
781  *s++ = a;
782  buf += sizeof(a);
783  } while (s < s_end);
784  buf_ = (unsigned char*)buf;
785 #endif
786 
787 }
788 
789 
790 
791 // ---------------------------------------------------------------------
792 
793 inline decoder_little_endian::decoder_little_endian(const unsigned char* buf)
794 : decoder_base(buf)
795 {
796 }
797 
798 inline
800 {
801  bm::short_t v1 = bm::short_t(buf_[0]);
802  bm::short_t v2 = bm::short_t(buf_[1]);
803  bm::short_t a = bm::short_t((v1 << 8) + v2);
804  buf_ += sizeof(a);
805  return a;
806 }
807 
808 inline
810 {
811  bm::word_t a = ((unsigned)buf_[0] << 24)+ ((unsigned)buf_[1] << 16) +
812  ((unsigned)buf_[2] << 8) + ((unsigned)buf_[3]);
813  buf_+=sizeof(a);
814  return a;
815 }
816 
817 inline
819 {
820  bm::id64_t a = buf_[0]+
821  ((bm::id64_t)buf_[1] << 56) +
822  ((bm::id64_t)buf_[2] << 48) +
823  ((bm::id64_t)buf_[3] << 40) +
824  ((bm::id64_t)buf_[4] << 32) +
825  ((bm::id64_t)buf_[5] << 24) +
826  ((bm::id64_t)buf_[6] << 16) +
827  ((bm::id64_t)buf_[7] << 8);
828  buf_+=sizeof(a);
829  return a;
830 }
831 
832 inline
834 {
835  if (!w)
836  {
837  seek(int(count * sizeof(bm::word_t)));
838  return;
839  }
840 
841  const unsigned char* buf = buf_;
842  const bm::word_t* w_end = w + count;
843  do
844  {
845  bm::word_t a = ((unsigned)buf[0] << 24)+ ((unsigned)buf[1] << 16) +
846  ((unsigned)buf[2] << 8) + ((unsigned)buf[3]);
847  *w++ = a;
848  buf += sizeof(a);
849  } while (w < w_end);
850  buf_ = (unsigned char*)buf;
851 }
852 
853 inline
855 {
856  if (!w)
857  {
858  seek(int(count * sizeof(bm::word_t)));
859  return false;
860  }
861 
862  bm::word_t acc = 0;
863  const bm::word_t not_acc = acc = ~acc;
864 
865  for (unsigned i = 0; i < count; i+=4)
866  {
867  acc &= (w[i+0] |= get_32());
868  acc &= (w[i+1] |= get_32());
869  acc &= (w[i+2] |= get_32());
870  acc &= (w[i+3] |= get_32());
871  }
872  return acc == not_acc;
873 }
874 
875 inline
877 {
878  for (unsigned i = 0; i < count; i+=4)
879  {
880  w[i+0] &= get_32();
881  w[i+1] &= get_32();
882  w[i+2] &= get_32();
883  w[i+3] &= get_32();
884  }
885 }
886 
887 
888 inline
890 {
891  if (!s)
892  {
893  seek(int(count * sizeof(bm::short_t)));
894  return;
895  }
896 
897  const unsigned char* buf = buf_;
898  const bm::short_t* s_end = s + count;
899  do
900  {
901  bm::short_t v1 = bm::short_t(buf_[0]);
902  bm::short_t v2 = bm::short_t(buf_[1]);
903  bm::short_t a = bm::short_t((v1 << 8) + v2);
904  *s++ = a;
905  buf += sizeof(a);
906  } while (s < s_end);
907  buf_ = (unsigned char*)buf;
908 }
909 
910 // ----------------------------------------------------------------------
911 //
912 
913 template<typename TEncoder>
914 void bit_out<TEncoder>::put_bit(unsigned value)
915 {
916  BM_ASSERT(value <= 1);
917  accum_ |= (value << used_bits_);
918  if (++used_bits_ == (sizeof(accum_) * 8))
919  flush_accum();
920 }
921 
922 // ----------------------------------------------------------------------
923 
924 template<typename TEncoder>
925 void bit_out<TEncoder>::put_bits(unsigned value, unsigned count)
926 {
927  unsigned used = used_bits_;
928  unsigned acc = accum_;
929 
930  {
931  unsigned mask = ~0u;
932  mask >>= (sizeof(accum_) * 8) - count;
933  value &= mask;
934  }
935  for (;count;)
936  {
937  unsigned free_bits = unsigned(sizeof(accum_) * 8) - used;
938  BM_ASSERT(free_bits);
939  acc |= value << used;
940 
941  if (count <= free_bits)
942  {
943  used += count;
944  break;
945  }
946  else
947  {
948  value >>= free_bits;
949  count -= free_bits;
950  dest_.put_32(acc);
951  acc = used = 0;
952  continue;
953  }
954  }
955  if (used == (sizeof(accum_) * 8))
956  {
957  dest_.put_32(acc);
958  acc = used = 0;
959  }
960  used_bits_ = used;
961  accum_ = acc;
962 }
963 
964 // ----------------------------------------------------------------------
965 
966 template<typename TEncoder>
968 {
969  if (++used_bits_ == (sizeof(accum_) * 8))
970  flush_accum();
971 }
972 
973 // ----------------------------------------------------------------------
974 
975 template<typename TEncoder>
977 {
978  unsigned used = used_bits_;
979  unsigned free_bits = (sizeof(accum_) * 8) - used;
980  if (count >= free_bits)
981  {
982  flush_accum();
983  count -= free_bits;
984  used = 0;
985 
986  for ( ;count >= sizeof(accum_) * 8; count -= sizeof(accum_) * 8)
987  {
988  dest_.put_32(0);
989  }
990  used += count;
991  }
992  else
993  {
994  used += count;
995  }
996  accum_ |= (1u << used);
997  if (++used == (sizeof(accum_) * 8))
998  flush_accum();
999  else
1000  used_bits_ = used;
1001 }
1002 
1003 // ----------------------------------------------------------------------
1004 
1005 template<typename TEncoder>
1006 void bit_out<TEncoder>::gamma(unsigned value)
1007 {
1008  BM_ASSERT(value);
1009 
1010  unsigned logv = bm::bit_scan_reverse32(value);
1011 
1012  // Put zeroes + 1 bit
1013 
1014  unsigned used = used_bits_;
1015  unsigned acc = accum_;
1016  const unsigned acc_bits = (sizeof(acc) * 8);
1017  unsigned free_bits = acc_bits - used;
1018 
1019  {
1020  unsigned count = logv;
1021  if (count >= free_bits)
1022  {
1023  dest_.put_32(acc);
1024  acc = used ^= used;
1025  count -= free_bits;
1026 
1027  for ( ;count >= acc_bits; count -= acc_bits)
1028  {
1029  dest_.put_32(0);
1030  }
1031  used += count;
1032  }
1033  else
1034  {
1035  used += count;
1036  }
1037  acc |= (1 << used);
1038  if (++used == acc_bits)
1039  {
1040  dest_.put_32(acc);
1041  acc = used ^= used;
1042  }
1043  }
1044 
1045  // Put the value bits
1046  //
1047  {
1048  unsigned mask = (~0u);
1049  mask >>= acc_bits - logv;
1050  value &= mask;
1051  }
1052  for (;logv;)
1053  {
1054  acc |= value << used;
1055  free_bits = acc_bits - used;
1056  if (logv <= free_bits)
1057  {
1058  used += logv;
1059  break;
1060  }
1061  else
1062  {
1063  value >>= free_bits;
1064  logv -= free_bits;
1065  dest_.put_32(acc);
1066  acc = used ^= used;
1067  continue;
1068  }
1069  } // for
1070 
1071  used_bits_ = used;
1072  accum_ = acc;
1073 }
1074 
1075 // ----------------------------------------------------------------------
1076 
1077 template<typename TEncoder>
1079  unsigned sz,
1081 {
1082  for (;sz;)
1083  {
1084  BM_ASSERT(lo <= hi);
1085  unsigned mid_idx = sz >> 1;
1086  bm::gap_word_t val = arr[mid_idx];
1087 
1088  // write the interpolated value
1089  // write(x, r) where x=(arr[mid] - lo - mid) r=(hi - lo - sz + 1);
1090  {
1091  unsigned r = hi - lo - sz + 1;
1092  if (r)
1093  {
1094  unsigned value = val - lo - mid_idx;
1095  unsigned logv = bm::bit_scan_reverse32(r);
1096  put_bits(value, logv+1);
1097  }
1098  }
1099 
1100  bic_encode_u16_rg(arr, mid_idx, lo, gap_word_t(val-1));
1101  // tail recursion
1102  // bic_encode_u16(arr + mid_idx + 1, sz - mid_idx - 1, gap_word_t(val+1), hi);
1103  arr += mid_idx + 1;
1104  sz -= mid_idx + 1;
1105  lo = gap_word_t(val + 1);
1106  } // for sz
1107 }
1108 
1109 // ----------------------------------------------------------------------
1110 
1111 template<typename TEncoder>
1113  unsigned sz,
1114  bm::word_t lo, bm::word_t hi)
1115 {
1116  for (;sz;)
1117  {
1118  BM_ASSERT(lo <= hi);
1119  unsigned mid_idx = sz >> 1;
1120  bm::word_t val = arr[mid_idx];
1121 
1122  // write the interpolated value
1123  // write(x, r) where x=(arr[mid] - lo - mid) r=(hi - lo - sz + 1);
1124  {
1125  unsigned r = hi - lo - sz + 1;
1126  if (r)
1127  {
1128  unsigned value = val - lo - mid_idx;
1129 
1130  unsigned n = r + 1;
1131  unsigned logv = bm::bit_scan_reverse32(n);
1132  unsigned c = (unsigned)(1ull << (logv + 1)) - n;
1133  int64_t half_c = c >> 1; // c / 2;
1134  int64_t half_r = r >> 1; // r / 2;
1135  int64_t lo1 = half_r - half_c;
1136  int64_t hi1 = half_r + half_c + 1;
1137  lo1 -= (n & 1);
1138  logv += (value <= lo1 || value >= hi1);
1139 
1140  put_bits(value, logv);
1141  }
1142  }
1143 
1144  bic_encode_u32_cm(arr, mid_idx, lo, val-1);
1145  // tail recursive call:
1146  // bic_encode_u32_cm(arr + mid_idx + 1, sz - mid_idx - 1, val+1, hi);
1147  arr += mid_idx + 1;
1148  sz -= mid_idx + 1;
1149  lo = val + 1;
1150  } // for sz
1151 }
1152 
1153 // ----------------------------------------------------------------------
1154 
1155 template<typename TEncoder>
1157  unsigned sz,
1159 {
1160  for (;sz;)
1161  {
1162  BM_ASSERT(lo <= hi);
1163  unsigned mid_idx = sz >> 1;
1164  bm::gap_word_t val = arr[mid_idx];
1165 
1166  // write the interpolated value
1167  // write(x, r) where x=(arr[mid] - lo - mid) r=(hi - lo - sz + 1);
1168  {
1169  unsigned r = hi - lo - sz + 1;
1170  if (r)
1171  {
1172  unsigned value = val - lo - mid_idx;
1173 
1174  unsigned n = r + 1;
1175  unsigned logv = bm::bit_scan_reverse32(n);
1176  unsigned c = (unsigned)(1ull << (logv + 1)) - n;
1177  int64_t half_c = c >> 1; // c / 2;
1178  int64_t half_r = r >> 1; // r / 2;
1179  int64_t lo1 = half_r - half_c;
1180  int64_t hi1 = half_r + half_c + 1;
1181  lo1 -= (n & 1);
1182  logv += (value <= lo1 || value >= hi1);
1183 
1184  put_bits(value, logv);
1185  }
1186  }
1187 
1188  bic_encode_u16_cm(arr, mid_idx, lo, bm::gap_word_t(val-1));
1189  // tail recursive call:
1190  // bic_encode_u32_cm(arr + mid_idx + 1, sz - mid_idx - 1, val+1, hi);
1191  arr += mid_idx + 1;
1192  sz -= mid_idx + 1;
1193  lo = bm::gap_word_t(val + 1);
1194  } // for sz
1195 }
1196 
1197 
1198 
1199 // ----------------------------------------------------------------------
1200 //
1201 // ----------------------------------------------------------------------
1202 
1203 
1204 template<class TDecoder>
1207 {
1208  for (;sz;)
1209  {
1210  BM_ASSERT(lo <= hi);
1211 
1212  unsigned val;
1213  // read the value
1214  {
1215  unsigned r = hi - lo - sz + 1;
1216  if (r)
1217  {
1218  unsigned logv = bm::bit_scan_reverse32(r) + 1;
1219  val = get_bits(logv);
1220  BM_ASSERT(val <= r);
1221  }
1222  else
1223  {
1224  val = 0;
1225  }
1226  }
1227  unsigned mid_idx = sz >> 1;
1228  val += lo + mid_idx;
1229 
1230  BM_ASSERT(val < 65536);
1231  BM_ASSERT(mid_idx < 65536);
1232 
1233  arr[mid_idx] = bm::gap_word_t(val);
1234  if (sz == 1)
1235  return;
1236  bic_decode_u16_rg(arr, mid_idx, lo, bm::gap_word_t(val - 1));
1237  //bic_decode_u16(arr + mid_idx + 1, sz - mid_idx - 1, bm::gap_word_t(val + 1), hi);
1238  arr += mid_idx + 1;
1239  sz -= mid_idx + 1;
1240  lo = bm::gap_word_t(val + 1);
1241  } // for sz
1242 }
1243 
1244 // ----------------------------------------------------------------------
1245 
1246 template<class TDecoder>
1248  bm::word_t lo, bm::word_t hi)
1249 {
1250  for (;sz;)
1251  {
1252  BM_ASSERT(lo <= hi);
1253 
1254  unsigned val;
1255 
1256  // read the interpolated value
1257  // x = read(r)+ lo + mid, where r = (hi - lo - sz + 1);
1258  {
1259  unsigned r = hi - lo - sz + 1;
1260  if (r)
1261  {
1262  unsigned logv = bm::bit_scan_reverse32(r+1);
1263 
1264  unsigned c = unsigned((1ull << (logv + 1)) - r - 1);
1265  int64_t half_c = c >> 1; // c / 2;
1266  int64_t half_r = r >> 1; // r / 2;
1267  int64_t lo1 = half_r - half_c - ((r + 1) & 1);
1268  int64_t hi1 = half_r + half_c + 1;
1269  val = get_bits(logv);
1270  if (val <= lo1 || val >= hi1)
1271  val += (get_bits(1) << logv);
1272  BM_ASSERT(val <= r);
1273  }
1274  else
1275  {
1276  val = 0;
1277  }
1278  }
1279 
1280  unsigned mid_idx = sz >> 1;
1281  val += lo + mid_idx;
1282  arr[mid_idx] = val;
1283  if (sz == 1)
1284  return;
1285 
1286  bic_decode_u32_cm(arr, mid_idx, lo, val-1);
1287  // tail recursive call:
1288  // bic_decode_u32_cm(arr + mid_idx + 1, sz - mid_idx - 1, val + 1, hi);
1289  arr += mid_idx + 1;
1290  sz -= mid_idx + 1;
1291  lo = val + 1;
1292  } // for sz
1293 }
1294 
1295 // ----------------------------------------------------------------------
1296 
1297 template<class TDecoder>
1300 {
1301  for (;sz;)
1302  {
1303  BM_ASSERT(lo <= hi);
1304 
1305  unsigned val;
1306 
1307  // read the interpolated value
1308  // x = read(r)+ lo + mid, where r = (hi - lo - sz + 1);
1309  {
1310  unsigned r = hi - lo - sz + 1;
1311  if (r)
1312  {
1313  unsigned logv = bm::bit_scan_reverse32(r+1);
1314 
1315  unsigned c = unsigned((1ull << (logv + 1)) - r - 1);
1316  int64_t half_c = c >> 1; // c / 2;
1317  int64_t half_r = r >> 1; // r / 2;
1318  int64_t lo1 = half_r - half_c - ((r + 1) & 1);
1319  int64_t hi1 = half_r + half_c + 1;
1320  val = get_bits(logv);
1321  if (val <= lo1 || val >= hi1)
1322  val += (get_bits(1) << logv);
1323  BM_ASSERT(val <= r);
1324  }
1325  else
1326  {
1327  val = 0;
1328  }
1329  }
1330 
1331  unsigned mid_idx = sz >> 1;
1332  val += lo + mid_idx;
1333  arr[mid_idx] = bm::gap_word_t(val);
1334  if (sz == 1)
1335  return;
1336 
1337  bic_decode_u16_cm(arr, mid_idx, lo, bm::gap_word_t(val-1));
1338  // tail recursive call:
1339  // bic_decode_u32_cm(arr + mid_idx + 1, sz - mid_idx - 1, val + 1, hi);
1340  arr += mid_idx + 1;
1341  sz -= mid_idx + 1;
1342  lo = bm::gap_word_t(val + 1);
1343  } // for sz
1344 }
1345 
1346 // ----------------------------------------------------------------------
1347 
1348 template<class TDecoder>
1351 {
1352  for (;sz;)
1353  {
1354  BM_ASSERT(lo <= hi);
1355 
1356  unsigned val;
1357 
1358  // read the interpolated value
1359  // x = read(r)+ lo + mid, where r = (hi - lo - sz + 1);
1360  {
1361  unsigned r = hi - lo - sz + 1;
1362  if (r)
1363  {
1364  unsigned logv = bm::bit_scan_reverse32(r+1);
1365 
1366  unsigned c = unsigned((1ull << (logv + 1)) - r - 1);
1367  int64_t half_c = c >> 1; // c / 2;
1368  int64_t half_r = r >> 1; // r / 2;
1369  int64_t lo1 = half_r - half_c - ((r + 1) & 1);
1370  int64_t hi1 = half_r + half_c + 1;
1371  val = get_bits(logv);
1372  if (val <= lo1 || val >= hi1)
1373  val += (get_bits(1) << logv);
1374  BM_ASSERT(val <= r);
1375  }
1376  else
1377  {
1378  val = 0;
1379  }
1380  }
1381 
1382  unsigned mid_idx = sz >> 1;
1383  val += lo + mid_idx;
1384 
1385  // set bit in the target block
1386  {
1387  unsigned nword = (val >> bm::set_word_shift);
1388  block[nword] |= (1u << (val & bm::set_word_mask));
1389  }
1390 
1391  if (sz == 1)
1392  return;
1393 
1394  bic_decode_u16_cm_bitset(block, mid_idx, lo, bm::gap_word_t(val-1));
1395  // tail recursive call:
1396  // bic_decode_u32_cm(block, sz - mid_idx - 1, val + 1, hi);
1397  sz -= mid_idx + 1;
1398  lo = bm::gap_word_t(val + 1);
1399  } // for sz
1400 }
1401 
1402 // ----------------------------------------------------------------------
1403 
1404 template<class TDecoder>
1407 {
1408  for (;sz;)
1409  {
1410  BM_ASSERT(lo <= hi);
1411 
1412  unsigned val;
1413 
1414  // read the interpolated value
1415  // x = read(r)+ lo + mid, where r = (hi - lo - sz + 1);
1416  {
1417  unsigned r = hi - lo - sz + 1;
1418  if (r)
1419  {
1420  unsigned logv = bm::bit_scan_reverse32(r+1);
1421 
1422  unsigned c = unsigned((1ull << (logv + 1)) - r - 1);
1423  int64_t half_c = c >> 1; // c / 2;
1424  int64_t half_r = r >> 1; // r / 2;
1425  int64_t lo1 = half_r - half_c - ((r + 1) & 1);
1426  int64_t hi1 = half_r + half_c + 1;
1427  val = get_bits(logv);
1428  if (val <= lo1 || val >= hi1)
1429  val += (get_bits(1) << logv);
1430  BM_ASSERT(val <= r);
1431  }
1432  else
1433  {
1434  val = 0;
1435  }
1436  }
1437 
1438  unsigned mid_idx = sz >> 1;
1439  val += lo + mid_idx;
1440 
1441  if (sz == 1)
1442  return;
1443 
1444  bic_decode_u16_cm_dry(mid_idx, lo, bm::gap_word_t(val-1));
1445  // tail recursive call:
1446  // bic_decode_u32_cm_dry(sz - mid_idx - 1, val + 1, hi);
1447  sz -= mid_idx + 1;
1448  lo = bm::gap_word_t(val + 1);
1449  } // for sz
1450 }
1451 
1452 
1453 // ----------------------------------------------------------------------
1454 
1455 template<class TDecoder>
1458 {
1459  for (;sz;)
1460  {
1461  BM_ASSERT(lo <= hi);
1462 
1463  unsigned val;
1464  // read the value
1465  {
1466  unsigned r = hi - lo - sz + 1;
1467  if (r)
1468  {
1469  unsigned logv = bm::bit_scan_reverse32(r) + 1;
1470  val = get_bits(logv);
1471  BM_ASSERT(val <= r);
1472  }
1473  else
1474  {
1475  val = 0;
1476  }
1477  }
1478  unsigned mid_idx = sz >> 1;
1479  val += lo + mid_idx;
1480  BM_ASSERT(val < 65536);
1481  BM_ASSERT(mid_idx < 65536);
1482 
1483  // set bit in the target block
1484  {
1485  unsigned nword = (val >> bm::set_word_shift);
1486  block[nword] |= (1u << (val & bm::set_word_mask));
1487  }
1488 
1489  if (sz == 1)
1490  return;
1491  bic_decode_u16_rg_bitset(block, mid_idx, lo, bm::gap_word_t(val - 1));
1492  // tail recursion:
1493  //bic_decode_u16_bitset(block, sz - mid_idx - 1, bm::gap_word_t(val + 1), hi);
1494  sz -= mid_idx + 1;
1495  lo = bm::gap_word_t(val + 1);
1496  } // for sz
1497 }
1498 
1499 // ----------------------------------------------------------------------
1500 
1501 template<class TDecoder>
1504 {
1505  for (;sz;)
1506  {
1507  BM_ASSERT(lo <= hi);
1508 
1509  unsigned val;
1510  // read the value
1511  {
1512  unsigned r = hi - lo - sz + 1;
1513  if (r)
1514  {
1515  unsigned logv = bm::bit_scan_reverse32(r) + 1;
1516  val = get_bits(logv);
1517  BM_ASSERT(val <= r);
1518  }
1519  else
1520  {
1521  val = 0;
1522  }
1523  }
1524  unsigned mid_idx = sz >> 1;
1525  val += lo + mid_idx;
1526  BM_ASSERT(val < 65536);
1527  BM_ASSERT(mid_idx < 65536);
1528 
1529  if (sz == 1)
1530  return;
1531  bic_decode_u16_rg_dry(mid_idx, lo, bm::gap_word_t(val - 1));
1532  //bic_decode_u16_dry(sz - mid_idx - 1, bm::gap_word_t(val + 1), hi);
1533  sz -= mid_idx + 1;
1534  lo = bm::gap_word_t(val + 1);
1535  } // for sz
1536 }
1537 
1538 
1539 
1540 // ----------------------------------------------------------------------
1541 
1542 template<class TDecoder>
1544 {
1545  unsigned acc = accum_;
1546  unsigned used = used_bits_;
1547 
1548  if (used == (sizeof(acc) * 8))
1549  {
1550  acc = src_.get_32();
1551  used ^= used;
1552  }
1553  unsigned zero_bits = 0;
1554  while (true)
1555  {
1556  if (acc == 0)
1557  {
1558  zero_bits = unsigned(zero_bits +(sizeof(acc) * 8) - used);
1559  used = 0;
1560  acc = src_.get_32();
1561  continue;
1562  }
1563  unsigned first_bit_idx =
1564  #if defined(BM_x86) && (defined(__GNUG__) || defined(_MSC_VER))
1565  bm::bsf_asm32(acc);
1566  #else
1567  bm::bit_scan_fwd(acc);
1568  #endif
1569  acc >>= first_bit_idx;
1570  zero_bits += first_bit_idx;
1571  used += first_bit_idx;
1572  break;
1573  } // while
1574 
1575  // eat the border bit
1576  //
1577  if (used == (sizeof(acc) * 8))
1578  {
1579  acc = src_.get_32();
1580  used = 1;
1581  }
1582  else
1583  {
1584  ++used;
1585  }
1586  acc >>= 1;
1587 
1588  // get the value
1589  unsigned current;
1590 
1591  unsigned free_bits = unsigned((sizeof(acc) * 8) - used);
1592  if (zero_bits <= free_bits)
1593  {
1594  take_accum:
1595  current =
1596  (acc & block_set_table<true>::_left[zero_bits]) | (1 << zero_bits);
1597  acc >>= zero_bits;
1598  used += zero_bits;
1599  goto ret;
1600  }
1601 
1602  if (used == (sizeof(acc) * 8))
1603  {
1604  acc = src_.get_32();
1605  used ^= used;
1606  goto take_accum;
1607  }
1608 
1609  // take the part
1610  current = acc;
1611  // read the next word
1612  acc = src_.get_32();
1613  used = zero_bits - free_bits;
1614  current |=
1615  ((acc & block_set_table<true>::_left[used]) << free_bits) |
1616  (1 << zero_bits);
1617 
1618  acc >>= used;
1619 ret:
1620  accum_ = acc;
1621  used_bits_ = used;
1622  return current;
1623 }
1624 
1625 // ----------------------------------------------------------------------
1626 
1627 template<class TDecoder>
1628 unsigned bit_in<TDecoder>::get_bits(unsigned count)
1629 {
1630  BM_ASSERT(count);
1631  const unsigned maskFF = ~0u;
1632  unsigned acc = accum_;
1633  unsigned used = used_bits_;
1634 
1635  unsigned value = 0;
1636  unsigned free_bits = unsigned((sizeof(acc) * 8) - used);
1637  if (count <= free_bits)
1638  {
1639  take_accum:
1640  value = acc & (maskFF >> (32 - count));
1641  acc >>= count;
1642  used += count;
1643  goto ret;
1644  //return value;
1645  }
1646  if (used == (sizeof(acc) * 8))
1647  {
1648  acc = src_.get_32();
1649  used = 0;
1650  goto take_accum;
1651  }
1652  value = acc;
1653  acc = src_.get_32();
1654  used = count - free_bits;
1655  value |= ((acc & (maskFF >> (32 - used))) << free_bits);
1656  acc >>= used;
1657 ret:
1658  accum_ = acc;
1659  used_bits_ = used;
1660  return value;
1661 }
1662 
1663 // ----------------------------------------------------------------------
1664 
1665 } // namespace bm
1666 
1667 #ifdef _MSVC_VER
1668 #pragma warning(pop)
1669 #endif
1670 
1671 #endif
Structure keeps all-left/right ON bits masks.
Definition: bmconst.h:343
void put_64(bm::id64_t w)
Puts 64 bits word into encoding buffer.
Definition: encoding.h:517
void put_8(unsigned char c)
Puts one character into the encoding buffer.
Definition: encoding.h:407
void bic_decode_u16_cm_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi)
Binary Interpolative array decode into /dev/null.
Definition: encoding.h:1405
const unsigned set_word_shift
Definition: bmconst.h:70
void bic_encode_u16_cm(const bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi)
Binary Interpolative encoding (array of 16-bit ints) cm - "center-minimal".
Definition: encoding.h:1156
void get_32_AND(bm::word_t *w, unsigned count)
Definition: encoding.h:876
unsigned char * position_type
Definition: encoding.h:52
unsigned long long int id64_t
Definition: bmconst.h:34
gamma_decoder(TBitIO &bin)
Definition: encoding.h:337
void memcpy(unsigned char *dst, size_t count)
read bytes from the decode buffer
Definition: encoding.h:573
decoder_base(const unsigned char *buf)
Definition: encoding.h:84
Base class for all decoding functionality.
Definition: encoding.h:81
void put_32(bm::word_t w)
Puts 32 bits word into encoding buffer.
Definition: encoding.h:499
Definition: bm.h:76
bool get_32_OR(bm::word_t *w, unsigned count)
Definition: encoding.h:854
T bit_scan_fwd(T v)
Definition: bmutil.h:283
unsigned get_bits(unsigned count)
read number of bits out of the stream
Definition: encoding.h:1628
void bic_encode_u16_rg(const bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi)
Binary Interpolative encoding (array of 16-bit ints)
Definition: encoding.h:1078
void set_pos(unsigned char *buf_pos)
Set current memory stream position.
Definition: encoding.h:488
void seek(int delta)
change current position
Definition: encoding.h:93
Byte based reader for un-aligned bit streaming.
Definition: encoding.h:239
void flush()
Flush the incomplete 32-bit accumulator word.
Definition: encoding.h:213
Class for decoding data from memory buffer.
Definition: encoding.h:112
gamma_encoder(TBitIO &bout)
Definition: encoding.h:316
Class for decoding data from memory buffer.
Definition: encoding.h:144
void stop()
Stop decoding sequence.
Definition: encoding.h:347
void operator()(T value)
Encode word.
Definition: encoding.h:320
decoder decoder_big_endian
Class for decoding data from memory buffer.
Definition: encoding.h:133
bool sse2_or_arr_unal(__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end)
OR array elements against another array (unaligned) dst |= *src.
Definition: bmsse_util.h:342
unsigned int word_t
Definition: bmconst.h:38
bit_in(TDecoder &decoder)
Definition: encoding.h:242
const unsigned char * start_
Definition: encoding.h:102
bm::short_t get_16()
Reads 16-bit word from the decoding buffer.
Definition: encoding.h:594
unsigned sse2_and_arr_unal(__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end)
AND array elements against another array (unaligned) dst &= *src.
Definition: bmsse_util.h:175
void bic_decode_u16_bitset(bm::word_t *block, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi)
Definition: encoding.h:261
bool get_32_OR(bm::word_t *w, unsigned count)
Reads block of 32-bit words from the decoding buffer and ORs to the destination.
Definition: encoding.h:686
unsigned short short_t
Definition: bmconst.h:39
unsigned short gap_word_t
Definition: bmconst.h:76
void put_bits(unsigned value, unsigned count)
issue count bits out of value
Definition: encoding.h:925
Elias Gamma decoder.
Definition: encoding.h:334
Byte based writer for un-aligned bit streaming.
Definition: encoding.h:165
bm::id64_t get_64()
Reads 64-bit word from the decoding buffer.
Definition: encoding.h:628
void bic_decode_u32_cm(bm::word_t *arr, unsigned sz, bm::word_t lo, bm::word_t hi)
Binary Interpolative array decode (32-bit)
Definition: encoding.h:1247
bm::short_t get_16()
Definition: encoding.h:799
void bic_decode_u16_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi)
Definition: encoding.h:266
void put_zero_bits(unsigned count)
issue specified number of 0s
Definition: encoding.h:976
void put_16(bm::short_t s)
Puts short word (16 bits) into the encoding buffer.
Definition: encoding.h:417
void bic_decode_u16_rg(bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi)
Binary Interpolative array decode.
Definition: encoding.h:1205
void get_32_AND(bm::word_t *w, unsigned count)
Reads block of 32-bit words from the decoding buffer and ANDs to the destination. ...
Definition: encoding.h:727
decoder(const unsigned char *buf)
Construction.
Definition: encoding.h:585
void put_prefixed_array_16(unsigned char c, const bm::short_t *s, unsigned count, bool encode_count)
Encode 8-bit prefix + an array.
Definition: encoding.h:390
unsigned char get_8()
Reads character from the decoding buffer.
Definition: encoding.h:87
void put_prefixed_array_32(unsigned char c, const bm::word_t *w, unsigned count)
Encode 8-bit prefix + an array.
Definition: encoding.h:379
unsigned char * get_pos() const
Get current memory stream position.
Definition: encoding.h:480
void memcpy(const unsigned char *src, size_t count)
copy bytes into target buffer or just rewind if src is NULL
Definition: encoding.h:459
const unsigned char * buf_
Definition: encoding.h:101
void bic_decode_u16_cm_bitset(bm::word_t *block, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi)
Binary Interpolative array decode into bitset (32-bit based)
Definition: encoding.h:1349
T operator()(void)
Decode word.
Definition: encoding.h:352
size_t size() const
Returns size of the current encoding stream.
Definition: encoding.h:472
bit_out(TEncoder &dest)
Definition: encoding.h:168
const unsigned set_word_mask
Definition: bmconst.h:71
decoder_little_endian(const unsigned char *buf)
Definition: encoding.h:793
void put_bit(unsigned value)
issue single bit into encode bit-stream
Definition: encoding.h:914
#define BMFORCEINLINE
Definition: bmdef.h:190
encoder(unsigned char *buf, size_t size)
Construction.
Definition: encoding.h:371
Functor for Elias Gamma encoding.
Definition: encoding.h:313
void bic_decode_u16_rg_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi)
Binary Interpolative array decode into /dev/null.
Definition: encoding.h:1502
void bic_decode_u16_cm(bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi)
Binary Interpolative array decode.
Definition: encoding.h:1298
#define BM_ASSERT
Definition: bmdef.h:117
void bic_decode_u16(bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi)
Binary Interpolative array decode.
Definition: encoding.h:255
unsigned gamma()
decode unsigned value using Elias Gamma coding
Definition: encoding.h:1543
void gamma(unsigned value)
Elias Gamma encode the specified value.
Definition: encoding.h:1006
void bic_decode_u16_rg_bitset(bm::word_t *block, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi)
Binary Interpolative array decode into bitset (32-bit based)
Definition: encoding.h:1456
const unsigned char * get_pos() const
Return current buffer pointer.
Definition: encoding.h:99
size_t size() const
Returns size of the current decoding stream.
Definition: encoding.h:90
void put_zero_bit()
issue 0 into output stream
Definition: encoding.h:967
Memory encoding.
Definition: encoding.h:49
void bic_encode_u16(const bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi)
Binary Interpolative array decode.
Definition: encoding.h:190
unsigned bit_scan_reverse32(unsigned value)
Definition: bmutil.h:290
Bit manipulation primitives (internal)
void bic_encode_u32_cm(const bm::word_t *arr, unsigned sz, bm::word_t lo, bm::word_t hi)
Binary Interpolative encoding (array of 32-bit ints) cm - "center-minimal".
Definition: encoding.h:1112
void start()
Start encoding sequence.
Definition: encoding.h:342
bm::word_t get_32()
Reads 32-bit word from the decoding buffer.
Definition: encoding.h:610