1 #ifndef BMFUNC__H__INCLUDED__ 2 #define BMFUNC__H__INCLUDED__ 32 # pragma warning( disable: 4146 ) 70 memory_used += mem_used;
71 max_serialize_mem += mem_used;
78 size_t mem_used = (capacity *
sizeof(
gap_word_t));
79 memory_used += mem_used;
80 max_serialize_mem += (unsigned)(length *
sizeof(
gap_word_t));
83 if (capacity == gap_levels[i])
95 bit_blocks = gap_blocks = ptr_sub_blocks = bv_count = 0;
96 max_serialize_mem = memory_used = 0;
98 gaps_by_level[i] = 0ull;
115 template<
typename First,
typename Second>
127 unsigned short bits[65];
141 static bool test() {
return true; }
145 static bool test() {
return false; }
180 unsigned v = (unsigned)v8;
197 #if defined(BMSSE42OPT) || defined(BMAVX2OPT) 213 tmp = n - ((n >> 1) & 033333333333)
214 - ((n >> 2) & 011111111111);
215 return ((tmp + (tmp >> 3)) & 030707070707) % 63;
226 #if defined(BMSSE42OPT) || defined(BMAVX2OPT) 227 #if defined(BM64_SSE4) || defined(BM64_AVX2) || defined(BM64_AVX512) 228 return unsigned(_mm_popcnt_u64(x));
230 return _mm_popcnt_u32(x >> 32) + _mm_popcnt_u32((
unsigned)x);
233 x = x - ((x >> 1) & 0x5555555555555555);
234 x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333);
235 x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0F;
252 x = x - ((x >> 1) & m1);
253 y = y - ((y >> 1) & m1);
254 u = u - ((u >> 1) & m1);
255 v = v - ((v >> 1) & m1);
256 x = (x & m2) + ((x >> 2) & m2);
257 y = (y & m2) + ((y >> 2) & m2);
258 u = (u & m2) + ((u >> 2) & m2);
259 v = (v & m2) + ((v >> 2) & m2);
262 x = (x & m3) + ((x >> 4) & m3);
263 u = (u & m3) + ((u >> 4) & m3);
269 return x & 0x000001FFU;
284 template<
typename T,
typename F>
287 for (
unsigned sub_octet = 0; w != 0; w >>= 4, sub_octet += 4)
300 func(sub_octet, sub_octet + 1);
306 func(sub_octet, sub_octet + 2);
309 func(sub_octet + 1, sub_octet + 2);
312 func(sub_octet, sub_octet + 1, sub_octet + 2);
318 func(sub_octet, sub_octet + 3);
321 func(sub_octet + 1, sub_octet + 3);
324 func(sub_octet, sub_octet + 1, sub_octet + 3);
327 func(sub_octet + 2, sub_octet + 3);
330 func(sub_octet, sub_octet + 2, sub_octet + 3);
333 func(sub_octet + 1, sub_octet + 2, sub_octet + 3);
336 func(sub_octet, sub_octet + 1, sub_octet + 2, sub_octet + 3);
354 template<
typename T,
typename F>
358 for (
unsigned octet = 0; w != 0; w >>= 8, octet += 8)
360 if (w & 1) func(octet + 0);
361 if (w & 2) func(octet + 1);
362 if (w & 4) func(octet + 2);
363 if (w & 8) func(octet + 3);
364 if (w & 16) func(octet + 4);
365 if (w & 32) func(octet + 5);
366 if (w & 64) func(octet + 6);
367 if (w & 128) func(octet + 7);
388 bp_[0] = (B)bit_idx0; bp_[1] = (B)bit_idx1;
396 bp_[0] = (B)bit_idx0; bp_[1] = (B)bit_idx1; bp_[2] = (B)bit_idx2;
405 bp_[0] = (B)bit_idx0; bp_[1] = (B)bit_idx1;
406 bp_[2] = (B)bit_idx2; bp_[3] = (B)bit_idx3;
426 template<
typename T,
typename B>
unsigned bit_list(T w, B* bits)
430 return (
unsigned)(func.
ptr() - bits);
443 template<
typename T,
typename B>
unsigned bit_list_4(T w, B* bits)
447 return (
unsigned)(func.
ptr() - bits);
470 return (
unsigned short)pos;
492 return (
unsigned short)pos;
508 unsigned short pos = 0;
518 template<
typename V,
typename B>
548 for (
unsigned count = 0; w; w >>=1ull, ++count)
550 rank -= unsigned(w & 1ull);
595 #if defined(BMI2_SELECT64) 596 return BMI2_SELECT64(w, rank);
598 #if defined(BMI1_SELECT64) 599 return BMI2_SELECT64(w, rank);
641 return ((maskFF) >> (63 - (digest_to - digest_from))) << digest_from;
660 return !(digest & mask);
675 for (
unsigned i = 0; i < 64; ++i)
679 #if defined(VECT_BLOCK_SET_DIGEST) 684 block[off+j+0] = block[off+j+1] =
685 block[off+j+2] = block[off+j+3] = mask;
707 for (
unsigned i = 0; i < 64; ++i)
710 #if defined(VECT_IS_DIGEST_ZERO) 718 block[off+j+0] | block[off+j+1] | block[off+j+2] | block[off+j+3];
721 digest0 |= (mask << i);
755 #if defined(VECT_IS_DIGEST_ZERO) 757 digest &= all_zero ? ~(mask << wave) : digest;
766 src_u->w64[j+0] | src_u->w64[j+1] | src_u->w64[j+2] | src_u->w64[j+3];
768 }
while ((j < bm::set_block_digest_wave_size/2) & !w64);
769 digest &= w64 ? digest : ~(mask << wave);
819 ::memset(_p, 0xFF,
sizeof(_p));
822 const unsigned long long magic_mask = 0xFFFFfffeFFFFfffe;
823 ::memcpy(&_p_fullp, &magic_mask,
sizeof(magic_mask));
825 _s[i] = reinterpret_cast<bm::word_t*>(magic_mask);
829 const unsigned magic_mask = 0xFFFFfffe;
830 ::memcpy(&_p_fullp, &magic_mask,
sizeof(magic_mask));
832 _s[i] = reinterpret_cast<bm::word_t*>(magic_mask);
845 bm::id64_t w =
reinterpret_cast<unsigned long long>(bp);
847 ((bp == _block._p) << 1);
848 type = type ? type : w;
852 unsigned w =
reinterpret_cast<unsigned long>(bp);
854 ((bp == _block._p) << 1);
855 type = type ? type : w;
862 {
return (bp == _block._p || bp == _block._p_fullp); }
866 {
return (bp && !(bp == _block._p || bp == _block._p_fullp)); }
897 const unsigned unroll_factor = 4;
898 const unsigned len = (size - start);
899 const unsigned len_unr = len - (len % unroll_factor);
903 for (k = 0; k < len_unr; k+=unroll_factor)
905 if (!avx2_test_all_zero_wave(arr+k))
914 *pos = k + start + 1;
919 *pos = k + start + 2;
924 *pos = k + start + 3;
939 for (; start < size; ++start)
969 int res = (w1 & 1) - (w2 & 1);
970 if (res != 0)
return res;
997 return diff? ( (a & diff & -diff)? 1 : -1 ) : 0;
1014 #if defined(VECT_IS_ZERO_BLOCK) 1021 if (blk[0] | blk[1] | blk[2] | blk[3])
1024 }
while (blk < blk_end);
1081 template<
typename T>
1084 return glevel_len[(*buf >> 1) & 3];
1096 template<
typename T>
1099 return glevel_len[(*buf >> 1) & 3]-4;
1110 template<
typename T>
1113 return T((*buf >> 1) & 3u);
1127 template<
typename T>
1132 T is_set = (*buf) & 1u;
1133 T end = T((*buf) >> 3u);
1137 is_set ^= T((end-1) & 1u);
1157 template<
typename T>
1162 T is_set = (*buf) & 1u;
1170 *first = buf[1] + 1;
1184 template<
typename T>
1185 unsigned gap_bfind(
const T* buf,
unsigned pos,
unsigned* is_set)
1188 *is_set = (*buf) & 1;
1191 unsigned end = 1 + ((*buf) >> 3);
1193 while ( start != end )
1195 unsigned curr = (start + end) >> 1;
1196 if ( buf[curr] < pos )
1201 *is_set ^= ((start-1) & 1);
1213 template<
typename T>
unsigned gap_test(
const T* buf,
unsigned pos)
1218 unsigned end = 1 + ((*buf) >> 3);
1219 if (end - start < 10)
1221 unsigned sv = *buf & 1;
1222 unsigned sv1= sv ^ 1;
1223 if (buf[1] >= pos)
return sv;
1224 if (buf[2] >= pos)
return sv1;
1225 if (buf[3] >= pos)
return sv;
1226 if (buf[4] >= pos)
return sv1;
1227 if (buf[5] >= pos)
return sv;
1228 if (buf[6] >= pos)
return sv1;
1229 if (buf[7] >= pos)
return sv;
1230 if (buf[8] >= pos)
return sv1;
1231 if (buf[9] >= pos)
return sv;
1236 while (start != end)
1238 unsigned curr = (start + end) >> 1;
1239 if (buf[curr] < pos)
1245 return ((*buf) & 1) ^ ((--start) & 1);
1255 template<
typename T>
1264 #if defined(BMSSE2OPT) 1266 unsigned end = 1 + ((*buf) >> 3);
1267 unsigned dsize = end - start;
1272 unsigned res = ((*buf) & 1) ^ ((start) & 1);
1274 BM_ASSERT(buf[start] < pos || (start == 0));
1278 unsigned arr_end = end;
1279 while (start != end)
1281 unsigned curr = (start + end) >> 1;
1282 if (buf[curr] < pos)
1287 unsigned size = end - start;
1290 size += (end != arr_end);
1295 BM_ASSERT(buf[start - 1] < pos || (start == 1));
1300 unsigned res = ((*buf) & 1) ^ ((--start) & 1);
1305 #elif defined(BMSSE42OPT) 1307 unsigned end = 1 + ((*buf) >> 3);
1308 unsigned dsize = end - start;
1313 unsigned res = ((*buf) & 1) ^ ((start) & 1);
1315 BM_ASSERT(buf[start] < pos || (start==0));
1319 unsigned arr_end = end;
1320 while (start != end)
1322 unsigned curr = (start + end) >> 1;
1323 if (buf[curr] < pos)
1328 unsigned size = end - start;
1331 size += (end != arr_end);
1336 BM_ASSERT(buf[start - 1] < pos || (start == 1));
1341 unsigned res = ((*buf) & 1) ^ ((--start) & 1);
1344 #elif defined(BMAVX2OPT) 1345 unsigned res = bm::avx2_gap_test(buf, pos);
1356 template<
typename T,
typename N,
typename F>
1360 if (nb_from > nb_to)
1367 if (i_from >= top_size)
1369 if (i_to >= top_size)
1375 for (
unsigned i = i_from; i <= i_to; ++i)
1377 T** blk_blk = root[i];
1384 unsigned j = (i == i_from) ? j_from : 0;
1385 if (!j && (i != i_to))
1394 if ((i == i_to) && (j == j_to))
1401 unsigned j = (i == i_from) ? j_from : 0;
1408 if ((i == i_to) && (j == j_to))
1419 template<
class T,
class F>
1422 typedef typename F::size_type size_type;
1423 for (
unsigned i = 0; i < size1; ++i)
1425 T** blk_blk = root[i];
1431 f.on_non_empty_top(i);
1439 }
while (++j < bm::set_sub_array_size);
1443 unsigned non_empty_top = 0;
1448 #if defined(BM64_AVX2) || defined(BM64_AVX512) 1449 if (!avx2_test_all_zero_wave(blk_blk + j))
1452 T* blk0 = blk_blk[j + 0];
1453 T* blk1 = blk_blk[j + 1];
1454 T* blk2 = blk_blk[j + 2];
1455 T* blk3 = blk_blk[j + 3];
1457 size_type block_idx = r + j + 0;
1461 f.on_empty_block(block_idx);
1464 f(blk1, block_idx + 1);
1466 f.on_empty_block(block_idx + 1);
1469 f(blk2, block_idx + 2);
1471 f.on_empty_block(block_idx + 2);
1474 f(blk3, block_idx + 3);
1476 f.on_empty_block(block_idx + 3);
1480 f.on_empty_block(r + j + 0); f.on_empty_block(r + j + 1);
1481 f.on_empty_block(r + j + 2); f.on_empty_block(r + j + 3);
1484 #elif defined(BM64_SSE4) 1488 T* blk0 = blk_blk[j + 0];
1489 T* blk1 = blk_blk[j + 1];
1491 size_type block_idx = r + j + 0;
1495 f.on_empty_block(block_idx);
1501 f.on_empty_block(block_idx);
1505 f.on_empty_block(r + j + 0);
1506 f.on_empty_block(r + j + 1);
1512 f(blk_blk[j], r + j);
1516 f.on_empty_block(r + j);
1519 }
while (j < bm::set_sub_array_size);
1521 if (non_empty_top == 0)
1528 template<
class T,
class F>
1532 for (
unsigned i = 0; i < size1; ++i)
1535 if ((blk_blk = root[i])!=0)
1541 f(FULL_BLOCK_FAKE_ADDR);
1549 w0 = _mm_loadu_si128((__m128i*)(blk_blk + j));
1550 if (!_mm_testz_si128(w0, w0))
1552 T* blk0 = blk_blk[j + 0];
1553 T* blk1 = blk_blk[j + 1];
1560 w0 = _mm_loadu_si128((__m128i*)(blk_blk + j + 2));
1561 if (!_mm_testz_si128(w0, w0))
1563 T* blk0 = blk_blk[j + 2];
1564 T* blk1 = blk_blk[j + 3];
1574 #elif defined(BM64_AVX2) || defined(BM64_AVX512) 1575 for (
unsigned i = 0; i < size1; ++i)
1578 if ((blk_blk = root[i]) != 0)
1584 f(FULL_BLOCK_FAKE_ADDR);
1591 __m256i w0 = _mm256_loadu_si256((__m256i*)(blk_blk + j));
1592 if (!_mm256_testz_si256(w0, w0))
1596 T* blk0 = blk_blk[j + 0];
1597 T* blk1 = blk_blk[j + 1];
1598 T* blk2 = blk_blk[j + 2];
1599 T* blk3 = blk_blk[j + 3];
1614 #else // non-SIMD mode 1615 for (
unsigned i = 0; i < size1; ++i)
1618 if ((blk_blk = root[i])!=0)
1624 f(FULL_BLOCK_FAKE_ADDR);
1650 template<
typename T,
typename BI,
typename F>
1654 for (BI i = 0; i < size1; ++i)
1656 T** blk_blk = root[i];
1666 if (f(FULL_BLOCK_FAKE_ADDR, block_idx))
1675 if (f(blk_blk[j], block_idx))
1684 template<
class T,
class F,
typename BLOCK_IDX>
1687 BLOCK_IDX block_idx = start;
1688 for (
unsigned i = 0; i < size1; ++i)
1690 T** blk_blk = root[i];
1697 f(FULL_BLOCK_FAKE_ADDR, block_idx);
1703 f(blk_blk[j], block_idx);
1726 }
while (first < last);
1732 template<
typename T>
1736 while (first < last)
1755 const T* pcurr = buf;
1757 dsize = (*pcurr >> 3);
1759 const T* pend = pcurr + dsize;
1761 unsigned bits_counter = 0;
1766 bits_counter += *pcurr + 1;
1771 while (pcurr <= pend)
1773 bits_counter += *pcurr - *(pcurr-1);
1777 return bits_counter;
1788 const T* pcurr = buf;
1789 unsigned dsize = (*pcurr >> 3);
1793 T first_one = *buf & 1;
1801 #if defined(BMAVX2OPT) || defined(BMAVX512OPT) 1804 const unsigned unr_factor = 32;
1805 unsigned waves = (dsize-2) / unr_factor;
1806 pcurr = avx2_gap_sum_arr(pcurr, waves, &cnt);
1808 #elif defined(BMSSE42OPT) || defined(BMSSE2OPT) 1811 const unsigned unr_factor = 16;
1812 unsigned waves = (dsize - 2) / unr_factor;
1818 const unsigned unr_factor = 8;
1819 unsigned waves = (dsize - 2) / unr_factor;
1820 for (
unsigned i = 0; i < waves; i += unr_factor)
1822 cnt += pcurr[0] - pcurr[0 - 1];
1823 cnt += pcurr[2] - pcurr[2 - 1];
1824 cnt += pcurr[4] - pcurr[4 - 1];
1825 cnt += pcurr[6] - pcurr[6 - 1];
1827 pcurr += unr_factor;
1832 const T* pend = buf + dsize;
1833 for ( ; pcurr <= pend ; pcurr+=2)
1835 cnt += *pcurr - *(pcurr - 1);
1851 template<
typename T>
1856 const T* pcurr = buf;
1857 const T* pend = pcurr + (*pcurr >> 3);
1859 unsigned bits_counter = 0;
1862 is_set = ~(is_set - 1u);
1864 pcurr = buf + start_pos;
1865 if (right <= *pcurr)
1867 bits_counter = unsigned(right - left + 1u) & is_set;
1868 return bits_counter;
1870 bits_counter += unsigned(*pcurr - left + 1u) & is_set;
1872 unsigned prev_gap = *pcurr++;
1873 for (is_set ^= ~0u; right > *pcurr; is_set ^= ~0u)
1875 bits_counter += (*pcurr - prev_gap) & is_set;
1877 return bits_counter;
1878 prev_gap = *pcurr++;
1880 bits_counter += unsigned(right - prev_gap) & is_set;
1881 return bits_counter;
1897 template<
typename T,
typename SIZE_TYPE>
1906 const T* pcurr = block;
1907 const T* pend = pcurr + (*pcurr >> 3);
1909 unsigned bits_counter = 0;
1911 unsigned start_pos =
bm::gap_bfind(block, nbit_from, &is_set);
1912 is_set = ~(is_set - 1u);
1914 pcurr = block + start_pos;
1915 bits_counter += unsigned(*pcurr - nbit_from + 1u) & is_set;
1916 if (bits_counter >= rank)
1918 nbit_pos = nbit_from + unsigned(rank) - 1u;
1921 rank -= bits_counter;
1922 unsigned prev_gap = *pcurr++;
1923 for (is_set ^= ~0u; pcurr <= pend; is_set ^= ~0u)
1925 bits_counter = (*pcurr - prev_gap) & is_set;
1926 if (bits_counter >= rank)
1928 nbit_pos = prev_gap + unsigned(rank);
1931 rank -= bits_counter;
1932 prev_gap = *pcurr++;
1947 template<
typename T>
1950 const T* pcurr = buf;
1951 const T* pend = pcurr + (*pcurr >> 3);
1953 unsigned bits_counter = 0;
1954 unsigned is_set = ~((unsigned(*buf) & 1u) - 1u);
1955 BM_ASSERT(is_set == 0u || is_set == ~0u);
1958 if (right <= *pcurr)
1960 bits_counter = (right + 1u) & is_set;
1961 return bits_counter;
1963 bits_counter += (*pcurr + 1u) & is_set;
1965 unsigned prev_gap = *pcurr++;
1966 for (is_set ^= ~0u; right > *pcurr; is_set ^= ~0u)
1968 bits_counter += (*pcurr - prev_gap) & is_set;
1970 return bits_counter;
1971 prev_gap = *pcurr++;
1973 bits_counter += (right - prev_gap) & is_set;
1974 return bits_counter;
1987 template<
class T,
class Func>
1990 const T* pcurr = gap_buf;
1991 const T* pend = pcurr + (*pcurr >> 3);
1995 func((T)(prev + 1));
1999 func((T)(*pcurr - prev));
2001 }
while (++pcurr < pend);
2028 template<
typename T>
2029 T*
gap_2_dgap(
const T* gap_buf, T* dgap_buf,
bool copy_head=
true)
2033 *dgap_buf++ = *gap_buf;
2037 for_each_dgap<T, d_copy_func<T> >(gap_buf, copy_func);
2053 template<
typename T>
2056 const T* pcurr = dgap_buf;
2061 *gap_buf++ = *pcurr++;
2065 len = gap_header >> 3;
2066 *gap_buf++ = gap_header;
2069 const T* pend = pcurr + len;
2071 *gap_buf = *pcurr++;
2075 *gap_buf = T(*gap_buf - 1);
2077 for (++gap_buf; pcurr < pend; ++pcurr)
2079 T prev = *(gap_buf-1);
2080 *gap_buf++ = T(*pcurr + prev);
2094 template<
typename T>
int gapcmp(
const T* buf1,
const T* buf2)
2096 const T* pcurr1 = buf1;
2097 const T* pend1 = pcurr1 + (*pcurr1 >> 3);
2098 unsigned bitval1 = *buf1 & 1;
2101 const T* pcurr2 = buf2;
2102 unsigned bitval2 = *buf2 & 1;
2105 while (pcurr1 <= pend1)
2107 if (*pcurr1 == *pcurr2)
2109 if (bitval1 != bitval2)
2111 return (bitval1) ? 1 : -1;
2116 if (bitval1 == bitval2)
2120 return (*pcurr1 < *pcurr2) ? -1 : 1;
2124 return (*pcurr1 < *pcurr2) ? 1 : -1;
2129 return (bitval1) ? 1 : -1;
2160 template<
typename T,
class F>
2163 unsigned vect1_mask,
2165 unsigned vect2_mask,
2169 const T* cur1 = vect1;
2170 const T* cur2 = vect2;
2172 T bitval1 = (T)((*cur1++ & 1) ^ vect1_mask);
2173 T bitval2 = (T)((*cur2++ & 1) ^ vect2_mask);
2175 T bitval = (T) f(bitval1, bitval2);
2176 T bitval_prev = bitval;
2182 T c1 = *cur1; T c2 = *cur2;
2185 bitval = (T) f(bitval1, bitval2);
2190 res += (bitval != bitval_prev);
2191 bitval_prev = bitval;
2211 bitval1 ^= 1; bitval2 ^= 1;
2218 dlen = (unsigned)(res - dest);
2219 *dest = (T)((*dest & 7) + (dlen << 3));
2236 template<
typename T,
class F>
2238 unsigned vect1_mask,
2240 unsigned vect2_mask,
2243 const T* cur1 = vect1;
2244 const T* cur2 = vect2;
2246 unsigned bitval1 = (*cur1++ & 1) ^ vect1_mask;
2247 unsigned bitval2 = (*cur2++ & 1) ^ vect2_mask;
2249 unsigned bitval = f(bitval1, bitval2);
2252 unsigned bitval_prev = bitval;
2256 bitval = f(bitval1, bitval2);
2260 if (bitval != bitval_prev)
2261 bitval_prev = bitval;
2305 template<
typename T,
class F>
2309 const T* cur1 = vect1;
2310 const T* cur2 = vect2;
2312 unsigned bitval1 = (*cur1++ & 1);
2313 unsigned bitval2 = (*cur2++ & 1);
2314 unsigned bitval = count = f(bitval1, bitval2);
2315 unsigned bitval_prev = bitval;
2324 bitval = f(bitval1, bitval2);
2328 if (bitval != bitval_prev)
2330 bitval_prev = bitval;
2339 count += res - res_prev;
2350 count += res - res_prev;
2390 template<
typename T>
2397 unsigned curr =
gap_bfind(buf, pos, is_set);
2399 T end = (T)(*buf >> 3);
2407 T* pcurr = buf + curr;
2408 T* pprev = pcurr - 1;
2409 T* pend = buf + end;
2418 ::memmove(&buf[2], &buf[1], (end - 1) *
sizeof(
gap_word_t));
2428 *pprev++ = *pcurr++;
2429 }
while (pcurr < pend);
2433 else if (curr > 1 && ((
unsigned)(*pprev))+1 == pos)
2436 if (*pprev == *pcurr)
2445 *pprev++ = *pcurr++;
2446 }
while (pcurr < pend);
2450 else if (*pcurr == pos)
2460 ::memmove(pcurr+2, pcurr,(end - curr + 1)*
sizeof(T));
2461 *pcurr++ = (T)(pos - 1);
2467 *buf = (T)((*buf & 7) + (end << 3));
2483 template<
typename T>
2488 T end = (T)(*buf >> 3);
2490 T* pcurr = buf + end;
2492 T* pprev = pcurr - 1;
2501 ::memmove(&buf[2], &buf[1], (end - 1) *
sizeof(
gap_word_t));
2511 *pprev++ = *pcurr++;
2512 }
while (pcurr < pend);
2516 else if (((
unsigned)(*pprev))+1 == pos && (curr > 1) )
2519 if (*pprev == *pcurr)
2529 *pprev++ = *pcurr++;
2530 }
while (pcurr < pend);
2534 else if (*pcurr == pos)
2544 *pcurr++ = (T)(pos - 1);
2550 *buf = (T)((*buf & 7) + (end << 3));
2564 template<
typename T>
2571 unsigned bitval = *buf & 1;
2578 unsigned len = (*buf >> 3);
2580 for (; i < len; ++i)
2590 *buf = (T)((*buf & 7) + (len << 3));
2611 template<
typename T>
2619 unsigned bitval = *buf & 1;
2629 BM_ASSERT(bitval ==
unsigned(*buf & 1u));
2641 unsigned len = (*buf >> 3);
2643 for (; i < len; ++i)
2669 template<
typename T>
2672 *buf = (T)((*buf & 6u) + (1u << 3));
2680 *pcurr = (T)(curr - 1);
2690 for (i = 1; i < len; ++i)
2693 if (curr == prev + 1)
2702 *pcurr++ = (T)(curr-1);
2713 unsigned gap_len = unsigned(pcurr - buf);
2714 BM_ASSERT(gap_len == ((gap_len << 3) >> 3));
2716 *buf = (T)((*buf & 7) + (gap_len << 3));
2730 template<
typename T>
2734 unsigned gap_count = 1;
2738 for (
unsigned i = 1; i < len; ++i)
2741 if (curr != prev + 1)
2763 template<
typename T>
2779 unsigned val = buf[gap_idx] + 1;
2795 dest[nword] |= unsigned(1u << nbit);
2808 dest[nword] &= ~(unsigned(1u << nbit));
2817 unsigned test_bit(
const unsigned* block,
unsigned bitpos)
2822 return (block[nword] >> nbit) & 1u;
2837 const unsigned maskFF = ~0u;
2844 *dest |= (1u << bitpos);
2850 unsigned mask_r = maskFF << bitpos;
2851 unsigned right_margin = bitpos + bitcount;
2852 if (right_margin < 32)
2854 *dest |= (maskFF >> (32 - right_margin)) & mask_r;
2858 bitcount -= 32 - bitpos;
2860 for ( ;bitcount >= 64; bitcount-=64, dest+=2)
2861 dest[0] = dest[1] = maskFF;
2864 *dest++ = maskFF; bitcount -= 32;
2868 *dest |= maskFF >> (32 - bitcount);
2884 const unsigned maskFF = ~0u;
2891 *dest &= ~(1u << bitpos);
2897 unsigned mask_r = maskFF << bitpos;
2898 unsigned right_margin = bitpos + bitcount;
2899 if (right_margin < 32)
2901 *dest &= ~((maskFF >> (32 - right_margin)) & mask_r);
2905 bitcount -= 32 - bitpos;
2907 for ( ;bitcount >= 64; bitcount-=64, dest+=2)
2908 dest[0] = dest[1] = 0u;
2911 *dest++ = 0u; bitcount -= 32;
2915 *dest &= ~(maskFF >> (32 - bitcount));
2941 *word ^= unsigned(1 << nbit);
2947 unsigned right_margin = nbit + bitcount;
2952 if (right_margin < 32)
2961 bitcount -= 32 - nbit;
2964 for ( ;bitcount >= 64; bitcount-=64, word+=2)
2966 word[0] ^= ~0u; word[1] ^= ~0u;
2970 *word++ ^= ~0u; bitcount -= 32;
2984 template<
typename T>
2989 const T* pend = pcurr + (*pcurr >> 3);
2995 for (pcurr += 2; pcurr <= pend; pcurr += 2)
3012 template<
typename T>
3017 const T* pend = pcurr + (*pcurr >> 3);
3032 for (; pcurr <= pend; pcurr += 2)
3034 if (*pcurr >= start_pos)
3044 for (; pcurr <= pend; pcurr += 2)
3070 template<
typename T>
3075 const T* pend = pcurr + (*pcurr >> 3);
3081 for (pcurr += 2; pcurr <= pend; pcurr += 2)
3097 template<
typename T>
3102 const T* pend = pcurr + len;
3112 for (; pcurr <= pend; )
3115 pos = 1u + pcurr[-1];
3116 bc = *pcurr - pcurr[-1];
3130 template<
typename T>
3133 unsigned len = (*pcurr >> 3);
3145 template<
typename T>
3150 const T* pend = pcurr + (*pcurr >> 3);
3160 for (; pcurr <= pend; )
3163 pos = 1u + pcurr[-1];
3164 bc = *pcurr - pcurr[-1];
3179 template<
typename T>
3186 const T* pend = pcurr + (*pcurr >> 3);
3201 for (; pcurr <= pend; pcurr += 2)
3203 if (*pcurr >= start_pos)
3213 for (; pcurr <= pend; pcurr += 2)
3240 template<
typename T>
3244 const T* pend = pcurr + (*pcurr >> 3);
3251 for (pcurr +=2 ;pcurr <= pend; pcurr += 2)
3267 template<
typename T>
3272 const T* pend = pcurr + (*pcurr >> 3);
3279 for (pcurr +=2 ;!count && pcurr <= pend; pcurr += 2)
3296 template<
typename T>
3301 const T* pcurr = buf;
3302 const T* pend = pcurr + (*pcurr >> 3);
3314 for (;pcurr <= pend; pcurr+=2)
3330 template<
typename T>
3335 const T* pcurr = buf;
3336 const T* pend = pcurr + (*pcurr >> 3);
3350 for (; !count && pcurr <= pend; pcurr+=2)
3367 template<
typename T>
3372 const T* pcurr = buf;
3373 const T* pend = pcurr + (*pcurr >> 3);
3376 unsigned bitval = *buf & 1;
3381 count = *pcurr + 1 - count;
3384 for (bitval^=1, ++pcurr; pcurr <= pend; bitval^=1, ++pcurr)
3386 T prev = (T)(*(pcurr-1)+1);
3390 c = (*pcurr - prev + 1) - c;
3404 template<
typename T>
3409 const T* pcurr = buf;
3410 const T* pend = pcurr + (*pcurr >> 3);
3413 unsigned bitval = *buf & 1;
3417 count = *pcurr + 1 - count;
3419 for (bitval^=1, ++pcurr; !count && pcurr <= pend; bitval^=1, ++pcurr)
3421 T prev = (T)(*(pcurr-1)+1);
3425 c = (*pcurr - prev + 1) - c;
3441 template<
typename T>
3446 const T* pcurr = buf;
3447 const T* pend = pcurr + (*pcurr >> 3);
3450 unsigned bitval = *buf & 1;
3452 bm::id_t count = bitval ? *pcurr + 1
3454 for (bitval^=1, ++pcurr; pcurr <= pend; bitval^=1, ++pcurr)
3456 T prev = (T)(*(pcurr-1)+1);
3458 bitval ? (*pcurr - prev + 1)
3473 template<
typename T>
3509 template<
typename T>
3530 template<
typename T>
3535 if (buf[1] == set_max - 1)
3554 unsigned end = *buf >> 3;
3556 const T* pcurr = buf;
3557 const T* pend = pcurr + (*pcurr >> 3);
3566 while (pcurr <= pend)
3588 *buf = (T)((*buf & 6u) + (1u << 3) + value);
3589 *(++buf) = (T)(set_max - 1);
3615 if (to == set_max - 1)
3623 buf[2] = (T)(set_max - 1);
3624 buf[0] = (T)((*buf & 6u) + (gap_len << 3) + value);
3631 if (to == set_max - 1)
3634 buf[1] = (T)(from - 1);
3635 buf[2] = (T)(set_max - 1);
3640 buf[1] = (T) (from - 1);
3642 buf[3] = (T)(set_max - 1);
3644 buf[0] = (T)((*buf & 6u) + (gap_len << 3) + value);
3661 #pragma GCC diagnostic push 3662 #pragma GCC diagnostic ignored "-Wconversion" 3672 template<
typename T>
3678 *buf = (T)(((level & 3) << 1) | (*buf & 1) | (*buf & ~7));
3681 #pragma GCC diagnostic pop 3694 template<
typename T>
3697 if (len <=
unsigned(glevel_len[0]-4))
return 0;
3698 if (len <=
unsigned(glevel_len[1]-4))
return 1;
3699 if (len <=
unsigned(glevel_len[2]-4))
return 2;
3700 if (len <=
unsigned(glevel_len[3]-4))
return 3;
3715 template<
typename T>
3720 return capacity - len;
3732 template<
typename T>
3733 int bitcmp(
const T* buf1,
const T* buf2,
unsigned len)
3736 const T* pend1 = buf1 + len;
3743 return (w1 & diff & -diff) ? 1 : -1;
3744 }
while (buf1 < pend1);
3769 unsigned bitval = (*block) & 1u;
3772 unsigned bit_idx = 0;
3776 unsigned val = *block;
3777 while (!val || val == ~0u)
3779 if (bitval !=
unsigned(
bool(val)))
3783 BM_ASSERT((pcurr-1) == (dest+1) || *(pcurr-1) > *(pcurr-2));
3786 bit_idx += unsigned(
sizeof(*block) * 8);
3787 if (++block >= block_end)
3794 unsigned bits_consumed = 0;
3798 if (bitval != (val & 1u))
3802 BM_ASSERT((pcurr-1) == (dest+1) || *(pcurr-1) > *(pcurr-2));
3812 bits_consumed += tz;
3818 if (bits_consumed < 32u)
3822 bit_idx += 32u - bits_consumed;
3823 BM_ASSERT((pcurr-1) == (dest+1) || *(pcurr-1) > *(pcurr-2));
3830 }
while(++block < block_end);
3834 unsigned len = (unsigned)(pcurr - dest);
3835 *dest = (
gap_word_t)((*dest & 7) + (len << 3));
3845 #if defined(VECT_BIT_TO_GAP) 3846 return VECT_BIT_TO_GAP(dest, block, dest_len);
3857 template<
class T,
class F>
3860 const T* pcurr = buf;
3861 const T* pend = pcurr + (*pcurr >> 3);
3871 unsigned to = *pcurr;
3872 for (
unsigned i = 0; i <= to; ++i)
3885 while (pcurr <= pend)
3887 unsigned from = *(pcurr-1)+1;
3888 unsigned to = *pcurr;
3891 func(from - prev + first_inc);
3899 for (
unsigned i = from+1; i <= to; ++i)
3912 template<
typename D,
typename T>
3916 bool invert =
false)
3919 BMREGISTER const T* pend = pcurr + (*pcurr >> 3);
3924 int bitval = (*buf) & 1;
3930 if (
unsigned(*pcurr + 1) >= dest_len)
3943 while (pcurr <= pend)
3945 unsigned pending = *pcurr - *(pcurr-1);
3946 if (pending >= dest_len)
3948 dest_len -= pending;
3949 T from = (T)(*(pcurr-1)+1);
3951 for (T i = from; ;++i)
3958 return (D) (dest_curr - dest);
4017 }
while (block < block_end);
4056 }
while (j < bm::set_block_digest_wave_size/2);
4089 }
while (block < block_end);
4115 count -= (w >> ((
sizeof(w) * 8) - 1));
4127 unsigned gap_count = 1;
4132 const int w_shift = int(
sizeof(w) * 8 - 1);
4135 gap_count -= (w_prev = (w0 >> w_shift));
4138 for (++block ;block < block_end; ++block)
4144 gap_count -= !w_prev;
4153 gap_count -= (w0 >> w_shift);
4154 gap_count -= !(w_prev ^ w_l);
4156 w_prev = (w0 >> w_shift);
4175 #if defined(VECT_BLOCK_CHANGE) 4198 unsigned nword, nbit, bitcount, count;
4204 return (*word >> nbit) & 1u;
4208 bitcount = right - left + 1u;
4211 unsigned right_margin = nbit + right - left;
4212 if (right_margin < 32)
4220 bitcount -= 32 - nbit;
4226 #if defined(BM64_SSE4) || defined(BM64_AVX2) || defined(BM64_AVX512) 4227 for ( ;bitcount >= 64; bitcount-=64)
4230 count += unsigned(_mm_popcnt_u64(w64));
4239 for ( ;bitcount >= 32; bitcount-=32, ++word)
4268 unsigned bitcount = right + 1;
4271 #if defined(BMAVX2OPT) || defined(BMAVX512OPT) 4272 BM_AVX2_POPCNT_PROLOG
4274 __m256i cnt = _mm256_setzero_si256();
4277 for ( ;bitcount >= 256; bitcount -= 256)
4279 const __m256i* src = (__m256i*)block;
4280 __m256i xmm256 = _mm256_load_si256(src);
4281 BM_AVX2_BIT_COUNT(bc, xmm256);
4282 cnt = _mm256_add_epi64(cnt, bc);
4287 count += (unsigned)(cnt64[0] + cnt64[1] + cnt64[2] + cnt64[3]);
4290 for ( ;bitcount >= 64; bitcount -= 64)
4322 block[i] = (block[i] << 1) | (block[i + 1] >> 31);
4336 const unsigned unroll_factor = 4;
4342 w0 = block[i + 1] >> 31;
4343 w1 = block[i + 2] >> 31;
4344 w2 = block[i + 3] >> 31;
4345 w3 = block[i + 4] >> 31;
4347 block[0 + i] = (block[0 + i] << 1) | w0;
4348 block[1 + i] = (block[1 + i] << 1) | w1;
4349 block[2 + i] = (block[2 + i] << 1) | w2;
4350 block[3 + i] = (block[3 + i] << 1) | w3;
4352 block[i] = (block[i] << 1) | (block[i + 1] >> 31);
4353 block[i + 1] = (block[i + 1] << 1) | (block[i + 2] >> 31);
4354 block[i + 2] = (block[i + 2] << 1) | (block[i + 3] >> 31);
4386 block[nword] = w | (unsigned(value) << nbit) | wl;
4398 w = (w << 1u) | co_flag;
4428 acc |= w = (w << 1u) | co_flag;
4451 #if defined(VECT_SHIFT_R1) 4481 acc |= w = (w >> 1u) | (co_flag << 31u);
4505 #if defined(VECT_SHIFT_L1) 4543 w = (w >> 1u) | (co_flag << 31u);
4555 w |= wl | (co_flag << 31u);
4560 block[nword] = (block[nword] >> 1u) | (co_flag << 31u);
4595 for (; di < 64; ++di)
4608 w = (w << 1u) | co_flag;
4609 acc |= block[i] = w & mask_block[i];
4617 BM_ASSERT(block[d_base + bm::set_block_digest_wave_size -1]==0);
4625 block[d_base] = co_flag & mask_block[d_base];
4657 #if defined(VECT_SHIFT_R1_AND) 4679 unsigned nbit = left;
4687 return (*word >> nbit) & 1;
4690 unsigned bitcount = right - left + 1;
4694 unsigned right_margin = nbit + (right - left);
4695 if (right_margin < 32)
4708 bitcount -= 32 - nbit;
4714 for ( ;bitcount >= 32; bitcount -= 32)
4745 start[0] = ~start[0];
4746 start[1] = ~start[1];
4747 start[2] = ~start[2];
4748 start[3] = ~start[3];
4750 }
while (start < end);
4762 #if defined(BMSSE42OPT) || defined(BMAVX2OPT) 4770 start[0] & start[1] & start[2] & start[3];
4774 }
while (start < end);
5087 #ifdef VECT_STREAM_BLOCK 5122 for (
unsigned i = 0; i < arr_sz; i+=4)
5124 acc |= dst_u->w64[i] &= src_u->w64[i];
5125 acc |= dst_u->w64[i+1] &= src_u->w64[i+1];
5126 acc |= dst_u->w64[i+2] &= src_u->w64[i+2];
5127 acc |= dst_u->w64[i+3] &= src_u->w64[i+3];
5162 #if defined(VECT_AND_DIGEST) 5165 digest &= ~(mask << wave);
5174 acc |= dst_u->w64[j+0] &= src_u->w64[j+0];
5175 acc |= dst_u->w64[j+1] &= src_u->w64[j+1];
5176 acc |= dst_u->w64[j+2] &= src_u->w64[j+2];
5177 acc |= dst_u->w64[j+3] &= src_u->w64[j+3];
5179 }
while (j < bm::set_block_digest_wave_size/2);
5182 digest &= ~(mask << wave);
5208 BM_ASSERT(src0 && src1 && src2 && src3);
5219 #if defined(VECT_AND_DIGEST_5WAY) 5220 bool all_zero =
VECT_AND_DIGEST_5WAY(&dst[off], &src0[off], &src1[off], &src2[off], &src3[off]);
5222 digest &= ~(mask << wave);
5234 acc |= dst_u->w64[j + 0] &= src_u0->w64[j + 0] & src_u1->w64[j + 0] & src_u2->w64[j + 0] & src_u3->w64[j + 0];
5235 acc |= dst_u->w64[j + 1] &= src_u0->w64[j + 1] & src_u1->w64[j + 1] & src_u2->w64[j + 1] & src_u3->w64[j + 1];
5236 acc |= dst_u->w64[j + 2] &= src_u0->w64[j + 2] & src_u1->w64[j + 2] & src_u2->w64[j + 2] & src_u3->w64[j + 2];
5237 acc |= dst_u->w64[j + 3] &= src_u0->w64[j + 3] & src_u1->w64[j + 3] & src_u2->w64[j + 3] & src_u3->w64[j + 3];
5239 }
while (j < bm::set_block_digest_wave_size / 2);
5242 digest &= ~(mask << wave);
5285 #if defined(VECT_AND_DIGEST_2WAY) 5288 digest &= ~(mask << wave);
5301 acc |= dst_u->w64[j+0] = src_u1->w64[j+0] & src_u2->w64[j+0];
5302 acc |= dst_u->w64[j+1] = src_u1->w64[j+1] & src_u2->w64[j+1];
5303 acc |= dst_u->w64[j+2] = src_u1->w64[j+2] & src_u2->w64[j+2];
5304 acc |= dst_u->w64[j+3] = src_u1->w64[j+3] & src_u2->w64[j+3];
5306 }
while (j < bm::set_block_digest_wave_size/2);
5309 digest &= ~(mask << wave);
5351 }
while (b1 < b1_end);
5362 }
while (src1 < src1_end);
5386 count = (src1[0] & src2[0]) |
5387 (src1[1] & src2[1]) |
5388 (src1[2] & src2[2]) |
5389 (src1[3] & src2[3]);
5392 }
while ((src1 < src1_end) && !count);
5430 }
while (b1 < b1_end);
5441 }
while (src1 < src1_end);
5465 count = (src1[0] ^ src2[0]) |
5466 (src1[1] ^ src2[1]) |
5467 (src1[2] ^ src2[2]) |
5468 (src1[3] ^ src2[3]);
5471 }
while (!count && (src1 < src1_end));
5506 }
while (b1 < b1_end);
5517 }
while (src1 < src1_end);
5541 count = (src1[0] & ~src2[0]) |
5542 (src1[1] & ~src2[1]) |
5543 (src1[2] & ~src2[2]) |
5544 (src1[3] & ~src2[3]);
5547 }
while ((src1 < src1_end) && (count == 0));
5583 }
while (b1 < b1_end);
5594 }
while (src1 < src1_end);
5617 count = (src1[0] | src2[0]) |
5618 (src1[1] | src2[1]) |
5619 (src1[2] | src2[2]) |
5620 (src1[3] | src2[3]);
5623 }
while (!count && (src1 < src1_end));
5930 acc &= (dst_ptr[0] |= wrd_ptr[0]);
5931 acc &= (dst_ptr[1] |= wrd_ptr[1]);
5932 acc &= (dst_ptr[2] |= wrd_ptr[2]);
5933 acc &= (dst_ptr[3] |= wrd_ptr[3]);
5935 dst_ptr+=4;wrd_ptr+=4;
5937 }
while (wrd_ptr < wrd_end);
5938 return acc == not_acc;
5968 acc &= (dst_ptr[0] = wrd_ptr1[0] | wrd_ptr2[0]);
5969 acc &= (dst_ptr[1] = wrd_ptr1[1] | wrd_ptr2[1]);
5970 acc &= (dst_ptr[2] = wrd_ptr1[2] | wrd_ptr2[2]);
5971 acc &= (dst_ptr[3] = wrd_ptr1[3] | wrd_ptr2[3]);
5973 dst_ptr+=4; wrd_ptr1+=4; wrd_ptr2+=4;
5975 }
while (wrd_ptr1 < wrd_end1);
5976 return acc == not_acc;
6006 acc |= (dst_ptr[0] = wrd_ptr1[0] ^ wrd_ptr2[0]);
6007 acc |= (dst_ptr[1] = wrd_ptr1[1] ^ wrd_ptr2[1]);
6008 acc |= (dst_ptr[2] = wrd_ptr1[2] ^ wrd_ptr2[2]);
6009 acc |= (dst_ptr[3] = wrd_ptr1[3] ^ wrd_ptr2[3]);
6011 dst_ptr+=4; wrd_ptr1+=4; wrd_ptr2+=4;
6013 }
while (wrd_ptr1 < wrd_end1);
6048 acc &= (dst_ptr[0] |= wrd_ptr1[0] | wrd_ptr2[0]);
6049 acc &= (dst_ptr[1] |= wrd_ptr1[1] | wrd_ptr2[1]);
6050 acc &= (dst_ptr[2] |= wrd_ptr1[2] | wrd_ptr2[2]);
6051 acc &= (dst_ptr[3] |= wrd_ptr1[3] | wrd_ptr2[3]);
6053 dst_ptr+=4; wrd_ptr1+=4;wrd_ptr2+=4;
6055 }
while (wrd_ptr1 < wrd_end1);
6056 return acc == not_acc;
6093 acc &= (dst_ptr[0] |= wrd_ptr1[0] | wrd_ptr2[0] | wrd_ptr3[0] | wrd_ptr4[0]);
6094 acc &= (dst_ptr[1] |= wrd_ptr1[1] | wrd_ptr2[1] | wrd_ptr3[1] | wrd_ptr4[1]);
6095 acc &= (dst_ptr[2] |= wrd_ptr1[2] | wrd_ptr2[2] | wrd_ptr3[2] | wrd_ptr4[2]);
6096 acc &= (dst_ptr[3] |= wrd_ptr1[3] | wrd_ptr2[3] | wrd_ptr3[3] | wrd_ptr4[3]);
6099 wrd_ptr1+=4;wrd_ptr2+=4;wrd_ptr3+=4;wrd_ptr4+=4;
6101 }
while (wrd_ptr1 < wrd_end1);
6102 return acc == not_acc;
6195 for (
unsigned i = 0; i < arr_sz; i+=4)
6197 acc |= dst_u->w64[i] &= ~src_u->w64[i];
6198 acc |= dst_u->w64[i+1] &= ~src_u->w64[i+1];
6199 acc |= dst_u->w64[i+2] &= ~src_u->w64[i+2];
6200 acc |= dst_u->w64[i+3] &= ~src_u->w64[i+3];
6236 #if defined(VECT_SUB_DIGEST) 6239 digest &= ~(mask << wave);
6248 acc |= dst_u->w64[j+0] &= ~src_u->w64[j+0];
6249 acc |= dst_u->w64[j+1] &= ~src_u->w64[j+1];
6250 acc |= dst_u->w64[j+2] &= ~src_u->w64[j+2];
6251 acc |= dst_u->w64[j+3] &= ~src_u->w64[j+3];
6253 }
while (j < bm::set_block_digest_wave_size/2);
6256 digest &= ~(mask << wave);
6297 #if defined(VECT_SUB_DIGEST_2WAY) 6300 digest &= ~(mask << wave);
6313 acc |= dst_u->w64[j+0] = src_u1->w64[j+0] & ~src_u2->w64[j+0];
6314 acc |= dst_u->w64[j+1] = src_u1->w64[j+1] & ~src_u2->w64[j+1];
6315 acc |= dst_u->w64[j+2] = src_u1->w64[j+2] & ~src_u2->w64[j+2];
6316 acc |= dst_u->w64[j+3] = src_u1->w64[j+3] & ~src_u2->w64[j+3];
6318 }
while (j < bm::set_block_digest_wave_size/2);
6321 digest &= ~(mask << wave);
6419 for (
unsigned i = 0; i < arr_sz; i+=4)
6421 acc |= dst_u->w64[i] ^= src_u->w64[i];
6422 acc |= dst_u->w64[i+1] ^= src_u->w64[i+1];
6423 acc |= dst_u->w64[i+2] ^= src_u->w64[i+2];
6424 acc |= dst_u->w64[i+3] ^= src_u->w64[i+3];
6456 dst_ptr+=4; wrd_ptr+=4;
6457 }
while (wrd_ptr < wrd_end);
6478 if (src == dst)
return 0;
6484 if (!src)
return dst;
6490 if (!src)
return dst;
6575 const T* blk_end = blk + data_size - 2;
6581 const T* blk_j = blk + 1;
6582 for (; blk_j < blk_end; ++blk_j)
6593 const T* blk_j = blk + 1;
6594 for ( ; blk_j < blk_end; ++blk_j)
6599 if (blk_j[1] | blk_j[2])
6610 count += unsigned(blk_j - blk) * unsigned(
sizeof(T));
6615 while(blk < blk_end);
6616 return count + unsigned(2 *
sizeof(T));
6640 w &= (1u << bit_pos);
6646 w = block[nword] >> bit_pos;
6651 *pos = unsigned(bit_pos + (nword * 8u *
sizeof(
bm::word_t)));
6661 *pos = unsigned(bit_pos + (i * 8u *
sizeof(
bm::word_t)));
6692 *last = unsigned(idx + (i * 8u *
sizeof(
bm::word_t)));
6724 *first = unsigned(idx + (i * 8u *
sizeof(
bm::word_t)));
6761 *first = unsigned(idx + (i * 8u *
sizeof(
bm::word_t)));
6808 *first = unsigned(idx + (i * 8u *
sizeof(
bm::word_t)));
6837 template<
typename SIZE_TYPE>
6849 unsigned pos = nbit_from;
6859 rank -= bc; pos += unsigned(32u - nbit);
6865 nbit_pos = pos + idx;
6879 nbit_pos = pos + idx;
6893 rank -= bc; pos += 32u;
6897 nbit_pos = pos + idx;
6916 template<
typename SIZE_TYPE>
6942 unsigned total_possible_bitcount,
6944 unsigned block_size)
6950 if ((gap_size < block_size) && (gap_size < arr_size) && (gap_size < inv_arr_size))
6955 if (arr_size < inv_arr_size)
6957 if ((arr_size < block_size) && (arr_size < gap_size))
6964 if ((inv_arr_size < block_size) && (inv_arr_size < gap_size))
6977 template<
typename T>
6985 for (
unsigned bit_idx=0; bit_idx < bits; ++src,bit_idx += unsigned(
sizeof(*src) * 8))
6987 unsigned val = *src ^ mask;
6990 if (pcurr +
sizeof(val)*8 >= dest + dest_len)
7000 return (T)(pcurr - dest);
7015 if (!blk)
return true;
7039 if (blk == 0)
return false;
7059 template<
typename T>
7061 const T* length_end,
7062 const T* glevel_len)
7064 BM_ASSERT(length && length_end && glevel_len);
7066 unsigned overhead = 0;
7067 for (;length < length_end; ++length)
7069 unsigned len = *length;
7072 unsigned capacity = glevel_len[level];
7074 overhead += capacity - len;
7086 template<
typename T>
7088 const T* length_end,
7091 BM_ASSERT(length && length_end && glevel_len);
7093 size_t lsize = size_t(length_end - length);
7099 for (i = 0; i < lsize; ++i)
7101 if (length[i] > max_len)
7102 max_len = length[i];
7106 glevel_len[0] = T(max_len + 4);
7116 unsigned min_overhead =
gap_overhead(length, length_end, glevel_len);
7117 bool is_improved =
false;
7123 unsigned opt_len = 0;
7125 bool imp_flag =
false;
7127 for (j = 0; j < lsize; ++j)
7129 glevel_len[i] = T(length[j] + 4);
7130 unsigned ov =
gap_overhead(length, length_end, glevel_len);
7131 if (ov <= min_overhead)
7134 opt_len = length[j]+4;
7140 glevel_len[i] = (T)opt_len;
7145 glevel_len[i] = gap_saved_value;
7153 T val = *glevel_len;
7155 T* res = glevel_len;
7167 while (++res < (glevel_len + bm::gap_levels))
7242 if (cnt_ < from_ || cnt_ > to_)
7247 return decoder_.get_32();
7262 template<
class It1,
class It2,
class BinaryOp,
class Encoder>
7268 for (
unsigned i = 0; i < block_size; ++i)
7424 return gap2bit_table_[i];
7430 return gapop_table_[i];
7436 return bit_op_count_table_[i];
7443 &gap_and_to_bitset<bm::gap_word_t>,
7444 &gap_add_to_bitset<bm::gap_word_t>,
7445 &gap_sub_to_bitset<bm::gap_word_t>,
7446 &gap_xor_to_bitset<bm::gap_word_t>,
7494 unsigned short cnt0;
7499 #if defined(BMAVX512OPT) || defined(BMAVX2OPT) || defined(BMSSE42OPT) 7504 unsigned short cnt1;
7508 cnt0 = (
unsigned short)(cnt0 + cnt1);
7513 #if defined (BM64_SSE4) || defined(BM64_AVX2) || defined(BM64_AVX512) 7521 const unsigned* idx,
unsigned size,
unsigned start,
7524 typedef unsigned TRGW;
7525 typedef unsigned IDX;
7526 #if defined(BM64_SSE4) 7533 #elif defined(BM64_AVX2) || defined(BM64_AVX512) 7536 avx2_bit_block_gather_scatter(arr, blk, idx, size, start, bit_idx);
7550 template<
typename TRGW,
typename IDX,
typename SZ>
7552 const IDX* idx, SZ size, SZ start,
unsigned bit_idx)
7557 const SZ len = (size - start);
7558 const SZ len_unr = len - (len % 2);
7560 for (; k < len_unr; k+=2)
7562 const SZ base = start + k;
7565 const unsigned nbitB = unsigned(idx[base + 1] & bm::set_block_mask);
7566 arr[base+1] |= (TRGW(
bool(blk[nbitB >>
bm::set_word_shift] & (mask1 << (nbitB & bm::set_word_mask)))) << bit_idx);
7569 for (; k < len; ++k)
7595 for (;(start < size) &&
7619 #if defined(VECT_ARR_BLOCK_LOOKUP) 7622 for (;(start < size) &&
7658 block[nword] |= mask;
7678 const unsigned* BMRESTRICT idx,
7679 unsigned start,
unsigned stop )
7681 #if defined(VECT_SET_BLOCK_BITS) 7684 for (
unsigned i = start; i < stop; ++i)
7686 unsigned n = idx[i];
7691 block[nword] |= mask;
7718 if (i == bm::set_sub_array_size)
7723 for (j = bm::set_sub_array_size-1; j != i; --j)
7740 unsigned from,
unsigned to)
7745 #if defined(VECT_LOWER_BOUND_SCAN_U32) 7748 for (; from <= to; ++from)
7750 if (arr[from] >= target)
7763 unsigned from,
unsigned to)
7769 for (; from <= to; ++from)
7771 if (arr[from] >= target)
7787 unsigned from,
unsigned to)
7791 const unsigned linear_cutoff = 32;
7793 unsigned l = from;
unsigned r = to;
7794 unsigned dist = r - l;
7795 if (dist < linear_cutoff)
7802 unsigned mid = (r-l)/2+l;
7803 if (arr[mid] < target)
7808 if (dist < linear_cutoff)
7822 unsigned from,
unsigned to)
7826 const unsigned linear_cutoff = 32;
7828 unsigned l = from;
unsigned r = to;
7829 unsigned dist = r - l;
7830 if (dist < linear_cutoff)
7837 unsigned mid = (r - l) / 2 + l;
7838 if (arr[mid] < target)
7843 if (dist < linear_cutoff)
7863 return block_idx + base_idx;
7871 return block_idx + base_idx;
7887 unsigned short i16[4];
7899 return (ptr.
i16[1] == 0);
unsigned gap_capacity(const T *buf, const T *glevel_len)
Returs GAP block capacity.
bm::id_t bit_block_count(const bm::word_t *block)
Bitcount for bit block.
void gap_init_range_block(T *buf, T from, T to, T value)
Init gap block so it has block in it (can be whole block)
BMFORCEINLINE void push_back(bm::word_t w)
void gap_buff_op(T *BMRESTRICT dest, const T *BMRESTRICT vect1, unsigned vect1_mask, const T *BMRESTRICT vect2, unsigned vect2_mask, F &f, unsigned &dlen)
Abstract operation for GAP buffers. Receives functor F as a template argument.
void bit_block_copy(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src)
Bitblock copy operation.
unsigned bit_block_xor_any(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Function XORs two bitblocks and and tests for any bit. Function does not analyse availability of sour...
BMFORCEINLINE unsigned or_op(unsigned v1, unsigned v2)
GAP or functor.
void add_bit_block()
cound bit block
void gap_add_to_bitset(unsigned *dest, const T *pcurr, unsigned len)
Adds(OR) GAP block to bitblock.
Structure keeps all-left/right ON bits masks.
unsigned gap_find_last(const T *buf, unsigned *last)
GAP block find the last set bit.
SIZE_TYPE bit_find_rank(const bm::word_t *const block, SIZE_TYPE rank, unsigned nbit_from, unsigned &nbit_pos)
BIT block find position for the rank.
void bit_invert(T *start)
bm::word_t * bit_operation_or(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src)
Block OR operation. Makes analysis if block is 0 or FULL.
helper union to interpret pointer as integers
BMFORCEINLINE unsigned gap_count_and(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2)
GAP bitcount AND operation test.
BMFORCEINLINE gap_word_t * gap_operation_xor(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2, gap_word_t *BMRESTRICT tmp_buf, unsigned &dsize)
GAP XOR operation.
#define VECT_COPY_BLOCK(dst, src)
static BMFORCEINLINE bool is_full_block(const bm::word_t *bp)
void bit_andnot_arr_ffmask(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src)
bitblock AND NOT with constant ~0 mask operation.
#define VECT_BITCOUNT_OR(first, last, mask)
BMFORCEINLINE bm::id_t word_bitcount(bm::id_t w)
Bit COUNT SUB AB functor.
unsigned gap_control_sum(const T *buf)
Calculates sum of all words in GAP block. (For debugging purposes)
size_t gap_blocks
Number of GAP blocks.
bm::id64_t bit_block_xor(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src)
Plain bitblock XOR operation. Function does not analyse availability of source and destination blocks...
ad-hoc conditional expressions
unsigned word_select64(bm::id64_t w, unsigned rank)
word find index of the rank-th bit set by bit-testing
const unsigned set_block_size
bool bit_block_or_5way(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2, const bm::word_t *BMRESTRICT src3, const bm::word_t *BMRESTRICT src4)
5 way (target, source1, source2) bitblock OR operation. Function does not analyse availability of sou...
const unsigned set_word_shift
#define VECT_OR_BLOCK_2WAY(dst, src1, src2)
bm::id_t bit_count_change(bm::word_t w)
void for_each_block(T ***root, unsigned size1, F &f, BLOCK_IDX start)
unsigned bit_block_to_gap(gap_word_t *BMRESTRICT dest, const unsigned *BMRESTRICT block, unsigned dest_len)
Converts bit block to GAP.
unsigned short bitscan_popcnt64(bm::id64_t w, B *bits)
Unpacks 64-bit word into list of ON bit indexes using popcnt method.
const unsigned set_block_digest_pos_shift
unsigned lower_bound_linear_u32(const unsigned *arr, unsigned target, unsigned from, unsigned to)
Linear lower bound search in unsigned array.
const unsigned set_sub_array_size
void gap_xor_to_bitset(unsigned *dest, const T *pcurr)
XOR GAP block to bitblock.
BMFORCEINLINE unsigned sub_op(unsigned v1, unsigned v2)
GAP or functor.
void operator()(unsigned bit_idx0, unsigned bit_idx1)
void for_each_gap_dbit(const T *buf, F &func)
Iterate gap block as delta-bits with a functor.
unsigned bit_block_and_any(const bm::word_t *src1, const bm::word_t *src2)
Function ANDs two bitblocks and tests for any bit. Function does not analyse availability of source b...
bm::id_t gap_bitset_and_any(const unsigned *block, const T *pcurr)
Bitcount test of bit block AND masked by GAP block.
void bit_block_rotate_left_1_unr(bm::word_t *block)
Unrolled cyclic rotation of bit-block left by 1 bit.
unsigned gap_add_value(T *buf, unsigned pos)
Add new value to the end of GAP buffer.
bool for_each_nzblock_if(T ***root, BI size1, F &f)
void operator()(unsigned bit_idx0, unsigned bit_idx1, unsigned bit_idx2, unsigned bit_idx3)
Bit-block store adapter, takes bitblock and saves results into it.
bool find_not_null_ptr(bm::word_t ***arr, N start, N size, N *pos)
#define VECT_AND_DIGEST_5WAY(dst, src1, src2, src3, src4)
unsigned gap_bit_count(const T *buf, unsigned dsize=0)
Calculates number of bits ON in GAP buffer.
const unsigned set_array_shift
unsigned bit_block_and_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Function ANDs two bitblocks and computes the bitcount. Function does not analyse availability of sour...
unsigned word_select64_bitscan(bm::id64_t w, unsigned rank)
word find index of the rank-th bit set by bit-testing
#define IS_VALID_ADDR(addr)
BMFORCEINLINE unsigned gap_operation_any_and(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2)
GAP AND operation test.
D gap_convert_to_arr(D *BMRESTRICT dest, const T *BMRESTRICT buf, unsigned dest_len, bool invert=false)
Convert gap block into array of ints corresponding to 1 bits.
unsigned gap_set_value(unsigned val, T *BMRESTRICT buf, unsigned pos, unsigned *BMRESTRICT is_set)
Sets or clears bit in the GAP buffer.
void gap_set_all(T *buf, unsigned set_max, unsigned value)
Sets all bits to 0 or 1 (GAP)
SIZE_TYPE gap_find_rank(const T *const block, SIZE_TYPE rank, unsigned nbit_from, unsigned &nbit_pos)
GAP block find position for the rank.
Structure carries pointer on bit block with all bits 1.
unsigned long long int id64_t
unsigned gap_overhead(const T *length, const T *length_end, const T *glevel_len)
Calculates memory overhead for number of gap blocks sharing the same memory allocation table (level l...
gap_word_t * gap_operation_sub(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2, gap_word_t *BMRESTRICT tmp_buf, unsigned &dsize)
GAP SUB (AND NOT) operation.
#define VECT_SHIFT_R1(b, acc, co)
unsigned bitcount64_4way(bm::id64_t x, bm::id64_t y, bm::id64_t u, bm::id64_t v)
Adaptor to copy 1 bits to array.
unsigned lower_bound_u64(const unsigned long long *arr, unsigned long long target, unsigned from, unsigned to)
Hybrid, binary-linear lower bound search in unsigned LONG array.
unsigned short bitscan_popcnt(bm::id_t w, B *bits, unsigned short offs)
Unpacks word into list of ON bit indexes using popcnt method.
const unsigned short set_bitscan_wave_size
void set_block_bits_u64(bm::word_t *BMRESTRICT block, const bm::id64_t *BMRESTRICT idx, bm::id64_t start, bm::id64_t stop)
set bits in a bit-block using global index
bm::id_t gap_bitset_sub_any(const unsigned *block, const T *buf)
Compute bitcount test of bit block SUB masked by GAP block.
unsigned bit_block_calc_change(const bm::word_t *block)
bool bit_block_or_3way(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
3 way (target | source1 | source2) bitblock OR operation. Function does not analyse availability of s...
void for_each_nzblock_range(T ***root, N top_size, N nb_from, N nb_to, F &f)
bool block_ptr_array_range(bm::word_t **arr, unsigned &left, unsigned &right)
array range detector
BMFORCEINLINE bm::word_t get_32()
unsigned word_select64_linear(bm::id64_t w, unsigned rank)
word find index of the rank-th bit set by bit-testing
Adapter to get words from a range stream (see range serialized bit-block)
BMFORCEINLINE unsigned gap_count_or(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2)
GAP bitcount OR operation test.
unsigned bit_block_find(const bm::word_t *block, unsigned nbit, unsigned *pos)
Searches for the next 1 bit in the BIT block.
size_t max_serialize_mem
estimated maximum memory for serialization
void gap_convert_to_bitset(unsigned *dest, const T *buf)
GAP block to bitblock conversion.
bm::id_t bit_operation_xor_any(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Performs bitblock XOR operation test.
bm::word_t BM_VECT_ALIGN _p [bm::set_block_size] BM_VECT_ALIGN_ATTR
void for_each_dgap(const T *gap_buf, Func &func)
Bit-block get adapter, takes bitblock and represents it as a get_32() accessor function.
bm::id_t bit_operation_sub_any(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Performs bitblock test of SUB operation.
#define VECT_BITCOUNT(first, last)
unsigned long long gaps_by_level[bm::gap_levels]
number of GAP blocks at each level
bm::word_t * bit_operation_sub(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src)
bitblock SUB operation.
BMFORCEINLINE bm::gap_word_t gap_length(const bm::gap_word_t *buf)
Returs GAP block length.
void xor_bit_block(unsigned *dest, unsigned bitpos, unsigned bitcount)
XOR bit interval to 1 in the bitblock.
BMFORCEINLINE bm::id64_t widx_to_digest_mask(unsigned w_idx)
Compute digest mask for word address in block.
unsigned bit_list(T w, B *bits)
Unpacks word into list of ON bit indexes.
bool bit_block_or(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src)
Plain bitblock OR operation. Function does not analyse availability of source and destination blocks...
bm::id_t bit_operation_or_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Performs bitblock OR operation and calculates bitcount of the result.
void xor_swap(W &x, W &y)
XOR swap two scalar variables.
bool bit_block_shift_l1_unr(bm::word_t *block, bm::word_t *empty_acc, bm::word_t co_flag)
Left bit-shift of bit-block by 1 bit (loop unrolled)
bool check_block_one(const bm::word_t *blk, bool deep_scan)
Checks if block has only 1 bits.
unsigned gap_buff_count_op(const T *vect1, const T *vect2, F f)
Abstract distance(similarity) operation for GAP buffers. Receives functor F as a template argument...
bm::word_t bit_block_insert(bm::word_t *block, unsigned bitpos, bool value)
insert bit into position and shift the rest right with carryover
bm::word_t sum() const
Get accumulated sum.
BMFORCEINLINE unsigned long long bmi_blsi_u64(unsigned long long w)
BMFORCEINLINE void push_back(bm::word_t w)
void bit_block_stream(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src)
Bitblock copy/stream operation.
BMFORCEINLINE unsigned test_bit(const unsigned *block, unsigned bitpos)
Test 1 bit in a block.
#define VECT_ARR_BLOCK_LOOKUP(idx, size, nb, start)
bool gap_shift_l1(T *buf, unsigned co_flag, unsigned *new_len)
Left shift GAP block by 1 bit.
bool bit_block_shift_r1_unr(bm::word_t *block, bm::word_t *empty_acc, bm::word_t co_flag)
Right bit-shift of bit-block by 1 bit (loop unrolled)
bm::id64_t ptrp_test(ptr_payload_t ptr, bm::gap_word_t v)
void bit_for_each(T w, F &func)
Templated algorithm to unpacks word into list of ON bit indexes.
unsigned idx_arr_block_lookup_u32(const unsigned *idx, unsigned size, unsigned nb, unsigned start)
block boundaries look ahead U32
BMFORCEINLINE bool sse42_test_all_zero_wave(const void *ptr)
check if wave of pointers is all NULL
unsigned bit_count_nonzero_size(const T *blk, unsigned data_size)
Inspects block for full zero words.
void gap_and_to_bitset(unsigned *dest, const T *pcurr)
ANDs GAP block to bitblock.
unsigned gap_bit_count_to(const T *const buf, T right)
Counts 1 bits in GAP buffer in the closed [0, right] range.
#define VECT_XOR_BLOCK(dst, src)
#define VECT_BITCOUNT_XOR(first, last, mask)
void bit_for_each_4(T w, F &func)
Templated algorithm to unpacks octet based word into list of ON bit indexes.
bm::id_t gap_bitset_xor_count(const unsigned *block, const T *buf)
Compute bitcount of bit block XOR masked by GAP block.
int parallel_popcnt_32(unsigned int n)
unsigned bit_scan_reverse(T value)
BMFORCEINLINE bool gap_is_all_zero(const bm::gap_word_t *buf)
Checks if GAP block is all-zero.
size_t memory_used
memory usage for all blocks and service tables
#define VECT_SUB_DIGEST_2WAY(dst, src1, src2)
void gap_invert(T *buf)
Inverts all bits in the GAP buffer.
#define VECT_SHIFT_R1_AND(b, co, m, digest)
unsigned gap_test(const T *buf, unsigned pos)
Tests if bit = pos is true.
BMFORCEINLINE unsigned gap_count_xor(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2)
GAP bitcount XOR operation test.
SIZE_TYPE block_find_rank(const bm::word_t *const block, SIZE_TYPE rank, unsigned nbit_from, unsigned &nbit_pos)
Find rank in block (GAP or BIT)
int bitcmp(const T *buf1, const T *buf2, unsigned len)
Lexicographical comparison of BIT buffers.
#define VECT_BLOCK_SET_DIGEST(dst, val)
unsigned bit_find_last(const bm::word_t *block, unsigned *last)
BIT block find the last set bit (backward search)
void operator()(unsigned bit_idx)
bm::id_t bit_block_any_range(const bm::word_t *block, bm::word_t left, bm::word_t right)
BMFORCEINLINE void set_bit(unsigned *dest, unsigned bitpos)
Set 1 bit in a block.
size_t ptr_sub_blocks
Number of sub-blocks.
#define IS_FULL_BLOCK(addr)
unsigned bit_block_sub_any(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Function SUBs two bitblocks and and tests for any bit. Function does not analyse availability of sour...
void for_each_nzblock2(T ***root, unsigned size1, F &f)
unsigned bit_block_or_count(const bm::word_t *src1, const bm::word_t *src2)
Function ORs two bitblocks and computes the bitcount. Function does not analyse availability of sourc...
#define VECT_IS_ZERO_BLOCK(dst)
bool bit_is_all_zero(const bm::word_t *BMRESTRICT start)
Returns "true" if all bits in the block are 0.
int wordcmp(T a, T b)
Lexicographical comparison of two words as bit strings. Auxiliary implementation for testing and refe...
bm::id_t bit_operation_or_any(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Performs bitblock OR operation test.
#define VECT_OR_BLOCK_5WAY(dst, src1, src2, src3, src4)
static all_set_block _block
bool check_zero_digest(bm::id64_t digest, unsigned bitpos_from, unsigned bitpos_to)
check if all digest bits for the range [from..to] are 0
BMFORCEINLINE unsigned long long bmi_bslr_u64(unsigned long long w)
const unsigned gap_max_buff_len
unsigned gap_bfind(const T *buf, unsigned pos, unsigned *is_set)
unsigned gap_find_first(const T *buf, unsigned *first)
GAP block find the first set bit.
unsigned gap_bit_count_range(const T *const buf, unsigned left, unsigned right)
Counts 1 bits in GAP buffer in the closed [left, right] range.
unsigned gap_test_unr(const T *buf, const unsigned pos)
Tests if bit = pos is true. Analog of bm::gap_test with SIMD unrolling.
Bit-block sum adapter, takes values and sums it /internal.
unsigned sse4_gap_find(const bm::gap_word_t *BMRESTRICT pbuf, const bm::gap_word_t pos, const unsigned size)
F bmfor_each(T first, T last, F f)
#define VECT_XOR_BLOCK_2WAY(dst, src1, src2)
void bit_block_set(bm::word_t *BMRESTRICT dst, bm::word_t value)
Bitblock memset operation.
bm::id64_t update_block_digest0(const bm::word_t *const block, bm::id64_t digest)
Compute digest for 64 non-zero areas based on existing digest (function revalidates zero areas) ...
bm::id_t bit_block_calc_count_range(const bm::word_t *block, bm::word_t left, bm::word_t right)
const unsigned gap_levels
BMFORCEINLINE unsigned word_bitcount64(bm::id64_t x)
const unsigned set_array_mask
BMFORCEINLINE bool gap_is_all_one(const bm::gap_word_t *buf)
Checks if GAP block is all-one.
unsigned short gap_word_t
unsigned gap_set_array(T *buf, const T *arr, unsigned len)
Convert array to GAP buffer.
BMFORCEINLINE unsigned gap_operation_any_sub(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2)
GAP SUB operation test.
void gap_sub_to_bitset(unsigned *dest, const T *pcurr)
SUB (AND NOT) GAP block to bitblock.
int gap_calc_level(unsigned len, const T *glevel_len)
Calculates GAP block capacity level.
#define VECT_AND_BLOCK(dst, src)
gap_word_t * gap_operation_or(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2, gap_word_t *BMRESTRICT tmp_buf, unsigned &dsize)
GAP OR operation.
#define VECT_BLOCK_CHANGE(block)
unsigned count_trailing_zeros_u64(bm::id64_t w)
#define VECT_ANDNOT_ARR_2_MASK(dst, src, src_end, mask)
#define VECT_OR_BLOCK_3WAY(dst, src1, src2)
#define VECT_IS_ONE_BLOCK(dst)
BMFORCEINLINE unsigned gap_count_sub(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2)
GAP bitcount SUB (AND NOT) operation test.
void(* gap_operation_to_bitset_func_type)(unsigned *, const gap_word_t *)
bm::set_representation best_representation(unsigned bit_count, unsigned total_possible_bitcount, unsigned gap_count, unsigned block_size)
Choose best representation for a bit-block.
void block_init_digest0(bm::word_t *const block, bm::id64_t digest)
Init block with 000111000 pattren based on digest.
unsigned bit_block_sub_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Function SUBs two bitblocks and computes the bitcount. Function does not analyse availability of sour...
bool bit_block_or_2way(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
2 way (target := source1 | source2) bitblock OR operation.
#define BM_INCWORD_BITCOUNT(cnt, w)
bitblock_get_adapter(const bm::word_t *bit_block)
#define VECT_BITCOUNT_AND(first, last, mask)
void set_gap_level(T *buf, int level)
Sets GAP block capacity level.
static gap_operation_to_bitset_func_type gap_op_to_bit(unsigned i)
bm::id64_t idx_arr_block_lookup_u64(const bm::id64_t *idx, bm::id64_t size, bm::id64_t nb, bm::id64_t start)
block boundaries look ahead U32
void reset()
Reset statisctics.
BMFORCEINLINE unsigned xor_op(unsigned v1, unsigned v2)
GAP xor functor.
void bit_block_rotate_left_1(bm::word_t *block)
BMFORCEINLINE gap_word_t * gap_operation_and(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2, gap_word_t *BMRESTRICT tmp_buf, unsigned &dsize)
GAP AND operation.
bool bit_block_shift_l1(bm::word_t *block, bm::word_t *empty_acc, bm::word_t co_flag)
Left bit-shift bitblock by 1 bit (reference)
void sse4_bit_block_gather_scatter(unsigned *BMRESTRICT arr, const unsigned *BMRESTRICT blk, const unsigned *BMRESTRICT idx, unsigned size, unsigned start, unsigned bit_idx)
bm::id_t gap_bitset_and_count(const unsigned *block, const T *pcurr)
Compute bitcount of bit block AND masked by GAP block.
#define FULL_BLOCK_REAL_ADDR
unsigned bit_to_gap(gap_word_t *BMRESTRICT dest, const unsigned *BMRESTRICT block, unsigned dest_len)
static bit_operation_count_func_type bit_operation_count(unsigned i)
unsigned sse2_gap_find(const bm::gap_word_t *BMRESTRICT pbuf, const bm::gap_word_t pos, const unsigned size)
set_operation
Codes of set operations.
bool bit_block_shift_r1(bm::word_t *block, bm::word_t *empty_acc, bm::word_t co_flag)
Right bit-shift bitblock by 1 bit (reference)
bm::id_t gap_bitset_xor_any(const unsigned *block, const T *buf)
Compute bitcount test of bit block XOR masked by GAP block.
void set_block_bits_u32(bm::word_t *BMRESTRICT block, const unsigned *BMRESTRICT idx, unsigned start, unsigned stop)
set bits in a bit-block using global index
bm::id_t bit_operation_xor_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Performs bitblock XOR operation and calculates bitcount of the result.
bm::word_t * bit_operation_xor(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src)
bitblock XOR operation.
bm::id64_t bit_block_sub(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src)
Plain bitblock SUB (AND NOT) operation. Function does not analyse availability of source and destinat...
unsigned lower_bound_linear_u64(const unsigned long long *arr, unsigned long long target, unsigned from, unsigned to)
Linear lower bound search in unsigned LONG array.
bm::id_t bit_operation_and_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Performs bitblock AND operation and calculates bitcount of the result.
const unsigned bits_in_block
#define VECT_INVERT_BLOCK(first)
#define VECT_SET_BLOCK(dst, value)
bm::id_t gap_bitset_or_any(const unsigned *block, const T *buf)
Compute bitcount test of bit block OR masked by GAP block.
bool bit_block_shift_r1_and(bm::word_t *BMRESTRICT block, bm::word_t co_flag, const bm::word_t *BMRESTRICT mask_block, bm::id64_t *BMRESTRICT digest)
Right bit-shift of bit-block by 1 bit (reference) + AND.
const unsigned gap_max_bits
T * gap_2_dgap(const T *gap_buf, T *dgap_buf, bool copy_head=true)
Convert GAP buffer into D-GAP buffer.
BMFORCEINLINE unsigned gap_operation_any_xor(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2)
GAP XOR operation test.
size_t bit_blocks
Number of bit blocks.
Structure to aid in counting bits table contains count of bits in 0-255 diapason of numbers...
bitblock_store_adapter(bm::word_t *bit_block)
bm::operation setop2op(bm::set_operation op)
Convert set operation to operation.
int wordcmp0(T w1, T w2)
Lexicographical comparison of two words as bit strings (reference) Auxiliary implementation for testi...
BMFORCEINLINE bm::id64_t digest_mask(unsigned from, unsigned to)
Compute digest mask for [from..to] positions.
void bit_recomb(It1 &it1, It2 &it2, BinaryOp &op, Encoder &enc, unsigned block_size=bm::set_block_size)
void or_bit_block(unsigned *dest, unsigned bitpos, unsigned bitcount)
Sets bits to 1 in the bitblock.
bool improve_gap_levels(const T *length, const T *length_end, T *glevel_len)
Finds optimal gap blocks lengths.
const unsigned set_block_mask
#define VECT_STREAM_BLOCK(dst, src)
unsigned bit_find_first(const bm::word_t *block, unsigned *first)
BIT block find the first set bit.
unsigned bit_block_xor_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Function XORs two bitblocks and computes the bitcount. Function does not analyse availability of sour...
bm::id_t bit_operation_sub_count_inv(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Performs inverted bitblock SUB operation and calculates bitcount of the result.
bool bit_block_shift_r1_and_unr(bm::word_t *BMRESTRICT block, bm::word_t co_flag, const bm::word_t *BMRESTRICT mask_block, bm::id64_t *BMRESTRICT digest)
Right bit-shift bitblock by 1 bit (reference) + AND.
bm::id_t bit_block_calc_count(const bm::word_t *block, const bm::word_t *block_end)
Bitcount for bit string.
#define VECT_IS_DIGEST_ZERO(start)
unsigned gap_buff_any_op(const T *BMRESTRICT vect1, unsigned vect1_mask, const T *BMRESTRICT vect2, unsigned vect2_mask, F f)
Abstract distance test operation for GAP buffers. Receives functor F as a template argument...
static BMFORCEINLINE bool is_valid_block_addr(const bm::word_t *bp)
unsigned gap_bit_count_unr(const T *buf)
Calculates number of bits ON in GAP buffer. Loop unrolled version.
bm::id64_t bit_block_sub_2way(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2, bm::id64_t digest)
digest based bitblock SUB (AND NOT) operation (3 operand)
unsigned bit_block_change32(const bm::word_t *block)
bm::id_t(* bit_operation_count_func_type)(const bm::word_t *BMRESTRICT, const bm::word_t *BMRESTRICT)
const unsigned set_word_mask
bm::word_t * bit_operation_and(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src)
bitblock AND operation.
#define VECT_SUB_DIGEST(dst, src)
void dgap_2_gap(const T *dgap_buf, T *gap_buf, T gap_header=0)
Convert D-GAP buffer into GAP buffer.
bm::id_t gap_bitset_or_count(const unsigned *block, const T *buf)
Compute bitcount of bit block OR masked by GAP block.
unsigned bit_scan_forward32(unsigned value)
bm::id_t bit_operation_and_any(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Performs bitblock AND operation test.
bm::id64_t sum_arr(T *first, T *last)
unsigned bit_block_or_any(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Function ORs two bitblocks and and tests for any bit. Function does not analyse availability of sourc...
unsigned short bitscan_wave(const bm::word_t *w_ptr, unsigned char *bits)
Unpacks word wave (Nx 32-bit words)
bit-decode cache structure
bool gap_shift_r1(T *buf, unsigned co_flag, unsigned *new_len)
Right shift GAP block by 1 bit.
unsigned lower_bound_u32(const unsigned *arr, unsigned target, unsigned from, unsigned to)
Hybrid, binary-linear lower bound search in unsigned array.
gap_word_t gap_levels[bm::gap_levels]
GAP block lengths in the bvect.
bm::id64_t bit_block_and(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src)
Plain bitblock AND operation. Function does not analyse availability of source and destination blocks...
#define VECT_SET_BLOCK_BITS(block, idx, start, stop)
unsigned gap_limit(const T *buf, const T *glevel_len)
Returs GAP block capacity limit.
unsigned short bitscan(V w, B *bits)
unsigned * gap_convert_to_bitset_smart(unsigned *dest, const T *buf, id_t set_max)
Smart GAP block to bitblock conversion.
unsigned gap_free_elements(const T *buf, const T *glevel_len)
Returns number of free elements in GAP block array. Difference between GAP block capacity on this lev...
T bit_convert_to_arr(T *BMRESTRICT dest, const unsigned *BMRESTRICT src, bm::id_t bits, unsigned dest_len, unsigned mask=0)
Convert bit block into an array of ints corresponding to 1 bits.
void operator()(unsigned bit_idx0, unsigned bit_idx1, unsigned bit_idx2)
bm::id64_t bit_block_xor_2way(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
2 way (target := source1 ^ source2) bitblock XOR operation.
static gap_operation_func_type gap_operation(unsigned i)
size_t bv_count
Number of bit-vectors.
BMFORCEINLINE unsigned and_op(unsigned v1, unsigned v2)
GAP and functor.
const bm::gap_word_t * sse2_gap_sum_arr(const bm::gap_word_t *BMRESTRICT pbuf, unsigned sse_vect_waves, unsigned *sum)
Gap block population count (array sum) utility.
#define VECT_BITCOUNT_SUB(first, last, mask)
unsigned count_leading_zeros_u64(bm::id64_t w)
64-bit bit-scan reverse
bm::id64_t calc_block_digest0(const bm::word_t *const block)
Compute digest for 64 non-zero areas.
bool bit_find_first_if_1(const bm::word_t *block, unsigned *first, bm::id64_t digest)
BIT block find the first set bit if only 1 bit is set.
bm::id_t gap_bitset_sub_count(const unsigned *block, const T *buf)
Compute bitcount of bit block SUB masked by GAP block.
bool is_bits_one(const bm::wordop_t *start)
Returns "true" if all bits in the block are 1.
unsigned bit_array_compute_gaps(const T *arr, unsigned len)
Compute number of GAPs in bit-array.
Structure with statistical information about memory allocation footprint, serialization projection...
const unsigned set_block_shift
void add(const bv_statistics &st)
Sum data from another sttructure.
bm::id_t block_to_global_index(unsigned i, unsigned j, unsigned block_idx)
calculate bvector<> global bit-index from block-local coords
T gap_level(const T *buf)
Returs GAP blocks capacity level.
bm::id64_t bit_block_and_2way(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2, bm::id64_t digest)
digest based bit-block AND
#define VECT_SHIFT_L1(b, acc, co)
bm::id_t bit_block_calc_count_to(const bm::word_t *block, bm::word_t right)
bool is_const_set_operation(set_operation op)
Returns true if set operation is constant (bitcount)
copy_to_array_functor(B *bits)
bm::id_t bit_operation_sub_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Performs bitblock SUB operation and calculates bitcount of the result.
#define IS_EMPTY_BLOCK(addr)
void for_each_nzblock(T ***root, unsigned size1, F &f)
#define VECT_AND_DIGEST_2WAY(dst, src1, src2)
set_representation
set representation variants
#define VECT_LOWER_BOUND_SCAN_U32(arr, target, from, to)
static bm::id64_t block_type(const bm::word_t *bp)
void bit_block_gather_scatter(unsigned *arr, const bm::word_t *blk, const unsigned *idx, unsigned size, unsigned start, unsigned bit_idx)
bit index to word gather-scatter algorithm (SIMD)
#define VECT_OR_BLOCK(dst, src)
bm::id64_t bit_block_and_5way(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src0, const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2, const bm::word_t *BMRESTRICT src3, bm::id64_t digest)
digest based bit-block AND 5-way
int gapcmp(const T *buf1, const T *buf2)
Lexicographical comparison of GAP buffers.
void add_gap_block(unsigned capacity, unsigned length)
count gap block
unsigned bit_scan_reverse32(unsigned value)
#define VECT_AND_DIGEST(dst, src)
Bit manipulation primitives (internal)
const id64_t all_bits_mask
#define FULL_BLOCK_FAKE_ADDR
bool check_block_zero(const bm::word_t *blk, bool deep_scan)
Checks all conditions and returns true if block consists of only 0 bits.
void sub_bit_block(unsigned *dest, unsigned bitpos, unsigned bitcount)
SUB (AND NOT) bit interval to 1 in the bitblock.
bm::word_t BM_VECT_ALIGN *_s [bm::set_sub_array_size] BM_VECT_ALIGN_ATTR
gap_word_t *(* gap_operation_func_type)(const gap_word_t *BMRESTRICT, const gap_word_t *BMRESTRICT, gap_word_t *BMRESTRICT, unsigned &)
unsigned gap_block_find(const T *buf, unsigned nbit, bm::id_t *prev)
Searches for the next 1 bit in the GAP block.
#define VECT_SUB_BLOCK(dst, src)
const unsigned set_block_digest_wave_size
decoder_range_adapter(DEC &dec, unsigned from_idx, unsigned to_idx)
BMFORCEINLINE void clear_bit(unsigned *dest, unsigned bitpos)
Set 1 bit in a block.
unsigned bit_list_4(T w, B *bits)
Unpacks word into list of ON bit indexes (quad-bit based)
void bit_block_erase(bm::word_t *block, unsigned bitpos, bool carry_over)
erase bit from position and shift the rest right with carryover