Lalit Maganti | cf39caa | 2019-08-21 14:00:08 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2019 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
Lalit Maganti | 2aa8858 | 2019-12-17 17:06:49 +0000 | [diff] [blame] | 17 | #include "src/trace_processor/containers/bit_vector.h" |
Lalit Maganti | cf39caa | 2019-08-21 14:00:08 -0700 | [diff] [blame] | 18 | |
Lalit Maganti | 0fc8543 | 2022-06-17 15:26:22 +0100 | [diff] [blame] | 19 | #include <limits> |
| 20 | |
Lalit Maganti | 2aa8858 | 2019-12-17 17:06:49 +0000 | [diff] [blame] | 21 | #include "src/trace_processor/containers/bit_vector_iterators.h" |
Lalit Maganti | 7ed308f | 2019-10-17 11:38:24 +0100 | [diff] [blame] | 22 | |
Lalit Maganti | 25a4534 | 2022-06-14 14:56:48 +0100 | [diff] [blame] | 23 | #if PERFETTO_BUILDFLAG(PERFETTO_X64_CPU_OPT) |
| 24 | #include <immintrin.h> |
| 25 | #endif |
| 26 | |
Lalit Maganti | cf39caa | 2019-08-21 14:00:08 -0700 | [diff] [blame] | 27 | namespace perfetto { |
| 28 | namespace trace_processor { |
Lalit Maganti | 5ec4fc1 | 2022-06-13 19:32:32 +0100 | [diff] [blame] | 29 | namespace { |
| 30 | |
| 31 | // This function implements the PDEP instruction in x64 as a loop. |
| 32 | // See https://www.felixcloutier.com/x86/pdep for details on what PDEP does. |
| 33 | // |
| 34 | // Unfortunately, as we're emulating this in software, it scales with the number |
| 35 | // of set bits in |mask| rather than being a constant time instruction: |
| 36 | // therefore, this should be avoided where real instructions are available. |
| 37 | uint64_t PdepSlow(uint64_t word, uint64_t mask) { |
| 38 | if (word == 0 || mask == std::numeric_limits<uint64_t>::max()) |
| 39 | return word; |
| 40 | |
| 41 | // This algorithm is for calculating PDEP was found to be the fastest "simple" |
| 42 | // one among those tested when writing this function. |
| 43 | uint64_t result = 0; |
| 44 | for (uint64_t bb = 1; mask; bb += bb) { |
Lalit Maganti | 735f2a2 | 2022-10-26 13:34:50 +0100 | [diff] [blame] | 45 | if (word & bb) { |
| 46 | // MSVC doesn't like -mask so work around this by doing 0 - mask. |
| 47 | result |= mask & (0ull - mask); |
| 48 | } |
Lalit Maganti | 5ec4fc1 | 2022-06-13 19:32:32 +0100 | [diff] [blame] | 49 | mask &= mask - 1; |
| 50 | } |
| 51 | return result; |
| 52 | } |
| 53 | |
Lalit Maganti | 25a4534 | 2022-06-14 14:56:48 +0100 | [diff] [blame] | 54 | // See |PdepSlow| for information on PDEP. |
| 55 | uint64_t Pdep(uint64_t word, uint64_t mask) { |
| 56 | #if PERFETTO_BUILDFLAG(PERFETTO_X64_CPU_OPT) |
| 57 | base::ignore_result(PdepSlow); |
| 58 | return _pdep_u64(word, mask); |
| 59 | #else |
| 60 | return PdepSlow(word, mask); |
| 61 | #endif |
| 62 | } |
| 63 | |
Lalit Maganti | 5ec4fc1 | 2022-06-13 19:32:32 +0100 | [diff] [blame] | 64 | } // namespace |
Lalit Maganti | cf39caa | 2019-08-21 14:00:08 -0700 | [diff] [blame] | 65 | |
Lalit Maganti | 8aee7f0 | 2019-09-26 15:57:46 +0100 | [diff] [blame] | 66 | BitVector::BitVector() = default; |
Lalit Maganti | cf39caa | 2019-08-21 14:00:08 -0700 | [diff] [blame] | 67 | |
Lalit Maganti | 1872e13 | 2019-10-22 11:59:46 +0100 | [diff] [blame] | 68 | BitVector::BitVector(std::initializer_list<bool> init) { |
| 69 | for (bool x : init) { |
| 70 | if (x) { |
| 71 | AppendTrue(); |
| 72 | } else { |
| 73 | AppendFalse(); |
| 74 | } |
| 75 | } |
| 76 | } |
| 77 | |
Lalit Maganti | 8aee7f0 | 2019-09-26 15:57:46 +0100 | [diff] [blame] | 78 | BitVector::BitVector(uint32_t count, bool value) { |
| 79 | Resize(count, value); |
| 80 | } |
| 81 | |
Anna Mayzner | 7c24af6 | 2023-04-14 15:26:07 +0000 | [diff] [blame] | 82 | BitVector::BitVector(std::vector<uint64_t> words, |
Lalit Maganti | 8aee7f0 | 2019-09-26 15:57:46 +0100 | [diff] [blame] | 83 | std::vector<uint32_t> counts, |
| 84 | uint32_t size) |
Anna Mayzner | 7c24af6 | 2023-04-14 15:26:07 +0000 | [diff] [blame] | 85 | : size_(size), counts_(std::move(counts)), words_(std::move(words)) { |
Lalit Maganti | d59b38e | 2023-06-26 16:37:48 +0100 | [diff] [blame] | 86 | PERFETTO_CHECK(words_.size() % Block::kWords == 0); |
Anna Mayzner | 7c24af6 | 2023-04-14 15:26:07 +0000 | [diff] [blame] | 87 | } |
Lalit Maganti | cf39caa | 2019-08-21 14:00:08 -0700 | [diff] [blame] | 88 | |
Anna Mayzner | fbee319 | 2023-04-26 12:50:47 +0000 | [diff] [blame] | 89 | void BitVector::Resize(uint32_t new_size, bool filler) { |
| 90 | uint32_t old_size = size_; |
| 91 | if (new_size == old_size) |
| 92 | return; |
| 93 | |
| 94 | // Empty bitvectors should be memory efficient so we don't keep any data |
| 95 | // around in the bitvector. |
| 96 | if (new_size == 0) { |
| 97 | words_.clear(); |
| 98 | counts_.clear(); |
| 99 | size_ = 0; |
| 100 | return; |
| 101 | } |
| 102 | |
| 103 | // Compute the address of the new last bit in the bitvector. |
| 104 | Address last_addr = IndexToAddress(new_size - 1); |
| 105 | uint32_t old_blocks_size = static_cast<uint32_t>(counts_.size()); |
| 106 | uint32_t new_blocks_size = last_addr.block_idx + 1; |
| 107 | |
| 108 | // Resize the block and count vectors to have the correct number of entries. |
| 109 | words_.resize(Block::kWords * new_blocks_size); |
| 110 | counts_.resize(new_blocks_size); |
| 111 | |
| 112 | if (new_size > old_size) { |
| 113 | if (filler) { |
| 114 | // If the new space should be filled with ones, then set all the bits |
| 115 | // between the address of the old size and the new last address. |
| 116 | const Address& start = IndexToAddress(old_size); |
| 117 | Set(start, last_addr); |
| 118 | |
| 119 | // We then need to update the counts vector to match the changes we |
| 120 | // made to the blocks. |
| 121 | |
| 122 | // We start by adding the bits we set in the first block to the |
| 123 | // cummulative count before the range we changed. |
| 124 | Address end_of_block = {start.block_idx, |
| 125 | {Block::kWords - 1, BitWord::kBits - 1}}; |
| 126 | uint32_t count_in_block_after_end = |
| 127 | AddressToIndex(end_of_block) - AddressToIndex(start) + 1; |
| 128 | uint32_t set_count = CountSetBits() + count_in_block_after_end; |
| 129 | |
| 130 | for (uint32_t i = start.block_idx + 1; i <= last_addr.block_idx; ++i) { |
| 131 | // Set the count to the cummulative count so far. |
| 132 | counts_[i] = set_count; |
| 133 | |
| 134 | // Add a full block of set bits to the count. |
| 135 | set_count += Block::kBits; |
| 136 | } |
| 137 | } else { |
| 138 | // If the newly added bits are false, we just need to update the |
| 139 | // counts vector with the current size of the bitvector for all |
| 140 | // the newly added blocks. |
| 141 | if (new_blocks_size > old_blocks_size) { |
| 142 | uint32_t count = CountSetBits(); |
| 143 | for (uint32_t i = old_blocks_size; i < new_blocks_size; ++i) { |
| 144 | counts_[i] = count; |
| 145 | } |
| 146 | } |
| 147 | } |
| 148 | } else { |
| 149 | // Throw away all the bits after the new last bit. We do this to make |
| 150 | // future lookup, append and resize operations not have to worrying about |
| 151 | // trailing garbage bits in the last block. |
| 152 | BlockFromIndex(last_addr.block_idx).ClearAfter(last_addr.block_offset); |
| 153 | } |
| 154 | |
| 155 | // Actually update the size. |
| 156 | size_ = new_size; |
| 157 | } |
| 158 | |
Lalit Maganti | cf39caa | 2019-08-21 14:00:08 -0700 | [diff] [blame] | 159 | BitVector BitVector::Copy() const { |
Anna Mayzner | 7c24af6 | 2023-04-14 15:26:07 +0000 | [diff] [blame] | 160 | return BitVector(words_, counts_, size_); |
Lalit Maganti | cf39caa | 2019-08-21 14:00:08 -0700 | [diff] [blame] | 161 | } |
| 162 | |
Lalit Maganti | 7ed308f | 2019-10-17 11:38:24 +0100 | [diff] [blame] | 163 | BitVector::AllBitsIterator BitVector::IterateAllBits() const { |
| 164 | return AllBitsIterator(this); |
| 165 | } |
| 166 | |
Lalit Maganti | 8014084 | 2019-10-17 18:15:57 +0100 | [diff] [blame] | 167 | BitVector::SetBitsIterator BitVector::IterateSetBits() const { |
| 168 | return SetBitsIterator(this); |
| 169 | } |
| 170 | |
Anna Mayzner | 96f359e | 2023-06-06 17:00:24 +0000 | [diff] [blame] | 171 | void BitVector::Not() { |
| 172 | for (uint32_t i = 0; i < words_.size(); ++i) { |
| 173 | BitWord(&words_[i]).Not(); |
Anna Mayzner | fbee319 | 2023-04-26 12:50:47 +0000 | [diff] [blame] | 174 | } |
| 175 | |
Anna Mayzner | 96f359e | 2023-06-06 17:00:24 +0000 | [diff] [blame] | 176 | for (uint32_t i = 1; i < counts_.size(); ++i) { |
| 177 | counts_[i] = kBitsInBlock * i - counts_[i]; |
Anna Mayzner | fbee319 | 2023-04-26 12:50:47 +0000 | [diff] [blame] | 178 | } |
Anna Mayzner | fbee319 | 2023-04-26 12:50:47 +0000 | [diff] [blame] | 179 | } |
| 180 | |
Anna Mayzner | 525110e | 2023-06-04 13:17:57 +0000 | [diff] [blame] | 181 | void BitVector::Or(const BitVector& sec) { |
Anna Mayzner | aba48f1 | 2023-06-01 13:44:18 +0000 | [diff] [blame] | 182 | PERFETTO_CHECK(size_ == sec.size()); |
| 183 | for (uint32_t i = 0; i < words_.size(); ++i) { |
| 184 | BitWord(&words_[i]).Or(sec.words_[i]); |
| 185 | } |
| 186 | |
| 187 | for (uint32_t i = 1; i < counts_.size(); ++i) { |
| 188 | counts_[i] = counts_[i - 1] + |
| 189 | ConstBlock(&words_[Block::kWords * (i - 1)]).CountSetBits(); |
| 190 | } |
| 191 | } |
| 192 | |
Anna Mayzner | 525110e | 2023-06-04 13:17:57 +0000 | [diff] [blame] | 193 | void BitVector::And(const BitVector& sec) { |
| 194 | Resize(std::min(size_, sec.size_)); |
| 195 | for (uint32_t i = 0; i < words_.size(); ++i) { |
| 196 | BitWord(&words_[i]).And(sec.words_[i]); |
| 197 | } |
| 198 | |
| 199 | for (uint32_t i = 1; i < counts_.size(); ++i) { |
| 200 | counts_[i] = counts_[i - 1] + |
| 201 | ConstBlock(&words_[Block::kWords * (i - 1)]).CountSetBits(); |
| 202 | } |
| 203 | } |
| 204 | |
Lalit Maganti | 5ec4fc1 | 2022-06-13 19:32:32 +0100 | [diff] [blame] | 205 | void BitVector::UpdateSetBits(const BitVector& update) { |
Anna Mayzner | fbee319 | 2023-04-26 12:50:47 +0000 | [diff] [blame] | 206 | if (update.CountSetBits() == 0 || CountSetBits() == 0) { |
| 207 | *this = BitVector(); |
| 208 | return; |
| 209 | } |
Lalit Maganti | 5ec4fc1 | 2022-06-13 19:32:32 +0100 | [diff] [blame] | 210 | PERFETTO_DCHECK(update.size() <= CountSetBits()); |
Lalit Maganti | 7ed308f | 2019-10-17 11:38:24 +0100 | [diff] [blame] | 211 | |
Lalit Maganti | 5ec4fc1 | 2022-06-13 19:32:32 +0100 | [diff] [blame] | 212 | // Get the start and end ptrs for the current bitvector. |
| 213 | // Safe because of the static_assert above. |
Anna Mayzner | fbee319 | 2023-04-26 12:50:47 +0000 | [diff] [blame] | 214 | uint64_t* ptr = words_.data(); |
| 215 | const uint64_t* ptr_end = ptr + WordCount(size()); |
Lalit Maganti | 5ec4fc1 | 2022-06-13 19:32:32 +0100 | [diff] [blame] | 216 | |
Anna Mayzner | fbee319 | 2023-04-26 12:50:47 +0000 | [diff] [blame] | 217 | // Get the start and end ptrs for the update bitvector. |
Lalit Maganti | 5ec4fc1 | 2022-06-13 19:32:32 +0100 | [diff] [blame] | 218 | // Safe because of the static_assert above. |
Anna Mayzner | fbee319 | 2023-04-26 12:50:47 +0000 | [diff] [blame] | 219 | const uint64_t* update_ptr = update.words_.data(); |
| 220 | const uint64_t* update_ptr_end = update_ptr + WordCount(update.size()); |
Lalit Maganti | 5ec4fc1 | 2022-06-13 19:32:32 +0100 | [diff] [blame] | 221 | |
| 222 | // |update_unused_bits| contains |unused_bits_count| bits at the bottom |
Anna Mayzner | fbee319 | 2023-04-26 12:50:47 +0000 | [diff] [blame] | 223 | // which indicates how the next |unused_bits_count| set bits in |this| |
Lalit Maganti | 5ec4fc1 | 2022-06-13 19:32:32 +0100 | [diff] [blame] | 224 | // should be changed. This is necessary because word boundaries in |this| will |
| 225 | // almost always *not* match the word boundaries in |update|. |
| 226 | uint64_t update_unused_bits = 0; |
| 227 | uint8_t unused_bits_count = 0; |
| 228 | |
Anna Mayzner | ebabe0a | 2023-04-25 07:07:07 +0000 | [diff] [blame] | 229 | // The basic premise of this loop is, for each word in |this| we find |
Lalit Maganti | 5ec4fc1 | 2022-06-13 19:32:32 +0100 | [diff] [blame] | 230 | // enough bits from |update| to cover every set bit in the word. We then use |
| 231 | // the PDEP x64 instruction (or equivalent instructions/software emulation) to |
| 232 | // update the word and store it back in |this|. |
| 233 | for (; ptr != ptr_end; ++ptr) { |
| 234 | uint64_t current = *ptr; |
| 235 | |
| 236 | // If the current value is all zeros, there's nothing to update. |
| 237 | if (PERFETTO_UNLIKELY(current == 0)) |
| 238 | continue; |
| 239 | |
| 240 | uint8_t popcount = static_cast<uint8_t>(PERFETTO_POPCOUNT(current)); |
| 241 | PERFETTO_DCHECK(popcount >= 1); |
| 242 | |
| 243 | // Check if we have enough unused bits from the previous iteration - if so, |
| 244 | // we don't need to read anything from |update|. |
| 245 | uint64_t update_for_current = update_unused_bits; |
| 246 | if (unused_bits_count >= popcount) { |
| 247 | // We have enough bits so just do the accounting to not reuse these bits |
| 248 | // for the future. |
| 249 | unused_bits_count -= popcount; |
| 250 | update_unused_bits = popcount == 64 ? 0 : update_unused_bits >> popcount; |
| 251 | } else { |
| 252 | // We don't have enough bits so we need to read the next word of bits from |
| 253 | // |current|. |
| 254 | uint64_t next_update = update_ptr == update_ptr_end ? 0 : *update_ptr++; |
| 255 | |
| 256 | // Bitwise or |64 - unused_bits_count| bits from the bottom of |
| 257 | // |next_update| to the top of |update_for_current|. Only |popcount| bits |
| 258 | // will actually be used by PDEP but masking off the unused bits takes |
| 259 | // *more* instructions than not doing anything. |
| 260 | update_for_current |= next_update << unused_bits_count; |
| 261 | |
| 262 | // PDEP will use |popcount| bits from update: this means it will use |
| 263 | // |unused_bits_count| from |update_for_current| and |popcount - |
| 264 | // unused_bits_count| from |next_update| |
| 265 | uint8_t used_next_bits = popcount - unused_bits_count; |
| 266 | |
| 267 | // Shift off any bits which will be used by current and store the |
| 268 | // remainder for use in the next iteration. |
| 269 | update_unused_bits = |
| 270 | used_next_bits == 64 ? 0 : next_update >> used_next_bits; |
| 271 | unused_bits_count = 64 - used_next_bits; |
| 272 | } |
| 273 | |
| 274 | // We should never end up with more than 64 bits available. |
| 275 | PERFETTO_CHECK(unused_bits_count <= 64); |
| 276 | |
| 277 | // PDEP precisely captures the notion of "updating set bits" for a single |
| 278 | // word. |
Lalit Maganti | 25a4534 | 2022-06-14 14:56:48 +0100 | [diff] [blame] | 279 | *ptr = Pdep(update_for_current, current); |
Lalit Maganti | 5ec4fc1 | 2022-06-13 19:32:32 +0100 | [diff] [blame] | 280 | } |
| 281 | |
| 282 | // We shouldn't have any non-zero unused bits and we should have consumed the |
| 283 | // whole |update| bitvector. Note that we cannot really say anything about |
| 284 | // |unused_bits_count| because it's possible for the above algorithm to use |
| 285 | // some bits which are "past the end" of |update|; as long as these bits are |
| 286 | // zero, it meets the pre-condition of this function. |
| 287 | PERFETTO_DCHECK(update_unused_bits == 0); |
| 288 | PERFETTO_DCHECK(update_ptr == update_ptr_end); |
| 289 | |
| 290 | for (uint32_t i = 0; i < counts_.size() - 1; ++i) { |
Anna Mayzner | 7c24af6 | 2023-04-14 15:26:07 +0000 | [diff] [blame] | 291 | counts_[i + 1] = counts_[i] + ConstBlockFromIndex(i).CountSetBits(); |
Lalit Maganti | 7ed308f | 2019-10-17 11:38:24 +0100 | [diff] [blame] | 292 | } |
Lalit Maganti | e7d90e0 | 2019-10-18 17:02:15 +0100 | [diff] [blame] | 293 | |
| 294 | // After the loop, we should have precisely the same number of bits |
Lalit Maganti | 5ec4fc1 | 2022-06-13 19:32:32 +0100 | [diff] [blame] | 295 | // set as |update|. |
| 296 | PERFETTO_DCHECK(update.CountSetBits() == CountSetBits()); |
Lalit Maganti | 7ed308f | 2019-10-17 11:38:24 +0100 | [diff] [blame] | 297 | } |
| 298 | |
Anna Mayzner | ebabe0a | 2023-04-25 07:07:07 +0000 | [diff] [blame] | 299 | BitVector BitVector::IntersectRange(uint32_t range_start, |
| 300 | uint32_t range_end) const { |
Anna Mayzner | ebabe0a | 2023-04-25 07:07:07 +0000 | [diff] [blame] | 301 | // We should skip all bits until the index of first set bit bigger than |
| 302 | // |range_start|. |
Anna Mayzner | fbee319 | 2023-04-26 12:50:47 +0000 | [diff] [blame] | 303 | uint32_t end_idx = std::min(range_end, size()); |
Anna Mayzner | ebabe0a | 2023-04-25 07:07:07 +0000 | [diff] [blame] | 304 | |
Anna Mayzner | 525110e | 2023-06-04 13:17:57 +0000 | [diff] [blame] | 305 | if (range_start >= end_idx) |
Anna Mayzner | ebabe0a | 2023-04-25 07:07:07 +0000 | [diff] [blame] | 306 | return BitVector(); |
| 307 | |
Lalit Maganti | 2c05ea8 | 2023-06-26 17:36:09 +0100 | [diff] [blame] | 308 | Builder builder(end_idx, range_start); |
Anna Mayzner | ebabe0a | 2023-04-25 07:07:07 +0000 | [diff] [blame] | 309 | uint32_t front_bits = builder.BitsUntilWordBoundaryOrFull(); |
Anna Mayzner | 525110e | 2023-06-04 13:17:57 +0000 | [diff] [blame] | 310 | uint32_t cur_index = range_start; |
Anna Mayzner | ebabe0a | 2023-04-25 07:07:07 +0000 | [diff] [blame] | 311 | for (uint32_t i = 0; i < front_bits; ++i, ++cur_index) { |
| 312 | builder.Append(IsSet(cur_index)); |
| 313 | } |
| 314 | |
Anna Mayzner | fbee319 | 2023-04-26 12:50:47 +0000 | [diff] [blame] | 315 | PERFETTO_DCHECK(cur_index == end_idx || cur_index % BitWord::kBits == 0); |
Anna Mayzner | ebabe0a | 2023-04-25 07:07:07 +0000 | [diff] [blame] | 316 | uint32_t cur_words = cur_index / BitWord::kBits; |
| 317 | uint32_t full_words = builder.BitsInCompleteWordsUntilFull() / BitWord::kBits; |
| 318 | uint32_t total_full_words = cur_words + full_words; |
| 319 | for (; cur_words < total_full_words; ++cur_words) { |
| 320 | builder.AppendWord(words_[cur_words]); |
| 321 | } |
| 322 | |
| 323 | uint32_t last_bits = builder.BitsUntilFull(); |
| 324 | cur_index += full_words * BitWord::kBits; |
| 325 | for (uint32_t i = 0; i < last_bits; ++i, ++cur_index) { |
| 326 | builder.Append(IsSet(cur_index)); |
| 327 | } |
| 328 | |
| 329 | return std::move(builder).Build(); |
| 330 | } |
| 331 | |
Lalit Maganti | cf39caa | 2019-08-21 14:00:08 -0700 | [diff] [blame] | 332 | } // namespace trace_processor |
| 333 | } // namespace perfetto |