blob: dc5de84800bceca7f7e40b4887f2333446d40493 [file] [log] [blame]
Lalit Maganticf39caa2019-08-21 14:00:08 -07001/*
2 * Copyright (C) 2019 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
Lalit Maganti2aa88582019-12-17 17:06:49 +000017#include "src/trace_processor/containers/bit_vector.h"
Lalit Maganticf39caa2019-08-21 14:00:08 -070018
Lalit Maganti0fc85432022-06-17 15:26:22 +010019#include <limits>
20
Lalit Maganti2aa88582019-12-17 17:06:49 +000021#include "src/trace_processor/containers/bit_vector_iterators.h"
Lalit Maganti7ed308f2019-10-17 11:38:24 +010022
Lalit Maganti25a45342022-06-14 14:56:48 +010023#if PERFETTO_BUILDFLAG(PERFETTO_X64_CPU_OPT)
24#include <immintrin.h>
25#endif
26
Lalit Maganticf39caa2019-08-21 14:00:08 -070027namespace perfetto {
28namespace trace_processor {
Lalit Maganti5ec4fc12022-06-13 19:32:32 +010029namespace {
30
31// This function implements the PDEP instruction in x64 as a loop.
32// See https://www.felixcloutier.com/x86/pdep for details on what PDEP does.
33//
34// Unfortunately, as we're emulating this in software, it scales with the number
35// of set bits in |mask| rather than being a constant time instruction:
36// therefore, this should be avoided where real instructions are available.
37uint64_t PdepSlow(uint64_t word, uint64_t mask) {
38 if (word == 0 || mask == std::numeric_limits<uint64_t>::max())
39 return word;
40
41 // This algorithm is for calculating PDEP was found to be the fastest "simple"
42 // one among those tested when writing this function.
43 uint64_t result = 0;
44 for (uint64_t bb = 1; mask; bb += bb) {
Lalit Maganti735f2a22022-10-26 13:34:50 +010045 if (word & bb) {
46 // MSVC doesn't like -mask so work around this by doing 0 - mask.
47 result |= mask & (0ull - mask);
48 }
Lalit Maganti5ec4fc12022-06-13 19:32:32 +010049 mask &= mask - 1;
50 }
51 return result;
52}
53
Lalit Maganti25a45342022-06-14 14:56:48 +010054// See |PdepSlow| for information on PDEP.
55uint64_t Pdep(uint64_t word, uint64_t mask) {
56#if PERFETTO_BUILDFLAG(PERFETTO_X64_CPU_OPT)
57 base::ignore_result(PdepSlow);
58 return _pdep_u64(word, mask);
59#else
60 return PdepSlow(word, mask);
61#endif
62}
63
Lalit Maganti5ec4fc12022-06-13 19:32:32 +010064} // namespace
Lalit Maganticf39caa2019-08-21 14:00:08 -070065
Lalit Maganti8aee7f02019-09-26 15:57:46 +010066BitVector::BitVector() = default;
Lalit Maganticf39caa2019-08-21 14:00:08 -070067
Lalit Maganti1872e132019-10-22 11:59:46 +010068BitVector::BitVector(std::initializer_list<bool> init) {
69 for (bool x : init) {
70 if (x) {
71 AppendTrue();
72 } else {
73 AppendFalse();
74 }
75 }
76}
77
Lalit Maganti8aee7f02019-09-26 15:57:46 +010078BitVector::BitVector(uint32_t count, bool value) {
79 Resize(count, value);
80}
81
Anna Mayzner7c24af62023-04-14 15:26:07 +000082BitVector::BitVector(std::vector<uint64_t> words,
Lalit Maganti8aee7f02019-09-26 15:57:46 +010083 std::vector<uint32_t> counts,
84 uint32_t size)
Anna Mayzner7c24af62023-04-14 15:26:07 +000085 : size_(size), counts_(std::move(counts)), words_(std::move(words)) {
Lalit Magantid59b38e2023-06-26 16:37:48 +010086 PERFETTO_CHECK(words_.size() % Block::kWords == 0);
Anna Mayzner7c24af62023-04-14 15:26:07 +000087}
Lalit Maganticf39caa2019-08-21 14:00:08 -070088
Anna Mayznerfbee3192023-04-26 12:50:47 +000089void BitVector::Resize(uint32_t new_size, bool filler) {
90 uint32_t old_size = size_;
91 if (new_size == old_size)
92 return;
93
94 // Empty bitvectors should be memory efficient so we don't keep any data
95 // around in the bitvector.
96 if (new_size == 0) {
97 words_.clear();
98 counts_.clear();
99 size_ = 0;
100 return;
101 }
102
103 // Compute the address of the new last bit in the bitvector.
104 Address last_addr = IndexToAddress(new_size - 1);
105 uint32_t old_blocks_size = static_cast<uint32_t>(counts_.size());
106 uint32_t new_blocks_size = last_addr.block_idx + 1;
107
108 // Resize the block and count vectors to have the correct number of entries.
109 words_.resize(Block::kWords * new_blocks_size);
110 counts_.resize(new_blocks_size);
111
112 if (new_size > old_size) {
113 if (filler) {
114 // If the new space should be filled with ones, then set all the bits
115 // between the address of the old size and the new last address.
116 const Address& start = IndexToAddress(old_size);
117 Set(start, last_addr);
118
119 // We then need to update the counts vector to match the changes we
120 // made to the blocks.
121
122 // We start by adding the bits we set in the first block to the
123 // cummulative count before the range we changed.
124 Address end_of_block = {start.block_idx,
125 {Block::kWords - 1, BitWord::kBits - 1}};
126 uint32_t count_in_block_after_end =
127 AddressToIndex(end_of_block) - AddressToIndex(start) + 1;
128 uint32_t set_count = CountSetBits() + count_in_block_after_end;
129
130 for (uint32_t i = start.block_idx + 1; i <= last_addr.block_idx; ++i) {
131 // Set the count to the cummulative count so far.
132 counts_[i] = set_count;
133
134 // Add a full block of set bits to the count.
135 set_count += Block::kBits;
136 }
137 } else {
138 // If the newly added bits are false, we just need to update the
139 // counts vector with the current size of the bitvector for all
140 // the newly added blocks.
141 if (new_blocks_size > old_blocks_size) {
142 uint32_t count = CountSetBits();
143 for (uint32_t i = old_blocks_size; i < new_blocks_size; ++i) {
144 counts_[i] = count;
145 }
146 }
147 }
148 } else {
149 // Throw away all the bits after the new last bit. We do this to make
150 // future lookup, append and resize operations not have to worrying about
151 // trailing garbage bits in the last block.
152 BlockFromIndex(last_addr.block_idx).ClearAfter(last_addr.block_offset);
153 }
154
155 // Actually update the size.
156 size_ = new_size;
157}
158
Lalit Maganticf39caa2019-08-21 14:00:08 -0700159BitVector BitVector::Copy() const {
Anna Mayzner7c24af62023-04-14 15:26:07 +0000160 return BitVector(words_, counts_, size_);
Lalit Maganticf39caa2019-08-21 14:00:08 -0700161}
162
Lalit Maganti7ed308f2019-10-17 11:38:24 +0100163BitVector::AllBitsIterator BitVector::IterateAllBits() const {
164 return AllBitsIterator(this);
165}
166
Lalit Maganti80140842019-10-17 18:15:57 +0100167BitVector::SetBitsIterator BitVector::IterateSetBits() const {
168 return SetBitsIterator(this);
169}
170
Anna Mayzner96f359e2023-06-06 17:00:24 +0000171void BitVector::Not() {
172 for (uint32_t i = 0; i < words_.size(); ++i) {
173 BitWord(&words_[i]).Not();
Anna Mayznerfbee3192023-04-26 12:50:47 +0000174 }
175
Anna Mayzner96f359e2023-06-06 17:00:24 +0000176 for (uint32_t i = 1; i < counts_.size(); ++i) {
177 counts_[i] = kBitsInBlock * i - counts_[i];
Anna Mayznerfbee3192023-04-26 12:50:47 +0000178 }
Anna Mayznerfbee3192023-04-26 12:50:47 +0000179}
180
Anna Mayzner525110e2023-06-04 13:17:57 +0000181void BitVector::Or(const BitVector& sec) {
Anna Mayzneraba48f12023-06-01 13:44:18 +0000182 PERFETTO_CHECK(size_ == sec.size());
183 for (uint32_t i = 0; i < words_.size(); ++i) {
184 BitWord(&words_[i]).Or(sec.words_[i]);
185 }
186
187 for (uint32_t i = 1; i < counts_.size(); ++i) {
188 counts_[i] = counts_[i - 1] +
189 ConstBlock(&words_[Block::kWords * (i - 1)]).CountSetBits();
190 }
191}
192
Anna Mayzner525110e2023-06-04 13:17:57 +0000193void BitVector::And(const BitVector& sec) {
194 Resize(std::min(size_, sec.size_));
195 for (uint32_t i = 0; i < words_.size(); ++i) {
196 BitWord(&words_[i]).And(sec.words_[i]);
197 }
198
199 for (uint32_t i = 1; i < counts_.size(); ++i) {
200 counts_[i] = counts_[i - 1] +
201 ConstBlock(&words_[Block::kWords * (i - 1)]).CountSetBits();
202 }
203}
204
Lalit Maganti5ec4fc12022-06-13 19:32:32 +0100205void BitVector::UpdateSetBits(const BitVector& update) {
Anna Mayznerfbee3192023-04-26 12:50:47 +0000206 if (update.CountSetBits() == 0 || CountSetBits() == 0) {
207 *this = BitVector();
208 return;
209 }
Lalit Maganti5ec4fc12022-06-13 19:32:32 +0100210 PERFETTO_DCHECK(update.size() <= CountSetBits());
Lalit Maganti7ed308f2019-10-17 11:38:24 +0100211
Lalit Maganti5ec4fc12022-06-13 19:32:32 +0100212 // Get the start and end ptrs for the current bitvector.
213 // Safe because of the static_assert above.
Anna Mayznerfbee3192023-04-26 12:50:47 +0000214 uint64_t* ptr = words_.data();
215 const uint64_t* ptr_end = ptr + WordCount(size());
Lalit Maganti5ec4fc12022-06-13 19:32:32 +0100216
Anna Mayznerfbee3192023-04-26 12:50:47 +0000217 // Get the start and end ptrs for the update bitvector.
Lalit Maganti5ec4fc12022-06-13 19:32:32 +0100218 // Safe because of the static_assert above.
Anna Mayznerfbee3192023-04-26 12:50:47 +0000219 const uint64_t* update_ptr = update.words_.data();
220 const uint64_t* update_ptr_end = update_ptr + WordCount(update.size());
Lalit Maganti5ec4fc12022-06-13 19:32:32 +0100221
222 // |update_unused_bits| contains |unused_bits_count| bits at the bottom
Anna Mayznerfbee3192023-04-26 12:50:47 +0000223 // which indicates how the next |unused_bits_count| set bits in |this|
Lalit Maganti5ec4fc12022-06-13 19:32:32 +0100224 // should be changed. This is necessary because word boundaries in |this| will
225 // almost always *not* match the word boundaries in |update|.
226 uint64_t update_unused_bits = 0;
227 uint8_t unused_bits_count = 0;
228
Anna Mayznerebabe0a2023-04-25 07:07:07 +0000229 // The basic premise of this loop is, for each word in |this| we find
Lalit Maganti5ec4fc12022-06-13 19:32:32 +0100230 // enough bits from |update| to cover every set bit in the word. We then use
231 // the PDEP x64 instruction (or equivalent instructions/software emulation) to
232 // update the word and store it back in |this|.
233 for (; ptr != ptr_end; ++ptr) {
234 uint64_t current = *ptr;
235
236 // If the current value is all zeros, there's nothing to update.
237 if (PERFETTO_UNLIKELY(current == 0))
238 continue;
239
240 uint8_t popcount = static_cast<uint8_t>(PERFETTO_POPCOUNT(current));
241 PERFETTO_DCHECK(popcount >= 1);
242
243 // Check if we have enough unused bits from the previous iteration - if so,
244 // we don't need to read anything from |update|.
245 uint64_t update_for_current = update_unused_bits;
246 if (unused_bits_count >= popcount) {
247 // We have enough bits so just do the accounting to not reuse these bits
248 // for the future.
249 unused_bits_count -= popcount;
250 update_unused_bits = popcount == 64 ? 0 : update_unused_bits >> popcount;
251 } else {
252 // We don't have enough bits so we need to read the next word of bits from
253 // |current|.
254 uint64_t next_update = update_ptr == update_ptr_end ? 0 : *update_ptr++;
255
256 // Bitwise or |64 - unused_bits_count| bits from the bottom of
257 // |next_update| to the top of |update_for_current|. Only |popcount| bits
258 // will actually be used by PDEP but masking off the unused bits takes
259 // *more* instructions than not doing anything.
260 update_for_current |= next_update << unused_bits_count;
261
262 // PDEP will use |popcount| bits from update: this means it will use
263 // |unused_bits_count| from |update_for_current| and |popcount -
264 // unused_bits_count| from |next_update|
265 uint8_t used_next_bits = popcount - unused_bits_count;
266
267 // Shift off any bits which will be used by current and store the
268 // remainder for use in the next iteration.
269 update_unused_bits =
270 used_next_bits == 64 ? 0 : next_update >> used_next_bits;
271 unused_bits_count = 64 - used_next_bits;
272 }
273
274 // We should never end up with more than 64 bits available.
275 PERFETTO_CHECK(unused_bits_count <= 64);
276
277 // PDEP precisely captures the notion of "updating set bits" for a single
278 // word.
Lalit Maganti25a45342022-06-14 14:56:48 +0100279 *ptr = Pdep(update_for_current, current);
Lalit Maganti5ec4fc12022-06-13 19:32:32 +0100280 }
281
282 // We shouldn't have any non-zero unused bits and we should have consumed the
283 // whole |update| bitvector. Note that we cannot really say anything about
284 // |unused_bits_count| because it's possible for the above algorithm to use
285 // some bits which are "past the end" of |update|; as long as these bits are
286 // zero, it meets the pre-condition of this function.
287 PERFETTO_DCHECK(update_unused_bits == 0);
288 PERFETTO_DCHECK(update_ptr == update_ptr_end);
289
290 for (uint32_t i = 0; i < counts_.size() - 1; ++i) {
Anna Mayzner7c24af62023-04-14 15:26:07 +0000291 counts_[i + 1] = counts_[i] + ConstBlockFromIndex(i).CountSetBits();
Lalit Maganti7ed308f2019-10-17 11:38:24 +0100292 }
Lalit Magantie7d90e02019-10-18 17:02:15 +0100293
294 // After the loop, we should have precisely the same number of bits
Lalit Maganti5ec4fc12022-06-13 19:32:32 +0100295 // set as |update|.
296 PERFETTO_DCHECK(update.CountSetBits() == CountSetBits());
Lalit Maganti7ed308f2019-10-17 11:38:24 +0100297}
298
Anna Mayznerebabe0a2023-04-25 07:07:07 +0000299BitVector BitVector::IntersectRange(uint32_t range_start,
300 uint32_t range_end) const {
Anna Mayznerebabe0a2023-04-25 07:07:07 +0000301 // We should skip all bits until the index of first set bit bigger than
302 // |range_start|.
Anna Mayznerfbee3192023-04-26 12:50:47 +0000303 uint32_t end_idx = std::min(range_end, size());
Anna Mayznerebabe0a2023-04-25 07:07:07 +0000304
Anna Mayzner525110e2023-06-04 13:17:57 +0000305 if (range_start >= end_idx)
Anna Mayznerebabe0a2023-04-25 07:07:07 +0000306 return BitVector();
307
Lalit Maganti2c05ea82023-06-26 17:36:09 +0100308 Builder builder(end_idx, range_start);
Anna Mayznerebabe0a2023-04-25 07:07:07 +0000309 uint32_t front_bits = builder.BitsUntilWordBoundaryOrFull();
Anna Mayzner525110e2023-06-04 13:17:57 +0000310 uint32_t cur_index = range_start;
Anna Mayznerebabe0a2023-04-25 07:07:07 +0000311 for (uint32_t i = 0; i < front_bits; ++i, ++cur_index) {
312 builder.Append(IsSet(cur_index));
313 }
314
Anna Mayznerfbee3192023-04-26 12:50:47 +0000315 PERFETTO_DCHECK(cur_index == end_idx || cur_index % BitWord::kBits == 0);
Anna Mayznerebabe0a2023-04-25 07:07:07 +0000316 uint32_t cur_words = cur_index / BitWord::kBits;
317 uint32_t full_words = builder.BitsInCompleteWordsUntilFull() / BitWord::kBits;
318 uint32_t total_full_words = cur_words + full_words;
319 for (; cur_words < total_full_words; ++cur_words) {
320 builder.AppendWord(words_[cur_words]);
321 }
322
323 uint32_t last_bits = builder.BitsUntilFull();
324 cur_index += full_words * BitWord::kBits;
325 for (uint32_t i = 0; i < last_bits; ++i, ++cur_index) {
326 builder.Append(IsSet(cur_index));
327 }
328
329 return std::move(builder).Build();
330}
331
Lalit Maganticf39caa2019-08-21 14:00:08 -0700332} // namespace trace_processor
333} // namespace perfetto