| /* |
| * Copyright (C) 2022 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #ifndef SRC_TRACE_PROCESSOR_UTIL_GLOB_H_ |
| #define SRC_TRACE_PROCESSOR_UTIL_GLOB_H_ |
| |
| #include <optional> |
| #include <string_view> |
| #include <vector> |
| |
| #include "perfetto/ext/base/small_vector.h" |
| #include "perfetto/ext/base/string_splitter.h" |
| #include "perfetto/ext/base/string_view.h" |
| |
| namespace perfetto { |
| namespace trace_processor { |
| namespace util { |
| |
| // Lightweight implementation of matching on UNIX glob patterns, maintaining |
| // compatibility of syntax and semantics used by SQLite. |
| // |
| // Usage: |
| // GlobMatcher matcher = GlobMatcher::FromPattern("*foo*"); |
| // for (auto string : strings) { |
| // if (matcher.Matches(string)) { |
| // <do something> |
| // } |
| // } |
| // |
| // This is a class instead of a free function to allow preprocessing the |
| // pattern (e.g. to compute Kleene star offsets). This can create big savings |
| // because trace processsor needs to match the same pattern on many strings |
| // when filtering tables. |
| // |
| // Implementation: |
| // The algorithm used in this class is similar to the "alternative" |
| // algorithm proposed in [1]. |
| // |
| // We preprocess the pattern (in the constructor) to split the pattern on *, |
| // accounting for character classes. This breaks the pattern in "segments": our |
| // name for the parts of the pattern between the stars. |
| // |
| // Then at match time, we go through each segment and check if it matches part |
| // of the string. The number of character matched defines the search start-point |
| // for the next segment. As described in [1], we don't need to do any |
| // backtracking which removes the exponential component of the algorithm and |
| // consequently simplifies the code. |
| // |
| // The subtle parts are: |
| // 1) the first and last segments - they need to be "anchored" to the |
| // beginning and end of the string respectively. If not, they fail the match |
| // straight away. |
| // 2) leading/trailing stars: they counteract the above point and "unanchor" |
| // the first and last segments respectively by allowing them to happen |
| // somewhere after/before the beginning/end. |
| // |
| // [1] https://research.swtch.com/glob |
| class GlobMatcher { |
| public: |
| GlobMatcher(GlobMatcher&&) = default; |
| GlobMatcher& operator=(GlobMatcher&&) = default; |
| |
| // Creates a glob matcher from a pattern. |
| static GlobMatcher FromPattern(base::StringView pattern_str) { |
| return GlobMatcher(std::move(pattern_str)); |
| } |
| |
| // Checks the provided string against the pattern and returns whether it |
| // matches. |
| bool Matches(base::StringView input) const; |
| |
| // Returns whether the comparison should really be an equality comparison. |
| bool IsEquality() { |
| return !leading_star_ && !trailing_star_ && segments_.size() <= 1; |
| } |
| |
| private: |
| // Represents a portion of the pattern in between two * characters. |
| struct Segment { |
| // The portion of the pattern in the segment. Note that this will not |
| // contain a free '*' (i.e. outside a character class). |
| base::StringView pattern; |
| |
| // The number of consumed characters in an input string if this segment |
| // matches. |
| uint32_t matched_chars; |
| }; |
| |
| // It would be very rare for a glob pattern to have more than 4 stars so |
| // reserve stack space for that many segments. |
| static constexpr uint32_t kMaxSegmentsOnStack = 4; |
| |
| explicit GlobMatcher(base::StringView pattern); |
| |
| GlobMatcher(const GlobMatcher&) = delete; |
| GlobMatcher& operator=(const GlobMatcher&) = delete; |
| |
| // Returns whether |input| starts with the pattern in |segment| following |
| // glob matching rules. |
| bool StartsWith(base::StringView input, const Segment& segment) const { |
| if (!contains_char_class_or_question_) { |
| return input.StartsWith(segment.pattern); |
| } |
| return StartsWithSlow(input, segment); |
| } |
| |
| // Returns whether |input| ends with the pattern in |segment| following |
| // glob matching rules. |
| bool EndsWith(base::StringView input, const Segment& segment) const { |
| if (!contains_char_class_or_question_) { |
| return input.EndsWith(segment.pattern); |
| } |
| // Ending with |segment| is the same as taking the substring of |in| |
| size_t start = input.size() - segment.matched_chars; |
| return StartsWithSlow(input.substr(start), segment); |
| } |
| |
| // Returns the index where |input| matches the pattern in |segment| |
| // following glob matching rules or base::StringView::npos, if no such index |
| // exists. |
| size_t Find(base::StringView input, |
| const Segment& segment, |
| size_t start) const { |
| if (!contains_char_class_or_question_) { |
| return input.find(segment.pattern, start); |
| } |
| for (uint32_t i = 0; i < input.size(); ++i) { |
| if (StartsWithSlow(input.substr(i), segment)) { |
| return i; |
| } |
| } |
| return base::StringView::npos; |
| } |
| |
| // Given a StringView starting at the boundary of a character class, returns |
| // a StringView containing only the parts inside the [] or base::StringView() |
| // if no character class exists. |
| static base::StringView ExtractCharacterClass(base::StringView input); |
| |
| // Matches |in| against the given character class. |
| static bool MatchesCharacterClass(char input, base::StringView char_class); |
| |
| bool StartsWithSlow(base::StringView input, const Segment& segment) const; |
| |
| // IMPORTANT: this should *not* be modified after the constructor as we store |
| // pointers to the data inside here. |
| // Note: this vector also allocates space for the null-terminator so is +1 |
| // the "traditional" size of the string. |
| std::vector<char> pattern_; |
| |
| // Chunks of the |pattern_| tokenized on '*'. See the class comment for more |
| // info. |
| base::SmallVector<Segment, kMaxSegmentsOnStack> segments_; |
| |
| bool leading_star_ = false; |
| bool trailing_star_ = false; |
| bool contains_char_class_or_question_ = false; |
| }; |
| |
| } // namespace util |
| } // namespace trace_processor |
| } // namespace perfetto |
| |
| #endif // SRC_TRACE_PROCESSOR_UTIL_GLOB_H_ |