|  | /* | 
|  | * Copyright (C) 2022 The Android Open Source Project | 
|  | * | 
|  | * Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | * you may not use this file except in compliance with the License. | 
|  | * You may obtain a copy of the License at | 
|  | * | 
|  | *      http://www.apache.org/licenses/LICENSE-2.0 | 
|  | * | 
|  | * Unless required by applicable law or agreed to in writing, software | 
|  | * distributed under the License is distributed on an "AS IS" BASIS, | 
|  | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | * See the License for the specific language governing permissions and | 
|  | * limitations under the License. | 
|  | */ | 
|  |  | 
|  | #ifndef SRC_TRACE_PROCESSOR_UTIL_GLOB_H_ | 
|  | #define SRC_TRACE_PROCESSOR_UTIL_GLOB_H_ | 
|  |  | 
|  | #include <vector> | 
|  |  | 
|  | #include "perfetto/ext/base/optional.h" | 
|  | #include "perfetto/ext/base/small_vector.h" | 
|  | #include "perfetto/ext/base/string_splitter.h" | 
|  | #include "perfetto/ext/base/string_view.h" | 
|  |  | 
|  | namespace perfetto { | 
|  | namespace trace_processor { | 
|  | namespace util { | 
|  |  | 
|  | // Lightweight implementation of matching on UNIX glob patterns, maintaining | 
|  | // compatibility of syntax and semantics used by SQLite. | 
|  | // | 
|  | // Usage: | 
|  | //  GlobMatcher matcher = GlobMatcher::FromPattern("*foo*"); | 
|  | //  for (auto string : strings) { | 
|  | //    if (matcher.Matches(string)) { | 
|  | //      <do something> | 
|  | //    } | 
|  | //  } | 
|  | // | 
|  | // This is a class instead of a free function to allow preprocessing the | 
|  | // pattern (e.g. to compute Kleene star offsets). This can create big savings | 
|  | // because trace processsor needs to match the same pattern on many strings | 
|  | // when filtering tables. | 
|  | // | 
|  | // Implementation: | 
|  | // The algorithm used in this class is similar to the "alternative" | 
|  | // algorithm proposed in [1]. | 
|  | // | 
|  | // We preprocess the pattern (in the constructor) to split the pattern on *, | 
|  | // accounting for character classes. This breaks the pattern in "segments": our | 
|  | // name for the parts of the pattern between the stars. | 
|  | // | 
|  | // Then at match time, we go through each segment and check if it matches part | 
|  | // of the string. The number of character matched defines the search start-point | 
|  | // for the next segment. As described in [1], we don't need to do any | 
|  | // backtracking which removes the exponential component of the algorithm and | 
|  | // consequently simplifies the code. | 
|  | // | 
|  | // The subtle parts are: | 
|  | // 1) the first and last segments - they need to be "anchored" to the | 
|  | //    beginning and end of the string respectively. If not, they fail the match | 
|  | //    straight away. | 
|  | // 2) leading/trailing stars: they counteract the above point and "unanchor" | 
|  | //    the first and last segments respectively by allowing them to happen | 
|  | //    somewhere after/before the beginning/end. | 
|  | // | 
|  | // [1] https://research.swtch.com/glob | 
|  | class GlobMatcher { | 
|  | public: | 
|  | // Creates a glob matcher from a pattern. | 
|  | static GlobMatcher FromPattern(base::StringView pattern_str) { | 
|  | return GlobMatcher(std::move(pattern_str)); | 
|  | } | 
|  |  | 
|  | // Checks the provided string against the pattern and returns whether it | 
|  | // matches. | 
|  | bool Matches(base::StringView input); | 
|  |  | 
|  | private: | 
|  | // Represents a portion of the pattern in between two * characters. | 
|  | struct Segment { | 
|  | // The portion of the pattern in the segment. Note that this will not | 
|  | // contain a free '*' (i.e. outside a character class). | 
|  | base::StringView pattern; | 
|  |  | 
|  | // The number of consumed characters in an input string if this segment | 
|  | // matches. | 
|  | uint32_t matched_chars; | 
|  | }; | 
|  |  | 
|  | // It would be very rare for a glob pattern to have more than 4 stars so | 
|  | // reserve stack space for that many segments. | 
|  | static constexpr uint32_t kMaxSegmentsOnStack = 4; | 
|  |  | 
|  | explicit GlobMatcher(base::StringView pattern); | 
|  |  | 
|  | // Returns whether |input| starts with the pattern in |segment| following | 
|  | // glob matching rules. | 
|  | bool StartsWith(base::StringView input, const Segment& segment) { | 
|  | if (!contains_char_class_or_question_) { | 
|  | return input.StartsWith(segment.pattern); | 
|  | } | 
|  | return StartsWithSlow(input, segment); | 
|  | } | 
|  |  | 
|  | // Returns whether |input| ends with the pattern in |segment| following | 
|  | // glob matching rules. | 
|  | bool EndsWith(base::StringView input, const Segment& segment) { | 
|  | if (!contains_char_class_or_question_) { | 
|  | return input.EndsWith(segment.pattern); | 
|  | } | 
|  | // Ending with |segment| is the same as taking the substring of |in| | 
|  | size_t start = input.size() - segment.matched_chars; | 
|  | return StartsWithSlow(input.substr(start), segment); | 
|  | } | 
|  |  | 
|  | // Returns the index where |input| matches the pattern in |segment| | 
|  | // following glob matching rules or base::StringView::npos, if no such index | 
|  | // exists. | 
|  | size_t Find(base::StringView input, const Segment& segment, size_t start) { | 
|  | if (!contains_char_class_or_question_) { | 
|  | return input.find(segment.pattern, start); | 
|  | } | 
|  | for (uint32_t i = 0; i < input.size(); ++i) { | 
|  | if (StartsWithSlow(input.substr(i), segment)) { | 
|  | return i; | 
|  | } | 
|  | } | 
|  | return base::StringView::npos; | 
|  | } | 
|  |  | 
|  | // Given a StringView starting at the boundary of a character class, returns | 
|  | // a StringView containing only the parts inside the [] or base::StringView() | 
|  | // if no character class exists. | 
|  | static base::StringView ExtractCharacterClass(base::StringView input); | 
|  |  | 
|  | // Matches |in| against the given character class. | 
|  | static bool MatchesCharacterClass(char input, base::StringView char_class); | 
|  |  | 
|  | bool StartsWithSlow(base::StringView input, const Segment& segment); | 
|  |  | 
|  | // IMPORTANT: this should *not* be modified after the constructor as we store | 
|  | // pointers to the data inside here. | 
|  | // Note: this vector also allocates space for the null-terminator so is +1 | 
|  | // the "traditional" size of the string. | 
|  | std::vector<char> pattern_; | 
|  |  | 
|  | // Chunks of the |pattern_| tokenized on '*'. See the class comment for more | 
|  | // info. | 
|  | base::SmallVector<Segment, kMaxSegmentsOnStack> segments_; | 
|  |  | 
|  | bool leading_star_ = false; | 
|  | bool trailing_star_ = false; | 
|  | bool contains_char_class_or_question_ = false; | 
|  | }; | 
|  |  | 
|  | }  // namespace util | 
|  | }  // namespace trace_processor | 
|  | }  // namespace perfetto | 
|  |  | 
|  | #endif  // SRC_TRACE_PROCESSOR_UTIL_GLOB_H_ |