| /* |
| * Copyright (C) 2022 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include "src/trace_processor/util/glob.h" |
| |
| #include "perfetto/ext/base/string_utils.h" |
| |
| namespace perfetto { |
| namespace trace_processor { |
| namespace util { |
| |
| GlobMatcher::GlobMatcher(base::StringView pattern_str) |
| : pattern_(pattern_str.size() + 1) { |
| base::StringCopy(pattern_.data(), pattern_str.data(), pattern_.size()); |
| |
| base::StringView pattern(pattern_.data()); |
| |
| // Note: see the class header for how this algorithm works. |
| uint32_t segment_start = 0; |
| uint32_t segment_potential_matched_chars = 0; |
| auto create_segment = [this, &segment_start, &segment_potential_matched_chars, |
| &pattern](size_t i) { |
| base::StringView segment = pattern.substr(segment_start, i - segment_start); |
| PERFETTO_DCHECK(segment_potential_matched_chars <= segment.size()); |
| if (!segment.empty()) { |
| PERFETTO_DCHECK(segment_potential_matched_chars > 0); |
| segments_.emplace_back(Segment{segment, segment_potential_matched_chars}); |
| } |
| return segment.empty(); |
| }; |
| |
| for (uint32_t i = 0; i < pattern.size(); ++i) { |
| char c = pattern.at(i); |
| |
| // If we don't have a star, we are only matching a single character (but |
| // potentially with a character class which contains >1 character). |
| if (c != '*') { |
| if (c == '[') { |
| base::StringView cclass = ExtractCharacterClass(pattern.substr(i + 1)); |
| contains_char_class_or_question_ |= !cclass.empty(); |
| |
| // Skip the current '[' character. |
| ++i; |
| |
| // Skip the whole character class. This will leave i pointing at the |
| // terminating character (i.e. ']'). With the ++i in the loop, this will |
| // correctly lead us going past the whole class. |
| i += cclass.size(); |
| } |
| |
| contains_char_class_or_question_ |= c == '?'; |
| ++segment_potential_matched_chars; |
| continue; |
| } |
| |
| // Add the characters collected so far as a segment. |
| create_segment(i); |
| segment_start = i + 1; |
| segment_potential_matched_chars = 0; |
| } |
| |
| // Ensure we add any remaining characters as a segment. |
| bool empty_segment = create_segment(pattern.size()); |
| leading_star_ = !pattern.empty() && pattern.at(0) == '*'; |
| trailing_star_ = !pattern.empty() && empty_segment; |
| } |
| |
| bool GlobMatcher::Matches(base::StringView in) { |
| // If there are no segments, that means the pattern is either '' or '*' |
| // (or '**', '***' etc which is really the same as '*'). This means |
| // we are match if either a) there is a leading star (== trailing star) or |
| // b) the input string is empty. |
| if (segments_.empty()) { |
| PERFETTO_DCHECK(leading_star_ == trailing_star_); |
| return leading_star_ || in.empty(); |
| } |
| |
| // If there is only one segment and no stars we need an equality match. |
| // As we still need to handle '[..]' and '?', we cannot just use string |
| // equality. We *can* however use StartsWith and check the matched |
| // characters is equal to the length of the input: this is basically the |
| // same as checking equality. |
| if (segments_.size() == 1 && !leading_star_ && !trailing_star_) { |
| return segments_.front().matched_chars == in.size() && |
| StartsWith(in, segments_.front()); |
| } |
| |
| // If there's no leading star, the first segment needs to be handled |
| // separately because it *needs* to be anchored to the left of the |
| // string rather than appearing at some point later. |
| if (!leading_star_ && !StartsWith(in, segments_.front())) { |
| return false; |
| } |
| |
| // Similarily, if there's no trailing star, the last segment needs to be |
| // "anchored" to the right of the string. |
| if (!trailing_star_ && !EndsWith(in, segments_.back())) { |
| return false; |
| } |
| |
| // For any segment we haven't checked, they needs to appear in the string |
| // sequentially with possibly some characters separating them. To handle |
| // this, we just need to iteratively find each segment, starting from the |
| // previous segment. |
| Segment* segment_start = segments_.begin() + (leading_star_ ? 0 : 1); |
| Segment* segment_end = segments_.end() - (trailing_star_ ? 0 : 1); |
| size_t find_idx = leading_star_ ? 0 : segments_.front().matched_chars; |
| for (Segment* segment = segment_start; segment < segment_end; ++segment) { |
| size_t pos = Find(in, *segment, find_idx); |
| if (pos == base::StringView::npos) { |
| return false; |
| } |
| find_idx = pos + segment->matched_chars; |
| } |
| |
| // Every segment has been found to match so far including the leading and |
| // trailing one so the entire string matches! |
| return true; |
| } |
| |
| bool GlobMatcher::StartsWithSlow(base::StringView in, const Segment& segment) { |
| base::StringView pattern = segment.pattern; |
| for (uint32_t i = 0, p = 0; p < pattern.size(); ++i, ++p) { |
| // We've run out of characters to consume in the input but still have more |
| // to consume in the pattern: |in| cannot possibly start with |pattern|. |
| if (i >= in.size()) { |
| return false; |
| } |
| |
| char in_c = in.at(i); |
| char pattern_c = pattern.at(p); |
| |
| // '?' matches any character. |
| if (pattern_c == '?') { |
| continue; |
| } |
| |
| // '[' signifies the start of a character class. |
| if (pattern_c == '[') { |
| base::StringView cclass = ExtractCharacterClass(pattern.substr(p + 1)); |
| if (!MatchesCharacterClass(in_c, cclass)) { |
| return false; |
| } |
| |
| // Skip the current '[' character. |
| p++; |
| |
| // Skip the whole character class. This will leave i pointing at the |
| // terminating character (i.e. ']'). With the ++i in the loop, this will |
| // correctly lead us going past the whole class. |
| p += cclass.size(); |
| continue; |
| } |
| |
| // Anything else is just an ordinary character. |
| if (in_c != pattern_c) { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| base::StringView GlobMatcher::ExtractCharacterClass(base::StringView in) { |
| if (in.empty()) |
| return base::StringView(); |
| |
| // We should always skip the first real character: it could be ']' but if |
| // so, it is treated as a normal character because empty classes are not |
| // valid. |
| bool invert = in.at(0) == '^'; |
| size_t end_idx = in.find(']', invert ? 2 : 1); |
| return end_idx == base::StringView::npos ? base::StringView() |
| : in.substr(0, end_idx); |
| } |
| |
| bool GlobMatcher::MatchesCharacterClass(char in, base::StringView char_class) { |
| PERFETTO_DCHECK(!char_class.empty()); |
| |
| const char* start = char_class.data(); |
| const char* end = start + char_class.size(); |
| |
| bool invert = *start == '^'; |
| start += invert; |
| |
| PERFETTO_DCHECK(start != end); |
| |
| for (auto* ptr = start; ptr != end; ++ptr) { |
| char cur = *ptr; |
| |
| // If we see a '-' at any point except at the start or end of the string, |
| // it represents a matching range (e.g. a-z represents matching any |
| // character between a and z). |
| if (cur == '-' && ptr != start && ptr != end - 1) { |
| // Look at the previous and next characters in the class and check if the |
| // character falls in the range. |
| char range_start = ptr[-1]; |
| char range_end = ptr[1]; |
| if (range_start <= in && in <= range_end) { |
| return !invert; |
| } |
| continue; |
| } |
| |
| // We match a character in the class. |
| if (in == cur) { |
| return !invert; |
| } |
| } |
| |
| // If we're here, nothing in the class matched: return whether the class was |
| // inverted as this would actually be a match. |
| return invert; |
| } |
| |
| } // namespace util |
| } // namespace trace_processor |
| } // namespace perfetto |