blob: bc4982cd8d44a79599005a559e6f89c33e510198 [file] [log] [blame] [edit]
// Copyright (C) 2023 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import MiniSearch from 'minisearch';
export interface FuzzySegment {
matching: boolean;
value: string;
}
export interface FuzzyResult<T> {
item: T;
segments: FuzzySegment[];
}
export type KeyLookup<T> = (x: T) => string;
// Finds approx matching in arbitrary lists of items.
// Uses MiniSearch for word-order-independent fuzzy filtering, then computes
// character-level highlight segments for rendering.
export class FuzzyFinder<T> {
private readonly miniSearch: MiniSearch;
// Because we operate on arbitrary lists, a key lookup function is required to
// so we know which part of the list is to be be searched. It should return
// the relevant search string for each item.
constructor(
private readonly items: ReadonlyArray<T>,
private readonly keyLookup: KeyLookup<T>,
) {
const docs = items.map((item, i) => ({id: i, text: keyLookup(item)}));
this.miniSearch = new MiniSearch({
fields: ['text'],
tokenize: camelCaseTokenize,
searchOptions: {
tokenize: camelCaseTokenize,
// Allow 1 edit for short terms, ~20% for longer ones.
fuzzy: (term: string) =>
term.length <= 3 ? 1 : Math.ceil(term.length * 0.2),
prefix: true,
combineWith: 'AND',
},
});
this.miniSearch.addAll(docs);
}
// Return a list of items that match any of the search terms.
// Search terms separated by spaces are matched independently (any order).
find(searchTerm: string): FuzzyResult<T>[] {
if (searchTerm === '') {
return this.items.map((item) => {
const normalisedTerm = this.keyLookup(item);
return {
item,
segments: [{matching: false, value: normalisedTerm}],
};
});
}
return this.miniSearch.search(searchTerm).map((result) => {
const item = this.items[result.id as number];
const text = this.keyLookup(item);
return {item, segments: computeHighlightSegments(searchTerm, text)};
});
}
}
// Tokenize text by splitting on whitespace/punctuation AND camelCase boundaries.
// E.g. "dev.perfetto.LiveMemory" -> ["dev", "perfetto", "Live", "Memory"]
// This allows searching for "memory" to match "LiveMemory".
function camelCaseTokenize(text: string): string[] {
// First split on non-alphanumeric characters (dots, spaces, underscores, etc.)
const coarseTokens = text.split(/[^a-zA-Z0-9]+/).filter(Boolean);
const tokens: string[] = [];
for (const token of coarseTokens) {
// Split camelCase: insert boundary before uppercase letter preceded by
// a lowercase letter, or before an uppercase letter followed by a
// lowercase letter when preceded by uppercase (e.g. "XMLParser" ->
// ["XML", "Parser"]).
const parts = token.split(/(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])/);
tokens.push(...parts);
}
return tokens;
}
// Given a query (possibly multi-word) and candidate text, compute highlight
// segments. Each query token is first tried as a substring match, then falls
// back to sequential character matching.
function computeHighlightSegments(query: string, text: string): FuzzySegment[] {
const tokens = query.split(/\s+/).filter(Boolean);
const lowerText = text.toLowerCase();
const highlightedIndices: number[] = [];
for (const token of tokens) {
const lowerToken = token.toLowerCase();
const idx = lowerText.indexOf(lowerToken);
if (idx !== -1) {
// Substring match — highlight the whole substring.
for (let i = idx; i < idx + lowerToken.length; i++) {
highlightedIndices.push(i);
}
} else {
// Fall back to sequential character match.
let j = 0;
for (const ch of lowerToken) {
while (j < lowerText.length && lowerText[j] !== ch) j++;
if (j < lowerText.length) {
highlightedIndices.push(j);
j++;
}
}
}
}
const unique = [...new Set(highlightedIndices)].sort((a, b) => a - b);
return indiciesToSegments(unique, text);
}
// Given a list of indicies of matching chars, and the original value, produce
// a list of match/nomatch segments.
function indiciesToSegments(indicies: number[], text: string): FuzzySegment[] {
const segments: FuzzySegment[] = [];
let nextIndex = 0;
let match = '';
for (const i of indicies) {
if (nextIndex < i) {
// If we had a match segment from before, add it now.
if (match !== '') {
segments.push({matching: true, value: match});
match = '';
}
// Missed some indicies - wrap them up into a nomatch segment.
segments.push({matching: false, value: text.slice(nextIndex, i)});
}
// Append this matching char to the previous match.
match += text[i];
nextIndex = i + 1;
}
// Add any lingering match segment.
if (match !== '') {
segments.push({matching: true, value: match});
}
// Add final nomatch segment if there is anything left.
if (nextIndex < text.length) {
segments.push({matching: false, value: text.slice(nextIndex)});
}
return segments;
}
// Evaluate whether |searchTerm| is found in |text|.
// |indicies| is an array of numbers the same length as |searchTerm|, into which
// we place the indicies of the matching chars in |text|.
function match(searchTerm: string, text: string, indicies: number[]): boolean {
let j = 0; // index into the searchTerm.
let success: boolean = true;
// For each char of the searchTerm...
for (let i = 0; i < searchTerm.length; ++i) {
const char = searchTerm[i].toLowerCase();
// ...advance the text index until we find the char.
for (; j < text.length; ++j) {
// If we find it add it to the segment and move on.
if (text[j].toLowerCase() === char) {
indicies[i] = j;
break;
}
}
// Failed to find searchTerm[i] in text: give up.
if (j === text.length) {
success = false;
break;
}
++j;
}
return success;
}
export interface FuzzyMatch {
matches: boolean;
segments: FuzzySegment[];
}
// Fuzzy match a single piece of text against several search terms.
// If any of the terms match, the result of the match is true.
export function fuzzyMatch(
text: string,
...searchTerms: ReadonlyArray<string>
): FuzzyMatch {
for (const searchTerm of searchTerms) {
const indicies: number[] = new Array(searchTerm.length);
if (match(searchTerm, text, indicies)) {
const segments = indiciesToSegments(indicies, text);
return {
matches: true,
segments,
};
}
}
return {
matches: false,
segments: [],
};
}