| // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include <string> |
| |
| #include "base/i18n/rtl.h" |
| #include "base/i18n/string_search.h" |
| #include "base/strings/string16.h" |
| #include "base/strings/utf_string_conversions.h" |
| #include "testing/gtest/include/gtest/gtest.h" |
| #include "third_party/icu/source/i18n/unicode/usearch.h" |
| |
| namespace base { |
| namespace i18n { |
| |
// Note on setting the default locale for testing: the default locale on the
// Mac trybot is currently en_US_POSIX. With that locale, string search at
// primary-level collation strength is case-sensitive, whereas it should
// normally be case-insensitive. In other locales (including en_US, which
// English speakers in the U.S. use), this search is case-insensitive as
// expected. Tests below that depend on this behavior therefore switch the
// default locale to en_US while they run and restore it afterwards.
| |
| TEST(StringSearchTest, ASCII) { |
| std::string default_locale(uloc_getDefault()); |
| bool locale_is_posix = (default_locale == "en_US_POSIX"); |
| if (locale_is_posix) |
| SetICUDefaultLocale("en_US"); |
| |
| size_t index = 0; |
| size_t length = 0; |
| |
| EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
| ASCIIToUTF16("hello"), ASCIIToUTF16("hello world"), &index, &length)); |
| EXPECT_EQ(0U, index); |
| EXPECT_EQ(5U, length); |
| |
| EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( |
| ASCIIToUTF16("h e l l o"), ASCIIToUTF16("h e l l o"), |
| &index, &length)); |
| |
| EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
| ASCIIToUTF16("aabaaa"), ASCIIToUTF16("aaabaabaaa"), &index, &length)); |
| EXPECT_EQ(4U, index); |
| EXPECT_EQ(6U, length); |
| |
| EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( |
| ASCIIToUTF16("searching within empty string"), string16(), |
| &index, &length)); |
| |
| EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
| string16(), ASCIIToUTF16("searching for empty string"), &index, &length)); |
| EXPECT_EQ(0U, index); |
| EXPECT_EQ(0U, length); |
| |
| EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
| ASCIIToUTF16("case insensitivity"), ASCIIToUTF16("CaSe InSeNsItIvItY"), |
| &index, &length)); |
| EXPECT_EQ(0U, index); |
| EXPECT_EQ(18U, length); |
| |
| if (locale_is_posix) |
| SetICUDefaultLocale(default_locale.data()); |
| } |
| |
| TEST(StringSearchTest, UnicodeLocaleIndependent) { |
| // Base characters |
| const string16 e_base = WideToUTF16(L"e"); |
| const string16 E_base = WideToUTF16(L"E"); |
| const string16 a_base = WideToUTF16(L"a"); |
| |
| // Composed characters |
| const string16 e_with_acute_accent = WideToUTF16(L"\u00e9"); |
| const string16 E_with_acute_accent = WideToUTF16(L"\u00c9"); |
| const string16 e_with_grave_accent = WideToUTF16(L"\u00e8"); |
| const string16 E_with_grave_accent = WideToUTF16(L"\u00c8"); |
| const string16 a_with_acute_accent = WideToUTF16(L"\u00e1"); |
| |
| // Decomposed characters |
| const string16 e_with_acute_combining_mark = WideToUTF16(L"e\u0301"); |
| const string16 E_with_acute_combining_mark = WideToUTF16(L"E\u0301"); |
| const string16 e_with_grave_combining_mark = WideToUTF16(L"e\u0300"); |
| const string16 E_with_grave_combining_mark = WideToUTF16(L"E\u0300"); |
| const string16 a_with_acute_combining_mark = WideToUTF16(L"a\u0301"); |
| |
| std::string default_locale(uloc_getDefault()); |
| bool locale_is_posix = (default_locale == "en_US_POSIX"); |
| if (locale_is_posix) |
| SetICUDefaultLocale("en_US"); |
| |
| size_t index = 0; |
| size_t length = 0; |
| |
| EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
| e_base, e_with_acute_accent, &index, &length)); |
| EXPECT_EQ(0U, index); |
| EXPECT_EQ(e_with_acute_accent.size(), length); |
| |
| EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
| e_with_acute_accent, e_base, &index, &length)); |
| EXPECT_EQ(0U, index); |
| EXPECT_EQ(e_base.size(), length); |
| |
| EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
| e_base, e_with_acute_combining_mark, &index, &length)); |
| EXPECT_EQ(0U, index); |
| EXPECT_EQ(e_with_acute_combining_mark.size(), length); |
| |
| EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
| e_with_acute_combining_mark, e_base, &index, &length)); |
| EXPECT_EQ(0U, index); |
| EXPECT_EQ(e_base.size(), length); |
| |
| EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
| e_with_acute_combining_mark, e_with_acute_accent, |
| &index, &length)); |
| EXPECT_EQ(0U, index); |
| EXPECT_EQ(e_with_acute_accent.size(), length); |
| |
| EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
| e_with_acute_accent, e_with_acute_combining_mark, |
| &index, &length)); |
| EXPECT_EQ(0U, index); |
| EXPECT_EQ(e_with_acute_combining_mark.size(), length); |
| |
| EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
| e_with_acute_combining_mark, e_with_grave_combining_mark, |
| &index, &length)); |
| EXPECT_EQ(0U, index); |
| EXPECT_EQ(e_with_grave_combining_mark.size(), length); |
| |
| EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
| e_with_grave_combining_mark, e_with_acute_combining_mark, |
| &index, &length)); |
| EXPECT_EQ(0U, index); |
| EXPECT_EQ(e_with_acute_combining_mark.size(), length); |
| |
| EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
| e_with_acute_combining_mark, e_with_grave_accent, &index, &length)); |
| EXPECT_EQ(0U, index); |
| EXPECT_EQ(e_with_grave_accent.size(), length); |
| |
| EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
| e_with_grave_accent, e_with_acute_combining_mark, &index, &length)); |
| EXPECT_EQ(0U, index); |
| EXPECT_EQ(e_with_acute_combining_mark.size(), length); |
| |
| EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
| E_with_acute_accent, e_with_acute_accent, &index, &length)); |
| EXPECT_EQ(0U, index); |
| EXPECT_EQ(e_with_acute_accent.size(), length); |
| |
| EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
| E_with_grave_accent, e_with_acute_accent, &index, &length)); |
| EXPECT_EQ(0U, index); |
| EXPECT_EQ(e_with_acute_accent.size(), length); |
| |
| EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
| E_with_acute_combining_mark, e_with_grave_accent, &index, &length)); |
| EXPECT_EQ(0U, index); |
| EXPECT_EQ(e_with_grave_accent.size(), length); |
| |
| EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
| E_with_grave_combining_mark, e_with_acute_accent, &index, &length)); |
| EXPECT_EQ(0U, index); |
| EXPECT_EQ(e_with_acute_accent.size(), length); |
| |
| EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
| E_base, e_with_grave_accent, &index, &length)); |
| EXPECT_EQ(0U, index); |
| EXPECT_EQ(e_with_grave_accent.size(), length); |
| |
| EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( |
| a_with_acute_accent, e_with_acute_accent, &index, &length)); |
| |
| EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( |
| a_with_acute_combining_mark, e_with_acute_combining_mark, |
| &index, &length)); |
| |
| if (locale_is_posix) |
| SetICUDefaultLocale(default_locale.data()); |
| } |
| |
| TEST(StringSearchTest, UnicodeLocaleDependent) { |
| // Base characters |
| const string16 a_base = WideToUTF16(L"a"); |
| |
| // Composed characters |
| const string16 a_with_ring = WideToUTF16(L"\u00e5"); |
| |
| EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( |
| a_base, a_with_ring, NULL, NULL)); |
| |
| const char* default_locale = uloc_getDefault(); |
| SetICUDefaultLocale("da"); |
| |
| EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( |
| a_base, a_with_ring, NULL, NULL)); |
| |
| SetICUDefaultLocale(default_locale); |
| } |
| |
| TEST(StringSearchTest, FixedPatternMultipleSearch) { |
| std::string default_locale(uloc_getDefault()); |
| bool locale_is_posix = (default_locale == "en_US_POSIX"); |
| if (locale_is_posix) |
| SetICUDefaultLocale("en_US"); |
| |
| size_t index = 0; |
| size_t length = 0; |
| |
| // Search "hello" over multiple texts. |
| FixedPatternStringSearchIgnoringCaseAndAccents query(ASCIIToUTF16("hello")); |
| EXPECT_TRUE(query.Search(ASCIIToUTF16("12hello34"), &index, &length)); |
| EXPECT_EQ(2U, index); |
| EXPECT_EQ(5U, length); |
| EXPECT_FALSE(query.Search(ASCIIToUTF16("bye"), &index, &length)); |
| EXPECT_TRUE(query.Search(ASCIIToUTF16("hELLo"), &index, &length)); |
| EXPECT_EQ(0U, index); |
| EXPECT_EQ(5U, length); |
| |
| if (locale_is_posix) |
| SetICUDefaultLocale(default_locale.data()); |
| } |
| |
| } // namespace i18n |
| } // namespace base |