Add hb_language_get_scripts
This is a copy of the Pango implementation, minus caching.
Tests included.
diff --git a/src/hb-common.cc b/src/hb-common.cc
index e9f9cfe..0e47592 100644
--- a/src/hb-common.cc
+++ b/src/hb-common.cc
@@ -28,6 +28,7 @@
#include "hb.hh"
#include "hb-machinery.hh"
+#include "hb-script-lang-table.h"
#if !defined(HB_NO_SETLOCALE) && (!defined(HAVE_NEWLOCALE) || !defined(HAVE_USELOCALE))
#define HB_NO_SETLOCALE 1
@@ -665,6 +666,125 @@
return HB_DIRECTION_LTR;
}
+/* bsearch()-style comparator that orders two language tags by their first
+ * component only, i.e. the text before the first '-' (or the whole tag when
+ * it contains no '-').  The comparison spans the longer of the two first
+ * components, so "pa" and "pap" compare as different while "pa" and "pa-pk"
+ * compare as equal. */
+static int
+lang_compare_first_component (const void *pa,
+                              const void *pb)
+{
+  const char *lhs = (const char *) pa;
+  const char *rhs = (const char *) pb;
+  const char *lhs_dash = strchr (lhs, '-');
+  const char *rhs_dash = strchr (rhs, '-');
+  unsigned int lhs_len, rhs_len;
+
+  if (lhs_dash)
+    lhs_len = (unsigned int) (lhs_dash - lhs);
+  else
+    lhs_len = strlen (lhs);
+
+  if (rhs_dash)
+    rhs_len = (unsigned int) (rhs_dash - rhs);
+  else
+    rhs_len = strlen (rhs);
+
+  return strncmp (lhs, rhs, hb_max (lhs_len, rhs_len));
+}
+
+/* Looks up the hb_script_for_lang record that best matches @language.
+ *
+ * The table is binary-searched on the first tag component.  Several records
+ * can share that component (e.g. "zh-cn" ... "zh-tw"), and bsearch() may land
+ * on any of them, so the search first advances to the last record of the
+ * group and then walks backwards, returning the first record accepted by
+ * hb_language_matches().
+ *
+ * Returns nullptr when @language is nullptr, when no record shares its first
+ * component, or when none of the candidate records matches. */
+static const HbScriptForLang *
+find_best_lang_match (hb_language_t language)
+{
+  const HbScriptForLang *first, *last, *record;
+  const char *lang_str;
+  unsigned int num_records;
+
+  if (language == nullptr)
+    return nullptr;
+
+  lang_str = language->s;
+
+  num_records = sizeof (hb_script_for_lang) / sizeof (hb_script_for_lang[0]);
+  record = (const HbScriptForLang *) bsearch (lang_str,
+                                              hb_script_for_lang, num_records,
+                                              sizeof (hb_script_for_lang[0]),
+                                              lang_compare_first_component);
+  if (!record)
+    return nullptr;
+
+  first = hb_script_for_lang;
+  last = first + num_records - 1;
+
+  /* find the best match among all those that have the same first-component */
+
+  /* go to the final one matching in the first component */
+  while (record < last &&
+         lang_compare_first_component (lang_str, (record + 1)->lang) == 0)
+    record++;
+
+  /* go back, find which one matches completely */
+  for (;;)
+  {
+    hb_language_t l;
+
+    if (lang_compare_first_component (lang_str, record->lang) != 0)
+      break;
+
+    /* NOTE(review): hb_language_matches() is called as (requested language,
+     * table entry); confirm this argument order gives the intended
+     * "requested tag is covered by the entry" semantics. */
+    l = hb_language_from_string (record->lang, -1);
+    if (hb_language_matches (language, l))
+      return record;
+
+    /* Stop at the first record instead of stepping before the array. */
+    if (record == first)
+      break;
+    record--;
+  }
+
+  return nullptr;
+}
+
+/**
+ * hb_language_get_scripts:
+ * @language: a #hb_language_t
+ * @script_count: (inout) (optional): Input = the maximum number of scripts to return;
+ *                Output = the actual number of scripts returned (may be zero)
+ * @scripts: (out) (array length=script_count): the array of #hb_script_t found
+ *
+ * Fetches the scripts that can be used with @language.
+ *
+ * At most the input value of @script_count entries are written to @scripts.
+ * When @script_count is %NULL, @scripts is not touched and only the total
+ * is returned.
+ *
+ * Return value: the total number of scripts
+ *
+ * Since: REPLACEME
+ */
+HB_EXTERN unsigned int
+hb_language_get_scripts (hb_language_t language,
+                         unsigned int *script_count,
+                         hb_script_t *scripts)
+{
+  const HbScriptForLang *script_for_lang;
+  unsigned int count = 0;
+  unsigned int i;
+
+  script_for_lang = find_best_lang_match (language);
+
+  if (script_for_lang)
+  {
+    /* The per-language list ends at the first zero entry, or at the end of
+     * the fixed-size array when every slot is used. */
+    const unsigned int max_scripts =
+      sizeof (script_for_lang->scripts) / sizeof (script_for_lang->scripts[0]);
+
+    while (count < max_scripts && script_for_lang->scripts[count] != 0)
+      count++;
+  }
+
+  if (script_count)
+  {
+    /* Clamp before copying so that no more than the caller-provided
+     * capacity is ever written to @scripts. */
+    unsigned int n = *script_count < count ? *script_count : count;
+
+    for (i = 0; i < n; i++)
+      scripts[i] = script_for_lang->scripts[i];
+
+    *script_count = n;
+  }
+
+  return count;
+}
+
/* hb_version */
diff --git a/src/hb-common.h b/src/hb-common.h
index a5da4e7..c6c6b98 100644
--- a/src/hb-common.h
+++ b/src/hb-common.h
@@ -758,6 +758,10 @@
HB_EXTERN hb_direction_t
hb_script_get_horizontal_direction (hb_script_t script);
+HB_EXTERN unsigned int
+hb_language_get_scripts (hb_language_t language,
+ unsigned int *script_count,
+ hb_script_t *scripts);
/* User data */
diff --git a/src/hb-script-lang-table.h b/src/hb-script-lang-table.h
new file mode 100644
index 0000000..9b7c0aa
--- /dev/null
+++ b/src/hb-script-lang-table.h
@@ -0,0 +1,253 @@
+typedef struct { /* One table record: a language tag and the scripts listed for it. */
+ const char lang[9]; /* NUL-terminated tag, e.g. "az-ir"; must stay the first member because lookups read a record pointer as a string */
+ hb_script_t scripts[3]; /* scripts for the tag; unused trailing slots are zero and end the list */
+} HbScriptForLang;
+
+static const HbScriptForLang hb_script_for_lang[] = { /* Searched with bsearch() on the first tag component, so entries must stay sorted by it. */
+ { "aa", { HB_SCRIPT_LATIN } },
+ { "ab", { HB_SCRIPT_CYRILLIC } },
+ { "af", { HB_SCRIPT_LATIN } },
+ { "ak", { HB_SCRIPT_LATIN } },
+ { "am", { HB_SCRIPT_ETHIOPIC } },
+ { "an", { HB_SCRIPT_LATIN } },
+ { "ar", { HB_SCRIPT_ARABIC } },
+ { "as", { HB_SCRIPT_BENGALI } },
+ { "ast", { HB_SCRIPT_LATIN } },
+ { "av", { HB_SCRIPT_CYRILLIC } },
+ { "ay", { HB_SCRIPT_LATIN } },
+ { "az-az", { HB_SCRIPT_LATIN } },
+ { "az-ir", { HB_SCRIPT_ARABIC } },
+ { "ba", { HB_SCRIPT_CYRILLIC } },
+ { "be", { HB_SCRIPT_CYRILLIC } },
+ { "ber-dz", { HB_SCRIPT_LATIN } },
+ { "ber-ma", { HB_SCRIPT_TIFINAGH } },
+ { "bg", { HB_SCRIPT_CYRILLIC } },
+ { "bh", { HB_SCRIPT_DEVANAGARI } },
+ { "bho", { HB_SCRIPT_DEVANAGARI } },
+ { "bi", { HB_SCRIPT_LATIN } },
+ { "bin", { HB_SCRIPT_LATIN } },
+ { "bm", { HB_SCRIPT_LATIN } },
+ { "bn", { HB_SCRIPT_BENGALI } },
+ { "bo", { HB_SCRIPT_TIBETAN } },
+ { "br", { HB_SCRIPT_LATIN } },
+ { "brx", { HB_SCRIPT_DEVANAGARI } },
+ { "bs", { HB_SCRIPT_LATIN } },
+ { "bua", { HB_SCRIPT_CYRILLIC } },
+ { "byn", { HB_SCRIPT_ETHIOPIC } },
+ { "ca", { HB_SCRIPT_LATIN } },
+ { "ce", { HB_SCRIPT_CYRILLIC } },
+ { "ch", { HB_SCRIPT_LATIN } },
+ { "chm", { HB_SCRIPT_CYRILLIC } },
+ { "chr", { HB_SCRIPT_CHEROKEE } },
+ { "co", { HB_SCRIPT_LATIN } },
+ { "crh", { HB_SCRIPT_LATIN } },
+ { "cs", { HB_SCRIPT_LATIN } },
+ { "csb", { HB_SCRIPT_LATIN } },
+ { "cu", { HB_SCRIPT_CYRILLIC } },
+ { "cv", { HB_SCRIPT_CYRILLIC, HB_SCRIPT_LATIN } },
+ { "cy", { HB_SCRIPT_LATIN } },
+ { "da", { HB_SCRIPT_LATIN } },
+ { "de", { HB_SCRIPT_LATIN } },
+ { "doi", { HB_SCRIPT_DEVANAGARI } },
+ { "dv", { HB_SCRIPT_THAANA } },
+ { "dz", { HB_SCRIPT_TIBETAN } },
+ { "ee", { HB_SCRIPT_LATIN } },
+ { "el", { HB_SCRIPT_GREEK } },
+ { "en", { HB_SCRIPT_LATIN } },
+ { "eo", { HB_SCRIPT_LATIN } },
+ { "es", { HB_SCRIPT_LATIN } },
+ { "et", { HB_SCRIPT_LATIN } },
+ { "eu", { HB_SCRIPT_LATIN } },
+ { "fa", { HB_SCRIPT_ARABIC } },
+ { "fat", { HB_SCRIPT_LATIN } },
+ { "ff", { HB_SCRIPT_LATIN } },
+ { "fi", { HB_SCRIPT_LATIN } },
+ { "fil", { HB_SCRIPT_LATIN } },
+ { "fj", { HB_SCRIPT_LATIN } },
+ { "fo", { HB_SCRIPT_LATIN } },
+ { "fr", { HB_SCRIPT_LATIN } },
+ { "fur", { HB_SCRIPT_LATIN } },
+ { "fy", { HB_SCRIPT_LATIN } },
+ { "ga", { HB_SCRIPT_LATIN } },
+ { "gd", { HB_SCRIPT_LATIN } },
+ { "gez", { HB_SCRIPT_ETHIOPIC } },
+ { "gl", { HB_SCRIPT_LATIN } },
+ { "gn", { HB_SCRIPT_LATIN } },
+ { "gu", { HB_SCRIPT_GUJARATI } },
+ { "gv", { HB_SCRIPT_LATIN } },
+ { "ha", { HB_SCRIPT_LATIN } },
+ { "haw", { HB_SCRIPT_LATIN } },
+ { "he", { HB_SCRIPT_HEBREW } },
+ { "hi", { HB_SCRIPT_DEVANAGARI } },
+ { "hne", { HB_SCRIPT_DEVANAGARI } },
+ { "ho", { HB_SCRIPT_LATIN } },
+ { "hr", { HB_SCRIPT_LATIN } },
+ { "hsb", { HB_SCRIPT_LATIN } },
+ { "ht", { HB_SCRIPT_LATIN } },
+ { "hu", { HB_SCRIPT_LATIN } },
+ { "hy", { HB_SCRIPT_ARMENIAN } },
+ { "hz", { HB_SCRIPT_LATIN } },
+ { "ia", { HB_SCRIPT_LATIN } },
+ { "id", { HB_SCRIPT_LATIN } },
+ { "ie", { HB_SCRIPT_LATIN } },
+ { "ig", { HB_SCRIPT_LATIN } },
+ { "ii", { HB_SCRIPT_YI } },
+ { "ik", { HB_SCRIPT_CYRILLIC } },
+ { "io", { HB_SCRIPT_LATIN } },
+ { "is", { HB_SCRIPT_LATIN } },
+ { "it", { HB_SCRIPT_LATIN } },
+ { "iu", { HB_SCRIPT_CANADIAN_SYLLABICS } },
+ { "ja", { HB_SCRIPT_HAN, HB_SCRIPT_KATAKANA, HB_SCRIPT_HIRAGANA } },
+ { "jv", { HB_SCRIPT_LATIN } },
+ { "ka", { HB_SCRIPT_GEORGIAN } },
+ { "kaa", { HB_SCRIPT_CYRILLIC } },
+ { "kab", { HB_SCRIPT_LATIN } },
+ { "ki", { HB_SCRIPT_LATIN } },
+ { "kj", { HB_SCRIPT_LATIN } },
+ { "kk", { HB_SCRIPT_CYRILLIC } },
+ { "kl", { HB_SCRIPT_LATIN } },
+ { "km", { HB_SCRIPT_KHMER } },
+ { "kn", { HB_SCRIPT_KANNADA } },
+ { "ko", { HB_SCRIPT_HANGUL } },
+ { "kok", { HB_SCRIPT_DEVANAGARI } },
+ { "kr", { HB_SCRIPT_LATIN } },
+ { "ks", { HB_SCRIPT_ARABIC } },
+ { "ku-am", { HB_SCRIPT_CYRILLIC } },
+ { "ku-iq", { HB_SCRIPT_ARABIC } },
+ { "ku-ir", { HB_SCRIPT_ARABIC } },
+ { "ku-tr", { HB_SCRIPT_LATIN } },
+ { "kum", { HB_SCRIPT_CYRILLIC } },
+ { "kv", { HB_SCRIPT_CYRILLIC } },
+ { "kw", { HB_SCRIPT_LATIN } },
+ { "kwm", { HB_SCRIPT_LATIN } },
+ { "ky", { HB_SCRIPT_CYRILLIC } },
+ { "la", { HB_SCRIPT_LATIN } },
+ { "lah", { HB_SCRIPT_ARABIC } },
+ { "lb", { HB_SCRIPT_LATIN } },
+ { "lez", { HB_SCRIPT_CYRILLIC } },
+ { "lg", { HB_SCRIPT_LATIN } },
+ { "li", { HB_SCRIPT_LATIN } },
+ { "ln", { HB_SCRIPT_LATIN } },
+ { "lo", { HB_SCRIPT_LAO } },
+ { "lt", { HB_SCRIPT_LATIN } },
+ { "lv", { HB_SCRIPT_LATIN } },
+ { "mai", { HB_SCRIPT_DEVANAGARI } },
+ { "mg", { HB_SCRIPT_LATIN } },
+ { "mh", { HB_SCRIPT_LATIN } },
+ { "mi", { HB_SCRIPT_LATIN } },
+ { "mk", { HB_SCRIPT_CYRILLIC } },
+ { "ml", { HB_SCRIPT_MALAYALAM } },
+ { "mn-cn", { HB_SCRIPT_MONGOLIAN } },
+ { "mn-mn", { HB_SCRIPT_CYRILLIC } },
+ { "mni", { HB_SCRIPT_BENGALI } },
+ { "mo", { HB_SCRIPT_CYRILLIC, HB_SCRIPT_LATIN } },
+ { "mr", { HB_SCRIPT_DEVANAGARI } },
+ { "ms", { HB_SCRIPT_LATIN } },
+ { "mt", { HB_SCRIPT_LATIN } },
+ { "my", { HB_SCRIPT_MYANMAR } },
+ { "na", { HB_SCRIPT_LATIN } },
+ { "nb", { HB_SCRIPT_LATIN } },
+ { "nds", { HB_SCRIPT_LATIN } },
+ { "ne", { HB_SCRIPT_DEVANAGARI } },
+ { "ng", { HB_SCRIPT_LATIN } },
+ { "nl", { HB_SCRIPT_LATIN } },
+ { "nn", { HB_SCRIPT_LATIN } },
+ { "no", { HB_SCRIPT_LATIN } },
+ { "nqo", { HB_SCRIPT_NKO } },
+ { "nr", { HB_SCRIPT_LATIN } },
+ { "nso", { HB_SCRIPT_LATIN } },
+ { "nv", { HB_SCRIPT_LATIN } },
+ { "ny", { HB_SCRIPT_LATIN } },
+ { "oc", { HB_SCRIPT_LATIN } },
+ { "om", { HB_SCRIPT_LATIN } },
+ { "or", { HB_SCRIPT_ORIYA } },
+ { "os", { HB_SCRIPT_CYRILLIC } },
+ { "ota", { HB_SCRIPT_ARABIC } },
+ { "pa", { HB_SCRIPT_GURMUKHI } },
+ { "pa-pk", { HB_SCRIPT_ARABIC } },
+ { "pap-an", { HB_SCRIPT_LATIN } },
+ { "pap-aw", { HB_SCRIPT_LATIN } },
+ { "pl", { HB_SCRIPT_LATIN } },
+ { "ps-af", { HB_SCRIPT_ARABIC } },
+ { "ps-pk", { HB_SCRIPT_ARABIC } },
+ { "pt", { HB_SCRIPT_LATIN } },
+ { "qu", { HB_SCRIPT_LATIN } },
+ { "quz", { HB_SCRIPT_LATIN } },
+ { "rm", { HB_SCRIPT_LATIN } },
+ { "rn", { HB_SCRIPT_LATIN } },
+ { "ro", { HB_SCRIPT_LATIN } },
+ { "ru", { HB_SCRIPT_CYRILLIC } },
+ { "rw", { HB_SCRIPT_LATIN } },
+ { "sa", { HB_SCRIPT_DEVANAGARI } },
+ { "sah", { HB_SCRIPT_CYRILLIC } },
+ { "sat", { HB_SCRIPT_DEVANAGARI } },
+ { "sc", { HB_SCRIPT_LATIN } },
+ { "sco", { HB_SCRIPT_LATIN } },
+ { "sd", { HB_SCRIPT_ARABIC } },
+ { "se", { HB_SCRIPT_LATIN } },
+ { "sel", { HB_SCRIPT_CYRILLIC } },
+ { "sg", { HB_SCRIPT_LATIN } },
+ { "sh", { HB_SCRIPT_CYRILLIC, HB_SCRIPT_LATIN } },
+ { "shs", { HB_SCRIPT_LATIN } },
+ { "si", { HB_SCRIPT_SINHALA } },
+ { "sid", { HB_SCRIPT_ETHIOPIC } },
+ { "sk", { HB_SCRIPT_LATIN } },
+ { "sl", { HB_SCRIPT_LATIN } },
+ { "sm", { HB_SCRIPT_LATIN } },
+ { "sma", { HB_SCRIPT_LATIN } },
+ { "smj", { HB_SCRIPT_LATIN } },
+ { "smn", { HB_SCRIPT_LATIN } },
+ { "sms", { HB_SCRIPT_LATIN } },
+ { "sn", { HB_SCRIPT_LATIN } },
+ { "so", { HB_SCRIPT_LATIN } },
+ { "sq", { HB_SCRIPT_LATIN } },
+ { "sr", { HB_SCRIPT_CYRILLIC } },
+ { "ss", { HB_SCRIPT_LATIN } },
+ { "st", { HB_SCRIPT_LATIN } },
+ { "su", { HB_SCRIPT_LATIN } },
+ { "sv", { HB_SCRIPT_LATIN } },
+ { "sw", { HB_SCRIPT_LATIN } },
+ { "syr", { HB_SCRIPT_SYRIAC } },
+ { "ta", { HB_SCRIPT_TAMIL } },
+ { "te", { HB_SCRIPT_TELUGU } },
+ { "tg", { HB_SCRIPT_CYRILLIC } },
+ { "th", { HB_SCRIPT_THAI } },
+ { "ti-er", { HB_SCRIPT_ETHIOPIC } },
+ { "ti-et", { HB_SCRIPT_ETHIOPIC } },
+ { "tig", { HB_SCRIPT_ETHIOPIC } },
+ { "tk", { HB_SCRIPT_LATIN } },
+ { "tl", { HB_SCRIPT_LATIN } },
+ { "tn", { HB_SCRIPT_LATIN } },
+ { "to", { HB_SCRIPT_LATIN } },
+ { "tr", { HB_SCRIPT_LATIN } },
+ { "ts", { HB_SCRIPT_LATIN } },
+ { "tt", { HB_SCRIPT_CYRILLIC } },
+ { "tw", { HB_SCRIPT_LATIN } },
+ { "ty", { HB_SCRIPT_LATIN } },
+ { "tyv", { HB_SCRIPT_CYRILLIC } },
+ { "ug", { HB_SCRIPT_ARABIC } },
+ { "uk", { HB_SCRIPT_CYRILLIC } },
+ { "und-zmth", { HB_SCRIPT_LATIN, HB_SCRIPT_GREEK } },
+ { "und-zsye", { (hb_script_t) 0 } },
+ { "ur", { HB_SCRIPT_ARABIC } },
+ { "uz", { HB_SCRIPT_LATIN } },
+ { "ve", { HB_SCRIPT_LATIN } },
+ { "vi", { HB_SCRIPT_LATIN } },
+ { "vo", { HB_SCRIPT_LATIN } },
+ { "vot", { HB_SCRIPT_LATIN } },
+ { "wa", { HB_SCRIPT_LATIN } },
+ { "wal", { HB_SCRIPT_ETHIOPIC } },
+ { "wen", { HB_SCRIPT_LATIN } },
+ { "wo", { HB_SCRIPT_LATIN } },
+ { "xh", { HB_SCRIPT_LATIN } },
+ { "yap", { HB_SCRIPT_LATIN } },
+ { "yi", { HB_SCRIPT_HEBREW } },
+ { "yo", { HB_SCRIPT_LATIN } },
+ { "za", { HB_SCRIPT_LATIN } },
+ { "zh-cn", { HB_SCRIPT_HAN } },
+ { "zh-hk", { HB_SCRIPT_HAN } },
+ { "zh-mo", { HB_SCRIPT_HAN } },
+ { "zh-sg", { HB_SCRIPT_HAN } },
+ { "zh-tw", { HB_SCRIPT_HAN } },
+ { "zu", { HB_SCRIPT_LATIN } }
+};
diff --git a/test/api/test-common.c b/test/api/test-common.c
index e9fae13..8aa5eaf 100644
--- a/test/api/test-common.c
+++ b/test/api/test-common.c
@@ -210,6 +210,45 @@
g_assert (HB_LANGUAGE_INVALID != hb_language_get_default ());
}
+/* Exercises hb_language_get_scripts() for languages with one, two and three
+ * scripts, plus the case where the caller's capacity is smaller than the
+ * total number of scripts. */
+static void
+test_language_get_scripts (void)
+{
+  hb_script_t scripts[10];
+  unsigned int n_scripts;
+  unsigned int count;
+  hb_language_t lang;
+
+  /* One script, ample capacity. */
+  lang = hb_language_from_string ("en", -1);
+  n_scripts = 10;
+  count = hb_language_get_scripts (lang, &n_scripts, scripts);
+  g_assert (count == 1);
+  g_assert (n_scripts == 1);
+  g_assert (scripts[0] == HB_SCRIPT_LATIN);
+
+  /* Two scripts, ample capacity. */
+  lang = hb_language_from_string ("cv", -1);
+  n_scripts = 10;
+  count = hb_language_get_scripts (lang, &n_scripts, scripts);
+  g_assert (count == 2);
+  g_assert (n_scripts == 2);
+  g_assert (scripts[0] == HB_SCRIPT_CYRILLIC);
+  g_assert (scripts[1] == HB_SCRIPT_LATIN);
+
+  /* Two scripts, room for one: the total is still reported, but only one
+   * script is returned. */
+  lang = hb_language_from_string ("cv", -1);
+  n_scripts = 1;
+  count = hb_language_get_scripts (lang, &n_scripts, scripts);
+  g_assert (count == 2);
+  g_assert (n_scripts == 1);
+  g_assert (scripts[0] == HB_SCRIPT_CYRILLIC);
+
+  /* Three scripts, i.e. every slot of the table record in use. */
+  lang = hb_language_from_string ("ja", -1);
+  n_scripts = 10;
+  count = hb_language_get_scripts (lang, &n_scripts, scripts);
+  g_assert (count == 3);
+  g_assert (n_scripts == 3);
+  g_assert (scripts[0] == HB_SCRIPT_HAN);
+  g_assert (scripts[1] == HB_SCRIPT_KATAKANA);
+  g_assert (scripts[2] == HB_SCRIPT_HIRAGANA);
+}
+
+
int
main (int argc, char **argv)
{
@@ -220,6 +259,7 @@
hb_test_add (test_types_tag);
hb_test_add (test_types_script);
hb_test_add (test_types_language);
+ hb_test_add (test_language_get_scripts);
return hb_test_run();
}