Merge pull request #3402 from harfbuzz/language-tags

Make miscellaneous changes to hb-ot-tag-table.hh
diff --git a/src/gen-tag-table.py b/src/gen-tag-table.py
index cb612b9..f8fb05f 100755
--- a/src/gen-tag-table.py
+++ b/src/gen-tag-table.py
@@ -329,6 +329,10 @@
 		from_bcp_47 (DefaultDict[str, AbstractSet[str]]): ``to_bcp_47``
 			inverted. Its values start as unsorted sets;
 			``sort_languages`` converts them to sorted lists.
+		from_bcp_47_uninherited (Optional[Dict[str, AbstractSet[str]]]):
+			A copy of ``from_bcp_47``. It starts as ``None`` and is
+			populated at the beginning of the first call to
+			``inherit_from_macrolanguages``.
 
 	"""
 	def __init__ (self):
@@ -338,6 +342,7 @@
 		self.ranks = collections.defaultdict (int)
 		self.to_bcp_47 = collections.defaultdict (set)
 		self.from_bcp_47 = collections.defaultdict (set)
+		self.from_bcp_47_uninherited = None
 		# Whether the parser is in a <td> element
 		self._td = False
 		# Whether the parser is after a <br> element within the current <tr> element
@@ -462,30 +467,51 @@
 		explicit mapping, so it inherits from sq (Albanian) the mapping
 		to SQI.
 
+		However, if an OpenType tag maps to a BCP 47 macrolanguage and
+		some but not all of its individual languages, the mapping is not
+		inherited from the macrolanguage to the missing individual
+		languages. For example, INUK (Nunavik Inuktitut) is mapped to
+		ike (Eastern Canadian Inuktitut) and iu (Inuktitut) but not to
+		ikt (Inuinnaqtun, which is an individual language of iu), so
+		this method does not add a mapping from ikt to INUK.
+
 		If a BCP 47 tag for a macrolanguage has no OpenType mapping but
-		all of its individual languages do and they all map to the same
-		tags, the mapping is copied to the macrolanguage.
+		all of its individual languages do, the union of their mappings
+		is copied to the macrolanguage.
 		"""
 		global bcp_47
-		original_ot_from_bcp_47 = dict (self.from_bcp_47)
+		first_time = self.from_bcp_47_uninherited is None
+		if first_time:
+			self.from_bcp_47_uninherited = dict (self.from_bcp_47)
 		for macrolanguage, languages in dict (bcp_47.macrolanguages).items ():
-			ot_macrolanguages = set (original_ot_from_bcp_47.get (macrolanguage, set ()))
+			ot_macrolanguages = {
+				ot_macrolanguage for ot_macrolanguage in self.from_bcp_47_uninherited.get (macrolanguage, set ())
+			}
+			blocked_ot_macrolanguages = set ()
+			if 'retired code' not in bcp_47.scopes.get (macrolanguage, ''):
+				for ot_macrolanguage in ot_macrolanguages:
+					round_trip_macrolanguages = {
+						l for l in self.to_bcp_47[ot_macrolanguage]
+						if 'retired code' not in bcp_47.scopes.get (l, '')
+					}
+					round_trip_languages = {
+						l for l in languages
+						if 'retired code' not in bcp_47.scopes.get (l, '')
+					}
+					intersection = round_trip_macrolanguages & round_trip_languages
+					if intersection and intersection != round_trip_languages:
+						blocked_ot_macrolanguages.add (ot_macrolanguage)
 			if ot_macrolanguages:
 				for ot_macrolanguage in ot_macrolanguages:
-					for language in languages:
-						self.add_language (language, ot_macrolanguage)
-						self.ranks[ot_macrolanguage] += 1
-			else:
+					if ot_macrolanguage not in blocked_ot_macrolanguages:
+						for language in languages:
+							self.add_language (language, ot_macrolanguage)
+							if not blocked_ot_macrolanguages:
+								self.ranks[ot_macrolanguage] += 1
+			elif first_time:
 				for language in languages:
-					if language in original_ot_from_bcp_47:
-						if ot_macrolanguages:
-							ml = original_ot_from_bcp_47[language]
-							if ml:
-								ot_macrolanguages &= ml
-							else:
-								pass
-						else:
-							ot_macrolanguages |= original_ot_from_bcp_47[language]
+					if language in self.from_bcp_47_uninherited:
+						ot_macrolanguages |= self.from_bcp_47_uninherited[language]
 					else:
 						ot_macrolanguages.clear ()
 					if not ot_macrolanguages:
@@ -570,7 +596,7 @@
 						if scope == 'macrolanguage':
 							scope = ' [macrolanguage]'
 						elif scope == 'collection':
-							scope = ' [family]'
+							scope = ' [collection]'
 						else:
 							continue
 						self.scopes[subtag] = scope
@@ -715,6 +741,7 @@
 
 ot.add_language ('oc-provenc', 'PRO')
 
+ot.remove_language_ot ('QUZ')
 ot.add_language ('qu', 'QUZ')
 ot.add_language ('qub', 'QWH')
 ot.add_language ('qud', 'QVI')
@@ -747,7 +774,6 @@
 ot.add_language ('qxt', 'QWH')
 ot.add_language ('qxw', 'QWH')
 
-bcp_47.macrolanguages['ro'].remove ('mo')
 bcp_47.macrolanguages['ro-MD'].add ('mo')
 
 ot.remove_language_ot ('SYRE')
@@ -987,6 +1013,8 @@
 	if initial != 'und':
 		continue
 	for lt, tags in items:
+		if not tags:
+			continue
 		if lt.variant in bcp_47.prefixes:
 			expect (next (iter (bcp_47.prefixes[lt.variant])) == lt.language,
 					'%s is not a valid prefix of %s' % (lt.language, lt.variant))
@@ -1021,6 +1049,8 @@
 		continue
 	print ("  case '%s':" % initial)
 	for lt, tags in items:
+		if not tags:
+			continue
 		print ('    if (', end='')
 		script = lt.script
 		region = lt.region
@@ -1121,9 +1151,13 @@
 		elif len (primary_tags) == 0:
 			expect (ot_tag not in disambiguation, 'There is no possible valid disambiguation for %s' % ot_tag)
 		else:
-			macrolanguages = list (t for t in primary_tags if bcp_47.scopes.get (t) == ' [macrolanguage]')
+			original_languages = [t for t in primary_tags if t in ot.from_bcp_47_uninherited and 'retired code' not in bcp_47.scopes.get (t, '')]
+			if len (original_languages) == 1:
+				macrolanguages = original_languages
+			else:
+				macrolanguages = [t for t in primary_tags if bcp_47.scopes.get (t) == ' [macrolanguage]']
 			if len (macrolanguages) != 1:
-				macrolanguages = list (t for t in primary_tags if bcp_47.scopes.get (t) == ' [family]')
+				macrolanguages = list (t for t in primary_tags if bcp_47.scopes.get (t) == ' [collection]')
 			if len (macrolanguages) != 1:
 				macrolanguages = list (t for t in primary_tags if 'retired code' not in bcp_47.scopes.get (t, ''))
 			if len (macrolanguages) != 1:
diff --git a/src/hb-ot-tag-table.hh b/src/hb-ot-tag-table.hh
index 2c6316d..61d2814 100644
--- a/src/hb-ot-tag-table.hh
+++ b/src/hb-ot-tag-table.hh
@@ -6,8 +6,8 @@
  *
  * on files with these headers:
  *
- * <meta name="updated_at" content="2021-12-09 12:01 AM" />
- * File-Date: 2021-08-06
+ * <meta name="updated_at" content="2022-01-28 10:00 PM" />
+ * File-Date: 2021-12-29
  */
 
 #ifndef HB_OT_TAG_TABLE_HH
@@ -66,7 +66,7 @@
   {"an",	HB_TAG('A','R','G',' ')},	/* Aragonese */
 /*{"ang",	HB_TAG('A','N','G',' ')},*/	/* Old English (ca. 450-1100) -> Anglo-Saxon */
   {"aoa",	HB_TAG('C','P','P',' ')},	/* Angolar -> Creoles */
-  {"apa",	HB_TAG('A','T','H',' ')},	/* Apache [family] -> Athapaskan */
+  {"apa",	HB_TAG('A','T','H',' ')},	/* Apache [collection] -> Athapaskan */
   {"apc",	HB_TAG('A','R','A',' ')},	/* North Levantine Arabic -> Arabic */
   {"apd",	HB_TAG('A','R','A',' ')},	/* Sudanese Arabic -> Arabic */
   {"apj",	HB_TAG('A','T','H',' ')},	/* Jicarilla Apache -> Athapaskan */
@@ -86,7 +86,7 @@
   {"arz",	HB_TAG('A','R','A',' ')},	/* Egyptian Arabic -> Arabic */
   {"as",	HB_TAG('A','S','M',' ')},	/* Assamese */
 /*{"ast",	HB_TAG('A','S','T',' ')},*/	/* Asturian */
-/*{"ath",	HB_TAG('A','T','H',' ')},*/	/* Athapascan [family] -> Athapaskan */
+/*{"ath",	HB_TAG('A','T','H',' ')},*/	/* Athapascan [collection] -> Athapaskan */
   {"atj",	HB_TAG('R','C','R',' ')},	/* Atikamekw -> R-Cree */
   {"atv",	HB_TAG('A','L','T',' ')},	/* Northern Altai -> Altai */
   {"auj",	HB_TAG('B','B','R',' ')},	/* Awjilah -> Berber */
@@ -110,10 +110,10 @@
   {"azn",	HB_TAG('N','A','H',' ')},	/* Western Durango Nahuatl -> Nahuatl */
   {"azz",	HB_TAG('N','A','H',' ')},	/* Highland Puebla Nahuatl -> Nahuatl */
   {"ba",	HB_TAG('B','S','H',' ')},	/* Bashkir */
-  {"bad",	HB_TAG('B','A','D','0')},	/* Banda [family] */
+  {"bad",	HB_TAG('B','A','D','0')},	/* Banda [collection] */
   {"bag",	HB_TAG_NONE	       },	/* Tuki != Baghelkhandi */
   {"bah",	HB_TAG('C','P','P',' ')},	/* Bahamas Creole English -> Creoles */
-  {"bai",	HB_TAG('B','M','L',' ')},	/* Bamileke [family] */
+  {"bai",	HB_TAG('B','M','L',' ')},	/* Bamileke [collection] */
   {"bal",	HB_TAG('B','L','I',' ')},	/* Baluchi [macrolanguage] */
 /*{"ban",	HB_TAG('B','A','N',' ')},*/	/* Balinese */
 /*{"bar",	HB_TAG('B','A','R',' ')},*/	/* Bavarian */
@@ -135,7 +135,7 @@
   {"bea",	HB_TAG('A','T','H',' ')},	/* Beaver -> Athapaskan */
   {"beb",	HB_TAG('B','T','I',' ')},	/* Bebele -> Beti */
 /*{"bem",	HB_TAG('B','E','M',' ')},*/	/* Bemba (Zambia) */
-  {"ber",	HB_TAG('B','B','R',' ')},	/* Berber [family] */
+  {"ber",	HB_TAG('B','B','R',' ')},	/* Berber [collection] */
   {"bew",	HB_TAG('C','P','P',' ')},	/* Betawi -> Creoles */
   {"bfl",	HB_TAG('B','A','D','0')},	/* Banda-Ndélé -> Banda */
   {"bfq",	HB_TAG('B','A','D',' ')},	/* Badaga */
@@ -203,7 +203,7 @@
   {"btd",	HB_TAG('B','T','K',' ')},	/* Batak Dairi -> Batak */
   {"bti",	HB_TAG_NONE	       },	/* Burate != Beti */
   {"btj",	HB_TAG('M','L','Y',' ')},	/* Bacanese Malay -> Malay */
-/*{"btk",	HB_TAG('B','T','K',' ')},*/	/* Batak [family] */
+/*{"btk",	HB_TAG('B','T','K',' ')},*/	/* Batak [collection] */
   {"btm",	HB_TAG('B','T','M',' ')},	/* Batak Mandailing */
   {"btm",	HB_TAG('B','T','K',' ')},	/* Batak Mandailing -> Batak */
   {"bto",	HB_TAG('B','I','K',' ')},	/* Rinconada Bikol -> Bikol */
@@ -256,6 +256,8 @@
   {"chh",	HB_TAG_NONE	       },	/* Chinook != Chattisgarhi */
   {"chj",	HB_TAG('C','C','H','N')},	/* Ojitlán Chinantec -> Chinantec */
   {"chk",	HB_TAG('C','H','K','0')},	/* Chuukese */
+  {"chm",	HB_TAG('H','M','A',' ')},	/* Mari (Russia) [macrolanguage] -> High Mari */
+  {"chm",	HB_TAG('L','M','A',' ')},	/* Mari (Russia) [macrolanguage] -> Low Mari */
   {"chn",	HB_TAG('C','P','P',' ')},	/* Chinook jargon -> Creoles */
 /*{"cho",	HB_TAG('C','H','O',' ')},*/	/* Choctaw */
   {"chp",	HB_TAG('C','H','P',' ')},	/* Chipewyan */
@@ -297,10 +299,10 @@
 /*{"cop",	HB_TAG('C','O','P',' ')},*/	/* Coptic */
   {"coq",	HB_TAG('A','T','H',' ')},	/* Coquille -> Athapaskan */
   {"cpa",	HB_TAG('C','C','H','N')},	/* Palantla Chinantec -> Chinantec */
-  {"cpe",	HB_TAG('C','P','P',' ')},	/* English-based creoles and pidgins [family] -> Creoles */
-  {"cpf",	HB_TAG('C','P','P',' ')},	/* French-based creoles and pidgins [family] -> Creoles */
+  {"cpe",	HB_TAG('C','P','P',' ')},	/* English-based creoles and pidgins [collection] -> Creoles */
+  {"cpf",	HB_TAG('C','P','P',' ')},	/* French-based creoles and pidgins [collection] -> Creoles */
   {"cpi",	HB_TAG('C','P','P',' ')},	/* Chinese Pidgin English -> Creoles */
-/*{"cpp",	HB_TAG('C','P','P',' ')},*/	/* Portuguese-based creoles and pidgins [family] -> Creoles */
+/*{"cpp",	HB_TAG('C','P','P',' ')},*/	/* Portuguese-based creoles and pidgins [collection] -> Creoles */
   {"cpx",	HB_TAG('Z','H','S',' ')},	/* Pu-Xian Chinese -> Chinese, Simplified */
   {"cqd",	HB_TAG('H','M','N',' ')},	/* Chuanqiandian Cluster Miao -> Hmong */
   {"cqu",	HB_TAG('Q','U','H',' ')},	/* Chilean Quechua (retired code) -> Quechua (Bolivia) */
@@ -320,7 +322,7 @@
   {"crm",	HB_TAG('M','C','R',' ')},	/* Moose Cree */
   {"crm",	HB_TAG('L','C','R',' ')},	/* Moose Cree -> L-Cree */
   {"crm",	HB_TAG('C','R','E',' ')},	/* Moose Cree -> Cree */
-  {"crp",	HB_TAG('C','P','P',' ')},	/* Creoles and pidgins [family] -> Creoles */
+  {"crp",	HB_TAG('C','P','P',' ')},	/* Creoles and pidgins [collection] -> Creoles */
   {"crr",	HB_TAG_NONE	       },	/* Carolina Algonquian != Carrier */
   {"crs",	HB_TAG('C','P','P',' ')},	/* Seselwa Creole French -> Creoles */
   {"crt",	HB_TAG_NONE	       },	/* Iyojwa'ja Chorote != Crimean Tatar */
@@ -431,7 +433,7 @@
   {"et",	HB_TAG('E','T','I',' ')},	/* Estonian [macrolanguage] */
   {"eto",	HB_TAG('B','T','I',' ')},	/* Eton (Cameroon) -> Beti */
   {"eu",	HB_TAG('E','U','Q',' ')},	/* Basque */
-  {"euq",	HB_TAG_NONE	       },	/* Basque [family] != Basque */
+  {"euq",	HB_TAG_NONE	       },	/* Basque [collection] != Basque */
   {"eve",	HB_TAG('E','V','N',' ')},	/* Even */
   {"evn",	HB_TAG('E','V','K',' ')},	/* Evenki */
   {"ewo",	HB_TAG('B','T','I',' ')},	/* Ewondo -> Beti */
@@ -620,10 +622,11 @@
   {"ijc",	HB_TAG('I','J','O',' ')},	/* Izon -> Ijo */
   {"ije",	HB_TAG('I','J','O',' ')},	/* Biseni -> Ijo */
   {"ijn",	HB_TAG('I','J','O',' ')},	/* Kalabari -> Ijo */
-/*{"ijo",	HB_TAG('I','J','O',' ')},*/	/* Ijo [family] */
+/*{"ijo",	HB_TAG('I','J','O',' ')},*/	/* Ijo [collection] */
   {"ijs",	HB_TAG('I','J','O',' ')},	/* Southeast Ijo -> Ijo */
   {"ik",	HB_TAG('I','P','K',' ')},	/* Inupiaq [macrolanguage] -> Inupiat */
   {"ike",	HB_TAG('I','N','U',' ')},	/* Eastern Canadian Inuktitut -> Inuktitut */
+  {"ike",	HB_TAG('I','N','U','K')},	/* Eastern Canadian Inuktitut -> Nunavik Inuktitut */
   {"ikt",	HB_TAG('I','N','U',' ')},	/* Inuinnaqtun -> Inuktitut */
 /*{"ilo",	HB_TAG('I','L','O',' ')},*/	/* Iloko -> Ilokano */
   {"in",	HB_TAG('I','N','D',' ')},	/* Indonesian (retired code) */
@@ -638,6 +641,7 @@
   {"it",	HB_TAG('I','T','A',' ')},	/* Italian */
   {"itz",	HB_TAG('M','Y','N',' ')},	/* Itzá -> Mayan */
   {"iu",	HB_TAG('I','N','U',' ')},	/* Inuktitut [macrolanguage] */
+  {"iu",	HB_TAG('I','N','U','K')},	/* Inuktitut [macrolanguage] -> Nunavik Inuktitut */
   {"iw",	HB_TAG('I','W','R',' ')},	/* Hebrew (retired code) */
   {"ixl",	HB_TAG('M','Y','N',' ')},	/* Ixil -> Mayan */
   {"ja",	HB_TAG('J','A','N',' ')},	/* Japanese */
@@ -667,7 +671,7 @@
   {"kab",	HB_TAG('B','B','R',' ')},	/* Kabyle -> Berber */
   {"kac",	HB_TAG_NONE	       },	/* Kachin != Kachchi */
   {"kam",	HB_TAG('K','M','B',' ')},	/* Kamba (Kenya) */
-  {"kar",	HB_TAG('K','R','N',' ')},	/* Karen [family] */
+  {"kar",	HB_TAG('K','R','N',' ')},	/* Karen [collection] */
 /*{"kaw",	HB_TAG('K','A','W',' ')},*/	/* Kawi (Old Javanese) */
   {"kbd",	HB_TAG('K','A','B',' ')},	/* Kabardian */
   {"kby",	HB_TAG('K','N','R',' ')},	/* Manga Kanuri -> Kanuri */
@@ -876,7 +880,7 @@
   {"mam",	HB_TAG('M','A','M',' ')},	/* Mam */
   {"mam",	HB_TAG('M','Y','N',' ')},	/* Mam -> Mayan */
   {"man",	HB_TAG('M','N','K',' ')},	/* Mandingo [macrolanguage] -> Maninka */
-  {"map",	HB_TAG_NONE	       },	/* Austronesian [family] != Mapudungun */
+  {"map",	HB_TAG_NONE	       },	/* Austronesian [collection] != Mapudungun */
   {"maw",	HB_TAG_NONE	       },	/* Mampruli != Marwari */
   {"max",	HB_TAG('M','L','Y',' ')},	/* North Moluccan Malay -> Malay */
   {"max",	HB_TAG('C','P','P',' ')},	/* North Moluccan Malay -> Creoles */
@@ -936,6 +940,7 @@
   {"mnw",	HB_TAG('M','O','N','T')},	/* Mon -> Thailand Mon */
   {"mnx",	HB_TAG_NONE	       },	/* Manikion != Manx */
   {"mo",	HB_TAG('M','O','L',' ')},	/* Moldavian (retired code) */
+  {"mo",	HB_TAG('R','O','M',' ')},	/* Moldavian (retired code) -> Romanian */
   {"mod",	HB_TAG('C','P','P',' ')},	/* Mobilian -> Creoles */
 /*{"moh",	HB_TAG('M','O','H',' ')},*/	/* Mohawk */
   {"mok",	HB_TAG_NONE	       },	/* Morori != Moksha */
@@ -958,7 +963,7 @@
   {"mts",	HB_TAG_NONE	       },	/* Yora != Maltese */
   {"mud",	HB_TAG('C','P','P',' ')},	/* Mednyj Aleut -> Creoles */
   {"mui",	HB_TAG('M','L','Y',' ')},	/* Musi -> Malay */
-  {"mun",	HB_TAG_NONE	       },	/* Munda [family] != Mundari */
+  {"mun",	HB_TAG_NONE	       },	/* Munda [collection] != Mundari */
   {"mup",	HB_TAG('R','A','J',' ')},	/* Malvi -> Rajasthani */
   {"muq",	HB_TAG('H','M','N',' ')},	/* Eastern Xiangxi Miao -> Hmong */
 /*{"mus",	HB_TAG('M','U','S',' ')},*/	/* Creek -> Muscogee */
@@ -973,7 +978,7 @@
   {"mww",	HB_TAG('H','M','N',' ')},	/* Hmong Daw -> Hmong */
   {"my",	HB_TAG('B','R','M',' ')},	/* Burmese */
   {"mym",	HB_TAG('M','E','N',' ')},	/* Me’en */
-/*{"myn",	HB_TAG('M','Y','N',' ')},*/	/* Mayan [family] */
+/*{"myn",	HB_TAG('M','Y','N',' ')},*/	/* Mayan [collection] */
   {"myq",	HB_TAG('M','N','K',' ')},	/* Forest Maninka (retired code) -> Maninka */
   {"myv",	HB_TAG('E','R','Z',' ')},	/* Erzya */
   {"mzb",	HB_TAG('B','B','R',' ')},	/* Tumzabt -> Berber */
@@ -982,7 +987,7 @@
   {"na",	HB_TAG('N','A','U',' ')},	/* Nauru -> Nauruan */
   {"nag",	HB_TAG('N','A','G',' ')},	/* Naga Pidgin -> Naga-Assamese */
   {"nag",	HB_TAG('C','P','P',' ')},	/* Naga Pidgin -> Creoles */
-/*{"nah",	HB_TAG('N','A','H',' ')},*/	/* Nahuatl [family] */
+/*{"nah",	HB_TAG('N','A','H',' ')},*/	/* Nahuatl [collection] */
   {"nan",	HB_TAG('Z','H','S',' ')},	/* Min Nan Chinese -> Chinese, Simplified */
 /*{"nap",	HB_TAG('N','A','P',' ')},*/	/* Neapolitan */
   {"nas",	HB_TAG_NONE	       },	/* Naasioi != Naskapi */
@@ -1039,7 +1044,6 @@
   {"nln",	HB_TAG('N','A','H',' ')},	/* Durango Nahuatl (retired code) -> Nahuatl */
   {"nlv",	HB_TAG('N','A','H',' ')},	/* Orizaba Nahuatl -> Nahuatl */
   {"nn",	HB_TAG('N','Y','N',' ')},	/* Norwegian Nynorsk (Nynorsk, Norwegian) */
-  {"nn",	HB_TAG('N','O','R',' ')},	/* Norwegian Nynorsk -> Norwegian */
   {"nnh",	HB_TAG('B','M','L',' ')},	/* Ngiemboon -> Bamileke */
   {"nnz",	HB_TAG('B','M','L',' ')},	/* Nda'nda' -> Bamileke */
   {"no",	HB_TAG('N','O','R',' ')},	/* Norwegian [macrolanguage] */
@@ -1093,7 +1097,7 @@
   {"otw",	HB_TAG('O','J','B',' ')},	/* Ottawa -> Ojibway */
   {"oua",	HB_TAG('B','B','R',' ')},	/* Tagargrent -> Berber */
   {"pa",	HB_TAG('P','A','N',' ')},	/* Punjabi */
-  {"paa",	HB_TAG_NONE	       },	/* Papuan [family] != Palestinian Aramaic */
+  {"paa",	HB_TAG_NONE	       },	/* Papuan [collection] != Palestinian Aramaic */
 /*{"pag",	HB_TAG('P','A','G',' ')},*/	/* Pangasinan */
   {"pal",	HB_TAG_NONE	       },	/* Pahlavi != Pali */
 /*{"pam",	HB_TAG('P','A','M',' ')},*/	/* Pampanga -> Pampangan */
@@ -1308,6 +1312,9 @@
   {"sgo",	HB_TAG_NONE	       },	/* Songa (retired code) != Sango */
 /*{"sgs",	HB_TAG('S','G','S',' ')},*/	/* Samogitian */
   {"sgw",	HB_TAG('C','H','G',' ')},	/* Sebat Bet Gurage -> Chaha Gurage */
+  {"sh",	HB_TAG('B','O','S',' ')},	/* Serbo-Croatian [macrolanguage] -> Bosnian */
+  {"sh",	HB_TAG('H','R','V',' ')},	/* Serbo-Croatian [macrolanguage] -> Croatian */
+  {"sh",	HB_TAG('S','R','B',' ')},	/* Serbo-Croatian [macrolanguage] -> Serbian */
   {"shi",	HB_TAG('S','H','I',' ')},	/* Tachelhit */
   {"shi",	HB_TAG('B','B','R',' ')},	/* Tachelhit -> Berber */
   {"shl",	HB_TAG('Q','I','N',' ')},	/* Shendu -> Chin */
@@ -1329,7 +1336,7 @@
   {"skw",	HB_TAG('C','P','P',' ')},	/* Skepi Creole Dutch -> Creoles */
   {"sky",	HB_TAG_NONE	       },	/* Sikaiana != Slovak */
   {"sl",	HB_TAG('S','L','V',' ')},	/* Slovenian */
-  {"sla",	HB_TAG_NONE	       },	/* Slavic [family] != Slavey */
+  {"sla",	HB_TAG_NONE	       },	/* Slavic [collection] != Slavey */
   {"sm",	HB_TAG('S','M','O',' ')},	/* Samoan */
   {"sma",	HB_TAG('S','S','M',' ')},	/* Southern Sami */
   {"smj",	HB_TAG('L','S','M',' ')},	/* Lule Sami */
@@ -1451,7 +1458,7 @@
   {"tpi",	HB_TAG('C','P','P',' ')},	/* Tok Pisin -> Creoles */
   {"tr",	HB_TAG('T','R','K',' ')},	/* Turkish */
   {"trf",	HB_TAG('C','P','P',' ')},	/* Trinidadian Creole English -> Creoles */
-  {"trk",	HB_TAG_NONE	       },	/* Turkic [family] != Turkish */
+  {"trk",	HB_TAG_NONE	       },	/* Turkic [collection] != Turkish */
   {"tru",	HB_TAG('T','U','A',' ')},	/* Turoyo -> Turoyo Aramaic */
   {"tru",	HB_TAG('S','Y','R',' ')},	/* Turoyo -> Syriac */
   {"ts",	HB_TAG('T','S','G',' ')},	/* Tsonga */
@@ -1593,7 +1600,7 @@
   {"zlq",	HB_TAG('Z','H','A',' ')},	/* Liuqian Zhuang -> Zhuang */
   {"zmi",	HB_TAG('M','L','Y',' ')},	/* Negeri Sembilan Malay -> Malay */
   {"zmz",	HB_TAG('B','A','D','0')},	/* Mbandja -> Banda */
-  {"znd",	HB_TAG_NONE	       },	/* Zande [family] != Zande */
+  {"znd",	HB_TAG_NONE	       },	/* Zande [collection] != Zande */
   {"zne",	HB_TAG('Z','N','D',' ')},	/* Zande */
   {"zom",	HB_TAG('Q','I','N',' ')},	/* Zou -> Chin */
   {"zqe",	HB_TAG('Z','H','A',' ')},	/* Qiubei Zhuang -> Zhuang */
@@ -2607,14 +2614,8 @@
     if (0 == strcmp (&lang_str[1], "o-nyn"))
     {
       /* Norwegian Nynorsk (retired code) */
-      unsigned int i;
-      hb_tag_t possible_tags[] = {
-	HB_TAG('N','Y','N',' '),  /* Norwegian Nynorsk (Nynorsk, Norwegian) */
-	HB_TAG('N','O','R',' '),  /* Norwegian */
-      };
-      for (i = 0; i < 2 && i < *count; i++)
-	tags[i] = possible_tags[i];
-      *count = i;
+      tags[0] = HB_TAG('N','Y','N',' ');  /* Norwegian Nynorsk (Nynorsk, Norwegian) */
+      *count = 1;
       return true;
     }
     break;
@@ -2623,8 +2624,14 @@
 	&& subtag_matches (lang_str, limit, "-md"))
     {
       /* Romanian; Moldova */
-      tags[0] = HB_TAG('M','O','L',' ');  /* Moldavian */
-      *count = 1;
+      unsigned int i;
+      hb_tag_t possible_tags[] = {
+	HB_TAG('M','O','L',' '),  /* Moldavian */
+	HB_TAG('R','O','M',' '),  /* Romanian */
+      };
+      for (i = 0; i < 2 && i < *count; i++)
+	tags[i] = possible_tags[i];
+      *count = i;
       return true;
     }
     break;
@@ -2813,15 +2820,15 @@
   case HB_TAG('A','R','K',' '):  /* Rakhine */
     return hb_language_from_string ("rki", -1);  /* Rakhine */
   case HB_TAG('A','T','H',' '):  /* Athapaskan */
-    return hb_language_from_string ("ath", -1);  /* Athapascan [family] */
+    return hb_language_from_string ("ath", -1);  /* Athapascan [collection] */
   case HB_TAG('B','B','R',' '):  /* Berber */
-    return hb_language_from_string ("ber", -1);  /* Berber [family] */
+    return hb_language_from_string ("ber", -1);  /* Berber [collection] */
   case HB_TAG('B','I','K',' '):  /* Bikol */
     return hb_language_from_string ("bik", -1);  /* Bikol [macrolanguage] */
   case HB_TAG('B','T','K',' '):  /* Batak */
-    return hb_language_from_string ("btk", -1);  /* Batak [family] */
+    return hb_language_from_string ("btk", -1);  /* Batak [collection] */
   case HB_TAG('C','P','P',' '):  /* Creoles */
-    return hb_language_from_string ("crp", -1);  /* Creoles and pidgins [family] */
+    return hb_language_from_string ("crp", -1);  /* Creoles and pidgins [collection] */
   case HB_TAG('C','R','R',' '):  /* Carrier */
     return hb_language_from_string ("crx", -1);  /* Carrier */
   case HB_TAG('D','G','R',' '):  /* Dogri (macrolanguage) */
@@ -2838,6 +2845,8 @@
     return hb_language_from_string ("fa", -1);  /* Persian [macrolanguage] */
   case HB_TAG('G','O','N',' '):  /* Gondi */
     return hb_language_from_string ("gon", -1);  /* Gondi [macrolanguage] */
+  case HB_TAG('H','M','A',' '):  /* High Mari */
+    return hb_language_from_string ("mrj", -1);  /* Western Mari */
   case HB_TAG('H','M','N',' '):  /* Hmong */
     return hb_language_from_string ("hmn", -1);  /* Hmong [macrolanguage] */
   case HB_TAG('H','N','D',' '):  /* Hindko */
@@ -2847,7 +2856,7 @@
   case HB_TAG('I','B','A',' '):  /* Iban */
     return hb_language_from_string ("iba", -1);  /* Iban */
   case HB_TAG('I','J','O',' '):  /* Ijo */
-    return hb_language_from_string ("ijo", -1);  /* Ijo [family] */
+    return hb_language_from_string ("ijo", -1);  /* Ijo [collection] */
   case HB_TAG('I','N','U',' '):  /* Inuktitut */
     return hb_language_from_string ("iu", -1);  /* Inuktitut [macrolanguage] */
   case HB_TAG('I','P','K',' '):  /* Inupiat */
@@ -2873,11 +2882,13 @@
   case HB_TAG('K','P','L',' '):  /* Kpelle */
     return hb_language_from_string ("kpe", -1);  /* Kpelle [macrolanguage] */
   case HB_TAG('K','R','N',' '):  /* Karen */
-    return hb_language_from_string ("kar", -1);  /* Karen [family] */
+    return hb_language_from_string ("kar", -1);  /* Karen [collection] */
   case HB_TAG('K','U','I',' '):  /* Kui */
     return hb_language_from_string ("uki", -1);  /* Kui (India) */
   case HB_TAG('K','U','R',' '):  /* Kurdish */
     return hb_language_from_string ("ku", -1);  /* Kurdish [macrolanguage] */
+  case HB_TAG('L','M','A',' '):  /* Low Mari */
+    return hb_language_from_string ("mhr", -1);  /* Eastern Mari */
   case HB_TAG('L','U','H',' '):  /* Luyia */
     return hb_language_from_string ("luy", -1);  /* Luyia [macrolanguage] */
   case HB_TAG('L','V','I',' '):  /* Latvian */
@@ -2897,9 +2908,9 @@
   case HB_TAG('M','O','N','T'):  /* Thailand Mon */
     return hb_language_from_string ("mnw-TH", -1);  /* Mon; Thailand */
   case HB_TAG('M','Y','N',' '):  /* Mayan */
-    return hb_language_from_string ("myn", -1);  /* Mayan [family] */
+    return hb_language_from_string ("myn", -1);  /* Mayan [collection] */
   case HB_TAG('N','A','H',' '):  /* Nahuatl */
-    return hb_language_from_string ("nah", -1);  /* Nahuatl [family] */
+    return hb_language_from_string ("nah", -1);  /* Nahuatl [collection] */
   case HB_TAG('N','E','P',' '):  /* Nepali */
     return hb_language_from_string ("ne", -1);  /* Nepali [macrolanguage] */
   case HB_TAG('N','I','S',' '):  /* Nisi */
@@ -2926,6 +2937,8 @@
     return hb_language_from_string ("qwh", -1);  /* Huaylas Ancash Quechua */
   case HB_TAG('R','A','J',' '):  /* Rajasthani */
     return hb_language_from_string ("raj", -1);  /* Rajasthani [macrolanguage] */
+  case HB_TAG('R','O','M',' '):  /* Romanian */
+    return hb_language_from_string ("ro", -1);  /* Romanian */
   case HB_TAG('R','O','Y',' '):  /* Romany */
     return hb_language_from_string ("rom", -1);  /* Romany [macrolanguage] */
   case HB_TAG('S','Q','I',' '):  /* Albanian */