// lib/web_ui/test/text/line_breaker_test_helper.dart - mirrors/engine - Git at Google

 // Copyright 2013 The Flutter Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 /// Converts the contents of a line-break test data file into [TestCase]s.
 ///
 /// [rawTestData] is split on `'\n'` and every line rejected by
 /// [isValidTestCase] is dropped. Each remaining line is first adjusted by
 /// `_checkReplacement` (with [isV8] passed through) and then parsed.
 List<TestCase> parseRawTestData(String rawTestData, {required bool isV8}) {
   return <TestCase>[
     for (final String line in rawTestData.split('\n'))
       if (isValidTestCase(line)) _parse(_checkReplacement(line, isV8: isV8)),
   ];
 }

 /// Whether [line] is a test case, i.e. whether it starts with the `×` sign.
 bool isValidTestCase(String line) => line.startsWith('×');

 /// Returns [line] with selected `×`/`÷` expectations flipped.
 ///
 /// Each adjustment rewrites the sign in front of specific hex code points and
 /// the matching sign in the explanation text. The space adjustment always
 /// applies; the LB25 adjustments apply only when [isV8] is false, and the
 /// v8BreakIterator adjustments apply only when [isV8] is true. A line that
 /// matches none of the patterns is returned unchanged.
 String _checkReplacement(String line, {required bool isV8}) {
   // Turns the `÷ [999.0]` captured by [pattern] into `× [999.0]`, leaving the
   // three captured groups around it as they were.
   String forbidBreak(String text, RegExp pattern) {
     return text.replaceAllMapped(
       pattern,
       (Match m) => '(${m.group(1)})${m.group(2)} × [999.0] ${m.group(3)}',
     );
   }

   String result = line;

   // Rules LB8, LB11, LB13, LB14, LB15, LB16 and LB17 are special-cased so
   // that a line break is allowed after spaces.
   final RegExp afterSpace = RegExp(r'SPACE \(SP\) × \[(8|11|13|14|15|16|17)\.');
   if (result.contains(afterSpace)) {
     result = result.replaceAll('0020 ×', '0020 ÷'); // SPACE (SP)
     result = result.replaceAllMapped(
       afterSpace,
       (Match m) => 'SPACE (SP) ÷ [${m.group(1)}.',
     );
   }

   if (!isV8) {
     // Some test cases contradict rule LB25; the three patterns below bring
     // them in line with it.

     final RegExp closeThenSymbol = RegExp(r'\((CP_CP30|CL)\)(.*?) ÷ \[999\.0\] (PERCENT|DOLLAR)');
     if (result.contains(closeThenSymbol)) {
       result = result.replaceAll(' ÷ 0024', ' × 0024'); // DOLLAR SIGN (PR)
       result = result.replaceAll(' ÷ 0025', ' × 0025'); // PERCENT SIGN (PO)
       result = forbidBreak(result, closeThenSymbol);
     }

     final RegExp separatorThenDigit = RegExp(r'\((IS|SY)\)(.*?) ÷ \[999\.0\] (DIGIT)');
     if (result.contains(separatorThenDigit)) {
       result = result.replaceAll(' ÷ 0030', ' × 0030'); // DIGIT ZERO (NU)
       result = forbidBreak(result, separatorThenDigit);
     }

     final RegExp symbolThenOpen = RegExp(r'\((PR|PO)\)(.*?) ÷ \[999\.0\] (LEFT)');
     if (result.contains(symbolThenOpen)) {
       result = result.replaceAll(' ÷ 0028', ' × 0028'); // LEFT PARENTHESIS (OP_OP30)
       result = result.replaceAll(' ÷ 007B', ' × 007B'); // LEFT CURLY BRACKET (OP_OP30)
       result = result.replaceAll(' ÷ 2329', ' × 2329'); // LEFT-POINTING ANGLE BRACKET (OP)
       result = forbidBreak(result, symbolThenOpen);
     }
   } else {
     // v8BreakIterator deviates from the spec around Hiragana and Katakana
     // letters.

     final RegExp kanaAfter21 = RegExp(r' × \[21\.03\] (HIRAGANA LETTER|KATAKANA LETTER|KATAKANA-HIRAGANA)');
     if (result.contains(kanaAfter21) &&
         !(result.contains('(BB)') || result.contains('(PR)'))) {
       result = result.replaceAll(' × 3041', ' ÷ 3041'); // HIRAGANA LETTER (CJ)
       result = result.replaceAll(' × 30E5', ' ÷ 30E5'); // KATAKANA LETTER (CJ)
       result = result.replaceAll(' × 30FC', ' ÷ 30FC'); // KATAKANA-HIRAGANA PROLONGED SOUND MARK (CJ)
       result = result.replaceAllMapped(
         kanaAfter21,
         (Match m) => ' ÷ [21.03] ${m.group(1)}',
       );
     }

     if (result.contains(' × [16.0] HIRAGANA LETTER')) {
       result = result.replaceAll(' × 3041', ' ÷ 3041'); // HIRAGANA LETTER (CJ)
       result = result.replaceAll(
         ' × [16.0] HIRAGANA LETTER',
         ' ÷ [16.0] HIRAGANA LETTER',
       );
     }

     final RegExp hiraganaThenPercent = RegExp(r'HIRAGANA .*? ÷ \[999\.0\] PERCENT');
     if (result.contains(hiraganaThenPercent)) {
       result = result.replaceAll(' ÷ 0025', ' × 0025'); // PERCENT SIGN (PO)
       result = result.replaceAll(
         ' ÷ [999.0] PERCENT',
         ' × [999.0] PERCENT',
       );
     }

     // v8BreakIterator also deviates from the spec around hyphens, commas and
     // full stops.

     final RegExp hyphenOrSeparator = RegExp(r'\((HY|IS)\)(.*?) ÷ \[999\.0\] (DIGIT|NUMBER|SECTION|THAI|<reserved-50005>)');
     if (result.contains(hyphenOrSeparator)) {
       result = result.replaceAll(' ÷ 0030', ' × 0030'); // DIGIT ZERO (NU)
       result = result.replaceAll(' ÷ 0023', ' × 0023'); // NUMBER SIGN (AL)
       result = result.replaceAll(' ÷ 00A7', ' × 00A7'); // SECTION SIGN (AI_AL)
       result = result.replaceAll(' ÷ 0E01', ' × 0E01'); // THAI CHARACTER KO KAI (SA_AL)
       result = result.replaceAll(' ÷ 50005', ' × 50005'); // <reserved-50005> (XX_AL)
       result = forbidBreak(result, hyphenOrSeparator);
     }
   }

   return result;
 }

 /// Matches a run of whitespace; `_parse` splits the code sequence that
 /// precedes the `#` on it.
 final RegExp spaceRegex = RegExp(r'\s+');

 /// Matches a sign (`×` or `÷`) followed by its bracketed rule number, e.g.
 /// `÷ [0.3]`, plus any trailing whitespace.
 ///
 /// Group 1 is the sign and group 2 is the rule number.
 final RegExp signRegex = RegExp(r'([×÷])\s+\[(\d+\.\d+)\]\s*');

 /// Matches a character description: one or more whitespace-separated words
 /// followed by a parenthesized property, e.g. `NUMBER SIGN (AL)`, plus any
 /// trailing whitespace.
 ///
 /// Group 1 is the name and group 2 is the property. Matching is
 /// case-insensitive.
 final RegExp charRegex = RegExp(
   r'([A-Z0-9-]+(?:\s+[A-Z0-9-]+)*)\s+\(([A-Z0-9_]+)\)\s*',
   caseSensitive: false,
 );

 /// Like [charRegex], but for names wrapped in angle brackets, e.g.
 /// `<reserved-50005> (XX_AL)`; the bracketed name may also contain
 /// parentheses.
 final RegExp charWithBracketsRegex = RegExp(
   r'(\<[A-Z0-9()-]+(?:\s+[A-Z0-9()-]+)*\>)\s+\(([A-Z0-9_]+)\)\s*',
   caseSensitive: false,
 );

 /// Builds a [TestCase] from a single test data [line].
 ///
 /// The text before the first `#` is split on whitespace into the code
 /// sequence; the text after it is the explanation, read as a sign followed by
 /// repeated (character, sign) pairs. The code point of the n-th character is
 /// taken from index `2n + 1` of the code sequence and parsed as hexadecimal.
 ///
 /// Fails on the null assertion when the explanation does not match the
 /// expected sign/character patterns.
 TestCase _parse(String line) {
   final int commentStart = line.indexOf('#');
   final List<String> sequence =
       line.substring(0, commentStart).trim().split(spaceRegex);
   final String explanation = line.substring(commentStart + 1).trim();

   final List<Sign> signs = <Sign>[];
   final List<Char> chars = <Char>[];
   int cursor = 0;

   // Reads the sign located at [cursor], records it and moves past it.
   void consumeSign() {
     final Match match = signRegex.matchAsPrefix(explanation, cursor)!;
     signs.add(Sign._(code: match.group(1)!, rule: match.group(2)!));
     cursor += match.group(0)!.length;
   }

   // The explanation always opens with a sign.
   consumeSign();

   while (cursor < explanation.length) {
     // Names that start with `<` need the bracket-aware pattern.
     final RegExp pattern =
         explanation[cursor] == '<' ? charWithBracketsRegex : charRegex;
     final Match match = pattern.matchAsPrefix(explanation, cursor)!;

     // Entries at odd indices of the sequence hold the hex code points.
     final int codeIndex = 2 * chars.length + 1;
     chars.add(Char._(
       code: int.parse(sequence[codeIndex], radix: 16),
       name: match.group(1)!,
       property: match.group(2)!,
     ));
     cursor += match.group(0)!.length;

     // Every character is followed by a sign.
     consumeSign();
   }

   return TestCase._(signs: signs, chars: chars, raw: line);
 }

 /// A single character of a test case.
 ///
 /// Holds the character's code, its name and the property that determines how
 /// it behaves with regards to line breaks.
 class Char {
   Char._({
     required this.code,
     required this.name,
     required this.property,
   });

   /// The numeric code of the character.
   final int code;

   /// The name of the character.
   final String name;

   /// The line-break property of the character.
   final String property;

   /// Whether [code] is above `0xFFFF`, i.e. the character gets encoded as a
   /// UTF-16 surrogate pair.
   bool get isSurrogatePair {
     return code >= 0x10000;
   }
 }

 /// The sign that sits between two characters of a test case.
 ///
 /// A sign is either "×", meaning no line break, or "÷", meaning a line break
 /// opportunity exists at that position.
 class Sign {
   Sign._({
     required this.code,
     required this.rule,
   });

   /// The sign itself, "×" or "÷".
   final String code;

   /// The rule number attached to the sign.
   final String rule;

   /// Whether this sign is "÷".
   bool get isBreakOpportunity {
     return '÷' == code;
   }
 }

 /// A complete test case.
 ///
 /// Consists of a sequence of characters and the signs between them; the signs
 /// indicate where line break opportunities exist.
 class TestCase {
   TestCase._({
     required this.signs,
     required this.chars,
     required this.raw,
   });

   /// The signs of this test case.
   final List<Sign> signs;

   /// The characters of this test case.
   final List<Char> chars;

   /// The line this test case was created from; returned by [toString].
   final String raw;

   /// The code of every entry of [chars], in order.
   Iterable<int> get charCodes {
     return chars.map((Char c) => c.code);
   }

   /// Returns the text that this test case is covering.
   String toText() => String.fromCharCodes(charCodes);

   @override
   String toString() => raw;
 }
	// Copyright 2013 The Flutter Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	/// Converts the contents of a line-break test data file into [TestCase]s.
	///
	/// [rawTestData] is split on `'\n'` and every line rejected by
	/// [isValidTestCase] is dropped. Each remaining line is first adjusted by
	/// `_checkReplacement` (with [isV8] passed through) and then parsed.
	List<TestCase> parseRawTestData(String rawTestData, {required bool isV8}) {
	  return <TestCase>[
	    for (final String line in rawTestData.split('\n'))
	      if (isValidTestCase(line)) _parse(_checkReplacement(line, isV8: isV8)),
	  ];
	}

	/// Whether [line] is a test case, i.e. whether it starts with the `×` sign.
	bool isValidTestCase(String line) => line.startsWith('×');

	/// Returns [line] with selected `×`/`÷` expectations flipped.
	///
	/// Each adjustment rewrites the sign in front of specific hex code points and
	/// the matching sign in the explanation text. The space adjustment always
	/// applies; the LB25 adjustments apply only when [isV8] is false, and the
	/// v8BreakIterator adjustments apply only when [isV8] is true. A line that
	/// matches none of the patterns is returned unchanged.
	///
	/// The alternations in the patterns below use a plain `|`. Writing `\|`
	/// would match a literal pipe character and no adjustment would ever apply.
	String _checkReplacement(String line, {required bool isV8}) {
	  String replacement = line;

	  // Special cases for rules LB8, LB11, LB13, LB14, LB15, LB16, LB17 to allow
	  // line breaks after spaces.
	  final RegExp spacesRegex = RegExp(r'SPACE \(SP\) × \[(8|11|13|14|15|16|17)\.');
	  if (replacement.contains(spacesRegex)) {
	    replacement = replacement
	        .replaceAll('0020 ×', '0020 ÷') // SPACE (SP)
	        .replaceAllMapped(spacesRegex, (Match m) => 'SPACE (SP) ÷ [${m.group(1)}.');
	  }

	  if (!isV8) {
	    // Some test cases contradict rule LB25, so we are fixing them with the few
	    // regexes below.

	    final RegExp lb25Regex1 = RegExp(r'\((CP_CP30|CL)\)(.*?) ÷ \[999\.0\] (PERCENT|DOLLAR)');
	    if (replacement.contains(lb25Regex1)) {
	      replacement = replacement
	          .replaceAll(' ÷ 0024', ' × 0024') // DOLLAR SIGN (PR)
	          .replaceAll(' ÷ 0025', ' × 0025') // PERCENT SIGN (PO)
	          .replaceAllMapped(
	            lb25Regex1,
	            (Match m) => '(${m.group(1)})${m.group(2)} × [999.0] ${m.group(3)}',
	          );
	    }
	    final RegExp lb25Regex2 = RegExp(r'\((IS|SY)\)(.*?) ÷ \[999\.0\] (DIGIT)');
	    if (replacement.contains(lb25Regex2)) {
	      replacement = replacement
	          .replaceAll(' ÷ 0030', ' × 0030') // DIGIT ZERO (NU)
	          .replaceAllMapped(
	            lb25Regex2,
	            (Match m) => '(${m.group(1)})${m.group(2)} × [999.0] ${m.group(3)}',
	          );
	    }
	    final RegExp lb25Regex3 = RegExp(r'\((PR|PO)\)(.*?) ÷ \[999\.0\] (LEFT)');
	    if (replacement.contains(lb25Regex3)) {
	      replacement = replacement
	          .replaceAll(' ÷ 0028', ' × 0028') // LEFT PARENTHESIS (OP_OP30)
	          .replaceAll(' ÷ 007B', ' × 007B') // LEFT CURLY BRACKET (OP_OP30)
	          .replaceAll(' ÷ 2329', ' × 2329') // LEFT-POINTING ANGLE BRACKET (OP)
	          .replaceAllMapped(
	            lb25Regex3,
	            (Match m) => '(${m.group(1)})${m.group(2)} × [999.0] ${m.group(3)}',
	          );
	    }
	  }

	  if (isV8) {
	    // v8BreakIterator deviates from the spec around Hiragana and Katakana
	    // letters.

	    final RegExp hiragana21Regex = RegExp(r' × \[21\.03\] (HIRAGANA LETTER|KATAKANA LETTER|KATAKANA-HIRAGANA)');
	    if (replacement.contains(hiragana21Regex) && !replacement.contains('(BB)') && !replacement.contains('(PR)')) {
	      replacement = replacement
	          .replaceAll(' × 3041', ' ÷ 3041') // HIRAGANA LETTER (CJ)
	          .replaceAll(' × 30E5', ' ÷ 30E5') // KATAKANA LETTER (CJ)
	          .replaceAll(' × 30FC', ' ÷ 30FC') // KATAKANA-HIRAGANA PROLONGED SOUND MARK (CJ)
	          .replaceAllMapped(
	            hiragana21Regex,
	            (Match m) => ' ÷ [21.03] ${m.group(1)}',
	          );
	    }
	    if (replacement.contains(' × [16.0] HIRAGANA LETTER')) {
	      replacement = replacement
	          .replaceAll(' × 3041', ' ÷ 3041') // HIRAGANA LETTER (CJ)
	          .replaceAll(
	            ' × [16.0] HIRAGANA LETTER',
	            ' ÷ [16.0] HIRAGANA LETTER',
	          );
	    }
	    final RegExp hiraganaPercentRegex = RegExp(r'HIRAGANA .*? ÷ \[999\.0\] PERCENT');
	    if (replacement.contains(hiraganaPercentRegex)) {
	      replacement = replacement
	          .replaceAll(' ÷ 0025', ' × 0025') // PERCENT SIGN (PO)
	          .replaceAll(
	            ' ÷ [999.0] PERCENT',
	            ' × [999.0] PERCENT',
	          );
	    }

	    // v8BreakIterator also deviates from the spec around hyphens, commas and
	    // full stops.

	    final RegExp hyphenRegex = RegExp(r'\((HY|IS)\)(.*?) ÷ \[999\.0\] (DIGIT|NUMBER|SECTION|THAI|<reserved-50005>)');
	    if (replacement.contains(hyphenRegex)) {
	      replacement = replacement
	          .replaceAll(' ÷ 0030', ' × 0030') // DIGIT ZERO (NU)
	          .replaceAll(' ÷ 0023', ' × 0023') // NUMBER SIGN (AL)
	          .replaceAll(' ÷ 00A7', ' × 00A7') // SECTION SIGN (AI_AL)
	          .replaceAll(' ÷ 0E01', ' × 0E01') // THAI CHARACTER KO KAI (SA_AL)
	          .replaceAll(' ÷ 50005', ' × 50005') // <reserved-50005> (XX_AL)
	          .replaceAllMapped(
	            hyphenRegex,
	            (Match m) => '(${m.group(1)})${m.group(2)} × [999.0] ${m.group(3)}',
	          );
	    }
	  }

	  return replacement;
	}

	/// Matches a run of whitespace; `_parse` splits the code sequence that
	/// precedes the `#` on it.
	final RegExp spaceRegex = RegExp(r'\s+');

	/// Matches a sign (`×` or `÷`) followed by its bracketed rule number, e.g.
	/// `÷ [0.3]`, plus any trailing whitespace.
	///
	/// Group 1 is the sign and group 2 is the rule number.
	final RegExp signRegex = RegExp(r'([×÷])\s+\[(\d+\.\d+)\]\s*');

	/// Matches a character description: one or more whitespace-separated words
	/// followed by a parenthesized property, e.g. `SPACE (SP)` or
	/// `LEFT CURLY BRACKET (OP_OP30)`, plus any trailing whitespace.
	///
	/// Group 1 is the name and group 2 is the property. Matching is
	/// case-insensitive. The `*` quantifiers matter: without them only names of
	/// exactly two words followed by whitespace would match.
	final RegExp charRegex = RegExp(
	  r'([A-Z0-9-]+(?:\s+[A-Z0-9-]+)*)\s+\(([A-Z0-9_]+)\)\s*',
	  caseSensitive: false,
	);

	/// Like [charRegex], but for names wrapped in angle brackets, e.g.
	/// `<reserved-50005> (XX_AL)`; the bracketed name may also contain
	/// parentheses.
	final RegExp charWithBracketsRegex = RegExp(
	  r'(\<[A-Z0-9()-]+(?:\s+[A-Z0-9()-]+)*\>)\s+\(([A-Z0-9_]+)\)\s*',
	  caseSensitive: false,
	);

	/// Builds a [TestCase] from a single test data [line].
	///
	/// The text before the first `#` is split on whitespace into the code
	/// sequence; the text after it is the explanation, read as a sign followed by
	/// repeated (character, sign) pairs. The code point of the n-th character is
	/// taken from index `2n + 1` of the code sequence and parsed as hexadecimal.
	///
	/// Fails on the null assertion when the explanation does not match the
	/// expected sign/character patterns.
	TestCase _parse(String line) {
	  final int commentStart = line.indexOf('#');
	  final List<String> sequence =
	      line.substring(0, commentStart).trim().split(spaceRegex);
	  final String explanation = line.substring(commentStart + 1).trim();

	  final List<Sign> signs = <Sign>[];
	  final List<Char> chars = <Char>[];
	  int cursor = 0;

	  // Reads the sign located at [cursor], records it and moves past it.
	  void consumeSign() {
	    final Match match = signRegex.matchAsPrefix(explanation, cursor)!;
	    signs.add(Sign._(code: match.group(1)!, rule: match.group(2)!));
	    cursor += match.group(0)!.length;
	  }

	  // The explanation always opens with a sign.
	  consumeSign();

	  while (cursor < explanation.length) {
	    // Names that start with `<` need the bracket-aware pattern.
	    final RegExp pattern =
	        explanation[cursor] == '<' ? charWithBracketsRegex : charRegex;
	    final Match match = pattern.matchAsPrefix(explanation, cursor)!;

	    // Entries at odd indices of the sequence hold the hex code points.
	    final int codeIndex = 2 * chars.length + 1;
	    chars.add(Char._(
	      code: int.parse(sequence[codeIndex], radix: 16),
	      name: match.group(1)!,
	      property: match.group(2)!,
	    ));
	    cursor += match.group(0)!.length;

	    // Every character is followed by a sign.
	    consumeSign();
	  }

	  return TestCase._(signs: signs, chars: chars, raw: line);
	}

	/// A single character of a test case.
	///
	/// Holds the character's code, its name and the property that determines how
	/// it behaves with regards to line breaks.
	class Char {
	  Char._({
	    required this.code,
	    required this.name,
	    required this.property,
	  });

	  /// The numeric code of the character.
	  final int code;

	  /// The name of the character.
	  final String name;

	  /// The line-break property of the character.
	  final String property;

	  /// Whether [code] is above `0xFFFF`, i.e. the character gets encoded as a
	  /// UTF-16 surrogate pair.
	  bool get isSurrogatePair {
	    return code >= 0x10000;
	  }
	}

	/// The sign that sits between two characters of a test case.
	///
	/// A sign is either "×", meaning no line break, or "÷", meaning a line break
	/// opportunity exists at that position.
	class Sign {
	  Sign._({
	    required this.code,
	    required this.rule,
	  });

	  /// The sign itself, "×" or "÷".
	  final String code;

	  /// The rule number attached to the sign.
	  final String rule;

	  /// Whether this sign is "÷".
	  bool get isBreakOpportunity {
	    return '÷' == code;
	  }
	}

	/// A complete test case.
	///
	/// Consists of a sequence of characters and the signs between them; the signs
	/// indicate where line break opportunities exist.
	class TestCase {
	  TestCase._({
	    required this.signs,
	    required this.chars,
	    required this.raw,
	  });

	  /// The signs of this test case.
	  final List<Sign> signs;

	  /// The characters of this test case.
	  final List<Char> chars;

	  /// The line this test case was created from; returned by [toString].
	  final String raw;

	  /// The code of every entry of [chars], in order.
	  Iterable<int> get charCodes {
	    return chars.map((Char c) => c.code);
	  }

	  /// Returns the text that this test case is covering.
	  String toText() => String.fromCharCodes(charCodes);

	  @override
	  String toString() => raw;
	}