js/experimental/runtime/kernel/textencoding.js - third_party/protobuf - Git at Google

 /**
  * @fileoverview A UTF8 decoder.
  */
 goog.module('protobuf.binary.textencoding');

 const {checkElementIndex} = goog.require('protobuf.internal.checks');

 /**
  * Combines an array of codePoints into a string.
  * @param {!Array<number>} codePoints
  * @return {string}
  */
 function codePointsToString(codePoints) {
   // Performance: http://jsperf.com/string-fromcharcode-test/13
   let s = '', i = 0;
   const length = codePoints.length;
   const BATCH_SIZE = 10000;
   while (i < length) {
     const end = Math.min(i + BATCH_SIZE, length);
     s += String.fromCharCode.apply(null, codePoints.slice(i, end));
     i = end;
   }
   return s;
 }

 /**
  * Decodes raw bytes into a string.
  * Supports codepoints from U+0000 up to U+10FFFF.
  * (http://en.wikipedia.org/wiki/UTF-8).
  * @param {!DataView} bytes
  * @return {string}
  */
 function decode(bytes) {
   let cursor = 0;
   const codePoints = [];

   while (cursor < bytes.byteLength) {
     const c = bytes.getUint8(cursor++);
     if (c < 0x80) {  // Regular 7-bit ASCII.
       codePoints.push(c);
     } else if (c < 0xC0) {
       // UTF-8 continuation mark. We are out of sync. This
       // might happen if we attempted to read a character
       // with more than four bytes.
       continue;
     } else if (c < 0xE0) {  // UTF-8 with two bytes.
       checkElementIndex(cursor, bytes.byteLength);
       const c2 = bytes.getUint8(cursor++);
       codePoints.push(((c & 0x1F) << 6) | (c2 & 0x3F));
     } else if (c < 0xF0) {  // UTF-8 with three bytes.
       checkElementIndex(cursor + 1, bytes.byteLength);
       const c2 = bytes.getUint8(cursor++);
       const c3 = bytes.getUint8(cursor++);
       codePoints.push(((c & 0xF) << 12) | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
     } else if (c < 0xF8) {  // UTF-8 with 4 bytes.
       checkElementIndex(cursor + 2, bytes.byteLength);
       const c2 = bytes.getUint8(cursor++);
       const c3 = bytes.getUint8(cursor++);
       const c4 = bytes.getUint8(cursor++);
       // Characters written on 4 bytes have 21 bits for a codepoint.
       // We can't fit that on 16bit characters, so we use surrogates.
       let codepoint = ((c & 0x07) << 18) | ((c2 & 0x3F) << 12) |
           ((c3 & 0x3F) << 6) | (c4 & 0x3F);
       // Surrogates formula from wikipedia.
       // 1. Subtract 0x10000 from codepoint
       codepoint -= 0x10000;
       // 2. Split this into the high 10-bit value and the low 10-bit value
       // 3. Add 0xD800 to the high value to form the high surrogate
       // 4. Add 0xDC00 to the low value to form the low surrogate:
       const low = (codepoint & 0x3FF) + 0xDC00;
       const high = ((codepoint >> 10) & 0x3FF) + 0xD800;
       codePoints.push(high, low);
     }
   }
   return codePointsToString(codePoints);
 }

 /**
  * Writes a UTF16 JavaScript string to the buffer encoded as UTF8.
  * @param {string} value The string to write.
  * @return {!Uint8Array} An array containing the encoded bytes.
  */
 function encode(value) {
   const buffer = [];

   for (let i = 0; i < value.length; i++) {
     const c1 = value.charCodeAt(i);

     if (c1 < 0x80) {
       buffer.push(c1);
     } else if (c1 < 0x800) {
       buffer.push((c1 >> 6) | 0xC0);
       buffer.push((c1 & 0x3F) | 0x80);
     } else if (c1 < 0xD800 || c1 >= 0xE000) {
       buffer.push((c1 >> 12) | 0xE0);
       buffer.push(((c1 >> 6) & 0x3F) | 0x80);
       buffer.push((c1 & 0x3F) | 0x80);
     } else {
       // surrogate pair
       i++;
       checkElementIndex(i, value.length);
       const c2 = value.charCodeAt(i);
       const paired = 0x10000 + (((c1 & 0x3FF) << 10) | (c2 & 0x3FF));
       buffer.push((paired >> 18) | 0xF0);
       buffer.push(((paired >> 12) & 0x3F) | 0x80);
       buffer.push(((paired >> 6) & 0x3F) | 0x80);
       buffer.push((paired & 0x3F) | 0x80);
     }
   }
   return new Uint8Array(buffer);
 }

 exports = {
   decode,
   encode,
 };
	/**
	* @fileoverview A UTF8 decoder.
	*/
	goog.module('protobuf.binary.textencoding');

	const {checkElementIndex} = goog.require('protobuf.internal.checks');

	/**
	* Combines an array of codePoints into a string.
	* @param {!Array<number>} codePoints
	* @return {string}
	*/
	function codePointsToString(codePoints) {
	// Performance: http://jsperf.com/string-fromcharcode-test/13
	let s = '', i = 0;
	const length = codePoints.length;
	const BATCH_SIZE = 10000;
	while (i < length) {
	const end = Math.min(i + BATCH_SIZE, length);
	s += String.fromCharCode.apply(null, codePoints.slice(i, end));
	i = end;
	}
	return s;
	}

	/**
	* Decodes raw bytes into a string.
	* Supports codepoints from U+0000 up to U+10FFFF.
	* (http://en.wikipedia.org/wiki/UTF-8).
	* @param {!DataView} bytes
	* @return {string}
	*/
	function decode(bytes) {
	let cursor = 0;
	const codePoints = [];

	while (cursor < bytes.byteLength) {
	const c = bytes.getUint8(cursor++);
	if (c < 0x80) { // Regular 7-bit ASCII.
	codePoints.push(c);
	} else if (c < 0xC0) {
	// UTF-8 continuation mark. We are out of sync. This
	// might happen if we attempted to read a character
	// with more than four bytes.
	continue;
	} else if (c < 0xE0) { // UTF-8 with two bytes.
	checkElementIndex(cursor, bytes.byteLength);
	const c2 = bytes.getUint8(cursor++);
	codePoints.push(((c & 0x1F) << 6) \| (c2 & 0x3F));
	} else if (c < 0xF0) { // UTF-8 with three bytes.
	checkElementIndex(cursor + 1, bytes.byteLength);
	const c2 = bytes.getUint8(cursor++);
	const c3 = bytes.getUint8(cursor++);
	codePoints.push(((c & 0xF) << 12) \| ((c2 & 0x3F) << 6) \| (c3 & 0x3F));
	} else if (c < 0xF8) { // UTF-8 with 4 bytes.
	checkElementIndex(cursor + 2, bytes.byteLength);
	const c2 = bytes.getUint8(cursor++);
	const c3 = bytes.getUint8(cursor++);
	const c4 = bytes.getUint8(cursor++);
	// Characters written on 4 bytes have 21 bits for a codepoint.
	// We can't fit that on 16bit characters, so we use surrogates.
	let codepoint = ((c & 0x07) << 18) \| ((c2 & 0x3F) << 12) \|
	((c3 & 0x3F) << 6) \| (c4 & 0x3F);
	// Surrogates formula from wikipedia.
	// 1. Subtract 0x10000 from codepoint
	codepoint -= 0x10000;
	// 2. Split this into the high 10-bit value and the low 10-bit value
	// 3. Add 0xD800 to the high value to form the high surrogate
	// 4. Add 0xDC00 to the low value to form the low surrogate:
	const low = (codepoint & 0x3FF) + 0xDC00;
	const high = ((codepoint >> 10) & 0x3FF) + 0xD800;
	codePoints.push(high, low);
	}
	}
	return codePointsToString(codePoints);
	}

	/**
	* Writes a UTF16 JavaScript string to the buffer encoded as UTF8.
	* @param {string} value The string to write.
	* @return {!Uint8Array} An array containing the encoded bytes.
	*/
	function encode(value) {
	const buffer = [];

	for (let i = 0; i < value.length; i++) {
	const c1 = value.charCodeAt(i);

	if (c1 < 0x80) {
	buffer.push(c1);
	} else if (c1 < 0x800) {
	buffer.push((c1 >> 6) \| 0xC0);
	buffer.push((c1 & 0x3F) \| 0x80);
	} else if (c1 < 0xD800 \|\| c1 >= 0xE000) {
	buffer.push((c1 >> 12) \| 0xE0);
	buffer.push(((c1 >> 6) & 0x3F) \| 0x80);
	buffer.push((c1 & 0x3F) \| 0x80);
	} else {
	// surrogate pair
	i++;
	checkElementIndex(i, value.length);
	const c2 = value.charCodeAt(i);
	const paired = 0x10000 + (((c1 & 0x3FF) << 10) \| (c2 & 0x3FF));
	buffer.push((paired >> 18) \| 0xF0);
	buffer.push(((paired >> 12) & 0x3F) \| 0x80);
	buffer.push(((paired >> 6) & 0x3F) \| 0x80);
	buffer.push((paired & 0x3F) \| 0x80);
	}
	}
	return new Uint8Array(buffer);
	}

	exports = {
	decode,
	encode,
	};