Merge branch 'whitespace'. Conflicts: Changelog, src/double-conversion.cc, src/double-conversion.h

commit: 3ae434a0f5d6de4578a6ad1aacd5347a032121df [log] [tgz]
author: Florian Loitsch <florian@loitsch.com> Sun Nov 10 07:29:13 2013 +0100
committer: Florian Loitsch <florian@loitsch.com> Sun Nov 10 07:29:13 2013 +0100
tree: a016c0f08804dede18110b2ca2b5a1775bae6424
parent: 239ff7e1a021b2e4dca092d8b36ba7702f02f86c [diff]
parent: 1b476d5f8b3e52e7e57df3d3bddeaabb75fbbef8 [diff]
diff --git a/Changelog b/Changelog
index 83753ea..7125a70 100644
--- a/Changelog
+++ b/Changelog

@@ -1,4 +1,9 @@
 2013-11-09:
+  Tagged v2.0.0.
+  String-to-Double|Float: ALLOW_LEADING_SPACES and similar flags now include
+  new-lines, tabs and all Unicode whitespace characters.
+
+2013-11-09:
   Tagged v1.1.2.
   Add support for ARM 64 and OsX ppc.
   Rewrite tests so they pass under Visual Studio.

diff --git a/src/double-conversion.cc b/src/double-conversion.cc
index febba6c..d2893f5 100644
--- a/src/double-conversion.cc
+++ b/src/double-conversion.cc

@@ -417,8 +417,9 @@
 
 // Consumes the given substring from the iterator.
 // Returns false, if the substring does not match.
-static bool ConsumeSubString(const char** current,
-                             const char* end,
+template <class Iterator>
+static bool ConsumeSubString(Iterator* current,
+                             Iterator end,
                              const char* substring) {
   ASSERT(**current == *substring);
   for (substring++; *substring != '\0'; substring++) {
@@ -440,10 +441,36 @@
 const int kMaxSignificantDigits = 772;
 
 
+static const char kWhitespaceTable7[] = { 32, 13, 10, 9, 11, 12 };
+static const int kWhitespaceTable7Length = ARRAY_SIZE(kWhitespaceTable7);
+
+
+static const uc16 kWhitespaceTable16[] = {
+  160, 8232, 8233, 5760, 6158, 8192, 8193, 8194, 8195,
+  8196, 8197, 8198, 8199, 8200, 8201, 8202, 8239, 8287, 12288, 65279
+};
+static const int kWhitespaceTable16Length = ARRAY_SIZE(kWhitespaceTable16);
+
+
+static bool isWhitespace(int x) {
+  if (x < 128) {
+    for (int i = 0; i < kWhitespaceTable7Length; i++) {
+      if (kWhitespaceTable7[i] == x) return true;
+    }
+  } else {
+    for (int i = 0; i < kWhitespaceTable16Length; i++) {
+      if (kWhitespaceTable16[i] == x) return true;
+    }
+  }
+  return false;
+}
+
+
 // Returns true if a nonspace found and false if the end has reached.
-static inline bool AdvanceToNonspace(const char** current, const char* end) {
+template <class Iterator>
+static inline bool AdvanceToNonspace(Iterator* current, Iterator end) {
   while (*current != end) {
-    if (**current != ' ') return true;
+    if (!isWhitespace(**current)) return true;
     ++*current;
   }
   return false;
@@ -463,25 +490,27 @@
 
 
 // Parsing integers with radix 2, 4, 8, 16, 32. Assumes current != end.
-template <int radix_log_2>
-static double RadixStringToIeee(const char* current,
-                                const char* end,
+template <int radix_log_2, class Iterator>
+static double RadixStringToIeee(Iterator* current,
+                                Iterator end,
                                 bool sign,
                                 bool allow_trailing_junk,
                                 double junk_string_value,
                                 bool read_as_double,
-                                const char** trailing_pointer) {
-  ASSERT(current != end);
+                                bool* result_is_junk) {
+  ASSERT(*current != end);
 
   const int kDoubleSize = Double::kSignificandSize;
   const int kSingleSize = Single::kSignificandSize;
   const int kSignificandSize = read_as_double? kDoubleSize: kSingleSize;
 
+  *result_is_junk = true;
+
   // Skip leading 0s.
-  while (*current == '0') {
-    ++current;
-    if (current == end) {
-      *trailing_pointer = end;
+  while (**current == '0') {
+    ++(*current);
+    if (*current == end) {
+      *result_is_junk = false;
       return SignedZero(sign);
     }
   }
@@ -492,14 +521,14 @@
 
   do {
     int digit;
-    if (*current >= '0' && *current <= '9' && *current < '0' + radix) {
-      digit = static_cast<char>(*current) - '0';
-    } else if (radix > 10 && *current >= 'a' && *current < 'a' + radix - 10) {
-      digit = static_cast<char>(*current) - 'a' + 10;
-    } else if (radix > 10 && *current >= 'A' && *current < 'A' + radix - 10) {
-      digit = static_cast<char>(*current) - 'A' + 10;
+    if (**current >= '0' && **current <= '9' && **current < '0' + radix) {
+      digit = static_cast<char>(**current) - '0';
+    } else if (radix > 10 && **current >= 'a' && **current < 'a' + radix - 10) {
+      digit = static_cast<char>(**current) - 'a' + 10;
+    } else if (radix > 10 && **current >= 'A' && **current < 'A' + radix - 10) {
+      digit = static_cast<char>(**current) - 'A' + 10;
     } else {
-      if (allow_trailing_junk || !AdvanceToNonspace(&current, end)) {
+      if (allow_trailing_junk || !AdvanceToNonspace(current, end)) {
         break;
       } else {
         return junk_string_value;
@@ -524,13 +553,13 @@
 
       bool zero_tail = true;
       while (true) {
-        ++current;
-        if (current == end || !isDigit(*current, radix)) break;
-        zero_tail = zero_tail && *current == '0';
+        ++(*current);
+        if (*current == end || !isDigit(**current, radix)) break;
+        zero_tail = zero_tail && **current == '0';
         exponent += radix_log_2;
       }
 
-      if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) {
+      if (!allow_trailing_junk && AdvanceToNonspace(current, end)) {
         return junk_string_value;
       }
 
@@ -552,13 +581,13 @@
       }
       break;
     }
-    ++current;
-  } while (current != end);
+    ++(*current);
+  } while (*current != end);
 
   ASSERT(number < ((int64_t)1 << kSignificandSize));
   ASSERT(static_cast<int64_t>(static_cast<double>(number)) == number);
 
-  *trailing_pointer = current;
+  *result_is_junk = false;
 
   if (exponent == 0) {
     if (sign) {
@@ -573,13 +602,14 @@
 }
 
 
+template <class Iterator>
 double StringToDoubleConverter::StringToIeee(
-    const char* input,
+    Iterator input,
     int length,
-    int* processed_characters_count,
-    bool read_as_double) const {
-  const char* current = input;
-  const char* end = input + length;
+    bool read_as_double,
+    int* processed_characters_count) const {
+  Iterator current = input;
+  Iterator end = input + length;
 
   *processed_characters_count = 0;
 
@@ -626,7 +656,7 @@
   if (*current == '+' || *current == '-') {
     sign = (*current == '-');
     ++current;
-    const char* next_non_space = current;
+    Iterator next_non_space = current;
     // Skip following spaces (if allowed).
     if (!AdvanceToNonspace(&next_non_space, end)) return junk_string_value_;
     if (!allow_spaces_after_sign && (current != next_non_space)) {
@@ -690,17 +720,17 @@
         return junk_string_value_;  // "0x".
       }
 
-      const char* tail_pointer = NULL;
-      double result = RadixStringToIeee<4>(current,
+      bool result_is_junk;
+      double result = RadixStringToIeee<4>(&current,
                                            end,
                                            sign,
                                            allow_trailing_junk,
                                            junk_string_value_,
                                            read_as_double,
-                                           &tail_pointer);
-      if (tail_pointer != NULL) {
-        if (allow_trailing_spaces) AdvanceToNonspace(&tail_pointer, end);
-        *processed_characters_count = tail_pointer - input;
+                                           &result_is_junk);
+      if (!result_is_junk) {
+        if (allow_trailing_spaces) AdvanceToNonspace(&current, end);
+        *processed_characters_count = current - input;
       }
       return result;
     }
@@ -855,15 +885,16 @@
 
   if (octal) {
     double result;
-    const char* tail_pointer = NULL;
-    result = RadixStringToIeee<3>(buffer,
+    bool result_is_junk;
+    char* start = buffer;
+    result = RadixStringToIeee<3>(&start,
                                   buffer + buffer_pos,
                                   sign,
                                   allow_trailing_junk,
                                   junk_string_value_,
                                   read_as_double,
-                                  &tail_pointer);
-    ASSERT(tail_pointer != NULL);
+                                  &result_is_junk);
+    ASSERT(!result_is_junk);
     *processed_characters_count = current - input;
     return result;
   }
@@ -886,4 +917,36 @@
   return sign? -converted: converted;
 }
 
+
+double StringToDoubleConverter::StringToDouble(
+    const char* buffer,
+    int length,
+    int* processed_characters_count) {
+  return StringToIeee(buffer, length, true, processed_characters_count);
+}
+
+
+double StringToDoubleConverter::StringToDouble(
+    const uc16* buffer,
+    int length,
+    int* processed_characters_count) {
+  return StringToIeee(buffer, length, true, processed_characters_count);
+}
+
+
+float StringToDoubleConverter::StringToFloat(const char* buffer,
+                                             int length,
+                                             int* processed_characters_count) {
+  return static_cast<float>(StringToIeee(buffer, length, false,
+                                         processed_characters_count));
+}
+
+
+float StringToDoubleConverter::StringToFloat(const uc16* buffer,
+                                             int length,
+                                             int* processed_characters_count) {
+  return static_cast<float>(StringToIeee(buffer, length, false,
+                                         processed_characters_count));
+}
+
 }  // namespace double_conversion

diff --git a/src/double-conversion.h b/src/double-conversion.h
index 1c3387d..7bc58ba 100644
--- a/src/double-conversion.h
+++ b/src/double-conversion.h

@@ -415,9 +415,10 @@
   //          junk, too.
   //  - ALLOW_TRAILING_JUNK: ignore trailing characters that are not part of
   //      a double literal.
-  //  - ALLOW_LEADING_SPACES: skip over leading spaces.
-  //  - ALLOW_TRAILING_SPACES: ignore trailing spaces.
-  //  - ALLOW_SPACES_AFTER_SIGN: ignore spaces after the sign.
+  //  - ALLOW_LEADING_SPACES: skip over leading whitespace, including spaces,
+  //                          new-lines, and tabs.
+  //  - ALLOW_TRAILING_SPACES: ignore trailing whitespace.
+  //  - ALLOW_SPACES_AFTER_SIGN: ignore whitespace after the sign.
   //       Ex: StringToDouble("-   123.2") -> -123.2.
   //           StringToDouble("+   123.2") -> 123.2
   //
@@ -502,19 +503,24 @@
   // in the 'processed_characters_count'. Trailing junk is never included.
   double StringToDouble(const char* buffer,
                         int length,
-                        int* processed_characters_count) const {
-    return StringToIeee(buffer, length, processed_characters_count, true);
-  }
+                        int* processed_characters_count) const;
+
+  // Same as StringToDouble above but for 16 bit characters.
+  double StringToDouble(const uc16* buffer,
+                        int length,
+                        int* processed_characters_count);
 
   // Same as StringToDouble but reads a float.
   // Note that this is not equivalent to static_cast<float>(StringToDouble(...))
   // due to potential double-rounding.
   float StringToFloat(const char* buffer,
                       int length,
-                      int* processed_characters_count) const {
-    return static_cast<float>(StringToIeee(buffer, length,
-                                           processed_characters_count, false));
-  }
+                      int* processed_characters_count) const;
+
+  // Same as StringToFloat above but for 16 bit characters.
+  float StringToFloat(const uc16* buffer,
+                      int length,
+                      int* processed_characters_count);
 
  private:
   const int flags_;
@@ -523,10 +529,11 @@
   const char* const infinity_symbol_;
   const char* const nan_symbol_;
 
-  double StringToIeee(const char* buffer,
+  template <class Iterator>
+  double StringToIeee(Iterator start_pointer,
                       int length,
-                      int* processed_characters_count,
-                      bool read_as_double) const;
+                      bool read_as_double,
+                      int* processed_characters_count) const;
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(StringToDoubleConverter);
 };

diff --git a/src/utils.h b/src/utils.h
index deb4bc1..c76f77d 100644
--- a/src/utils.h
+++ b/src/utils.h

@@ -100,6 +100,8 @@
 
 #endif
 
+typedef uint16_t uc16;
+
 // The following macro works on both 32 and 64-bit platforms.
 // Usage: instead of writing 0x1234567890123456
 //      write UINT64_2PART_C(0x12345678,90123456);

diff --git a/test/cctest/test-conversions.cc b/test/cctest/test-conversions.cc
index e9cdad2..125cf26 100644
--- a/test/cctest/test-conversions.cc
+++ b/test/cctest/test-conversions.cc

@@ -1718,6 +1718,18 @@
 }
 
 
+static double StrToD16(const uc16* str16, int length, int flags,
+                       double empty_string_value,
+                       int* processed_characters_count, bool* processed_all) {
+  StringToDoubleConverter converter(flags, empty_string_value, Double::NaN(),
+                                    NULL, NULL);
+  double result =
+      converter.StringToDouble(str16, length, processed_characters_count);
+  *processed_all = (length == *processed_characters_count);
+  return result;
+}
+
+
 static double StrToD(const char* str, int flags, double empty_string_value,
                      int* processed_characters_count, bool* processed_all) {
   StringToDoubleConverter converter(flags, empty_string_value, Double::NaN(),
@@ -1726,6 +1738,19 @@
                                            processed_characters_count);
   *processed_all =
       ((strlen(str) == static_cast<unsigned>(*processed_characters_count)));
+
+  uc16 buffer16[256];
+  ASSERT(strlen(str) < ARRAY_SIZE(buffer16));
+  int len = strlen(str);
+  for (int i = 0; i < len; i++) {
+    buffer16[i] = str[i];
+  }
+  int processed_characters_count16;
+  bool processed_all16;
+  double result16 = StrToD16(buffer16, len, flags, empty_string_value,
+                             &processed_characters_count16, &processed_all16);
+  CHECK_EQ(result, result16);
+  CHECK_EQ(*processed_characters_count, processed_characters_count16);
   return result;
 }
 
@@ -2530,12 +2555,21 @@
   CHECK_EQ(10.0, StrToD(" 012", flags, 0.0, &processed, &all_used));
   CHECK(all_used);
 
+  CHECK_EQ(10.0, StrToD("\n012", flags, 0.0, &processed, &all_used));
+  CHECK(all_used);
+
   CHECK_EQ(0.0, StrToD(" 00", flags, 1.0, &processed, &all_used));
   CHECK(all_used);
 
+  CHECK_EQ(0.0, StrToD("\t00", flags, 1.0, &processed, &all_used));
+  CHECK(all_used);
+
   CHECK_EQ(10.0, StrToD(" 012", flags, 1.0, &processed, &all_used));
   CHECK(all_used);
 
+  CHECK_EQ(10.0, StrToD("\n012", flags, 1.0, &processed, &all_used));
+  CHECK(all_used);
+
   CHECK_EQ(123456789.0,
            StrToD(" 0123456789", flags, Double::NaN(), &processed, &all_used));
   CHECK(all_used);
@@ -2545,6 +2579,10 @@
   CHECK(all_used);
 
   CHECK_EQ(342391.0,
+           StrToD("\n01234567", flags, Double::NaN(), &processed, &all_used));
+  CHECK(all_used);
+
+  CHECK_EQ(342391.0,
            StrToD(" + 01234567", flags, Double::NaN(), &processed, &all_used));
   CHECK(all_used);
 
@@ -2552,6 +2590,10 @@
            StrToD(" - 01234567", flags, Double::NaN(), &processed, &all_used));
   CHECK(all_used);
 
+  CHECK_EQ(-342391.0,
+           StrToD("\n-\t01234567", flags, Double::NaN(), &processed, &all_used));
+  CHECK(all_used);
+
   CHECK_EQ(10.0, StrToD(" 012 ", flags, 0.0, &processed, &all_used));
   CHECK(all_used);
 
@@ -3196,14 +3238,41 @@
 }
 
 
-static float StrToF(const char* str, int flags, float empty_string_value,
-                    int* processed_characters_count, bool* processed_all) {
+static float StrToF16(const uc16* str16, int length, int flags,
+                      double empty_string_value,
+                      int* processed_characters_count,
+                      bool* processed_all) {
+  StringToDoubleConverter converter(flags, empty_string_value, Double::NaN(),
+                                    NULL, NULL);
+  double result =
+      converter.StringToFloat(str16, length, processed_characters_count);
+  *processed_all = (length == *processed_characters_count);
+  return result;
+}
+
+
+static double StrToF(const char* str, int flags, double empty_string_value,
+                     int* processed_characters_count, bool* processed_all) {
   StringToDoubleConverter converter(flags, empty_string_value, Single::NaN(),
                                     NULL, NULL);
   float result = converter.StringToFloat(str, strlen(str),
                                          processed_characters_count);
   *processed_all =
       ((strlen(str) == static_cast<unsigned>(*processed_characters_count)));
+
+  uc16 buffer16[256];
+  ASSERT(strlen(str) < ARRAY_SIZE(buffer16));
+  int len = strlen(str);
+  for (int i = 0; i < len; i++) {
+    buffer16[i] = str[i];
+  }
+  int processed_characters_count16;
+  bool processed_all16;
+  float result16 = StrToF16(buffer16, len, flags, empty_string_value,
+                            &processed_characters_count16,
+                            &processed_all16);
+  CHECK_EQ(result, result16);
+  CHECK_EQ(*processed_characters_count, processed_characters_count16);
   return result;
 }
 
@@ -4569,3 +4638,75 @@
     CHECK_EQ(0, processed);
   }
 }
+
+
+TEST(StringToDoubleFloatWhitespace) {
+  int flags;
+  int processed;
+  bool all_used;
+
+  flags = StringToDoubleConverter::ALLOW_LEADING_SPACES |
+      StringToDoubleConverter::ALLOW_TRAILING_SPACES |
+      StringToDoubleConverter::ALLOW_SPACES_AFTER_SIGN;
+
+  const char kWhitespaceAscii[] = {
+    0x0A, 0x0D, 0x09, 0x0B, 0x0C, 0x20,
+    '-',
+    0x0A, 0x0D, 0x09, 0x0B, 0x0C, 0x20,
+    '1', '.', '2',
+    0x0A, 0x0D, 0x09, 0x0B, 0x0C, 0x20,
+    0x00
+  };
+  CHECK_EQ(-1.2, StrToD(kWhitespaceAscii, flags, Double::NaN(),
+                        &processed, &all_used));
+  CHECK(all_used);
+  CHECK_EQ(-1.2f, StrToF(kWhitespaceAscii, flags, Double::NaN(),
+                         &processed, &all_used));
+  CHECK(all_used);
+
+  const uc16 kOghamSpaceMark = 0x1680;
+  const uc16 kMongolianVowelSeparator = 0x180E;
+  const uc16 kEnQuad = 0x2000;
+  const uc16 kEmQuad = 0x2001;
+  const uc16 kEnSpace = 0x2002;
+  const uc16 kEmSpace = 0x2003;
+  const uc16 kThreePerEmSpace = 0x2004;
+  const uc16 kFourPerEmSpace = 0x2005;
+  const uc16 kSixPerEmSpace = 0x2006;
+  const uc16 kFigureSpace = 0x2007;
+  const uc16 kPunctuationSpace = 0x2008;
+  const uc16 kThinSpace = 0x2009;
+  const uc16 kHairSpace = 0x200A;
+  const uc16 kNarrowNoBreakSpace = 0x202F;
+  const uc16 kMediumMathematicalSpace = 0x205F;
+  const uc16 kIdeographicSpace = 0x3000;
+
+  const uc16 kWhitespace16[] = {
+    0x0A, 0x0D, 0x09, 0x0B, 0x0C, 0x20, 0xA0, 0xFEFF,
+    kOghamSpaceMark, kMongolianVowelSeparator, kEnQuad, kEmQuad,
+    kEnSpace, kEmSpace, kThreePerEmSpace, kFourPerEmSpace, kSixPerEmSpace,
+    kFigureSpace, kPunctuationSpace, kThinSpace, kHairSpace,
+    kNarrowNoBreakSpace, kMediumMathematicalSpace, kIdeographicSpace,
+    '-',
+    0x0A, 0x0D, 0x09, 0x0B, 0x0C, 0x20, 0xA0, 0xFEFF,
+    kOghamSpaceMark, kMongolianVowelSeparator, kEnQuad, kEmQuad,
+    kEnSpace, kEmSpace, kThreePerEmSpace, kFourPerEmSpace, kSixPerEmSpace,
+    kFigureSpace, kPunctuationSpace, kThinSpace, kHairSpace,
+    kNarrowNoBreakSpace, kMediumMathematicalSpace, kIdeographicSpace,
+    '1', '.', '2',
+    0x0A, 0x0D, 0x09, 0x0B, 0x0C, 0x20, 0xA0, 0xFEFF,
+    kOghamSpaceMark, kMongolianVowelSeparator, kEnQuad, kEmQuad,
+    kEnSpace, kEmSpace, kThreePerEmSpace, kFourPerEmSpace, kSixPerEmSpace,
+    kFigureSpace, kPunctuationSpace, kThinSpace, kHairSpace,
+    kNarrowNoBreakSpace, kMediumMathematicalSpace, kIdeographicSpace,
+  };
+  const int kWhitespace16Length = ARRAY_SIZE(kWhitespace16);
+  CHECK_EQ(-1.2, StrToD16(kWhitespace16, kWhitespace16Length, flags,
+                          Double::NaN(),
+                          &processed, &all_used));
+  CHECK(all_used);
+  CHECK_EQ(-1.2f, StrToF16(kWhitespace16, kWhitespace16Length, flags,
+                           Single::NaN(),
+                           &processed, &all_used));
+  CHECK(all_used);
+}
commit	3ae434a0f5d6de4578a6ad1aacd5347a032121df	[log] [tgz]
author	Florian Loitsch <florian@loitsch.com>	Sun Nov 10 07:29:13 2013 +0100
committer	Florian Loitsch <florian@loitsch.com>	Sun Nov 10 07:29:13 2013 +0100
tree	a016c0f08804dede18110b2ca2b5a1775bae6424
parent	239ff7e1a021b2e4dca092d8b36ba7702f02f86c [diff]
parent	1b476d5f8b3e52e7e57df3d3bddeaabb75fbbef8 [diff]