Merge branch 'whitespace'
Conflicts:
Changelog
src/double-conversion.cc
src/double-conversion.h
diff --git a/Changelog b/Changelog
index 83753ea..7125a70 100644
--- a/Changelog
+++ b/Changelog
@@ -1,4 +1,9 @@
2013-11-09:
+ Tagged v2.0.0.
+ String-to-Double|Float: ALLOW_LEADING_SPACES and similar flags now include
+ new-lines, tabs and all Unicode whitespace characters.
+
+2013-11-09:
Tagged v1.1.2.
Add support for ARM 64 and OsX ppc.
Rewrite tests so they pass under Visual Studio.
diff --git a/src/double-conversion.cc b/src/double-conversion.cc
index febba6c..d2893f5 100644
--- a/src/double-conversion.cc
+++ b/src/double-conversion.cc
@@ -417,8 +417,9 @@
// Consumes the given substring from the iterator.
// Returns false, if the substring does not match.
-static bool ConsumeSubString(const char** current,
- const char* end,
+template <class Iterator>
+static bool ConsumeSubString(Iterator* current,
+ Iterator end,
const char* substring) {
ASSERT(**current == *substring);
for (substring++; *substring != '\0'; substring++) {
@@ -440,10 +441,36 @@
const int kMaxSignificantDigits = 772;
+static const char kWhitespaceTable7[] = { 32, 13, 10, 9, 11, 12 };
+static const int kWhitespaceTable7Length = ARRAY_SIZE(kWhitespaceTable7);
+
+
+static const uc16 kWhitespaceTable16[] = {
+ 160, 8232, 8233, 5760, 6158, 8192, 8193, 8194, 8195,
+ 8196, 8197, 8198, 8199, 8200, 8201, 8202, 8239, 8287, 12288, 65279
+};
+static const int kWhitespaceTable16Length = ARRAY_SIZE(kWhitespaceTable16);
+
+
+static bool isWhitespace(int x) {
+ if (x < 128) {
+ for (int i = 0; i < kWhitespaceTable7Length; i++) {
+ if (kWhitespaceTable7[i] == x) return true;
+ }
+ } else {
+ for (int i = 0; i < kWhitespaceTable16Length; i++) {
+ if (kWhitespaceTable16[i] == x) return true;
+ }
+ }
+ return false;
+}
+
+
// Returns true if a nonspace found and false if the end has reached.
-static inline bool AdvanceToNonspace(const char** current, const char* end) {
+template <class Iterator>
+static inline bool AdvanceToNonspace(Iterator* current, Iterator end) {
while (*current != end) {
- if (**current != ' ') return true;
+ if (!isWhitespace(**current)) return true;
++*current;
}
return false;
@@ -463,25 +490,27 @@
// Parsing integers with radix 2, 4, 8, 16, 32. Assumes current != end.
-template <int radix_log_2>
-static double RadixStringToIeee(const char* current,
- const char* end,
+template <int radix_log_2, class Iterator>
+static double RadixStringToIeee(Iterator* current,
+ Iterator end,
bool sign,
bool allow_trailing_junk,
double junk_string_value,
bool read_as_double,
- const char** trailing_pointer) {
- ASSERT(current != end);
+ bool* result_is_junk) {
+ ASSERT(*current != end);
const int kDoubleSize = Double::kSignificandSize;
const int kSingleSize = Single::kSignificandSize;
const int kSignificandSize = read_as_double? kDoubleSize: kSingleSize;
+ *result_is_junk = true;
+
// Skip leading 0s.
- while (*current == '0') {
- ++current;
- if (current == end) {
- *trailing_pointer = end;
+ while (**current == '0') {
+ ++(*current);
+ if (*current == end) {
+ *result_is_junk = false;
return SignedZero(sign);
}
}
@@ -492,14 +521,14 @@
do {
int digit;
- if (*current >= '0' && *current <= '9' && *current < '0' + radix) {
- digit = static_cast<char>(*current) - '0';
- } else if (radix > 10 && *current >= 'a' && *current < 'a' + radix - 10) {
- digit = static_cast<char>(*current) - 'a' + 10;
- } else if (radix > 10 && *current >= 'A' && *current < 'A' + radix - 10) {
- digit = static_cast<char>(*current) - 'A' + 10;
+ if (**current >= '0' && **current <= '9' && **current < '0' + radix) {
+ digit = static_cast<char>(**current) - '0';
+ } else if (radix > 10 && **current >= 'a' && **current < 'a' + radix - 10) {
+ digit = static_cast<char>(**current) - 'a' + 10;
+ } else if (radix > 10 && **current >= 'A' && **current < 'A' + radix - 10) {
+ digit = static_cast<char>(**current) - 'A' + 10;
} else {
- if (allow_trailing_junk || !AdvanceToNonspace(&current, end)) {
+ if (allow_trailing_junk || !AdvanceToNonspace(current, end)) {
break;
} else {
return junk_string_value;
@@ -524,13 +553,13 @@
bool zero_tail = true;
while (true) {
- ++current;
- if (current == end || !isDigit(*current, radix)) break;
- zero_tail = zero_tail && *current == '0';
+ ++(*current);
+ if (*current == end || !isDigit(**current, radix)) break;
+ zero_tail = zero_tail && **current == '0';
exponent += radix_log_2;
}
- if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) {
+ if (!allow_trailing_junk && AdvanceToNonspace(current, end)) {
return junk_string_value;
}
@@ -552,13 +581,13 @@
}
break;
}
- ++current;
- } while (current != end);
+ ++(*current);
+ } while (*current != end);
ASSERT(number < ((int64_t)1 << kSignificandSize));
ASSERT(static_cast<int64_t>(static_cast<double>(number)) == number);
- *trailing_pointer = current;
+ *result_is_junk = false;
if (exponent == 0) {
if (sign) {
@@ -573,13 +602,14 @@
}
+template <class Iterator>
double StringToDoubleConverter::StringToIeee(
- const char* input,
+ Iterator input,
int length,
- int* processed_characters_count,
- bool read_as_double) const {
- const char* current = input;
- const char* end = input + length;
+ bool read_as_double,
+ int* processed_characters_count) const {
+ Iterator current = input;
+ Iterator end = input + length;
*processed_characters_count = 0;
@@ -626,7 +656,7 @@
if (*current == '+' || *current == '-') {
sign = (*current == '-');
++current;
- const char* next_non_space = current;
+ Iterator next_non_space = current;
// Skip following spaces (if allowed).
if (!AdvanceToNonspace(&next_non_space, end)) return junk_string_value_;
if (!allow_spaces_after_sign && (current != next_non_space)) {
@@ -690,17 +720,17 @@
return junk_string_value_; // "0x".
}
- const char* tail_pointer = NULL;
- double result = RadixStringToIeee<4>(current,
+ bool result_is_junk;
+ double result = RadixStringToIeee<4>(&current,
end,
sign,
allow_trailing_junk,
junk_string_value_,
read_as_double,
- &tail_pointer);
- if (tail_pointer != NULL) {
- if (allow_trailing_spaces) AdvanceToNonspace(&tail_pointer, end);
- *processed_characters_count = tail_pointer - input;
+ &result_is_junk);
+ if (!result_is_junk) {
+ if (allow_trailing_spaces) AdvanceToNonspace(&current, end);
+ *processed_characters_count = current - input;
}
return result;
}
@@ -855,15 +885,16 @@
if (octal) {
double result;
- const char* tail_pointer = NULL;
- result = RadixStringToIeee<3>(buffer,
+ bool result_is_junk;
+ char* start = buffer;
+ result = RadixStringToIeee<3>(&start,
buffer + buffer_pos,
sign,
allow_trailing_junk,
junk_string_value_,
read_as_double,
- &tail_pointer);
- ASSERT(tail_pointer != NULL);
+ &result_is_junk);
+ ASSERT(!result_is_junk);
*processed_characters_count = current - input;
return result;
}
@@ -886,4 +917,36 @@
return sign? -converted: converted;
}
+
+double StringToDoubleConverter::StringToDouble(
+ const char* buffer,
+ int length,
+ int* processed_characters_count) {
+ return StringToIeee(buffer, length, true, processed_characters_count);
+}
+
+
+double StringToDoubleConverter::StringToDouble(
+ const uc16* buffer,
+ int length,
+ int* processed_characters_count) {
+ return StringToIeee(buffer, length, true, processed_characters_count);
+}
+
+
+float StringToDoubleConverter::StringToFloat(const char* buffer,
+ int length,
+ int* processed_characters_count) {
+ return static_cast<float>(StringToIeee(buffer, length, false,
+ processed_characters_count));
+}
+
+
+float StringToDoubleConverter::StringToFloat(const uc16* buffer,
+ int length,
+ int* processed_characters_count) {
+ return static_cast<float>(StringToIeee(buffer, length, false,
+ processed_characters_count));
+}
+
} // namespace double_conversion
diff --git a/src/double-conversion.h b/src/double-conversion.h
index 1c3387d..7bc58ba 100644
--- a/src/double-conversion.h
+++ b/src/double-conversion.h
@@ -415,9 +415,10 @@
// junk, too.
// - ALLOW_TRAILING_JUNK: ignore trailing characters that are not part of
// a double literal.
- // - ALLOW_LEADING_SPACES: skip over leading spaces.
- // - ALLOW_TRAILING_SPACES: ignore trailing spaces.
- // - ALLOW_SPACES_AFTER_SIGN: ignore spaces after the sign.
+ // - ALLOW_LEADING_SPACES: skip over leading whitespace, including spaces,
+ // new-lines, and tabs.
+ // - ALLOW_TRAILING_SPACES: ignore trailing whitespace.
+ // - ALLOW_SPACES_AFTER_SIGN: ignore whitespace after the sign.
// Ex: StringToDouble("- 123.2") -> -123.2.
// StringToDouble("+ 123.2") -> 123.2
//
@@ -502,19 +503,24 @@
// in the 'processed_characters_count'. Trailing junk is never included.
double StringToDouble(const char* buffer,
int length,
- int* processed_characters_count) const {
- return StringToIeee(buffer, length, processed_characters_count, true);
- }
+ int* processed_characters_count) const;
+
+ // Same as StringToDouble above but for 16 bit characters.
+ double StringToDouble(const uc16* buffer,
+ int length,
+ int* processed_characters_count);
// Same as StringToDouble but reads a float.
// Note that this is not equivalent to static_cast<float>(StringToDouble(...))
// due to potential double-rounding.
float StringToFloat(const char* buffer,
int length,
- int* processed_characters_count) const {
- return static_cast<float>(StringToIeee(buffer, length,
- processed_characters_count, false));
- }
+ int* processed_characters_count) const;
+
+ // Same as StringToFloat above but for 16 bit characters.
+ float StringToFloat(const uc16* buffer,
+ int length,
+ int* processed_characters_count);
private:
const int flags_;
@@ -523,10 +529,11 @@
const char* const infinity_symbol_;
const char* const nan_symbol_;
- double StringToIeee(const char* buffer,
+ template <class Iterator>
+ double StringToIeee(Iterator start_pointer,
int length,
- int* processed_characters_count,
- bool read_as_double) const;
+ bool read_as_double,
+ int* processed_characters_count) const;
DISALLOW_IMPLICIT_CONSTRUCTORS(StringToDoubleConverter);
};
diff --git a/src/utils.h b/src/utils.h
index deb4bc1..c76f77d 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -100,6 +100,8 @@
#endif
+typedef uint16_t uc16;
+
// The following macro works on both 32 and 64-bit platforms.
// Usage: instead of writing 0x1234567890123456
// write UINT64_2PART_C(0x12345678,90123456);
diff --git a/test/cctest/test-conversions.cc b/test/cctest/test-conversions.cc
index e9cdad2..125cf26 100644
--- a/test/cctest/test-conversions.cc
+++ b/test/cctest/test-conversions.cc
@@ -1718,6 +1718,18 @@
}
+static double StrToD16(const uc16* str16, int length, int flags,
+ double empty_string_value,
+ int* processed_characters_count, bool* processed_all) {
+ StringToDoubleConverter converter(flags, empty_string_value, Double::NaN(),
+ NULL, NULL);
+ double result =
+ converter.StringToDouble(str16, length, processed_characters_count);
+ *processed_all = (length == *processed_characters_count);
+ return result;
+}
+
+
static double StrToD(const char* str, int flags, double empty_string_value,
int* processed_characters_count, bool* processed_all) {
StringToDoubleConverter converter(flags, empty_string_value, Double::NaN(),
@@ -1726,6 +1738,19 @@
processed_characters_count);
*processed_all =
((strlen(str) == static_cast<unsigned>(*processed_characters_count)));
+
+ uc16 buffer16[256];
+ ASSERT(strlen(str) < ARRAY_SIZE(buffer16));
+ int len = strlen(str);
+ for (int i = 0; i < len; i++) {
+ buffer16[i] = str[i];
+ }
+ int processed_characters_count16;
+ bool processed_all16;
+ double result16 = StrToD16(buffer16, len, flags, empty_string_value,
+ &processed_characters_count16, &processed_all16);
+ CHECK_EQ(result, result16);
+ CHECK_EQ(*processed_characters_count, processed_characters_count16);
return result;
}
@@ -2530,12 +2555,21 @@
CHECK_EQ(10.0, StrToD(" 012", flags, 0.0, &processed, &all_used));
CHECK(all_used);
+ CHECK_EQ(10.0, StrToD("\n012", flags, 0.0, &processed, &all_used));
+ CHECK(all_used);
+
CHECK_EQ(0.0, StrToD(" 00", flags, 1.0, &processed, &all_used));
CHECK(all_used);
+ CHECK_EQ(0.0, StrToD("\t00", flags, 1.0, &processed, &all_used));
+ CHECK(all_used);
+
CHECK_EQ(10.0, StrToD(" 012", flags, 1.0, &processed, &all_used));
CHECK(all_used);
+ CHECK_EQ(10.0, StrToD("\n012", flags, 1.0, &processed, &all_used));
+ CHECK(all_used);
+
CHECK_EQ(123456789.0,
StrToD(" 0123456789", flags, Double::NaN(), &processed, &all_used));
CHECK(all_used);
@@ -2545,6 +2579,10 @@
CHECK(all_used);
CHECK_EQ(342391.0,
+ StrToD("\n01234567", flags, Double::NaN(), &processed, &all_used));
+ CHECK(all_used);
+
+ CHECK_EQ(342391.0,
StrToD(" + 01234567", flags, Double::NaN(), &processed, &all_used));
CHECK(all_used);
@@ -2552,6 +2590,10 @@
StrToD(" - 01234567", flags, Double::NaN(), &processed, &all_used));
CHECK(all_used);
+ CHECK_EQ(-342391.0,
+ StrToD("\n-\t01234567", flags, Double::NaN(), &processed, &all_used));
+ CHECK(all_used);
+
CHECK_EQ(10.0, StrToD(" 012 ", flags, 0.0, &processed, &all_used));
CHECK(all_used);
@@ -3196,14 +3238,41 @@
}
-static float StrToF(const char* str, int flags, float empty_string_value,
- int* processed_characters_count, bool* processed_all) {
+static float StrToF16(const uc16* str16, int length, int flags,
+ double empty_string_value,
+ int* processed_characters_count,
+ bool* processed_all) {
+ StringToDoubleConverter converter(flags, empty_string_value, Double::NaN(),
+ NULL, NULL);
+ double result =
+ converter.StringToFloat(str16, length, processed_characters_count);
+ *processed_all = (length == *processed_characters_count);
+ return result;
+}
+
+
+static double StrToF(const char* str, int flags, double empty_string_value,
+ int* processed_characters_count, bool* processed_all) {
StringToDoubleConverter converter(flags, empty_string_value, Single::NaN(),
NULL, NULL);
float result = converter.StringToFloat(str, strlen(str),
processed_characters_count);
*processed_all =
((strlen(str) == static_cast<unsigned>(*processed_characters_count)));
+
+ uc16 buffer16[256];
+ ASSERT(strlen(str) < ARRAY_SIZE(buffer16));
+ int len = strlen(str);
+ for (int i = 0; i < len; i++) {
+ buffer16[i] = str[i];
+ }
+ int processed_characters_count16;
+ bool processed_all16;
+ float result16 = StrToF16(buffer16, len, flags, empty_string_value,
+ &processed_characters_count16,
+ &processed_all16);
+ CHECK_EQ(result, result16);
+ CHECK_EQ(*processed_characters_count, processed_characters_count16);
return result;
}
@@ -4569,3 +4638,75 @@
CHECK_EQ(0, processed);
}
}
+
+
+TEST(StringToDoubleFloatWhitespace) {
+ int flags;
+ int processed;
+ bool all_used;
+
+ flags = StringToDoubleConverter::ALLOW_LEADING_SPACES |
+ StringToDoubleConverter::ALLOW_TRAILING_SPACES |
+ StringToDoubleConverter::ALLOW_SPACES_AFTER_SIGN;
+
+ const char kWhitespaceAscii[] = {
+ 0x0A, 0x0D, 0x09, 0x0B, 0x0C, 0x20,
+ '-',
+ 0x0A, 0x0D, 0x09, 0x0B, 0x0C, 0x20,
+ '1', '.', '2',
+ 0x0A, 0x0D, 0x09, 0x0B, 0x0C, 0x20,
+ 0x00
+ };
+ CHECK_EQ(-1.2, StrToD(kWhitespaceAscii, flags, Double::NaN(),
+ &processed, &all_used));
+ CHECK(all_used);
+ CHECK_EQ(-1.2f, StrToF(kWhitespaceAscii, flags, Double::NaN(),
+ &processed, &all_used));
+ CHECK(all_used);
+
+ const uc16 kOghamSpaceMark = 0x1680;
+ const uc16 kMongolianVowelSeparator = 0x180E;
+ const uc16 kEnQuad = 0x2000;
+ const uc16 kEmQuad = 0x2001;
+ const uc16 kEnSpace = 0x2002;
+ const uc16 kEmSpace = 0x2003;
+ const uc16 kThreePerEmSpace = 0x2004;
+ const uc16 kFourPerEmSpace = 0x2005;
+ const uc16 kSixPerEmSpace = 0x2006;
+ const uc16 kFigureSpace = 0x2007;
+ const uc16 kPunctuationSpace = 0x2008;
+ const uc16 kThinSpace = 0x2009;
+ const uc16 kHairSpace = 0x200A;
+ const uc16 kNarrowNoBreakSpace = 0x202F;
+ const uc16 kMediumMathematicalSpace = 0x205F;
+ const uc16 kIdeographicSpace = 0x3000;
+
+ const uc16 kWhitespace16[] = {
+ 0x0A, 0x0D, 0x09, 0x0B, 0x0C, 0x20, 0xA0, 0xFEFF,
+ kOghamSpaceMark, kMongolianVowelSeparator, kEnQuad, kEmQuad,
+ kEnSpace, kEmSpace, kThreePerEmSpace, kFourPerEmSpace, kSixPerEmSpace,
+ kFigureSpace, kPunctuationSpace, kThinSpace, kHairSpace,
+ kNarrowNoBreakSpace, kMediumMathematicalSpace, kIdeographicSpace,
+ '-',
+ 0x0A, 0x0D, 0x09, 0x0B, 0x0C, 0x20, 0xA0, 0xFEFF,
+ kOghamSpaceMark, kMongolianVowelSeparator, kEnQuad, kEmQuad,
+ kEnSpace, kEmSpace, kThreePerEmSpace, kFourPerEmSpace, kSixPerEmSpace,
+ kFigureSpace, kPunctuationSpace, kThinSpace, kHairSpace,
+ kNarrowNoBreakSpace, kMediumMathematicalSpace, kIdeographicSpace,
+ '1', '.', '2',
+ 0x0A, 0x0D, 0x09, 0x0B, 0x0C, 0x20, 0xA0, 0xFEFF,
+ kOghamSpaceMark, kMongolianVowelSeparator, kEnQuad, kEmQuad,
+ kEnSpace, kEmSpace, kThreePerEmSpace, kFourPerEmSpace, kSixPerEmSpace,
+ kFigureSpace, kPunctuationSpace, kThinSpace, kHairSpace,
+ kNarrowNoBreakSpace, kMediumMathematicalSpace, kIdeographicSpace,
+ };
+ const int kWhitespace16Length = ARRAY_SIZE(kWhitespace16);
+ CHECK_EQ(-1.2, StrToD16(kWhitespace16, kWhitespace16Length, flags,
+ Double::NaN(),
+ &processed, &all_used));
+ CHECK(all_used);
+ CHECK_EQ(-1.2f, StrToF16(kWhitespace16, kWhitespace16Length, flags,
+ Single::NaN(),
+ &processed, &all_used));
+ CHECK(all_used);
+}