Split Strtod() (#106)
Add `StrtodTrimmed` method, exposing a later stage of the conversion pipeline.
Some tools can do the first stage outside of the double-conversion library and would prefer not to pay the cost of doing it again.
diff --git a/Changelog b/Changelog
index 66a24b4..6fc5245 100644
--- a/Changelog
+++ b/Changelog
@@ -2,6 +2,7 @@
Changed all macros to use DOUBLE_CONVERSION_ as prefix.
Renamed ALLOW_CASE_INSENSIBILITY to ALLOW_CASE_INSENSITIVITY,
the old name is still available but officially deprecated.
+ Created and exposed new intermediate function StrtodTrimmed().
2019-05-25:
Fix `0x` for string->double conversion when Hex Floats are allowed.
diff --git a/double-conversion/strtod.cc b/double-conversion/strtod.cc
index 7feb374..d0bb7f7 100644
--- a/double-conversion/strtod.cc
+++ b/double-conversion/strtod.cc
@@ -446,18 +446,31 @@
return false;
}
-double Strtod(Vector<const char> buffer, int exponent) {
- char copy_buffer[kMaxSignificantDecimalDigits];
- Vector<const char> trimmed;
- int updated_exponent;
- TrimAndCut(buffer, exponent, copy_buffer, kMaxSignificantDecimalDigits,
- &trimmed, &updated_exponent);
- exponent = updated_exponent;
+static bool IsDigit(const char d) {
+ return ('0' <= d) && (d <= '9');
+}
+static bool IsNonZeroDigit(const char d) {
+ return ('1' <= d) && (d <= '9');
+}
+
+static bool AssertTrimmedDigits(const Vector<const char>& buffer) {
+ for(int i = 0; i < buffer.length(); ++i) {
+ if(!IsDigit(buffer[i])) {
+ return false;
+ }
+ }
+ return (buffer.length() == 0) || (IsNonZeroDigit(buffer[0]) && IsNonZeroDigit(buffer[buffer.length()-1]));
+}
+
+double StrtodTrimmed(Vector<const char> trimmed, int exponent) {
+ DOUBLE_CONVERSION_ASSERT(trimmed.length() <= kMaxSignificantDecimalDigits);
+ DOUBLE_CONVERSION_ASSERT(AssertTrimmedDigits(trimmed));
double guess;
- bool is_correct = ComputeGuess(trimmed, exponent, &guess);
- if (is_correct) return guess;
-
+ const bool is_correct = ComputeGuess(trimmed, exponent, &guess);
+ if (is_correct) {
+ return guess;
+ }
DiyFp upper_boundary = Double(guess).UpperBoundary();
int comparison = CompareBufferWithDiyFp(trimmed, exponent, upper_boundary);
if (comparison < 0) {
@@ -472,6 +485,15 @@
}
}
+double Strtod(Vector<const char> buffer, int exponent) {
+ char copy_buffer[kMaxSignificantDecimalDigits];
+ Vector<const char> trimmed;
+ int updated_exponent;
+ TrimAndCut(buffer, exponent, copy_buffer, kMaxSignificantDecimalDigits,
+ &trimmed, &updated_exponent);
+ return StrtodTrimmed(trimmed, updated_exponent);
+}
+
static float SanitizedDoubletof(double d) {
DOUBLE_CONVERSION_ASSERT(d >= 0.0);
// ASAN has a sanitize check that disallows casting doubles to floats if
diff --git a/double-conversion/strtod.h b/double-conversion/strtod.h
index ed0293b..ff0ee47 100644
--- a/double-conversion/strtod.h
+++ b/double-conversion/strtod.h
@@ -40,6 +40,11 @@
// contain a dot or a sign. It must not start with '0', and must not be empty.
float Strtof(Vector<const char> buffer, int exponent);
+// For special use cases, the heart of the Strtod() function is also available
+// separately, it assumes that 'trimmed' is as produced by TrimAndCut(), i.e.
+// no leading or trailing zeros, also no lone zero, and not 'too many' digits.
+double StrtodTrimmed(Vector<const char> trimmed, int exponent);
+
} // namespace double_conversion
#endif // DOUBLE_CONVERSION_STRTOD_H_
diff --git a/test/cctest/test-strtod.cc b/test/cctest/test-strtod.cc
index 1a6ac3b..62badbd 100644
--- a/test/cctest/test-strtod.cc
+++ b/test/cctest/test-strtod.cc
@@ -21,6 +21,11 @@
}
+static double StrtodTrimmedChar(const char* str, int exponent) {
+ return StrtodTrimmed(StringToVector(str), exponent);
+}
+
+
static float StrtofChar(const char* str, int exponent) {
return Strtof(StringToVector(str), exponent);
}
@@ -350,6 +355,276 @@
}
+TEST(StrtodTrimmed) {
+ Vector<const char> vector;
+
+ vector = StringToVector("1");
+ CHECK_EQ(1.0, StrtodTrimmed(vector, 0));
+ CHECK_EQ(10.0, StrtodTrimmed(vector, 1));
+ CHECK_EQ(100.0, StrtodTrimmed(vector, 2));
+ CHECK_EQ(1e20, StrtodTrimmed(vector, 20));
+ CHECK_EQ(1e22, StrtodTrimmed(vector, 22));
+ CHECK_EQ(1e23, StrtodTrimmed(vector, 23));
+ CHECK_EQ(1e35, StrtodTrimmed(vector, 35));
+ CHECK_EQ(1e36, StrtodTrimmed(vector, 36));
+ CHECK_EQ(1e37, StrtodTrimmed(vector, 37));
+ CHECK_EQ(1e-1, StrtodTrimmed(vector, -1));
+ CHECK_EQ(1e-2, StrtodTrimmed(vector, -2));
+ CHECK_EQ(1e-5, StrtodTrimmed(vector, -5));
+ CHECK_EQ(1e-20, StrtodTrimmed(vector, -20));
+ CHECK_EQ(1e-22, StrtodTrimmed(vector, -22));
+ CHECK_EQ(1e-23, StrtodTrimmed(vector, -23));
+ CHECK_EQ(1e-25, StrtodTrimmed(vector, -25));
+ CHECK_EQ(1e-39, StrtodTrimmed(vector, -39));
+
+ vector = StringToVector("2");
+ CHECK_EQ(2.0, StrtodTrimmed(vector, 0));
+ CHECK_EQ(20.0, StrtodTrimmed(vector, 1));
+ CHECK_EQ(200.0, StrtodTrimmed(vector, 2));
+ CHECK_EQ(2e20, StrtodTrimmed(vector, 20));
+ CHECK_EQ(2e22, StrtodTrimmed(vector, 22));
+ CHECK_EQ(2e23, StrtodTrimmed(vector, 23));
+ CHECK_EQ(2e35, StrtodTrimmed(vector, 35));
+ CHECK_EQ(2e36, StrtodTrimmed(vector, 36));
+ CHECK_EQ(2e37, StrtodTrimmed(vector, 37));
+ CHECK_EQ(2e-1, StrtodTrimmed(vector, -1));
+ CHECK_EQ(2e-2, StrtodTrimmed(vector, -2));
+ CHECK_EQ(2e-5, StrtodTrimmed(vector, -5));
+ CHECK_EQ(2e-20, StrtodTrimmed(vector, -20));
+ CHECK_EQ(2e-22, StrtodTrimmed(vector, -22));
+ CHECK_EQ(2e-23, StrtodTrimmed(vector, -23));
+ CHECK_EQ(2e-25, StrtodTrimmed(vector, -25));
+ CHECK_EQ(2e-39, StrtodTrimmed(vector, -39));
+
+ vector = StringToVector("9");
+ CHECK_EQ(9.0, StrtodTrimmed(vector, 0));
+ CHECK_EQ(90.0, StrtodTrimmed(vector, 1));
+ CHECK_EQ(900.0, StrtodTrimmed(vector, 2));
+ CHECK_EQ(9e20, StrtodTrimmed(vector, 20));
+ CHECK_EQ(9e22, StrtodTrimmed(vector, 22));
+ CHECK_EQ(9e23, StrtodTrimmed(vector, 23));
+ CHECK_EQ(9e35, StrtodTrimmed(vector, 35));
+ CHECK_EQ(9e36, StrtodTrimmed(vector, 36));
+ CHECK_EQ(9e37, StrtodTrimmed(vector, 37));
+ CHECK_EQ(9e-1, StrtodTrimmed(vector, -1));
+ CHECK_EQ(9e-2, StrtodTrimmed(vector, -2));
+ CHECK_EQ(9e-5, StrtodTrimmed(vector, -5));
+ CHECK_EQ(9e-20, StrtodTrimmed(vector, -20));
+ CHECK_EQ(9e-22, StrtodTrimmed(vector, -22));
+ CHECK_EQ(9e-23, StrtodTrimmed(vector, -23));
+ CHECK_EQ(9e-25, StrtodTrimmed(vector, -25));
+ CHECK_EQ(9e-39, StrtodTrimmed(vector, -39));
+
+ vector = StringToVector("12345");
+ CHECK_EQ(12345.0, StrtodTrimmed(vector, 0));
+ CHECK_EQ(123450.0, StrtodTrimmed(vector, 1));
+ CHECK_EQ(1234500.0, StrtodTrimmed(vector, 2));
+ CHECK_EQ(12345e20, StrtodTrimmed(vector, 20));
+ CHECK_EQ(12345e22, StrtodTrimmed(vector, 22));
+ CHECK_EQ(12345e23, StrtodTrimmed(vector, 23));
+ CHECK_EQ(12345e30, StrtodTrimmed(vector, 30));
+ CHECK_EQ(12345e31, StrtodTrimmed(vector, 31));
+ CHECK_EQ(12345e32, StrtodTrimmed(vector, 32));
+ CHECK_EQ(12345e35, StrtodTrimmed(vector, 35));
+ CHECK_EQ(12345e36, StrtodTrimmed(vector, 36));
+ CHECK_EQ(12345e37, StrtodTrimmed(vector, 37));
+ CHECK_EQ(12345e-1, StrtodTrimmed(vector, -1));
+ CHECK_EQ(12345e-2, StrtodTrimmed(vector, -2));
+ CHECK_EQ(12345e-5, StrtodTrimmed(vector, -5));
+ CHECK_EQ(12345e-20, StrtodTrimmed(vector, -20));
+ CHECK_EQ(12345e-22, StrtodTrimmed(vector, -22));
+ CHECK_EQ(12345e-23, StrtodTrimmed(vector, -23));
+ CHECK_EQ(12345e-25, StrtodTrimmed(vector, -25));
+ CHECK_EQ(12345e-39, StrtodTrimmed(vector, -39));
+
+ vector = StringToVector("12345678901234");
+ CHECK_EQ(12345678901234.0, StrtodTrimmed(vector, 0));
+ CHECK_EQ(123456789012340.0, StrtodTrimmed(vector, 1));
+ CHECK_EQ(1234567890123400.0, StrtodTrimmed(vector, 2));
+ CHECK_EQ(12345678901234e20, StrtodTrimmed(vector, 20));
+ CHECK_EQ(12345678901234e22, StrtodTrimmed(vector, 22));
+ CHECK_EQ(12345678901234e23, StrtodTrimmed(vector, 23));
+ CHECK_EQ(12345678901234e30, StrtodTrimmed(vector, 30));
+ CHECK_EQ(12345678901234e31, StrtodTrimmed(vector, 31));
+ CHECK_EQ(12345678901234e32, StrtodTrimmed(vector, 32));
+ CHECK_EQ(12345678901234e35, StrtodTrimmed(vector, 35));
+ CHECK_EQ(12345678901234e36, StrtodTrimmed(vector, 36));
+ CHECK_EQ(12345678901234e37, StrtodTrimmed(vector, 37));
+ CHECK_EQ(12345678901234e-1, StrtodTrimmed(vector, -1));
+ CHECK_EQ(12345678901234e-2, StrtodTrimmed(vector, -2));
+ CHECK_EQ(12345678901234e-5, StrtodTrimmed(vector, -5));
+ CHECK_EQ(12345678901234e-20, StrtodTrimmed(vector, -20));
+ CHECK_EQ(12345678901234e-22, StrtodTrimmed(vector, -22));
+ CHECK_EQ(12345678901234e-23, StrtodTrimmed(vector, -23));
+ CHECK_EQ(12345678901234e-25, StrtodTrimmed(vector, -25));
+ CHECK_EQ(12345678901234e-39, StrtodTrimmed(vector, -39));
+
+ vector = StringToVector("123456789012345");
+ CHECK_EQ(123456789012345.0, StrtodTrimmed(vector, 0));
+ CHECK_EQ(1234567890123450.0, StrtodTrimmed(vector, 1));
+ CHECK_EQ(12345678901234500.0, StrtodTrimmed(vector, 2));
+ CHECK_EQ(123456789012345e20, StrtodTrimmed(vector, 20));
+ CHECK_EQ(123456789012345e22, StrtodTrimmed(vector, 22));
+ CHECK_EQ(123456789012345e23, StrtodTrimmed(vector, 23));
+ CHECK_EQ(123456789012345e35, StrtodTrimmed(vector, 35));
+ CHECK_EQ(123456789012345e36, StrtodTrimmed(vector, 36));
+ CHECK_EQ(123456789012345e37, StrtodTrimmed(vector, 37));
+ CHECK_EQ(123456789012345e39, StrtodTrimmed(vector, 39));
+ CHECK_EQ(123456789012345e-1, StrtodTrimmed(vector, -1));
+ CHECK_EQ(123456789012345e-2, StrtodTrimmed(vector, -2));
+ CHECK_EQ(123456789012345e-5, StrtodTrimmed(vector, -5));
+ CHECK_EQ(123456789012345e-20, StrtodTrimmed(vector, -20));
+ CHECK_EQ(123456789012345e-22, StrtodTrimmed(vector, -22));
+ CHECK_EQ(123456789012345e-23, StrtodTrimmed(vector, -23));
+ CHECK_EQ(123456789012345e-25, StrtodTrimmed(vector, -25));
+ CHECK_EQ(123456789012345e-39, StrtodTrimmed(vector, -39));
+
+ CHECK_EQ(0.0, StrtodTrimmedChar("", 1324));
+ CHECK_EQ(0.0, StrtodTrimmedChar("2", -324));
+ CHECK_EQ(4e-324, StrtodTrimmedChar("3", -324));
+ // It would be more readable to put non-zero literals on the left side (i.e.
+ // CHECK_EQ(1e-325, StrtodChar("1", -325))), but then Gcc complains that
+ // they are truncated to zero.
+ CHECK_EQ(0.0, StrtodTrimmedChar("1", -325));
+ CHECK_EQ(0.0, StrtodTrimmedChar("1", -325));
+
+ // It would be more readable to put the literals (and not Double::Infinity())
+ // on the left side (i.e. CHECK_EQ(1e309, StrtodChar("1", 309))), but then Gcc
+ // complains that the floating constant exceeds range of 'double'.
+ CHECK_EQ(Double::Infinity(), StrtodTrimmedChar("1", 309));
+ CHECK_EQ(1e308, StrtodTrimmedChar("1", 308));
+ CHECK_EQ(1234e305, StrtodTrimmedChar("1234", 305));
+ CHECK_EQ(1234e304, StrtodTrimmedChar("1234", 304));
+ CHECK_EQ(Double::Infinity(), StrtodTrimmedChar("18", 307));
+ CHECK_EQ(17e307, StrtodTrimmedChar("17", 307));
+
+ CHECK_EQ(1.7976931348623157E+308, StrtodTrimmedChar("17976931348623157", 292));
+ CHECK_EQ(1.7976931348623158E+308, StrtodTrimmedChar("17976931348623158", 292));
+ CHECK_EQ(Double::Infinity(), StrtodTrimmedChar("17976931348623159", 292));
+
+ // The following number is the result of 89255.0/1e-22. Both floating-point
+ // numbers can be accurately represented with doubles. However on Linux,x86
+ // the floating-point stack is set to 80bits and the double-rounding
+ // introduces an error.
+ CHECK_EQ(89255e-22, StrtodTrimmedChar("89255", -22));
+
+ // Some random values.
+ CHECK_EQ(358416272e-33, StrtodTrimmedChar("358416272", -33));
+ CHECK_EQ(104110013277974872254e-225,
+ StrtodTrimmedChar("104110013277974872254", -225));
+
+ CHECK_EQ(123456789e108, StrtodTrimmedChar("123456789", 108));
+ CHECK_EQ(123456789e109, StrtodTrimmedChar("123456789", 109));
+ CHECK_EQ(123456789e110, StrtodTrimmedChar("123456789", 110));
+ CHECK_EQ(123456789e111, StrtodTrimmedChar("123456789", 111));
+ CHECK_EQ(123456789e112, StrtodTrimmedChar("123456789", 112));
+ CHECK_EQ(123456789e113, StrtodTrimmedChar("123456789", 113));
+ CHECK_EQ(123456789e114, StrtodTrimmedChar("123456789", 114));
+ CHECK_EQ(123456789e115, StrtodTrimmedChar("123456789", 115));
+ CHECK_EQ(1234567890123456789012345e108,
+ StrtodTrimmedChar("1234567890123456789012345", 108));
+ CHECK_EQ(1234567890123456789012345e109,
+ StrtodTrimmedChar("1234567890123456789012345", 109));
+ CHECK_EQ(1234567890123456789012345e110,
+ StrtodTrimmedChar("1234567890123456789012345", 110));
+ CHECK_EQ(1234567890123456789012345e111,
+ StrtodTrimmedChar("1234567890123456789012345", 111));
+ CHECK_EQ(1234567890123456789012345e112,
+ StrtodTrimmedChar("1234567890123456789012345", 112));
+ CHECK_EQ(1234567890123456789012345e113,
+ StrtodTrimmedChar("1234567890123456789012345", 113));
+ CHECK_EQ(1234567890123456789012345e114,
+ StrtodTrimmedChar("1234567890123456789012345", 114));
+ CHECK_EQ(1234567890123456789012345e115,
+ StrtodTrimmedChar("1234567890123456789012345", 115));
+
+ CHECK_EQ(1234567890123456789052345e108,
+ StrtodTrimmedChar("1234567890123456789052345", 108));
+ CHECK_EQ(1234567890123456789052345e109,
+ StrtodTrimmedChar("1234567890123456789052345", 109));
+ CHECK_EQ(1234567890123456789052345e110,
+ StrtodTrimmedChar("1234567890123456789052345", 110));
+ CHECK_EQ(1234567890123456789052345e111,
+ StrtodTrimmedChar("1234567890123456789052345", 111));
+ CHECK_EQ(1234567890123456789052345e112,
+ StrtodTrimmedChar("1234567890123456789052345", 112));
+ CHECK_EQ(1234567890123456789052345e113,
+ StrtodTrimmedChar("1234567890123456789052345", 113));
+ CHECK_EQ(1234567890123456789052345e114,
+ StrtodTrimmedChar("1234567890123456789052345", 114));
+ CHECK_EQ(1234567890123456789052345e115,
+ StrtodTrimmedChar("1234567890123456789052345", 115));
+
+ // Boundary cases. Boundaries themselves should round to even.
+ //
+ // 0x1FFFFFFFFFFFF * 2^3 = 72057594037927928
+ // next: 72057594037927936
+ // boundary: 72057594037927932 should round up.
+ CHECK_EQ(72057594037927928.0, StrtodTrimmedChar("72057594037927928", 0));
+ CHECK_EQ(72057594037927936.0, StrtodTrimmedChar("72057594037927936", 0));
+ CHECK_EQ(72057594037927936.0, StrtodTrimmedChar("72057594037927932", 0));
+ CHECK_EQ(72057594037927928.0, StrtodTrimmedChar("7205759403792793199999", -5));
+ CHECK_EQ(72057594037927936.0, StrtodTrimmedChar("7205759403792793200001", -5));
+
+ // 0x1FFFFFFFFFFFF * 2^10 = 9223372036854774784
+ // next: 9223372036854775808
+ // boundary: 9223372036854775296 should round up.
+ CHECK_EQ(9223372036854774784.0, StrtodTrimmedChar("9223372036854774784", 0));
+ CHECK_EQ(9223372036854775808.0, StrtodTrimmedChar("9223372036854775808", 0));
+ CHECK_EQ(9223372036854775808.0, StrtodTrimmedChar("9223372036854775296", 0));
+ CHECK_EQ(9223372036854774784.0, StrtodTrimmedChar("922337203685477529599999", -5));
+ CHECK_EQ(9223372036854775808.0, StrtodTrimmedChar("922337203685477529600001", -5));
+
+ // 0x1FFFFFFFFFFFF * 2^50 = 10141204801825834086073718800384
+ // next: 10141204801825835211973625643008
+ // boundary: 10141204801825834649023672221696 should round up.
+ CHECK_EQ(10141204801825834086073718800384.0,
+ StrtodTrimmedChar("10141204801825834086073718800384", 0));
+ CHECK_EQ(10141204801825835211973625643008.0,
+ StrtodTrimmedChar("10141204801825835211973625643008", 0));
+ CHECK_EQ(10141204801825835211973625643008.0,
+ StrtodTrimmedChar("10141204801825834649023672221696", 0));
+ CHECK_EQ(10141204801825834086073718800384.0,
+ StrtodTrimmedChar("1014120480182583464902367222169599999", -5));
+ CHECK_EQ(10141204801825835211973625643008.0,
+ StrtodTrimmedChar("1014120480182583464902367222169600001", -5));
+
+ // 0x1FFFFFFFFFFFF * 2^99 = 5708990770823838890407843763683279797179383808
+ // next: 5708990770823839524233143877797980545530986496
+ // boundary: 5708990770823839207320493820740630171355185152
+ // The boundary should round up.
+ CHECK_EQ(5708990770823838890407843763683279797179383808.0,
+ StrtodTrimmedChar("5708990770823838890407843763683279797179383808", 0));
+ CHECK_EQ(5708990770823839524233143877797980545530986496.0,
+ StrtodTrimmedChar("5708990770823839524233143877797980545530986496", 0));
+ CHECK_EQ(5708990770823839524233143877797980545530986496.0,
+ StrtodTrimmedChar("5708990770823839207320493820740630171355185152", 0));
+ CHECK_EQ(5708990770823838890407843763683279797179383808.0,
+ StrtodTrimmedChar("5708990770823839207320493820740630171355185151999", -3));
+ CHECK_EQ(5708990770823839524233143877797980545530986496.0,
+ StrtodTrimmedChar("5708990770823839207320493820740630171355185152001", -3));
+
+ // The following test-cases got some public attention in early 2011 when they
+ // sent Java and PHP into an infinite loop.
+ CHECK_EQ(2.225073858507201e-308, StrtodTrimmedChar("22250738585072011", -324));
+ CHECK_EQ(2.22507385850720138309e-308,
+ StrtodTrimmedChar("22250738585072011360574097967091319759348195463516456480"
+ "23426109724822222021076945516529523908135087914149158913"
+ "03962110687008643869459464552765720740782062174337998814"
+ "10632673292535522868813721490129811224514518898490572223"
+ "07285255133155755015914397476397983411801999323962548289"
+ "01710708185069063066665599493827577257201576306269066333"
+ "26475653000092458883164330377797918696120494973903778297"
+ "04905051080609940730262937128958950003583799967207254304"
+ "36028407889577179615094551674824347103070260914462157228"
+ "98802581825451803257070188608721131280795122334262883686"
+ "22321503775666622503982534335974568884423900265498198385"
+ "48794829220689472168983109969836584681402285424333066033"
+ "98508864458040010349339704275671864433837704860378616227"
+ "71738545623065874679014086723327636718751", -1076));
+}
+
+
TEST(Strtof) {
Vector<const char> vector;