*scanf() fixes to make TeX work (#1109)

* Fix reading the same symbol twice when using `{f,}scanf()`

  PR #924 appears to use `unget()` subtly incorrectly when parsing floating-point numbers. The rest of the code only uses `unget()` immediately followed by `goto Done;` to return the symbol that can't possibly belong to the directive we're processing. With floating-point, however, the ungot characters could very well be valid for the *next* directive, so we will essentially read them twice. This can't be seen in `sscanf()` tests because `unget()` is a no-op there, but the test I added for `fscanf()` fails like this:

      ...
      EXPECT_EQ(0xDEAD, i1) need 57005 (or 0xdead) = got 908973 (or 0x000ddead)
      ...
      EXPECT_EQ(0xBEEF, i2) need 48879 (or 0xbeef) = got 769775 (or 0x000bbeef)

  This means we read 0xDDEAD instead of 0xDEAD and 0xBBEEF instead of 0xBEEF. I checked that both musl and glibc read 0xDEAD/0xBEEF, as expected. Fix the failing test by removing the unneeded `unget()` calls.

* Don't read invalid floating-point numbers in `*scanf()`

  Currently, we just ignore any errors from `strtod()`. They can happen either because no valid float can be parsed at all, or because the state machine recognizes only a prefix of a valid floating-point number. Fix this by making sure `strtod()` parses everything we recognized, provided it's non-empty. This requires popping the last character, which is supposed to be parsed by the next `*scanf()` directive, off the FP buffer.

* Make `%c` parsing in `*scanf()` respect the C standard

  Currently, `%c`-style directives always succeed even if there are actually fewer characters in the input than requested. Before the fix, the added test fails like this:

      ...
      EXPECT_EQ(2, sscanf("ab", "%c %c %c", &c2, &c3, &c4)) need 2 (or 0x02 or '\2' or ENOENT) = got 3 (or 0x03 or '\3' or ESRCH)
      ...
      EXPECT_EQ(0, sscanf("abcd", "%5c", s2)) need 0 (or 0x0 or '\0') = got 1 (or 0x01 or '\1' or EPERM)

  musl and glibc pass this test.
2025-10-04 13:41:02 +00:00 · 2024-02-23 16:15:30 +01:00 · 2024-02-23 16:15:30 +01:00 · f7ff515961
commit f7ff515961
parent 3afe3a3646
3 changed files with 89 additions and 22 deletions
--- a/test/libc/stdio/sscanf_test.c
+++ b/test/libc/stdio/sscanf_test.c
@@ -69,9 +69,17 @@ TEST(sscanf, testNonDirectiveCharacterMatching) {
 }

 TEST(sscanf, testCharacter) {
-  char c = 0;
-  EXPECT_EQ(1, sscanf("a", "%c", &c));
-  EXPECT_EQ('a', c);
+  char c1 = 0, c2 = c1, c3 = c2, c4 = c3;
+  char s1[32] = {0}, s2[32] = {0};
+  EXPECT_EQ(1, sscanf("a", "%c", &c1));
+  EXPECT_EQ(2, sscanf("ab", "%c %c %c", &c2, &c3, &c4));
+  EXPECT_EQ(1, sscanf("abcde", "%5c", s1));
+  EXPECT_EQ(0, sscanf("abcd", "%5c", s2));
+
+  EXPECT_EQ('a', c1);
+  EXPECT_EQ('a', c2);
+  EXPECT_EQ('b', c3);
+  EXPECT_STREQ("abcde", &s1[0]);
 }

 TEST(sscanf, testStringBuffer) {
@@ -394,6 +402,20 @@ TEST(sscanf, floating_point_infinity_double_precision) {
  EXPECT_TRUE(isinf(g));
 }

+TEST(sscanf, floating_point_invalid) {
+  float dummy;
+  EXPECT_EQ(0, sscanf("junk", "%f", &dummy));
+  EXPECT_EQ(0, sscanf("e9", "%f", &dummy));
+  EXPECT_EQ(0, sscanf("-e9", "%f", &dummy));
+}
+
+TEST(sscanf, floating_point_invalid_double_precision) {
+  double dummy;
+  EXPECT_EQ(0, sscanf("junk", "%lf", &dummy));
+  EXPECT_EQ(0, sscanf("e9", "%lf", &dummy));
+  EXPECT_EQ(0, sscanf("-e9", "%lf", &dummy));
+}
+
 TEST(sscanf, floating_point_documentation_examples) {
  float a = 666.666f, b = a, c = b, d = c, e = d, f = e, g = f, h = g, i = h,
        j = i;
@@ -401,7 +423,7 @@ TEST(sscanf, floating_point_documentation_examples) {
  EXPECT_EQ(2, sscanf("111.11 -2.22", "%f %f", &a, &b));
  EXPECT_EQ(3, sscanf("Nan nan(2) inF", "%f %f %f", &c, &d, &e));
  EXPECT_EQ(
-      5, sscanf("0X1.BC70A3D70A3D7P+6 1.18973e+4932zzz -0.0000000123junk junk",
+      2, sscanf("0X1.BC70A3D70A3D7P+6 1.18973e+4932zzz -0.0000000123junk junk",
                "%f %f %f %f %f", &f, &g, &h, &i, &j));

  EXPECT_EQ(111.11f, a);
@@ -411,9 +433,6 @@ TEST(sscanf, floating_point_documentation_examples) {
  EXPECT_TRUE(isinf(e));
  EXPECT_EQ(0X1.BC70A3D70A3D7P+6f, f);
  EXPECT_TRUE(isinf(g));
-  EXPECT_EQ(-0.0000000123f, h);
-  EXPECT_EQ(.0f, i);
-  EXPECT_EQ(.0f, j);
 }

 TEST(sscanf, floating_point_documentation_examples_double_precision) {
@@ -423,7 +442,7 @@ TEST(sscanf, floating_point_documentation_examples_double_precision) {
  EXPECT_EQ(2, sscanf("111.11 -2.22", "%lf %lf", &a, &b));
  EXPECT_EQ(3, sscanf("Nan nan(2) inF", "%lf %lf %lf", &c, &d, &e));
  EXPECT_EQ(
-      5, sscanf("0X1.BC70A3D70A3D7P+6 1.18973e+4932zzz -0.0000000123junk junk",
+      2, sscanf("0X1.BC70A3D70A3D7P+6 1.18973e+4932zzz -0.0000000123junk junk",
                "%lf %lf %lf %lf %lf", &f, &g, &h, &i, &j));

  EXPECT_EQ(111.11, a);
@@ -433,9 +452,6 @@ TEST(sscanf, floating_point_documentation_examples_double_precision) {
  EXPECT_TRUE(isinf(e));
  EXPECT_EQ(0X1.BC70A3D70A3D7P+6, f);
  EXPECT_TRUE(isinf(g));
-  EXPECT_EQ(-0.0000000123, h);
-  EXPECT_EQ(.0, i);
-  EXPECT_EQ(.0, j);
 }

 TEST(sscanf, luplus) {