mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-10-25 02:30:57 +00:00
*scanf() fixes to make TeX work (#1109)
* Fix reading the same symbol twice when using `{f,}scanf()`
PR #924 appears to use `unget()` subtly incorrectly when parsing
floating-point numbers. The rest of the code only uses `unget()`
immediately followed by `goto Done;` to push back the symbol that
can't possibly belong to the directive we're processing.
With floating-point, however, the ungot characters could very well
be valid for the *next* directive, so we will essentially read them
twice. It can't be seen in `sscanf()` tests because `unget()` is a
no-op there, but the test I added for `fscanf()` fails like this:
...
EXPECT_EQ(0xDEAD, i1)
need 57005 (or 0xdead) =
got 908973 (or 0x000ddead)
...
EXPECT_EQ(0xBEEF, i2)
need 48879 (or 0xbeef) =
got 769775 (or 0x000bbeef)
This means we read 0xDDEAD instead of 0xDEAD and 0xBBEEF instead of
0xBEEF. I checked that both musl and glibc read 0xDEAD/0xBEEF, as
expected.
Fix the failing test by removing the unneeded `unget()` calls.
* Don't read invalid floating-point numbers in `*scanf()`
Currently, we just ignore any errors from `strtod()`. They can
happen either because no valid float can be parsed at all, or
because the state machine recognizes only a prefix of a valid
floating-point number.
Fix this by making sure `strtod()` parses everything we recognized,
provided it's non-empty. This requires popping the last character
off the FP buffer, because that character is supposed to be parsed by
the next `*scanf()` directive.
* Make `%c` parsing in `*scanf()` respect the C standard
Currently, `%c`-style directives always succeed even if there
are actually fewer characters in the input than requested.
Before the fix, the added test fails like this:
...
EXPECT_EQ(2, sscanf("ab", "%c %c %c", &c2, &c3, &c4))
need 2 (or 0x02 or '\2' or ENOENT) =
got 3 (or 0x03 or '\3' or ESRCH)
...
EXPECT_EQ(0, sscanf("abcd", "%5c", s2))
need 0 (or 0x0 or '\0') =
got 1 (or 0x01 or '\1' or EPERM)
musl and glibc pass this test.
This commit is contained in:
parent
3afe3a3646
commit
f7ff515961
3 changed files with 89 additions and 22 deletions
|
|
@ -50,6 +50,12 @@
|
|||
} \
|
||||
c; \
|
||||
})
|
||||
#define UNBUFFER \
|
||||
({ \
|
||||
if (c != -1) { \
|
||||
fpbuf[--fpbufcur] = '\0'; \
|
||||
} \
|
||||
})
|
||||
|
||||
/**
|
||||
* String / file / stream decoder.
|
||||
|
|
@ -369,10 +375,11 @@ int __vcscanf(int callback(void *), //
|
|||
}
|
||||
} while ((c = BUFFER) != -1 && c != ')');
|
||||
if (c == ')') {
|
||||
c = BUFFER;
|
||||
c = READ;
|
||||
}
|
||||
goto GotFloatingPointNumber;
|
||||
} else {
|
||||
UNBUFFER;
|
||||
goto GotFloatingPointNumber;
|
||||
}
|
||||
} else {
|
||||
|
|
@ -410,9 +417,7 @@ int __vcscanf(int callback(void *), //
|
|||
goto Done;
|
||||
}
|
||||
} else {
|
||||
if (c != -1 && unget) {
|
||||
unget(c, arg);
|
||||
}
|
||||
UNBUFFER;
|
||||
goto GotFloatingPointNumber;
|
||||
}
|
||||
} else {
|
||||
|
|
@ -465,13 +470,24 @@ int __vcscanf(int callback(void *), //
|
|||
Continue:
|
||||
continue;
|
||||
Break:
|
||||
if (c != -1 && unget) {
|
||||
unget(c, arg);
|
||||
}
|
||||
UNBUFFER;
|
||||
break;
|
||||
} while ((c = BUFFER) != -1);
|
||||
GotFloatingPointNumber:
|
||||
fp = strtod((char *)fpbuf, NULL);
|
||||
/* An empty buffer can't be a valid float; don't even bother parsing. */
|
||||
bool valid = fpbufcur > 0;
|
||||
if (valid) {
|
||||
char *ep;
|
||||
fp = strtod((char *)fpbuf, &ep);
|
||||
/* We should have parsed the whole buffer. */
|
||||
valid = ep == (char *)fpbuf + fpbufcur;
|
||||
}
|
||||
free(fpbuf);
|
||||
fpbuf = NULL;
|
||||
fpbufcur = fpbufsize = 0;
|
||||
if (!valid) {
|
||||
goto Done;
|
||||
}
|
||||
if (!discard) {
|
||||
++items;
|
||||
void *out = va_arg(va, void *);
|
||||
|
|
@ -481,9 +497,6 @@ int __vcscanf(int callback(void *), //
|
|||
*(double *)out = (double)fp;
|
||||
}
|
||||
}
|
||||
free(fpbuf);
|
||||
fpbuf = NULL;
|
||||
fpbufcur = fpbufsize = 0;
|
||||
continue;
|
||||
ReportConsumed:
|
||||
n_ptr = va_arg(va, int *);
|
||||
|
|
@ -537,6 +550,11 @@ int __vcscanf(int callback(void *), //
|
|||
if (!j && c == -1 && !items) {
|
||||
items = -1;
|
||||
goto Done;
|
||||
} else if (rawmode && j != width) {
|
||||
/* The C standard says that %c "matches a sequence of characters of
|
||||
* **exactly** the number specified by the field width". If we have
|
||||
* fewer characters, what we've just read is invalid. */
|
||||
goto Done;
|
||||
} else if (!rawmode && j < bufsize) {
|
||||
if (charbytes == sizeof(char)) {
|
||||
buf[j] = '\0';
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue