Improve Python and Linenoise

This change reinvents all the GNU Readline features I discovered that I
couldn't live without, e.g. UTF-8, CTRL-R search, and CTRL-Y yanking. It
now feels just as good in terms of user interface from the subconscious
workflow perspective. It's really nice to finally have an embeddable line
reader that's actually good, with a 30 KB footprint and a BSD-2 license.

This change adds a directory to the examples folder explaining how the
new Python compiler may be used. Some of the bugs with Python binaries
have been addressed, but overall it's still a work in progress.
This commit is contained in:
Justine Tunney 2021-09-11 22:30:37 -07:00
parent ad52387b74
commit 51904e2687
35 changed files with 3541 additions and 8587 deletions

View file

@ -31,9 +31,16 @@
*
* "a" ALFA
* "\316\261" ALPHA
* "\033[A" CURSOR UP
* "\033[38;5;202m" ORANGERED
* "\e[38;5;202m" ORANGERED
* "\e[A" UP
* "\e\e[A" ALT-UP
* "\001" CTRL-ALFA
* "\e\001" ALT-CTRL-ALFA
* "\eOP" PF1
* "\000" NUL
* "\e]rm -rf /\e\\" OSC
* "\302\233A" UP
* "\300\200" NUL
*
* This routine generalizes to ascii, utf-8, chorded modifier keys,
* function keys, color codes, c0/c1 control codes, cursor movement,
@ -44,15 +51,7 @@
* can cause the stream to go out of sync. This function recovers such
* events by ignoring continuation bytes at the beginning of each read.
*
* String control sequences, e.g. "\e_hello\e\\" currently are not
* tokenized as a single read. Lastly note, this function has limited
* support for UNICODE representations of C0/C1 control codes, e.g.
*
* "\000" NUL
* "\300\200" NUL
* "\302\233A" CURSOR UP
*
* @param buf is guaranteed to receive a NUL terminator if size>0
* @param p is guaranteed to receive a NUL terminator if n>0
* @return number of bytes read (helps differentiate "\0" vs. "")
* @see examples/ttyinfo.c
* @see ANSI X3.64-1979
@ -60,81 +59,198 @@
* @see FIPS-86
* @see ECMA-48
*/
/*
 * Reads one keystroke / control sequence from fd, one byte at a time,
 * driving a small state machine until a complete token has been seen.
 *
 * NOTE(review): this span looks like a rendered unified diff whose +/-
 * markers and indentation were stripped: the previous signature
 * (buf, size) and the new one (p, n) appear back to back, and several
 * `return i;` lines sit directly in front of the `t = kDone;` lines
 * that appear to replace them. Read the (buf, size) lines as the old
 * implementation and the (p, n) lines as the new one -- confirm against
 * the actual file before relying on any single line here.
 */
ssize_t readansi(int fd, char *buf, size_t size) {
ssize_t readansi(int fd, char *p, size_t n) {
wint_t x;
uint8_t c;
int i, j, rc;
enum { kAscii, kUtf8, kEsc, kCsi, kSs } t;
if (size) buf[0] = 0;
for (j = i = 0, t = kAscii;;) {
if (i + 2 >= size) return enomem();
if ((rc = read(fd, &c, 1)) != 1) return rc;
buf[i++] = c;
buf[i] = 0;
ssize_t rc;
int e, i, j;
unsigned char c;
enum { kAscii, kUtf8, kEsc, kCsi1, kCsi2, kSs, kNf, kStr, kStr2, kDone } t;
/* remember errno on entry; it is restored on clean EOF and on success */
e = errno;
t = kAscii;
/* x: code point being assembled, i: bytes consumed, j: continuation
   bytes still expected */
x = i = j = 0;
/* start from an empty string whenever the buffer has any room */
if (n) p[0] = 0;
do {
/* fetch exactly one byte; an EINTR that arrives after at least one
   byte was consumed falls through and retries the read */
for (;;) {
if (n) {
rc = read(fd, &c, 1);
} else {
/* zero-capacity buffer: issue a zero-length read instead */
rc = read(fd, 0, 0);
}
if (rc == -1 && errno == EINTR) {
if (!i) {
return -1;
}
} else if (rc == -1) {
return -1;
} else if (!rc) {
/* read returned 0: fine if nothing was consumed yet, otherwise the
   token was cut short and the result of eilseq() is returned */
if (!i) {
errno = e;
return 0;
} else {
return eilseq();
}
} else {
break;
}
}
/* store the byte only while a slot for the trailing NUL remains; i
   keeps counting either way, so the returned length can exceed what
   was actually stored in p */
if (i + 1 < n) {
p[i] = c;
p[i + 1] = 0;
} else if (i < n) {
p[i] = 0;
}
++i;
switch (t) {
/* resync point: drop what was collected so far and reclassify c as
   the first byte of a new token */
/* NOTE(review): only p[0] is rewritten here; p[1] still holds whatever
   the store above the switch left there, so after i is reset to 1 the
   byte at p[i] may not be NUL. With n == 1 this also overwrites the
   lone terminator slot. Confirm against the documented NUL-terminator
   guarantee. */
Whoopsie:
if (n) p[0] = c;
t = kAscii;
i = 1;
/* fallthrough */
case kAscii:
if (c < 0200) {
if (c == '\e') {
t = kEsc;
} else {
return i;
t = kDone;
}
} else if (c >= 0300) {
/* lead byte: seed x and expect ThomPikeLen(c) - 1 more bytes */
t = kUtf8;
x = ThomPikeByte(c);
j = ThomPikeLen(c) - 1;
} else {
/* c is in 0200..0277 here, i.e. a continuation byte with no lead
   byte in front of it; the state is left unchanged and reading
   continues */
}
break;
case kUtf8:
x = ThomPikeMerge(x, c);
if (!--j) {
switch (x) {
case '\e':
t = kEsc;
break;
case 0x9b:
t = kCsi;
break;
default:
return i;
/* only bytes of the form 10xxxxxx may continue a sequence */
if ((c & 0300) == 0200) {
x = ThomPikeMerge(x, c);
if (!--j) {
/* sequence complete: dispatch on the decoded value so that encoded
   C0/C1 introducers take the same paths as their ESC-prefixed forms */
switch (x) {
case '\e':
t = kEsc; /* parsed but not canonicalized */
break;
case 0x9b:
t = kCsi1; /* unusual but legal */
break;
case 0x8e:
case 0x8f:
t = kSs; /* unusual but legal */
break;
case 0x90: /* DCS (Device Control String) */
case 0x98: /* SOS (Start of String) */
case 0x9d: /* OSC (Operating System Command) */
case 0x9e: /* PM (Privacy Message) */
case 0x9f: /* APC (Application Program Command) */
t = kStr;
break;
default:
t = kDone;
break;
}
}
} else {
goto Whoopsie; /* ignore underlong sequences if not eof */
}
break;
case kEsc:
switch (c) {
case '[':
t = kCsi;
break;
case 'N':
case 'O':
t = kSs;
break;
case '\e':
case 0x20 ... 0x2F:
break;
default:
return i;
}
break;
case kCsi:
switch (c) {
case '[':
case ':':
case ';':
case '<':
case '=':
case '>':
case '?':
case '0' ... '9':
break;
default:
return i;
/* classify the byte that follows ESC by its numeric range */
if (0x20 <= c && c <= 0x2f) {
t = kNf;
} else if (0x30 <= c && c <= 0x3f) { /* Fp */
t = kDone;
} else if (0x20 <= c && c <= 0x5F) { /* Fe */
/* 0x20..0x3f were already taken by the two branches above, so only
   0x40..0x5f can reach this point */
switch (c) {
case '[':
t = kCsi1;
break;
case 'N': /* SS2 */
case 'O': /* SS3 */
t = kSs;
break;
case 'P': /* DCS (Device Control String) */
case 'X': /* SOS (Start of String) */
case ']': /* OSC (Operating System Command) */
case '^': /* PM (Privacy Message) */
case '_': /* APC (Application Program Command) */
t = kStr;
break;
case '\\':
/* ESC followed by a backslash with no string open: resync on the
   backslash */
goto Whoopsie;
default:
t = kDone;
break;
}
} else if (0x60 <= c && c <= 0x7e) { /* Fs */
t = kDone;
} else if (c == '\e') {
/* i already counts this byte, so i < 3 means it is the second byte */
if (i < 3) {
t = kEsc; /* alt chording */
} else {
t = kDone; /* esc mashing */
i = 1;
}
} else {
t = kDone;
}
break;
case kSs:
return i;
/* the single byte after SS2/SS3 completes the token */
t = kDone;
break;
case kNf:
/* 0x30..0x7e finishes; 0x20..0x2f stays in this state; anything else
   resyncs */
if (0x30 <= c && c <= 0x7e) {
t = kDone;
} else if (!(0x20 <= c && c <= 0x2f)) {
goto Whoopsie;
}
break;
case kCsi1:
/* 0x30..0x3f stays in this state, 0x20..0x2f moves on to kCsi2, and
   0x40..0x7e finishes; anything else resyncs */
if (0x20 <= c && c <= 0x2f) {
t = kCsi2;
} else if (c == '[' && i == 3) {
/* linux function keys */
} else if (0x40 <= c && c <= 0x7e) {
t = kDone;
} else if (!(0x30 <= c && c <= 0x3f)) {
goto Whoopsie;
}
break;
case kCsi2:
/* once a 0x20..0x2f byte has been seen, only more of them or a
   finishing 0x40..0x7e byte are accepted */
if (0x40 <= c && c <= 0x7e) {
t = kDone;
} else if (!(0x20 <= c && c <= 0x2f)) {
goto Whoopsie;
}
break;
case kStr:
/* inside a control string: '\a' ends it, ESC or 0302 may start a
   two-byte terminator (checked in kStr2), everything else is payload */
switch (c) {
case '\a':
t = kDone;
break;
case '\e': /* ESC */
case 0302: /* C1 (UTF-8) */
t = kStr2;
break;
default:
break;
}
break;
case kStr2:
/* second byte of a possible terminator; any other byte drops back
   into the string body */
switch (c) {
case '\a':
t = kDone;
break;
case '\\': /* ST (ASCII) */
case 0234: /* ST (UTF-8) */
t = kDone;
break;
default:
t = kStr;
break;
}
break;
default:
unreachable;
}
}
} while (t != kDone);
/* success: put back the errno value saved on entry */
errno = e;
return i;
}