Improve art program

You can now easily compile this program with non-cosmocc toolchains. The
glaring iconv() api usage mistake is now fixed. Restoring the terminal's
state on exit now works better. We try our best to limit the terminal to
80x24 cells.
This commit is contained in:
Justine Tunney 2024-10-15 11:39:16 -07:00
parent 4b2a00fd4a
commit 23da0d75a5
No known key found for this signature in database
GPG key ID: BE714B4575D6E328

View file

@ -15,101 +15,241 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <termios.h>
#include <time.h>
#include <unistd.h>
/**
* @fileoverview program for viewing bbs art files
* @see https://github.com/blocktronics/artpacks
* @see http://www.textfiles.com/art/
*/
// Help text shown for the -h flag. It is a printf-style format string
// that takes three arguments, in order: the default baud rate (%d), the
// default input charset (%s), and the default output charset (%s).
#define HELP \
"Usage:\n\
art [-b %d] [-f %s] [-t %s] FILE...\n\
\n\
Flags:\n\
-b NUMBER specifies simulated modem baud rate, which defaults to\n\
2400 since that was the most common modem speed in the\n\
later half of the 1980s during the BBS golden age; you\n\
could also say 300 for the slowest experience possible\n\
or you could say 14.4k to get more of a 90's feel, and\n\
there's also the infamous 56k to bring you back to y2k\n\
-f CHARSET specifies charset of input bytes, where the default is\n\
cp437 which means IBM Code Page 437 a.k.a. DOS\n\
-t CHARSET specifies output charset used by your terminal, and it\n\
defaults to utf8 a.k.a. thompson-pike encoding\n\
\n\
Supported charsets:\n\
utf8, ascii, wchar_t, ucs2be, ucs2le, utf16be, utf16le, ucs4be,\n\
ucs4le, utf16, ucs4, ucs2, eucjp, shiftjis, iso2022jp, gb18030, gbk,\n\
gb2312, big5, euckr, iso88591, latin1, iso88592, iso88593, iso88594,\n\
iso88595, iso88596, iso88597, iso88598, iso88599, iso885910,\n\
iso885911, iso885913, iso885914, iso885915, iso885916, cp1250,\n\
windows1250, cp1251, windows1251, cp1252, windows1252, cp1253,\n\
windows1253, cp1254, windows1254, cp1255, windows1255, cp1256,\n\
windows1256, cp1257, windows1257, cp1258, windows1258, koi8r, koi8u,\n\
cp437, cp850, cp866, ibm1047, cp1047.\n\
\n\
See also:\n\
http://www.textfiles.com/art/\n\
https://github.com/blocktronics/artpacks\n\
\n"
// INBUFSZ is the number of bytes requested from the input file per read.
// OUBUFSZ sizes the converted-output buffer, leaving six bytes of room
// for every input byte.
#define INBUFSZ 256
#define OUBUFSZ (INBUFSZ * 6)
// Packs the first four bytes of `s` into an unsigned integer, with s[0]
// in the lowest byte, so short byte strings can be compared using `==`.
// Every byte goes through unsigned char first: plain char may be signed,
// and a byte >= 0x80 would otherwise sign-extend and smear ones across
// the higher bytes (besides left-shifting a negative value).
#define SLIT(s)                              \
  ((unsigned)(unsigned char)(s)[3] << 24 |   \
   (unsigned)(unsigned char)(s)[2] << 16 |   \
   (unsigned)(unsigned char)(s)[1] << 8 |    \
   (unsigned)(unsigned char)(s)[0])
volatile sig_atomic_t got_signal;
// "When new technology comes out, people don't all buy it right away.
// If what they have works, some will wait until it doesn't. A few
// people do get the latest though. In 1984 2400 baud modems became
// available, so some people had them, but many didn't. A BBS list
// from 1986 shows operators were mostly 300 and 1200, but some were
// using 2400. The next 5 years were the hayday of the 2400."
//
// https://forum.vcfed.org/index.php?threads/the-2400-baud-modem.44241/
// simulated modem speed in bits per second; adjustable with the -b flag
int baud_rate = 2400; // -b 2400
// name of the charset the input files are encoded in (iconv source)
const char* from_charset = "CP437"; // -f CP437
// name of the charset written to the terminal (iconv destination)
const char* to_charset = "UTF-8"; // -t UTF-8
// becomes nonzero when the program should stop; it is set by on_signal()
// and by error paths, and polled by the file and output loops
volatile sig_atomic_t done;
// Raises the stop flags so that the loops polling them wind down.
// NOTE(review): presumably installed as a signal handler elsewhere; the
// registration is not visible in this excerpt -- confirm.
void on_signal(int sig) {
got_signal = 1;
done = 1;
(void)sig; // the signal number is deliberately unused
}
void process_file(const char *path, int fd, iconv_t cd, int baud_rate) {
// Sends the NUL-terminated string `s` to standard output using a single
// write() call. This is best effort: the result of write() is discarded,
// so errors and short writes go unreported.
void print(const char* s) {
  size_t length = strlen(s);
  ssize_t ignored = write(STDOUT_FILENO, s, length);
  (void)ignored;
}
// Converts one wide character into the charset named by `codec`.
//
// The encoded bytes are stored in `output`, followed by a NUL terminator;
// at most seven bytes of encoding are produced, which is why the buffer
// holds eight. A fresh iconv descriptor is opened and closed per call.
//
// Returns the number of encoded bytes (terminator not counted), or -1 if
// the conversion descriptor cannot be opened or the conversion fails.
int encode_character(char output[8], const char* codec, wchar_t character) {
  enum { CAPACITY = 7 };  // the eighth byte is reserved for the terminator

  iconv_t converter = iconv_open(codec, "wchar_t");
  if (converter == (iconv_t)-1)
    return -1;

  char* source = (char*)&character;
  size_t source_left = sizeof(character);
  char* target = output;
  size_t target_left = CAPACITY;
  size_t status =
      iconv(converter, &source, &source_left, &target, &target_left);
  iconv_close(converter);
  if (status == (size_t)-1)
    return -1;

  *target = '\0';
  return CAPACITY - target_left;
}
// Writes a placeholder for an unconvertible character at `*b`, encoded in
// the output charset, and advances `*b` past it. U+FFFD is tried first;
// when the output charset cannot represent it, a question mark is used
// instead. If neither can be encoded, `*b` is not advanced.
void append_replacement_character(char** b) {
  static const wchar_t candidates[2] = {0xFFFD, '?'};
  for (int i = 0; i < 2; i++) {
    int written = encode_character(*b, to_charset, candidates[i]);
    if (written != -1) {
      *b += written;
      return;
    }
  }
}
// Three-way comparison of two timestamps: returns -1 when `a` is earlier
// than `b`, 1 when it is later, and 0 when they are equal. Seconds are
// compared first; nanoseconds only break ties.
int compare_time(struct timespec a, struct timespec b) {
  if (a.tv_sec != b.tv_sec)
    return a.tv_sec < b.tv_sec ? -1 : 1;
  if (a.tv_nsec != b.tv_nsec)
    return a.tv_nsec < b.tv_nsec ? -1 : 1;
  return 0;
}
// Returns x + y. One second is carried out of the nanosecond field when
// the nanosecond sum reaches a full second, which is sufficient as long
// as both inputs hold fewer than a billion nanoseconds.
struct timespec add_time(struct timespec x, struct timespec y) {
  struct timespec sum = x;
  sum.tv_sec = x.tv_sec + y.tv_sec;
  sum.tv_nsec = x.tv_nsec + y.tv_nsec;
  if (sum.tv_nsec >= 1000000000) {
    sum.tv_sec++;
    sum.tv_nsec -= 1000000000;
  }
  return sum;
}
// Returns a - b, borrowing one second whenever the nanosecond difference
// would otherwise come out negative.
struct timespec subtract_time(struct timespec a, struct timespec b) {
  struct timespec difference = a;
  difference.tv_sec = a.tv_sec - b.tv_sec;
  difference.tv_nsec = a.tv_nsec - b.tv_nsec;
  if (difference.tv_nsec < 0) {
    difference.tv_nsec += 1000000000;
    difference.tv_sec -= 1;
  }
  return difference;
}
// Splits a nanosecond count into whole seconds plus the remaining
// nanoseconds. Division truncates toward zero.
struct timespec fromnanos(long long x) {
  lldiv_t parts = lldiv(x, 1000000000);
  struct timespec result;
  result.tv_sec = parts.quot;
  result.tv_nsec = parts.rem;
  return result;
}
void process_file(const char* path, int fd, iconv_t cd) {
size_t carry = 0;
struct timespec next;
char input_buffer[INBUFSZ];
char output_buffer[OUBUFSZ];
size_t input_left, output_left;
char *input_ptr, *output_ptr;
struct timespec next = timespec_mono();
clock_gettime(CLOCK_MONOTONIC, &next);
for (;;) {
// read from file
ssize_t bytes_read = read(fd, input_buffer, INBUFSZ);
ssize_t bytes_read = read(fd, input_buffer + carry, INBUFSZ - carry);
if (!bytes_read)
return;
if (bytes_read == -1) {
perror(path);
exit(1);
done = 1;
return;
}
if (!bytes_read)
break;
// modernize character set
input_ptr = input_buffer;
input_left = bytes_read;
output_ptr = output_buffer;
output_left = OUBUFSZ;
if (iconv(cd, &input_ptr, &input_left, &output_ptr, &output_left) ==
(size_t)-1) {
perror(path);
exit(1);
char* input_ptr = input_buffer;
size_t input_left = carry + bytes_read;
char output_buffer[OUBUFSZ];
char* output_ptr = output_buffer;
size_t output_left = OUBUFSZ;
size_t ir = iconv(cd, &input_ptr, &input_left, &output_ptr, &output_left);
carry = 0;
if (ir == (size_t)-1) {
if (errno == EINVAL) {
// incomplete multibyte sequence encountered
memmove(input_buffer, input_ptr, input_left);
carry = input_left;
} else if (errno == EILSEQ && input_left) {
// EILSEQ means either
// 1. illegal input sequence encountered
// 2. code not encodable in output codec
//
// so we skip one byte of input, and insert <20> or ? in the output
// this isn't the most desirable behavior, but it is the best we
// can do, since we don't know specifics about the codecs in use
//
// unlike glibc cosmo's iconv implementation may handle case (2)
// automatically by inserting an asterisk in place of a sequence
++input_ptr;
--input_left;
memmove(input_buffer, input_ptr, input_left);
carry = input_left;
if (output_left >= 8)
append_replacement_character(&output_ptr);
} else {
perror(path);
done = 1;
return;
}
}
// write to terminal
for (char *p = output_buffer; p < output_ptr; p++) {
if (got_signal)
for (char* p = output_buffer; p < output_ptr; p++) {
if (done)
return;
write(STDOUT_FILENO, p, 1);
(void)!write(STDOUT_FILENO, p, 1);
// allow arrow keys to change baud rate
char key[4] = {0};
if (read(STDIN_FILENO, key, sizeof(key)) > 0) {
if (SLIT(key) == SLIT("\e[A") || // up
SLIT(key) == SLIT("\e[C")) { // right
baud_rate *= 1.4;
} else if (SLIT(key) == SLIT("\e[B") || // down
SLIT(key) == SLIT("\e[D")) { // left
baud_rate *= 0.6;
int have;
if (ioctl(STDIN_FILENO, FIONREAD, &have)) {
perror("ioctl");
done = 1;
return;
}
if (have > 0) {
char key[4] = {0};
if (read(STDIN_FILENO, key, sizeof(key)) > 0) {
if (SLIT(key) == SLIT("\33[A") || // up
SLIT(key) == SLIT("\33[C")) { // right
baud_rate *= 1.4;
} else if (SLIT(key) == SLIT("\33[B") || // down
SLIT(key) == SLIT("\33[D")) { // left
baud_rate *= 0.6;
}
if (baud_rate < 3)
baud_rate = 3;
if (baud_rate > 1000000000)
baud_rate = 1000000000;
}
}
// insert artificial delay for one byte. we divide by 10 to convert
// bits to bytes, because that is how many bits 8-N-1 encoding used
next = timespec_add(next, timespec_fromnanos(1e9 / (baud_rate / 10.)));
usleep(timespec_tomicros(timespec_subz(next, timespec_mono())));
struct timespec now;
clock_gettime(CLOCK_MONOTONIC, &now);
next = add_time(next, fromnanos(1e9 / (baud_rate / 10.)));
if (compare_time(next, now) > 0) {
struct timespec sleep = subtract_time(next, now);
nanosleep(&sleep, 0);
}
}
}
}
int main(int argc, char *argv[]) {
// "When new technology comes out, people don't all buy it right away.
// If what they have works, some will wait until it doesn't. A few
// people do get the latest though. In 1984 2400 baud modems became
// available, so some people had them, but many didn't. A BBS list
// from 1986 shows operators were mostly 300 and 1200, but some were
// using 2400. The next 5 years were the hayday of the 2400."
//
// https://forum.vcfed.org/index.php?threads/the-2400-baud-modem.44241/
int baud_rate = 2400; // -b 2400
const char *from_charset = "CP437"; // -f CP437
const char *to_charset = "UTF-8"; // -t UTF-8
int main(int argc, char* argv[]) {
int opt;
while ((opt = getopt(argc, argv, "hb:f:t:")) != -1) {
switch (opt) {
case 'b': {
char *endptr;
char* endptr;
double rate = strtod(optarg, &endptr);
if (*endptr == 'k') {
rate *= 1e3;
@ -132,25 +272,7 @@ int main(int argc, char *argv[]) {
to_charset = optarg;
break;
case 'h':
fprintf(stderr, "\
Usage:\n\
%s [-b BAUD] [-f CP437] [-t UTF-8] FILE...\n\
\n\
Supported charsets:\n\
utf8, wchart, ucs2be, ucs2le, utf16be, utf16le, ucs4be, ucs4le,\n\
ascii, utf16, ucs4, ucs2, eucjp, shiftjis, iso2022jp, gb18030, gbk,\n\
gb2312, big5, euckr, iso88591, latin1, iso88592, iso88593, iso88594,\n\
iso88595, iso88596, iso88597, iso88598, iso88599, iso885910,\n\
iso885911, iso885913, iso885914, iso885915, iso885916, cp1250,\n\
windows1250, cp1251, windows1251, cp1252, windows1252, cp1253,\n\
windows1253, cp1254, windows1254, cp1255, windows1255, cp1256,\n\
windows1256, cp1257, windows1257, cp1258, windows1258, koi8r, koi8u,\n\
cp437, cp850, cp866, ibm1047, cp1047.\n\
\n\
See also:\n\
http://www.textfiles.com/art/\n\
\n",
argv[0]);
fprintf(stderr, HELP, baud_rate, from_charset, to_charset);
exit(0);
default:
fprintf(stderr, "protip: pass the -h flag for help\n");
@ -162,6 +284,7 @@ See also:\n\
exit(1);
}
// create character transcoder
iconv_t cd = iconv_open(to_charset, from_charset);
if (cd == (iconv_t)-1) {
fprintf(stderr, "error: conversion from %s to %s not supported\n",
@ -180,28 +303,50 @@ See also:\n\
tcsetattr(STDIN_FILENO, TCSANOW, &t2);
}
// make stdin nonblocking
fcntl(STDIN_FILENO, F_SETFL, fcntl(STDIN_FILENO, F_GETFL) | O_NONBLOCK);
// hide cursor
write(STDOUT_FILENO, "\e[?25l", 6);
// Process each file specified on the command line
for (int i = optind; i < argc && !got_signal; i++) {
for (int i = optind; i < argc && !done; i++) {
// open file
int fd = open(argv[i], O_RDONLY);
if (fd == -1) {
perror(argv[i]);
continue;
break;
}
process_file(argv[i], fd, cd, baud_rate);
// wait between files
if (i > optind)
sleep(1);
print("\33[?25l"); // hide cursor
print("\33[H"); // move cursor to top-left
print("\33[J"); // erase display forward
print("\33[1;24r"); // set scrolling region to first 24 lines
print("\33[?7h"); // enable auto-wrap mode
print("\33[?3l"); // 80 column mode (deccolm) vt100
print("\33[H"); // move cursor to top-left, again
// get busy
process_file(argv[i], fd, cd);
close(fd);
}
// cleanup
iconv_close(cd);
// show cursor
write(STDOUT_FILENO, "\e[?25h", 6);
print("\33[s"); // save cursor position
print("\33[?25h"); // show cursor
print("\33[0m"); // reset text attributes (color, bold, etc.)
print("\33[?1049l"); // exit alternate screen mode
print("\33(B"); // exit line drawing and other alt charset modes
print("\33[r"); // reset scrolling region
print("\33[?2004l"); // turn off bracketed paste mode
print("\33[4l"); // exit insert mode
print("\33[?1l\33>"); // exit application keypad mode
print("\33[?7h"); // reset text wrapping mode
print("\33[?12l"); // reset cursor blinking mode
print("\33[?6l"); // reset origin mode
print("\33[20l"); // reset auto newline mode
print("\33[u"); // restore cursor position
// restore terminal
tcsetattr(STDIN_FILENO, TCSANOW, &t);