From 775944a2d03f3f24357e90394a2bc4df0103bcff Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Sat, 17 Sep 2022 01:37:33 -0700 Subject: [PATCH] Fix important bugs in redbean This change upgrades to the latest Chromium Zlib, fixes bugs in redbean, and introduces better support for reverse proxies like Cloudflare. This change improves the security of redbean and it's recommended that users upgrade to the release that'll follow. This change also updates the docs to clarify how to use the security tools redbean provides e.g. pledge(), unveil(), and the MODE=asan builds which improve memory safety. --- libc/str/crc32z.c | 7 - net/http/parseforwarded.c | 12 +- test/net/http/parseforwarded_test.c | 2 +- test/net/http/parseurl_test.c | 2 +- third_party/zlib/README.cosmo | 29 + third_party/zlib/adler32.c | 340 +- third_party/zlib/chunkcopy.internal.h | 369 -- third_party/zlib/compress.c | 167 +- third_party/zlib/crc32.c | 1258 ++++++- third_party/zlib/crcfold.c | 483 --- third_party/zlib/deflate.c | 3724 +++++++++++---------- third_party/zlib/deflate.internal.h | 480 +-- third_party/zlib/deflateinit.S | 28 - third_party/zlib/deflatesse.c | 173 - third_party/zlib/infback.c | 1116 +++--- third_party/zlib/inffast.c | 617 ++-- third_party/zlib/inffast.internal.h | 15 +- third_party/zlib/inffastchunk.c | 314 -- third_party/zlib/inffixed.c | 122 - third_party/zlib/inffixed.inc | 96 + third_party/zlib/inflate.c | 2670 ++++++++------- third_party/zlib/inflate.internal.h | 220 +- third_party/zlib/inflateinit.S | 25 - third_party/zlib/inftrees.c | 570 ++-- third_party/zlib/inftrees.internal.h | 76 +- third_party/zlib/insert_string.internal.h | 141 + third_party/zlib/internal.h | 19 +- third_party/zlib/kdistcode.S | 67 - third_party/zlib/klengthcode.S | 67 - third_party/zlib/kstaticdtree.c | 10 - third_party/zlib/kstaticltree.c | 62 - third_party/zlib/macros.internal.h | 104 + third_party/zlib/trees.c | 1666 ++++----- third_party/zlib/trees.inc | 128 + third_party/zlib/zalloc.c | 9 +- third_party/zlib/zconf.h | 5 +- third_party/zlib/zlib.h | 22 +- third_party/zlib/zlib.mk | 4 +- third_party/zlib/zutil.internal.h | 27 + tool/net/help.txt | 40 +- tool/net/lmaxmind.c | 9 +- tool/net/redbean.c | 151 +- 42 files changed, 8148 insertions(+), 7298 deletions(-) create mode 100644 third_party/zlib/README.cosmo delete mode 100644 third_party/zlib/chunkcopy.internal.h delete mode 100644 third_party/zlib/crcfold.c delete mode 100644 third_party/zlib/deflateinit.S delete mode 100644 third_party/zlib/deflatesse.c delete mode 100644 third_party/zlib/inffastchunk.c delete mode 100644 third_party/zlib/inffixed.c create mode 100644 third_party/zlib/inffixed.inc delete mode 100644 third_party/zlib/inflateinit.S create mode 100644 third_party/zlib/insert_string.internal.h delete mode 100644 third_party/zlib/kdistcode.S delete mode 100644 third_party/zlib/klengthcode.S delete mode 100644 third_party/zlib/kstaticdtree.c delete mode 100644 third_party/zlib/kstaticltree.c create mode 100644 third_party/zlib/macros.internal.h create mode 100644 third_party/zlib/trees.inc diff --git a/libc/str/crc32z.c b/libc/str/crc32z.c index f35cb121d..ff332e2ac 100644 --- a/libc/str/crc32z.c +++ b/libc/str/crc32z.c @@ -25,13 +25,6 @@ #include "libc/nexgen32e/x86feature.h" #include "libc/str/str.h" -static inline noasan uint64_t WildRead64(const signed char *p) { - return (uint64_t)(255 & p[7]) << 070 | (uint64_t)(255 & p[6]) << 060 | - (uint64_t)(255 & p[5]) << 050 | (uint64_t)(255 & p[4]) << 040 | - (uint64_t)(255 & p[3]) << 030 | 
(uint64_t)(255 & p[2]) << 020 | - (uint64_t)(255 & p[1]) << 010 | (uint64_t)(255 & p[0]) << 000; -} - /** * Computes Phil Katz CRC-32 used by zip/zlib/gzip/etc. * diff --git a/net/http/parseforwarded.c b/net/http/parseforwarded.c index d5aa2b052..b7bbb05a0 100644 --- a/net/http/parseforwarded.c +++ b/net/http/parseforwarded.c @@ -43,16 +43,18 @@ int ParseForwarded(const char *s, size_t n, uint32_t *ip, uint16_t *port) { if (n == -1) n = s ? strlen(s) : 0; if (n) { t = x = i = 0; - if ((r = strrchr(s, ','))) { + if ((r = memrchr(s, ',', n))) { i = r - s; - if ((s[++i] & 255) == ' ') ++i; // skip optional space + if ((s[++i] & 255) == ' ') ++i; // skip optional space } do { c = s[i++] & 255; if (isdigit(c)) { t *= 10; t += c - '0'; - if (t > 255) return -1; + if (t > 255) { + return -1; + } } else if (c == '.') { x <<= 8; x |= t; @@ -72,7 +74,9 @@ int ParseForwarded(const char *s, size_t n, uint32_t *ip, uint16_t *port) { if (isdigit(c)) { t *= 10; t += c - '0'; - if (t > 65535) return -1; + if (t > 65535) { + return -1; + } } else { return -1; } diff --git a/test/net/http/parseforwarded_test.c b/test/net/http/parseforwarded_test.c index f922f12aa..a41046d1b 100644 --- a/test/net/http/parseforwarded_test.c +++ b/test/net/http/parseforwarded_test.c @@ -56,5 +56,5 @@ BENCH(ParseForwarded, bench) { uint32_t ip; uint16_t port; EZBENCH2("ParseForwarded 80", donothing, - ParseForwarded("203.0.113.42:31337", 20, &ip, &port)); + ParseForwarded("203.0.113.42:31337", 18, &ip, &port)); } diff --git a/test/net/http/parseurl_test.c b/test/net/http/parseurl_test.c index 18f6dcae0..cb8b48720 100644 --- a/test/net/http/parseurl_test.c +++ b/test/net/http/parseurl_test.c @@ -17,9 +17,9 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/limits.h" +#include "libc/mem/gc.internal.h" #include "libc/mem/mem.h" #include "libc/stdio/rand.h" -#include "libc/mem/gc.internal.h" #include "libc/str/str.h" #include "libc/testlib/ezbench.h" #include "libc/testlib/hyperion.h" diff --git a/third_party/zlib/README.cosmo b/third_party/zlib/README.cosmo new file mode 100644 index 000000000..d2b52aa4e --- /dev/null +++ b/third_party/zlib/README.cosmo @@ -0,0 +1,29 @@ +DESCRIPTION + + zlib implements the deflate compression algorithm. + +ORIGIN + + https://chromium.googlesource.com/chromium/src/third_party/zlib + commit 8f22e90f007a7dd466b426513725c13191248315 + Author: Hans Wennborg + Date: Fri Sep 16 16:14:51 2022 +0000 + + [zlib][fuzz] Cap the input size for zlib_inflate_with_header_fuzzer + + To prevent timeouts when processing large inputs with small chunk sizes. 
+ + Bug: 1362206 + Change-Id: Ie21ea48abf85ee49897243857bf84b0f32d24bd5 + Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3899099 + Reviewed-by: Adenilson Cavalcanti + Auto-Submit: Hans Wennborg + Commit-Queue: Adenilson Cavalcanti + Cr-Commit-Position: refs/heads/main@{#1048044} + NOKEYCHECK=True + GitOrigin-RevId: fd75b8c2768e7cc3a3e7a06bc563bb03c5ba0ec2 + +LOCAL CHANGES + + - Changed Trace() calls to use kprintf() + - We use our own crc32() implementation from LIBC_STR diff --git a/third_party/zlib/adler32.c b/third_party/zlib/adler32.c index 228cd2681..dbe5c1c4a 100644 --- a/third_party/zlib/adler32.c +++ b/third_party/zlib/adler32.c @@ -1,161 +1,217 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;tab-width:8;coding:utf-8 -*-│ -│vi: set net ft=c ts=4 sts=4 sw=4 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 1995-2016 Mark Adler │ -│ Copyright 2017 The Chromium Authors │ -│ Use of this source code is governed by the BSD-style licenses that can │ -│ be found in the third_party/zlib/LICENSE file. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/dce.h" -#include "libc/nexgen32e/x86feature.h" -#include "third_party/zlib/internal.h" +/* adler32.c -- compute the Adler-32 checksum of a data stream + * Copyright (C) 1995-2011, 2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ +#include "third_party/zlib/macros.internal.h" +#include "third_party/zlib/zconf.h" #include "third_party/zlib/zutil.internal.h" +// clang-format off -asm(".ident\t\"\\n\\n\ -zlib (zlib License)\\n\ -Copyright 1995-2017 Jean-loup Gailly and Mark Adler\""); -asm(".include \"libc/disclaimer.inc\""); +/* @(#) $Id$ */ -#define BASE 65521U /* largest prime smaller than 65536 */ -#define NMAX 5552 /* largest n such that 255n(n+1)/2+(n+1)(BASE-1)<=2^32-1 */ -#define DO1(buf, i) \ - { \ - adler += (buf)[i]; \ - sum2 += adler; \ - } -#define DO2(buf, i) \ - DO1(buf, i); \ - DO1(buf, i + 1); -#define DO4(buf, i) \ - DO2(buf, i); \ - DO2(buf, i + 2); -#define DO8(buf, i) \ - DO4(buf, i); \ - DO4(buf, i + 4); -#define DO16(buf) \ - DO8(buf, 0); \ - DO8(buf, 8); +local uLong adler32_combine_ OF((uLong adler1, uLong adler2, z_off64_t len2)); + +#define BASE 65521U /* largest prime smaller than 65536 */ +#define NMAX 5552 +/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ + +#define DO1(buf,i) {adler += (buf)[i]; sum2 += adler;} +#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1); +#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2); +#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); +#define DO16(buf) DO8(buf,0); DO8(buf,8); /* use NO_DIVIDE if your processor does not do division in hardware -- try it both ways to see which is faster */ #ifdef NO_DIVIDE /* note that this assumes BASE is 65521, where 65536 % 65521 == 15 (thank you to John Reiser for pointing this out) */ -#define CHOP(a) \ - do { \ - unsigned long tmp = a >> 16; \ - a &= 0xffffUL; \ - a += (tmp << 4) - tmp; \ - } while (0) -#define MOD28(a) \ - do { \ - CHOP(a); \ - if (a >= BASE) a -= BASE; \ - } while (0) -#define MOD(a) \ - do { \ - CHOP(a); \ - MOD28(a); \ - } while (0) -#define MOD63(a) \ - do { /* this assumes a is not negative */ \ - int64_t tmp = a >> 32; \ - a &= 0xffffffffL; \ - a += (tmp << 8) - (tmp << 5) + tmp; \ - tmp = a >> 16; \ - a &= 0xffffL; \ - a += (tmp << 4) - tmp; \ - tmp = a >> 16; \ - a &= 0xffffL; \ - a += (tmp << 4) - tmp; \ - if (a >= BASE) a -= 
BASE; \ - } while (0) +# define CHOP(a) \ + do { \ + unsigned long tmp = a >> 16; \ + a &= 0xffffUL; \ + a += (tmp << 4) - tmp; \ + } while (0) +# define MOD28(a) \ + do { \ + CHOP(a); \ + if (a >= BASE) a -= BASE; \ + } while (0) +# define MOD(a) \ + do { \ + CHOP(a); \ + MOD28(a); \ + } while (0) +# define MOD63(a) \ + do { /* this assumes a is not negative */ \ + z_off64_t tmp = a >> 32; \ + a &= 0xffffffffL; \ + a += (tmp << 8) - (tmp << 5) + tmp; \ + tmp = a >> 16; \ + a &= 0xffffL; \ + a += (tmp << 4) - tmp; \ + tmp = a >> 16; \ + a &= 0xffffL; \ + a += (tmp << 4) - tmp; \ + if (a >= BASE) a -= BASE; \ + } while (0) #else -#define MOD(a) a %= BASE -#define MOD28(a) a %= BASE -#define MOD63(a) a %= BASE +# define MOD(a) a %= BASE +# define MOD28(a) a %= BASE +# define MOD63(a) a %= BASE #endif -uLong adler32_z(uLong adler, const Bytef *buf, size_t len) { - return adler32(adler, buf, len); +//# include "cpu_features.h" +#if defined(ADLER32_SIMD_SSSE3) || defined(ADLER32_SIMD_NEON) +# include "adler32_simd.h" +#endif + +/* ========================================================================= */ +uLong ZEXPORT adler32_z(adler, buf, len) + uLong adler; + const Bytef *buf; + z_size_t len; +{ + unsigned long sum2; + unsigned n; + +#if defined(ADLER32_SIMD_SSSE3) + if (buf != Z_NULL && len >= 64 && x86_cpu_enable_ssse3) + return adler32_simd_(adler, buf, len); +#elif defined(ADLER32_SIMD_NEON) + if (buf != Z_NULL && len >= 64) + return adler32_simd_(adler, buf, len); +#endif + + /* split Adler-32 into component sums */ + sum2 = (adler >> 16) & 0xffff; + adler &= 0xffff; + + /* in case user likes doing a byte at a time, keep it fast */ + if (len == 1) { + adler += buf[0]; + if (adler >= BASE) + adler -= BASE; + sum2 += adler; + if (sum2 >= BASE) + sum2 -= BASE; + return adler | (sum2 << 16); + } + +#if defined(ADLER32_SIMD_SSSE3) + /* + * Use SSSE3 to compute the adler32. Since this routine can be + * freely used, check CPU features here. zlib convention is to + * call adler32(0, NULL, 0), before making calls to adler32(). + * So this is a good early (and infrequent) place to cache CPU + * features for those later, more interesting adler32() calls. 
+ */ + if (buf == Z_NULL) { + if (!len) /* Assume user is calling adler32(0, NULL, 0); */ + cpu_check_features(); + return 1L; + } +#else + /* initial Adler-32 value (deferred check for len == 1 speed) */ + if (buf == Z_NULL) + return 1L; +#endif + + /* in case short lengths are provided, keep it somewhat fast */ + if (len < 16) { + while (len--) { + adler += *buf++; + sum2 += adler; + } + if (adler >= BASE) + adler -= BASE; + MOD28(sum2); /* only added so many BASE's */ + return adler | (sum2 << 16); + } + + /* do length NMAX blocks -- requires just one modulo operation */ + while (len >= NMAX) { + len -= NMAX; + n = NMAX / 16; /* NMAX is divisible by 16 */ + do { + DO16(buf); /* 16 sums unrolled */ + buf += 16; + } while (--n); + MOD(adler); + MOD(sum2); + } + + /* do remaining bytes (less than NMAX, still just one modulo) */ + if (len) { /* avoid modulos if none remaining */ + while (len >= 16) { + len -= 16; + DO16(buf); + buf += 16; + } + while (len--) { + adler += *buf++; + sum2 += adler; + } + MOD(adler); + MOD(sum2); + } + + /* return recombined sums */ + return adler | (sum2 << 16); } -uLong adler32(uLong adler, const Bytef *buf, uInt len) { - unsigned long sum2; - unsigned n; - if (!IsTiny() && X86_HAVE(SSSE3) && buf && len >= 64) { - return adler32_simd_(adler, buf, len); - } - /* split Adler-32 into component sums */ - sum2 = (adler >> 16) & 0xffff; - adler &= 0xffff; - /* in case user likes doing a byte at a time, keep it fast */ - if (len == 1) { - adler += buf[0]; - if (adler >= BASE) adler -= BASE; - sum2 += adler; +/* ========================================================================= */ +uLong ZEXPORT adler32(adler, buf, len) + uLong adler; + const Bytef *buf; + uInt len; +{ + return adler32_z(adler, buf, len); +} + +/* ========================================================================= */ +local uLong adler32_combine_(adler1, adler2, len2) + uLong adler1; + uLong adler2; + z_off64_t len2; +{ + unsigned long sum1; + unsigned long sum2; + unsigned rem; + + /* for negative len, return invalid adler32 as a clue for debugging */ + if (len2 < 0) + return 0xffffffffUL; + + /* the derivation of this formula is left as an exercise for the reader */ + MOD63(len2); /* assumes len2 >= 0 */ + rem = (unsigned)len2; + sum1 = adler1 & 0xffff; + sum2 = rem * sum1; + MOD(sum2); + sum1 += (adler2 & 0xffff) + BASE - 1; + sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem; + if (sum1 >= BASE) sum1 -= BASE; + if (sum1 >= BASE) sum1 -= BASE; + if (sum2 >= ((unsigned long)BASE << 1)) sum2 -= ((unsigned long)BASE << 1); if (sum2 >= BASE) sum2 -= BASE; - return adler | (sum2 << 16); - } - /* initial Adler-32 value (deferred check for len == 1 speed) */ - if (buf == Z_NULL) return 1L; - /* in case short lengths are provided, keep it somewhat fast */ - if (len < 16) { - while (len--) { - adler += *buf++; - sum2 += adler; - } - if (adler >= BASE) adler -= BASE; - MOD28(sum2); /* only added so many BASE's */ - return adler | (sum2 << 16); - } - /* do length NMAX blocks -- requires just one modulo operation */ - while (len >= NMAX) { - len -= NMAX; - n = NMAX / 16; /* NMAX is divisible by 16 */ - do { - DO16(buf); /* 16 sums unrolled */ - buf += 16; - } while (--n); - MOD(adler); - MOD(sum2); - } - /* do remaining bytes (less than NMAX, still just one modulo) */ - if (len) { /* avoid modulos if none remaining */ - while (len >= 16) { - len -= 16; - DO16(buf); - buf += 16; - } - while (len--) { - adler += *buf++; - sum2 += adler; - } - MOD(adler); - MOD(sum2); - } - 
/* return recombined sums */ - return adler | (sum2 << 16); + return sum1 | (sum2 << 16); } -uLong adler32_combine(uLong adler1, uLong adler2, int64_t len2) { - unsigned long sum1; - unsigned long sum2; - unsigned rem; - /* for negative len, return invalid adler32 as a clue for debugging */ - if (len2 < 0) return 0xffffffffUL; - /* the derivation of this formula is left as an exercise for the reader */ - MOD63(len2); /* assumes len2 >= 0 */ - rem = (unsigned)len2; - sum1 = adler1 & 0xffff; - sum2 = rem * sum1; - MOD(sum2); - sum1 += (adler2 & 0xffff) + BASE - 1; - sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem; - if (sum1 >= BASE) sum1 -= BASE; - if (sum1 >= BASE) sum1 -= BASE; - if (sum2 >= ((unsigned long)BASE << 1)) sum2 -= ((unsigned long)BASE << 1); - if (sum2 >= BASE) sum2 -= BASE; - return sum1 | (sum2 << 16); +/* ========================================================================= */ +uLong ZEXPORT adler32_combine(adler1, adler2, len2) + uLong adler1; + uLong adler2; + z_off_t len2; +{ + return adler32_combine_(adler1, adler2, len2); +} + +uLong ZEXPORT adler32_combine64(adler1, adler2, len2) + uLong adler1; + uLong adler2; + z_off64_t len2; +{ + return adler32_combine_(adler1, adler2, len2); } diff --git a/third_party/zlib/chunkcopy.internal.h b/third_party/zlib/chunkcopy.internal.h deleted file mode 100644 index db0d53e61..000000000 --- a/third_party/zlib/chunkcopy.internal.h +++ /dev/null @@ -1,369 +0,0 @@ -#ifndef THIRD_PARTY_ZLIB_CHUNKCOPY_H -#define THIRD_PARTY_ZLIB_CHUNKCOPY_H -#include "libc/intrin/emmintrin.internal.h" -#include "third_party/zlib/zutil.internal.h" - -asm(".ident\t\"\\n\\n\ -Chromium (BSD-3 License)\\n\ -Copyright 2017 The Chromium Authors\""); -asm(".include \"libc/disclaimer.inc\""); - -/** - * @fileoverview fast chunk copy and set operations - * - * The chunk-copy code above deals with writing the decoded DEFLATE data - * to the output with SIMD methods to increase decode speed. Reading the - * input to the DEFLATE decoder with a wide, SIMD method can also - * increase decode speed. This option is supported on little endian - * machines, and reads the input data in 64-bit (8 byte) chunks. - */ - -#define Z_BUILTIN_MEMCPY __builtin_memcpy -#define Z_RESTRICT restrict -#define Z_STATIC_ASSERT(name, assert) typedef char name[(assert) ? 1 : -1] - -#if !(__ASSEMBLER__ + __LINKER__ + 0) - -typedef long long z_vec128i_t _Vector_size(16); - -/* - * chunk copy type: the z_vec128i_t type size should be exactly 128-bits - * and equal to CHUNKCOPY_CHUNK_SIZE. - */ -#define CHUNKCOPY_CHUNK_SIZE sizeof(z_vec128i_t) - -Z_STATIC_ASSERT(vector_128_bits_wide, - CHUNKCOPY_CHUNK_SIZE == sizeof(int8_t) * 16); - -/** - * Ask the compiler to perform a wide, unaligned load with a machine - * instruction appropriate for the z_vec128i_t type. - */ -static inline z_vec128i_t loadchunk(const unsigned char *s) { - z_vec128i_t v; - Z_BUILTIN_MEMCPY(&v, s, sizeof(v)); - return v; -} - -/** - * Ask the compiler to perform a wide, unaligned store with a machine - * instruction appropriate for the z_vec128i_t type. - */ -static inline void storechunk(unsigned char *d, const z_vec128i_t v) { - Z_BUILTIN_MEMCPY(d, &v, sizeof(v)); -} - -/** - * Perform a memcpy-like operation, assuming that length is non-zero and - * that it's OK to overwrite at least CHUNKCOPY_CHUNK_SIZE bytes of - * output even if the length is shorter than this. 
- * - * It also guarantees that it will properly unroll the data if the distance - * between `out` and `from` is at least CHUNKCOPY_CHUNK_SIZE, which we rely on - * in chunkcopy_relaxed(). - * - * Aside from better memory bus utilisation, this means that short copies - * (CHUNKCOPY_CHUNK_SIZE bytes or fewer) will fall straight through the loop - * without iteration, which will hopefully make the branch prediction more - * reliable. - */ -static inline unsigned char *chunkcopy_core(unsigned char *out, - const unsigned char *from, - unsigned len) { - const int bump = (--len % CHUNKCOPY_CHUNK_SIZE) + 1; - storechunk(out, loadchunk(from)); - out += bump; - from += bump; - len /= CHUNKCOPY_CHUNK_SIZE; - while (len-- > 0) { - storechunk(out, loadchunk(from)); - out += CHUNKCOPY_CHUNK_SIZE; - from += CHUNKCOPY_CHUNK_SIZE; - } - return out; -} - -/** - * Like chunkcopy_core(), but avoid writing beyond of legal output. - * - * Accepts an additional pointer to the end of safe output. A generic safe - * copy would use (out + len), but it's normally the case that the end of the - * output buffer is beyond the end of the current copy, and this can still be - * exploited. - */ -static inline unsigned char *chunkcopy_core_safe(unsigned char *out, - const unsigned char *from, - unsigned len, - unsigned char *limit) { - Assert(out + len <= limit, "chunk copy exceeds safety limit"); - if ((limit - out) < (ptrdiff_t)CHUNKCOPY_CHUNK_SIZE) { - const unsigned char *Z_RESTRICT rfrom = from; - Assert((uintptr_t)out - (uintptr_t)from >= len, - "invalid restrict in chunkcopy_core_safe"); - Assert((uintptr_t)from - (uintptr_t)out >= len, - "invalid restrict in chunkcopy_core_safe"); - if (len & 8) { - Z_BUILTIN_MEMCPY(out, rfrom, 8); - out += 8; - rfrom += 8; - } - if (len & 4) { - Z_BUILTIN_MEMCPY(out, rfrom, 4); - out += 4; - rfrom += 4; - } - if (len & 2) { - Z_BUILTIN_MEMCPY(out, rfrom, 2); - out += 2; - rfrom += 2; - } - if (len & 1) { - *out++ = *rfrom++; - } - return out; - } - return chunkcopy_core(out, from, len); -} - -/** - * Perform short copies until distance can be rewritten as being at - * least CHUNKCOPY_CHUNK_SIZE. - * - * Assumes it's OK to overwrite at least the first 2*CHUNKCOPY_CHUNK_SIZE - * bytes of output even if the copy is shorter than this. This assumption - * holds within zlib inflate_fast(), which starts every iteration with at - * least 258 bytes of output space available (258 being the maximum length - * output from a single token; see inffast.c). - */ -static inline unsigned char *chunkunroll_relaxed(unsigned char *out, - unsigned *dist, - unsigned *len) { - const unsigned char *from = out - *dist; - while (*dist < *len && *dist < CHUNKCOPY_CHUNK_SIZE) { - storechunk(out, loadchunk(from)); - out += *dist; - *len -= *dist; - *dist += *dist; - } - return out; -} - -/** - * v_load64_dup(): load *src as an unaligned 64-bit int and duplicate it - * in every 64-bit component of the 128-bit result (64-bit int splat). - */ -static inline z_vec128i_t v_load64_dup(const void *src) { - int64_t i64; - Z_BUILTIN_MEMCPY(&i64, src, sizeof(i64)); - return _mm_set1_epi64x(i64); -} - -/** - * v_load32_dup(): load *src as an unaligned 32-bit int and duplicate it - * in every 32-bit component of the 128-bit result (32-bit int splat). 
- */ -static inline z_vec128i_t v_load32_dup(const void *src) { - int32_t i32; - Z_BUILTIN_MEMCPY(&i32, src, sizeof(i32)); - return _mm_set1_epi32(i32); -} - -/** - * v_load16_dup(): load *src as an unaligned 16-bit int and duplicate it - * in every 16-bit component of the 128-bit result (16-bit int splat). - */ -static inline z_vec128i_t v_load16_dup(const void *src) { - int16_t i16; - Z_BUILTIN_MEMCPY(&i16, src, sizeof(i16)); - return _mm_set1_epi16(i16); -} - -/** - * v_load8_dup(): load the 8-bit int *src and duplicate it in every - * 8-bit component of the 128-bit result (8-bit int splat). - */ -static inline z_vec128i_t v_load8_dup(const void *src) { - return _mm_set1_epi8(*(const char *)src); -} - -/** - * v_store_128(): store the 128-bit vec in a memory destination (that - * might not be 16-byte aligned) void* out. - */ -static inline void v_store_128(void *out, const z_vec128i_t vec) { - _mm_storeu_si128((__m128i *)out, vec); -} - -/** - * Perform an overlapping copy which behaves as a memset() operation, - * but supporting periods other than one, and assume that length is - * non-zero and that it's OK to overwrite at least - * CHUNKCOPY_CHUNK_SIZE*3 bytes of output even if the length is shorter - * than this. - */ -static inline unsigned char *chunkset_core(unsigned char *out, unsigned period, - unsigned len) { - z_vec128i_t v; - const int bump = ((len - 1) % sizeof(v)) + 1; - switch (period) { - case 1: - v = v_load8_dup(out - 1); - v_store_128(out, v); - out += bump; - len -= bump; - while (len > 0) { - v_store_128(out, v); - out += sizeof(v); - len -= sizeof(v); - } - return out; - case 2: - v = v_load16_dup(out - 2); - v_store_128(out, v); - out += bump; - len -= bump; - if (len > 0) { - v = v_load16_dup(out - 2); - do { - v_store_128(out, v); - out += sizeof(v); - len -= sizeof(v); - } while (len > 0); - } - return out; - case 4: - v = v_load32_dup(out - 4); - v_store_128(out, v); - out += bump; - len -= bump; - if (len > 0) { - v = v_load32_dup(out - 4); - do { - v_store_128(out, v); - out += sizeof(v); - len -= sizeof(v); - } while (len > 0); - } - return out; - case 8: - v = v_load64_dup(out - 8); - v_store_128(out, v); - out += bump; - len -= bump; - if (len > 0) { - v = v_load64_dup(out - 8); - do { - v_store_128(out, v); - out += sizeof(v); - len -= sizeof(v); - } while (len > 0); - } - return out; - } - out = chunkunroll_relaxed(out, &period, &len); - return chunkcopy_core(out, out - period, len); -} - -/** - * Perform a memcpy-like operation, but assume that length is non-zero - * and that it's OK to overwrite at least CHUNKCOPY_CHUNK_SIZE bytes of - * output even if the length is shorter than this. - * - * Unlike chunkcopy_core() above, no guarantee is made regarding the behaviour - * of overlapping buffers, regardless of the distance between the pointers. - * This is reflected in the `restrict`-qualified pointers, allowing the - * compiler to re-order loads and stores. - */ -static inline unsigned char *chunkcopy_relaxed( - unsigned char *Z_RESTRICT out, const unsigned char *Z_RESTRICT from, - unsigned len) { - Assert((uintptr_t)out - (uintptr_t)from >= len, - "invalid restrict in chunkcopy_relaxed"); - Assert((uintptr_t)from - (uintptr_t)out >= len, - "invalid restrict in chunkcopy_relaxed"); - return chunkcopy_core(out, from, len); -} - -/** - * Like chunkcopy_relaxed(), but avoid writing beyond of legal output. 
- * - * Unlike chunkcopy_core_safe() above, no guarantee is made regarding the - * behaviour of overlapping buffers, regardless of the distance between the - * pointers. This is reflected in the `restrict`-qualified pointers, allowing - * the compiler to re-order loads and stores. - * - * Accepts an additional pointer to the end of safe output. A generic safe - * copy would use (out + len), but it's normally the case that the end of the - * output buffer is beyond the end of the current copy, and this can still be - * exploited. - */ -static inline unsigned char *chunkcopy_safe( - unsigned char *out, const unsigned char *Z_RESTRICT from, unsigned len, - unsigned char *limit) { - Assert(out + len <= limit, "chunk copy exceeds safety limit"); - Assert((uintptr_t)out - (uintptr_t)from >= len, - "invalid restrict in chunkcopy_safe"); - Assert((uintptr_t)from - (uintptr_t)out >= len, - "invalid restrict in chunkcopy_safe"); - return chunkcopy_core_safe(out, from, len, limit); -} - -/** - * Perform chunky copy within the same buffer, where the source and - * destination may potentially overlap. - * - * Assumes that len > 0 on entry, and that it's safe to write at least - * CHUNKCOPY_CHUNK_SIZE*3 bytes to the output. - */ -static inline unsigned char *chunkcopy_lapped_relaxed(unsigned char *out, - unsigned dist, - unsigned len) { - if (dist < len && dist < CHUNKCOPY_CHUNK_SIZE) { - return chunkset_core(out, dist, len); - } - return chunkcopy_core(out, out - dist, len); -} - -/** - * Behave like chunkcopy_lapped_relaxed(), but avoid writing beyond of - * legal output. - * - * Accepts an additional pointer to the end of safe output. A generic safe - * copy would use (out + len), but it's normally the case that the end of the - * output buffer is beyond the end of the current copy, and this can still be - * exploited. - */ -static inline unsigned char *chunkcopy_lapped_safe(unsigned char *out, - unsigned dist, unsigned len, - unsigned char *limit) { - Assert(out + len <= limit, "chunk copy exceeds safety limit"); - if ((limit - out) < (ptrdiff_t)(3 * CHUNKCOPY_CHUNK_SIZE)) { - /* TODO(cavalcantii): try harder to optimise this */ - while (len-- > 0) { - *out = *(out - dist); - out++; - } - return out; - } - return chunkcopy_lapped_relaxed(out, dist, len); -} - -/* TODO(cavalcanti): see crbug.com/1110083. */ -static inline unsigned char *chunkcopy_safe_ugly(unsigned char *out, - unsigned dist, unsigned len, - unsigned char *limit) { - /* Seems to perform better on 64bit. */ - return chunkcopy_lapped_safe(out, dist, len, limit); -} - -/* - * Buffer the input in a uint64_t (8 bytes) in the wide input reading case. - */ -typedef uint64_t inflate_holder_t; - -#undef Z_STATIC_ASSERT -#undef Z_RESTRICT -#undef Z_BUILTIN_MEMCPY - -#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ -#endif /* THIRD_PARTY_ZLIB_CHUNKCOPY_H */ diff --git a/third_party/zlib/compress.c b/third_party/zlib/compress.c index 73dbfcc5e..4195d5734 100644 --- a/third_party/zlib/compress.c +++ b/third_party/zlib/compress.c @@ -1,88 +1,99 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;tab-width:8;coding:utf-8 -*-│ -│vi: set net ft=c ts=4 sts=4 sw=4 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 1995-2005, 2014, 2016 Jean-loup Gailly, Mark Adler │ -│ Use of this source code is governed by the BSD-style licenses that can │ -│ be found in the third_party/zlib/LICENSE file. 
│ -╚─────────────────────────────────────────────────────────────────────────────*/ +/* compress.c -- compress a memory buffer + * Copyright (C) 1995-2005, 2014, 2016 Jean-loup Gailly, Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ +#include "third_party/zlib/internal.h" +#include "third_party/zlib/macros.internal.h" #include "third_party/zlib/zlib.h" +// clang-format off -asm(".ident\t\"\\n\\n\ -zlib (zlib License)\\n\ -Copyright 1995-2017 Jean-loup Gailly and Mark Adler\""); -asm(".include \"libc/disclaimer.inc\""); +/* @(#) $Id$ */ -int compress(Bytef *dest, uLongf *destLen, const Bytef *source, - uLong sourceLen) { - return compress2(dest, destLen, source, sourceLen, Z_DEFAULT_COMPRESSION); +#define ZLIB_INTERNAL + +/* =========================================================================== + Compresses the source buffer into the destination buffer. The level + parameter has the same meaning as in deflateInit. sourceLen is the byte + length of the source buffer. Upon entry, destLen is the total size of the + destination buffer, which must be at least 0.1% larger than sourceLen plus + 12 bytes. Upon exit, destLen is the actual size of the compressed buffer. + + compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_BUF_ERROR if there was not enough room in the output buffer, + Z_STREAM_ERROR if the level parameter is invalid. +*/ +int ZEXPORT compress2 (dest, destLen, source, sourceLen, level) + Bytef *dest; + uLongf *destLen; + const Bytef *source; + uLong sourceLen; + int level; +{ + z_stream stream; + int err; + const uInt max = (uInt)-1; + uLong left; + + left = *destLen; + *destLen = 0; + + stream.zalloc = (alloc_func)0; + stream.zfree = (free_func)0; + stream.opaque = (voidpf)0; + + err = deflateInit(&stream, level); + if (err != Z_OK) return err; + + stream.next_out = dest; + stream.avail_out = 0; + stream.next_in = (z_const Bytef *)source; + stream.avail_in = 0; + + do { + if (stream.avail_out == 0) { + stream.avail_out = left > (uLong)max ? max : (uInt)left; + left -= stream.avail_out; + } + if (stream.avail_in == 0) { + stream.avail_in = sourceLen > (uLong)max ? max : (uInt)sourceLen; + sourceLen -= stream.avail_in; + } + err = deflate(&stream, sourceLen ? Z_NO_FLUSH : Z_FINISH); + } while (err == Z_OK); + + *destLen = stream.total_out; + deflateEnd(&stream); + return err == Z_STREAM_END ? Z_OK : err; } -/** - * Compresses the source buffer into the destination buffer. The level - * parameter has the same meaning as in deflateInit. sourceLen is the - * byte length of the source buffer. Upon entry, destLen is the total - * size of the destination buffer, which must be at least 0.1% larger - * than sourceLen plus 12 bytes. Upon exit, destLen is the actual size - * of the compressed buffer. - * - * @return Z_OK if success, Z_MEM_ERROR if there was not enough memory, - * Z_BUF_ERROR if there was not enough room in the output buffer, - * Z_STREAM_ERROR if the level parameter is invalid. 
+/* =========================================================================== */ -int compress2(Bytef *dest, uLongf *destLen, const Bytef *source, - uLong sourceLen, int level) { - z_stream stream; - int err; - const uInt max = (uInt)-1; - uLong left; - - left = *destLen; - *destLen = 0; - - stream.zalloc = (alloc_func)0; - stream.zfree = (free_func)0; - stream.opaque = (voidpf)0; - - err = deflateInit(&stream, level); - if (err != Z_OK) return err; - - stream.next_out = dest; - stream.avail_out = 0; - stream.next_in = (const Bytef *)source; - stream.avail_in = 0; - - do { - if (stream.avail_out == 0) { - stream.avail_out = left > (uLong)max ? max : (uInt)left; - left -= stream.avail_out; - } - if (stream.avail_in == 0) { - stream.avail_in = sourceLen > (uLong)max ? max : (uInt)sourceLen; - sourceLen -= stream.avail_in; - } - err = deflate(&stream, sourceLen ? Z_NO_FLUSH : Z_FINISH); - } while (err == Z_OK); - - *destLen = stream.total_out; - deflateEnd(&stream); - return err == Z_STREAM_END ? Z_OK : err; +int ZEXPORT compress (dest, destLen, source, sourceLen) + Bytef *dest; + uLongf *destLen; + const Bytef *source; + uLong sourceLen; +{ + return compress2(dest, destLen, source, sourceLen, Z_DEFAULT_COMPRESSION); } -/** - * If the default memLevel or windowBits for deflateInit() is changed, - * then this function needs to be updated. +/* =========================================================================== + If the default memLevel or windowBits for deflateInit() is changed, then + this function needs to be updated. */ -uLong compressBound(uLong sourceLen) { - sourceLen = sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + - (sourceLen >> 25) + 13; - /* FIXME(cavalcantii): usage of CRC32 Castagnoli as a hash function - * for the hash table of symbols used for compression has a side effect - * where for compression level [4, 5] it will increase the output buffer - * size by 0.1% (i.e. less than 1%) for a high entropy input (i.e. random - * data). To avoid a scenario where client code would fail, for safety we - * increase the expected output size by 0.8% (i.e. 8x more than the worst - * scenario). See: http://crbug.com/990489 - */ - sourceLen += sourceLen >> 7; // Equivalent to 1.0078125 - return sourceLen; +uLong ZEXPORT compressBound (sourceLen) + uLong sourceLen; +{ + sourceLen = sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + + (sourceLen >> 25) + 13; + /* FIXME(cavalcantii): usage of CRC32 Castagnoli as a hash function + * for the hash table of symbols used for compression has a side effect + * where for compression level [4, 5] it will increase the output buffer size + * by 0.1% (i.e. less than 1%) for a high entropy input (i.e. random data). + * To avoid a scenario where client code would fail, for safety we increase + * the expected output size by 0.8% (i.e. 8x more than the worst scenario). 
+ * See: http://crbug.com/990489 + */ + sourceLen += sourceLen >> 7; // Equivalent to 1.0078125 + return sourceLen; } diff --git a/third_party/zlib/crc32.c b/third_party/zlib/crc32.c index 4b8446ee9..1bed99d61 100644 --- a/third_party/zlib/crc32.c +++ b/third_party/zlib/crc32.c @@ -1,42 +1,1234 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;tab-width:8;coding:utf-8 -*-│ -│vi: set net ft=c ts=4 sts=4 sw=4 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 1995-2006, 2010, 2011, 2012, 2016 Mark Adler │ -│ Use of this source code is governed by the BSD-style licenses that can │ -│ be found in the third_party/zlib/LICENSE file. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/dce.h" -#include "libc/intrin/weaken.h" -#include "libc/nexgen32e/x86feature.h" -#include "libc/str/str.h" +/* crc32.c -- compute the CRC-32 of a data stream + * Copyright (C) 1995-2022 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + * + * This interleaved implementation of a CRC makes use of pipelined multiple + * arithmetic-logic units, commonly found in modern CPU cores. It is due to + * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution. + */ +#include "libc/intrin/atomic.h" #include "third_party/zlib/deflate.internal.h" #include "third_party/zlib/internal.h" -#include "third_party/zlib/zutil.internal.h" +#include "third_party/zlib/macros.internal.h" +// clang-format off -asm(".ident\t\"\\n\\n\ -zlib (zlib License)\\n\ -Copyright 1995-2017 Jean-loup Gailly and Mark Adler\""); -asm(".include \"libc/disclaimer.inc\""); +/* @(#) $Id$ */ -void crc_reset(struct DeflateState *const s) { - if (X86_HAVE(PCLMUL) && _weaken(crc_fold_init)) { - _weaken(crc_fold_init)(s); - return; - } - s->strm->adler = crc32(0L, Z_NULL, 0); +/* + Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore + protection on the static variables used to control the first-use generation + of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should + first call get_crc_table() to initialize the tables before allowing more than + one thread to use crc32(). + + MAKECRCH can be #defined to write out crc32.h. A main() routine is also + produced, so that this one source file can be compiled to an executable. + */ + +#if defined(CRC32_SIMD_SSE42_PCLMUL) || defined(CRC32_ARMV8_CRC32) +# include "crc32_simd.h" +#endif + + /* + A CRC of a message is computed on N braids of words in the message, where + each word consists of W bytes (4 or 8). If N is 3, for example, then three + running sparse CRCs are calculated respectively on each braid, at these + indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ... + This is done starting at a word boundary, and continues until as many blocks + of N * W bytes as are available have been processed. The results are combined + into a single CRC at the end. For this code, N must be in the range 1..6 and + W must be 4 or 8. The upper limit on N can be increased if desired by adding + more #if blocks, extending the patterns apparent in the code. In addition, + crc32.h would need to be regenerated, if the maximum N value is increased. + + N and W are chosen empirically by benchmarking the execution time on a given + processor. The choices for N and W below were based on testing on Intel Kaby + Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64 + Octeon II processors. 
The Intel, AMD, and ARM processors were all fastest + with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4. + They were all tested with either gcc or clang, all using the -O3 optimization + level. Your mileage may vary. + */ + +/* Define N */ +#ifdef Z_TESTN +# define N Z_TESTN +#else +# define N 5 +#endif +#if N < 1 || N > 6 +# error N must be in 1..6 +#endif + +/* + z_crc_t must be at least 32 bits. z_word_t must be at least as long as + z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and + that bytes are eight bits. + */ + +/* + Define W and the associated z_word_t type. If W is not defined, then a + braided calculation is not used, and the associated tables and code are not + compiled. + */ +#ifdef Z_TESTW +# if Z_TESTW-1 != -1 +# define W Z_TESTW +# endif +#else +# ifdef MAKECRCH +# define W 8 /* required for MAKECRCH */ +# else +# if defined(__x86_64__) || defined(__aarch64__) +# define W 8 +# else +# define W 4 +# endif +# endif +#endif +#ifdef W +# if W == 8 && defined(Z_U8) + typedef Z_U8 z_word_t; +# elif defined(Z_U4) +# undef W +# define W 4 + typedef Z_U4 z_word_t; +# else +# undef W +# endif +#endif + +/* Local functions. */ +local z_crc_t multmodp OF((z_crc_t a, z_crc_t b)); +local z_crc_t x2nmodp OF((z_off64_t n, unsigned k)); + +/* If available, use the ARM processor CRC32 instruction. */ +#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8 \ + && defined(USE_CANONICAL_ARMV8_CRC32) +# define ARMCRC32_CANONICAL_ZLIB +#endif + +#if defined(W) && (!defined(ARMCRC32_CANONICAL_ZLIB) || defined(DYNAMIC_CRC_TABLE)) +/* + Swap the bytes in a z_word_t to convert between little and big endian. Any + self-respecting compiler will optimize this to a single machine byte-swap + instruction, if one is available. This assumes that word_t is either 32 bits + or 64 bits. + */ +local z_word_t byte_swap(word) + z_word_t word; +{ +# if W == 8 + return + (word & 0xff00000000000000) >> 56 | + (word & 0xff000000000000) >> 40 | + (word & 0xff0000000000) >> 24 | + (word & 0xff00000000) >> 8 | + (word & 0xff000000) << 8 | + (word & 0xff0000) << 24 | + (word & 0xff00) << 40 | + (word & 0xff) << 56; +# else /* W == 4 */ + return + (word & 0xff000000) >> 24 | + (word & 0xff0000) >> 8 | + (word & 0xff00) << 8 | + (word & 0xff) << 24; +# endif +} +#endif + +/* CRC polynomial. */ +#define POLY 0xedb88320 /* p(x) reflected, with x^32 implied */ + +#define DYNAMIC_CRC_TABLE +#ifdef DYNAMIC_CRC_TABLE + +local z_crc_t FAR crc_table[256]; +local z_crc_t FAR x2n_table[32]; +local void make_crc_table OF((void)); +#ifdef W + local z_word_t FAR crc_big_table[256]; + local z_crc_t FAR crc_braid_table[W][256]; + local z_word_t FAR crc_braid_big_table[W][256]; + local void braid OF((z_crc_t [][256], z_word_t [][256], int, int)); +#endif +#ifdef MAKECRCH + local void write_table OF((FILE *, const z_crc_t FAR *, int)); + local void write_table32hi OF((FILE *, const z_word_t FAR *, int)); + local void write_table64 OF((FILE *, const z_word_t FAR *, int)); +#endif /* MAKECRCH */ + +/* + Define a once() function depending on the availability of atomics. If this is + compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in + multiple threads, and if atomics are not available, then get_crc_table() must + be called to initialize the tables and must return before any threads are + allowed to compute or combine CRCs. + */ + +/* Definition of once functionality. 
*/ +typedef struct once_s once_t; +local void once OF((once_t *, void (*)(void))); + +/* Check for the availability of atomics. */ +#if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \ + !defined(__STDC_NO_ATOMICS__) + +/* Structure for once(), which must be initialized with ONCE_INIT. */ +struct once_s { + atomic_flag begun; + atomic_int done; +}; +#define ONCE_INIT {ATOMIC_FLAG_INIT, 0} + +/* + Run the provided init() function exactly once, even if multiple threads + invoke once() at the same time. The state must be a once_t initialized with + ONCE_INIT. + */ +local void once(state, init) + once_t *state; + void (*init)(void); +{ + if (!atomic_load(&state->done)) { + if (atomic_flag_test_and_set(&state->begun)) + while (!atomic_load(&state->done)) + ; + else { + init(); + atomic_store(&state->done, 1); + } + } } -void crc_finalize(struct DeflateState *const s) { - if (X86_HAVE(PCLMUL) && _weaken(crc_fold_512to32)) { - s->strm->adler = _weaken(crc_fold_512to32)(s); - } +#else /* no atomics */ + +/* Structure for once(), which must be initialized with ONCE_INIT. */ +struct once_s { + volatile int begun; + volatile int done; +}; +#define ONCE_INIT {0, 0} + +/* Test and set. Alas, not atomic, but tries to minimize the period of + vulnerability. */ +local int test_and_set OF((int volatile *)); +local int test_and_set(flag) + int volatile *flag; +{ + int was; + + was = *flag; + *flag = 1; + return was; } -void copy_with_crc(z_streamp strm, Bytef *dst, long size) { - if (X86_HAVE(PCLMUL) && _weaken(crc_fold_copy)) { - _weaken(crc_fold_copy)(strm->state, dst, strm->next_in, size); - return; - } - memcpy(dst, strm->next_in, size); - strm->adler = crc32(strm->adler, dst, size); +/* Run the provided init() function once. This is not thread-safe. */ +local void once(state, init) + once_t *state; + void (*init)(void); +{ + if (!state->done) { + if (test_and_set(&state->begun)) + while (!state->done) + ; + else { + init(); + state->done = 1; + } + } +} + +#endif + +/* State for once(). */ +local once_t made = ONCE_INIT; + +/* + Generate tables for a byte-wise 32-bit CRC calculation on the polynomial: + x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. + + Polynomials over GF(2) are represented in binary, one bit per coefficient, + with the lowest powers in the most significant bit. Then adding polynomials + is just exclusive-or, and multiplying a polynomial by x is a right shift by + one. If we call the above polynomial p, and represent a byte as the + polynomial q, also with the lowest power in the most significant bit (so the + byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p, + where a mod b means the remainder after dividing a by b. + + This calculation is done using the shift-register method of multiplying and + taking the remainder. The register is initialized to zero, and for each + incoming bit, x^32 is added mod p to the register if the bit is a one (where + x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x + (which is shifting right by one and adding x^32 mod p if the bit shifted out + is a one). We start with the highest power (least significant bit) of q and + repeat for all eight bits of q. + + The table is simply the CRC of all possible eight bit values. This is all the + information needed to generate CRCs on data a byte at a time for all + combinations of CRC register values and incoming bytes. 
+ */ +local void make_crc_table() +{ + unsigned i, j, n; + z_crc_t p; + + /* initialize the CRC of bytes tables */ + for (i = 0; i < 256; i++) { + p = i; + for (j = 0; j < 8; j++) + p = p & 1 ? (p >> 1) ^ POLY : p >> 1; + crc_table[i] = p; +#ifdef W + crc_big_table[i] = byte_swap(p); +#endif + } + + /* initialize the x^2^n mod p(x) table */ + p = (z_crc_t)1 << 30; /* x^1 */ + x2n_table[0] = p; + for (n = 1; n < 32; n++) + x2n_table[n] = p = multmodp(p, p); + +#ifdef W + /* initialize the braiding tables -- needs x2n_table[] */ + braid(crc_braid_table, crc_braid_big_table, N, W); +#endif + +#ifdef MAKECRCH + { + /* + The crc32.h header file contains tables for both 32-bit and 64-bit + z_word_t's, and so requires a 64-bit type be available. In that case, + z_word_t must be defined to be 64-bits. This code then also generates + and writes out the tables for the case that z_word_t is 32 bits. + */ +#if !defined(W) || W != 8 +# error Need a 64-bit integer type in order to generate crc32.h. +#endif + FILE *out; + int k, n; + z_crc_t ltl[8][256]; + z_word_t big[8][256]; + + out = fopen("crc32.h", "w"); + if (out == NULL) return; + + /* write out little-endian CRC table to crc32.h */ + fprintf(out, + "/* crc32.h -- tables for rapid CRC calculation\n" + " * Generated automatically by crc32.c\n */\n" + "\n" + "local const z_crc_t FAR crc_table[] = {\n" + " "); + write_table(out, crc_table, 256); + fprintf(out, + "};\n"); + + /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */ + fprintf(out, + "\n" + "#ifdef W\n" + "\n" + "#if W == 8\n" + "\n" + "local const z_word_t FAR crc_big_table[] = {\n" + " "); + write_table64(out, crc_big_table, 256); + fprintf(out, + "};\n"); + + /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */ + fprintf(out, + "\n" + "#else /* W == 4 */\n" + "\n" + "local const z_word_t FAR crc_big_table[] = {\n" + " "); + write_table32hi(out, crc_big_table, 256); + fprintf(out, + "};\n" + "\n" + "#endif\n"); + + /* write out braid tables for each value of N */ + for (n = 1; n <= 6; n++) { + fprintf(out, + "\n" + "#if N == %d\n", n); + + /* compute braid tables for this N and 64-bit word_t */ + braid(ltl, big, n, 8); + + /* write out braid tables for 64-bit z_word_t to crc32.h */ + fprintf(out, + "\n" + "#if W == 8\n" + "\n" + "local const z_crc_t FAR crc_braid_table[][256] = {\n"); + for (k = 0; k < 8; k++) { + fprintf(out, " {"); + write_table(out, ltl[k], 256); + fprintf(out, "}%s", k < 7 ? ",\n" : ""); + } + fprintf(out, + "};\n" + "\n" + "local const z_word_t FAR crc_braid_big_table[][256] = {\n"); + for (k = 0; k < 8; k++) { + fprintf(out, " {"); + write_table64(out, big[k], 256); + fprintf(out, "}%s", k < 7 ? ",\n" : ""); + } + fprintf(out, + "};\n"); + + /* compute braid tables for this N and 32-bit word_t */ + braid(ltl, big, n, 4); + + /* write out braid tables for 32-bit z_word_t to crc32.h */ + fprintf(out, + "\n" + "#else /* W == 4 */\n" + "\n" + "local const z_crc_t FAR crc_braid_table[][256] = {\n"); + for (k = 0; k < 4; k++) { + fprintf(out, " {"); + write_table(out, ltl[k], 256); + fprintf(out, "}%s", k < 3 ? ",\n" : ""); + } + fprintf(out, + "};\n" + "\n" + "local const z_word_t FAR crc_braid_big_table[][256] = {\n"); + for (k = 0; k < 4; k++) { + fprintf(out, " {"); + write_table32hi(out, big[k], 256); + fprintf(out, "}%s", k < 3 ? 
",\n" : ""); + } + fprintf(out, + "};\n" + "\n" + "#endif\n" + "\n" + "#endif\n"); + } + fprintf(out, + "\n" + "#endif\n"); + + /* write out zeros operator table to crc32.h */ + fprintf(out, + "\n" + "local const z_crc_t FAR x2n_table[] = {\n" + " "); + write_table(out, x2n_table, 32); + fprintf(out, + "};\n"); + fclose(out); + } +#endif /* MAKECRCH */ +} + +#ifdef MAKECRCH + +/* + Write the 32-bit values in table[0..k-1] to out, five per line in + hexadecimal separated by commas. + */ +local void write_table(out, table, k) + FILE *out; + const z_crc_t FAR *table; + int k; +{ + int n; + + for (n = 0; n < k; n++) + fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ", + (unsigned long)(table[n]), + n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", ")); +} + +/* + Write the high 32-bits of each value in table[0..k-1] to out, five per line + in hexadecimal separated by commas. + */ +local void write_table32hi(out, table, k) +FILE *out; +const z_word_t FAR *table; +int k; +{ + int n; + + for (n = 0; n < k; n++) + fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ", + (unsigned long)(table[n] >> 32), + n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", ")); +} + +/* + Write the 64-bit values in table[0..k-1] to out, three per line in + hexadecimal separated by commas. This assumes that if there is a 64-bit + type, then there is also a long long integer type, and it is at least 64 + bits. If not, then the type cast and format string can be adjusted + accordingly. + */ +local void write_table64(out, table, k) + FILE *out; + const z_word_t FAR *table; + int k; +{ + int n; + + for (n = 0; n < k; n++) + fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : " ", + (unsigned long long)(table[n]), + n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", ")); +} + +/* Actually do the deed. */ +int main() +{ + make_crc_table(); + return 0; +} + +#endif /* MAKECRCH */ + +#ifdef W +/* + Generate the little and big-endian braid tables for the given n and z_word_t + size w. Each array must have room for w blocks of 256 elements. + */ +local void braid(ltl, big, n, w) + z_crc_t ltl[][256]; + z_word_t big[][256]; + int n; + int w; +{ + int k; + z_crc_t i, p, q; + for (k = 0; k < w; k++) { + p = x2nmodp((n * w + 3 - k) << 3, 0); + ltl[k][0] = 0; + big[w - 1 - k][0] = 0; + for (i = 1; i < 256; i++) { + ltl[k][i] = q = multmodp(i << 24, p); + big[w - 1 - k][i] = byte_swap(q); + } + } +} +#endif + +#else /* !DYNAMIC_CRC_TABLE */ +/* ======================================================================== + * Tables for byte-wise and braided CRC-32 calculations, and a table of powers + * of x for combining CRC-32s, all made by make_crc_table(). + */ +# include "crc32.h" +#endif /* DYNAMIC_CRC_TABLE */ + +/* ======================================================================== + * Routines used for CRC calculation. Some are also required for the table + * generation above. + */ + +/* + Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial, + reflected. For speed, this requires that a not be zero. + */ +local z_crc_t multmodp(a, b) + z_crc_t a; + z_crc_t b; +{ + z_crc_t m, p; + + m = (z_crc_t)1 << 31; + p = 0; + for (;;) { + if (a & m) { + p ^= b; + if ((a & (m - 1)) == 0) + break; + } + m >>= 1; + b = b & 1 ? (b >> 1) ^ POLY : b >> 1; + } + return p; +} + +/* + Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been + initialized. 
+ */ +local z_crc_t x2nmodp(n, k) + z_off64_t n; + unsigned k; +{ + z_crc_t p; + + p = (z_crc_t)1 << 31; /* x^0 == 1 */ + while (n) { + if (n & 1) + p = multmodp(x2n_table[k & 31], p); + n >>= 1; + k++; + } + return p; +} + +/* ========================================================================= + * This function can be used by asm versions of crc32(), and to force the + * generation of the CRC tables in a threaded application. + */ +const z_crc_t FAR * ZEXPORT get_crc_table() +{ +#ifdef DYNAMIC_CRC_TABLE + once(&made, make_crc_table); +#endif /* DYNAMIC_CRC_TABLE */ + return (const z_crc_t FAR *)crc_table; +} + +/* ========================================================================= + * Use ARM machine instructions if available. This will compute the CRC about + * ten times faster than the braided calculation. This code does not check for + * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will + * only be defined if the compilation specifies an ARM processor architecture + * that has the instructions. For example, compiling with -march=armv8.1-a or + * -march=armv8-a+crc, or -march=native if the compile machine has the crc32 + * instructions. + */ +#if ARMCRC32_CANONICAL_ZLIB + +/* + Constants empirically determined to maximize speed. These values are from + measurements on a Cortex-A57. Your mileage may vary. + */ +#define Z_BATCH 3990 /* number of words in a batch */ +#define Z_BATCH_ZEROS 0xa10d3d0c /* computed from Z_BATCH = 3990 */ +#define Z_BATCH_MIN 800 /* fewest words in a final batch */ + +#error this is arm? +unsigned long ZEXPORT crc32_z(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + z_size_t len; +{ + z_crc_t val; + z_word_t crc1, crc2; + const z_word_t *word; + z_word_t val0, val1, val2; + z_size_t last, last2, i; + z_size_t num; + + /* Return initial CRC, if requested. */ + if (buf == Z_NULL) return 0; + +#ifdef DYNAMIC_CRC_TABLE + once(&made, make_crc_table); +#endif /* DYNAMIC_CRC_TABLE */ + + /* Pre-condition the CRC */ + crc = (~crc) & 0xffffffff; + + /* Compute the CRC up to a word boundary. */ + while (len && ((z_size_t)buf & 7) != 0) { + len--; + val = *buf++; + __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val)); + } + + /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */ + word = (z_word_t const *)buf; + num = len >> 3; + len &= 7; + + /* Do three interleaved CRCs to realize the throughput of one crc32x + instruction per cycle. Each CRC is calculated on Z_BATCH words. The + three CRCs are combined into a single CRC after each set of batches. */ + while (num >= 3 * Z_BATCH) { + crc1 = 0; + crc2 = 0; + for (i = 0; i < Z_BATCH; i++) { + val0 = word[i]; + val1 = word[i + Z_BATCH]; + val2 = word[i + 2 * Z_BATCH]; + __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); + __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1)); + __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2)); + } + word += 3 * Z_BATCH; + num -= 3 * Z_BATCH; + crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1; + crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2; + } + + /* Do one last smaller batch with the remaining words, if there are enough + to pay for the combination of CRCs. 
*/ + last = num / 3; + if (last >= Z_BATCH_MIN) { + last2 = last << 1; + crc1 = 0; + crc2 = 0; + for (i = 0; i < last; i++) { + val0 = word[i]; + val1 = word[i + last]; + val2 = word[i + last2]; + __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); + __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1)); + __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2)); + } + word += 3 * last; + num -= 3 * last; + val = x2nmodp(last, 6); + crc = multmodp(val, crc) ^ crc1; + crc = multmodp(val, crc) ^ crc2; + } + + /* Compute the CRC on any remaining words. */ + for (i = 0; i < num; i++) { + val0 = word[i]; + __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); + } + word += num; + + /* Complete the CRC on any remaining bytes. */ + buf = (const unsigned char FAR *)word; + while (len) { + len--; + val = *buf++; + __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val)); + } + + /* Return the CRC, post-conditioned. */ + return crc ^ 0xffffffff; +} + +#else + +#ifdef W + +/* + Return the CRC of the W bytes in the word_t data, taking the + least-significant byte of the word as the first byte of data, without any pre + or post conditioning. This is used to combine the CRCs of each braid. + */ +local z_crc_t crc_word(data) + z_word_t data; +{ + int k; + for (k = 0; k < W; k++) + data = (data >> 8) ^ crc_table[data & 0xff]; + return (z_crc_t)data; +} + +local z_word_t crc_word_big(data) + z_word_t data; +{ + int k; + for (k = 0; k < W; k++) + data = (data << 8) ^ + crc_big_table[(data >> ((W - 1) << 3)) & 0xff]; + return data; +} + +#endif + +#if 0 /* [jart] favor LIBC_STR crc32() */ +/* ========================================================================= */ +unsigned long ZEXPORT crc32_z(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + z_size_t len; +{ + /* + * zlib convention is to call crc32(0, NULL, 0); before making + * calls to crc32(). So this is a good, early (and infrequent) + * place to cache CPU features if needed for those later, more + * interesting crc32() calls. + */ +#if defined(CRC32_SIMD_SSE42_PCLMUL) || defined(CRC32_ARMV8_CRC32) + /* + * Since this routine can be freely used, check CPU features here. + */ + if (buf == Z_NULL) { + if (!len) /* Assume user is calling crc32(0, NULL, 0); */ + cpu_check_features(); + return 0UL; + } + +#endif +#if defined(CRC32_SIMD_SSE42_PCLMUL) + if (x86_cpu_enable_simd && len >= Z_CRC32_SSE42_MINIMUM_LENGTH) { + /* crc32 16-byte chunks */ + z_size_t chunk_size = len & ~Z_CRC32_SSE42_CHUNKSIZE_MASK; + crc = ~crc32_sse42_simd_(buf, chunk_size, ~(uint32_t)crc); + /* check remaining data */ + len -= chunk_size; + if (!len) + return crc; + /* Fall into the default crc32 for the remaining data. */ + buf += chunk_size; + } +#elif defined(CRC32_ARMV8_CRC32) + if (arm_cpu_enable_crc32) { +#if defined(__aarch64__) + /* PMULL is 64bit only, plus code needs at least a 64 bytes buffer. */ + if (arm_cpu_enable_pmull && (len > Z_CRC32_PMULL_MINIMUM_LENGTH)) { + const size_t chunk_size = len & ~Z_CRC32_PMULL_CHUNKSIZE_MASK; + crc = ~armv8_crc32_pmull_little(buf, chunk_size, ~(uint32_t)crc); + /* Check remaining data. */ + len -= chunk_size; + if (!len) + return crc; + + /* Fall through for the remaining data. */ + buf += chunk_size; + } +#endif + return armv8_crc32_little(buf, len, crc); /* Armv8@32bit or tail. 
*/ + } +#else + if (buf == Z_NULL) { + return 0UL; + } +#endif /* CRC32_SIMD */ + +#ifdef DYNAMIC_CRC_TABLE + once(&made, make_crc_table); +#endif /* DYNAMIC_CRC_TABLE */ + /* Pre-condition the CRC */ + crc = (~crc) & 0xffffffff; + +#ifdef W + + /* If provided enough bytes, do a braided CRC calculation. */ + if (len >= N * W + W - 1) { + z_size_t blks; + z_word_t const *words; + unsigned endian; + int k; + + /* Compute the CRC up to a z_word_t boundary. */ + while (len && ((z_size_t)buf & (W - 1)) != 0) { + len--; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + } + + /* Compute the CRC on as many N z_word_t blocks as are available. */ + blks = len / (N * W); + len -= blks * N * W; + words = (z_word_t const *)buf; + + /* Do endian check at execution time instead of compile time, since ARM + processors can change the endianess at execution time. If the + compiler knows what the endianess will be, it can optimize out the + check and the unused branch. */ + endian = 1; + if (*(unsigned char *)&endian) { + /* Little endian. */ + + z_crc_t crc0; + z_word_t word0; +#if N > 1 + z_crc_t crc1; + z_word_t word1; +#if N > 2 + z_crc_t crc2; + z_word_t word2; +#if N > 3 + z_crc_t crc3; + z_word_t word3; +#if N > 4 + z_crc_t crc4; + z_word_t word4; +#if N > 5 + z_crc_t crc5; + z_word_t word5; +#endif +#endif +#endif +#endif +#endif + + /* Initialize the CRC for each braid. */ + crc0 = crc; +#if N > 1 + crc1 = 0; +#if N > 2 + crc2 = 0; +#if N > 3 + crc3 = 0; +#if N > 4 + crc4 = 0; +#if N > 5 + crc5 = 0; +#endif +#endif +#endif +#endif +#endif + + /* + Process the first blks-1 blocks, computing the CRCs on each braid + independently. + */ + while (--blks) { + /* Load the word for each braid into registers. */ + word0 = crc0 ^ words[0]; +#if N > 1 + word1 = crc1 ^ words[1]; +#if N > 2 + word2 = crc2 ^ words[2]; +#if N > 3 + word3 = crc3 ^ words[3]; +#if N > 4 + word4 = crc4 ^ words[4]; +#if N > 5 + word5 = crc5 ^ words[5]; +#endif +#endif +#endif +#endif +#endif + words += N; + + /* Compute and update the CRC for each word. The loop should + get unrolled. */ + crc0 = crc_braid_table[0][word0 & 0xff]; +#if N > 1 + crc1 = crc_braid_table[0][word1 & 0xff]; +#if N > 2 + crc2 = crc_braid_table[0][word2 & 0xff]; +#if N > 3 + crc3 = crc_braid_table[0][word3 & 0xff]; +#if N > 4 + crc4 = crc_braid_table[0][word4 & 0xff]; +#if N > 5 + crc5 = crc_braid_table[0][word5 & 0xff]; +#endif +#endif +#endif +#endif +#endif + for (k = 1; k < W; k++) { + crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff]; +#if N > 1 + crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff]; +#if N > 2 + crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff]; +#if N > 3 + crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff]; +#if N > 4 + crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff]; +#if N > 5 + crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff]; +#endif +#endif +#endif +#endif +#endif + } + } + + /* + Process the last block, combining the CRCs of the N braids at the + same time. + */ + crc = crc_word(crc0 ^ words[0]); +#if N > 1 + crc = crc_word(crc1 ^ words[1] ^ crc); +#if N > 2 + crc = crc_word(crc2 ^ words[2] ^ crc); +#if N > 3 + crc = crc_word(crc3 ^ words[3] ^ crc); +#if N > 4 + crc = crc_word(crc4 ^ words[4] ^ crc); +#if N > 5 + crc = crc_word(crc5 ^ words[5] ^ crc); +#endif +#endif +#endif +#endif +#endif + words += N; + } + else { + /* Big endian. 
*/ + + z_word_t crc0, word0, comb; +#if N > 1 + z_word_t crc1, word1; +#if N > 2 + z_word_t crc2, word2; +#if N > 3 + z_word_t crc3, word3; +#if N > 4 + z_word_t crc4, word4; +#if N > 5 + z_word_t crc5, word5; +#endif +#endif +#endif +#endif +#endif + + /* Initialize the CRC for each braid. */ + crc0 = byte_swap(crc); +#if N > 1 + crc1 = 0; +#if N > 2 + crc2 = 0; +#if N > 3 + crc3 = 0; +#if N > 4 + crc4 = 0; +#if N > 5 + crc5 = 0; +#endif +#endif +#endif +#endif +#endif + + /* + Process the first blks-1 blocks, computing the CRCs on each braid + independently. + */ + while (--blks) { + /* Load the word for each braid into registers. */ + word0 = crc0 ^ words[0]; +#if N > 1 + word1 = crc1 ^ words[1]; +#if N > 2 + word2 = crc2 ^ words[2]; +#if N > 3 + word3 = crc3 ^ words[3]; +#if N > 4 + word4 = crc4 ^ words[4]; +#if N > 5 + word5 = crc5 ^ words[5]; +#endif +#endif +#endif +#endif +#endif + words += N; + + /* Compute and update the CRC for each word. The loop should + get unrolled. */ + crc0 = crc_braid_big_table[0][word0 & 0xff]; +#if N > 1 + crc1 = crc_braid_big_table[0][word1 & 0xff]; +#if N > 2 + crc2 = crc_braid_big_table[0][word2 & 0xff]; +#if N > 3 + crc3 = crc_braid_big_table[0][word3 & 0xff]; +#if N > 4 + crc4 = crc_braid_big_table[0][word4 & 0xff]; +#if N > 5 + crc5 = crc_braid_big_table[0][word5 & 0xff]; +#endif +#endif +#endif +#endif +#endif + for (k = 1; k < W; k++) { + crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff]; +#if N > 1 + crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff]; +#if N > 2 + crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff]; +#if N > 3 + crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff]; +#if N > 4 + crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff]; +#if N > 5 + crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff]; +#endif +#endif +#endif +#endif +#endif + } + } + + /* + Process the last block, combining the CRCs of the N braids at the + same time. + */ + comb = crc_word_big(crc0 ^ words[0]); +#if N > 1 + comb = crc_word_big(crc1 ^ words[1] ^ comb); +#if N > 2 + comb = crc_word_big(crc2 ^ words[2] ^ comb); +#if N > 3 + comb = crc_word_big(crc3 ^ words[3] ^ comb); +#if N > 4 + comb = crc_word_big(crc4 ^ words[4] ^ comb); +#if N > 5 + comb = crc_word_big(crc5 ^ words[5] ^ comb); +#endif +#endif +#endif +#endif +#endif + words += N; + crc = byte_swap(comb); + } + + /* + Update the pointer to the remaining bytes to process. + */ + buf = (unsigned char const *)words; + } + +#endif /* W */ + + /* Complete the computation of the CRC on any remaining bytes. */ + while (len >= 8) { + len -= 8; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + } + while (len) { + len--; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + } + + /* Return the CRC, post-conditioned. 
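Whatever the braid width, the tail bytes above fall back to the classic table-driven recurrence crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]. For readers who want an independent reference to check the braided output against, here is a self-contained sketch of that recurrence with a locally generated table; it assumes the standard reflected polynomial 0xEDB88320 and is illustrative only, not code from this patch:

    /* Reference byte-at-a-time CRC-32 over the reflected polynomial
       0xEDB88320, i.e. the same recurrence used for the tail bytes above.
       The table is built on first use; not thread safe, sketch only. */
    static unsigned long crc32_bytewise(unsigned long crc,
                                        const unsigned char *buf,
                                        unsigned long len) {
        static unsigned long table[256];
        static int have_table;
        unsigned n, k;
        if (!have_table) {
            for (n = 0; n < 256; n++) {
                unsigned long c = n;
                for (k = 0; k < 8; k++)
                    c = (c & 1) ? 0xEDB88320UL ^ (c >> 1) : c >> 1;
                table[n] = c;
            }
            have_table = 1;
        }
        crc = ~crc & 0xffffffffUL;                          /* pre-condition  */
        while (len--)
            crc = (crc >> 8) ^ table[(crc ^ *buf++) & 0xff];
        return crc ^ 0xffffffffUL;                          /* post-condition */
    }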
*/ + return crc ^ 0xffffffff; +} +#endif + +#endif + +#if 0 /* [jart] favor LIBC_STR crc32() */ +/* ========================================================================= */ +unsigned long ZEXPORT crc32(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + uInt len; +{ + /* Some bots compile with optimizations disabled, others will emulate + * ARM on x86 and other weird combinations. + */ +#if defined(CRC32_SIMD_SSE42_PCLMUL) || defined(CRC32_ARMV8_CRC32) + /* We got to verify CPU features, so exploit the common usage pattern + * of calling this function with Z_NULL for an initial valid crc value. + * This allows to cache the result of the feature check and avoid extraneous + * function calls. + */ + if (buf == Z_NULL) { + if (!len) /* Assume user is calling crc32(0, NULL, 0); */ + cpu_check_features(); + return 0UL; + } +#endif + +#if defined(CRC32_ARMV8_CRC32) + if (arm_cpu_enable_crc32) { +#if defined(__aarch64__) + /* PMULL is 64bit only, plus code needs at least a 64 bytes buffer. */ + if (arm_cpu_enable_pmull && (len > Z_CRC32_PMULL_MINIMUM_LENGTH)) { + const size_t chunk_size = len & ~Z_CRC32_PMULL_CHUNKSIZE_MASK; + crc = ~armv8_crc32_pmull_little(buf, chunk_size, ~(uint32_t)crc); + /* Check remaining data. */ + len -= chunk_size; + if (!len) + return crc; + + /* Fall through for the remaining data. */ + buf += chunk_size; + } +#endif + return armv8_crc32_little(buf, len, crc); /* Armv8@32bit or tail. */ + } +#endif + return crc32_z(crc, buf, len); /* Armv7 or Armv8 w/o crypto extensions. */ +} +#endif + +/* ========================================================================= */ +uLong ZEXPORT crc32_combine64(crc1, crc2, len2) + uLong crc1; + uLong crc2; + z_off64_t len2; +{ +#ifdef DYNAMIC_CRC_TABLE + once(&made, make_crc_table); +#endif /* DYNAMIC_CRC_TABLE */ + return multmodp(x2nmodp(len2, 3), crc1) ^ (crc2 & 0xffffffff); +} + +/* ========================================================================= */ +uLong ZEXPORT crc32_combine(crc1, crc2, len2) + uLong crc1; + uLong crc2; + z_off_t len2; +{ + return crc32_combine64(crc1, crc2, (z_off64_t)len2); +} +/* ========================================================================= */ +uLong ZEXPORT crc32_combine_gen64(len2) + z_off64_t len2; +{ +#ifdef DYNAMIC_CRC_TABLE + once(&made, make_crc_table); +#endif /* DYNAMIC_CRC_TABLE */ + return x2nmodp(len2, 3); +} + +/* ========================================================================= */ +uLong ZEXPORT crc32_combine_gen(len2) + z_off_t len2; +{ + return crc32_combine_gen64((z_off64_t)len2); +} + +/* ========================================================================= */ +uLong ZEXPORT crc32_combine_op(crc1, crc2, op) + uLong crc1; + uLong crc2; + uLong op; +{ + return multmodp(op, crc1) ^ (crc2 & 0xffffffff); +} + +ZLIB_INTERNAL void crc_reset(deflate_state *const s) +{ +#ifdef CRC32_SIMD_SSE42_PCLMUL + if (x86_cpu_enable_simd) { + crc_fold_init(s); + return; + } +#endif + s->strm->adler = crc32(0L, Z_NULL, 0); +} + +ZLIB_INTERNAL void crc_finalize(deflate_state *const s) +{ +#ifdef CRC32_SIMD_SSE42_PCLMUL + if (x86_cpu_enable_simd) + s->strm->adler = crc_fold_512to32(s); +#endif +} + +ZLIB_INTERNAL void copy_with_crc(z_streamp strm, Bytef *dst, long size) +{ +#ifdef CRC32_SIMD_SSE42_PCLMUL + if (x86_cpu_enable_simd) { + crc_fold_copy(strm->state, dst, strm->next_in, size); + return; + } +#endif + zmemcpy(dst, strm->next_in, size); + strm->adler = crc32(strm->adler, dst, size); } diff --git a/third_party/zlib/crcfold.c 
b/third_party/zlib/crcfold.c deleted file mode 100644 index fcc456758..000000000 --- a/third_party/zlib/crcfold.c +++ /dev/null @@ -1,483 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;tab-width:8;coding:utf-8 -*-│ -│vi: set net ft=c ts=4 sts=4 sw=4 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2013 Intel Corporation │ -│ Use of this source code is governed by the BSD-style licenses that can │ -│ be found in the third_party/zlib/LICENSE file. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/intrin/emmintrin.internal.h" -#include "libc/intrin/smmintrin.internal.h" -#include "libc/intrin/tmmintrin.internal.h" -#include "libc/intrin/wmmintrin.internal.h" -#include "libc/runtime/runtime.h" -#include "libc/str/str.h" -#include "third_party/zlib/deflate.internal.h" -#include "third_party/zlib/internal.h" - -#ifndef __llvm__ - -asm(".ident\t\"\\n\\n\ -zlib » crc32 parallelized folding (zlib License)\\n\ -Copyright 2013 Intel Corporation\\n\ -Authors: Wajdi Feghali,Jim Guilford,Vinodh Gopal,Erdinc Ozturk,Jim Kukunas\""); -asm(".include \"libc/disclaimer.inc\""); - -/** - * CRC32 parallelized folding w/ PCLMULQDQ instruction. - * - * Authored by: - * Wajdi Feghali - * Jim Guilford - * Vinodh Gopal - * Erdinc Ozturk - * Jim Kukunas - * - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -#define CRC_LOAD(s) \ - __m128i xmm_crc0 = _mm_loadu_si128((__m128i *)s->crc0 + 0); \ - __m128i xmm_crc1 = _mm_loadu_si128((__m128i *)s->crc0 + 1); \ - __m128i xmm_crc2 = _mm_loadu_si128((__m128i *)s->crc0 + 2); \ - __m128i xmm_crc3 = _mm_loadu_si128((__m128i *)s->crc0 + 3); \ - __m128i xmm_crc_part = _mm_loadu_si128((__m128i *)s->crc0 + 4) - -#define CRC_SAVE(s) \ - _mm_storeu_si128((__m128i *)s->crc0 + 0, xmm_crc0); \ - _mm_storeu_si128((__m128i *)s->crc0 + 1, xmm_crc1); \ - _mm_storeu_si128((__m128i *)s->crc0 + 2, xmm_crc2); \ - _mm_storeu_si128((__m128i *)s->crc0 + 3, xmm_crc3); \ - _mm_storeu_si128((__m128i *)s->crc0 + 4, xmm_crc_part) - -void crc_fold_init(struct DeflateState *const s) { - CRC_LOAD(s); - xmm_crc0 = _mm_cvtsi32_si128(0x9db42487); - xmm_crc1 = _mm_setzero_si128(); - xmm_crc2 = _mm_setzero_si128(); - xmm_crc3 = _mm_setzero_si128(); - CRC_SAVE(s); - s->strm->adler = 0; -} - -static inline void fold_1(struct DeflateState *const s, __m128i *xmm_crc0, - __m128i *xmm_crc1, __m128i *xmm_crc2, - __m128i *xmm_crc3) { - const __m128i xmm_fold4 = - _mm_set_epi32(0x00000001, 0x54442bd4, 0x00000001, 0xc6e41596); - - __m128i x_tmp3; - __m128 ps_crc0, ps_crc3, ps_res; - - x_tmp3 = *xmm_crc3; - - *xmm_crc3 = *xmm_crc0; - *xmm_crc0 = _mm_clmulepi64_si128(*xmm_crc0, xmm_fold4, 0x01); - *xmm_crc3 = _mm_clmulepi64_si128(*xmm_crc3, xmm_fold4, 0x10); - ps_crc0 = _mm_castsi128_ps(*xmm_crc0); - ps_crc3 = _mm_castsi128_ps(*xmm_crc3); - ps_res = _mm_xor_ps(ps_crc0, ps_crc3); - - *xmm_crc0 = *xmm_crc1; - *xmm_crc1 = *xmm_crc2; - *xmm_crc2 = x_tmp3; - *xmm_crc3 = _mm_castps_si128(ps_res); -} - -static inline void fold_2(struct DeflateState *const s, __m128i *xmm_crc0, - __m128i *xmm_crc1, __m128i *xmm_crc2, - __m128i *xmm_crc3) { - const __m128i xmm_fold4 = - _mm_set_epi32(0x00000001, 0x54442bd4, 0x00000001, 0xc6e41596); - - __m128i x_tmp3, x_tmp2; - __m128 ps_crc0, ps_crc1, ps_crc2, ps_crc3, ps_res31, ps_res20; - - x_tmp3 = *xmm_crc3; - x_tmp2 = *xmm_crc2; - - *xmm_crc3 = *xmm_crc1; - *xmm_crc1 = _mm_clmulepi64_si128(*xmm_crc1, xmm_fold4, 0x01); - *xmm_crc3 = 
_mm_clmulepi64_si128(*xmm_crc3, xmm_fold4, 0x10); - ps_crc3 = _mm_castsi128_ps(*xmm_crc3); - ps_crc1 = _mm_castsi128_ps(*xmm_crc1); - ps_res31 = _mm_xor_ps(ps_crc3, ps_crc1); - - *xmm_crc2 = *xmm_crc0; - *xmm_crc0 = _mm_clmulepi64_si128(*xmm_crc0, xmm_fold4, 0x01); - *xmm_crc2 = _mm_clmulepi64_si128(*xmm_crc2, xmm_fold4, 0x10); - ps_crc0 = _mm_castsi128_ps(*xmm_crc0); - ps_crc2 = _mm_castsi128_ps(*xmm_crc2); - ps_res20 = _mm_xor_ps(ps_crc0, ps_crc2); - - *xmm_crc0 = x_tmp2; - *xmm_crc1 = x_tmp3; - *xmm_crc2 = _mm_castps_si128(ps_res20); - *xmm_crc3 = _mm_castps_si128(ps_res31); -} - -static inline void fold_3(struct DeflateState *const s, __m128i *xmm_crc0, - __m128i *xmm_crc1, __m128i *xmm_crc2, - __m128i *xmm_crc3) { - const __m128i xmm_fold4 = - _mm_set_epi32(0x00000001, 0x54442bd4, 0x00000001, 0xc6e41596); - - __m128i x_tmp3; - __m128 ps_crc0, ps_crc1, ps_crc2, ps_crc3, ps_res32, ps_res21, ps_res10; - - x_tmp3 = *xmm_crc3; - - *xmm_crc3 = *xmm_crc2; - *xmm_crc2 = _mm_clmulepi64_si128(*xmm_crc2, xmm_fold4, 0x01); - *xmm_crc3 = _mm_clmulepi64_si128(*xmm_crc3, xmm_fold4, 0x10); - ps_crc2 = _mm_castsi128_ps(*xmm_crc2); - ps_crc3 = _mm_castsi128_ps(*xmm_crc3); - ps_res32 = _mm_xor_ps(ps_crc2, ps_crc3); - - *xmm_crc2 = *xmm_crc1; - *xmm_crc1 = _mm_clmulepi64_si128(*xmm_crc1, xmm_fold4, 0x01); - *xmm_crc2 = _mm_clmulepi64_si128(*xmm_crc2, xmm_fold4, 0x10); - ps_crc1 = _mm_castsi128_ps(*xmm_crc1); - ps_crc2 = _mm_castsi128_ps(*xmm_crc2); - ps_res21 = _mm_xor_ps(ps_crc1, ps_crc2); - - *xmm_crc1 = *xmm_crc0; - *xmm_crc0 = _mm_clmulepi64_si128(*xmm_crc0, xmm_fold4, 0x01); - *xmm_crc1 = _mm_clmulepi64_si128(*xmm_crc1, xmm_fold4, 0x10); - ps_crc0 = _mm_castsi128_ps(*xmm_crc0); - ps_crc1 = _mm_castsi128_ps(*xmm_crc1); - ps_res10 = _mm_xor_ps(ps_crc0, ps_crc1); - - *xmm_crc0 = x_tmp3; - *xmm_crc1 = _mm_castps_si128(ps_res10); - *xmm_crc2 = _mm_castps_si128(ps_res21); - *xmm_crc3 = _mm_castps_si128(ps_res32); -} - -static inline void fold_4(struct DeflateState *const s, __m128i *xmm_crc0, - __m128i *xmm_crc1, __m128i *xmm_crc2, - __m128i *xmm_crc3) { - const __m128i xmm_fold4 = - _mm_set_epi32(0x00000001, 0x54442bd4, 0x00000001, 0xc6e41596); - - __m128i x_tmp0, x_tmp1, x_tmp2, x_tmp3; - __m128 ps_crc0, ps_crc1, ps_crc2, ps_crc3; - __m128 ps_t0, ps_t1, ps_t2, ps_t3; - __m128 ps_res0, ps_res1, ps_res2, ps_res3; - - x_tmp0 = *xmm_crc0; - x_tmp1 = *xmm_crc1; - x_tmp2 = *xmm_crc2; - x_tmp3 = *xmm_crc3; - - *xmm_crc0 = _mm_clmulepi64_si128(*xmm_crc0, xmm_fold4, 0x01); - x_tmp0 = _mm_clmulepi64_si128(x_tmp0, xmm_fold4, 0x10); - ps_crc0 = _mm_castsi128_ps(*xmm_crc0); - ps_t0 = _mm_castsi128_ps(x_tmp0); - ps_res0 = _mm_xor_ps(ps_crc0, ps_t0); - - *xmm_crc1 = _mm_clmulepi64_si128(*xmm_crc1, xmm_fold4, 0x01); - x_tmp1 = _mm_clmulepi64_si128(x_tmp1, xmm_fold4, 0x10); - ps_crc1 = _mm_castsi128_ps(*xmm_crc1); - ps_t1 = _mm_castsi128_ps(x_tmp1); - ps_res1 = _mm_xor_ps(ps_crc1, ps_t1); - - *xmm_crc2 = _mm_clmulepi64_si128(*xmm_crc2, xmm_fold4, 0x01); - x_tmp2 = _mm_clmulepi64_si128(x_tmp2, xmm_fold4, 0x10); - ps_crc2 = _mm_castsi128_ps(*xmm_crc2); - ps_t2 = _mm_castsi128_ps(x_tmp2); - ps_res2 = _mm_xor_ps(ps_crc2, ps_t2); - - *xmm_crc3 = _mm_clmulepi64_si128(*xmm_crc3, xmm_fold4, 0x01); - x_tmp3 = _mm_clmulepi64_si128(x_tmp3, xmm_fold4, 0x10); - ps_crc3 = _mm_castsi128_ps(*xmm_crc3); - ps_t3 = _mm_castsi128_ps(x_tmp3); - ps_res3 = _mm_xor_ps(ps_crc3, ps_t3); - - *xmm_crc0 = _mm_castps_si128(ps_res0); - *xmm_crc1 = _mm_castps_si128(ps_res1); - *xmm_crc2 = _mm_castps_si128(ps_res2); - *xmm_crc3 = 
_mm_castps_si128(ps_res3); -} - -static const unsigned forcealign(32) pshufb_shf_table[60] = { - 0x84838281, 0x88878685, 0x8c8b8a89, 0x008f8e8d, /* shl 15 (16 - 1)/shr1 */ - 0x85848382, 0x89888786, 0x8d8c8b8a, 0x01008f8e, /* shl 14 (16 - 3)/shr2 */ - 0x86858483, 0x8a898887, 0x8e8d8c8b, 0x0201008f, /* shl 13 (16 - 4)/shr3 */ - 0x87868584, 0x8b8a8988, 0x8f8e8d8c, 0x03020100, /* shl 12 (16 - 4)/shr4 */ - 0x88878685, 0x8c8b8a89, 0x008f8e8d, 0x04030201, /* shl 11 (16 - 5)/shr5 */ - 0x89888786, 0x8d8c8b8a, 0x01008f8e, 0x05040302, /* shl 10 (16 - 6)/shr6 */ - 0x8a898887, 0x8e8d8c8b, 0x0201008f, 0x06050403, /* shl 9 (16 - 7)/shr7 */ - 0x8b8a8988, 0x8f8e8d8c, 0x03020100, 0x07060504, /* shl 8 (16 - 8)/shr8 */ - 0x8c8b8a89, 0x008f8e8d, 0x04030201, 0x08070605, /* shl 7 (16 - 9)/shr9 */ - 0x8d8c8b8a, 0x01008f8e, 0x05040302, 0x09080706, /* shl 6 (16 -10)/shr10*/ - 0x8e8d8c8b, 0x0201008f, 0x06050403, 0x0a090807, /* shl 5 (16 -11)/shr11*/ - 0x8f8e8d8c, 0x03020100, 0x07060504, 0x0b0a0908, /* shl 4 (16 -12)/shr12*/ - 0x008f8e8d, 0x04030201, 0x08070605, 0x0c0b0a09, /* shl 3 (16 -13)/shr13*/ - 0x01008f8e, 0x05040302, 0x09080706, 0x0d0c0b0a, /* shl 2 (16 -14)/shr14*/ - 0x0201008f, 0x06050403, 0x0a090807, 0x0e0d0c0b /* shl 1 (16 -15)/shr15*/ -}; - -static void partial_fold(struct DeflateState *const s, const size_t len, - __m128i *xmm_crc0, __m128i *xmm_crc1, - __m128i *xmm_crc2, __m128i *xmm_crc3, - __m128i *xmm_crc_part) { - const __m128i xmm_fold4 = - _mm_set_epi32(0x00000001, 0x54442bd4, 0x00000001, 0xc6e41596); - const __m128i xmm_mask3 = _mm_set1_epi32(0x80808080); - - __m128i xmm_shl, xmm_shr, xmm_tmp1, xmm_tmp2, xmm_tmp3; - __m128i xmm_a0_0, xmm_a0_1; - __m128 ps_crc3, psa0_0, psa0_1, ps_res; - - { - __m128i *wut = (__m128i *)pshufb_shf_table + (len - 1); - intptr_t huh = (intptr_t)wut; - intptr_t tab = (intptr_t)pshufb_shf_table; - if (huh < tab || huh + sizeof(*wut) >= tab + sizeof(pshufb_shf_table)) { - abort(); - } - xmm_shl = _mm_load_si128(wut); - } - - xmm_shr = xmm_shl; - xmm_shr = _mm_xor_si128(xmm_shr, xmm_mask3); - - xmm_a0_0 = _mm_shuffle_epi8(*xmm_crc0, xmm_shl); - - *xmm_crc0 = _mm_shuffle_epi8(*xmm_crc0, xmm_shr); - xmm_tmp1 = _mm_shuffle_epi8(*xmm_crc1, xmm_shl); - *xmm_crc0 = _mm_or_si128(*xmm_crc0, xmm_tmp1); - - *xmm_crc1 = _mm_shuffle_epi8(*xmm_crc1, xmm_shr); - xmm_tmp2 = _mm_shuffle_epi8(*xmm_crc2, xmm_shl); - *xmm_crc1 = _mm_or_si128(*xmm_crc1, xmm_tmp2); - - *xmm_crc2 = _mm_shuffle_epi8(*xmm_crc2, xmm_shr); - xmm_tmp3 = _mm_shuffle_epi8(*xmm_crc3, xmm_shl); - *xmm_crc2 = _mm_or_si128(*xmm_crc2, xmm_tmp3); - - *xmm_crc3 = _mm_shuffle_epi8(*xmm_crc3, xmm_shr); - *xmm_crc_part = _mm_shuffle_epi8(*xmm_crc_part, xmm_shl); - *xmm_crc3 = _mm_or_si128(*xmm_crc3, *xmm_crc_part); - - xmm_a0_1 = _mm_clmulepi64_si128(xmm_a0_0, xmm_fold4, 0x10); - xmm_a0_0 = _mm_clmulepi64_si128(xmm_a0_0, xmm_fold4, 0x01); - - ps_crc3 = _mm_castsi128_ps(*xmm_crc3); - psa0_0 = _mm_castsi128_ps(xmm_a0_0); - psa0_1 = _mm_castsi128_ps(xmm_a0_1); - - ps_res = _mm_xor_ps(ps_crc3, psa0_0); - ps_res = _mm_xor_ps(ps_res, psa0_1); - - *xmm_crc3 = _mm_castps_si128(ps_res); -} - -void crc_fold_copy(struct DeflateState *const s, unsigned char *dst, - const unsigned char *src, long len) { - unsigned long algn_diff; - __m128i xmm_t0, xmm_t1, xmm_t2, xmm_t3; - - CRC_LOAD(s); - - if (len < 16) { - if (len == 0) return; - goto partial; - } - - algn_diff = 0 - ((uintptr_t)src & 0xF); - if (algn_diff) { - xmm_crc_part = _mm_loadu_si128((__m128i *)src); - _mm_storeu_si128((__m128i *)dst, xmm_crc_part); - - dst += algn_diff; - src 
+= algn_diff; - len -= algn_diff; - - partial_fold(s, algn_diff, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, - &xmm_crc_part); - } - - while ((len -= 64) >= 0) { - xmm_t0 = _mm_load_si128((__m128i *)src); - xmm_t1 = _mm_load_si128((__m128i *)src + 1); - xmm_t2 = _mm_load_si128((__m128i *)src + 2); - xmm_t3 = _mm_load_si128((__m128i *)src + 3); - - fold_4(s, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3); - - _mm_storeu_si128((__m128i *)dst, xmm_t0); - _mm_storeu_si128((__m128i *)dst + 1, xmm_t1); - _mm_storeu_si128((__m128i *)dst + 2, xmm_t2); - _mm_storeu_si128((__m128i *)dst + 3, xmm_t3); - - xmm_crc0 = _mm_xor_si128(xmm_crc0, xmm_t0); - xmm_crc1 = _mm_xor_si128(xmm_crc1, xmm_t1); - xmm_crc2 = _mm_xor_si128(xmm_crc2, xmm_t2); - xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_t3); - - src += 64; - dst += 64; - } - - /* - * len = num bytes left - 64 - */ - if (len + 16 >= 0) { - len += 16; - - xmm_t0 = _mm_load_si128((__m128i *)src); - xmm_t1 = _mm_load_si128((__m128i *)src + 1); - xmm_t2 = _mm_load_si128((__m128i *)src + 2); - - fold_3(s, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3); - - _mm_storeu_si128((__m128i *)dst, xmm_t0); - _mm_storeu_si128((__m128i *)dst + 1, xmm_t1); - _mm_storeu_si128((__m128i *)dst + 2, xmm_t2); - - xmm_crc1 = _mm_xor_si128(xmm_crc1, xmm_t0); - xmm_crc2 = _mm_xor_si128(xmm_crc2, xmm_t1); - xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_t2); - - if (len == 0) goto done; - - dst += 48; - src += 48; - } else if (len + 32 >= 0) { - len += 32; - - xmm_t0 = _mm_load_si128((__m128i *)src); - xmm_t1 = _mm_load_si128((__m128i *)src + 1); - - fold_2(s, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3); - - _mm_storeu_si128((__m128i *)dst, xmm_t0); - _mm_storeu_si128((__m128i *)dst + 1, xmm_t1); - - xmm_crc2 = _mm_xor_si128(xmm_crc2, xmm_t0); - xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_t1); - - if (len == 0) goto done; - - dst += 32; - src += 32; - } else if (len + 48 >= 0) { - len += 48; - - xmm_t0 = _mm_load_si128((__m128i *)src); - - fold_1(s, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3); - - _mm_storeu_si128((__m128i *)dst, xmm_t0); - - xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_t0); - - if (len == 0) goto done; - - dst += 16; - src += 16; - } else { - len += 64; - if (len == 0) goto done; - } - -partial: - -#if defined(_MSC_VER) - /* VS does not permit the use of _mm_set_epi64x in 32-bit builds */ - { - int32_t parts[4] = {0, 0, 0, 0}; - memcpy(&parts, src, len); - xmm_crc_part = _mm_set_epi32(parts[3], parts[2], parts[1], parts[0]); - } -#else -{ - int64_t parts[2] = {0, 0}; - memcpy(&parts, src, len); - xmm_crc_part = _mm_set_epi64x(parts[1], parts[0]); -} -#endif - - _mm_storeu_si128((__m128i *)dst, xmm_crc_part); - partial_fold(s, len, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, - &xmm_crc_part); -done: - CRC_SAVE(s); -} - -static const __m128i kCrcFold512[] = { - {0xffffffffffffffffull, 0x0000000000000000ull}, - {0xffffffff00000000ull, 0xffffffffffffffffull}, - {0x00000000ccaa009eull, 0x00000001751997d0ull}, /* 2: k1 */ - {0x00000000ccaa009eull, 0x0000000163cd6124ull}, /* 3: k5 */ - {0x00000001f7011640ull, 0x00000001db710640ull} /* 4: k7 */ -}; - -unsigned crc_fold_512to32(struct DeflateState *const s) { - const __m128i xmm_mask = kCrcFold512[0]; - const __m128i xmm_mask2 = kCrcFold512[1]; - - unsigned crc; - __m128i x_tmp0, x_tmp1, x_tmp2, crc_fold; - - CRC_LOAD(s); - - /* - * k1 - */ - crc_fold = kCrcFold512[2]; - - x_tmp0 = _mm_clmulepi64_si128(xmm_crc0, crc_fold, 0x10); - xmm_crc0 = _mm_clmulepi64_si128(xmm_crc0, crc_fold, 0x01); - xmm_crc1 = _mm_xor_si128(xmm_crc1, x_tmp0); - xmm_crc1 = 
_mm_xor_si128(xmm_crc1, xmm_crc0); - - x_tmp1 = _mm_clmulepi64_si128(xmm_crc1, crc_fold, 0x10); - xmm_crc1 = _mm_clmulepi64_si128(xmm_crc1, crc_fold, 0x01); - xmm_crc2 = _mm_xor_si128(xmm_crc2, x_tmp1); - xmm_crc2 = _mm_xor_si128(xmm_crc2, xmm_crc1); - - x_tmp2 = _mm_clmulepi64_si128(xmm_crc2, crc_fold, 0x10); - xmm_crc2 = _mm_clmulepi64_si128(xmm_crc2, crc_fold, 0x01); - xmm_crc3 = _mm_xor_si128(xmm_crc3, x_tmp2); - xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc2); - - /* - * k5 - */ - crc_fold = kCrcFold512[3]; - - xmm_crc0 = xmm_crc3; - xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0); - xmm_crc0 = _mm_srli_si128(xmm_crc0, 8); - xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc0); - - xmm_crc0 = xmm_crc3; - xmm_crc3 = _mm_slli_si128(xmm_crc3, 4); - xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0x10); - xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc0); - xmm_crc3 = _mm_and_si128(xmm_crc3, xmm_mask2); - - /* - * k7 - */ - xmm_crc1 = xmm_crc3; - xmm_crc2 = xmm_crc3; - crc_fold = kCrcFold512[4]; - - xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0); - xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc2); - xmm_crc3 = _mm_and_si128(xmm_crc3, xmm_mask); - - xmm_crc2 = xmm_crc3; - xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0x10); - xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc2); - xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc1); - - crc = _mm_extract_epi32(xmm_crc3, 2); - return ~crc; - CRC_SAVE(s); /* TODO(jart): wut? */ -} - -#endif /* __llvm__ */ diff --git a/third_party/zlib/deflate.c b/third_party/zlib/deflate.c index b57b25115..e67fdb6e1 100644 --- a/third_party/zlib/deflate.c +++ b/third_party/zlib/deflate.c @@ -1,201 +1,165 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 1995-2017 Jean-loup Gailly and Mark Adler │ -│ Use of this source code is governed by the BSD-style licenses that can │ -│ be found in the third_party/zlib/LICENSE file. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/dce.h" -#include "libc/intrin/bits.h" -#include "libc/intrin/kprintf.h" -#include "libc/macros.internal.h" -#include "libc/mem/mem.h" -#include "libc/nexgen32e/x86feature.h" -#include "libc/str/str.h" +/* deflate.c -- compress data using the deflation algorithm + * Copyright (C) 1995-2022 Jean-loup Gailly and Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ +#include "libc/assert.h" #include "third_party/zlib/deflate.internal.h" +#include "third_party/zlib/insert_string.internal.h" #include "third_party/zlib/internal.h" -#include "third_party/zlib/zutil.internal.h" asm(".ident\t\"\\n\\n\ -inflate 1.2.11 (zlib License)\\n\ -Copyright 1995-2017 Jean-loup Gailly and Mark Adler\\n\ +inflate 1.2.12.1 (zlib License)\\n\ +Copyright 1995-2022 Jean-loup Gailly and Mark Adler\\n\ Invented 1990 Phillip Walter Katz\""); +// clang-format off -/** - * @fileoverview DEFLATE algorithm. +/* + * ALGORITHM * - * The "deflation" process depends on being able to identify portions of - * the input text which are identical to earlier input (within a sliding - * window trailing behind the input currently being processed). + * The "deflation" process depends on being able to identify portions + * of the input text which are identical to earlier input (within a + * sliding window trailing behind the input currently being processed). 
* - * The most straightforward technique turns out to be the fastest for - * most input files: try all possible matches and select the longest. - * The key feature of this algorithm is that insertions into the string - * dictionary are very simple and thus fast, and deletions are avoided - * completely. Insertions are performed at each input character, whereas - * string matches are performed only when the previous match ends. So it - * is preferable to spend more time in matches to allow very fast string - * insertions and avoid deletions. The matching algorithm for small - * strings is inspired from that of Rabin & Karp. A brute force approach - * is used to find longer strings when a small match has been found. A - * similar algorithm is used in comic (by Jan-Mark Wams) and freeze (by - * Leonid Broukhis). + * The most straightforward technique turns out to be the fastest for + * most input files: try all possible matches and select the longest. + * The key feature of this algorithm is that insertions into the string + * dictionary are very simple and thus fast, and deletions are avoided + * completely. Insertions are performed at each input character, whereas + * string matches are performed only when the previous match ends. So it + * is preferable to spend more time in matches to allow very fast string + * insertions and avoid deletions. The matching algorithm for small + * strings is inspired from that of Rabin & Karp. A brute force approach + * is used to find longer strings when a small match has been found. + * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze + * (by Leonid Broukhis). + * A previous version of this file used a more sophisticated algorithm + * (by Fiala and Greene) which is guaranteed to run in linear amortized + * time, but has a larger average cost, uses more memory and is patented. + * However the F&G algorithm may be faster for some highly redundant + * files if the parameter max_chain_length (described below) is too large. * - * A previous version of this file used a more sophisticated algorithm - * (by Fiala and Greene) which is guaranteed to run in linear amortized - * time, but has a larger average cost, uses more memory and is - * patented. However the F&G algorithm may be faster for some highly - * redundant files if the parameter max_chain_length (described below) - * is too large. + * ACKNOWLEDGEMENTS * - * @note DEFLATE was invented by Phillip Walter Katz (RIP) - * @note The idea of lazy evaluation of matches is due to Jan-Mark Wams, - * and I found it in 'freeze' written by Leonid Broukhis. Thanks - * to many people for bug reports and testing. - * @see Deutsch, L.P.,"DEFLATE Compressed Data Format Specification". + * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and + * I found it in 'freeze' written by Leonid Broukhis. + * Thanks to many people for bug reports and testing. + * + * REFERENCES + * + * Deutsch, L.P.,"DEFLATE Compressed Data Format Specification". * Available in http://tools.ietf.org/html/rfc1951 - * @see A description of the Rabin and Karp algorithm is given in the - * book "Algorithms" by R. Sedgewick, Addison-Wesley, p252. - * @see Fiala,E.R., and Greene,D.H. Data Compression with Finite - * Windows, Comm.ACM, 32,4 (1989) 490-595 + * + * A description of the Rabin and Karp algorithm is given in the book + * "Algorithms" by R. Sedgewick, Addison-Wesley, p252. + * + * Fiala,E.R., and Greene,D.H. 
+ * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595 + * */ +/* @(#) $Id$ */ + #ifdef FASTEST /* See http://crbug.com/1113596 */ #error "FASTEST is not supported in Chromium's zlib." #endif -/* Tail of hash chains */ -#define NIL 0 - -/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */ -#ifndef TOO_FAR -#define TOO_FAR 4096 -#endif - -/** - * Ranks Z_BLOCK between Z_NO_FLUSH and Z_PARTIAL_FLUSH +/* + If you use the zlib library in a product, an acknowledgment is welcome + in the documentation of your product. If for some reason you cannot + include such an acknowledgment, I would appreciate that you keep this + copyright string in the executable of your product. */ -#define RANK(f) (((f)*2) - ((f) > 4 ? 9 : 0)) -/** - * Updates hash value with the given input byte. - * - * IN assertion: all calls to UPDATE_HASH are made with consecutive - * input characters, so that a running hash key can be computed from - * the previous key instead of complete recalculation each time. +/* =========================================================================== + * Function prototypes. */ -#define UPDATE_HASH(s, h, c) (h = (((h) << s->hash_shift) ^ (c)) & s->hash_mask) - -/** - * Initializes hash table (avoiding 64K overflow for 16 bit systems). - * prev[] will be initialized on the fly. - */ -#define CLEAR_HASH(s) \ - do { \ - s->head[s->hash_size - 1] = NIL; \ - bzero((Bytef *)s->head, (s->hash_size - 1) * sizeof(*s->head)); \ - } while (0) - -/** - * Update header CRC with s->pending_buf[beg..s->pending - 1] bytes. - */ -#define HCRC_UPDATE(beg) \ - do { \ - if (s->gzhead->hcrc && s->pending > (beg)) \ - strm->adler = \ - crc32(strm->adler, s->pending_buf + (beg), s->pending - (beg)); \ - } while (0) - -/** - * Flushes current block, with given end-of-file flag. IN assertion: - * strstart is set to the end of the current match. - */ -#define FLUSH_BLOCK_ONLY(s, last) \ - { \ - _tr_flush_block( \ - s, \ - (s->block_start >= 0L ? (charf *)&s->window[(unsigned)s->block_start] \ - : (charf *)Z_NULL), \ - (uint64_t)((long)s->strstart - s->block_start), (last)); \ - s->block_start = s->strstart; \ - flush_pending(s->strm); \ - Tracev(("[FLUSH]")); \ - } - -/** - * Same but force premature exit if necessary. - */ -#define FLUSH_BLOCK(s, last) \ - { \ - FLUSH_BLOCK_ONLY(s, last); \ - if (s->strm->avail_out == 0) return (last) ? finish_started : need_more; \ - } - -/** - * Maximum stored block length in deflate format (not including header). - */ -#define MAX_STORED 65535 - typedef enum { - need_more, /* block not completed, need more input or more output */ - block_done, /* block flush performed */ - finish_started, /* finish started, need only more output at next deflate */ - finish_done /* finish done, accept no more input or output */ + need_more, /* block not completed, need more input or more output */ + block_done, /* block flush performed */ + finish_started, /* finish started, need only more output at next deflate */ + finish_done /* finish done, accept no more input or output */ } block_state; -typedef block_state (*compress_func)(struct DeflateState *s, int flush); +typedef block_state (*compress_func) OF((deflate_state *s, int flush)); +/* Compression function. Returns the block state after the call. 
*/ -static int deflateStateCheck(z_streamp strm); -static void slide_hash(struct DeflateState *s); -static block_state deflate_stored(struct DeflateState *s, int flush); -static block_state deflate_fast(struct DeflateState *s, int flush); -static block_state deflate_rle(struct DeflateState *s, int flush); -static block_state deflate_huff(struct DeflateState *s, int flush); -static void lm_init(struct DeflateState *s); -static void putShortMSB(struct DeflateState *s, uInt b); -static void flush_pending(z_streamp strm); -static uInt longest_match(struct DeflateState *s, IPos cur_match); +local int deflateStateCheck OF((z_streamp strm)); +local void slide_hash OF((deflate_state *s)); +local void fill_window OF((deflate_state *s)); +local block_state deflate_stored OF((deflate_state *s, int flush)); +local block_state deflate_fast OF((deflate_state *s, int flush)); #ifndef FASTEST -static block_state deflate_slow(struct DeflateState *s, int flush); +local block_state deflate_slow OF((deflate_state *s, int flush)); #endif -#ifdef ZLIB_DEBUG -static void check_match(struct DeflateState *s, IPos start, IPos match, - int length); +local block_state deflate_rle OF((deflate_state *s, int flush)); +local block_state deflate_huff OF((deflate_state *s, int flush)); +local void lm_init OF((deflate_state *s)); +local void putShortMSB OF((deflate_state *s, uInt b)); +local void flush_pending OF((z_streamp strm)); +local unsigned read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); +#ifdef ASMV +# pragma message("Assembler code may have bugs -- use at your own risk") + void match_init OF((void)); /* asm code initialization */ + uInt longest_match OF((deflate_state *s, IPos cur_match)); +#else +local uInt longest_match OF((deflate_state *s, IPos cur_match)); #endif -/** - * Values for max_lazy_match, good_match and max_chain_length, depending - * on the desired pack level (0..9). The values given below have been - * tuned to exclude worst case performance for pathological files. - * Better values may be found for specific files. +#ifdef ZLIB_DEBUG +local void check_match OF((deflate_state *s, IPos start, IPos match, + int length)); +#endif + +/* From crc32.c */ +extern void ZLIB_INTERNAL crc_reset(deflate_state *const s); +extern void ZLIB_INTERNAL crc_finalize(deflate_state *const s); +extern void ZLIB_INTERNAL copy_with_crc(z_streamp strm, Bytef *dst, long size); + +/* =========================================================================== + * Local data + */ + +#define NIL 0 +/* Tail of hash chains */ + +#ifndef TOO_FAR +# define TOO_FAR 4096 +#endif +/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */ + +/* Values for max_lazy_match, good_match and max_chain_length, depending on + * the desired pack level (0..9). The values given below have been tuned to + * exclude worst case performance for pathological files. Better values may be + * found for specific files. 
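These four knobs are what the match-search heuristics consult: max_chain caps how many hash-chain links longest_match() will follow, good_length quarters that budget once the previously found match is already long, nice_length ends the search as soon as a candidate of that length turns up, and max_lazy decides whether a match is deferred in favor of trying the next input byte. A condensed sketch of the chain-budget part, with names mirroring the deflate_state fields (simplified for illustration, not the literal implementation):

    /* Sketch: how the per-level tuning values bound the match search. */
    typedef struct {
        unsigned good_length;   /* shrink the search once matches are this long */
        unsigned nice_length;   /* stop searching outright at this length       */
        unsigned max_chain;     /* most hash-chain links to follow              */
    } tuning;

    static unsigned search_budget(const tuning *t, unsigned prev_length) {
        unsigned chain = t->max_chain;
        if (prev_length >= t->good_length)
            chain >>= 2;        /* already have a good match: search 4x less */
        return chain;           /* the walk also stops early once a candidate
                                   of nice_length bytes has been found        */
    }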
*/ typedef struct config_s { - uint16_t good_length; /* reduce lazy search above this match length */ - uint16_t max_lazy; /* do not perform lazy search above this match length */ - uint16_t nice_length; /* quit search above this match length */ - uint16_t max_chain; - compress_func func; + ush good_length; /* reduce lazy search above this match length */ + ush max_lazy; /* do not perform lazy search above this match length */ + ush nice_length; /* quit search above this match length */ + ush max_chain; + compress_func func; } config; #ifdef FASTEST -static const config configuration_table[2] = { - /* good lazy nice chain */ - /* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */ - /* 1 */ {4, 4, 8, 4, deflate_fast}}; /* max speed, no lazy matches */ +local const config configuration_table[2] = { +/* good lazy nice chain */ +/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */ +/* 1 */ {4, 4, 8, 4, deflate_fast}}; /* max speed, no lazy matches */ #else -static const config configuration_table[10] = { - /* good lazy nice chain */ - /* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */ - /* 1 */ {4, 4, 8, 4, deflate_fast}, /* max speed, no lazy matches */ - /* 2 */ {4, 5, 16, 8, deflate_fast}, - /* 3 */ {4, 6, 32, 32, deflate_fast}, - /* 4 */ {4, 4, 16, 16, deflate_slow}, /* lazy matches */ - /* 5 */ {8, 16, 32, 32, deflate_slow}, - /* 6 */ {8, 16, 128, 128, deflate_slow}, - /* 7 */ {8, 32, 128, 256, deflate_slow}, - /* 8 */ {32, 128, 258, 1024, deflate_slow}, - /* 9 */ {32, 258, 258, 4096, deflate_slow}}; /* max compression */ +local const config configuration_table[10] = { +/* good lazy nice chain */ +/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */ +/* 1 */ {4, 4, 8, 4, deflate_fast}, /* max speed, no lazy matches */ +/* 2 */ {4, 5, 16, 8, deflate_fast}, +/* 3 */ {4, 6, 32, 32, deflate_fast}, + +/* 4 */ {4, 4, 16, 16, deflate_slow}, /* lazy matches */ +/* 5 */ {8, 16, 32, 32, deflate_slow}, +/* 6 */ {8, 16, 128, 128, deflate_slow}, +/* 7 */ {8, 32, 128, 256, deflate_slow}, +/* 8 */ {32, 128, 258, 1024, deflate_slow}, +/* 9 */ {32, 258, 258, 4096, deflate_slow}}; /* max compression */ #endif /* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4 @@ -203,971 +167,1112 @@ static const config configuration_table[10] = { * meaning. */ -/** - * Inserts string str in the dictionary and set match_head to the - * previous head of the hash chain (the most recent string with same - * hash key). - * - * @return previous length of the hash chain. If this file is compiled - * with -DFASTEST, the compression level is forced to 1, and no hash - * chains are maintained. IN assertion: all calls to INSERT_STRING - * are made with consecutive input characters and the first - * MIN_MATCH bytes of str are valid (except for the last MIN_MATCH-1 - * bytes of the input file). +/* rank Z_BLOCK between Z_NO_FLUSH and Z_PARTIAL_FLUSH */ +#define RANK(f) (((f) * 2) - ((f) > 4 ? 9 : 0)) + +/* =========================================================================== + * Initialize the hash table (avoiding 64K overflow for 16 bit systems). + * prev[] will be initialized on the fly. 
+ * TODO(cavalcantii): optimization opportunity, check comments on: + * https://chromium-review.googlesource.com/c/chromium/src/+/3561506/ */ -static inline Pos insert_string_c(struct DeflateState *const s, const Pos str) { - Pos ret; - UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH - 1)]); -#ifdef FASTEST - ret = s->head[s->ins_h]; -#else - ret = s->prev[str & s->w_mask] = s->head[s->ins_h]; -#endif - s->head[s->ins_h] = str; - return ret; -} +#define CLEAR_HASH(s) \ + do { \ + s->head[s->hash_size-1] = NIL; \ + zmemzero((Bytef *)s->head, \ + (unsigned)(s->hash_size-1)*sizeof(*s->head)); \ + } while (0) -static inline Pos insert_string_sse(struct DeflateState *const s, - const Pos str) { - Pos ret; - unsigned *ip, val, h = 0; - ip = (unsigned *)&s->window[str]; - val = READ32LE(ip); /* XXX: val = *ip; */ - if (s->level >= 6) val &= 0xFFFFFF; - asm("crc32\t%1,%0" : "+r"(h) : "r"(val)); - ret = s->head[h & s->hash_mask]; - s->head[h & s->hash_mask] = str; - s->prev[str & s->w_mask] = ret; - return ret; -} - -static inline Pos insert_string(struct DeflateState *const s, const Pos str) { - /* - * String dictionary insertion: faster symbol hashing has a positive - * impact on data compression speeds (around 20% on Intel and 36% on ARM - * Cortex big cores). - * - * A misfeature is that the generated compressed output will differ from - * vanilla zlib (even though it is still valid 'DEFLATE-d' content). - * - * We offer here a way to disable the optimization if there is the - * expectation that compressed content should match when compared to - * vanilla zlib. - */ - if (!IsTiny() && X86_HAVE(SSE4_2)) return insert_string_sse(s, str); - return insert_string_c(s, str); -} - -/** - * Slide the hash table when sliding the window down (could be avoided - * with 32 bit values at the expense of memory usage). We slide even - * when level == 0 to keep the hash table consistent if we switch back - * to level > 0 later. +/* =========================================================================== + * Slide the hash table when sliding the window down (could be avoided with 32 + * bit values at the expense of memory usage). We slide even when level == 0 to + * keep the hash table consistent if we switch back to level > 0 later. */ -static void slide_hash(struct DeflateState *s) { - unsigned n, m; - Posf *p; - uInt wsize = s->w_size; - n = s->hash_size; - p = &s->head[n]; - do { - m = *--p; - *p = (Pos)(m >= wsize ? m - wsize : NIL); - } while (--n); - n = wsize; -#ifndef FASTEST - p = &s->prev[n]; - do { - m = *--p; - *p = (Pos)(m >= wsize ? m - wsize : NIL); - /* If n is not on any hash chain, prev[n] is garbage but - * its value will never be used. 
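To make the rebasing concrete: with the default w_size of 32768, a head[] or prev[] entry of 40000 (a position in the upper half of the double-sized window) is rewritten to 40000 - 32768 = 7232 after the slide, while an entry of 1000 now lies behind the window and is reset to NIL (0), so any hash chain that pointed at it simply ends there.

    40000 >= 32768  ->  40000 - 32768 = 7232   (still addressable after the slide)
     1000 <  32768  ->  NIL (0)                (string has left the window)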
- */ - } while (--n); +local void slide_hash(s) + deflate_state *s; +{ +#if defined(DEFLATE_SLIDE_HASH_SSE2) || defined(DEFLATE_SLIDE_HASH_NEON) + slide_hash_simd(s->head, s->prev, s->w_size, s->hash_size); + return; #endif -} -int deflateInit2(z_streamp strm, int level, int method, int windowBits, - int memLevel, int strategy) { - unsigned window_padding = 8; - struct DeflateState *s; - int wrap = 1; - if (strm == Z_NULL) return Z_STREAM_ERROR; - strm->msg = Z_NULL; - if (strm->zalloc == (alloc_func)0) { - strm->zalloc = zcalloc; - strm->opaque = (voidpf)0; - } - if (strm->zfree == (free_func)0) { - strm->zfree = zcfree; - } -#ifdef FASTEST - if (level != 0) level = 1; -#else - if (level == Z_DEFAULT_COMPRESSION) level = 6; -#endif - if (windowBits < 0) { /* suppress zlib wrapper */ - wrap = 0; - windowBits = -windowBits; - } -#ifdef GZIP - else if (windowBits > 15) { - wrap = 2; /* write gzip wrapper instead */ - windowBits -= 16; - } -#endif - if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED || - windowBits < 8 || windowBits > 15 || level < 0 || level > 9 || - strategy < 0 || strategy > Z_FIXED || (windowBits == 8 && wrap != 1)) { - return Z_STREAM_ERROR; - } - if (windowBits == 8) windowBits = 9; /* until 256-byte window bug fixed */ - s = (struct DeflateState *)ZALLOC(strm, 1, sizeof(struct DeflateState)); - if (s == Z_NULL) return Z_MEM_ERROR; - strm->state = (struct DeflateState *)s; - s->strm = strm; - s->status = INIT_STATE; /* to pass state test in deflateReset() */ + unsigned n, m; + Posf *p; + uInt wsize = s->w_size; - s->wrap = wrap; - s->gzhead = Z_NULL; - s->w_bits = (uInt)windowBits; - s->w_size = 1u << s->w_bits; - s->w_mask = s->w_size - 1; - s->hash_bits = 15; - - s->hash_size = 1u << s->hash_bits; - s->hash_mask = s->hash_size - 1; - s->hash_shift = ((s->hash_bits + MIN_MATCH - 1) / MIN_MATCH); - - s->window = - (Bytef *)ZALLOC(strm, s->w_size + window_padding, 2 * sizeof(Byte)); - bzero(s->window, (s->w_size + window_padding) * (2 * sizeof(Byte))); - s->prev = (Posf *)ZALLOC(strm, s->w_size, sizeof(Pos)); - bzero(s->prev, s->w_size * sizeof(Pos)); - s->head = (Posf *)ZALLOC(strm, s->hash_size, sizeof(Pos)); - - s->high_water = 0; /* nothing written to s->window yet */ - - s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */ - - /* We overlay pending_buf and sym_buf. This works since the average size - * for length/distance pairs over any compressed block is assured to be 31 - * bits or less. - * - * Analysis: The longest fixed codes are a length code of 8 bits plus 5 - * extra bits, for lengths 131 to 257. The longest fixed distance codes are - * 5 bits plus 13 extra bits, for distances 16385 to 32768. The longest - * possible fixed-codes length/distance pair is then 31 bits total. - * - * sym_buf starts one-fourth of the way into pending_buf. So there are - * three bytes in sym_buf for every four bytes in pending_buf. Each symbol - * in sym_buf is three bytes -- two for the distance and one for the - * literal/length. As each symbol is consumed, the pointer to the next - * sym_buf value to read moves forward three bytes. From that symbol, up to - * 31 bits are written to pending_buf. The closest the written pending_buf - * bits gets to the next sym_buf symbol to read is just before the last - * code is written. At that time, 31*(n-2) bits have been written, just - * after 24*(n-2) bits have been consumed from sym_buf. sym_buf starts at - * 8*n bits into pending_buf. 
(Note that the symbol buffer fills when n-1 - * symbols are written.) The closest the writing gets to what is unread is - * then n+14 bits. Here n is lit_bufsize, which is 16384 by default, and - * can range from 128 to 32768. - * - * Therefore, at a minimum, there are 142 bits of space between what is - * written and what is read in the overlain buffers, so the symbols cannot - * be overwritten by the compressed data. That space is actually 139 bits, - * due to the three-bit fixed-code block header. - * - * That covers the case where either Z_FIXED is specified, forcing fixed - * codes, or when the use of fixed codes is chosen, because that choice - * results in a smaller compressed block than dynamic codes. That latter - * condition then assures that the above analysis also covers all dynamic - * blocks. A dynamic-code block will only be chosen to be emitted if it has - * fewer bits than a fixed-code block would for the same set of symbols. - * Therefore its average symbol length is assured to be less than 31. So - * the compressed data for a dynamic block also cannot overwrite the - * symbols from which it is being constructed. - */ - s->pending_buf = (uint8_t *)ZALLOC(strm, s->lit_bufsize, 4); - s->pending_buf_size = (uint64_t)s->lit_bufsize * 4; - - if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL || - s->pending_buf == Z_NULL) { - s->status = FINISH_STATE; - strm->msg = ERR_MSG(Z_MEM_ERROR); - deflateEnd(strm); - return Z_MEM_ERROR; - } - s->sym_buf = s->pending_buf + s->lit_bufsize; - s->sym_end = (s->lit_bufsize - 1) * 3; - /* We avoid equality with lit_bufsize*3 because of wraparound at 64K - * on 16 bit machines and because stored blocks are restricted to - * 64K-1 bytes. - */ - - s->level = level; - s->strategy = strategy; - s->method = (Byte)method; - - return deflateReset(strm); -} - -/** - * Checks for valid deflate stream state. Return 0 if ok, 1 if not. 
- */ -static int deflateStateCheck(z_streamp strm) { - struct DeflateState *s; - if (strm == Z_NULL || strm->zalloc == (alloc_func)0 || - strm->zfree == (free_func)0) - return 1; - s = strm->state; - if (s == Z_NULL || s->strm != strm || - (s->status != INIT_STATE && -#ifdef GZIP - s->status != GZIP_STATE && -#endif - s->status != EXTRA_STATE && s->status != NAME_STATE && - s->status != COMMENT_STATE && s->status != HCRC_STATE && - s->status != BUSY_STATE && s->status != FINISH_STATE)) - return 1; - return 0; -} - -int deflateSetDictionary(z_streamp strm, const Bytef *dictionary, - uInt dictLength) { - struct DeflateState *s; - uInt str, n; - int wrap; - unsigned avail; - const unsigned char *next; - - if (deflateStateCheck(strm) || dictionary == Z_NULL) return Z_STREAM_ERROR; - s = strm->state; - wrap = s->wrap; - if (wrap == 2 || (wrap == 1 && s->status != INIT_STATE) || s->lookahead) - return Z_STREAM_ERROR; - - /* when using zlib wrappers, compute Adler-32 for provided dictionary */ - if (wrap == 1) strm->adler = adler32(strm->adler, dictionary, dictLength); - s->wrap = 0; /* avoid computing Adler-32 in deflate_read_buf */ - - /* if dictionary would fill window, just replace the history */ - if (dictLength >= s->w_size) { - if (wrap == 0) { /* already empty otherwise */ - CLEAR_HASH(s); - s->strstart = 0; - s->block_start = 0L; - s->insert = 0; - } - dictionary += dictLength - s->w_size; /* use the tail */ - dictLength = s->w_size; - } - - /* insert dictionary into window and hash */ - avail = strm->avail_in; - next = strm->next_in; - strm->avail_in = dictLength; - strm->next_in = (const Bytef *)dictionary; - fill_window_sse(s); - while (s->lookahead >= MIN_MATCH) { - str = s->strstart; - n = s->lookahead - (MIN_MATCH - 1); + n = s->hash_size; + p = &s->head[n]; do { - insert_string(s, str); - str++; + m = *--p; + *p = (Pos)(m >= wsize ? m - wsize : NIL); + } while (--n); + n = wsize; +#ifndef FASTEST + p = &s->prev[n]; + do { + m = *--p; + *p = (Pos)(m >= wsize ? m - wsize : NIL); + /* If n is not on any hash chain, prev[n] is garbage but + * its value will never be used. + */ } while (--n); - s->strstart = str; - s->lookahead = MIN_MATCH - 1; - fill_window_sse(s); - } - s->strstart += s->lookahead; - s->block_start = (long)s->strstart; - s->insert = s->lookahead; - s->lookahead = 0; - s->match_length = s->prev_length = MIN_MATCH - 1; - s->match_available = 0; - strm->next_in = next; - strm->avail_in = avail; - s->wrap = wrap; - return Z_OK; -} - -int deflateGetDictionary(z_streamp strm, Bytef *dictionary, uInt *dictLength) { - struct DeflateState *s; - uInt len; - if (deflateStateCheck(strm)) return Z_STREAM_ERROR; - s = strm->state; - len = s->strstart + s->lookahead; - if (len > s->w_size) len = s->w_size; - if (dictionary != Z_NULL && len) - memcpy(dictionary, s->window + s->strstart + s->lookahead - len, len); - if (dictLength != Z_NULL) *dictLength = len; - return Z_OK; -} - -int deflateResetKeep(z_streamp strm) { - struct DeflateState *s; - - if (deflateStateCheck(strm)) { - return Z_STREAM_ERROR; - } - - strm->total_in = strm->total_out = 0; - strm->msg = Z_NULL; /* use zfree if we ever allocate msg dynamically */ - strm->data_type = Z_UNKNOWN; - - s = (struct DeflateState *)strm->state; - s->pending = 0; - s->pending_out = s->pending_buf; - - if (s->wrap < 0) { - s->wrap = -s->wrap; /* was made negative by deflate(..., Z_FINISH); */ - } - s->status = -#ifdef GZIP - s->wrap == 2 ? GZIP_STATE : #endif - s->wrap ? 
INIT_STATE - : BUSY_STATE; - strm->adler = -#ifdef GZIP - s->wrap == 2 ? crc32(0L, Z_NULL, 0) : +} + +/* ========================================================================= */ +int ZEXPORT deflateInit(strm, level) + z_streamp strm; + int level; +{ + return deflateInit2(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL, + Z_DEFAULT_STRATEGY); + /* To do: ignore strm->next_in if we use it as window */ +} + +/* ========================================================================= */ +int ZEXPORT deflateInit2(strm, level, method, windowBits, memLevel, strategy) + z_streamp strm; + int level; + int method; + int windowBits; + int memLevel; + int strategy; +{ + unsigned window_padding = 8; + deflate_state *s; + int wrap = 1; + + // Needed to activate optimized insert_string() that helps compression + // for all wrapper formats (e.g. RAW, ZLIB, GZIP). + // Feature detection is not triggered while using RAW mode (i.e. we never + // call crc32() with a NULL buffer). +#if defined(CRC32_ARMV8_CRC32) || defined(CRC32_SIMD_SSE42_PCLMUL) + cpu_check_features(); #endif - adler32(0L, Z_NULL, 0); - s->last_flush = Z_NO_FLUSH; - _tr_init(s); + if (strm == Z_NULL) return Z_STREAM_ERROR; - return Z_OK; -} - -int deflateReset(z_streamp strm) { - int ret; - ret = deflateResetKeep(strm); - if (ret == Z_OK) lm_init(strm->state); - return ret; -} - -int deflateSetHeader(z_streamp strm, gz_headerp head) { - if (deflateStateCheck(strm) || strm->state->wrap != 2) return Z_STREAM_ERROR; - strm->state->gzhead = head; - return Z_OK; -} - -int deflatePending(z_streamp strm, unsigned *pending, int *bits) { - if (deflateStateCheck(strm)) return Z_STREAM_ERROR; - if (pending != Z_NULL) *pending = strm->state->pending; - if (bits != Z_NULL) *bits = strm->state->bi_valid; - return Z_OK; -} - -int deflatePrime(z_streamp strm, int bits, int value) { - struct DeflateState *s; - int put; - if (deflateStateCheck(strm)) return Z_STREAM_ERROR; - s = strm->state; - if (s->sym_buf < s->pending_out + ((Buf_size + 7) >> 3)) return Z_BUF_ERROR; - do { - put = Buf_size - s->bi_valid; - if (put > bits) put = bits; - s->bi_buf |= (uint16_t)((value & ((1u << put) - 1)) << s->bi_valid); - s->bi_valid += put; - _tr_flush_bits(s); - value >>= put; - bits -= put; - } while (bits); - return Z_OK; -} - -int deflateParams(z_streamp strm, int level, int strategy) { - struct DeflateState *s; - compress_func func; - - if (deflateStateCheck(strm)) return Z_STREAM_ERROR; - s = strm->state; + strm->msg = Z_NULL; + if (strm->zalloc == (alloc_func)0) { +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zalloc = zcalloc; + strm->opaque = (voidpf)0; +#endif + } + if (strm->zfree == (free_func)0) +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zfree = zcfree; +#endif #ifdef FASTEST - if (level != 0) level = 1; + if (level != 0) level = 1; #else - if (level == Z_DEFAULT_COMPRESSION) level = 6; + if (level == Z_DEFAULT_COMPRESSION) level = 6; #endif - if (level < 0 || level > 9 || strategy < 0 || strategy > Z_FIXED) { - return Z_STREAM_ERROR; - } - func = configuration_table[s->level].func; - if ((strategy != s->strategy || func != configuration_table[level].func) && - s->high_water) { - /* Flush the last buffer: */ - int err = deflate(strm, Z_BLOCK); - if (err == Z_STREAM_ERROR) return err; - if (strm->avail_out == 0) return Z_BUF_ERROR; - } - if (s->level != level) { - if (s->level == 0 && s->matches != 0) { - if (s->matches == 1) - slide_hash(s); - else - CLEAR_HASH(s); - s->matches = 0; + if (windowBits < 0) { /* suppress zlib wrapper 
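The windowBits argument doubles as a wrapper selector: a negative value (handled here) requests raw deflate with no header or trailer, 8 through 15 request the zlib wrapper, and adding 16 to the window size (handled a few lines further down) requests a gzip wrapper instead. A minimal caller-side sketch of the three conventions, assuming the usual memLevel of 8; the helper is illustrative only:

    #include "third_party/zlib/zlib.h"

    /* Sketch: raw, zlib, and gzip framing are all chosen through windowBits. */
    static int init_for_wrapper(z_stream *zs, int wrapper) {
        int wbits = wrapper == 0 ? -MAX_WBITS       /* raw deflate  */
                  : wrapper == 1 ?  MAX_WBITS       /* zlib wrapper */
                  :                 MAX_WBITS + 16; /* gzip wrapper */
        return deflateInit2(zs, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
                            wbits, 8, Z_DEFAULT_STRATEGY);
    }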
*/ + wrap = 0; + windowBits = -windowBits; } - s->level = level; - s->max_lazy_match = configuration_table[level].max_lazy; - s->good_match = configuration_table[level].good_length; - s->nice_match = configuration_table[level].nice_length; - s->max_chain_length = configuration_table[level].max_chain; - } - s->strategy = strategy; - return Z_OK; -} - -int deflateTune(z_streamp strm, int good_length, int max_lazy, int nice_length, - int max_chain) { - struct DeflateState *s; - if (deflateStateCheck(strm)) return Z_STREAM_ERROR; - s = strm->state; - s->good_match = (uInt)good_length; - s->max_lazy_match = (uInt)max_lazy; - s->nice_match = nice_length; - s->max_chain_length = (uInt)max_chain; - return Z_OK; -} - -/** - * For the default windowBits of 15 and memLevel of 8, this function - * returns a close to exact, as well as small, upper bound on the - * compressed size. They are coded as constants here for a reason--if - * the #define's are changed, then this function needs to be changed as - * well. The return value for 15 and 8 only works for those exact - * settings. - * - * For any setting other than those defaults for windowBits and - * memLevel, the value returned is a conservative worst case for the - * maximum expansion resulting from using fixed blocks instead of stored - * blocks, which deflate can emit on compressed data for some - * combinations of the parameters. - * - * This function could be more sophisticated to provide closer upper - * bounds for every combination of windowBits and memLevel. But even the - * conservative upper bound of about 14% expansion does not seem onerous - * for output buffer allocation. - */ -uLong deflateBound(z_streamp strm, uLong sourceLen) { - struct DeflateState *s; - uLong complen, wraplen; - - /* conservative upper bound for compressed data */ - complen = sourceLen + ((sourceLen + 7) >> 3) + ((sourceLen + 63) >> 6) + 5; - - /* if can't get parameters, return conservative bound plus zlib wrapper */ - if (deflateStateCheck(strm)) return complen + 6; - - /* compute wrapper length */ - s = strm->state; - switch (s->wrap) { - case 0: /* raw deflate */ - wraplen = 0; - break; - case 1: /* zlib wrapper */ - wraplen = 6 + (s->strstart ? 4 : 0); - break; #ifdef GZIP - case 2: /* gzip wrapper */ - wraplen = 18; - if (s->gzhead != Z_NULL) { /* user-supplied gzip header */ - Bytef *str; - if (s->gzhead->extra != Z_NULL) wraplen += 2 + s->gzhead->extra_len; - str = s->gzhead->name; - if (str != Z_NULL) do { - wraplen++; - } while (*str++); - str = s->gzhead->comment; - if (str != Z_NULL) do { - wraplen++; - } while (*str++); - if (s->gzhead->hcrc) wraplen += 2; - } - break; -#endif - default: /* for compiler happiness */ - wraplen = 6; - } - - /* if not default parameters, return conservative bound */ - if (s->w_bits != 15 || s->hash_bits != 8 + 7) return complen + wraplen; - - /* default settings: return tight bound for that case */ - return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + (sourceLen >> 25) + - 13 - 6 + wraplen; -} - -/** - * Puts short in the pending buffer. The 16-bit value is put in MSB - * order. IN assertion: the stream state is correct and there is enough - * room in pending_buf. - */ -static void putShortMSB(struct DeflateState *s, uInt b) { - put_byte(s, (Byte)(b >> 8)); - put_byte(s, (Byte)(b & 0xff)); -} - -/** - * Flush as much pending output as possible. 
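To put the tight bound returned by deflateBound() above into numbers: for a 1 MiB input with the default windowBits of 15 and memLevel of 8, using the zlib wrapper before any data has been consumed (so wraplen is 6), the expression evaluates to

    1,048,576 + (1,048,576 >> 12) + (1,048,576 >> 14) + (1,048,576 >> 25) + 13 - 6 + 6
      = 1,048,576 + 256 + 64 + 0 + 13
      = 1,048,909 bytes

roughly 0.03% of headroom over the source length, which is why callers can size a one-shot output buffer directly from deflateBound().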
All deflate() output, - * except for some deflate_stored() output, goes through this function - * so some applications may wish to modify it to avoid allocating a - * large strm->next_out buffer and copying into it. (See also - * deflate_read_buf()). - */ -static void flush_pending(z_streamp strm) { - unsigned len; - struct DeflateState *s = strm->state; - _tr_flush_bits(s); - len = s->pending; - if (len > strm->avail_out) len = strm->avail_out; - if (len == 0) return; - memcpy(strm->next_out, s->pending_out, len); - strm->next_out += len; - s->pending_out += len; - strm->total_out += len; - strm->avail_out -= len; - s->pending -= len; - if (s->pending == 0) { - s->pending_out = s->pending_buf; - } -} - -int deflate(z_streamp strm, int flush) { - int old_flush; /* value of flush param for previous deflate call */ - struct DeflateState *s; - - if (deflateStateCheck(strm) || flush > Z_BLOCK || flush < 0) { - return Z_STREAM_ERROR; - } - s = strm->state; - - if (strm->next_out == Z_NULL || - (strm->avail_in != 0 && strm->next_in == Z_NULL) || - (s->status == FINISH_STATE && flush != Z_FINISH)) { - ERR_RETURN(strm, Z_STREAM_ERROR); - } - if (strm->avail_out == 0) ERR_RETURN(strm, Z_BUF_ERROR); - - old_flush = s->last_flush; - s->last_flush = flush; - - /* Flush as much pending output as possible */ - if (s->pending != 0) { - flush_pending(strm); - if (strm->avail_out == 0) { - /* Since avail_out is 0, deflate will be called again with - * more output space, but possibly with both pending and - * avail_in equal to zero. There won't be anything to do, - * but this is not an error situation so make sure we - * return OK instead of BUF_ERROR at next call of deflate: - */ - s->last_flush = -1; - return Z_OK; + else if (windowBits > 15) { + wrap = 2; /* write gzip wrapper instead */ + windowBits -= 16; } +#endif + if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED || + windowBits < 8 || windowBits > 15 || level < 0 || level > 9 || + strategy < 0 || strategy > Z_FIXED || (windowBits == 8 && wrap != 1)) { + return Z_STREAM_ERROR; + } + if (windowBits == 8) windowBits = 9; /* until 256-byte window bug fixed */ + s = (deflate_state *) ZALLOC(strm, 1, sizeof(deflate_state)); + if (s == Z_NULL) return Z_MEM_ERROR; + strm->state = (struct internal_state FAR *)s; + s->strm = strm; + s->status = INIT_STATE; /* to pass state test in deflateReset() */ + + s->wrap = wrap; + s->gzhead = Z_NULL; + s->w_bits = (uInt)windowBits; + s->w_size = 1 << s->w_bits; + s->w_mask = s->w_size - 1; + + s->chromium_zlib_hash = 0; +#if !defined(USE_ZLIB_RABIN_KARP_ROLLING_HASH) + #if defined(TARGET_CPU_WITH_CRC) && defined(CRC32_SIMD_SSE42_PCLMUL) + if (x86_cpu_enable_simd) + s->chromium_zlib_hash = 1; + #elif defined(TARGET_CPU_WITH_CRC) && defined(CRC32_ARMV8_CRC32) + if (arm_cpu_enable_crc32) + s->chromium_zlib_hash = 1; + #endif +#endif + + s->hash_bits = memLevel + 7; + if (s->chromium_zlib_hash && s->hash_bits < 15) { + s->hash_bits = 15; + } + + s->hash_size = 1 << s->hash_bits; + s->hash_mask = s->hash_size - 1; + s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH); + + s->window = (Bytef *) ZALLOC(strm, + s->w_size + window_padding, + 2*sizeof(Byte)); + /* Avoid use of unitialized values in the window, see crbug.com/1137613 and + * crbug.com/1144420 */ + zmemzero(s->window, (s->w_size + window_padding) * (2 * sizeof(Byte))); + s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos)); + /* Avoid use of uninitialized value, see: + * https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=11360 + */ + 
zmemzero(s->prev, s->w_size * sizeof(Pos)); + s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos)); + + s->high_water = 0; /* nothing written to s->window yet */ + + s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */ + + /* We overlay pending_buf and sym_buf. This works since the average size + * for length/distance pairs over any compressed block is assured to be 31 + * bits or less. + * + * Analysis: The longest fixed codes are a length code of 8 bits plus 5 + * extra bits, for lengths 131 to 257. The longest fixed distance codes are + * 5 bits plus 13 extra bits, for distances 16385 to 32768. The longest + * possible fixed-codes length/distance pair is then 31 bits total. + * + * sym_buf starts one-fourth of the way into pending_buf. So there are + * three bytes in sym_buf for every four bytes in pending_buf. Each symbol + * in sym_buf is three bytes -- two for the distance and one for the + * literal/length. As each symbol is consumed, the pointer to the next + * sym_buf value to read moves forward three bytes. From that symbol, up to + * 31 bits are written to pending_buf. The closest the written pending_buf + * bits gets to the next sym_buf symbol to read is just before the last + * code is written. At that time, 31*(n-2) bits have been written, just + * after 24*(n-2) bits have been consumed from sym_buf. sym_buf starts at + * 8*n bits into pending_buf. (Note that the symbol buffer fills when n-1 + * symbols are written.) The closest the writing gets to what is unread is + * then n+14 bits. Here n is lit_bufsize, which is 16384 by default, and + * can range from 128 to 32768. + * + * Therefore, at a minimum, there are 142 bits of space between what is + * written and what is read in the overlain buffers, so the symbols cannot + * be overwritten by the compressed data. That space is actually 139 bits, + * due to the three-bit fixed-code block header. + * + * That covers the case where either Z_FIXED is specified, forcing fixed + * codes, or when the use of fixed codes is chosen, because that choice + * results in a smaller compressed block than dynamic codes. That latter + * condition then assures that the above analysis also covers all dynamic + * blocks. A dynamic-code block will only be chosen to be emitted if it has + * fewer bits than a fixed-code block would for the same set of symbols. + * Therefore its average symbol length is assured to be less than 31. So + * the compressed data for a dynamic block also cannot overwrite the + * symbols from which it is being constructed. + */ + s->pending_buf = (uchf *) ZALLOC(strm, s->lit_bufsize, 4); + s->pending_buf_size = (ulg)s->lit_bufsize * 4; + + if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL || + s->pending_buf == Z_NULL) { + s->status = FINISH_STATE; + strm->msg = ERR_MSG(Z_MEM_ERROR); + deflateEnd (strm); + return Z_MEM_ERROR; + } + s->sym_buf = s->pending_buf + s->lit_bufsize; + s->sym_end = (s->lit_bufsize - 1) * 3; + /* We avoid equality with lit_bufsize*3 because of wraparound at 64K + * on 16 bit machines and because stored blocks are restricted to + * 64K-1 bytes. + */ + + s->level = level; + s->strategy = strategy; + s->method = (Byte)method; + + return deflateReset(strm); +} + +/* ========================================================================= + * Check for a valid deflate stream state. Return 0 if ok, 1 if not. 
+ */ +local int deflateStateCheck (strm) + z_streamp strm; +{ + deflate_state *s; + if (strm == Z_NULL || + strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0) + return 1; + s = strm->state; + if (s == Z_NULL || s->strm != strm || (s->status != INIT_STATE && +#ifdef GZIP + s->status != GZIP_STATE && +#endif + s->status != EXTRA_STATE && + s->status != NAME_STATE && + s->status != COMMENT_STATE && + s->status != HCRC_STATE && + s->status != BUSY_STATE && + s->status != FINISH_STATE)) + return 1; + return 0; +} + +/* ========================================================================= */ +int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength) + z_streamp strm; + const Bytef *dictionary; + uInt dictLength; +{ + deflate_state *s; + uInt str, n; + int wrap; + unsigned avail; + z_const unsigned char *next; + + if (deflateStateCheck(strm) || dictionary == Z_NULL) + return Z_STREAM_ERROR; + s = strm->state; + wrap = s->wrap; + if (wrap == 2 || (wrap == 1 && s->status != INIT_STATE) || s->lookahead) + return Z_STREAM_ERROR; + + /* when using zlib wrappers, compute Adler-32 for provided dictionary */ + if (wrap == 1) + strm->adler = adler32(strm->adler, dictionary, dictLength); + s->wrap = 0; /* avoid computing Adler-32 in read_buf */ + + /* if dictionary would fill window, just replace the history */ + if (dictLength >= s->w_size) { + if (wrap == 0) { /* already empty otherwise */ + CLEAR_HASH(s); + s->strstart = 0; + s->block_start = 0L; + s->insert = 0; + } + dictionary += dictLength - s->w_size; /* use the tail */ + dictLength = s->w_size; + } + + /* insert dictionary into window and hash */ + avail = strm->avail_in; + next = strm->next_in; + strm->avail_in = dictLength; + strm->next_in = (z_const Bytef *)dictionary; + fill_window(s); + while (s->lookahead >= MIN_MATCH) { + str = s->strstart; + n = s->lookahead - (MIN_MATCH-1); + do { + insert_string(s, str); + str++; + } while (--n); + s->strstart = str; + s->lookahead = MIN_MATCH-1; + fill_window(s); + } + s->strstart += s->lookahead; + s->block_start = (long)s->strstart; + s->insert = s->lookahead; + s->lookahead = 0; + s->match_length = s->prev_length = MIN_MATCH-1; + s->match_available = 0; + strm->next_in = next; + strm->avail_in = avail; + s->wrap = wrap; + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateGetDictionary (strm, dictionary, dictLength) + z_streamp strm; + Bytef *dictionary; + uInt *dictLength; +{ + deflate_state *s; + uInt len; + + if (deflateStateCheck(strm)) + return Z_STREAM_ERROR; + s = strm->state; + len = s->strstart + s->lookahead; + if (len > s->w_size) + len = s->w_size; + if (dictionary != Z_NULL && len) + zmemcpy(dictionary, s->window + s->strstart + s->lookahead - len, len); + if (dictLength != Z_NULL) + *dictLength = len; + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateResetKeep (strm) + z_streamp strm; +{ + deflate_state *s; + + if (deflateStateCheck(strm)) { + return Z_STREAM_ERROR; + } + + strm->total_in = strm->total_out = 0; + strm->msg = Z_NULL; /* use zfree if we ever allocate msg dynamically */ + strm->data_type = Z_UNKNOWN; + + s = (deflate_state *)strm->state; + s->pending = 0; + s->pending_out = s->pending_buf; + + if (s->wrap < 0) { + s->wrap = -s->wrap; /* was made negative by deflate(..., Z_FINISH); */ + } + s->status = +#ifdef GZIP + s->wrap == 2 ? GZIP_STATE : +#endif + INIT_STATE; + strm->adler = +#ifdef GZIP + s->wrap == 2 ? 
crc32(0L, Z_NULL, 0) : +#endif + adler32(0L, Z_NULL, 0); + s->last_flush = -2; + + _tr_init(s); + + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateReset (strm) + z_streamp strm; +{ + int ret; + + ret = deflateResetKeep(strm); + if (ret == Z_OK) + lm_init(strm->state); + return ret; +} + +/* ========================================================================= */ +int ZEXPORT deflateSetHeader (strm, head) + z_streamp strm; + gz_headerp head; +{ + if (deflateStateCheck(strm) || strm->state->wrap != 2) + return Z_STREAM_ERROR; + strm->state->gzhead = head; + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflatePending (strm, pending, bits) + unsigned *pending; + int *bits; + z_streamp strm; +{ + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; + if (pending != Z_NULL) + *pending = strm->state->pending; + if (bits != Z_NULL) + *bits = strm->state->bi_valid; + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflatePrime (strm, bits, value) + z_streamp strm; + int bits; + int value; +{ + deflate_state *s; + int put; + + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; + s = strm->state; + if (bits < 0 || bits > 16 || + s->sym_buf < s->pending_out + ((Buf_size + 7) >> 3)) + return Z_BUF_ERROR; + do { + put = Buf_size - s->bi_valid; + if (put > bits) + put = bits; + s->bi_buf |= (ush)((value & ((1 << put) - 1)) << s->bi_valid); + s->bi_valid += put; + _tr_flush_bits(s); + value >>= put; + bits -= put; + } while (bits); + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateParams(strm, level, strategy) + z_streamp strm; + int level; + int strategy; +{ + deflate_state *s; + compress_func func; + + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; + s = strm->state; + +#ifdef FASTEST + if (level != 0) level = 1; +#else + if (level == Z_DEFAULT_COMPRESSION) level = 6; +#endif + if (level < 0 || level > 9 || strategy < 0 || strategy > Z_FIXED) { + return Z_STREAM_ERROR; + } + func = configuration_table[s->level].func; + + if ((strategy != s->strategy || func != configuration_table[level].func) && + s->last_flush != -2) { + /* Flush the last buffer: */ + int err = deflate(strm, Z_BLOCK); + if (err == Z_STREAM_ERROR) + return err; + if (strm->avail_in || (s->strstart - s->block_start) + s->lookahead) + return Z_BUF_ERROR; + } + if (s->level != level) { + if (s->level == 0 && s->matches != 0) { + if (s->matches == 1) + slide_hash(s); + else + CLEAR_HASH(s); + s->matches = 0; + } + s->level = level; + s->max_lazy_match = configuration_table[level].max_lazy; + s->good_match = configuration_table[level].good_length; + s->nice_match = configuration_table[level].nice_length; + s->max_chain_length = configuration_table[level].max_chain; + } + s->strategy = strategy; + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateTune(strm, good_length, max_lazy, nice_length, max_chain) + z_streamp strm; + int good_length; + int max_lazy; + int nice_length; + int max_chain; +{ + deflate_state *s; + + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; + s = strm->state; + s->good_match = (uInt)good_length; + s->max_lazy_match = (uInt)max_lazy; + s->nice_match = nice_length; + s->max_chain_length = (uInt)max_chain; + return Z_OK; +} + +/* 
========================================================================= + * For the default windowBits of 15 and memLevel of 8, this function returns + * a close to exact, as well as small, upper bound on the compressed size. + * They are coded as constants here for a reason--if the #define's are + * changed, then this function needs to be changed as well. The return + * value for 15 and 8 only works for those exact settings. + * + * For any setting other than those defaults for windowBits and memLevel, + * the value returned is a conservative worst case for the maximum expansion + * resulting from using fixed blocks instead of stored blocks, which deflate + * can emit on compressed data for some combinations of the parameters. + * + * This function could be more sophisticated to provide closer upper bounds for + * every combination of windowBits and memLevel. But even the conservative + * upper bound of about 14% expansion does not seem onerous for output buffer + * allocation. + */ +uLong ZEXPORT deflateBound(strm, sourceLen) + z_streamp strm; + uLong sourceLen; +{ + deflate_state *s; + uLong complen, wraplen; + + /* conservative upper bound for compressed data */ + complen = sourceLen + + ((sourceLen + 7) >> 3) + ((sourceLen + 63) >> 6) + 5; + + /* if can't get parameters, return conservative bound plus zlib wrapper */ + if (deflateStateCheck(strm)) + return complen + 6; + + /* compute wrapper length */ + s = strm->state; + switch (s->wrap) { + case 0: /* raw deflate */ + wraplen = 0; + break; + case 1: /* zlib wrapper */ + wraplen = 6 + (s->strstart ? 4 : 0); + break; +#ifdef GZIP + case 2: /* gzip wrapper */ + wraplen = 18; + if (s->gzhead != Z_NULL) { /* user-supplied gzip header */ + Bytef *str; + if (s->gzhead->extra != Z_NULL) + wraplen += 2 + s->gzhead->extra_len; + str = s->gzhead->name; + if (str != Z_NULL) + do { + wraplen++; + } while (*str++); + str = s->gzhead->comment; + if (str != Z_NULL) + do { + wraplen++; + } while (*str++); + if (s->gzhead->hcrc) + wraplen += 2; + } + break; +#endif + default: /* for compiler happiness */ + wraplen = 6; + } + + /* if not default parameters, return conservative bound */ + if (s->w_bits != 15 || s->hash_bits != 8 + 7) + return complen + wraplen; + + /* default settings: return tight bound for that case */ + return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + + (sourceLen >> 25) + 13 - 6 + wraplen; +} + +/* ========================================================================= + * Put a short in the pending buffer. The 16-bit value is put in MSB order. + * IN assertion: the stream state is correct and there is enough room in + * pending_buf. + */ +local void putShortMSB (s, b) + deflate_state *s; + uInt b; +{ + put_byte(s, (Byte)(b >> 8)); + put_byte(s, (Byte)(b & 0xff)); +} + +/* ========================================================================= + * Flush as much pending output as possible. All deflate() output, except for + * some deflate_stored() output, goes through this function so some + * applications may wish to modify it to avoid allocating a large + * strm->next_out buffer and copying into it. (See also read_buf()). 
+ */ +local void flush_pending(strm) + z_streamp strm; +{ + unsigned len; + deflate_state *s = strm->state; + + _tr_flush_bits(s); + len = s->pending; + if (len > strm->avail_out) len = strm->avail_out; + if (len == 0) return; + + zmemcpy(strm->next_out, s->pending_out, len); + strm->next_out += len; + s->pending_out += len; + strm->total_out += len; + strm->avail_out -= len; + s->pending -= len; + if (s->pending == 0) { + s->pending_out = s->pending_buf; + } +} + +/* =========================================================================== + * Update the header CRC with the bytes s->pending_buf[beg..s->pending - 1]. + */ +#define HCRC_UPDATE(beg) \ + do { \ + if (s->gzhead->hcrc && s->pending > (beg)) \ + strm->adler = crc32(strm->adler, s->pending_buf + (beg), \ + s->pending - (beg)); \ + } while (0) + +/* ========================================================================= */ +int ZEXPORT deflate (strm, flush) + z_streamp strm; + int flush; +{ + int old_flush; /* value of flush param for previous deflate call */ + deflate_state *s; + + if (deflateStateCheck(strm) || flush > Z_BLOCK || flush < 0) { + return Z_STREAM_ERROR; + } + s = strm->state; + + if (strm->next_out == Z_NULL || + (strm->avail_in != 0 && strm->next_in == Z_NULL) || + (s->status == FINISH_STATE && flush != Z_FINISH)) { + ERR_RETURN(strm, Z_STREAM_ERROR); + } + if (strm->avail_out == 0) ERR_RETURN(strm, Z_BUF_ERROR); + + old_flush = s->last_flush; + s->last_flush = flush; + + /* Flush as much pending output as possible */ + if (s->pending != 0) { + flush_pending(strm); + if (strm->avail_out == 0) { + /* Since avail_out is 0, deflate will be called again with + * more output space, but possibly with both pending and + * avail_in equal to zero. There won't be anything to do, + * but this is not an error situation so make sure we + * return OK instead of BUF_ERROR at next call of deflate: + */ + s->last_flush = -1; + return Z_OK; + } /* Make sure there is something to do and avoid duplicate consecutive * flushes. For repeated and useless calls with Z_FINISH, we keep * returning Z_STREAM_END instead of Z_BUF_ERROR. 
*/ - } else if (strm->avail_in == 0 && RANK(flush) <= RANK(old_flush) && - flush != Z_FINISH) { - ERR_RETURN(strm, Z_BUF_ERROR); - } - - /* User must not provide more input after the first FINISH: */ - if (s->status == FINISH_STATE && strm->avail_in != 0) { - ERR_RETURN(strm, Z_BUF_ERROR); - } - - /* Write the header */ - if (s->status == INIT_STATE) { - /* zlib header */ - uInt header = (Z_DEFLATED + ((s->w_bits - 8) << 4)) << 8; - uInt level_flags; - - if (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2) - level_flags = 0; - else if (s->level < 6) - level_flags = 1; - else if (s->level == 6) - level_flags = 2; - else - level_flags = 3; - header |= (level_flags << 6); - if (s->strstart != 0) header |= PRESET_DICT; - header += 31 - (header % 31); - - putShortMSB(s, header); - - /* Save the adler32 of the preset dictionary: */ - if (s->strstart != 0) { - putShortMSB(s, (uInt)(strm->adler >> 16)); - putShortMSB(s, (uInt)(strm->adler & 0xffff)); + } else if (strm->avail_in == 0 && RANK(flush) <= RANK(old_flush) && + flush != Z_FINISH) { + ERR_RETURN(strm, Z_BUF_ERROR); } - strm->adler = adler32(0L, Z_NULL, 0); - s->status = BUSY_STATE; - /* Compression must start with an empty pending buffer */ - flush_pending(strm); - if (s->pending != 0) { - s->last_flush = -1; - return Z_OK; + /* User must not provide more input after the first FINISH: */ + if (s->status == FINISH_STATE && strm->avail_in != 0) { + ERR_RETURN(strm, Z_BUF_ERROR); + } + + /* Write the header */ + if (s->status == INIT_STATE && s->wrap == 0) + s->status = BUSY_STATE; + if (s->status == INIT_STATE) { + /* zlib header */ + uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8; + uInt level_flags; + + if (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2) + level_flags = 0; + else if (s->level < 6) + level_flags = 1; + else if (s->level == 6) + level_flags = 2; + else + level_flags = 3; + header |= (level_flags << 6); + if (s->strstart != 0) header |= PRESET_DICT; + header += 31 - (header % 31); + + putShortMSB(s, header); + + /* Save the adler32 of the preset dictionary: */ + if (s->strstart != 0) { + putShortMSB(s, (uInt)(strm->adler >> 16)); + putShortMSB(s, (uInt)(strm->adler & 0xffff)); + } + strm->adler = adler32(0L, Z_NULL, 0); + s->status = BUSY_STATE; + + /* Compression must start with an empty pending buffer */ + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; + } } - } #ifdef GZIP - if (s->status == GZIP_STATE) { - /* gzip header */ - crc_reset(s); - put_byte(s, 31); - put_byte(s, 139); - put_byte(s, 8); - if (s->gzhead == Z_NULL) { - put_byte(s, 0); - put_byte(s, 0); - put_byte(s, 0); - put_byte(s, 0); - put_byte(s, 0); - put_byte(s, - s->level == 9 - ? 2 - : (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ? 4 : 0)); - put_byte(s, OS_CODE); - s->status = BUSY_STATE; + if (s->status == GZIP_STATE) { + /* gzip header */ + crc_reset(s); + put_byte(s, 31); + put_byte(s, 139); + put_byte(s, 8); + if (s->gzhead == Z_NULL) { + put_byte(s, 0); + put_byte(s, 0); + put_byte(s, 0); + put_byte(s, 0); + put_byte(s, 0); + put_byte(s, s->level == 9 ? 2 : + (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ? + 4 : 0)); + put_byte(s, OS_CODE); + s->status = BUSY_STATE; - /* Compression must start with an empty pending buffer */ - flush_pending(strm); - if (s->pending != 0) { - s->last_flush = -1; - return Z_OK; - } - } else { - put_byte(s, (s->gzhead->text ? 1 : 0) + (s->gzhead->hcrc ? 2 : 0) + - (s->gzhead->extra == Z_NULL ? 0 : 4) + - (s->gzhead->name == Z_NULL ? 0 : 8) + - (s->gzhead->comment == Z_NULL ? 
0 : 16)); - put_byte(s, (Byte)(s->gzhead->time & 0xff)); - put_byte(s, (Byte)((s->gzhead->time >> 8) & 0xff)); - put_byte(s, (Byte)((s->gzhead->time >> 16) & 0xff)); - put_byte(s, (Byte)((s->gzhead->time >> 24) & 0xff)); - put_byte(s, - s->level == 9 - ? 2 - : (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ? 4 : 0)); - put_byte(s, s->gzhead->os & 0xff); - if (s->gzhead->extra != Z_NULL) { - put_byte(s, s->gzhead->extra_len & 0xff); - put_byte(s, (s->gzhead->extra_len >> 8) & 0xff); - } - if (s->gzhead->hcrc) - strm->adler = crc32(strm->adler, s->pending_buf, s->pending); - s->gzindex = 0; - s->status = EXTRA_STATE; + /* Compression must start with an empty pending buffer */ + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; + } + } + else { + put_byte(s, (s->gzhead->text ? 1 : 0) + + (s->gzhead->hcrc ? 2 : 0) + + (s->gzhead->extra == Z_NULL ? 0 : 4) + + (s->gzhead->name == Z_NULL ? 0 : 8) + + (s->gzhead->comment == Z_NULL ? 0 : 16) + ); + put_byte(s, (Byte)(s->gzhead->time & 0xff)); + put_byte(s, (Byte)((s->gzhead->time >> 8) & 0xff)); + put_byte(s, (Byte)((s->gzhead->time >> 16) & 0xff)); + put_byte(s, (Byte)((s->gzhead->time >> 24) & 0xff)); + put_byte(s, s->level == 9 ? 2 : + (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ? + 4 : 0)); + put_byte(s, s->gzhead->os & 0xff); + if (s->gzhead->extra != Z_NULL) { + put_byte(s, s->gzhead->extra_len & 0xff); + put_byte(s, (s->gzhead->extra_len >> 8) & 0xff); + } + if (s->gzhead->hcrc) + strm->adler = crc32(strm->adler, s->pending_buf, + s->pending); + s->gzindex = 0; + s->status = EXTRA_STATE; + } } - } - if (s->status == EXTRA_STATE) { - if (s->gzhead->extra != Z_NULL) { - uint64_t beg = s->pending; /* start of bytes to update crc */ - uInt left = (s->gzhead->extra_len & 0xffff) - s->gzindex; - while (s->pending + left > s->pending_buf_size) { - uInt copy = s->pending_buf_size - s->pending; - memcpy(s->pending_buf + s->pending, s->gzhead->extra + s->gzindex, - copy); - s->pending = s->pending_buf_size; - HCRC_UPDATE(beg); - s->gzindex += copy; + if (s->status == EXTRA_STATE) { + if (s->gzhead->extra != Z_NULL) { + ulg beg = s->pending; /* start of bytes to update crc */ + uInt left = (s->gzhead->extra_len & 0xffff) - s->gzindex; + while (s->pending + left > s->pending_buf_size) { + uInt copy = s->pending_buf_size - s->pending; + zmemcpy(s->pending_buf + s->pending, + s->gzhead->extra + s->gzindex, copy); + s->pending = s->pending_buf_size; + HCRC_UPDATE(beg); + s->gzindex += copy; + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; + } + beg = 0; + left -= copy; + } + zmemcpy(s->pending_buf + s->pending, + s->gzhead->extra + s->gzindex, left); + s->pending += left; + HCRC_UPDATE(beg); + s->gzindex = 0; + } + s->status = NAME_STATE; + } + if (s->status == NAME_STATE) { + if (s->gzhead->name != Z_NULL) { + ulg beg = s->pending; /* start of bytes to update crc */ + int val; + do { + if (s->pending == s->pending_buf_size) { + HCRC_UPDATE(beg); + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; + } + beg = 0; + } + val = s->gzhead->name[s->gzindex++]; + put_byte(s, val); + } while (val != 0); + HCRC_UPDATE(beg); + s->gzindex = 0; + } + s->status = COMMENT_STATE; + } + if (s->status == COMMENT_STATE) { + if (s->gzhead->comment != Z_NULL) { + ulg beg = s->pending; /* start of bytes to update crc */ + int val; + do { + if (s->pending == s->pending_buf_size) { + HCRC_UPDATE(beg); + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + 
return Z_OK; + } + beg = 0; + } + val = s->gzhead->comment[s->gzindex++]; + put_byte(s, val); + } while (val != 0); + HCRC_UPDATE(beg); + } + s->status = HCRC_STATE; + } + if (s->status == HCRC_STATE) { + if (s->gzhead->hcrc) { + if (s->pending + 2 > s->pending_buf_size) { + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; + } + } + put_byte(s, (Byte)(strm->adler & 0xff)); + put_byte(s, (Byte)((strm->adler >> 8) & 0xff)); + strm->adler = crc32(0L, Z_NULL, 0); + } + s->status = BUSY_STATE; + + /* Compression must start with an empty pending buffer */ flush_pending(strm); if (s->pending != 0) { - s->last_flush = -1; - return Z_OK; - } - beg = 0; - left -= copy; - } - memcpy(s->pending_buf + s->pending, s->gzhead->extra + s->gzindex, left); - s->pending += left; - HCRC_UPDATE(beg); - s->gzindex = 0; - } - s->status = NAME_STATE; - } - if (s->status == NAME_STATE) { - if (s->gzhead->name != Z_NULL) { - uint64_t beg = s->pending; /* start of bytes to update crc */ - int val; - do { - if (s->pending == s->pending_buf_size) { - HCRC_UPDATE(beg); - flush_pending(strm); - if (s->pending != 0) { s->last_flush = -1; return Z_OK; - } - beg = 0; } - val = s->gzhead->name[s->gzindex++]; - put_byte(s, val); - } while (val != 0); - HCRC_UPDATE(beg); - s->gzindex = 0; } - s->status = COMMENT_STATE; - } - if (s->status == COMMENT_STATE) { - if (s->gzhead->comment != Z_NULL) { - uint64_t beg = s->pending; /* start of bytes to update crc */ - int val; - do { - if (s->pending == s->pending_buf_size) { - HCRC_UPDATE(beg); - flush_pending(strm); - if (s->pending != 0) { - s->last_flush = -1; - return Z_OK; - } - beg = 0; - } - val = s->gzhead->comment[s->gzindex++]; - put_byte(s, val); - } while (val != 0); - HCRC_UPDATE(beg); - } - s->status = HCRC_STATE; - } - if (s->status == HCRC_STATE) { - if (s->gzhead->hcrc) { - if (s->pending + 2 > s->pending_buf_size) { - flush_pending(strm); - if (s->pending != 0) { - s->last_flush = -1; - return Z_OK; - } - } - put_byte(s, (Byte)(strm->adler & 0xff)); - put_byte(s, (Byte)((strm->adler >> 8) & 0xff)); - strm->adler = crc32(0L, Z_NULL, 0); - } - s->status = BUSY_STATE; - - /* Compression must start with an empty pending buffer */ - flush_pending(strm); - if (s->pending != 0) { - s->last_flush = -1; - return Z_OK; - } - } #endif - /* Start a new block or continue the current one. - */ - if (strm->avail_in != 0 || s->lookahead != 0 || - (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) { - block_state bstate; + /* Start a new block or continue the current one. + */ + if (strm->avail_in != 0 || s->lookahead != 0 || + (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) { + block_state bstate; - bstate = s->level == 0 ? deflate_stored(s, flush) - : s->strategy == Z_HUFFMAN_ONLY ? deflate_huff(s, flush) - : s->strategy == Z_RLE - ? deflate_rle(s, flush) - : (*(configuration_table[s->level].func))(s, flush); + bstate = s->level == 0 ? deflate_stored(s, flush) : + s->strategy == Z_HUFFMAN_ONLY ? deflate_huff(s, flush) : + s->strategy == Z_RLE ? deflate_rle(s, flush) : + (*(configuration_table[s->level].func))(s, flush); - if (bstate == finish_started || bstate == finish_done) { - s->status = FINISH_STATE; - } - if (bstate == need_more || bstate == finish_started) { - if (strm->avail_out == 0) { - s->last_flush = -1; /* avoid BUF_ERROR next call, see above */ - } - return Z_OK; - /* If flush != Z_NO_FLUSH && avail_out == 0, the next call - * of deflate should use the same flush parameter to make sure - * that the flush is complete. 
So we don't have to output an - * empty block here, this will be done at next call. This also - * ensures that for a very small output buffer, we emit at most - * one empty block. - */ - } - if (bstate == block_done) { - if (flush == Z_PARTIAL_FLUSH) { - _tr_align(s); - } else if (flush != Z_BLOCK) { /* FULL_FLUSH or SYNC_FLUSH */ - _tr_stored_block(s, (char *)0, 0L, 0); - /* For a full flush, this empty block will be recognized - * as a special marker by inflate_sync(). - */ - if (flush == Z_FULL_FLUSH) { - CLEAR_HASH(s); /* forget history */ - if (s->lookahead == 0) { - s->strstart = 0; - s->block_start = 0L; - s->insert = 0; - } + if (bstate == finish_started || bstate == finish_done) { + s->status = FINISH_STATE; + } + if (bstate == need_more || bstate == finish_started) { + if (strm->avail_out == 0) { + s->last_flush = -1; /* avoid BUF_ERROR next call, see above */ + } + return Z_OK; + /* If flush != Z_NO_FLUSH && avail_out == 0, the next call + * of deflate should use the same flush parameter to make sure + * that the flush is complete. So we don't have to output an + * empty block here, this will be done at next call. This also + * ensures that for a very small output buffer, we emit at most + * one empty block. + */ + } + if (bstate == block_done) { + if (flush == Z_PARTIAL_FLUSH) { + _tr_align(s); + } else if (flush != Z_BLOCK) { /* FULL_FLUSH or SYNC_FLUSH */ + _tr_stored_block(s, (char*)0, 0L, 0); + /* For a full flush, this empty block will be recognized + * as a special marker by inflate_sync(). + */ + if (flush == Z_FULL_FLUSH) { + CLEAR_HASH(s); /* forget history */ + if (s->lookahead == 0) { + s->strstart = 0; + s->block_start = 0L; + s->insert = 0; + } + } + } + flush_pending(strm); + if (strm->avail_out == 0) { + s->last_flush = -1; /* avoid BUF_ERROR at next call, see above */ + return Z_OK; + } } - } - flush_pending(strm); - if (strm->avail_out == 0) { - s->last_flush = -1; /* avoid BUF_ERROR at next call, see above */ - return Z_OK; - } } - } - if (flush != Z_FINISH) return Z_OK; - if (s->wrap <= 0) return Z_STREAM_END; + if (flush != Z_FINISH) return Z_OK; + if (s->wrap <= 0) return Z_STREAM_END; /* Write the trailer */ #ifdef GZIP - if (s->wrap == 2) { - crc_finalize(s); - put_byte(s, (Byte)(strm->adler & 0xff)); - put_byte(s, (Byte)((strm->adler >> 8) & 0xff)); - put_byte(s, (Byte)((strm->adler >> 16) & 0xff)); - put_byte(s, (Byte)((strm->adler >> 24) & 0xff)); - put_byte(s, (Byte)(strm->total_in & 0xff)); - put_byte(s, (Byte)((strm->total_in >> 8) & 0xff)); - put_byte(s, (Byte)((strm->total_in >> 16) & 0xff)); - put_byte(s, (Byte)((strm->total_in >> 24) & 0xff)); - } else + if (s->wrap == 2) { + crc_finalize(s); + put_byte(s, (Byte)(strm->adler & 0xff)); + put_byte(s, (Byte)((strm->adler >> 8) & 0xff)); + put_byte(s, (Byte)((strm->adler >> 16) & 0xff)); + put_byte(s, (Byte)((strm->adler >> 24) & 0xff)); + put_byte(s, (Byte)(strm->total_in & 0xff)); + put_byte(s, (Byte)((strm->total_in >> 8) & 0xff)); + put_byte(s, (Byte)((strm->total_in >> 16) & 0xff)); + put_byte(s, (Byte)((strm->total_in >> 24) & 0xff)); + } + else #endif - { - putShortMSB(s, (uInt)(strm->adler >> 16)); - putShortMSB(s, (uInt)(strm->adler & 0xffff)); - } - flush_pending(strm); - /* If avail_out is zero, the application will call deflate again - * to flush the rest. - */ - if (s->wrap > 0) s->wrap = -s->wrap; /* write the trailer only once! */ - return s->pending != 0 ? 
Z_OK : Z_STREAM_END; + { + putShortMSB(s, (uInt)(strm->adler >> 16)); + putShortMSB(s, (uInt)(strm->adler & 0xffff)); + } + flush_pending(strm); + /* If avail_out is zero, the application will call deflate again + * to flush the rest. + */ + if (s->wrap > 0) s->wrap = -s->wrap; /* write the trailer only once! */ + return s->pending != 0 ? Z_OK : Z_STREAM_END; } -int deflateEnd(z_streamp strm) { - int status; - if (deflateStateCheck(strm)) return Z_STREAM_ERROR; - status = strm->state->status; - /* Deallocate in reverse order of allocations: */ - TRY_FREE(strm, strm->state->pending_buf); - TRY_FREE(strm, strm->state->head); - TRY_FREE(strm, strm->state->prev); - TRY_FREE(strm, strm->state->window); - ZFREE(strm, strm->state); - strm->state = Z_NULL; - return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK; +/* ========================================================================= */ +int ZEXPORT deflateEnd (strm) + z_streamp strm; +{ + int status; + + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; + + status = strm->state->status; + + /* Deallocate in reverse order of allocations: */ + TRY_FREE(strm, strm->state->pending_buf); + TRY_FREE(strm, strm->state->head); + TRY_FREE(strm, strm->state->prev); + TRY_FREE(strm, strm->state->window); + + ZFREE(strm, strm->state); + strm->state = Z_NULL; + + return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK; } -/** +/* ========================================================================= * Copy the source state to the destination state. * To simplify the source, this is not supported for 16-bit MSDOS (which * doesn't have enough memory anyway to duplicate compression states). */ -int deflateCopy(z_streamp dest, z_streamp source) { +int ZEXPORT deflateCopy (dest, source) + z_streamp dest; + z_streamp source; +{ #ifdef MAXSEG_64K - return Z_STREAM_ERROR; -#else - struct DeflateState *ds; - struct DeflateState *ss; - - if (deflateStateCheck(source) || dest == Z_NULL) { return Z_STREAM_ERROR; - } +#else + deflate_state *ds; + deflate_state *ss; - ss = source->state; - memcpy((voidpf)dest, (voidpf)source, sizeof(z_stream)); + if (deflateStateCheck(source) || dest == Z_NULL) { + return Z_STREAM_ERROR; + } - ds = (struct DeflateState *)ZALLOC(dest, 1, sizeof(struct DeflateState)); - if (ds == Z_NULL) return Z_MEM_ERROR; - dest->state = (struct DeflateState *)ds; - memcpy((voidpf)ds, (voidpf)ss, sizeof(struct DeflateState)); - ds->strm = dest; + ss = source->state; - ds->window = (Bytef *)ZALLOC(dest, ds->w_size, 2 * sizeof(Byte)); - ds->prev = (Posf *)ZALLOC(dest, ds->w_size, sizeof(Pos)); - ds->head = (Posf *)ZALLOC(dest, ds->hash_size, sizeof(Pos)); - ds->pending_buf = (uint8_t *)ZALLOC(dest, ds->lit_bufsize, 4); + zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream)); - if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL || - ds->pending_buf == Z_NULL) { - deflateEnd(dest); - return Z_MEM_ERROR; - } - /* following memcpy do not work for 16-bit MSDOS */ - memcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte)); - memcpy((voidpf)ds->prev, (voidpf)ss->prev, ds->w_size * sizeof(Pos)); - memcpy((voidpf)ds->head, (voidpf)ss->head, ds->hash_size * sizeof(Pos)); - memcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size); + ds = (deflate_state *) ZALLOC(dest, 1, sizeof(deflate_state)); + if (ds == Z_NULL) return Z_MEM_ERROR; + dest->state = (struct internal_state FAR *) ds; + zmemcpy((voidpf)ds, (voidpf)ss, sizeof(deflate_state)); + ds->strm = dest; - ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); 
- ds->sym_buf = ds->pending_buf + ds->lit_bufsize; + ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte)); + ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos)); + ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos)); + ds->pending_buf = (uchf *) ZALLOC(dest, ds->lit_bufsize, 4); - ds->l_desc.dyn_tree = ds->dyn_ltree; - ds->d_desc.dyn_tree = ds->dyn_dtree; - ds->bl_desc.dyn_tree = ds->bl_tree; + if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL || + ds->pending_buf == Z_NULL) { + deflateEnd (dest); + return Z_MEM_ERROR; + } + /* following zmemcpy do not work for 16-bit MSDOS */ + zmemcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte)); + zmemcpy((voidpf)ds->prev, (voidpf)ss->prev, ds->w_size * sizeof(Pos)); + zmemcpy((voidpf)ds->head, (voidpf)ss->head, ds->hash_size * sizeof(Pos)); + zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size); - return Z_OK; + ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); + ds->sym_buf = ds->pending_buf + ds->lit_bufsize; + + ds->l_desc.dyn_tree = ds->dyn_ltree; + ds->d_desc.dyn_tree = ds->dyn_dtree; + ds->bl_desc.dyn_tree = ds->bl_tree; + + return Z_OK; #endif /* MAXSEG_64K */ } -/** +/* =========================================================================== * Read a new buffer from the current input stream, update the adler32 - * and total number of bytes read. All deflate() input goes through this - * function so some applications may wish to modify it to avoid - * allocating a large strm->next_in buffer and copying from it. (See - * also flush_pending()). + * and total number of bytes read. All deflate() input goes through + * this function so some applications may wish to modify it to avoid + * allocating a large strm->next_in buffer and copying from it. + * (See also flush_pending()). 
*/ -hidden unsigned deflate_read_buf(z_streamp strm, Bytef *buf, unsigned size) { - unsigned len = strm->avail_in; - if (len > size) len = size; - if (len == 0) return 0; - strm->avail_in -= len; +local unsigned read_buf(strm, buf, size) + z_streamp strm; + Bytef *buf; + unsigned size; +{ + unsigned len = strm->avail_in; + + if (len > size) len = size; + if (len == 0) return 0; + + strm->avail_in -= len; + #ifdef GZIP - if (strm->state->wrap == 2) - copy_with_crc(strm, buf, len); - else + if (strm->state->wrap == 2) + copy_with_crc(strm, buf, len); + else #endif - { - memcpy(buf, strm->next_in, len); - if (strm->state->wrap == 1) strm->adler = adler32(strm->adler, buf, len); - } - strm->next_in += len; - strm->total_in += len; - return len; + { + zmemcpy(buf, strm->next_in, len); + if (strm->state->wrap == 1) + strm->adler = adler32(strm->adler, buf, len); + } + strm->next_in += len; + strm->total_in += len; + + return len; } -/** +/* =========================================================================== * Initialize the "longest match" routines for a new zlib stream */ -static void lm_init(s) struct DeflateState *s; +local void lm_init (s) + deflate_state *s; { - s->window_size = (uint64_t)2L * s->w_size; + s->window_size = (ulg)2L*s->w_size; - CLEAR_HASH(s); + CLEAR_HASH(s); - /* Set the default configuration parameters: - */ - s->max_lazy_match = configuration_table[s->level].max_lazy; - s->good_match = configuration_table[s->level].good_length; - s->nice_match = configuration_table[s->level].nice_length; - s->max_chain_length = configuration_table[s->level].max_chain; + /* Set the default configuration parameters: + */ + s->max_lazy_match = configuration_table[s->level].max_lazy; + s->good_match = configuration_table[s->level].good_length; + s->nice_match = configuration_table[s->level].nice_length; + s->max_chain_length = configuration_table[s->level].max_chain; - s->strstart = 0; - s->block_start = 0L; - s->lookahead = 0; - s->insert = 0; - s->match_length = s->prev_length = MIN_MATCH - 1; - s->match_available = 0; - s->ins_h = 0; + s->strstart = 0; + s->block_start = 0L; + s->lookahead = 0; + s->insert = 0; + s->match_length = s->prev_length = MIN_MATCH-1; + s->match_available = 0; + s->ins_h = 0; #ifndef FASTEST #ifdef ASMV - match_init(); /* initialize the asm code */ + match_init(); /* initialize the asm code */ #endif #endif } #ifndef FASTEST -/** +/* =========================================================================== * Set match_start to the longest match starting at the given string and * return its length. Matches shorter or equal to prev_length are discarded, * in which case the result is equal to prev_length and match_start is @@ -1180,113 +1285,206 @@ static void lm_init(s) struct DeflateState *s; /* For 80x86 and 680x0, an optimized version will be provided in match.asm or * match.S. The code will be functionally equivalent. */ -static uInt longest_match(struct DeflateState *s, IPos cur_match) { - unsigned chain_length = s->max_chain_length; /* max hash chain length */ - register Bytef *scan = s->window + s->strstart; /* current string */ - register Bytef *match; /* matched string */ - register int len; /* length of current match */ - int best_len = (int)s->prev_length; /* best match length so far */ - int nice_match = s->nice_match; /* stop if match long enough */ - IPos limit = - s->strstart > (IPos)MAX_DIST(s) ? s->strstart - (IPos)MAX_DIST(s) : NIL; - /* Stop when cur_match becomes <= limit. 
To simplify the code, - * we prevent matches with the string of window index 0. - */ - Posf *prev = s->prev; - uInt wmask = s->w_mask; +local uInt longest_match(s, cur_match) + deflate_state *s; + IPos cur_match; /* current match */ +{ + unsigned chain_length = s->max_chain_length;/* max hash chain length */ + register Bytef *scan = s->window + s->strstart; /* current string */ + register Bytef *match; /* matched string */ + register int len; /* length of current match */ + int best_len = (int)s->prev_length; /* best match length so far */ + int nice_match = s->nice_match; /* stop if match long enough */ + IPos limit = s->strstart > (IPos)MAX_DIST(s) ? + s->strstart - (IPos)MAX_DIST(s) : NIL; + /* Stop when cur_match becomes <= limit. To simplify the code, + * we prevent matches with the string of window index 0. + */ + Posf *prev = s->prev; + uInt wmask = s->w_mask; + #ifdef UNALIGNED_OK - /* Compare two bytes at a time. Note: this is not always beneficial. - * Try with and without -DUNALIGNED_OK to check. - */ - register Bytef *strend = s->window + s->strstart + MAX_MATCH - 1; - register uint16_t scan_start = *(uint16_t *)scan; - register uint16_t scan_end = *(uint16_t *)(scan + best_len - 1); + /* Compare two bytes at a time. Note: this is not always beneficial. + * Try with and without -DUNALIGNED_OK to check. + */ + register Bytef *strend = s->window + s->strstart + MAX_MATCH - 1; + register ush scan_start = *(ushf*)scan; + register ush scan_end = *(ushf*)(scan+best_len-1); #else - register Bytef *strend = s->window + s->strstart + MAX_MATCH; - register Byte scan_end1 = scan[best_len - 1]; - register Byte scan_end = scan[best_len]; + register Bytef *strend = s->window + s->strstart + MAX_MATCH; + register Byte scan_end1 = scan[best_len-1]; + register Byte scan_end = scan[best_len]; #endif - /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. - * It is easy to get rid of this optimization if necessary. - */ - Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); - /* Do not waste too much time if we already have a good match: */ - if (s->prev_length >= s->good_match) { - chain_length >>= 2; - } - /* Do not look for matches beyond the end of the input. This is necessary - * to make deflate deterministic. - */ - if ((uInt)nice_match > s->lookahead) nice_match = (int)s->lookahead; - Assert((uint64_t)s->strstart <= s->window_size - MIN_LOOKAHEAD, - "need lookahead"); - do { - Assert(cur_match < s->strstart, "no future"); - match = s->window + cur_match; - /* Skip to next match if the match length cannot increase - * or if the match length is less than 2. Note that the checks below - * for insufficient lookahead only occur occasionally for performance - * reasons. Therefore uninitialized memory will be accessed, and - * conditional jumps will be made that depend on those values. - * However the length of the match is limited to the lookahead, so - * the output of deflate is not affected by the uninitialized values. + + /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. + * It is easy to get rid of this optimization if necessary. */ -#if (defined(UNALIGNED_OK) && MAX_MATCH == 258) - /* This code assumes sizeof(unsigned short) == 2. Do not use - * UNALIGNED_OK if your compiler uses a different size. 
+ Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); + + /* Do not waste too much time if we already have a good match: */ + if (s->prev_length >= s->good_match) { + chain_length >>= 2; + } + /* Do not look for matches beyond the end of the input. This is necessary + * to make deflate deterministic. */ - if (*(uint16_t *)(match + best_len - 1) != scan_end || - *(uint16_t *)match != scan_start) - continue; - /* It is not necessary to compare scan[2] and match[2] since they are - * always equal when the other bytes match, given that the hash keys - * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at - * strstart+3, +5, ... up to strstart+257. We check for insufficient - * lookahead only every 4th comparison; the 128th check will be made - * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is - * necessary to put more guard bytes at the end of the window, or - * to check more often for insufficient lookahead. - */ - /* When using CRC hashing, scan[2] and match[2] may mismatch, but in - * that case at least one of the other hashed bytes will mismatch - * also. Bytes 0 and 1 were already checked above, and we know there - * are at least four bytes to check otherwise the mismatch would have - * been found by the scan_end comparison above, so: */ - Assert(scan[2] == match[2] || scan[3] != match[3], "scan[2]??"); - scan++, match++; + if ((uInt)nice_match > s->lookahead) nice_match = (int)s->lookahead; + + Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); + do { - } while (*(uint16_t *)(scan += 2) == *(ushf *)(match += 2) && - *(uint16_t *)(scan += 2) == *(ushf *)(match += 2) && - *(uint16_t *)(scan += 2) == *(ushf *)(match += 2) && - *(uint16_t *)(scan += 2) == *(ushf *)(match += 2) && - scan < strend); - /* The funny "do {}" generates better code on most compilers */ - /* Here, scan <= window+strstart+257 */ - Assert(scan <= s->window + (unsigned)(s->window_size - 1), "wild scan"); - if (*scan == *match) scan++; - len = (MAX_MATCH - 1) - (int)(strend - scan); - scan = strend - (MAX_MATCH - 1); + Assert(cur_match < s->strstart, "no future"); + match = s->window + cur_match; + + /* Skip to next match if the match length cannot increase + * or if the match length is less than 2. Note that the checks below + * for insufficient lookahead only occur occasionally for performance + * reasons. Therefore uninitialized memory will be accessed, and + * conditional jumps will be made that depend on those values. + * However the length of the match is limited to the lookahead, so + * the output of deflate is not affected by the uninitialized values. + */ +#if (defined(UNALIGNED_OK) && MAX_MATCH == 258) + /* This code assumes sizeof(unsigned short) == 2. Do not use + * UNALIGNED_OK if your compiler uses a different size. + */ + if (*(ushf*)(match+best_len-1) != scan_end || + *(ushf*)match != scan_start) continue; + + /* It is not necessary to compare scan[2] and match[2] since they are + * always equal when the other bytes match, given that the hash keys + * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at + * strstart+3, +5, ... up to strstart+257. We check for insufficient + * lookahead only every 4th comparison; the 128th check will be made + * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is + * necessary to put more guard bytes at the end of the window, or + * to check more often for insufficient lookahead. 
+ */ + if (!s->chromium_zlib_hash) { + Assert(scan[2] == match[2], "scan[2]?"); + } else { + /* When using CRC hashing, scan[2] and match[2] may mismatch, but in + * that case at least one of the other hashed bytes will mismatch + * also. Bytes 0 and 1 were already checked above, and we know there + * are at least four bytes to check otherwise the mismatch would have + * been found by the scan_end comparison above, so: */ + Assert(scan[2] == match[2] || scan[3] != match[3], "scan[2]??"); + } + scan++, match++; + do { + } while (*(ushf*)(scan+=2) == *(ushf*)(match+=2) && + *(ushf*)(scan+=2) == *(ushf*)(match+=2) && + *(ushf*)(scan+=2) == *(ushf*)(match+=2) && + *(ushf*)(scan+=2) == *(ushf*)(match+=2) && + scan < strend); + /* The funny "do {}" generates better code on most compilers */ + + /* Here, scan <= window+strstart+257 */ + Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); + if (*scan == *match) scan++; + + len = (MAX_MATCH - 1) - (int)(strend-scan); + scan = strend - (MAX_MATCH-1); + #else /* UNALIGNED_OK */ - if (match[best_len] != scan_end || match[best_len - 1] != scan_end1 || - *match != *scan || *++match != scan[1]) - continue; + + if (match[best_len] != scan_end || + match[best_len-1] != scan_end1 || + *match != *scan || + *++match != scan[1]) continue; + + /* The check at best_len-1 can be removed because it will be made + * again later. (This heuristic is not always a win.) + * It is not necessary to compare scan[2] and match[2] since they + * are always equal when the other bytes match, given that + * the hash keys are equal and that HASH_BITS >= 8. + */ + scan += 2, match++; + if (!s->chromium_zlib_hash) { + Assert(*scan == *match, "match[2]?"); + } else { + /* When using CRC hashing, scan[2] and match[2] may mismatch, but in + * that case at least one of the other hashed bytes will mismatch + * also. Bytes 0 and 1 were already checked above, and we know there + * are at least four bytes to check otherwise the mismatch would have + * been found by the scan_end comparison above, so: */ + Assert(*scan == *match || scan[1] != match[1], "match[2]??"); + } + + /* We check for insufficient lookahead only every 8th comparison; + * the 256th check will be made at strstart+258. 
+ */ + do { + } while (*++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + scan < strend); + + Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); + + len = MAX_MATCH - (int)(strend - scan); + scan = strend - MAX_MATCH; + +#endif /* UNALIGNED_OK */ + + if (len > best_len) { + s->match_start = cur_match; + best_len = len; + if (len >= nice_match) break; +#ifdef UNALIGNED_OK + scan_end = *(ushf*)(scan+best_len-1); +#else + scan_end1 = scan[best_len-1]; + scan_end = scan[best_len]; +#endif + } + } while ((cur_match = prev[cur_match & wmask]) > limit + && --chain_length != 0); + + if ((uInt)best_len <= s->lookahead) return (uInt)best_len; + return s->lookahead; +} +#endif /* ASMV */ + +#else /* FASTEST */ + +/* --------------------------------------------------------------------------- + * Optimized version for FASTEST only + */ +local uInt longest_match(s, cur_match) + deflate_state *s; + IPos cur_match; /* current match */ +{ + register Bytef *scan = s->window + s->strstart; /* current string */ + register Bytef *match; /* matched string */ + register int len; /* length of current match */ + register Bytef *strend = s->window + s->strstart + MAX_MATCH; + + /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. + * It is easy to get rid of this optimization if necessary. + */ + Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); + + Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); + + Assert(cur_match < s->strstart, "no future"); + + match = s->window + cur_match; + + /* Return failure if the match length is less than 2: + */ + if (match[0] != scan[0] || match[1] != scan[1]) return MIN_MATCH-1; + /* The check at best_len-1 can be removed because it will be made * again later. (This heuristic is not always a win.) * It is not necessary to compare scan[2] and match[2] since they * are always equal when the other bytes match, given that * the hash keys are equal and that HASH_BITS >= 8. */ - scan += 2, match++; - if (1 /* !s->chromium_zlib_hash */) { - Assert(*scan == *match, "match[2]?"); - } else { - /* When using CRC hashing, scan[2] and match[2] may mismatch, but in - * that case at least one of the other hashed bytes will mismatch - * also. Bytes 0 and 1 were already checked above, and we know there - * are at least four bytes to check otherwise the mismatch would have - * been found by the scan_end comparison above, so: */ - Assert(*scan == *match || scan[1] != match[1], "match[2]??"); - } + scan += 2, match += 2; + Assert(*scan == *match, "match[2]?"); + /* We check for insufficient lookahead only every 8th comparison; * the 256th check will be made at strstart+258. 
*/ @@ -1294,605 +1492,773 @@ static uInt longest_match(struct DeflateState *s, IPos cur_match) { } while (*++scan == *++match && *++scan == *++match && *++scan == *++match && *++scan == *++match && *++scan == *++match && *++scan == *++match && - *++scan == *++match && *++scan == *++match && scan < strend); - Assert(scan <= s->window + (unsigned)(s->window_size - 1), "wild scan"); + *++scan == *++match && *++scan == *++match && + scan < strend); + + Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); + len = MAX_MATCH - (int)(strend - scan); - scan = strend - MAX_MATCH; -#endif /* UNALIGNED_OK */ - if (len > best_len) { - s->match_start = cur_match; - best_len = len; - if (len >= nice_match) break; -#ifdef UNALIGNED_OK - scan_end = *(uint16_t *)(scan + best_len - 1); -#else - scan_end1 = scan[best_len - 1]; - scan_end = scan[best_len]; -#endif - } - } while ((cur_match = prev[cur_match & wmask]) > limit && - --chain_length != 0); - if ((uInt)best_len <= s->lookahead) return (uInt)best_len; - return s->lookahead; -} -#endif /* ASMV */ -#else /* FASTEST */ + if (len < MIN_MATCH) return MIN_MATCH - 1; -/** - * Optimized version for FASTEST only - */ -static uInt longest_match(struct DeflateState *s, IPos cur_match) { - register Bytef *scan = s->window + s->strstart; /* current string */ - register Bytef *match; /* matched string */ - register int len; /* length of current match */ - register Bytef *strend = s->window + s->strstart + MAX_MATCH; - /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. - * It is easy to get rid of this optimization if necessary. - */ - Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); - Assert((uint64_t)s->strstart <= s->window_size - MIN_LOOKAHEAD, - "need lookahead"); - Assert(cur_match < s->strstart, "no future"); - match = s->window + cur_match; - /* Return failure if the match length is less than 2: - */ - if (match[0] != scan[0] || match[1] != scan[1]) return MIN_MATCH - 1; - /* The check at best_len-1 can be removed because it will be made - * again later. (This heuristic is not always a win.) - * It is not necessary to compare scan[2] and match[2] since they - * are always equal when the other bytes match, given that - * the hash keys are equal and that HASH_BITS >= 8. - */ - scan += 2, match += 2; - Assert(*scan == *match, "match[2]?"); - /* We check for insufficient lookahead only every 8th comparison; - * the 256th check will be made at strstart+258. - */ - do { - } while (*++scan == *++match && *++scan == *++match && *++scan == *++match && - *++scan == *++match && *++scan == *++match && *++scan == *++match && - *++scan == *++match && *++scan == *++match && scan < strend); - Assert(scan <= s->window + (unsigned)(s->window_size - 1), "wild scan"); - len = MAX_MATCH - (int)(strend - scan); - if (len < MIN_MATCH) return MIN_MATCH - 1; - s->match_start = cur_match; - return (uInt)len <= s->lookahead ? (uInt)len : s->lookahead; + s->match_start = cur_match; + return (uInt)len <= s->lookahead ? (uInt)len : s->lookahead; } #endif /* FASTEST */ #ifdef ZLIB_DEBUG + #define EQUAL 0 /* result of memcmp for equal strings */ -/** - * Checks that the match at match_start is indeed a match. + +/* =========================================================================== + * Check that the match at match_start is indeed a match. 
*/ -static void check_match(struct DeflateState *s, IPos start, IPos match, - int length) { - /* check that the match is indeed a match */ - if (memcmp(s->window + match, s->window + start, length) != EQUAL) { - kprintf(" start %u, match %u, length %d\n", start, match, length); - do { - kprintf("%c%c", s->window[match++], s->window[start++]); - } while (--length != 0); - z_error(__FILE__, __LINE__, "invalid match"); - } - if (z_verbose > 1) { - kprintf("\\[%d,%d]", start - match, length); - do { - kprintf("%c", s->window[start++]); - } while (--length != 0); - } +local void check_match(s, start, match, length) + deflate_state *s; + IPos start, match; + int length; +{ + /* check that the match is indeed a match */ + if (zmemcmp(s->window + match, + s->window + start, length) != EQUAL) { + fprintf(stderr, " start %u, match %u, length %d\n", + start, match, length); + do { + fprintf(stderr, "%c%c", s->window[match++], s->window[start++]); + } while (--length != 0); + z_error("invalid match"); + } + if (z_verbose > 1) { + fprintf(stderr,"\\[%d,%d]", start-match, length); + do { putc(s->window[start++], stderr); } while (--length != 0); + } } #else -#define check_match(s, start, match, length) +# define check_match(s, start, match, length) #endif /* ZLIB_DEBUG */ -/** - * Copy without compression as much as possible from the input stream, - * return the current block state. +/* =========================================================================== + * Fill the window when the lookahead becomes insufficient. + * Updates strstart and lookahead. * - * In case deflateParams() is used to later switch to a non-zero - * compression level, s->matches (otherwise unused when storing) keeps - * track of the number of hash table slides to perform. If s->matches is - * 1, then one hash table slide will be done when switching. If - * s->matches is 2, the maximum value allowed here, then the hash table - * will be cleared, since two or more slides is the same as a clear. - * - * deflate_stored() is written to minimize the number of times an input - * byte is copied. It is most efficient with large input and output - * buffers, which maximizes the opportunites to have a single copy from - * next_in to next_out. + * IN assertion: lookahead < MIN_LOOKAHEAD + * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD + * At least one byte has been read, or avail_in == 0; reads are + * performed for at least two bytes (required for the zip translate_eol + * option -- not supported here). */ -static block_state deflate_stored(struct DeflateState *s, int flush) { - /* Smallest worthy block size when not flushing or finishing. By default - * this is 32K. This can be as small as 507 bytes for memLevel == 1. For - * large input and output buffers, the stored block size will be larger. - */ - unsigned min_block = MIN(s->pending_buf_size - 5, s->w_size); +local void fill_window(s) + deflate_state *s; +{ + unsigned n; + unsigned more; /* Amount of free space at the end of the window. */ + uInt wsize = s->w_size; - /* Copy as many min_block or larger stored blocks directly to next_out as - * possible. If flushing, copy the remaining available input to next_out as - * stored blocks, if there is enough space. - */ - unsigned len, left, have, last = 0; - unsigned used = s->strm->avail_in; - do { - /* Set len to the maximum size block that we can copy directly with the - * available input data and output space. Set left to how much of that - * would be copied from what's left in the window. 
+ Assert(s->lookahead < MIN_LOOKAHEAD, "already enough lookahead"); + + do { + more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart); + + /* Deal with !@#$% 64K limit: */ + if (sizeof(int) <= 2) { + if (more == 0 && s->strstart == 0 && s->lookahead == 0) { + more = wsize; + + } else if (more == (unsigned)(-1)) { + /* Very unlikely, but possible on 16 bit machine if + * strstart == 0 && lookahead == 1 (input done a byte at time) + */ + more--; + } + } + + /* If the window is almost full and there is insufficient lookahead, + * move the upper half to the lower one to make room in the upper half. + */ + if (s->strstart >= wsize+MAX_DIST(s)) { + + zmemcpy(s->window, s->window+wsize, (unsigned)wsize - more); + s->match_start -= wsize; + s->strstart -= wsize; /* we now have strstart >= MAX_DIST */ + s->block_start -= (long) wsize; + if (s->insert > s->strstart) + s->insert = s->strstart; + slide_hash(s); + more += wsize; + } + if (s->strm->avail_in == 0) break; + + /* If there was no sliding: + * strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 && + * more == window_size - lookahead - strstart + * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1) + * => more >= window_size - 2*WSIZE + 2 + * In the BIG_MEM or MMAP case (not yet supported), + * window_size == input_size + MIN_LOOKAHEAD && + * strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD. + * Otherwise, window_size == 2*WSIZE so more >= 2. + * If there was sliding, more >= WSIZE. So in all cases, more >= 2. + */ + Assert(more >= 2, "more < 2"); + + n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more); + s->lookahead += n; + + /* Initialize the hash value now that we have some input: */ + if (s->chromium_zlib_hash) { + /* chromium hash reads 4 bytes */ + if (s->lookahead + s->insert > MIN_MATCH) { + uInt str = s->strstart - s->insert; + while (s->insert) { + insert_string(s, str); + str++; + s->insert--; + if (s->lookahead + s->insert <= MIN_MATCH) + break; + } + } + } else + /* Initialize the hash value now that we have some input: */ + if (s->lookahead + s->insert >= MIN_MATCH) { + uInt str = s->strstart - s->insert; + s->ins_h = s->window[str]; + UPDATE_HASH(s, s->ins_h, s->window[str + 1]); +#if MIN_MATCH != 3 + Call UPDATE_HASH() MIN_MATCH-3 more times +#endif + while (s->insert) { + UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]); +#ifndef FASTEST + s->prev[str & s->w_mask] = s->head[s->ins_h]; +#endif + s->head[s->ins_h] = (Pos)str; + str++; + s->insert--; + if (s->lookahead + s->insert < MIN_MATCH) + break; + } + } + /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage, + * but this is not important since only literal bytes will be emitted. + */ + + } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0); + + /* If the WIN_INIT bytes after the end of the current data have never been + * written, then zero those bytes in order to avoid memory check reports of + * the use of uninitialized (or uninitialised as Julian writes) bytes by + * the longest match routines. Update the high water mark for the next + * time through here. WIN_INIT is set to MAX_MATCH since the longest match + * routines allow scanning to strstart + MAX_MATCH, ignoring lookahead. 
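/* fill_window() above seeds the hash with UPDATE_HASH on the classic
 * (non-chromium_zlib_hash) path. A self-contained sketch of that rolling
 * hash follows; the bit widths are illustrative defaults, not the values a
 * given deflate_state is configured with:
 */
#define SKETCH_HASH_BITS 15
#define SKETCH_HASH_MASK ((1u << SKETCH_HASH_BITS) - 1)
#define SKETCH_MIN_MATCH 3
/* chosen so the oldest byte drops out of the key after MIN_MATCH updates */
#define SKETCH_HASH_SHIFT ((SKETCH_HASH_BITS + SKETCH_MIN_MATCH - 1) / SKETCH_MIN_MATCH)

static unsigned sketch_update_hash(unsigned h, unsigned char c) {
  return ((h << SKETCH_HASH_SHIFT) ^ c) & SKETCH_HASH_MASK;
}

/* hash of the MIN_MATCH bytes starting at p, seeded the way fill_window does */
static unsigned sketch_hash3(const unsigned char *p) {
  unsigned h = 0;
  h = sketch_update_hash(h, p[0]);
  h = sketch_update_hash(h, p[1]);
  h = sketch_update_hash(h, p[2]);
  return h;
}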
*/ - len = MAX_STORED; /* maximum deflate stored block length */ - have = (s->bi_valid + 42) >> 3; /* number of header bytes */ - if (s->strm->avail_out < have) /* need room for header */ - break; - /* maximum stored block length that will fit in avail_out: */ - have = s->strm->avail_out - have; - left = s->strstart - s->block_start; /* bytes left in window */ - if (len > (uint64_t)left + s->strm->avail_in) - len = left + s->strm->avail_in; /* limit len to the input */ - if (len > have) len = have; /* limit len to the output */ + if (s->high_water < s->window_size) { + ulg curr = s->strstart + (ulg)(s->lookahead); + ulg init; - /* If the stored block would be less than min_block in length, or if - * unable to copy all of the available input when flushing, then try - * copying to the window and the pending buffer instead. Also don't - * write an empty block when flushing -- deflate() does that. + if (s->high_water < curr) { + /* Previous high water mark below current data -- zero WIN_INIT + * bytes or up to end of window, whichever is less. + */ + init = s->window_size - curr; + if (init > WIN_INIT) + init = WIN_INIT; + zmemzero(s->window + curr, (unsigned)init); + s->high_water = curr + init; + } + else if (s->high_water < (ulg)curr + WIN_INIT) { + /* High water mark at or above current data, but below current data + * plus WIN_INIT -- zero out to current data plus WIN_INIT, or up + * to end of window, whichever is less. + */ + init = (ulg)curr + WIN_INIT - s->high_water; + if (init > s->window_size - s->high_water) + init = s->window_size - s->high_water; + zmemzero(s->window + s->high_water, (unsigned)init); + s->high_water += init; + } + } + + Assert((ulg)s->strstart <= s->window_size - MIN_LOOKAHEAD, + "not enough room for search"); +} + +/* =========================================================================== + * Flush the current block, with given end-of-file flag. + * IN assertion: strstart is set to the end of the current match. + */ +#define FLUSH_BLOCK_ONLY(s, last) { \ + _tr_flush_block(s, (s->block_start >= 0L ? \ + (charf *)&s->window[(unsigned)s->block_start] : \ + (charf *)Z_NULL), \ + (ulg)((long)s->strstart - s->block_start), \ + (last)); \ + s->block_start = s->strstart; \ + flush_pending(s->strm); \ + Tracev((stderr,"[FLUSH]")); \ +} + +/* Same but force premature exit if necessary. */ +#define FLUSH_BLOCK(s, last) { \ + FLUSH_BLOCK_ONLY(s, last); \ + if (s->strm->avail_out == 0) return (last) ? finish_started : need_more; \ +} + +/* Maximum stored block length in deflate format (not including header). */ +#define MAX_STORED 65535 + +/* Minimum of a and b. */ +#define MIN(a, b) ((a) > (b) ? (b) : (a)) + +/* =========================================================================== + * Copy without compression as much as possible from the input stream, return + * the current block state. + * + * In case deflateParams() is used to later switch to a non-zero compression + * level, s->matches (otherwise unused when storing) keeps track of the number + * of hash table slides to perform. If s->matches is 1, then one hash table + * slide will be done when switching. If s->matches is 2, the maximum value + * allowed here, then the hash table will be cleared, since two or more slides + * is the same as a clear. + * + * deflate_stored() is written to minimize the number of times an input byte is + * copied. It is most efficient with large input and output buffers, which + * maximizes the opportunities to have a single copy from next_in to next_out. 
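/* deflate_stored() below emits a dummy stored-block header and then patches
 * its length fields: a type-00 block stores the payload length as a
 * little-endian 16-bit LEN followed by its one's complement NLEN (RFC 1951).
 * A sketch of that patching on a plain byte buffer (the hdr_end convention
 * is an assumption for illustration only):
 */
static void sketch_patch_stored_len(unsigned char *hdr_end, unsigned len) {
  hdr_end[-4] = (unsigned char)(len & 0xff);        /* LEN, low byte  */
  hdr_end[-3] = (unsigned char)((len >> 8) & 0xff); /* LEN, high byte */
  hdr_end[-2] = (unsigned char)(~len & 0xff);       /* NLEN = ~LEN    */
  hdr_end[-1] = (unsigned char)((~len >> 8) & 0xff);
}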
+ */ +local block_state deflate_stored(s, flush) + deflate_state *s; + int flush; +{ + /* Smallest worthy block size when not flushing or finishing. By default + * this is 32K. This can be as small as 507 bytes for memLevel == 1. For + * large input and output buffers, the stored block size will be larger. */ - if (len < min_block && - ((len == 0 && flush != Z_FINISH) || flush == Z_NO_FLUSH || - len != left + s->strm->avail_in)) - break; + unsigned min_block = MIN(s->pending_buf_size - 5, s->w_size); - /* Make a dummy stored block in pending to get the header bytes, - * including any pending bits. This also updates the debugging counts. + /* Copy as many min_block or larger stored blocks directly to next_out as + * possible. If flushing, copy the remaining available input to next_out as + * stored blocks, if there is enough space. */ - last = flush == Z_FINISH && len == left + s->strm->avail_in ? 1 : 0; - _tr_stored_block(s, (char *)0, 0L, last); + unsigned len, left, have, last = 0; + unsigned used = s->strm->avail_in; + do { + /* Set len to the maximum size block that we can copy directly with the + * available input data and output space. Set left to how much of that + * would be copied from what's left in the window. + */ + len = MAX_STORED; /* maximum deflate stored block length */ + have = (s->bi_valid + 42) >> 3; /* number of header bytes */ + if (s->strm->avail_out < have) /* need room for header */ + break; + /* maximum stored block length that will fit in avail_out: */ + have = s->strm->avail_out - have; + left = s->strstart - s->block_start; /* bytes left in window */ + if (len > (ulg)left + s->strm->avail_in) + len = left + s->strm->avail_in; /* limit len to the input */ + if (len > have) + len = have; /* limit len to the output */ - /* Replace the lengths in the dummy stored block with len. */ - s->pending_buf[s->pending - 4] = len; - s->pending_buf[s->pending - 3] = len >> 8; - s->pending_buf[s->pending - 2] = ~len; - s->pending_buf[s->pending - 1] = ~len >> 8; + /* If the stored block would be less than min_block in length, or if + * unable to copy all of the available input when flushing, then try + * copying to the window and the pending buffer instead. Also don't + * write an empty block when flushing -- deflate() does that. + */ + if (len < min_block && ((len == 0 && flush != Z_FINISH) || + flush == Z_NO_FLUSH || + len != left + s->strm->avail_in)) + break; - /* Write the stored block header bytes. */ - flush_pending(s->strm); + /* Make a dummy stored block in pending to get the header bytes, + * including any pending bits. This also updates the debugging counts. + */ + last = flush == Z_FINISH && len == left + s->strm->avail_in ? 1 : 0; + _tr_stored_block(s, (char *)0, 0L, last); + + /* Replace the lengths in the dummy stored block with len. */ + s->pending_buf[s->pending - 4] = len; + s->pending_buf[s->pending - 3] = len >> 8; + s->pending_buf[s->pending - 2] = ~len; + s->pending_buf[s->pending - 1] = ~len >> 8; + + /* Write the stored block header bytes. */ + flush_pending(s->strm); #ifdef ZLIB_DEBUG - /* Update debugging counts for the data about to be copied. */ - s->compressed_len += len << 3; - s->bits_sent += len << 3; + /* Update debugging counts for the data about to be copied. */ + s->compressed_len += len << 3; + s->bits_sent += len << 3; #endif - /* Copy uncompressed bytes from the window to next_out. 
*/ - if (left) { - if (left > len) left = len; - memcpy(s->strm->next_out, s->window + s->block_start, left); - s->strm->next_out += left; - s->strm->avail_out -= left; - s->strm->total_out += left; - s->block_start += left; - len -= left; - } + /* Copy uncompressed bytes from the window to next_out. */ + if (left) { + if (left > len) + left = len; + zmemcpy(s->strm->next_out, s->window + s->block_start, left); + s->strm->next_out += left; + s->strm->avail_out -= left; + s->strm->total_out += left; + s->block_start += left; + len -= left; + } - /* Copy uncompressed bytes directly from next_in to next_out, updating - * the check value. - */ - if (len) { - deflate_read_buf(s->strm, s->strm->next_out, len); - s->strm->next_out += len; - s->strm->avail_out -= len; - s->strm->total_out += len; - } - } while (last == 0); - - /* Update the sliding window with the last s->w_size bytes of the copied - * data, or append all of the copied data to the existing window if less - * than s->w_size bytes were copied. Also update the number of bytes to - * insert in the hash tables, in the event that deflateParams() switches to - * a non-zero compression level. - */ - used -= s->strm->avail_in; /* number of input bytes directly copied */ - if (used) { - /* If any input was used, then no unused input remains in the window, - * therefore s->block_start == s->strstart. - */ - if (used >= s->w_size) { /* supplant the previous history */ - s->matches = 2; /* clear hash */ - memcpy(s->window, s->strm->next_in - s->w_size, s->w_size); - s->strstart = s->w_size; - } else { - if (s->window_size - s->strstart <= used) { - /* Slide the window down. */ - s->strstart -= s->w_size; - memcpy(s->window, s->window + s->w_size, s->strstart); - if (s->matches < 2) s->matches++; /* add a pending slide_hash() */ - } - memcpy(s->window + s->strstart, s->strm->next_in - used, used); - s->strstart += used; - } - s->block_start = s->strstart; - s->insert += MIN(used, s->w_size - s->insert); - } - if (s->high_water < s->strstart) s->high_water = s->strstart; - - /* If the last block was written to next_out, then done. */ - if (last) return finish_done; - - /* If flushing and all input has been consumed, then done. */ - if (flush != Z_NO_FLUSH && flush != Z_FINISH && s->strm->avail_in == 0 && - (long)s->strstart == s->block_start) - return block_done; - - /* Fill the window with any remaining input. */ - have = s->window_size - s->strstart - 1; - if (s->strm->avail_in > have && s->block_start >= (long)s->w_size) { - /* Slide the window down. */ - s->block_start -= s->w_size; - s->strstart -= s->w_size; - memcpy(s->window, s->window + s->w_size, s->strstart); - if (s->matches < 2) s->matches++; /* add a pending slide_hash() */ - have += s->w_size; /* more space now */ - } - if (have > s->strm->avail_in) have = s->strm->avail_in; - if (have) { - deflate_read_buf(s->strm, s->window + s->strstart, have); - s->strstart += have; - } - if (s->high_water < s->strstart) s->high_water = s->strstart; - - /* There was not enough avail_out to write a complete worthy or flushed - * stored block to next_out. Write a stored block to pending instead, if we - * have enough input for a worthy block, or if flushing and there is enough - * room for the remaining input as a stored block in the pending buffer. 
- */ - have = (s->bi_valid + 42) >> 3; /* number of header bytes */ - /* maximum stored block length that will fit in pending: */ - have = MIN(s->pending_buf_size - have, MAX_STORED); - min_block = MIN(have, s->w_size); - left = s->strstart - s->block_start; - if (left >= min_block || - ((left || flush == Z_FINISH) && flush != Z_NO_FLUSH && - s->strm->avail_in == 0 && left <= have)) { - len = MIN(left, have); - last = flush == Z_FINISH && s->strm->avail_in == 0 && len == left ? 1 : 0; - _tr_stored_block(s, (charf *)s->window + s->block_start, len, last); - s->block_start += len; - flush_pending(s->strm); - } - - /* We've done all we can with the available input and output. */ - return last ? finish_started : need_more; -} - -/** - * Compress as much as possible from the input stream, return the - * current block state. This function does not perform lazy evaluation - * of matches and inserts new strings in the dictionary only for - * unmatched strings or for short matches. It is used only for the fast - * compression options. - */ -static block_state deflate_fast(struct DeflateState *s, int flush) { - IPos hash_head; /* head of the hash chain */ - int bflush; /* set if current block must be flushed */ - - for (;;) { - /* Make sure that we always have enough lookahead, except - * at the end of the input file. We need MAX_MATCH bytes - * for the next match, plus MIN_MATCH bytes to insert the - * string following the next match. - */ - if (s->lookahead < MIN_LOOKAHEAD) { - fill_window_sse(s); - if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { - return need_more; - } - if (s->lookahead == 0) break; /* flush the current block */ - } - - /* Insert the string window[strstart .. strstart+2] in the - * dictionary, and set hash_head to the head of the hash chain: - */ - hash_head = NIL; - if (s->lookahead >= MIN_MATCH) { - hash_head = insert_string(s, s->strstart); - } - - /* Find the longest match, discarding those <= prev_length. - * At this point we have always match_length < MIN_MATCH - */ - if (hash_head != NIL && s->strstart - hash_head <= MAX_DIST(s)) { - /* To simplify the code, we prevent matches with the string - * of window index 0 (in particular we have to avoid a match - * of the string with itself at the start of the input file). - */ - s->match_length = longest_match(s, hash_head); - /* longest_match() sets match_start */ - } - if (s->match_length >= MIN_MATCH) { - check_match(s, s->strstart, s->match_start, s->match_length); - - _tr_tally_dist(s, s->strstart - s->match_start, - s->match_length - MIN_MATCH, bflush); - - s->lookahead -= s->match_length; - - /* Insert new strings in the hash table only if the match length - * is not too large. This saves time but degrades compression. - */ -#ifndef FASTEST - if (s->match_length <= s->max_insert_length && - s->lookahead >= MIN_MATCH) { - s->match_length--; /* string at strstart already in table */ - do { - s->strstart++; - hash_head = insert_string(s, s->strstart); - /* strstart never exceeds WSIZE-MAX_MATCH, so there are - * always MIN_MATCH bytes ahead. - */ - } while (--s->match_length != 0); - s->strstart++; - } else -#endif - { - s->strstart += s->match_length; - s->match_length = 0; - s->ins_h = s->window[s->strstart]; - UPDATE_HASH(s, s->ins_h, s->window[s->strstart + 1]); -#if MIN_MATCH != 3 - Call UPDATE_HASH() MIN_MATCH - 3 more times -#endif - /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not - * matter since it will be recomputed at next deflate call. 
+ /* Copy uncompressed bytes directly from next_in to next_out, updating + * the check value. */ - } - } else { - /* No match, output a literal byte */ - Tracevv(("%c", s->window[s->strstart])); - _tr_tally_lit(s, s->window[s->strstart], bflush); - s->lookahead--; - s->strstart++; + if (len) { + read_buf(s->strm, s->strm->next_out, len); + s->strm->next_out += len; + s->strm->avail_out -= len; + s->strm->total_out += len; + } + } while (last == 0); + + /* Update the sliding window with the last s->w_size bytes of the copied + * data, or append all of the copied data to the existing window if less + * than s->w_size bytes were copied. Also update the number of bytes to + * insert in the hash tables, in the event that deflateParams() switches to + * a non-zero compression level. + */ + used -= s->strm->avail_in; /* number of input bytes directly copied */ + if (used) { + /* If any input was used, then no unused input remains in the window, + * therefore s->block_start == s->strstart. + */ + if (used >= s->w_size) { /* supplant the previous history */ + s->matches = 2; /* clear hash */ + zmemcpy(s->window, s->strm->next_in - s->w_size, s->w_size); + s->strstart = s->w_size; + s->insert = s->strstart; + } + else { + if (s->window_size - s->strstart <= used) { + /* Slide the window down. */ + s->strstart -= s->w_size; + zmemcpy(s->window, s->window + s->w_size, s->strstart); + if (s->matches < 2) + s->matches++; /* add a pending slide_hash() */ + if (s->insert > s->strstart) + s->insert = s->strstart; + } + zmemcpy(s->window + s->strstart, s->strm->next_in - used, used); + s->strstart += used; + s->insert += MIN(used, s->w_size - s->insert); + } + s->block_start = s->strstart; } - if (bflush) FLUSH_BLOCK(s, 0); - } - s->insert = s->strstart < MIN_MATCH - 1 ? s->strstart : MIN_MATCH - 1; - if (flush == Z_FINISH) { - FLUSH_BLOCK(s, 1); - return finish_done; - } - if (s->sym_next) FLUSH_BLOCK(s, 0); - return block_done; + if (s->high_water < s->strstart) + s->high_water = s->strstart; + + /* If the last block was written to next_out, then done. */ + if (last) + return finish_done; + + /* If flushing and all input has been consumed, then done. */ + if (flush != Z_NO_FLUSH && flush != Z_FINISH && + s->strm->avail_in == 0 && (long)s->strstart == s->block_start) + return block_done; + + /* Fill the window with any remaining input. */ + have = s->window_size - s->strstart; + if (s->strm->avail_in > have && s->block_start >= (long)s->w_size) { + /* Slide the window down. */ + s->block_start -= s->w_size; + s->strstart -= s->w_size; + zmemcpy(s->window, s->window + s->w_size, s->strstart); + if (s->matches < 2) + s->matches++; /* add a pending slide_hash() */ + have += s->w_size; /* more space now */ + if (s->insert > s->strstart) + s->insert = s->strstart; + } + if (have > s->strm->avail_in) + have = s->strm->avail_in; + if (have) { + read_buf(s->strm, s->window + s->strstart, have); + s->strstart += have; + s->insert += MIN(have, s->w_size - s->insert); + } + if (s->high_water < s->strstart) + s->high_water = s->strstart; + + /* There was not enough avail_out to write a complete worthy or flushed + * stored block to next_out. Write a stored block to pending instead, if we + * have enough input for a worthy block, or if flushing and there is enough + * room for the remaining input as a stored block in the pending buffer. 
+ */ + have = (s->bi_valid + 42) >> 3; /* number of header bytes */ + /* maximum stored block length that will fit in pending: */ + have = MIN(s->pending_buf_size - have, MAX_STORED); + min_block = MIN(have, s->w_size); + left = s->strstart - s->block_start; + if (left >= min_block || + ((left || flush == Z_FINISH) && flush != Z_NO_FLUSH && + s->strm->avail_in == 0 && left <= have)) { + len = MIN(left, have); + last = flush == Z_FINISH && s->strm->avail_in == 0 && + len == left ? 1 : 0; + _tr_stored_block(s, (charf *)s->window + s->block_start, len, last); + s->block_start += len; + flush_pending(s->strm); + } + + /* We've done all we can with the available input and output. */ + return last ? finish_started : need_more; +} + +/* =========================================================================== + * Compress as much as possible from the input stream, return the current + * block state. + * This function does not perform lazy evaluation of matches and inserts + * new strings in the dictionary only for unmatched strings or for short + * matches. It is used only for the fast compression options. + */ +local block_state deflate_fast(s, flush) + deflate_state *s; + int flush; +{ + IPos hash_head; /* head of the hash chain */ + int bflush; /* set if current block must be flushed */ + + for (;;) { + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the next match, plus MIN_MATCH bytes to insert the + * string following the next match. + */ + if (s->lookahead < MIN_LOOKAHEAD) { + fill_window(s); + if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { + return need_more; + } + if (s->lookahead == 0) break; /* flush the current block */ + } + + /* Insert the string window[strstart .. strstart+2] in the + * dictionary, and set hash_head to the head of the hash chain: + */ + hash_head = NIL; + if (s->lookahead >= MIN_MATCH) { + hash_head = insert_string(s, s->strstart); + } + + /* Find the longest match, discarding those <= prev_length. + * At this point we have always match_length < MIN_MATCH + */ + if (hash_head != NIL && s->strstart - hash_head <= MAX_DIST(s)) { + /* To simplify the code, we prevent matches with the string + * of window index 0 (in particular we have to avoid a match + * of the string with itself at the start of the input file). + */ + s->match_length = longest_match (s, hash_head); + /* longest_match() sets match_start */ + } + if (s->match_length >= MIN_MATCH) { + check_match(s, s->strstart, s->match_start, s->match_length); + + _tr_tally_dist(s, s->strstart - s->match_start, + s->match_length - MIN_MATCH, bflush); + + s->lookahead -= s->match_length; + + /* Insert new strings in the hash table only if the match length + * is not too large. This saves time but degrades compression. + */ +#ifndef FASTEST + if (s->match_length <= s->max_insert_length && + s->lookahead >= MIN_MATCH) { + s->match_length--; /* string at strstart already in table */ + do { + s->strstart++; + hash_head = insert_string(s, s->strstart); + /* strstart never exceeds WSIZE-MAX_MATCH, so there are + * always MIN_MATCH bytes ahead. 
+ */ + } while (--s->match_length != 0); + s->strstart++; + } else +#endif + { + s->strstart += s->match_length; + s->match_length = 0; + + if (!s->chromium_zlib_hash) { + s->ins_h = s->window[s->strstart]; + UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]); +#if MIN_MATCH != 3 + Call UPDATE_HASH() MIN_MATCH-3 more times +#endif + /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not + * matter since it will be recomputed at next deflate call. + */ + } + } + } else { + /* No match, output a literal byte */ + Tracevv((stderr,"%c", s->window[s->strstart])); + _tr_tally_lit (s, s->window[s->strstart], bflush); + s->lookahead--; + s->strstart++; + } + if (bflush) FLUSH_BLOCK(s, 0); + } + s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; + } + if (s->sym_next) + FLUSH_BLOCK(s, 0); + return block_done; } #ifndef FASTEST -/** +/* =========================================================================== * Same as above, but achieves better compression. We use a lazy * evaluation for matches: a match is finally adopted only if there is * no better match at the next window position. */ -static block_state deflate_slow(struct DeflateState *s, int flush) { - IPos hash_head; /* head of hash chain */ - int bflush; /* set if current block must be flushed */ +local block_state deflate_slow(s, flush) + deflate_state *s; + int flush; +{ + IPos hash_head; /* head of hash chain */ + int bflush; /* set if current block must be flushed */ - /* Process the input block. */ - for (;;) { - /* Make sure that we always have enough lookahead, except - * at the end of the input file. We need MAX_MATCH bytes - * for the next match, plus MIN_MATCH bytes to insert the - * string following the next match. - */ - if (s->lookahead < MIN_LOOKAHEAD) { - fill_window_sse(s); - if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { - return need_more; - } - if (s->lookahead == 0) break; /* flush the current block */ - } - - /* Insert the string window[strstart .. strstart+2] in the - * dictionary, and set hash_head to the head of the hash chain: - */ - hash_head = NIL; - if (s->lookahead >= MIN_MATCH) { - hash_head = insert_string(s, s->strstart); - } - - /* Find the longest match, discarding those <= prev_length. - */ - s->prev_length = s->match_length, s->prev_match = s->match_start; - s->match_length = MIN_MATCH - 1; - - if (hash_head != NIL && s->prev_length < s->max_lazy_match && - s->strstart - hash_head <= MAX_DIST(s)) { - /* To simplify the code, we prevent matches with the string - * of window index 0 (in particular we have to avoid a match - * of the string with itself at the start of the input file). - */ - s->match_length = longest_match(s, hash_head); - /* longest_match() sets match_start */ - - if (s->match_length <= 5 && (s->strategy == Z_FILTERED -#if TOO_FAR <= 32767 - || (s->match_length == MIN_MATCH && - s->strstart - s->match_start > TOO_FAR) -#endif - )) { - - /* If prev_match is also MIN_MATCH, match_start is garbage - * but we will ignore the current match anyway. + /* Process the input block. */ + for (;;) { + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the next match, plus MIN_MATCH bytes to insert the + * string following the next match. 
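/* deflate_slow() defers each match by one byte: the match found at the
 * previous position is emitted only if the match at the current position is
 * not strictly longer. The core of that rule, reduced to a predicate
 * (purely illustrative, not the zlib state machine):
 */
static int sketch_emit_previous_match(unsigned prev_length,
                                      unsigned match_length,
                                      unsigned min_match /* 3 in deflate */) {
  /* nonzero: emit the previous match; zero: emit a literal and keep looking */
  return prev_length >= min_match && match_length <= prev_length;
}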
*/ - s->match_length = MIN_MATCH - 1; - } - } - /* If there was a match at the previous step and the current - * match is not better, output the previous match: - */ - if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) { - uInt max_insert = s->strstart + s->lookahead - MIN_MATCH; - /* Do not insert strings in hash table beyond this. */ - - if (s->prev_match == -1) { - check_match(s, s->strstart, s->prev_match + 1, s->prev_length - 1); - } else { - check_match(s, s->strstart - 1, s->prev_match, s->prev_length); - } - - _tr_tally_dist(s, s->strstart - 1 - s->prev_match, - s->prev_length - MIN_MATCH, bflush); - - /* Insert in hash table all strings up to the end of the match. - * strstart-1 and strstart are already inserted. If there is not - * enough lookahead, the last two strings are not inserted in - * the hash table. - */ - s->lookahead -= s->prev_length - 1; - s->prev_length -= 2; - do { - if (++s->strstart <= max_insert) { - hash_head = insert_string(s, s->strstart); + if (s->lookahead < MIN_LOOKAHEAD) { + fill_window(s); + if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { + return need_more; + } + if (s->lookahead == 0) break; /* flush the current block */ } - } while (--s->prev_length != 0); - s->match_available = 0; - s->match_length = MIN_MATCH - 1; - s->strstart++; - if (bflush) FLUSH_BLOCK(s, 0); + /* Insert the string window[strstart .. strstart+2] in the + * dictionary, and set hash_head to the head of the hash chain: + */ + hash_head = NIL; + if (s->lookahead >= MIN_MATCH) { + hash_head = insert_string(s, s->strstart); + } - } else if (s->match_available) { - /* If there was no match at the previous position, output a - * single literal. If there was a match but the current match - * is longer, truncate the previous match to a single literal. - */ - Tracevv(("%c", s->window[s->strstart - 1])); - _tr_tally_lit(s, s->window[s->strstart - 1], bflush); - if (bflush) { - FLUSH_BLOCK_ONLY(s, 0); - } - s->strstart++; - s->lookahead--; - if (s->strm->avail_out == 0) return need_more; - } else { - /* There is no previous match to compare with, wait for - * the next step to decide. - */ - s->match_available = 1; - s->strstart++; - s->lookahead--; + /* Find the longest match, discarding those <= prev_length. + */ + s->prev_length = s->match_length, s->prev_match = s->match_start; + s->match_length = MIN_MATCH-1; + + if (hash_head != NIL && s->prev_length < s->max_lazy_match && + s->strstart - hash_head <= MAX_DIST(s)) { + /* To simplify the code, we prevent matches with the string + * of window index 0 (in particular we have to avoid a match + * of the string with itself at the start of the input file). + */ + s->match_length = longest_match (s, hash_head); + /* longest_match() sets match_start */ + + if (s->match_length <= 5 && (s->strategy == Z_FILTERED +#if TOO_FAR <= 32767 + || (s->match_length == MIN_MATCH && + s->strstart - s->match_start > TOO_FAR) +#endif + )) { + + /* If prev_match is also MIN_MATCH, match_start is garbage + * but we will ignore the current match anyway. + */ + s->match_length = MIN_MATCH-1; + } + } + /* If there was a match at the previous step and the current + * match is not better, output the previous match: + */ + if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) { + uInt max_insert = s->strstart + s->lookahead - MIN_MATCH; + /* Do not insert strings in hash table beyond this. */ + + if (s->prev_match == -1) { + /* The window has slid one byte past the previous match, + * so the first byte cannot be compared. 
*/ + check_match(s, s->strstart, s->prev_match+1, s->prev_length-1); + } else { + check_match(s, s->strstart-1, s->prev_match, s->prev_length); + } + + _tr_tally_dist(s, s->strstart -1 - s->prev_match, + s->prev_length - MIN_MATCH, bflush); + + /* Insert in hash table all strings up to the end of the match. + * strstart-1 and strstart are already inserted. If there is not + * enough lookahead, the last two strings are not inserted in + * the hash table. + */ + s->lookahead -= s->prev_length-1; + s->prev_length -= 2; + do { + if (++s->strstart <= max_insert) { + hash_head = insert_string(s, s->strstart); + } + } while (--s->prev_length != 0); + s->match_available = 0; + s->match_length = MIN_MATCH-1; + s->strstart++; + + if (bflush) FLUSH_BLOCK(s, 0); + + } else if (s->match_available) { + /* If there was no match at the previous position, output a + * single literal. If there was a match but the current match + * is longer, truncate the previous match to a single literal. + */ + Tracevv((stderr,"%c", s->window[s->strstart-1])); + _tr_tally_lit(s, s->window[s->strstart-1], bflush); + if (bflush) { + FLUSH_BLOCK_ONLY(s, 0); + } + s->strstart++; + s->lookahead--; + if (s->strm->avail_out == 0) return need_more; + } else { + /* There is no previous match to compare with, wait for + * the next step to decide. + */ + s->match_available = 1; + s->strstart++; + s->lookahead--; + } } - } - Assert(flush != Z_NO_FLUSH, "no flush?"); - if (s->match_available) { - Tracevv(("%c", s->window[s->strstart - 1])); - _tr_tally_lit(s, s->window[s->strstart - 1], bflush); - s->match_available = 0; - } - s->insert = s->strstart < MIN_MATCH - 1 ? s->strstart : MIN_MATCH - 1; - if (flush == Z_FINISH) { - FLUSH_BLOCK(s, 1); - return finish_done; - } - if (s->sym_next) FLUSH_BLOCK(s, 0); - return block_done; + Assert (flush != Z_NO_FLUSH, "no flush?"); + if (s->match_available) { + Tracevv((stderr,"%c", s->window[s->strstart-1])); + _tr_tally_lit(s, s->window[s->strstart-1], bflush); + s->match_available = 0; + } + s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; + } + if (s->sym_next) + FLUSH_BLOCK(s, 0); + return block_done; } #endif /* FASTEST */ -/** - * For Z_RLE, simply look for runs of bytes, generate matches only of - * distance one. Do not maintain a hash table. (It will be regenerated - * if this run of deflate switches away from Z_RLE.) +/* =========================================================================== + * For Z_RLE, simply look for runs of bytes, generate matches only of distance + * one. Do not maintain a hash table. (It will be regenerated if this run of + * deflate switches away from Z_RLE.) */ -static block_state deflate_rle(struct DeflateState *s, int flush) { - int bflush; /* set if current block must be flushed */ - uInt prev; /* byte at distance one to match */ - Bytef *scan, *strend; /* scan goes up to strend for length of run */ +local block_state deflate_rle(s, flush) + deflate_state *s; + int flush; +{ + int bflush; /* set if current block must be flushed */ + uInt prev; /* byte at distance one to match */ + Bytef *scan, *strend; /* scan goes up to strend for length of run */ - for (;;) { - /* Make sure that we always have enough lookahead, except - * at the end of the input file. We need MAX_MATCH bytes - * for the longest run, plus one for the unrolled loop. 
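/* With Z_RLE the only matches considered are runs against the byte just
 * before strstart, i.e. matches at distance one, so no hash table is kept.
 * A standalone sketch of that run measurement (caps and names are
 * illustrative):
 */
static unsigned sketch_rle_run(const unsigned char *window, unsigned strstart,
                               unsigned lookahead,
                               unsigned max_match /* 258 in deflate */) {
  unsigned len = 0, cap, prev;
  if (strstart == 0 || lookahead < 3) return 0;   /* need MIN_MATCH bytes */
  prev = window[strstart - 1];
  cap = lookahead < max_match ? lookahead : max_match;
  while (len < cap && window[strstart + len] == prev) len++;
  return len >= 3 ? len : 0;  /* shorter runs are emitted as literals */
}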
- */ - if (s->lookahead <= MAX_MATCH) { - fill_window_sse(s); - if (s->lookahead <= MAX_MATCH && flush == Z_NO_FLUSH) { - return need_more; - } - if (s->lookahead == 0) break; /* flush the current block */ + for (;;) { + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the longest run, plus one for the unrolled loop. + */ + if (s->lookahead <= MAX_MATCH) { + fill_window(s); + if (s->lookahead <= MAX_MATCH && flush == Z_NO_FLUSH) { + return need_more; + } + if (s->lookahead == 0) break; /* flush the current block */ + } + + /* See how many times the previous byte repeats */ + s->match_length = 0; + if (s->lookahead >= MIN_MATCH && s->strstart > 0) { + scan = s->window + s->strstart - 1; + prev = *scan; + if (prev == *++scan && prev == *++scan && prev == *++scan) { + strend = s->window + s->strstart + MAX_MATCH; + do { + } while (prev == *++scan && prev == *++scan && + prev == *++scan && prev == *++scan && + prev == *++scan && prev == *++scan && + prev == *++scan && prev == *++scan && + scan < strend); + s->match_length = MAX_MATCH - (uInt)(strend - scan); + if (s->match_length > s->lookahead) + s->match_length = s->lookahead; + } + Assert(scan <= s->window+(uInt)(s->window_size-1), "wild scan"); + } + + /* Emit match if have run of MIN_MATCH or longer, else emit literal */ + if (s->match_length >= MIN_MATCH) { + check_match(s, s->strstart, s->strstart - 1, s->match_length); + + _tr_tally_dist(s, 1, s->match_length - MIN_MATCH, bflush); + + s->lookahead -= s->match_length; + s->strstart += s->match_length; + s->match_length = 0; + } else { + /* No match, output a literal byte */ + Tracevv((stderr,"%c", s->window[s->strstart])); + _tr_tally_lit (s, s->window[s->strstart], bflush); + s->lookahead--; + s->strstart++; + } + if (bflush) FLUSH_BLOCK(s, 0); } - - /* See how many times the previous byte repeats */ - s->match_length = 0; - if (s->lookahead >= MIN_MATCH && s->strstart > 0) { - scan = s->window + s->strstart - 1; - prev = *scan; - if (prev == *++scan && prev == *++scan && prev == *++scan) { - strend = s->window + s->strstart + MAX_MATCH; - do { - } while (prev == *++scan && prev == *++scan && prev == *++scan && - prev == *++scan && prev == *++scan && prev == *++scan && - prev == *++scan && prev == *++scan && scan < strend); - s->match_length = MAX_MATCH - (uInt)(strend - scan); - if (s->match_length > s->lookahead) s->match_length = s->lookahead; - } - Assert(scan <= s->window + (uInt)(s->window_size - 1), "wild scan"); + s->insert = 0; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; } - - /* Emit match if have run of MIN_MATCH or longer, else emit literal */ - if (s->match_length >= MIN_MATCH) { - check_match(s, s->strstart, s->strstart - 1, s->match_length); - - _tr_tally_dist(s, 1, s->match_length - MIN_MATCH, bflush); - - s->lookahead -= s->match_length; - s->strstart += s->match_length; - s->match_length = 0; - } else { - /* No match, output a literal byte */ - Tracevv(("%c", s->window[s->strstart])); - _tr_tally_lit(s, s->window[s->strstart], bflush); - s->lookahead--; - s->strstart++; - } - if (bflush) FLUSH_BLOCK(s, 0); - } - s->insert = 0; - if (flush == Z_FINISH) { - FLUSH_BLOCK(s, 1); - return finish_done; - } - if (s->sym_next) FLUSH_BLOCK(s, 0); - return block_done; + if (s->sym_next) + FLUSH_BLOCK(s, 0); + return block_done; } -/** - * For Z_HUFFMAN_ONLY, do not look for matches. Do not maintain a hash - * table. 
(It will be regenerated if this run of deflate switches away - * from Huffman.) +/* =========================================================================== + * For Z_HUFFMAN_ONLY, do not look for matches. Do not maintain a hash table. + * (It will be regenerated if this run of deflate switches away from Huffman.) */ -static block_state deflate_huff(struct DeflateState *s, int flush) { - int bflush; /* set if current block must be flushed */ - for (;;) { - /* Make sure that we have a literal to write. */ - if (s->lookahead == 0) { - fill_window_sse(s); - if (s->lookahead == 0) { - if (flush == Z_NO_FLUSH) return need_more; - break; /* flush the current block */ - } +local block_state deflate_huff(s, flush) + deflate_state *s; + int flush; +{ + int bflush; /* set if current block must be flushed */ + + for (;;) { + /* Make sure that we have a literal to write. */ + if (s->lookahead == 0) { + fill_window(s); + if (s->lookahead == 0) { + if (flush == Z_NO_FLUSH) + return need_more; + break; /* flush the current block */ + } + } + + /* Output a literal byte */ + s->match_length = 0; + Tracevv((stderr,"%c", s->window[s->strstart])); + _tr_tally_lit (s, s->window[s->strstart], bflush); + s->lookahead--; + s->strstart++; + if (bflush) FLUSH_BLOCK(s, 0); } - /* Output a literal byte */ - s->match_length = 0; - Tracevv(("%c", s->window[s->strstart])); - _tr_tally_lit(s, s->window[s->strstart], bflush); - s->lookahead--; - s->strstart++; - if (bflush) FLUSH_BLOCK(s, 0); - } - s->insert = 0; - if (flush == Z_FINISH) { - FLUSH_BLOCK(s, 1); - return finish_done; - } - if (s->sym_next) FLUSH_BLOCK(s, 0); - return block_done; + s->insert = 0; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; + } + if (s->sym_next) + FLUSH_BLOCK(s, 0); + return block_done; } diff --git a/third_party/zlib/deflate.internal.h b/third_party/zlib/deflate.internal.h index df01e66b1..a4962ae0d 100644 --- a/third_party/zlib/deflate.internal.h +++ b/third_party/zlib/deflate.internal.h @@ -1,260 +1,284 @@ -#ifndef DEFLATE_H -#define DEFLATE_H +#ifndef COSMOPOLITAN_THIRD_PARTY_ZLIB_DEFLATE_INTERNAL_H_ +#define COSMOPOLITAN_THIRD_PARTY_ZLIB_DEFLATE_INTERNAL_H_ +#include "third_party/zlib/macros.internal.h" #include "third_party/zlib/zutil.internal.h" +/* clang-format off */ -/* define NO_GZIP when compiling if you want to disable gzip header and - trailer creation by deflate(). NO_GZIP would be used to avoid linking - in the crc code when it is not needed. For shared libraries, gzip - encoding should be left enabled. 
*/ -#ifndef NO_GZIP -#define GZIP -#endif +#define DIST_CODE_LEN 512 /* see definition of array dist_code below */ -/* number of length codes, not counting the special END_BLOCK code */ #define LENGTH_CODES 29 +/* number of length codes, not counting the special END_BLOCK code */ +#define LITERALS 256 /* number of literal bytes 0..255 */ -#define LITERALS 256 +#define L_CODES (LITERALS+1+LENGTH_CODES) /* number of Literal or Length codes, including the END_BLOCK code */ -#define L_CODES (LITERALS + 1 + LENGTH_CODES) +#define D_CODES 30 /* number of distance codes */ -#define D_CODES 30 +#define BL_CODES 19 /* number of codes used to transfer the bit lengths */ -#define BL_CODES 19 +#define HEAP_SIZE (2*L_CODES+1) /* maximum heap size */ -#define HEAP_SIZE (2 * L_CODES + 1) -/* All codes must not exceed MAX_BITS bits */ #define MAX_BITS 15 +/* All codes must not exceed MAX_BITS bits */ -/* size of bit buffer in bi_buf */ #define Buf_size 16 +/* size of bit buffer in bi_buf */ -#define DIST_CODE_LEN 512 /* see definition of array dist_code below */ - -#define INIT_STATE 42 /* zlib header -> BUSY_STATE */ -#ifdef GZIP -#define GZIP_STATE 57 /* gzip header -> BUSY_STATE | EXTRA_STATE */ -#endif -#define EXTRA_STATE 69 /* gzip extra block -> NAME_STATE */ -#define NAME_STATE 73 /* gzip file name -> COMMENT_STATE */ -#define COMMENT_STATE 91 /* gzip comment -> HCRC_STATE */ -#define HCRC_STATE 103 /* gzip header CRC -> BUSY_STATE */ -#define BUSY_STATE 113 /* deflate -> FINISH_STATE */ -#define FINISH_STATE 666 /* stream complete */ +#define INIT_STATE 42 /* zlib header -> BUSY_STATE */ +#define GZIP_STATE 57 /* gzip header -> BUSY_STATE | EXTRA_STATE */ +#define EXTRA_STATE 69 /* gzip extra block -> NAME_STATE */ +#define NAME_STATE 73 /* gzip file name -> COMMENT_STATE */ +#define COMMENT_STATE 91 /* gzip comment -> HCRC_STATE */ +#define HCRC_STATE 103 /* gzip header CRC -> BUSY_STATE */ +#define BUSY_STATE 113 /* deflate -> FINISH_STATE */ +#define FINISH_STATE 666 /* stream complete */ +/* Stream status */ #if !(__ASSEMBLER__ + __LINKER__ + 0) COSMOPOLITAN_C_START_ +/* define NO_GZIP when compiling if you want to disable gzip header and + trailer creation by deflate(). NO_GZIP would be used to avoid linking in + the crc code when it is not needed. For shared libraries, gzip encoding + should be left enabled. */ +#ifndef NO_GZIP +# define GZIP +#endif + +/* =========================================================================== + * Internal compression state. + */ + + /* Data structure describing a single value and its code string. 
*/ typedef struct ct_data_s { - union { - uint16_t freq; /* frequency count */ - uint16_t code; /* bit string */ - } fc; - union { - uint16_t dad; /* father node in Huffman tree */ - uint16_t len; /* length of bit string */ - } dl; -} ct_data; + union { + ush freq; /* frequency count */ + ush code; /* bit string */ + } fc; + union { + ush dad; /* father node in Huffman tree */ + ush len; /* length of bit string */ + } dl; +} FAR ct_data; #define Freq fc.freq #define Code fc.code #define Dad dl.dad #define Len dl.len -typedef struct static_tree_desc_s static_tree_desc; +typedef struct static_tree_desc_s static_tree_desc; typedef struct tree_desc_s { - ct_data *dyn_tree; /* the dynamic tree */ - int max_code; /* largest code with non zero frequency */ - const static_tree_desc *stat_desc; /* the corresponding static tree */ -} tree_desc; + ct_data *dyn_tree; /* the dynamic tree */ + int max_code; /* largest code with non zero frequency */ + const static_tree_desc *stat_desc; /* the corresponding static tree */ +} FAR tree_desc; -typedef uint16_t Pos; -typedef Pos Posf; +typedef ush Pos; +typedef Pos FAR Posf; typedef unsigned IPos; /* A Pos is an index in the character window. We use short instead of int to * save space in the various tables. IPos is used only for parameter passing. */ -struct DeflateState { - z_streamp strm; /* pointer back to this zlib stream */ - int status; /* as the name implies */ - Bytef *pending_buf; /* output still pending */ - uint64_t pending_buf_size; /* size of pending_buf */ - Bytef *pending_out; /* next pending byte to output to the stream */ - uint64_t pending; /* nb of bytes in the pending buffer */ - int wrap; /* bit 0 true for zlib, bit 1 true for gzip */ - gz_headerp gzhead; /* gzip header information to write */ - uint64_t gzindex; /* where in extra, name, or comment */ - Byte method; /* can only be DEFLATED */ - int last_flush; /* value of flush param for previous deflate call */ - unsigned crc0[4 * 5]; - /* used by deflate.c: */ +typedef struct internal_state { + z_streamp strm; /* pointer back to this zlib stream */ + int status; /* as the name implies */ + Bytef *pending_buf; /* output still pending */ + ulg pending_buf_size; /* size of pending_buf */ + Bytef *pending_out; /* next pending byte to output to the stream */ + ulg pending; /* nb of bytes in the pending buffer */ + int wrap; /* bit 0 true for zlib, bit 1 true for gzip */ + gz_headerp gzhead; /* gzip header information to write */ + ulg gzindex; /* where in extra, name, or comment */ + Byte method; /* can only be DEFLATED */ + int last_flush; /* value of flush param for previous deflate call */ + unsigned crc0[4 * 5]; + /* used by deflate.c: */ - uInt w_size; /* LZ77 window size (32K by default) */ - uInt w_bits; /* log2(w_size) (8..16) */ - uInt w_mask; /* w_size - 1 */ + uInt w_size; /* LZ77 window size (32K by default) */ + uInt w_bits; /* log2(w_size) (8..16) */ + uInt w_mask; /* w_size - 1 */ - /* Sliding window. Input bytes are read into the second half of the - window, and move to the first half later to keep a dictionary of at - least wSize bytes. With this organization, matches are limited to a - distance of wSize-MAX_MATCH bytes, but this ensures that IO is - always performed with a length multiple of the block size. Also, it - limits the window size to 64K, which is quite useful on MSDOS. To - do: use the user input buffer as sliding window. */ - Bytef *window; + Bytef *window; + /* Sliding window. 
Input bytes are read into the second half of the window, + * and move to the first half later to keep a dictionary of at least wSize + * bytes. With this organization, matches are limited to a distance of + * wSize-MAX_MATCH bytes, but this ensures that IO is always + * performed with a length multiple of the block size. Also, it limits + * the window size to 64K, which is quite useful on MSDOS. + * To do: use the user input buffer as sliding window. + */ - /* Actual size of window: 2*wSize, except when the user input buffer - is directly used as sliding window. */ - uint64_t window_size; + ulg window_size; + /* Actual size of window: 2*wSize, except when the user input buffer + * is directly used as sliding window. + */ - /* Link to older string with same hash index. To limit the size of - this array to 64K, this link is maintained only for the last 32K - strings. Index in this array is thus a window index modulo 32K. */ - Posf *prev; + Posf *prev; + /* Link to older string with same hash index. To limit the size of this + * array to 64K, this link is maintained only for the last 32K strings. + * An index in this array is thus a window index modulo 32K. + */ - Posf *head; /* Heads of the hash chains or NIL. */ + Posf *head; /* Heads of the hash chains or NIL. */ - uInt ins_h; /* hash index of string to be inserted */ - uInt hash_size; /* number of elements in hash table */ - uInt hash_bits; /* log2(hash_size) */ - uInt hash_mask; /* hash_size-1 */ + uInt ins_h; /* hash index of string to be inserted */ + uInt hash_size; /* number of elements in hash table */ + uInt hash_bits; /* log2(hash_size) */ + uInt hash_mask; /* hash_size-1 */ - /* Number of bits by which ins_h must be shifted at each input step. - It must be such that after MIN_MATCH steps, the oldest byte no - longer takes part in the hash key, that is: hash_shift * MIN_MATCH - >= hash_bits */ - uInt hash_shift; + uInt hash_shift; + /* Number of bits by which ins_h must be shifted at each input + * step. It must be such that after MIN_MATCH steps, the oldest + * byte no longer takes part in the hash key, that is: + * hash_shift * MIN_MATCH >= hash_bits + */ - /* Window position at the beginning of the current output block. Gets - negative when the window is moved backwards. */ - long block_start; + long block_start; + /* Window position at the beginning of the current output block. Gets + * negative when the window is moved backwards. + */ - uInt match_length; /* length of best match */ - IPos prev_match; /* previous match */ - int match_available; /* set if previous match exists */ - uInt strstart; /* start of string to insert */ - uInt match_start; /* start of matching string */ - uInt lookahead; /* number of valid bytes ahead in window */ + uInt match_length; /* length of best match */ + IPos prev_match; /* previous match */ + int match_available; /* set if previous match exists */ + uInt strstart; /* start of string to insert */ + uInt match_start; /* start of matching string */ + uInt lookahead; /* number of valid bytes ahead in window */ - /* Length of the best match at previous step. Matches not greater than - this are discarded. This is used in the lazy match evaluation. */ - uInt prev_length; + uInt prev_length; + /* Length of the best match at previous step. Matches not greater than this + * are discarded. This is used in the lazy match evaluation. + */ - /* To speed up deflation, hash chains are never searched beyond this - length. A higher limit improves compression ratio but degrades the - speed. 
*/ - uInt max_chain_length; + uInt max_chain_length; + /* To speed up deflation, hash chains are never searched beyond this + * length. A higher limit improves compression ratio but degrades the + * speed. + */ - /* Attempt to find a better match only when the current match is - strictly smaller than this value. This mechanism is used only for - compression levels >= 4. */ - uInt max_lazy_match; + uInt max_lazy_match; + /* Attempt to find a better match only when the current match is strictly + * smaller than this value. This mechanism is used only for compression + * levels >= 4. + */ +# define max_insert_length max_lazy_match + /* Insert new strings in the hash table only if the match length is not + * greater than this length. This saves time but degrades compression. + * max_insert_length is used only for compression levels <= 3. + */ - /* Insert new strings in the hash table only if the match length is not - greater than this length. This saves time but degrades compression. - max_insert_length is used only for compression levels <= 3. */ -#define max_insert_length max_lazy_match + int level; /* compression level (1..9) */ + int strategy; /* favor or force Huffman coding*/ - int level; /* compression level (1..9) */ - int strategy; /* favor or force Huffman coding*/ + uInt good_match; + /* Use a faster search when the previous match is longer than this */ - /* Use a faster search when the previous match is longer than this */ - uInt good_match; + int nice_match; /* Stop searching when current match exceeds this */ - int nice_match; /* Stop searching when current match exceeds this */ + /* used by trees.c: */ + /* Didn't use ct_data typedef below to suppress compiler warning */ + struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */ + struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */ + struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */ - /* used by trees.c: */ - /* Didn't use ct_data typedef below to suppress compiler warning */ - struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */ - struct ct_data_s dyn_dtree[2 * D_CODES + 1]; /* distance tree */ - struct ct_data_s bl_tree[2 * BL_CODES + 1]; /* Huffman tree for bit lengths */ + struct tree_desc_s l_desc; /* desc. for literal tree */ + struct tree_desc_s d_desc; /* desc. for distance tree */ + struct tree_desc_s bl_desc; /* desc. for bit length tree */ - struct tree_desc_s l_desc; /* desc. for literal tree */ - struct tree_desc_s d_desc; /* desc. for distance tree */ - struct tree_desc_s bl_desc; /* desc. for bit length tree */ + ush bl_count[MAX_BITS+1]; + /* number of codes at each bit length for an optimal tree */ - /* number of codes at each bit length for an optimal tree */ - uint16_t bl_count[MAX_BITS + 1]; + int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */ + int heap_len; /* number of elements in the heap */ + int heap_max; /* element of largest frequency */ + /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. + * The same heap array is used to build all trees. + */ - /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not - used. The same heap array is used to build all trees. 
*/ - int heap[2 * L_CODES + 1]; /* heap used to build the Huffman trees */ - int heap_len; /* number of elements in the heap */ - int heap_max; /* element of largest frequency */ + uch depth[2*L_CODES+1]; + /* Depth of each subtree used as tie breaker for trees of equal frequency + */ - /* Depth of each subtree used as tie breaker for trees of equal - frequency. */ - uint8_t depth[2 * L_CODES + 1]; + uchf *sym_buf; /* buffer for distances and literals/lengths */ - uint8_t *sym_buf; /* buffer for distances and literals/lengths */ + uInt lit_bufsize; + /* Size of match buffer for literals/lengths. There are 4 reasons for + * limiting lit_bufsize to 64K: + * - frequencies can be kept in 16 bit counters + * - if compression is not successful for the first block, all input + * data is still in the window so we can still emit a stored block even + * when input comes from standard input. (This can also be done for + * all blocks if lit_bufsize is not greater than 32K.) + * - if compression is not successful for a file smaller than 64K, we can + * even emit a stored file instead of a stored block (saving 5 bytes). + * This is applicable only for zip (not gzip or zlib). + * - creating new Huffman trees less frequently may not provide fast + * adaptation to changes in the input data statistics. (Take for + * example a binary file with poorly compressible code followed by + * a highly compressible string table.) Smaller buffer sizes give + * fast adaptation but have of course the overhead of transmitting + * trees more frequently. + * - I can't count above 4 + */ - /* Size of match buffer for literals/lengths. There are 4 reasons for - limiting lit_bufsize to 64K: - - frequencies can be kept in 16 bit counters - - if compression is not successful for the first block, all input - data is still in the window so we can still emit a stored block even - when input comes from standard input. (This can also be done for - all blocks if lit_bufsize is not greater than 32K.) - - if compression is not successful for a file smaller than 64K, we can - even emit a stored file instead of a stored block (saving 5 bytes). - This is applicable only for zip (not gzip or zlib). - - creating new Huffman trees less frequently may not provide fast - adaptation to changes in the input data statistics. (Take for - example a binary file with poorly compressible code followed by - a highly compressible string table.) Smaller buffer sizes give - fast adaptation but have of course the overhead of transmitting - trees more frequently. 
- - I can't count above 4 */ - uInt lit_bufsize; + uInt sym_next; /* running index in sym_buf */ + uInt sym_end; /* symbol table full when sym_next reaches this */ - uInt sym_next; /* running index in sym_buf */ - uInt sym_end; /* symbol table full when sym_next reaches this */ - - uint64_t opt_len; /* bit length of current block with optimal trees */ - uint64_t static_len; /* bit length of current block with static trees */ - uInt matches; /* number of string matches in current block */ - uInt insert; /* bytes at end of window left to insert */ + ulg opt_len; /* bit length of current block with optimal trees */ + ulg static_len; /* bit length of current block with static trees */ + uInt matches; /* number of string matches in current block */ + uInt insert; /* bytes at end of window left to insert */ #ifdef ZLIB_DEBUG - uint64_t compressed_len; /* total bit length of compressed file mod 2^32 */ - uint64_t bits_sent; /* bit length of compressed data sent mod 2^32 */ + ulg compressed_len; /* total bit length of compressed file mod 2^32 */ + ulg bits_sent; /* bit length of compressed data sent mod 2^32 */ #endif - /* Output buffer. bits are inserted starting at the bottom (least - significant bits). */ - uint16_t bi_buf; + ush bi_buf; + /* Output buffer. bits are inserted starting at the bottom (least + * significant bits). + */ + int bi_valid; + /* Number of valid bits in bi_buf. All bits above the last valid bit + * are always zero. + */ - /* Number of valid bits in bi_buf. All bits above the last valid bit - are always zero. */ - int bi_valid; + ulg high_water; + /* High water mark offset in window for initialized bytes -- bytes above + * this are set to zero in order to avoid memory check warnings when + * longest match routines access bytes past the input. This is then + * updated to the new high water mark. + */ - /* High water mark offset in window for initialized bytes -- bytes - above this are set to zero in order to avoid memory check warnings - when longest match routines access bytes past the input. This is - then updated to the new high water mark. */ - uint64_t high_water; -}; + uInt chromium_zlib_hash; + /* 0 if Rabin-Karp rolling hash is enabled, non-zero if chromium zlib + * hash is enabled. + */ + +} FAR deflate_state; /* Output a byte on the stream. * IN assertion: there is enough room in pending_buf. */ -#define put_byte(s, c) \ - { s->pending_buf[s->pending++] = (Bytef)(c); } +#define put_byte(s, c) {s->pending_buf[s->pending++] = (Bytef)(c);} -#define MIN_LOOKAHEAD (MAX_MATCH + MIN_MATCH + 1) + +#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) /* Minimum amount of lookahead, except at the end of the input file. * See deflate.c for comments about the MIN_MATCH+1. */ -#define MAX_DIST(s) ((s)->w_size - MIN_LOOKAHEAD) +#define MAX_DIST(s) ((s)->w_size-MIN_LOOKAHEAD) /* In order to simplify the code, particularly on 16 bit machines, match * distances are limited to MAX_DIST instead of WSIZE. 
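For concreteness, with the standard zlib constants MAX_MATCH == 258 and MIN_MATCH == 3 and a 32K window, the two macros above work out to:

    MIN_LOOKAHEAD = MAX_MATCH + MIN_MATCH + 1 = 258 + 3 + 1 = 262
    MAX_DIST(s)   = w_size - MIN_LOOKAHEAD    = 32768 - 262  = 32506

so match distances are capped 262 bytes short of the full window size.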
*/ @@ -263,59 +287,67 @@ struct DeflateState { /* Number of bytes after end of data in window to initialize in order to avoid memory checker errors from longest match routines */ -/* in trees.c */ -void _tr_init(struct DeflateState *s) hidden; -int _tr_tally(struct DeflateState *s, unsigned dist, unsigned lc) hidden; -void _tr_flush_block(struct DeflateState *s, charf *buf, uint64_t stored_len, - int last) hidden; -void _tr_flush_bits(struct DeflateState *s) hidden; -void _tr_align(struct DeflateState *s) hidden; -void _tr_stored_block(struct DeflateState *s, charf *buf, uint64_t stored_len, - int last) hidden; + /* in trees.c */ +void ZLIB_INTERNAL _tr_init OF((deflate_state *s)); +int ZLIB_INTERNAL _tr_tally OF((deflate_state *s, unsigned dist, unsigned lc)); +void ZLIB_INTERNAL _tr_flush_block OF((deflate_state *s, charf *buf, + ulg stored_len, int last)); +void ZLIB_INTERNAL _tr_flush_bits OF((deflate_state *s)); +void ZLIB_INTERNAL _tr_align OF((deflate_state *s)); +void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf, + ulg stored_len, int last)); -/* Mapping from a distance to a distance code. dist is the distance - 1 - * and must not have side effects. kZlibDistCode[256] and - * kZlibDistCode[257] are never used. - */ #define d_code(dist) \ - ((dist) < 256 ? kZlibDistCode[dist] : kZlibDistCode[256 + ((dist) >> 7)]) - -extern const ct_data kZlibStaticDtree[D_CODES] hidden; -extern const ct_data kZlibStaticLtree[L_CODES + 2] hidden; -extern const int kZlibBaseDist[D_CODES] hidden; -extern const int kZlibBaseLength[LENGTH_CODES] hidden; -extern const uint8_t kZlibDistCode[DIST_CODE_LEN] hidden; -extern const uint8_t kZlibLengthCode[MAX_MATCH - MIN_MATCH + 1] hidden; + ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)]) +/* Mapping from a distance to a distance code. dist is the distance - 1 and + * must not have side effects. _dist_code[256] and _dist_code[257] are never + * used. 
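A hedged restatement of what the tally macros defined just below do for one match; sketch_tally_match is an invented helper, not zlib API. Three bytes go into sym_buf (distance low byte, distance high byte, then the length value the deflate loop passes, i.e. match length minus MIN_MATCH), and the two frequency tables are bumped, with d_code() taking distance - 1 as documented above:

static void sketch_tally_match(deflate_state *s, unsigned distance,
                               unsigned length) {
    s->sym_buf[s->sym_next++] = (uch)distance;
    s->sym_buf[s->sym_next++] = (uch)(distance >> 8);
    s->sym_buf[s->sym_next++] = (uch)length;
    distance--;                             /* d_code() expects distance - 1 */
    s->dyn_ltree[_length_code[length] + LITERALS + 1].Freq++;
    s->dyn_dtree[d_code(distance)].Freq++;
}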
+ */ #ifndef ZLIB_DEBUG /* Inline versions of _tr_tally for speed: */ -#define _tr_tally_lit(s, c, flush) \ - { \ - uint8_t cc = (c); \ - s->sym_buf[s->sym_next++] = 0; \ - s->sym_buf[s->sym_next++] = 0; \ - s->sym_buf[s->sym_next++] = cc; \ - s->dyn_ltree[cc].Freq++; \ - flush = (s->sym_next == s->sym_end); \ - } -#define _tr_tally_dist(s, distance, length, flush) \ - { \ - uint8_t len = (uint8_t)(length); \ - uint16_t dist = (uint16_t)(distance); \ - s->sym_buf[s->sym_next++] = dist; \ - s->sym_buf[s->sym_next++] = dist >> 8; \ - s->sym_buf[s->sym_next++] = len; \ - dist--; \ - s->dyn_ltree[kZlibLengthCode[len] + LITERALS + 1].Freq++; \ - s->dyn_dtree[d_code(dist)].Freq++; \ - flush = (s->sym_next == s->sym_end); \ - } +#if defined(GEN_TREES_H) || !defined(STDC) + extern uch ZLIB_INTERNAL _length_code[]; + extern uch ZLIB_INTERNAL _dist_code[]; #else -#define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) -#define _tr_tally_dist(s, distance, length, flush) \ - flush = _tr_tally(s, distance, length) + extern const uch ZLIB_INTERNAL _length_code[]; + extern const uch ZLIB_INTERNAL _dist_code[]; #endif +# define _tr_tally_lit(s, c, flush) \ + { uch cc = (c); \ + s->sym_buf[s->sym_next++] = 0; \ + s->sym_buf[s->sym_next++] = 0; \ + s->sym_buf[s->sym_next++] = cc; \ + s->dyn_ltree[cc].Freq++; \ + flush = (s->sym_next == s->sym_end); \ + } +# define _tr_tally_dist(s, distance, length, flush) \ + { uch len = (uch)(length); \ + ush dist = (ush)(distance); \ + s->sym_buf[s->sym_next++] = (uch)dist; \ + s->sym_buf[s->sym_next++] = (uch)(dist >> 8); \ + s->sym_buf[s->sym_next++] = len; \ + dist--; \ + s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \ + s->dyn_dtree[d_code(dist)].Freq++; \ + flush = (s->sym_next == s->sym_end); \ + } +#else +# define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) +# define _tr_tally_dist(s, distance, length, flush) \ + flush = _tr_tally(s, distance, length) +#endif + +/* Functions that are SIMD optimised on x86 */ +void ZLIB_INTERNAL crc_fold_init(deflate_state* const s); +void ZLIB_INTERNAL crc_fold_copy(deflate_state* const s, + unsigned char* dst, + const unsigned char* src, + long len); +unsigned ZLIB_INTERNAL crc_fold_512to32(deflate_state* const s); + +COSMOPOLITAN_C_END_ #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ -#endif /* DEFLATE_H */ +#endif /* COSMOPOLITAN_THIRD_PARTY_ZLIB_DEFLATE_INTERNAL_H_ */ diff --git a/third_party/zlib/deflateinit.S b/third_party/zlib/deflateinit.S deleted file mode 100644 index 4035a92bb..000000000 --- a/third_party/zlib/deflateinit.S +++ /dev/null @@ -1,28 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "third_party/zlib/zlib.h" -#include "libc/macros.internal.h" - -deflateInit: - mov $Z_DEFLATED,%edx - mov $MAX_WBITS,%ecx - mov $DEF_MEM_LEVEL,%r8d - mov $Z_DEFAULT_STRATEGY,%r9d - jmp deflateInit2 - .endfn deflateInit,globl diff --git a/third_party/zlib/deflatesse.c b/third_party/zlib/deflatesse.c deleted file mode 100644 index 816802157..000000000 --- a/third_party/zlib/deflatesse.c +++ /dev/null @@ -1,173 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;tab-width:8;coding:utf-8 -*-│ -│vi: set net ft=c ts=4 sts=4 sw=4 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2013 Intel Corporation │ -│ Use of this source code is governed by the BSD-style licenses that can │ -│ be found in the third_party/zlib/LICENSE file. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/intrin/emmintrin.internal.h" -#include "libc/str/str.h" -#include "third_party/zlib/deflate.internal.h" -#include "third_party/zlib/internal.h" -#include "third_party/zlib/zutil.internal.h" - -asm(".ident\t\"\\n\\n\ -zlib » sse2 fill window (zlib License)\\n\ -Copyright 2013 Intel Corporation\\n\ -Authors: Arjan van de Ven, Jim Kukunas\""); -asm(".include \"libc/disclaimer.inc\""); - -/** - * @fileoverview Fill Window with SSE2-optimized hash shifting - */ - -#define UPDATE_HASH(s, h, i) \ - { \ - if (s->level < 6) { \ - h = (3483 * (s->window[i]) + 23081 * (s->window[i + 1]) + \ - 6954 * (s->window[i + 2]) + 20947 * (s->window[i + 3])) & \ - s->hash_mask; \ - } else { \ - h = (25881 * (s->window[i]) + 24674 * (s->window[i + 1]) + \ - 25811 * (s->window[i + 2])) & \ - s->hash_mask; \ - } \ - } - -void fill_window_sse(struct DeflateState *s) { - const __m128i xmm_wsize = _mm_set1_epi16(s->w_size); - - register unsigned n; - register Posf *p; - unsigned more; /* Amount of free space at the end of the window. */ - uInt wsize = s->w_size; - - Assert(s->lookahead < MIN_LOOKAHEAD, "already enough lookahead"); - - do { - more = (unsigned)(s->window_size - (uint64_t)s->lookahead - - (uint64_t)s->strstart); - - /* Deal with !@#$% 64K limit: */ - if (sizeof(int) <= 2) { - if (more == 0 && s->strstart == 0 && s->lookahead == 0) { - more = wsize; - - } else if (more == (unsigned)(-1)) { - /* Very unlikely, but possible on 16 bit machine if - * strstart == 0 && lookahead == 1 (input done a byte at time) - */ - more--; - } - } - - /* If the window is almost full and there is insufficient lookahead, - * move the upper half to the lower one to make room in the upper half. - */ - if (s->strstart >= wsize + MAX_DIST(s)) { - memcpy(s->window, s->window + wsize, (unsigned)wsize); - s->match_start -= wsize; - s->strstart -= wsize; /* we now have strstart >= MAX_DIST */ - s->block_start -= (long)wsize; - - /* Slide the hash table (could be avoided with 32 bit values - at the expense of memory usage). We slide even when level == 0 - to keep the hash table consistent if we switch back to level > 0 - later. 
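The SSE2 loop that follows subtracts w_size from every hash-chain head with unsigned saturation (the prev[] table is slid the same way). A scalar equivalent, shown only to illustrate what the vector code computes (sketch_slide_hash is not part of zlib), would be:

static void sketch_slide_hash(deflate_state *s, unsigned wsize) {
    unsigned n, m;
    for (n = 0; n < s->hash_size; n++) {
        m = s->head[n];
        /* saturating subtract: entries that pointed into the discarded
           half of the window clamp to zero, i.e. become empty chains */
        s->head[n] = (Pos)(m >= wsize ? m - wsize : 0);
    }
}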
(Using level 0 permanently is not an optimal usage of - zlib, so we don't care about this pathological case.) - */ - n = s->hash_size; - p = &s->head[n]; - p -= 8; - do { - __m128i value, result; - value = _mm_loadu_si128((__m128i *)p); - result = _mm_subs_epu16(value, xmm_wsize); - _mm_storeu_si128((__m128i *)p, result); - p -= 8; - n -= 8; - } while (n > 0); - - n = wsize; -#ifndef FASTEST - p = &s->prev[n]; - p -= 8; - do { - __m128i value, result; - value = _mm_loadu_si128((__m128i *)p); - result = _mm_subs_epu16(value, xmm_wsize); - _mm_storeu_si128((__m128i *)p, result); - p -= 8; - n -= 8; - } while (n > 0); -#endif - more += wsize; - } - if (s->strm->avail_in == 0) break; - - /* If there was no sliding: - * strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 && - * more == window_size - lookahead - strstart - * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1) - * => more >= window_size - 2*WSIZE + 2 - * In the BIG_MEM or MMAP case (not yet supported), - * window_size == input_size + MIN_LOOKAHEAD && - * strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD. - * Otherwise, window_size == 2*WSIZE so more >= 2. - * If there was sliding, more >= WSIZE. So in all cases, more >= 2. - */ - Assert(more >= 2, "more < 2"); - - n = deflate_read_buf(s->strm, s->window + s->strstart + s->lookahead, more); - s->lookahead += n; - - /* Initialize the hash value now that we have some input: */ - if (s->lookahead >= MIN_MATCH) { - uInt str = s->strstart; - s->ins_h = s->window[str]; - if (str >= 1) UPDATE_HASH(s, s->ins_h, str + 1 - (MIN_MATCH - 1)); -#if MIN_MATCH != 3 - Call UPDATE_HASH() MIN_MATCH - 3 more times -#endif - } - /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage, - * but this is not important since only literal bytes will be emitted. - */ - - } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0); - - /* If the WIN_INIT bytes after the end of the current data have never been - * written, then zero those bytes in order to avoid memory check reports of - * the use of uninitialized (or uninitialised as Julian writes) bytes by - * the longest match routines. Update the high water mark for the next - * time through here. WIN_INIT is set to MAX_MATCH since the longest match - * routines allow scanning to strstart + MAX_MATCH, ignoring lookahead. - */ - if (s->high_water < s->window_size) { - uint64_t curr = s->strstart + (uint64_t)(s->lookahead); - uint64_t init; - - if (s->high_water < curr) { - /* Previous high water mark below current data -- zero WIN_INIT - * bytes or up to end of window, whichever is less. - */ - init = s->window_size - curr; - if (init > WIN_INIT) init = WIN_INIT; - bzero(s->window + curr, init); - s->high_water = curr + init; - } else if (s->high_water < (uint64_t)curr + WIN_INIT) { - /* High water mark at or above current data, but below current data - * plus WIN_INIT -- zero out to current data plus WIN_INIT, or up - * to end of window, whichever is less. 
- */ - init = (uint64_t)curr + WIN_INIT - s->high_water; - if (init > s->window_size - s->high_water) - init = s->window_size - s->high_water; - bzero(s->window + s->high_water, init); - s->high_water += init; - } - } - - Assert((uint64_t)s->strstart <= s->window_size - MIN_LOOKAHEAD, - "not enough room for search"); -} diff --git a/third_party/zlib/infback.c b/third_party/zlib/infback.c index 0cbc5d37f..8bf5105e3 100644 --- a/third_party/zlib/infback.c +++ b/third_party/zlib/infback.c @@ -1,610 +1,648 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 1995-2016 Jean-loup Gailly and Mark Adler │ -│ Use of this source code is governed by the BSD-style licenses that can │ -│ be found in the third_party/zlib/LICENSE file. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/str/str.h" +/* infback.c -- inflate using a call-back interface + * Copyright (C) 1995-2022 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ #include "third_party/zlib/inffast.internal.h" #include "third_party/zlib/inflate.internal.h" #include "third_party/zlib/inftrees.internal.h" #include "third_party/zlib/internal.h" +#include "third_party/zlib/macros.internal.h" #include "third_party/zlib/zutil.internal.h" - -asm(".ident\t\"\\n\\n\ -zlib (zlib License)\\n\ -Copyright 1995-2017 Jean-loup Gailly and Mark Adler\""); -asm(".include \"libc/disclaimer.inc\""); +// clang-format off /* This code is largely copied from inflate.c. Normally either infback.o or inflate.o would be linked into an application--not both. The interface with inffast.c is retained so that optimized assembler-coded versions of inflate_fast() can be used with either inflate.c or infback.c. -*/ - -/** - * strm provides memory allocation functions in zalloc and zfree, or - * Z_NULL to use the library memory allocation functions. - * - * windowBits is in the range 8..15, and window is a user-supplied - * window and output buffer that is 2**windowBits bytes. */ -int inflateBackInit(z_streamp strm, int windowBits, unsigned char *window) { - struct InflateState *state; - if (strm == Z_NULL || window == Z_NULL || windowBits < 8 || windowBits > 15) { - return Z_STREAM_ERROR; - } - strm->msg = Z_NULL; /* in case we return an error */ - if (strm->zalloc == (alloc_func)0) { - strm->zalloc = zcalloc; - strm->opaque = (voidpf)0; - } - if (strm->zfree == (free_func)0) { + + +/* function prototypes */ +local void fixedtables OF((struct inflate_state FAR *state)); + +/* + strm provides memory allocation functions in zalloc and zfree, or + Z_NULL to use the library memory allocation functions. + + windowBits is in the range 8..15, and window is a user-supplied + window and output buffer that is 2**windowBits bytes. 
+ */ +int ZEXPORT inflateBackInit_(strm, windowBits, window, version, stream_size) +z_streamp strm; +int windowBits; +unsigned char FAR *window; +const char *version; +int stream_size; +{ + struct inflate_state FAR *state; + + if (version == Z_NULL || version[0] != ZLIB_VERSION[0] || + stream_size != (int)(sizeof(z_stream))) + return Z_VERSION_ERROR; + if (strm == Z_NULL || window == Z_NULL || + windowBits < 8 || windowBits > 15) + return Z_STREAM_ERROR; + strm->msg = Z_NULL; /* in case we return an error */ + if (strm->zalloc == (alloc_func)0) { +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zalloc = zcalloc; + strm->opaque = (voidpf)0; +#endif + } + if (strm->zfree == (free_func)0) +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else strm->zfree = zcfree; - } - state = (struct InflateState *)ZALLOC(strm, 1, sizeof(struct InflateState)); - if (state == Z_NULL) return Z_MEM_ERROR; - Tracev(("inflate: allocated\n")); - strm->state = (struct DeflateState *)state; - state->dmax = 32768U; - state->wbits = (uInt)windowBits; - state->wsize = 1U << windowBits; - state->window = window; - state->wnext = 0; - state->whave = 0; - state->sane = 0; - return Z_OK; +#endif + state = (struct inflate_state FAR *)ZALLOC(strm, 1, + sizeof(struct inflate_state)); + if (state == Z_NULL) return Z_MEM_ERROR; + Tracev(("inflate: allocated\n")); + strm->state = (struct internal_state FAR *)state; + state->dmax = 32768U; + state->wbits = (uInt)windowBits; + state->wsize = 1U << windowBits; + state->window = window; + state->wnext = 0; + state->whave = 0; + state->sane = 1; + return Z_OK; } -/** - * Returns state with length and distance decoding tables and index - * sizes set to fixed code decoding. Normally this returns fixed tables - * from inffixed.h. If BUILDFIXED is defined, then instead this routine - * builds the tables the first time it's called, and returns those - * tables the first time and thereafter. This reduces the size of the - * code by about 2K bytes, in exchange for a little execution time. - * However, BUILDFIXED should not be used for threaded applications, - * since the rewriting of the tables and virgin may not be thread-safe. +/* + Return state with length and distance decoding tables and index sizes set to + fixed code decoding. Normally this returns fixed tables from inffixed.h. + If BUILDFIXED is defined, then instead this routine builds the tables the + first time it's called, and returns those tables the first time and + thereafter. This reduces the size of the code by about 2K bytes, in + exchange for a little execution time. However, BUILDFIXED should not be + used for threaded applications, since the rewriting of the tables and virgin + may not be thread-safe. 
*/ -static void fixedtables(struct InflateState *state) { +local void fixedtables(state) +struct inflate_state FAR *state; +{ #ifdef BUILDFIXED - static int virgin = 1; - static code *lenfix, *distfix; - static code fixed[544]; - /* build fixed huffman tables if first call (may not be thread safe) */ - if (virgin) { - unsigned sym, bits; - static code *next; - /* literal/length table */ - sym = 0; - while (sym < 144) state->lens[sym++] = 8; - while (sym < 256) state->lens[sym++] = 9; - while (sym < 280) state->lens[sym++] = 7; - while (sym < 288) state->lens[sym++] = 8; - next = fixed; - lenfix = next; - bits = 9; - inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work); - /* distance table */ - sym = 0; - while (sym < 32) state->lens[sym++] = 5; - distfix = next; - bits = 5; - inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work); - /* do this just once */ - virgin = 0; - } - state->lencode = lenfix; - state->distcode = distfix; + static int virgin = 1; + static code *lenfix, *distfix; + static code fixed[544]; + + /* build fixed huffman tables if first call (may not be thread safe) */ + if (virgin) { + unsigned sym, bits; + static code *next; + + /* literal/length table */ + sym = 0; + while (sym < 144) state->lens[sym++] = 8; + while (sym < 256) state->lens[sym++] = 9; + while (sym < 280) state->lens[sym++] = 7; + while (sym < 288) state->lens[sym++] = 8; + next = fixed; + lenfix = next; + bits = 9; + inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work); + + /* distance table */ + sym = 0; + while (sym < 32) state->lens[sym++] = 5; + distfix = next; + bits = 5; + inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work); + + /* do this just once */ + virgin = 0; + } #else /* !BUILDFIXED */ - state->lencode = kZlibLenfix; - state->distcode = kZlibDistfix; +#include "third_party/zlib/inffixed.inc" #endif /* BUILDFIXED */ - state->lenbits = 9; - state->distbits = 5; + state->lencode = lenfix; + state->lenbits = 9; + state->distcode = distfix; + state->distbits = 5; } /* Macros for inflateBack(): */ /* Load returned state from inflate_fast() */ -#define LOAD() \ - do { \ - put = strm->next_out; \ - left = strm->avail_out; \ - next = strm->next_in; \ - have = strm->avail_in; \ - hold = state->hold; \ - bits = state->bits; \ - } while (0) +#define LOAD() \ + do { \ + put = strm->next_out; \ + left = strm->avail_out; \ + next = strm->next_in; \ + have = strm->avail_in; \ + hold = state->hold; \ + bits = state->bits; \ + } while (0) /* Set state from registers for inflate_fast() */ -#define RESTORE() \ - do { \ - strm->next_out = put; \ - strm->avail_out = left; \ - strm->next_in = next; \ - strm->avail_in = have; \ - state->hold = hold; \ - state->bits = bits; \ - } while (0) +#define RESTORE() \ + do { \ + strm->next_out = put; \ + strm->avail_out = left; \ + strm->next_in = next; \ + strm->avail_in = have; \ + state->hold = hold; \ + state->bits = bits; \ + } while (0) /* Clear the input bit accumulator */ #define INITBITS() \ - do { \ - hold = 0; \ - bits = 0; \ - } while (0) + do { \ + hold = 0; \ + bits = 0; \ + } while (0) /* Assure that some input is available. If input is requested, but denied, then return a Z_BUF_ERROR from inflateBack(). 
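As a stand-alone illustration of the bit-accumulator discipline shared by the input macros in this block (PULL()/PULLBYTE()/NEEDBITS()/BITS()/DROPBITS(), defined just below), here is a hedged sketch with the input-refill and error paths omitted; sketch_read_bits and its pointer parameters are invented for the example:

static unsigned sketch_read_bits(const unsigned char **next, unsigned *have,
                                 unsigned long *hold, unsigned *bits,
                                 unsigned n) {
    unsigned val;
    while (*bits < n) {                       /* NEEDBITS(n)                 */
        (*have)--;                            /* PULLBYTE(): consume a byte  */
        *hold += (unsigned long)(*(*next)++) << *bits;
        *bits += 8;
    }
    val = (unsigned)*hold & ((1U << n) - 1);  /* BITS(n): low n bits         */
    *hold >>= n;                              /* DROPBITS(n)                 */
    *bits -= n;
    return val;
}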
*/ -#define PULL() \ - do { \ - if (have == 0) { \ - have = in(in_desc, &next); \ - if (have == 0) { \ - next = Z_NULL; \ - ret = Z_BUF_ERROR; \ - goto inf_leave; \ - } \ - } \ - } while (0) +#define PULL() \ + do { \ + if (have == 0) { \ + have = in(in_desc, &next); \ + if (have == 0) { \ + next = Z_NULL; \ + ret = Z_BUF_ERROR; \ + goto inf_leave; \ + } \ + } \ + } while (0) /* Get a byte of input into the bit accumulator, or return from inflateBack() with an error if there is no input available. */ -#define PULLBYTE() \ - do { \ - PULL(); \ - have--; \ - hold += (unsigned long)(*next++) << bits; \ - bits += 8; \ - } while (0) +#define PULLBYTE() \ + do { \ + PULL(); \ + have--; \ + hold += (unsigned long)(*next++) << bits; \ + bits += 8; \ + } while (0) /* Assure that there are at least n bits in the bit accumulator. If there is not enough available input to do that, then return from inflateBack() with an error. */ -#define NEEDBITS(n) \ - do { \ - while (bits < (unsigned)(n)) PULLBYTE(); \ - } while (0) +#define NEEDBITS(n) \ + do { \ + while (bits < (unsigned)(n)) \ + PULLBYTE(); \ + } while (0) /* Return the low n bits of the bit accumulator (n < 16) */ -#define BITS(n) ((unsigned)hold & ((1U << (n)) - 1)) +#define BITS(n) \ + ((unsigned)hold & ((1U << (n)) - 1)) /* Remove n bits from the bit accumulator */ -#define DROPBITS(n) \ - do { \ - hold >>= (n); \ - bits -= (unsigned)(n); \ - } while (0) +#define DROPBITS(n) \ + do { \ + hold >>= (n); \ + bits -= (unsigned)(n); \ + } while (0) /* Remove zero to seven bits as needed to go to a byte boundary */ -#define BYTEBITS() \ - do { \ - hold >>= bits & 7; \ - bits -= bits & 7; \ - } while (0) +#define BYTEBITS() \ + do { \ + hold >>= bits & 7; \ + bits -= bits & 7; \ + } while (0) /* Assure that some output space is available, by writing out the window if it's full. If the write fails, return from inflateBack() with a Z_BUF_ERROR. */ -#define ROOM() \ - do { \ - if (left == 0) { \ - put = state->window; \ - left = state->wsize; \ - state->whave = left; \ - if (out(out_desc, put, left)) { \ - ret = Z_BUF_ERROR; \ - goto inf_leave; \ - } \ - } \ - } while (0) +#define ROOM() \ + do { \ + if (left == 0) { \ + put = state->window; \ + left = state->wsize; \ + state->whave = left; \ + if (out(out_desc, put, left)) { \ + ret = Z_BUF_ERROR; \ + goto inf_leave; \ + } \ + } \ + } while (0) -/** - * strm provides the memory allocation functions and window buffer on - * input, and provides information on the unused input on return. For - * Z_DATA_ERROR returns, strm will also provide an error message. - * - * in() and out() are the call-back input and output functions. When - * inflateBack() needs more input, it calls in(). When inflateBack() has - * filled the window with output, or when it completes with data in the - * window, it calls out() to write out the data. The application must - * not change the provided input until in() is called again or - * inflateBack() returns. The application must not change the - * window/output buffer until inflateBack() returns. - * - * in() and out() are called with a descriptor parameter provided in the - * inflateBack() call. This parameter can be a structure that provides - * the information required to do the read or write, as well as - * accumulated information on the input and output such as totals and - * check values. - * - * in() should return zero on failure. out() should return non-zero on - * failure. If either in() or out() fails, than inflateBack() returns a - * Z_BUF_ERROR. 
strm->next_in can be checked for Z_NULL to see whether - * it was in() or out() that caused in the error. Otherwise, - * inflateBack() returns Z_STREAM_END on success, Z_DATA_ERROR for an - * deflate format error, or Z_MEM_ERROR if it could not allocate memory - * for the state. inflateBack() can also return Z_STREAM_ERROR if the - * input parameters are not correct, i.e. strm is Z_NULL or the state - * was not initialized. +/* + strm provides the memory allocation functions and window buffer on input, + and provides information on the unused input on return. For Z_DATA_ERROR + returns, strm will also provide an error message. + + in() and out() are the call-back input and output functions. When + inflateBack() needs more input, it calls in(). When inflateBack() has + filled the window with output, or when it completes with data in the + window, it calls out() to write out the data. The application must not + change the provided input until in() is called again or inflateBack() + returns. The application must not change the window/output buffer until + inflateBack() returns. + + in() and out() are called with a descriptor parameter provided in the + inflateBack() call. This parameter can be a structure that provides the + information required to do the read or write, as well as accumulated + information on the input and output such as totals and check values. + + in() should return zero on failure. out() should return non-zero on + failure. If either in() or out() fails, than inflateBack() returns a + Z_BUF_ERROR. strm->next_in can be checked for Z_NULL to see whether it + was in() or out() that caused in the error. Otherwise, inflateBack() + returns Z_STREAM_END on success, Z_DATA_ERROR for an deflate format + error, or Z_MEM_ERROR if it could not allocate memory for the state. + inflateBack() can also return Z_STREAM_ERROR if the input parameters + are not correct, i.e. strm is Z_NULL or the state was not initialized. 
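A minimal usage sketch of the call-back interface described above, assuming a 32K window (windowBits 15); my_in(), my_out(), and sketch_inflate_back() are illustrative placeholders, and real callbacks would supply and consume actual data:

#include "third_party/zlib/zlib.h"

static unsigned my_in(void *desc, z_const unsigned char **buf) {
    /* point *buf at the next chunk of compressed input and return its
       size; returning 0 signals failure or no more input */
    return 0;
}

static int my_out(void *desc, unsigned char *buf, unsigned len) {
    /* consume len bytes of decompressed output; non-zero means failure */
    return 0;
}

static int sketch_inflate_back(void) {
    static unsigned char window[32768];     /* 2**15 bytes for windowBits 15 */
    z_stream strm = {0};
    int ret = inflateBackInit(&strm, 15, window);
    if (ret != Z_OK) return ret;
    ret = inflateBack(&strm, my_in, NULL, my_out, NULL);
    inflateBackEnd(&strm);
    return ret;                             /* Z_STREAM_END on success */
}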
*/ -int inflateBack(z_streamp strm, in_func in, void *in_desc, out_func out, - void *out_desc) { - struct InflateState *state; - const unsigned char *next; /* next input */ - unsigned char *put; /* next output */ - unsigned have, left; /* available input and output */ - unsigned long hold; /* bit buffer */ - unsigned bits; /* bits in bit buffer */ - unsigned copy; /* number of stored or match bytes to copy */ - unsigned char *from; /* where to copy match bytes from */ - struct zcode here; /* current decoding table entry */ - struct zcode last; /* parent table entry */ - unsigned len; /* length to copy for repeats, bits to drop */ - int ret; /* return code */ - static const unsigned short order[19] = /* permutation of code lengths */ - {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; +int ZEXPORT inflateBack(strm, in, in_desc, out, out_desc) +z_streamp strm; +in_func in; +void FAR *in_desc; +out_func out; +void FAR *out_desc; +{ + struct inflate_state FAR *state; + z_const unsigned char FAR *next; /* next input */ + unsigned char FAR *put; /* next output */ + unsigned have, left; /* available input and output */ + unsigned long hold; /* bit buffer */ + unsigned bits; /* bits in bit buffer */ + unsigned copy; /* number of stored or match bytes to copy */ + unsigned char FAR *from; /* where to copy match bytes from */ + code here; /* current decoding table entry */ + code last; /* parent table entry */ + unsigned len; /* length to copy for repeats, bits to drop */ + int ret; /* return code */ + static const unsigned short order[19] = /* permutation of code lengths */ + {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; - /* Check that the strm exists and that the state was initialized */ - if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; - state = (struct InflateState *)strm->state; + /* Check that the strm exists and that the state was initialized */ + if (strm == Z_NULL || strm->state == Z_NULL) + return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; - /* Reset the state */ - strm->msg = Z_NULL; - state->mode = TYPE; - state->last = 0; - state->whave = 0; - next = strm->next_in; - have = next != Z_NULL ? strm->avail_in : 0; - hold = 0; - bits = 0; - put = state->window; - left = state->wsize; + /* Reset the state */ + strm->msg = Z_NULL; + state->mode = TYPE; + state->last = 0; + state->whave = 0; + next = strm->next_in; + have = next != Z_NULL ? strm->avail_in : 0; + hold = 0; + bits = 0; + put = state->window; + left = state->wsize; - /* Inflate until end of block marked as last */ - for (;;) switch (state->mode) { - case TYPE: - /* determine and dispatch block type */ - if (state->last) { - BYTEBITS(); - state->mode = DONE; - break; - } - NEEDBITS(3); - state->last = BITS(1); - DROPBITS(1); - switch (BITS(2)) { - case 0: /* stored block */ - Tracev(("inflate: stored block%s\n", - state->last ? " (last)" : "")); - state->mode = STORED; + /* Inflate until end of block marked as last */ + for (;;) + switch (state->mode) { + case TYPE: + /* determine and dispatch block type */ + if (state->last) { + BYTEBITS(); + state->mode = DONE; + break; + } + NEEDBITS(3); + state->last = BITS(1); + DROPBITS(1); + switch (BITS(2)) { + case 0: /* stored block */ + Tracev(("inflate: stored block%s\n", + state->last ? " (last)" : "")); + state->mode = STORED; + break; + case 1: /* fixed block */ + fixedtables(state); + Tracev(("inflate: fixed codes block%s\n", + state->last ? 
" (last)" : "")); + state->mode = LEN; /* decode codes */ + break; + case 2: /* dynamic block */ + Tracev(("inflate: dynamic codes block%s\n", + state->last ? " (last)" : "")); + state->mode = TABLE; + break; + case 3: + strm->msg = (char *)"invalid block type"; + state->mode = BAD; + } + DROPBITS(2); break; - case 1: /* fixed block */ - fixedtables(state); - Tracev(("inflate: fixed codes block%s\n", - state->last ? " (last)" : "")); - state->mode = LEN; /* decode codes */ - break; - case 2: /* dynamic block */ - Tracev(("inflate: dynamic codes block%s\n", - state->last ? " (last)" : "")); - state->mode = TABLE; - break; - case 3: - strm->msg = (char *)"invalid block type"; - state->mode = BAD; - } - DROPBITS(2); - break; - case STORED: - /* get and verify stored block length */ - BYTEBITS(); /* go to byte boundary */ - NEEDBITS(32); - if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) { - strm->msg = (char *)"invalid stored block lengths"; - state->mode = BAD; - break; - } - state->length = (unsigned)hold & 0xffff; - Tracev(("inflate: stored length %u\n", state->length)); - INITBITS(); - - /* copy stored block from input to output */ - while (state->length != 0) { - copy = state->length; - PULL(); - ROOM(); - if (copy > have) copy = have; - if (copy > left) copy = left; - memcpy(put, next, copy); - have -= copy; - next += copy; - left -= copy; - put += copy; - state->length -= copy; - } - Tracev(("inflate: stored end\n")); - state->mode = TYPE; - break; - - case TABLE: - /* get dynamic table entries descriptor */ - NEEDBITS(14); - state->nlen = BITS(5) + 257; - DROPBITS(5); - state->ndist = BITS(5) + 1; - DROPBITS(5); - state->ncode = BITS(4) + 4; - DROPBITS(4); -#ifndef PKZIP_BUG_WORKAROUND - if (state->nlen > 286 || state->ndist > 30) { - strm->msg = (char *)"too many length or distance symbols"; - state->mode = BAD; - break; - } -#endif - Tracev(("inflate: table sizes ok\n")); - - /* get code length code lengths (not a typo) */ - state->have = 0; - while (state->have < state->ncode) { - NEEDBITS(3); - state->lens[order[state->have++]] = (unsigned short)BITS(3); - DROPBITS(3); - } - while (state->have < 19) state->lens[order[state->have++]] = 0; - state->next = state->codes; - state->lencode = (const struct zcode *)(state->next); - state->lenbits = 7; - ret = inflate_table(CODES, state->lens, 19, &(state->next), - &(state->lenbits), state->work); - if (ret) { - strm->msg = (char *)"invalid code lengths set"; - state->mode = BAD; - break; - } - Tracev(("inflate: code lengths ok\n")); - - /* get length and distance code code lengths */ - state->have = 0; - while (state->have < state->nlen + state->ndist) { - for (;;) { - here = state->lencode[BITS(state->lenbits)]; - if ((unsigned)(here.bits) <= bits) break; - PULLBYTE(); - } - if (here.val < 16) { - DROPBITS(here.bits); - state->lens[state->have++] = here.val; - } else { - if (here.val == 16) { - NEEDBITS(here.bits + 2); - DROPBITS(here.bits); - if (state->have == 0) { - strm->msg = (char *)"invalid bit length repeat"; + case STORED: + /* get and verify stored block length */ + BYTEBITS(); /* go to byte boundary */ + NEEDBITS(32); + if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) { + strm->msg = (char *)"invalid stored block lengths"; state->mode = BAD; break; - } - len = (unsigned)(state->lens[state->have - 1]); - copy = 3 + BITS(2); - DROPBITS(2); - } else if (here.val == 17) { - NEEDBITS(here.bits + 3); - DROPBITS(here.bits); - len = 0; - copy = 3 + BITS(3); - DROPBITS(3); - } else { - NEEDBITS(here.bits + 7); - DROPBITS(here.bits); - 
len = 0; - copy = 11 + BITS(7); - DROPBITS(7); } - if (state->have + copy > state->nlen + state->ndist) { - strm->msg = (char *)"invalid bit length repeat"; - state->mode = BAD; - break; + state->length = (unsigned)hold & 0xffff; + Tracev(("inflate: stored length %u\n", + state->length)); + INITBITS(); + + /* copy stored block from input to output */ + while (state->length != 0) { + copy = state->length; + PULL(); + ROOM(); + if (copy > have) copy = have; + if (copy > left) copy = left; + zmemcpy(put, next, copy); + have -= copy; + next += copy; + left -= copy; + put += copy; + state->length -= copy; } - while (copy--) state->lens[state->have++] = (unsigned short)len; - } - } + Tracev(("inflate: stored end\n")); + state->mode = TYPE; + break; - /* handle error breaks in while */ - if (state->mode == BAD) break; + case TABLE: + /* get dynamic table entries descriptor */ + NEEDBITS(14); + state->nlen = BITS(5) + 257; + DROPBITS(5); + state->ndist = BITS(5) + 1; + DROPBITS(5); + state->ncode = BITS(4) + 4; + DROPBITS(4); +#ifndef PKZIP_BUG_WORKAROUND + if (state->nlen > 286 || state->ndist > 30) { + strm->msg = (char *)"too many length or distance symbols"; + state->mode = BAD; + break; + } +#endif + Tracev(("inflate: table sizes ok\n")); - /* check for end-of-block code (better have one) */ - if (state->lens[256] == 0) { - strm->msg = (char *)"invalid code -- missing end-of-block"; - state->mode = BAD; - break; - } + /* get code length code lengths (not a typo) */ + state->have = 0; + while (state->have < state->ncode) { + NEEDBITS(3); + state->lens[order[state->have++]] = (unsigned short)BITS(3); + DROPBITS(3); + } + while (state->have < 19) + state->lens[order[state->have++]] = 0; + state->next = state->codes; + state->lencode = (code const FAR *)(state->next); + state->lenbits = 7; + ret = inflate_table(CODES, state->lens, 19, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid code lengths set"; + state->mode = BAD; + break; + } + Tracev(("inflate: code lengths ok\n")); - /* build code tables -- note: do not change the lenbits or distbits - values here (9 and 6) without reading the comments in inftrees.h - concerning the ENOUGH constants, which depend on those values */ - state->next = state->codes; - state->lencode = (const struct zcode *)(state->next); - state->lenbits = 9; - ret = inflate_table(LENS, state->lens, state->nlen, &(state->next), - &(state->lenbits), state->work); - if (ret) { - strm->msg = (char *)"invalid literal/lengths set"; - state->mode = BAD; - break; - } - state->distcode = (const struct zcode *)(state->next); - state->distbits = 6; - ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist, + /* get length and distance code code lengths */ + state->have = 0; + while (state->have < state->nlen + state->ndist) { + for (;;) { + here = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if (here.val < 16) { + DROPBITS(here.bits); + state->lens[state->have++] = here.val; + } + else { + if (here.val == 16) { + NEEDBITS(here.bits + 2); + DROPBITS(here.bits); + if (state->have == 0) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + len = (unsigned)(state->lens[state->have - 1]); + copy = 3 + BITS(2); + DROPBITS(2); + } + else if (here.val == 17) { + NEEDBITS(here.bits + 3); + DROPBITS(here.bits); + len = 0; + copy = 3 + BITS(3); + DROPBITS(3); + } + else { + NEEDBITS(here.bits + 7); + DROPBITS(here.bits); + len = 0; + copy = 11 + 
BITS(7); + DROPBITS(7); + } + if (state->have + copy > state->nlen + state->ndist) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + while (copy--) + state->lens[state->have++] = (unsigned short)len; + } + } + + /* handle error breaks in while */ + if (state->mode == BAD) break; + + /* check for end-of-block code (better have one) */ + if (state->lens[256] == 0) { + strm->msg = (char *)"invalid code -- missing end-of-block"; + state->mode = BAD; + break; + } + + /* build code tables -- note: do not change the lenbits or distbits + values here (10 and 9) without reading the comments in inftrees.h + concerning the ENOUGH constants, which depend on those values */ + state->next = state->codes; + state->lencode = (code const FAR *)(state->next); + state->lenbits = 10; + ret = inflate_table(LENS, state->lens, state->nlen, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid literal/lengths set"; + state->mode = BAD; + break; + } + state->distcode = (code const FAR *)(state->next); + state->distbits = 9; + ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist, &(state->next), &(state->distbits), state->work); - if (ret) { - strm->msg = (char *)"invalid distances set"; - state->mode = BAD; - break; - } - Tracev(("inflate: codes ok\n")); - state->mode = LEN; + if (ret) { + strm->msg = (char *)"invalid distances set"; + state->mode = BAD; + break; + } + Tracev(("inflate: codes ok\n")); + state->mode = LEN; + /* fallthrough */ - case LEN: - /* use inflate_fast() if we have enough input and output */ - if (have >= INFLATE_FAST_MIN_INPUT && left >= INFLATE_FAST_MIN_OUTPUT) { - RESTORE(); - if (state->whave < state->wsize) state->whave = state->wsize - left; - inflate_fast(strm, state->wsize); - LOAD(); - break; + case LEN: + /* use inflate_fast() if we have enough input and output */ + if (have >= INFLATE_FAST_MIN_INPUT && + left >= INFLATE_FAST_MIN_OUTPUT) { + RESTORE(); + if (state->whave < state->wsize) + state->whave = state->wsize - left; + inflate_fast(strm, state->wsize); + LOAD(); + break; + } + + /* get a literal, length, or end-of-block code */ + for (;;) { + here = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if (here.op && (here.op & 0xf0) == 0) { + last = here; + for (;;) { + here = state->lencode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + here.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + } + DROPBITS(here.bits); + state->length = (unsigned)here.val; + + /* process literal */ + if (here.op == 0) { + Tracevv((here.val >= 0x20 && here.val < 0x7f ? 
+ "inflate: literal '%c'\n" : + "inflate: literal 0x%02x\n", here.val)); + ROOM(); + *put++ = (unsigned char)(state->length); + left--; + state->mode = LEN; + break; + } + + /* process end of block */ + if (here.op & 32) { + Tracevv(("inflate: end of block\n")); + state->mode = TYPE; + break; + } + + /* invalid code */ + if (here.op & 64) { + strm->msg = (char *)"invalid literal/length code"; + state->mode = BAD; + break; + } + + /* length code -- get extra bits, if any */ + state->extra = (unsigned)(here.op) & 15; + if (state->extra != 0) { + NEEDBITS(state->extra); + state->length += BITS(state->extra); + DROPBITS(state->extra); + } + Tracevv(("inflate: length %u\n", state->length)); + + /* get distance code */ + for (;;) { + here = state->distcode[BITS(state->distbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if ((here.op & 0xf0) == 0) { + last = here; + for (;;) { + here = state->distcode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + here.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + } + DROPBITS(here.bits); + if (here.op & 64) { + strm->msg = (char *)"invalid distance code"; + state->mode = BAD; + break; + } + state->offset = (unsigned)here.val; + + /* get distance extra bits, if any */ + state->extra = (unsigned)(here.op) & 15; + if (state->extra != 0) { + NEEDBITS(state->extra); + state->offset += BITS(state->extra); + DROPBITS(state->extra); + } + if (state->offset > state->wsize - (state->whave < state->wsize ? + left : 0)) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } + Tracevv(("inflate: distance %u\n", state->offset)); + + /* copy match from window to output */ + do { + ROOM(); + copy = state->wsize - state->offset; + if (copy < left) { + from = put + copy; + copy = left - copy; + } + else { + from = put - state->offset; + copy = left; + } + if (copy > state->length) copy = state->length; + state->length -= copy; + left -= copy; + do { + *put++ = *from++; + } while (--copy); + } while (state->length != 0); + break; + + case DONE: + /* inflate stream terminated properly */ + ret = Z_STREAM_END; + goto inf_leave; + + case BAD: + ret = Z_DATA_ERROR; + goto inf_leave; + + default: + /* can't happen, but makes compilers happy */ + ret = Z_STREAM_ERROR; + goto inf_leave; } - /* get a literal, length, or end-of-block code */ - for (;;) { - here = state->lencode[BITS(state->lenbits)]; - if ((unsigned)(here.bits) <= bits) break; - PULLBYTE(); - } - if (here.op && (here.op & 0xf0) == 0) { - last = here; - for (;;) { - here = state->lencode[last.val + - (BITS(last.bits + last.op) >> last.bits)]; - if ((unsigned)(last.bits + here.bits) <= bits) break; - PULLBYTE(); - } - DROPBITS(last.bits); - } - DROPBITS(here.bits); - state->length = (unsigned)here.val; - - /* process literal */ - if (here.op == 0) { - Tracevv((here.val >= 0x20 && here.val < 0x7f - ? 
"inflate: literal '%c'\n" - : "inflate: literal 0x%02x\n", - here.val)); - ROOM(); - *put++ = (unsigned char)(state->length); - left--; - state->mode = LEN; - break; - } - - /* process end of block */ - if (here.op & 32) { - Tracevv(("inflate: end of block\n")); - state->mode = TYPE; - break; - } - - /* invalid code */ - if (here.op & 64) { - strm->msg = (char *)"invalid literal/length code"; - state->mode = BAD; - break; - } - - /* length code -- get extra bits, if any */ - state->extra = (unsigned)(here.op) & 15; - if (state->extra != 0) { - NEEDBITS(state->extra); - state->length += BITS(state->extra); - DROPBITS(state->extra); - } - Tracevv(("inflate: length %u\n", state->length)); - - /* get distance code */ - for (;;) { - here = state->distcode[BITS(state->distbits)]; - if ((unsigned)(here.bits) <= bits) break; - PULLBYTE(); - } - if ((here.op & 0xf0) == 0) { - last = here; - for (;;) { - here = state->distcode[last.val + - (BITS(last.bits + last.op) >> last.bits)]; - if ((unsigned)(last.bits + here.bits) <= bits) break; - PULLBYTE(); - } - DROPBITS(last.bits); - } - DROPBITS(here.bits); - if (here.op & 64) { - strm->msg = (char *)"invalid distance code"; - state->mode = BAD; - break; - } - state->offset = (unsigned)here.val; - - /* get distance extra bits, if any */ - state->extra = (unsigned)(here.op) & 15; - if (state->extra != 0) { - NEEDBITS(state->extra); - state->offset += BITS(state->extra); - DROPBITS(state->extra); - } - if (state->offset > - state->wsize - (state->whave < state->wsize ? left : 0)) { - strm->msg = (char *)"invalid distance too far back"; - state->mode = BAD; - break; - } - Tracevv(("inflate: distance %u\n", state->offset)); - - /* copy match from window to output */ - do { - ROOM(); - copy = state->wsize - state->offset; - if (copy < left) { - from = put + copy; - copy = left - copy; - } else { - from = put - state->offset; - copy = left; - } - if (copy > state->length) copy = state->length; - state->length -= copy; - left -= copy; - do { - *put++ = *from++; - } while (--copy); - } while (state->length != 0); - break; - - case DONE: - /* inflate stream terminated properly */ - ret = Z_STREAM_END; - goto inf_leave; - - case BAD: - ret = Z_DATA_ERROR; - goto inf_leave; - - default: - /* can't happen, but makes compilers happy */ - ret = Z_STREAM_ERROR; - goto inf_leave; + /* Write leftover output and return unused input */ + inf_leave: + if (left < state->wsize) { + if (out(out_desc, state->window, state->wsize - left) && + ret == Z_STREAM_END) + ret = Z_BUF_ERROR; } - - /* Write leftover output and return unused input */ -inf_leave: - if (left < state->wsize) { - if (out(out_desc, state->window, state->wsize - left) && - ret == Z_STREAM_END) - ret = Z_BUF_ERROR; - } - strm->next_in = next; - strm->avail_in = have; - return ret; + strm->next_in = next; + strm->avail_in = have; + return ret; } -int inflateBackEnd(z_streamp strm) { - if (strm == Z_NULL || strm->state == Z_NULL || strm->zfree == (free_func)0) - return Z_STREAM_ERROR; - ZFREE(strm, strm->state); - strm->state = Z_NULL; - Tracev(("inflate: end\n")); - return Z_OK; +int ZEXPORT inflateBackEnd(strm) +z_streamp strm; +{ + if (strm == Z_NULL || strm->state == Z_NULL || strm->zfree == (free_func)0) + return Z_STREAM_ERROR; + ZFREE(strm, strm->state); + strm->state = Z_NULL; + Tracev(("inflate: end\n")); + return Z_OK; } diff --git a/third_party/zlib/inffast.c b/third_party/zlib/inffast.c index 896f64f19..95b130a79 100644 --- a/third_party/zlib/inffast.c +++ b/third_party/zlib/inffast.c @@ -1,307 +1,334 
@@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;tab-width:8;coding:utf-8 -*-│ -│vi: set net ft=c ts=4 sts=4 sw=4 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 1995-2017 Mark Adler │ -│ Use of this source code is governed by the BSD-style licenses that can │ -│ be found in the third_party/zlib/LICENSE file. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "third_party/zlib/inffast.internal.h" -#include "third_party/zlib/inflate.internal.h" -#include "third_party/zlib/inftrees.internal.h" -#include "third_party/zlib/zutil.internal.h" - -asm(".ident\t\"\\n\\n\ -zlib (zlib License)\\n\ -Copyright 1995-2017 Jean-loup Gailly and Mark Adler\""); -asm(".include \"libc/disclaimer.inc\""); - -/** - * Decodes literal, length, and distance codes and write out the - * resulting literal and match bytes until either not enough input or - * output is available, an end-of-block is encountered, or a data error - * is encountered. When large enough input and output buffers are - * supplied to inflate(), for example, a 16K input buffer and a 64K - * output buffer, more than 95% of the inflate() execution time is spent - * in this routine. - * - * Entry assumptions: - * - * state->mode == LEN - * strm->avail_in >= INFLATE_FAST_MIN_INPUT (6 bytes) - * strm->avail_out >= INFLATE_FAST_MIN_OUTPUT (258 bytes) - * start >= strm->avail_out - * state->bits < 8 - * - * On return, state->mode is one of: - * - * LEN -- ran out of enough output space or enough available input - * TYPE -- reached end of block code, inflate() to interpret next block - * BAD -- error in block data - * - * Some notes: - * - * INFLATE_FAST_MIN_INPUT: 6 bytes - * - * - The maximum input bits used by a length/distance pair is 15 bits - * for the length code, 5 bits for the length extra, 15 bits for the - * distance code, and 13 bits for the distance extra. This totals 48 - * bits, or six bytes. Therefore if strm->avail_in >= 6, then there - * is enough input to avoid checking for available input while - * decoding. - * - * INFLATE_FAST_MIN_OUTPUT: 258 bytes - * - * - The maximum bytes that a single length/distance pair can output is - * 258 bytes, which is the maximum length that can be coded. - * inflate_fast() requires strm->avail_out >= 258 for each loop to - * avoid checking for available output space while decoding. 
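The two thresholds are worth restating with the caller-side guard that mirrors these entry assumptions (a paraphrase of the dispatch already present in inflate() and inflateBack(), not new behaviour): 6 bytes covers the worst-case 48 bits consumed by one length/distance pair, and 258 bytes covers the longest possible match.

    if (strm->avail_in >= INFLATE_FAST_MIN_INPUT &&    /* at least 6 bytes in    */
        strm->avail_out >= INFLATE_FAST_MIN_OUTPUT) {  /* at least 258 bytes out */
        inflate_fast(strm, start);  /* start: avail_out at entry to inflate();
                                       inflateBack() passes its window size */
    }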
- * - * @param start inflate() starting value for strm->avail_out +/* inffast.c -- fast decoding + * Copyright (C) 1995-2017 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h */ -void inflate_fast(z_streamp strm, unsigned start) { - struct InflateState *state; - const unsigned char *in; /* local strm->next_in */ - const unsigned char *last; /* have enough input while in < last */ - unsigned char *out; /* local strm->next_out */ - unsigned char *beg; /* inflate()'s initial strm->next_out */ - unsigned char *end; /* while out < end, enough space available */ -#ifdef INFLATE_STRICT - unsigned dmax; /* maximum distance from zlib header */ -#endif - unsigned wsize; /* window size or zero if not using window */ - unsigned whave; /* valid bytes in the window */ - unsigned wnext; /* window write index */ - unsigned char *window; /* allocated sliding window, if wsize != 0 */ - unsigned long hold; /* local strm->hold */ - unsigned bits; /* local strm->bits */ - const struct zcode *lcode; /* local strm->lencode */ - const struct zcode *dcode; /* local strm->distcode */ - unsigned lmask; /* mask for first level of length codes */ - unsigned dmask; /* mask for first level of distance codes */ - struct zcode here; /* retrieved table entry */ - unsigned op; /* code bits, operation, extra bits, or */ - /* window position, window bytes to copy */ - unsigned len; /* match length, unused bytes */ - unsigned dist; /* match distance */ - unsigned char *from; /* where to copy match from */ +// clang-format off - /* copy state to local variables */ - state = (struct InflateState *)strm->state; - in = strm->next_in; - last = in + (strm->avail_in - (INFLATE_FAST_MIN_INPUT - 1)); - out = strm->next_out; - beg = out - (start - strm->avail_out); - end = out + (strm->avail_out - (INFLATE_FAST_MIN_OUTPUT - 1)); -#ifdef INFLATE_STRICT - dmax = state->dmax; -#endif - wsize = state->wsize; - whave = state->whave; - wnext = state->wnext; - window = state->window; - hold = state->hold; - bits = state->bits; - lcode = state->lencode; - dcode = state->distcode; - lmask = (1U << state->lenbits) - 1; - dmask = (1U << state->distbits) - 1; +#include "third_party/zlib/zutil.internal.h" +#include "third_party/zlib/inftrees.internal.h" +#include "third_party/zlib/inflate.internal.h" +#include "third_party/zlib/internal.h" +#include "third_party/zlib/macros.internal.h" +#include "libc/stdio/stdio.h" +#include "third_party/zlib/inffast.internal.h" - /* decode literals and length/distances until end-of-block or not enough - input data or output space */ - do { - if (bits < 15) { - hold += (unsigned long)(*in++) << bits; - bits += 8; - hold += (unsigned long)(*in++) << bits; - bits += 8; - } - here = lcode[hold & lmask]; - dolen: - op = (unsigned)(here.bits); - hold >>= op; - bits -= op; - op = (unsigned)(here.op); - if (op == 0) { /* literal */ - Tracevv((here.val >= 0x20 && here.val < 0x7f - ? 
"inflate: literal '%c'\n" - : "inflate: literal 0x%02x\n", - here.val)); - *out++ = (unsigned char)(here.val); - } else if (op & 16) { /* length base */ - len = (unsigned)(here.val); - op &= 15; /* number of extra bits */ - if (op) { - if (bits < op) { - hold += (unsigned long)(*in++) << bits; - bits += 8; - } - len += (unsigned)hold & ((1U << op) - 1); - hold >>= op; - bits -= op; - } - Tracevv(("inflate: length %u\n", len)); - if (bits < 15) { - hold += (unsigned long)(*in++) << bits; - bits += 8; - hold += (unsigned long)(*in++) << bits; - bits += 8; - } - here = dcode[hold & dmask]; - dodist: - op = (unsigned)(here.bits); - hold >>= op; - bits -= op; - op = (unsigned)(here.op); - if (op & 16) { /* distance base */ - dist = (unsigned)(here.val); - op &= 15; /* number of extra bits */ - if (bits < op) { - hold += (unsigned long)(*in++) << bits; - bits += 8; - if (bits < op) { +#ifdef ASMINF +# pragma message("Assembler code may have bugs -- use at your own risk") +#else + +/* + Decode literal, length, and distance codes and write out the resulting + literal and match bytes until either not enough input or output is + available, an end-of-block is encountered, or a data error is encountered. + When large enough input and output buffers are supplied to inflate(), for + example, a 16K input buffer and a 64K output buffer, more than 95% of the + inflate() execution time is spent in this routine. + + Entry assumptions: + + state->mode == LEN + strm->avail_in >= INFLATE_FAST_MIN_INPUT (6 bytes) + strm->avail_out >= INFLATE_FAST_MIN_OUTPUT (258 bytes) + start >= strm->avail_out + state->bits < 8 + + On return, state->mode is one of: + + LEN -- ran out of enough output space or enough available input + TYPE -- reached end of block code, inflate() to interpret next block + BAD -- error in block data + + Notes: + + INFLATE_FAST_MIN_INPUT: 6 bytes + + - The maximum input bits used by a length/distance pair is 15 bits for the + length code, 5 bits for the length extra, 15 bits for the distance code, + and 13 bits for the distance extra. This totals 48 bits, or six bytes. + Therefore if strm->avail_in >= 6, then there is enough input to avoid + checking for available input while decoding. + + INFLATE_FAST_MIN_OUTPUT: 258 bytes + + - The maximum bytes that a single length/distance pair can output is 258 + bytes, which is the maximum length that can be coded. inflate_fast() + requires strm->avail_out >= 258 for each loop to avoid checking for + available output space while decoding. 
+ */ +void ZLIB_INTERNAL inflate_fast(strm, start) +z_streamp strm; +unsigned start; /* inflate()'s starting value for strm->avail_out */ +{ + struct inflate_state FAR *state; + z_const unsigned char FAR *in; /* local strm->next_in */ + z_const unsigned char FAR *last; /* have enough input while in < last */ + unsigned char FAR *out; /* local strm->next_out */ + unsigned char FAR *beg; /* inflate()'s initial strm->next_out */ + unsigned char FAR *end; /* while out < end, enough space available */ +#ifdef INFLATE_STRICT + unsigned dmax; /* maximum distance from zlib header */ +#endif + unsigned wsize; /* window size or zero if not using window */ + unsigned whave; /* valid bytes in the window */ + unsigned wnext; /* window write index */ + unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */ + unsigned long hold; /* local strm->hold */ + unsigned bits; /* local strm->bits */ + code const FAR *lcode; /* local strm->lencode */ + code const FAR *dcode; /* local strm->distcode */ + unsigned lmask; /* mask for first level of length codes */ + unsigned dmask; /* mask for first level of distance codes */ + code const *here; /* retrieved table entry */ + unsigned op; /* code bits, operation, extra bits, or */ + /* window position, window bytes to copy */ + unsigned len; /* match length, unused bytes */ + unsigned dist; /* match distance */ + unsigned char FAR *from; /* where to copy match from */ + + /* copy state to local variables */ + state = (struct inflate_state FAR *)strm->state; + in = strm->next_in; + last = in + (strm->avail_in - (INFLATE_FAST_MIN_INPUT - 1)); + out = strm->next_out; + beg = out - (start - strm->avail_out); + end = out + (strm->avail_out - (INFLATE_FAST_MIN_OUTPUT - 1)); +#ifdef INFLATE_STRICT + dmax = state->dmax; +#endif + wsize = state->wsize; + whave = state->whave; + wnext = state->wnext; + window = state->window; + hold = state->hold; + bits = state->bits; + lcode = state->lencode; + dcode = state->distcode; + lmask = (1U << state->lenbits) - 1; + dmask = (1U << state->distbits) - 1; + + /* decode literals and length/distances until end-of-block or not enough + input data or output space */ + do { + if (bits < 15) { + hold += (unsigned long)(*in++) << bits; + bits += 8; hold += (unsigned long)(*in++) << bits; bits += 8; - } } - dist += (unsigned)hold & ((1U << op) - 1); -#ifdef INFLATE_STRICT - if (dist > dmax) { - strm->msg = (char *)"invalid distance too far back"; - state->mode = BAD; - break; - } -#endif + here = lcode + (hold & lmask); + dolen: + op = (unsigned)(here->bits); hold >>= op; bits -= op; - Tracevv(("inflate: distance %u\n", dist)); - op = (unsigned)(out - beg); /* max distance in output */ - if (dist > op) { /* see if copy from window */ - op = dist - op; /* distance back in window */ - if (op > whave) { - if (state->sane) { - strm->msg = (char *)"invalid distance too far back"; - state->mode = BAD; - break; - } -#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR - if (len <= op - whave) { - do { - *out++ = 0; - } while (--len); - continue; - } - len -= op - whave; - do { - *out++ = 0; - } while (--op > whave); - if (op == 0) { - from = out - dist; - do { - *out++ = *from++; - } while (--len); - continue; - } -#endif - } - from = window; - if (wnext == 0) { /* very common case */ - from += wsize - op; - if (op < len) { /* some from window */ - len -= op; - do { - *out++ = *from++; - } while (--op); - from = out - dist; /* rest from output */ - } - } else if (wnext < op) { /* wrap around window */ - from += wsize + wnext - op; - op 
-= wnext; - if (op < len) { /* some from end of window */ - len -= op; - do { - *out++ = *from++; - } while (--op); - from = window; - if (wnext < len) { /* some from start of window */ - op = wnext; - len -= op; - do { - *out++ = *from++; - } while (--op); - from = out - dist; /* rest from output */ - } - } - } else { /* contiguous in window */ - from += wnext - op; - if (op < len) { /* some from window */ - len -= op; - do { - *out++ = *from++; - } while (--op); - from = out - dist; /* rest from output */ - } - } - while (len > 2) { - *out++ = *from++; - *out++ = *from++; - *out++ = *from++; - len -= 3; - } - if (len) { - *out++ = *from++; - if (len > 1) *out++ = *from++; - } - } else { - from = out - dist; /* copy direct from output */ - do { /* minimum length is three */ - *out++ = *from++; - *out++ = *from++; - *out++ = *from++; - len -= 3; - } while (len > 2); - if (len) { - *out++ = *from++; - if (len > 1) *out++ = *from++; - } + op = (unsigned)(here->op); + if (op == 0) { /* literal */ + Tracevv((stderr, here->val >= 0x20 && here->val < 0x7f ? + "inflate: literal '%c'\n" : + "inflate: literal 0x%02x\n", here->val)); + *out++ = (unsigned char)(here->val); } - } else if ((op & 64) == 0) { /* 2nd level distance code */ - here = dcode[here.val + (hold & ((1U << op) - 1))]; - goto dodist; - } else { - strm->msg = (char *)"invalid distance code"; - state->mode = BAD; - break; - } - } else if ((op & 64) == 0) { /* 2nd level length code */ - here = lcode[here.val + (hold & ((1U << op) - 1))]; - goto dolen; - } else if (op & 32) { /* end-of-block */ - Tracevv(("inflate: end of block\n")); - state->mode = TYPE; - break; - } else { - strm->msg = (char *)"invalid literal/length code"; - state->mode = BAD; - break; - } - } while (in < last && out < end); + else if (op & 16) { /* length base */ + len = (unsigned)(here->val); + op &= 15; /* number of extra bits */ + if (op) { + if (bits < op) { + hold += (unsigned long)(*in++) << bits; + bits += 8; + } + len += (unsigned)hold & ((1U << op) - 1); + hold >>= op; + bits -= op; + } + Tracevv((stderr, "inflate: length %u\n", len)); + if (bits < 15) { + hold += (unsigned long)(*in++) << bits; + bits += 8; + hold += (unsigned long)(*in++) << bits; + bits += 8; + } + here = dcode + (hold & dmask); + dodist: + op = (unsigned)(here->bits); + hold >>= op; + bits -= op; + op = (unsigned)(here->op); + if (op & 16) { /* distance base */ + dist = (unsigned)(here->val); + op &= 15; /* number of extra bits */ + if (bits < op) { + hold += (unsigned long)(*in++) << bits; + bits += 8; + if (bits < op) { + hold += (unsigned long)(*in++) << bits; + bits += 8; + } + } + dist += (unsigned)hold & ((1U << op) - 1); +#ifdef INFLATE_STRICT + if (dist > dmax) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#endif + hold >>= op; + bits -= op; + Tracevv((stderr, "inflate: distance %u\n", dist)); + op = (unsigned)(out - beg); /* max distance in output */ + if (dist > op) { /* see if copy from window */ + op = dist - op; /* distance back in window */ + if (op > whave) { + if (state->sane) { + strm->msg = + (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR + if (len <= op - whave) { + do { + *out++ = 0; + } while (--len); + continue; + } + len -= op - whave; + do { + *out++ = 0; + } while (--op > whave); + if (op == 0) { + from = out - dist; + do { + *out++ = *from++; + } while (--len); + continue; + } +#endif + } + from = window; + if (wnext == 0) { /* very 
common case */ + from += wsize - op; + if (op < len) { /* some from window */ + len -= op; + do { + *out++ = *from++; + } while (--op); + from = out - dist; /* rest from output */ + } + } + else if (wnext < op) { /* wrap around window */ + from += wsize + wnext - op; + op -= wnext; + if (op < len) { /* some from end of window */ + len -= op; + do { + *out++ = *from++; + } while (--op); + from = window; + if (wnext < len) { /* some from start of window */ + op = wnext; + len -= op; + do { + *out++ = *from++; + } while (--op); + from = out - dist; /* rest from output */ + } + } + } + else { /* contiguous in window */ + from += wnext - op; + if (op < len) { /* some from window */ + len -= op; + do { + *out++ = *from++; + } while (--op); + from = out - dist; /* rest from output */ + } + } + while (len > 2) { + *out++ = *from++; + *out++ = *from++; + *out++ = *from++; + len -= 3; + } + if (len) { + *out++ = *from++; + if (len > 1) + *out++ = *from++; + } + } + else { + from = out - dist; /* copy direct from output */ + do { /* minimum length is three */ + *out++ = *from++; + *out++ = *from++; + *out++ = *from++; + len -= 3; + } while (len > 2); + if (len) { + *out++ = *from++; + if (len > 1) + *out++ = *from++; + } + } + } + else if ((op & 64) == 0) { /* 2nd level distance code */ + here = dcode + here->val + (hold & ((1U << op) - 1)); + goto dodist; + } + else { + strm->msg = (char *)"invalid distance code"; + state->mode = BAD; + break; + } + } + else if ((op & 64) == 0) { /* 2nd level length code */ + here = lcode + here->val + (hold & ((1U << op) - 1)); + goto dolen; + } + else if (op & 32) { /* end-of-block */ + Tracevv((stderr, "inflate: end of block\n")); + state->mode = TYPE; + break; + } + else { + strm->msg = (char *)"invalid literal/length code"; + state->mode = BAD; + break; + } + } while (in < last && out < end); - /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ - len = bits >> 3; - in -= len; - bits -= len << 3; - hold &= (1U << bits) - 1; + /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ + len = bits >> 3; + in -= len; + bits -= len << 3; + hold &= (1U << bits) - 1; - /* update state and return */ - strm->next_in = in; - strm->next_out = out; - strm->avail_in = - (unsigned)(in < last ? (INFLATE_FAST_MIN_INPUT - 1) + (last - in) - : (INFLATE_FAST_MIN_INPUT - 1) - (in - last)); - strm->avail_out = - (unsigned)(out < end ? (INFLATE_FAST_MIN_OUTPUT - 1) + (end - out) - : (INFLATE_FAST_MIN_OUTPUT - 1) - (out - end)); - state->hold = hold; - state->bits = bits; - return; + /* update state and return */ + strm->next_in = in; + strm->next_out = out; + strm->avail_in = (unsigned)(in < last ? + (INFLATE_FAST_MIN_INPUT - 1) + (last - in) : + (INFLATE_FAST_MIN_INPUT - 1) - (in - last)); + strm->avail_out = (unsigned)(out < end ? 
+ (INFLATE_FAST_MIN_OUTPUT - 1) + (end - out) : + (INFLATE_FAST_MIN_OUTPUT - 1) - (out - end)); + state->hold = hold; + state->bits = bits; + return; } + +/* + inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): + - Using bit fields for code structure + - Different op definition to avoid & for extra bits (do & for table bits) + - Three separate decoding do-loops for direct, window, and wnext == 0 + - Special case for distance > 1 copies to do overlapped load and store copy + - Explicit branch predictions (based on measured branch probabilities) + - Deferring match copy and interspersed it with decoding subsequent codes + - Swapping literal/length else + - Swapping window/direct else + - Larger unrolled copy loops (three is about right) + - Moving len -= 3 statement into middle of loop + */ + +#endif /* !ASMINF */ diff --git a/third_party/zlib/inffast.internal.h b/third_party/zlib/inffast.internal.h index 8fd191bfb..95f3e12d9 100644 --- a/third_party/zlib/inffast.internal.h +++ b/third_party/zlib/inffast.internal.h @@ -1,7 +1,9 @@ -#ifndef COSMOPOLITAN_THIRD_PARTY_ZLIB_INFFAST_H_ -#define COSMOPOLITAN_THIRD_PARTY_ZLIB_INFFAST_H_ -#include "third_party/zlib/inffast.internal.h" +#ifndef COSMOPOLITAN_THIRD_PARTY_ZLIB_INFFAST_INTERNAL_H_ +#define COSMOPOLITAN_THIRD_PARTY_ZLIB_INFFAST_INTERNAL_H_ #include "third_party/zlib/zlib.h" +#if !(__ASSEMBLER__ + __LINKER__ + 0) +COSMOPOLITAN_C_START_ +/* clang-format off */ /* INFLATE_FAST_MIN_INPUT: the minimum number of input bytes needed so that we can safely call inflate_fast() with only one up-front bounds check. One @@ -9,7 +11,7 @@ extra, 15 bits for the distance code, 13 bits for distance extra) requires reading up to 48 input bits (6 bytes). */ -#define INFLATE_FAST_MIN_INPUT 8 +#define INFLATE_FAST_MIN_INPUT 6 /* INFLATE_FAST_MIN_OUTPUT: the minimum number of output bytes needed so that we can safely call inflate_fast() with only one up-front bounds check. One @@ -18,11 +20,8 @@ */ #define INFLATE_FAST_MIN_OUTPUT 258 -#if !(__ASSEMBLER__ + __LINKER__ + 0) -COSMOPOLITAN_C_START_ - void inflate_fast(z_streamp strm, unsigned start) hidden; COSMOPOLITAN_C_END_ #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ -#endif /* COSMOPOLITAN_THIRD_PARTY_ZLIB_INFFAST_H_ */ +#endif /* COSMOPOLITAN_THIRD_PARTY_ZLIB_INFFAST_INTERNAL_H_ */ diff --git a/third_party/zlib/inffastchunk.c b/third_party/zlib/inffastchunk.c deleted file mode 100644 index 33f66bc52..000000000 --- a/third_party/zlib/inffastchunk.c +++ /dev/null @@ -1,314 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;tab-width:8;coding:utf-8 -*-│ -│vi: set net ft=c ts=4 sts=4 sw=4 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 1995-2017 Mark Adler │ -│ Use of this source code is governed by the BSD-style licenses that can │ -│ be found in the third_party/zlib/LICENSE file. 
│ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/intrin/bits.h" -#include "third_party/zlib/chunkcopy.internal.h" -#include "third_party/zlib/inffast.internal.h" -#include "third_party/zlib/inflate.internal.h" -#include "third_party/zlib/inftrees.internal.h" -#include "third_party/zlib/zlib.h" -#include "third_party/zlib/zutil.internal.h" - -asm(".ident\t\"\\n\\n\ -zlib (zlib License)\\n\ -Copyright 1995-2017 Jean-loup Gailly and Mark Adler\""); -asm(".include \"libc/disclaimer.inc\""); - -/** - * Decodes literal, length, and distance codes and write out the - * resulting literal and match bytes until either not enough input or - * output is available, an end-of-block is encountered, or a data error - * is encountered. When large enough input and output buffers are - * supplied to inflate(), for example, a 16K input buffer and a 64K - * output buffer, more than 95% of the inflate() execution time is spent - * in this routine. - * - * Entry assumptions: - * - * state->mode == LEN - * strm->avail_in >= INFLATE_FAST_MIN_INPUT (6 or 8 bytes) - * strm->avail_out >= INFLATE_FAST_MIN_OUTPUT (258 bytes) - * start >= strm->avail_out - * state->bits < 8 - * (state->hold >> state->bits) == 0 - * strm->next_out[0..strm->avail_out] does not overlap with - * strm->next_in[0..strm->avail_in] - * strm->state->window is allocated with an additional - * CHUNKCOPY_CHUNK_SIZE-1 bytes of padding beyond strm->state->wsize - * - * On return, state->mode is one of: - * - * LEN -- ran out of enough output space or enough available input - * TYPE -- reached end of block code, inflate() to interpret next block - * BAD -- error in block data - * - * Some notes: - * - * INFLATE_FAST_MIN_INPUT: 6 or 8 bytes - * - * - The maximum input bits used by a length/distance pair is 15 bits - * for the length code, 5 bits for the length extra, 15 bits for the - * distance code, and 13 bits for the distance extra. This totals 48 - * bits, or six bytes. Therefore if strm->avail_in >= 6, then there - * is enough input to avoid checking for available input while - * decoding. - * - * - The wide input data reading option reads 64 input bits at a time. - * Thus, if strm->avail_in >= 8, then there is enough input to avoid - * checking for available input while decoding. Reading consumes the - * input with: - * - * hold |= READ64LE(in) << bits; - * in += 6; - * bits += 48; - * - * reporting 6 bytes of new input because |bits| is 0..15 (2 bytes - * rounded up, worst case) and 6 bytes is enough to decode as noted - * above. At exit, hold &= (1U << bits) - 1 drops excess input to - * keep the invariant: - * - * (state->hold >> state->bits) == 0 - * - * INFLATE_FAST_MIN_OUTPUT: 258 bytes - * - * - The maximum bytes that a single length/distance pair can output is - * 258 bytes, which is the maximum length that can be coded. - * inflate_fast() requires strm->avail_out >= 258 for each loop to - * avoid checking for available output space while decoding. 
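The minimum-input and minimum-output figures in the notes above are preconditions the caller has to establish before entering the fast path. As a rough illustration only (this sketch is mine, not part of the patch; it assumes the INFLATE_FAST_MIN_INPUT/INFLATE_FAST_MIN_OUTPUT constants and inflate_fast() prototype from inffast.internal.h shown earlier in this change), a caller-side guard would look like:

    #include "third_party/zlib/inffast.internal.h"

    /* Sketch: take the one-bounds-check fast path only when the stream has
       enough input for a worst-case length/distance pair (15+5+15+13 = 48
       bits, i.e. 6 bytes) and enough output for the longest match (258). */
    static void decode_some(z_streamp strm, unsigned start) {
        if (strm->avail_in >= INFLATE_FAST_MIN_INPUT &&
            strm->avail_out >= INFLATE_FAST_MIN_OUTPUT) {
            inflate_fast(strm, start);  /* bulk decode, per-symbol checks elided */
        } else {
            /* too little input or output left: fall back to the
               byte-at-a-time state machine in inflate() */
        }
    }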
- * - * @param start is inflate() starting value for strm->avail_out - */ -void inflate_fast_chunk(z_streamp strm, unsigned start) { - struct InflateState *state; - const unsigned char *in; /* local strm->next_in */ - const unsigned char *last; /* have enough input while in < last */ - unsigned char *out; /* local strm->next_out */ - unsigned char *beg; /* inflate()'s initial strm->next_out */ - unsigned char *end; /* while out < end, enough space available */ - unsigned char *limit; /* safety limit for chunky copies */ -#ifdef INFLATE_STRICT - unsigned dmax; /* maximum distance from zlib header */ -#endif - unsigned wsize; /* window size or zero if not using window */ - unsigned whave; /* valid bytes in the window */ - unsigned wnext; /* window write index */ - unsigned char *window; /* allocated sliding window, if wsize != 0 */ - uint64_t hold; /* local strm->hold */ - unsigned bits; /* local strm->bits */ - const struct zcode *lcode; /* local strm->lencode */ - const struct zcode *dcode; /* local strm->distcode */ - unsigned lmask; /* mask for first level of length codes */ - unsigned dmask; /* mask for first level of distance codes */ - struct zcode here; /* retrieved table entry */ - unsigned op; /* code bits, operation, extra bits, or */ - /* window position, window bytes to copy */ - unsigned len; /* match length, unused bytes */ - unsigned dist; /* match distance */ - unsigned char *from; /* where to copy match from */ - - /* copy state to local variables */ - state = (struct InflateState *)strm->state; - in = strm->next_in; - last = in + (strm->avail_in - (INFLATE_FAST_MIN_INPUT - 1)); - out = strm->next_out; - beg = out - (start - strm->avail_out); - end = out + (strm->avail_out - (INFLATE_FAST_MIN_OUTPUT - 1)); - limit = out + strm->avail_out; -#ifdef INFLATE_STRICT - dmax = state->dmax; -#endif - wsize = state->wsize; - whave = state->whave; - wnext = (state->wnext == 0 && whave >= wsize) ? wsize : state->wnext; - window = state->window; - hold = state->hold; - bits = state->bits; - lcode = state->lencode; - dcode = state->distcode; - lmask = (1U << state->lenbits) - 1; - dmask = (1U << state->distbits) - 1; - - /* decode literals and length/distances until end-of-block or not enough - input data or output space */ - do { - if (bits < 15) { - hold |= READ64LE(in) << bits; - in += 6; - bits += 48; - } - here = lcode[hold & lmask]; - dolen: - op = (unsigned)(here.bits); - hold >>= op; - bits -= op; - op = (unsigned)(here.op); - if (op == 0) { /* literal */ - Tracevv((here.val >= 0x20 && here.val < 0x7f - ? 
"inflate: literal '%c'\n" - : "inflate: literal 0x%02x\n", - here.val)); - *out++ = (unsigned char)(here.val); - } else if (op & 16) { /* length base */ - len = (unsigned)(here.val); - op &= 15; /* number of extra bits */ - if (op) { - if (bits < op) { - hold |= READ64LE(in) << bits; - in += 6; - bits += 48; - } - len += (unsigned)hold & ((1U << op) - 1); - hold >>= op; - bits -= op; - } - Tracevv(("inflate: length %u\n", len)); - if (bits < 15) { - hold |= READ64LE(in) << bits; - in += 6; - bits += 48; - } - here = dcode[hold & dmask]; - dodist: - op = (unsigned)(here.bits); - hold >>= op; - bits -= op; - op = (unsigned)(here.op); - if (op & 16) { /* distance base */ - dist = (unsigned)(here.val); - op &= 15; /* number of extra bits */ - if (bits < op) { - hold |= READ64LE(in) << bits; - in += 6; - bits += 48; - } - dist += (unsigned)hold & ((1U << op) - 1); -#ifdef INFLATE_STRICT - if (dist > dmax) { - strm->msg = (char *)"invalid distance too far back"; - state->mode = BAD; - break; - } -#endif - hold >>= op; - bits -= op; - Tracevv(("inflate: distance %u\n", dist)); - op = (unsigned)(out - beg); /* max distance in output */ - if (dist > op) { /* see if copy from window */ - op = dist - op; /* distance back in window */ - if (op > whave) { - if (state->sane) { - strm->msg = (char *)"invalid distance too far back"; - state->mode = BAD; - break; - } -#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR - if (len <= op - whave) { - do { - *out++ = 0; - } while (--len); - continue; - } - len -= op - whave; - do { - *out++ = 0; - } while (--op > whave); - if (op == 0) { - from = out - dist; - do { - *out++ = *from++; - } while (--len); - continue; - } -#endif - } - from = window; - if (wnext >= op) { /* contiguous in window */ - from += wnext - op; - } else { /* wrap around window */ - op -= wnext; - from += wsize - op; - if (op < len) { /* some from end of window */ - len -= op; - out = chunkcopy_safe(out, from, op, limit); - from = window; /* more from start of window */ - op = wnext; - /* This (rare) case can create a situation where - the first chunkcopy below must be checked. - */ - } - } - if (op < len) { /* still need some from output */ - out = chunkcopy_safe(out, from, op, limit); - len -= op; - /* When dist is small the amount of data that can be - copied from the window is also small, and progress - towards the dangerous end of the output buffer is - also small. This means that for trivial memsets and - for chunkunroll_relaxed() a safety check is - unnecessary. However, these conditions may not be - entered at all, and in that case it's possible that - the main copy is near the end. - */ - out = chunkunroll_relaxed(out, &dist, &len); - out = chunkcopy_safe_ugly(out, dist, len, limit); - } else { - /* from points to window, so there is no risk of - overlapping pointers requiring memset-like behaviour - */ - out = chunkcopy_safe(out, from, len, limit); - } - } else { - /* Whole reference is in range of current output. No - range checks are necessary because we start with room - for at least 258 bytes of output, so unroll and roundoff - operations can write beyond `out+len` so long as they - stay within 258 bytes of `out`. 
- */ - out = chunkcopy_lapped_relaxed(out, dist, len); - } - } else if ((op & 64) == 0) { /* 2nd level distance code */ - here = dcode[here.val + (hold & ((1U << op) - 1))]; - goto dodist; - } else { - strm->msg = (char *)"invalid distance code"; - state->mode = BAD; - break; - } - } else if ((op & 64) == 0) { /* 2nd level length code */ - here = lcode[here.val + (hold & ((1U << op) - 1))]; - goto dolen; - } else if (op & 32) { /* end-of-block */ - Tracevv(("inflate: end of block\n")); - state->mode = TYPE; - break; - } else { - strm->msg = (char *)"invalid literal/length code"; - state->mode = BAD; - break; - } - } while (in < last && out < end); - - /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ - len = bits >> 3; - in -= len; - bits -= len << 3; - hold &= (1U << bits) - 1; - - /* update state and return */ - strm->next_in = in; - strm->next_out = out; - strm->avail_in = - (unsigned)(in < last ? (INFLATE_FAST_MIN_INPUT - 1) + (last - in) - : (INFLATE_FAST_MIN_INPUT - 1) - (in - last)); - strm->avail_out = - (unsigned)(out < end ? (INFLATE_FAST_MIN_OUTPUT - 1) + (end - out) - : (INFLATE_FAST_MIN_OUTPUT - 1) - (out - end)); - state->hold = hold; - state->bits = bits; - - Assert((state->hold >> state->bits) == 0, "invalid input data state"); -} diff --git a/third_party/zlib/inffixed.c b/third_party/zlib/inffixed.c deleted file mode 100644 index 54b19fb4a..000000000 --- a/third_party/zlib/inffixed.c +++ /dev/null @@ -1,122 +0,0 @@ -#include "third_party/zlib/inftrees.internal.h" - -/** - * @fileoverview tables for decoding fixed codes - * @note generated by makefixed() - */ - -hidden const struct zcode kZlibLenfix[512] = { - {96, 7, 0}, {0, 8, 80}, {0, 8, 16}, {20, 8, 115}, {18, 7, 31}, - {0, 8, 112}, {0, 8, 48}, {0, 9, 192}, {16, 7, 10}, {0, 8, 96}, - {0, 8, 32}, {0, 9, 160}, {0, 8, 0}, {0, 8, 128}, {0, 8, 64}, - {0, 9, 224}, {16, 7, 6}, {0, 8, 88}, {0, 8, 24}, {0, 9, 144}, - {19, 7, 59}, {0, 8, 120}, {0, 8, 56}, {0, 9, 208}, {17, 7, 17}, - {0, 8, 104}, {0, 8, 40}, {0, 9, 176}, {0, 8, 8}, {0, 8, 136}, - {0, 8, 72}, {0, 9, 240}, {16, 7, 4}, {0, 8, 84}, {0, 8, 20}, - {21, 8, 227}, {19, 7, 43}, {0, 8, 116}, {0, 8, 52}, {0, 9, 200}, - {17, 7, 13}, {0, 8, 100}, {0, 8, 36}, {0, 9, 168}, {0, 8, 4}, - {0, 8, 132}, {0, 8, 68}, {0, 9, 232}, {16, 7, 8}, {0, 8, 92}, - {0, 8, 28}, {0, 9, 152}, {20, 7, 83}, {0, 8, 124}, {0, 8, 60}, - {0, 9, 216}, {18, 7, 23}, {0, 8, 108}, {0, 8, 44}, {0, 9, 184}, - {0, 8, 12}, {0, 8, 140}, {0, 8, 76}, {0, 9, 248}, {16, 7, 3}, - {0, 8, 82}, {0, 8, 18}, {21, 8, 163}, {19, 7, 35}, {0, 8, 114}, - {0, 8, 50}, {0, 9, 196}, {17, 7, 11}, {0, 8, 98}, {0, 8, 34}, - {0, 9, 164}, {0, 8, 2}, {0, 8, 130}, {0, 8, 66}, {0, 9, 228}, - {16, 7, 7}, {0, 8, 90}, {0, 8, 26}, {0, 9, 148}, {20, 7, 67}, - {0, 8, 122}, {0, 8, 58}, {0, 9, 212}, {18, 7, 19}, {0, 8, 106}, - {0, 8, 42}, {0, 9, 180}, {0, 8, 10}, {0, 8, 138}, {0, 8, 74}, - {0, 9, 244}, {16, 7, 5}, {0, 8, 86}, {0, 8, 22}, {64, 8, 0}, - {19, 7, 51}, {0, 8, 118}, {0, 8, 54}, {0, 9, 204}, {17, 7, 15}, - {0, 8, 102}, {0, 8, 38}, {0, 9, 172}, {0, 8, 6}, {0, 8, 134}, - {0, 8, 70}, {0, 9, 236}, {16, 7, 9}, {0, 8, 94}, {0, 8, 30}, - {0, 9, 156}, {20, 7, 99}, {0, 8, 126}, {0, 8, 62}, {0, 9, 220}, - {18, 7, 27}, {0, 8, 110}, {0, 8, 46}, {0, 9, 188}, {0, 8, 14}, - {0, 8, 142}, {0, 8, 78}, {0, 9, 252}, {96, 7, 0}, {0, 8, 81}, - {0, 8, 17}, {21, 8, 131}, {18, 7, 31}, {0, 8, 113}, {0, 8, 49}, - {0, 9, 194}, {16, 7, 10}, {0, 8, 97}, {0, 8, 33}, {0, 9, 162}, - {0, 8, 1}, {0, 8, 129}, {0, 8, 65}, {0, 9, 226}, {16, 7, 6}, - 
{0, 8, 89}, {0, 8, 25}, {0, 9, 146}, {19, 7, 59}, {0, 8, 121}, - {0, 8, 57}, {0, 9, 210}, {17, 7, 17}, {0, 8, 105}, {0, 8, 41}, - {0, 9, 178}, {0, 8, 9}, {0, 8, 137}, {0, 8, 73}, {0, 9, 242}, - {16, 7, 4}, {0, 8, 85}, {0, 8, 21}, {16, 8, 258}, {19, 7, 43}, - {0, 8, 117}, {0, 8, 53}, {0, 9, 202}, {17, 7, 13}, {0, 8, 101}, - {0, 8, 37}, {0, 9, 170}, {0, 8, 5}, {0, 8, 133}, {0, 8, 69}, - {0, 9, 234}, {16, 7, 8}, {0, 8, 93}, {0, 8, 29}, {0, 9, 154}, - {20, 7, 83}, {0, 8, 125}, {0, 8, 61}, {0, 9, 218}, {18, 7, 23}, - {0, 8, 109}, {0, 8, 45}, {0, 9, 186}, {0, 8, 13}, {0, 8, 141}, - {0, 8, 77}, {0, 9, 250}, {16, 7, 3}, {0, 8, 83}, {0, 8, 19}, - {21, 8, 195}, {19, 7, 35}, {0, 8, 115}, {0, 8, 51}, {0, 9, 198}, - {17, 7, 11}, {0, 8, 99}, {0, 8, 35}, {0, 9, 166}, {0, 8, 3}, - {0, 8, 131}, {0, 8, 67}, {0, 9, 230}, {16, 7, 7}, {0, 8, 91}, - {0, 8, 27}, {0, 9, 150}, {20, 7, 67}, {0, 8, 123}, {0, 8, 59}, - {0, 9, 214}, {18, 7, 19}, {0, 8, 107}, {0, 8, 43}, {0, 9, 182}, - {0, 8, 11}, {0, 8, 139}, {0, 8, 75}, {0, 9, 246}, {16, 7, 5}, - {0, 8, 87}, {0, 8, 23}, {64, 8, 0}, {19, 7, 51}, {0, 8, 119}, - {0, 8, 55}, {0, 9, 206}, {17, 7, 15}, {0, 8, 103}, {0, 8, 39}, - {0, 9, 174}, {0, 8, 7}, {0, 8, 135}, {0, 8, 71}, {0, 9, 238}, - {16, 7, 9}, {0, 8, 95}, {0, 8, 31}, {0, 9, 158}, {20, 7, 99}, - {0, 8, 127}, {0, 8, 63}, {0, 9, 222}, {18, 7, 27}, {0, 8, 111}, - {0, 8, 47}, {0, 9, 190}, {0, 8, 15}, {0, 8, 143}, {0, 8, 79}, - {0, 9, 254}, {96, 7, 0}, {0, 8, 80}, {0, 8, 16}, {20, 8, 115}, - {18, 7, 31}, {0, 8, 112}, {0, 8, 48}, {0, 9, 193}, {16, 7, 10}, - {0, 8, 96}, {0, 8, 32}, {0, 9, 161}, {0, 8, 0}, {0, 8, 128}, - {0, 8, 64}, {0, 9, 225}, {16, 7, 6}, {0, 8, 88}, {0, 8, 24}, - {0, 9, 145}, {19, 7, 59}, {0, 8, 120}, {0, 8, 56}, {0, 9, 209}, - {17, 7, 17}, {0, 8, 104}, {0, 8, 40}, {0, 9, 177}, {0, 8, 8}, - {0, 8, 136}, {0, 8, 72}, {0, 9, 241}, {16, 7, 4}, {0, 8, 84}, - {0, 8, 20}, {21, 8, 227}, {19, 7, 43}, {0, 8, 116}, {0, 8, 52}, - {0, 9, 201}, {17, 7, 13}, {0, 8, 100}, {0, 8, 36}, {0, 9, 169}, - {0, 8, 4}, {0, 8, 132}, {0, 8, 68}, {0, 9, 233}, {16, 7, 8}, - {0, 8, 92}, {0, 8, 28}, {0, 9, 153}, {20, 7, 83}, {0, 8, 124}, - {0, 8, 60}, {0, 9, 217}, {18, 7, 23}, {0, 8, 108}, {0, 8, 44}, - {0, 9, 185}, {0, 8, 12}, {0, 8, 140}, {0, 8, 76}, {0, 9, 249}, - {16, 7, 3}, {0, 8, 82}, {0, 8, 18}, {21, 8, 163}, {19, 7, 35}, - {0, 8, 114}, {0, 8, 50}, {0, 9, 197}, {17, 7, 11}, {0, 8, 98}, - {0, 8, 34}, {0, 9, 165}, {0, 8, 2}, {0, 8, 130}, {0, 8, 66}, - {0, 9, 229}, {16, 7, 7}, {0, 8, 90}, {0, 8, 26}, {0, 9, 149}, - {20, 7, 67}, {0, 8, 122}, {0, 8, 58}, {0, 9, 213}, {18, 7, 19}, - {0, 8, 106}, {0, 8, 42}, {0, 9, 181}, {0, 8, 10}, {0, 8, 138}, - {0, 8, 74}, {0, 9, 245}, {16, 7, 5}, {0, 8, 86}, {0, 8, 22}, - {64, 8, 0}, {19, 7, 51}, {0, 8, 118}, {0, 8, 54}, {0, 9, 205}, - {17, 7, 15}, {0, 8, 102}, {0, 8, 38}, {0, 9, 173}, {0, 8, 6}, - {0, 8, 134}, {0, 8, 70}, {0, 9, 237}, {16, 7, 9}, {0, 8, 94}, - {0, 8, 30}, {0, 9, 157}, {20, 7, 99}, {0, 8, 126}, {0, 8, 62}, - {0, 9, 221}, {18, 7, 27}, {0, 8, 110}, {0, 8, 46}, {0, 9, 189}, - {0, 8, 14}, {0, 8, 142}, {0, 8, 78}, {0, 9, 253}, {96, 7, 0}, - {0, 8, 81}, {0, 8, 17}, {21, 8, 131}, {18, 7, 31}, {0, 8, 113}, - {0, 8, 49}, {0, 9, 195}, {16, 7, 10}, {0, 8, 97}, {0, 8, 33}, - {0, 9, 163}, {0, 8, 1}, {0, 8, 129}, {0, 8, 65}, {0, 9, 227}, - {16, 7, 6}, {0, 8, 89}, {0, 8, 25}, {0, 9, 147}, {19, 7, 59}, - {0, 8, 121}, {0, 8, 57}, {0, 9, 211}, {17, 7, 17}, {0, 8, 105}, - {0, 8, 41}, {0, 9, 179}, {0, 8, 9}, {0, 8, 137}, {0, 8, 73}, - {0, 9, 243}, {16, 7, 4}, {0, 8, 85}, {0, 8, 21}, {16, 8, 258}, - 
{19, 7, 43}, {0, 8, 117}, {0, 8, 53}, {0, 9, 203}, {17, 7, 13}, - {0, 8, 101}, {0, 8, 37}, {0, 9, 171}, {0, 8, 5}, {0, 8, 133}, - {0, 8, 69}, {0, 9, 235}, {16, 7, 8}, {0, 8, 93}, {0, 8, 29}, - {0, 9, 155}, {20, 7, 83}, {0, 8, 125}, {0, 8, 61}, {0, 9, 219}, - {18, 7, 23}, {0, 8, 109}, {0, 8, 45}, {0, 9, 187}, {0, 8, 13}, - {0, 8, 141}, {0, 8, 77}, {0, 9, 251}, {16, 7, 3}, {0, 8, 83}, - {0, 8, 19}, {21, 8, 195}, {19, 7, 35}, {0, 8, 115}, {0, 8, 51}, - {0, 9, 199}, {17, 7, 11}, {0, 8, 99}, {0, 8, 35}, {0, 9, 167}, - {0, 8, 3}, {0, 8, 131}, {0, 8, 67}, {0, 9, 231}, {16, 7, 7}, - {0, 8, 91}, {0, 8, 27}, {0, 9, 151}, {20, 7, 67}, {0, 8, 123}, - {0, 8, 59}, {0, 9, 215}, {18, 7, 19}, {0, 8, 107}, {0, 8, 43}, - {0, 9, 183}, {0, 8, 11}, {0, 8, 139}, {0, 8, 75}, {0, 9, 247}, - {16, 7, 5}, {0, 8, 87}, {0, 8, 23}, {64, 8, 0}, {19, 7, 51}, - {0, 8, 119}, {0, 8, 55}, {0, 9, 207}, {17, 7, 15}, {0, 8, 103}, - {0, 8, 39}, {0, 9, 175}, {0, 8, 7}, {0, 8, 135}, {0, 8, 71}, - {0, 9, 239}, {16, 7, 9}, {0, 8, 95}, {0, 8, 31}, {0, 9, 159}, - {20, 7, 99}, {0, 8, 127}, {0, 8, 63}, {0, 9, 223}, {18, 7, 27}, - {0, 8, 111}, {0, 8, 47}, {0, 9, 191}, {0, 8, 15}, {0, 8, 143}, - {0, 8, 79}, {0, 9, 255}, -}; - -hidden const struct zcode kZlibDistfix[32] = { - {16, 5, 1}, {23, 5, 257}, {19, 5, 17}, {27, 5, 4097}, {17, 5, 5}, - {25, 5, 1025}, {21, 5, 65}, {29, 5, 16385}, {16, 5, 3}, {24, 5, 513}, - {20, 5, 33}, {28, 5, 8193}, {18, 5, 9}, {26, 5, 2049}, {22, 5, 129}, - {64, 5, 0}, {16, 5, 2}, {23, 5, 385}, {19, 5, 25}, {27, 5, 6145}, - {17, 5, 7}, {25, 5, 1537}, {21, 5, 97}, {29, 5, 24577}, {16, 5, 4}, - {24, 5, 769}, {20, 5, 49}, {28, 5, 12289}, {18, 5, 13}, {26, 5, 3073}, - {22, 5, 193}, {64, 5, 0}, -}; diff --git a/third_party/zlib/inffixed.inc b/third_party/zlib/inffixed.inc new file mode 100644 index 000000000..0d6f2cd70 --- /dev/null +++ b/third_party/zlib/inffixed.inc @@ -0,0 +1,96 @@ +// clang-format off + + /* inffixed.h -- table for decoding fixed codes + * Generated automatically by makefixed(). + */ + + /* WARNING: this file should *not* be used by applications. + It is part of the implementation of this library and is + subject to change. Applications should only use zlib.h. 
+ */ + + static const code lenfix[512] = { + {96,7,0},{0,8,80},{0,8,16},{20,8,115},{18,7,31},{0,8,112},{0,8,48}, + {0,9,192},{16,7,10},{0,8,96},{0,8,32},{0,9,160},{0,8,0},{0,8,128}, + {0,8,64},{0,9,224},{16,7,6},{0,8,88},{0,8,24},{0,9,144},{19,7,59}, + {0,8,120},{0,8,56},{0,9,208},{17,7,17},{0,8,104},{0,8,40},{0,9,176}, + {0,8,8},{0,8,136},{0,8,72},{0,9,240},{16,7,4},{0,8,84},{0,8,20}, + {21,8,227},{19,7,43},{0,8,116},{0,8,52},{0,9,200},{17,7,13},{0,8,100}, + {0,8,36},{0,9,168},{0,8,4},{0,8,132},{0,8,68},{0,9,232},{16,7,8}, + {0,8,92},{0,8,28},{0,9,152},{20,7,83},{0,8,124},{0,8,60},{0,9,216}, + {18,7,23},{0,8,108},{0,8,44},{0,9,184},{0,8,12},{0,8,140},{0,8,76}, + {0,9,248},{16,7,3},{0,8,82},{0,8,18},{21,8,163},{19,7,35},{0,8,114}, + {0,8,50},{0,9,196},{17,7,11},{0,8,98},{0,8,34},{0,9,164},{0,8,2}, + {0,8,130},{0,8,66},{0,9,228},{16,7,7},{0,8,90},{0,8,26},{0,9,148}, + {20,7,67},{0,8,122},{0,8,58},{0,9,212},{18,7,19},{0,8,106},{0,8,42}, + {0,9,180},{0,8,10},{0,8,138},{0,8,74},{0,9,244},{16,7,5},{0,8,86}, + {0,8,22},{64,8,0},{19,7,51},{0,8,118},{0,8,54},{0,9,204},{17,7,15}, + {0,8,102},{0,8,38},{0,9,172},{0,8,6},{0,8,134},{0,8,70},{0,9,236}, + {16,7,9},{0,8,94},{0,8,30},{0,9,156},{20,7,99},{0,8,126},{0,8,62}, + {0,9,220},{18,7,27},{0,8,110},{0,8,46},{0,9,188},{0,8,14},{0,8,142}, + {0,8,78},{0,9,252},{96,7,0},{0,8,81},{0,8,17},{21,8,131},{18,7,31}, + {0,8,113},{0,8,49},{0,9,194},{16,7,10},{0,8,97},{0,8,33},{0,9,162}, + {0,8,1},{0,8,129},{0,8,65},{0,9,226},{16,7,6},{0,8,89},{0,8,25}, + {0,9,146},{19,7,59},{0,8,121},{0,8,57},{0,9,210},{17,7,17},{0,8,105}, + {0,8,41},{0,9,178},{0,8,9},{0,8,137},{0,8,73},{0,9,242},{16,7,4}, + {0,8,85},{0,8,21},{16,8,258},{19,7,43},{0,8,117},{0,8,53},{0,9,202}, + {17,7,13},{0,8,101},{0,8,37},{0,9,170},{0,8,5},{0,8,133},{0,8,69}, + {0,9,234},{16,7,8},{0,8,93},{0,8,29},{0,9,154},{20,7,83},{0,8,125}, + {0,8,61},{0,9,218},{18,7,23},{0,8,109},{0,8,45},{0,9,186},{0,8,13}, + {0,8,141},{0,8,77},{0,9,250},{16,7,3},{0,8,83},{0,8,19},{21,8,195}, + {19,7,35},{0,8,115},{0,8,51},{0,9,198},{17,7,11},{0,8,99},{0,8,35}, + {0,9,166},{0,8,3},{0,8,131},{0,8,67},{0,9,230},{16,7,7},{0,8,91}, + {0,8,27},{0,9,150},{20,7,67},{0,8,123},{0,8,59},{0,9,214},{18,7,19}, + {0,8,107},{0,8,43},{0,9,182},{0,8,11},{0,8,139},{0,8,75},{0,9,246}, + {16,7,5},{0,8,87},{0,8,23},{64,8,0},{19,7,51},{0,8,119},{0,8,55}, + {0,9,206},{17,7,15},{0,8,103},{0,8,39},{0,9,174},{0,8,7},{0,8,135}, + {0,8,71},{0,9,238},{16,7,9},{0,8,95},{0,8,31},{0,9,158},{20,7,99}, + {0,8,127},{0,8,63},{0,9,222},{18,7,27},{0,8,111},{0,8,47},{0,9,190}, + {0,8,15},{0,8,143},{0,8,79},{0,9,254},{96,7,0},{0,8,80},{0,8,16}, + {20,8,115},{18,7,31},{0,8,112},{0,8,48},{0,9,193},{16,7,10},{0,8,96}, + {0,8,32},{0,9,161},{0,8,0},{0,8,128},{0,8,64},{0,9,225},{16,7,6}, + {0,8,88},{0,8,24},{0,9,145},{19,7,59},{0,8,120},{0,8,56},{0,9,209}, + {17,7,17},{0,8,104},{0,8,40},{0,9,177},{0,8,8},{0,8,136},{0,8,72}, + {0,9,241},{16,7,4},{0,8,84},{0,8,20},{21,8,227},{19,7,43},{0,8,116}, + {0,8,52},{0,9,201},{17,7,13},{0,8,100},{0,8,36},{0,9,169},{0,8,4}, + {0,8,132},{0,8,68},{0,9,233},{16,7,8},{0,8,92},{0,8,28},{0,9,153}, + {20,7,83},{0,8,124},{0,8,60},{0,9,217},{18,7,23},{0,8,108},{0,8,44}, + {0,9,185},{0,8,12},{0,8,140},{0,8,76},{0,9,249},{16,7,3},{0,8,82}, + {0,8,18},{21,8,163},{19,7,35},{0,8,114},{0,8,50},{0,9,197},{17,7,11}, + {0,8,98},{0,8,34},{0,9,165},{0,8,2},{0,8,130},{0,8,66},{0,9,229}, + {16,7,7},{0,8,90},{0,8,26},{0,9,149},{20,7,67},{0,8,122},{0,8,58}, + {0,9,213},{18,7,19},{0,8,106},{0,8,42},{0,9,181},{0,8,10},{0,8,138}, + 
{0,8,74},{0,9,245},{16,7,5},{0,8,86},{0,8,22},{64,8,0},{19,7,51}, + {0,8,118},{0,8,54},{0,9,205},{17,7,15},{0,8,102},{0,8,38},{0,9,173}, + {0,8,6},{0,8,134},{0,8,70},{0,9,237},{16,7,9},{0,8,94},{0,8,30}, + {0,9,157},{20,7,99},{0,8,126},{0,8,62},{0,9,221},{18,7,27},{0,8,110}, + {0,8,46},{0,9,189},{0,8,14},{0,8,142},{0,8,78},{0,9,253},{96,7,0}, + {0,8,81},{0,8,17},{21,8,131},{18,7,31},{0,8,113},{0,8,49},{0,9,195}, + {16,7,10},{0,8,97},{0,8,33},{0,9,163},{0,8,1},{0,8,129},{0,8,65}, + {0,9,227},{16,7,6},{0,8,89},{0,8,25},{0,9,147},{19,7,59},{0,8,121}, + {0,8,57},{0,9,211},{17,7,17},{0,8,105},{0,8,41},{0,9,179},{0,8,9}, + {0,8,137},{0,8,73},{0,9,243},{16,7,4},{0,8,85},{0,8,21},{16,8,258}, + {19,7,43},{0,8,117},{0,8,53},{0,9,203},{17,7,13},{0,8,101},{0,8,37}, + {0,9,171},{0,8,5},{0,8,133},{0,8,69},{0,9,235},{16,7,8},{0,8,93}, + {0,8,29},{0,9,155},{20,7,83},{0,8,125},{0,8,61},{0,9,219},{18,7,23}, + {0,8,109},{0,8,45},{0,9,187},{0,8,13},{0,8,141},{0,8,77},{0,9,251}, + {16,7,3},{0,8,83},{0,8,19},{21,8,195},{19,7,35},{0,8,115},{0,8,51}, + {0,9,199},{17,7,11},{0,8,99},{0,8,35},{0,9,167},{0,8,3},{0,8,131}, + {0,8,67},{0,9,231},{16,7,7},{0,8,91},{0,8,27},{0,9,151},{20,7,67}, + {0,8,123},{0,8,59},{0,9,215},{18,7,19},{0,8,107},{0,8,43},{0,9,183}, + {0,8,11},{0,8,139},{0,8,75},{0,9,247},{16,7,5},{0,8,87},{0,8,23}, + {64,8,0},{19,7,51},{0,8,119},{0,8,55},{0,9,207},{17,7,15},{0,8,103}, + {0,8,39},{0,9,175},{0,8,7},{0,8,135},{0,8,71},{0,9,239},{16,7,9}, + {0,8,95},{0,8,31},{0,9,159},{20,7,99},{0,8,127},{0,8,63},{0,9,223}, + {18,7,27},{0,8,111},{0,8,47},{0,9,191},{0,8,15},{0,8,143},{0,8,79}, + {0,9,255} + }; + + static const code distfix[32] = { + {16,5,1},{23,5,257},{19,5,17},{27,5,4097},{17,5,5},{25,5,1025}, + {21,5,65},{29,5,16385},{16,5,3},{24,5,513},{20,5,33},{28,5,8193}, + {18,5,9},{26,5,2049},{22,5,129},{64,5,0},{16,5,2},{23,5,385}, + {19,5,25},{27,5,6145},{17,5,7},{25,5,1537},{21,5,97},{29,5,24577}, + {16,5,4},{24,5,769},{20,5,49},{28,5,12289},{18,5,13},{26,5,3073}, + {22,5,193},{64,5,0} + }; diff --git a/third_party/zlib/inflate.c b/third_party/zlib/inflate.c index 4adac768f..93f97ec28 100644 --- a/third_party/zlib/inflate.c +++ b/third_party/zlib/inflate.c @@ -1,26 +1,14 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;tab-width:8;coding:utf-8 -*-│ -│vi: set net ft=c ts=4 sts=4 sw=4 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 1995-2016 Mark Adler │ -│ Use of this source code is governed by the BSD-style licenses that can │ -│ be found in the third_party/zlib/LICENSE file. 
│ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/str/str.h" -#include "third_party/zlib/chunkcopy.internal.h" +/* inflate.c -- zlib decompression + * Copyright (C) 1995-2022 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ #include "third_party/zlib/inffast.internal.h" #include "third_party/zlib/inflate.internal.h" #include "third_party/zlib/inftrees.internal.h" #include "third_party/zlib/internal.h" -#include "third_party/zlib/zutil.internal.h" +// clang-format off -asm(".ident\t\"\\n\\n\ -zlib (zlib License)\\n\ -Copyright 1995-2017 Jean-loup Gailly and Mark Adler\""); -asm(".include \"libc/disclaimer.inc\""); - -/** - * @fileoverview zlib decompression - * +/* * Change history: * * 1.2.beta0 24 Nov 2002 @@ -64,7 +52,7 @@ asm(".include \"libc/disclaimer.inc\""); * - Use local copies of window variables in inflate_fast() for speed * - Pull out common wnext == 0 case for speed in inflate_fast() * - Make op and len in inflate_fast() unsigned for consistency - * - Add to lcode and dcode declarations in inflate_fast() + * - Add FAR to lcode and dcode declarations in inflate_fast() * - Simplified bad distance check in inflate_fast() * - Added inflateBackInit(), inflateBack(), and inflateBackEnd() in new * source file infback.c to provide a call-back interface to inflate for @@ -98,1387 +86,1503 @@ asm(".include \"libc/disclaimer.inc\""); */ #ifdef MAKEFIXED -#ifndef BUILDFIXED -#define BUILDFIXED -#endif +# ifndef BUILDFIXED +# define BUILDFIXED +# endif #endif -/* permutation of code lengths */ -static const unsigned short kZlibDeflateOrder[19] = { - 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; - -static int inflateStateCheck(z_streamp strm); -static void fixedtables(struct InflateState *state); -static int updatewindow(z_streamp strm, const unsigned char *end, - unsigned copy); -static unsigned syncsearch(unsigned *have, const unsigned char *buf, - unsigned len); +/* function prototypes */ +local int inflateStateCheck OF((z_streamp strm)); +local void fixedtables OF((struct inflate_state FAR *state)); +local int updatewindow OF((z_streamp strm, const unsigned char FAR *end, + unsigned copy)); #ifdef BUILDFIXED -void makefixed(void); + void makefixed OF((void)); #endif +local unsigned syncsearch OF((unsigned FAR *have, const unsigned char FAR *buf, + unsigned len)); -static int inflateStateCheck(z_streamp strm) { - struct InflateState *state; - if (strm == Z_NULL || strm->zalloc == (alloc_func)0 || - strm->zfree == (free_func)0) - return 1; - state = (struct InflateState *)strm->state; - if (state == Z_NULL || state->strm != strm || state->mode < HEAD || - state->mode > SYNC) - return 1; - return 0; +local int inflateStateCheck(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + if (strm == Z_NULL || + strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0) + return 1; + state = (struct inflate_state FAR *)strm->state; + if (state == Z_NULL || state->strm != strm || + state->mode < HEAD || state->mode > SYNC) + return 1; + return 0; } -int inflateResetKeep(z_streamp strm) { - struct InflateState *state; - if (inflateStateCheck(strm)) return Z_STREAM_ERROR; - state = (struct InflateState *)strm->state; - strm->total_in = strm->total_out = state->total = 0; - strm->msg = Z_NULL; - if (state->wrap) /* to support ill-conceived Java test suite */ - strm->adler = state->wrap & 1; - state->mode = HEAD; - state->last = 0; - state->havedict = 0; - state->dmax = 32768U; - 
state->head = Z_NULL; - state->hold = 0; - state->bits = 0; - state->lencode = state->distcode = state->next = state->codes; - state->sane = 1; - state->back = -1; - Tracev(("inflate: reset\n")); - return Z_OK; -} +int ZEXPORT inflateResetKeep(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; -int inflateReset(z_streamp strm) { - struct InflateState *state; - if (inflateStateCheck(strm)) return Z_STREAM_ERROR; - state = (struct InflateState *)strm->state; - state->wsize = 0; - state->whave = 0; - state->wnext = 0; - return inflateResetKeep(strm); -} - -int inflateReset2(z_streamp strm, int windowBits) { - int wrap; - struct InflateState *state; - - /* get the state */ - if (inflateStateCheck(strm)) return Z_STREAM_ERROR; - state = (struct InflateState *)strm->state; - - /* extract wrap request from windowBits parameter */ - if (windowBits < 0) { - wrap = 0; - windowBits = -windowBits; - } else { - wrap = (windowBits >> 4) + 5; -#ifdef GUNZIP - if (windowBits < 48) windowBits &= 15; -#endif - } - - /* set number of window bits, free window if different */ - if (windowBits && (windowBits < 8 || windowBits > 15)) return Z_STREAM_ERROR; - if (state->window != Z_NULL && state->wbits != (unsigned)windowBits) { - ZFREE(strm, state->window); - state->window = Z_NULL; - } - - /* update state and reset the rest of it */ - state->wrap = wrap; - state->wbits = (unsigned)windowBits; - return inflateReset(strm); -} - -int inflateInit2(z_streamp strm, int windowBits) { - int ret; - struct InflateState *state; - if (strm == Z_NULL) return Z_STREAM_ERROR; - strm->msg = Z_NULL; /* in case we return an error */ - if (strm->zalloc == (alloc_func)0) { - strm->zalloc = zcalloc; - strm->opaque = (voidpf)0; - } - if (strm->zfree == (free_func)0) { - strm->zfree = zcfree; - } - state = (struct InflateState *)ZALLOC(strm, 1, sizeof(struct InflateState)); - if (state == Z_NULL) return Z_MEM_ERROR; - Tracev(("inflate: allocated\n")); - strm->state = (struct DeflateState *)state; - state->strm = strm; - state->window = Z_NULL; - state->mode = HEAD; /* to pass state test in inflateReset2() */ - state->check = 1L; /* 1L is the result of adler32() zero length data */ - ret = inflateReset2(strm, windowBits); - if (ret != Z_OK) { - ZFREE(strm, state); - strm->state = Z_NULL; - } - return ret; -} - -int inflatePrime(z_streamp strm, int bits, int value) { - struct InflateState *state; - if (inflateStateCheck(strm)) return Z_STREAM_ERROR; - state = (struct InflateState *)strm->state; - if (bits < 0) { + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + strm->total_in = strm->total_out = state->total = 0; + strm->msg = Z_NULL; + if (state->wrap) /* to support ill-conceived Java test suite */ + strm->adler = state->wrap & 1; + state->mode = HEAD; + state->last = 0; + state->havedict = 0; + state->flags = -1; + state->dmax = 32768U; + state->head = Z_NULL; state->hold = 0; state->bits = 0; + state->lencode = state->distcode = state->next = state->codes; + state->sane = 1; + state->back = -1; + Tracev(("inflate: reset\n")); return Z_OK; - } - if (bits > 16 || state->bits + (uInt)bits > 32) return Z_STREAM_ERROR; - value &= (1L << bits) - 1; - state->hold += (unsigned)value << state->bits; - state->bits += (uInt)bits; - return Z_OK; } -/** - * Returns state with length and distance decoding tables and index - * sizes set to fixed code decoding. Normally this returns fixed tables - * from inffixed.h. 
If BUILDFIXED is defined, then instead this routine - * builds the tables the first time it's called, and returns those - * tables the first time and thereafter. This reduces the size of the - * code by about 2K bytes, in exchange for a little execution time. - * However, BUILDFIXED should not be used for threaded applications, - * since the rewriting of the tables and virgin may not be thread-safe. +int ZEXPORT inflateReset(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + state->wsize = 0; + state->whave = 0; + state->wnext = 0; + return inflateResetKeep(strm); +} + +int ZEXPORT inflateReset2(strm, windowBits) +z_streamp strm; +int windowBits; +{ + int wrap; + struct inflate_state FAR *state; + + /* get the state */ + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + + /* extract wrap request from windowBits parameter */ + if (windowBits < 0) { + wrap = 0; + windowBits = -windowBits; + } + else { + wrap = (windowBits >> 4) + 5; +#ifdef GUNZIP + if (windowBits < 48) + windowBits &= 15; +#endif + } + + /* set number of window bits, free window if different */ + if (windowBits && (windowBits < 8 || windowBits > 15)) + return Z_STREAM_ERROR; + if (state->window != Z_NULL && state->wbits != (unsigned)windowBits) { + ZFREE(strm, state->window); + state->window = Z_NULL; + } + + /* update state and reset the rest of it */ + state->wrap = wrap; + state->wbits = (unsigned)windowBits; + return inflateReset(strm); +} + +int ZEXPORT inflateInit2(strm, windowBits) +z_streamp strm; +int windowBits; +{ + int ret; + struct inflate_state FAR *state; + + if (strm == Z_NULL) return Z_STREAM_ERROR; + strm->msg = Z_NULL; /* in case we return an error */ + if (strm->zalloc == (alloc_func)0) { +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zalloc = zcalloc; + strm->opaque = (voidpf)0; +#endif + } + if (strm->zfree == (free_func)0) +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zfree = zcfree; +#endif + state = (struct inflate_state FAR *) + ZALLOC(strm, 1, sizeof(struct inflate_state)); + if (state == Z_NULL) return Z_MEM_ERROR; + Tracev(("inflate: allocated\n")); + strm->state = (struct internal_state FAR *)state; + state->strm = strm; + state->window = Z_NULL; + state->mode = HEAD; /* to pass state test in inflateReset2() */ + state->check = 1L; /* 1L is the result of adler32() zero length data */ + ret = inflateReset2(strm, windowBits); + if (ret != Z_OK) { + ZFREE(strm, state); + strm->state = Z_NULL; + } + return ret; +} + +int ZEXPORT inflateInit(strm) +z_streamp strm; +{ + return inflateInit2(strm, DEF_WBITS); +} + +int ZEXPORT inflatePrime(strm, bits, value) +z_streamp strm; +int bits; +int value; +{ + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (bits < 0) { + state->hold = 0; + state->bits = 0; + return Z_OK; + } + if (bits > 16 || state->bits + (uInt)bits > 32) return Z_STREAM_ERROR; + value &= (1L << bits) - 1; + state->hold += (unsigned)value << state->bits; + state->bits += (uInt)bits; + return Z_OK; +} + +/* + Return state with length and distance decoding tables and index sizes set to + fixed code decoding. Normally this returns fixed tables from inffixed.h. 
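For reference, a short sketch (mine, not from this patch) of the fixed Huffman code of RFC 1951 section 3.2.6 that fixedtables() serves up either way; the 9-bit literal/length root and 5-bit distance code are what give the 512-entry lenfix[] and 32-entry distfix[] tables seen in inffixed.inc above:

    /* Fixed code lengths per RFC 1951, the input inflate_table() turns
       into the lenfix[]/distfix[] decoding tables. */
    static void fixed_code_lengths(unsigned short lens[288],
                                   unsigned short dists[32]) {
        unsigned sym = 0;
        while (sym < 144) lens[sym++] = 8;   /* literals   0..143 : 8 bits */
        while (sym < 256) lens[sym++] = 9;   /* literals 144..255 : 9 bits */
        while (sym < 280) lens[sym++] = 7;   /* 256 (EOB), lengths 257..279 : 7 bits */
        while (sym < 288) lens[sym++] = 8;   /* lengths 280..287 : 8 bits */
        for (sym = 0; sym < 32; sym++)
            dists[sym] = 5;                  /* all distance codes : 5 bits */
    }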
+ If BUILDFIXED is defined, then instead this routine builds the tables the + first time it's called, and returns those tables the first time and + thereafter. This reduces the size of the code by about 2K bytes, in + exchange for a little execution time. However, BUILDFIXED should not be + used for threaded applications, since the rewriting of the tables and virgin + may not be thread-safe. */ -static void fixedtables(struct InflateState *state) { +local void fixedtables(state) +struct inflate_state FAR *state; +{ #ifdef BUILDFIXED - static int virgin = 1; - static code *lenfix, *distfix; - static code fixed[544]; + static int virgin = 1; + static code *lenfix, *distfix; + static code fixed[544]; - /* build fixed huffman tables if first call (may not be thread safe) */ - if (virgin) { - unsigned sym, bits; - static code *next; + /* build fixed huffman tables if first call (may not be thread safe) */ + if (virgin) { + unsigned sym, bits; + static code *next; - /* literal/length table */ - sym = 0; - while (sym < 144) state->lens[sym++] = 8; - while (sym < 256) state->lens[sym++] = 9; - while (sym < 280) state->lens[sym++] = 7; - while (sym < 288) state->lens[sym++] = 8; - next = fixed; - lenfix = next; - bits = 9; - inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work); + /* literal/length table */ + sym = 0; + while (sym < 144) state->lens[sym++] = 8; + while (sym < 256) state->lens[sym++] = 9; + while (sym < 280) state->lens[sym++] = 7; + while (sym < 288) state->lens[sym++] = 8; + next = fixed; + lenfix = next; + bits = 9; + inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work); - /* distance table */ - sym = 0; - while (sym < 32) state->lens[sym++] = 5; - distfix = next; - bits = 5; - inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work); + /* distance table */ + sym = 0; + while (sym < 32) state->lens[sym++] = 5; + distfix = next; + bits = 5; + inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work); - /* do this just once */ - virgin = 0; - } - state->lencode = lenfix; - state->distcode = distfix; + /* do this just once */ + virgin = 0; + } #else /* !BUILDFIXED */ - state->lencode = kZlibLenfix; - state->distcode = kZlibDistfix; +#include "third_party/zlib/inffixed.inc" #endif /* BUILDFIXED */ - state->lenbits = 9; - state->distbits = 5; + state->lencode = lenfix; + state->lenbits = 9; + state->distcode = distfix; + state->distbits = 5; } #ifdef MAKEFIXED -#include "libc/stdio/stdio.h" -/** - * Writes inffixed.h that is #include'd above. Defining MAKEFIXED also - * defines BUILDFIXED, so the tables are built on the fly. makefixed() - * writes those tables to stdout, which would be piped to inffixed.h. A - * small program can simply call makefixed to do this: - * - * void makefixed(void); - * - * int main(void) { - * makefixed(); - * return 0; - * } - * - * Then that can be linked with zlib built with MAKEFIXED defined and run: - * - * a.out > inffixed.h +#include + +/* + Write out the inffixed.h that is #include'd above. Defining MAKEFIXED also + defines BUILDFIXED, so the tables are built on the fly. makefixed() writes + those tables to stdout, which would be piped to inffixed.h. 
A small program + can simply call makefixed to do this: + + void makefixed(void); + + int main(void) + { + makefixed(); + return 0; + } + + Then that can be linked with zlib built with MAKEFIXED defined and run: + + a.out > inffixed.h */ -void makefixed(void) { - unsigned low, size; - struct InflateState state; - fixedtables(&state); - puts(" /* inffixed.h -- table for decoding fixed codes"); - puts(" * Generated automatically by makefixed()."); - puts(" */"); - puts(""); - puts(" /* WARNING: this file should *not* be used by applications."); - puts(" It is part of the implementation of this library and is"); - puts(" subject to change. Applications should only use zlib.h."); - puts(" */"); - puts(""); - size = 1U << 9; - printf(" static const zcode lenfix[%u] = {", size); - low = 0; - for (;;) { - if ((low % 7) == 0) printf("\n "); - printf("{%u,%u,%d}", (low & 127) == 99 ? 64 : state.lencode[low].op, - state.lencode[low].bits, state.lencode[low].val); - if (++low == size) break; - putchar(','); - } - puts("\n };"); - size = 1U << 5; - printf("\n static const zcode distfix[%u] = {", size); - low = 0; - for (;;) { - if ((low % 6) == 0) printf("\n "); - printf("{%u,%u,%d}", state.distcode[low].op, state.distcode[low].bits, - state.distcode[low].val); - if (++low == size) break; - putchar(','); - } - puts("\n };"); +void makefixed() +{ + unsigned low, size; + struct inflate_state state; + + fixedtables(&state); + puts(" /* inffixed.h -- table for decoding fixed codes"); + puts(" * Generated automatically by makefixed()."); + puts(" */"); + puts(""); + puts(" /* WARNING: this file should *not* be used by applications."); + puts(" It is part of the implementation of this library and is"); + puts(" subject to change. Applications should only use zlib.h."); + puts(" */"); + puts(""); + size = 1U << 9; + printf(" static const code lenfix[%u] = {", size); + low = 0; + for (;;) { + if ((low % 7) == 0) printf("\n "); + printf("{%u,%u,%d}", (low & 127) == 99 ? 64 : state.lencode[low].op, + state.lencode[low].bits, state.lencode[low].val); + if (++low == size) break; + putchar(','); + } + puts("\n };"); + size = 1U << 5; + printf("\n static const code distfix[%u] = {", size); + low = 0; + for (;;) { + if ((low % 6) == 0) printf("\n "); + printf("{%u,%u,%d}", state.distcode[low].op, state.distcode[low].bits, + state.distcode[low].val); + if (++low == size) break; + putchar(','); + } + puts("\n };"); } #endif /* MAKEFIXED */ -/** - * Updates window with last wsize (normally 32K) bytes written before - * returning. If window does not exist yet, create it. This is only - * called when a window is already in use, or when output has been - * written during this inflate call, but the end of the deflate stream - * has not been reached yet. It is also called to create a window for - * dictionary data when a dictionary is loaded. - * - * Providing output buffers larger than 32K to inflate() should provide - * a speed advantage, since only the last 32K of output is copied to the - * sliding window upon return from inflate(), and since all distances - * after the first 32K of output will fall in the output data, making - * match copies simpler and faster. The advantage may be dependent on - * the size of the processor's data caches. +/* + Update the window with the last wsize (normally 32K) bytes written before + returning. If window does not exist yet, create it. 
This is only called + when a window is already in use, or when output has been written during this + inflate call, but the end of the deflate stream has not been reached yet. + It is also called to create a window for dictionary data when a dictionary + is loaded. + + Providing output buffers larger than 32K to inflate() should provide a speed + advantage, since only the last 32K of output is copied to the sliding window + upon return from inflate(), and since all distances after the first 32K of + output will fall in the output data, making match copies simpler and faster. + The advantage may be dependent on the size of the processor's data caches. */ -static int updatewindow(z_streamp strm, const Bytef *end, unsigned copy) { - struct InflateState *state; - unsigned dist; +local int updatewindow(strm, end, copy) +z_streamp strm; +const Bytef *end; +unsigned copy; +{ + struct inflate_state FAR *state; + unsigned dist; - state = (struct InflateState *)strm->state; + state = (struct inflate_state FAR *)strm->state; - /* if it hasn't been done already, allocate space for the window */ - if (state->window == Z_NULL) { - unsigned wsize = 1U << state->wbits; - state->window = (unsigned char *)ZALLOC(strm, wsize + CHUNKCOPY_CHUNK_SIZE, - sizeof(unsigned char)); - if (state->window == Z_NULL) return 1; -#ifdef INFLATE_CLEAR_UNUSED_UNDEFINED - /* Copies from the overflow portion of this buffer are undefined and - may cause analysis tools to raise a warning if we don't initialize - it. However, this undefined data overwrites other undefined data - and is subsequently either overwritten or left deliberately - undefined at the end of decode; so there's really no point. - */ - bezro(state->window + wsize, CHUNKCOPY_CHUNK_SIZE); -#endif - } - - /* if window not in use yet, initialize */ - if (state->wsize == 0) { - state->wsize = 1U << state->wbits; - state->wnext = 0; - state->whave = 0; - } - - /* copy state->wsize or less output bytes into the circular window */ - if (copy >= state->wsize) { - memcpy(state->window, end - state->wsize, state->wsize); - state->wnext = 0; - state->whave = state->wsize; - } else { - dist = state->wsize - state->wnext; - if (dist > copy) dist = copy; - memcpy(state->window + state->wnext, end - copy, dist); - copy -= dist; - if (copy) { - memcpy(state->window, end - copy, copy); - state->wnext = copy; - state->whave = state->wsize; - } else { - state->wnext += dist; - if (state->wnext == state->wsize) state->wnext = 0; - if (state->whave < state->wsize) state->whave += dist; + /* if it hasn't been done already, allocate space for the window */ + if (state->window == Z_NULL) { + state->window = (unsigned char FAR *) + ZALLOC(strm, 1U << state->wbits, + sizeof(unsigned char)); + if (state->window == Z_NULL) return 1; } - } - return 0; + + /* if window not in use yet, initialize */ + if (state->wsize == 0) { + state->wsize = 1U << state->wbits; + state->wnext = 0; + state->whave = 0; + } + + /* copy state->wsize or less output bytes into the circular window */ + if (copy >= state->wsize) { + zmemcpy(state->window, end - state->wsize, state->wsize); + state->wnext = 0; + state->whave = state->wsize; + } + else { + dist = state->wsize - state->wnext; + if (dist > copy) dist = copy; + zmemcpy(state->window + state->wnext, end - copy, dist); + copy -= dist; + if (copy) { + zmemcpy(state->window, end - copy, copy); + state->wnext = copy; + state->whave = state->wsize; + } + else { + state->wnext += dist; + if (state->wnext == state->wsize) state->wnext = 0; + if (state->whave < 
state->wsize) state->whave += dist; + } + } + return 0; } /* Macros for inflate(): */ /* check function to use adler32() for zlib or crc32() for gzip */ #ifdef GUNZIP -#define UPDATE(check, buf, len) \ - (state->flags ? crc32(check, buf, len) : adler32(check, buf, len)) +# define UPDATE_CHECK(check, buf, len) \ + (state->flags ? crc32(check, buf, len) : adler32(check, buf, len)) #else -#define UPDATE(check, buf, len) adler32(check, buf, len) +# define UPDATE_CHECK(check, buf, len) adler32(check, buf, len) #endif /* check macros for header crc */ #ifdef GUNZIP -#define CRC2(check, word) \ - do { \ - hbuf[0] = (unsigned char)(word); \ - hbuf[1] = (unsigned char)((word) >> 8); \ - check = crc32(check, hbuf, 2); \ - } while (0) +# define CRC2(check, word) \ + do { \ + hbuf[0] = (unsigned char)(word); \ + hbuf[1] = (unsigned char)((word) >> 8); \ + check = crc32(check, hbuf, 2); \ + } while (0) -#define CRC4(check, word) \ - do { \ - hbuf[0] = (unsigned char)(word); \ - hbuf[1] = (unsigned char)((word) >> 8); \ - hbuf[2] = (unsigned char)((word) >> 16); \ - hbuf[3] = (unsigned char)((word) >> 24); \ - check = crc32(check, hbuf, 4); \ - } while (0) +# define CRC4(check, word) \ + do { \ + hbuf[0] = (unsigned char)(word); \ + hbuf[1] = (unsigned char)((word) >> 8); \ + hbuf[2] = (unsigned char)((word) >> 16); \ + hbuf[3] = (unsigned char)((word) >> 24); \ + check = crc32(check, hbuf, 4); \ + } while (0) #endif /* Load registers with state in inflate() for speed */ -#define LOAD() \ - do { \ - put = strm->next_out; \ - left = strm->avail_out; \ - next = strm->next_in; \ - have = strm->avail_in; \ - hold = state->hold; \ - bits = state->bits; \ - } while (0) +#define LOAD() \ + do { \ + put = strm->next_out; \ + left = strm->avail_out; \ + next = strm->next_in; \ + have = strm->avail_in; \ + hold = state->hold; \ + bits = state->bits; \ + } while (0) /* Restore state from registers in inflate() */ -#define RESTORE() \ - do { \ - strm->next_out = put; \ - strm->avail_out = left; \ - strm->next_in = next; \ - strm->avail_in = have; \ - state->hold = hold; \ - state->bits = bits; \ - } while (0) +#define RESTORE() \ + do { \ + strm->next_out = put; \ + strm->avail_out = left; \ + strm->next_in = next; \ + strm->avail_in = have; \ + state->hold = hold; \ + state->bits = bits; \ + } while (0) /* Clear the input bit accumulator */ #define INITBITS() \ - do { \ - hold = 0; \ - bits = 0; \ - } while (0) + do { \ + hold = 0; \ + bits = 0; \ + } while (0) /* Get a byte of input into the bit accumulator, or return from inflate() if there is no input available. */ -#define PULLBYTE() \ - do { \ - if (have == 0) goto inf_leave; \ - have--; \ - hold += (unsigned long)(*next++) << bits; \ - bits += 8; \ - } while (0) +#define PULLBYTE() \ + do { \ + if (have == 0) goto inf_leave; \ + have--; \ + hold += (unsigned long)(*next++) << bits; \ + bits += 8; \ + } while (0) /* Assure that there are at least n bits in the bit accumulator. If there is not enough available input to do that, then return from inflate(). 
*/ -#define NEEDBITS(n) \ - do { \ - while (bits < (unsigned)(n)) PULLBYTE(); \ - } while (0) +#define NEEDBITS(n) \ + do { \ + while (bits < (unsigned)(n)) \ + PULLBYTE(); \ + } while (0) /* Return the low n bits of the bit accumulator (n < 16) */ -#define BITS(n) ((unsigned)hold & ((1U << (n)) - 1)) +#define BITS(n) \ + ((unsigned)hold & ((1U << (n)) - 1)) /* Remove n bits from the bit accumulator */ -#define DROPBITS(n) \ - do { \ - hold >>= (n); \ - bits -= (unsigned)(n); \ - } while (0) +#define DROPBITS(n) \ + do { \ + hold >>= (n); \ + bits -= (unsigned)(n); \ + } while (0) /* Remove zero to seven bits as needed to go to a byte boundary */ -#define BYTEBITS() \ - do { \ - hold >>= bits & 7; \ - bits -= bits & 7; \ - } while (0) +#define BYTEBITS() \ + do { \ + hold >>= bits & 7; \ + bits -= bits & 7; \ + } while (0) -/** - * inflate() uses a state machine to process as much input data and - * generate as much output data as possible before returning. The state - * machine is structured roughly as follows: - * - * for (;;) switch (state) { - * ... - * case STATEn: - * if (not enough input data or output space to make progress) - * return; - * ... make progress ... - * state = STATEm; - * break; - * ... - * } - * - * so when inflate() is called again, the same case is attempted again, - * and if the appropriate resources are provided, the machine proceeds - * to the next state. The NEEDBITS() macro is usually the way the state - * evaluates whether it can proceed or should return. NEEDBITS() does - * the return if the requested bits are not available. The typical use - * of the BITS macros is: - * - * NEEDBITS(n); - * ... do something with BITS(n) ... - * DROPBITS(n); - * - * where NEEDBITS(n) either returns from inflate() if there isn't enough - * input left to load n bits into the accumulator, or it continues. - * BITS(n) gives the low n bits in the accumulator. When done, - * DROPBITS(n) drops the low n bits off the accumulator. INITBITS() - * clears the accumulator and sets the number of available bits to zero. - * BYTEBITS() discards just enough bits to put the accumulator on a byte - * boundary. After BYTEBITS() and a NEEDBITS(8), then BITS(8) would - * return the next byte in the stream. - * - * NEEDBITS(n) uses PULLBYTE() to get an available byte of input, or to - * return if there is no input available. The decoding of variable - * length codes uses PULLBYTE() directly in order to pull just enough - * bytes to decode the next code, and no more. - * - * Some states loop until they get enough input, making sure that enough - * state information is maintained to continue the loop where it left - * off if NEEDBITS() returns in the loop. For example, want, need, and - * keep would all have to actually be part of the saved state in case - * NEEDBITS() returns: - * - * case STATEw: - * while (want < need) { - * NEEDBITS(n); - * keep[want++] = BITS(n); - * DROPBITS(n); - * } - * state = STATEx; - * case STATEx: - * - * As shown above, if the next state is also the next case, then the - * break is omitted. - * - * A state may also return if there is not enough output space available - * to complete that state. Those states are copying stored data, writing - * a literal byte, and copying a matching string. - * - * When returning, a "goto inf_leave" is used to update the total - * counters, update the check value, and determine whether any progress - * has been made during that inflate() call in order to return the - * proper return code. 
Progress is defined as a change in either - * strm->avail_in or strm->avail_out. When there is a window, goto - * inf_leave will update the window with the last output written. If a - * goto inf_leave occurs in the middle of decompression and there is no - * window currently, goto inf_leave will create one and copy output to - * the window for the next call of inflate(). - * - * In this implementation, the flush parameter of inflate() only affects - * the return code (per zlib.h). inflate() always writes as much as - * possible to strm->next_out, given the space available and the - * provided input--the effect documented in zlib.h of Z_SYNC_FLUSH. - * Furthermore, inflate() always defers the allocation of and copying - * into a sliding window until necessary, which provides the effect - * documented in zlib.h for Z_FINISH when the entire input stream - * available. So the only thing the flush parameter actually does is: - * when flush is set to Z_FINISH, inflate() cannot return Z_OK. Instead - * it will return Z_BUF_ERROR if it has not reached the end of the - * stream. +/* + inflate() uses a state machine to process as much input data and generate as + much output data as possible before returning. The state machine is + structured roughly as follows: + + for (;;) switch (state) { + ... + case STATEn: + if (not enough input data or output space to make progress) + return; + ... make progress ... + state = STATEm; + break; + ... + } + + so when inflate() is called again, the same case is attempted again, and + if the appropriate resources are provided, the machine proceeds to the + next state. The NEEDBITS() macro is usually the way the state evaluates + whether it can proceed or should return. NEEDBITS() does the return if + the requested bits are not available. The typical use of the BITS macros + is: + + NEEDBITS(n); + ... do something with BITS(n) ... + DROPBITS(n); + + where NEEDBITS(n) either returns from inflate() if there isn't enough + input left to load n bits into the accumulator, or it continues. BITS(n) + gives the low n bits in the accumulator. When done, DROPBITS(n) drops + the low n bits off the accumulator. INITBITS() clears the accumulator + and sets the number of available bits to zero. BYTEBITS() discards just + enough bits to put the accumulator on a byte boundary. After BYTEBITS() + and a NEEDBITS(8), then BITS(8) would return the next byte in the stream. + + NEEDBITS(n) uses PULLBYTE() to get an available byte of input, or to return + if there is no input available. The decoding of variable length codes uses + PULLBYTE() directly in order to pull just enough bytes to decode the next + code, and no more. + + Some states loop until they get enough input, making sure that enough + state information is maintained to continue the loop where it left off + if NEEDBITS() returns in the loop. For example, want, need, and keep + would all have to actually be part of the saved state in case NEEDBITS() + returns: + + case STATEw: + while (want < need) { + NEEDBITS(n); + keep[want++] = BITS(n); + DROPBITS(n); + } + state = STATEx; + case STATEx: + + As shown above, if the next state is also the next case, then the break + is omitted. + + A state may also return if there is not enough output space available to + complete that state. Those states are copying stored data, writing a + literal byte, and copying a matching string. 
+ + When returning, a "goto inf_leave" is used to update the total counters, + update the check value, and determine whether any progress has been made + during that inflate() call in order to return the proper return code. + Progress is defined as a change in either strm->avail_in or strm->avail_out. + When there is a window, goto inf_leave will update the window with the last + output written. If a goto inf_leave occurs in the middle of decompression + and there is no window currently, goto inf_leave will create one and copy + output to the window for the next call of inflate(). + + In this implementation, the flush parameter of inflate() only affects the + return code (per zlib.h). inflate() always writes as much as possible to + strm->next_out, given the space available and the provided input--the effect + documented in zlib.h of Z_SYNC_FLUSH. Furthermore, inflate() always defers + the allocation of and copying into a sliding window until necessary, which + provides the effect documented in zlib.h for Z_FINISH when the entire input + stream available. So the only thing the flush parameter actually does is: + when flush is set to Z_FINISH, inflate() cannot return Z_OK. Instead it + will return Z_BUF_ERROR if it has not reached the end of the stream. */ -int inflate(z_streamp strm, int flush) { - struct InflateState *state; - const unsigned char *next; /* next input */ - unsigned char *put; /* next output */ - unsigned have, left; /* available input and output */ - unsigned long hold; /* bit buffer */ - unsigned bits; /* bits in bit buffer */ - unsigned in, out; /* save starting available input and output */ - unsigned copy; /* number of stored or match bytes to copy */ - unsigned char *from; /* where to copy match bytes from */ - struct zcode here; /* current decoding table entry */ - struct zcode last; /* parent table entry */ - unsigned len; /* length to copy for repeats, bits to drop */ - int ret; /* return code */ -#ifdef GUNZIP - unsigned char hbuf[4]; /* buffer for gzip header crc calculation */ -#endif - if (inflateStateCheck(strm) || strm->next_out == Z_NULL || - (strm->next_in == Z_NULL && strm->avail_in != 0)) { - return Z_STREAM_ERROR; - } - state = (struct InflateState *)strm->state; - if (state->mode == TYPE) state->mode = TYPEDO; /* skip check */ - LOAD(); - in = have; - out = left; - ret = Z_OK; - for (;;) { - switch (state->mode) { - case HEAD: - if (state->wrap == 0) { - state->mode = TYPEDO; - break; - } - NEEDBITS(16); +int ZEXPORT inflate(strm, flush) +z_streamp strm; +int flush; +{ + struct inflate_state FAR *state; + z_const unsigned char FAR *next; /* next input */ + unsigned char FAR *put; /* next output */ + unsigned have, left; /* available input and output */ + unsigned long hold; /* bit buffer */ + unsigned bits; /* bits in bit buffer */ + unsigned in, out; /* save starting available input and output */ + unsigned copy; /* number of stored or match bytes to copy */ + unsigned char FAR *from; /* where to copy match bytes from */ + code here; /* current decoding table entry */ + code last; /* parent table entry */ + unsigned len; /* length to copy for repeats, bits to drop */ + int ret; /* return code */ #ifdef GUNZIP - if ((state->wrap & 2) && hold == 0x8b1f) { /* gzip header */ - if (state->wbits == 0) state->wbits = 15; - state->check = crc32(0L, Z_NULL, 0); - CRC2(state->check, hold); - INITBITS(); - state->mode = FLAGS; - break; - } - state->flags = 0; /* expect zlib header */ - if (state->head != Z_NULL) state->head->done = -1; - if (!(state->wrap & 1) || 
/* check if zlib header allowed */ + unsigned char hbuf[4]; /* buffer for gzip header crc calculation */ +#endif + static const unsigned short order[19] = /* permutation of code lengths */ + {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + + if (inflateStateCheck(strm) || strm->next_out == Z_NULL || + (strm->next_in == Z_NULL && strm->avail_in != 0)) + return Z_STREAM_ERROR; + + state = (struct inflate_state FAR *)strm->state; + if (state->mode == TYPE) state->mode = TYPEDO; /* skip check */ + LOAD(); + in = have; + out = left; + ret = Z_OK; + for (;;) + switch (state->mode) { + case HEAD: + if (state->wrap == 0) { + state->mode = TYPEDO; + break; + } + NEEDBITS(16); +#ifdef GUNZIP + if ((state->wrap & 2) && hold == 0x8b1f) { /* gzip header */ + if (state->wbits == 0) + state->wbits = 15; + state->check = crc32(0L, Z_NULL, 0); + CRC2(state->check, hold); + INITBITS(); + state->mode = FLAGS; + break; + } + if (state->head != Z_NULL) + state->head->done = -1; + if (!(state->wrap & 1) || /* check if zlib header allowed */ #else - if ( + if ( #endif - ((BITS(8) << 8) + (hold >> 8)) % 31) { - strm->msg = (char *)"incorrect header check"; - state->mode = BAD; - break; - } - if (BITS(4) != Z_DEFLATED) { - strm->msg = (char *)"unknown compression method"; - state->mode = BAD; - break; - } - DROPBITS(4); - len = BITS(4) + 8; - if (state->wbits == 0) state->wbits = len; - if (len > 15 || len > state->wbits) { - strm->msg = (char *)"invalid window size"; - state->mode = BAD; - break; - } - state->dmax = 1U << len; - Tracev(("inflate: zlib header ok\n")); - strm->adler = state->check = adler32(0L, Z_NULL, 0); - state->mode = hold & 0x200 ? DICTID : TYPE; - INITBITS(); - break; -#ifdef GUNZIP - case FLAGS: - NEEDBITS(16); - state->flags = (int)(hold); - if ((state->flags & 0xff) != Z_DEFLATED) { - strm->msg = (char *)"unknown compression method"; - state->mode = BAD; - break; - } - if (state->flags & 0xe000) { - strm->msg = (char *)"unknown header flags set"; - state->mode = BAD; - break; - } - if (state->head != Z_NULL) state->head->text = (int)((hold >> 8) & 1); - if ((state->flags & 0x0200) && (state->wrap & 4)) - CRC2(state->check, hold); - INITBITS(); - state->mode = TIME; - case TIME: - NEEDBITS(32); - if (state->head != Z_NULL) state->head->time = hold; - if ((state->flags & 0x0200) && (state->wrap & 4)) - CRC4(state->check, hold); - INITBITS(); - state->mode = OS; - case OS: - NEEDBITS(16); - if (state->head != Z_NULL) { - state->head->xflags = (int)(hold & 0xff); - state->head->os = (int)(hold >> 8); - } - if ((state->flags & 0x0200) && (state->wrap & 4)) - CRC2(state->check, hold); - INITBITS(); - state->mode = EXLEN; - case EXLEN: - if (state->flags & 0x0400) { - NEEDBITS(16); - state->length = (unsigned)(hold); - if (state->head != Z_NULL) state->head->extra_len = (unsigned)hold; - if ((state->flags & 0x0200) && (state->wrap & 4)) - CRC2(state->check, hold); - INITBITS(); - } else if (state->head != Z_NULL) - state->head->extra = Z_NULL; - state->mode = EXTRA; - case EXTRA: - if (state->flags & 0x0400) { - copy = state->length; - if (copy > have) copy = have; - if (copy) { - if (state->head != Z_NULL && state->head->extra != Z_NULL && - (len = state->head->extra_len - state->length) < - state->head->extra_max) { - memcpy(state->head->extra + len, next, - len + copy > state->head->extra_max - ? 
state->head->extra_max - len - : copy); - } - if ((state->flags & 0x0200) && (state->wrap & 4)) - state->check = crc32(state->check, next, copy); - have -= copy; - next += copy; - state->length -= copy; - } - if (state->length) goto inf_leave; - } - state->length = 0; - state->mode = NAME; - /* fallthrough */ - case NAME: - if (state->flags & 0x0800) { - if (have == 0) goto inf_leave; - copy = 0; - do { - len = (unsigned)(next[copy++]); - if (state->head != Z_NULL && state->head->name != Z_NULL && - state->length < state->head->name_max) - state->head->name[state->length++] = (Bytef)len; - } while (len && copy < have); - if ((state->flags & 0x0200) && (state->wrap & 4)) - state->check = crc32(state->check, next, copy); - have -= copy; - next += copy; - if (len) goto inf_leave; - } else if (state->head != Z_NULL) - state->head->name = Z_NULL; - state->length = 0; - state->mode = COMMENT; - case COMMENT: - if (state->flags & 0x1000) { - if (have == 0) goto inf_leave; - copy = 0; - do { - len = (unsigned)(next[copy++]); - if (state->head != Z_NULL && state->head->comment != Z_NULL && - state->length < state->head->comm_max) - state->head->comment[state->length++] = (Bytef)len; - } while (len && copy < have); - if ((state->flags & 0x0200) && (state->wrap & 4)) - state->check = crc32(state->check, next, copy); - have -= copy; - next += copy; - if (len) goto inf_leave; - } else if (state->head != Z_NULL) - state->head->comment = Z_NULL; - state->mode = HCRC; - case HCRC: - if (state->flags & 0x0200) { - NEEDBITS(16); - if ((state->wrap & 4) && hold != (state->check & 0xffff)) { - strm->msg = (char *)"header crc mismatch"; - state->mode = BAD; - break; - } - INITBITS(); - } - if (state->head != Z_NULL) { - state->head->hcrc = (int)((state->flags >> 9) & 1); - state->head->done = 1; - } - strm->adler = state->check = crc32(0L, Z_NULL, 0); - state->mode = TYPE; - break; -#endif - case DICTID: - NEEDBITS(32); - strm->adler = state->check = ZSWAP32(hold); - INITBITS(); - state->mode = DICT; - case DICT: - if (state->havedict == 0) { - RESTORE(); - return Z_NEED_DICT; - } - strm->adler = state->check = adler32(0L, Z_NULL, 0); - state->mode = TYPE; - case TYPE: - if (flush == Z_BLOCK || flush == Z_TREES) goto inf_leave; - case TYPEDO: - if (state->last) { - BYTEBITS(); - state->mode = CHECK; - break; - } - NEEDBITS(3); - state->last = BITS(1); - DROPBITS(1); - switch (BITS(2)) { - case 0: /* stored block */ - Tracev(("inflate: stored block%s\n", - state->last ? " (last)" : "")); - state->mode = STORED; - break; - case 1: /* fixed block */ - fixedtables(state); - Tracev(("inflate: fixed codes block%s\n", - state->last ? " (last)" : "")); - state->mode = LEN_; /* decode codes */ - if (flush == Z_TREES) { - DROPBITS(2); - goto inf_leave; - } - break; - case 2: /* dynamic block */ - Tracev(("inflate: dynamic codes block%s\n", - state->last ? 
" (last)" : "")); - state->mode = TABLE; - break; - case 3: - strm->msg = (char *)"invalid block type"; - state->mode = BAD; - } - DROPBITS(2); - break; - case STORED: - BYTEBITS(); /* go to byte boundary */ - NEEDBITS(32); - if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) { - strm->msg = (char *)"invalid stored block lengths"; - state->mode = BAD; - break; - } - state->length = (unsigned)hold & 0xffff; - Tracev(("inflate: stored length %u\n", state->length)); - INITBITS(); - state->mode = COPY_; - if (flush == Z_TREES) goto inf_leave; - case COPY_: - state->mode = COPY; - case COPY: - copy = state->length; - if (copy) { - if (copy > have) copy = have; - if (copy > left) copy = left; - if (copy == 0) goto inf_leave; - memcpy(put, next, copy); - have -= copy; - next += copy; - left -= copy; - put += copy; - state->length -= copy; - break; - } - Tracev(("inflate: stored end\n")); - state->mode = TYPE; - break; - case TABLE: - NEEDBITS(14); - state->nlen = BITS(5) + 257; - DROPBITS(5); - state->ndist = BITS(5) + 1; - DROPBITS(5); - state->ncode = BITS(4) + 4; - DROPBITS(4); -#ifndef PKZIP_BUG_WORKAROUND - if (state->nlen > 286 || state->ndist > 30) { - strm->msg = (char *)"too many length or distance symbols"; - state->mode = BAD; - break; - } -#endif - Tracev(("inflate: table sizes ok\n")); - state->have = 0; - state->mode = LENLENS; - case LENLENS: - while (state->have < state->ncode) { - NEEDBITS(3); - state->lens[kZlibDeflateOrder[state->have++]] = - (unsigned short)BITS(3); - DROPBITS(3); - } - while (state->have < 19) { - state->lens[kZlibDeflateOrder[state->have++]] = 0; - } - state->next = state->codes; - state->lencode = (const struct zcode *)(state->next); - state->lenbits = 7; - ret = inflate_table(CODES, state->lens, 19, &(state->next), - &(state->lenbits), state->work); - if (ret) { - strm->msg = (char *)"invalid code lengths set"; - state->mode = BAD; - break; - } - Tracev(("inflate: code lengths ok\n")); - state->have = 0; - state->mode = CODELENS; - case CODELENS: - while (state->have < state->nlen + state->ndist) { - for (;;) { - here = state->lencode[BITS(state->lenbits)]; - if ((unsigned)(here.bits) <= bits) break; - PULLBYTE(); - } - if (here.val < 16) { - DROPBITS(here.bits); - state->lens[state->have++] = here.val; - } else { - if (here.val == 16) { - NEEDBITS(here.bits + 2); - DROPBITS(here.bits); - if (state->have == 0) { - strm->msg = (char *)"invalid bit length repeat"; + ((BITS(8) << 8) + (hold >> 8)) % 31) { + strm->msg = (char *)"incorrect header check"; state->mode = BAD; break; - } - len = state->lens[state->have - 1]; - copy = 3 + BITS(2); - DROPBITS(2); - } else if (here.val == 17) { - NEEDBITS(here.bits + 3); - DROPBITS(here.bits); - len = 0; - copy = 3 + BITS(3); - DROPBITS(3); - } else { - NEEDBITS(here.bits + 7); - DROPBITS(here.bits); - len = 0; - copy = 11 + BITS(7); - DROPBITS(7); } - if (state->have + copy > state->nlen + state->ndist) { - strm->msg = (char *)"invalid bit length repeat"; - state->mode = BAD; - break; + if (BITS(4) != Z_DEFLATED) { + strm->msg = (char *)"unknown compression method"; + state->mode = BAD; + break; } - while (copy--) state->lens[state->have++] = (unsigned short)len; - } - } - - /* handle error breaks in while */ - if (state->mode == BAD) break; - - /* check for end-of-block code (better have one) */ - if (state->lens[256] == 0) { - strm->msg = (char *)"invalid code -- missing end-of-block"; - state->mode = BAD; - break; - } - - /* build code tables -- note: do not change the lenbits or distbits - values here (9 and 6) 
without reading the comments in inftrees.h - concerning the ENOUGH constants, which depend on those values */ - state->next = state->codes; - state->lencode = (const struct zcode *)(state->next); - state->lenbits = 9; - ret = inflate_table(LENS, state->lens, state->nlen, &(state->next), - &(state->lenbits), state->work); - if (ret) { - strm->msg = (char *)"invalid literal/lengths set"; - state->mode = BAD; - break; - } - state->distcode = (const struct zcode *)(state->next); - state->distbits = 6; - ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist, - &(state->next), &(state->distbits), state->work); - if (ret) { - strm->msg = (char *)"invalid distances set"; - state->mode = BAD; - break; - } - Tracev(("inflate: codes ok\n")); - state->mode = LEN_; - if (flush == Z_TREES) goto inf_leave; - case LEN_: - state->mode = LEN; - case LEN: - if (have >= INFLATE_FAST_MIN_INPUT && left >= INFLATE_FAST_MIN_OUTPUT) { - RESTORE(); - inflate_fast_chunk(strm, out); - LOAD(); - if (state->mode == TYPE) state->back = -1; - break; - } - state->back = 0; - for (;;) { - here = state->lencode[BITS(state->lenbits)]; - if ((unsigned)(here.bits) <= bits) break; - PULLBYTE(); - } - if (here.op && (here.op & 0xf0) == 0) { - last = here; - for (;;) { - here = state->lencode[last.val + - (BITS(last.bits + last.op) >> last.bits)]; - if ((unsigned)(last.bits + here.bits) <= bits) break; - PULLBYTE(); - } - DROPBITS(last.bits); - state->back += last.bits; - } - DROPBITS(here.bits); - state->back += here.bits; - state->length = (unsigned)here.val; - if ((int)(here.op) == 0) { - Tracevv((here.val >= 0x20 && here.val < 0x7f - ? "inflate: literal '%c'\n" - : "inflate: literal 0x%02x\n", - here.val)); - state->mode = LIT; - break; - } - if (here.op & 32) { - Tracevv(("inflate: end of block\n")); - state->back = -1; - state->mode = TYPE; - break; - } - if (here.op & 64) { - strm->msg = (char *)"invalid literal/length code"; - state->mode = BAD; - break; - } - state->extra = (unsigned)(here.op) & 15; - state->mode = LENEXT; - case LENEXT: - if (state->extra) { - NEEDBITS(state->extra); - state->length += BITS(state->extra); - DROPBITS(state->extra); - state->back += state->extra; - } - Tracevv(("inflate: length %u\n", state->length)); - state->was = state->length; - state->mode = DIST; - case DIST: - for (;;) { - here = state->distcode[BITS(state->distbits)]; - if ((unsigned)(here.bits) <= bits) break; - PULLBYTE(); - } - if ((here.op & 0xf0) == 0) { - last = here; - for (;;) { - here = state->distcode[last.val + - (BITS(last.bits + last.op) >> last.bits)]; - if ((unsigned)(last.bits + here.bits) <= bits) break; - PULLBYTE(); - } - DROPBITS(last.bits); - state->back += last.bits; - } - DROPBITS(here.bits); - state->back += here.bits; - if (here.op & 64) { - strm->msg = (char *)"invalid distance code"; - state->mode = BAD; - break; - } - state->offset = (unsigned)here.val; - state->extra = (unsigned)(here.op) & 15; - state->mode = DISTEXT; - case DISTEXT: - if (state->extra) { - NEEDBITS(state->extra); - state->offset += BITS(state->extra); - DROPBITS(state->extra); - state->back += state->extra; - } -#ifdef INFLATE_STRICT - if (state->offset > state->dmax) { - strm->msg = (char *)"invalid distance too far back"; - state->mode = BAD; - break; - } + DROPBITS(4); + len = BITS(4) + 8; + if (state->wbits == 0) + state->wbits = len; + if (len > 15 || len > state->wbits) { + strm->msg = (char *)"invalid window size"; + state->mode = BAD; + break; + } + state->dmax = 1U << len; + state->flags = 0; /* indicate zlib 
header */ + Tracev(("inflate: zlib header ok\n")); + strm->adler = state->check = adler32(0L, Z_NULL, 0); + state->mode = hold & 0x200 ? DICTID : TYPE; + INITBITS(); + break; +#ifdef GUNZIP + case FLAGS: + NEEDBITS(16); + state->flags = (int)(hold); + if ((state->flags & 0xff) != Z_DEFLATED) { + strm->msg = (char *)"unknown compression method"; + state->mode = BAD; + break; + } + if (state->flags & 0xe000) { + strm->msg = (char *)"unknown header flags set"; + state->mode = BAD; + break; + } + if (state->head != Z_NULL) + state->head->text = (int)((hold >> 8) & 1); + if ((state->flags & 0x0200) && (state->wrap & 4)) + CRC2(state->check, hold); + INITBITS(); + state->mode = TIME; + /* fallthrough */ + case TIME: + NEEDBITS(32); + if (state->head != Z_NULL) + state->head->time = hold; + if ((state->flags & 0x0200) && (state->wrap & 4)) + CRC4(state->check, hold); + INITBITS(); + state->mode = OS; + /* fallthrough */ + case OS: + NEEDBITS(16); + if (state->head != Z_NULL) { + state->head->xflags = (int)(hold & 0xff); + state->head->os = (int)(hold >> 8); + } + if ((state->flags & 0x0200) && (state->wrap & 4)) + CRC2(state->check, hold); + INITBITS(); + state->mode = EXLEN; + /* fallthrough */ + case EXLEN: + if (state->flags & 0x0400) { + NEEDBITS(16); + state->length = (unsigned)(hold); + if (state->head != Z_NULL) + state->head->extra_len = (unsigned)hold; + if ((state->flags & 0x0200) && (state->wrap & 4)) + CRC2(state->check, hold); + INITBITS(); + } + else if (state->head != Z_NULL) + state->head->extra = Z_NULL; + state->mode = EXTRA; + /* fallthrough */ + case EXTRA: + if (state->flags & 0x0400) { + copy = state->length; + if (copy > have) copy = have; + if (copy) { + if (state->head != Z_NULL && + state->head->extra != Z_NULL && + (len = state->head->extra_len - state->length) < + state->head->extra_max) { + zmemcpy(state->head->extra + len, next, + len + copy > state->head->extra_max ? 
+ state->head->extra_max - len : copy); + } + if ((state->flags & 0x0200) && (state->wrap & 4)) + state->check = crc32(state->check, next, copy); + have -= copy; + next += copy; + state->length -= copy; + } + if (state->length) goto inf_leave; + } + state->length = 0; + state->mode = NAME; + /* fallthrough */ + case NAME: + if (state->flags & 0x0800) { + if (have == 0) goto inf_leave; + copy = 0; + do { + len = (unsigned)(next[copy++]); + if (state->head != Z_NULL && + state->head->name != Z_NULL && + state->length < state->head->name_max) + state->head->name[state->length++] = (Bytef)len; + } while (len && copy < have); + if ((state->flags & 0x0200) && (state->wrap & 4)) + state->check = crc32(state->check, next, copy); + have -= copy; + next += copy; + if (len) goto inf_leave; + } + else if (state->head != Z_NULL) + state->head->name = Z_NULL; + state->length = 0; + state->mode = COMMENT; + /* fallthrough */ + case COMMENT: + if (state->flags & 0x1000) { + if (have == 0) goto inf_leave; + copy = 0; + do { + len = (unsigned)(next[copy++]); + if (state->head != Z_NULL && + state->head->comment != Z_NULL && + state->length < state->head->comm_max) + state->head->comment[state->length++] = (Bytef)len; + } while (len && copy < have); + if ((state->flags & 0x0200) && (state->wrap & 4)) + state->check = crc32(state->check, next, copy); + have -= copy; + next += copy; + if (len) goto inf_leave; + } + else if (state->head != Z_NULL) + state->head->comment = Z_NULL; + state->mode = HCRC; + /* fallthrough */ + case HCRC: + if (state->flags & 0x0200) { + NEEDBITS(16); + if ((state->wrap & 4) && hold != (state->check & 0xffff)) { + strm->msg = (char *)"header crc mismatch"; + state->mode = BAD; + break; + } + INITBITS(); + } + if (state->head != Z_NULL) { + state->head->hcrc = (int)((state->flags >> 9) & 1); + state->head->done = 1; + } + strm->adler = state->check = crc32(0L, Z_NULL, 0); + state->mode = TYPE; + break; #endif - Tracevv(("inflate: distance %u\n", state->offset)); - state->mode = MATCH; - case MATCH: - if (left == 0) goto inf_leave; - copy = out - left; - if (state->offset > copy) { /* copy from window */ - copy = state->offset - copy; - if (copy > state->whave) { - if (state->sane) { - strm->msg = (char *)"invalid distance too far back"; - state->mode = BAD; - break; + case DICTID: + NEEDBITS(32); + strm->adler = state->check = ZSWAP32(hold); + INITBITS(); + state->mode = DICT; + /* fallthrough */ + case DICT: + if (state->havedict == 0) { + RESTORE(); + return Z_NEED_DICT; } + strm->adler = state->check = adler32(0L, Z_NULL, 0); + state->mode = TYPE; + /* fallthrough */ + case TYPE: + if (flush == Z_BLOCK || flush == Z_TREES) goto inf_leave; + /* fallthrough */ + case TYPEDO: + if (state->last) { + BYTEBITS(); + state->mode = CHECK; + break; + } + NEEDBITS(3); + state->last = BITS(1); + DROPBITS(1); + switch (BITS(2)) { + case 0: /* stored block */ + Tracev(("inflate: stored block%s\n", + state->last ? " (last)" : "")); + state->mode = STORED; + break; + case 1: /* fixed block */ + fixedtables(state); + Tracev(("inflate: fixed codes block%s\n", + state->last ? " (last)" : "")); + state->mode = LEN_; /* decode codes */ + if (flush == Z_TREES) { + DROPBITS(2); + goto inf_leave; + } + break; + case 2: /* dynamic block */ + Tracev(("inflate: dynamic codes block%s\n", + state->last ? 
" (last)" : "")); + state->mode = TABLE; + break; + case 3: + strm->msg = (char *)"invalid block type"; + state->mode = BAD; + } + DROPBITS(2); + break; + case STORED: + BYTEBITS(); /* go to byte boundary */ + NEEDBITS(32); + if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) { + strm->msg = (char *)"invalid stored block lengths"; + state->mode = BAD; + break; + } + state->length = (unsigned)hold & 0xffff; + Tracev(("inflate: stored length %u\n", + state->length)); + INITBITS(); + state->mode = COPY_; + if (flush == Z_TREES) goto inf_leave; + /* fallthrough */ + case COPY_: + state->mode = COPY; + /* fallthrough */ + case COPY: + copy = state->length; + if (copy) { + if (copy > have) copy = have; + if (copy > left) copy = left; + if (copy == 0) goto inf_leave; + zmemcpy(put, next, copy); + have -= copy; + next += copy; + left -= copy; + put += copy; + state->length -= copy; + break; + } + Tracev(("inflate: stored end\n")); + state->mode = TYPE; + break; + case TABLE: + NEEDBITS(14); + state->nlen = BITS(5) + 257; + DROPBITS(5); + state->ndist = BITS(5) + 1; + DROPBITS(5); + state->ncode = BITS(4) + 4; + DROPBITS(4); +#ifndef PKZIP_BUG_WORKAROUND + if (state->nlen > 286 || state->ndist > 30) { + strm->msg = (char *)"too many length or distance symbols"; + state->mode = BAD; + break; + } +#endif + Tracev(("inflate: table sizes ok\n")); + state->have = 0; + state->mode = LENLENS; + /* fallthrough */ + case LENLENS: + while (state->have < state->ncode) { + NEEDBITS(3); + state->lens[order[state->have++]] = (unsigned short)BITS(3); + DROPBITS(3); + } + while (state->have < 19) + state->lens[order[state->have++]] = 0; + state->next = state->codes; + state->lencode = (const code FAR *)(state->next); + state->lenbits = 7; + ret = inflate_table(CODES, state->lens, 19, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid code lengths set"; + state->mode = BAD; + break; + } + Tracev(("inflate: code lengths ok\n")); + state->have = 0; + state->mode = CODELENS; + /* fallthrough */ + case CODELENS: + while (state->have < state->nlen + state->ndist) { + for (;;) { + here = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if (here.val < 16) { + DROPBITS(here.bits); + state->lens[state->have++] = here.val; + } + else { + if (here.val == 16) { + NEEDBITS(here.bits + 2); + DROPBITS(here.bits); + if (state->have == 0) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + len = state->lens[state->have - 1]; + copy = 3 + BITS(2); + DROPBITS(2); + } + else if (here.val == 17) { + NEEDBITS(here.bits + 3); + DROPBITS(here.bits); + len = 0; + copy = 3 + BITS(3); + DROPBITS(3); + } + else { + NEEDBITS(here.bits + 7); + DROPBITS(here.bits); + len = 0; + copy = 11 + BITS(7); + DROPBITS(7); + } + if (state->have + copy > state->nlen + state->ndist) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + while (copy--) + state->lens[state->have++] = (unsigned short)len; + } + } + + /* handle error breaks in while */ + if (state->mode == BAD) break; + + /* check for end-of-block code (better have one) */ + if (state->lens[256] == 0) { + strm->msg = (char *)"invalid code -- missing end-of-block"; + state->mode = BAD; + break; + } + + /* build code tables -- note: do not change the lenbits or distbits + values here (10 and 9) without reading the comments in inftrees.h + concerning the ENOUGH constants, which depend on those values */ + state->next = state->codes; + 
state->lencode = (const code FAR *)(state->next); + state->lenbits = 10; + ret = inflate_table(LENS, state->lens, state->nlen, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid literal/lengths set"; + state->mode = BAD; + break; + } + state->distcode = (const code FAR *)(state->next); + state->distbits = 9; + ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist, + &(state->next), &(state->distbits), state->work); + if (ret) { + strm->msg = (char *)"invalid distances set"; + state->mode = BAD; + break; + } + Tracev(("inflate: codes ok\n")); + state->mode = LEN_; + if (flush == Z_TREES) goto inf_leave; + /* fallthrough */ + case LEN_: + state->mode = LEN; + /* fallthrough */ + case LEN: + if (have >= INFLATE_FAST_MIN_INPUT && + left >= INFLATE_FAST_MIN_OUTPUT) { + RESTORE(); + inflate_fast(strm, out); + LOAD(); + if (state->mode == TYPE) + state->back = -1; + break; + } + state->back = 0; + for (;;) { + here = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if (here.op && (here.op & 0xf0) == 0) { + last = here; + for (;;) { + here = state->lencode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + here.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + state->back += last.bits; + } + DROPBITS(here.bits); + state->back += here.bits; + state->length = (unsigned)here.val; + if ((int)(here.op) == 0) { + Tracevv((here.val >= 0x20 && here.val < 0x7f ? + "inflate: literal '%c'\n" : + "inflate: literal 0x%02x\n", here.val)); + state->mode = LIT; + break; + } + if (here.op & 32) { + Tracevv(("inflate: end of block\n")); + state->back = -1; + state->mode = TYPE; + break; + } + if (here.op & 64) { + strm->msg = (char *)"invalid literal/length code"; + state->mode = BAD; + break; + } + state->extra = (unsigned)(here.op) & 15; + state->mode = LENEXT; + /* fallthrough */ + case LENEXT: + if (state->extra) { + NEEDBITS(state->extra); + state->length += BITS(state->extra); + DROPBITS(state->extra); + state->back += state->extra; + } + Tracevv(("inflate: length %u\n", state->length)); + state->was = state->length; + state->mode = DIST; + /* fallthrough */ + case DIST: + for (;;) { + here = state->distcode[BITS(state->distbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if ((here.op & 0xf0) == 0) { + last = here; + for (;;) { + here = state->distcode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + here.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + state->back += last.bits; + } + DROPBITS(here.bits); + state->back += here.bits; + if (here.op & 64) { + strm->msg = (char *)"invalid distance code"; + state->mode = BAD; + break; + } + state->offset = (unsigned)here.val; + state->extra = (unsigned)(here.op) & 15; + state->mode = DISTEXT; + /* fallthrough */ + case DISTEXT: + if (state->extra) { + NEEDBITS(state->extra); + state->offset += BITS(state->extra); + DROPBITS(state->extra); + state->back += state->extra; + } +#ifdef INFLATE_STRICT + if (state->offset > state->dmax) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#endif + Tracevv(("inflate: distance %u\n", state->offset)); + state->mode = MATCH; + /* fallthrough */ + case MATCH: + if (left == 0) goto inf_leave; + copy = out - left; + if (state->offset > copy) { /* copy from window */ + copy = state->offset - copy; + if (copy > state->whave) { + if (state->sane) { + strm->msg = 
(char *)"invalid distance too far back"; + state->mode = BAD; + break; + } #ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR - Trace(("inflate.c too far\n")); - copy -= state->whave; - if (copy > state->length) copy = state->length; + Trace(("inflate.c too far\n")); + copy -= state->whave; + if (copy > state->length) copy = state->length; + if (copy > left) copy = left; + left -= copy; + state->length -= copy; + do { + *put++ = 0; + } while (--copy); + if (state->length == 0) state->mode = LEN; + break; +#endif + } + if (copy > state->wnext) { + copy -= state->wnext; + from = state->window + (state->wsize - copy); + } + else + from = state->window + (state->wnext - copy); + if (copy > state->length) copy = state->length; + } + else { /* copy from output */ + from = put - state->offset; + copy = state->length; + } if (copy > left) copy = left; left -= copy; state->length -= copy; do { - *put++ = 0; + *put++ = *from++; } while (--copy); if (state->length == 0) state->mode = LEN; break; -#endif - } - if (copy > state->wnext) { - copy -= state->wnext; - from = state->window + (state->wsize - copy); - } else { - from = state->window + (state->wnext - copy); - } - if (copy > state->length) copy = state->length; - if (copy > left) copy = left; - put = chunkcopy_safe(put, from, copy, put + left); - } else { /* copy from output */ - copy = state->length; - if (copy > left) copy = left; - put = chunkcopy_lapped_safe(put, state->offset, copy, put + left); - } - left -= copy; - state->length -= copy; - if (state->length == 0) state->mode = LEN; - break; - case LIT: - if (left == 0) goto inf_leave; - *put++ = (unsigned char)(state->length); - left--; - state->mode = LEN; - break; - case CHECK: - if (state->wrap) { - NEEDBITS(32); - out -= left; - strm->total_out += out; - state->total += out; - if ((state->wrap & 4) && out) - strm->adler = state->check = UPDATE(state->check, put - out, out); - out = left; - if ((state->wrap & 4) && - ( -#ifdef GUNZIP - state->flags ? hold : -#endif - ZSWAP32(hold)) != state->check) { - strm->msg = (char *)"incorrect data check"; - state->mode = BAD; + case LIT: + if (left == 0) goto inf_leave; + *put++ = (unsigned char)(state->length); + left--; + state->mode = LEN; break; - } - INITBITS(); - Tracev(("inflate: check matches trailer\n")); - } + case CHECK: + if (state->wrap) { + NEEDBITS(32); + out -= left; + strm->total_out += out; + state->total += out; + if ((state->wrap & 4) && out) + strm->adler = state->check = + UPDATE_CHECK(state->check, put - out, out); + out = left; + if ((state->wrap & 4) && ( #ifdef GUNZIP - state->mode = LENGTH; - case LENGTH: - if (state->wrap && state->flags) { - NEEDBITS(32); - if (hold != (state->total & 0xffffffffUL)) { - strm->msg = (char *)"incorrect length check"; - state->mode = BAD; - break; - } - INITBITS(); - Tracev(("inflate: length matches trailer\n")); - } + state->flags ? 
hold : #endif - state->mode = DONE; - case DONE: - ret = Z_STREAM_END; - goto inf_leave; - case BAD: - ret = Z_DATA_ERROR; - goto inf_leave; - case MEM: - return Z_MEM_ERROR; - case SYNC: - default: + ZSWAP32(hold)) != state->check) { + strm->msg = (char *)"incorrect data check"; + state->mode = BAD; + break; + } + INITBITS(); + Tracev(("inflate: check matches trailer\n")); + } +#ifdef GUNZIP + state->mode = LENGTH; + /* fallthrough */ + case LENGTH: + if (state->wrap && state->flags) { + NEEDBITS(32); + if ((state->wrap & 4) && hold != (state->total & 0xffffffff)) { + strm->msg = (char *)"incorrect length check"; + state->mode = BAD; + break; + } + INITBITS(); + Tracev(("inflate: length matches trailer\n")); + } +#endif + state->mode = DONE; + /* fallthrough */ + case DONE: + ret = Z_STREAM_END; + goto inf_leave; + case BAD: + ret = Z_DATA_ERROR; + goto inf_leave; + case MEM: + return Z_MEM_ERROR; + case SYNC: + /* fallthrough */ + default: + return Z_STREAM_ERROR; + } + + /* + Return from inflate(), updating the total counts and the check value. + If there was no progress during the inflate() call, return a buffer + error. Call updatewindow() to create and/or update the window state. + Note: a memory error from inflate() is non-recoverable. + */ + inf_leave: + RESTORE(); + if (state->wsize || (out != strm->avail_out && state->mode < BAD && + (state->mode < CHECK || flush != Z_FINISH))) + if (updatewindow(strm, strm->next_out, out - strm->avail_out)) { + state->mode = MEM; + return Z_MEM_ERROR; + } + in -= strm->avail_in; + out -= strm->avail_out; + strm->total_in += in; + strm->total_out += out; + state->total += out; + if ((state->wrap & 4) && out) + strm->adler = state->check = + UPDATE_CHECK(state->check, strm->next_out - out, out); + strm->data_type = (int)state->bits + (state->last ? 64 : 0) + + (state->mode == TYPE ? 128 : 0) + + (state->mode == LEN_ || state->mode == COPY_ ? 256 : 0); + if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK) + ret = Z_BUF_ERROR; + return ret; +} + +int ZEXPORT inflateEnd(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (state->window != Z_NULL) ZFREE(strm, state->window); + ZFREE(strm, strm->state); + strm->state = Z_NULL; + Tracev(("inflate: end\n")); + return Z_OK; +} + +int ZEXPORT inflateGetDictionary(strm, dictionary, dictLength) +z_streamp strm; +Bytef *dictionary; +uInt *dictLength; +{ + struct inflate_state FAR *state; + + /* check state */ + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + + /* copy dictionary */ + if (state->whave && dictionary != Z_NULL) { + zmemcpy(dictionary, state->window + state->wnext, + state->whave - state->wnext); + zmemcpy(dictionary + state->whave - state->wnext, + state->window, state->wnext); } - } + if (dictLength != Z_NULL) + *dictLength = state->whave; + return Z_OK; +} - /* - Return from inflate(), updating the total counts and the check value. - If there was no progress during the inflate() call, return a buffer - error. Call updatewindow() to create and/or update the window state. - Note: a memory error from inflate() is non-recoverable. - */ -inf_leave: - /* We write a defined value in the unused space to help mark - * where the stream has ended. We don't use zeros as that can - * mislead clients relying on undefined behavior (i.e. assuming - * that the data is over when the buffer has a zero/null value). 
- */ - if (left >= CHUNKCOPY_CHUNK_SIZE) { - memset(put, 0x55, CHUNKCOPY_CHUNK_SIZE); - } else { - memset(put, 0x55, left); - } +int ZEXPORT inflateSetDictionary(strm, dictionary, dictLength) +z_streamp strm; +const Bytef *dictionary; +uInt dictLength; +{ + struct inflate_state FAR *state; + unsigned long dictid; + int ret; - RESTORE(); - if (state->wsize || (out != strm->avail_out && state->mode < BAD && - (state->mode < CHECK || flush != Z_FINISH))) - if (updatewindow(strm, strm->next_out, out - strm->avail_out)) { - state->mode = MEM; - return Z_MEM_ERROR; + /* check state */ + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (state->wrap != 0 && state->mode != DICT) + return Z_STREAM_ERROR; + + /* check for correct dictionary identifier */ + if (state->mode == DICT) { + dictid = adler32(0L, Z_NULL, 0); + dictid = adler32(dictid, dictionary, dictLength); + if (dictid != state->check) + return Z_DATA_ERROR; } - in -= strm->avail_in; - out -= strm->avail_out; - strm->total_in += in; - strm->total_out += out; - state->total += out; - if ((state->wrap & 4) && out) - strm->adler = state->check = - UPDATE(state->check, strm->next_out - out, out); - strm->data_type = (int)state->bits + (state->last ? 64 : 0) + - (state->mode == TYPE ? 128 : 0) + - (state->mode == LEN_ || state->mode == COPY_ ? 256 : 0); - if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK) - ret = Z_BUF_ERROR; - return ret; + + /* copy dictionary to window using updatewindow(), which will amend the + existing dictionary if appropriate */ + ret = updatewindow(strm, dictionary + dictLength, dictLength); + if (ret) { + state->mode = MEM; + return Z_MEM_ERROR; + } + state->havedict = 1; + Tracev(("inflate: dictionary set\n")); + return Z_OK; } -int inflateEnd(z_streamp strm) { - struct InflateState *state; - if (inflateStateCheck(strm)) return Z_STREAM_ERROR; - state = (struct InflateState *)strm->state; - if (state->window != Z_NULL) ZFREE(strm, state->window); - ZFREE(strm, strm->state); - strm->state = Z_NULL; - Tracev(("inflate: end\n")); - return Z_OK; +int ZEXPORT inflateGetHeader(strm, head) +z_streamp strm; +gz_headerp head; +{ + struct inflate_state FAR *state; + + /* check state */ + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if ((state->wrap & 2) == 0) return Z_STREAM_ERROR; + + /* save header structure */ + state->head = head; + head->done = 0; + return Z_OK; } -int inflateGetDictionary(z_streamp strm, Bytef *dictionary, uInt *dictLength) { - struct InflateState *state; - - /* check state */ - if (inflateStateCheck(strm)) return Z_STREAM_ERROR; - state = (struct InflateState *)strm->state; - - /* copy dictionary */ - if (state->whave && dictionary != Z_NULL) { - memcpy(dictionary, state->window + state->wnext, - state->whave - state->wnext); - memcpy(dictionary + state->whave - state->wnext, state->window, - state->wnext); - } - if (dictLength != Z_NULL) *dictLength = state->whave; - return Z_OK; -} - -int inflateSetDictionary(z_streamp strm, const Bytef *dictionary, - uInt dictLength) { - struct InflateState *state; - unsigned long dictid; - int ret; - - /* check state */ - if (inflateStateCheck(strm)) return Z_STREAM_ERROR; - state = (struct InflateState *)strm->state; - if (state->wrap != 0 && state->mode != DICT) return Z_STREAM_ERROR; - - /* check for correct dictionary identifier */ - if (state->mode == DICT) { - dictid = adler32(0L, Z_NULL, 0); - dictid = adler32(dictid, 
dictionary, dictLength); - if (dictid != state->check) return Z_DATA_ERROR; - } - - /* copy dictionary to window using updatewindow(), which will amend the - existing dictionary if appropriate */ - ret = updatewindow(strm, dictionary + dictLength, dictLength); - if (ret) { - state->mode = MEM; - return Z_MEM_ERROR; - } - state->havedict = 1; - Tracev(("inflate: dictionary set\n")); - return Z_OK; -} - -int inflateGetHeader(z_streamp strm, gz_headerp head) { - struct InflateState *state; - /* check state */ - if (inflateStateCheck(strm)) return Z_STREAM_ERROR; - state = (struct InflateState *)strm->state; - if ((state->wrap & 2) == 0) return Z_STREAM_ERROR; - /* save header structure */ - state->head = head; - head->done = 0; - return Z_OK; -} - -/** - * Searches buf[0..len-1] for the pattern: 0, 0, 0xff, 0xff. Return when - * found or when out of input. When called, *have is the number of - * pattern bytes found in order so far, in 0..3. On return *have is - * updated to the new state. If on return *have equals four, then the - * pattern was found and the return value is how many bytes were read - * including the last byte of the pattern. If *have is less than four, - * then the pattern has not been found yet and the return value is len. - * In the latter case, syncsearch() can be called again with more data - * and the *have state. *have is initialized to zero for the first call. +/* + Search buf[0..len-1] for the pattern: 0, 0, 0xff, 0xff. Return when found + or when out of input. When called, *have is the number of pattern bytes + found in order so far, in 0..3. On return *have is updated to the new + state. If on return *have equals four, then the pattern was found and the + return value is how many bytes were read including the last byte of the + pattern. If *have is less than four, then the pattern has not been found + yet and the return value is len. In the latter case, syncsearch() can be + called again with more data and the *have state. *have is initialized to + zero for the first call. */ -static unsigned syncsearch(unsigned *have, const unsigned char *buf, - unsigned len) { - unsigned got; - unsigned next; +local unsigned syncsearch(have, buf, len) +unsigned FAR *have; +const unsigned char FAR *buf; +unsigned len; +{ + unsigned got; + unsigned next; - got = *have; - next = 0; - while (next < len && got < 4) { - if ((int)(buf[next]) == (got < 2 ? 0 : 0xff)) - got++; - else if (buf[next]) - got = 0; + got = *have; + next = 0; + while (next < len && got < 4) { + if ((int)(buf[next]) == (got < 2 ? 
0 : 0xff)) + got++; + else if (buf[next]) + got = 0; + else + got = 4 - got; + next++; + } + *have = got; + return next; +} + +int ZEXPORT inflateSync(strm) +z_streamp strm; +{ + unsigned len; /* number of bytes to look at or looked at */ + int flags; /* temporary to save header status */ + unsigned long in, out; /* temporary to save total_in and total_out */ + unsigned char buf[4]; /* to restore bit buffer to byte string */ + struct inflate_state FAR *state; + + /* check parameters */ + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR; + + /* if first time, start search in bit buffer */ + if (state->mode != SYNC) { + state->mode = SYNC; + state->hold <<= state->bits & 7; + state->bits -= state->bits & 7; + len = 0; + while (state->bits >= 8) { + buf[len++] = (unsigned char)(state->hold); + state->hold >>= 8; + state->bits -= 8; + } + state->have = 0; + syncsearch(&(state->have), buf, len); + } + + /* search available input */ + len = syncsearch(&(state->have), strm->next_in, strm->avail_in); + strm->avail_in -= len; + strm->next_in += len; + strm->total_in += len; + + /* return no joy or set up to restart inflate() on a new block */ + if (state->have != 4) return Z_DATA_ERROR; + if (state->flags == -1) + state->wrap = 0; /* if no header yet, treat as raw */ else - got = 4 - got; - next++; - } - *have = got; - return next; + state->wrap &= ~4; /* no point in computing a check value now */ + flags = state->flags; + in = strm->total_in; out = strm->total_out; + inflateReset(strm); + strm->total_in = in; strm->total_out = out; + state->flags = flags; + state->mode = TYPE; + return Z_OK; } -int inflateSync(z_streamp strm) { - unsigned len; /* number of bytes to look at or looked at */ - unsigned long in, out; /* temporary to save total_in and total_out */ - unsigned char buf[4]; /* to restore bit buffer to byte string */ - struct InflateState *state; - - /* check parameters */ - if (inflateStateCheck(strm)) return Z_STREAM_ERROR; - state = (struct InflateState *)strm->state; - if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR; - - /* if first time, start search in bit buffer */ - if (state->mode != SYNC) { - state->mode = SYNC; - state->hold <<= state->bits & 7; - state->bits -= state->bits & 7; - len = 0; - while (state->bits >= 8) { - buf[len++] = (unsigned char)(state->hold); - state->hold >>= 8; - state->bits -= 8; - } - state->have = 0; - syncsearch(&(state->have), buf, len); - } - - /* search available input */ - len = syncsearch(&(state->have), strm->next_in, strm->avail_in); - strm->avail_in -= len; - strm->next_in += len; - strm->total_in += len; - - /* return no joy or set up to restart inflate() on a new block */ - if (state->have != 4) return Z_DATA_ERROR; - in = strm->total_in; - out = strm->total_out; - inflateReset(strm); - strm->total_in = in; - strm->total_out = out; - state->mode = TYPE; - return Z_OK; -} - -/** - * Returns true if inflate is currently at the end of a block generated - * by Z_SYNC_FLUSH or Z_FULL_FLUSH. This function is used by one PPP - * implementation to provide an additional safety check. PPP uses - * Z_SYNC_FLUSH but removes the length bytes of the resulting empty - * stored block. When decompressing, PPP checks that at the end of input - * packet, inflate is waiting for these length bytes. +/* + Returns true if inflate is currently at the end of a block generated by + Z_SYNC_FLUSH or Z_FULL_FLUSH. 
This function is used by one PPP + implementation to provide an additional safety check. PPP uses + Z_SYNC_FLUSH but removes the length bytes of the resulting empty stored + block. When decompressing, PPP checks that at the end of input packet, + inflate is waiting for these length bytes. */ -int inflateSyncPoint(z_streamp strm) { - struct InflateState *state; - if (inflateStateCheck(strm)) return Z_STREAM_ERROR; - state = (struct InflateState *)strm->state; - return state->mode == STORED && state->bits == 0; +int ZEXPORT inflateSyncPoint(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + return state->mode == STORED && state->bits == 0; } -int inflateCopy(z_streamp dest, z_streamp source) { - struct InflateState *state; - struct InflateState *copy; - unsigned char *window; - unsigned wsize; +int ZEXPORT inflateCopy(dest, source) +z_streamp dest; +z_streamp source; +{ + struct inflate_state FAR *state; + struct inflate_state FAR *copy; + unsigned char FAR *window; + unsigned wsize; - /* check input */ - if (inflateStateCheck(source) || dest == Z_NULL) return Z_STREAM_ERROR; - state = (struct InflateState *)source->state; + /* check input */ + if (inflateStateCheck(source) || dest == Z_NULL) + return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)source->state; - /* allocate space */ - copy = (struct InflateState *)ZALLOC(source, 1, sizeof(struct InflateState)); - if (copy == Z_NULL) return Z_MEM_ERROR; - window = Z_NULL; - if (state->window != Z_NULL) { - window = (unsigned char *)ZALLOC(source, 1U << state->wbits, - sizeof(unsigned char)); - if (window == Z_NULL) { - ZFREE(source, copy); - return Z_MEM_ERROR; + /* allocate space */ + copy = (struct inflate_state FAR *) + ZALLOC(source, 1, sizeof(struct inflate_state)); + if (copy == Z_NULL) return Z_MEM_ERROR; + window = Z_NULL; + if (state->window != Z_NULL) { + window = (unsigned char FAR *) + ZALLOC(source, 1U << state->wbits, sizeof(unsigned char)); + if (window == Z_NULL) { + ZFREE(source, copy); + return Z_MEM_ERROR; + } } - } - /* copy state */ - memcpy((voidpf)dest, (voidpf)source, sizeof(z_stream)); - memcpy((voidpf)copy, (voidpf)state, sizeof(struct InflateState)); - copy->strm = dest; - if (state->lencode >= state->codes && - state->lencode <= state->codes + ENOUGH - 1) { - copy->lencode = copy->codes + (state->lencode - state->codes); - copy->distcode = copy->codes + (state->distcode - state->codes); - } - copy->next = copy->codes + (state->next - state->codes); - if (window != Z_NULL) { - wsize = 1U << state->wbits; - memcpy(window, state->window, wsize); - } - copy->window = window; - dest->state = (struct DeflateState *)copy; - return Z_OK; + /* copy state */ + zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream)); + zmemcpy((voidpf)copy, (voidpf)state, sizeof(struct inflate_state)); + copy->strm = dest; + if (state->lencode >= state->codes && + state->lencode <= state->codes + ENOUGH - 1) { + copy->lencode = copy->codes + (state->lencode - state->codes); + copy->distcode = copy->codes + (state->distcode - state->codes); + } + copy->next = copy->codes + (state->next - state->codes); + if (window != Z_NULL) { + wsize = 1U << state->wbits; + zmemcpy(window, state->window, wsize); + } + copy->window = window; + dest->state = (struct internal_state FAR *)copy; + return Z_OK; } -int inflateUndermine(z_streamp strm, int subvert) { - struct InflateState *state; - if (inflateStateCheck(strm)) return 
Z_STREAM_ERROR; - state = (struct InflateState *)strm->state; +int ZEXPORT inflateUndermine(strm, subvert) +z_streamp strm; +int subvert; +{ + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; #ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR - state->sane = !subvert; - return Z_OK; + state->sane = !subvert; + return Z_OK; #else - (void)subvert; - state->sane = 1; - return Z_DATA_ERROR; + (void)subvert; + state->sane = 1; + return Z_DATA_ERROR; #endif } -int inflateValidate(z_streamp strm, int check) { - struct InflateState *state; - if (inflateStateCheck(strm)) return Z_STREAM_ERROR; - state = (struct InflateState *)strm->state; - if (check) - state->wrap |= 4; - else - state->wrap &= ~4; - return Z_OK; +int ZEXPORT inflateValidate(strm, check) +z_streamp strm; +int check; +{ + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (check && state->wrap) + state->wrap |= 4; + else + state->wrap &= ~4; + return Z_OK; } -long inflateMark(z_streamp strm) { - struct InflateState *state; - if (inflateStateCheck(strm)) return -(1L << 16); - state = (struct InflateState *)strm->state; - return (long)(((unsigned long)((long)state->back)) << 16) + - (state->mode == COPY - ? state->length - : (state->mode == MATCH ? state->was - state->length : 0)); +long ZEXPORT inflateMark(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) + return -(1L << 16); + state = (struct inflate_state FAR *)strm->state; + return (long)(((unsigned long)((long)state->back)) << 16) + + (state->mode == COPY ? state->length : + (state->mode == MATCH ? state->was - state->length : 0)); } -unsigned long inflateCodesUsed(z_streamp strm) { - struct InflateState *state; - if (inflateStateCheck(strm)) return (unsigned long)-1; - state = (struct InflateState *)strm->state; - return (unsigned long)(state->next - state->codes); +unsigned long ZEXPORT inflateCodesUsed(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + if (inflateStateCheck(strm)) return (unsigned long)-1; + state = (struct inflate_state FAR *)strm->state; + return (unsigned long)(state->next - state->codes); } diff --git a/third_party/zlib/inflate.internal.h b/third_party/zlib/inflate.internal.h index 9a1ed86ae..f6208e5d1 100644 --- a/third_party/zlib/inflate.internal.h +++ b/third_party/zlib/inflate.internal.h @@ -1,14 +1,11 @@ #ifndef COSMOPOLITAN_THIRD_PARTY_ZLIB_INFLATE_H_ #define COSMOPOLITAN_THIRD_PARTY_ZLIB_INFLATE_H_ #include "third_party/zlib/inftrees.internal.h" +#include "third_party/zlib/macros.internal.h" #include "third_party/zlib/zlib.h" #if !(__ASSEMBLER__ + __LINKER__ + 0) COSMOPOLITAN_C_START_ - -/* inflate.h -- internal inflate state definition - * Copyright (C) 1995-2016 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ +/* clang-format off */ /* WARNING: this file should *not* be used by applications. It is part of the implementation of the compression library and is @@ -20,123 +17,116 @@ COSMOPOLITAN_C_START_ the crc code when it is not needed. For shared libraries, gzip decoding should be left enabled. 
*/ #ifndef NO_GZIP -#define GUNZIP +# define GUNZIP #endif -/** - * Possible inflate modes between inflate() calls - * - * State transitions between modes - - * - * (most modes can go to BAD or MEM on error -- not shown for clarity) - * - * Process header: - * HEAD → (gzip) or (zlib) or (raw) - * (gzip) → FLAGS → TIME → OS → EXLEN → EXTRA → NAME → COMMENT → - * HCRC → TYPE - * (zlib) → DICTID or TYPE - * DICTID → DICT → TYPE - * (raw) → TYPEDO - * Read deflate blocks: - * TYPE → TYPEDO → STORED or TABLE or LEN_ or CHECK - * STORED → COPY_ → COPY → TYPE - * TABLE → LENLENS → CODELENS → LEN_ - * LEN_ → LEN - * Read deflate codes in fixed or dynamic block: - * LEN → LENEXT or LIT or TYPE - * LENEXT → DIST → DISTEXT → MATCH → LEN - * LIT → LEN - * Process trailer: - * CHECK → LENGTH → DONE - */ +/* Possible inflate modes between inflate() calls */ typedef enum { - HEAD = 16180, /* i: waiting for magic header */ - FLAGS, /* i: waiting for method and flags (gzip) */ - TIME, /* i: waiting for modification time (gzip) */ - OS, /* i: waiting for extra flags and operating system (gzip) */ - EXLEN, /* i: waiting for extra length (gzip) */ - EXTRA, /* i: waiting for extra bytes (gzip) */ - NAME, /* i: waiting for end of file name (gzip) */ - COMMENT, /* i: waiting for end of comment (gzip) */ - HCRC, /* i: waiting for header crc (gzip) */ - DICTID, /* i: waiting for dictionary check value */ - DICT, /* waiting for inflateSetDictionary() call */ - TYPE, /* i: waiting for type bits, including last-flag bit */ - TYPEDO, /* i: same, but skip check to exit inflate on new block */ - STORED, /* i: waiting for stored size (length and complement) */ - COPY_, /* i/o: same as COPY below, but only first time in */ - COPY, /* i/o: waiting for input or output to copy stored block */ - TABLE, /* i: waiting for dynamic block table lengths */ - LENLENS, /* i: waiting for code length code lengths */ - CODELENS, /* i: waiting for length/lit and distance code lengths */ - LEN_, /* i: same as LEN below, but only first time in */ - LEN, /* i: waiting for length/lit/eob code */ - LENEXT, /* i: waiting for length extra bits */ - DIST, /* i: waiting for distance code */ - DISTEXT, /* i: waiting for distance extra bits */ - MATCH, /* o: waiting for output space to copy string */ - LIT, /* o: waiting for output space to write literal */ - CHECK, /* i: waiting for 32-bit check value */ - LENGTH, /* i: waiting for 32-bit length (gzip) */ - DONE, /* finished check, done -- remain here until reset */ - BAD, /* got a data error -- remain here until reset */ - MEM, /* got an inflate() memory error -- remain here until reset */ - SYNC /* looking for synchronization bytes to restart inflate() */ + HEAD = 16180, /* i: waiting for magic header */ + FLAGS, /* i: waiting for method and flags (gzip) */ + TIME, /* i: waiting for modification time (gzip) */ + OS, /* i: waiting for extra flags and operating system (gzip) */ + EXLEN, /* i: waiting for extra length (gzip) */ + EXTRA, /* i: waiting for extra bytes (gzip) */ + NAME, /* i: waiting for end of file name (gzip) */ + COMMENT, /* i: waiting for end of comment (gzip) */ + HCRC, /* i: waiting for header crc (gzip) */ + DICTID, /* i: waiting for dictionary check value */ + DICT, /* waiting for inflateSetDictionary() call */ + TYPE, /* i: waiting for type bits, including last-flag bit */ + TYPEDO, /* i: same, but skip check to exit inflate on new block */ + STORED, /* i: waiting for stored size (length and complement) */ + COPY_, /* i/o: same as COPY below, but only first time in */ + COPY, 
/* i/o: waiting for input or output to copy stored block */ + TABLE, /* i: waiting for dynamic block table lengths */ + LENLENS, /* i: waiting for code length code lengths */ + CODELENS, /* i: waiting for length/lit and distance code lengths */ + LEN_, /* i: same as LEN below, but only first time in */ + LEN, /* i: waiting for length/lit/eob code */ + LENEXT, /* i: waiting for length extra bits */ + DIST, /* i: waiting for distance code */ + DISTEXT, /* i: waiting for distance extra bits */ + MATCH, /* o: waiting for output space to copy string */ + LIT, /* o: waiting for output space to write literal */ + CHECK, /* i: waiting for 32-bit check value */ + LENGTH, /* i: waiting for 32-bit length (gzip) */ + DONE, /* finished check, done -- remain here until reset */ + BAD, /* got a data error -- remain here until reset */ + MEM, /* got an inflate() memory error -- remain here until reset */ + SYNC /* looking for synchronization bytes to restart inflate() */ } inflate_mode; -/** - * State maintained between inflate() calls -- approximately 7K bytes, - * not including the allocated sliding window, which is up to 32K bytes. +/* + State transitions between above modes - + + (most modes can go to BAD or MEM on error -- not shown for clarity) + + Process header: + HEAD -> (gzip) or (zlib) or (raw) + (gzip) -> FLAGS -> TIME -> OS -> EXLEN -> EXTRA -> NAME -> COMMENT -> + HCRC -> TYPE + (zlib) -> DICTID or TYPE + DICTID -> DICT -> TYPE + (raw) -> TYPEDO + Read deflate blocks: + TYPE -> TYPEDO -> STORED or TABLE or LEN_ or CHECK + STORED -> COPY_ -> COPY -> TYPE + TABLE -> LENLENS -> CODELENS -> LEN_ + LEN_ -> LEN + Read deflate codes in fixed or dynamic block: + LEN -> LENEXT or LIT or TYPE + LENEXT -> DIST -> DISTEXT -> MATCH -> LEN + LIT -> LEN + Process trailer: + CHECK -> LENGTH -> DONE */ -struct InflateState { - z_streamp strm; /* pointer back to this zlib stream */ - inflate_mode mode; /* current inflate mode */ - int last; /* true if processing last block */ - int wrap; /* bit 0 true for zlib, bit 1 true for gzip, - bit 2 true to validate check value */ - int havedict; /* true if dictionary provided */ - int flags; /* gzip header method and flags (0 if zlib) */ - unsigned dmax; /* zlib header max distance (INFLATE_STRICT) */ - unsigned long check; /* protected copy of check value */ - unsigned long total; /* protected copy of output count */ - gz_headerp head; /* where to save gzip header information */ - /* sliding window */ - unsigned wbits; /* log base 2 of requested window size */ - unsigned wsize; /* window size or zero if not using window */ - unsigned whave; /* valid bytes in the window */ - unsigned wnext; /* window write index */ - unsigned char *window; /* allocated sliding window, if needed */ - - /* bit accumulator */ - unsigned long hold; /* input bit accumulator */ - unsigned bits; /* number of bits in "in" */ - - /* for string and stored block copying */ - unsigned length; /* literal or length of data to copy */ - unsigned offset; /* distance back to copy string from */ - - /* for table and code decoding */ - unsigned extra; /* extra bits needed */ - - /* fixed and dynamic code tables */ - const struct zcode *lencode; /* starting table for length/literal codes */ - const struct zcode *distcode; /* starting table for distance codes */ - unsigned lenbits; /* index bits for lencode */ - unsigned distbits; /* index bits for distcode */ - - /* dynamic table building */ - unsigned ncode; /* number of code length code lengths */ - unsigned nlen; /* number of length code lengths */ - 
unsigned ndist; /* number of distance code lengths */ - unsigned have; /* number of code lengths in lens[] */ - struct zcode *next; /* next available space in codes[] */ - unsigned short lens[320]; /* temporary storage for code lengths */ - unsigned short work[288]; /* work area for code table building */ - struct zcode codes[ENOUGH]; /* space for code tables */ - int sane; /* if false, allow invalid distance too far */ - int back; /* bits back of last unprocessed length/lit */ - unsigned was; /* initial length of match */ +/* State maintained between inflate() calls -- approximately 7K bytes, not + including the allocated sliding window, which is up to 32K bytes. */ +struct inflate_state { + z_streamp strm; /* pointer back to this zlib stream */ + inflate_mode mode; /* current inflate mode */ + int last; /* true if processing last block */ + int wrap; /* bit 0 true for zlib, bit 1 true for gzip, + bit 2 true to validate check value */ + int havedict; /* true if dictionary provided */ + int flags; /* gzip header method and flags, 0 if zlib, or + -1 if raw or no header yet */ + unsigned dmax; /* zlib header max distance (INFLATE_STRICT) */ + unsigned long check; /* protected copy of check value */ + unsigned long total; /* protected copy of output count */ + gz_headerp head; /* where to save gzip header information */ + /* sliding window */ + unsigned wbits; /* log base 2 of requested window size */ + unsigned wsize; /* window size or zero if not using window */ + unsigned whave; /* valid bytes in the window */ + unsigned wnext; /* window write index */ + unsigned char FAR *window; /* allocated sliding window, if needed */ + /* bit accumulator */ + unsigned long hold; /* input bit accumulator */ + unsigned bits; /* number of bits in "in" */ + /* for string and stored block copying */ + unsigned length; /* literal or length of data to copy */ + unsigned offset; /* distance back to copy string from */ + /* for table and code decoding */ + unsigned extra; /* extra bits needed */ + /* fixed and dynamic code tables */ + code const FAR *lencode; /* starting table for length/literal codes */ + code const FAR *distcode; /* starting table for distance codes */ + unsigned lenbits; /* index bits for lencode */ + unsigned distbits; /* index bits for distcode */ + /* dynamic table building */ + unsigned ncode; /* number of code length code lengths */ + unsigned nlen; /* number of length code lengths */ + unsigned ndist; /* number of distance code lengths */ + unsigned have; /* number of code lengths in lens[] */ + code FAR *next; /* next available space in codes[] */ + unsigned short lens[320]; /* temporary storage for code lengths */ + unsigned short work[288]; /* work area for code table building */ + code codes[ENOUGH]; /* space for code tables */ + int sane; /* if false, allow invalid distance too far */ + int back; /* bits back of last unprocessed length/lit */ + unsigned was; /* initial length of match */ }; COSMOPOLITAN_C_END_ diff --git a/third_party/zlib/inflateinit.S b/third_party/zlib/inflateinit.S deleted file mode 100644 index 520698ed8..000000000 --- a/third_party/zlib/inflateinit.S +++ /dev/null @@ -1,25 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby 
granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "third_party/zlib/zutil.internal.h" -#include "libc/macros.internal.h" - -inflateInit: - mov $DEF_WBITS,%esi - jmp inflateInit2 - .endfn inflateInit,globl diff --git a/third_party/zlib/inftrees.c b/third_party/zlib/inftrees.c index 13774d811..5e8ab1807 100644 --- a/third_party/zlib/inftrees.c +++ b/third_party/zlib/inftrees.c @@ -1,305 +1,303 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;tab-width:8;coding:utf-8 -*-│ -│vi: set net ft=c ts=4 sts=4 sw=4 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 1995-2017 Mark Adler │ -│ Use of this source code is governed by the BSD-style licenses that can │ -│ be found in the third_party/zlib/LICENSE file. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "third_party/zlib/inftrees.internal.h" -#include "third_party/zlib/zutil.internal.h" - -asm(".ident\t\"\\n\\n\ -inflate 1.2.11 (zlib License)\\n\ -Copyright 1995-2017 Mark Adler\\n\ -Invented 1990 Phillip Walter Katz\""); - -/** - * @fileoverview Generate Huffman trees for efficient decoding. +/* inftrees.c -- generate Huffman trees for efficient decoding + * Copyright (C) 1995-2022 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h */ +#include "third_party/zlib/inftrees.internal.h" +// clang-format off #define MAXBITS 15 -/* Length codes 257..285 base */ -static const uint16_t kZlibDeflateLbase[31] = { - 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, - 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; - -/* Length codes 257..285 extra */ -static const uint16_t kZlibDeflateLext[31] = { - 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, - 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 76, 202}; - -/* Distance codes 0..29 base */ -static const uint16_t kZlibDeflateDbase[32] = { - 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, - 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, - 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0, 0}; - -/* Distance codes 0..29 extra */ -static const uint16_t kZlibDeflateDext[32] = { - 16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, - 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 64, 64}; - -/** - * Builds set of tables to decode the provided canonical Huffman code. - * The code lengths are lens[0..codes-1]. The result starts at *table, - * whose indices are 0..2^bits-1. work is a writable array of at least - * lens shorts, which is used as a work area. type is the type of code - * to be generated, CODES, LENS, or DISTS. On return, zero is success, - * -1 is an invalid code, and +1 means that ENOUGH isn't enough. table - * on return points to the next available entry's address. 
bits is the - * requested root table index bits, and on return it is the actual root - * table index bits. It will differ if the request is greater than the - * longest code or if it is less than the shortest code. +const char inflate_copyright[] = + " inflate 1.2.12.1 Copyright 1995-2022 Mark Adler "; +/* + If you use the zlib library in a product, an acknowledgment is welcome + in the documentation of your product. If for some reason you cannot + include such an acknowledgment, I would appreciate that you keep this + copyright string in the executable of your product. */ -int inflate_table(zcodetype type, uint16_t *lens, unsigned codes, - struct zcode **table, unsigned *bits, uint16_t *work) { - unsigned len; /* a code's length in bits */ - unsigned sym; /* index of code symbols */ - unsigned min, max; /* minimum and maximum code lengths */ - unsigned root; /* number of index bits for root table */ - unsigned curr; /* number of index bits for current table */ - unsigned drop; /* code bits to drop for sub-table */ - int left; /* number of prefix codes available */ - unsigned used; /* code entries in table used */ - unsigned huff; /* Huffman code */ - unsigned incr; /* for incrementing code, index */ - unsigned fill; /* index for replicating entries */ - unsigned low; /* low bits for current root entry */ - unsigned mask; /* mask for low root bits */ - struct zcode here; /* table entry for duplication */ - struct zcode *next; /* next available space in table */ - const uint16_t *base; /* base value table to use */ - const uint16_t *extra; /* extra bits table to use */ - unsigned match; /* use base and extra for symbol >= match */ - uint16_t count[MAXBITS + 1]; /* number of codes of each length */ - uint16_t offs[MAXBITS + 1]; /* offsets in table for each length */ - /* - Process a set of code lengths to create a canonical Huffman code. The - code lengths are lens[0..codes-1]. Each length corresponds to the - symbols 0..codes-1. The Huffman code is generated by first sorting the - symbols by length from short to long, and retaining the symbol order - for codes with equal lengths. Then the code starts with all zero bits - for the first code of the shortest length, and the codes are integer - increments for the same length, and zeros are appended as the length - increases. For the deflate format, these bits are stored backwards - from their more natural integer increment ordering, and so when the - decoding tables are built in the large loop below, the integer codes - are incremented backwards. +/* + Build a set of tables to decode the provided canonical Huffman code. + The code lengths are lens[0..codes-1]. The result starts at *table, + whose indices are 0..2^bits-1. work is a writable array of at least + lens shorts, which is used as a work area. type is the type of code + to be generated, CODES, LENS, or DISTS. On return, zero is success, + -1 is an invalid code, and +1 means that ENOUGH isn't enough. table + on return points to the next available entry's address. bits is the + requested root table index bits, and on return it is the actual root + table index bits. It will differ if the request is greater than the + longest code or if it is less than the shortest code. 
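   As a concrete illustration, a sketch in the spirit of fixedtables() in
   inflate.c: the 288 fixed literal/length code lengths from RFC 1951 are
   8 bits for symbols 0..143, 9 for 144..255, 7 for 256..279 and 8 for
   280..287, and a 9 bit root table decodes them in a single lookup:

       unsigned short lens[288], work[288];
       code table[ENOUGH_LENS];          /* far more room than this needs */
       code *next = table;
       unsigned sym = 0, bits = 9;
       int ret;

       while (sym < 144) lens[sym++] = 8;
       while (sym < 256) lens[sym++] = 9;
       while (sym < 280) lens[sym++] = 7;
       while (sym < 288) lens[sym++] = 8;
       ret = inflate_table(LENS, lens, 288, &next, &bits, work);
       /* ret: 0 ok, -1 invalid code, 1 if it would not fit in ENOUGH */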
+ */ +int ZLIB_INTERNAL inflate_table(type, lens, codes, table, bits, work) +codetype type; +unsigned short FAR *lens; +unsigned codes; +code FAR * FAR *table; +unsigned FAR *bits; +unsigned short FAR *work; +{ + unsigned len; /* a code's length in bits */ + unsigned sym; /* index of code symbols */ + unsigned min, max; /* minimum and maximum code lengths */ + unsigned root; /* number of index bits for root table */ + unsigned curr; /* number of index bits for current table */ + unsigned drop; /* code bits to drop for sub-table */ + int left; /* number of prefix codes available */ + unsigned used; /* code entries in table used */ + unsigned huff; /* Huffman code */ + unsigned incr; /* for incrementing code, index */ + unsigned fill; /* index for replicating entries */ + unsigned low; /* low bits for current root entry */ + unsigned mask; /* mask for low root bits */ + code here; /* table entry for duplication */ + code FAR *next; /* next available space in table */ + const unsigned short FAR *base; /* base value table to use */ + const unsigned short FAR *extra; /* extra bits table to use */ + unsigned match; /* use base and extra for symbol >= match */ + unsigned short count[MAXBITS+1]; /* number of codes of each length */ + unsigned short offs[MAXBITS+1]; /* offsets in table for each length */ + static const unsigned short lbase[31] = { /* Length codes 257..285 base */ + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, + 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; + static const unsigned short lext[31] = { /* Length codes 257..285 extra */ + 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, + 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 76, 202}; + static const unsigned short dbase[32] = { /* Distance codes 0..29 base */ + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, + 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, + 8193, 12289, 16385, 24577, 0, 0}; + static const unsigned short dext[32] = { /* Distance codes 0..29 extra */ + 16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, + 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, + 28, 28, 29, 29, 64, 64}; - This routine assumes, but does not check, that all of the entries in - lens[] are in the range 0..MAXBITS. The caller must assure this. - 1..MAXBITS is interpreted as that code length. zero means that that - symbol does not occur in this code. + /* + Process a set of code lengths to create a canonical Huffman code. The + code lengths are lens[0..codes-1]. Each length corresponds to the + symbols 0..codes-1. The Huffman code is generated by first sorting the + symbols by length from short to long, and retaining the symbol order + for codes with equal lengths. Then the code starts with all zero bits + for the first code of the shortest length, and the codes are integer + increments for the same length, and zeros are appended as the length + increases. For the deflate format, these bits are stored backwards + from their more natural integer increment ordering, and so when the + decoding tables are built in the large loop below, the integer codes + are incremented backwards. - The codes are sorted by computing a count of codes for each length, - creating from that a table of starting indices for each length in the - sorted table, and then entering the symbols in order in the sorted - table. The sorted table is work[], with that space being provided by - the caller. 
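   A short worked example of that assignment: the complete code with
   lengths {2,3,3,3,3,3,4,4} has count[2] = 1, count[3] = 5, count[4] = 2,
   so the first code of each length is 00, 010 and 1110 respectively;
   handing out consecutive values within each length gives, in symbol
   order, 00, 010, 011, 100, 101, 110, 1110, 1111.  Because deflate
   transmits each code least significant bit first, the loop below
   increments the working code huff "backwards" instead of simply adding
   one.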
+ This routine assumes, but does not check, that all of the entries in + lens[] are in the range 0..MAXBITS. The caller must assure this. + 1..MAXBITS is interpreted as that code length. zero means that that + symbol does not occur in this code. - The length counts are used for other purposes as well, i.e. finding - the minimum and maximum length codes, determining if there are any - codes at all, checking for a valid set of lengths, and looking ahead - at length counts to determine sub-table sizes when building the - decoding tables. - */ + The codes are sorted by computing a count of codes for each length, + creating from that a table of starting indices for each length in the + sorted table, and then entering the symbols in order in the sorted + table. The sorted table is work[], with that space being provided by + the caller. - /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */ - for (len = 0; len <= MAXBITS; len++) count[len] = 0; - for (sym = 0; sym < codes; sym++) count[lens[sym]]++; + The length counts are used for other purposes as well, i.e. finding + the minimum and maximum length codes, determining if there are any + codes at all, checking for a valid set of lengths, and looking ahead + at length counts to determine sub-table sizes when building the + decoding tables. + */ - /* bound code lengths, force root to be within code lengths */ - root = *bits; - for (max = MAXBITS; max >= 1; max--) { - if (count[max] != 0) break; - } - if (root > max) root = max; - if (max == 0) { /* no symbols to code at all */ - here.op = (uint8_t)64; /* invalid code marker */ - here.bits = (uint8_t)1; - here.val = (uint16_t)0; - *(*table)++ = here; /* make a table to force an error */ - *(*table)++ = here; - *bits = 1; - return 0; /* no symbols, but wait for decoding to report error */ - } - for (min = 1; min < max; min++) - if (count[min] != 0) break; - if (root < min) root = min; + /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */ + for (len = 0; len <= MAXBITS; len++) + count[len] = 0; + for (sym = 0; sym < codes; sym++) + count[lens[sym]]++; - /* check for an over-subscribed or incomplete set of lengths */ - left = 1; - for (len = 1; len <= MAXBITS; len++) { - left <<= 1; - left -= count[len]; - if (left < 0) return -1; /* over-subscribed */ - } - if (left > 0 && (type == CODES || max != 1)) return -1; /* incomplete set */ - - /* generate offsets into symbol table for each length for sorting */ - offs[1] = 0; - for (len = 1; len < MAXBITS; len++) { - offs[len + 1] = offs[len] + count[len]; - } - - /* sort symbols by length, by symbol order within each length */ - for (sym = 0; sym < codes; sym++) { - if (lens[sym] != 0) work[offs[lens[sym]]++] = (uint16_t)sym; - } - - /* - Create and fill in decoding tables. In this loop, the table being - filled is at next and has curr index bits. The code being used is - huff with length len. That code is converted to an index by dropping - drop bits off of the bottom. For codes where len is less than drop + - curr, those top drop + curr - len bits are incremented through all - values to fill the table with replicated entries. - - root is the number of index bits for the root table. When len - exceeds root, sub-tables are created pointed to by the root entry - with an index of the low root bits of huff. This is saved in low to - check for when a new sub-table should be started. drop is zero when - the root table is being filled, and drop is root when sub-tables are - being filled. 
- - When a new sub-table is needed, it is necessary to look ahead in the - code lengths to determine what size sub-table is needed. The length - counts are used for this, and so count[] is decremented as codes are - entered in the tables. - - used keeps track of how many table entries have been allocated from - the provided *table space. It is checked for LENS and DIST tables - against the constants ENOUGH_LENS and ENOUGH_DISTS to guard against - changes in the initial root table size constants. See the comments - in inftrees.h for more information. - - sym increments through all symbols, and the loop terminates when all - codes of length max, i.e. all codes, have been processed. This - routine permits incomplete codes, so another loop after this one - fills in the rest of the decoding tables with invalid code markers. - */ - - /* set up for code type */ - switch (type) { - case CODES: - base = extra = work; /* dummy value--not used */ - match = 20; - break; - case LENS: - base = kZlibDeflateLbase; - extra = kZlibDeflateLext; - match = 257; - break; - default: /* DISTS */ - base = kZlibDeflateDbase; - extra = kZlibDeflateDext; - match = 0; - } - - /* initialize state for loop */ - huff = 0; /* starting code */ - sym = 0; /* starting code symbol */ - len = min; /* starting code length */ - next = *table; /* current table to fill in */ - curr = root; /* current table index bits */ - drop = 0; /* current bits to drop from code for index */ - low = (unsigned)(-1); /* trigger new sub-table when len > root */ - used = 1U << root; /* use root table entries */ - mask = used - 1; /* mask for comparing low */ - - /* check available table space */ - if ((type == LENS && used > ENOUGH_LENS) || - (type == DISTS && used > ENOUGH_DISTS)) { - return 1; - } - - /* process all codes and make table entries */ - for (;;) { - /* create table entry */ - here.bits = (uint8_t)(len - drop); - if (work[sym] + 1U < match) { - here.op = (uint8_t)0; - here.val = work[sym]; - } else if (work[sym] >= match) { - here.op = (uint8_t)(extra[work[sym] - match]); - here.val = base[work[sym] - match]; - } else { - here.op = (uint8_t)(32 + 64); /* end of block */ - here.val = 0; + /* bound code lengths, force root to be within code lengths */ + root = *bits; + for (max = MAXBITS; max >= 1; max--) + if (count[max] != 0) break; + if (root > max) root = max; + if (max == 0) { /* no symbols to code at all */ + here.op = (unsigned char)64; /* invalid code marker */ + here.bits = (unsigned char)1; + here.val = (unsigned short)0; + *(*table)++ = here; /* make a table to force an error */ + *(*table)++ = here; + *bits = 1; + return 0; /* no symbols, but wait for decoding to report error */ } + for (min = 1; min < max; min++) + if (count[min] != 0) break; + if (root < min) root = min; - /* replicate for those indices with low len bits equal to huff */ - incr = 1U << (len - drop); - fill = 1U << curr; - min = fill; /* save offset to next table */ - do { - fill -= incr; - next[(huff >> drop) + fill] = here; - } while (fill != 0); - - /* backwards increment the len-bit code huff */ - incr = 1U << (len - 1); - while (huff & incr) incr >>= 1; - if (incr != 0) { - huff &= incr - 1; - huff += incr; - } else - huff = 0; - - /* go to next symbol, update count, len */ - sym++; - if (--(count[len]) == 0) { - if (len == max) break; - len = lens[work[sym]]; - } - - /* create new sub-table if needed */ - if (len > root && (huff & mask) != low) { - /* if first time, transition to sub-tables */ - if (drop == 0) drop = root; - - /* increment past last 
table */ - next += min; /* here min is 1 << curr */ - - /* determine length of next table */ - curr = len - drop; - left = (int)(1u << curr); - while (curr + drop < max) { - left -= count[curr + drop]; - if (left <= 0) break; - curr++; + /* check for an over-subscribed or incomplete set of lengths */ + left = 1; + for (len = 1; len <= MAXBITS; len++) { left <<= 1; - } - - /* check for enough space */ - used += 1U << curr; - if ((type == LENS && used > ENOUGH_LENS) || - (type == DISTS && used > ENOUGH_DISTS)) { - return 1; - } - - /* point entry in root table to sub-table */ - low = huff & mask; - (*table)[low].op = (uint8_t)curr; - (*table)[low].bits = (uint8_t)root; - (*table)[low].val = (uint16_t)(next - *table); + left -= count[len]; + if (left < 0) return -1; /* over-subscribed */ } - } + if (left > 0 && (type == CODES || max != 1)) + return -1; /* incomplete set */ - /* fill in remaining table entry if code is incomplete (guaranteed to have - at most one remaining entry, since if the code is incomplete, the - maximum code length that was allowed to get this far is one bit) */ - if (huff != 0) { - here.op = (uint8_t)64; /* invalid code marker */ - here.bits = (uint8_t)(len - drop); - here.val = (uint16_t)0; - next[huff] = here; - } + /* generate offsets into symbol table for each length for sorting */ + offs[1] = 0; + for (len = 1; len < MAXBITS; len++) + offs[len + 1] = offs[len] + count[len]; - /* set return parameters */ - *table += used; - *bits = root; - return 0; + /* sort symbols by length, by symbol order within each length */ + for (sym = 0; sym < codes; sym++) + if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym; + + /* + Create and fill in decoding tables. In this loop, the table being + filled is at next and has curr index bits. The code being used is huff + with length len. That code is converted to an index by dropping drop + bits off of the bottom. For codes where len is less than drop + curr, + those top drop + curr - len bits are incremented through all values to + fill the table with replicated entries. + + root is the number of index bits for the root table. When len exceeds + root, sub-tables are created pointed to by the root entry with an index + of the low root bits of huff. This is saved in low to check for when a + new sub-table should be started. drop is zero when the root table is + being filled, and drop is root when sub-tables are being filled. + + When a new sub-table is needed, it is necessary to look ahead in the + code lengths to determine what size sub-table is needed. The length + counts are used for this, and so count[] is decremented as codes are + entered in the tables. + + used keeps track of how many table entries have been allocated from the + provided *table space. It is checked for LENS and DIST tables against + the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in + the initial root table size constants. See the comments in inftrees.h + for more information. + + sym increments through all symbols, and the loop terminates when + all codes of length max, i.e. all codes, have been processed. This + routine permits incomplete codes, so another loop after this one fills + in the rest of the decoding tables with invalid code markers. 
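   For instance, with root = 9 a 12 bit code is resolved in two steps:
   its low 9 bits select a root entry whose op field holds the sub-table's
   index width (at least 12 - 9 = 3 bits, more if the look-ahead found
   longer codes sharing that prefix), whose bits field holds root, and
   whose val field holds the sub-table's offset; the next op bits of the
   code then index that sub-table directly.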
+ */ + + /* set up for code type */ + switch (type) { + case CODES: + base = extra = work; /* dummy value--not used */ + match = 20; + break; + case LENS: + base = lbase; + extra = lext; + match = 257; + break; + default: /* DISTS */ + base = dbase; + extra = dext; + match = 0; + } + + /* initialize state for loop */ + huff = 0; /* starting code */ + sym = 0; /* starting code symbol */ + len = min; /* starting code length */ + next = *table; /* current table to fill in */ + curr = root; /* current table index bits */ + drop = 0; /* current bits to drop from code for index */ + low = (unsigned)(-1); /* trigger new sub-table when len > root */ + used = 1U << root; /* use root table entries */ + mask = used - 1; /* mask for comparing low */ + + /* check available table space */ + if ((type == LENS && used > ENOUGH_LENS) || + (type == DISTS && used > ENOUGH_DISTS)) + return 1; + + /* process all codes and make table entries */ + for (;;) { + /* create table entry */ + here.bits = (unsigned char)(len - drop); + if (work[sym] + 1U < match) { + here.op = (unsigned char)0; + here.val = work[sym]; + } + else if (work[sym] >= match) { + here.op = (unsigned char)(extra[work[sym] - match]); + here.val = base[work[sym] - match]; + } + else { + here.op = (unsigned char)(32 + 64); /* end of block */ + here.val = 0; + } + + /* replicate for those indices with low len bits equal to huff */ + incr = 1U << (len - drop); + fill = 1U << curr; + min = fill; /* save offset to next table */ + do { + fill -= incr; + next[(huff >> drop) + fill] = here; + } while (fill != 0); + + /* backwards increment the len-bit code huff */ + incr = 1U << (len - 1); + while (huff & incr) + incr >>= 1; + if (incr != 0) { + huff &= incr - 1; + huff += incr; + } + else + huff = 0; + + /* go to next symbol, update count, len */ + sym++; + if (--(count[len]) == 0) { + if (len == max) break; + len = lens[work[sym]]; + } + + /* create new sub-table if needed */ + if (len > root && (huff & mask) != low) { + /* if first time, transition to sub-tables */ + if (drop == 0) + drop = root; + + /* increment past last table */ + next += min; /* here min is 1 << curr */ + + /* determine length of next table */ + curr = len - drop; + left = (int)(1 << curr); + while (curr + drop < max) { + left -= count[curr + drop]; + if (left <= 0) break; + curr++; + left <<= 1; + } + + /* check for enough space */ + used += 1U << curr; + if ((type == LENS && used > ENOUGH_LENS) || + (type == DISTS && used > ENOUGH_DISTS)) + return 1; + + /* point entry in root table to sub-table */ + low = huff & mask; + (*table)[low].op = (unsigned char)curr; + (*table)[low].bits = (unsigned char)root; + (*table)[low].val = (unsigned short)(next - *table); + } + } + + /* fill in remaining table entry if code is incomplete (guaranteed to have + at most one remaining entry, since if the code is incomplete, the + maximum code length that was allowed to get this far is one bit) */ + if (huff != 0) { + here.op = (unsigned char)64; /* invalid code marker */ + here.bits = (unsigned char)(len - drop); + here.val = (unsigned short)0; + next[huff] = here; + } + + /* set return parameters */ + *table += used; + *bits = root; + return 0; } diff --git a/third_party/zlib/inftrees.internal.h b/third_party/zlib/inftrees.internal.h index ea02b23d6..93a4634bb 100644 --- a/third_party/zlib/inftrees.internal.h +++ b/third_party/zlib/inftrees.internal.h @@ -1,29 +1,21 @@ -#ifndef COSMOPOLITAN_THIRD_PARTY_ZLIB_INFTREES_H_ -#define COSMOPOLITAN_THIRD_PARTY_ZLIB_INFTREES_H_ +#ifndef 
COSMOPOLITAN_THIRD_PARTY_ZLIB_INFTREES_INTERNAL_H_ +#define COSMOPOLITAN_THIRD_PARTY_ZLIB_INFTREES_INTERNAL_H_ +#include "third_party/zlib/macros.internal.h" +#include "third_party/zlib/zutil.internal.h" +#if !(__ASSEMBLER__ + __LINKER__ + 0) +COSMOPOLITAN_C_START_ +/* clang-format off */ + +/* inftrees.h -- header to use inftrees.c + * Copyright (C) 1995-2005, 2010 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ /* WARNING: this file should *not* be used by applications. It is part of the implementation of the compression library and is subject to change. Applications should only use zlib.h. */ -/* Maximum size of the dynamic table. The maximum number of code structures is - 1444, which is the sum of 852 for literal/length codes and 592 for distance - codes. These values were found by exhaustive searches using the program - examples/enough.c found in the zlib distribtution. The arguments to that - program are the number of symbols, the initial root table size, and the - maximum bit length of a code. "enough 286 9 15" for literal/length codes - returns returns 852, and "enough 30 6 15" for distance codes returns 592. - The initial root table size (9 or 6) is found in the fifth argument of the - inflate_table() calls in inflate.c and infback.c. If the root table size is - changed, then these maximum sizes would be need to be recalculated and - updated. */ -#define ENOUGH_LENS 852 -#define ENOUGH_DISTS 592 -#define ENOUGH (ENOUGH_LENS + ENOUGH_DISTS) - -#if !(__ASSEMBLER__ + __LINKER__ + 0) -COSMOPOLITAN_C_START_ - /* Structure for decoding tables. Each entry provides either the information needed to do the operation requested by the code that indexed that table entry, or it provides a pointer to another @@ -37,14 +29,11 @@ COSMOPOLITAN_C_START_ of the bit buffer. val is the actual byte to output in the case of a literal, the base length or distance, or the offset from the current table to the next table. Each entry is four bytes. */ -struct zcode { - unsigned char op; /* operation, extra bits, table bits */ - unsigned char bits; /* bits in this part of the code */ - unsigned short val; /* offset in table or code value */ -}; - -extern const struct zcode kZlibLenfix[512] hidden; -extern const struct zcode kZlibDistfix[32] hidden; +typedef struct { + unsigned char op; /* operation, extra bits, table bits */ + unsigned char bits; /* bits in this part of the code */ + unsigned short val; /* offset in table or code value */ +} code; /* op values as set by inflate_table(): 00000000 - literal @@ -54,13 +43,32 @@ extern const struct zcode kZlibDistfix[32] hidden; 01000000 - invalid code */ -/* Type of code to build for inflate_table() */ -typedef enum { CODES, LENS, DISTS } zcodetype; +/* Maximum size of the dynamic table. The maximum number of code structures is + 1924, which is the sum of 1332 for literal/length codes and 592 for distance + codes. These values were found by exhaustive searches using the program + examples/enough.c found in the zlib distribution. The arguments to that + program are the number of symbols, the initial root table size, and the + maximum bit length of a code. "enough 286 10 15" for literal/length codes + returns returns 1332, and "enough 30 9 15" for distance codes returns 592. + The initial root table size (10 or 9) is found in the fifth argument of the + inflate_table() calls in inflate.c and infback.c. If the root table size is + changed, then these maximum sizes would be need to be recalculated and + updated. 
*/ +#define ENOUGH_LENS 1332 +#define ENOUGH_DISTS 592 +#define ENOUGH (ENOUGH_LENS+ENOUGH_DISTS) -int inflate_table(zcodetype type, unsigned short *lens, unsigned codes, - struct zcode **table, unsigned *bits, - unsigned short *work) hidden; +/* Type of code to build for inflate_table() */ +typedef enum { + CODES, + LENS, + DISTS +} codetype; + +int ZLIB_INTERNAL inflate_table OF((codetype type, unsigned short FAR *lens, + unsigned codes, code FAR * FAR *table, + unsigned FAR *bits, unsigned short FAR *work)); COSMOPOLITAN_C_END_ #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ -#endif /* COSMOPOLITAN_THIRD_PARTY_ZLIB_INFTREES_H_ */ +#endif /* COSMOPOLITAN_THIRD_PARTY_ZLIB_INFTREES_INTERNAL_H_ */ diff --git a/third_party/zlib/insert_string.internal.h b/third_party/zlib/insert_string.internal.h new file mode 100644 index 000000000..b76fa803a --- /dev/null +++ b/third_party/zlib/insert_string.internal.h @@ -0,0 +1,141 @@ +#ifndef COSMOPOLITAN_THIRD_PARTY_ZLIB_INSERT_STRING_H_ +#define COSMOPOLITAN_THIRD_PARTY_ZLIB_INSERT_STRING_H_ +#include "third_party/zlib/deflate.internal.h" +#include "third_party/zlib/zutil.internal.h" +#if !(__ASSEMBLER__ + __LINKER__ + 0) +COSMOPOLITAN_C_START_ +/* clang-format off */ + +#ifndef INLINE +#if defined(_MSC_VER) && !defined(__clang__) +#define INLINE __inline +#else +#define INLINE inline +#endif +#endif + +// clang-format off +#if defined(CRC32_SIMD_SSE42_PCLMUL) + #include /* Required to make MSVC bot build pass. */ + + #if defined(__clang__) || defined(__GNUC__) + #define TARGET_CPU_WITH_CRC __attribute__((target("sse4.2"))) + #else + #define TARGET_CPU_WITH_CRC + #endif + + /* CRC32C uint32_t */ + #define _cpu_crc32c_hash_u32 _mm_crc32_u32 + +#elif defined(CRC32_ARMV8_CRC32) + #if defined(__clang__) + #define __crc32cw __builtin_arm_crc32cw + #elif defined(__GNUC__) + #define __crc32cw __builtin_aarch64_crc32cw + #endif + + #if defined(__aarch64__) && defined(__clang__) + #define TARGET_CPU_WITH_CRC __attribute__((target("crc"))) + #elif defined(__aarch64__) && defined(__GNUC__) + #define TARGET_CPU_WITH_CRC __attribute__((target("+crc"))) + #elif defined(__clang__) // !defined(__aarch64__) + #define TARGET_CPU_WITH_CRC __attribute__((target("armv8-a,crc"))) + #endif // defined(__aarch64__) + + /* CRC32C uint32_t */ + #define _cpu_crc32c_hash_u32 __crc32cw + +#endif +// clang-format on + +#if defined(TARGET_CPU_WITH_CRC) + +TARGET_CPU_WITH_CRC +local INLINE Pos insert_string_simd(deflate_state* const s, const Pos str) { + Pos ret; + unsigned *ip, val, h = 0; + + ip = (unsigned*)&s->window[str]; + val = *ip; + + if (s->level >= 6) val &= 0xFFFFFF; + + /* Compute hash from the CRC32C of |val|. */ + h = _cpu_crc32c_hash_u32(h, val); + + ret = s->head[h & s->hash_mask]; + s->head[h & s->hash_mask] = str; + s->prev[str & s->w_mask] = ret; + return ret; +} + +#endif // TARGET_CPU_WITH_CRC + +/** + * Some applications need to match zlib DEFLATE output exactly [3]. Use the + * canonical zlib Rabin-Karp rolling hash [1,2] in that case. + * + * [1] For a description of the Rabin and Karp algorithm, see "Algorithms" + * book by R. Sedgewick, Addison-Wesley, p252. + * [2] https://www.euccas.me/zlib/#zlib_rabin_karp and also "rolling hash" + * https://en.wikipedia.org/wiki/Rolling_hash + * [3] crbug.com/1316541 AOSP incremental client APK package OTA upgrades. 
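 *
 * Why the Rabin-Karp form below rolls (a sketch using the values deflate
 * configures at the default memLevel: hash_bits = 15, so hash_shift =
 * (hash_bits + MIN_MATCH - 1) / MIN_MATCH = 5): every UPDATE_HASH() shifts
 * the key left by hash_shift and masks it back to hash_bits, so a byte
 * XORed in at bits 0..7 moves to bits 5..12 on the next update, survives
 * only partially at bits 10..14 on the one after that, and is masked away
 * completely on the third.  The running key therefore depends on exactly
 * the last MIN_MATCH bytes and equals what UPDATE_HASH() would produce
 * starting from zero on just those three bytes, which is why consecutive
 * calls never have to rescan earlier window bytes.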
+ */ +#ifdef CHROMIUM_ZLIB_NO_CASTAGNOLI +#define USE_ZLIB_RABIN_KARP_ROLLING_HASH +#endif + +/* =========================================================================== + * Update a hash value with the given input byte (Rabin-Karp rolling hash). + * IN assertion: all calls to UPDATE_HASH are made with consecutive input + * characters, so that a running hash key can be computed from the previous + * key instead of complete recalculation each time. + */ +#define UPDATE_HASH(s, h, c) (h = (((h) << s->hash_shift) ^ (c)) & s->hash_mask) + +/* =========================================================================== + * Insert string str in the dictionary and set match_head to the previous head + * of the hash chain (the most recent string with same hash key). Return + * the previous length of the hash chain. + * If this file is compiled with -DFASTEST, the compression level is forced + * to 1, and no hash chains are maintained. + * IN assertion: all calls to INSERT_STRING are made with consecutive input + * characters and the first MIN_MATCH bytes of str are valid (except for + * the last MIN_MATCH-1 bytes of the input file). + */ +local INLINE Pos insert_string_c(deflate_state* const s, const Pos str) { + Pos ret; + + UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH - 1)]); +#ifdef FASTEST + ret = s->head[s->ins_h]; +#else + ret = s->prev[str & s->w_mask] = s->head[s->ins_h]; +#endif + s->head[s->ins_h] = str; + + return ret; +} + +local INLINE Pos insert_string(deflate_state* const s, const Pos str) { +/* insert_string_simd string dictionary insertion: SIMD crc32c symbol hasher + * significantly improves data compression speed. + * + * Note: the generated compressed output is a valid DEFLATE stream, but will + * differ from canonical zlib output. + */ +#if defined(USE_ZLIB_RABIN_KARP_ROLLING_HASH) +/* So this build-time option can be used to disable the crc32c hash, and use + * the Rabin-Karp hash instead. 
+ */ /* FALLTHROUGH Rabin-Karp */ +#elif defined(TARGET_CPU_WITH_CRC) && defined(CRC32_SIMD_SSE42_PCLMUL) + if (x86_cpu_enable_simd) return insert_string_simd(s, str); +#elif defined(TARGET_CPU_WITH_CRC) && defined(CRC32_ARMV8_CRC32) + if (arm_cpu_enable_crc32) return insert_string_simd(s, str); +#endif + return insert_string_c(s, str); /* Rabin-Karp */ +} + +COSMOPOLITAN_C_END_ +#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ +#endif /* COSMOPOLITAN_THIRD_PARTY_ZLIB_INSERT_STRING_H_ */ diff --git a/third_party/zlib/internal.h b/third_party/zlib/internal.h index 2ef2a6727..f97b9fa6a 100644 --- a/third_party/zlib/internal.h +++ b/third_party/zlib/internal.h @@ -1,5 +1,6 @@ #ifndef COSMOPOLITAN_THIRD_PARTY_ZLIB_INTERNAL_H_ #define COSMOPOLITAN_THIRD_PARTY_ZLIB_INTERNAL_H_ +#include "libc/str/str.h" #include "third_party/zlib/deflate.internal.h" #define Z_CRC32_SSE42_MINIMUM_LENGTH 64 @@ -8,16 +9,20 @@ #if !(__ASSEMBLER__ + __LINKER__ + 0) COSMOPOLITAN_C_START_ +#define zmemzero bzero +#define zmemcpy memmove +#define z_const const + unsigned deflate_read_buf(z_streamp, Bytef *, unsigned) hidden; void copy_with_crc(z_streamp, Bytef *, long) hidden; -void crc_finalize(struct DeflateState *const) hidden; -void crc_reset(struct DeflateState *const) hidden; +void crc_finalize(deflate_state *const) hidden; +void crc_reset(deflate_state *const) hidden; uint32_t adler32_simd_(uint32_t, const unsigned char *, size_t) hidden; -void crc_fold_init(struct DeflateState *const) hidden; -void crc_fold_copy(struct DeflateState *const, unsigned char *, - const unsigned char *, long) hidden; -unsigned crc_fold_512to32(struct DeflateState *const) hidden; -void fill_window_sse(struct DeflateState *) hidden; +void crc_fold_init(deflate_state *const) hidden; +void crc_fold_copy(deflate_state *const, unsigned char *, const unsigned char *, + long) hidden; +unsigned crc_fold_512to32(deflate_state *const) hidden; +void fill_window_sse(deflate_state *) hidden; void *zcalloc(void *, uInt, uInt) hidden; void zcfree(void *, void *) hidden; diff --git a/third_party/zlib/kdistcode.S b/third_party/zlib/kdistcode.S deleted file mode 100644 index 16508dfa8..000000000 --- a/third_party/zlib/kdistcode.S +++ /dev/null @@ -1,67 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. 
│ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "third_party/zlib/deflate.internal.h" -#include "libc/macros.internal.h" - - .initbss 300,_init_kZlibDistCode -kZlibDistCode: - .zero DIST_CODE_LEN - .endobj kZlibDistCode,globl,hidden - .previous - - .initro 300,_init_kZlibDistCode -.LkZlibDistCode.rodata: /* 64 bytes (13%) */ - .byte 1,0x00 /* 00-00 ∅ */ - .byte 1,0x01 /* 01-01 ☺ */ - .byte 1,0x02 /* 02-02 ☻ */ - .byte 1,0x03 /* 03-03 ♥ */ - .byte 2,0x04 /* 04-05 ♦-♣ */ - .byte 2,0x05 /* 06-07 ♠-• */ - .byte 4,0x06 /* 08-0b ◘-♂ */ - .byte 4,0x07 /* 0c-0f ♀-☼ */ - .byte 8,0x08 /* 10-17 ►-↨ */ - .byte 8,0x09 /* 18-1f ↑-▼ */ - .byte 16,0x0a /* 20-2f -/ */ - .byte 16,0x0b /* 30-3f 0-? */ - .byte 32,0x0c /* 40-5f @-_ */ - .byte 32,0x0d /* 60-7f `-⌂ */ - .byte 64,0x0e /* 80-bf Ç-┐ */ - .byte 64,0x0f /* c0-ff └-λ */ - .byte 2,0x00 /* 100-101 */ - .byte 1,0x10 /* 102-102 */ - .byte 1,0x11 /* 103-103 */ - .byte 2,0x12 /* 104-105 */ - .byte 2,0x13 /* 106-107 */ - .byte 4,0x14 /* 108-10b */ - .byte 4,0x15 /* 10c-10f */ - .byte 8,0x16 /* 110-117 */ - .byte 8,0x17 /* 118-11f */ - .byte 16,0x18 /* 120-12f */ - .byte 16,0x19 /* 130-13f */ - .byte 32,0x1a /* 140-15f */ - .byte 32,0x1b /* 160-17f */ - .byte 64,0x1c /* 180-1bf */ - .byte 64,0x1d /* 1c0-1ff */ - .endobj .LkZlibDistCode.rodata - .byte 0,0 /* terminator */ - .previous - - .init.start 300,_init_kZlibDistCode - call rldecode - .init.end 300,_init_kZlibDistCode diff --git a/third_party/zlib/klengthcode.S b/third_party/zlib/klengthcode.S deleted file mode 100644 index 436e23833..000000000 --- a/third_party/zlib/klengthcode.S +++ /dev/null @@ -1,67 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. 
│ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "third_party/zlib/zutil.internal.h" -#include "libc/macros.internal.h" - - .initbss 300,_init_kZlibLengthCode -kZlibLengthCode: - .zero MAX_MATCH - MIN_MATCH + 1 - .endobj kZlibLengthCode,globl,hidden - .previous - - .initro 300,_init_kZlibLengthCode -.LkZlibLengthCode.rodata: /* 64 bytes (25%) */ - .byte 1,0x00 /* 00-00 ∅ */ - .byte 1,0x01 /* 01-01 ☺ */ - .byte 1,0x02 /* 02-02 ☻ */ - .byte 1,0x03 /* 03-03 ♥ */ - .byte 1,0x04 /* 04-04 ♦ */ - .byte 1,0x05 /* 05-05 ♣ */ - .byte 1,0x06 /* 06-06 ♠ */ - .byte 1,0x07 /* 07-07 • */ - .byte 2,0x08 /* 08-09 ◘-○ */ - .byte 2,0x09 /* 0a-0b ◙-♂ */ - .byte 2,0x0a /* 0c-0d ♀-♪ */ - .byte 2,0x0b /* 0e-0f ♫-☼ */ - .byte 4,0x0c /* 10-13 ►-‼ */ - .byte 4,0x0d /* 14-17 ¶-↨ */ - .byte 4,0x0e /* 18-1b ↑-← */ - .byte 4,0x0f /* 1c-1f ∟-▼ */ - .byte 8,0x10 /* 20-27 -' */ - .byte 8,0x11 /* 28-2f (-/ */ - .byte 8,0x12 /* 30-37 0-7 */ - .byte 8,0x13 /* 38-3f 8-? */ - .byte 16,0x14 /* 40-4f @-O */ - .byte 16,0x15 /* 50-5f P-_ */ - .byte 16,0x16 /* 60-6f `-o */ - .byte 16,0x17 /* 70-7f p-⌂ */ - .byte 32,0x18 /* 80-9f Ç-ƒ */ - .byte 32,0x19 /* a0-bf á-┐ */ - .byte 32,0x1a /* c0-df └-▀ */ - .byte 31,0x1b /* e0-fe α-■ */ - .byte 1,0x1c /* ff-ff λ */ - .endobj .LkZlibLengthCode.rodata - .byte 0,0 /* terminator */ - .byte 0,0,0,0 /* padding */ - .previous - - .init.start 300,_init_kZlibLengthCode - call rldecode - lodsl - .init.end 300,_init_kZlibLengthCode diff --git a/third_party/zlib/kstaticdtree.c b/third_party/zlib/kstaticdtree.c deleted file mode 100644 index 390d9ef76..000000000 --- a/third_party/zlib/kstaticdtree.c +++ /dev/null @@ -1,10 +0,0 @@ -#include "third_party/zlib/internal.h" - -hidden const ct_data kZlibStaticDtree[D_CODES] = { - {{0}, {5}}, {{16}, {5}}, {{8}, {5}}, {{24}, {5}}, {{4}, {5}}, - {{20}, {5}}, {{12}, {5}}, {{28}, {5}}, {{2}, {5}}, {{18}, {5}}, - {{10}, {5}}, {{26}, {5}}, {{6}, {5}}, {{22}, {5}}, {{14}, {5}}, - {{30}, {5}}, {{1}, {5}}, {{17}, {5}}, {{9}, {5}}, {{25}, {5}}, - {{5}, {5}}, {{21}, {5}}, {{13}, {5}}, {{29}, {5}}, {{3}, {5}}, - {{19}, {5}}, {{11}, {5}}, {{27}, {5}}, {{7}, {5}}, {{23}, {5}}, -}; diff --git a/third_party/zlib/kstaticltree.c b/third_party/zlib/kstaticltree.c deleted file mode 100644 index f5f6ec104..000000000 --- a/third_party/zlib/kstaticltree.c +++ /dev/null @@ -1,62 +0,0 @@ -#include "third_party/zlib/internal.h" - -hidden const ct_data kZlibStaticLtree[L_CODES + 2] = { - {{12}, {8}}, {{140}, {8}}, {{76}, {8}}, {{204}, {8}}, {{44}, {8}}, - {{172}, {8}}, {{108}, {8}}, {{236}, {8}}, {{28}, {8}}, {{156}, {8}}, - {{92}, {8}}, {{220}, {8}}, {{60}, {8}}, {{188}, {8}}, {{124}, {8}}, - {{252}, {8}}, {{2}, {8}}, {{130}, {8}}, {{66}, {8}}, {{194}, {8}}, - {{34}, {8}}, {{162}, {8}}, {{98}, {8}}, {{226}, {8}}, {{18}, {8}}, - {{146}, {8}}, {{82}, {8}}, {{210}, {8}}, {{50}, {8}}, {{178}, {8}}, - {{114}, {8}}, {{242}, {8}}, {{10}, {8}}, {{138}, {8}}, {{74}, {8}}, - {{202}, {8}}, {{42}, {8}}, {{170}, {8}}, {{106}, {8}}, {{234}, {8}}, - {{26}, {8}}, {{154}, {8}}, {{90}, {8}}, {{218}, {8}}, {{58}, {8}}, - {{186}, {8}}, {{122}, {8}}, {{250}, {8}}, {{6}, {8}}, {{134}, {8}}, - {{70}, {8}}, {{198}, {8}}, {{38}, {8}}, {{166}, {8}}, {{102}, {8}}, - {{230}, {8}}, {{22}, {8}}, {{150}, {8}}, {{86}, {8}}, {{214}, {8}}, - {{54}, {8}}, {{182}, {8}}, {{118}, {8}}, {{246}, {8}}, {{14}, {8}}, - {{142}, {8}}, {{78}, {8}}, {{206}, {8}}, {{46}, {8}}, {{174}, {8}}, - {{110}, {8}}, {{238}, {8}}, {{30}, {8}}, {{158}, {8}}, {{94}, {8}}, - {{222}, {8}}, {{62}, 
{8}}, {{190}, {8}}, {{126}, {8}}, {{254}, {8}}, - {{1}, {8}}, {{129}, {8}}, {{65}, {8}}, {{193}, {8}}, {{33}, {8}}, - {{161}, {8}}, {{97}, {8}}, {{225}, {8}}, {{17}, {8}}, {{145}, {8}}, - {{81}, {8}}, {{209}, {8}}, {{49}, {8}}, {{177}, {8}}, {{113}, {8}}, - {{241}, {8}}, {{9}, {8}}, {{137}, {8}}, {{73}, {8}}, {{201}, {8}}, - {{41}, {8}}, {{169}, {8}}, {{105}, {8}}, {{233}, {8}}, {{25}, {8}}, - {{153}, {8}}, {{89}, {8}}, {{217}, {8}}, {{57}, {8}}, {{185}, {8}}, - {{121}, {8}}, {{249}, {8}}, {{5}, {8}}, {{133}, {8}}, {{69}, {8}}, - {{197}, {8}}, {{37}, {8}}, {{165}, {8}}, {{101}, {8}}, {{229}, {8}}, - {{21}, {8}}, {{149}, {8}}, {{85}, {8}}, {{213}, {8}}, {{53}, {8}}, - {{181}, {8}}, {{117}, {8}}, {{245}, {8}}, {{13}, {8}}, {{141}, {8}}, - {{77}, {8}}, {{205}, {8}}, {{45}, {8}}, {{173}, {8}}, {{109}, {8}}, - {{237}, {8}}, {{29}, {8}}, {{157}, {8}}, {{93}, {8}}, {{221}, {8}}, - {{61}, {8}}, {{189}, {8}}, {{125}, {8}}, {{253}, {8}}, {{19}, {9}}, - {{275}, {9}}, {{147}, {9}}, {{403}, {9}}, {{83}, {9}}, {{339}, {9}}, - {{211}, {9}}, {{467}, {9}}, {{51}, {9}}, {{307}, {9}}, {{179}, {9}}, - {{435}, {9}}, {{115}, {9}}, {{371}, {9}}, {{243}, {9}}, {{499}, {9}}, - {{11}, {9}}, {{267}, {9}}, {{139}, {9}}, {{395}, {9}}, {{75}, {9}}, - {{331}, {9}}, {{203}, {9}}, {{459}, {9}}, {{43}, {9}}, {{299}, {9}}, - {{171}, {9}}, {{427}, {9}}, {{107}, {9}}, {{363}, {9}}, {{235}, {9}}, - {{491}, {9}}, {{27}, {9}}, {{283}, {9}}, {{155}, {9}}, {{411}, {9}}, - {{91}, {9}}, {{347}, {9}}, {{219}, {9}}, {{475}, {9}}, {{59}, {9}}, - {{315}, {9}}, {{187}, {9}}, {{443}, {9}}, {{123}, {9}}, {{379}, {9}}, - {{251}, {9}}, {{507}, {9}}, {{7}, {9}}, {{263}, {9}}, {{135}, {9}}, - {{391}, {9}}, {{71}, {9}}, {{327}, {9}}, {{199}, {9}}, {{455}, {9}}, - {{39}, {9}}, {{295}, {9}}, {{167}, {9}}, {{423}, {9}}, {{103}, {9}}, - {{359}, {9}}, {{231}, {9}}, {{487}, {9}}, {{23}, {9}}, {{279}, {9}}, - {{151}, {9}}, {{407}, {9}}, {{87}, {9}}, {{343}, {9}}, {{215}, {9}}, - {{471}, {9}}, {{55}, {9}}, {{311}, {9}}, {{183}, {9}}, {{439}, {9}}, - {{119}, {9}}, {{375}, {9}}, {{247}, {9}}, {{503}, {9}}, {{15}, {9}}, - {{271}, {9}}, {{143}, {9}}, {{399}, {9}}, {{79}, {9}}, {{335}, {9}}, - {{207}, {9}}, {{463}, {9}}, {{47}, {9}}, {{303}, {9}}, {{175}, {9}}, - {{431}, {9}}, {{111}, {9}}, {{367}, {9}}, {{239}, {9}}, {{495}, {9}}, - {{31}, {9}}, {{287}, {9}}, {{159}, {9}}, {{415}, {9}}, {{95}, {9}}, - {{351}, {9}}, {{223}, {9}}, {{479}, {9}}, {{63}, {9}}, {{319}, {9}}, - {{191}, {9}}, {{447}, {9}}, {{127}, {9}}, {{383}, {9}}, {{255}, {9}}, - {{511}, {9}}, {{0}, {7}}, {{64}, {7}}, {{32}, {7}}, {{96}, {7}}, - {{16}, {7}}, {{80}, {7}}, {{48}, {7}}, {{112}, {7}}, {{8}, {7}}, - {{72}, {7}}, {{40}, {7}}, {{104}, {7}}, {{24}, {7}}, {{88}, {7}}, - {{56}, {7}}, {{120}, {7}}, {{4}, {7}}, {{68}, {7}}, {{36}, {7}}, - {{100}, {7}}, {{20}, {7}}, {{84}, {7}}, {{52}, {7}}, {{116}, {7}}, - {{3}, {8}}, {{131}, {8}}, {{67}, {8}}, {{195}, {8}}, {{35}, {8}}, - {{163}, {8}}, {{99}, {8}}, {{227}, {8}}, -}; diff --git a/third_party/zlib/macros.internal.h b/third_party/zlib/macros.internal.h new file mode 100644 index 000000000..458b9eb3c --- /dev/null +++ b/third_party/zlib/macros.internal.h @@ -0,0 +1,104 @@ +#ifndef COSMOPOLITAN_THIRD_PARTY_ZLIB_MACROS_INTERNAL_H_ +#define COSMOPOLITAN_THIRD_PARTY_ZLIB_MACROS_INTERNAL_H_ +#if !(__ASSEMBLER__ + __LINKER__ + 0) +COSMOPOLITAN_C_START_ +/* clang-format off */ + +#ifndef OF /* function prototypes */ +# ifdef STDC +# define OF(args) args +# else +# define OF(args) () +# endif +#endif + +#ifndef Z_ARG /* function prototypes for stdarg 
*/ +# if defined(STDC) || defined(Z_HAVE_STDARG_H) +# define Z_ARG(args) args +# else +# define Z_ARG(args) () +# endif +#endif + +/* The following definitions for FAR are needed only for MSDOS mixed + * model programming (small or medium model with some far allocations). + * This was tested only with MSC; for other MSDOS compilers you may have + * to define NO_MEMCPY in zutil.h. If you don't need the mixed model, + * just define FAR to be empty. + */ +#ifdef SYS16BIT +# if defined(M_I86SM) || defined(M_I86MM) + /* MSC small or medium model */ +# define SMALL_MEDIUM +# ifdef _MSC_VER +# define FAR _far +# else +# define FAR far +# endif +# endif +# if (defined(__SMALL__) || defined(__MEDIUM__)) + /* Turbo C small or medium model */ +# define SMALL_MEDIUM +# ifdef __BORLANDC__ +# define FAR _far +# else +# define FAR far +# endif +# endif +#endif + +#if defined(WINDOWS) || defined(WIN32) + /* If building or using zlib as a DLL, define ZLIB_DLL. + * This is not mandatory, but it offers a little performance increase. + */ +# ifdef ZLIB_DLL +# if defined(WIN32) && (!defined(__BORLANDC__) || (__BORLANDC__ >= 0x500)) +# ifdef ZLIB_INTERNAL +# define ZEXTERN extern __declspec(dllexport) +# else +# define ZEXTERN extern __declspec(dllimport) +# endif +# endif +# endif /* ZLIB_DLL */ + /* If building or using zlib with the WINAPI/WINAPIV calling convention, + * define ZLIB_WINAPI. + * Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI. + */ +# ifdef ZLIB_WINAPI +# ifdef FAR +# undef FAR +# endif +# include + /* No need for _export, use ZLIB.DEF instead. */ + /* For complete Windows compatibility, use WINAPI, not __stdcall. */ +# define ZEXPORT WINAPI +# ifdef WIN32 +# define ZEXPORTVA WINAPIV +# else +# define ZEXPORTVA FAR CDECL +# endif +# endif +#endif + +#ifndef ZEXTERN +# define ZEXTERN extern +#endif +#ifndef ZEXPORT +# define ZEXPORT +#endif +#ifndef ZEXPORTVA +# define ZEXPORTVA +#endif + +#ifndef FAR +# define FAR +#endif +#ifndef far +# define far +#endif + +#define z_off_t long + +COSMOPOLITAN_C_END_ +#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ +#endif /* COSMOPOLITAN_THIRD_PARTY_ZLIB_MACROS_INTERNAL_H_ */ diff --git a/third_party/zlib/trees.c b/third_party/zlib/trees.c index f65695b87..8b3b3a5ae 100644 --- a/third_party/zlib/trees.c +++ b/third_party/zlib/trees.c @@ -1,34 +1,29 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;tab-width:8;coding:utf-8 -*-│ -│vi: set net ft=c ts=4 sts=4 sw=4 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 1995-2017 Jean-loup Gailly │ -│ Use of this source code is governed by the BSD-style licenses that can │ -│ be found in the third_party/zlib/LICENSE file. 
│ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/intrin/kprintf.h" +/* trees.c -- output deflated data using Huffman coding + * Copyright (C) 1995-2021 Jean-loup Gailly + * detect_data_type() function provided freely by Cosmin Truta, 2006 + * For conditions of distribution and use, see copyright notice in zlib.h + */ +#include "libc/calls/calls.h" +#include "libc/fmt/fmt.h" +#include "libc/mem/fmt.h" +#include "libc/stdio/lock.internal.h" #include "libc/stdio/stdio.h" +#include "libc/stdio/temp.h" #include "libc/str/str.h" #include "third_party/zlib/deflate.internal.h" -#include "third_party/zlib/internal.h" +// clang-format off -asm(".ident\t\"\\n\\n\ -zlib (zlib License)\\n\ -Copyright 1995-2017 Jean-loup Gailly and Mark Adler\""); -asm(".include \"libc/disclaimer.inc\""); - -/** - * @fileoverview output deflated data using Huffman coding - * +/* * ALGORITHM * * The "deflation" process uses several Huffman trees. The more * common source values are represented by shorter bit sequences. * - * Each code tree is stored in a compressed form which is itself a - * Huffman encoding of the lengths of all the code strings (in - * ascending order by source values). The actual code strings are - * reconstructed from the lengths in the inflate process, as - * described in the deflate specification. + * Each code tree is stored in a compressed form which is itself + * a Huffman encoding of the lengths of all the code strings (in + * ascending order by source values). The actual code strings are + * reconstructed from the lengths in the inflate process, as described + * in the deflate specification. * * REFERENCES * @@ -44,381 +39,443 @@ asm(".include \"libc/disclaimer.inc\""); * Addison-Wesley, 1983. ISBN 0-201-06672-6. 
*/ -/* Bit length codes must not exceed MAX_BL_BITS bits */ +/* @(#) $Id$ */ + +/* #define GEN_TREES_H */ + +/* =========================================================================== + * Constants + */ + #define MAX_BL_BITS 7 +/* Bit length codes must not exceed MAX_BL_BITS bits */ -/* end of block literal code */ #define END_BLOCK 256 +/* end of block literal code */ +#define REP_3_6 16 /* repeat previous bit length 3-6 times (2 bits of repeat count) */ -#define REP_3_6 16 +#define REPZ_3_10 17 /* repeat a zero length 3-10 times (3 bits of repeat count) */ -#define REPZ_3_10 17 +#define REPZ_11_138 18 /* repeat a zero length 11-138 times (7 bits of repeat count) */ -#define REPZ_11_138 18 -static const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */ - = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, - 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0}; +local const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */ + = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0}; -static const int extra_dbits[D_CODES] /* extra bits for each distance code */ - = {0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, - 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13}; +local const int extra_dbits[D_CODES] /* extra bits for each distance code */ + = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; -static const int - extra_blbits[BL_CODES] /* extra bits for each bit length code */ - = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 7}; +local const int extra_blbits[BL_CODES]/* extra bits for each bit length code */ + = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7}; -static const uint8_t bl_order[BL_CODES] = {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, - 11, 4, 12, 3, 13, 2, 14, 1, 15}; +local const uch bl_order[BL_CODES] + = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15}; /* The lengths of the bit length codes are sent in order of decreasing * probability, to avoid transmitting the lengths for unused bit length codes. */ -/** +/* =========================================================================== * Local data. These are initialized only once. */ +#define DIST_CODE_LEN 512 /* see definition of array dist_code below */ + +#if defined(GEN_TREES_H) || !defined(STDC) +/* non ANSI compilers may not accept trees.h */ + +local ct_data static_ltree[L_CODES+2]; +/* The static literal tree. Since the bit lengths are imposed, there is no + * need for the L_CODES extra codes used during heap construction. However + * The codes 286 and 287 are needed to build a canonical tree (see _tr_init + * below). + */ + +local ct_data static_dtree[D_CODES]; +/* The static distance tree. (Actually a trivial tree since all codes use + * 5 bits.) + */ + +uch _dist_code[DIST_CODE_LEN]; +/* Distance codes. The first 256 values correspond to the distances + * 3 .. 258, the last 256 values correspond to the top 8 bits of + * the 15 bit distances. 
+ */ + +uch _length_code[MAX_MATCH-MIN_MATCH+1]; +/* length code for each normalized match length (0 == MIN_MATCH) */ + +local int base_length[LENGTH_CODES]; +/* First normalized length for each code (0 = MIN_MATCH) */ + +local int base_dist[D_CODES]; +/* First normalized distance for each code (0 = distance of 1) */ + +#else +#include "third_party/zlib/trees.inc" +#endif /* GEN_TREES_H */ + struct static_tree_desc_s { - const ct_data *static_tree; /* static tree or NULL */ - const intf *extra_bits; /* extra bits for each code or NULL */ - int extra_base; /* base index for extra_bits */ - int elems; /* max number of elements in the tree */ - int max_length; /* max bit length for the codes */ + const ct_data *static_tree; /* static tree or NULL */ + const intf *extra_bits; /* extra bits for each code or NULL */ + int extra_base; /* base index for extra_bits */ + int elems; /* max number of elements in the tree */ + int max_length; /* max bit length for the codes */ }; -static const static_tree_desc static_l_desc = {kZlibStaticLtree, extra_lbits, - LITERALS + 1, L_CODES, MAX_BITS}; +local const static_tree_desc static_l_desc = +{static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS}; -static const static_tree_desc static_d_desc = {kZlibStaticDtree, extra_dbits, 0, - D_CODES, MAX_BITS}; +local const static_tree_desc static_d_desc = +{static_dtree, extra_dbits, 0, D_CODES, MAX_BITS}; -static const static_tree_desc static_bl_desc = { - (const ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS}; +local const static_tree_desc static_bl_desc = +{(const ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS}; -/** +/* =========================================================================== * Local (static) routines in this file. */ -static void tr_static_init(void); -static void init_block(struct DeflateState *s); -static void pqdownheap(struct DeflateState *s, ct_data *tree, int k); -static void gen_bitlen(struct DeflateState *s, tree_desc *desc); -static void gen_codes(ct_data *tree, int max_code, uint16_t *bl_count); -static void build_tree(struct DeflateState *s, tree_desc *desc); -static void scan_tree(struct DeflateState *s, ct_data *tree, int max_code); -static void send_tree(struct DeflateState *s, ct_data *tree, int max_code); -static int build_bl_tree(struct DeflateState *s); -static void send_all_trees(struct DeflateState *s, int lcodes, int dcodes, - int blcodes); -static void compress_block(struct DeflateState *s, const ct_data *ltree, - const ct_data *dtree); -static int detect_data_type(struct DeflateState *s); -static unsigned bi_reverse(unsigned value, int length); -static void bi_windup(struct DeflateState *s); -static void bi_flush(struct DeflateState *s); +local void tr_static_init OF((void)); +local void init_block OF((deflate_state *s)); +local void pqdownheap OF((deflate_state *s, ct_data *tree, int k)); +local void gen_bitlen OF((deflate_state *s, tree_desc *desc)); +local void gen_codes OF((ct_data *tree, int max_code, ushf *bl_count)); +local void build_tree OF((deflate_state *s, tree_desc *desc)); +local void scan_tree OF((deflate_state *s, ct_data *tree, int max_code)); +local void send_tree OF((deflate_state *s, ct_data *tree, int max_code)); +local int build_bl_tree OF((deflate_state *s)); +local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes, + int blcodes)); +local void compress_block OF((deflate_state *s, const ct_data *ltree, + const ct_data *dtree)); +local int detect_data_type OF((deflate_state *s)); +local unsigned bi_reverse OF((unsigned 
code, int len)); +local void bi_windup OF((deflate_state *s)); +local void bi_flush OF((deflate_state *s)); #ifdef GEN_TREES_H -static void gen_trees_header(void); +local void gen_trees_header OF((void)); #endif #ifndef ZLIB_DEBUG -#define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len) -/* Send a code of the given tree. c and tree must not have side effects */ +# define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len) + /* Send a code of the given tree. c and tree must not have side effects */ #else /* !ZLIB_DEBUG */ -#define send_code(s, c, tree) \ - { \ - if (z_verbose > 2) kprintf("\ncd %3d ", (c)); \ - send_bits(s, tree[c].Code, tree[c].Len); \ - } +# define send_code(s, c, tree) \ + { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \ + send_bits(s, tree[c].Code, tree[c].Len); } #endif -/** +/* =========================================================================== * Output a short LSB first on the stream. * IN assertion: there is enough room in pendingBuf. */ -#define put_short(s, w) \ - { \ - put_byte(s, (uint8_t)((w)&0xff)); \ - put_byte(s, (uint8_t)((uint16_t)(w) >> 8)); \ - } +#define put_short(s, w) { \ + put_byte(s, (uch)((w) & 0xff)); \ + put_byte(s, (uch)((ush)(w) >> 8)); \ +} -/** +/* =========================================================================== * Send a value on a given number of bits. * IN assertion: length <= 16 and value fits in length bits. */ #ifdef ZLIB_DEBUG -static void send_bits(struct DeflateState *s, int value, int length) { - Tracevv((" l %2d v %4x ", length, value)); - Assert(length > 0 && length <= 15, "invalid length"); - s->bits_sent += (uint64_t)length; - /* If not enough room in bi_buf, use (valid) bits from bi_buf and - * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid)) - * unused bits in value. - */ - if (s->bi_valid > (int)Buf_size - length) { - s->bi_buf |= (uint16_t)value << s->bi_valid; - put_short(s, s->bi_buf); - s->bi_buf = (uint16_t)value >> (Buf_size - s->bi_valid); - s->bi_valid += length - Buf_size; - } else { - s->bi_buf |= (uint16_t)value << s->bi_valid; - s->bi_valid += length; - } +local void send_bits OF((deflate_state *s, int value, int length)); + +local void send_bits(s, value, length) + deflate_state *s; + int value; /* value to send */ + int length; /* number of bits */ +{ + Tracevv((" l %2d v %4x ", length, value)); + Assert(length > 0 && length <= 15, "invalid length"); + s->bits_sent += (ulg)length; + + /* If not enough room in bi_buf, use (valid) bits from bi_buf and + * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid)) + * unused bits in value. 
+ */ + if (s->bi_valid > (int)Buf_size - length) { + s->bi_buf |= (ush)value << s->bi_valid; + put_short(s, s->bi_buf); + s->bi_buf = (ush)value >> (Buf_size - s->bi_valid); + s->bi_valid += length - Buf_size; + } else { + s->bi_buf |= (ush)value << s->bi_valid; + s->bi_valid += length; + } } #else /* !ZLIB_DEBUG */ -#define send_bits(s, value, length) \ - { \ - int len = length; \ - if (s->bi_valid > (int)Buf_size - len) { \ - int val = (int)value; \ - s->bi_buf |= (uint16_t)val << s->bi_valid; \ - put_short(s, s->bi_buf); \ - s->bi_buf = (uint16_t)val >> (Buf_size - s->bi_valid); \ - s->bi_valid += len - Buf_size; \ - } else { \ - s->bi_buf |= (uint16_t)(value) << s->bi_valid; \ - s->bi_valid += len; \ - } \ - } + +#define send_bits(s, value, length) \ +{ int len = length;\ + if (s->bi_valid > (int)Buf_size - len) {\ + int val = (int)value;\ + s->bi_buf |= (ush)val << s->bi_valid;\ + put_short(s, s->bi_buf);\ + s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\ + s->bi_valid += len - Buf_size;\ + } else {\ + s->bi_buf |= (ush)(value) << s->bi_valid;\ + s->bi_valid += len;\ + }\ +} #endif /* ZLIB_DEBUG */ + /* the arguments must not have side effects */ -/** +/* =========================================================================== * Initialize the various 'constant' tables. */ -static void tr_static_init(void) { +local void tr_static_init() +{ #if defined(GEN_TREES_H) || !defined(STDC) - static int static_init_done = 0; - int n; /* iterates over tree elements */ - int bits; /* bit counter */ - int length; /* length value */ - int code; /* code value */ - int dist; /* distance index */ - uint16_t bl_count[MAX_BITS + 1]; - /* number of codes at each bit length for an optimal tree */ + static int static_init_done = 0; + int n; /* iterates over tree elements */ + int bits; /* bit counter */ + int length; /* length value */ + int code; /* code value */ + int dist; /* distance index */ + ush bl_count[MAX_BITS+1]; + /* number of codes at each bit length for an optimal tree */ - if (static_init_done) return; + if (static_init_done) return; /* For some embedded targets, global variables are not initialized: */ #ifdef NO_INIT_GLOBAL_POINTERS - static_l_desc.static_tree = kZlibStaticLtree; - static_l_desc.extra_bits = extra_lbits; - static_d_desc.static_tree = kZlibStaticDtree; - static_d_desc.extra_bits = extra_dbits; - static_bl_desc.extra_bits = extra_blbits; + static_l_desc.static_tree = static_ltree; + static_l_desc.extra_bits = extra_lbits; + static_d_desc.static_tree = static_dtree; + static_d_desc.extra_bits = extra_dbits; + static_bl_desc.extra_bits = extra_blbits; #endif - /* Initialize the mapping length (0..255) -> length code (0..28) */ - length = 0; - for (code = 0; code < LENGTH_CODES - 1; code++) { - kZlibBaseLength[code] = length; - for (n = 0; n < (1u << extra_lbits[code]); n++) { - kZlibLengthCode[length++] = (uint8_t)code; + /* Initialize the mapping length (0..255) -> length code (0..28) */ + length = 0; + for (code = 0; code < LENGTH_CODES-1; code++) { + base_length[code] = length; + for (n = 0; n < (1< dist code (0..29) */ - dist = 0; - for (code = 0; code < 16; code++) { - kZlibBaseDist[code] = dist; - for (n = 0; n < (1u << extra_dbits[code]); n++) { - kZlibDistCode[dist++] = (uint8_t)code; + /* Initialize the mapping dist (0..32K) -> dist code (0..29) */ + dist = 0; + for (code = 0 ; code < 16; code++) { + base_dist[code] = dist; + for (n = 0; n < (1<>= 7; /* from now on, all distances are divided by 128 */ - for (; code < D_CODES; code++) { - kZlibBaseDist[code] = 
dist << 7; - for (n = 0; n < (1 << (extra_dbits[code] - 7)); n++) { - kZlibDistCode[256 + dist++] = (uint8_t)code; + Assert (dist == 256, "tr_static_init: dist != 256"); + dist >>= 7; /* from now on, all distances are divided by 128 */ + for ( ; code < D_CODES; code++) { + base_dist[code] = dist << 7; + for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) { + _dist_code[256 + dist++] = (uch)code; + } } - } - Assert(dist == 256, "tr_static_init: 256+dist != 512"); + Assert (dist == 256, "tr_static_init: 256+dist != 512"); - /* Construct the codes of the static literal tree */ - for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0; - n = 0; - while (n <= 143) kZlibStaticLtree[n++].Len = 8, bl_count[8]++; - while (n <= 255) kZlibStaticLtree[n++].Len = 9, bl_count[9]++; - while (n <= 279) kZlibStaticLtree[n++].Len = 7, bl_count[7]++; - while (n <= 287) kZlibStaticLtree[n++].Len = 8, bl_count[8]++; - /* Codes 286 and 287 do not exist, but we must include them in the - * tree construction to get a canonical Huffman tree (longest code - * all ones) - */ - gen_codes((ct_data *)kZlibStaticLtree, L_CODES + 1, bl_count); + /* Construct the codes of the static literal tree */ + for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0; + n = 0; + while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++; + while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++; + while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++; + while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++; + /* Codes 286 and 287 do not exist, but we must include them in the + * tree construction to get a canonical Huffman tree (longest code + * all ones) + */ + gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count); - /* The static distance tree is trivial: */ - for (n = 0; n < D_CODES; n++) { - kZlibStaticDtree[n].Len = 5; - kZlibStaticDtree[n].Code = bi_reverse((unsigned)n, 5); - } - static_init_done = 1; + /* The static distance tree is trivial: */ + for (n = 0; n < D_CODES; n++) { + static_dtree[n].Len = 5; + static_dtree[n].Code = bi_reverse((unsigned)n, 5); + } + static_init_done = 1; -#ifdef GEN_TREES_H - gen_trees_header(); -#endif +# ifdef GEN_TREES_H + gen_trees_header(); +# endif #endif /* defined(GEN_TREES_H) || !defined(STDC) */ } -/** - * Genererate the file trees.h describing the static trees. +/* =========================================================================== + * Generate the file trees.h describing the static trees. */ #ifdef GEN_TREES_H +# ifndef ZLIB_DEBUG +# endif -#define SEPARATOR(i, last, width) \ - ((i) == (last) ? "\n};\n\n" : ((i) % (width) == (width)-1 ? ",\n" : ", ")) +# define SEPARATOR(i, last, width) \ + ((i) == (last)? "\n};\n\n" : \ + ((i) % (width) == (width)-1 ? 
",\n" : ", ")) -void gen_trees_header(void) { - FILE *header = fopen("trees.h", "w"); - int i; +void gen_trees_header() +{ + FILE *header = fopen("trees.h", "w"); + int i; - Assert(header != NULL, "Can't open trees.h"); - fprintf(header, "/* header created automatically with -DGEN_TREES_H */\n\n"); + Assert (header != NULL, "Can't open trees.h"); + fprintf(header, + "/* header created automatically with -DGEN_TREES_H */\n\n"); - fprintf(header, "local const ct_data kZlibStaticLtree[L_CODES+2] = {\n"); - for (i = 0; i < L_CODES + 2; i++) { - fprintf(header, "{{%3u},{%3u}}%s", kZlibStaticLtree[i].Code, - kZlibStaticLtree[i].Len, SEPARATOR(i, L_CODES + 1, 5)); - } + fprintf(header, "local const ct_data static_ltree[L_CODES+2] = {\n"); + for (i = 0; i < L_CODES+2; i++) { + fprintf(header, "{{%3u},{%3u}}%s", static_ltree[i].Code, + static_ltree[i].Len, SEPARATOR(i, L_CODES+1, 5)); + } - fprintf(header, "local const ct_data kZlibStaticDtree[D_CODES] = {\n"); - for (i = 0; i < D_CODES; i++) { - fprintf(header, "{{%2u},{%2u}}%s", kZlibStaticDtree[i].Code, - kZlibStaticDtree[i].Len, SEPARATOR(i, D_CODES - 1, 5)); - } + fprintf(header, "local const ct_data static_dtree[D_CODES] = {\n"); + for (i = 0; i < D_CODES; i++) { + fprintf(header, "{{%2u},{%2u}}%s", static_dtree[i].Code, + static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5)); + } - fprintf(header, "const uint8_t kZlibDistCode[DIST_CODE_LEN] = {\n"); - for (i = 0; i < DIST_CODE_LEN; i++) { - fprintf(header, "%2u%s", kZlibDistCode[i], - SEPARATOR(i, DIST_CODE_LEN - 1, 20)); - } + fprintf(header, "const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = {\n"); + for (i = 0; i < DIST_CODE_LEN; i++) { + fprintf(header, "%2u%s", _dist_code[i], + SEPARATOR(i, DIST_CODE_LEN-1, 20)); + } - fprintf(header, "const uint8_t kZlibLengthCode[MAX_MATCH-MIN_MATCH+1]= {\n"); - for (i = 0; i < MAX_MATCH - MIN_MATCH + 1; i++) { - fprintf(header, "%2u%s", kZlibLengthCode[i], - SEPARATOR(i, MAX_MATCH - MIN_MATCH, 20)); - } + fprintf(header, + "const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= {\n"); + for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) { + fprintf(header, "%2u%s", _length_code[i], + SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20)); + } - fprintf(header, "local const int kZlibBaseLength[LENGTH_CODES] = {\n"); - for (i = 0; i < LENGTH_CODES; i++) { - fprintf(header, "%1u%s", kZlibBaseLength[i], - SEPARATOR(i, LENGTH_CODES - 1, 20)); - } + fprintf(header, "local const int base_length[LENGTH_CODES] = {\n"); + for (i = 0; i < LENGTH_CODES; i++) { + fprintf(header, "%1u%s", base_length[i], + SEPARATOR(i, LENGTH_CODES-1, 20)); + } - fprintf(header, "local const int kZlibBaseDist[D_CODES] = {\n"); - for (i = 0; i < D_CODES; i++) { - fprintf(header, "%5u%s", kZlibBaseDist[i], SEPARATOR(i, D_CODES - 1, 10)); - } + fprintf(header, "local const int base_dist[D_CODES] = {\n"); + for (i = 0; i < D_CODES; i++) { + fprintf(header, "%5u%s", base_dist[i], + SEPARATOR(i, D_CODES-1, 10)); + } - fclose(header); + fclose(header); } #endif /* GEN_TREES_H */ -/** +/* =========================================================================== * Initialize the tree data structures for a new zlib stream. 
*/ -void _tr_init(struct DeflateState *s) { - tr_static_init(); +void ZLIB_INTERNAL _tr_init(s) + deflate_state *s; +{ + tr_static_init(); - s->l_desc.dyn_tree = s->dyn_ltree; - s->l_desc.stat_desc = &static_l_desc; + s->l_desc.dyn_tree = s->dyn_ltree; + s->l_desc.stat_desc = &static_l_desc; - s->d_desc.dyn_tree = s->dyn_dtree; - s->d_desc.stat_desc = &static_d_desc; + s->d_desc.dyn_tree = s->dyn_dtree; + s->d_desc.stat_desc = &static_d_desc; - s->bl_desc.dyn_tree = s->bl_tree; - s->bl_desc.stat_desc = &static_bl_desc; + s->bl_desc.dyn_tree = s->bl_tree; + s->bl_desc.stat_desc = &static_bl_desc; - s->bi_buf = 0; - s->bi_valid = 0; + s->bi_buf = 0; + s->bi_valid = 0; #ifdef ZLIB_DEBUG - s->compressed_len = 0L; - s->bits_sent = 0L; + s->compressed_len = 0L; + s->bits_sent = 0L; #endif - /* Initialize the first block of the first file: */ - init_block(s); + /* Initialize the first block of the first file: */ + init_block(s); } -/** +/* =========================================================================== * Initialize a new block. */ -static void init_block(struct DeflateState *s) { - int n; /* iterates over tree elements */ +local void init_block(s) + deflate_state *s; +{ + int n; /* iterates over tree elements */ - /* Initialize the trees. */ - for (n = 0; n < L_CODES; n++) s->dyn_ltree[n].Freq = 0; - for (n = 0; n < D_CODES; n++) s->dyn_dtree[n].Freq = 0; - for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0; + /* Initialize the trees. */ + for (n = 0; n < L_CODES; n++) s->dyn_ltree[n].Freq = 0; + for (n = 0; n < D_CODES; n++) s->dyn_dtree[n].Freq = 0; + for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0; - s->dyn_ltree[END_BLOCK].Freq = 1; - s->opt_len = s->static_len = 0L; - s->sym_next = s->matches = 0; + s->dyn_ltree[END_BLOCK].Freq = 1; + s->opt_len = s->static_len = 0L; + s->sym_next = s->matches = 0; } #define SMALLEST 1 /* Index within the heap array of least frequent node in the Huffman tree */ -/** + +/* =========================================================================== * Remove the smallest element from the heap and recreate the heap with * one less element. Updates heap and heap_len. */ -#define pqremove(s, tree, top) \ - { \ - top = s->heap[SMALLEST]; \ +#define pqremove(s, tree, top) \ +{\ + top = s->heap[SMALLEST]; \ s->heap[SMALLEST] = s->heap[s->heap_len--]; \ - pqdownheap(s, tree, SMALLEST); \ - } + pqdownheap(s, tree, SMALLEST); \ +} -/** +/* =========================================================================== * Compares to subtrees, using the tree depth as tie breaker when * the subtrees have equal frequency. This minimizes the worst case length. */ #define smaller(tree, n, m, depth) \ - (tree[n].Freq < tree[m].Freq || \ + (tree[n].Freq < tree[m].Freq || \ (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m])) -/** +/* =========================================================================== * Restore the heap property by moving down the tree starting at node k, * exchanging a node with the smallest of its two sons if necessary, stopping * when the heap property is re-established (each father smaller than its * two sons). 
- * @param tree is tree to restore - * @param k is node to move down */ -static void pqdownheap(struct DeflateState *s, ct_data *tree, int k) { - int v = s->heap[k]; - int j = k << 1; /* left son of k */ - while (j <= s->heap_len) { - /* Set j to the smallest of the two sons: */ - if (j < s->heap_len && - smaller(tree, s->heap[j + 1], s->heap[j], s->depth)) { - j++; +local void pqdownheap(s, tree, k) + deflate_state *s; + ct_data *tree; /* the tree to restore */ + int k; /* node to move down */ +{ + int v = s->heap[k]; + int j = k << 1; /* left son of k */ + while (j <= s->heap_len) { + /* Set j to the smallest of the two sons: */ + if (j < s->heap_len && + smaller(tree, s->heap[j+1], s->heap[j], s->depth)) { + j++; + } + /* Exit if v is smaller than both sons */ + if (smaller(tree, v, s->heap[j], s->depth)) break; + + /* Exchange v with the smallest son */ + s->heap[k] = s->heap[j]; k = j; + + /* And continue down the tree, setting j to the left son of k */ + j <<= 1; } - /* Exit if v is smaller than both sons */ - if (smaller(tree, v, s->heap[j], s->depth)) break; - /* Exchange v with the smallest son */ - s->heap[k] = s->heap[j]; - k = j; - /* And continue down the tree, setting j to the left son of k */ - j <<= 1; - } - s->heap[k] = v; + s->heap[k] = v; } -/** +/* =========================================================================== * Compute the optimal bit lengths for a tree and update the total bit length * for the current block. * IN assertion: the fields freq and dad are set, heap[heap_max] and @@ -428,125 +485,128 @@ static void pqdownheap(struct DeflateState *s, ct_data *tree, int k) { * The length opt_len is updated; static_len is also updated if stree is * not null. */ -static void gen_bitlen(struct DeflateState *s, tree_desc *desc) { - ct_data *tree = desc->dyn_tree; - int max_code = desc->max_code; - const ct_data *stree = desc->stat_desc->static_tree; - const intf *extra = desc->stat_desc->extra_bits; - int base = desc->stat_desc->extra_base; - int max_length = desc->stat_desc->max_length; - int h; /* heap index */ - int n, m; /* iterate over the tree elements */ - int bits; /* bit length */ - int xbits; /* extra bits */ - uint16_t f; /* frequency */ - int overflow = 0; /* number of elements with bit length too large */ +local void gen_bitlen(s, desc) + deflate_state *s; + tree_desc *desc; /* the tree descriptor */ +{ + ct_data *tree = desc->dyn_tree; + int max_code = desc->max_code; + const ct_data *stree = desc->stat_desc->static_tree; + const intf *extra = desc->stat_desc->extra_bits; + int base = desc->stat_desc->extra_base; + int max_length = desc->stat_desc->max_length; + int h; /* heap index */ + int n, m; /* iterate over the tree elements */ + int bits; /* bit length */ + int xbits; /* extra bits */ + ush f; /* frequency */ + int overflow = 0; /* number of elements with bit length too large */ - for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0; + for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0; - /* In a first pass, compute the optimal bit lengths (which may - * overflow in the case of the bit length tree). 
- */ - tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */ - - for (h = s->heap_max + 1; h < HEAP_SIZE; h++) { - n = s->heap[h]; - bits = tree[tree[n].Dad].Len + 1; - if (bits > max_length) bits = max_length, overflow++; - tree[n].Len = (uint16_t)bits; - /* We overwrite tree[n].Dad which is no longer needed */ - - if (n > max_code) continue; /* not a leaf node */ - - s->bl_count[bits]++; - xbits = 0; - if (n >= base) xbits = extra[n - base]; - f = tree[n].Freq; - s->opt_len += (uint64_t)f * (unsigned)(bits + xbits); - if (stree) s->static_len += (uint64_t)f * (unsigned)(stree[n].Len + xbits); - } - if (overflow == 0) return; - - Tracev(("\nbit length overflow\n")); - /* This happens for example on obj2 and pic of the Calgary corpus */ - - /* Find the first bit length which could increase: */ - do { - bits = max_length - 1; - while (s->bl_count[bits] == 0) bits--; - s->bl_count[bits]--; /* move one leaf down the tree */ - s->bl_count[bits + 1] += 2; /* move one overflow item as its brother */ - s->bl_count[max_length]--; - /* The brother of the overflow item also moves one step up, - * but this does not affect bl_count[max_length] + /* In a first pass, compute the optimal bit lengths (which may + * overflow in the case of the bit length tree). */ - overflow -= 2; - } while (overflow > 0); + tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */ - /* Now recompute all bit lengths, scanning in increasing frequency. - * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all - * lengths instead of fixing only the wrong ones. This idea is taken - * from 'ar' written by Haruhiko Okumura.) - */ - for (bits = max_length; bits != 0; bits--) { - n = s->bl_count[bits]; - while (n != 0) { - m = s->heap[--h]; - if (m > max_code) continue; - if ((unsigned)tree[m].Len != (unsigned)bits) { - Tracev(("code %d bits %d->%d\n", m, tree[m].Len, bits)); - s->opt_len += ((uint64_t)bits - tree[m].Len) * tree[m].Freq; - tree[m].Len = (uint16_t)bits; - } - n--; + for (h = s->heap_max+1; h < HEAP_SIZE; h++) { + n = s->heap[h]; + bits = tree[tree[n].Dad].Len + 1; + if (bits > max_length) bits = max_length, overflow++; + tree[n].Len = (ush)bits; + /* We overwrite tree[n].Dad which is no longer needed */ + + if (n > max_code) continue; /* not a leaf node */ + + s->bl_count[bits]++; + xbits = 0; + if (n >= base) xbits = extra[n-base]; + f = tree[n].Freq; + s->opt_len += (ulg)f * (unsigned)(bits + xbits); + if (stree) s->static_len += (ulg)f * (unsigned)(stree[n].Len + xbits); + } + if (overflow == 0) return; + + Tracev(("\nbit length overflow\n")); + /* This happens for example on obj2 and pic of the Calgary corpus */ + + /* Find the first bit length which could increase: */ + do { + bits = max_length-1; + while (s->bl_count[bits] == 0) bits--; + s->bl_count[bits]--; /* move one leaf down the tree */ + s->bl_count[bits+1] += 2; /* move one overflow item as its brother */ + s->bl_count[max_length]--; + /* The brother of the overflow item also moves one step up, + * but this does not affect bl_count[max_length] + */ + overflow -= 2; + } while (overflow > 0); + + /* Now recompute all bit lengths, scanning in increasing frequency. + * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all + * lengths instead of fixing only the wrong ones. This idea is taken + * from 'ar' written by Haruhiko Okumura.) 
+ */ + for (bits = max_length; bits != 0; bits--) { + n = s->bl_count[bits]; + while (n != 0) { + m = s->heap[--h]; + if (m > max_code) continue; + if ((unsigned) tree[m].Len != (unsigned) bits) { + Tracev(("code %d bits %d->%d\n", m, tree[m].Len, bits)); + s->opt_len += ((ulg)bits - tree[m].Len) * tree[m].Freq; + tree[m].Len = (ush)bits; + } + n--; + } } - } } -/** - * Generates codes for given tree and bit counts (need not be optimal). - * +/* =========================================================================== + * Generate the codes for a given tree and bit counts (which need not be + * optimal). * IN assertion: the array bl_count contains the bit length statistics for * the given tree and the field len is set for all tree elements. * OUT assertion: the field code is set for all tree elements of non * zero code length. - * - * @param max_code is largest code with non zero frequency - * @param bl_count is number of codes at each bit length */ -static void gen_codes(ct_data *tree, int max_code, uint16_t *bl_count) { - uint16_t next_code[MAX_BITS + 1]; /* next code value for each bit length */ - unsigned code = 0; /* running code value */ - int bits; /* bit index */ - int n; /* code index */ +local void gen_codes (tree, max_code, bl_count) + ct_data *tree; /* the tree to decorate */ + int max_code; /* largest code with non zero frequency */ + ushf *bl_count; /* number of codes at each bit length */ +{ + ush next_code[MAX_BITS+1]; /* next code value for each bit length */ + unsigned code = 0; /* running code value */ + int bits; /* bit index */ + int n; /* code index */ - /* The distribution counts are first used to generate the code values - * without bit reversal. - */ - for (bits = 1; bits <= MAX_BITS; bits++) { - code = (code + bl_count[bits - 1]) << 1; - next_code[bits] = (uint16_t)code; - } - /* Check that the bit counts in bl_count are consistent. The last code - * must be all ones. - */ - Assert(code + bl_count[MAX_BITS] - 1 == (1 << MAX_BITS) - 1, - "inconsistent bit counts"); - Tracev(("\ngen_codes: max_code %d ", max_code)); + /* The distribution counts are first used to generate the code values + * without bit reversal. + */ + for (bits = 1; bits <= MAX_BITS; bits++) { + code = (code + bl_count[bits-1]) << 1; + next_code[bits] = (ush)code; + } + /* Check that the bit counts in bl_count are consistent. The last code + * must be all ones. + */ + Assert (code + bl_count[MAX_BITS]-1 == (1<dyn_tree; - const ct_data *stree = desc->stat_desc->static_tree; - int elems = desc->stat_desc->elems; - int n, m; /* iterate over heap elements */ - int max_code = -1; /* largest code with non zero frequency */ - int node; /* new node being created */ +local void build_tree(s, desc) + deflate_state *s; + tree_desc *desc; /* the tree descriptor */ +{ + ct_data *tree = desc->dyn_tree; + const ct_data *stree = desc->stat_desc->static_tree; + int elems = desc->stat_desc->elems; + int n, m; /* iterate over heap elements */ + int max_code = -1; /* largest code with non zero frequency */ + int node; /* new node being created */ - /* Construct the initial heap, with least frequent element in - * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1]. - * heap[0] is not used. - */ - s->heap_len = 0, s->heap_max = HEAP_SIZE; + /* Construct the initial heap, with least frequent element in + * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1]. + * heap[0] is not used. 
+ */ + s->heap_len = 0, s->heap_max = HEAP_SIZE; - for (n = 0; n < elems; n++) { - if (tree[n].Freq != 0) { - s->heap[++(s->heap_len)] = max_code = n; - s->depth[n] = 0; - } else { - tree[n].Len = 0; + for (n = 0; n < elems; n++) { + if (tree[n].Freq != 0) { + s->heap[++(s->heap_len)] = max_code = n; + s->depth[n] = 0; + } else { + tree[n].Len = 0; + } } - } - /* The pkzip format requires that at least one distance code exists, - * and that at least one bit should be sent even if there is only one - * possible code. So to avoid special checks later on we force at least - * two codes of non zero frequency. - */ - while (s->heap_len < 2) { - node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0); - tree[node].Freq = 1; - s->depth[node] = 0; - s->opt_len--; - if (stree) s->static_len -= stree[node].Len; - /* node is 0 or 1 so it does not have extra bits */ - } - desc->max_code = max_code; + /* The pkzip format requires that at least one distance code exists, + * and that at least one bit should be sent even if there is only one + * possible code. So to avoid special checks later on we force at least + * two codes of non zero frequency. + */ + while (s->heap_len < 2) { + node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0); + tree[node].Freq = 1; + s->depth[node] = 0; + s->opt_len--; if (stree) s->static_len -= stree[node].Len; + /* node is 0 or 1 so it does not have extra bits */ + } + desc->max_code = max_code; - /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree, - * establish sub-heaps of increasing lengths: - */ - for (n = s->heap_len / 2; n >= 1; n--) pqdownheap(s, tree, n); + /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree, + * establish sub-heaps of increasing lengths: + */ + for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n); - /* Construct the Huffman tree by repeatedly combining the least two - * frequent nodes. - */ - node = elems; /* next internal node of the tree */ - do { - pqremove(s, tree, n); /* n = node of least frequency */ - m = s->heap[SMALLEST]; /* m = node of next least frequency */ + /* Construct the Huffman tree by repeatedly combining the least two + * frequent nodes. + */ + node = elems; /* next internal node of the tree */ + do { + pqremove(s, tree, n); /* n = node of least frequency */ + m = s->heap[SMALLEST]; /* m = node of next least frequency */ - s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */ - s->heap[--(s->heap_max)] = m; + s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */ + s->heap[--(s->heap_max)] = m; - /* Create a new node father of n and m */ - tree[node].Freq = tree[n].Freq + tree[m].Freq; - s->depth[node] = - (uint8_t)((s->depth[n] >= s->depth[m] ? s->depth[n] : s->depth[m]) + 1); - tree[n].Dad = tree[m].Dad = (uint16_t)node; + /* Create a new node father of n and m */ + tree[node].Freq = tree[n].Freq + tree[m].Freq; + s->depth[node] = (uch)((s->depth[n] >= s->depth[m] ? 
+ s->depth[n] : s->depth[m]) + 1); + tree[n].Dad = tree[m].Dad = (ush)node; #ifdef DUMP_BL_TREE - if (tree == s->bl_tree) { - kprintf("\nnode %d(%d), sons %d(%d) %d(%d)", node, tree[node].Freq, n, - tree[n].Freq, m, tree[m].Freq); - } + if (tree == s->bl_tree) { + fprintf("\nnode %d(%d), sons %d(%d) %d(%d)", + node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq); + } #endif - /* and insert the new node in the heap */ - s->heap[SMALLEST] = node++; - pqdownheap(s, tree, SMALLEST); + /* and insert the new node in the heap */ + s->heap[SMALLEST] = node++; + pqdownheap(s, tree, SMALLEST); - } while (s->heap_len >= 2); + } while (s->heap_len >= 2); - s->heap[--(s->heap_max)] = s->heap[SMALLEST]; + s->heap[--(s->heap_max)] = s->heap[SMALLEST]; - /* At this point, the fields freq and dad are set. We can now - * generate the bit lengths. - */ - gen_bitlen(s, (tree_desc *)desc); + /* At this point, the fields freq and dad are set. We can now + * generate the bit lengths. + */ + gen_bitlen(s, (tree_desc *)desc); - /* The field len is now set, we can generate the bit codes */ - gen_codes((ct_data *)tree, max_code, s->bl_count); + /* The field len is now set, we can generate the bit codes */ + gen_codes ((ct_data *)tree, max_code, s->bl_count); } -/** - * Scan a literal or distance tree to determine the frequencies of the - * codes in the bit length tree. - * - * @param tree is tree to be scanned - * @param max_code is its largest code of non zero frequency +/* =========================================================================== + * Scan a literal or distance tree to determine the frequencies of the codes + * in the bit length tree. */ -static void scan_tree(struct DeflateState *s, ct_data *tree, int max_code) { - int n; /* iterates over all tree elements */ - int prevlen = -1; /* last emitted length */ - int curlen; /* length of current code */ - int nextlen = tree[0].Len; /* length of next code */ - int count = 0; /* repeat count of the current code */ - int max_count = 7; /* max repeat count */ - int min_count = 4; /* min repeat count */ +local void scan_tree (s, tree, max_code) + deflate_state *s; + ct_data *tree; /* the tree to be scanned */ + int max_code; /* and its largest code of non zero frequency */ +{ + int n; /* iterates over all tree elements */ + int prevlen = -1; /* last emitted length */ + int curlen; /* length of current code */ + int nextlen = tree[0].Len; /* length of next code */ + int count = 0; /* repeat count of the current code */ + int max_count = 7; /* max repeat count */ + int min_count = 4; /* min repeat count */ - if (nextlen == 0) max_count = 138, min_count = 3; - tree[max_code + 1].Len = (uint16_t)0xffff; /* guard */ + if (nextlen == 0) max_count = 138, min_count = 3; + tree[max_code+1].Len = (ush)0xffff; /* guard */ - for (n = 0; n <= max_code; n++) { - curlen = nextlen; - nextlen = tree[n + 1].Len; - if (++count < max_count && curlen == nextlen) { - continue; - } else if (count < min_count) { - s->bl_tree[curlen].Freq += count; - } else if (curlen != 0) { - if (curlen != prevlen) s->bl_tree[curlen].Freq++; - s->bl_tree[REP_3_6].Freq++; - } else if (count <= 10) { - s->bl_tree[REPZ_3_10].Freq++; - } else { - s->bl_tree[REPZ_11_138].Freq++; + for (n = 0; n <= max_code; n++) { + curlen = nextlen; nextlen = tree[n+1].Len; + if (++count < max_count && curlen == nextlen) { + continue; + } else if (count < min_count) { + s->bl_tree[curlen].Freq += count; + } else if (curlen != 0) { + if (curlen != prevlen) s->bl_tree[curlen].Freq++; + s->bl_tree[REP_3_6].Freq++; + 
} else if (count <= 10) { + s->bl_tree[REPZ_3_10].Freq++; + } else { + s->bl_tree[REPZ_11_138].Freq++; + } + count = 0; prevlen = curlen; + if (nextlen == 0) { + max_count = 138, min_count = 3; + } else if (curlen == nextlen) { + max_count = 6, min_count = 3; + } else { + max_count = 7, min_count = 4; + } } - count = 0; - prevlen = curlen; - if (nextlen == 0) { - max_count = 138, min_count = 3; - } else if (curlen == nextlen) { - max_count = 6, min_count = 3; - } else { - max_count = 7, min_count = 4; - } - } } -/** +/* =========================================================================== * Send a literal or distance tree in compressed form, using the codes in * bl_tree. - * - * @param tree is tree to be scanned - * @param max_code is its largest code of non zero frequency */ -static void send_tree(struct DeflateState *s, ct_data *tree, int max_code) { - int n; /* iterates over all tree elements */ - int prevlen = -1; /* last emitted length */ - int curlen; /* length of current code */ - int nextlen = tree[0].Len; /* length of next code */ - int count = 0; /* repeat count of the current code */ - int max_count = 7; /* max repeat count */ - int min_count = 4; /* min repeat count */ +local void send_tree (s, tree, max_code) + deflate_state *s; + ct_data *tree; /* the tree to be scanned */ + int max_code; /* and its largest code of non zero frequency */ +{ + int n; /* iterates over all tree elements */ + int prevlen = -1; /* last emitted length */ + int curlen; /* length of current code */ + int nextlen = tree[0].Len; /* length of next code */ + int count = 0; /* repeat count of the current code */ + int max_count = 7; /* max repeat count */ + int min_count = 4; /* min repeat count */ - /* tree[max_code+1].Len = -1; */ /* guard already set */ - if (nextlen == 0) max_count = 138, min_count = 3; + /* tree[max_code+1].Len = -1; */ /* guard already set */ + if (nextlen == 0) max_count = 138, min_count = 3; - for (n = 0; n <= max_code; n++) { - curlen = nextlen; - nextlen = tree[n + 1].Len; - if (++count < max_count && curlen == nextlen) { - continue; - } else if (count < min_count) { - do { - send_code(s, curlen, s->bl_tree); - } while (--count != 0); + for (n = 0; n <= max_code; n++) { + curlen = nextlen; nextlen = tree[n+1].Len; + if (++count < max_count && curlen == nextlen) { + continue; + } else if (count < min_count) { + do { send_code(s, curlen, s->bl_tree); } while (--count != 0); - } else if (curlen != 0) { - if (curlen != prevlen) { - send_code(s, curlen, s->bl_tree); - count--; - } - Assert(count >= 3 && count <= 6, " 3_6?"); - send_code(s, REP_3_6, s->bl_tree); - send_bits(s, count - 3, 2); + } else if (curlen != 0) { + if (curlen != prevlen) { + send_code(s, curlen, s->bl_tree); count--; + } + Assert(count >= 3 && count <= 6, " 3_6?"); + send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2); - } else if (count <= 10) { - send_code(s, REPZ_3_10, s->bl_tree); - send_bits(s, count - 3, 3); + } else if (count <= 10) { + send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3); - } else { - send_code(s, REPZ_11_138, s->bl_tree); - send_bits(s, count - 11, 7); + } else { + send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7); + } + count = 0; prevlen = curlen; + if (nextlen == 0) { + max_count = 138, min_count = 3; + } else if (curlen == nextlen) { + max_count = 6, min_count = 3; + } else { + max_count = 7, min_count = 4; + } } - count = 0; - prevlen = curlen; - if (nextlen == 0) { - max_count = 138, min_count = 3; - } else if (curlen == nextlen) { - max_count 
= 6, min_count = 3; - } else { - max_count = 7, min_count = 4; - } - } } -/** +/* =========================================================================== * Construct the Huffman tree for the bit lengths and return the index in * bl_order of the last bit length code to send. */ -static int build_bl_tree(struct DeflateState *s) { - int max_blindex; /* index of last bit length code of non zero freq */ +local int build_bl_tree(s) + deflate_state *s; +{ + int max_blindex; /* index of last bit length code of non zero freq */ - /* Determine the bit length frequencies for literal and distance trees */ - scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code); - scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code); + /* Determine the bit length frequencies for literal and distance trees */ + scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code); + scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code); - /* Build the bit length tree: */ - build_tree(s, (tree_desc *)(&(s->bl_desc))); - /* opt_len now includes the length of the tree representations, except - * the lengths of the bit lengths codes and the 5+5+4 bits for the counts. - */ + /* Build the bit length tree: */ + build_tree(s, (tree_desc *)(&(s->bl_desc))); + /* opt_len now includes the length of the tree representations, except + * the lengths of the bit lengths codes and the 5+5+4 bits for the counts. + */ - /* Determine the number of bit length codes to send. The pkzip format - * requires that at least 4 bit length codes be sent. (appnote.txt says - * 3 but the actual value used is 4.) - */ - for (max_blindex = BL_CODES - 1; max_blindex >= 3; max_blindex--) { - if (s->bl_tree[bl_order[max_blindex]].Len != 0) break; - } - /* Update opt_len to include the bit length tree and counts */ - s->opt_len += 3 * ((uint64_t)max_blindex + 1) + 5 + 5 + 4; - Tracev(("\ndyn trees: dyn %ld, stat %ld", s->opt_len, s->static_len)); + /* Determine the number of bit length codes to send. The pkzip format + * requires that at least 4 bit length codes be sent. (appnote.txt says + * 3 but the actual value used is 4.) + */ + for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) { + if (s->bl_tree[bl_order[max_blindex]].Len != 0) break; + } + /* Update opt_len to include the bit length tree and counts */ + s->opt_len += 3*((ulg)max_blindex+1) + 5+5+4; + Tracev(("\ndyn trees: dyn %ld, stat %ld", + s->opt_len, s->static_len)); - return max_blindex; + return max_blindex; } -/** +/* =========================================================================== * Send the header for a block using dynamic Huffman trees: the counts, the * lengths of the bit length codes, the literal tree and the distance tree. * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4. 
*/ -static void send_all_trees(struct DeflateState *s, int lcodes, int dcodes, - int blcodes) { - int rank; /* index in bl_order */ +local void send_all_trees(s, lcodes, dcodes, blcodes) + deflate_state *s; + int lcodes, dcodes, blcodes; /* number of codes for each tree */ +{ + int rank; /* index in bl_order */ - Assert(lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes"); - Assert(lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES, - "too many codes"); - Tracev(("\nbl counts: ")); - send_bits(s, lcodes - 257, 5); /* not +255 as stated in appnote.txt */ - send_bits(s, dcodes - 1, 5); - send_bits(s, blcodes - 4, 4); /* not -3 as stated in appnote.txt */ - for (rank = 0; rank < blcodes; rank++) { - Tracev(("\nbl code %2d ", bl_order[rank])); - send_bits(s, s->bl_tree[bl_order[rank]].Len, 3); - } - Tracev(("\nbl tree: sent %ld", s->bits_sent)); + Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes"); + Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES, + "too many codes"); + Tracev(("\nbl counts: ")); + send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */ + send_bits(s, dcodes-1, 5); + send_bits(s, blcodes-4, 4); /* not -3 as stated in appnote.txt */ + for (rank = 0; rank < blcodes; rank++) { + Tracev(("\nbl code %2d ", bl_order[rank])); + send_bits(s, s->bl_tree[bl_order[rank]].Len, 3); + } + Tracev(("\nbl tree: sent %ld", s->bits_sent)); - send_tree(s, (ct_data *)s->dyn_ltree, lcodes - 1); /* literal tree */ - Tracev(("\nlit tree: sent %ld", s->bits_sent)); + send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */ + Tracev(("\nlit tree: sent %ld", s->bits_sent)); - send_tree(s, (ct_data *)s->dyn_dtree, dcodes - 1); /* distance tree */ - Tracev(("\ndist tree: sent %ld", s->bits_sent)); + send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */ + Tracev(("\ndist tree: sent %ld", s->bits_sent)); } -/** - * Sends stored block. 
- * @param last is one if this is the last block of file +/* =========================================================================== + * Send a stored block */ -void _tr_stored_block(struct DeflateState *s, charf *buf, uint64_t stored_len, - int last) { - send_bits(s, (STORED_BLOCK << 1) + last, 3); /* send block type */ - bi_windup(s); /* align on byte boundary */ - put_short(s, (uint16_t)stored_len); - put_short(s, (uint16_t)~stored_len); - if (stored_len) { - memcpy(s->pending_buf + s->pending, (Bytef *)buf, stored_len); - } - s->pending += stored_len; +void ZLIB_INTERNAL _tr_stored_block(s, buf, stored_len, last) + deflate_state *s; + charf *buf; /* input block */ + ulg stored_len; /* length of input block */ + int last; /* one if this is the last block for a file */ +{ + send_bits(s, (STORED_BLOCK<<1)+last, 3); /* send block type */ + bi_windup(s); /* align on byte boundary */ + put_short(s, (ush)stored_len); + put_short(s, (ush)~stored_len); + if (stored_len) + memcpy(s->pending_buf + s->pending, (Bytef *)buf, stored_len); + s->pending += stored_len; #ifdef ZLIB_DEBUG - s->compressed_len = (s->compressed_len + 3 + 7) & (uint64_t)~7L; - s->compressed_len += (stored_len + 4) << 3; - s->bits_sent += 2 * 16; - s->bits_sent += stored_len << 3; + s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L; + s->compressed_len += (stored_len + 4) << 3; + s->bits_sent += 2*16; + s->bits_sent += stored_len<<3; #endif } -/** - * Flushes bits in bit buffer to pending output (leaves at most 7 bits) +/* =========================================================================== + * Flush the bits in the bit buffer to pending output (leaves at most 7 bits) */ -void _tr_flush_bits(struct DeflateState *s) { - bi_flush(s); +void ZLIB_INTERNAL _tr_flush_bits(s) + deflate_state *s; +{ + bi_flush(s); } -/** - * Sends one empty static block to give enough lookahead for inflate. +/* =========================================================================== + * Send one empty static block to give enough lookahead for inflate. * This takes 10 bits, of which 7 may remain in the bit buffer. */ -void _tr_align(struct DeflateState *s) { - send_bits(s, STATIC_TREES << 1, 3); - send_code(s, END_BLOCK, kZlibStaticLtree); +void ZLIB_INTERNAL _tr_align(s) + deflate_state *s; +{ + send_bits(s, STATIC_TREES<<1, 3); + send_code(s, END_BLOCK, static_ltree); #ifdef ZLIB_DEBUG - s->compressed_len += 10L; /* 3 for block type, 7 for EOB */ + s->compressed_len += 10L; /* 3 for block type, 7 for EOB */ #endif - bi_flush(s); + bi_flush(s); } -/** - * Determine the best encoding for the current block: dynamic trees, - * static trees or store, and write out the encoded block. - * - * @param last is one if this is the last block of file +/* =========================================================================== + * Determine the best encoding for the current block: dynamic trees, static + * trees or store, and write out the encoded block. 
*/ -void _tr_flush_block(struct DeflateState *s, charf *buf, uint64_t stored_len, - int last) { - uint64_t opt_lenb, static_lenb; /* opt_len and static_len in bytes */ - int max_blindex = 0; /* index of last bit length code of non zero freq */ +void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last) + deflate_state *s; + charf *buf; /* input block, or NULL if too old */ + ulg stored_len; /* length of input block */ + int last; /* one if this is the last block for a file */ +{ + ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */ + int max_blindex = 0; /* index of last bit length code of non zero freq */ - /* Build the Huffman trees unless a stored block is forced */ - if (s->level > 0) { - /* Check if the file is binary or text */ - if (s->strm->data_type == Z_UNKNOWN) - s->strm->data_type = detect_data_type(s); + /* Build the Huffman trees unless a stored block is forced */ + if (s->level > 0) { - /* Construct the literal and distance trees */ - build_tree(s, (tree_desc *)(&(s->l_desc))); - Tracev(("\nlit data: dyn %ld, stat %ld", s->opt_len, s->static_len)); + /* Check if the file is binary or text */ + if (s->strm->data_type == Z_UNKNOWN) + s->strm->data_type = detect_data_type(s); - build_tree(s, (tree_desc *)(&(s->d_desc))); - Tracev(("\ndist data: dyn %ld, stat %ld", s->opt_len, s->static_len)); - /* At this point, opt_len and static_len are the total bit lengths of - * the compressed block data, excluding the tree representations. - */ + /* Construct the literal and distance trees */ + build_tree(s, (tree_desc *)(&(s->l_desc))); + Tracev(("\nlit data: dyn %ld, stat %ld", s->opt_len, + s->static_len)); - /* Build the bit length tree for the above two trees, and get the index - * in bl_order of the last bit length code to send. - */ - max_blindex = build_bl_tree(s); + build_tree(s, (tree_desc *)(&(s->d_desc))); + Tracev(("\ndist data: dyn %ld, stat %ld", s->opt_len, + s->static_len)); + /* At this point, opt_len and static_len are the total bit lengths of + * the compressed block data, excluding the tree representations. + */ - /* Determine the best encoding. Compute the block lengths in bytes. */ - opt_lenb = (s->opt_len + 3 + 7) >> 3; - static_lenb = (s->static_len + 3 + 7) >> 3; + /* Build the bit length tree for the above two trees, and get the index + * in bl_order of the last bit length code to send. + */ + max_blindex = build_bl_tree(s); - Tracev(("\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ", opt_lenb, - s->opt_len, static_lenb, s->static_len, stored_len, - s->sym_next / 3)); + /* Determine the best encoding. Compute the block lengths in bytes. 
*/ + opt_lenb = (s->opt_len+3+7)>>3; + static_lenb = (s->static_len+3+7)>>3; - if (static_lenb <= opt_lenb) opt_lenb = static_lenb; + Tracev(("\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ", + opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len, + s->sym_next / 3)); - } else { - Assert(buf != (char *)0, "lost buf"); - opt_lenb = static_lenb = stored_len + 5; /* force a stored block */ - } + if (static_lenb <= opt_lenb) opt_lenb = static_lenb; + + } else { + Assert(buf != (char*)0, "lost buf"); + opt_lenb = static_lenb = stored_len + 5; /* force a stored block */ + } #ifdef FORCE_STORED - if (buf != (char *)0) { /* force stored block */ + if (buf != (char*)0) { /* force stored block */ #else - if (stored_len + 4 <= opt_lenb && buf != (char *)0) { - /* 4: two words for the lengths */ + if (stored_len+4 <= opt_lenb && buf != (char*)0) { + /* 4: two words for the lengths */ #endif - /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE. - * Otherwise we can't have processed more than WSIZE input bytes since - * the last block flush, because compression would have been - * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to - * transform a block into a stored block. - */ - _tr_stored_block(s, buf, stored_len, last); + /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE. + * Otherwise we can't have processed more than WSIZE input bytes since + * the last block flush, because compression would have been + * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to + * transform a block into a stored block. + */ + _tr_stored_block(s, buf, stored_len, last); #ifdef FORCE_STATIC - } else if (static_lenb >= 0) { /* force static trees */ + } else if (static_lenb >= 0) { /* force static trees */ #else - } else if (s->strategy == Z_FIXED || static_lenb == opt_lenb) { + } else if (s->strategy == Z_FIXED || static_lenb == opt_lenb) { #endif - send_bits(s, (STATIC_TREES << 1) + last, 3); - compress_block(s, (const ct_data *)kZlibStaticLtree, - (const ct_data *)kZlibStaticDtree); + send_bits(s, (STATIC_TREES<<1)+last, 3); + compress_block(s, (const ct_data *)static_ltree, + (const ct_data *)static_dtree); #ifdef ZLIB_DEBUG - s->compressed_len += 3 + s->static_len; + s->compressed_len += 3 + s->static_len; #endif - } else { - send_bits(s, (DYN_TREES << 1) + last, 3); - send_all_trees(s, s->l_desc.max_code + 1, s->d_desc.max_code + 1, - max_blindex + 1); - compress_block(s, (const ct_data *)s->dyn_ltree, - (const ct_data *)s->dyn_dtree); + } else { + send_bits(s, (DYN_TREES<<1)+last, 3); + send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1, + max_blindex+1); + compress_block(s, (const ct_data *)s->dyn_ltree, + (const ct_data *)s->dyn_dtree); #ifdef ZLIB_DEBUG - s->compressed_len += 3 + s->opt_len; + s->compressed_len += 3 + s->opt_len; #endif - } - Assert(s->compressed_len == s->bits_sent, "bad compressed size"); - /* The above check is made mod 2^32, for files larger than 512 MB - * and uLong implemented on 32 bits. - */ - init_block(s); + } + Assert (s->compressed_len == s->bits_sent, "bad compressed size"); + /* The above check is made mod 2^32, for files larger than 512 MB + * and uLong implemented on 32 bits. 
+ */ + init_block(s); - if (last) { - bi_windup(s); + if (last) { + bi_windup(s); #ifdef ZLIB_DEBUG - s->compressed_len += 7; /* align on byte boundary */ + s->compressed_len += 7; /* align on byte boundary */ #endif - } - Tracev(("\ncomprlen %lu(%lu) ", s->compressed_len >> 3, - s->compressed_len - 7 * last)); + } + Tracev(("\ncomprlen %lu(%lu) ", s->compressed_len>>3, + s->compressed_len-7*last)); } -/** +/* =========================================================================== * Save the match info and tally the frequency counts. Return true if * the current block must be flushed. - * - * @param dist is distance of matched string - * @param lc is match length-MIN_MATCH or unmatched char (if dist==0) */ -int _tr_tally(struct DeflateState *s, unsigned dist, unsigned lc) { - s->sym_buf[s->sym_next++] = dist; - s->sym_buf[s->sym_next++] = dist >> 8; - s->sym_buf[s->sym_next++] = lc; - if (dist == 0) { - /* lc is the unmatched char */ - s->dyn_ltree[lc].Freq++; - } else { - s->matches++; - /* Here, lc is the match length - MIN_MATCH */ - dist--; /* dist = match distance - 1 */ - Assert((uint16_t)dist < (uint16_t)MAX_DIST(s) && - (uint16_t)lc <= (uint16_t)(MAX_MATCH - MIN_MATCH) && - (uint16_t)d_code(dist) < (uint16_t)D_CODES, - "_tr_tally: bad match"); +int ZLIB_INTERNAL _tr_tally (s, dist, lc) + deflate_state *s; + unsigned dist; /* distance of matched string */ + unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */ +{ + s->sym_buf[s->sym_next++] = (uch)dist; + s->sym_buf[s->sym_next++] = (uch)(dist >> 8); + s->sym_buf[s->sym_next++] = (uch)lc; + if (dist == 0) { + /* lc is the unmatched char */ + s->dyn_ltree[lc].Freq++; + } else { + s->matches++; + /* Here, lc is the match length - MIN_MATCH */ + dist--; /* dist = match distance - 1 */ + Assert((ush)dist < (ush)MAX_DIST(s) && + (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) && + (ush)d_code(dist) < (ush)D_CODES, "_tr_tally: bad match"); - s->dyn_ltree[kZlibLengthCode[lc] + LITERALS + 1].Freq++; - s->dyn_dtree[d_code(dist)].Freq++; - } - return (s->sym_next == s->sym_end); + s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++; + s->dyn_dtree[d_code(dist)].Freq++; + } + return (s->sym_next == s->sym_end); } -/** +/* =========================================================================== * Send the block data compressed using the given Huffman trees */ -static void compress_block(struct DeflateState *s, const ct_data *ltree, - const ct_data *dtree) { - unsigned dist; /* distance of matched string */ - int lc; /* match length or unmatched char (if dist == 0) */ - unsigned sx = 0; /* running index in sym_buf */ - unsigned code; /* the code to send */ - int extra; /* number of extra bits to send */ +local void compress_block(s, ltree, dtree) + deflate_state *s; + const ct_data *ltree; /* literal tree */ + const ct_data *dtree; /* distance tree */ +{ + unsigned dist; /* distance of matched string */ + int lc; /* match length or unmatched char (if dist == 0) */ + unsigned sx = 0; /* running index in sym_buf */ + unsigned code; /* the code to send */ + int extra; /* number of extra bits to send */ - if (s->sym_next != 0) do { - dist = s->sym_buf[sx++] & 0xff; - dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8; - lc = s->sym_buf[sx++]; - if (dist == 0) { - send_code(s, lc, ltree); /* send a literal byte */ - Tracecv(isgraph(lc), (" '%c' ", lc)); - } else { - /* Here, lc is the match length - MIN_MATCH */ - code = kZlibLengthCode[lc]; - send_code(s, code + LITERALS + 1, ltree); /* send the length code */ - extra = extra_lbits[code]; 
- if (extra != 0) { - lc -= kZlibBaseLength[code]; - send_bits(s, lc, extra); /* send the extra length bits */ - } - dist--; /* dist is now the match distance - 1 */ - code = d_code(dist); - Assert(code < D_CODES, "bad d_code"); + if (s->sym_next != 0) do { + dist = s->sym_buf[sx++] & 0xff; + dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8; + lc = s->sym_buf[sx++]; + if (dist == 0) { + send_code(s, lc, ltree); /* send a literal byte */ + Tracecv(isgraph(lc), (" '%c' ", lc)); + } else { + /* Here, lc is the match length - MIN_MATCH */ + code = _length_code[lc]; + send_code(s, code+LITERALS+1, ltree); /* send the length code */ + extra = extra_lbits[code]; + if (extra != 0) { + lc -= base_length[code]; + send_bits(s, lc, extra); /* send the extra length bits */ + } + dist--; /* dist is now the match distance - 1 */ + code = d_code(dist); + Assert (code < D_CODES, "bad d_code"); - send_code(s, code, dtree); /* send the distance code */ - extra = extra_dbits[code]; - if (extra != 0) { - dist -= (unsigned)kZlibBaseDist[code]; - send_bits(s, dist, extra); /* send the extra distance bits */ - } - } /* literal or match pair ? */ + send_code(s, code, dtree); /* send the distance code */ + extra = extra_dbits[code]; + if (extra != 0) { + dist -= (unsigned)base_dist[code]; + send_bits(s, dist, extra); /* send the extra distance bits */ + } + } /* literal or match pair ? */ - /* Check that the overlay between pending_buf and sym_buf is ok: */ - Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow"); + /* Check that the overlay between pending_buf and sym_buf is ok: */ + Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow"); } while (sx < s->sym_next); - send_code(s, END_BLOCK, ltree); + send_code(s, END_BLOCK, ltree); } -/** - * Checks if data type is TEXT or BINARY. - * - * This uses the following algorithm: - * +/* =========================================================================== + * Check if the data type is TEXT or BINARY, using the following algorithm: * - TEXT if the two conditions below are satisfied: * a) There are no non-portable control characters belonging to the - * "black list" (0..6, 14..25, 28..31). + * "block list" (0..6, 14..25, 28..31). * b) There is at least one printable character belonging to the - * "white list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255). + * "allow list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255). * - BINARY otherwise. * - The following partially-portable control characters form a * "gray list" that is ignored in this detection algorithm: * (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}). - * * IN assertion: the fields Freq of dyn_ltree are set. */ -static int detect_data_type(struct DeflateState *s) { - /* black_mask is the bit mask of black-listed bytes - * set bits 0..6, 14..25, and 28..31 - * 0xf3ffc07f = binary 11110011111111111100000001111111 - */ - unsigned long black_mask = 0xf3ffc07fUL; - int n; +local int detect_data_type(s) + deflate_state *s; +{ + /* block_mask is the bit mask of block-listed bytes + * set bits 0..6, 14..25, and 28..31 + * 0xf3ffc07f = binary 11110011111111111100000001111111 + */ + unsigned long block_mask = 0xf3ffc07fUL; + int n; - /* Check for non-textual ("black-listed") bytes. */ - for (n = 0; n <= 31; n++, black_mask >>= 1) - if ((black_mask & 1) && (s->dyn_ltree[n].Freq != 0)) return Z_BINARY; + /* Check for non-textual ("block-listed") bytes. 
*/ + for (n = 0; n <= 31; n++, block_mask >>= 1) + if ((block_mask & 1) && (s->dyn_ltree[n].Freq != 0)) + return Z_BINARY; - /* Check for textual ("white-listed") bytes. */ - if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0 || - s->dyn_ltree[13].Freq != 0) - return Z_TEXT; - for (n = 32; n < LITERALS; n++) - if (s->dyn_ltree[n].Freq != 0) return Z_TEXT; + /* Check for textual ("allow-listed") bytes. */ + if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0 + || s->dyn_ltree[13].Freq != 0) + return Z_TEXT; + for (n = 32; n < LITERALS; n++) + if (s->dyn_ltree[n].Freq != 0) + return Z_TEXT; - /* There are no "black-listed" or "white-listed" bytes: - * this stream either is empty or has tolerated ("gray-listed") bytes only. - */ - return Z_BINARY; + /* There are no "block-listed" or "allow-listed" bytes: + * this stream either is empty or has tolerated ("gray-listed") bytes only. + */ + return Z_BINARY; } -/** - * Reverse the first len bits of a code, using straightforward code (a - * faster method would use a table). - * +/* =========================================================================== + * Reverse the first len bits of a code, using straightforward code (a faster + * method would use a table) * IN assertion: 1 <= len <= 15 - * - * @param code is value to invert - * @param len is in bits */ -static unsigned bi_reverse(unsigned code, int len) { - register unsigned res = 0; - do { - res |= code & 1; - code >>= 1, res <<= 1; - } while (--len > 0); - return res >> 1; +local unsigned bi_reverse(code, len) + unsigned code; /* the value to invert */ + int len; /* its bit length */ +{ + register unsigned res = 0; + do { + res |= code & 1; + code >>= 1, res <<= 1; + } while (--len > 0); + return res >> 1; } -/** - * Flushes bit buffer, keeping at most 7 bits in it. +/* =========================================================================== + * Flush the bit buffer, keeping at most 7 bits in it. 
*/ -static void bi_flush(struct DeflateState *s) { - if (s->bi_valid == 16) { - put_short(s, s->bi_buf); +local void bi_flush(s) + deflate_state *s; +{ + if (s->bi_valid == 16) { + put_short(s, s->bi_buf); + s->bi_buf = 0; + s->bi_valid = 0; + } else if (s->bi_valid >= 8) { + put_byte(s, (Byte)s->bi_buf); + s->bi_buf >>= 8; + s->bi_valid -= 8; + } +} + +/* =========================================================================== + * Flush the bit buffer and align the output on a byte boundary + */ +local void bi_windup(s) + deflate_state *s; +{ + if (s->bi_valid > 8) { + put_short(s, s->bi_buf); + } else if (s->bi_valid > 0) { + put_byte(s, (Byte)s->bi_buf); + } s->bi_buf = 0; s->bi_valid = 0; - } else if (s->bi_valid >= 8) { - put_byte(s, (Byte)s->bi_buf); - s->bi_buf >>= 8; - s->bi_valid -= 8; - } -} - -/** - * Flushes bit buffer and align the output on a byte boundary - */ -static void bi_windup(struct DeflateState *s) { - if (s->bi_valid > 8) { - put_short(s, s->bi_buf); - } else if (s->bi_valid > 0) { - put_byte(s, (Byte)s->bi_buf); - } - s->bi_buf = 0; - s->bi_valid = 0; #ifdef ZLIB_DEBUG - s->bits_sent = (s->bits_sent + 7) & ~7; + s->bits_sent = (s->bits_sent+7) & ~7; #endif } diff --git a/third_party/zlib/trees.inc b/third_party/zlib/trees.inc new file mode 100644 index 000000000..72e2bff35 --- /dev/null +++ b/third_party/zlib/trees.inc @@ -0,0 +1,128 @@ +// clang-format off + +local const ct_data static_ltree[L_CODES+2] = { +{{ 12},{ 8}}, {{140},{ 8}}, {{ 76},{ 8}}, {{204},{ 8}}, {{ 44},{ 8}}, +{{172},{ 8}}, {{108},{ 8}}, {{236},{ 8}}, {{ 28},{ 8}}, {{156},{ 8}}, +{{ 92},{ 8}}, {{220},{ 8}}, {{ 60},{ 8}}, {{188},{ 8}}, {{124},{ 8}}, +{{252},{ 8}}, {{ 2},{ 8}}, {{130},{ 8}}, {{ 66},{ 8}}, {{194},{ 8}}, +{{ 34},{ 8}}, {{162},{ 8}}, {{ 98},{ 8}}, {{226},{ 8}}, {{ 18},{ 8}}, +{{146},{ 8}}, {{ 82},{ 8}}, {{210},{ 8}}, {{ 50},{ 8}}, {{178},{ 8}}, +{{114},{ 8}}, {{242},{ 8}}, {{ 10},{ 8}}, {{138},{ 8}}, {{ 74},{ 8}}, +{{202},{ 8}}, {{ 42},{ 8}}, {{170},{ 8}}, {{106},{ 8}}, {{234},{ 8}}, +{{ 26},{ 8}}, {{154},{ 8}}, {{ 90},{ 8}}, {{218},{ 8}}, {{ 58},{ 8}}, +{{186},{ 8}}, {{122},{ 8}}, {{250},{ 8}}, {{ 6},{ 8}}, {{134},{ 8}}, +{{ 70},{ 8}}, {{198},{ 8}}, {{ 38},{ 8}}, {{166},{ 8}}, {{102},{ 8}}, +{{230},{ 8}}, {{ 22},{ 8}}, {{150},{ 8}}, {{ 86},{ 8}}, {{214},{ 8}}, +{{ 54},{ 8}}, {{182},{ 8}}, {{118},{ 8}}, {{246},{ 8}}, {{ 14},{ 8}}, +{{142},{ 8}}, {{ 78},{ 8}}, {{206},{ 8}}, {{ 46},{ 8}}, {{174},{ 8}}, +{{110},{ 8}}, {{238},{ 8}}, {{ 30},{ 8}}, {{158},{ 8}}, {{ 94},{ 8}}, +{{222},{ 8}}, {{ 62},{ 8}}, {{190},{ 8}}, {{126},{ 8}}, {{254},{ 8}}, +{{ 1},{ 8}}, {{129},{ 8}}, {{ 65},{ 8}}, {{193},{ 8}}, {{ 33},{ 8}}, +{{161},{ 8}}, {{ 97},{ 8}}, {{225},{ 8}}, {{ 17},{ 8}}, {{145},{ 8}}, +{{ 81},{ 8}}, {{209},{ 8}}, {{ 49},{ 8}}, {{177},{ 8}}, {{113},{ 8}}, +{{241},{ 8}}, {{ 9},{ 8}}, {{137},{ 8}}, {{ 73},{ 8}}, {{201},{ 8}}, +{{ 41},{ 8}}, {{169},{ 8}}, {{105},{ 8}}, {{233},{ 8}}, {{ 25},{ 8}}, +{{153},{ 8}}, {{ 89},{ 8}}, {{217},{ 8}}, {{ 57},{ 8}}, {{185},{ 8}}, +{{121},{ 8}}, {{249},{ 8}}, {{ 5},{ 8}}, {{133},{ 8}}, {{ 69},{ 8}}, +{{197},{ 8}}, {{ 37},{ 8}}, {{165},{ 8}}, {{101},{ 8}}, {{229},{ 8}}, +{{ 21},{ 8}}, {{149},{ 8}}, {{ 85},{ 8}}, {{213},{ 8}}, {{ 53},{ 8}}, +{{181},{ 8}}, {{117},{ 8}}, {{245},{ 8}}, {{ 13},{ 8}}, {{141},{ 8}}, +{{ 77},{ 8}}, {{205},{ 8}}, {{ 45},{ 8}}, {{173},{ 8}}, {{109},{ 8}}, +{{237},{ 8}}, {{ 29},{ 8}}, {{157},{ 8}}, {{ 93},{ 8}}, {{221},{ 8}}, +{{ 61},{ 8}}, {{189},{ 8}}, {{125},{ 8}}, {{253},{ 8}}, {{ 19},{ 9}}, +{{275},{ 9}}, {{147},{ 9}}, {{403},{ 
9}}, {{ 83},{ 9}}, {{339},{ 9}}, +{{211},{ 9}}, {{467},{ 9}}, {{ 51},{ 9}}, {{307},{ 9}}, {{179},{ 9}}, +{{435},{ 9}}, {{115},{ 9}}, {{371},{ 9}}, {{243},{ 9}}, {{499},{ 9}}, +{{ 11},{ 9}}, {{267},{ 9}}, {{139},{ 9}}, {{395},{ 9}}, {{ 75},{ 9}}, +{{331},{ 9}}, {{203},{ 9}}, {{459},{ 9}}, {{ 43},{ 9}}, {{299},{ 9}}, +{{171},{ 9}}, {{427},{ 9}}, {{107},{ 9}}, {{363},{ 9}}, {{235},{ 9}}, +{{491},{ 9}}, {{ 27},{ 9}}, {{283},{ 9}}, {{155},{ 9}}, {{411},{ 9}}, +{{ 91},{ 9}}, {{347},{ 9}}, {{219},{ 9}}, {{475},{ 9}}, {{ 59},{ 9}}, +{{315},{ 9}}, {{187},{ 9}}, {{443},{ 9}}, {{123},{ 9}}, {{379},{ 9}}, +{{251},{ 9}}, {{507},{ 9}}, {{ 7},{ 9}}, {{263},{ 9}}, {{135},{ 9}}, +{{391},{ 9}}, {{ 71},{ 9}}, {{327},{ 9}}, {{199},{ 9}}, {{455},{ 9}}, +{{ 39},{ 9}}, {{295},{ 9}}, {{167},{ 9}}, {{423},{ 9}}, {{103},{ 9}}, +{{359},{ 9}}, {{231},{ 9}}, {{487},{ 9}}, {{ 23},{ 9}}, {{279},{ 9}}, +{{151},{ 9}}, {{407},{ 9}}, {{ 87},{ 9}}, {{343},{ 9}}, {{215},{ 9}}, +{{471},{ 9}}, {{ 55},{ 9}}, {{311},{ 9}}, {{183},{ 9}}, {{439},{ 9}}, +{{119},{ 9}}, {{375},{ 9}}, {{247},{ 9}}, {{503},{ 9}}, {{ 15},{ 9}}, +{{271},{ 9}}, {{143},{ 9}}, {{399},{ 9}}, {{ 79},{ 9}}, {{335},{ 9}}, +{{207},{ 9}}, {{463},{ 9}}, {{ 47},{ 9}}, {{303},{ 9}}, {{175},{ 9}}, +{{431},{ 9}}, {{111},{ 9}}, {{367},{ 9}}, {{239},{ 9}}, {{495},{ 9}}, +{{ 31},{ 9}}, {{287},{ 9}}, {{159},{ 9}}, {{415},{ 9}}, {{ 95},{ 9}}, +{{351},{ 9}}, {{223},{ 9}}, {{479},{ 9}}, {{ 63},{ 9}}, {{319},{ 9}}, +{{191},{ 9}}, {{447},{ 9}}, {{127},{ 9}}, {{383},{ 9}}, {{255},{ 9}}, +{{511},{ 9}}, {{ 0},{ 7}}, {{ 64},{ 7}}, {{ 32},{ 7}}, {{ 96},{ 7}}, +{{ 16},{ 7}}, {{ 80},{ 7}}, {{ 48},{ 7}}, {{112},{ 7}}, {{ 8},{ 7}}, +{{ 72},{ 7}}, {{ 40},{ 7}}, {{104},{ 7}}, {{ 24},{ 7}}, {{ 88},{ 7}}, +{{ 56},{ 7}}, {{120},{ 7}}, {{ 4},{ 7}}, {{ 68},{ 7}}, {{ 36},{ 7}}, +{{100},{ 7}}, {{ 20},{ 7}}, {{ 84},{ 7}}, {{ 52},{ 7}}, {{116},{ 7}}, +{{ 3},{ 8}}, {{131},{ 8}}, {{ 67},{ 8}}, {{195},{ 8}}, {{ 35},{ 8}}, +{{163},{ 8}}, {{ 99},{ 8}}, {{227},{ 8}} +}; + +local const ct_data static_dtree[D_CODES] = { +{{ 0},{ 5}}, {{16},{ 5}}, {{ 8},{ 5}}, {{24},{ 5}}, {{ 4},{ 5}}, +{{20},{ 5}}, {{12},{ 5}}, {{28},{ 5}}, {{ 2},{ 5}}, {{18},{ 5}}, +{{10},{ 5}}, {{26},{ 5}}, {{ 6},{ 5}}, {{22},{ 5}}, {{14},{ 5}}, +{{30},{ 5}}, {{ 1},{ 5}}, {{17},{ 5}}, {{ 9},{ 5}}, {{25},{ 5}}, +{{ 5},{ 5}}, {{21},{ 5}}, {{13},{ 5}}, {{29},{ 5}}, {{ 3},{ 5}}, +{{19},{ 5}}, {{11},{ 5}}, {{27},{ 5}}, {{ 7},{ 5}}, {{23},{ 5}} +}; + +const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = { + 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, + 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, +10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, +11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, +12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, +13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, +13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, +15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, +15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, +15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 0, 16, 17, +18, 18, 19, 19, 20, 20, 20, 20, 21, 
21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, +23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, +24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, +26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, +26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, +27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29 +}; + +const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12, 12, +13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, +17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, +19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, +21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, +22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, +23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, +24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, +25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, +25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, +26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, +26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28 +}; + +local const int base_length[LENGTH_CODES] = { +0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, +64, 80, 96, 112, 128, 160, 192, 224, 0 +}; + +local const int base_dist[D_CODES] = { + 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, + 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, + 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576 +}; + diff --git a/third_party/zlib/zalloc.c b/third_party/zlib/zalloc.c index 769c2de2b..e121a4ca9 100644 --- a/third_party/zlib/zalloc.c +++ b/third_party/zlib/zalloc.c @@ -16,14 +16,21 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/assert.h" #include "libc/intrin/weaken.h" +#include "libc/limits.h" #include "libc/mem/mem.h" #include "third_party/zlib/zutil.internal.h" void *zcalloc(void *opaque, uInt items, uInt size) { - return _weaken(malloc)(items * size); + size_t res; + if (__builtin_mul_overflow(items, size, &res)) return 0; + if (res > INT_MAX) return 0; + _npassert(_weaken(malloc)); + return _weaken(malloc)(res); } void zcfree(void *opaque, void *ptr) { + _npassert(_weaken(free)); _weaken(free)(ptr); } diff --git a/third_party/zlib/zconf.h b/third_party/zlib/zconf.h index b7774c8d8..88ac2cf3e 100644 --- a/third_party/zlib/zconf.h +++ b/third_party/zlib/zconf.h @@ -14,7 +14,7 @@ #if !(__ASSEMBLER__ + __LINKER__ + 0) typedef unsigned char Byte; -typedef unsigned long uInt; /* 16 bits or more */ +typedef unsigned int uInt; /* 16 bits or more */ typedef unsigned long uLong; /* 32 bits or more */ typedef Byte Bytef; typedef char charf; @@ -24,6 +24,9 @@ typedef uLong uLongf; typedef void const *voidpc; typedef void *voidpf; typedef void *voidp; +typedef uint32_t z_crc_t; +typedef int64_t z_off64_t; +typedef size_t z_size_t; #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ #endif /* COSMOPOLITAN_THIRD_PARTY_ZLIB_ZCONF_H_ */ diff --git a/third_party/zlib/zlib.h b/third_party/zlib/zlib.h index f2219a215..0a32f9be9 100644 --- a/third_party/zlib/zlib.h +++ b/third_party/zlib/zlib.h @@ -143,19 +143,19 @@ COSMOPOLITAN_C_START_ typedef voidpf (*alloc_func)(voidpf opaque, uInt items, uInt size); typedef void (*free_func)(voidpf opaque, voidpf address); -struct DeflateState; +struct internal_state; typedef struct z_stream_s { - const Bytef *next_in; /* next input byte */ - uInt avail_in; /* number of bytes available at next_in */ - uLong total_in; /* total number of input bytes read so far */ - Bytef *next_out; /* next output byte will go here */ - uInt avail_out; /* remaining free space at next_out */ - uLong total_out; /* total number of bytes output so far */ - const char *msg; /* last error message, NULL if no error */ - struct DeflateState *state; /* not visible by applications */ - alloc_func zalloc; /* used to allocate the internal state */ - free_func zfree; /* used to free the internal state */ + const Bytef *next_in; /* next input byte */ + uInt avail_in; /* number of bytes available at next_in */ + uLong total_in; /* total number of input bytes read so far */ + Bytef *next_out; /* next output byte will go here */ + uInt avail_out; /* remaining free space at next_out */ + uLong total_out; /* total number of bytes output so far */ + const char *msg; /* last error message, NULL if no error */ + struct internal_state *state; /* not visible by applications */ + alloc_func zalloc; /* used to allocate the internal state */ + free_func zfree; /* used to free the internal state */ voidpf opaque; /* private data object passed to zalloc and zfree */ int data_type; /* best guess about the data type: binary or text for deflate, or the decoding state for inflate */ diff --git a/third_party/zlib/zlib.mk b/third_party/zlib/zlib.mk index 352a6b150..a124121c0 100644 --- a/third_party/zlib/zlib.mk +++ b/third_party/zlib/zlib.mk @@ -50,9 +50,7 @@ o/$(MODE)/third_party/zlib/crcfold.o: private \ -mpclmul \ -mssse3 -o/$(MODE)/third_party/zlib/deflate.o \ -o/$(MODE)/third_party/zlib/inflate.o \ -o/$(MODE)/third_party/zlib/adler32.o: private \ +$(THIRD_PARTY_ZLIB_A_OBJS): private \ OVERRIDE_CFLAGS += \ -ffunction-sections \ -fdata-sections 
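The zcalloc() hunk above replaces an unchecked items * size multiplication with the GCC/Clang __builtin_mul_overflow() builtin plus an INT_MAX cap before anything reaches malloc(). The following standalone sketch shows how that guard behaves; the names and the main() driver are purely illustrative (plain malloc() rather than redbean's weakly linked allocator), not the patch's actual symbols:

    #include <limits.h>
    #include <stdio.h>
    #include <stdlib.h>

    /*
     * Sketch of the overflow guard used by the new zcalloc(): reject any
     * items*size product that wraps around, or that exceeds INT_MAX,
     * before the request ever reaches malloc().
     */
    static void *checked_alloc(unsigned items, unsigned size) {
      size_t total;
      if (__builtin_mul_overflow(items, size, &total)) return NULL;
      if (total > INT_MAX) return NULL;
      return malloc(total);
    }

    int main(void) {
      void *big = checked_alloc(0x10000u, 0x10000u);  /* 4 GiB: rejected */
      void *ok = checked_alloc(1024, 16);             /* 16 KiB: allowed */
      printf("big=%p ok=%p\n", big, ok);
      free(ok);
      return 0;
    }

On targets with a 32-bit size_t the multiplication itself can wrap, which the builtin catches; on 64-bit targets it is the INT_MAX cap that rejects oversized requests.
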
diff --git a/third_party/zlib/zutil.internal.h b/third_party/zlib/zutil.internal.h index 067b8c52b..591aa9544 100644 --- a/third_party/zlib/zutil.internal.h +++ b/third_party/zlib/zutil.internal.h @@ -1,6 +1,7 @@ #ifndef ZUTIL_H #define ZUTIL_H #include "libc/intrin/kprintf.h" +#include "libc/limits.h" #include "third_party/zlib/zlib.h" /* default windowBits for decompression. MAX_WBITS is for compression only */ @@ -97,6 +98,32 @@ extern void z_error(const char *, int, char *) hidden; ((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + (((q)&0xff00) << 8) + \ (((q)&0xff) << 24)) +typedef unsigned char uch; +typedef uch uchf; +typedef unsigned short ush; +typedef ush ushf; +typedef unsigned long ulg; + +#ifdef HAVE_HIDDEN +#define ZLIB_INTERNAL __attribute__((__visibility__("hidden"))) +#else +#define ZLIB_INTERNAL +#endif + +#ifndef local +#define local static +#endif + +#if !defined(Z_U8) && !defined(Z_SOLO) && defined(STDC) +#if (ULONG_MAX == 0xffffffffffffffff) +#define Z_U8 unsigned long +#elif (ULLONG_MAX == 0xffffffffffffffff) +#define Z_U8 unsigned long long +#elif (UINT_MAX == 0xffffffffffffffff) +#define Z_U8 unsigned +#endif +#endif + COSMOPOLITAN_C_END_ #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ #endif /* ZUTIL_H */ diff --git a/tool/net/help.txt b/tool/net/help.txt index 3d1971059..013a4d17c 100644 --- a/tool/net/help.txt +++ b/tool/net/help.txt @@ -255,7 +255,45 @@ SECURITY -VVV log ssl informational messages too -VVVV log ssl verbose details too - Redbean supports sandboxing flags on Linux and OpenBSD. + redbean provides hardened ASAN (Address Sanitizer) builds that + proactively guard against any potential memory weaknesses that may be + discovered, such as buffer overruns, use after free, etc. MODE=asan is + recommended when serving on the public Internet. + + redbean also supports robust sandboxing on Linux Kernel 5.13+ and + OpenBSD. The recommended way to harden your redbean is to call the + pledge() and unveil() functions. For example, if you have a SQLite app + then the key to using these features is to connect to the db first: + + function OnWorkerStart() + db = sqlite3.open("db.sqlite3") + db:busy_timeout(1000) + db:exec[[PRAGMA journal_mode=WAL]] + db:exec[[PRAGMA synchronous=NORMAL]] + db:exec[[SELECT x FROM warmup WHERE x = 1]] + assert(unix.setrlimit(unix.RLIMIT_RSS, 100 * 1024 * 1024)) + assert(unix.setrlimit(unix.RLIMIT_CPU, 4)) + assert(unix.unveil("/var/tmp", "rwc")) + assert(unix.unveil("/tmp", "rwc")) + assert(unix.unveil(nil, nil)) + assert(unix.pledge("stdio flock rpath wpath cpath", nil, + unix.PLEDGE_PENALTY_RETURN_EPERM)) + end + + What makes this technique interesting is that redbean doesn't have file + system access to the database file, and instead uses an inherited file + descriptor that was opened beforehand. With SQLite the tmp access is + only needed to support things like covering indexes. The -Z flag is + also helpful to see where things go wrong, so you know which promises + are needed to support your use case. + + pledge() will work on all Linux kernels since RHEL6, because it uses + SECCOMP BPF filtering. On the other hand, unveil() requires Landlock + LSM, which was only introduced in 2021. If you need unveil() then be + sure to test that the restrictions work. Most environments don't support + unveil(), so it's designed to be a no-op in unsupported environments.
+ + Alternatively, there's CLI flags which make it simple to get started: -S (online policy) diff --git a/tool/net/lmaxmind.c b/tool/net/lmaxmind.c index 0f40bcee4..eaf241cda 100644 --- a/tool/net/lmaxmind.c +++ b/tool/net/lmaxmind.c @@ -206,7 +206,14 @@ static int LuaMaxmindResultGet(lua_State *L) { for (i = 0; i < n; ++i) path[i] = lua_tostring(L, 2 + i); err = MMDB_aget_value(&(*ur)->mmlr.entry, &edata, path); free(path); - if (err) LuaThrowMaxmindIpError(L, "getpath", (*ur)->ip, err); + if (err) { + if (err == MMDB_LOOKUP_PATH_DOES_NOT_MATCH_DATA_ERROR) { + lua_pushnil(L); + return 1; + } else { + LuaThrowMaxmindIpError(L, "getpath", (*ur)->ip, err); + } + } if (!edata.offset) { lua_pushnil(L); return 1; diff --git a/tool/net/redbean.c b/tool/net/redbean.c index b0f10d9a2..9272cef83 100644 --- a/tool/net/redbean.c +++ b/tool/net/redbean.c @@ -36,6 +36,7 @@ #include "libc/fmt/itoa.h" #include "libc/intrin/atomic.h" #include "libc/intrin/bsr.h" +#include "libc/intrin/kprintf.h" #include "libc/intrin/likely.h" #include "libc/intrin/nomultics.internal.h" #include "libc/intrin/safemacros.internal.h" @@ -332,6 +333,14 @@ static struct Assets { } * p; } assets; +static struct ProxyIps { + size_t n; + struct ProxyIp { + uint32_t ip; + uint32_t mask; + } * p; +} proxyips; + static struct Shared { int workers; struct timespec nowish; @@ -859,6 +868,28 @@ static void ProgramRedirectArg(int code, const char *s) { ProgramRedirect(code, s, p - s, p + 1, n - (p - s + 1)); } +static void TrustProxy(uint32_t ip, int cidr) { + uint32_t mask; + mask = 0xffffffffu << (32 - cidr); + proxyips.p = xrealloc(proxyips.p, ++proxyips.n * sizeof(*proxyips.p)); + proxyips.p[proxyips.n - 1].ip = ip; + proxyips.p[proxyips.n - 1].mask = mask; +} + +static bool IsTrustedProxy(uint32_t ip) { + int i; + if (proxyips.n) { + for (i = 0; i < proxyips.n; ++i) { + if ((ip & proxyips.p[i].mask) == proxyips.p[i].ip) { + return true; + } + } + return false; + } else { + return IsPrivateIp(ip) || IsLoopbackIp(ip); + } +} + static void DescribeAddress(char buf[40], uint32_t addr, uint16_t port) { char *p; const char *s; @@ -872,34 +903,56 @@ static void DescribeAddress(char buf[40], uint32_t addr, uint16_t port) { assert(p - buf < 40); } -static inline void GetServerAddr(uint32_t *ip, uint16_t *port) { +static inline int GetServerAddr(uint32_t *ip, uint16_t *port) { *ip = ntohl(serveraddr->sin_addr.s_addr); if (port) *port = ntohs(serveraddr->sin_port); + return 0; } -static inline void GetClientAddr(uint32_t *ip, uint16_t *port) { +static inline int GetClientAddr(uint32_t *ip, uint16_t *port) { *ip = ntohl(clientaddr.sin_addr.s_addr); if (port) *port = ntohs(clientaddr.sin_port); + return 0; } -static inline void GetRemoteAddr(uint32_t *ip, uint16_t *port) { +static inline int GetRemoteAddr(uint32_t *ip, uint16_t *port) { + char str[40]; GetClientAddr(ip, port); - if (HasHeader(kHttpXForwardedFor) && - (IsPrivateIp(*ip) || IsLoopbackIp(*ip))) { - if (ParseForwarded(HeaderData(kHttpXForwardedFor), - HeaderLength(kHttpXForwardedFor), ip, port) == -1) - WARNF("(srvr) invalid X-Forwarded-For value: %`'.*s", - HeaderLength(kHttpXForwardedFor), HeaderData(kHttpXForwardedFor)); + if (HasHeader(kHttpXForwardedFor)) { + if (IsTrustedProxy(*ip)) { + if (ParseForwarded(HeaderData(kHttpXForwardedFor), + HeaderLength(kHttpXForwardedFor), ip, port) == -1) { + VERBOSEF("could not parse x-forwarded-for %`'.*s len=%ld", + HeaderLength(kHttpXForwardedFor), + HeaderData(kHttpXForwardedFor), + HeaderLength(kHttpXForwardedFor)); + return -1; + } + } 
else { + WARNF( + "%hhu.%hhu.%hhu.%hhu isn't authorized to send x-forwarded-for %`'.*s", + *ip >> 24, *ip >> 16, *ip >> 8, *ip, HeaderLength(kHttpXForwardedFor), + HeaderData(kHttpXForwardedFor)); + } } + return 0; } static char *DescribeClient(void) { - uint32_t ip; + char str[40]; uint16_t port; - static char clientaddrstr[40]; - GetRemoteAddr(&ip, &port); - DescribeAddress(clientaddrstr, ip, port); - return clientaddrstr; + uint32_t client; + static char description[128]; + GetClientAddr(&client, &port); + if (HasHeader(kHttpXForwardedFor) && IsTrustedProxy(client)) { + DescribeAddress(str, client, port); + snprintf(description, sizeof(description), "%'.*s via %s", + HeaderLength(kHttpXForwardedFor), HeaderData(kHttpXForwardedFor), + str); + } else { + DescribeAddress(description, client, port); + } + return description; } static char *DescribeServer(void) { @@ -2225,10 +2278,8 @@ static bool Verify(void *data, size_t size, uint32_t crc) { static void *Deflate(const void *data, size_t size, size_t *out_size) { void *res; - z_stream zs; + z_stream zs = {0}; LockInc(&shared->c.deflates); - zs.zfree = 0; - zs.zalloc = 0; CHECK_EQ(Z_OK, deflateInit2(&zs, 4, Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY)); zs.next_in = data; @@ -3093,8 +3144,7 @@ static bool ShouldServeCrashReportDetails(void) { if (leakcrashreports) { return true; } else { - GetRemoteAddr(&ip, &port); - return IsLoopbackIp(ip) || IsPrivateIp(ip); + return !GetRemoteAddr(&ip, &port) && (IsLoopbackIp(ip) || IsPrivateIp(ip)); } } @@ -3354,6 +3404,42 @@ static int LuaRoute(lua_State *L) { return 1; } +static int LuaTrustProxy(lua_State *L) { + lua_Integer ip, cidr; + uint32_t ip32, imask; + ip = luaL_checkinteger(L, 1); + cidr = luaL_optinteger(L, 2, 32); + if (!(0 <= ip && ip <= 0xffffffff)) { + luaL_argerror(L, 1, "ip out of range"); + unreachable; + } + if (!(0 <= cidr && cidr <= 32)) { + luaL_argerror(L, 2, "cidr should be 0 .. 
32"); + unreachable; + } + ip32 = ip; + imask = ~(0xffffffffu << (32 - cidr)); + if (ip32 & imask) { + luaL_argerror(L, 1, + "ip address isn't the network address; " + "it has bits masked by the cidr"); + unreachable; + } + TrustProxy(ip, cidr); + return 0; +} + +static int LuaIsTrustedProxy(lua_State *L) { + lua_Integer ip; + ip = luaL_checkinteger(L, 1); + if (!(0 <= ip && ip <= 0xffffffff)) { + luaL_argerror(L, 1, "ip out of range"); + unreachable; + } + lua_pushboolean(L, IsTrustedProxy(ip)); + return 1; +} + static int LuaRespond(lua_State *L, char *R(unsigned, const char *)) { char *p; int code; @@ -3781,13 +3867,17 @@ static int LuaGetMethod(lua_State *L) { return 1; } -static int LuaGetAddr(lua_State *L, void GetAddr(uint32_t *, uint16_t *)) { +static int LuaGetAddr(lua_State *L, int GetAddr(uint32_t *, uint16_t *)) { uint32_t ip; uint16_t port; - GetAddr(&ip, &port); - lua_pushinteger(L, ip); - lua_pushinteger(L, port); - return 2; + if (!GetAddr(&ip, &port)) { + lua_pushinteger(L, ip); + lua_pushinteger(L, port); + return 2; + } else { + lua_pushnil(L); + return 1; + } } static int LuaGetServerAddr(lua_State *L) { @@ -4881,6 +4971,7 @@ static const luaL_Reg kLuaFuncs[] = { {"IsPrivateIp", LuaIsPrivateIp}, // {"IsPublicIp", LuaIsPublicIp}, // {"IsReasonablePath", LuaIsReasonablePath}, // + {"IsTrustedProxy", LuaIsTrustedProxy}, // undocumented {"IsValidHttpToken", LuaIsValidHttpToken}, // {"LaunchBrowser", LuaLaunchBrowser}, // {"Lemur64", LuaLemur64}, // @@ -4906,13 +4997,13 @@ static const luaL_Reg kLuaFuncs[] = { {"ProgramLogMessages", LuaProgramLogMessages}, // {"ProgramLogPath", LuaProgramLogPath}, // {"ProgramMaxPayloadSize", LuaProgramMaxPayloadSize}, // + {"ProgramMaxWorkers", LuaProgramMaxWorkers}, // {"ProgramPidPath", LuaProgramPidPath}, // {"ProgramPort", LuaProgramPort}, // {"ProgramRedirect", LuaProgramRedirect}, // {"ProgramTimeout", LuaProgramTimeout}, // {"ProgramUid", LuaProgramUid}, // {"ProgramUniprocess", LuaProgramUniprocess}, // - {"ProgramMaxWorkers", LuaProgramMaxWorkers}, // {"Rand64", LuaRand64}, // {"Rdrand", LuaRdrand}, // {"Rdseed", LuaRdseed}, // @@ -4939,6 +5030,7 @@ static const luaL_Reg kLuaFuncs[] = { {"Sleep", LuaSleep}, // {"Slurp", LuaSlurp}, // {"StoreAsset", LuaStoreAsset}, // + {"TrustProxy", LuaTrustProxy}, // undocumented {"Uncompress", LuaUncompress}, // {"Underlong", LuaUnderlong}, // {"VisualizeControlCodes", LuaVisualizeControlCodes}, // @@ -5584,9 +5676,8 @@ static void ParseRequestParameters(void) { FreeLater(ParseRequestUri(inbuf.p + cpm.msg.uri.a, cpm.msg.uri.b - cpm.msg.uri.a, &url)); if (!url.host.p) { - GetRemoteAddr(&ip, 0); - if (HasHeader(kHttpXForwardedHost) && - (IsPrivateIp(ip) || IsLoopbackIp(ip))) { + if (HasHeader(kHttpXForwardedHost) && // + !GetRemoteAddr(&ip, 0) && IsTrustedProxy(ip)) { FreeLater(ParseHost(HeaderData(kHttpXForwardedHost), HeaderLength(kHttpXForwardedHost), &url)); } else if (HasHeader(kHttpHost)) { @@ -5689,7 +5780,6 @@ static char *Route(const char *host, size_t hostlen, const char *path, // this function (as it always serves something); otherwise // successful RoutePath and Route may fail with "508 loop detected" cpm.loops.n = 0; - if (logmessages) LogMessage("received", inbuf.p, hdrsize); if (hostlen && (p = RouteHost(host, hostlen, path, pathlen))) { return p; } @@ -6002,6 +6092,9 @@ static bool HandleMessageActual(void) { if ((rc = ParseHttpMessage(&cpm.msg, inbuf.p, amtread)) != -1) { if (!rc) return false; hdrsize = rc; + if (logmessages) { + LogMessage("received", inbuf.p, hdrsize); + } p = 
HandleRequest(); } else { LockInc(&shared->c.badmessages);
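The redbean.c hunks above introduce a trusted-proxy list: X-Forwarded-For and X-Forwarded-Host are only honored when the connecting address matches a network registered with TrustProxy(), falling back to the old private/loopback heuristic when no networks are configured. A small self-contained sketch of the same CIDR matching idea follows; the struct, function names, and the example network are illustrative, not redbean's actual internals:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* One trusted network, in the spirit of the ProxyIp entries above. */
    struct Net {
      uint32_t ip;    /* network address in host byte order */
      uint32_t mask;  /* e.g. a /22 prefix yields 0xfffffc00 */
    };

    /* Turn a prefix length into a netmask; the /0 case is special-cased
       because shifting a 32-bit value by 32 bits is undefined in C. */
    static uint32_t CidrToMask(int cidr) {
      return cidr ? 0xffffffffu << (32 - cidr) : 0;
    }

    /* Same matching loop as IsTrustedProxy(): the client is trusted
       when its address lands inside any registered network. */
    static bool Matches(uint32_t client, const struct Net *nets, int n) {
      for (int i = 0; i < n; ++i) {
        if ((client & nets[i].mask) == nets[i].ip) return true;
      }
      return false;
    }

    int main(void) {
      struct Net nets[] = {{0x6715f400u, CidrToMask(22)}};  /* 103.21.244.0/22 */
      printf("103.21.245.9 trusted? %d\n", Matches(0x6715f509u, nets, 1));
      printf("8.8.8.8      trusted? %d\n", Matches(0x08080808u, nets, 1));
      return 0;
    }

In redbean itself the equivalent configuration would presumably be a TrustProxy(ip, cidr) call from the Lua startup code, with IsTrustedProxy(ip) available to query the list; both functions are registered above but marked undocumented in this patch.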