#include "libc/math.h"
#include "libc/mem/mem.h"
#include "libc/runtime/fenv.h"
#include "libc/str/str.h"
#include "third_party/gdtoa/gdtoa.h"

__static_yoink("gdtoa_notice");

#define IEEE_Arith          1
#define IEEE_8087           1
#define Honor_FLT_ROUNDS    1
#define f_QNAN              0x7fc00000
#define d_QNAN0             0x7ff80000
#define d_QNAN1             0x0
#define Omit_Private_Memory 1
#define Check_FLT_ROUNDS    1

/* On a machine with IEEE extended-precision registers, it is
 * necessary to specify double-precision (53-bit) rounding precision
 * before invoking strtod or dtoa.  If the machine uses (the equivalent
 * of) Intel 80x87 arithmetic, the call
 *	_control87(PC_53, MCW_PC);
 * does this with many compilers.  Whether this or another call is
 * appropriate depends on the compiler; for this to work, it may be
 * necessary third_party/gdtoa/to #include "float.h" or another system-dependent
 *header file.
 */

/* strtod for IEEE-, VAX-, and IBM-arithmetic machines.
 *
 * This strtod returns a nearest machine number to the input decimal
 * string (or sets errno to ERANGE).  With IEEE arithmetic, ties are
 * broken by the IEEE round-even rule.  Otherwise ties are broken by
 * biased rounding (add half and chop).
 *
 * Inspired loosely by William D. Clinger's paper "How to Read Floating
 * Point Numbers Accurately" [Proc. ACM SIGPLAN '90, pp. 112-126].
 *
 * Modifications:
 *
 *	1. We only require IEEE, IBM, or VAX double-precision
 *		arithmetic (not IEEE double-extended).
 *	2. We get by with floating-point arithmetic in a case that
 *		Clinger missed -- when we're computing d * 10^n
 *		for a small integer d and the integer n is not too
 *		much larger than 22 (the maximum integer k for which
 *		we can represent 10^k exactly), we may be able to
 *		compute (d*10^k) * 10^(e-k) with just one roundoff.
 *	3. Rather than a bit-at-a-time adjustment of the binary
 *		result in the hard case, we use floating-point
 *		arithmetic to determine the adjustment to within
 *		one bit; only in really hard cases do we need to
 *		compute a second residual.
 *	4. Because of 3., we don't need a large table of powers of 10
 *		for ten-to-e (just some small tables, e.g. of 10^k
 *		for 0 <= k <= 22).
 */

/*
 * #define IEEE_8087 for IEEE-arithmetic machines where the least
 *	significant byte has the lowest address.
 * #define IEEE_MC68k for IEEE-arithmetic machines where the most
 *	significant byte has the lowest address.
 * #define Long int on machines with 32-bit ints and 64-bit longs.
 * #define Sudden_Underflow for IEEE-format machines without gradual
 *	underflow (i.e., that flush to zero on underflow).
 * #define IBM for IBM mainframe-style floating-point arithmetic.
 * #define VAX for VAX-style floating-point arithmetic (D_floating).
 * #define No_leftright to omit left-right logic in fast floating-point
 *	computation of dtoa and gdtoa.  This will cause modes 4 and 5 to be
 *	treated the same as modes 2 and 3 for some inputs.
 * #define Check_FLT_ROUNDS if FLT_ROUNDS can assume the values 2 or 3.
 * #define RND_PRODQUOT to use rnd_prod and rnd_quot (assembly routines
 *	that use extended-precision instructions to compute rounded
 *	products and quotients) with IBM.
 * #define ROUND_BIASED for IEEE-format with biased rounding and arithmetic
 *	that rounds toward +Infinity.
 * #define ROUND_BIASED_without_Round_Up for IEEE-format with biased
 *	rounding when the underlying floating-point arithmetic uses
 *	unbiased rounding.  This prevent using ordinary floating-point
 *	arithmetic when the result could be computed with one rounding error.
 * #define Inaccurate_Divide for IEEE-format with correctly rounded
 *	products but inaccurate quotients, e.g., for Intel i860.
 * #define NO_LONG_LONG on machines that do not have a "long long"
 *	integer type (of >= 64 bits).  On such machines, you can
 *	#define Just_16 to store 16 bits per 32-bit Long when doing
 *	high-precision integer arithmetic.  Whether this speeds things
 *	up or slows things down depends on the machine and the number
 *	being converted.  If long long is available and the name is
 *	something other than "long long", #define Llong to be the name,
 *	and if "unsigned Llong" does not work as an unsigned version of
 *	Llong, #define #ULLong to be the corresponding unsigned type.
 * #define Bad_float_h if your system lacks a float.h or if it does not
 *	define some or all of DBL_DIG, DBL_MAX_10_EXP, DBL_MAX_EXP,
 *	FLT_RADIX, FLT_ROUNDS, and DBL_MAX.
 * #define MALLOC your_malloc, where your_malloc(n) acts like malloc(n)
 *	if memory is available and otherwise does something you deem
 *	appropriate.  If MALLOC is undefined, malloc will be invoked
 *	directly -- and assumed always to succeed.  Similarly, if you
 *	want something other than the system's free() to be called to
 *	recycle memory acquired from MALLOC, #define FREE to be the
 *	name of the alternate routine.  (FREE or free is only called in
 *	pathological cases, e.g., in a gdtoa call after a gdtoa return in
 *	mode 3 with thousands of digits requested.)
 * #define Omit_Private_Memory to omit logic (added Jan. 1998) for making
 *	memory allocations from a private pool of memory when possible.
 *	When used, the private pool is PRIVATE_MEM bytes long:  2304 bytes,
 *	unless #defined to be a different length.  This default length
 *	suffices to get rid of MALLOC calls except for unusual cases,
 *	such as decimal-to-binary conversion of a very long string of
 *	digits.  When converting IEEE double precision values, the
 *	longest string gdtoa can return is about 751 bytes long.  For
 *	conversions by strtod of strings of 800 digits and all gdtoa
 *	conversions of IEEE doubles in single-threaded executions with
 *	8-byte pointers, PRIVATE_MEM >= 7400 appears to suffice; with
 *	4-byte pointers, PRIVATE_MEM >= 7112 appears adequate.
 * #define NO_INFNAN_CHECK if you do not wish to have INFNAN_CHECK
 *	#defined automatically on IEEE systems.  On such systems,
 *	when INFNAN_CHECK is #defined, strtod checks
 *	for Infinity and NaN (case insensitively).
 *	When INFNAN_CHECK is #defined and No_Hex_NaN is not #defined,
 *	strtodg also accepts (case insensitively) strings of the form
 *	NaN(x), where x is a string of hexadecimal digits (optionally
 *	preceded by 0x or 0X) and spaces; if there is only one string
 *	of hexadecimal digits, it is taken for the fraction bits of the
 *	resulting NaN; if there are two or more strings of hexadecimal
 *	digits, each string is assigned to the next available sequence
 *	of 32-bit words of fractions bits (starting with the most
 *	significant), right-aligned in each sequence.
 *	Unless GDTOA_NON_PEDANTIC_NANCHECK is #defined, input "NaN(...)"
 *	is consumed even when ... has the wrong form (in which case the
 *	"(...)" is consumed but ignored).
 * #define MULTIPLE_THREADS if the system offers preemptively scheduled
 *	multiple threads.  In this case, you must provide (or suitably
 *	#define) two locks, acquired by ACQUIRE_DTOA_LOCK(n) and freed
 *	by FREE_DTOA_LOCK(n) for n = 0 or 1.  (The second lock, accessed
 *	in pow5mult, ensures lazy evaluation of only one copy of high
 *	powers of 5; omitting this lock would introduce a small
 *	probability of wasting memory, but would otherwise be harmless.)
 *	You must also invoke freedtoa(s) to free the value s returned by
 *	dtoa.  You may do so whether or not MULTIPLE_THREADS is #defined.
 *	When MULTIPLE_THREADS is #defined, source file misc.c provides
 *		void set_max_gdtoa_threads(unsigned int n);
 *	and expects
 *		unsigned int dtoa_get_threadno(void);
 *	to be available (possibly provided by
 *		#define dtoa_get_threadno omp_get_thread_num
 *	if OpenMP is in use or by
 *		#define dtoa_get_threadno pthread_self
 *	if Pthreads is in use), to return the current thread number.
 *	If set_max_dtoa_threads(n) was called and the current thread
 *	number is k with k < n, then calls on ACQUIRE_DTOA_LOCK(...) and
 *	FREE_DTOA_LOCK(...) are avoided; instead each thread with thread
 *	number < n has a separate copy of relevant data structures.
 *	After set_max_dtoa_threads(n), a call set_max_dtoa_threads(m)
 *	with m <= n has has no effect, but a call with m > n is honored.
 *	Such a call invokes REALLOC (assumed to be "realloc" if REALLOC
 *	is not #defined) to extend the size of the relevant array.
 * #define IMPRECISE_INEXACT if you do not care about the setting of
 *	the STRTOG_Inexact bits in the special case of doing IEEE double
 *	precision conversions (which could also be done by the strtod in
 *	dtoa.c).
 * #define NO_HEX_FP to disable recognition of C9x's hexadecimal
 *	floating-point constants.
 * #define -DNO_ERRNO to suppress setting errno (in strtod.c and
 *	strtodg.c).
 * #define USE_LOCALE to use the current locale's decimal_point value.
 */

#ifndef Long
#define Long int
#endif
#ifndef ULong
typedef unsigned Long ULong;
#endif
#ifndef UShort
typedef unsigned short UShort;
#endif

#ifndef const
#define const const
#endif /* const */

#ifdef DEBUG
#define Bug(x)                  \
  {                             \
    fprintf(stderr, "%s\n", x); \
    exit(1);                    \
  }
#endif

#undef IEEE_Arith
#undef Avoid_Underflow
#ifdef IEEE_MC68k
#define IEEE_Arith
#endif
#ifdef IEEE_8087
#define IEEE_Arith
#endif

#ifdef Bad_float_h

#else  /* ifndef Bad_float_h */
#endif /* Bad_float_h */

#ifdef IEEE_Arith
#define Scale_Bit         0x10
#define n___gdtoa_bigtens 5
#endif

typedef union {
  double d;
  ULong L[2];
} U;

#ifdef IEEE_8087
#define word0(x) (x)->L[1]
#define word1(x) (x)->L[0]
#else
#define word0(x) (x)->L[0]
#define word1(x) (x)->L[1]
#endif
#define dval(x) (x)->d

/* The following definition of Storeinc is appropriate for MIPS processors.
 * An alternative that might be better on some machines is
 * #define Storeinc(a,b,c) (*a++ = b << 16 | c & 0xffff)
 */
#if defined(IEEE_8087) + defined(VAX)
#define Storeinc(a, b, c)                        \
  (((unsigned short *)a)[1] = (unsigned short)b, \
   ((unsigned short *)a)[0] = (unsigned short)c, a++)
#else
#define Storeinc(a, b, c)                        \
  (((unsigned short *)a)[0] = (unsigned short)b, \
   ((unsigned short *)a)[1] = (unsigned short)c, a++)
#endif

/* #define P DBL_MANT_DIG */
/* Ten_pmax = floor(P*log(2)/log(5)) */
/* Bletch = (highest power of 2 < DBL_MAX_10_EXP) / 16 */
/* Quick_max = floor((P-1)*log(FLT_RADIX)/log(10) - 1) */
/* Int_max = floor(P*log(FLT_RADIX)/log(10) - 1) */

#define Exp_shift   20
#define Exp_shift1  20
#define Exp_msk1    0x100000
#define Exp_msk11   0x100000
#define Exp_mask    0x7ff00000
#define P           53
#define Bias        1023
#define Emin        (-1022)
#define Exp_1       0x3ff00000
#define Exp_11      0x3ff00000
#define Ebits       11
#define Frac_mask   0xfffff
#define Frac_mask1  0xfffff
#define Ten_pmax    22
#define Bletch      0x10
#define Bndry_mask  0xfffff
#define Bndry_mask1 0xfffff
#define LSB         1
#define Sign_bit    0x80000000
#define Log2P       1
#define Tiny0       0
#define Tiny1       1
#define Quick_max   14
#define Int_max     14

#ifndef IEEE_Arith
#define ROUND_BIASED
#else
#ifdef ROUND_BIASED_without_Round_Up
#undef ROUND_BIASED
#define ROUND_BIASED
#endif
#endif

#ifdef RND_PRODQUOT
#define rounded_product(a, b)  a = rnd_prod(a, b)
#define rounded_quotient(a, b) a = rnd_quot(a, b)
extern double rnd_prod(double, double), rnd_quot(double, double);
#else
#define rounded_product(a, b)  a *= b
#define rounded_quotient(a, b) a /= b
#endif

#define Big0 (Frac_mask1 | Exp_msk1 * (DBL_MAX_EXP + Bias - 1))
#define Big1 0xffffffff

#undef Pack_16
#ifndef Pack_32
#define Pack_32
#endif

#ifdef NO_LONG_LONG
#undef ULLong
#ifdef Just_16
#undef Pack_32
#define Pack_16
/* When Pack_32 is not defined, we store 16 bits per 32-bit Long.
 * This makes some inner loops simpler and sometimes saves work
 * during __gdtoa_multiplications, but it often seems to make things slightly
 * slower.  Hence the default is now to store 32 bits per Long.
 */
#endif
#else /* long long available */
#ifndef Llong
#define Llong long long
#endif
#ifndef ULLong
#define ULLong unsigned Llong
#endif
#endif /* NO_LONG_LONG */

#ifdef Pack_32
#define ULbits 32
#define kshift 5
#define kmask  31
#define ALL_ON 0xffffffff
#else
#define ULbits 16
#define kshift 4
#define kmask  15
#define ALL_ON 0xffff
#endif

#define Kmax 9

struct Bigint {
  struct Bigint *next;
  int k, maxwds, sign, wds;
  ULong x[1];
};

typedef struct Bigint Bigint;

typedef struct ThInfo {
  Bigint *Freelist[Kmax + 1];
  Bigint *P5s;
} ThInfo;

#define Bcopy(x, y) \
  memcpy(&x->sign, &y->sign, y->wds * sizeof(ULong) + 2 * sizeof(int))

extern const double __gdtoa_tens[];
extern const double __gdtoa_bigtens[];
extern const double __gdtoa_tinytens[];
extern const unsigned char __gdtoa_hexdig[];
extern const char *const __gdtoa_InfName[6];
extern const char *const __gdtoa_NanName[3];
extern const ULong __gdtoa_NanDflt_Q[4];

Bigint *__gdtoa_Balloc(int, ThInfo **);
void __gdtoa_Bfree(Bigint *, ThInfo **);
Bigint *__gdtoa_d2b(double, int *, int *, ThInfo **);
Bigint *__gdtoa_diff(Bigint *, Bigint *, ThInfo **);
int __gdtoa_gethex(const char **, const FPI *, int *, Bigint **, int,
                   ThInfo **);
Bigint *__gdtoa_i2b(int, ThInfo **);
Bigint *__gdtoa_increment(Bigint *, ThInfo **);
Bigint *__gdtoa_lshift(Bigint *, int, ThInfo **);
Bigint *__gdtoa_mult(Bigint *, Bigint *, ThInfo **);
Bigint *__gdtoa_multadd(Bigint *, int, int, ThInfo **);
char *__gdtoa_nrv_alloc(char *, char **, int, ThInfo **);
char *__gdtoa_rv_alloc(int, ThInfo **);
Bigint *__gdtoa_pow5mult(Bigint *, int, ThInfo **);
Bigint *__gdtoa_s2b(const char *, int, int, ULong, int, ThInfo **);
Bigint *__gdtoa_set_ones(Bigint *, int, ThInfo **);
Bigint *__gdtoa_sum(Bigint *, Bigint *, ThInfo **);

ULong __gdtoa_any_on(Bigint *, int);
char *__gdtoa_add_nanbits(char *, size_t, ULong *, int);
char *__gdtoa_g__fmt(char *, char *, char *, int, ULong, size_t);
double __gdtoa_b2d(Bigint *, int *);
double __gdtoa_ratio(Bigint *, Bigint *);
double __gdtoa_ulp(U *);
int __gdtoa_cmp(Bigint *, Bigint *);
int __gdtoa_hexnan(const char **, const FPI *, ULong *);
int __gdtoa_match(const char **, char *);
int __gdtoa_quorem(Bigint *, Bigint *);
int __gdtoa_strtoIg(const char *, char **, const FPI *, Long *, Bigint **,
                    int *);
int __gdtoa_trailz(Bigint *);
void __gdtoa_ULtoQ(ULong *, ULong *, Long, int);
void __gdtoa_ULtod(ULong *, ULong *, Long, int);
void __gdtoa_ULtodd(ULong *, ULong *, Long, int);
void __gdtoa_ULtof(ULong *, ULong *, Long, int);
void __gdtoa_ULtox(UShort *, ULong *, Long, int);
void __gdtoa_ULtoxL(ULong *, ULong *, Long, int);
void __gdtoa_copybits(ULong *, int, Bigint *);
void __gdtoa_decrement(Bigint *);
void __gdtoa_hexdig_init(void);
void __gdtoa_rshift(Bigint *, int);

forceinline int lo0bits(ULong *y) {
  int k;
  if (*y) {
    k = __builtin_ctz(*y);
    *y >>= k;
    return k;
  } else {
    return 32;
  }
}

forceinline int hi0bits(ULong x) {
  return x ? __builtin_clz(x) : 32;
}

/*
 * NAN_WORD0 and NAN_WORD1 are only referenced in strtod.c.  Prior to
 * 20050115, they used to be hard-wired here (to 0x7ff80000 and 0,
 * respectively), but now are determined by compiling and running
 * qnan.c to generate gd_qnan.h, which specifies d_QNAN0 and d_QNAN1.
 * Formerly gdtoaimp.h recommended supplying suitable -DNAN_WORD0=...
 * and -DNAN_WORD1=...  values if necessary.  This should still work.
 * (On HP Series 700/800 machines, -DNAN_WORD0=0x7ff40000 works.)
 */
#ifdef IEEE_Arith
#ifndef NO_INFNAN_CHECK
#undef INFNAN_CHECK
#define INFNAN_CHECK
#endif
#ifndef NAN_WORD0
#define NAN_WORD0 d_QNAN1
#endif
#ifndef NAN_WORD1
#define NAN_WORD1 d_QNAN0
#endif
#else
#undef INFNAN_CHECK
#endif

#undef SI
#ifdef Sudden_Underflow
#define SI 1
#else
#define SI 0
#endif