/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ │ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │ ╞══════════════════════════════════════════════════════════════════════════════╡ │ Copyright 2020 Justine Alexandra Roberts Tunney │ │ │ │ Permission to use, copy, modify, and/or distribute this software for │ │ any purpose with or without fee is hereby granted, provided that the │ │ above copyright notice and this permission notice appear in all copies. │ │ │ │ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ │ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ │ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ │ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ │ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ │ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/calls/calls.h" #include "libc/calls/struct/stat.h" #include "libc/errno.h" #include "libc/fmt/itoa.h" #include "libc/fmt/libgen.h" #include "libc/fmt/magnumstrs.internal.h" #include "libc/limits.h" #include "libc/macros.h" #include "libc/mem/alg.h" #include "libc/mem/leaks.h" #include "libc/mem/mem.h" #include "libc/nexgen32e/crc32.h" #include "libc/runtime/runtime.h" #include "libc/serialize.h" #include "libc/stdio/append.h" #include "libc/stdio/stdio.h" #include "libc/str/str.h" #include "libc/str/tab.h" #include "libc/sysv/consts/map.h" #include "libc/sysv/consts/o.h" #include "libc/sysv/consts/prot.h" #include "libc/sysv/consts/s.h" #include "third_party/getopt/getopt.internal.h" #include "tool/build/lib/getargs.h" #define VERSION \ "cosmopolitan mkdeps v3.0\n" \ "copyright 2023 justine tunney\n" \ "https://github.com/jart/cosmopolitan\n" #define MANUAL \ " -r o// -o OUTPUT INPUT...\n" \ "\n" \ "DESCRIPTION\n" \ "\n" \ " Generates header file dependencies for your makefile\n" \ "\n" \ " This tool computes the transitive closure of included paths\n" \ " for every source file in your repository. This program does\n" \ " it orders of a magnitude faster than `gcc -M` on each file.\n" \ "\n" \ " Includes look like this:\n" \ "\n" \ " - #include \n" \ " - #include \"samedir.h\"\n" \ " - #include \"root/of/repository/foo.h\"\n" \ " - .include \"asm/x86_64/foo.s\"\n" \ "\n" \ " Your generated make code looks like this:\n" \ "\n" \ " o//package/foo.o: \\\n" \ " package/foo.c \\\n" \ " package/foo.h \\\n" \ " package/bar.h \\\n" \ " libc/isystem/stdio.h\n" \ " o//package/bar.o: \\\n" \ " package/bar.c \\\n" \ " package/bar.h\n" \ "\n" \ "FLAGS\n" \ "\n" \ " -h show usage\n" \ " -o OUTPUT set output path\n" \ " -g ROOT set generated path [default: o/]\n" \ " -r ROOT set build output path, e.g. o/$(MODE)/\n" \ " -S PATH isystem include path [repeatable; default: libc/isystem/]\n" \ " -s hermetically sealed mode [repeatable]\n" \ "\n" \ "ARGUMENTS\n" \ "\n" \ " OUTPUT shall be makefile code\n" \ " INPUT should be source or @args.txt\n" \ "\n" #define Read32(s) (s[3] << 24 | s[2] << 16 | s[1] << 8 | s[0]) #define EXT(s) Read32(s "\0\0") struct Source { unsigned hash; unsigned name; unsigned id; }; struct Edge { unsigned to; unsigned from; }; struct Sources { size_t i, n; struct Source *p; }; struct Sauce { unsigned name; unsigned id; }; struct Edges { size_t i, n; struct Edge *p; }; struct Paths { long n; const char *p[64]; }; static const uint32_t kSourceExts[] = { EXT("s"), // assembly EXT("S"), // assembly with c preprocessor EXT("c"), // c EXT("cc"), // c++ EXT("cpp"), // c++ EXT("cu"), // cuda EXT("m"), // objective c }; static char *names; static int hermetic; static unsigned counter; static const char *prog; static struct Edges edges; static struct Sauce *sauces; static struct Sources sources; static struct Paths systempaths; static const char *buildroot; static const char *genroot; static const char *outpath; static inline bool IsBlank(int c) { return c == ' ' || c == '\t'; } static inline bool IsGraph(wint_t c) { return 0x21 <= c && c <= 0x7E; } static wontreturn void Die(const char *reason) { tinyprint(2, prog, ": ", reason, "\n", NULL); exit(1); } static wontreturn void DieSys(const char *thing) { perror(thing); exit(1); } static wontreturn void DiePathTooLong(const char *path) { errno = ENAMETOOLONG; DieSys(path); } static wontreturn void DieOom(void) { Die("out of memory"); } static unsigned Hash(const void *s, size_t l) { unsigned h; h = crc32c(0, s, l); if (!h) h = 1; return h; } static void *Malloc(size_t n) { void *p; if (!(p = malloc(n))) DieOom(); return p; } static void *Calloc(size_t n, size_t z) { void *p; if (!(p = calloc(n, z))) DieOom(); return p; } static void *Realloc(void *p, size_t n) { if (!(p = realloc(p, n))) DieOom(); return p; } static void Appendw(char **b, uint64_t w) { if (appendw(b, w) == -1) DieOom(); } static void Appends(char **b, const char *s) { if (appends(b, s) == -1) DieOom(); } static void Appendd(char **b, const void *p, size_t n) { if (appendd(b, p, n) == -1) DieOom(); } static unsigned FindFirstFromEdge(unsigned id) { unsigned m, l, r; l = 0; r = edges.i; while (l < r) { m = (l & r) + ((l ^ r) >> 1); // floor((a+b)/2) if (edges.p[m].from < id) { l = m + 1; } else { r = m; } } return l; } static void AppendEdge(struct Edges *edges, unsigned to, unsigned from) { if (edges->i + 1 > edges->n) { edges->n += 1; edges->n += edges->n >> 1; edges->p = Realloc(edges->p, edges->n * sizeof(*edges->p)); } edges->p[edges->i++] = (struct Edge){to, from}; } static void Crunch(void) { size_t i, j; sauces = Malloc(sizeof(*sauces) * sources.n); for (j = i = 0; i < sources.n; ++i) { if (sources.p[i].hash) { sauces[j].name = sources.p[i].name; sauces[j].id = sources.p[i].id; j++; } } free(sources.p); sources.p = 0; sources.i = j; if (radix_sort_int64((long *)sauces, sources.i) == -1 || radix_sort_int64((long *)edges.p, edges.i) == -1) { DieOom(); } } static void Rehash(void) { size_t i, j, step; struct Sources old; memcpy(&old, &sources, sizeof(sources)); sources.n = sources.n ? sources.n << 1 : 16; sources.p = Calloc(sources.n, sizeof(struct Source)); for (i = 0; i < old.n; ++i) { if (!old.p[i].hash) continue; step = 0; do { j = (old.p[i].hash + step * (step + 1) / 2) & (sources.n - 1); step++; } while (sources.p[j].hash); sources.p[j] = old.p[i]; } free(old.p); } static int HashSource(const char *name, size_t len, bool create) { unsigned hash; size_t i, step; i = 0; hash = Hash(name, len); if (sources.n) { step = 0; do { i = (hash + step * (step + 1) / 2) & (sources.n - 1); if (sources.p[i].hash == hash && !memcmp(name, names + sources.p[i].name, len)) { return sources.p[i].id; } step++; } while (sources.p[i].hash); } if (!create) return -1; if (++sources.i >= (sources.n >> 1)) { Rehash(); step = 0; do { i = (hash + step * (step + 1) / 2) & (sources.n - 1); step++; } while (sources.p[i].hash); } sources.p[i].hash = hash; sources.p[i].name = appendz(names).i; Appendd(&names, name, len); Appendw(&names, 0); return (sources.p[i].id = counter++); } static int CreateSourceId(const char *name) { return HashSource(name, strlen(name), true); } static int GetSourceId(const char *name) { return HashSource(name, strlen(name), false); } // `p` should point to substring "include " // `map` and `mapsize` define legal memory range // returns pointer to path, or null if `p` isn't an include // // syntax like the following is supported: // // - `#include "foo.h"` // - `#include ` // - `# include ` // - `#include ` // - ` #include ` // - `.include "foo.h"` (for .s files only) // // known issues: // // - we can't tell if it's inside a /* comment */ // - whitespace like vertical tab isn't supported // static const char *FindIncludePath(const char *map, size_t mapsize, const char *p, bool is_assembly) { const char *q = p; // scan backwards for hash character for (;;) { if (q == map) return 0; if (IsBlank(q[-1])) { --q; continue; } if (q[-1] == '#' && !is_assembly) { --q; break; } else if (q[-1] == '.' && is_assembly) { --q; break; } else { return 0; } } // scan backwards for newline character if (!is_assembly) { for (;;) { if (q == map) { break; } if (IsBlank(q[-1])) { --q; continue; } if (q[-1] == '\n') { break; } else { return 0; } } } // scan forward for path q = p + strlen("include "); for (;;) { if (q >= map + mapsize) { break; } if (IsBlank(*q)) { ++q; continue; } if (*q == '"' || (*q == '<' && !is_assembly)) { ++q; break; } else { return 0; } } // return pointer to path return q; } static void LoadRelationships(int argc, char *argv[]) { int fd; char *map; ssize_t rc; size_t size; bool is_assembly; struct GetArgs ga; int srcid, dependency; static char srcdirbuf[PATH_MAX]; const char *p, *pe, *src, *path, *pathend, *srcdir, *final; getargs_init(&ga, argv + optind); while ((src = getargs_next(&ga))) CreateSourceId(src); getargs_destroy(&ga); getargs_init(&ga, argv + optind); while ((src = getargs_next(&ga))) { is_assembly = endswith(src, ".s"); srcid = GetSourceId(src); if (strlcpy(srcdirbuf, src, PATH_MAX) >= PATH_MAX) DiePathTooLong(src); srcdir = dirname(srcdirbuf); if ((fd = open(src, O_RDONLY)) == -1) { if (errno == ENOENT && ga.path) { // This code helps GNU Make automatically fix itself when we // delete a source file. It removes o/.../srcs.txt or // o/.../hdrs.txt and exits nonzero. Since we use hyphen // notation on mkdeps related rules, the build will // automatically restart itself. tinyprint(2, prog, ": deleting ", ga.path, " to refresh build...\n", NULL); } DieSys(src); } if ((rc = lseek(fd, 0, SEEK_END)) == -1) DieSys(src); if ((size = rc)) { // repeatedly map to same fixed address so in order to weasel out // of incurring the additional overhead of all these munmap calls map = mmap(0, size, PROT_READ, MAP_SHARED, fd, 0); if (map == MAP_FAILED) DieSys(src); for (p = map, pe = map + size; p < pe; ++p) { if (!(p = memmem(p, pe - p, "include ", 8))) break; if (!(path = FindIncludePath(map, size, p, is_assembly))) continue; // copy the specified include path char right; if (path[-1] == '<') { if (!systempaths.n) continue; right = '>'; } else { right = '"'; } if (!(pathend = memchr(path, right, pe - path))) continue; if (pathend - path >= PATH_MAX) { tinyprint(2, src, ": uses really long include path\n", NULL); exit(1); } char juf[PATH_MAX]; char incpath[PATH_MAX]; *(char *)mempcpy(incpath, path, pathend - path) = 0; if (right == '>') { // handle angle bracket includes dependency = -1; for (long i = 0; i < systempaths.n; ++i) { if (!(final = __join_paths(juf, PATH_MAX, systempaths.p[i], incpath))) DiePathTooLong(incpath); if ((dependency = GetSourceId(final)) != -1) break; } if (dependency != -1) { AppendEdge(&edges, dependency, srcid); p = pathend + 1; } else { if (hermetic == 1) { // chances are the `#include ` is in some #ifdef // that'll never actually be executed; thus we ignore // since landlock make unveil() shall catch it anyway continue; } tinyprint(2, incpath, ": system header not specified by the HDRS/SRCS/INCS " "make variables defined by the hermetic mono repo\n", NULL); exit(1); } } else { // handle double quote includes // let foo/bar.c say `#include "foo/hdr.h"` dependency = GetSourceId((final = incpath)); // let foo/bar.c say `#include "hdr.h"` if (dependency == -1 && !strchr(final, '/')) { if (!(final = __join_paths(juf, PATH_MAX, srcdir, final))) DiePathTooLong(incpath); dependency = GetSourceId(final); } if (dependency == -1) { if (startswith(final, genroot)) { dependency = CreateSourceId(src); } else { tinyprint(2, incpath, ": path not specified by HDRS/SRCS/INCS make variables " "(it was included by ", src, ")\n", NULL); exit(1); } } AppendEdge(&edges, dependency, srcid); p = pathend + 1; } } if (munmap(map, size)) DieSys(src); } if (close(fd)) DieSys(src); } getargs_destroy(&ga); } static wontreturn void ShowUsage(int rc, int fd) { tinyprint(fd, VERSION, "\nUSAGE\n\n ", prog, MANUAL, NULL); exit(rc); } static void AddPath(struct Paths *paths, const char *path) { if (paths->n == ARRAYLEN(paths->p)) Die("too many path arguments"); paths->p[paths->n++] = path; } static void GetOpts(int argc, char *argv[]) { int opt; while ((opt = getopt(argc, argv, "hnsgS:o:r:")) != -1) { switch (opt) { case 's': ++hermetic; break; case 'S': AddPath(&systempaths, optarg); break; case 'o': if (outpath) Die("multiple output paths specified"); outpath = optarg; break; case 'r': if (buildroot) Die("multiple build roots specified"); buildroot = optarg; break; case 'g': if (genroot) Die("multiple generated roots specified"); genroot = optarg; break; case 'n': exit(0); case 'h': ShowUsage(0, 1); default: ShowUsage(1, 2); } } if (optind == argc) Die("missing input argument"); if (!genroot) genroot = "o/"; if (!endswith(genroot, "/")) Die("generated output path must end with slash"); if (!buildroot) Die("need build output path"); if (!endswith(buildroot, "/")) Die("build output path must end with slash"); if (!startswith(buildroot, genroot)) Die("build output path must start with generated output path"); if (!systempaths.n && hermetic) { AddPath(&systempaths, "third_party/libcxx/include/"); AddPath(&systempaths, "libc/isystem/"); } if (systempaths.n && !hermetic) Die("system path can only be specified in hermetic mode"); long j = 0; for (long i = 0; i < systempaths.n; ++i) { size_t n; struct stat st; const char *path = systempaths.p[i]; if (!stat(path, &st)) { systempaths.p[j++] = path; if (!S_ISDIR(st.st_mode)) { errno = ENOTDIR; DieSys(path); } } if ((n = strlen(path)) >= PATH_MAX) DiePathTooLong(path); if (!n || path[n - 1] != '/') Die("system path must end with slash"); } systempaths.n = j; } static const char *StripExt(char pathbuf[hasatleast PATH_MAX], const char *s) { static char *dot; if (strlcpy(pathbuf, s, PATH_MAX) >= PATH_MAX) DiePathTooLong(s); dot = strrchr(pathbuf, '.'); if (dot) *dot = '\0'; return pathbuf; } static uint32_t GetFileExtension(const char *s) { uint32_t w; size_t i, n; n = s ? strlen(s) : 0; for (i = w = 0; n--;) { wint_t c = s[n]; if (!IsGraph(c)) return 0; if (c == '.') break; if (++i > 4) return 0; w <<= 8; w |= kToLower[c]; } return w; } static bool IsObjectSource(const char *name) { int i; uint32_t ext; if ((ext = GetFileExtension(name))) for (i = 0; i < ARRAYLEN(kSourceExts); ++i) if (ext == kSourceExts[i]) return true; return false; } __funline bool Bts(uint32_t *p, size_t i) { uint32_t k; k = 1u << (i & 31); if (p[i >> 5] & k) return true; p[i >> 5] |= k; return false; } static void Dive(char **makefile, uint32_t *visited, unsigned id) { int i; for (i = FindFirstFromEdge(id); i < edges.i && edges.p[i].from == id; ++i) { if (Bts(visited, edges.p[i].to)) continue; Appendw(makefile, READ32LE(" \\\n\t")); Appends(makefile, names + sauces[edges.p[i].to].name); Dive(makefile, visited, edges.p[i].to); } } static char *Explore(void) { const char *path; unsigned *visited; size_t i, visilen; char *makefile = 0; char buf[PATH_MAX]; visilen = (sources.i + sizeof(*visited) * CHAR_BIT - 1) / (sizeof(*visited) * CHAR_BIT); visited = Malloc(visilen * sizeof(*visited)); for (i = 0; i < sources.i; ++i) { path = names + sauces[i].name; if (!IsObjectSource(path)) continue; if (startswith(path, genroot)) continue; Appendw(&makefile, '\n'); Appends(&makefile, buildroot); Appends(&makefile, StripExt(buf, path)); Appendw(&makefile, READ64LE(".o: \\\n\t")); Appends(&makefile, path); bzero(visited, visilen * sizeof(*visited)); Bts(visited, i); Dive(&makefile, visited, i); Appendw(&makefile, '\n'); } Appendw(&makefile, '\n'); free(visited); return makefile; } int main(int argc, char *argv[]) { int fd = 1; ssize_t rc; size_t i, n; char *makefile; #ifdef MODE_DBG ShowCrashReports(); #endif prog = argv[0]; if (!prog) prog = "mkdeps"; GetOpts(argc, argv); LoadRelationships(argc, argv); Crunch(); makefile = Explore(); if (outpath && (fd = open(outpath, O_WRONLY | O_CREAT | O_TRUNC, 0644)) == -1) DieSys(outpath); n = appendz(makefile).i; for (i = 0; i < n; i += (size_t)rc) if ((rc = write(fd, makefile + i, n - i)) == -1) DieSys(outpath); if (outpath && close(fd)) DieSys(outpath); free(makefile); free(edges.p); free(sauces); free(names); CheckForMemoryLeaks(); return 0; }