From e4881686b4160c74087ecc9d96df4ed0db6d70ef Mon Sep 17 00:00:00 2001 From: oKatanaaa Date: Tue, 21 Mar 2023 01:46:44 +0400 Subject: [PATCH 1/4] Make WIN32 mmap() improvements (#341) Still not fully working yet. Closes #341 --- .gitignore | 409 +++++++++++++++++++++++++++++++++++++++++++++++++++++ ggml.c | 6 +- ggml.h | 3 + main.cpp | 99 ++++++++++--- mmap.h | 70 ++++++++- 5 files changed, 565 insertions(+), 22 deletions(-) diff --git a/.gitignore b/.gitignore index e388b884e..ec7c50540 100644 --- a/.gitignore +++ b/.gitignore @@ -22,3 +22,412 @@ models/* arm_neon.h compile_commands.json +CMakeFiles/ +CMakeCache.txt + +# Visual Studio stuff +*.exe +*.sln +*.vcxproj +*.vcxproj.filters +cmake_install.cmake +*.dir/ + +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. +## +## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore + +# User-specific files +*.rsuser +*.suo +*.user +*.userosscache +*.sln.docstates + +# User-specific files (MonoDevelop/Xamarin Studio) +*.userprefs + +# Mono auto generated files +mono_crash.* + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +x64/ +x86/ +[Ww][Ii][Nn]32/ +[Aa][Rr][Mm]/ +[Aa][Rr][Mm]64/ +bld/ +[Bb]in/ +[Oo]bj/ +[Ll]og/ +[Ll]ogs/ + +# Visual Studio 2015/2017 cache/options directory +.vs/ +# Uncomment if you have tasks that create the project's static files in wwwroot +#wwwroot/ + +# Visual Studio 2017 auto generated files +Generated\ Files/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +# NUnit +*.VisualState.xml +TestResult.xml +nunit-*.xml + +# Build Results of an ATL Project +[Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +# Benchmark Results +BenchmarkDotNet.Artifacts/ + +# .NET Core +project.lock.json +project.fragment.lock.json +artifacts/ + +# ASP.NET Scaffolding +ScaffoldingReadMe.txt + +# StyleCop +StyleCopReport.xml + +# Files built by Visual Studio +*_i.c +*_p.c +*_h.h +*.ilk +*.meta +*.obj +*.iobj +*.pch +*.pdb +*.ipdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*_wpftmp.csproj +*.log +*.tlog +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog +*.scc + +# Chutzpah Test files +_Chutzpah* + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opendb +*.opensdf +*.sdf +*.cachefile +*.VC.db +*.VC.VC.opendb + +# Visual Studio profiler +*.psess +*.vsp +*.vspx +*.sap + +# Visual Studio Trace Files +*.e2e + +# TFS 2012 Local Workspace +$tf/ + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper +*.DotSettings.user + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# AxoCover is a Code Coverage Tool +.axoCover/* +!.axoCover/settings.json + +# Coverlet is a free, cross platform Code Coverage Tool +coverage*.json +coverage*.xml +coverage*.info + +# Visual Studio code coverage results +*.coverage +*.coveragexml + +# NCrunch +_NCrunch_* +.*crunch*.local.xml +nCrunchTemp_* + +# MightyMoose +*.mm.* +AutoTest.Net/ + +# Web workbench (sass) +.sass-cache/ + +# Installshield output folder +[Ee]xpress/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.[Pp]ublish.xml +*.azurePubxml +# Note: Comment the next line if you want to checkin your web deploy settings, +# but database connection strings (with potential passwords) will be unencrypted +*.pubxml +*.publishproj + +# Microsoft Azure Web App publish settings. Comment the next line if you want to +# checkin your Azure Web App publish settings, but sensitive information contained +# in these scripts will be unencrypted +PublishScripts/ + +# NuGet Packages +*.nupkg +# NuGet Symbol Packages +*.snupkg +# The packages folder can be ignored because of Package Restore +**/[Pp]ackages/* +# except build/, which is used as an MSBuild target. +!**/[Pp]ackages/build/ +# Uncomment if necessary however generally it will be regenerated when needed +#!**/[Pp]ackages/repositories.config +# NuGet v3's project.json files produces more ignorable files +*.nuget.props +*.nuget.targets + +# Microsoft Azure Build Output +csx/ +*.build.csdef + +# Microsoft Azure Emulator +ecf/ +rcf/ + +# Windows Store app package directories and files +AppPackages/ +BundleArtifacts/ +Package.StoreAssociation.xml +_pkginfo.txt +*.appx +*.appxbundle +*.appxupload + +# Visual Studio cache files +# files ending in .cache can be ignored +*.[Cc]ache +# but keep track of directories ending in .cache +!?*.[Cc]ache/ + +# Others +ClientBin/ +~$* +*~ +*.dbmdl +*.dbproj.schemaview +*.jfm +*.pfx +*.publishsettings +orleans.codegen.cs + +# Including strong name files can present a security risk +# (https://github.com/github/gitignore/pull/2483#issue-259490424) +#*.snk + +# Since there are multiple workflows, uncomment next line to ignore bower_components +# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) +#bower_components/ + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file +# to a newer Visual Studio version. Backup files are not needed, +# because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm +ServiceFabricBackup/ +*.rptproj.bak + +# SQL Server files +*.mdf +*.ldf +*.ndf + +# Business Intelligence projects +*.rdl.data +*.bim.layout +*.bim_*.settings +*.rptproj.rsuser +*- [Bb]ackup.rdl +*- [Bb]ackup ([0-9]).rdl +*- [Bb]ackup ([0-9][0-9]).rdl + +# Microsoft Fakes +FakesAssemblies/ + +# GhostDoc plugin setting file +*.GhostDoc.xml + +# Node.js Tools for Visual Studio +.ntvs_analysis.dat +node_modules/ + +# Visual Studio 6 build log +*.plg + +# Visual Studio 6 workspace options file +*.opt + +# Visual Studio 6 auto-generated workspace file (contains which files were open etc.) +*.vbw + +# Visual Studio 6 auto-generated project file (contains which files were open etc.) +*.vbp + +# Visual Studio 6 workspace and project file (working project files containing files to include in project) +*.dsw +*.dsp + +# Visual Studio 6 technical files +*.ncb +*.aps + +# Visual Studio LightSwitch build output +**/*.HTMLClient/GeneratedArtifacts +**/*.DesktopClient/GeneratedArtifacts +**/*.DesktopClient/ModelManifest.xml +**/*.Server/GeneratedArtifacts +**/*.Server/ModelManifest.xml +_Pvt_Extensions + +# Paket dependency manager +.paket/paket.exe +paket-files/ + +# FAKE - F# Make +.fake/ + +# CodeRush personal settings +.cr/personal + +# Python Tools for Visual Studio (PTVS) +__pycache__/ +*.pyc + +# Cake - Uncomment if you are using it +# tools/** +# !tools/packages.config + +# Tabs Studio +*.tss + +# Telerik's JustMock configuration file +*.jmconfig + +# BizTalk build output +*.btp.cs +*.btm.cs +*.odx.cs +*.xsd.cs + +# OpenCover UI analysis results +OpenCover/ + +# Azure Stream Analytics local run output +ASALocalRun/ + +# MSBuild Binary and Structured Log +*.binlog + +# NVidia Nsight GPU debugger configuration file +*.nvuser + +# MFractors (Xamarin productivity tool) working folder +.mfractor/ + +# Local History for Visual Studio +.localhistory/ + +# Visual Studio History (VSHistory) files +.vshistory/ + +# BeatPulse healthcheck temp database +healthchecksdb + +# Backup folder for Package Reference Convert tool in Visual Studio 2017 +MigrationBackup/ + +# Ionide (cross platform F# VS Code tools) working folder +.ionide/ + +# Fody - auto-generated XML schema +FodyWeavers.xsd + +# VS Code files for those working on multiple tools +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +*.code-workspace + +# Local History for Visual Studio Code +.history/ + +# Windows Installer files from build outputs +*.cab +*.msi +*.msix +*.msm +*.msp + +# JetBrains Rider +*.sln.iml diff --git a/ggml.c b/ggml.c index 67fd54937..59a88554a 100644 --- a/ggml.c +++ b/ggml.c @@ -2,7 +2,7 @@ #include "ggml.h" #if defined(_MSC_VER) || defined(__MINGW32__) -#include // using malloc.h with MSC/MINGW +//#include // using malloc.h with MSC/MINGW #elif !defined(__FreeBSD__) && !defined(__NetBSD__) #include #endif @@ -2437,7 +2437,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) { *ctx = (struct ggml_context) { /*.mem_size =*/ params.mem_size, - /*.mem_buffer =*/ params.mem_buffer ? params.mem_buffer : malloc(params.mem_size), + /*.mem_buffer =*/ params.mem_buffer ? params.mem_buffer : _malloc(params.mem_size), /*.mem_buffer_owned =*/ params.mem_buffer ? false : true, /*.n_objects =*/ 0, /*.objects_begin =*/ NULL, @@ -2469,7 +2469,7 @@ void ggml_free(struct ggml_context * ctx) { __func__, i, ctx->n_objects, ctx->objects_end->offs + ctx->objects_end->size); if (ctx->mem_buffer_owned) { - free(ctx->mem_buffer); + _free(ctx->mem_buffer); } found = true; diff --git a/ggml.h b/ggml.h index 7ce655c1b..38b991f8a 100644 --- a/ggml.h +++ b/ggml.h @@ -183,6 +183,9 @@ extern "C" { #define GGML_MAX_CONTEXTS 64 #define GGML_MAX_OPT 4 +void* _malloc(size_t n); +void _free(void* p); + #ifdef __ARM_NEON // we use the built-in 16-bit float type typedef __fp16 ggml_fp16_t; diff --git a/main.cpp b/main.cpp index 4b2afa54d..9e8087a15 100644 --- a/main.cpp +++ b/main.cpp @@ -1,3 +1,7 @@ +#if defined(_MSC_VER) || defined(__MINGW32__) +#define NOMINMAX +#endif + #include "ggml.h" #include "utils.h" @@ -19,6 +23,10 @@ #include #include #include +#else +#include +#define msync(addr, len_bytes, flag) winMSync +#define MS_ASYNC 0 #endif #define ROUNDUP(X, K) (((X) + (K)-1) & -(K)) @@ -96,6 +104,7 @@ struct llama_model { std::map tensors; }; + struct magic { uint32_t magic; std::atomic lock; @@ -103,10 +112,37 @@ struct magic { size_t commit; size_t offset; size_t capacity; - gpt_vocab *vocab; - llama_model *model; + gpt_vocab* vocab; + llama_model* model; }; +static void winMSync(magic* addr, size_t len_bytes) { + bool success = FlushViewOfFile((void*)addr, len_bytes); + if (!success) { + LPVOID lpMsgBuf; + LPVOID lpDisplayBuf; + DWORD error_code = GetLastError(); + FormatMessage( + FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, + error_code, + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + (LPTSTR)&lpMsgBuf, + 0, NULL); + lpDisplayBuf = (LPVOID)LocalAlloc(LMEM_ZEROINIT, + (lstrlen((LPCTSTR)lpMsgBuf) + 40) * sizeof(TCHAR)); + StringCchPrintf((LPTSTR)lpDisplayBuf, + LocalSize(lpDisplayBuf) / sizeof(TCHAR), + TEXT("failed with error %d: %s"), + error_code, lpMsgBuf); + } + HANDLE hFile = (HANDLE)_get_osfhandle(addr->fd); + FlushFileBuffers(hFile); +} + + static struct magic *mag; static inline void spin_lock(std::atomic &lock) { @@ -129,17 +165,26 @@ static void magic_commit(void) { mag->offset = mag->capacity; mag->commit = mag->capacity; mag->magic = 0xFEEDABEE; - msync(mag, mag->commit, MS_ASYNC); + bool success = msync(mag, mag->commit, MS_ASYNC); } static void magic_init(void) { int fd; size_t n; +#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) struct stat st; +#else + struct _stat64 st; +#endif if (mag) return; n = ROUNDUP(sizeof(struct magic), MAGIC_GRAN); if ((fd = open(MAGIC_PATH, O_RDWR)) != -1) { - fstat(fd, &st); + int result = fstat(fd, &st); + int error = errno; + if (errno == EBADF) + fprintf(stderr, "Bad file descriptor.\n"); + else if (errno == EINVAL) + fprintf(stderr, "Invalid argument to _fstat.\n"); if (st.st_size >= n) { mag = (struct magic *)Mmap(MAGIC_ADDR, n, PROT_READ | PROT_WRITE, @@ -182,9 +227,9 @@ void *memalign(size_t a, size_t n) { i = i + sizeof(size_t); i = ROUNDUP(i, a); j = ROUNDUP(i + m, MAGIC_GRAN); - if (j > mag->capacity) { + //if (j > mag->capacity) { if (!mag->magic) { - ftruncate(mag->fd, j); + int result = ftruncate(mag->fd, j); p = mmap(MAGIC_ADDR + mag->capacity, j - mag->capacity, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, mag->fd, mag->capacity); @@ -199,7 +244,7 @@ void *memalign(size_t a, size_t n) { spin_unlock(mag->lock); return 0; } - } + //} mag->offset = i + m; spin_unlock(mag->lock); p = MAGIC_ADDR + i; @@ -207,7 +252,7 @@ void *memalign(size_t a, size_t n) { return p; } -void *malloc(size_t n) { +void *_malloc(size_t n) { return memalign(MAGIC_ALGN, n); } @@ -215,33 +260,53 @@ size_t malloc_usable_size(const void *p) { return ((const size_t *)p)[-1]; } -void *calloc(size_t n, size_t z) { +void *_calloc(size_t n, size_t z) { void *p; - if ((p = malloc((n *= z)))) { + if ((p = _malloc((n *= z)))) { memset(p, 0, n); } return p; } -void free(void *p) { +void _free(void *p) { // do nothing } -void *realloc(void *p, size_t n) { +void *_realloc(void *p, size_t n) { void *q; if (!p) { - return malloc(n); + return _malloc(n); } if (!n) { - free(p); + _free(p); return 0; } - if ((q = malloc(n))) { + if ((q = _malloc(n))) { memcpy(q, p, ((const size_t *)p)[-1]); } return q; } +#if defined(malloc) +# undef malloc +#endif +#define malloc(x) _malloc(x) + +#if defined(calloc) +# undef calloc +#endif +#define calloc(x) _calloc(x) + +#if defined(realloc) +# undef realloc +#endif +#define realloc(x) _realloc(x) + +#if defined(free) +# undef free +#endif +#define free(x) _free(x) + // load the model's weights from a file bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab & vocab, int n_ctx) { fprintf(stderr, "%s: loading model from '%s' - please wait ...\n", __func__, fname.c_str()); @@ -707,7 +772,7 @@ bool llama_eval( const int d_key = n_embd/n_head; static size_t buf_size = 512u*1024*1024; - static void * buf = malloc(buf_size); + static void * buf = _malloc(buf_size); if (mem_per_token > 0 && mem_per_token*N > buf_size) { const size_t buf_size_new = 1.1*(mem_per_token*N); // add 10% to account for ggml object overhead @@ -715,7 +780,7 @@ bool llama_eval( // reallocate buf_size = buf_size_new; - buf = realloc(buf, buf_size); + buf = _realloc(buf, buf_size); if (buf == nullptr) { fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size); return false; diff --git a/mmap.h b/mmap.h index 3559f51a3..851553048 100644 --- a/mmap.h +++ b/mmap.h @@ -20,6 +20,7 @@ #if defined(_MSC_VER) || defined(__MINGW32__) #ifndef __MINGW32__ #include +#include #else #include #endif @@ -101,7 +102,7 @@ #define close _close #endif #ifndef fstat -#define fstat _fstat +#define fstat _fstati64 #endif #ifndef madvise #define madvise WinMadvise @@ -113,6 +114,7 @@ static std::atomic g_winlock; static std::map g_winmap; + static void WinLock(void) { while (!g_winlock.exchange(1, std::memory_order_acquire)); } @@ -121,7 +123,30 @@ static void WunLock(void) { g_winlock.store(0, std::memory_order_release); } -static int WinMadvise(int fd, size_t length, int flags) { +static int WinMadvise(char* fd, size_t length, int flags) { + auto p_handle = GetCurrentProcess(); + struct _WIN32_MEMORY_RANGE_ENTRY entry((void*)fd, length); + bool success = PrefetchVirtualMemory(p_handle, 1, &entry, 0); + if (!success) { + LPVOID lpMsgBuf; + LPVOID lpDisplayBuf; + DWORD error_code = GetLastError(); + FormatMessage( + FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, + error_code, + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + (LPTSTR)&lpMsgBuf, + 0, NULL); + lpDisplayBuf = (LPVOID)LocalAlloc(LMEM_ZEROINIT, + (lstrlen((LPCTSTR)lpMsgBuf) + 256) * sizeof(TCHAR)); + StringCchPrintf((LPTSTR)lpDisplayBuf, + LocalSize(lpDisplayBuf) / sizeof(TCHAR), + TEXT("%s failed with error %d: %s"), + error_code, lpMsgBuf); + } return 0; } @@ -215,11 +240,52 @@ static void *WinMap(void *addr, size_t length, int prot, (offset + length) >> 32, (offset + length), 0); if (!hand) { + LPVOID lpMsgBuf; + LPVOID lpDisplayBuf; + DWORD error_code = GetLastError(); + FormatMessage( + FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, + error_code, + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + (LPTSTR)&lpMsgBuf, + 0, NULL); + lpDisplayBuf = (LPVOID)LocalAlloc(LMEM_ZEROINIT, + (lstrlen((LPCTSTR)lpMsgBuf) + 256) * sizeof(TCHAR)); + StringCchPrintf((LPTSTR)lpDisplayBuf, + LocalSize(lpDisplayBuf) / sizeof(TCHAR), + TEXT("%s failed with error %d: %s"), + error_code, lpMsgBuf); return MAP_FAILED; } + if (winprot == PAGE_WRITECOPY) { + access = FILE_MAP_COPY; + } + res = MapViewOfFileEx(hand, access, offset >> 32, offset, length, addr); if (!res) { + LPVOID lpMsgBuf; + LPVOID lpDisplayBuf; + DWORD error_code = GetLastError(); + FormatMessage( + FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, + error_code, + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + (LPTSTR)&lpMsgBuf, + 0, NULL); + lpDisplayBuf = (LPVOID)LocalAlloc(LMEM_ZEROINIT, + (lstrlen((LPCTSTR)lpMsgBuf) + 40) * sizeof(TCHAR)); + StringCchPrintf((LPTSTR)lpDisplayBuf, + LocalSize(lpDisplayBuf) / sizeof(TCHAR), + TEXT("failed with error %d: %s"), + error_code, lpMsgBuf); + fprintf(stderr, (char*)lpDisplayBuf); CloseHandle(hand); return MAP_FAILED; } From cbddf4661be736c299598ee37ceb662cdf7bdd7c Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Tue, 28 Mar 2023 09:27:41 -0700 Subject: [PATCH 2/4] Get mmap() working with WIN32 MSVC - We have pretty high quality POSIX polyfills now - We no longer need to override malloc() Tracked by issue #91 Improves upon #341 --- .gitignore | 407 ----------------------------------- CMakeLists.txt | 4 +- Makefile | 11 +- ggml.c | 4 +- ggml.h | 3 - main.cpp | 172 ++++++--------- mmap.c | 570 +++++++++++++++++++++++++++++++++++++++++++++++++ mmap.h | 280 +++++------------------- 8 files changed, 700 insertions(+), 751 deletions(-) create mode 100644 mmap.c diff --git a/.gitignore b/.gitignore index ec7c50540..7e9e7a273 100644 --- a/.gitignore +++ b/.gitignore @@ -24,410 +24,3 @@ arm_neon.h compile_commands.json CMakeFiles/ CMakeCache.txt - -# Visual Studio stuff -*.exe -*.sln -*.vcxproj -*.vcxproj.filters -cmake_install.cmake -*.dir/ - -## Ignore Visual Studio temporary files, build results, and -## files generated by popular Visual Studio add-ons. -## -## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore - -# User-specific files -*.rsuser -*.suo -*.user -*.userosscache -*.sln.docstates - -# User-specific files (MonoDevelop/Xamarin Studio) -*.userprefs - -# Mono auto generated files -mono_crash.* - -# Build results -[Dd]ebug/ -[Dd]ebugPublic/ -[Rr]elease/ -[Rr]eleases/ -x64/ -x86/ -[Ww][Ii][Nn]32/ -[Aa][Rr][Mm]/ -[Aa][Rr][Mm]64/ -bld/ -[Bb]in/ -[Oo]bj/ -[Ll]og/ -[Ll]ogs/ - -# Visual Studio 2015/2017 cache/options directory -.vs/ -# Uncomment if you have tasks that create the project's static files in wwwroot -#wwwroot/ - -# Visual Studio 2017 auto generated files -Generated\ Files/ - -# MSTest test Results -[Tt]est[Rr]esult*/ -[Bb]uild[Ll]og.* - -# NUnit -*.VisualState.xml -TestResult.xml -nunit-*.xml - -# Build Results of an ATL Project -[Dd]ebugPS/ -[Rr]eleasePS/ -dlldata.c - -# Benchmark Results -BenchmarkDotNet.Artifacts/ - -# .NET Core -project.lock.json -project.fragment.lock.json -artifacts/ - -# ASP.NET Scaffolding -ScaffoldingReadMe.txt - -# StyleCop -StyleCopReport.xml - -# Files built by Visual Studio -*_i.c -*_p.c -*_h.h -*.ilk -*.meta -*.obj -*.iobj -*.pch -*.pdb -*.ipdb -*.pgc -*.pgd -*.rsp -*.sbr -*.tlb -*.tli -*.tlh -*.tmp -*.tmp_proj -*_wpftmp.csproj -*.log -*.tlog -*.vspscc -*.vssscc -.builds -*.pidb -*.svclog -*.scc - -# Chutzpah Test files -_Chutzpah* - -# Visual C++ cache files -ipch/ -*.aps -*.ncb -*.opendb -*.opensdf -*.sdf -*.cachefile -*.VC.db -*.VC.VC.opendb - -# Visual Studio profiler -*.psess -*.vsp -*.vspx -*.sap - -# Visual Studio Trace Files -*.e2e - -# TFS 2012 Local Workspace -$tf/ - -# Guidance Automation Toolkit -*.gpState - -# ReSharper is a .NET coding add-in -_ReSharper*/ -*.[Rr]e[Ss]harper -*.DotSettings.user - -# TeamCity is a build add-in -_TeamCity* - -# DotCover is a Code Coverage Tool -*.dotCover - -# AxoCover is a Code Coverage Tool -.axoCover/* -!.axoCover/settings.json - -# Coverlet is a free, cross platform Code Coverage Tool -coverage*.json -coverage*.xml -coverage*.info - -# Visual Studio code coverage results -*.coverage -*.coveragexml - -# NCrunch -_NCrunch_* -.*crunch*.local.xml -nCrunchTemp_* - -# MightyMoose -*.mm.* -AutoTest.Net/ - -# Web workbench (sass) -.sass-cache/ - -# Installshield output folder -[Ee]xpress/ - -# DocProject is a documentation generator add-in -DocProject/buildhelp/ -DocProject/Help/*.HxT -DocProject/Help/*.HxC -DocProject/Help/*.hhc -DocProject/Help/*.hhk -DocProject/Help/*.hhp -DocProject/Help/Html2 -DocProject/Help/html - -# Click-Once directory -publish/ - -# Publish Web Output -*.[Pp]ublish.xml -*.azurePubxml -# Note: Comment the next line if you want to checkin your web deploy settings, -# but database connection strings (with potential passwords) will be unencrypted -*.pubxml -*.publishproj - -# Microsoft Azure Web App publish settings. Comment the next line if you want to -# checkin your Azure Web App publish settings, but sensitive information contained -# in these scripts will be unencrypted -PublishScripts/ - -# NuGet Packages -*.nupkg -# NuGet Symbol Packages -*.snupkg -# The packages folder can be ignored because of Package Restore -**/[Pp]ackages/* -# except build/, which is used as an MSBuild target. -!**/[Pp]ackages/build/ -# Uncomment if necessary however generally it will be regenerated when needed -#!**/[Pp]ackages/repositories.config -# NuGet v3's project.json files produces more ignorable files -*.nuget.props -*.nuget.targets - -# Microsoft Azure Build Output -csx/ -*.build.csdef - -# Microsoft Azure Emulator -ecf/ -rcf/ - -# Windows Store app package directories and files -AppPackages/ -BundleArtifacts/ -Package.StoreAssociation.xml -_pkginfo.txt -*.appx -*.appxbundle -*.appxupload - -# Visual Studio cache files -# files ending in .cache can be ignored -*.[Cc]ache -# but keep track of directories ending in .cache -!?*.[Cc]ache/ - -# Others -ClientBin/ -~$* -*~ -*.dbmdl -*.dbproj.schemaview -*.jfm -*.pfx -*.publishsettings -orleans.codegen.cs - -# Including strong name files can present a security risk -# (https://github.com/github/gitignore/pull/2483#issue-259490424) -#*.snk - -# Since there are multiple workflows, uncomment next line to ignore bower_components -# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) -#bower_components/ - -# RIA/Silverlight projects -Generated_Code/ - -# Backup & report files from converting an old project file -# to a newer Visual Studio version. Backup files are not needed, -# because we have git ;-) -_UpgradeReport_Files/ -Backup*/ -UpgradeLog*.XML -UpgradeLog*.htm -ServiceFabricBackup/ -*.rptproj.bak - -# SQL Server files -*.mdf -*.ldf -*.ndf - -# Business Intelligence projects -*.rdl.data -*.bim.layout -*.bim_*.settings -*.rptproj.rsuser -*- [Bb]ackup.rdl -*- [Bb]ackup ([0-9]).rdl -*- [Bb]ackup ([0-9][0-9]).rdl - -# Microsoft Fakes -FakesAssemblies/ - -# GhostDoc plugin setting file -*.GhostDoc.xml - -# Node.js Tools for Visual Studio -.ntvs_analysis.dat -node_modules/ - -# Visual Studio 6 build log -*.plg - -# Visual Studio 6 workspace options file -*.opt - -# Visual Studio 6 auto-generated workspace file (contains which files were open etc.) -*.vbw - -# Visual Studio 6 auto-generated project file (contains which files were open etc.) -*.vbp - -# Visual Studio 6 workspace and project file (working project files containing files to include in project) -*.dsw -*.dsp - -# Visual Studio 6 technical files -*.ncb -*.aps - -# Visual Studio LightSwitch build output -**/*.HTMLClient/GeneratedArtifacts -**/*.DesktopClient/GeneratedArtifacts -**/*.DesktopClient/ModelManifest.xml -**/*.Server/GeneratedArtifacts -**/*.Server/ModelManifest.xml -_Pvt_Extensions - -# Paket dependency manager -.paket/paket.exe -paket-files/ - -# FAKE - F# Make -.fake/ - -# CodeRush personal settings -.cr/personal - -# Python Tools for Visual Studio (PTVS) -__pycache__/ -*.pyc - -# Cake - Uncomment if you are using it -# tools/** -# !tools/packages.config - -# Tabs Studio -*.tss - -# Telerik's JustMock configuration file -*.jmconfig - -# BizTalk build output -*.btp.cs -*.btm.cs -*.odx.cs -*.xsd.cs - -# OpenCover UI analysis results -OpenCover/ - -# Azure Stream Analytics local run output -ASALocalRun/ - -# MSBuild Binary and Structured Log -*.binlog - -# NVidia Nsight GPU debugger configuration file -*.nvuser - -# MFractors (Xamarin productivity tool) working folder -.mfractor/ - -# Local History for Visual Studio -.localhistory/ - -# Visual Studio History (VSHistory) files -.vshistory/ - -# BeatPulse healthcheck temp database -healthchecksdb - -# Backup folder for Package Reference Convert tool in Visual Studio 2017 -MigrationBackup/ - -# Ionide (cross platform F# VS Code tools) working folder -.ionide/ - -# Fody - auto-generated XML schema -FodyWeavers.xsd - -# VS Code files for those working on multiple tools -.vscode/* -!.vscode/settings.json -!.vscode/tasks.json -!.vscode/launch.json -!.vscode/extensions.json -*.code-workspace - -# Local History for Visual Studio Code -.history/ - -# Windows Installer files from build outputs -*.cab -*.msi -*.msix -*.msm -*.msp - -# JetBrains Rider -*.sln.iml diff --git a/CMakeLists.txt b/CMakeLists.txt index ca3be38a5..24213b5d6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -107,7 +107,9 @@ endif() add_executable(llama main.cpp utils.cpp - utils.h) + utils.h + mmap.c + mmap.h) add_executable(quantize quantize.cpp diff --git a/Makefile b/Makefile index 56858b376..044e7d7fe 100644 --- a/Makefile +++ b/Makefile @@ -30,8 +30,8 @@ endif # Compile flags # -CFLAGS = -I. -O3 -DNDEBUG -std=c11 -fPIC -g -fno-omit-frame-pointer -CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC -g -fno-omit-frame-pointer +CFLAGS = -I. -O3 -DNDEBUG -std=c11 -fPIC +CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC LDFLAGS = # OS specific @@ -185,14 +185,17 @@ default: main quantize ggml.o: ggml.c ggml.h $(CC) $(CFLAGS) -c ggml.c -o ggml.o +mmap.o: mmap.c mmap.h + $(CC) $(CFLAGS) -c mmap.c -o mmap.o + utils.o: utils.cpp utils.h $(CXX) $(CXXFLAGS) -c utils.cpp -o utils.o clean: rm -f *.o main quantize -main: main.cpp ggml.o utils.o - $(CXX) $(CXXFLAGS) main.cpp ggml.o utils.o -o main $(LDFLAGS) +main: main.cpp ggml.o utils.o mmap.o + $(CXX) $(CXXFLAGS) main.cpp ggml.o utils.o mmap.o -o main $(LDFLAGS) ./main -h quantize: quantize.cpp ggml.o utils.o diff --git a/ggml.c b/ggml.c index 59a88554a..0c7aee749 100644 --- a/ggml.c +++ b/ggml.c @@ -2437,7 +2437,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) { *ctx = (struct ggml_context) { /*.mem_size =*/ params.mem_size, - /*.mem_buffer =*/ params.mem_buffer ? params.mem_buffer : _malloc(params.mem_size), + /*.mem_buffer =*/ params.mem_buffer ? params.mem_buffer : malloc(params.mem_size), /*.mem_buffer_owned =*/ params.mem_buffer ? false : true, /*.n_objects =*/ 0, /*.objects_begin =*/ NULL, @@ -2469,7 +2469,7 @@ void ggml_free(struct ggml_context * ctx) { __func__, i, ctx->n_objects, ctx->objects_end->offs + ctx->objects_end->size); if (ctx->mem_buffer_owned) { - _free(ctx->mem_buffer); + free(ctx->mem_buffer); } found = true; diff --git a/ggml.h b/ggml.h index 38b991f8a..7ce655c1b 100644 --- a/ggml.h +++ b/ggml.h @@ -183,9 +183,6 @@ extern "C" { #define GGML_MAX_CONTEXTS 64 #define GGML_MAX_OPT 4 -void* _malloc(size_t n); -void _free(void* p); - #ifdef __ARM_NEON // we use the built-in 16-bit float type typedef __fp16 ggml_fp16_t; diff --git a/main.cpp b/main.cpp index 9e8087a15..a82ace48d 100644 --- a/main.cpp +++ b/main.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -23,10 +24,6 @@ #include #include #include -#else -#include -#define msync(addr, len_bytes, flag) winMSync -#define MS_ASYNC 0 #endif #define ROUNDUP(X, K) (((X) + (K)-1) & -(K)) @@ -34,7 +31,7 @@ #define MAGIC_PATH "magic.dat" #define MAGIC_ADDR (char *)0x330000000000 -#define MAGIC_GRAN 2097152 +#define MAGIC_GRAN 65536 #define MAGIC_ALGN (sizeof(size_t) * 2) #define ANSI_COLOR_RED "\x1b[31m" @@ -104,49 +101,21 @@ struct llama_model { std::map tensors; }; - struct magic { uint32_t magic; std::atomic lock; int fd; - size_t commit; - size_t offset; - size_t capacity; - gpt_vocab* vocab; - llama_model* model; + uint64_t commit; + uint64_t offset; + uint64_t capacity; + gpt_vocab *vocab; + llama_model *model; }; -static void winMSync(magic* addr, size_t len_bytes) { - bool success = FlushViewOfFile((void*)addr, len_bytes); - if (!success) { - LPVOID lpMsgBuf; - LPVOID lpDisplayBuf; - DWORD error_code = GetLastError(); - FormatMessage( - FORMAT_MESSAGE_ALLOCATE_BUFFER | - FORMAT_MESSAGE_FROM_SYSTEM | - FORMAT_MESSAGE_IGNORE_INSERTS, - NULL, - error_code, - MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), - (LPTSTR)&lpMsgBuf, - 0, NULL); - lpDisplayBuf = (LPVOID)LocalAlloc(LMEM_ZEROINIT, - (lstrlen((LPCTSTR)lpMsgBuf) + 40) * sizeof(TCHAR)); - StringCchPrintf((LPTSTR)lpDisplayBuf, - LocalSize(lpDisplayBuf) / sizeof(TCHAR), - TEXT("failed with error %d: %s"), - error_code, lpMsgBuf); - } - HANDLE hFile = (HANDLE)_get_osfhandle(addr->fd); - FlushFileBuffers(hFile); -} - - static struct magic *mag; static inline void spin_lock(std::atomic &lock) { - while (!lock.exchange(1, std::memory_order_acquire)); + while (lock.exchange(1, std::memory_order_acquire)); } static inline void spin_unlock(std::atomic &lock) { @@ -162,62 +131,64 @@ static void *Mmap(void *addr, size_t length, int prot, int flags, int fd, off_t } static void magic_commit(void) { - mag->offset = mag->capacity; - mag->commit = mag->capacity; + mag->commit = ROUNDUP(mag->offset, MAGIC_GRAN); mag->magic = 0xFEEDABEE; - bool success = msync(mag, mag->commit, MS_ASYNC); + if (msync(mag, mag->commit, MS_ASYNC) == -1) { + perror("msync"); + exit(77); + } } static void magic_init(void) { int fd; size_t n; -#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) - struct stat st; -#else - struct _stat64 st; -#endif + int64_t size; if (mag) return; n = ROUNDUP(sizeof(struct magic), MAGIC_GRAN); if ((fd = open(MAGIC_PATH, O_RDWR)) != -1) { - int result = fstat(fd, &st); - int error = errno; - if (errno == EBADF) - fprintf(stderr, "Bad file descriptor.\n"); - else if (errno == EINVAL) - fprintf(stderr, "Invalid argument to _fstat.\n"); - if (st.st_size >= n) { + if ((size = lseek(fd, 0, SEEK_END)) == -1) { + perror("lseek"); + exit(77); + } + if (size >= n) { mag = (struct magic *)Mmap(MAGIC_ADDR, n, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED, fd, 0); if (mag->magic == 0xFEEDABEE) { - mag = (struct magic *)Mmap(MAGIC_ADDR, mag->capacity, + mag = (struct magic *)Mmap(MAGIC_ADDR, mag->commit, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED, fd, 0); madvise(MAGIC_ADDR, mag->capacity, MADV_WILLNEED); - ftruncate(fd, mag->commit); mag->offset = mag->commit; mag->capacity = mag->commit; mag->fd = -1; return; } } - ftruncate(fd, 0); + if (ftruncate(fd, 0) == -1) { + perror("ftruncate"); + exit(77); + } } else if ((fd = open(MAGIC_PATH, O_RDWR | O_CREAT | O_TRUNC, 0644)) == -1) { perror(MAGIC_PATH); exit(77); } - ftruncate(fd, n); + if (ftruncate(fd, n) == -1) { + perror("ftruncate"); + exit(77); + } mag = (struct magic *)Mmap(MAGIC_ADDR, n, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fd, 0); - mag->offset = MAGIC_GRAN; + mag->offset = n; + mag->capacity = n; mag->fd = fd; } -void *memalign(size_t a, size_t n) { +void *magic_memalign(size_t a, size_t n) { void *p; - size_t i, j, k, m; static int count; + size_t i, j, k, m, c2; magic_init(); if (a < MAGIC_ALGN) a = MAGIC_ALGN; while (!IS2POW(a)) ++a; @@ -227,24 +198,37 @@ void *memalign(size_t a, size_t n) { i = i + sizeof(size_t); i = ROUNDUP(i, a); j = ROUNDUP(i + m, MAGIC_GRAN); - //if (j > mag->capacity) { + if (j > mag->capacity) { + c2 = mag->capacity; + if (!c2) { + c2 = MAGIC_GRAN; + } + while (j > c2) { + c2 += c2 >> 4; + c2 = ROUNDUP(c2, MAGIC_GRAN); + } if (!mag->magic) { - int result = ftruncate(mag->fd, j); + if (ftruncate(mag->fd, c2) == -1) { + perror("ftruncate"); + spin_unlock(mag->lock); + return 0; + } p = mmap(MAGIC_ADDR + mag->capacity, - j - mag->capacity, PROT_READ | PROT_WRITE, + c2 - mag->capacity, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, mag->fd, mag->capacity); } else { p = mmap(MAGIC_ADDR + mag->capacity, - j - mag->capacity, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + c2 - mag->capacity, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); } if (p != MAP_FAILED) { - mag->capacity = j; + mag->capacity = c2; } else { + perror("mmap"); spin_unlock(mag->lock); return 0; } - //} + } mag->offset = i + m; spin_unlock(mag->lock); p = MAGIC_ADDR + i; @@ -252,60 +236,44 @@ void *memalign(size_t a, size_t n) { return p; } -void *_malloc(size_t n) { - return memalign(MAGIC_ALGN, n); +void *magic_malloc(size_t n) { + return magic_memalign(MAGIC_ALGN, n); } -size_t malloc_usable_size(const void *p) { - return ((const size_t *)p)[-1]; -} - -void *_calloc(size_t n, size_t z) { +void *magic_calloc(size_t n, size_t z) { void *p; - if ((p = _malloc((n *= z)))) { + if ((p = magic_malloc((n *= z)))) { memset(p, 0, n); } return p; } -void _free(void *p) { +void magic_free(void *p) { // do nothing } -void *_realloc(void *p, size_t n) { +void *magic_realloc(void *p, size_t n) { void *q; if (!p) { - return _malloc(n); + return magic_malloc(n); } if (!n) { - _free(p); + magic_free(p); return 0; } - if ((q = _malloc(n))) { + if ((q = magic_malloc(n))) { memcpy(q, p, ((const size_t *)p)[-1]); } return q; } -#if defined(malloc) -# undef malloc -#endif -#define malloc(x) _malloc(x) +void* operator new(size_t size) { + return magic_malloc(size); +} -#if defined(calloc) -# undef calloc -#endif -#define calloc(x) _calloc(x) - -#if defined(realloc) -# undef realloc -#endif -#define realloc(x) _realloc(x) - -#if defined(free) -# undef free -#endif -#define free(x) _free(x) +void operator delete(void* p) { + magic_free(p); +} // load the model's weights from a file bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab & vocab, int n_ctx) { @@ -451,7 +419,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab { struct ggml_init_params params = { /*.mem_size =*/ ctx_size, - /*.mem_buffer =*/ NULL, + /*.mem_buffer =*/ magic_malloc(ctx_size), }; model.ctx = ggml_init(params); @@ -772,7 +740,7 @@ bool llama_eval( const int d_key = n_embd/n_head; static size_t buf_size = 512u*1024*1024; - static void * buf = _malloc(buf_size); + static void * buf = malloc(buf_size); if (mem_per_token > 0 && mem_per_token*N > buf_size) { const size_t buf_size_new = 1.1*(mem_per_token*N); // add 10% to account for ggml object overhead @@ -780,7 +748,7 @@ bool llama_eval( // reallocate buf_size = buf_size_new; - buf = _realloc(buf, buf_size); + buf = realloc(buf, buf_size); if (buf == nullptr) { fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size); return false; diff --git a/mmap.c b/mmap.c new file mode 100644 index 000000000..d9ceb3348 --- /dev/null +++ b/mmap.c @@ -0,0 +1,570 @@ +// Lightweight Portable mmap() Polyfill +// +// 1. Supports POSIX.1 +// +// The baseline POSIX standard doesn't specify MAP_ANONYMOUS. This +// library makes sure, on the hypothetical UNIX systems that don't +// have it, or on the mainstream UNIX platforms where the user has +// chosen to define _POSIX_C_SOURCE that cause headers to undefine +// it, this implementation will fallback to creating a secure temp +// file, for each anonymous mapping. +// +// 2. Supports Windows w/ Visual Studio +// +// On Windows Vista and later an API exists that's almost as good as +// mmap(). However code that uses this library should conform to the +// subset of behaviors Microsoft accommodates. +// +// Caveats +// +// - You should just assume the page size is 64kb. That's how it is on +// Windows and it usually goes faster to assume that elsewhere too. +// +// - Not designed to support mprotect() at the moment. In order to +// support this, we'd need to consider _open(O_ACCMODE) on Windows +// and then have mmap() be more greedy about permissions. +// +// - There's limited support for being clever with memory intervals. +// For example, you can't punch a hole in a memory map on Windows. +// This abstraction does aim to offer more flexibility than WIN32. +// There should also be good error reporting for unsupported uses. + +#include "mmap.h" + +#ifdef NEED_POSIX_MMAP +#include + +static void *PosixMmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset) { + int tfd; + void* res; + char path[] = "/tmp/llama.dat.XXXXXX"; + if (~flags & MAP_ANONYMOUS) { + res = mmap(addr, length, prot, flags, fd, offset); + } else if ((tfd = mkstemp(path)) != -1) { + unlink(path); + if (!ftruncate(tfd, length)) { + res = mmap(addr, length, prot, flags & ~MAP_ANONYMOUS, tfd, 0); + } else { + res = MAP_FAILED; + } + close(tfd); + } else { + res = MAP_FAILED; + } + return res; +} + +#elif defined(NEED_WIN32_MMAP) +#include +#include +#include +#include + +struct WinMap { // O(n) no ordering no overlaps + HANDLE hand; // zero means array slots empty + HANDLE fand; // for the original file, or -1 + uintptr_t addr; // base address (64 kb aligned) + uintptr_t length; // byte size (>0, rounded 64kb) +}; + +struct WinMaps { + int n; + struct WinMap *p; + volatile long lock; +}; + +static struct WinMaps g_winmaps; + +static inline uintptr_t Min(uintptr_t x, uintptr_t y) { + return y > x ? x : y; +} + +static inline uintptr_t Max(uintptr_t x, uintptr_t y) { + return y < x ? x : y; +} + +static inline uintptr_t Roundup(uintptr_t x, intptr_t a) { + assert(a > 0); + assert(!(a & (a - 1))); + return (x + (a - 1)) & -a; +} + +static inline void Lock(void) { + long x; + for (;;) { + x = InterlockedExchange(&g_winmaps.lock, 1); + if (!x) break; + assert(x == 1); + } +} + +static inline void Unlock(void) { + assert(g_winmaps.lock == 1); + g_winmaps.lock = 0; +} + +static int WinStrerror(int err, char *buf, int size) { + return FormatMessageA( + FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, err, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + buf, size, NULL); +} + +#ifdef NDEBUG +#define LogError(thing) (void)0 +#else +static void LogError(const char* file, int line, const char* thing) { +#define LogError(thing) LogError(__FILE__, __LINE__, thing) + fprintf(stderr, "%s:%d: error: %s\n", file, line, thing); +} +#endif + +#ifdef NDEBUG +#define LogWindowsError(thing) (void)0 +#else +static void LogWindowsError(const char* file, int line, const char* thing) { +#define LogWindowsError(thing) LogWindowsError(__FILE__, __LINE__, thing) + char s[256]; + int e = GetLastError(); + WinStrerror(e, s, sizeof(s)); + fprintf(stderr, "%s:%d: error[%#x]: %s failed: %s\n", file, line, e, thing, s); +} +#endif + +static void *Recalloc(void *ptr, uint64_t newSize) { + HANDLE heap = GetProcessHeap(); + if (!ptr) { + return HeapAlloc(heap, HEAP_ZERO_MEMORY, newSize); + } + if (!newSize) { + HeapFree(heap, 0, ptr); + return 0; + } + return HeapReAlloc(heap, HEAP_ZERO_MEMORY, ptr, newSize); +} + +uint64_t WinSeek(int fd, uint64_t offset, int whence) { + HANDLE hFile; + DWORD winwhence; + LARGE_INTEGER distanceToMove; + LARGE_INTEGER newFilePointer; + distanceToMove.QuadPart = offset; + switch (whence) { + case SEEK_SET: + winwhence = FILE_BEGIN; + break; + case SEEK_CUR: + winwhence = FILE_CURRENT; + break; + case SEEK_END: + winwhence = FILE_END; + break; + default: + LogError("bad lseek() whence"); + errno = EINVAL; + return -1; + } + hFile = (HANDLE)_get_osfhandle(fd); + if (hFile == INVALID_HANDLE_VALUE) { + LogWindowsError("_get_osfhandle"); + errno = EBADF; + return -1; + } + if (GetFileType(hFile) != FILE_TYPE_DISK) { + LogError("bad file type for lseek()"); + errno = ESPIPE; + return -1; + } + if (!SetFilePointerEx(hFile, distanceToMove, &newFilePointer, winwhence)) { + LogWindowsError("SetFilePointerEx"); + errno = EPERM; + return -1; + } + return newFilePointer.QuadPart; +} + +int WinFtruncate(int fd, uint64_t length) { + HANDLE hFile; + LARGE_INTEGER old, neu; + hFile = (HANDLE)_get_osfhandle(fd); + if (hFile == INVALID_HANDLE_VALUE) { + LogWindowsError("_get_osfhandle"); + errno = EBADF; + return -1; + } + // save current file position + old.QuadPart = 0; + neu.QuadPart = 0; + if (!SetFilePointerEx(hFile, neu, &old, FILE_CURRENT)) { + LogWindowsError("SetFilePointerEx#1"); + return -1; + } + // set current position to new file size + neu.QuadPart = length; + if (!SetFilePointerEx(hFile, neu, NULL, FILE_BEGIN)) { + LogWindowsError("SetFilePointerEx#2"); + return -1; + } + // change the file size + if (!SetEndOfFile(hFile)) { + LogWindowsError("SetEndOfFile"); + SetFilePointerEx(hFile, old, NULL, FILE_BEGIN); + return -1; + } + // restore the original file position + // win32 allows this to exceed the end of file + if (!SetFilePointerEx(hFile, old, NULL, FILE_BEGIN)) { + LogWindowsError("SetFilePointerEx>3"); + return -1; + } + return 0; +} + +int WinMadvise(void *addr, uintptr_t length, int advice) { + switch (advice) { + case MADV_NORMAL: + case MADV_DONTNEED: + case MADV_SEQUENTIAL: + return 0; + case MADV_RANDOM: + case MADV_WILLNEED: { + HANDLE proc; + WIN32_MEMORY_RANGE_ENTRY entry; + proc = GetCurrentProcess(); + entry.VirtualAddress = addr; + entry.NumberOfBytes = length; + if (!PrefetchVirtualMemory(proc, 1, &entry, 0)) { + LogWindowsError("PrefetchVirtualMemory"); + errno = ENOMEM; + return -1; + } + return 0; + } + default: + errno = EINVAL; + return -1; + } +} + +int WinUnmap(void *addr, uintptr_t length) { + void *view; + HANDLE hand; + HANDLE fand; + int i, err = 0; + uintptr_t a, b; + uintptr_t x, y; + // compute the requested interval + // 1. length can't be zero + // 2. length is rounded up to the page size + // 3. addr must be aligned to page boundary + a = (uintptr_t)addr; + b = a + Roundup(length, 65536); + if (!length) { + LogError("tried to munmap zero bytes"); + errno = EINVAL; + return -1; + } + if (a & 65535) { + LogError("tried to munmap an address that's not 64kb aligned"); + errno = EINVAL; + return -1; + } + // 1. we permit unmapping multiple maps in one call + // 2. we don't care if the matched mappings aren't contiguous + // 3. it's an error if a matched mapping only partially overlaps + // 4. similar to close() we release all resources possible on error + Lock(); + for (i = 0; i < g_winmaps.n; ++i) { + if (!g_winmaps.p[i].hand) { + // this array slot is empty + continue; + } + // compute overlap between known mapping and requested interval + x = Max(a, g_winmaps.p[i].addr); + y = Min(b, g_winmaps.p[i].addr + g_winmaps.p[i].length); + if (x >= y) { + // there isn't any overlap + continue; + } + if (y - x != g_winmaps.p[i].length) { + // requested interval partially overlapped this mapping + // therefore we can't unmap it and must report an error + LogError("tried to partially unmap a mapping"); + err = ENOMEM; + continue; + } + // save the information we care about + view = (void *)g_winmaps.p[i].addr; + hand = g_winmaps.p[i].hand; + fand = g_winmaps.p[i].fand; + // delete this mapping from the global array + g_winmaps.p[i].hand = 0; + // perform the systems operations + // safe to release lock since g_winmaps.n is monotonic + Unlock(); + if (!UnmapViewOfFile(view)) { + LogWindowsError("UnmapViewOfFile"); + } + if (!CloseHandle(hand)) { + LogWindowsError("CloseHandle#1"); + } + if (fand != INVALID_HANDLE_VALUE) { + if (!CloseHandle(fand)) { + LogWindowsError("CloseHandle#2"); + } + } + Lock(); + } + Unlock(); + if (err) { + errno = err; + return -1; + } + return 0; +} + +void* WinMap(void *addr, uintptr_t length, int prot, int flags, int fd, uint64_t offset) { + int i; + LPVOID res; + HANDLE hand; + HANDLE hFile; + DWORD access; + DWORD wiprot; + uintptr_t fsize; + if (!length) { + LogError("mmap(length) was zero"); + errno = EINVAL; + return MAP_FAILED; + } + length = Roundup(length, 65536); + if ((uintptr_t)addr & 65535) { + if (~flags & MAP_FIXED) { + addr = 0; + } else { + LogError("MAP_FIXED used with address that's not 64kb aligned"); + errno = EINVAL; + return MAP_FAILED; + } + } + // these are the logical flag equivalents for creating mappings. please + // note that any subsequent virtualprotect calls must be a subset of the + // permissions we're using here. that's not a supported use case for us + if (flags & MAP_PRIVATE) { + // private mapping + if (prot & PROT_EXEC) { + if (prot & PROT_WRITE) { + if (flags & MAP_ANONYMOUS) { + wiprot = PAGE_EXECUTE_READWRITE; + access = FILE_MAP_READ | FILE_MAP_WRITE | FILE_MAP_EXECUTE; + } else { + wiprot = PAGE_EXECUTE_WRITECOPY; + access = FILE_MAP_COPY | FILE_MAP_EXECUTE; + } + } else { + wiprot = PAGE_EXECUTE_READ; + access = FILE_MAP_READ | FILE_MAP_EXECUTE; + } + } else if (prot & PROT_WRITE) { + if (flags & MAP_ANONYMOUS) { + wiprot = PAGE_READWRITE; + access = FILE_MAP_READ | FILE_MAP_WRITE; + } else { + wiprot = PAGE_WRITECOPY; + access = FILE_MAP_COPY; + } + } else { + wiprot = PAGE_READONLY; + access = FILE_MAP_READ; + } + } else { + // shared mapping + if (prot & PROT_EXEC) { + if (prot & PROT_WRITE) { + wiprot = PAGE_EXECUTE_READWRITE; + access = FILE_MAP_READ | FILE_MAP_WRITE | FILE_MAP_EXECUTE; + } else { + wiprot = PAGE_EXECUTE_READ; + access = FILE_MAP_READ | FILE_MAP_EXECUTE; + } + } else if (prot & PROT_WRITE) { + wiprot = PAGE_READWRITE; + access = FILE_MAP_READ | FILE_MAP_WRITE; + } else { + wiprot = PAGE_READONLY; + access = FILE_MAP_READ; + } + } + if (flags & MAP_ANONYMOUS) { + hFile = INVALID_HANDLE_VALUE; + fsize = length; + offset = 0; + } else { + fsize = 0; + hFile = (HANDLE)_get_osfhandle(fd); + if (hFile == INVALID_HANDLE_VALUE) { + LogWindowsError("_get_osfhandle"); + errno = EBADF; + return MAP_FAILED; + } + if (!DuplicateHandle(GetCurrentProcess(), hFile, + GetCurrentProcess(), &hFile, + 0, FALSE, DUPLICATE_SAME_ACCESS)) { + LogWindowsError("DuplicateHandle"); + errno = EBADF; + return MAP_FAILED; + } + } + if (flags & MAP_FIXED) { + if (!addr) { + // zero chance of microsoft letting us map the null page + if (hFile != INVALID_HANDLE_VALUE) { + CloseHandle(hFile); + } + errno = EINVAL; + return MAP_FAILED; + } else { + // blow away any existing mappings on requested interval + if (WinUnmap(addr, length) == -1) { + // can only happen if we partially overlap an existing mapping + assert(errno == ENOMEM); + if (hFile != INVALID_HANDLE_VALUE) { + CloseHandle(hFile); + } + return MAP_FAILED; + } + } + } + hand = CreateFileMapping(hFile, 0, wiprot, + (DWORD)(fsize >> 32), + (DWORD)fsize, + 0); + if (!hand) { + LogWindowsError("CreateFileMapping"); + if (hFile != INVALID_HANDLE_VALUE) { + CloseHandle(hFile); + } + errno = EPERM; + return MAP_FAILED; + } + res = MapViewOfFileEx(hand, access, + (DWORD)(offset >> 32), + (DWORD)offset, + length, addr); + if (!res) { + LogWindowsError("MapViewOfFileEx"); + if (hFile != INVALID_HANDLE_VALUE) { + CloseHandle(hFile); + } + CloseHandle(hand); + errno = EPERM; + return MAP_FAILED; + } + if (flags & MAP_FIXED) { + // this assertion could legitimately fail if two threads engage in a + // race to create a MAP_FIXED mapping at the same address and that's + // certainly not the kind of use case we're designed to support here + assert(res == addr); + } + // record our new mapping in the global array + Lock(); + for (i = 0; i < g_winmaps.n; ++i) { + if (!g_winmaps.p[i].hand) { + // we found an empty slot + break; + } + } + if (i == g_winmaps.n) { + // we need to grow the array + // it's important to use kernel32 memory + // our malloc implementation depends on this + int n2; + struct WinMap *p2; + p2 = g_winmaps.p; + n2 = g_winmaps.n; + if (n2) { + n2 += n2 >> 1; + } else { + n2 = 7; + } + if ((p2 = (struct WinMap*)Recalloc(p2, n2 * sizeof(*p2)))) { + g_winmaps.p = p2; + g_winmaps.n = n2; + } else { + Unlock(); + LogError("recalloc failed"); + UnmapViewOfFile(res); + CloseHandle(hand); + if (hFile != INVALID_HANDLE_VALUE) { + CloseHandle(hFile); + } + errno = ENOMEM; + return MAP_FAILED; + } + } + g_winmaps.p[i].hand = hand; + g_winmaps.p[i].fand = hFile; + g_winmaps.p[i].addr = (uintptr_t)res; + g_winmaps.p[i].length = length; + Unlock(); + return res; +} + +int WinMsync(void *addr, uintptr_t length, int flags) { + int i, err; + HANDLE hand; + uintptr_t x, y; + if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC)) { + LogError("bad msync flags"); + errno = EINVAL; + return -1; + } + // 1. we do nothing if length is zero (unlike win32 api) + // 2. the requested interval may envelop multiple known mappings + // 3. we don't care if those mappings aren't contiguous or a hole exists + // 4. the requested interval may specify a subrange of any given mapping + Lock(); + for (err = i = 0; i < g_winmaps.n; ++i) { + if (!g_winmaps.p[i].hand) { + // this array slot is empty + continue; + } + // compute overlap between known mapping and requested interval + x = Max((uintptr_t)addr, g_winmaps.p[i].addr); + y = Min((uintptr_t)addr + length, g_winmaps.p[i].addr + g_winmaps.p[i].length); + if (x >= y) { + // there isn't any overlap + continue; + } + // it's safe to release lock temporarily, since g_winmaps.n is monotonic + // any race conditions in handle being deleted should be caught by win32 + hand = g_winmaps.p[i].fand; + Unlock(); + // ensure coherency and that filesystem flush *will* happen + if (!FlushViewOfFile((void*)x, y - x)) { + LogWindowsError("FlushViewOfFile"); + err = EPERM; + } + if (flags & MS_SYNC) { + // ensure that filesystem flush *has* happened + if (!FlushFileBuffers(hand)) { + LogWindowsError("FlushFileBuffers"); + err = EPERM; + } + } + Lock(); + } + Unlock(); + if (err) { + errno = err; + return -1; + } + return 0; +} + +#else // NEED_*_MAP + +// this is a normal unix platform +// add some content to this object so the apple linker doesn't whine +int justine_mmap_module; + +#endif // NEED_*_MMAP diff --git a/mmap.h b/mmap.h index 851553048..fd9a25963 100644 --- a/mmap.h +++ b/mmap.h @@ -1,32 +1,17 @@ #pragma once -// portable mmap() implementation -// -// - supports win32 (needs vista+) -// - supports posix.1 (no map_anonymous) -// -// notes on windows -// -// - no errno support -// - not designed to support mprotect() -// - very poor support for memory intervals - -#include #include #include -#include -#include +#include -#if defined(_MSC_VER) || defined(__MINGW32__) -#ifndef __MINGW32__ -#include -#include -#else -#include +#ifdef __cplusplus +extern "C" { #endif + +#ifdef _MSC_VER +#define NEED_WIN32_MMAP +#include #include -#include -#include #ifndef PROT_READ #define PROT_READ 1 @@ -89,6 +74,26 @@ #define MADV_WILLNEED 3 #endif +#ifndef MS_ASYNC +#define MS_ASYNC 1 +#endif +#ifndef MS_INVALIDATE +#define MS_INVALIDATE 2 +#endif +#ifndef MS_SYNC +#define MS_SYNC 4 +#endif + +#ifndef SEEK_SET +#define SEEK_SET 0 +#endif +#ifndef SEEK_CUR +#define SEEK_CUR 1 +#endif +#ifndef SEEK_END +#define SEEK_END 2 +#endif + #ifndef mmap #define mmap WinMap #endif @@ -101,8 +106,11 @@ #ifndef close #define close _close #endif -#ifndef fstat -#define fstat _fstati64 +#ifndef lseek +#define lseek WinSeek +#endif +#ifndef msync +#define msync WinMsync #endif #ifndef madvise #define madvise WinMadvise @@ -111,219 +119,27 @@ #define ftruncate WinFtruncate #endif -static std::atomic g_winlock; -static std::map g_winmap; +uint64_t WinSeek(int, uint64_t, int); +int WinMsync(void *, uintptr_t, int); +int WinMadvise(void *, uintptr_t, int); +int WinFtruncate(int, uint64_t); +int WinUnmap(void *, uintptr_t); +void *WinMap(void *, uintptr_t, int, int, int, uint64_t); +#else // _MSC_VER -static void WinLock(void) { - while (!g_winlock.exchange(1, std::memory_order_acquire)); -} - -static void WunLock(void) { - g_winlock.store(0, std::memory_order_release); -} - -static int WinMadvise(char* fd, size_t length, int flags) { - auto p_handle = GetCurrentProcess(); - struct _WIN32_MEMORY_RANGE_ENTRY entry((void*)fd, length); - bool success = PrefetchVirtualMemory(p_handle, 1, &entry, 0); - if (!success) { - LPVOID lpMsgBuf; - LPVOID lpDisplayBuf; - DWORD error_code = GetLastError(); - FormatMessage( - FORMAT_MESSAGE_ALLOCATE_BUFFER | - FORMAT_MESSAGE_FROM_SYSTEM | - FORMAT_MESSAGE_IGNORE_INSERTS, - NULL, - error_code, - MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), - (LPTSTR)&lpMsgBuf, - 0, NULL); - lpDisplayBuf = (LPVOID)LocalAlloc(LMEM_ZEROINIT, - (lstrlen((LPCTSTR)lpMsgBuf) + 256) * sizeof(TCHAR)); - StringCchPrintf((LPTSTR)lpDisplayBuf, - LocalSize(lpDisplayBuf) / sizeof(TCHAR), - TEXT("%s failed with error %d: %s"), - error_code, lpMsgBuf); - } - return 0; -} - -static int WinFtruncate(int fd, uint64_t length) { - return _chsize_s(fd, length) ? -1 : 0; -} - -static int WinUnmap(void *addr, size_t length) { - HANDLE hand; - WinLock(); - hand = g_winmap[addr]; - g_winmap[addr] = 0; - WunLock(); - if (hand) { - UnmapViewOfFile(addr); - CloseHandle(hand); - return 0; - } else { - return -1; - } -} - -static void *WinMap(void *addr, size_t length, int prot, - int flags, int fd, uint64_t offset) { - HANDLE hFile; - DWORD winprot; - DWORD access = 0; - HANDLE hand = NULL; - LPVOID res = NULL; - if (prot & PROT_READ) { - access |= FILE_MAP_READ; - } - if (prot & PROT_WRITE) { - access |= FILE_MAP_WRITE; - } - if (prot & PROT_EXEC) { - access |= FILE_MAP_EXECUTE; - } - if (flags & MAP_PRIVATE) { - // private mapping - if (prot & PROT_EXEC) { - if (prot & PROT_WRITE) { - if (flags & MAP_ANONYMOUS) { - winprot = PAGE_EXECUTE_READWRITE; - } else { - winprot = PAGE_EXECUTE_WRITECOPY; - } - } else { - winprot = PAGE_EXECUTE_READ; - } - } else if (prot & PROT_WRITE) { - if (flags & MAP_ANONYMOUS) { - winprot = PAGE_READWRITE; - } else { - winprot = PAGE_WRITECOPY; - } - } else { - winprot = PAGE_READONLY; - } - } else { - // shared mapping - if (prot & PROT_EXEC) { - if (prot & PROT_WRITE) { - winprot = PAGE_EXECUTE_READWRITE; - } else { - winprot = PAGE_EXECUTE_READ; - } - } else if (prot & PROT_WRITE) { - winprot = PAGE_READWRITE; - } else { - winprot = PAGE_READONLY; - } - } - if (flags & MAP_ANONYMOUS) { - hFile = INVALID_HANDLE_VALUE; - offset = 0; - } else { - hFile = (HANDLE)_get_osfhandle(fd); - if (hFile == INVALID_HANDLE_VALUE) { - return MAP_FAILED; - } - } - if (flags & MAP_FIXED) { - if (!addr) { - return MAP_FAILED; - } else { - WinUnmap(addr, length); - } - } - hand = CreateFileMapping(hFile, 0, winprot, - (offset + length) >> 32, - (offset + length), 0); - if (!hand) { - LPVOID lpMsgBuf; - LPVOID lpDisplayBuf; - DWORD error_code = GetLastError(); - FormatMessage( - FORMAT_MESSAGE_ALLOCATE_BUFFER | - FORMAT_MESSAGE_FROM_SYSTEM | - FORMAT_MESSAGE_IGNORE_INSERTS, - NULL, - error_code, - MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), - (LPTSTR)&lpMsgBuf, - 0, NULL); - lpDisplayBuf = (LPVOID)LocalAlloc(LMEM_ZEROINIT, - (lstrlen((LPCTSTR)lpMsgBuf) + 256) * sizeof(TCHAR)); - StringCchPrintf((LPTSTR)lpDisplayBuf, - LocalSize(lpDisplayBuf) / sizeof(TCHAR), - TEXT("%s failed with error %d: %s"), - error_code, lpMsgBuf); - return MAP_FAILED; - } - if (winprot == PAGE_WRITECOPY) { - access = FILE_MAP_COPY; - } - - res = MapViewOfFileEx(hand, access, offset >> 32, - offset, length, addr); - if (!res) { - LPVOID lpMsgBuf; - LPVOID lpDisplayBuf; - DWORD error_code = GetLastError(); - FormatMessage( - FORMAT_MESSAGE_ALLOCATE_BUFFER | - FORMAT_MESSAGE_FROM_SYSTEM | - FORMAT_MESSAGE_IGNORE_INSERTS, - NULL, - error_code, - MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), - (LPTSTR)&lpMsgBuf, - 0, NULL); - lpDisplayBuf = (LPVOID)LocalAlloc(LMEM_ZEROINIT, - (lstrlen((LPCTSTR)lpMsgBuf) + 40) * sizeof(TCHAR)); - StringCchPrintf((LPTSTR)lpDisplayBuf, - LocalSize(lpDisplayBuf) / sizeof(TCHAR), - TEXT("failed with error %d: %s"), - error_code, lpMsgBuf); - fprintf(stderr, (char*)lpDisplayBuf); - CloseHandle(hand); - return MAP_FAILED; - } - WinLock(); - g_winmap[res] = hand; - WunLock(); - return res; -} - -#else #include #include + #ifndef MAP_ANONYMOUS -#define MAP_ANONYMOUS 0x10000000 - -static void *PosixMmap(void *addr, size_t length, int prot, - int flags, int fd, uint64_t offset) { - int tfd; - void *res; - char path[] = "/tmp/llama.dat.XXXXXX"; - if (~flags & MAP_ANONYMOUS) { - res = mmap(addr, length, prot, flags, fd, offset); - } else if ((tfd = mkstemp(path)) != -1) { - unlink(path); - if (!ftruncate(tfd, length)) { - res = mmap(addr, length, prot, flags & ~MAP_ANONYMOUS, tfd, 0); - } else { - res = MAP_FAILED; - } - close(tfd); - } else { - res = MAP_FAILED; - } - return res; -} - -#ifndef mmap +#define NEED_POSIX_MMAP #define mmap PosixMmap -#endif +#define MAP_ANONYMOUS 0x10000000 +void *PosixMmap(void*, size_t, int, int, int, off_t); #endif // MAP_ANONYMOUS + #endif // _MSC_VER + +#ifdef __cplusplus +} +#endif From 163129847511366077ec52beca2312f95a87e182 Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Tue, 28 Mar 2023 10:23:34 -0700 Subject: [PATCH 3/4] Remove -std=foo compiler flags These flags are only really useful for linting. They put GCC and other compilers into `__STRICT_ANSI__` mode. That can make systems stuff slower, in favor of standards conformance, since it may cause headers to remove platform specific goodness. It also makes builds more painful on old distros that have the functions we need, but track an older version of the standards where those functions weren't strictly available. One such example is mkstemp(). It's available everywhere in practice, but GA Ubuntu in strict ansi mode complains about it. If we don't use mkstemp() then that'll put us on the security radar with other platforms. --- Makefile | 4 ++-- mmap.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 044e7d7fe..8f94c77af 100644 --- a/Makefile +++ b/Makefile @@ -30,8 +30,8 @@ endif # Compile flags # -CFLAGS = -I. -O3 -DNDEBUG -std=c11 -fPIC -CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC +CFLAGS = -I. -O3 -DNDEBUG -fPIC +CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -fPIC LDFLAGS = # OS specific diff --git a/mmap.c b/mmap.c index d9ceb3348..1abb44799 100644 --- a/mmap.c +++ b/mmap.c @@ -34,7 +34,7 @@ #ifdef NEED_POSIX_MMAP #include -static void *PosixMmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset) { +void *PosixMmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset) { int tfd; void* res; char path[] = "/tmp/llama.dat.XXXXXX"; From 1a5ee113775cb753cb07a603024b7b8cc98f351a Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Tue, 28 Mar 2023 10:36:25 -0700 Subject: [PATCH 4/4] Restore old -std= flags Getting rid of them fixed GA Ubuntu, but broke GA MacOS. Let's try a different strategy. --- Makefile | 4 ++-- mmap.h | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 8f94c77af..044e7d7fe 100644 --- a/Makefile +++ b/Makefile @@ -30,8 +30,8 @@ endif # Compile flags # -CFLAGS = -I. -O3 -DNDEBUG -fPIC -CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -fPIC +CFLAGS = -I. -O3 -DNDEBUG -std=c11 -fPIC +CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC LDFLAGS = # OS specific diff --git a/mmap.h b/mmap.h index fd9a25963..8bdd662d3 100644 --- a/mmap.h +++ b/mmap.h @@ -1,5 +1,9 @@ #pragma once +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + #include #include #include