mirror of
				https://github.com/jart/cosmopolitan.git
				synced 2025-10-26 03:00:57 +00:00 
			
		
		
		
	Initial import
This commit is contained in:
		
						commit
						c91b3c5006
					
				
					 14915 changed files with 590219 additions and 0 deletions
				
			
		
							
								
								
									
										26
									
								
								third_party/avir/LICENSE
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								third_party/avir/LICENSE
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,26 @@ | |||
| AVIR License Agreement | ||||
| 
 | ||||
| The MIT License (MIT) | ||||
| 
 | ||||
| AVIR Copyright (c) 2015-2019 Aleksey Vaneev | ||||
| 
 | ||||
| Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
| of this software and associated documentation files (the "Software"), to deal | ||||
| in the Software without restriction, including without limitation the rights | ||||
| to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||||
| copies of the Software, and to permit persons to whom the Software is | ||||
| furnished to do so, subject to the following conditions: | ||||
| 
 | ||||
| The above copyright notice and this permission notice shall be included in all | ||||
| copies or substantial portions of the Software. | ||||
| 
 | ||||
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||||
| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||||
| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||||
| AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||||
| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||||
| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||||
| SOFTWARE. | ||||
| 
 | ||||
| Please credit the author of this library in your documentation in the | ||||
| following way: "AVIR image resizing algorithm designed by Aleksey Vaneev" | ||||
							
								
								
									
										5
									
								
								third_party/avir/README.cosmo
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								third_party/avir/README.cosmo
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,5 @@ | |||
| commit 7dd9515ef6aed6fb6d565ee12754703bdc46b3b0 | ||||
| Author: Aleksey Vaneev <aleksey.vaneev@gmail.com> | ||||
| Date:   Mon Jul 29 07:43:23 2019 +0300 | ||||
| 
 | ||||
|     Version 2.4 release. | ||||
							
								
								
									
										367
									
								
								third_party/avir/README.md
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										367
									
								
								third_party/avir/README.md
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,367 @@ | |||
| # AVIR # | ||||
| ## Introduction ## | ||||
| Keywords: image resize, image resizer, image resizing, image scaling, | ||||
| image scaler, image resize c++, image resizer c++ | ||||
| 
 | ||||
| Please consider supporting the author on [Patreon](https://www.patreon.com/aleksey_vaneev). | ||||
| 
 | ||||
| I, Aleksey Vaneev, am happy to offer you an open source image resizing / | ||||
| scaling library which has reached a production level of quality, and is | ||||
| ready to be incorporated into any project. This library features routines | ||||
| for both down- and upsizing of 8- and 16-bit, 1 to 4-channel images. Image | ||||
| resizing routines were implemented in multi-platform C++ code, and have a | ||||
| high level of optimality. Beside resizing, this library offers a sub-pixel | ||||
| shift operation. Built-in sRGB gamma correction is available. | ||||
| 
 | ||||
| The resizing algorithm first produces a 2X upsized image (relative to the | ||||
| source image size, or relative to the destination image size if downsizing is | ||||
| performed) and then performs interpolation using a bank of sinc function-based | ||||
| fractional delay filters. At the last stage a correction filter is applied | ||||
| which fixes smoothing introduced at previous steps. | ||||
| 
 | ||||
| The resizing algorithm was designed to provide the best visual quality. The | ||||
| author even believes this algorithm provides the "ultimate" level of | ||||
| quality (for an orthogonal resizing) which cannot be increased further: no | ||||
| math exists to provide a better frequency response, better anti-aliasing | ||||
| quality and at the same time having less ringing artifacts: these are 3 | ||||
| elements that define any resizing algorithm's quality; in AVIR practice these | ||||
| elements have a high correlation to each other, so they can be represented by | ||||
| a single parameter (AVIR offers several parameter sets with varying quality). | ||||
| Algorithm's time performance turned out to be very good as well (for the | ||||
| "ultimate" image quality). | ||||
| 
 | ||||
| An important element utilized by this algorithm is the so called Peaked Cosine | ||||
| window function, which is applied over sinc function in all filters. Please | ||||
| consult the documentation for more details. | ||||
| 
 | ||||
| Note that since AVIR implements orthogonal resizing, it may exhibit diagonal | ||||
| aliasing artifacts. These artifacts are usually suppressed by EWA or radial | ||||
| filtering techniques. EWA-like technique is not implemented in AVIR, because | ||||
| it requires considerably more computing resources and may produce a blurred | ||||
| image. | ||||
| 
 | ||||
| As a bonus, a faster `LANCIR` image resizing algorithm is also offered as a | ||||
| part of this library. But the main focus of this documentation is the original | ||||
| AVIR image resizing algorithm. | ||||
| 
 | ||||
| AVIR does not offer affine and non-linear image transformations "out of the | ||||
| box". Since upsizing is a relatively fast operation in AVIR (required time | ||||
| scales linearly with the output image area), affine and non-linear | ||||
| transformations can be implemented in steps: 4- to 8-times upsizing, | ||||
| transformation via bilinear interpolation, downsizing (linear proportional | ||||
| affine transformations can probably skip the downsizing step). This should not | ||||
| compromise the transformation quality much as bilinear interpolation's | ||||
| problems will mostly reside in spectral area without useful signal, with a | ||||
| maximum of 0.7 dB high-frequency attenuation for 4-times upsizing, and 0.17 dB | ||||
| attenuation for 8-times upsizing. This approach is probably as time efficient | ||||
| as performing a high-quality transform over the input image directly (the only | ||||
| serious drawback is the increased memory requirement). Note that affine | ||||
| transformations that change image proportions should first apply proportion | ||||
| change during upsizing. | ||||
| 
 | ||||
| *AVIR is devoted to women. Your digital photos can look good at any size!* | ||||
| 
 | ||||
| ## Requirements ## | ||||
| C++ compiler and system with efficient "float" floating point (24-bit | ||||
| mantissa) type support. This library can also internally use the "double" and | ||||
| SIMD floating point types during resizing if needed. This library does not | ||||
| have dependencies beside the standard C library. | ||||
| 
 | ||||
| ## Links ## | ||||
| * [Documentation](https://www.voxengo.com/public/avir/Documentation/) | ||||
| 
 | ||||
| ## Usage Information ## | ||||
| The image resizer is represented by the `avir::CImageResizer<>` class, which | ||||
| is a single front-end class for the whole library. Basically, you do not need | ||||
| to use nor understand any other classes beside this class. | ||||
| 
 | ||||
| The code of the library resides in the "avir" C++ namespace, effectively | ||||
| isolating it from all other code. The code is thread-safe. You need just | ||||
| a single resizer object per running application, at any time, even when | ||||
| resizing images concurrently. | ||||
| 
 | ||||
| To resize images in your application, simply add 3 lines of code: | ||||
| 
 | ||||
|     #include "avir.h" | ||||
|     avir :: CImageResizer<> ImageResizer( 8 ); | ||||
|     ImageResizer.resizeImage( InBuf, 640, 480, 0, OutBuf, 1024, 768, 3, 0 ); | ||||
|     (multi-threaded operation requires additional coding, see the documentation) | ||||
| 
 | ||||
| For low-ringing performance: | ||||
| 
 | ||||
|     avir :: CImageResizer<> ImageResizer( 8, 0, avir :: CImageResizerParamsLR() ); | ||||
| 
 | ||||
| To use the built-in gamma correction, an object of the | ||||
| `avir::CImageResizerVars` class with its variable `UseSRGBGamma` set to "true" | ||||
| should be supplied to the `resizeImage()` function. Note that the gamma | ||||
| correction is applied to all channels (e.g. alpha-channel) in the current | ||||
| implementation. | ||||
| 
 | ||||
|     avir :: CImageResizerVars Vars; | ||||
|     Vars.UseSRGBGamma = true; | ||||
| 
 | ||||
| Dithering (error-diffusion dither which is perceptually good) can be enabled | ||||
| this way: | ||||
| 
 | ||||
|     typedef avir :: fpclass_def< float, float, | ||||
|         avir :: CImageResizerDithererErrdINL< float > > fpclass_dith; | ||||
|     avir :: CImageResizer< fpclass_dith > ImageResizer( 8 ); | ||||
| 
 | ||||
| The library is able to process images of any bit depth: this includes 8-bit, | ||||
| 16-bit, float and double types. Larger integer and signed integer types are | ||||
| not supported. Supported source and destination image sizes are only limited | ||||
| by the available system memory. | ||||
| 
 | ||||
| The code of this library was commented in the [Doxygen](http://www.doxygen.org/) | ||||
| style. To generate the documentation locally you may run the | ||||
| `doxygen ./other/avirdoxy.txt` command from the library's directory. Note that | ||||
| the code was suitably documented allowing you to make modifications, and to | ||||
| gain full understanding of the algorithm. | ||||
| 
 | ||||
| Preliminary tests show that this library (compiled with Intel C++ Compiler | ||||
| 18.2 with AVX2 instructions enabled, without explicit SIMD resizing code) can | ||||
| resize 8-bit RGB 5184x3456 (17.9 Mpixel) 3-channel image down to 1920x1280 | ||||
| (2.5 Mpixel) image in 245 milliseconds, utilizing a single thread, on Intel | ||||
| Core i7-7700K processor-based system without overclocking. This scales down to | ||||
| 74 milliseconds if 8 threads are utilized. | ||||
| 
 | ||||
| Multi-threaded operation is not provided by this library "out of the box". | ||||
| The multi-threaded (horizontally-threaded) infrastructure is available, but | ||||
| requires additional system-specific interfacing code for engagement. | ||||
| 
 | ||||
| ## SIMD Usage Information ## | ||||
| This library is capable of using SIMD floating point types for internal | ||||
| variables. This means that up to 4 color channels can be processed in | ||||
| parallel. Since the default interleaved processing algorithm itself remains | ||||
| non-SIMD, the use of SIMD internal types is not practical for 1- and 2-channel | ||||
| image resizing (due to overhead). SIMD internal type can be used this way: | ||||
| 
 | ||||
|     #include "avir_float4_sse.h" | ||||
|     avir :: CImageResizer< avir :: fpclass_float4 > ImageResizer( 8 ); | ||||
| 
 | ||||
| For 1-channel and 2-channel image resizing when AVX instructions are allowed | ||||
| it may be reasonable to utilize de-interleaved SIMD processing algorithm. | ||||
| While it gives no performance benefit if the "float4" SSE processing type is | ||||
| used, it offers some performance boost if the "float8" AVX processing type is | ||||
| used (given dithering is not performed, or otherwise performance is reduced at | ||||
| the dithering stage since recursive dithering cannot be parallelized). The | ||||
| internal type remains non-SIMD "float". De-interleaved algorithm can be used | ||||
| this way: | ||||
| 
 | ||||
|     #include "avir_float8_avx.h" | ||||
|     avir :: CImageResizer< avir :: fpclass_float8_dil > ImageResizer( 8 ); | ||||
| 
 | ||||
| It's important to note that on the latest Intel processors (i7-7700K and | ||||
| probably later) the use of the aforementioned SIMD-specific resizing code may | ||||
| not be justifiable, or may be even counter-productive due to many factors: | ||||
| memory bandwidth bottleneck, increased efficiency of processor's circuitry | ||||
| utilization and out-of-order execution, automatic SIMD optimizations performed | ||||
| by the compiler. This is at least true when compiling 64-bit code with Intel | ||||
| C++ Compiler 18.2 with /QxSSE4.2, or especially with the /QxCORE-AVX2 option. | ||||
| SSE-specific resizing code may still be a little bit more efficient for | ||||
| 4-channel image resizing. | ||||
| 
 | ||||
| ## Notes ## | ||||
| This library was tested for compatibility with [GNU C++](http://gcc.gnu.org/), | ||||
| [Microsoft Visual C++](http://www.microsoft.com/visualstudio/eng/products/visual-studio-express-products) | ||||
| and [Intel C++](http://software.intel.com/en-us/c-compilers) compilers, on 32- | ||||
| and 64-bit Windows, macOS and CentOS Linux. The code was also tested with | ||||
| Dr.Memory/Win32 for the absence of uninitialized or unaddressable memory | ||||
| accesses. | ||||
| 
 | ||||
| All code is fully "inline", without the need to compile any source files. The | ||||
| memory footprint of the library itself is very modest, except that the size of | ||||
| the temporary image buffers depends on the input and output image sizes, and | ||||
| is proportionally large. | ||||
| 
 | ||||
| The "heart" of resizing algorithm's quality resides in the parameters defined | ||||
| via the `avir::CImageResizerParams` structure. While the default set of | ||||
| parameters that offers a good quality was already provided, there is | ||||
| (probably) still room for improvement, and the default parameters | ||||
| may change in a future update. If you need to recall an exact set of | ||||
| parameters, simply save them locally for a later use. | ||||
| 
 | ||||
| When the algorithm is run with no resizing applied (k=1), the result of | ||||
| resizing will not be an exact, but a very close copy of the source image. The | ||||
| reason for such inexactness is that the image is always low-pass filtered at | ||||
| first to reduce aliasing during subsequent resizing, and at last filtered by a | ||||
| correction filter. Such an approach allows the algorithm to maintain a stable level | ||||
| of quality regardless of the resizing "k" factor used. | ||||
| 
 | ||||
| This library includes a binary command line tool "imageresize" for major | ||||
| desktop platforms. This tool was designed to be used as a demonstration of | ||||
| library's performance, and as a reference, it is multi-threaded (the `-t` | ||||
| switch can be used to control the number of threads utilized). This tool uses | ||||
| plain "float" processing (no explicit SIMD) and relies on automatic compiler | ||||
| optimization (with Win64 binary being the "main" binary as it was compiled | ||||
| with the best ICC optimization options for the time being). This tool uses the | ||||
| following libraries: | ||||
| * turbojpeg Copyright (c) 2009-2013 D. R. Commander | ||||
| * libpng Copyright (c) 1998-2013 Glenn Randers-Pehrson | ||||
| * zlib Copyright (c) 1995-2013 Jean-loup Gailly and Mark Adler | ||||
| 
 | ||||
| Note that you can enable gamma-correction with the `-g` switch. However, | ||||
| sometimes gamma-correction produces "greenish/reddish/bluish haze" since | ||||
| low-amplitude oscillations produced by resizing at object boundaries are | ||||
| amplified by gamma correction. This can also have an effect of reduced | ||||
| contrast. | ||||
| 
 | ||||
| ## Interpolation Discussion ## | ||||
| The use of certain low-pass filters and 2X upsampling in this library is | ||||
| hardly debatable, because they are needed to attain a certain anti-aliasing | ||||
| effect and keep ringing artifacts low. But the use of sinc function-based | ||||
| interpolation filter that is 18 taps-long (may be higher, up to 36 taps in | ||||
| practice) can be questioned, because even in 0th order case such | ||||
| interpolation filter requires 18 multiply-add operations. Comparatively, an | ||||
| optimal Hermite or cubic interpolation spline requires 8 multiply and 11 add | ||||
| operations. | ||||
| 
 | ||||
| One of the reasons an 18-tap filter is preferred is that, due to memory | ||||
| bandwidth limitations, using a lower-order filter does not provide any | ||||
| significant performance increase (e.g. 14-tap filter is less than 5% more | ||||
| efficient overall). At the same time, in comparison to cubic spline, 18-tap | ||||
| filter embeds a low-pass filter that rejects signal above 0.5\*pi (provides | ||||
| additional anti-aliasing filtering), and this filter has a consistent shape at | ||||
| all fractional offsets. Splines have a varying low-pass filter shape at | ||||
| different fractional offsets (e.g. no low-pass filtering at 0.0 offset, | ||||
| and maximal low-pass filtering at 0.5 offset). 18-tap filter also offers a | ||||
| superior stop-band attenuation which almost guarantees absence of artifacts if | ||||
| the image is considerably sharpened afterwards. | ||||
| 
 | ||||
| ## Why 2X upsizing in AVIR? ## | ||||
| Classic approaches to image resizing do not perform an additional 2X upsizing. | ||||
| So, why is such upsizing needed at all in AVIR? Indeed, image resizing can be | ||||
| implemented using a single interpolation filter which is applied to the source | ||||
| image directly. However, such an approach has limitations: | ||||
| 
 | ||||
| First of all, speaking about non-2X-upsized resizing, during upsizing the | ||||
| interpolation filter has to be tuned to a frequency close to pi (Nyquist) in | ||||
| order to reduce high-frequency smoothing: this reduces the space left for | ||||
| filter optimization. Besides that, during downsizing, a filter that performs | ||||
| well and predictably when tuned to frequencies close to the Nyquist frequency, | ||||
| may become distorted in its spectral shape when it is tuned to lower | ||||
| frequencies. That is why it is usually a good idea to have filter's stop-band | ||||
| begin below Nyquist so that the transition band's shape remains stable at any | ||||
| lower-frequency setting. At the same time, this requirement complicates a | ||||
| further corrective filtering, because correction filter may become too steep | ||||
| at the point where the stop-band begins. | ||||
| 
 | ||||
| Secondly, speaking about non-2X-upsized resizing, filter has to be very short | ||||
| (with a base length of 5-7 taps, further multiplied by the resizing factor) or | ||||
| otherwise the ringing artifacts will be very strong: it is a general rule that | ||||
| the steeper the filter is around signal frequencies being removed the higher | ||||
| the ringing artifacts are. That is why it is preferred to move steep | ||||
| transitions into the spectral area with a quieter signal. A short filter also | ||||
| means it cannot provide a strong "beyond-Nyquist" stop-band attenuation, so an | ||||
| interpolated image will look a bit edgy or not very clean due to stop-band | ||||
| artifacts. | ||||
| 
 | ||||
| To sum up, only additional controlled 2X upsizing provides enough spectral | ||||
| space to design interpolation filter without visible ringing artifacts yet | ||||
| providing a strong stop-band attenuation and stable spectral characteristics | ||||
| (good at any resizing "k" factor). Moreover, 2X upsizing becomes very | ||||
| important in maintaining a good resizing quality when downsizing and upsizing | ||||
| by small "k" factors, in the range 0.5 to 2: resizing approaches that do not | ||||
| perform 2X upsizing usually cannot design a good interpolation filter for such | ||||
| factors just because there is not enough spectral space available. | ||||
| 
 | ||||
| ## Why Peaked Cosine in AVIR? ## | ||||
| First of all, AVIR is a general solution to image resizing problem. That is | ||||
| why it should not be directly compared to "spline interpolation" or "Lanczos | ||||
| resampling", because the latter two are only means to design interpolation | ||||
| filters, and they can be implemented in a variety of ways, even in sub-optimal | ||||
| ways. Secondly, with only a minimal effort AVIR can be changed to use any | ||||
| existing interpolation formula and any window function, but this is just not | ||||
| needed. | ||||
| 
 | ||||
| An effort was made to compare Peaked Cosine to Lanczos window function, and | ||||
| here is the author's opinion. Peaked Cosine has two degrees of freedom whereas | ||||
| Lanczos has one degree of freedom. While both functions can be used with | ||||
| acceptable results, Peaked Cosine window function used in automatic parameter | ||||
| optimization really pushes the limits of frequency response linearity, | ||||
| anti-aliasing strength (stop-band attenuation) and low-ringing performance | ||||
| which Lanczos cannot usually achieve. This is true at least when using a | ||||
| general-purpose downhill simplex optimization method. Lanczos window has good | ||||
| (but not better) characteristics in several special cases (certain "k" | ||||
| factors) which makes it of limited use in a general solution such as AVIR. | ||||
| 
 | ||||
| Among other window functions (Kaiser, Gaussian, Cauchy, Poisson, generalized | ||||
| cosine windows) there are no better candidates as well. It looks like Peaked | ||||
| Cosine function's scalability (it retains stable, almost continuously-variable | ||||
| spectral characteristics at any window parameter values), and its ability to | ||||
| create "desirable" pass-band ripple in the frequency response near the cutoff | ||||
| point contribute to its better overall quality. Somehow Peaked Cosine window | ||||
| function optimization manages to converge to reasonable states in most cases | ||||
| (that is why AVIR library comes with a set of equally robust, but distinctive | ||||
| parameter sets) whereas all other window functions tend to produce | ||||
| unpredictable optimization results. | ||||
| 
 | ||||
| The only disadvantage of Peaked Cosine window function is that usable filters | ||||
| windowed by this function tend to be longer than "usual" (with Kaiser window | ||||
| being the "golden standard" for filter length per decibel of stop-band | ||||
| attenuation). This is a price that should be paid for stable spectral | ||||
| characteristics. | ||||
| 
 | ||||
| ## LANCIR ## | ||||
| 
 | ||||
| As a part of AVIR library, the `CLancIR` class is also offered which is an | ||||
| optimal implementation of *Lanczos* image resizing filter. This class has a | ||||
| similar programmatic interface to AVIR, but it is not thread-safe: each | ||||
| executing thread should have its own `CLancIR` object. This class was designed | ||||
| for cases of batch processing of same-sized frames like in video encoding. | ||||
| 
 | ||||
| LANCIR offers up to 200% faster image resizing in comparison to AVIR. The | ||||
| quality difference is, however, debatable. Note that while LANCIR can take | ||||
| 8- and 16-bit and float image buffers, its precision is limited to 8-bit | ||||
| resizing. | ||||
| 
 | ||||
| LANCIR should be seen as a bonus and as some kind of quality comparison. | ||||
| LANCIR uses Lanczos filter "a" parameter equal to 3 which is similar to AVIR's | ||||
| default setting. | ||||
| 
 | ||||
| ## Change log ## | ||||
| Version 2.4: | ||||
| 
 | ||||
| * Removed outdated `_mm_reset()` function calls from the SIMD code. | ||||
| * Changed `float4 round()` to use SSE2 rounding features, avoiding use of | ||||
| 64-bit registers. | ||||
| 
 | ||||
| Version 2.3: | ||||
| 
 | ||||
| * Implemented CLancIR image resizing algorithm. | ||||
| * Fixed a minor image offset on image upsizing. | ||||
| 
 | ||||
| Version 2.2: | ||||
| 
 | ||||
| * Released AVIR under a permissive MIT license agreement. | ||||
| 
 | ||||
| Version 2.1: | ||||
| 
 | ||||
| * Fixed error-diffusion dither problems introduced in the previous version. | ||||
| * Added the `-1` switch to the `imageresize` to enable 1-bit output for | ||||
| dither's quality evaluation (use together with the `-d` switch). | ||||
| * Added the `--algparams=` switch to the `imageresize` to control resizing | ||||
| quality (replaces the `--low-ring` switch). | ||||
| * Added `avir :: CImageResizerParamsULR` parameter set for lowest-ringing | ||||
| performance possible (not considerably different to | ||||
| `avir :: CImageResizerParamsLR`, but a bit lower ringing). | ||||
| 
 | ||||
| Version 2.0: | ||||
| 
 | ||||
| * Minor inner loop optimizations. | ||||
| * Lifted the supported image size constraint by switching buffer addressing to | ||||
| `size_t` from `int`, now image size is limited by the available system memory. | ||||
| * Added several useful switches to the `imageresize` utility. | ||||
| * Now `imageresize` does not apply gamma-correction by default. | ||||
| * Fixed scaling of bit depth-reduction operation. | ||||
| * Improved error-diffusion dither's signal-to-noise ratio. | ||||
| * Compiled binaries with AVX2 instruction set (SSE4 for macOS). | ||||
| 
 | ||||
| ## Users ## | ||||
| This library is used by: | ||||
| 
 | ||||
|   * [Contaware.com](http://www.contaware.com/) | ||||
| 
 | ||||
| Please drop me a note at aleksey.vaneev@gmail.com and I will add a link to | ||||
| your software product to the list of users. This list is important for | ||||
| maintaining confidence in this library among the interested parties. | ||||
							
								
								
									
										17065
									
								
								third_party/avir/avir.h
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										17065
									
								
								third_party/avir/avir.h
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
							
								
								
									
										71
									
								
								third_party/avir/avir.mk
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										71
									
								
								third_party/avir/avir.mk
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,71 @@ | |||
| #-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐
 | ||||
| #───vi: set et ft=make ts=8 tw=8 fenc=utf-8 :vi───────────────────────┘
 | ||||
| 
 | ||||
# Register this package in the global package list.
| PKGS += THIRD_PARTY_AVIR | ||||
| 
 | ||||
# The package has one artifact, the archive avir.a under o/$(MODE)/.
# THIRD_PARTY_AVIR expands to $(THIRD_PARTY_AVIR_A_DEPS) followed by that
# archive; it is a recursive (=) variable, so _A_DEPS may be assigned later
# in this fragment. _A_CHECKS names the archive's .pkg file.
# All files in third_party/avir/ are globbed once (:=) and then split by
# extension into assembly (.S), C (.c) and C++ (.cc) source lists.
| THIRD_PARTY_AVIR_ARTIFACTS += THIRD_PARTY_AVIR_A | ||||
| THIRD_PARTY_AVIR = $(THIRD_PARTY_AVIR_A_DEPS) $(THIRD_PARTY_AVIR_A) | ||||
| THIRD_PARTY_AVIR_A = o/$(MODE)/third_party/avir/avir.a | ||||
| THIRD_PARTY_AVIR_A_CHECKS = $(THIRD_PARTY_AVIR_A).pkg | ||||
| THIRD_PARTY_AVIR_A_FILES := $(wildcard third_party/avir/*) | ||||
| THIRD_PARTY_AVIR_A_SRCS_S = $(filter %.S,$(THIRD_PARTY_AVIR_A_FILES)) | ||||
| THIRD_PARTY_AVIR_A_SRCS_C = $(filter %.c,$(THIRD_PARTY_AVIR_A_FILES)) | ||||
| THIRD_PARTY_AVIR_A_SRCS_X = $(filter %.cc,$(THIRD_PARTY_AVIR_A_FILES)) | ||||
| 
 | ||||
# Headers: the .h and .hpp files found by the directory glob.
| THIRD_PARTY_AVIR_A_HDRS =				\
 | ||||
| 	$(filter %.h,$(THIRD_PARTY_AVIR_A_FILES))	\
 | ||||
| 	$(filter %.hpp,$(THIRD_PARTY_AVIR_A_FILES)) | ||||
| 
 | ||||
# Sources: the union of the .S, .c and .cc lists.
| THIRD_PARTY_AVIR_A_SRCS =				\
 | ||||
| 	$(THIRD_PARTY_AVIR_A_SRCS_S)			\
 | ||||
| 	$(THIRD_PARTY_AVIR_A_SRCS_C)			\
 | ||||
| 	$(THIRD_PARTY_AVIR_A_SRCS_X) | ||||
| 
 | ||||
# Objects: every source maps to o/$(MODE)/<source>.zip.o (first line) and
# additionally to one o/$(MODE)/<stem>.o per .S/.c/.cc file.
# NOTE(review): the .zip.o objects presumably embed the source text in the
# output; the rule that builds them is not in this fragment - confirm.
| THIRD_PARTY_AVIR_A_OBJS =				\
 | ||||
| 	$(THIRD_PARTY_AVIR_A_SRCS:%=o/$(MODE)/%.zip.o)	\
 | ||||
| 	$(THIRD_PARTY_AVIR_A_SRCS_S:%.S=o/$(MODE)/%.o)	\
 | ||||
| 	$(THIRD_PARTY_AVIR_A_SRCS_C:%.c=o/$(MODE)/%.o)	\
 | ||||
| 	$(THIRD_PARTY_AVIR_A_SRCS_X:%.cc=o/$(MODE)/%.o) | ||||
| 
 | ||||
# Names of the packages this archive depends on directly. They are never used
# literally: the .pkg rule expands each as $($(x)_A).pkg and
# THIRD_PARTY_AVIR_A_DEPS expands each as $($(x)), so every entry must be the
# name of a package variable defined elsewhere in the build.
| THIRD_PARTY_AVIR_A_DIRECTDEPS =				\
 | ||||
| 	DSP_CORE					\
 | ||||
| 	LIBC_NEXGEN32E					\
 | ||||
| 	LIBC_BITS					\
 | ||||
| 	LIBC_MEM					\
 | ||||
| 	LIBC_CALLS					\
 | ||||
| 	LIBC_STUBS					\
 | ||||
| 	LIBC_SYSV					\
 | ||||
| 	LIBC_FMT					\
 | ||||
| 	LIBC_UNICODE					\
 | ||||
| 	LIBC_LOG					\
 | ||||
| 	LIBC_TINYMATH | ||||
| 
 | ||||
# The archive's .pkg file depends on this archive's objects and on the .pkg
# file of every direct dependency's archive.
| $(THIRD_PARTY_AVIR_A).pkg:				\ | ||||
| 		$(THIRD_PARTY_AVIR_A_OBJS)		\
 | ||||
| 		$(foreach x,$(THIRD_PARTY_AVIR_A_DIRECTDEPS),$($(x)_A).pkg) | ||||
| 
 | ||||
# The archive depends on the source directory, its .pkg file and its objects.
# Neither rule has a recipe here.
# NOTE(review): the recipes presumably come from pattern rules elsewhere in
# the build - confirm.
| $(THIRD_PARTY_AVIR_A):					\ | ||||
| 		third_party/avir/			\
 | ||||
| 		$(THIRD_PARTY_AVIR_A).pkg		\
 | ||||
| 		$(THIRD_PARTY_AVIR_A_OBJS) | ||||
| 
 | ||||
# Disabled per-target compiler override. The comment line ends in a backslash,
# so in the makefile the comment continues onto the CXX line and both lines
# are inert.
| #o/$(MODE)/third_party/avir/lanczos1b.o:			\
 | ||||
| 		CXX = clang++-10 | ||||
| 
 | ||||
# Target-specific variable: only the two lanczos objects get $(MATHEMATICAL)
# appended to OVERRIDE_CXXFLAGS.
# NOTE(review): MATHEMATICAL is defined outside this fragment - presumably a
# set of math-oriented compiler flags; confirm.
| o/$(MODE)/third_party/avir/lanczos1b.o			\ | ||||
| o/$(MODE)/third_party/avir/lanczos.o:			\ | ||||
| 		OVERRIDE_CXXFLAGS +=			\
 | ||||
| 			$(MATHEMATICAL) | ||||
| 
 | ||||
# Expansion of every direct dependency's package variable, passed through the
# build's uniq function. Assigned with := so it is evaluated immediately and
# captures the dependency variables as they are defined at this point.
| THIRD_PARTY_AVIR_A_DEPS :=				\
 | ||||
| 	$(call uniq,$(foreach x,$(THIRD_PARTY_AVIR_A_DIRECTDEPS),$($(x)))) | ||||
| 
 | ||||
# Package-level aggregates: each collects the matching per-artifact variable
# over everything listed in THIRD_PARTY_AVIR_ARTIFACTS (LIBS collects the
# artifact paths themselves). This fragment never assigns
# THIRD_PARTY_AVIR_A_TESTS, so TESTS is empty unless it is set elsewhere.
| THIRD_PARTY_AVIR_LIBS = $(foreach x,$(THIRD_PARTY_AVIR_ARTIFACTS),$($(x))) | ||||
| THIRD_PARTY_AVIR_SRCS = $(foreach x,$(THIRD_PARTY_AVIR_ARTIFACTS),$($(x)_SRCS)) | ||||
| THIRD_PARTY_AVIR_HDRS = $(foreach x,$(THIRD_PARTY_AVIR_ARTIFACTS),$($(x)_HDRS)) | ||||
| THIRD_PARTY_AVIR_CHECKS = $(foreach x,$(THIRD_PARTY_AVIR_ARTIFACTS),$($(x)_CHECKS)) | ||||
| THIRD_PARTY_AVIR_OBJS = $(foreach x,$(THIRD_PARTY_AVIR_ARTIFACTS),$($(x)_OBJS)) | ||||
| THIRD_PARTY_AVIR_TESTS = $(foreach x,$(THIRD_PARTY_AVIR_ARTIFACTS),$($(x)_TESTS)) | ||||
| 
 | ||||
# Phony per-package target: requesting o/$(MODE)/third_party/avir brings
# $(THIRD_PARTY_AVIR_A_CHECKS), i.e. the archive's .pkg file, up to date.
| .PHONY: o/$(MODE)/third_party/avir | ||||
| o/$(MODE)/third_party/avir: $(THIRD_PARTY_AVIR_A_CHECKS) | ||||
							
								
								
									
										18
									
								
								third_party/avir/avir1.h
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								third_party/avir/avir1.h
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,18 @@ | |||
| #ifndef COSMOPOLITAN_THIRD_PARTY_AVIR_AVIR1_H_ | ||||
| #define COSMOPOLITAN_THIRD_PARTY_AVIR_AVIR1_H_ | ||||
| #if !(__ASSEMBLER__ + __LINKER__ + 0) | ||||
| COSMOPOLITAN_C_START_ | ||||
| 
 | ||||
| struct avir1 { /* handle type taken by avir1init(), avir1free() and avir1() */ | ||||
|   void *p; /* untyped state pointer; NOTE(review): presumably owned by the implementation, which is not visible here - confirm */ | ||||
| }; | ||||
| 
 | ||||
| void avir1init(struct avir1 *self); /* NOTE(review): presumably prepares self->p before first use - confirm in the implementation */ | ||||
| void avir1free(struct avir1 *self); /* NOTE(review): presumably releases whatever avir1init() acquired - confirm */ | ||||
| void avir1(struct avir1 *resizer, size_t dyn, size_t dxn, void *dst, /* dst is a non-const buffer of dstsize; src is const, of srcsize */ | ||||
|            size_t dstsize, size_t syn, size_t sxn, size_t ssw, const void *src, /* NOTE(review): dyn/dxn and syn/sxn look like destination/source row and column counts, ssw like a source stride - confirm */ | ||||
|            size_t srcsize); | ||||
| 
 | ||||
| COSMOPOLITAN_C_END_ | ||||
| #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ | ||||
| #endif /* COSMOPOLITAN_THIRD_PARTY_AVIR_AVIR1_H_ */ | ||||
							
								
								
									
										1013
									
								
								third_party/avir/avir_dil.h
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										1013
									
								
								third_party/avir/avir_dil.h
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
							
								
								
									
										323
									
								
								third_party/avir/avir_float4_sse.h
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										323
									
								
								third_party/avir/avir_float4_sse.h
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,323 @@ | |||
| /* clang-format off */ | ||||
| //$ nobt
 | ||||
| //$ nocpp
 | ||||
| 
 | ||||
| /**
 | ||||
|  * @file avir_float4_sse.h | ||||
|  * | ||||
|  * @brief Inclusion file for the "float4" type. | ||||
|  * | ||||
|  * This file includes the "float4" SSE-based type used for SIMD variable | ||||
|  * storage and processing. | ||||
|  * | ||||
|  * AVIR Copyright (c) 2015-2019 Aleksey Vaneev | ||||
|  */ | ||||
| 
 | ||||
| #ifndef AVIR_FLOAT4_SSE_INCLUDED | ||||
| #define AVIR_FLOAT4_SSE_INCLUDED | ||||
| 
 | ||||
| #include "third_party/avir/avir.h" | ||||
| #include "libc/bits/mmintrin.h" | ||||
| #include "libc/bits/xmmintrin.h" | ||||
| #include "libc/bits/xmmintrin.h" | ||||
| #include "libc/bits/emmintrin.h" | ||||
| 
 | ||||
| namespace avir { | ||||
| 
 | ||||
| /**
 | ||||
|  * @brief SIMD packed 4-float type. | ||||
|  * | ||||
|  * This class implements a packed 4-float type that can be used to perform | ||||
|  * parallel computation using SIMD instructions on SSE-enabled processors. | ||||
|  * This class can be used as the "fptype" argument of the avir::fpclass_def | ||||
|  * class. | ||||
|  * | ||||
|  * The four lanes are kept in the public "value" member. Constructing or | ||||
|  * assigning from a single float copies that float into all four lanes | ||||
|  * (_mm_set1_ps), while conversion to float yields the lowest lane | ||||
|  * (_mm_cvtss_f32). | ||||
|  */ | ||||
| 
 | ||||
| class float4 | ||||
| { | ||||
| public: | ||||
| 	float4() | ||||
| 	{ | ||||
| 	} | ||||
| 
 | ||||
| 	float4( const float4& s ) | ||||
| 		: value( s.value ) | ||||
| 	{ | ||||
| 	} | ||||
| 
 | ||||
| 	float4( const __m128 s ) | ||||
| 		: value( s ) | ||||
| 	{ | ||||
| 	} | ||||
| 
 | ||||
| 	float4( const float s ) | ||||
| 		: value( _mm_set1_ps( s )) | ||||
| 	{ | ||||
| 	} | ||||
| 
 | ||||
| 	float4& operator = ( const float4& s ) | ||||
| 	{ | ||||
| 		value = s.value; | ||||
| 		return( *this ); | ||||
| 	} | ||||
| 
 | ||||
| 	float4& operator = ( const __m128 s ) | ||||
| 	{ | ||||
| 		value = s; | ||||
| 		return( *this ); | ||||
| 	} | ||||
| 
 | ||||
| 	float4& operator = ( const float s ) | ||||
| 	{ | ||||
| 		value = _mm_set1_ps( s ); | ||||
| 		return( *this ); | ||||
| 	} | ||||
| 
 | ||||
| 	operator float () const | ||||
| 	{ | ||||
| 		return( _mm_cvtss_f32( value )); | ||||
| 	} | ||||
| 
 | ||||
| 	/**
 | ||||
| 	 * @param p Pointer to memory from where the value should be loaded, | ||||
| 	 * should be 16-byte aligned. | ||||
| 	 * @return float4 value loaded from the specified memory location. | ||||
| 	 */ | ||||
| 
 | ||||
| 	static float4 load( const float* const p ) | ||||
| 	{ | ||||
| 		return( _mm_load_ps( p )); | ||||
| 	} | ||||
| 
 | ||||
| 	/**
 | ||||
| 	 * @param p Pointer to memory from where the value should be loaded, | ||||
| 	 * may have any alignment. | ||||
| 	 * @return float4 value loaded from the specified memory location. | ||||
| 	 */ | ||||
| 
 | ||||
| 	static float4 loadu( const float* const p ) | ||||
| 	{ | ||||
| 		return( _mm_loadu_ps( p )); | ||||
| 	} | ||||
| 
 | ||||
| 	/**
 | ||||
| 	 * Only the first "lim" elements at "p" are read. Any "lim" below 2 takes | ||||
| 	 * the single-element path, any "lim" above 3 loads all four elements. | ||||
| 	 * | ||||
| 	 * @param p Pointer to memory from where the value should be loaded, | ||||
| 	 * may have any alignment. | ||||
| 	 * @param lim The maximum number of elements to load, >0. | ||||
| 	 * @return float4 value loaded from the specified memory location, with | ||||
| 	 * elements beyond "lim" set to 0. | ||||
| 	 */ | ||||
| 
 | ||||
| 	static float4 loadu( const float* const p, int lim ) | ||||
| 	{ | ||||
| 		if( lim > 2 ) | ||||
| 		{ | ||||
| 			if( lim > 3 ) | ||||
| 			{ | ||||
| 				return( _mm_loadu_ps( p )); | ||||
| 			} | ||||
| 			else | ||||
| 			{ | ||||
| 				return( _mm_set_ps( 0.0f, p[ 2 ], p[ 1 ], p[ 0 ])); | ||||
| 			} | ||||
| 		} | ||||
| 		else | ||||
| 		{ | ||||
| 			if( lim == 2 ) | ||||
| 			{ | ||||
| 				return( _mm_set_ps( 0.0f, 0.0f, p[ 1 ], p[ 0 ])); | ||||
| 			} | ||||
| 			else | ||||
| 			{ | ||||
| 				return( _mm_load_ss( p )); | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	/**
 | ||||
| 	 * Function stores *this value to the specified memory location. | ||||
| 	 * | ||||
| 	 * @param[out] p Output memory location, should be 16-byte aligned. | ||||
| 	 */ | ||||
| 
 | ||||
| 	void store( float* const p ) const | ||||
| 	{ | ||||
| 		_mm_store_ps( p, value ); | ||||
| 	} | ||||
| 
 | ||||
| 	/**
 | ||||
| 	 * Function stores *this value to the specified memory location. | ||||
| 	 * | ||||
| 	 * @param[out] p Output memory location, may have any alignment. | ||||
| 	 */ | ||||
| 
 | ||||
| 	void storeu( float* const p ) const | ||||
| 	{ | ||||
| 		_mm_storeu_ps( p, value ); | ||||
| 	} | ||||
| 
 | ||||
| 	/**
 | ||||
| 	 * Function stores "lim" lower elements of *this value to the specified | ||||
| 	 * memory location. Any "lim" below 2 stores a single element, any "lim" | ||||
| 	 * above 3 stores all four elements. | ||||
| 	 * | ||||
| 	 * @param[out] p Output memory location, may have any alignment. | ||||
| 	 * @param lim The number of lower elements to store, >0. | ||||
| 	 */ | ||||
| 
 | ||||
| 	void storeu( float* const p, int lim ) const | ||||
| 	{ | ||||
| 		if( lim > 2 ) | ||||
| 		{ | ||||
| 			if( lim > 3 ) | ||||
| 			{ | ||||
| 				_mm_storeu_ps( p, value ); | ||||
| 			} | ||||
| 			else | ||||
| 			{ | ||||
| 				_mm_storel_pi( (__m64*) p, value ); | ||||
| 				_mm_store_ss( p + 2, _mm_movehl_ps( value, value )); | ||||
| 			} | ||||
| 		} | ||||
| 		else | ||||
| 		{ | ||||
| 			if( lim == 2 ) | ||||
| 			{ | ||||
| 				_mm_storel_pi( (__m64*) p, value ); | ||||
| 			} | ||||
| 			else | ||||
| 			{ | ||||
| 				_mm_store_ss( p, value ); | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	float4& operator += ( const float4& s ) | ||||
| 	{ | ||||
| 		value = _mm_add_ps( value, s.value ); | ||||
| 		return( *this ); | ||||
| 	} | ||||
| 
 | ||||
| 	float4& operator -= ( const float4& s ) | ||||
| 	{ | ||||
| 		value = _mm_sub_ps( value, s.value ); | ||||
| 		return( *this ); | ||||
| 	} | ||||
| 
 | ||||
| 	float4& operator *= ( const float4& s ) | ||||
| 	{ | ||||
| 		value = _mm_mul_ps( value, s.value ); | ||||
| 		return( *this ); | ||||
| 	} | ||||
| 
 | ||||
| 	float4& operator /= ( const float4& s ) | ||||
| 	{ | ||||
| 		value = _mm_div_ps( value, s.value ); | ||||
| 		return( *this ); | ||||
| 	} | ||||
| 
 | ||||
| 	float4 operator + ( const float4& s ) const | ||||
| 	{ | ||||
| 		return( _mm_add_ps( value, s.value )); | ||||
| 	} | ||||
| 
 | ||||
| 	float4 operator - ( const float4& s ) const | ||||
| 	{ | ||||
| 		return( _mm_sub_ps( value, s.value )); | ||||
| 	} | ||||
| 
 | ||||
| 	float4 operator * ( const float4& s ) const | ||||
| 	{ | ||||
| 		return( _mm_mul_ps( value, s.value )); | ||||
| 	} | ||||
| 
 | ||||
| 	float4 operator / ( const float4& s ) const | ||||
| 	{ | ||||
| 		return( _mm_div_ps( value, s.value )); | ||||
| 	} | ||||
| 
 | ||||
| 	/**
 | ||||
| 	 * The upper two elements are first added onto the lower two, then | ||||
| 	 * element 1 of that partial sum is added onto element 0. | ||||
| 	 * | ||||
| 	 * @return Horizontal sum of elements. | ||||
| 	 */ | ||||
| 
 | ||||
| 	float hadd() const | ||||
| 	{ | ||||
| 		const __m128 v = _mm_add_ps( value, _mm_movehl_ps( value, value )); | ||||
| 		const __m128 res = _mm_add_ss( v, _mm_shuffle_ps( v, v, 1 )); | ||||
| 		return( _mm_cvtss_f32( res )); | ||||
| 	} | ||||
| 
 | ||||
| 	/**
 | ||||
| 	 * Function performs in-place addition of a value located in memory and | ||||
| 	 * the specified value. | ||||
| 	 * | ||||
| 	 * @param p Pointer to value where addition happens. May be unaligned. | ||||
| 	 * @param v Value to add. | ||||
| 	 */ | ||||
| 
 | ||||
| 	static void addu( float* const p, const float4& v ) | ||||
| 	{ | ||||
| 		( loadu( p ) + v ).storeu( p ); | ||||
| 	} | ||||
| 
 | ||||
| 	/**
 | ||||
| 	 * Function performs in-place addition of a value located in memory and | ||||
| 	 * the specified value. Limited to the specified number of elements. | ||||
| 	 * | ||||
| 	 * @param p Pointer to value where addition happens. May be unaligned. | ||||
| 	 * @param v Value to add. | ||||
| 	 * @param lim The element number limit, >0. | ||||
| 	 */ | ||||
| 
 | ||||
| 	static void addu( float* const p, const float4& v, const int lim ) | ||||
| 	{ | ||||
| 		( loadu( p, lim ) + v ).storeu( p, lim ); | ||||
| 	} | ||||
| 
 | ||||
| 	__m128 value; ///< Packed value of 4 floats.
 | ||||
| 		///<
 | ||||
| }; | ||||
| 
 | ||||
| /**
 | ||||
|  * Rounds every element of a packed float4 to an integral value. | ||||
|  * | ||||
|  * The SSE rounding mode is switched to round-to-nearest for the duration | ||||
|  * of the float -> int32 -> float round trip, and the caller's rounding | ||||
|  * mode is put back before returning. | ||||
|  * | ||||
|  * @param v Value to round. | ||||
|  * @return Rounded SIMD value. | ||||
|  */ | ||||
| 
 | ||||
| inline float4 round( const float4& v ) | ||||
| { | ||||
| 	const unsigned int savedMode = _MM_GET_ROUNDING_MODE(); | ||||
| 	_MM_SET_ROUNDING_MODE( _MM_ROUND_NEAREST ); | ||||
| 
 | ||||
| 	const __m128i asInt = _mm_cvtps_epi32( v.value ); | ||||
| 	const __m128 rounded = _mm_cvtepi32_ps( asInt ); | ||||
| 
 | ||||
| 	_MM_SET_ROUNDING_MODE( savedMode ); | ||||
| 
 | ||||
| 	return float4( rounded ); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * Clips each packed element into the range given by "minv" and "maxv": | ||||
|  * the element-wise maximum with "minv" is taken first, then the | ||||
|  * element-wise minimum with "maxv". | ||||
|  * | ||||
|  * @param Value Value to clamp. | ||||
|  * @param minv Minimal allowed value. | ||||
|  * @param maxv Maximal allowed value. | ||||
|  * @return The clamped value. | ||||
|  */ | ||||
| 
 | ||||
| inline float4 clamp( const float4& Value, const float4& minv, | ||||
| 	const float4& maxv ) | ||||
| { | ||||
| 	const __m128 raised = _mm_max_ps( Value.value, minv.value ); | ||||
| 	const __m128 clipped = _mm_min_ps( raised, maxv.value ); | ||||
| 
 | ||||
| 	return float4( clipped ); | ||||
| } | ||||
| 
 | ||||
| typedef fpclass_def< avir :: float4, float > fpclass_float4; ///<
 | ||||
| 	///< Class that can be used as the "fpclass" template parameter of the
 | ||||
| 	///< avir::CImageResizer class to perform calculation using default
 | ||||
| 	///< interleaved algorithm, using SIMD float4 type.
 | ||||
| 	///<
 | ||||
| 
 | ||||
| } // namespace avir
 | ||||
| 
 | ||||
| #endif // AVIR_FLOAT4_SSE_INCLUDED
 | ||||
							
								
								
									
										365
									
								
								third_party/avir/avir_float8_avx.h
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										365
									
								
								third_party/avir/avir_float8_avx.h
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,365 @@ | |||
| /* clang-format off */ | ||||
| //$ nobt
 | ||||
| //$ nocpp
 | ||||
| 
 | ||||
| /**
 | ||||
|  * @file avir_float8_avx.h | ||||
|  * | ||||
|  * @brief Inclusion file for the "float8" type. | ||||
|  * | ||||
|  * This file includes the "float8" AVX-based type used for SIMD variable | ||||
|  * storage and processing. | ||||
|  * | ||||
|  * AVIR Copyright (c) 2015-2019 Aleksey Vaneev | ||||
|  */ | ||||
| 
 | ||||
| #ifndef AVIR_FLOAT8_AVX_INCLUDED | ||||
| #define AVIR_FLOAT8_AVX_INCLUDED | ||||
| 
 | ||||
| #include "libc/bits/mmintrin.h" | ||||
| #include "libc/bits/avxintrin.h" | ||||
| #include "libc/bits/smmintrin.h" | ||||
| #include "libc/bits/pmmintrin.h" | ||||
| #include "libc/bits/avx2intrin.h" | ||||
| #include "libc/bits/xmmintrin.h" | ||||
| #include "third_party/avir/avir_dil.h" | ||||
| 
 | ||||
| namespace avir { | ||||
| 
 | ||||
| /**
 | ||||
|  * @brief SIMD packed 8-float type. | ||||
|  * | ||||
|  * This class implements a packed 8-float type that can be used to perform | ||||
|  * parallel computation using SIMD instructions on AVX-enabled processors. | ||||
|  * This class can be used as the "fptype" argument of the avir::fpclass_def | ||||
|  * or avir::fpclass_def_dil class. | ||||
|  * | ||||
|  * The eight lanes are kept in the public "value" member. Constructing or | ||||
|  * assigning from a single float copies that float into all eight lanes | ||||
|  * (_mm256_set1_ps), while conversion to float yields the lowest lane of | ||||
|  * the lower 128-bit half. | ||||
|  */ | ||||
| 
 | ||||
| class float8 | ||||
| { | ||||
| public: | ||||
| 	float8() | ||||
| 	{ | ||||
| 	} | ||||
| 
 | ||||
| 	float8( const float8& s ) | ||||
| 		: value( s.value ) | ||||
| 	{ | ||||
| 	} | ||||
| 
 | ||||
| 	float8( const __m256 s ) | ||||
| 		: value( s ) | ||||
| 	{ | ||||
| 	} | ||||
| 
 | ||||
| 	float8( const float s ) | ||||
| 		: value( _mm256_set1_ps( s )) | ||||
| 	{ | ||||
| 	} | ||||
| 
 | ||||
| 	float8& operator = ( const float8& s ) | ||||
| 	{ | ||||
| 		value = s.value; | ||||
| 		return( *this ); | ||||
| 	} | ||||
| 
 | ||||
| 	float8& operator = ( const __m256 s ) | ||||
| 	{ | ||||
| 		value = s; | ||||
| 		return( *this ); | ||||
| 	} | ||||
| 
 | ||||
| 	float8& operator = ( const float s ) | ||||
| 	{ | ||||
| 		value = _mm256_set1_ps( s ); | ||||
| 		return( *this ); | ||||
| 	} | ||||
| 
 | ||||
| 	operator float () const | ||||
| 	{ | ||||
| 		return( _mm_cvtss_f32( _mm256_extractf128_ps( value, 0 ))); | ||||
| 	} | ||||
| 
 | ||||
| 	/**
 | ||||
| 	 * @param p Pointer to memory from where the value should be loaded, | ||||
| 	 * should be 32-byte aligned. | ||||
| 	 * @return float8 value loaded from the specified memory location. | ||||
| 	 */ | ||||
| 
 | ||||
| 	static float8 load( const float* const p ) | ||||
| 	{ | ||||
| 		return( _mm256_load_ps( p )); | ||||
| 	} | ||||
| 
 | ||||
| 	/**
 | ||||
| 	 * @param p Pointer to memory from where the value should be loaded, | ||||
| 	 * may have any alignment. | ||||
| 	 * @return float8 value loaded from the specified memory location. | ||||
| 	 */ | ||||
| 
 | ||||
| 	static float8 loadu( const float* const p ) | ||||
| 	{ | ||||
| 		return( _mm256_loadu_ps( p )); | ||||
| 	} | ||||
| 
 | ||||
| 	/**
 | ||||
| 	 * The value is assembled from two 128-bit halves: when "lim" is above 4 | ||||
| 	 * the lower half is loaded in full and the upper half is limited to | ||||
| 	 * "lim - 4" elements via loadu4(), otherwise the lower half is limited | ||||
| 	 * to "lim" elements and the upper half is zero. | ||||
| 	 * | ||||
| 	 * @param p Pointer to memory from where the value should be loaded, | ||||
| 	 * may have any alignment. | ||||
| 	 * @param lim The maximum number of elements to load, >0. | ||||
| 	 * @return float8 value loaded from the specified memory location, with | ||||
| 	 * elements beyond "lim" set to 0. | ||||
| 	 */ | ||||
| 
 | ||||
| 	static float8 loadu( const float* const p, const int lim ) | ||||
| 	{ | ||||
| 		__m128 lo; | ||||
| 		__m128 hi; | ||||
| 
 | ||||
| 		if( lim > 4 ) | ||||
| 		{ | ||||
| 			lo = _mm_loadu_ps( p ); | ||||
| 			hi = loadu4( p + 4, lim - 4 ); | ||||
| 		} | ||||
| 		else | ||||
| 		{ | ||||
| 			lo = loadu4( p, lim ); | ||||
| 			hi = _mm_setzero_ps(); | ||||
| 		} | ||||
| 
 | ||||
| 		return( _mm256_insertf128_ps( _mm256_castps128_ps256( lo ), hi, 1 )); | ||||
| 	} | ||||
| 
 | ||||
| 	/**
 | ||||
| 	 * Function stores *this value to the specified memory location. | ||||
| 	 * | ||||
| 	 * @param[out] p Output memory location, should be 32-byte aligned. | ||||
| 	 */ | ||||
| 
 | ||||
| 	void store( float* const p ) const | ||||
| 	{ | ||||
| 		_mm256_store_ps( p, value ); | ||||
| 	} | ||||
| 
 | ||||
| 	/**
 | ||||
| 	 * Function stores *this value to the specified memory location. | ||||
| 	 * | ||||
| 	 * @param[out] p Output memory location, may have any alignment. | ||||
| 	 */ | ||||
| 
 | ||||
| 	void storeu( float* const p ) const | ||||
| 	{ | ||||
| 		_mm256_storeu_ps( p, value ); | ||||
| 	} | ||||
| 
 | ||||
| 	/**
 | ||||
| 	 * Function stores "lim" lower elements of *this value to the specified | ||||
| 	 * memory location. When "lim" is above 4 the lower 128-bit half is | ||||
| 	 * stored in full and the remaining "lim - 4" elements are taken from the | ||||
| 	 * upper half; otherwise only the lower half is used. | ||||
| 	 * | ||||
| 	 * @param[out] p Output memory location, may have any alignment. | ||||
| 	 * @param lim The number of lower elements to store, >0. | ||||
| 	 */ | ||||
| 
 | ||||
| 	void storeu( float* p, int lim ) const | ||||
| 	{ | ||||
| 		__m128 v; | ||||
| 
 | ||||
| 		if( lim > 4 ) | ||||
| 		{ | ||||
| 			_mm_storeu_ps( p, _mm256_extractf128_ps( value, 0 )); | ||||
| 			v = _mm256_extractf128_ps( value, 1 ); | ||||
| 			p += 4; | ||||
| 			lim -= 4; | ||||
| 		} | ||||
| 		else | ||||
| 		{ | ||||
| 			v = _mm256_extractf128_ps( value, 0 ); | ||||
| 		} | ||||
| 
 | ||||
| 		if( lim > 2 ) | ||||
| 		{ | ||||
| 			if( lim > 3 ) | ||||
| 			{ | ||||
| 				_mm_storeu_ps( p, v ); | ||||
| 			} | ||||
| 			else | ||||
| 			{ | ||||
| 				_mm_storel_pi( (__m64*) p, v ); | ||||
| 				_mm_store_ss( p + 2, _mm_movehl_ps( v, v )); | ||||
| 			} | ||||
| 		} | ||||
| 		else | ||||
| 		{ | ||||
| 			if( lim == 2 ) | ||||
| 			{ | ||||
| 				_mm_storel_pi( (__m64*) p, v ); | ||||
| 			} | ||||
| 			else | ||||
| 			{ | ||||
| 				_mm_store_ss( p, v ); | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	float8& operator += ( const float8& s ) | ||||
| 	{ | ||||
| 		value = _mm256_add_ps( value, s.value ); | ||||
| 		return( *this ); | ||||
| 	} | ||||
| 
 | ||||
| 	float8& operator -= ( const float8& s ) | ||||
| 	{ | ||||
| 		value = _mm256_sub_ps( value, s.value ); | ||||
| 		return( *this ); | ||||
| 	} | ||||
| 
 | ||||
| 	float8& operator *= ( const float8& s ) | ||||
| 	{ | ||||
| 		value = _mm256_mul_ps( value, s.value ); | ||||
| 		return( *this ); | ||||
| 	} | ||||
| 
 | ||||
| 	float8& operator /= ( const float8& s ) | ||||
| 	{ | ||||
| 		value = _mm256_div_ps( value, s.value ); | ||||
| 		return( *this ); | ||||
| 	} | ||||
| 
 | ||||
| 	float8 operator + ( const float8& s ) const | ||||
| 	{ | ||||
| 		return( _mm256_add_ps( value, s.value )); | ||||
| 	} | ||||
| 
 | ||||
| 	float8 operator - ( const float8& s ) const | ||||
| 	{ | ||||
| 		return( _mm256_sub_ps( value, s.value )); | ||||
| 	} | ||||
| 
 | ||||
| 	float8 operator * ( const float8& s ) const | ||||
| 	{ | ||||
| 		return( _mm256_mul_ps( value, s.value )); | ||||
| 	} | ||||
| 
 | ||||
| 	float8 operator / ( const float8& s ) const | ||||
| 	{ | ||||
| 		return( _mm256_div_ps( value, s.value )); | ||||
| 	} | ||||
| 
 | ||||
| 	/**
 | ||||
| 	 * The lower and upper 128-bit halves are added together first, then two | ||||
| 	 * _mm_hadd_ps steps reduce the four partial sums to one. | ||||
| 	 * | ||||
| 	 * @return Horizontal sum of elements. | ||||
| 	 */ | ||||
| 
 | ||||
| 	float hadd() const | ||||
| 	{ | ||||
| 		__m128 v = _mm_add_ps( _mm256_extractf128_ps( value, 0 ), | ||||
| 			_mm256_extractf128_ps( value, 1 )); | ||||
| 
 | ||||
| 		v = _mm_hadd_ps( v, v ); | ||||
| 		v = _mm_hadd_ps( v, v ); | ||||
| 		return( _mm_cvtss_f32( v )); | ||||
| 	} | ||||
| 
 | ||||
| 	/**
 | ||||
| 	 * Function performs in-place addition of a value located in memory and | ||||
| 	 * the specified value. | ||||
| 	 * | ||||
| 	 * @param p Pointer to value where addition happens. May be unaligned. | ||||
| 	 * @param v Value to add. | ||||
| 	 */ | ||||
| 
 | ||||
| 	static void addu( float* const p, const float8& v ) | ||||
| 	{ | ||||
| 		( loadu( p ) + v ).storeu( p ); | ||||
| 	} | ||||
| 
 | ||||
| 	/**
 | ||||
| 	 * Function performs in-place addition of a value located in memory and | ||||
| 	 * the specified value. Limited to the specified number of elements. | ||||
| 	 * | ||||
| 	 * @param p Pointer to value where addition happens. May be unaligned. | ||||
| 	 * @param v Value to add. | ||||
| 	 * @param lim The element number limit, >0. | ||||
| 	 */ | ||||
| 
 | ||||
| 	static void addu( float* const p, const float8& v, const int lim ) | ||||
| 	{ | ||||
| 		( loadu( p, lim ) + v ).storeu( p, lim ); | ||||
| 	} | ||||
| 
 | ||||
| 	__m256 value; ///< Packed value of 8 floats.
 | ||||
| 		///<
 | ||||
| 
 | ||||
| private: | ||||
| 	/**
 | ||||
| 	 * Helper used by loadu() to fill one 128-bit half. Any "lim" below 2 | ||||
| 	 * takes the single-element path, any "lim" above 3 loads all four | ||||
| 	 * elements. | ||||
| 	 * | ||||
| 	 * @param p Pointer to memory from where the value should be loaded, | ||||
| 	 * may have any alignment. | ||||
| 	 * @param lim The maximum number of elements to load, >0. | ||||
| 	 * @return __m128 value loaded from the specified memory location, with | ||||
| 	 * elements beyond "lim" set to 0. | ||||
| 	 */ | ||||
| 
 | ||||
| 	static __m128 loadu4( const float* const p, const int lim ) | ||||
| 	{ | ||||
| 		if( lim > 2 ) | ||||
| 		{ | ||||
| 			if( lim > 3 ) | ||||
| 			{ | ||||
| 				return( _mm_loadu_ps( p )); | ||||
| 			} | ||||
| 			else | ||||
| 			{ | ||||
| 				return( _mm_set_ps( 0.0f, p[ 2 ], p[ 1 ], p[ 0 ])); | ||||
| 			} | ||||
| 		} | ||||
| 		else | ||||
| 		{ | ||||
| 			if( lim == 2 ) | ||||
| 			{ | ||||
| 				return( _mm_set_ps( 0.0f, 0.0f, p[ 1 ], p[ 0 ])); | ||||
| 			} | ||||
| 			else | ||||
| 			{ | ||||
| 				return( _mm_load_ss( p )); | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| }; | ||||
| 
 | ||||
| /**
 | ||||
|  * Rounds every element of a packed float8 to an integral value using | ||||
|  * _mm256_round_ps with the round-to-nearest, no-exception mode. | ||||
|  * | ||||
|  * @param v Value to round. | ||||
|  * @return Rounded SIMD value. | ||||
|  */ | ||||
| 
 | ||||
| inline float8 round( const float8& v ) | ||||
| { | ||||
| 	/* Kept as an enumerator so the mode stays a compile-time immediate. */ | ||||
| 	enum { NearestNoExc = ( _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC ) }; | ||||
| 
 | ||||
| 	return float8( _mm256_round_ps( v.value, NearestNoExc )); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * Clips each packed element into the range given by "minv" and "maxv": | ||||
|  * the element-wise maximum with "minv" is taken first, then the | ||||
|  * element-wise minimum with "maxv". | ||||
|  * | ||||
|  * @param Value Value to clamp. | ||||
|  * @param minv Minimal allowed value. | ||||
|  * @param maxv Maximal allowed value. | ||||
|  * @return The clamped value. | ||||
|  */ | ||||
| 
 | ||||
| inline float8 clamp( const float8& Value, const float8& minv, | ||||
| 	const float8& maxv ) | ||||
| { | ||||
| 	const __m256 raised = _mm256_max_ps( Value.value, minv.value ); | ||||
| 	const __m256 clipped = _mm256_min_ps( raised, maxv.value ); | ||||
| 
 | ||||
| 	return float8( clipped ); | ||||
| } | ||||
| 
 | ||||
| typedef fpclass_def_dil< float, avir :: float8 > fpclass_float8_dil; ///<
 | ||||
| 	///< Class that can be used as the "fpclass" template parameter of the
 | ||||
| 	///< avir::CImageResizer class to perform calculation using
 | ||||
| 	///< de-interleaved SIMD algorithm, using SIMD float8 type.
 | ||||
| 	///<
 | ||||
| 
 | ||||
| } // namespace avir
 | ||||
| 
 | ||||
| #endif // AVIR_FLOAT8_AVX_INCLUDED
 | ||||
							
								
								
									
										1494
									
								
								third_party/avir/lancir.h
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										1494
									
								
								third_party/avir/lancir.h
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
							
								
								
									
										40
									
								
								third_party/avir/lanczos.cc
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										40
									
								
								third_party/avir/lanczos.cc
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,40 @@ | |||
| /*-*-mode:c++;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8-*-│
 | ||||
| │vi: set net ft=c++ ts=2 sts=2 sw=2 fenc=utf-8                              :vi│ | ||||
| ╞══════════════════════════════════════════════════════════════════════════════╡ | ||||
| │ Copyright 2020 Justine Alexandra Roberts Tunney                              │ | ||||
| │                                                                              │ | ||||
| │ This program is free software; you can redistribute it and/or modify         │ | ||||
| │ it under the terms of the GNU General Public License as published by         │ | ||||
| │ the Free Software Foundation; version 2 of the License.                      │ | ||||
| │                                                                              │ | ||||
| │ This program is distributed in the hope that it will be useful, but          │ | ||||
| │ WITHOUT ANY WARRANTY; without even the implied warranty of                   │ | ||||
| │ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU             │ | ||||
| │ General Public License for more details.                                     │ | ||||
| │                                                                              │ | ||||
| │ You should have received a copy of the GNU General Public License            │ | ||||
| │ along with this program; if not, write to the Free Software                  │ | ||||
| │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA                │ | ||||
| │ 02110-1301 USA                                                               │ | ||||
| ╚─────────────────────────────────────────────────────────────────────────────*/ | ||||
| #include "libc/limits.h" | ||||
| #include "libc/log/check.h" | ||||
| #include "libc/log/log.h" | ||||
| #include "third_party/avir/lanczos.h" | ||||
| namespace { | ||||
| #include "third_party/avir/lancir.h" | ||||
| }  // namespace
 | ||||
| 
 | ||||
| /**
 | ||||
|  * Does Lanczos interpolation. | ||||
|  * | ||||
|  * The work is delegated to avir::CLancIR::resizeImage() with both buffers | ||||
|  * treated as float arrays and 4 passed as the final argument (presumably | ||||
|  * the number of elements per pixel -- confirm against lancir.h). | ||||
|  * | ||||
|  * @param dyn is destination height | ||||
|  * @param dxn is destination width | ||||
|  * @param dst is destination float array, checked for 64-byte alignment | ||||
|  * @param syn is source height | ||||
|  * @param sxn is source width | ||||
|  * @param src is source float array, checked for 64-byte alignment | ||||
|  * @param sw is forwarded to resizeImage() as the source scanline size | ||||
|  * @note computers w/o AVX2+FMA need to call BilinearScale() | ||||
|  */ | ||||
| void lanczos(unsigned dyn, unsigned dxn, void *restrict dst, unsigned syn, | ||||
|              unsigned sxn, const void *restrict src, unsigned sw) { | ||||
|   avir::CLancIR resizer; | ||||
|   DCHECK_ALIGNED(64, dst); | ||||
|   DCHECK_ALIGNED(64, src); | ||||
|   /* The dimensions are `unsigned`, so they are formatted with %u; handing | ||||
|      them to a %zu conversion is a format/argument type mismatch. */ | ||||
|   LOGF("%10s%5ux×%-5u→%5u×%-5u", "lanczos", sxn, syn, dxn, dyn); | ||||
|   resizer.resizeImage((const float *)src, sxn, syn, sw, (float *)dst, dxn, dyn, | ||||
|                       4); | ||||
| } | ||||
							
								
								
									
										13
									
								
								third_party/avir/lanczos.h
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										13
									
								
								third_party/avir/lanczos.h
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,13 @@ | |||
| #ifndef COSMOPOLITAN_THIRD_PARTY_AVIR_LANCZOS_H_ | ||||
| #define COSMOPOLITAN_THIRD_PARTY_AVIR_LANCZOS_H_ | ||||
| #if !(__ASSEMBLER__ + __LINKER__ + 0) | ||||
| COSMOPOLITAN_C_START_ | ||||
| 
 | ||||
| /* Lanczos resize of float pixel data. Arguments, in order: destination | ||||
|    height, destination width, destination buffer, source height, source | ||||
|    width, source buffer, source scanline size. */ | ||||
| void lanczos(unsigned, unsigned, void *, unsigned, unsigned, const void *, | ||||
|              unsigned); | ||||
| /* NOTE(review): lanczos3 has the same signature; its definition is not | ||||
|    visible here, so the argument order is presumably the same -- confirm. */ | ||||
| void lanczos3(unsigned, unsigned, void *, unsigned, unsigned, const void *, | ||||
|               unsigned); | ||||
| 
 | ||||
| COSMOPOLITAN_C_END_ | ||||
| #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ | ||||
| #endif /* COSMOPOLITAN_THIRD_PARTY_AVIR_LANCZOS_H_ */ | ||||
							
								
								
									
										77
									
								
								third_party/avir/lanczos1.cc
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										77
									
								
								third_party/avir/lanczos1.cc
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,77 @@ | |||
| /*-*-mode:c++;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8-*-│
 | ||||
| │vi: set net ft=c++ ts=2 sts=2 sw=2 fenc=utf-8                              :vi│ | ||||
| ╞══════════════════════════════════════════════════════════════════════════════╡ | ||||
| │ Copyright 2020 Justine Alexandra Roberts Tunney                              │ | ||||
| │                                                                              │ | ||||
| │ This program is free software; you can redistribute it and/or modify         │ | ||||
| │ it under the terms of the GNU General Public License as published by         │ | ||||
| │ the Free Software Foundation; version 2 of the License.                      │ | ||||
| │                                                                              │ | ||||
| │ This program is distributed in the hope that it will be useful, but          │ | ||||
| │ WITHOUT ANY WARRANTY; without even the implied warranty of                   │ | ||||
| │ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU             │ | ||||
| │ General Public License for more details.                                     │ | ||||
| │                                                                              │ | ||||
| │ You should have received a copy of the GNU General Public License            │ | ||||
| │ along with this program; if not, write to the Free Software                  │ | ||||
| │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA                │ | ||||
| │ 02110-1301 USA                                                               │ | ||||
| ╚─────────────────────────────────────────────────────────────────────────────*/ | ||||
| #include "libc/bits/xmmintrin.h" | ||||
| #include "libc/limits.h" | ||||
| #include "libc/log/log.h" | ||||
| #include "libc/runtime/runtime.h" | ||||
| #include "third_party/avir/lanczos1.h" | ||||
| namespace { | ||||
| #include "third_party/avir/lanczos1.hpp" | ||||
| }  // namespace
 | ||||
| 
 | ||||
| /**
 | ||||
|  * Gives the resizer a fresh Lanczos1Impl. | ||||
|  * | ||||
|  * Whatever resizer->p currently holds is released through lanczos1free() | ||||
|  * first, so the struct must have been zero-initialized (or initialized | ||||
|  * earlier) before this is called. | ||||
|  */ | ||||
| void lanczos1init(struct lanczos1 *resizer) { | ||||
|   Lanczos1Impl *fresh; | ||||
|   lanczos1free(resizer); | ||||
|   fresh = new Lanczos1Impl; | ||||
|   resizer->p = fresh; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * Destroys the Lanczos1Impl held by the resizer, if any, and resets | ||||
|  * resizer->p to null so a repeated call does nothing. | ||||
|  */ | ||||
| void lanczos1free(struct lanczos1 *resizer) { | ||||
|   if (resizer->p) { | ||||
|     delete static_cast<Lanczos1Impl *>(resizer->p); | ||||
|     resizer->p = nullptr; | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * Scales a single image plane of unsigned chars with Lanczos | ||||
|  * interpolation. Typical use: | ||||
|  * | ||||
|  *   struct lanczos1 scaler = {0}; | ||||
|  *   lanczos1init(&scaler); | ||||
|  *   lanczos1(&scaler, ...); | ||||
|  *   lanczos1free(&scaler); | ||||
|  * | ||||
|  * The buffer sizes are checked against the dimensions before resizing, | ||||
|  * and the SSE rounding mode is set to round-to-nearest while the resize | ||||
|  * runs, then restored. | ||||
|  * | ||||
|  * @param dyn is destination height | ||||
|  * @param dxn is destination width | ||||
|  * @param dst is destination unsigned char array | ||||
|  * @param dstsize is number of bytes in dst | ||||
|  * @param syn is source height | ||||
|  * @param sxn is source width | ||||
|  * @param ssw is number of unsigned chars per scanline in src | ||||
|  * @param src is source unsigned char array | ||||
|  * @param srcsize is number of bytes in src | ||||
|  */ | ||||
| void lanczos1(struct lanczos1 *resizer, size_t dyn, size_t dxn, void *dst, | ||||
|               size_t dstsize, size_t syn, size_t sxn, size_t ssw, | ||||
|               const void *src, size_t srcsize) { | ||||
|   LOGF("%10s%5zux×%-5zu→%5zu×%-5zu", "lanczos1", sxn, syn, dxn, dyn); | ||||
|   CHECK_LE(dstsize, INT_MAX); | ||||
|   CHECK_LE(srcsize, INT_MAX); | ||||
|   CHECK_LE(sizeof(unsigned char) * 1 * dyn * dxn, dstsize); | ||||
|   CHECK_LE(sizeof(unsigned char) * 1 * syn * sxn, srcsize); | ||||
|   CHECK_LE(sizeof(unsigned char) * syn * ssw, srcsize); | ||||
|   const unsigned int saved_mode = _MM_GET_ROUNDING_MODE(); | ||||
|   _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST); | ||||
|   Lanczos1Impl *const engine = static_cast<Lanczos1Impl *>(resizer->p); | ||||
|   const unsigned char *const from = static_cast<const unsigned char *>(src); | ||||
|   unsigned char *const to = static_cast<unsigned char *>(dst); | ||||
|   engine->lanczos.resizeImage(from, sxn, syn, ssw, to, dxn, dyn, 1); | ||||
|   _MM_SET_ROUNDING_MODE(saved_mode); | ||||
| } | ||||
							
								
								
									
										18
									
								
								third_party/avir/lanczos1.h
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								third_party/avir/lanczos1.h
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,18 @@ | |||
| #ifndef COSMOPOLITAN_THIRD_PARTY_AVIR_LANCZOS1_H_ | ||||
| #define COSMOPOLITAN_THIRD_PARTY_AVIR_LANCZOS1_H_ | ||||
| #if !(__ASSEMBLER__ + __LINKER__ + 0) | ||||
| COSMOPOLITAN_C_START_ | ||||
| 
 | ||||
| /* Resizer handle. `p` is the Lanczos1Impl object created by lanczos1init(), | ||||
|    or null. Zero-initialize the struct before the first lanczos1init() | ||||
|    call, because init releases whatever `p` already points at. */ | ||||
| struct lanczos1 { | ||||
|   void *p; | ||||
| }; | ||||
| 
 | ||||
| /* lanczos1init() releases any previous implementation object and allocates | ||||
|    a new one; lanczos1free() deletes it and resets `p` to null; lanczos1() | ||||
|    resizes one plane of unsigned chars through the object held in `p`. */ | ||||
| void lanczos1init(struct lanczos1 *self); | ||||
| void lanczos1free(struct lanczos1 *self); | ||||
| void lanczos1(struct lanczos1 *self, size_t dyn, size_t dxn, void *dst, | ||||
|               size_t dstsize, size_t syn, size_t sxn, size_t ssw, | ||||
|               const void *src, size_t srcsize) paramsnonnull(); | ||||
| 
 | ||||
| COSMOPOLITAN_C_END_ | ||||
| #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ | ||||
| #endif /* COSMOPOLITAN_THIRD_PARTY_AVIR_LANCZOS1_H_ */ | ||||
							
								
								
									
										11
									
								
								third_party/avir/lanczos1.hpp
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								third_party/avir/lanczos1.hpp
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,11 @@ | |||
| #ifndef COSMOPOLITAN_THIRD_PARTY_AVIR_LANCZOS1_HPP_ | ||||
| #define COSMOPOLITAN_THIRD_PARTY_AVIR_LANCZOS1_HPP_ | ||||
| #include "third_party/avir/lancir.h" | ||||
| 
 | ||||
| struct Lanczos1Impl { /* C++ object stored behind the void *p of struct lanczos1 */ | ||||
|   Lanczos1Impl() : lanczos{} { | ||||
|   } | ||||
|   avir::CLancIR lanczos; /* the resizer instance that lanczos1() calls into */ | ||||
| }; | ||||
| 
 | ||||
| #endif /* COSMOPOLITAN_THIRD_PARTY_AVIR_LANCZOS1_HPP_ */ | ||||
							
								
								
									
										31
									
								
								third_party/avir/lanczos1b.cc
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										31
									
								
								third_party/avir/lanczos1b.cc
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,31 @@ | |||
| /*-*-mode:c++;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8-*-│
 | ||||
| │vi: set net ft=c++ ts=2 sts=2 sw=2 fenc=utf-8                              :vi│ | ||||
| ╞══════════════════════════════════════════════════════════════════════════════╡ | ||||
| │ Copyright 2020 Justine Alexandra Roberts Tunney                              │ | ||||
| │                                                                              │ | ||||
| │ This program is free software; you can redistribute it and/or modify         │ | ||||
| │ it under the terms of the GNU General Public License as published by         │ | ||||
| │ the Free Software Foundation; version 2 of the License.                      │ | ||||
| │                                                                              │ | ||||
| │ This program is distributed in the hope that it will be useful, but          │ | ||||
| │ WITHOUT ANY WARRANTY; without even the implied warranty of                   │ | ||||
| │ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU             │ | ||||
| │ General Public License for more details.                                     │ | ||||
| │                                                                              │ | ||||
| │ You should have received a copy of the GNU General Public License            │ | ||||
| │ along with this program; if not, write to the Free Software                  │ | ||||
| │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA                │ | ||||
| │ 02110-1301 USA                                                               │ | ||||
| ╚─────────────────────────────────────────────────────────────────────────────*/ | ||||
| #include "libc/bits/bits.h" | ||||
| #include "third_party/avir/lanczos1b.h" | ||||
| namespace { | ||||
| #include "third_party/avir/lancir.h" | ||||
| }  // namespace
 | ||||
| 
 | ||||
| /** | ||||
|  * Resizes unsigned char image w/ Lanczos interpolation using a | ||||
|  * temporary avir::CLancIR object, so no handle needs to be managed. | ||||
|  * | ||||
|  * The value 4 is passed as the element count and the source scanline | ||||
|  * size is roundup2pow(sxn) * 4. | ||||
|  * NOTE(review): this presumably means 4 interleaved channels per pixel | ||||
|  * with source rows padded to a two-power width; confirm with callers. | ||||
|  * | ||||
|  * @param dyn is destination height | ||||
|  * @param dxn is destination width | ||||
|  * @param dst is destination unsigned char array | ||||
|  * @param syn is source height | ||||
|  * @param sxn is source width | ||||
|  * @param src is source unsigned char array | ||||
|  */ | ||||
| void lanczos1b(size_t dyn, size_t dxn, unsigned char *restrict dst, size_t syn, | ||||
|                size_t sxn, const unsigned char *restrict src) { | ||||
|   avir::CLancIR engine; | ||||
|   LOGF("%10s%5zux×%-5zu→%5zu×%-5zu", "lanczos1b", sxn, syn, dxn, dyn); | ||||
|   const auto scanline = roundup2pow(sxn) * 4; | ||||
|   engine.resizeImage(src, sxn, syn, scanline, dst, dxn, dyn, 4); | ||||
| } | ||||
							
								
								
									
										11
									
								
								third_party/avir/lanczos1b.h
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								third_party/avir/lanczos1b.h
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,11 @@ | |||
| #ifndef COSMOPOLITAN_THIRD_PARTY_AVIR_LANCZOS1B_H_ | ||||
| #define COSMOPOLITAN_THIRD_PARTY_AVIR_LANCZOS1B_H_ | ||||
| #if !(__ASSEMBLER__ + __LINKER__ + 0) | ||||
| COSMOPOLITAN_C_START_ | ||||
| 
 | ||||
| void lanczos1b(size_t dyn, size_t dxn, unsigned char *restrict dst, size_t syn, | ||||
|                size_t sxn, const unsigned char *restrict src); /* resizes sxn×syn image in src to dxn×dyn in dst; see lanczos1b.cc */ | ||||
| 
 | ||||
| COSMOPOLITAN_C_END_ | ||||
| #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ | ||||
| #endif /* COSMOPOLITAN_THIRD_PARTY_AVIR_LANCZOS1B_H_ */ | ||||
							
								
								
									
										63
									
								
								third_party/avir/lanczos1f.cc
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										63
									
								
								third_party/avir/lanczos1f.cc
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,63 @@ | |||
| /*-*-mode:c++;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8-*-│
 | ||||
| │vi: set net ft=c++ ts=2 sts=2 sw=2 fenc=utf-8                              :vi│ | ||||
| ╞══════════════════════════════════════════════════════════════════════════════╡ | ||||
| │ Copyright 2020 Justine Alexandra Roberts Tunney                              │ | ||||
| │                                                                              │ | ||||
| │ This program is free software; you can redistribute it and/or modify         │ | ||||
| │ it under the terms of the GNU General Public License as published by         │ | ||||
| │ the Free Software Foundation; version 2 of the License.                      │ | ||||
| │                                                                              │ | ||||
| │ This program is distributed in the hope that it will be useful, but          │ | ||||
| │ WITHOUT ANY WARRANTY; without even the implied warranty of                   │ | ||||
| │ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU             │ | ||||
| │ General Public License for more details.                                     │ | ||||
| │                                                                              │ | ||||
| │ You should have received a copy of the GNU General Public License            │ | ||||
| │ along with this program; if not, write to the Free Software                  │ | ||||
| │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA                │ | ||||
| │ 02110-1301 USA                                                               │ | ||||
| ╚─────────────────────────────────────────────────────────────────────────────*/ | ||||
| #include "libc/bits/xmmintrin.h" | ||||
| #include "libc/runtime/runtime.h" | ||||
| #include "third_party/avir/lanczos1f.h" | ||||
| namespace { | ||||
| #include "third_party/avir/lanczos1f.hpp" | ||||
| }  // namespace
 | ||||
| 
 | ||||
| /** | ||||
|  * Installs a fresh Lanczos1fImpl behind the handle. | ||||
|  * | ||||
|  * Any object already stored in resizer->p is destroyed first, so the | ||||
|  * handle must be zero-initialized before the very first call. | ||||
|  */ | ||||
| void lanczos1finit(struct lanczos1f *resizer) { | ||||
|   lanczos1ffree(resizer); | ||||
|   Lanczos1fImpl *fresh = new Lanczos1fImpl(); | ||||
|   resizer->p = fresh; | ||||
| } | ||||
| 
 | ||||
| /** | ||||
|  * Destroys the object behind the handle, if any, and nulls the pointer | ||||
|  * so that calling this again is harmless. | ||||
|  */ | ||||
| void lanczos1ffree(struct lanczos1f *resizer) { | ||||
|   if (resizer->p) { | ||||
|     delete static_cast<Lanczos1fImpl *>(resizer->p); | ||||
|     resizer->p = nullptr; | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| /** | ||||
|  * Resizes float image plane w/ Lanczos interpolation, e.g. | ||||
|  * | ||||
|  *   struct lanczos1f scaler = {0}; | ||||
|  *   lanczos1finit(&scaler); | ||||
|  *   lanczos1f(&scaler, ...); | ||||
|  *   lanczos1ffree(&scaler); | ||||
|  * | ||||
|  * @param resizer is handle that lanczos1finit() has set up; its p | ||||
|  *     member is dereferenced without a null check | ||||
|  * @param dyn is destination height | ||||
|  * @param dxn is destination width | ||||
|  * @param dst is destination float array | ||||
|  * @param syn is source height | ||||
|  * @param sxn is source width | ||||
|  * @param ssw is source scanline size, forwarded unchanged | ||||
|  *     (presumably counted in floats, not bytes; confirm in lancir.h) | ||||
|  * @param src is source float array | ||||
|  * @param ky0 is forwarded to CLancIR::resizeImage() after kx0 | ||||
|  * @param kx0 is forwarded to CLancIR::resizeImage() before ky0 | ||||
|  *     (presumably the y/x resizing steps; TODO confirm in lancir.h) | ||||
|  * @param oy is forwarded to CLancIR::resizeImage() after ox | ||||
|  * @param ox is forwarded to CLancIR::resizeImage() before oy | ||||
|  *     (presumably the y/x source offsets; TODO confirm in lancir.h) | ||||
|  */ | ||||
| void lanczos1f(struct lanczos1f *resizer, size_t dyn, size_t dxn, void *dst, | ||||
|                size_t syn, size_t sxn, size_t ssw, const void *src, double ky0, | ||||
|                double kx0, double oy, double ox) { | ||||
|   Lanczos1fImpl *impl = static_cast<Lanczos1fImpl *>(resizer->p); | ||||
|   /* the element count of 1 matches the single plane handled here; note | ||||
|      that the callee takes x before y, the reverse of our signature */ | ||||
|   impl->lanczos.resizeImage(static_cast<const float *>(src), sxn, syn, ssw, | ||||
|                             static_cast<float *>(dst), dxn, dyn, 1, kx0, ky0, | ||||
|                             ox, oy); | ||||
| } | ||||
							
								
								
									
										18
									
								
								third_party/avir/lanczos1f.h
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								third_party/avir/lanczos1f.h
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,18 @@ | |||
| #ifndef COSMOPOLITAN_THIRD_PARTY_AVIR_LANCZOS1F_H_ | ||||
| #define COSMOPOLITAN_THIRD_PARTY_AVIR_LANCZOS1F_H_ | ||||
| #if !(__ASSEMBLER__ + __LINKER__ + 0) | ||||
| COSMOPOLITAN_C_START_ | ||||
| 
 | ||||
| struct lanczos1f { /* opaque handle passed to lanczos1finit/lanczos1f/lanczos1ffree */ | ||||
|   void *p; /* Lanczos1fImpl object owned by the handle, or null when unset */ | ||||
| }; | ||||
| 
 | ||||
| /* Allocates the resizer behind self, freeing any previous one first. */ | ||||
| void lanczos1finit(struct lanczos1f *self); | ||||
| /* Destroys the resizer behind self (no-op when self->p is null). */ | ||||
| void lanczos1ffree(struct lanczos1f *self); | ||||
| /* Resizes the sxn×syn float plane in src into the dxn×dyn plane in dst. */ | ||||
| void lanczos1f(struct lanczos1f *self, size_t dyn, size_t dxn, void *dst, | ||||
|                size_t syn, size_t sxn, size_t ssw, const void *src, double ky0, | ||||
|                double kx0, double oy, double ox) paramsnonnull(); | ||||
| 
 | ||||
| COSMOPOLITAN_C_END_ | ||||
| #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ | ||||
| #endif /* COSMOPOLITAN_THIRD_PARTY_AVIR_LANCZOS1F_H_ */ | ||||
							
								
								
									
										11
									
								
								third_party/avir/lanczos1f.hpp
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								third_party/avir/lanczos1f.hpp
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,11 @@ | |||
| #ifndef COSMOPOLITAN_THIRD_PARTY_AVIR_LANCZOS1F_HPP_ | ||||
| #define COSMOPOLITAN_THIRD_PARTY_AVIR_LANCZOS1F_HPP_ | ||||
| #include "third_party/avir/lancir.h" | ||||
| 
 | ||||
| struct Lanczos1fImpl { /* C++ object stored behind the void *p of struct lanczos1f */ | ||||
|   Lanczos1fImpl() : lanczos{} { | ||||
|   } | ||||
|   avir::CLancIR lanczos; /* the resizer instance that lanczos1f() calls into */ | ||||
| }; | ||||
| 
 | ||||
| #endif /* COSMOPOLITAN_THIRD_PARTY_AVIR_LANCZOS1F_HPP_ */ | ||||
							
								
								
									
										30
									
								
								third_party/avir/lanczos3.cc
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								third_party/avir/lanczos3.cc
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,30 @@ | |||
| /*-*-mode:c++;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8-*-│
 | ||||
| │vi: set net ft=c++ ts=2 sts=2 sw=2 fenc=utf-8                              :vi│ | ||||
| ╞══════════════════════════════════════════════════════════════════════════════╡ | ||||
| │ Copyright 2020 Justine Alexandra Roberts Tunney                              │ | ||||
| │                                                                              │ | ||||
| │ This program is free software; you can redistribute it and/or modify         │ | ||||
| │ it under the terms of the GNU General Public License as published by         │ | ||||
| │ the Free Software Foundation; version 2 of the License.                      │ | ||||
| │                                                                              │ | ||||
| │ This program is distributed in the hope that it will be useful, but          │ | ||||
| │ WITHOUT ANY WARRANTY; without even the implied warranty of                   │ | ||||
| │ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU             │ | ||||
| │ General Public License for more details.                                     │ | ||||
| │                                                                              │ | ||||
| │ You should have received a copy of the GNU General Public License            │ | ||||
| │ along with this program; if not, write to the Free Software                  │ | ||||
| │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA                │ | ||||
| │ 02110-1301 USA                                                               │ | ||||
| ╚─────────────────────────────────────────────────────────────────────────────*/ | ||||
| #include "third_party/avir/lanczos.h" | ||||
| namespace { | ||||
| #include "third_party/avir/lancir.h" | ||||
| } | ||||
| 
 | ||||
| /** | ||||
|  * Resizes float image w/ Lanczos interpolation using a temporary | ||||
|  * avir::CLancIR object and an element count of 3. | ||||
|  * | ||||
|  * NOTE(review): the trailing -1 and -2 arguments are passed through | ||||
|  * unexplained; their meaning must be confirmed against lancir.h. | ||||
|  * | ||||
|  * @param dyn is destination height | ||||
|  * @param dxn is destination width | ||||
|  * @param dst is destination float array | ||||
|  * @param syn is source height | ||||
|  * @param sxn is source width | ||||
|  * @param src is source float array | ||||
|  * @param sw is source scanline size, forwarded unchanged | ||||
|  */ | ||||
| void lanczos3(unsigned dyn, unsigned dxn, void *dst, unsigned syn, unsigned sxn, | ||||
|               const void *src, unsigned sw) { | ||||
|   avir::CLancIR engine; | ||||
|   const float *const pixelsin = static_cast<const float *>(src); | ||||
|   float *const pixelsout = static_cast<float *>(dst); | ||||
|   engine.resizeImage(pixelsin, sxn, syn, sw, pixelsout, dxn, dyn, 3, -1, -2); | ||||
| } | ||||
							
								
								
									
										11
									
								
								third_party/avir/notice.h
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								third_party/avir/notice.h
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,11 @@ | |||
| #ifndef COSMOPOLITAN_THIRD_PARTY_AVIR_NOTICE_H_ | ||||
| #define COSMOPOLITAN_THIRD_PARTY_AVIR_NOTICE_H_ | ||||
| #if !(__ASSEMBLER__ + __LINKER__ + 0) | ||||
| 
 | ||||
| asm(".ident\t\"\\n\\n\
 | ||||
| AVIR (MIT License)\\n\ | ||||
| Copyright 2015-2019 Aleksey Vaneev\""); /* emits the AVIR license notice through an .ident directive */ | ||||
| asm(".include \"libc/disclaimer.inc\""); /* pulls libc/disclaimer.inc into the assembler input */ | ||||
| 
 | ||||
| #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ | ||||
| #endif /* COSMOPOLITAN_THIRD_PARTY_AVIR_NOTICE_H_ */ | ||||
							
								
								
									
										48
									
								
								third_party/avir/resize.cc
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										48
									
								
								third_party/avir/resize.cc
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,48 @@ | |||
| /*-*-mode:c++;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8-*-│
 | ||||
| │vi: set net ft=c++ ts=2 sts=2 sw=2 fenc=utf-8                              :vi│ | ||||
| ╞══════════════════════════════════════════════════════════════════════════════╡ | ||||
| │ Copyright 2020 Justine Alexandra Roberts Tunney                              │ | ||||
| │                                                                              │ | ||||
| │ This program is free software; you can redistribute it and/or modify         │ | ||||
| │ it under the terms of the GNU General Public License as published by         │ | ||||
| │ the Free Software Foundation; version 2 of the License.                      │ | ||||
| │                                                                              │ | ||||
| │ This program is distributed in the hope that it will be useful, but          │ | ||||
| │ WITHOUT ANY WARRANTY; without even the implied warranty of                   │ | ||||
| │ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU             │ | ||||
| │ General Public License for more details.                                     │ | ||||
| │                                                                              │ | ||||
| │ You should have received a copy of the GNU General Public License            │ | ||||
| │ along with this program; if not, write to the Free Software                  │ | ||||
| │ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA                │ | ||||
| │ 02110-1301 USA                                                               │ | ||||
| ╚─────────────────────────────────────────────────────────────────────────────*/ | ||||
| #include "third_party/avir/resize.h" | ||||
| namespace { | ||||
| #include "third_party/avir/avir_float4_sse.h" | ||||
| }  // namespace
 | ||||
| 
 | ||||
| struct ResizerImpl { /* C++ object stored behind the void *p of struct Resizer */ | ||||
|   ResizerImpl() : resizer{8, 8, avir::CImageResizerParamsULR()} {} /* NOTE(review): 8, 8 are presumably result/source bit depths; confirm in avir.h */ | ||||
|   avir::CImageResizer<avir::fpclass_float4> resizer; /* the AVIR resizer that ResizeImage() calls into */ | ||||
| }; | ||||
| 
 | ||||
| /** | ||||
|  * Installs a fresh ResizerImpl behind the handle, destroying whatever | ||||
|  * object the handle held before. | ||||
|  * | ||||
|  * NOTE(review): aResBitDepth and aSrcBitDepth are accepted but never | ||||
|  * used; ResizerImpl constructs its resizer with fixed arguments. | ||||
|  */ | ||||
| void NewResizer(Resizer *resizer, int aResBitDepth, int aSrcBitDepth) { | ||||
|   FreeResizer(resizer); | ||||
|   ResizerImpl *fresh = new ResizerImpl; | ||||
|   resizer->p = fresh; | ||||
| } | ||||
| 
 | ||||
| /** | ||||
|  * Destroys the object behind the handle, if any, and nulls the pointer | ||||
|  * so that calling this again is harmless. | ||||
|  */ | ||||
| void FreeResizer(Resizer *resizer) { | ||||
|   if (resizer->p) { | ||||
|     delete static_cast<ResizerImpl *>(resizer->p); | ||||
|     resizer->p = nullptr; | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| /** | ||||
|  * Resizes float image through the AVIR resizer held by the handle. | ||||
|  * | ||||
|  * The call passes 4 as the element count and zero for both the source | ||||
|  * scanline size and the resizing step. | ||||
|  * NOTE(review): zero presumably asks AVIR to derive those values | ||||
|  * itself; confirm against avir.h. | ||||
|  */ | ||||
| void ResizeImage(Resizer *resizer, float *Dest, int DestHeight, int DestWidth, | ||||
|                  const float *Src, int SrcHeight, int SrcWidth) { | ||||
|   int scanline = 0; | ||||
|   double step = 0; | ||||
|   ResizerImpl *engine = static_cast<ResizerImpl *>(resizer->p); | ||||
|   engine->resizer.resizeImage(Src, SrcWidth, SrcHeight, scanline, Dest, | ||||
|                               DestWidth, DestHeight, 4, step); | ||||
| } | ||||
							
								
								
									
										17
									
								
								third_party/avir/resize.h
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										17
									
								
								third_party/avir/resize.h
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,17 @@ | |||
| #ifndef COSMOPOLITAN_THIRD_PARTY_AVIR_RESIZE_H_ | ||||
| #define COSMOPOLITAN_THIRD_PARTY_AVIR_RESIZE_H_ | ||||
| #if !(__ASSEMBLER__ + __LINKER__ + 0) | ||||
| COSMOPOLITAN_C_START_ | ||||
| 
 | ||||
| struct Resizer { /* opaque handle passed to NewResizer/ResizeImage/FreeResizer */ | ||||
|   void *p; /* ResizerImpl object owned by the handle, or null when unset */ | ||||
| }; | ||||
| 
 | ||||
| /* Destroys the resizer behind the handle (no-op when resizer->p is null). */ | ||||
| void FreeResizer(struct Resizer *resizer) paramsnonnull(); | ||||
| /* Allocates the resizer behind the handle, freeing any previous one first. */ | ||||
| void NewResizer(struct Resizer *resizer, int aResBitDepth, int aSrcBitDepth) | ||||
|     paramsnonnull(); | ||||
| /* Resizes the SrcWidth×SrcHeight float image in Src into Dest. */ | ||||
| void ResizeImage(struct Resizer *resizer, float *Dest, int DestHeight, | ||||
|                  int DestWidth, const float *Src, int SrcHeight, int SrcWidth) | ||||
|     paramsnonnull(); | ||||
| 
 | ||||
| COSMOPOLITAN_C_END_ | ||||
| #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ | ||||
| #endif /* COSMOPOLITAN_THIRD_PARTY_AVIR_RESIZE_H_ */ | ||||
							
								
								
									
										13
									
								
								third_party/blas/blas.h
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										13
									
								
								third_party/blas/blas.h
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,13 @@ | |||
| #ifndef COSMOPOLITAN_THIRD_PARTY_BLAS_BLAS_H_ | ||||
| #define COSMOPOLITAN_THIRD_PARTY_BLAS_BLAS_H_ | ||||
| #if !(__ASSEMBLER__ + __LINKER__ + 0) | ||||
| COSMOPOLITAN_C_START_ | ||||
| 
 | ||||
| int dgemm_(char *transa, char *transb, long *m, long *n, long *k, double *alpha, | ||||
|            double *A /*['N'?k:m][1≤m≤lda]*/, long *lda, | ||||
|            double *B /*['N'?k:n][1≤n≤ldb]*/, long *ldb, double *beta, | ||||
|            double *C /*[n][1≤m≤ldc]*/, long *ldc); /* presumably the C binding of DGEMM in dgemm.f: C := alpha*op(A)*op(B) + beta*C; every argument is passed by pointer */ | ||||
| 
 | ||||
| COSMOPOLITAN_C_END_ | ||||
| #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ | ||||
| #endif /* COSMOPOLITAN_THIRD_PARTY_BLAS_BLAS_H_ */ | ||||
							
								
								
									
										59
									
								
								third_party/blas/blas.mk
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										59
									
								
								third_party/blas/blas.mk
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,59 @@ | |||
| #-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐
 | ||||
| #───vi: set et ft=make ts=8 tw=8 fenc=utf-8 :vi───────────────────────┘
 | ||||
| 
 | ||||
| PKGS += THIRD_PARTY_BLAS | ||||
| # Archive artifact and the file lists it is built from.
 | ||||
| THIRD_PARTY_BLAS_ARTIFACTS += THIRD_PARTY_BLAS_A | ||||
| THIRD_PARTY_BLAS = $(THIRD_PARTY_BLAS_A_DEPS) $(THIRD_PARTY_BLAS_A) | ||||
| THIRD_PARTY_BLAS_A = o/$(MODE)/third_party/blas/blas.a | ||||
| THIRD_PARTY_BLAS_A_FILES := $(wildcard third_party/blas/*) | ||||
| THIRD_PARTY_BLAS_A_HDRS = $(filter %.h,$(THIRD_PARTY_BLAS_A_FILES)) | ||||
| THIRD_PARTY_BLAS_A_SRCS_S = $(filter %.S,$(THIRD_PARTY_BLAS_A_FILES)) | ||||
| THIRD_PARTY_BLAS_A_SRCS_C = $(filter %.c,$(THIRD_PARTY_BLAS_A_FILES)) | ||||
| # Sources are the .S and .c files only; other files (e.g. .f) are not listed.
 | ||||
| THIRD_PARTY_BLAS_A_SRCS =				\
 | ||||
| 	$(THIRD_PARTY_BLAS_A_SRCS_S)			\
 | ||||
| 	$(THIRD_PARTY_BLAS_A_SRCS_C) | ||||
| # Objects: a .zip.o per source file plus the .o built from each .S and .c.
 | ||||
| THIRD_PARTY_BLAS_A_OBJS =				\
 | ||||
| 	$(THIRD_PARTY_BLAS_A_SRCS:%=o/$(MODE)/%.zip.o)	\
 | ||||
| 	$(THIRD_PARTY_BLAS_A_SRCS_S:%.S=o/$(MODE)/%.o)	\
 | ||||
| 	$(THIRD_PARTY_BLAS_A_SRCS_C:%.c=o/$(MODE)/%.o) | ||||
| 
 | ||||
| THIRD_PARTY_BLAS_A_CHECKS =				\
 | ||||
| 	$(THIRD_PARTY_BLAS_A).pkg			\
 | ||||
| 	$(THIRD_PARTY_BLAS_A_HDRS:%=o/$(MODE)/%.ok) | ||||
| # Packages this archive depends on directly.
 | ||||
| THIRD_PARTY_BLAS_A_DIRECTDEPS =				\
 | ||||
| 	LIBC_STUBS					\
 | ||||
| 	LIBC_NEXGEN32E					\
 | ||||
| 	THIRD_PARTY_F2C | ||||
| # Expand the variable named by each direct dependency and de-duplicate.
 | ||||
| THIRD_PARTY_BLAS_A_DEPS :=				\
 | ||||
| 	$(call uniq,$(foreach x,$(THIRD_PARTY_BLAS_A_DIRECTDEPS),$($(x)))) | ||||
| # Target-specific flags: this archive's objects get -O3 appended.
 | ||||
| $(THIRD_PARTY_BLAS_A_OBJS):				\ | ||||
| 		OVERRIDE_CFLAGS +=			\
 | ||||
| 			-O3 #$(MATHEMATICAL) | ||||
| # Disabled override: the backslash continues the comment over the CC line.
 | ||||
| #$(THIRD_PARTY_BLAS_A_OBJS):				\
 | ||||
| 		CC = $(CLANG) | ||||
| # Archive prerequisites: the source directory, the .pkg file, the objects.
 | ||||
| $(THIRD_PARTY_BLAS_A):					\ | ||||
| 		third_party/blas/			\
 | ||||
| 		$(THIRD_PARTY_BLAS_A).pkg		\
 | ||||
| 		$(THIRD_PARTY_BLAS_A_OBJS) | ||||
| # The .pkg file depends on the objects and on each direct dependency's .pkg.
 | ||||
| $(THIRD_PARTY_BLAS_A).pkg:				\ | ||||
| 		$(THIRD_PARTY_BLAS_A_OBJS)		\
 | ||||
| 		$(foreach x,$(THIRD_PARTY_BLAS_A_DIRECTDEPS),$($(x)_A).pkg) | ||||
| 
 | ||||
| THIRD_PARTY_BLAS_LIBS = $(foreach x,$(THIRD_PARTY_BLAS_ARTIFACTS),$($(x))) | ||||
| THIRD_PARTY_BLAS_SRCS = $(foreach x,$(THIRD_PARTY_BLAS_ARTIFACTS),$($(x)_SRCS)) | ||||
| THIRD_PARTY_BLAS_HDRS = $(foreach x,$(THIRD_PARTY_BLAS_ARTIFACTS),$($(x)_HDRS)) | ||||
| THIRD_PARTY_BLAS_CHECKS = $(foreach x,$(THIRD_PARTY_BLAS_ARTIFACTS),$($(x)_CHECKS)) | ||||
| THIRD_PARTY_BLAS_OBJS = $(foreach x,$(THIRD_PARTY_BLAS_ARTIFACTS),$($(x)_OBJS)) | ||||
| $(THIRD_PARTY_BLAS_OBJS): $(BUILD_FILES) third_party/blas/blas.mk | ||||
| # Phony per-package target whose prerequisites are the package checks.
 | ||||
| .PHONY: o/$(MODE)/third_party/blas | ||||
| o/$(MODE)/third_party/blas: $(THIRD_PARTY_BLAS_CHECKS) | ||||
							
								
								
									
										384
									
								
								third_party/blas/dgemm.f
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										384
									
								
								third_party/blas/dgemm.f
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,384 @@ | |||
| *> \brief \b DGEMM | ||||
| * | ||||
| *  =========== DOCUMENTATION =========== | ||||
| * | ||||
| * Online html documentation available at | ||||
| *            http://www.netlib.org/lapack/explore-html/ | ||||
| * | ||||
| *  Definition: | ||||
| *  =========== | ||||
| * | ||||
| *       SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) | ||||
| * | ||||
| *       .. Scalar Arguments .. | ||||
| *       DOUBLE PRECISION ALPHA,BETA | ||||
| *       INTEGER K,LDA,LDB,LDC,M,N | ||||
| *       CHARACTER TRANSA,TRANSB | ||||
| *       .. | ||||
| *       .. Array Arguments .. | ||||
| *       DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*) | ||||
| *       .. | ||||
| * | ||||
| * | ||||
| *> \par Purpose: | ||||
| *  ============= | ||||
| *> | ||||
| *> \verbatim | ||||
| *> | ||||
| *> DGEMM  performs one of the matrix-matrix operations | ||||
| *> | ||||
| *>    C := alpha*op( A )*op( B ) + beta*C, | ||||
| *> | ||||
| *> where  op( X ) is one of | ||||
| *> | ||||
| *>    op( X ) = X   or   op( X ) = X**T, | ||||
| *> | ||||
| *> alpha and beta are scalars, and A, B and C are matrices, with op( A ) | ||||
| *> an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix. | ||||
| *> \endverbatim | ||||
| * | ||||
| *  Arguments: | ||||
| *  ========== | ||||
| * | ||||
| *> \param[in] TRANSA | ||||
| *> \verbatim | ||||
| *>          TRANSA is CHARACTER*1 | ||||
| *>           On entry, TRANSA specifies the form of op( A ) to be used in | ||||
| *>           the matrix multiplication as follows: | ||||
| *> | ||||
| *>              TRANSA = 'N' or 'n',  op( A ) = A. | ||||
| *> | ||||
| *>              TRANSA = 'T' or 't',  op( A ) = A**T. | ||||
| *> | ||||
| *>              TRANSA = 'C' or 'c',  op( A ) = A**T. | ||||
| *> \endverbatim | ||||
| *> | ||||
| *> \param[in] TRANSB | ||||
| *> \verbatim | ||||
| *>          TRANSB is CHARACTER*1 | ||||
| *>           On entry, TRANSB specifies the form of op( B ) to be used in | ||||
| *>           the matrix multiplication as follows: | ||||
| *> | ||||
| *>              TRANSB = 'N' or 'n',  op( B ) = B. | ||||
| *> | ||||
| *>              TRANSB = 'T' or 't',  op( B ) = B**T. | ||||
| *> | ||||
| *>              TRANSB = 'C' or 'c',  op( B ) = B**T. | ||||
| *> \endverbatim | ||||
| *> | ||||
| *> \param[in] M | ||||
| *> \verbatim | ||||
| *>          M is INTEGER | ||||
| *>           On entry,  M  specifies  the number  of rows  of the  matrix | ||||
| *>           op( A )  and of the  matrix  C.  M  must  be at least  zero. | ||||
| *> \endverbatim | ||||
| *> | ||||
| *> \param[in] N | ||||
| *> \verbatim | ||||
| *>          N is INTEGER | ||||
| *>           On entry,  N  specifies the number  of columns of the matrix | ||||
| *>           op( B ) and the number of columns of the matrix C. N must be | ||||
| *>           at least zero. | ||||
| *> \endverbatim | ||||
| *> | ||||
| *> \param[in] K | ||||
| *> \verbatim | ||||
| *>          K is INTEGER | ||||
| *>           On entry,  K  specifies  the number of columns of the matrix | ||||
| *>           op( A ) and the number of rows of the matrix op( B ). K must | ||||
| *>           be at least  zero. | ||||
| *> \endverbatim | ||||
| *> | ||||
| *> \param[in] ALPHA | ||||
| *> \verbatim | ||||
| *>          ALPHA is DOUBLE PRECISION. | ||||
| *>           On entry, ALPHA specifies the scalar alpha. | ||||
| *> \endverbatim | ||||
| *> | ||||
| *> \param[in] A | ||||
| *> \verbatim | ||||
| *>          A is DOUBLE PRECISION array, dimension ( LDA, ka ), where ka is | ||||
| *>           k  when  TRANSA = 'N' or 'n',  and is  m  otherwise. | ||||
| *>           Before entry with  TRANSA = 'N' or 'n',  the leading  m by k | ||||
| *>           part of the array  A  must contain the matrix  A,  otherwise | ||||
| *>           the leading  k by m  part of the array  A  must contain  the | ||||
| *>           matrix A. | ||||
| *> \endverbatim | ||||
| *> | ||||
| *> \param[in] LDA | ||||
| *> \verbatim | ||||
| *>          LDA is INTEGER | ||||
| *>           On entry, LDA specifies the first dimension of A as declared | ||||
| *>           in the calling (sub) program. When  TRANSA = 'N' or 'n' then | ||||
| *>           LDA must be at least  max( 1, m ), otherwise  LDA must be at | ||||
| *>           least  max( 1, k ). | ||||
| *> \endverbatim | ||||
| *> | ||||
| *> \param[in] B | ||||
| *> \verbatim | ||||
| *>          B is DOUBLE PRECISION array, dimension ( LDB, kb ), where kb is | ||||
| *>           n  when  TRANSB = 'N' or 'n',  and is  k  otherwise. | ||||
| *>           Before entry with  TRANSB = 'N' or 'n',  the leading  k by n | ||||
| *>           part of the array  B  must contain the matrix  B,  otherwise | ||||
| *>           the leading  n by k  part of the array  B  must contain  the | ||||
| *>           matrix B. | ||||
| *> \endverbatim | ||||
| *> | ||||
| *> \param[in] LDB | ||||
| *> \verbatim | ||||
| *>          LDB is INTEGER | ||||
| *>           On entry, LDB specifies the first dimension of B as declared | ||||
| *>           in the calling (sub) program. When  TRANSB = 'N' or 'n' then | ||||
| *>           LDB must be at least  max( 1, k ), otherwise  LDB must be at | ||||
| *>           least  max( 1, n ). | ||||
| *> \endverbatim | ||||
| *> | ||||
| *> \param[in] BETA | ||||
| *> \verbatim | ||||
| *>          BETA is DOUBLE PRECISION. | ||||
| *>           On entry,  BETA  specifies the scalar  beta.  When  BETA  is | ||||
| *>           supplied as zero then C need not be set on input. | ||||
| *> \endverbatim | ||||
| *> | ||||
| *> \param[in,out] C | ||||
| *> \verbatim | ||||
| *>          C is DOUBLE PRECISION array, dimension ( LDC, N ) | ||||
| *>           Before entry, the leading  m by n  part of the array  C must | ||||
| *>           contain the matrix  C,  except when  beta  is zero, in which | ||||
| *>           case C need not be set on entry. | ||||
| *>           On exit, the array  C  is overwritten by the  m by n  matrix | ||||
| *>           ( alpha*op( A )*op( B ) + beta*C ). | ||||
| *> \endverbatim | ||||
| *> | ||||
| *> \param[in] LDC | ||||
| *> \verbatim | ||||
| *>          LDC is INTEGER | ||||
| *>           On entry, LDC specifies the first dimension of C as declared | ||||
| *>           in  the  calling  (sub)  program.   LDC  must  be  at  least | ||||
| *>           max( 1, m ). | ||||
| *> \endverbatim | ||||
| * | ||||
| *  Authors: | ||||
| *  ======== | ||||
| * | ||||
| *> \author Univ. of Tennessee | ||||
| *> \author Univ. of California Berkeley | ||||
| *> \author Univ. of Colorado Denver | ||||
| *> \author NAG Ltd. | ||||
| * | ||||
| *> \date December 2016 | ||||
| * | ||||
| *> \ingroup double_blas_level3 | ||||
| * | ||||
| *> \par Further Details: | ||||
| *  ===================== | ||||
| *> | ||||
| *> \verbatim | ||||
| *> | ||||
| *>  Level 3 Blas routine. | ||||
| *> | ||||
| *>  -- Written on 8-February-1989. | ||||
| *>     Jack Dongarra, Argonne National Laboratory. | ||||
| *>     Iain Duff, AERE Harwell. | ||||
| *>     Jeremy Du Croz, Numerical Algorithms Group Ltd. | ||||
| *>     Sven Hammarling, Numerical Algorithms Group Ltd. | ||||
| *> \endverbatim | ||||
| *> | ||||
| *  ===================================================================== | ||||
|       SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) | ||||
| * | ||||
| *     Computes  C := alpha*op( A )*op( B ) + beta*C  in double precision, | ||||
| *     where op( X ) is X or its transpose as selected by TRANSA/TRANSB | ||||
| *     and C is m by n.  Invalid arguments are reported through XERBLA. | ||||
| * | ||||
| *  -- Reference BLAS level3 routine (version 3.7.0) -- | ||||
| *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    -- | ||||
| *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- | ||||
| *     December 2016 | ||||
| * | ||||
| *     .. Scalar Arguments .. | ||||
|       DOUBLE PRECISION ALPHA,BETA | ||||
|       INTEGER K,LDA,LDB,LDC,M,N | ||||
|       CHARACTER TRANSA,TRANSB | ||||
| *     .. | ||||
| *     .. Array Arguments .. | ||||
|       DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*) | ||||
| *     .. | ||||
| * | ||||
| *  ===================================================================== | ||||
| * | ||||
| *     .. External Functions .. | ||||
|       LOGICAL LSAME | ||||
|       EXTERNAL LSAME | ||||
| *     .. | ||||
| *     .. External Subroutines .. | ||||
|       EXTERNAL XERBLA | ||||
| *     .. | ||||
| *     .. Intrinsic Functions .. | ||||
|       INTRINSIC MAX | ||||
| *     .. | ||||
| *     .. Local Scalars .. | ||||
|       DOUBLE PRECISION TEMP | ||||
|       INTEGER I,INFO,J,L,NCOLA,NROWA,NROWB | ||||
|       LOGICAL NOTA,NOTB | ||||
| *     .. | ||||
| *     .. Parameters .. | ||||
|       DOUBLE PRECISION ONE,ZERO | ||||
|       PARAMETER (ONE=1.0D+0,ZERO=0.0D+0) | ||||
| *     .. | ||||
| * | ||||
| *     Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not | ||||
| *     transposed and set  NROWA, NCOLA and  NROWB  as the number of rows | ||||
| *     and  columns of  A  and the  number of  rows  of  B  respectively. | ||||
| * | ||||
| *     NROWA and NROWB are used only for the LDA/LDB checks below. | ||||
| *     NCOLA is assigned here but not referenced again in this routine. | ||||
| * | ||||
|       NOTA = LSAME(TRANSA,'N') | ||||
|       NOTB = LSAME(TRANSB,'N') | ||||
|       IF (NOTA) THEN | ||||
|           NROWA = M | ||||
|           NCOLA = K | ||||
|       ELSE | ||||
|           NROWA = K | ||||
|           NCOLA = M | ||||
|       END IF | ||||
|       IF (NOTB) THEN | ||||
|           NROWB = K | ||||
|       ELSE | ||||
|           NROWB = N | ||||
|       END IF | ||||
| * | ||||
| *     Test the input parameters. | ||||
| * | ||||
| *     INFO is set to the 1-based position, in the argument list of this | ||||
| *     routine, of the first argument found invalid (TRANSA = 1, ..., | ||||
| *     LDA = 8, LDB = 10, LDC = 13).  'C' and 'T' are both accepted for | ||||
| *     TRANSA/TRANSB; only NOTA/NOTB are consulted afterwards, so both | ||||
| *     select the transposed form. | ||||
| * | ||||
|       INFO = 0 | ||||
|       IF ((.NOT.NOTA) .AND. (.NOT.LSAME(TRANSA,'C')) .AND. | ||||
|      +    (.NOT.LSAME(TRANSA,'T'))) THEN | ||||
|           INFO = 1 | ||||
|       ELSE IF ((.NOT.NOTB) .AND. (.NOT.LSAME(TRANSB,'C')) .AND. | ||||
|      +         (.NOT.LSAME(TRANSB,'T'))) THEN | ||||
|           INFO = 2 | ||||
|       ELSE IF (M.LT.0) THEN | ||||
|           INFO = 3 | ||||
|       ELSE IF (N.LT.0) THEN | ||||
|           INFO = 4 | ||||
|       ELSE IF (K.LT.0) THEN | ||||
|           INFO = 5 | ||||
|       ELSE IF (LDA.LT.MAX(1,NROWA)) THEN | ||||
|           INFO = 8 | ||||
|       ELSE IF (LDB.LT.MAX(1,NROWB)) THEN | ||||
|           INFO = 10 | ||||
|       ELSE IF (LDC.LT.MAX(1,M)) THEN | ||||
|           INFO = 13 | ||||
|       END IF | ||||
|       IF (INFO.NE.0) THEN | ||||
|           CALL XERBLA('DGEMM ',INFO) | ||||
|           RETURN | ||||
|       END IF | ||||
| * | ||||
| *     Quick return if possible. | ||||
| * | ||||
| *     C is left untouched when it is empty (M or N is zero), or when | ||||
| *     the product term vanishes (ALPHA is zero or K is zero) and BETA | ||||
| *     is one. | ||||
| * | ||||
|       IF ((M.EQ.0) .OR. (N.EQ.0) .OR. | ||||
|      +    (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN | ||||
| * | ||||
| *     And if  alpha.eq.zero. | ||||
| * | ||||
| *     Only the scaling of C by BETA remains; A and B are not read. | ||||
| *     For BETA equal to zero C is overwritten with zeros without being | ||||
| *     read. | ||||
| * | ||||
|       IF (ALPHA.EQ.ZERO) THEN | ||||
|           IF (BETA.EQ.ZERO) THEN | ||||
|               DO 20 J = 1,N | ||||
|                   DO 10 I = 1,M | ||||
|                       C(I,J) = ZERO | ||||
|    10             CONTINUE | ||||
|    20         CONTINUE | ||||
|           ELSE | ||||
|               DO 40 J = 1,N | ||||
|                   DO 30 I = 1,M | ||||
|                       C(I,J) = BETA*C(I,J) | ||||
|    30             CONTINUE | ||||
|    40         CONTINUE | ||||
|           END IF | ||||
|           RETURN | ||||
|       END IF | ||||
| * | ||||
| *     Start the operations. | ||||
| * | ||||
|       IF (NOTB) THEN | ||||
|           IF (NOTA) THEN | ||||
| * | ||||
| *           Form  C := alpha*A*B + beta*C. | ||||
| * | ||||
| *           Column J of C is first scaled by BETA (or zeroed without | ||||
| *           being read when BETA is zero); then, for each L, the | ||||
| *           multiple ALPHA*B(L,J) of column L of A is added to it. | ||||
| *           The innermost loop runs over the row index I. | ||||
| * | ||||
|               DO 90 J = 1,N | ||||
|                   IF (BETA.EQ.ZERO) THEN | ||||
|                       DO 50 I = 1,M | ||||
|                           C(I,J) = ZERO | ||||
|    50                 CONTINUE | ||||
|                   ELSE IF (BETA.NE.ONE) THEN | ||||
|                       DO 60 I = 1,M | ||||
|                           C(I,J) = BETA*C(I,J) | ||||
|    60                 CONTINUE | ||||
|                   END IF | ||||
|                   DO 80 L = 1,K | ||||
|                       TEMP = ALPHA*B(L,J) | ||||
|                       DO 70 I = 1,M | ||||
|                           C(I,J) = C(I,J) + TEMP*A(I,L) | ||||
|    70                 CONTINUE | ||||
|    80             CONTINUE | ||||
|    90         CONTINUE | ||||
|           ELSE | ||||
| * | ||||
| *           Form  C := alpha*A**T*B + beta*C | ||||
| * | ||||
| *           Each C(I,J) is a sum over L accumulated in TEMP; the old | ||||
| *           value of C(I,J) is read only when BETA is nonzero. | ||||
| * | ||||
|               DO 120 J = 1,N | ||||
|                   DO 110 I = 1,M | ||||
|                       TEMP = ZERO | ||||
|                       DO 100 L = 1,K | ||||
|                           TEMP = TEMP + A(L,I)*B(L,J) | ||||
|   100                 CONTINUE | ||||
|                       IF (BETA.EQ.ZERO) THEN | ||||
|                           C(I,J) = ALPHA*TEMP | ||||
|                       ELSE | ||||
|                           C(I,J) = ALPHA*TEMP + BETA*C(I,J) | ||||
|                       END IF | ||||
|   110             CONTINUE | ||||
|   120         CONTINUE | ||||
|           END IF | ||||
|       ELSE | ||||
|           IF (NOTA) THEN | ||||
| * | ||||
| *           Form  C := alpha*A*B**T + beta*C | ||||
| * | ||||
| *           Same structure as the A*B case above, except that B is | ||||
| *           indexed as B(J,L). | ||||
| * | ||||
|               DO 170 J = 1,N | ||||
|                   IF (BETA.EQ.ZERO) THEN | ||||
|                       DO 130 I = 1,M | ||||
|                           C(I,J) = ZERO | ||||
|   130                 CONTINUE | ||||
|                   ELSE IF (BETA.NE.ONE) THEN | ||||
|                       DO 140 I = 1,M | ||||
|                           C(I,J) = BETA*C(I,J) | ||||
|   140                 CONTINUE | ||||
|                   END IF | ||||
|                   DO 160 L = 1,K | ||||
|                       TEMP = ALPHA*B(J,L) | ||||
|                       DO 150 I = 1,M | ||||
|                           C(I,J) = C(I,J) + TEMP*A(I,L) | ||||
|   150                 CONTINUE | ||||
|   160             CONTINUE | ||||
|   170         CONTINUE | ||||
|           ELSE | ||||
| * | ||||
| *           Form  C := alpha*A**T*B**T + beta*C | ||||
| * | ||||
| *           Same structure as the A**T*B case above, except that B is | ||||
| *           indexed as B(J,L). | ||||
| * | ||||
|               DO 200 J = 1,N | ||||
|                   DO 190 I = 1,M | ||||
|                       TEMP = ZERO | ||||
|                       DO 180 L = 1,K | ||||
|                           TEMP = TEMP + A(L,I)*B(J,L) | ||||
|   180                 CONTINUE | ||||
|                       IF (BETA.EQ.ZERO) THEN | ||||
|                           C(I,J) = ALPHA*TEMP | ||||
|                       ELSE | ||||
|                           C(I,J) = ALPHA*TEMP + BETA*C(I,J) | ||||
|                       END IF | ||||
|   190             CONTINUE | ||||
|   200         CONTINUE | ||||
|           END IF | ||||
|       END IF | ||||
| * | ||||
|       RETURN | ||||
| * | ||||
| *     End of DGEMM . | ||||
| * | ||||
|       END | ||||
							
								
								
									
										1102
									
								
								third_party/blas/dgemm_.S
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										1102
									
								
								third_party/blas/dgemm_.S
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
							
								
								
									
										155
									
								
								third_party/blas/lsame.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										155
									
								
								third_party/blas/lsame.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,155 @@ | |||
| /* lsame.f -- translated by f2c (version 20191129).
 | ||||
|    You must link the resulting object file with libf2c: | ||||
|         on Microsoft Windows system, link with libf2c.lib; | ||||
|         on Linux or Unix systems, link with .../path/to/libf2c.a -lm | ||||
|         or, if you install libf2c.a in a standard place, with -lf2c -lm | ||||
|         -- in that order, at the end of the command line, as in | ||||
|                 cc *.o -lf2c -lm | ||||
|         Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., | ||||
| 
 | ||||
|                 http://www.netlib.org/f2c/libf2c.zip
 | ||||
| */ | ||||
| 
 | ||||
| #include "third_party/f2c/f2c.h" | ||||
| 
 | ||||
| /* > \brief \b LSAME */ | ||||
| 
 | ||||
| /*  =========== DOCUMENTATION =========== */ | ||||
| 
 | ||||
| /* Online html documentation available at */ | ||||
| /*            http://www.netlib.org/lapack/explore-html/ */ | ||||
| 
 | ||||
| /*  Definition: */ | ||||
| /*  =========== */ | ||||
| 
 | ||||
| /*       LOGICAL FUNCTION LSAME(CA,CB) */ | ||||
| 
 | ||||
| /*       .. Scalar Arguments .. */ | ||||
| /*       CHARACTER CA,CB */ | ||||
| /*       .. */ | ||||
| 
 | ||||
| /* > \par Purpose: */ | ||||
| /*  ============= */ | ||||
| /* > */ | ||||
| /* > \verbatim */ | ||||
| /* > */ | ||||
| /* > LSAME returns .TRUE. if CA is the same letter as CB regardless of */ | ||||
| /* > case. */ | ||||
| /* > \endverbatim */ | ||||
| 
 | ||||
| /*  Arguments: */ | ||||
| /*  ========== */ | ||||
| 
 | ||||
| /* > \param[in] CA */ | ||||
| /* > \verbatim */ | ||||
| /* >          CA is CHARACTER*1 */ | ||||
| /* > \endverbatim */ | ||||
| /* > */ | ||||
| /* > \param[in] CB */ | ||||
| /* > \verbatim */ | ||||
| /* >          CB is CHARACTER*1 */ | ||||
| /* >          CA and CB specify the single characters to be compared. */ | ||||
| /* > \endverbatim */ | ||||
| 
 | ||||
| /*  Authors: */ | ||||
| /*  ======== */ | ||||
| 
 | ||||
| /* > \author Univ. of Tennessee */ | ||||
| /* > \author Univ. of California Berkeley */ | ||||
| /* > \author Univ. of Colorado Denver */ | ||||
| /* > \author NAG Ltd. */ | ||||
| 
 | ||||
| /* > \date December 2016 */ | ||||
| 
 | ||||
| /* > \ingroup aux_blas */ | ||||
| 
 | ||||
| /*  ===================================================================== */ | ||||
| /* Case-insensitive comparison of two single characters (BLAS LSAME). | ||||
|  * | ||||
|  * Reads one byte through each of ca and cb.  Returns nonzero when the two | ||||
|  * bytes are identical, or when they become equal after lower-case letters | ||||
|  * are folded to upper case; returns zero otherwise. | ||||
|  */ | ||||
| logical lsame_(char *ca, char *cb) { | ||||
|   /* System generated locals */ | ||||
|   logical ret_val; | ||||
| 
 | ||||
|   /* Local variables.  f2c emitted these as `static`; they are automatic | ||||
|      here so that concurrent or reentrant calls do not share scratch | ||||
|      storage.  Each of them is assigned below before it is read, so no | ||||
|      value has to survive between calls. */ | ||||
|   integer inta, intb, zcode; | ||||
| 
 | ||||
|   /*  -- Reference BLAS level1 routine (version 3.1) -- */ | ||||
|   /*  -- Reference BLAS is a software package provided by Univ. of | ||||
|    *     Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver | ||||
|    *     and NAG Ltd. -- */ | ||||
|   /*     December 2016 */ | ||||
| 
 | ||||
|   /*     Test if the characters are equal */ | ||||
| 
 | ||||
|   ret_val = *(unsigned char *)ca == *(unsigned char *)cb; | ||||
|   if (ret_val) { | ||||
|     return ret_val; | ||||
|   } | ||||
| 
 | ||||
|   /*     Now test for equivalence if both characters are alphabetic. */ | ||||
| 
 | ||||
|   zcode = 'Z'; | ||||
| 
 | ||||
|   /*     Use 'Z' rather than 'A' so that ASCII can be detected on Prime */ | ||||
|   /*     machines, on which ICHAR returns a value with bit 8 set. */ | ||||
|   /*     ICHAR('A') on Prime machines returns 193 which is the same as */ | ||||
|   /*     ICHAR('A') on an EBCDIC machine. */ | ||||
| 
 | ||||
|   inta = *(unsigned char *)ca; | ||||
|   intb = *(unsigned char *)cb; | ||||
| 
 | ||||
|   if (zcode == 90 || zcode == 122) { | ||||
| 
 | ||||
|     /*        ASCII is assumed - ZCODE is the ASCII code of either lower or */ | ||||
|     /*        upper case 'Z'. */ | ||||
| 
 | ||||
|     if (inta >= 97 && inta <= 122) { | ||||
|       inta += -32; | ||||
|     } | ||||
|     if (intb >= 97 && intb <= 122) { | ||||
|       intb += -32; | ||||
|     } | ||||
| 
 | ||||
|   } else if (zcode == 233 || zcode == 169) { | ||||
| 
 | ||||
|     /*        EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or */ | ||||
|     /*        upper case 'Z'.  The three ranges are tested separately and */ | ||||
|     /*        each range is parenthesised to make the grouping explicit. */ | ||||
| 
 | ||||
|     if ((inta >= 129 && inta <= 137) || (inta >= 145 && inta <= 153) || | ||||
|         (inta >= 162 && inta <= 169)) { | ||||
|       inta += 64; | ||||
|     } | ||||
|     if ((intb >= 129 && intb <= 137) || (intb >= 145 && intb <= 153) || | ||||
|         (intb >= 162 && intb <= 169)) { | ||||
|       intb += 64; | ||||
|     } | ||||
| 
 | ||||
|   } else if (zcode == 218 || zcode == 250) { | ||||
| 
 | ||||
|     /*        ASCII is assumed, on Prime machines - ZCODE is the ASCII code */ | ||||
|     /*        plus 128 of either lower or upper case 'Z'. */ | ||||
| 
 | ||||
|     if (inta >= 225 && inta <= 250) { | ||||
|       inta += -32; | ||||
|     } | ||||
|     if (intb >= 225 && intb <= 250) { | ||||
|       intb += -32; | ||||
|     } | ||||
|   } | ||||
| 
 | ||||
|   /*     The characters match when their case-folded codes are equal. */ | ||||
| 
 | ||||
|   ret_val = inta == intb; | ||||
| 
 | ||||
|   /*     End of LSAME */ | ||||
| 
 | ||||
|   return ret_val; | ||||
| } /* lsame_ */ | ||||
							
								
								
									
										125
									
								
								third_party/blas/lsame.f
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										125
									
								
								third_party/blas/lsame.f
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,125 @@ | |||
| *> \brief \b LSAME | ||||
| * | ||||
| *  =========== DOCUMENTATION =========== | ||||
| * | ||||
| * Online html documentation available at | ||||
| *            http://www.netlib.org/lapack/explore-html/ | ||||
| * | ||||
| *  Definition: | ||||
| *  =========== | ||||
| * | ||||
| *       LOGICAL FUNCTION LSAME(CA,CB) | ||||
| * | ||||
| *       .. Scalar Arguments .. | ||||
| *       CHARACTER CA,CB | ||||
| *       .. | ||||
| * | ||||
| * | ||||
| *> \par Purpose: | ||||
| *  ============= | ||||
| *> | ||||
| *> \verbatim | ||||
| *> | ||||
| *> LSAME returns .TRUE. if CA is the same letter as CB regardless of | ||||
| *> case. | ||||
| *> \endverbatim | ||||
| * | ||||
| *  Arguments: | ||||
| *  ========== | ||||
| * | ||||
| *> \param[in] CA | ||||
| *> \verbatim | ||||
| *>          CA is CHARACTER*1 | ||||
| *> \endverbatim | ||||
| *> | ||||
| *> \param[in] CB | ||||
| *> \verbatim | ||||
| *>          CB is CHARACTER*1 | ||||
| *>          CA and CB specify the single characters to be compared. | ||||
| *> \endverbatim | ||||
| * | ||||
| *  Authors: | ||||
| *  ======== | ||||
| * | ||||
| *> \author Univ. of Tennessee | ||||
| *> \author Univ. of California Berkeley | ||||
| *> \author Univ. of Colorado Denver | ||||
| *> \author NAG Ltd. | ||||
| * | ||||
| *> \date December 2016 | ||||
| * | ||||
| *> \ingroup aux_blas | ||||
| * | ||||
| *  ===================================================================== | ||||
|       LOGICAL FUNCTION LSAME(CA,CB) | ||||
| * | ||||
| *     Returns .TRUE. when CA and CB are the same character, or differ | ||||
| *     only in letter case; returns .FALSE. otherwise. | ||||
| * | ||||
| *  -- Reference BLAS level1 routine (version 3.1) -- | ||||
| *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    -- | ||||
| *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- | ||||
| *     December 2016 | ||||
| * | ||||
| *     .. Scalar Arguments .. | ||||
|       CHARACTER CA,CB | ||||
| *     .. | ||||
| * | ||||
| * ===================================================================== | ||||
| * | ||||
| *     .. Intrinsic Functions .. | ||||
|       INTRINSIC ICHAR | ||||
| *     .. | ||||
| *     .. Local Scalars .. | ||||
|       INTEGER INTA,INTB,ZCODE | ||||
| *     .. | ||||
| * | ||||
| *     Test if the characters are equal | ||||
| * | ||||
| *     An exact match returns immediately, before any case folding. | ||||
| * | ||||
|       LSAME = CA .EQ. CB | ||||
|       IF (LSAME) RETURN | ||||
| * | ||||
| *     Now test for equivalence if both characters are alphabetic. | ||||
| * | ||||
| *     ZCODE selects which of the character-set branches below applies; | ||||
| *     if it matches none of them, the codes are compared unchanged. | ||||
| * | ||||
|       ZCODE = ICHAR('Z') | ||||
| * | ||||
| *     Use 'Z' rather than 'A' so that ASCII can be detected on Prime | ||||
| *     machines, on which ICHAR returns a value with bit 8 set. | ||||
| *     ICHAR('A') on Prime machines returns 193 which is the same as | ||||
| *     ICHAR('A') on an EBCDIC machine. | ||||
| * | ||||
|       INTA = ICHAR(CA) | ||||
|       INTB = ICHAR(CB) | ||||
| * | ||||
|       IF (ZCODE.EQ.90 .OR. ZCODE.EQ.122) THEN | ||||
| * | ||||
| *        ASCII is assumed - ZCODE is the ASCII code of either lower or | ||||
| *        upper case 'Z'. | ||||
| * | ||||
| *        Codes 97..122 (lower case) are shifted down by 32 onto the | ||||
| *        upper-case codes 65..90. | ||||
| * | ||||
|           IF (INTA.GE.97 .AND. INTA.LE.122) INTA = INTA - 32 | ||||
|           IF (INTB.GE.97 .AND. INTB.LE.122) INTB = INTB - 32 | ||||
| * | ||||
|       ELSE IF (ZCODE.EQ.233 .OR. ZCODE.EQ.169) THEN | ||||
| * | ||||
| *        EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or | ||||
| *        upper case 'Z'. | ||||
| * | ||||
| *        The lower-case letters occupy three separate code ranges, each | ||||
| *        shifted up by 64.  The conditions rely on .AND. binding more | ||||
| *        tightly than .OR., so each GE/LE pair forms one range test. | ||||
| * | ||||
|           IF (INTA.GE.129 .AND. INTA.LE.137 .OR. | ||||
|      +        INTA.GE.145 .AND. INTA.LE.153 .OR. | ||||
|      +        INTA.GE.162 .AND. INTA.LE.169) INTA = INTA + 64 | ||||
|           IF (INTB.GE.129 .AND. INTB.LE.137 .OR. | ||||
|      +        INTB.GE.145 .AND. INTB.LE.153 .OR. | ||||
|      +        INTB.GE.162 .AND. INTB.LE.169) INTB = INTB + 64 | ||||
| * | ||||
|       ELSE IF (ZCODE.EQ.218 .OR. ZCODE.EQ.250) THEN | ||||
| * | ||||
| *        ASCII is assumed, on Prime machines - ZCODE is the ASCII code | ||||
| *        plus 128 of either lower or upper case 'Z'. | ||||
| * | ||||
|           IF (INTA.GE.225 .AND. INTA.LE.250) INTA = INTA - 32 | ||||
|           IF (INTB.GE.225 .AND. INTB.LE.250) INTB = INTB - 32 | ||||
|       END IF | ||||
| * | ||||
| *     The characters match when their case-folded codes are equal. | ||||
| * | ||||
|       LSAME = INTA .EQ. INTB | ||||
| * | ||||
| *     RETURN | ||||
| * | ||||
| *     End of LSAME | ||||
| * | ||||
|       END | ||||
							
								
								
									
										121
									
								
								third_party/blas/xerbla.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										121
									
								
								third_party/blas/xerbla.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,121 @@ | |||
| /* xerbla.f -- translated by f2c (version 20191129).
 | ||||
|    You must link the resulting object file with libf2c: | ||||
|         on Microsoft Windows system, link with libf2c.lib; | ||||
|         on Linux or Unix systems, link with .../path/to/libf2c.a -lm | ||||
|         or, if you install libf2c.a in a standard place, with -lf2c -lm | ||||
|         -- in that order, at the end of the command line, as in | ||||
|                 cc *.o -lf2c -lm | ||||
|         Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., | ||||
| 
 | ||||
|                 http://www.netlib.org/f2c/libf2c.zip
 | ||||
| */ | ||||
| 
 | ||||
| #include "third_party/f2c/f2c.h" | ||||
| 
 | ||||
| /* Table of constant values */ | ||||
| 
 | ||||
| static integer c__1 = 1; | ||||
| 
 | ||||
| /* > \brief \b XERBLA */ | ||||
| 
 | ||||
| /*  =========== DOCUMENTATION =========== */ | ||||
| 
 | ||||
| /* Online html documentation available at */ | ||||
| /*            http://www.netlib.org/lapack/explore-html/ */ | ||||
| 
 | ||||
| /*  Definition: */ | ||||
| /*  =========== */ | ||||
| 
 | ||||
| /*       SUBROUTINE XERBLA( SRNAME, INFO ) */ | ||||
| 
 | ||||
| /*       .. Scalar Arguments .. */ | ||||
| /*       CHARACTER*(*)      SRNAME */ | ||||
| /*       INTEGER            INFO */ | ||||
| /*       .. */ | ||||
| 
 | ||||
| /* > \par Purpose: */ | ||||
| /*  ============= */ | ||||
| /* > */ | ||||
| /* > \verbatim */ | ||||
| /* > */ | ||||
| /* > XERBLA  is an error handler for the LAPACK routines. */ | ||||
| /* > It is called by an LAPACK routine if an input parameter has an */ | ||||
| /* > invalid value.  A message is printed and execution stops. */ | ||||
| /* > */ | ||||
| /* > Installers may consider modifying the STOP statement in order to */ | ||||
| /* > call system-specific exception-handling facilities. */ | ||||
| /* > \endverbatim */ | ||||
| 
 | ||||
| /*  Arguments: */ | ||||
| /*  ========== */ | ||||
| 
 | ||||
| /* > \param[in] SRNAME */ | ||||
| /* > \verbatim */ | ||||
| /* >          SRNAME is CHARACTER*(*) */ | ||||
| /* >          The name of the routine which called XERBLA. */ | ||||
| /* > \endverbatim */ | ||||
| /* > */ | ||||
| /* > \param[in] INFO */ | ||||
| /* > \verbatim */ | ||||
| /* >          INFO is INTEGER */ | ||||
| /* >          The position of the invalid parameter in the parameter list */ | ||||
| /* >          of the calling routine. */ | ||||
| /* > \endverbatim */ | ||||
| 
 | ||||
| /*  Authors: */ | ||||
| /*  ======== */ | ||||
| 
 | ||||
| /* > \author Univ. of Tennessee */ | ||||
| /* > \author Univ. of California Berkeley */ | ||||
| /* > \author Univ. of Colorado Denver */ | ||||
| /* > \author NAG Ltd. */ | ||||
| 
 | ||||
| /* > \date December 2016 */ | ||||
| 
 | ||||
| /* > \ingroup aux_blas */ | ||||
| 
 | ||||
| /*  ===================================================================== */ | ||||
| /* Subroutine */ int xerbla_(char *srname, integer *info, ftnlen srname_len) { | ||||
|   /* Reports an invalid argument: writes a formatted message containing | ||||
|      srname and *info through the libf2c formatted-write calls, then | ||||
|      calls s_stop.  srname_len is only forwarded to trmlen_ together with | ||||
|      srname (presumably the Fortran CHARACTER length that f2c passes as a | ||||
|      trailing argument -- confirm against the f2c calling convention). */ | ||||
|   /* Format strings */ | ||||
|   static char fmt_9999[] = | ||||
|       "(\002 ** On entry to \002,a,\002 parameter num" | ||||
|       "ber \002,i2,\002 had \002,\002an illegal value\002)"; | ||||
| 
 | ||||
|   /* Builtin functions */ | ||||
|   integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); | ||||
|   /* Subroutine */ int s_stop(char *, ftnlen); | ||||
| 
 | ||||
|   /* Local variables */ | ||||
|   /* NOTE(review): xerbla.f calls TRMLEN without declaring it, so f2c | ||||
|      typed it implicitly and declared it here as returning doublereal; | ||||
|      the result is cast to integer at the call below.  The definition of | ||||
|      trmlen_ is not visible from this file -- confirm that it really | ||||
|      returns doublereal, otherwise the return types are mismatched. */ | ||||
|   extern doublereal trmlen_(char *, ftnlen); | ||||
| 
 | ||||
|   /* Fortran I/O blocks */ | ||||
|   /* Control block handed to s_wsfe; it carries the format string above. | ||||
|      The meaning of the other initializers depends on the cilist layout | ||||
|      in f2c.h (the 6 is presumably the output unit -- TODO confirm). */ | ||||
|   static cilist io___1 = {0, 6, 0, fmt_9999, 0}; | ||||
| 
 | ||||
|   /*  -- Reference BLAS level1 routine (version 3.7.0) -- */ | ||||
|   /*  -- Reference BLAS is a software package provided by Univ. of Tennessee,
 | ||||
|    * -- */ | ||||
|   /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 | ||||
|    */ | ||||
|   /*     December 2016 */ | ||||
| 
 | ||||
|   /*     .. Scalar Arguments .. */ | ||||
|   /*     .. */ | ||||
| 
 | ||||
|   /* ===================================================================== */ | ||||
| 
 | ||||
|   /*     .. Intrinsic Functions .. */ | ||||
|   /*     INTRINSIC          LEN_TRIM */ | ||||
|   /*     .. */ | ||||
|   /*     .. Executable Statements .. */ | ||||
| 
 | ||||
|   /* Start the formatted write, emit the routine name (with the length | ||||
|      returned by trmlen_) and the argument number, then end the write. */ | ||||
|   s_wsfe(&io___1); | ||||
|   do_fio(&c__1, srname, (integer)trmlen_(srname, srname_len)); | ||||
|   do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer)); | ||||
|   e_wsfe(); | ||||
| 
 | ||||
|   /* Translation of the Fortran STOP statement. */ | ||||
|   s_stop("", (ftnlen)0); | ||||
| 
 | ||||
|   /*     End of XERBLA */ | ||||
| 
 | ||||
|   /* Reached only if s_stop returns. */ | ||||
|   return 0; | ||||
| } /* xerbla_ */ | ||||
							
								
								
									
										89
									
								
								third_party/blas/xerbla.f
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										89
									
								
								third_party/blas/xerbla.f
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,89 @@ | |||
| *> \brief \b XERBLA | ||||
| * | ||||
| *  =========== DOCUMENTATION =========== | ||||
| * | ||||
| * Online html documentation available at | ||||
| *            http://www.netlib.org/lapack/explore-html/ | ||||
| * | ||||
| *  Definition: | ||||
| *  =========== | ||||
| * | ||||
| *       SUBROUTINE XERBLA( SRNAME, INFO ) | ||||
| * | ||||
| *       .. Scalar Arguments .. | ||||
| *       CHARACTER*(*)      SRNAME | ||||
| *       INTEGER            INFO | ||||
| *       .. | ||||
| * | ||||
| * | ||||
| *> \par Purpose: | ||||
| *  ============= | ||||
| *> | ||||
| *> \verbatim | ||||
| *> | ||||
| *> XERBLA  is an error handler for the LAPACK routines. | ||||
| *> It is called by an LAPACK routine if an input parameter has an | ||||
| *> invalid value.  A message is printed and execution stops. | ||||
| *> | ||||
| *> Installers may consider modifying the STOP statement in order to | ||||
| *> call system-specific exception-handling facilities. | ||||
| *> \endverbatim | ||||
| * | ||||
| *  Arguments: | ||||
| *  ========== | ||||
| * | ||||
| *> \param[in] SRNAME | ||||
| *> \verbatim | ||||
| *>          SRNAME is CHARACTER*(*) | ||||
| *>          The name of the routine which called XERBLA. | ||||
| *> \endverbatim | ||||
| *> | ||||
| *> \param[in] INFO | ||||
| *> \verbatim | ||||
| *>          INFO is INTEGER | ||||
| *>          The position of the invalid parameter in the parameter list | ||||
| *>          of the calling routine. | ||||
| *> \endverbatim | ||||
| * | ||||
| *  Authors: | ||||
| *  ======== | ||||
| * | ||||
| *> \author Univ. of Tennessee | ||||
| *> \author Univ. of California Berkeley | ||||
| *> \author Univ. of Colorado Denver | ||||
| *> \author NAG Ltd. | ||||
| * | ||||
| *> \date December 2016 | ||||
| * | ||||
| *> \ingroup aux_blas | ||||
| * | ||||
| *  ===================================================================== | ||||
|       SUBROUTINE XERBLA( SRNAME, INFO ) | ||||
| * | ||||
| *     Writes a message naming the calling routine SRNAME and the | ||||
| *     position INFO of its offending argument, then executes STOP. | ||||
| * | ||||
| *  -- Reference BLAS level1 routine (version 3.7.0) -- | ||||
| *  -- Reference BLAS is a software package provided by Univ. of Tennessee,    -- | ||||
| *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- | ||||
| *     December 2016 | ||||
| * | ||||
| *     .. Scalar Arguments .. | ||||
|       CHARACTER*(*)      SRNAME | ||||
|       INTEGER            INFO | ||||
| *     .. | ||||
| * | ||||
| * ===================================================================== | ||||
| * | ||||
| *     .. Intrinsic Functions .. | ||||
| *     INTRINSIC          LEN_TRIM | ||||
| *     .. | ||||
| *     .. Executable Statements .. | ||||
| * | ||||
| *     NOTE(review): TRMLEN is neither declared nor listed as EXTERNAL | ||||
| *     in this routine, so it takes the implicit type for names starting | ||||
| *     with T (REAL) although it is used as a substring bound.  The | ||||
| *     commented-out INTRINSIC line above suggests it stands in for | ||||
| *     LEN_TRIM.  Its definition is not visible here -- confirm its | ||||
| *     result type and consider declaring it explicitly. | ||||
| * | ||||
|       WRITE( *, FMT = 9999 )SRNAME( 1:TRMLEN( SRNAME ) ), INFO | ||||
| * | ||||
|       STOP | ||||
| * | ||||
| *     Message layout used by the WRITE above: A receives the routine | ||||
| *     name and I2 the argument number. | ||||
| * | ||||
|  9999 FORMAT( ' ** On entry to ', A, ' parameter number ', I2, ' had ', | ||||
|      $      'an illegal value' ) | ||||
| * | ||||
| *     End of XERBLA | ||||
| * | ||||
|       END | ||||
							
								
								
									
										125
									
								
								third_party/bzip2/.clang-format
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										125
									
								
								third_party/bzip2/.clang-format
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,125 @@ | |||
| --- | ||||
| Language:        Cpp | ||||
| # BasedOnStyle:  WebKit | ||||
| AccessModifierOffset: -4 | ||||
| AlignAfterOpenBracket: DontAlign | ||||
| AlignConsecutiveAssignments: false | ||||
| AlignConsecutiveDeclarations: false | ||||
| AlignEscapedNewlines: Right | ||||
| AlignOperands:   false | ||||
| AlignTrailingComments: false | ||||
| AllowAllArgumentsOnNextLine: true | ||||
| AllowAllConstructorInitializersOnNextLine: true | ||||
| AllowAllParametersOfDeclarationOnNextLine: true | ||||
| AllowShortBlocksOnASingleLine: false | ||||
| AllowShortCaseLabelsOnASingleLine: false | ||||
| AllowShortFunctionsOnASingleLine: All | ||||
| AllowShortLambdasOnASingleLine: All | ||||
| AllowShortIfStatementsOnASingleLine: Never | ||||
| AllowShortLoopsOnASingleLine: false | ||||
| AlwaysBreakAfterDefinitionReturnType: None | ||||
| AlwaysBreakAfterReturnType: None | ||||
| AlwaysBreakBeforeMultilineStrings: false | ||||
| AlwaysBreakTemplateDeclarations: MultiLine | ||||
| BinPackArguments: true | ||||
| BinPackParameters: true | ||||
| BraceWrapping:    | ||||
|   AfterCaseLabel:  false | ||||
|   AfterClass:      false | ||||
|   AfterControlStatement: false | ||||
|   AfterEnum:       false | ||||
|   AfterFunction:   true | ||||
|   AfterNamespace:  false | ||||
|   AfterObjCDeclaration: false | ||||
|   AfterStruct:     false | ||||
|   AfterUnion:      false | ||||
|   AfterExternBlock: false | ||||
|   BeforeCatch:     false | ||||
|   BeforeElse:      false | ||||
|   IndentBraces:    false | ||||
|   SplitEmptyFunction: true | ||||
|   SplitEmptyRecord: true | ||||
|   SplitEmptyNamespace: true | ||||
| BreakBeforeBinaryOperators: All | ||||
| BreakBeforeBraces: WebKit | ||||
| BreakBeforeInheritanceComma: false | ||||
| BreakInheritanceList: BeforeColon | ||||
| BreakBeforeTernaryOperators: true | ||||
| BreakConstructorInitializersBeforeComma: false | ||||
| BreakConstructorInitializers: BeforeComma | ||||
| BreakAfterJavaFieldAnnotations: false | ||||
| BreakStringLiterals: true | ||||
| ColumnLimit:     80 | ||||
| CommentPragmas:  '^ IWYU pragma:' | ||||
| CompactNamespaces: false | ||||
| ConstructorInitializerAllOnOneLineOrOnePerLine: false | ||||
| ConstructorInitializerIndentWidth: 4 | ||||
| ContinuationIndentWidth: 4 | ||||
| Cpp11BracedListStyle: false | ||||
| DerivePointerAlignment: false | ||||
| DisableFormat:   false | ||||
| ExperimentalAutoDetectBinPacking: false | ||||
| FixNamespaceComments: false | ||||
| ForEachMacros:    | ||||
|   - foreach | ||||
|   - Q_FOREACH | ||||
|   - BOOST_FOREACH | ||||
| IncludeBlocks:   Preserve | ||||
| IncludeCategories:  | ||||
|   - Regex:           '^"(llvm|llvm-c|clang|clang-c)/' | ||||
|     Priority:        2 | ||||
|   - Regex:           '^(<|"(gtest|gmock|isl|json)/)' | ||||
|     Priority:        3 | ||||
|   - Regex:           '.*' | ||||
|     Priority:        1 | ||||
| IncludeIsMainRegex: '(Test)?$' | ||||
| IndentCaseLabels: false | ||||
| IndentPPDirectives: None | ||||
| IndentWidth:     4 | ||||
| IndentWrappedFunctionNames: false | ||||
| JavaScriptQuotes: Leave | ||||
| JavaScriptWrapImports: true | ||||
| KeepEmptyLinesAtTheStartOfBlocks: true | ||||
| MacroBlockBegin: '' | ||||
| MacroBlockEnd:   '' | ||||
| MaxEmptyLinesToKeep: 1 | ||||
| NamespaceIndentation: Inner | ||||
| ObjCBinPackProtocolList: Auto | ||||
| ObjCBlockIndentWidth: 4 | ||||
| ObjCSpaceAfterProperty: true | ||||
| ObjCSpaceBeforeProtocolList: true | ||||
| PenaltyBreakAssignment: 2 | ||||
| PenaltyBreakBeforeFirstCallParameter: 19 | ||||
| PenaltyBreakComment: 300 | ||||
| PenaltyBreakFirstLessLess: 120 | ||||
| PenaltyBreakString: 1000 | ||||
| PenaltyBreakTemplateDeclaration: 10 | ||||
| PenaltyExcessCharacter: 1000000 | ||||
| PenaltyReturnTypeOnItsOwnLine: 60 | ||||
| PointerAlignment: Right | ||||
| ReflowComments:  true | ||||
| SortIncludes:    true | ||||
| SortUsingDeclarations: true | ||||
| SpaceAfterCStyleCast: false | ||||
| SpaceAfterLogicalNot: false | ||||
| SpaceAfterTemplateKeyword: true | ||||
| SpaceBeforeAssignmentOperators: true | ||||
| SpaceBeforeCpp11BracedList: true | ||||
| SpaceBeforeCtorInitializerColon: true | ||||
| SpaceBeforeInheritanceColon: true | ||||
| SpaceBeforeParens: ControlStatements | ||||
| SpaceBeforeRangeBasedForLoopColon: true | ||||
| SpaceInEmptyParentheses: false | ||||
| SpacesBeforeTrailingComments: 1 | ||||
| SpacesInAngles:  false | ||||
| SpacesInContainerLiterals: true | ||||
| SpacesInCStyleCastParentheses: false | ||||
| SpacesInParentheses: false | ||||
| SpacesInSquareBrackets: false | ||||
| Standard:        Cpp11 | ||||
| StatementMacros:  | ||||
|   - Q_UNUSED | ||||
|   - QT_REQUIRE_VERSION | ||||
| TabWidth:        4 | ||||
| UseTab:          Always | ||||
| ... | ||||
							
								
								
									
										35
									
								
								third_party/bzip2/bzip2.mk
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										35
									
								
								third_party/bzip2/bzip2.mk
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,35 @@ | |||
| #-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐
 | ||||
| #───vi: set et ft=make ts=8 tw=8 fenc=utf-8 :vi───────────────────────┘
 | ||||
| # Description:
 | ||||
| #   bzip2 is a compression format.
 | ||||
| 
 | ||||
| PKGS += THIRD_PARTY_BZIP2 | ||||
| 
 | ||||
| THIRD_PARTY_BZIP2_BINS =						\
 | ||||
| 	o/$(MODE)/third_party/bzip2/µbunzip2.com			\
 | ||||
| 	o/$(MODE)/third_party/bzip2/µbunzip2.com.dbg | ||||
| 
 | ||||
| THIRD_PARTY_BZIP2_OBJS =						\
 | ||||
| 	o/$(MODE)/third_party/bzip2/µbunzip2.o | ||||
| 
 | ||||
| THIRD_PARTY_BZIP2_DEPS := $(call uniq,					\
 | ||||
| 	$(LIBC_STR)							\
 | ||||
| 	$(LIBC_STDIO)) | ||||
| 
 | ||||
| $(THIRD_PARTY_BZIP2_OBJS):						\ | ||||
| 	DEFAULT_CPPFLAGS +=						\
 | ||||
| 		-DHAVE_CONFIG_H | ||||
| 
 | ||||
| o/$(MODE)/third_party/bzip2/µbunzip2.com.dbg:				\ | ||||
| 		$(THIRD_PARTY_BZIP2_DEPS)				\
 | ||||
| 		$(THIRD_PARTY_BZIP2_OBJS)				\
 | ||||
| 		$(CRT)							\
 | ||||
| 		$(APE) | ||||
| 	@$(APELINK) | ||||
| 
 | ||||
| $(THIRD_PARTY_BZIP2_OBJS):						\ | ||||
| 		$(BUILD_FILES)						\
 | ||||
| 		third_party/bzip2/bzip2.mk | ||||
| 
 | ||||
| .PHONY: o/$(MODE)/third_party/bzip2 | ||||
| o/$(MODE)/third_party/bzip2: $(THIRD_PARTY_BZIP2_BINS) | ||||
							
								
								
									
										569
									
								
								third_party/bzip2/µbunzip2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										569
									
								
								third_party/bzip2/µbunzip2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,569 @@ | |||
| /*	micro-bunzip, a small, simple bzip2 decompression implementation.
 | ||||
| 	Copyright 2003 by Rob Landley (rob@landley.net). | ||||
| 
 | ||||
| 	Based on bzip2 decompression code by Julian R Seward (jseward@acm.org), | ||||
| 	which also acknowledges contributions by Mike Burrows, David Wheeler, | ||||
| 	Peter Fenwick, Alistair Moffat, Radford Neal, Ian H. Witten, | ||||
| 	Robert Sedgewick, and Jon L. Bentley. | ||||
| 
 | ||||
| 	I hereby release this code under the GNU Library General Public License | ||||
| 	(LGPL) version 2, available at http://www.gnu.org/copyleft/lgpl.html
 | ||||
| */ | ||||
| 
 | ||||
| #include "libc/calls/calls.h" | ||||
| #include "libc/mem/mem.h" | ||||
| #include "libc/runtime/runtime.h" | ||||
| #include "libc/stdio/stdio.h" | ||||
| #include "libc/str/str.h" | ||||
| #include "libc/sysv/consts/fileno.h" | ||||
| 
 | ||||
/* Huffman coding limits */
#define MAX_GROUPS       6   /* most coding groups a block may declare */
#define GROUP_SIZE       50  /* symbols per selector; 64 would have been more efficient */
#define MAX_HUFCODE_BITS 20  /* longest huffman code allowed */
#define MAX_SYMBOLS      258 /* 256 literals + RUNA + RUNB */
#define SYMBOL_RUNA      0
#define SYMBOL_RUNB      1

/* Status return values; every non-OK status is negative */
#define RETVAL_OK                    0
#define RETVAL_LAST_BLOCK            (-1)
#define RETVAL_NOT_BZIP_DATA         (-2)
#define RETVAL_UNEXPECTED_INPUT_EOF  (-3)
#define RETVAL_UNEXPECTED_OUTPUT_EOF (-4)
#define RETVAL_DATA_ERROR            (-5)
#define RETVAL_OUT_OF_MEMORY         (-6)
#define RETVAL_OBSOLETE_INPUT        (-7)

/* Size in bytes of the input and output staging buffers */
#define IOBUF_SIZE 4096
| 
 | ||||
/* Human-readable messages, indexed by the negated status code.
   Slot 0 (RETVAL_OK) has no message. */
char *bunzip_errors[] = {
	NULL,
	"Bad file checksum",
	"Not bzip data",
	"Unexpected input EOF",
	"Unexpected output EOF",
	"Data error",
	"Out of memory",
	"Obsolete (pre 0.9.5) bzip format not supported.",
};
| 
 | ||||
| /* This is what we know about each huffman coding group */ | ||||
| struct group_data { | ||||
| 	int limit[MAX_HUFCODE_BITS], base[MAX_HUFCODE_BITS], permute[MAX_SYMBOLS]; | ||||
| 	char minLen, maxLen; | ||||
| }; | ||||
| 
 | ||||
| /* Structure holding all the housekeeping data, including IO buffers and
 | ||||
|    memory that persists between calls to bunzip */ | ||||
| typedef struct { | ||||
| 	/* For I/O error handling */ | ||||
| 	jmp_buf jmpbuf; | ||||
| 	/* Input stream, input buffer, input bit buffer */ | ||||
| 	int64_t in_fd, inbufCount, inbufPos; | ||||
| 	unsigned char *inbuf; | ||||
| 	unsigned int inbufBitCount, inbufBits; | ||||
| 	/* Output buffer */ | ||||
| 	char outbuf[IOBUF_SIZE]; | ||||
| 	int outbufPos; | ||||
| 	/* The CRC values stored in the block header and calculated from the data */ | ||||
| 	unsigned int crc32Table[256], headerCRC, dataCRC, totalCRC; | ||||
| 	/* Intermediate buffer and its size (in bytes) */ | ||||
| 	unsigned int *dbuf, dbufSize; | ||||
| 	/* State for interrupting output loop */ | ||||
| 	int writePos, writeRun, writeCount, writeCurrent; | ||||
| 
 | ||||
| 	/* These things are a bit too big to go on the stack */ | ||||
| 	unsigned char selectors[32768]; /* nSelectors=15 bits */ | ||||
| 	struct group_data groups[MAX_GROUPS]; /* huffman coding tables */ | ||||
| } bunzip_data; | ||||
| 
 | ||||
| /* Return the next nnn bits of input. All reads from the compressed
 | ||||
|    input are done through this function. All reads are big endian */ | ||||
| static unsigned int get_bits(bunzip_data *bd, char bits_wanted) | ||||
| { | ||||
| 	unsigned int bits = 0; | ||||
| 
 | ||||
| 	/* If we need to get more data from the byte buffer, do so. (Loop
 | ||||
| 	   getting one byte at a time to enforce endianness and avoid | ||||
| 	   unaligned access.) */ | ||||
| 	while (bd->inbufBitCount < bits_wanted) { | ||||
| 		/* If we need to read more data from file into byte buffer, do so */ | ||||
| 		if (bd->inbufPos == bd->inbufCount) { | ||||
| 			if (!(bd->inbufCount = read(bd->in_fd, bd->inbuf, IOBUF_SIZE))) | ||||
| 				longjmp(bd->jmpbuf, RETVAL_UNEXPECTED_INPUT_EOF); | ||||
| 			bd->inbufPos = 0; | ||||
| 		} | ||||
| 		/* Avoid 32-bit overflow (dump bit buffer to top of output) */ | ||||
| 		if (bd->inbufBitCount >= 24) { | ||||
| 			bits = bd->inbufBits & ((1u << bd->inbufBitCount) - 1); | ||||
| 			bits_wanted -= bd->inbufBitCount; | ||||
| 			bits <<= bits_wanted; | ||||
| 			bd->inbufBitCount = 0; | ||||
| 		} | ||||
| 		/* Grab next 8 bits of input from buffer. */ | ||||
| 		bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++]; | ||||
| 		bd->inbufBitCount += 8; | ||||
| 	} | ||||
| 	/* Calculate result */ | ||||
| 	bd->inbufBitCount -= bits_wanted; | ||||
| 	bits |= (bd->inbufBits >> bd->inbufBitCount) & ((1u << bits_wanted) - 1); | ||||
| 
 | ||||
| 	return bits; | ||||
| } | ||||
| 
 | ||||
| /* Decompress a block of text to into intermediate buffer */ | ||||
| 
 | ||||
| static int read_bunzip_data(bunzip_data *bd) | ||||
| { | ||||
| 	struct group_data *hufGroup; | ||||
| 	int dbufCount, nextSym, dbufSize, origPtr, groupCount, *base, *limit, | ||||
| 		selector, i, j, k, t, runPos, symCount, symTotal, nSelectors, | ||||
| 		byteCount[256]; | ||||
| 	unsigned char uc, symToByte[256], mtfSymbol[256], *selectors; | ||||
| 	unsigned int *dbuf; | ||||
| 
 | ||||
| 	/* Read in header signature (borrowing mtfSymbol for temp space). */ | ||||
| 	for (i = 0; i < 6; i++) | ||||
| 		mtfSymbol[i] = get_bits(bd, 8); | ||||
| 	mtfSymbol[6] = 0; | ||||
| 	/* Read CRC (which is stored big endian). */ | ||||
| 	bd->headerCRC = get_bits(bd, 32); | ||||
| 	/* Is this the last block (with CRC for file)? */ | ||||
| 	if (!strcmp((char *)mtfSymbol, "\x17\x72\x45\x38\x50\x90")) | ||||
| 		return RETVAL_LAST_BLOCK; | ||||
| 	/* If it's not a valid data block, barf. */ | ||||
| 	if (strcmp((char *)mtfSymbol, "\x31\x41\x59\x26\x53\x59")) | ||||
| 		return RETVAL_NOT_BZIP_DATA; | ||||
| 
 | ||||
| 	dbuf = bd->dbuf; | ||||
| 	dbufSize = bd->dbufSize; | ||||
| 	selectors = bd->selectors; | ||||
| 	/* We can add support for blockRandomised if anybody complains.
 | ||||
| 	   There was some code for this in busybox 1.0.0-pre3, but nobody | ||||
| 	   ever noticed that it didn't actually work. */ | ||||
| 	if (get_bits(bd, 1)) | ||||
| 		return RETVAL_OBSOLETE_INPUT; | ||||
| 	if ((origPtr = get_bits(bd, 24)) > dbufSize) | ||||
| 		return RETVAL_DATA_ERROR; | ||||
| 	/* mapping table: if some byte values are never used (encoding
 | ||||
| 	   things like ascii text), the compression code removes the gaps to | ||||
| 	   have fewer symbols to deal with, and writes a sparse bitfield | ||||
| 	   indicating which values were present. We make a translation table | ||||
| 	   to convert the symbols back to the corresponding bytes. */ | ||||
| 	t = get_bits(bd, 16); | ||||
| 	memset(symToByte, 0, 256); | ||||
| 	symTotal = 0; | ||||
| 	for (i = 0; i < 16; i++) { | ||||
| 		if (t & (1u << (15 - i))) { | ||||
| 			k = get_bits(bd, 16); | ||||
| 			for (j = 0; j < 16; j++) | ||||
| 				if (k & (1u << (15 - j))) | ||||
| 					symToByte[symTotal++] = (16 * i) + j; | ||||
| 		} | ||||
| 	} | ||||
| 	/* How many different huffman coding groups does this block use? */ | ||||
| 	groupCount = get_bits(bd, 3); | ||||
| 	if (groupCount < 2 || groupCount > MAX_GROUPS) | ||||
| 		return RETVAL_DATA_ERROR; | ||||
| 	/* nSelectors: Every GROUP_SIZE many symbols we select a new huffman
 | ||||
| 	   coding group. Read in the group selector list, which is stored as | ||||
| 	   MTF encoded bit runs. */ | ||||
| 	if (!(nSelectors = get_bits(bd, 15))) | ||||
| 		return RETVAL_DATA_ERROR; | ||||
| 	for (i = 0; i < groupCount; i++) | ||||
| 		mtfSymbol[i] = i; | ||||
| 	for (i = 0; i < nSelectors; i++) { | ||||
| 		/* Get next value */ | ||||
| 		for (j = 0; get_bits(bd, 1); j++) | ||||
| 			if (j >= groupCount) | ||||
| 				return RETVAL_DATA_ERROR; | ||||
| 		/* Decode MTF to get the next selector */ | ||||
| 		uc = mtfSymbol[j]; | ||||
| 		memmove(mtfSymbol + 1, mtfSymbol, j); | ||||
| 		mtfSymbol[0] = selectors[i] = uc; | ||||
| 	} | ||||
| 	/* Read the huffman coding tables for each group, which code for symTotal
 | ||||
| 	   literal symbols, plus two run symbols (RUNA, RUNB) */ | ||||
| 	symCount = symTotal + 2; | ||||
| 	for (j = 0; j < groupCount; j++) { | ||||
| 		unsigned char length[MAX_SYMBOLS], temp[MAX_HUFCODE_BITS + 1]; | ||||
| 		int minLen, maxLen, pp; | ||||
| 		/* Read lengths */ | ||||
| 		t = get_bits(bd, 5); | ||||
| 		for (i = 0; i < symCount; i++) { | ||||
| 			for (;;) { | ||||
| 				if (t < 1 || t > MAX_HUFCODE_BITS) | ||||
| 					return RETVAL_DATA_ERROR; | ||||
| 				if (!get_bits(bd, 1)) | ||||
| 					break; | ||||
| 				if (!get_bits(bd, 1)) | ||||
| 					t++; | ||||
| 				else | ||||
| 					t--; | ||||
| 			} | ||||
| 			length[i] = t; | ||||
| 		} | ||||
| 		/* Find largest and smallest lengths in this group */ | ||||
| 		minLen = maxLen = length[0]; | ||||
| 		for (i = 1; i < symCount; i++) { | ||||
| 			if (length[i] > maxLen) | ||||
| 				maxLen = length[i]; | ||||
| 			else if (length[i] < minLen) | ||||
| 				minLen = length[i]; | ||||
| 		} | ||||
| 		/* Calculate permute[], base[], and limit[] tables from length[].
 | ||||
| 		 * | ||||
| 		 * permute[] is the lookup table for converting huffman coded symbols | ||||
| 		 * into decoded symbols.  base[] is the amount to subtract from the | ||||
| 		 * value of a huffman symbol of a given length when using permute[]. | ||||
| 		 * | ||||
| 		 * limit[] indicates the largest numerical value a symbol with a given | ||||
| 		 * number of bits can have.  It lets us know when to stop reading. | ||||
| 		 * | ||||
| 		 * To use these, keep reading bits until value<=limit[bitcount] or | ||||
| 		 * you've read over 20 bits (error).  Then the decoded symbol | ||||
| 		 * equals permute[hufcode_value-base[hufcode_bitcount]]. | ||||
| 		 */ | ||||
| 		hufGroup = bd->groups + j; | ||||
| 		hufGroup->minLen = minLen; | ||||
| 		hufGroup->maxLen = maxLen; | ||||
| 		/* Note that minLen can't be smaller than 1, so we adjust the
 | ||||
| 		   base and limit array pointers so we're not always wasting the | ||||
| 		   first entry. We do this again when using them (during symbol | ||||
| 		   decoding).*/ | ||||
| 		base = hufGroup->base - 1; | ||||
| 		limit = hufGroup->limit - 1; | ||||
| 		/* Calculate permute[] */ | ||||
| 		pp = 0; | ||||
| 		for (i = minLen; i <= maxLen; i++) | ||||
| 			for (t = 0; t < symCount; t++) | ||||
| 				if (length[t] == i) | ||||
| 					hufGroup->permute[pp++] = t; | ||||
| 		/* Count cumulative symbols coded for at each bit length */ | ||||
| 		for (i = minLen; i <= maxLen; i++) | ||||
| 			temp[i] = limit[i] = 0; | ||||
| 		for (i = 0; i < symCount; i++) | ||||
| 			temp[length[i]]++; | ||||
| 		/* Calculate limit[] (the largest symbol-coding value at each
 | ||||
| 		   bit length, which is (previous limit<<1)+symbols at this | ||||
| 		   level), and base[] (number of symbols to ignore at each bit | ||||
| 		   length, which is limit-cumulative count of symbols coded for | ||||
| 		   already). */ | ||||
| 		pp = t = 0; | ||||
| 		for (i = minLen; i < maxLen; i++) { | ||||
| 			pp += temp[i]; | ||||
| 			limit[i] = pp - 1; | ||||
| 			pp <<= 1; | ||||
| 			base[i + 1] = pp - (t += temp[i]); | ||||
| 		} | ||||
| 		limit[maxLen] = pp + temp[maxLen] - 1; | ||||
| 		base[minLen] = 0; | ||||
| 	} | ||||
| 	/* We've finished reading and digesting the block header.  Now read this
 | ||||
| 	   block's huffman coded symbols from the file and undo the huffman | ||||
| 	   coding and run length encoding, saving the result into | ||||
| 	   dbuf[dbufCount++]=uc */ | ||||
| 
 | ||||
| 	/* Initialize symbol occurrence counters and symbol mtf table */ | ||||
| 	memset(byteCount, 0, 256 * sizeof(int)); | ||||
| 	for (i = 0; i < 256; i++) | ||||
| 		mtfSymbol[i] = (unsigned char)i; | ||||
| 	/* Loop through compressed symbols */ | ||||
| 	runPos = dbufCount = symCount = selector = 0; | ||||
| 	for (;;) { | ||||
| 		/* Determine which huffman coding group to use. */ | ||||
| 		if (!(symCount--)) { | ||||
| 			symCount = GROUP_SIZE - 1; | ||||
| 			if (selector >= nSelectors) | ||||
| 				return RETVAL_DATA_ERROR; | ||||
| 			hufGroup = bd->groups + selectors[selector++]; | ||||
| 			base = hufGroup->base - 1; | ||||
| 			limit = hufGroup->limit - 1; | ||||
| 		} | ||||
| 		/* Read next huffman-coded symbol */ | ||||
| 		i = hufGroup->minLen; | ||||
| 		j = get_bits(bd, i); | ||||
| 		for (;;) { | ||||
| 			if (i > hufGroup->maxLen) | ||||
| 				return RETVAL_DATA_ERROR; | ||||
| 			if (j <= limit[i]) | ||||
| 				break; | ||||
| 			i++; | ||||
| 
 | ||||
| 			j = (j << 1) | get_bits(bd, 1); | ||||
| 		} | ||||
| 		/* Huffman decode nextSym (with bounds checking) */ | ||||
| 		j -= base[i]; | ||||
| 		if (j < 0 || j >= MAX_SYMBOLS) | ||||
| 			return RETVAL_DATA_ERROR; | ||||
| 		nextSym = hufGroup->permute[j]; | ||||
| 		/* If this is a repeated run, loop collecting data */ | ||||
| 		if (nextSym == SYMBOL_RUNA || nextSym == SYMBOL_RUNB) { | ||||
| 			/* If this is the start of a new run, zero out counter */ | ||||
| 			if (!runPos) { | ||||
| 				runPos = 1; | ||||
| 				t = 0; | ||||
| 			} | ||||
| 			/* Neat trick that saves 1 symbol: instead of or-ing 0 or 1
 | ||||
| 			   at each bit position, add 1 or 2 instead. For example, | ||||
| 			   1011 is 1<<0 + 1<<1 + 2<<2. 1010 is 2<<0 + 2<<1 + 1<<2. | ||||
| 			   You can make any bit pattern that way using 1 less symbol | ||||
| 			   than the basic or 0/1 method (except all bits 0, which | ||||
| 			   would use no symbols, but a run of length 0 doesn't mean | ||||
| 			   anything in this context). Thus space is saved. */ | ||||
| 			if (nextSym == SYMBOL_RUNA) | ||||
| 				t += runPos; | ||||
| 			else | ||||
| 				t += 2 * runPos; | ||||
| 			runPos <<= 1; | ||||
| 			continue; | ||||
| 		} | ||||
| 		/* When we hit the first non-run symbol after a run, we now know
 | ||||
| 		   how many times to repeat the last literal, so append that | ||||
| 		   many copies to our buffer of decoded symbols (dbuf) now. (The | ||||
| 		   last literal used is the one at the head of the mtfSymbol | ||||
| 		   array.) */ | ||||
| 		if (runPos) { | ||||
| 			runPos = 0; | ||||
| 			if (dbufCount + t >= dbufSize) | ||||
| 				return RETVAL_DATA_ERROR; | ||||
| 
 | ||||
| 			uc = symToByte[mtfSymbol[0]]; | ||||
| 			byteCount[uc] += t; | ||||
| 			while (t--) | ||||
| 				dbuf[dbufCount++] = uc; | ||||
| 		} | ||||
| 		/* Is this the terminating symbol? */ | ||||
| 		if (nextSym > symTotal) | ||||
| 			break; | ||||
| 		/* At this point, the symbol we just decoded indicates a new
 | ||||
| 		   literal character. Subtract one to get the position in the | ||||
| 		   MTF array at which this literal is currently to be found. | ||||
| 		   (Note that the result can't be -1 or 0, because 0 and 1 are | ||||
| 		   RUNA and RUNB. Another instance of the first symbol in the | ||||
| 		   mtf array, position 0, would have been handled as part of a | ||||
| 		   run.) */ | ||||
| 		if (dbufCount >= dbufSize) | ||||
| 			return RETVAL_DATA_ERROR; | ||||
| 		i = nextSym - 1; | ||||
| 		uc = mtfSymbol[i]; | ||||
| 		memmove(mtfSymbol + 1, mtfSymbol, i); | ||||
| 		mtfSymbol[0] = uc; | ||||
| 		uc = symToByte[uc]; | ||||
| 		/* We have our literal byte.  Save it into dbuf. */ | ||||
| 		byteCount[uc]++; | ||||
| 		dbuf[dbufCount++] = (unsigned int)uc; | ||||
| 	} | ||||
| 	/* At this point, we've finished reading huffman-coded symbols and
 | ||||
| 	   compressed runs from the input stream. There are dbufCount many | ||||
| 	   of them in dbuf[]. Now undo the Burrows-Wheeler transform on | ||||
| 	   dbuf. See http://dogma.net/markn/articles/bwt/bwt.htm */
 | ||||
| 
 | ||||
| 	/* Now we know what dbufCount is, do a better sanity check on origPtr.  */ | ||||
| 	if (origPtr < 0 || origPtr >= dbufCount) | ||||
| 		return RETVAL_DATA_ERROR; | ||||
| 	/* Turn byteCount into cumulative occurrence counts of 0 to n-1. */ | ||||
| 	j = 0; | ||||
| 	for (i = 0; i < 256; i++) { | ||||
| 		k = j + byteCount[i]; | ||||
| 		byteCount[i] = j; | ||||
| 		j = k; | ||||
| 	} | ||||
| 	/* Figure out what order dbuf would be in if we sorted it. */ | ||||
| 	for (i = 0; i < dbufCount; i++) { | ||||
| 		uc = (unsigned char)(dbuf[i] & 0xff); | ||||
| 		dbuf[byteCount[uc]] |= (i << 8); | ||||
| 		byteCount[uc]++; | ||||
| 	} | ||||
| 	/* blockRandomised support would go here. */ | ||||
| 
 | ||||
| 	/* Using i as position, j as previous character, t as current character,
 | ||||
| 	   and uc as run count */ | ||||
| 	bd->dataCRC = 0xffffffffL; | ||||
| 	/* Decode first byte by hand to initialize "previous" byte. Note
 | ||||
| 	   that it doesn't get output, and if the first three characters are | ||||
| 	   identical it doesn't qualify as a run (hence uc=255, which will | ||||
| 	   either wrap to 1 or get reset). */ | ||||
| 	if (dbufCount) { | ||||
| 		bd->writePos = dbuf[origPtr]; | ||||
| 		bd->writeCurrent = (unsigned char)(bd->writePos & 0xff); | ||||
| 		bd->writePos >>= 8; | ||||
| 		bd->writeRun = -1; | ||||
| 	} | ||||
| 	bd->writeCount = dbufCount; | ||||
| 
 | ||||
| 	return RETVAL_OK; | ||||
| } | ||||
| 
 | ||||
| /* Flush output buffer to disk */ | ||||
| static void flush_bunzip_outbuf(bunzip_data *bd, int64_t out_fd) | ||||
| { | ||||
| 	if (bd->outbufPos) { | ||||
| 		if (write(out_fd, bd->outbuf, bd->outbufPos) != bd->outbufPos) | ||||
| 			longjmp(bd->jmpbuf, RETVAL_UNEXPECTED_OUTPUT_EOF); | ||||
| 		bd->outbufPos = 0; | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| /* Undo burrows-wheeler transform on intermediate buffer to produce output.
 | ||||
|    If !len, write up to len bytes of data to buf.  Otherwise write to out_fd. | ||||
|    Returns len ? bytes written : RETVAL_OK.  Notice all errors negative #'s. */ | ||||
| static int write_bunzip_data( | ||||
| 	bunzip_data *bd, int64_t out_fd, char *outbuf, int len) | ||||
| { | ||||
| 	unsigned int *dbuf = bd->dbuf; | ||||
| 	int count, pos, current, run, copies, outbyte, previous, gotcount = 0; | ||||
| 
 | ||||
| 	for (;;) { | ||||
| 		/* If last read was short due to end of file, return last block now */ | ||||
| 		if (bd->writeCount < 0) | ||||
| 			return bd->writeCount; | ||||
| 		/* If we need to refill dbuf, do it. */ | ||||
| 		if (!bd->writeCount) { | ||||
| 			int i = read_bunzip_data(bd); | ||||
| 			if (i) { | ||||
| 				if (i == RETVAL_LAST_BLOCK) { | ||||
| 					bd->writeCount = i; | ||||
| 					return gotcount; | ||||
| 				} else | ||||
| 					return i; | ||||
| 			} | ||||
| 		} | ||||
| 		/* Loop generating output */ | ||||
| 		count = bd->writeCount; | ||||
| 		pos = bd->writePos; | ||||
| 		current = bd->writeCurrent; | ||||
| 		run = bd->writeRun; | ||||
| 		while (count) { | ||||
| 			/* If somebody (like busybox tar) wants a certain number of
 | ||||
| 			   bytes of data from memory instead of written to a file, | ||||
| 			   humor them */ | ||||
| 			if (len && bd->outbufPos >= len) | ||||
| 				goto dataus_interruptus; | ||||
| 			count--; | ||||
| 			/* Follow sequence vector to undo Burrows-Wheeler transform */ | ||||
| 			previous = current; | ||||
| 			pos = dbuf[pos]; | ||||
| 			current = pos & 0xff; | ||||
| 			pos >>= 8; | ||||
| 			/* Whenever we see 3 consecutive copies of the same byte,
 | ||||
| 			   the 4th is a repeat count */ | ||||
| 			if (run++ == 3) { | ||||
| 				copies = current; | ||||
| 				outbyte = previous; | ||||
| 				current = -1; | ||||
| 			} else { | ||||
| 				copies = 1; | ||||
| 				outbyte = current; | ||||
| 			} | ||||
| 			/* Output bytes to buffer, flushing to file if necessary */ | ||||
| 			while (copies--) { | ||||
| 				if (bd->outbufPos == IOBUF_SIZE) | ||||
| 					flush_bunzip_outbuf(bd, out_fd); | ||||
| 				bd->outbuf[bd->outbufPos++] = outbyte; | ||||
| 				bd->dataCRC = (bd->dataCRC << 8) | ||||
| 					^ bd->crc32Table[(bd->dataCRC >> 24) ^ outbyte]; | ||||
| 			} | ||||
| 			if (current != previous) | ||||
| 				run = 0; | ||||
| 		} | ||||
| 		/* Decompression of this block completed successfully */ | ||||
| 		bd->dataCRC = ~(bd->dataCRC); | ||||
| 		bd->totalCRC | ||||
| 			= ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ bd->dataCRC; | ||||
| 		/* If this block had a CRC error, force file level CRC error. */ | ||||
| 		if (bd->dataCRC != bd->headerCRC) { | ||||
| 			bd->totalCRC = bd->headerCRC + 1; | ||||
| 			return RETVAL_LAST_BLOCK; | ||||
| 		} | ||||
| 	dataus_interruptus: | ||||
| 		bd->writeCount = count; | ||||
| 		if (len) { | ||||
| 			gotcount += bd->outbufPos; | ||||
| 			memcpy(outbuf, bd->outbuf, len); | ||||
| 			/* If we got enough data, checkpoint loop state and return */ | ||||
| 			if ((len -= bd->outbufPos) < 1) { | ||||
| 				bd->outbufPos -= len; | ||||
| 				if (bd->outbufPos) | ||||
| 					memmove(bd->outbuf, bd->outbuf + len, bd->outbufPos); | ||||
| 				bd->writePos = pos; | ||||
| 				bd->writeCurrent = current; | ||||
| 				bd->writeRun = run; | ||||
| 				return gotcount; | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| /* Allocate the structure, read file header. If !len, src_fd contains
 | ||||
|    filehandle to read from. Else inbuf contains data. */ | ||||
| static int start_bunzip(bunzip_data **bdp, int64_t src_fd, char *inbuf, int len) | ||||
| { | ||||
| 	bunzip_data *bd; | ||||
| 	unsigned int i, j, c; | ||||
| 
 | ||||
| 	/* Figure out how much data to allocate */ | ||||
| 	i = sizeof(bunzip_data); | ||||
| 	if (!len) | ||||
| 		i += IOBUF_SIZE; | ||||
| 	/* Allocate bunzip_data.  Most fields initialize to zero. */ | ||||
| 	if (!(bd = *bdp = malloc(i))) | ||||
| 		return RETVAL_OUT_OF_MEMORY; | ||||
| 	memset(bd, 0, sizeof(bunzip_data)); | ||||
| 	if (len) { | ||||
| 		bd->inbuf = (unsigned char *)inbuf; | ||||
| 		bd->inbufCount = len; | ||||
| 		bd->in_fd = -1; | ||||
| 	} else { | ||||
| 		bd->inbuf = (unsigned char *)(bd + 1); | ||||
| 		bd->in_fd = src_fd; | ||||
| 	} | ||||
| 	/* Init the CRC32 table (big endian) */ | ||||
| 	for (i = 0; i < 256; i++) { | ||||
| 		c = i << 24; | ||||
| 		for (j = 8; j; j--) | ||||
| 			c = c & 0x80000000 ? (c << 1) ^ 0x04c11db7 : (c << 1); | ||||
| 		bd->crc32Table[i] = c; | ||||
| 	} | ||||
| 	/* Setup for I/O error handling via longjmp */ | ||||
| 	i = setjmp(bd->jmpbuf); | ||||
| 	if (i) | ||||
| 		return i; | ||||
| 	/* Ensure that file starts with "BZh" */ | ||||
| 	for (i = 0; i < 3; i++) | ||||
| 		if (get_bits(bd, 8) != "BZh"[i]) | ||||
| 			return RETVAL_NOT_BZIP_DATA; | ||||
| 	/* Next byte ascii '1'-'9', indicates block size in units of 100k of
 | ||||
| 	   uncompressed data.  Allocate intermediate buffer for block. */ | ||||
| 	i = get_bits(bd, 8); | ||||
| 	if (i < '1' || i > '9') | ||||
| 		return RETVAL_NOT_BZIP_DATA; | ||||
| 	bd->dbufSize = 100000 * (i - '0'); | ||||
| 	if (!(bd->dbuf = malloc(bd->dbufSize * sizeof(int)))) | ||||
| 		return RETVAL_OUT_OF_MEMORY; | ||||
| 	return RETVAL_OK; | ||||
| } | ||||
| 
 | ||||
| /* Example usage: decompress src_fd to dst_fd.  (Stops at end of bzip data,
 | ||||
|    not end of file.) */ | ||||
| static char *uncompressStream(int64_t src_fd, int64_t dst_fd) | ||||
| { | ||||
| 	bunzip_data *bd; | ||||
| 	int i; | ||||
| 	if (!(i = start_bunzip(&bd, src_fd, 0, 0))) { | ||||
| 		i = write_bunzip_data(bd, dst_fd, 0, 0); | ||||
| 		if (i == RETVAL_LAST_BLOCK && bd->headerCRC == bd->totalCRC) | ||||
| 			i = RETVAL_OK; | ||||
| 	} | ||||
| 	flush_bunzip_outbuf(bd, dst_fd); | ||||
| 	if (bd->dbuf) | ||||
| 		free(bd->dbuf); | ||||
| 	free(bd); | ||||
| 	return bunzip_errors[-i]; | ||||
| } | ||||
| 
 | ||||
/* Decompress standard input to standard output. Exits 0 on success; on
   failure prints the error message to stderr and exits 1. */
int main(int argc, char *argv[])
{
	char *err = uncompressStream(STDIN_FILENO, STDOUT_FILENO);

	if (err) {
		fprintf(stderr, "\n%s\n", err);
		return 1;
	}
	return 0;
}
							
								
								
									
										32
									
								
								third_party/compiler_rt/absvdi2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										32
									
								
								third_party/compiler_rt/absvdi2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,32 @@ | |||
| /* clang-format off */ | ||||
| /*===-- absvdi2.c - Implement __absvdi2 -----------------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  *===----------------------------------------------------------------------=== | ||||
|  * | ||||
|  * This file implements __absvdi2 for the compiler_rt library. | ||||
|  * | ||||
|  *===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
|   | ||||
| #include "third_party/compiler_rt/int_lib.h" | ||||
| 
 | ||||
| /* Returns: absolute value */ | ||||
| 
 | ||||
| /* Effects: aborts if abs(x) < 0 */ | ||||
| 
 | ||||
| COMPILER_RT_ABI di_int | ||||
| __absvdi2(di_int a) | ||||
| { | ||||
|     const int N = (int)(sizeof(di_int) * CHAR_BIT); | ||||
|     if (a == ((di_int)1 << (N-1))) | ||||
|         compilerrt_abort(); | ||||
|     const di_int t = a >> (N - 1); | ||||
|     return (a ^ t) - t; | ||||
| } | ||||
							
								
								
									
										32
									
								
								third_party/compiler_rt/absvsi2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										32
									
								
								third_party/compiler_rt/absvsi2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,32 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- absvsi2.c - Implement __absvsi2 -----------------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  * | ||||
|  * This file implements __absvsi2 for the compiler_rt library. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
|   | ||||
| #include "third_party/compiler_rt/int_lib.h" | ||||
| 
 | ||||
| /* Returns: absolute value */ | ||||
| 
 | ||||
| /* Effects: aborts if abs(x) < 0 */ | ||||
| 
 | ||||
| COMPILER_RT_ABI si_int | ||||
| __absvsi2(si_int a) | ||||
| { | ||||
|     const int N = (int)(sizeof(si_int) * CHAR_BIT); | ||||
|     if (a == (1 << (N-1))) | ||||
|         compilerrt_abort(); | ||||
|     const si_int t = a >> (N - 1); | ||||
|     return (a ^ t) - t; | ||||
| } | ||||
							
								
								
									
										37
									
								
								third_party/compiler_rt/absvti2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								third_party/compiler_rt/absvti2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,37 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- absvti2.c - Implement __absvti2 -----------------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  * | ||||
|  * This file implements __absvti2 for the compiler_rt library. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #include "third_party/compiler_rt/int_lib.h" | ||||
| 
 | ||||
| #ifdef CRT_HAS_128BIT | ||||
| 
 | ||||
| /* Returns: absolute value */ | ||||
| 
 | ||||
| /* Effects: aborts if abs(x) < 0 */ | ||||
| 
 | ||||
| COMPILER_RT_ABI ti_int | ||||
| __absvti2(ti_int a) | ||||
| { | ||||
|     const int N = (int)(sizeof(ti_int) * CHAR_BIT); | ||||
|     if (a == ((ti_int)1 << (N-1))) | ||||
|         compilerrt_abort(); | ||||
|     const ti_int s = a >> (N - 1); | ||||
|     return (a ^ s) - s; | ||||
| } | ||||
| 
 | ||||
| #endif /* CRT_HAS_128BIT */ | ||||
| 
 | ||||
							
								
								
									
										33
									
								
								third_party/compiler_rt/adddf3.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								third_party/compiler_rt/adddf3.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,33 @@ | |||
| /* clang-format off */ | ||||
| //===-- lib/adddf3.c - Double-precision addition ------------------*- C -*-===//
 | ||||
| //
 | ||||
| //                     The LLVM Compiler Infrastructure
 | ||||
| //
 | ||||
| // This file is dual licensed under the MIT and the University of Illinois Open
 | ||||
| // Source Licenses. See LICENSE.TXT for details.
 | ||||
| //
 | ||||
| //===----------------------------------------------------------------------===//
 | ||||
| //
 | ||||
| // This file implements double-precision soft-float addition with the IEEE-754
 | ||||
| // default rounding (to nearest, ties to even).
 | ||||
| //
 | ||||
| //===----------------------------------------------------------------------===//
 | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #define DOUBLE_PRECISION | ||||
| #include "third_party/compiler_rt/fp_add_impl.inc" | ||||
| 
 | ||||
| /* Double-precision soft-float addition: returns a + b as produced by __addXf3__. */ | ||||
| COMPILER_RT_ABI double __adddf3(double a, double b) | ||||
| { | ||||
|     const double sum = __addXf3__(a, b); | ||||
|     return sum; | ||||
| } | ||||
| 
 | ||||
| /* ARM EABI spelling of __adddf3: a real forwarding function when COMPILER_RT_ARMHF_TARGET is defined, otherwise a COMPILER_RT_ALIAS of __adddf3. */ | ||||
| #ifdef __ARM_EABI__ | ||||
| #ifdef COMPILER_RT_ARMHF_TARGET | ||||
| AEABI_RTABI double __aeabi_dadd(double a, double b) | ||||
| { | ||||
|     return __adddf3(a, b); | ||||
| } | ||||
| #else | ||||
| AEABI_RTABI double __aeabi_dadd(double a, double b) COMPILER_RT_ALIAS(__adddf3); | ||||
| #endif /* COMPILER_RT_ARMHF_TARGET */ | ||||
| #endif /* __ARM_EABI__ */ | ||||
							
								
								
									
										33
									
								
								third_party/compiler_rt/addsf3.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								third_party/compiler_rt/addsf3.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,33 @@ | |||
| /* clang-format off */ | ||||
| //===-- lib/addsf3.c - Single-precision addition ------------------*- C -*-===//
 | ||||
| //
 | ||||
| //                     The LLVM Compiler Infrastructure
 | ||||
| //
 | ||||
| // This file is dual licensed under the MIT and the University of Illinois Open
 | ||||
| // Source Licenses. See LICENSE.TXT for details.
 | ||||
| //
 | ||||
| //===----------------------------------------------------------------------===//
 | ||||
| //
 | ||||
| // This file implements single-precision soft-float addition with the IEEE-754
 | ||||
| // default rounding (to nearest, ties to even).
 | ||||
| //
 | ||||
| //===----------------------------------------------------------------------===//
 | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #define SINGLE_PRECISION | ||||
| #include "third_party/compiler_rt/fp_add_impl.inc" | ||||
| 
 | ||||
| /* Single-precision soft-float addition: returns a + b as produced by __addXf3__. */ | ||||
| COMPILER_RT_ABI float __addsf3(float a, float b) | ||||
| { | ||||
|     const float sum = __addXf3__(a, b); | ||||
|     return sum; | ||||
| } | ||||
| 
 | ||||
| /* ARM EABI spelling of __addsf3: a real forwarding function when COMPILER_RT_ARMHF_TARGET is defined, otherwise a COMPILER_RT_ALIAS of __addsf3. */ | ||||
| #ifdef __ARM_EABI__ | ||||
| #ifdef COMPILER_RT_ARMHF_TARGET | ||||
| AEABI_RTABI float __aeabi_fadd(float a, float b) | ||||
| { | ||||
|     return __addsf3(a, b); | ||||
| } | ||||
| #else | ||||
| AEABI_RTABI float __aeabi_fadd(float a, float b) COMPILER_RT_ALIAS(__addsf3); | ||||
| #endif /* COMPILER_RT_ARMHF_TARGET */ | ||||
| #endif /* __ARM_EABI__ */ | ||||
							
								
								
									
										28
									
								
								third_party/compiler_rt/addtf3.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										28
									
								
								third_party/compiler_rt/addtf3.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,28 @@ | |||
| /* clang-format off */ | ||||
| //===-- lib/addtf3.c - Quad-precision addition --------------------*- C -*-===//
 | ||||
| //
 | ||||
| //                     The LLVM Compiler Infrastructure
 | ||||
| //
 | ||||
| // This file is dual licensed under the MIT and the University of Illinois Open
 | ||||
| // Source Licenses. See LICENSE.TXT for details.
 | ||||
| //
 | ||||
| //===----------------------------------------------------------------------===//
 | ||||
| //
 | ||||
| // This file implements quad-precision soft-float addition with the IEEE-754
 | ||||
| // default rounding (to nearest, ties to even).
 | ||||
| //
 | ||||
| //===----------------------------------------------------------------------===//
 | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #define QUAD_PRECISION | ||||
| #include "third_party/compiler_rt/fp_lib.inc" | ||||
| 
 | ||||
| #if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) | ||||
| #include "third_party/compiler_rt/fp_add_impl.inc" | ||||
| 
 | ||||
| /* Quad-precision soft-float addition: returns a + b as produced by __addXf3__. */ | ||||
| COMPILER_RT_ABI long double __addtf3(long double a, long double b) | ||||
| { | ||||
|     const long double sum = __addXf3__(a, b); | ||||
|     return sum; | ||||
| } | ||||
| 
 | ||||
| #endif | ||||
							
								
								
									
										48
									
								
								third_party/compiler_rt/ashldi3.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										48
									
								
								third_party/compiler_rt/ashldi3.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,48 @@ | |||
| /* clang-format off */ | ||||
| /* ====-- ashldi3.c - Implement __ashldi3 -----------------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  * | ||||
|  * This file implements __ashldi3 for the compiler_rt library. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #include "third_party/compiler_rt/int_lib.h" | ||||
| 
 | ||||
| /* Returns: a << b */ | ||||
| 
 | ||||
| /* Precondition:  0 <= b < bits_in_dword */ | ||||
| 
 | ||||
| /* __ashldi3: returns a << b, computed on the two word-sized halves of a. | ||||
|  * | ||||
|  * Precondition: 0 <= b < bits_in_dword. | ||||
|  * | ||||
|  * The high half is cast to su_int before being shifted left so that the | ||||
|  * shift happens in the unsigned domain; left-shifting a negative signed | ||||
|  * value is undefined behavior in C, while the unsigned shift produces the | ||||
|  * intended bit pattern. | ||||
|  */ | ||||
| COMPILER_RT_ABI di_int | ||||
| __ashldi3(di_int a, si_int b) | ||||
| { | ||||
|     const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT); | ||||
|     dwords input; | ||||
|     dwords result; | ||||
|     input.all = a; | ||||
|     if (b & bits_in_word)  /* bits_in_word <= b < bits_in_dword */ | ||||
|     { | ||||
|         /* Every bit of the low half moves into (or past) the high half. */ | ||||
|         result.s.low = 0; | ||||
|         result.s.high = input.s.low << (b - bits_in_word); | ||||
|     } | ||||
|     else  /* 0 <= b < bits_in_word */ | ||||
|     { | ||||
|         /* b == 0 is special-cased: the carry term below would otherwise | ||||
|          * shift right by a full word. */ | ||||
|         if (b == 0) | ||||
|             return a; | ||||
|         result.s.low  = input.s.low << b; | ||||
|         result.s.high = ((su_int)input.s.high << b) | (input.s.low >> (bits_in_word - b)); | ||||
|     } | ||||
|     return result.all; | ||||
| } | ||||
| 
 | ||||
| /* ARM EABI name for the logical shift left, aliased to __ashldi3. */ | ||||
| #ifdef __ARM_EABI__ | ||||
| AEABI_RTABI di_int __aeabi_llsl(di_int a, si_int b) COMPILER_RT_ALIAS(__ashldi3); | ||||
| #endif /* __ARM_EABI__ */ | ||||
							
								
								
									
										48
									
								
								third_party/compiler_rt/ashlti3.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										48
									
								
								third_party/compiler_rt/ashlti3.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,48 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- ashlti3.c - Implement __ashlti3 -----------------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  * | ||||
|  * This file implements __ashlti3 for the compiler_rt library. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #include "third_party/compiler_rt/int_lib.h" | ||||
| 
 | ||||
| #ifdef CRT_HAS_128BIT | ||||
| 
 | ||||
| /* Returns: a << b */ | ||||
| 
 | ||||
| /* Precondition:  0 <= b < bits_in_tword */ | ||||
| 
 | ||||
| /* __ashlti3: returns a << b, computed on the two dword-sized halves of a. | ||||
|  * | ||||
|  * Precondition: 0 <= b < bits_in_tword. | ||||
|  * | ||||
|  * The high half is cast to du_int before being shifted left so that the | ||||
|  * shift happens in the unsigned domain; left-shifting a negative signed | ||||
|  * value is undefined behavior in C, while the unsigned shift produces the | ||||
|  * intended bit pattern. | ||||
|  */ | ||||
| COMPILER_RT_ABI ti_int | ||||
| __ashlti3(ti_int a, si_int b) | ||||
| { | ||||
|     const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT); | ||||
|     twords input; | ||||
|     twords result; | ||||
|     input.all = a; | ||||
|     if (b & bits_in_dword)  /* bits_in_dword <= b < bits_in_tword */ | ||||
|     { | ||||
|         /* Every bit of the low half moves into (or past) the high half. */ | ||||
|         result.s.low = 0; | ||||
|         result.s.high = input.s.low << (b - bits_in_dword); | ||||
|     } | ||||
|     else  /* 0 <= b < bits_in_dword */ | ||||
|     { | ||||
|         /* b == 0 is special-cased: the carry term below would otherwise | ||||
|          * shift right by a full dword. */ | ||||
|         if (b == 0) | ||||
|             return a; | ||||
|         result.s.low  = input.s.low << b; | ||||
|         result.s.high = ((du_int)input.s.high << b) | (input.s.low >> (bits_in_dword - b)); | ||||
|     } | ||||
|     return result.all; | ||||
| } | ||||
| 
 | ||||
| #endif /* CRT_HAS_128BIT */ | ||||
							
								
								
									
										49
									
								
								third_party/compiler_rt/ashrdi3.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										49
									
								
								third_party/compiler_rt/ashrdi3.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,49 @@ | |||
| /* clang-format off */ | ||||
| /*===-- ashrdi3.c - Implement __ashrdi3 -----------------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  * | ||||
|  * This file implements __ashrdi3 for the compiler_rt library. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #include "third_party/compiler_rt/int_lib.h" | ||||
| 
 | ||||
| /* Returns: arithmetic a >> b */ | ||||
| 
 | ||||
| /* Precondition:  0 <= b < bits_in_dword */ | ||||
| 
 | ||||
| /* __ashrdi3: returns the arithmetic (sign-propagating) shift a >> b, | ||||
|  * computed on the two word-sized halves of a. | ||||
|  * | ||||
|  * Precondition: 0 <= b < bits_in_dword. | ||||
|  * | ||||
|  * Where bits of the high half are carried down into the low half, the high | ||||
|  * half is first cast to su_int: left-shifting a negative signed value is | ||||
|  * undefined behavior in C, while the unsigned shift produces the intended | ||||
|  * bit pattern. | ||||
|  */ | ||||
| COMPILER_RT_ABI di_int | ||||
| __ashrdi3(di_int a, si_int b) | ||||
| { | ||||
|     const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT); | ||||
|     dwords input; | ||||
|     dwords result; | ||||
|     input.all = a; | ||||
|     if (b & bits_in_word)  /* bits_in_word <= b < bits_in_dword */ | ||||
|     { | ||||
|         /* result.s.high = input.s.high < 0 ? -1 : 0 */ | ||||
|         result.s.high = input.s.high >> (bits_in_word - 1); | ||||
|         result.s.low = input.s.high >> (b - bits_in_word); | ||||
|     } | ||||
|     else  /* 0 <= b < bits_in_word */ | ||||
|     { | ||||
|         /* b == 0 is special-cased: the carry term below would otherwise | ||||
|          * shift left by a full word. */ | ||||
|         if (b == 0) | ||||
|             return a; | ||||
|         result.s.high  = input.s.high >> b; | ||||
|         result.s.low = ((su_int)input.s.high << (bits_in_word - b)) | (input.s.low >> b); | ||||
|     } | ||||
|     return result.all; | ||||
| } | ||||
| 
 | ||||
| /* ARM EABI name for the arithmetic shift right, aliased to __ashrdi3. */ | ||||
| #ifdef __ARM_EABI__ | ||||
| AEABI_RTABI di_int __aeabi_lasr(di_int a, si_int b) COMPILER_RT_ALIAS(__ashrdi3); | ||||
| #endif /* __ARM_EABI__ */ | ||||
							
								
								
									
										49
									
								
								third_party/compiler_rt/ashrti3.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										49
									
								
								third_party/compiler_rt/ashrti3.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,49 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- ashrti3.c - Implement __ashrti3 -----------------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  * | ||||
|  * This file implements __ashrti3 for the compiler_rt library. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #include "third_party/compiler_rt/int_lib.h" | ||||
| 
 | ||||
| #ifdef CRT_HAS_128BIT | ||||
| 
 | ||||
| /* Returns: arithmetic a >> b */ | ||||
| 
 | ||||
| /* Precondition:  0 <= b < bits_in_tword */ | ||||
| 
 | ||||
| /* __ashrti3: returns the arithmetic (sign-propagating) shift a >> b, | ||||
|  * computed on the two dword-sized halves of a. | ||||
|  * | ||||
|  * Precondition: 0 <= b < bits_in_tword. | ||||
|  * | ||||
|  * Where bits of the high half are carried down into the low half, the high | ||||
|  * half is first cast to du_int: left-shifting a negative signed value is | ||||
|  * undefined behavior in C, while the unsigned shift produces the intended | ||||
|  * bit pattern. | ||||
|  */ | ||||
| COMPILER_RT_ABI ti_int | ||||
| __ashrti3(ti_int a, si_int b) | ||||
| { | ||||
|     const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT); | ||||
|     twords input; | ||||
|     twords result; | ||||
|     input.all = a; | ||||
|     if (b & bits_in_dword)  /* bits_in_dword <= b < bits_in_tword */ | ||||
|     { | ||||
|         /* result.s.high = input.s.high < 0 ? -1 : 0 */ | ||||
|         result.s.high = input.s.high >> (bits_in_dword - 1); | ||||
|         result.s.low = input.s.high >> (b - bits_in_dword); | ||||
|     } | ||||
|     else  /* 0 <= b < bits_in_dword */ | ||||
|     { | ||||
|         /* b == 0 is special-cased: the carry term below would otherwise | ||||
|          * shift left by a full dword. */ | ||||
|         if (b == 0) | ||||
|             return a; | ||||
|         result.s.high  = input.s.high >> b; | ||||
|         result.s.low = ((du_int)input.s.high << (bits_in_dword - b)) | (input.s.low >> b); | ||||
|     } | ||||
|     return result.all; | ||||
| } | ||||
| 
 | ||||
| #endif /* CRT_HAS_128BIT */ | ||||
							
								
								
									
										205
									
								
								third_party/compiler_rt/assembly.h
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										205
									
								
								third_party/compiler_rt/assembly.h
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,205 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- assembly.h - compiler-rt assembler support macros -----------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  * | ||||
|  * This file defines macros for use in compiler-rt assembler source. | ||||
|  * This file is not part of the interface of this library. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| #ifndef COMPILERRT_ASSEMBLY_H | ||||
| #define COMPILERRT_ASSEMBLY_H | ||||
| 
 | ||||
| /* SEPARATOR is placed between assembler statements that a macro below emits on a single line: '@' on PowerPC targets, ';' everywhere else. */ | ||||
| #if defined(__POWERPC__) || defined(__powerpc__) || defined(__ppc__) | ||||
| #define SEPARATOR @ | ||||
| #else | ||||
| #define SEPARATOR ; | ||||
| #endif | ||||
| 
 | ||||
| /* Per-object-format spellings of the directives used by the macros below: | ||||
|  *   HIDDEN(name)             - hide a symbol | ||||
|  *   LOCAL_LABEL(name)        - build a local label from name | ||||
|  *   FILE_LEVEL_DIRECTIVE     - directive emitted with each function definition | ||||
|  *   SYMBOL_IS_FUNC(name)     - mark a symbol as a function | ||||
|  *   CONST_SECTION            - switch to the read-only data section | ||||
|  *   NO_EXEC_STACK_DIRECTIVE  - non-executable-stack note, where applicable | ||||
|  * Three cases are handled: Apple, ELF, and everything else. | ||||
|  */ | ||||
| #if defined(__APPLE__) | ||||
| #define HIDDEN(name) .private_extern name | ||||
| #define LOCAL_LABEL(name) L_##name | ||||
| // tell linker it can break up file at label boundaries
 | ||||
| #define FILE_LEVEL_DIRECTIVE .subsections_via_symbols | ||||
| #define SYMBOL_IS_FUNC(name) | ||||
| #define CONST_SECTION .const | ||||
| 
 | ||||
| #define NO_EXEC_STACK_DIRECTIVE | ||||
| 
 | ||||
| #elif defined(__ELF__) | ||||
| 
 | ||||
| #define HIDDEN(name) .hidden name | ||||
| #define LOCAL_LABEL(name) .L_##name | ||||
| #define FILE_LEVEL_DIRECTIVE | ||||
| /* The type operand is spelled %function on ARM and @function on other ELF targets. */ | ||||
| #if defined(__arm__) | ||||
| #define SYMBOL_IS_FUNC(name) .type name,%function | ||||
| #else | ||||
| #define SYMBOL_IS_FUNC(name) .type name,@function | ||||
| #endif | ||||
| #define CONST_SECTION .section .rodata | ||||
| 
 | ||||
| /* Only these ELF platforms get a .note.GNU-stack section; elsewhere the directive is empty. */ | ||||
| #if defined(__GNU__) || defined(__FreeBSD__) || defined(__Fuchsia__) || \ | ||||
|     defined(__linux__) | ||||
| #define NO_EXEC_STACK_DIRECTIVE .section .note.GNU-stack,"",%progbits | ||||
| #else | ||||
| #define NO_EXEC_STACK_DIRECTIVE | ||||
| #endif | ||||
| 
 | ||||
| #else // !__APPLE__ && !__ELF__
 | ||||
| 
 | ||||
| #define HIDDEN(name) | ||||
| #define LOCAL_LABEL(name) .L ## name | ||||
| #define FILE_LEVEL_DIRECTIVE | ||||
| #define SYMBOL_IS_FUNC(name)                                                   \ | ||||
|   .def name SEPARATOR                                                          \ | ||||
|     .scl 2 SEPARATOR                                                           \ | ||||
|     .type 32 SEPARATOR                                                         \ | ||||
|   .endef | ||||
| #define CONST_SECTION .section .rdata,"rd" | ||||
| 
 | ||||
| #define NO_EXEC_STACK_DIRECTIVE | ||||
| 
 | ||||
| #endif | ||||
| 
 | ||||
| /* ARM-only helpers: choose between ARM and Thumb encodings and provide small wrappers (IT/ITT/ITE, JMP, JMPc, POP_PC, WIDE) whose expansion depends on the selected ISA. Non-ARM targets only get empty DECLARE_FUNC_ENCODING and DEFINE_CODE_STATE. */ | ||||
| #if defined(__arm__) | ||||
| 
 | ||||
| /*
 | ||||
|  * Determine actual [ARM][THUMB[1][2]] ISA using compiler predefined macros: | ||||
|  * - for '-mthumb -march=armv6' compiler defines '__thumb__' | ||||
|  * - for '-mthumb -march=armv7' compiler defines '__thumb__' and '__thumb2__' | ||||
|  */ | ||||
| #if defined(__thumb2__) || defined(__thumb__) | ||||
| #define DEFINE_CODE_STATE .thumb SEPARATOR | ||||
| #define DECLARE_FUNC_ENCODING    .thumb_func SEPARATOR | ||||
| #if defined(__thumb2__) | ||||
| #define USE_THUMB_2 | ||||
| #define IT(cond)  it cond | ||||
| #define ITT(cond) itt cond | ||||
| #define ITE(cond) ite cond | ||||
| #else | ||||
| #define USE_THUMB_1 | ||||
| #define IT(cond) | ||||
| #define ITT(cond) | ||||
| #define ITE(cond) | ||||
| #endif // defined(__thumb2__)
 | ||||
| #else // !defined(__thumb2__) && !defined(__thumb__)
 | ||||
| #define DEFINE_CODE_STATE .arm SEPARATOR | ||||
| #define DECLARE_FUNC_ENCODING | ||||
| #define IT(cond) | ||||
| #define ITT(cond) | ||||
| #define ITE(cond) | ||||
| #endif | ||||
| 
 | ||||
| #if defined(USE_THUMB_1) && defined(USE_THUMB_2) | ||||
| #error "USE_THUMB_1 and USE_THUMB_2 can't be defined together." | ||||
| #endif | ||||
| 
 | ||||
| /* ARM_HAS_BX selects the 'bx' form of JMP/JMPc below; it is set for ARMv4T and for __ARM_ARCH >= 5. */ | ||||
| #if defined(__ARM_ARCH_4T__) || __ARM_ARCH >= 5 | ||||
| #define ARM_HAS_BX | ||||
| #endif | ||||
| /* Define __ARM_FEATURE_CLZ here when the compiler did not, except for Thumb-1 and for architectures older than the ones listed. */ | ||||
| #if !defined(__ARM_FEATURE_CLZ) && !defined(USE_THUMB_1) &&  \ | ||||
|     (__ARM_ARCH >= 6 || (__ARM_ARCH == 5 && !defined(__ARM_ARCH_5__))) | ||||
| #define __ARM_FEATURE_CLZ | ||||
| #endif | ||||
| 
 | ||||
| /* JMP(r) jumps to the address in register r; JMPc(r, c) is the same with condition suffix c pasted onto the mnemonic. */ | ||||
| #ifdef ARM_HAS_BX | ||||
| #define JMP(r) bx r | ||||
| #define JMPc(r, c) bx##c r | ||||
| #else | ||||
| #define JMP(r) mov pc, r | ||||
| #define JMPc(r, c) mov##c pc, r | ||||
| #endif | ||||
| 
 | ||||
| // pop {pc} can't switch Thumb mode on ARMv4T
 | ||||
| #if __ARM_ARCH >= 5 | ||||
| #define POP_PC() pop {pc} | ||||
| #else | ||||
| #define POP_PC()                                                               \ | ||||
|   pop {ip};                                                                    \ | ||||
|   JMP(ip) | ||||
| #endif | ||||
| 
 | ||||
| /* WIDE(op) appends the '.w' suffix to op under Thumb-2 and leaves op unchanged otherwise. */ | ||||
| #if defined(USE_THUMB_2) | ||||
| #define WIDE(op) op.w | ||||
| #else | ||||
| #define WIDE(op) op | ||||
| #endif | ||||
| #else // !defined(__arm__)
 | ||||
| #define DECLARE_FUNC_ENCODING | ||||
| #define DEFINE_CODE_STATE | ||||
| #endif | ||||
| 
 | ||||
| /* SYMBOL_NAME(name) prepends __USER_LABEL_PREFIX__ to name. The GLUE/GLUE2 pair adds one level of indirection so the prefix macro is expanded before the ## paste. */ | ||||
| #define GLUE2(a, b) a##b | ||||
| #define GLUE(a, b) GLUE2(a, b) | ||||
| #define SYMBOL_NAME(name) GLUE(__USER_LABEL_PREFIX__, name) | ||||
| 
 | ||||
| /* DECLARE_SYMBOL_VISIBILITY(name) hides SYMBOL_NAME(name) when VISIBILITY_HIDDEN is defined and expands to nothing otherwise. It applies SYMBOL_NAME itself, so callers pass the unprefixed name. */ | ||||
| #ifdef VISIBILITY_HIDDEN | ||||
| #define DECLARE_SYMBOL_VISIBILITY(name)                                        \ | ||||
|   HIDDEN(SYMBOL_NAME(name)) SEPARATOR | ||||
| #else | ||||
| #define DECLARE_SYMBOL_VISIBILITY(name) | ||||
| #endif | ||||
| 
 | ||||
| /* Opens the definition of the global function `name`: emits the code-state and file-level directives, .globl, the function-type marker, the optional hidden-visibility directive and the function-encoding directive, then the label SYMBOL_NAME(name). */ | ||||
| #define DEFINE_COMPILERRT_FUNCTION(name)                                       \ | ||||
|   DEFINE_CODE_STATE                                                            \ | ||||
|   FILE_LEVEL_DIRECTIVE SEPARATOR                                               \ | ||||
|   .globl SYMBOL_NAME(name) SEPARATOR                                           \ | ||||
|   SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR                                  \ | ||||
|   DECLARE_SYMBOL_VISIBILITY(name)                                              \ | ||||
|   DECLARE_FUNC_ENCODING                                                        \ | ||||
|   SYMBOL_NAME(name): | ||||
| 
 | ||||
| /* Same as DEFINE_COMPILERRT_FUNCTION, but always emits .thumb_func instead of DECLARE_FUNC_ENCODING. */ | ||||
| #define DEFINE_COMPILERRT_THUMB_FUNCTION(name)                                 \ | ||||
|   DEFINE_CODE_STATE                                                            \ | ||||
|   FILE_LEVEL_DIRECTIVE SEPARATOR                                               \ | ||||
|   .globl SYMBOL_NAME(name) SEPARATOR                                           \ | ||||
|   SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR                                  \ | ||||
|   DECLARE_SYMBOL_VISIBILITY(name) SEPARATOR                                    \ | ||||
|   .thumb_func SEPARATOR                                                        \ | ||||
|   SYMBOL_NAME(name): | ||||
| 
 | ||||
| /* Same as DEFINE_COMPILERRT_FUNCTION, but the symbol is always marked HIDDEN, regardless of VISIBILITY_HIDDEN. */ | ||||
| #define DEFINE_COMPILERRT_PRIVATE_FUNCTION(name)                               \ | ||||
|   DEFINE_CODE_STATE                                                            \ | ||||
|   FILE_LEVEL_DIRECTIVE SEPARATOR                                               \ | ||||
|   .globl SYMBOL_NAME(name) SEPARATOR                                           \ | ||||
|   SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR                                  \ | ||||
|   HIDDEN(SYMBOL_NAME(name)) SEPARATOR                                          \ | ||||
|   DECLARE_FUNC_ENCODING                                                        \ | ||||
|   SYMBOL_NAME(name): | ||||
| 
 | ||||
| /* Hidden function whose label is `name` verbatim: no SYMBOL_NAME prefix is applied and no FILE_LEVEL_DIRECTIVE is emitted. */ | ||||
| #define DEFINE_COMPILERRT_PRIVATE_FUNCTION_UNMANGLED(name)                     \ | ||||
|   DEFINE_CODE_STATE                                                            \ | ||||
|   .globl name SEPARATOR                                                        \ | ||||
|   SYMBOL_IS_FUNC(name) SEPARATOR                                               \ | ||||
|   HIDDEN(name) SEPARATOR                                                       \ | ||||
|   DECLARE_FUNC_ENCODING                                                        \ | ||||
|   name: | ||||
| 
 | ||||
| /* Declares the global function symbol `name` as an alias of `target` (via .set). DECLARE_SYMBOL_VISIBILITY receives the unprefixed name because it applies SYMBOL_NAME itself; handing it an already-prefixed name would add __USER_LABEL_PREFIX__ twice and hide the wrong symbol when VISIBILITY_HIDDEN is defined. */ | ||||
| #define DEFINE_COMPILERRT_FUNCTION_ALIAS(name, target)                         \ | ||||
|   .globl SYMBOL_NAME(name) SEPARATOR                                           \ | ||||
|   SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR                                  \ | ||||
|   DECLARE_SYMBOL_VISIBILITY(name) SEPARATOR                                    \ | ||||
|   .set SYMBOL_NAME(name), SYMBOL_NAME(target) SEPARATOR | ||||
| 
 | ||||
| /* DEFINE_AEABI_FUNCTION_ALIAS creates the alias only when __ARM_EABI__ is defined; otherwise it expands to nothing. */ | ||||
| #if defined(__ARM_EABI__) | ||||
| #define DEFINE_AEABI_FUNCTION_ALIAS(aeabi_name, name)                          \ | ||||
|   DEFINE_COMPILERRT_FUNCTION_ALIAS(aeabi_name, name) | ||||
| #else | ||||
| #define DEFINE_AEABI_FUNCTION_ALIAS(aeabi_name, name) | ||||
| #endif | ||||
| 
 | ||||
| /* END_COMPILERRT_FUNCTION closes a function opened above: on ELF it emits .size as the distance from the function's label to the current location; elsewhere it is empty. */ | ||||
| #ifdef __ELF__ | ||||
| #define END_COMPILERRT_FUNCTION(name)                                          \ | ||||
|   .size SYMBOL_NAME(name), . - SYMBOL_NAME(name) | ||||
| #else | ||||
| #define END_COMPILERRT_FUNCTION(name) | ||||
| #endif | ||||
| 
 | ||||
| #endif /* COMPILERRT_ASSEMBLY_H */ | ||||
							
								
								
									
										30
									
								
								third_party/compiler_rt/bswapdi2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								third_party/compiler_rt/bswapdi2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,30 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- bswapdi2.c - Implement __bswapdi2 ---------------------------------===
 | ||||
|  * | ||||
|  *               The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  * | ||||
|  * This file implements __bswapdi2 for the compiler_rt library. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #include "third_party/compiler_rt/int_lib.h" | ||||
| 
 | ||||
| /* Reverses the byte order of the 64-bit value u: the most significant byte becomes the least significant, and so on. */ | ||||
| COMPILER_RT_ABI uint64_t __bswapdi2(uint64_t u) { | ||||
|   const uint64_t byte_mask = 0xffULL; | ||||
|   uint64_t swapped = 0; | ||||
|   /* Extract each source byte at bit 0, then place it at its mirrored position. */ | ||||
|   swapped |= (u >> 56) & byte_mask; | ||||
|   swapped |= ((u >> 48) & byte_mask) << 8; | ||||
|   swapped |= ((u >> 40) & byte_mask) << 16; | ||||
|   swapped |= ((u >> 32) & byte_mask) << 24; | ||||
|   swapped |= ((u >> 24) & byte_mask) << 32; | ||||
|   swapped |= ((u >> 16) & byte_mask) << 40; | ||||
|   swapped |= ((u >> 8) & byte_mask) << 48; | ||||
|   swapped |= (u & byte_mask) << 56; | ||||
|   return swapped; | ||||
| } | ||||
							
								
								
									
										26
									
								
								third_party/compiler_rt/bswapsi2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								third_party/compiler_rt/bswapsi2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,26 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- bswapsi2.c - Implement __bswapsi2 ---------------------------------===
 | ||||
|  * | ||||
|  *               The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  * | ||||
|  * This file implements __bswapsi2 for the compiler_rt library. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #include "third_party/compiler_rt/int_lib.h" | ||||
| 
 | ||||
| /* Reverses the byte order of the 32-bit value u: the most significant byte becomes the least significant, and so on. */ | ||||
| COMPILER_RT_ABI uint32_t __bswapsi2(uint32_t u) { | ||||
|   const uint32_t byte_mask = 0xffu; | ||||
|   uint32_t swapped = 0; | ||||
|   /* Extract each source byte at bit 0, then place it at its mirrored position. */ | ||||
|   swapped |= (u >> 24) & byte_mask; | ||||
|   swapped |= ((u >> 16) & byte_mask) << 8; | ||||
|   swapped |= ((u >> 8) & byte_mask) << 16; | ||||
|   swapped |= (u & byte_mask) << 24; | ||||
|   return swapped; | ||||
| } | ||||
							
								
								
									
										183
									
								
								third_party/compiler_rt/clear_cache.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										183
									
								
								third_party/compiler_rt/clear_cache.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,183 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- clear_cache.c - Implement __clear_cache ---------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #include "third_party/compiler_rt/int_lib.h" | ||||
| 
 | ||||
| #if __APPLE__ | ||||
|   #include <libkern/OSCacheControl.h> | ||||
| #endif | ||||
| 
 | ||||
| #ifdef _WIN32 | ||||
| /* Hand-written prototypes for the two Win32 calls used in this file, declared here because the GCC mode driver does not handle the newer SDKs as well as needed. */ | ||||
| uintptr_t GetCurrentProcess(void); | ||||
| uint32_t FlushInstructionCache(uintptr_t hProcess, void *lpBaseAddress, | ||||
|                                uintptr_t dwSize); | ||||
| #endif /* _WIN32 */ | ||||
| 
 | ||||
| #if defined(__linux__) && defined(__mips__) | ||||
|   #if defined(__ANDROID__) && defined(__LP64__) | ||||
|     /*
 | ||||
|      * clear_mips_cache - Invalidates instruction cache for Mips. | ||||
|      * | ||||
|      * Addr is the start of the range and Size its length in bytes. | ||||
|      * The asm returns early when Size is 0 or when the SYNCI step read via | ||||
|      * rdhwr is 0. Otherwise it issues synci at Addr, advancing by the step | ||||
|      * until Addr reaches Addr + Size, then executes sync and finishes with | ||||
|      * jr.hb to clear instruction hazards. | ||||
|      * | ||||
|      * Both operands are read-write ("+r") because the asm overwrites them; | ||||
|      * $at, $ra, $v0 and memory are listed as clobbered. | ||||
|      */ | ||||
|     static void clear_mips_cache(const void* Addr, size_t Size) { | ||||
|       __asm__ volatile ( | ||||
|         ".set push\n" | ||||
|         ".set noreorder\n" | ||||
|         ".set noat\n" | ||||
|         "beq %[Size], $zero, 20f\n"          /* If size == 0, branch around. */ | ||||
|         "nop\n" | ||||
|         "daddu %[Size], %[Addr], %[Size]\n"  /* Calculate end address + 1 */ | ||||
|         "rdhwr $v0, $1\n"                    /* Get step size for SYNCI.
 | ||||
|                                                 $1 is $HW_SYNCI_Step */ | ||||
|         "beq $v0, $zero, 20f\n"              /* If no caches require
 | ||||
|                                                 synchronization, branch | ||||
|                                                 around. */ | ||||
|         "nop\n" | ||||
|         "10:\n" | ||||
|         "synci 0(%[Addr])\n"                 /* Synchronize all caches around
 | ||||
|                                                 address. */ | ||||
|         "daddu %[Addr], %[Addr], $v0\n"      /* Add step size. */ | ||||
|         "sltu $at, %[Addr], %[Size]\n"       /* Compare current with end
 | ||||
|                                                 address. */ | ||||
|         "bne $at, $zero, 10b\n"              /* Branch if more to do. */ | ||||
|         "nop\n" | ||||
|         "sync\n"                             /* Clear memory hazards. */ | ||||
|         "20:\n" | ||||
|         "bal 30f\n" | ||||
|         "nop\n" | ||||
|         "30:\n" | ||||
|         "daddiu $ra, $ra, 12\n"              /* $ra has a value of $pc here.
 | ||||
|                                                 Add offset of 12 to point to the | ||||
|                                                 instruction after the last nop. | ||||
|                                               */ | ||||
|         "jr.hb $ra\n"                        /* Return, clearing instruction
 | ||||
|                                                 hazards. */ | ||||
|         "nop\n" | ||||
|         ".set pop\n" | ||||
|         : [Addr] "+r"(Addr), [Size] "+r"(Size) | ||||
|         :: "at", "ra", "v0", "memory" | ||||
|       ); | ||||
|     } | ||||
|   #endif | ||||
| #endif | ||||
| 
 | ||||
| /*
 | ||||
|  * The compiler generates calls to __clear_cache() when creating | ||||
|  * trampoline functions on the stack for use with nested functions. | ||||
|  * It is expected to invalidate the instruction cache for the | ||||
|  * specified range. | ||||
|  * | ||||
|  * start: address of the first byte of the range. | ||||
|  * end:   address just past the range; every branch below either derives | ||||
|  *        the length as end - start or iterates while the address is < end. | ||||
|  * | ||||
|  * The work is selected at compile time from the target macros: nothing on | ||||
|  * x86, an operating-system service where one is used | ||||
|  * (FlushInstructionCache, sysarch, the cacheflush syscall, cacheflush(), | ||||
|  * sys_icache_invalidate), explicit cache maintenance instructions on | ||||
|  * AArch64 and PowerPC64, and compilerrt_abort() on every target that has | ||||
|  * no implementation here. | ||||
|  */ | ||||
| 
 | ||||
| void __clear_cache(void *start, void *end) { | ||||
| #if __i386__ || __x86_64__ || defined(_M_IX86) || defined(_M_X64) | ||||
| /*
 | ||||
|  * Intel processors have a unified instruction and data cache | ||||
|  * so there is nothing to do. | ||||
|  */ | ||||
| #elif defined(_WIN32) && (defined(__arm__) || defined(__aarch64__)) | ||||
|     FlushInstructionCache(GetCurrentProcess(), start, end - start); /* NOTE(review): end - start subtracts void pointers, which relies on a compiler extension */ | ||||
| #elif defined(__arm__) && !defined(__APPLE__) | ||||
|     #if defined(__FreeBSD__) || defined(__NetBSD__) | ||||
|         struct arm_sync_icache_args arg; | ||||
| 
 | ||||
|         arg.addr = (uintptr_t)start; | ||||
|         arg.len = (uintptr_t)end - (uintptr_t)start; /* length in bytes */ | ||||
| 
 | ||||
|         sysarch(ARM_SYNC_ICACHE, &arg); /* return value is not checked */ | ||||
|     #elif defined(__linux__) | ||||
|     /*
 | ||||
|      * We used to include asm/unistd.h for the __ARM_NR_cacheflush define, but | ||||
|      * it also brought many other unused defines, as well as a dependency on | ||||
|      * kernel headers to be installed. | ||||
|      * | ||||
|      * This value is stable at least since Linux 3.13 and should remain so for | ||||
|      * compatibility reasons, warranting its re-definition here. | ||||
|      */ | ||||
|     #define __ARM_NR_cacheflush 0x0f0002 | ||||
|          register int start_reg __asm("r0") = (int) (intptr_t) start; /* r0: range start going in, syscall result coming out */ | ||||
|          const register int end_reg __asm("r1") = (int) (intptr_t) end; /* r1: range end */ | ||||
|          const register int flags __asm("r2") = 0; /* r2: flags, always zero here */ | ||||
|          const register int syscall_nr __asm("r7") = __ARM_NR_cacheflush; /* r7: syscall number */ | ||||
|          __asm __volatile("svc 0x0" | ||||
|                           : "=r"(start_reg) | ||||
|                           : "r"(syscall_nr), "r"(start_reg), "r"(end_reg), | ||||
|                             "r"(flags)); | ||||
|          assert(start_reg == 0 && "Cache flush syscall failed."); /* the result is only inspected when assert() is active */ | ||||
|     #else | ||||
|         compilerrt_abort(); /* no implementation for this ARM target */ | ||||
|     #endif | ||||
| #elif defined(__linux__) && defined(__mips__) | ||||
|   const uintptr_t start_int = (uintptr_t) start; | ||||
|   const uintptr_t end_int = (uintptr_t) end; | ||||
|     #if defined(__ANDROID__) && defined(__LP64__) | ||||
|         // Call synci implementation for short address range.
 | ||||
|         const uintptr_t address_range_limit = 256; /* ranges of up to this many bytes use clear_mips_cache() instead of the syscall */ | ||||
|         if ((end_int - start_int) <= address_range_limit) { | ||||
|             clear_mips_cache(start, (end_int - start_int)); | ||||
|         } else { | ||||
|             syscall(__NR_cacheflush, start, (end_int - start_int), BCACHE); | ||||
|         } | ||||
|     #else | ||||
|         syscall(__NR_cacheflush, start, (end_int - start_int), BCACHE); /* return value is not checked */ | ||||
|     #endif | ||||
| #elif defined(__mips__) && defined(__OpenBSD__) | ||||
|   cacheflush(start, (uintptr_t)end - (uintptr_t)start, BCACHE); | ||||
| #elif defined(__aarch64__) && !defined(__APPLE__) | ||||
|   uint64_t xstart = (uint64_t)(uintptr_t) start; | ||||
|   uint64_t xend = (uint64_t)(uintptr_t) end; | ||||
|   uint64_t addr; | ||||
| 
 | ||||
|   // Get Cache Type Info
 | ||||
|   uint64_t ctr_el0; | ||||
|   __asm __volatile("mrs %0, ctr_el0" : "=r"(ctr_el0)); | ||||
| 
 | ||||
|   /*
 | ||||
|    * dc & ic instructions must use 64bit registers so we don't use | ||||
|    * uintptr_t in case this runs in an ILP32 environment. | ||||
|    */ | ||||
|   const size_t dcache_line_size = 4 << ((ctr_el0 >> 16) & 15); /* 4 << bits [19:16] of ctr_el0 */ | ||||
|   for (addr = xstart & ~(dcache_line_size - 1); addr < xend; /* begin at the line boundary at or below xstart */ | ||||
|        addr += dcache_line_size) | ||||
|     __asm __volatile("dc cvau, %0" :: "r"(addr)); | ||||
|   __asm __volatile("dsb ish"); /* issued once, after the dc cvau loop and before any ic ivau */ | ||||
| 
 | ||||
|   const size_t icache_line_size = 4 << ((ctr_el0 >> 0) & 15); /* 4 << bits [3:0] of ctr_el0 */ | ||||
|   for (addr = xstart & ~(icache_line_size - 1); addr < xend; | ||||
|        addr += icache_line_size) | ||||
|     __asm __volatile("ic ivau, %0" :: "r"(addr)); | ||||
|   __asm __volatile("isb sy"); /* final step, after the ic ivau loop */ | ||||
| #elif defined (__powerpc64__) | ||||
|   const size_t line_size = 32; /* hard-coded; NOTE(review): confirm this suits every supported PowerPC64 target */ | ||||
|   const size_t len = (uintptr_t)end - (uintptr_t)start; | ||||
| 
 | ||||
|   const uintptr_t mask = ~(line_size - 1); | ||||
|   const uintptr_t start_line = ((uintptr_t)start) & mask; /* round the start down to a line boundary */ | ||||
|   const uintptr_t end_line = ((uintptr_t)start + len + line_size - 1) & mask; /* round the end up to a line boundary */ | ||||
| 
 | ||||
|   for (uintptr_t line = start_line; line < end_line; line += line_size) | ||||
|     __asm__ volatile("dcbf 0, %0" : : "r"(line)); | ||||
|   __asm__ volatile("sync"); /* issued once, between the dcbf pass and the icbi pass */ | ||||
| 
 | ||||
|   for (uintptr_t line = start_line; line < end_line; line += line_size) | ||||
|     __asm__ volatile("icbi 0, %0" : : "r"(line)); | ||||
|   __asm__ volatile("isync"); /* final step, after the icbi pass */ | ||||
| #else | ||||
|     #if __APPLE__ | ||||
|         /* On Darwin, sys_icache_invalidate() provides this functionality */ | ||||
|         sys_icache_invalidate(start, end-start); /* NOTE(review): void pointer subtraction again relies on a compiler extension */ | ||||
|     #else | ||||
|         compilerrt_abort(); /* no implementation for this target */ | ||||
|     #endif | ||||
| #endif | ||||
| } | ||||
							
								
								
									
										43
									
								
								third_party/compiler_rt/clzdi2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										43
									
								
								third_party/compiler_rt/clzdi2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,43 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- clzdi2.c - Implement __clzdi2 -------------------------------------===
 | ||||
|  * | ||||
|  *               The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  * | ||||
|  * This file implements __clzdi2 for the compiler_rt library. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #include "third_party/compiler_rt/int_lib.h" | ||||
| 
 | ||||
| /* Returns: the number of leading 0-bits */ | ||||
| 
 | ||||
| #if !defined(__clang__) &&                                                     \ | ||||
|     ((defined(__sparc__) && defined(__arch64__)) ||                            \ | ||||
|      defined(__mips64) ||                                                      \ | ||||
|      (defined(__riscv) && __SIZEOF_POINTER__ >= 8)) | ||||
| /* On 64-bit architectures with neither a native clz instruction nor a native
 | ||||
|  * ctz instruction, gcc resolves __builtin_clz to __clzdi2 rather than | ||||
|  * __clzsi2, leading to infinite recursion. | ||||
|  * | ||||
|  * For the targets selected above, every later use of __builtin_clz in this | ||||
|  * file is therefore redirected to the single-word routine __clzsi2, which | ||||
|  * is only declared here and must be provided by another translation unit. */ | ||||
| #define __builtin_clz(a) __clzsi2(a) | ||||
| extern si_int __clzsi2(si_int); | ||||
| #endif | ||||
| 
 | ||||
| /* Counts the leading zero bits of the double-word a. Precondition: a != 0 */ | ||||
| 
 | ||||
| COMPILER_RT_ABI si_int | ||||
| __clzdi2(di_int a) | ||||
| { | ||||
|     dwords parts; | ||||
|     parts.all = a; | ||||
|     /* All ones when the upper word is zero, all zeros otherwise. */ | ||||
|     const si_int high_empty = -(parts.s.high == 0); | ||||
|     /* A zero upper word contributes its full width in leading zero bits. */ | ||||
|     const si_int skipped_bits = | ||||
|         high_empty & ((si_int)(sizeof(si_int) * CHAR_BIT)); | ||||
|     /* Branch-free pick of the word that holds the most significant set bit. */ | ||||
|     return skipped_bits + | ||||
|            __builtin_clz((parts.s.low & high_empty) | (parts.s.high & ~high_empty)); | ||||
| } | ||||
							
								
								
									
										56
									
								
								third_party/compiler_rt/clzsi2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										56
									
								
								third_party/compiler_rt/clzsi2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,56 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- clzsi2.c - Implement __clzsi2 -------------------------------------===
 | ||||
|  * | ||||
|  *               The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  * | ||||
|  * This file implements __clzsi2 for the compiler_rt library. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #include "third_party/compiler_rt/int_lib.h" | ||||
| 
 | ||||
| /* Returns: the number of leading 0-bits */ | ||||
| 
 | ||||
| /* Precondition: a != 0 */ | ||||
| 
 | ||||
| COMPILER_RT_ABI si_int | ||||
| __clzsi2(si_int a) | ||||
| { | ||||
|     su_int bits = (su_int)a;  /* the part of the value still to be examined */ | ||||
|     su_int zeros;             /* leading zero bits counted so far */ | ||||
|     si_int step;              /* width skipped by the current halving step, or 0 */ | ||||
| 
 | ||||
|     /* Binary search for the highest set bit: at every width, either the upper | ||||
|      * half is empty (count it and keep the lower half) or it is not (shift | ||||
|      * the upper half down and keep examining it). */ | ||||
|     step = ((bits & 0xFFFF0000) == 0) ? 16 : 0; | ||||
|     bits >>= 16 - step;        /* bits = [0 - 0xFFFF] */ | ||||
|     zeros = step;              /* zeros = [0, 16] */ | ||||
| 
 | ||||
|     step = ((bits & 0xFF00) == 0) ? 8 : 0; | ||||
|     bits >>= 8 - step;         /* bits = [0 - 0xFF] */ | ||||
|     zeros += step;             /* zeros = [0, 8, 16, 24] */ | ||||
| 
 | ||||
|     step = ((bits & 0xF0) == 0) ? 4 : 0; | ||||
|     bits >>= 4 - step;         /* bits = [0 - 0xF] */ | ||||
|     zeros += step;             /* zeros = [0, 4, 8, ..., 28] */ | ||||
| 
 | ||||
|     step = ((bits & 0xC) == 0) ? 2 : 0; | ||||
|     bits >>= 2 - step;         /* bits = [0 - 3] */ | ||||
|     zeros += step;             /* zeros = [0 - 30] and is even */ | ||||
| 
 | ||||
|     /* Two bits remain: 2 or 3 adds nothing, 1 adds one, 0 adds two. */ | ||||
|     if (bits & 2) | ||||
|         return zeros; | ||||
|     return zeros + (2 - bits); | ||||
| } | ||||
							
								
								
									
										36
									
								
								third_party/compiler_rt/clzti2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								third_party/compiler_rt/clzti2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,36 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- clzti2.c - Implement __clzti2 -------------------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  * | ||||
|  * This file implements __clzti2 for the compiler_rt library. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #include "third_party/compiler_rt/int_lib.h" | ||||
| 
 | ||||
| #ifdef CRT_HAS_128BIT | ||||
| 
 | ||||
| /* Returns: the number of leading 0-bits */ | ||||
| 
 | ||||
| /* Counts the leading zero bits of the quad-word a. Precondition: a != 0 */ | ||||
| 
 | ||||
| COMPILER_RT_ABI si_int | ||||
| __clzti2(ti_int a) | ||||
| { | ||||
|     twords parts; | ||||
|     parts.all = a; | ||||
|     /* All ones when the upper half is zero, all zeros otherwise. */ | ||||
|     const di_int high_empty = -(parts.s.high == 0); | ||||
|     /* A zero upper half contributes its full width in leading zero bits. */ | ||||
|     const si_int skipped_bits = | ||||
|         (si_int)high_empty & ((si_int)(sizeof(di_int) * CHAR_BIT)); | ||||
|     /* Branch-free pick of the half that holds the most significant set bit. */ | ||||
|     return skipped_bits + | ||||
|            __builtin_clzll((parts.s.low & high_empty) | (parts.s.high & ~high_empty)); | ||||
| } | ||||
| 
 | ||||
| #endif /* CRT_HAS_128BIT */ | ||||
							
								
								
									
										54
									
								
								third_party/compiler_rt/cmpdi2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										54
									
								
								third_party/compiler_rt/cmpdi2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,54 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- cmpdi2.c - Implement __cmpdi2 -------------------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  * | ||||
|  * This file implements __cmpdi2 for the compiler_rt library. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #include "third_party/compiler_rt/int_lib.h" | ||||
| 
 | ||||
| /* Three-way comparison of two double-word integers.
 | ||||
|  * | ||||
|  * Returns: 0 if (a <  b) | ||||
|  *          1 if (a == b) | ||||
|  *          2 if (a >  b) | ||||
|  */ | ||||
| 
 | ||||
| COMPILER_RT_ABI si_int | ||||
| __cmpdi2(di_int a, di_int b) | ||||
| { | ||||
|     dwords lhs; | ||||
|     dwords rhs; | ||||
|     lhs.all = a; | ||||
|     rhs.all = b; | ||||
|     /* The high words decide the order unless they are equal. */ | ||||
|     if (lhs.s.high != rhs.s.high) | ||||
|         return lhs.s.high < rhs.s.high ? 0 : 2; | ||||
|     /* Equal high words: the low words break the tie. */ | ||||
|     if (lhs.s.low != rhs.s.low) | ||||
|         return lhs.s.low < rhs.s.low ? 0 : 2; | ||||
|     return 1; | ||||
| } | ||||
| 
 | ||||
| #ifdef __ARM_EABI__ | ||||
| /* ARM EABI entry point: shifts the 0/1/2 result of __cmpdi2 down by one.
 | ||||
|  * | ||||
|  * Returns: -1 if (a <  b) | ||||
|  *           0 if (a == b) | ||||
|  *           1 if (a >  b) | ||||
|  */ | ||||
| COMPILER_RT_ABI si_int | ||||
| __aeabi_lcmp(di_int a, di_int b) | ||||
| { | ||||
| 	const si_int zero_based = __cmpdi2(a, b); | ||||
| 	return zero_based - 1; | ||||
| } | ||||
| #endif | ||||
| 
 | ||||
							
								
								
									
										45
									
								
								third_party/compiler_rt/cmpti2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										45
									
								
								third_party/compiler_rt/cmpti2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,45 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- cmpti2.c - Implement __cmpti2 -------------------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  * | ||||
|  * This file implements __cmpti2 for the compiler_rt library. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #include "third_party/compiler_rt/int_lib.h" | ||||
| 
 | ||||
| #ifdef CRT_HAS_128BIT | ||||
| 
 | ||||
| /* Three-way comparison of two quad-word integers.
 | ||||
|  * | ||||
|  * Returns: 0 if (a <  b) | ||||
|  *          1 if (a == b) | ||||
|  *          2 if (a >  b) | ||||
|  */ | ||||
| 
 | ||||
| COMPILER_RT_ABI si_int | ||||
| __cmpti2(ti_int a, ti_int b) | ||||
| { | ||||
|     twords lhs; | ||||
|     twords rhs; | ||||
|     lhs.all = a; | ||||
|     rhs.all = b; | ||||
|     /* The high halves decide the order unless they are equal. */ | ||||
|     if (lhs.s.high != rhs.s.high) | ||||
|         return lhs.s.high < rhs.s.high ? 0 : 2; | ||||
|     /* Equal high halves: the low halves break the tie. */ | ||||
|     if (lhs.s.low != rhs.s.low) | ||||
|         return lhs.s.low < rhs.s.low ? 0 : 2; | ||||
|     return 1; | ||||
| } | ||||
| 
 | ||||
| #endif /* CRT_HAS_128BIT */ | ||||
							
								
								
									
										156
									
								
								third_party/compiler_rt/comparedf2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										156
									
								
								third_party/compiler_rt/comparedf2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,156 @@ | |||
| /* clang-format off */ | ||||
| //===-- lib/comparedf2.c - Double-precision comparisons -----------*- C -*-===//
 | ||||
| //
 | ||||
| //                     The LLVM Compiler Infrastructure
 | ||||
| //
 | ||||
| // This file is dual licensed under the MIT and the University of Illinois Open
 | ||||
| // Source Licenses. See LICENSE.TXT for details.
 | ||||
| //
 | ||||
| //===----------------------------------------------------------------------===//
 | ||||
| //
 | ||||
| // This file implements the following soft-float comparison routines:
 | ||||
| //
 | ||||
| //   __eqdf2   __gedf2   __unorddf2
 | ||||
| //   __ledf2   __gtdf2
 | ||||
| //   __ltdf2
 | ||||
| //   __nedf2
 | ||||
| //
 | ||||
| // The semantics of the routines grouped in each column are identical, so there
 | ||||
| // is a single implementation for each, and wrappers to provide the other names.
 | ||||
| //
 | ||||
| // The main routines behave as follows:
 | ||||
| //
 | ||||
| //   __ledf2(a,b) returns -1 if a < b
 | ||||
| //                         0 if a == b
 | ||||
| //                         1 if a > b
 | ||||
| //                         1 if either a or b is NaN
 | ||||
| //
 | ||||
| //   __gedf2(a,b) returns -1 if a < b
 | ||||
| //                         0 if a == b
 | ||||
| //                         1 if a > b
 | ||||
| //                        -1 if either a or b is NaN
 | ||||
| //
 | ||||
| //   __unorddf2(a,b) returns 0 if both a and b are numbers
 | ||||
| //                           1 if either a or b is NaN
 | ||||
| //
 | ||||
| // Note that __ledf2( ) and __gedf2( ) are identical except in their handling of
 | ||||
| // NaN values.
 | ||||
| //
 | ||||
| //===----------------------------------------------------------------------===//
 | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #define DOUBLE_PRECISION | ||||
| #include "third_party/compiler_rt/fp_lib.inc" | ||||
| 
 | ||||
| enum LE_RESULT { /* result codes of __ledf2 and of the wrappers that forward to it */ | ||||
|     LE_LESS      = -1, /* a < b */ | ||||
|     LE_EQUAL     =  0, /* a == b, including a pair of zeros */ | ||||
|     LE_GREATER   =  1, /* a > b */ | ||||
|     LE_UNORDERED =  1  /* either operand is NaN; deliberately the same value as LE_GREATER */ | ||||
| }; | ||||
| 
 | ||||
| COMPILER_RT_ABI enum LE_RESULT | ||||
| __ledf2(fp_t a, fp_t b) { | ||||
|     // Bit patterns of the operands read as signed integers, plus the same
 | ||||
|     // patterns restricted to absMask.
 | ||||
|     const srep_t lhsBits = toRep(a); | ||||
|     const srep_t rhsBits = toRep(b); | ||||
|     const rep_t lhsMag = lhsBits & absMask; | ||||
|     const rep_t rhsMag = rhsBits & absMask; | ||||
| 
 | ||||
|     // A magnitude above the infinity pattern is a NaN: the pair is unordered.
 | ||||
|     if (lhsMag > infRep) return LE_UNORDERED; | ||||
|     if (rhsMag > infRep) return LE_UNORDERED; | ||||
| 
 | ||||
|     // Two zeros are equal whatever their signs, and so are identical patterns.
 | ||||
|     if (lhsMag == 0 && rhsMag == 0) return LE_EQUAL; | ||||
|     if (lhsBits == rhsBits) return LE_EQUAL; | ||||
| 
 | ||||
|     // With at least one non-negative operand, the signed integer order of the
 | ||||
|     // patterns matches the floating-point order.
 | ||||
|     if ((lhsBits & rhsBits) >= 0) | ||||
|         return lhsBits < rhsBits ? LE_LESS : LE_GREATER; | ||||
| 
 | ||||
|     // Both operands are negative, so the integer order is reversed.  (This
 | ||||
|     // assumes a twos- or ones-complement integer representation; the flip
 | ||||
|     // would be incorrect for sign-magnitude integers.)
 | ||||
|     return lhsBits > rhsBits ? LE_LESS : LE_GREATER; | ||||
| } | ||||
| 
 | ||||
| #if defined(__ELF__) | ||||
| // Alias for libgcc compatibility: presumably exposes __ledf2 under the additional name __cmpdf2 (FNALIAS is defined elsewhere -- confirm).
 | ||||
| FNALIAS(__cmpdf2, __ledf2); | ||||
| #endif | ||||
| 
 | ||||
| enum GE_RESULT { /* result codes of __gedf2 and of the wrapper that forwards to it */ | ||||
|     GE_LESS      = -1, /* a < b */ | ||||
|     GE_EQUAL     =  0, /* a == b, including a pair of zeros */ | ||||
|     GE_GREATER   =  1, /* a > b */ | ||||
|     GE_UNORDERED = -1   // Note: different from LE_UNORDERED; reported when either operand is NaN, same value as GE_LESS
 | ||||
| }; | ||||
| 
 | ||||
| COMPILER_RT_ABI enum GE_RESULT | ||||
| __gedf2(fp_t a, fp_t b) { | ||||
|     // Bit patterns of the operands read as signed integers, plus the same
 | ||||
|     // patterns restricted to absMask.
 | ||||
|     const srep_t lhsBits = toRep(a); | ||||
|     const srep_t rhsBits = toRep(b); | ||||
|     const rep_t lhsMag = lhsBits & absMask; | ||||
|     const rep_t rhsMag = rhsBits & absMask; | ||||
| 
 | ||||
|     // NaN operands make the pair unordered, reported here as GE_UNORDERED.
 | ||||
|     if (lhsMag > infRep) return GE_UNORDERED; | ||||
|     if (rhsMag > infRep) return GE_UNORDERED; | ||||
| 
 | ||||
|     // Two zeros are equal whatever their signs, and so are identical patterns.
 | ||||
|     if (lhsMag == 0 && rhsMag == 0) return GE_EQUAL; | ||||
|     if (lhsBits == rhsBits) return GE_EQUAL; | ||||
| 
 | ||||
|     // At least one non-negative operand: integer order is floating-point order.
 | ||||
|     if ((lhsBits & rhsBits) >= 0) | ||||
|         return lhsBits < rhsBits ? GE_LESS : GE_GREATER; | ||||
| 
 | ||||
|     // Both operands negative: the integer order is reversed.
 | ||||
|     return lhsBits > rhsBits ? GE_LESS : GE_GREATER; | ||||
| } | ||||
| 
 | ||||
| // Returns 1 when either operand is NaN (its pattern within absMask exceeds
 | ||||
| // infRep) and 0 otherwise.
 | ||||
| COMPILER_RT_ABI int | ||||
| __unorddf2(fp_t a, fp_t b) { | ||||
|     const rep_t lhsMag = toRep(a) & absMask; | ||||
|     const rep_t rhsMag = toRep(b) & absMask; | ||||
|     if (lhsMag > infRep) | ||||
|         return 1; | ||||
|     return rhsMag > infRep ? 1 : 0; | ||||
| } | ||||
| 
 | ||||
| // The following are alternative names for the preceding routines.
 | ||||
| 
 | ||||
| // Equality test: forwards to __ledf2, so the result is 0 exactly when a == b
 | ||||
| // and non-zero otherwise, including when either operand is NaN.
 | ||||
| COMPILER_RT_ABI enum LE_RESULT | ||||
| __eqdf2(fp_t a, fp_t b) { | ||||
|     const enum LE_RESULT ordering = __ledf2(a, b); | ||||
|     return ordering; | ||||
| } | ||||
| 
 | ||||
| // Less-than test: forwards to __ledf2, so the result is negative exactly when
 | ||||
| // a < b; a NaN operand yields the positive LE_UNORDERED.
 | ||||
| COMPILER_RT_ABI enum LE_RESULT | ||||
| __ltdf2(fp_t a, fp_t b) { | ||||
|     const enum LE_RESULT ordering = __ledf2(a, b); | ||||
|     return ordering; | ||||
| } | ||||
| 
 | ||||
| // Inequality test: forwards to __ledf2, so the result is non-zero when a and
 | ||||
| // b differ or when either operand is NaN, and 0 when they are equal.
 | ||||
| COMPILER_RT_ABI enum LE_RESULT | ||||
| __nedf2(fp_t a, fp_t b) { | ||||
|     const enum LE_RESULT ordering = __ledf2(a, b); | ||||
|     return ordering; | ||||
| } | ||||
| 
 | ||||
| // Greater-than test: forwards to __gedf2, so the result is positive exactly
 | ||||
| // when a > b; a NaN operand yields the negative GE_UNORDERED.
 | ||||
| COMPILER_RT_ABI enum GE_RESULT | ||||
| __gtdf2(fp_t a, fp_t b) { | ||||
|     const enum GE_RESULT ordering = __gedf2(a, b); | ||||
|     return ordering; | ||||
| } | ||||
| 
 | ||||
| #if defined(__ARM_EABI__) | ||||
| #if defined(COMPILER_RT_ARMHF_TARGET) | ||||
| AEABI_RTABI int __aeabi_dcmpun(fp_t a, fp_t b) { | ||||
|   return __unorddf2(a, b); /* ARM EABI name for the unordered test; a real function that forwards to __unorddf2 */ | ||||
| } | ||||
| #else | ||||
| AEABI_RTABI int __aeabi_dcmpun(fp_t a, fp_t b) COMPILER_RT_ALIAS(__unorddf2); /* presumably binds the name to __unorddf2 without a forwarding call -- COMPILER_RT_ALIAS is defined elsewhere; confirm */ | ||||
| #endif | ||||
| #endif | ||||
							
								
								
									
										156
									
								
								third_party/compiler_rt/comparesf2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										156
									
								
								third_party/compiler_rt/comparesf2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,156 @@ | |||
| /* clang-format off */ | ||||
| //===-- lib/comparesf2.c - Single-precision comparisons -----------*- C -*-===//
 | ||||
| //
 | ||||
| //                     The LLVM Compiler Infrastructure
 | ||||
| //
 | ||||
| // This file is dual licensed under the MIT and the University of Illinois Open
 | ||||
| // Source Licenses. See LICENSE.TXT for details.
 | ||||
| //
 | ||||
| //===----------------------------------------------------------------------===//
 | ||||
| //
 | ||||
| // This file implements the following soft-float comparison routines:
 | ||||
| //
 | ||||
| //   __eqsf2   __gesf2   __unordsf2
 | ||||
| //   __lesf2   __gtsf2
 | ||||
| //   __ltsf2
 | ||||
| //   __nesf2
 | ||||
| //
 | ||||
| // The semantics of the routines grouped in each column are identical, so there
 | ||||
| // is a single implementation for each, and wrappers to provide the other names.
 | ||||
| //
 | ||||
| // The main routines behave as follows:
 | ||||
| //
 | ||||
| //   __lesf2(a,b) returns -1 if a < b
 | ||||
| //                         0 if a == b
 | ||||
| //                         1 if a > b
 | ||||
| //                         1 if either a or b is NaN
 | ||||
| //
 | ||||
| //   __gesf2(a,b) returns -1 if a < b
 | ||||
| //                         0 if a == b
 | ||||
| //                         1 if a > b
 | ||||
| //                        -1 if either a or b is NaN
 | ||||
| //
 | ||||
| //   __unordsf2(a,b) returns 0 if both a and b are numbers
 | ||||
| //                           1 if either a or b is NaN
 | ||||
| //
 | ||||
| // Note that __lesf2( ) and __gesf2( ) are identical except in their handling of
 | ||||
| // NaN values.
 | ||||
| //
 | ||||
| //===----------------------------------------------------------------------===//
 | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #define SINGLE_PRECISION | ||||
| #include "third_party/compiler_rt/fp_lib.inc" | ||||
| 
 | ||||
| enum LE_RESULT { /* result codes of __lesf2 and of the wrappers that forward to it */ | ||||
|     LE_LESS      = -1, /* a < b */ | ||||
|     LE_EQUAL     =  0, /* a == b, including a pair of zeros */ | ||||
|     LE_GREATER   =  1, /* a > b */ | ||||
|     LE_UNORDERED =  1  /* either operand is NaN; deliberately the same value as LE_GREATER */ | ||||
| }; | ||||
| 
 | ||||
| COMPILER_RT_ABI enum LE_RESULT | ||||
| __lesf2(fp_t a, fp_t b) { | ||||
|     // Bit patterns of the operands read as signed integers, plus the same
 | ||||
|     // patterns restricted to absMask.
 | ||||
|     const srep_t lhsBits = toRep(a); | ||||
|     const srep_t rhsBits = toRep(b); | ||||
|     const rep_t lhsMag = lhsBits & absMask; | ||||
|     const rep_t rhsMag = rhsBits & absMask; | ||||
| 
 | ||||
|     // A magnitude above the infinity pattern is a NaN: the pair is unordered.
 | ||||
|     if (lhsMag > infRep) return LE_UNORDERED; | ||||
|     if (rhsMag > infRep) return LE_UNORDERED; | ||||
| 
 | ||||
|     // Two zeros are equal whatever their signs, and so are identical patterns.
 | ||||
|     if (lhsMag == 0 && rhsMag == 0) return LE_EQUAL; | ||||
|     if (lhsBits == rhsBits) return LE_EQUAL; | ||||
| 
 | ||||
|     // With at least one non-negative operand, the signed integer order of the
 | ||||
|     // patterns matches the floating-point order.
 | ||||
|     if ((lhsBits & rhsBits) >= 0) | ||||
|         return lhsBits < rhsBits ? LE_LESS : LE_GREATER; | ||||
| 
 | ||||
|     // Both operands are negative, so the integer order is reversed.  (This
 | ||||
|     // assumes a twos- or ones-complement integer representation; the flip
 | ||||
|     // would be incorrect for sign-magnitude integers.)
 | ||||
|     return lhsBits > rhsBits ? LE_LESS : LE_GREATER; | ||||
| } | ||||
| 
 | ||||
| #if defined(__ELF__) | ||||
| // Alias for libgcc compatibility: presumably exposes __lesf2 under the additional name __cmpsf2 (FNALIAS is defined elsewhere -- confirm).
 | ||||
| FNALIAS(__cmpsf2, __lesf2); | ||||
| #endif | ||||
| 
 | ||||
| enum GE_RESULT { /* result codes of __gesf2 and of the wrapper that forwards to it */ | ||||
|     GE_LESS      = -1, /* a < b */ | ||||
|     GE_EQUAL     =  0, /* a == b, including a pair of zeros */ | ||||
|     GE_GREATER   =  1, /* a > b */ | ||||
|     GE_UNORDERED = -1   // Note: different from LE_UNORDERED; reported when either operand is NaN, same value as GE_LESS
 | ||||
| }; | ||||
| 
 | ||||
| COMPILER_RT_ABI enum GE_RESULT | ||||
| __gesf2(fp_t a, fp_t b) { | ||||
|     // Bit patterns of the operands read as signed integers, plus the same
 | ||||
|     // patterns restricted to absMask.
 | ||||
|     const srep_t lhsBits = toRep(a); | ||||
|     const srep_t rhsBits = toRep(b); | ||||
|     const rep_t lhsMag = lhsBits & absMask; | ||||
|     const rep_t rhsMag = rhsBits & absMask; | ||||
| 
 | ||||
|     // NaN operands make the pair unordered, reported here as GE_UNORDERED.
 | ||||
|     if (lhsMag > infRep) return GE_UNORDERED; | ||||
|     if (rhsMag > infRep) return GE_UNORDERED; | ||||
| 
 | ||||
|     // Two zeros are equal whatever their signs, and so are identical patterns.
 | ||||
|     if (lhsMag == 0 && rhsMag == 0) return GE_EQUAL; | ||||
|     if (lhsBits == rhsBits) return GE_EQUAL; | ||||
| 
 | ||||
|     // At least one non-negative operand: integer order is floating-point order.
 | ||||
|     if ((lhsBits & rhsBits) >= 0) | ||||
|         return lhsBits < rhsBits ? GE_LESS : GE_GREATER; | ||||
| 
 | ||||
|     // Both operands negative: the integer order is reversed.
 | ||||
|     return lhsBits > rhsBits ? GE_LESS : GE_GREATER; | ||||
| } | ||||
| 
 | ||||
| // Returns 1 when either operand is NaN (its pattern within absMask exceeds
 | ||||
| // infRep) and 0 otherwise.
 | ||||
| COMPILER_RT_ABI int | ||||
| __unordsf2(fp_t a, fp_t b) { | ||||
|     const rep_t lhsMag = toRep(a) & absMask; | ||||
|     const rep_t rhsMag = toRep(b) & absMask; | ||||
|     if (lhsMag > infRep) | ||||
|         return 1; | ||||
|     return rhsMag > infRep ? 1 : 0; | ||||
| } | ||||
| 
 | ||||
| // The following are alternative names for the preceding routines.
 | ||||
| 
 | ||||
| // Equality test: forwards to __lesf2, so the result is 0 exactly when a == b
 | ||||
| // and non-zero otherwise, including when either operand is NaN.
 | ||||
| COMPILER_RT_ABI enum LE_RESULT | ||||
| __eqsf2(fp_t a, fp_t b) { | ||||
|     const enum LE_RESULT ordering = __lesf2(a, b); | ||||
|     return ordering; | ||||
| } | ||||
| 
 | ||||
| // Less-than test: forwards to __lesf2, so the result is negative exactly when
 | ||||
| // a < b; a NaN operand yields the positive LE_UNORDERED.
 | ||||
| COMPILER_RT_ABI enum LE_RESULT | ||||
| __ltsf2(fp_t a, fp_t b) { | ||||
|     const enum LE_RESULT ordering = __lesf2(a, b); | ||||
|     return ordering; | ||||
| } | ||||
| 
 | ||||
| // Inequality test: forwards to __lesf2, so the result is non-zero when a and
 | ||||
| // b differ or when either operand is NaN, and 0 when they are equal.
 | ||||
| COMPILER_RT_ABI enum LE_RESULT | ||||
| __nesf2(fp_t a, fp_t b) { | ||||
|     const enum LE_RESULT ordering = __lesf2(a, b); | ||||
|     return ordering; | ||||
| } | ||||
| 
 | ||||
| // Greater-than test: forwards to __gesf2, so the result is positive exactly
 | ||||
| // when a > b; a NaN operand yields the negative GE_UNORDERED.
 | ||||
| COMPILER_RT_ABI enum GE_RESULT | ||||
| __gtsf2(fp_t a, fp_t b) { | ||||
|     const enum GE_RESULT ordering = __gesf2(a, b); | ||||
|     return ordering; | ||||
| } | ||||
| 
 | ||||
| #if defined(__ARM_EABI__) | ||||
| #if defined(COMPILER_RT_ARMHF_TARGET) | ||||
| AEABI_RTABI int __aeabi_fcmpun(fp_t a, fp_t b) { | ||||
|   return __unordsf2(a, b); /* ARM EABI name for the unordered test; a real function that forwards to __unordsf2 */ | ||||
| } | ||||
| #else | ||||
| AEABI_RTABI int __aeabi_fcmpun(fp_t a, fp_t b) COMPILER_RT_ALIAS(__unordsf2); /* presumably binds the name to __unordsf2 without a forwarding call -- COMPILER_RT_ALIAS is defined elsewhere; confirm */ | ||||
| #endif | ||||
| #endif | ||||
							
								
								
									
										141
									
								
								third_party/compiler_rt/comparetf2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										141
									
								
								third_party/compiler_rt/comparetf2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,141 @@ | |||
| /* clang-format off */ | ||||
| //===-- lib/comparetf2.c - Quad-precision comparisons -------------*- C -*-===//
 | ||||
| //
 | ||||
| //                     The LLVM Compiler Infrastructure
 | ||||
| //
 | ||||
| // This file is dual licensed under the MIT and the University of Illinois Open
 | ||||
| // Source Licenses. See LICENSE.TXT for details.
 | ||||
| //
 | ||||
| //===----------------------------------------------------------------------===//
 | ||||
| //
 | ||||
| // This file implements the following soft-float comparison routines:
 | ||||
| //
 | ||||
| //   __eqtf2   __getf2   __unordtf2
 | ||||
| //   __letf2   __gttf2
 | ||||
| //   __lttf2
 | ||||
| //   __netf2
 | ||||
| //
 | ||||
| // The semantics of the routines grouped in each column are identical, so there
 | ||||
| // is a single implementation for each, and wrappers to provide the other names.
 | ||||
| //
 | ||||
| // The main routines behave as follows:
 | ||||
| //
 | ||||
| //   __letf2(a,b) returns -1 if a < b
 | ||||
| //                         0 if a == b
 | ||||
| //                         1 if a > b
 | ||||
| //                         1 if either a or b is NaN
 | ||||
| //
 | ||||
| //   __getf2(a,b) returns -1 if a < b
 | ||||
| //                         0 if a == b
 | ||||
| //                         1 if a > b
 | ||||
| //                        -1 if either a or b is NaN
 | ||||
| //
 | ||||
| //   __unordtf2(a,b) returns 0 if both a and b are numbers
 | ||||
| //                           1 if either a or b is NaN
 | ||||
| //
 | ||||
| // Note that __letf2( ) and __getf2( ) are identical except in their handling of
 | ||||
| // NaN values.
 | ||||
| //
 | ||||
| //===----------------------------------------------------------------------===//
 | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #define QUAD_PRECISION | ||||
| #include "third_party/compiler_rt/fp_lib.inc" | ||||
| 
 | ||||
| #if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) | ||||
| enum LE_RESULT { /* result codes of __letf2 and of the wrappers that forward to it */ | ||||
|     LE_LESS      = -1, /* a < b */ | ||||
|     LE_EQUAL     =  0, /* a == b, including a pair of zeros */ | ||||
|     LE_GREATER   =  1, /* a > b */ | ||||
|     LE_UNORDERED =  1  /* either operand is NaN; deliberately the same value as LE_GREATER */ | ||||
| }; | ||||
| 
 | ||||
| COMPILER_RT_ABI enum LE_RESULT __letf2(fp_t a, fp_t b) { | ||||
|     // Bit patterns of the operands read as signed integers, plus the same
 | ||||
|     // patterns restricted to absMask.
 | ||||
|     const srep_t lhsBits = toRep(a); | ||||
|     const srep_t rhsBits = toRep(b); | ||||
|     const rep_t lhsMag = lhsBits & absMask; | ||||
|     const rep_t rhsMag = rhsBits & absMask; | ||||
| 
 | ||||
|     // A magnitude above the infinity pattern is a NaN: the pair is unordered.
 | ||||
|     if (lhsMag > infRep) return LE_UNORDERED; | ||||
|     if (rhsMag > infRep) return LE_UNORDERED; | ||||
| 
 | ||||
|     // Two zeros are equal whatever their signs, and so are identical patterns.
 | ||||
|     if (lhsMag == 0 && rhsMag == 0) return LE_EQUAL; | ||||
|     if (lhsBits == rhsBits) return LE_EQUAL; | ||||
| 
 | ||||
|     // With at least one non-negative operand, the signed integer order of the
 | ||||
|     // patterns matches the floating-point order.
 | ||||
|     if ((lhsBits & rhsBits) >= 0) | ||||
|         return lhsBits < rhsBits ? LE_LESS : LE_GREATER; | ||||
| 
 | ||||
|     // Both operands are negative, so the integer order is reversed.  (This
 | ||||
|     // assumes a twos- or ones-complement integer representation; the flip
 | ||||
|     // would be incorrect for sign-magnitude integers.)
 | ||||
|     return lhsBits > rhsBits ? LE_LESS : LE_GREATER; | ||||
| } | ||||
| 
 | ||||
| #if defined(__ELF__) | ||||
| // Alias for libgcc compatibility
 | ||||
| FNALIAS(__cmptf2, __letf2); | ||||
| #endif | ||||
| 
 | ||||
| enum GE_RESULT {   // Result codes returned by __getf2( ).
 | ||||
|     GE_LESS      = -1, | ||||
|     GE_EQUAL     =  0, | ||||
|     GE_GREATER   =  1, | ||||
|     GE_UNORDERED = -1   // Note: different from LE_UNORDERED; same value as GE_LESS, so a NaN operand is reported as "less".
 | ||||
| }; | ||||
| 
 | ||||
| COMPILER_RT_ABI enum GE_RESULT __getf2(fp_t a, fp_t b) { | ||||
|     // Same comparison as __letf2( ), but a NaN operand yields GE_UNORDERED.
 | ||||
|     const srep_t lhsBits = toRep(a); | ||||
|     const srep_t rhsBits = toRep(b); | ||||
|     const rep_t lhsMagnitude = lhsBits & absMask; | ||||
|     const rep_t rhsMagnitude = rhsBits & absMask; | ||||
| 
 | ||||
|     // NaN on either side: unordered.
 | ||||
|     if (lhsMagnitude > infRep || rhsMagnitude > infRep) return GE_UNORDERED; | ||||
|     // Zeros of either sign, or identical bit patterns, are equal.
 | ||||
|     if ((lhsMagnitude | rhsMagnitude) == 0) return GE_EQUAL; | ||||
|     if (lhsBits == rhsBits) return GE_EQUAL; | ||||
| 
 | ||||
|     // When both operands are negative the integer order is reversed.
 | ||||
|     const int bothNegative = (lhsBits & rhsBits) < 0; | ||||
|     const int isLess = bothNegative ? (lhsBits > rhsBits) : (lhsBits < rhsBits); | ||||
|     return isLess ? GE_LESS : GE_GREATER; | ||||
| } | ||||
| 
 | ||||
| COMPILER_RT_ABI int __unordtf2(fp_t a, fp_t b) { | ||||
|     // Returns 1 if either operand is NaN (magnitude above the infinity
 | ||||
|     // encoding), 0 if both are numbers.
 | ||||
|     const int aIsNaN = (toRep(a) & absMask) > infRep; | ||||
|     const int bIsNaN = (toRep(b) & absMask) > infRep; | ||||
|     return aIsNaN || bIsNaN; | ||||
| } | ||||
| 
 | ||||
| // The following are alternative names for the preceding routines.
 | ||||
| 
 | ||||
| COMPILER_RT_ABI enum LE_RESULT __eqtf2(fp_t a, fp_t b) {   // Equality entry point: forwards to __letf2( ) unchanged.
 | ||||
|     return __letf2(a, b); | ||||
| } | ||||
| 
 | ||||
| COMPILER_RT_ABI enum LE_RESULT __lttf2(fp_t a, fp_t b) {   // Less-than entry point: forwards to __letf2( ) unchanged.
 | ||||
|     return __letf2(a, b); | ||||
| } | ||||
| 
 | ||||
| COMPILER_RT_ABI enum LE_RESULT __netf2(fp_t a, fp_t b) {   // Inequality entry point: forwards to __letf2( ) unchanged.
 | ||||
|     return __letf2(a, b); | ||||
| } | ||||
| 
 | ||||
| COMPILER_RT_ABI enum GE_RESULT __gttf2(fp_t a, fp_t b) {   // Greater-than entry point: forwards to __getf2( ), so NaN yields GE_UNORDERED.
 | ||||
|     return __getf2(a, b); | ||||
| } | ||||
| 
 | ||||
| #endif | ||||
							
								
								
									
										60
									
								
								third_party/compiler_rt/compiler_rt.mk
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										60
									
								
								third_party/compiler_rt/compiler_rt.mk
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,60 @@ | |||
| #-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐
 | ||||
| #───vi: set et ft=make ts=8 tw=8 fenc=utf-8 :vi───────────────────────┘
 | ||||
| 
 | ||||
| PKGS += THIRD_PARTY_COMPILER_RT | ||||
| 
 | ||||
| THIRD_PARTY_COMPILER_RT_ARTIFACTS += THIRD_PARTY_COMPILER_RT_A | ||||
| THIRD_PARTY_COMPILER_RT = $(THIRD_PARTY_COMPILER_RT_A_DEPS) $(THIRD_PARTY_COMPILER_RT_A) | ||||
| THIRD_PARTY_COMPILER_RT_A = o/$(MODE)/third_party/compiler_rt/compiler_rt.a | ||||
| THIRD_PARTY_COMPILER_RT_A_FILES :=				\
 | ||||
| 	$(wildcard third_party/compiler_rt/*)			\
 | ||||
| 	$(wildcard third_party/compiler_rt/nexgen32e/*) | ||||
| THIRD_PARTY_COMPILER_RT_A_HDRS = $(filter %.h,$(THIRD_PARTY_COMPILER_RT_A_FILES)) | ||||
| THIRD_PARTY_COMPILER_RT_A_SRCS_S = $(filter %.S,$(THIRD_PARTY_COMPILER_RT_A_FILES)) | ||||
| THIRD_PARTY_COMPILER_RT_A_SRCS_C = $(filter %.c,$(THIRD_PARTY_COMPILER_RT_A_FILES)) | ||||
| 
 | ||||
| THIRD_PARTY_COMPILER_RT_A_SRCS =				\
 | ||||
| 	$(THIRD_PARTY_COMPILER_RT_A_SRCS_S)			\
 | ||||
| 	$(THIRD_PARTY_COMPILER_RT_A_SRCS_C) | ||||
| 
 | ||||
| THIRD_PARTY_COMPILER_RT_A_OBJS =				\
 | ||||
| 	$(THIRD_PARTY_COMPILER_RT_A_SRCS:%=o/$(MODE)/%.zip.o)	\
 | ||||
| 	$(THIRD_PARTY_COMPILER_RT_A_SRCS_S:%.S=o/$(MODE)/%.o)	\
 | ||||
| 	$(THIRD_PARTY_COMPILER_RT_A_SRCS_C:%.c=o/$(MODE)/%.o) | ||||
| 
 | ||||
| THIRD_PARTY_COMPILER_RT_A_CHECKS =				\
 | ||||
| 	$(THIRD_PARTY_COMPILER_RT_A).pkg			\
 | ||||
| 	$(THIRD_PARTY_COMPILER_RT_A_HDRS:%=o/$(MODE)/%.ok) | ||||
| 
 | ||||
| THIRD_PARTY_COMPILER_RT_A_DIRECTDEPS =				\
 | ||||
| 	LIBC_MATH						\
 | ||||
| 	LIBC_STUBS | ||||
| 
 | ||||
| THIRD_PARTY_COMPILER_RT_A_DEPS :=				\
 | ||||
| 	$(call uniq,$(foreach x,$(THIRD_PARTY_COMPILER_RT_A_DIRECTDEPS),$($(x)))) | ||||
| 
 | ||||
| $(THIRD_PARTY_COMPILER_RT_A):					\ | ||||
| 		third_party/compiler_rt/			\
 | ||||
| 		$(THIRD_PARTY_COMPILER_RT_A).pkg		\
 | ||||
| 		$(THIRD_PARTY_COMPILER_RT_A_OBJS) | ||||
| 
 | ||||
| $(THIRD_PARTY_COMPILER_RT_A).pkg:				\ | ||||
| 		$(THIRD_PARTY_COMPILER_RT_A_OBJS)		\
 | ||||
| 		$(foreach x,$(THIRD_PARTY_COMPILER_RT_A_DIRECTDEPS),$($(x)_A).pkg) | ||||
| 
 | ||||
| $(THIRD_PARTY_COMPILER_RT_A_OBJS):				\ | ||||
| 	DEFAULT_COPTS +=					\
 | ||||
| 		-DCRT_HAS_128BIT | ||||
| 
 | ||||
| o/$(MODE)/third_party/compiler_rt/multc3.o			\ | ||||
| o/$(MODE)/third_party/compiler_rt/divtc3.o:			\ | ||||
| 	DEFAULT_COPTS +=					\
 | ||||
| 		-w | ||||
| 
 | ||||
| THIRD_PARTY_COMPILER_RT_LIBS = $(foreach x,$(THIRD_PARTY_COMPILER_RT_ARTIFACTS),$($(x))) | ||||
| THIRD_PARTY_COMPILER_RT_SRCS = $(foreach x,$(THIRD_PARTY_COMPILER_RT_ARTIFACTS),$($(x)_SRCS)) | ||||
| THIRD_PARTY_COMPILER_RT_CHECKS = $(foreach x,$(THIRD_PARTY_COMPILER_RT_ARTIFACTS),$($(x)_CHECKS)) | ||||
| THIRD_PARTY_COMPILER_RT_OBJS = $(foreach x,$(THIRD_PARTY_COMPILER_RT_ARTIFACTS),$($(x)_OBJS)) | ||||
| 
 | ||||
| .PHONY: o/$(MODE)/third_party/compiler_rt | ||||
| o/$(MODE)/third_party/compiler_rt: $(THIRD_PARTY_COMPILER_RT_CHECKS) | ||||
							
								
								
									
										50
									
								
								third_party/compiler_rt/comprt.S
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										50
									
								
								third_party/compiler_rt/comprt.S
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,50 @@ | |||
| #include "libc/macros.h" | ||||
| 
 | ||||
| /	Nop ref this to force pull the license into linkage. | ||||
| 	.section .yoink | ||||
| huge_compiler_rt_license: | ||||
| 	int3 | ||||
| 	.endobj	huge_compiler_rt_license,globl,hidden | ||||
| 	.previous | ||||
| 
 | ||||
| .ident "\n | ||||
| compiler_rt (Licensed MIT) | ||||
| Copyright (c) 2009-2015 by the contributors listed in: | ||||
| github.com/llvm-mirror/compiler-rt/blob/master/CREDITS.TXT" | ||||
| 
 | ||||
| .ident "\n | ||||
| compiler_rt (Licensed \"University of Illinois/NCSA Open Source License\") | ||||
| Copyright (c) 2009-2018 by the contributors listed in: | ||||
| github.com/llvm-mirror/compiler-rt/blob/master/CREDITS.TXT | ||||
| All rights reserved. | ||||
| 
 | ||||
| Developed by: | ||||
| 
 | ||||
|     LLVM Team | ||||
|     University of Illinois at Urbana-Champaign | ||||
|     http://llvm.org | ||||
| 
 | ||||
| Permission is hereby granted, free of charge, to any person obtaining a copy of | ||||
| this software and associated documentation files (the \"Software\"), to deal with | ||||
| the Software without restriction, including without limitation the rights to | ||||
| use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | ||||
| of the Software, and to permit persons to whom the Software is furnished to do | ||||
| so, subject to the following conditions: | ||||
| 
 | ||||
|     * Redistributions of source code must retain the above copyright notice, | ||||
|       this list of conditions and the following disclaimers. | ||||
|     * Redistributions in binary form must reproduce the above copyright notice, | ||||
|       this list of conditions and the following disclaimers in the | ||||
|       documentation and/or other materials provided with the distribution. | ||||
|     * Neither the names of the LLVM Team, University of Illinois at | ||||
|       Urbana-Champaign, nor the names of its contributors may be used to | ||||
|       endorse or promote products derived from this Software without specific | ||||
|       prior written permission. | ||||
| 
 | ||||
| THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||||
| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS | ||||
| FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE | ||||
| CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||||
| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||||
| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE | ||||
| SOFTWARE." | ||||
							
								
								
									
										43
									
								
								third_party/compiler_rt/ctzdi2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										43
									
								
								third_party/compiler_rt/ctzdi2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,43 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- ctzdi2.c - Implement __ctzdi2 -------------------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  * | ||||
|  * This file implements __ctzdi2 for the compiler_rt library. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #include "third_party/compiler_rt/int_lib.h" | ||||
| 
 | ||||
| /* Returns: the number of trailing 0-bits  */ | ||||
| 
 | ||||
| #if !defined(__clang__) &&                                                     \ | ||||
|     ((defined(__sparc__) && defined(__arch64__)) ||                            \ | ||||
|      defined(__mips64) ||                                                      \ | ||||
|      (defined(__riscv) && __SIZEOF_POINTER__ >= 8)) | ||||
| /* On 64-bit architectures with neither a native clz instruction nor a native
 | ||||
|  * ctz instruction, gcc resolves __builtin_ctz to __ctzdi2 rather than | ||||
|  * __ctzsi2, leading to infinite recursion. */ | ||||
| #define __builtin_ctz(a) __ctzsi2(a) | ||||
| extern si_int __ctzsi2(si_int); | ||||
| #endif | ||||
| 
 | ||||
| /* Precondition: a != 0 */ | ||||
| 
 | ||||
| COMPILER_RT_ABI si_int | ||||
| __ctzdi2(di_int a) | ||||
| { | ||||
|     /* View the argument through its low and high words. */ | ||||
|     dwords halves; | ||||
|     halves.all = a; | ||||
|     /* All ones when the low word is zero, otherwise zero. */ | ||||
|     const si_int lowIsZero = -(halves.s.low == 0); | ||||
|     /* Width of one word, added only when the count comes from the high word. */ | ||||
|     const si_int skipped = lowIsZero & ((si_int)(sizeof(si_int) * CHAR_BIT)); | ||||
|     /* Branch-free select of the word holding the lowest set bit. */ | ||||
|     return __builtin_ctz((halves.s.high & lowIsZero) | (halves.s.low & ~lowIsZero)) + | ||||
|               skipped; | ||||
| } | ||||
							
								
								
									
										60
									
								
								third_party/compiler_rt/ctzsi2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										60
									
								
								third_party/compiler_rt/ctzsi2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,60 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- ctzsi2.c - Implement __ctzsi2 -------------------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  * | ||||
|  * This file implements __ctzsi2 for the compiler_rt library. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #include "third_party/compiler_rt/int_lib.h" | ||||
| 
 | ||||
| /* Returns: the number of trailing 0-bits */ | ||||
| 
 | ||||
| /* Precondition: a != 0 */ | ||||
| 
 | ||||
| COMPILER_RT_ABI si_int | ||||
| __ctzsi2(si_int a) | ||||
| { | ||||
|     /* Branch-free binary search for the lowest set bit: at every stage, if the | ||||
|      * low part of the window is empty, shift it out and add its width to the | ||||
|      * running count. | ||||
|      */ | ||||
|     su_int bits = (su_int)a; | ||||
|     su_int count = 0; | ||||
|     si_int step; | ||||
| 
 | ||||
|     step = ((bits & 0x0000FFFF) == 0) << 4;  /* 16 if the low 16 bits are clear, else 0 */ | ||||
|     bits >>= step; | ||||
|     count += step;                           /* count = [0, 16] */ | ||||
| 
 | ||||
|     step = ((bits & 0x00FF) == 0) << 3;      /* 8 if the low 8 bits are clear, else 0 */ | ||||
|     bits >>= step; | ||||
|     count += step;                           /* count = [0, 8, 16, 24] */ | ||||
| 
 | ||||
|     step = ((bits & 0x0F) == 0) << 2;        /* 4 if the low 4 bits are clear, else 0 */ | ||||
|     bits >>= step; | ||||
|     count += step;                           /* count = [0, 4, 8, ..., 28] */ | ||||
| 
 | ||||
|     step = ((bits & 0x3) == 0) << 1;         /* 2 if the low 2 bits are clear, else 0 */ | ||||
|     bits >>= step; | ||||
|     bits &= 3;                               /* bits = [0 - 3] */ | ||||
|     count += step;                           /* count = [0 - 30] and is even */ | ||||
| 
 | ||||
|     /* Last two bits, without branching: | ||||
|      *   bits == 0      -> count + 2 | ||||
|      *   bits == 2      -> count + 1 | ||||
|      *   bits == 1 or 3 -> count | ||||
|      */ | ||||
|     return count + ((2 - (bits >> 1)) & -((bits & 1) == 0)); | ||||
| } | ||||
							
								
								
									
										36
									
								
								third_party/compiler_rt/ctzti2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								third_party/compiler_rt/ctzti2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,36 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- ctzti2.c - Implement __ctzti2 -------------------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  * | ||||
|  * This file implements __ctzti2 for the compiler_rt library. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #include "third_party/compiler_rt/int_lib.h" | ||||
| 
 | ||||
| #ifdef CRT_HAS_128BIT | ||||
| 
 | ||||
| /* Returns: the number of trailing 0-bits */ | ||||
| 
 | ||||
| /* Precondition: a != 0 */ | ||||
| 
 | ||||
| COMPILER_RT_ABI si_int | ||||
| __ctzti2(ti_int a) | ||||
| { | ||||
|     /* View the argument through its low and high halves. */ | ||||
|     twords halves; | ||||
|     halves.all = a; | ||||
|     /* All ones when the low half is zero, otherwise zero. */ | ||||
|     const di_int lowIsZero = -(halves.s.low == 0); | ||||
|     /* Width of one half, added only when the count comes from the high half. */ | ||||
|     const si_int skipped = (si_int)lowIsZero & ((si_int)(sizeof(di_int) * CHAR_BIT)); | ||||
|     /* Branch-free select of the half holding the lowest set bit. */ | ||||
|     return __builtin_ctzll((halves.s.high & lowIsZero) | (halves.s.low & ~lowIsZero)) + | ||||
|               skipped; | ||||
| } | ||||
| 
 | ||||
| #endif /* CRT_HAS_128BIT */ | ||||
							
								
								
									
										58
									
								
								third_party/compiler_rt/divdc3.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								third_party/compiler_rt/divdc3.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,58 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- divdc3.c - Implement __divdc3 -------------------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  * | ||||
|  * This file implements __divdc3 for the compiler_rt library. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #define DOUBLE_PRECISION | ||||
| #include "third_party/compiler_rt/fp_lib.inc" | ||||
| #include "third_party/compiler_rt/int_lib.h" | ||||
| #include "third_party/compiler_rt/int_math.h" | ||||
| 
 | ||||
| /* Returns: the quotient of (a + ib) / (c + id) */ | ||||
| 
 | ||||
| COMPILER_RT_ABI Dcomplex __divdc3(double __a, double __b, double __c, /* quotient of (__a + i*__b) / (__c + i*__d) */ | ||||
|                                   double __d) { | ||||
|   int __ilogbw = 0; /* exponent scaled out of the divisor; stays 0 when no scaling is applied */ | ||||
|   double __logbw = __compiler_rt_logb(crt_fmax(crt_fabs(__c), crt_fabs(__d))); /* NOTE(review): presumably the binary exponent of the larger divisor component -- confirm against __compiler_rt_logb */ | ||||
|   if (crt_isfinite(__logbw)) { /* rescale __c and __d only when that exponent is finite */ | ||||
|     __ilogbw = (int)__logbw; | ||||
|     __c = crt_scalbn(__c, -__ilogbw); | ||||
|     __d = crt_scalbn(__d, -__ilogbw); | ||||
|   } | ||||
|   double __denom = __c * __c + __d * __d; /* squared magnitude of the (possibly rescaled) divisor */ | ||||
|   Dcomplex z; | ||||
|   COMPLEX_REAL(z) = crt_scalbn((__a * __c + __b * __d) / __denom, -__ilogbw); /* the same -__ilogbw scaling is applied to each quotient part */ | ||||
|   COMPLEX_IMAGINARY(z) = | ||||
|       crt_scalbn((__b * __c - __a * __d) / __denom, -__ilogbw); | ||||
|   if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) { /* both parts NaN: check the special cases below */ | ||||
|     if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b))) { /* zero divisor, numerator not entirely NaN */ | ||||
|       COMPLEX_REAL(z) = crt_copysign(CRT_INFINITY, __c) * __a; /* infinity carrying the sign of __c, times each numerator part */ | ||||
|       COMPLEX_IMAGINARY(z) = crt_copysign(CRT_INFINITY, __c) * __b; | ||||
|     } else if ((crt_isinf(__a) || crt_isinf(__b)) && crt_isfinite(__c) && | ||||
|                crt_isfinite(__d)) { /* infinite numerator over a finite divisor */ | ||||
|       __a = crt_copysign(crt_isinf(__a) ? 1.0 : 0.0, __a); /* reduce each numerator part to a signed 1 (if infinite) or signed 0 */ | ||||
|       __b = crt_copysign(crt_isinf(__b) ? 1.0 : 0.0, __b); | ||||
|       COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d); | ||||
|       COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d); | ||||
|     } else if (crt_isinf(__logbw) && __logbw > 0.0 && crt_isfinite(__a) && | ||||
|                crt_isfinite(__b)) { /* finite numerator and __logbw is +infinity */ | ||||
|       __c = crt_copysign(crt_isinf(__c) ? 1.0 : 0.0, __c); /* reduce each divisor part to a signed 1 (if infinite) or signed 0 */ | ||||
|       __d = crt_copysign(crt_isinf(__d) ? 1.0 : 0.0, __d); | ||||
|       COMPLEX_REAL(z) = 0.0 * (__a * __c + __b * __d); /* result is a zero; the product only contributes the sign (or NaN) */ | ||||
|       COMPLEX_IMAGINARY(z) = 0.0 * (__b * __c - __a * __d); | ||||
|     } | ||||
|   } | ||||
|   return z; | ||||
| } | ||||
							
								
								
									
										197
									
								
								third_party/compiler_rt/divdf3.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										197
									
								
								third_party/compiler_rt/divdf3.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,197 @@ | |||
| /* clang-format off */ | ||||
| //===-- lib/divdf3.c - Double-precision division ------------------*- C -*-===//
 | ||||
| //
 | ||||
| //                     The LLVM Compiler Infrastructure
 | ||||
| //
 | ||||
| // This file is dual licensed under the MIT and the University of Illinois Open
 | ||||
| // Source Licenses. See LICENSE.TXT for details.
 | ||||
| //
 | ||||
| //===----------------------------------------------------------------------===//
 | ||||
| //
 | ||||
| // This file implements double-precision soft-float division
 | ||||
| // with the IEEE-754 default rounding (to nearest, ties to even).
 | ||||
| //
 | ||||
| // For simplicity, this implementation currently flushes denormals to zero.
 | ||||
| // It should be a fairly straightforward exercise to implement gradual
 | ||||
| // underflow with correct rounding.
 | ||||
| //
 | ||||
| //===----------------------------------------------------------------------===//
 | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #define DOUBLE_PRECISION | ||||
| #include "libc/literal.h" | ||||
| #include "third_party/compiler_rt/fp_lib.inc" | ||||
| 
 | ||||
| // Soft-float double-precision division: returns a / b rounded to nearest,
 | ||||
| // ties to even.  NaN operands are returned quieted; inf/inf and 0/0 give a
 | ||||
| // quiet NaN; x/0 gives a signed infinity; results that would be denormal are
 | ||||
| // flushed to a signed zero; exponent overflow gives a signed infinity.
 | ||||
| COMPILER_RT_ABI fp_t | ||||
| __divdf3(fp_t a, fp_t b) { | ||||
| 
 | ||||
|     const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; | ||||
|     const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; | ||||
|     const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit; | ||||
| 
 | ||||
|     rep_t aSignificand = toRep(a) & significandMask; | ||||
|     rep_t bSignificand = toRep(b) & significandMask; | ||||
|     int scale = 0; | ||||
| 
 | ||||
|     // Detect if a or b is zero, denormal, infinity, or NaN.
 | ||||
|     if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) { | ||||
| 
 | ||||
|         const rep_t aAbs = toRep(a) & absMask; | ||||
|         const rep_t bAbs = toRep(b) & absMask; | ||||
| 
 | ||||
|         // NaN / anything = qNaN
 | ||||
|         if (aAbs > infRep) return fromRep(toRep(a) | quietBit); | ||||
|         // anything / NaN = qNaN
 | ||||
|         if (bAbs > infRep) return fromRep(toRep(b) | quietBit); | ||||
| 
 | ||||
|         if (aAbs == infRep) { | ||||
|             // infinity / infinity = NaN
 | ||||
|             if (bAbs == infRep) return fromRep(qnanRep); | ||||
|             // infinity / anything else = +/- infinity
 | ||||
|             else return fromRep(aAbs | quotientSign); | ||||
|         } | ||||
| 
 | ||||
|         // anything else / infinity = +/- 0
 | ||||
|         if (bAbs == infRep) return fromRep(quotientSign); | ||||
| 
 | ||||
|         if (!aAbs) { | ||||
|             // zero / zero = NaN
 | ||||
|             if (!bAbs) return fromRep(qnanRep); | ||||
|             // zero / anything else = +/- zero
 | ||||
|             else return fromRep(quotientSign); | ||||
|         } | ||||
|         // anything else / zero = +/- infinity
 | ||||
|         if (!bAbs) return fromRep(infRep | quotientSign); | ||||
| 
 | ||||
|         // one or both of a or b is denormal, the other (if applicable) is a
 | ||||
|         // normal number.  Renormalize one or both of a and b, and set scale to
 | ||||
|         // include the necessary exponent adjustment.
 | ||||
|         if (aAbs < implicitBit) scale += normalize(&aSignificand); | ||||
|         if (bAbs < implicitBit) scale -= normalize(&bSignificand); | ||||
|     } | ||||
| 
 | ||||
|     // Or in the implicit significand bit.  (If we fell through from the
 | ||||
|     // denormal path it was already set by normalize( ), but setting it twice
 | ||||
|     // won't hurt anything.)
 | ||||
|     aSignificand |= implicitBit; | ||||
|     bSignificand |= implicitBit; | ||||
|     int quotientExponent = aExponent - bExponent + scale; | ||||
| 
 | ||||
|     // Align the significand of b as a Q31 fixed-point number in the range
 | ||||
|     // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax
 | ||||
|     // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2.  This
 | ||||
|     // is accurate to about 3.5 binary digits.
 | ||||
|     const uint32_t q31b = bSignificand >> 21; | ||||
|     uint32_t recip32 = UINT32_C(0x7504f333) - q31b; | ||||
| 
 | ||||
|     // Now refine the reciprocal estimate using a Newton-Raphson iteration:
 | ||||
|     //
 | ||||
|     //     x1 = x0 * (2 - x0 * b)
 | ||||
|     //
 | ||||
|     // This doubles the number of correct binary digits in the approximation
 | ||||
|     // with each iteration, so after three iterations, we have about 28 binary
 | ||||
|     // digits of accuracy.
 | ||||
|     uint32_t correction32; | ||||
|     correction32 = -((uint64_t)recip32 * q31b >> 32); | ||||
|     recip32 = (uint64_t)recip32 * correction32 >> 31; | ||||
|     correction32 = -((uint64_t)recip32 * q31b >> 32); | ||||
|     recip32 = (uint64_t)recip32 * correction32 >> 31; | ||||
|     correction32 = -((uint64_t)recip32 * q31b >> 32); | ||||
|     recip32 = (uint64_t)recip32 * correction32 >> 31; | ||||
| 
 | ||||
|     // recip32 might have overflowed to exactly zero in the preceding
 | ||||
|     // computation if the high word of b is exactly 1.0.  This would sabotage
 | ||||
|     // the full-width final stage of the computation that follows, so we adjust
 | ||||
|     // recip32 downward by one bit.
 | ||||
|     recip32--; | ||||
| 
 | ||||
|     // We need to perform one more iteration to get us to 56 binary digits;
 | ||||
|     // The last iteration needs to happen with extra precision.
 | ||||
|     const uint32_t q63blo = bSignificand << 11; | ||||
|     uint64_t correction, reciprocal; | ||||
|     correction = -((uint64_t)recip32*q31b + ((uint64_t)recip32*q63blo >> 32)); | ||||
|     uint32_t cHi = correction >> 32; | ||||
|     uint32_t cLo = correction; | ||||
|     reciprocal = (uint64_t)recip32*cHi + ((uint64_t)recip32*cLo >> 32); | ||||
| 
 | ||||
|     // We already adjusted the 32-bit estimate, now we need to adjust the final
 | ||||
|     // 64-bit reciprocal estimate downward to ensure that it is strictly smaller
 | ||||
|     // than the infinitely precise exact reciprocal.  Because the computation
 | ||||
|     // of the Newton-Raphson step is truncating at every step, this adjustment
 | ||||
|     // is small; most of the work is already done.
 | ||||
|     reciprocal -= 2; | ||||
| 
 | ||||
|     // The numerical reciprocal is accurate to within 2^-56, lies in the
 | ||||
|     // interval [0.5, 1.0), and is strictly smaller than the true reciprocal
 | ||||
|     // of b.  Multiplying a by this reciprocal thus gives a numerical q = a/b
 | ||||
|     // in Q53 with the following properties:
 | ||||
|     //
 | ||||
|     //    1. q < a/b
 | ||||
|     //    2. q is in the interval [0.5, 2.0)
 | ||||
|     //    3. the error in q is bounded away from 2^-53 (actually, we have a
 | ||||
|     //       couple of bits to spare, but this is all we need).
 | ||||
| 
 | ||||
|     // We need a 64 x 64 multiply high to compute q, which isn't a basic
 | ||||
|     // operation in C, so we need to be a little bit fussy.  wideMultiply( )
 | ||||
|     // writes the high and low halves of the product through the two pointers.
 | ||||
|     rep_t quotient, quotientLo; | ||||
|     wideMultiply(aSignificand << 2, reciprocal, &quotient, &quotientLo); | ||||
| 
 | ||||
|     // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0).
 | ||||
|     // In either case, we are going to compute a residual of the form
 | ||||
|     //
 | ||||
|     //     r = a - q*b
 | ||||
|     //
 | ||||
|     // We know from the construction of q that r satisfies:
 | ||||
|     //
 | ||||
|     //     0 <= r < ulp(q)*b
 | ||||
|     //
 | ||||
|     // if r is greater than 1/2 ulp(q)*b, then q rounds up.  Otherwise, we
 | ||||
|     // already have the correct result.  The exact halfway case cannot occur.
 | ||||
|     // We also take this time to right shift quotient if it falls in the [1,2)
 | ||||
|     // range and adjust the exponent accordingly.
 | ||||
|     rep_t residual; | ||||
|     if (quotient < (implicitBit << 1)) { | ||||
|         residual = (aSignificand << 53) - quotient * bSignificand; | ||||
|         quotientExponent--; | ||||
|     } else { | ||||
|         quotient >>= 1; | ||||
|         residual = (aSignificand << 52) - quotient * bSignificand; | ||||
|     } | ||||
| 
 | ||||
|     const int writtenExponent = quotientExponent + exponentBias; | ||||
| 
 | ||||
|     if (writtenExponent >= maxExponent) { | ||||
|         // If we have overflowed the exponent, return infinity.
 | ||||
|         return fromRep(infRep | quotientSign); | ||||
|     } | ||||
| 
 | ||||
|     else if (writtenExponent < 1) { | ||||
|         // Flush denormals to zero.  In the future, it would be nice to add
 | ||||
|         // code to round them correctly.
 | ||||
|         return fromRep(quotientSign); | ||||
|     } | ||||
| 
 | ||||
|     else { | ||||
|         const bool round = (residual << 1) > bSignificand; | ||||
|         // Clear the implicit bit
 | ||||
|         rep_t absResult = quotient & significandMask; | ||||
|         // Insert the exponent
 | ||||
|         absResult |= (rep_t)writtenExponent << significandBits; | ||||
|         // Round
 | ||||
|         absResult += round; | ||||
|         // Insert the sign and return
 | ||||
|         return fromRep(absResult | quotientSign); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| #if defined(__ARM_EABI__)   // __aeabi_ddiv is only provided when __ARM_EABI__ is defined.
 | ||||
| #if defined(COMPILER_RT_ARMHF_TARGET)   // With COMPILER_RT_ARMHF_TARGET: a real function that calls __divdf3.
 | ||||
| AEABI_RTABI fp_t __aeabi_ddiv(fp_t a, fp_t b) { | ||||
|   return __divdf3(a, b); | ||||
| } | ||||
| #else   // Otherwise: declared through COMPILER_RT_ALIAS as an alias of __divdf3.
 | ||||
| AEABI_RTABI fp_t __aeabi_ddiv(fp_t a, fp_t b) COMPILER_RT_ALIAS(__divdf3); | ||||
| #endif   // COMPILER_RT_ARMHF_TARGET
 | ||||
| #endif   // __ARM_EABI__
 | ||||
							
								
								
									
										32
									
								
								third_party/compiler_rt/divdi3.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										32
									
								
								third_party/compiler_rt/divdi3.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,32 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- divdi3.c - Implement __divdi3 -------------------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  * | ||||
|  * This file implements __divdi3 for the compiler_rt library. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #include "third_party/compiler_rt/int_lib.h" | ||||
| 
 | ||||
| /* Returns: a / b */ | ||||
| 
 | ||||
| /* Signed 64-bit division built on the unsigned routine __udivmoddi4: divide | ||||
|  * the magnitudes, then restore the sign of the quotient.  The magnitudes are | ||||
|  * formed in unsigned arithmetic so that negating the most negative di_int | ||||
|  * wraps instead of overflowing a signed type (undefined behaviour). | ||||
|  */ | ||||
| COMPILER_RT_ABI di_int | ||||
| __divdi3(di_int a, di_int b) | ||||
| { | ||||
|     const int bits_in_dword_m1 = (int)(sizeof(di_int) * CHAR_BIT) - 1; | ||||
|     di_int s_a = a >> bits_in_dword_m1;           /* s_a = a < 0 ? -1 : 0 */ | ||||
|     di_int s_b = b >> bits_in_dword_m1;           /* s_b = b < 0 ? -1 : 0 */ | ||||
|     du_int a_u = ((du_int)a ^ (du_int)s_a) - (du_int)s_a;  /* |a|: negate if s_a == -1 */ | ||||
|     du_int b_u = ((du_int)b ^ (du_int)s_b) - (du_int)s_b;  /* |b|: negate if s_b == -1 */ | ||||
|     s_a ^= s_b;                                  /* sign of quotient */ | ||||
|     return (__udivmoddi4(a_u, b_u, (du_int*)0) ^ (du_int)s_a) - (du_int)s_a;  /* negate if s_a == -1 */ | ||||
| } | ||||
							
								
								
									
										28
									
								
								third_party/compiler_rt/divmoddi4.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										28
									
								
								third_party/compiler_rt/divmoddi4.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,28 @@ | |||
| /* clang-format off */ | ||||
| /*===-- divmoddi4.c - Implement __divmoddi4 --------------------------------===
 | ||||
|  * | ||||
|  *                    The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  * | ||||
|  * This file implements __divmoddi4 for the compiler_rt library. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #include "third_party/compiler_rt/int_lib.h" | ||||
| 
 | ||||
| /* Returns: a / b, *rem = a % b  */ | ||||
| 
 | ||||
| COMPILER_RT_ABI di_int | ||||
| __divmoddi4(di_int a, di_int b, di_int* rem) | ||||
| { | ||||
|   di_int d = __divdi3(a,b); | ||||
|   *rem = a - (d*b); | ||||
|   return d; | ||||
| } | ||||
							
								
								
									
										30
									
								
								third_party/compiler_rt/divmodsi4.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								third_party/compiler_rt/divmodsi4.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,30 @@ | |||
| /* clang-format off */ | ||||
| /*===-- divmodsi4.c - Implement __divmodsi4 --------------------------------===
 | ||||
|  * | ||||
|  *                    The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  * | ||||
|  * This file implements __divmodsi4 for the compiler_rt library. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #include "third_party/compiler_rt/int_lib.h" | ||||
| 
 | ||||
| /* Returns: a / b, *rem = a % b  */ | ||||
| 
 | ||||
| COMPILER_RT_ABI si_int | ||||
| __divmodsi4(si_int a, si_int b, si_int* rem) | ||||
| { | ||||
|   si_int d = __divsi3(a,b); | ||||
|   *rem = a - (d*b); | ||||
|   return d;  | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
							
								
								
									
										66
									
								
								third_party/compiler_rt/divsc3.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										66
									
								
								third_party/compiler_rt/divsc3.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,66 @@ | |||
| /* clang-format off */ | ||||
| /*===-- divsc3.c - Implement __divsc3 -------------------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  * | ||||
|  * This file implements __divsc3 for the compiler_rt library. | ||||
|  * | ||||
|  *===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #define SINGLE_PRECISION | ||||
| #include "third_party/compiler_rt/fp_lib.inc" | ||||
| #include "third_party/compiler_rt/int_lib.h" | ||||
| #include "third_party/compiler_rt/int_math.h" | ||||
| 
 | ||||
| /* Returns: the quotient of (a + ib) / (c + id) */ | ||||
| 
 | ||||
| COMPILER_RT_ABI Fcomplex | ||||
| __divsc3(float __a, float __b, float __c, float __d) | ||||
| { | ||||
|     int __ilogbw = 0; | ||||
|     float __logbw = | ||||
|         __compiler_rt_logbf(crt_fmaxf(crt_fabsf(__c), crt_fabsf(__d))); | ||||
|     if (crt_isfinite(__logbw)) | ||||
|     { | ||||
|         __ilogbw = (int)__logbw; | ||||
|         __c = crt_scalbnf(__c, -__ilogbw); | ||||
|         __d = crt_scalbnf(__d, -__ilogbw); | ||||
|     } | ||||
|     float __denom = __c * __c + __d * __d; | ||||
|     Fcomplex z; | ||||
|     COMPLEX_REAL(z) = crt_scalbnf((__a * __c + __b * __d) / __denom, -__ilogbw); | ||||
|     COMPLEX_IMAGINARY(z) = crt_scalbnf((__b * __c - __a * __d) / __denom, -__ilogbw); | ||||
|     if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) | ||||
|     { | ||||
|         if ((__denom == 0) && (!crt_isnan(__a) || !crt_isnan(__b))) | ||||
|         { | ||||
|             COMPLEX_REAL(z) = crt_copysignf(CRT_INFINITY, __c) * __a; | ||||
|             COMPLEX_IMAGINARY(z) = crt_copysignf(CRT_INFINITY, __c) * __b; | ||||
|         } | ||||
|         else if ((crt_isinf(__a) || crt_isinf(__b)) && | ||||
|                  crt_isfinite(__c) && crt_isfinite(__d)) | ||||
|         { | ||||
|             __a = crt_copysignf(crt_isinf(__a) ? 1 : 0, __a); | ||||
|             __b = crt_copysignf(crt_isinf(__b) ? 1 : 0, __b); | ||||
|             COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d); | ||||
|             COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d); | ||||
|         } | ||||
|         else if (crt_isinf(__logbw) && __logbw > 0 && | ||||
|                  crt_isfinite(__a) && crt_isfinite(__b)) | ||||
|         { | ||||
|             __c = crt_copysignf(crt_isinf(__c) ? 1 : 0, __c); | ||||
|             __d = crt_copysignf(crt_isinf(__d) ? 1 : 0, __d); | ||||
|             COMPLEX_REAL(z) = 0 * (__a * __c + __b * __d); | ||||
|             COMPLEX_IMAGINARY(z) = 0 * (__b * __c - __a * __d); | ||||
|         } | ||||
|     } | ||||
|     return z; | ||||
| } | ||||
							
								
								
									
										181
									
								
								third_party/compiler_rt/divsf3.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										181
									
								
								third_party/compiler_rt/divsf3.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,181 @@ | |||
| /* clang-format off */ | ||||
| //===-- lib/divsf3.c - Single-precision division ------------------*- C -*-===//
 | ||||
| //
 | ||||
| //                     The LLVM Compiler Infrastructure
 | ||||
| //
 | ||||
| // This file is dual licensed under the MIT and the University of Illinois Open
 | ||||
| // Source Licenses. See LICENSE.TXT for details.
 | ||||
| //
 | ||||
| //===----------------------------------------------------------------------===//
 | ||||
| //
 | ||||
| // This file implements single-precision soft-float division
 | ||||
| // with the IEEE-754 default rounding (to nearest, ties to even).
 | ||||
| //
 | ||||
| // For simplicity, this implementation currently flushes denormals to zero.
 | ||||
| // It should be a fairly straightforward exercise to implement gradual
 | ||||
| // underflow with correct rounding.
 | ||||
| //
 | ||||
| //===----------------------------------------------------------------------===//
 | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #define SINGLE_PRECISION | ||||
| #include "libc/literal.h" | ||||
| #include "third_party/compiler_rt/fp_lib.inc" | ||||
| 
 | ||||
| COMPILER_RT_ABI fp_t | ||||
| __divsf3(fp_t a, fp_t b) { | ||||
|      | ||||
|     const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; | ||||
|     const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; | ||||
|     const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit; | ||||
|      | ||||
|     rep_t aSignificand = toRep(a) & significandMask; | ||||
|     rep_t bSignificand = toRep(b) & significandMask; | ||||
|     int scale = 0; | ||||
|      | ||||
|     // Detect if a or b is zero, denormal, infinity, or NaN.
 | ||||
|     if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) { | ||||
|          | ||||
|         const rep_t aAbs = toRep(a) & absMask; | ||||
|         const rep_t bAbs = toRep(b) & absMask; | ||||
|          | ||||
|         // NaN / anything = qNaN
 | ||||
|         if (aAbs > infRep) return fromRep(toRep(a) | quietBit); | ||||
|         // anything / NaN = qNaN
 | ||||
|         if (bAbs > infRep) return fromRep(toRep(b) | quietBit); | ||||
|          | ||||
|         if (aAbs == infRep) { | ||||
|             // infinity / infinity = NaN
 | ||||
|             if (bAbs == infRep) return fromRep(qnanRep); | ||||
|             // infinity / anything else = +/- infinity
 | ||||
|             else return fromRep(aAbs | quotientSign); | ||||
|         } | ||||
|          | ||||
|         // anything else / infinity = +/- 0
 | ||||
|         if (bAbs == infRep) return fromRep(quotientSign); | ||||
|          | ||||
|         if (!aAbs) { | ||||
|             // zero / zero = NaN
 | ||||
|             if (!bAbs) return fromRep(qnanRep); | ||||
|             // zero / anything else = +/- zero
 | ||||
|             else return fromRep(quotientSign); | ||||
|         } | ||||
|         // anything else / zero = +/- infinity
 | ||||
|         if (!bAbs) return fromRep(infRep | quotientSign); | ||||
|          | ||||
|         // one or both of a or b is denormal, the other (if applicable) is a
 | ||||
|         // normal number.  Renormalize one or both of a and b, and set scale to
 | ||||
|         // include the necessary exponent adjustment.
 | ||||
|         if (aAbs < implicitBit) scale += normalize(&aSignificand); | ||||
|         if (bAbs < implicitBit) scale -= normalize(&bSignificand); | ||||
|     } | ||||
|      | ||||
|     // Or in the implicit significand bit.  (If we fell through from the
 | ||||
|     // denormal path it was already set by normalize( ), but setting it twice
 | ||||
|     // won't hurt anything.)
 | ||||
|     aSignificand |= implicitBit; | ||||
|     bSignificand |= implicitBit; | ||||
|     int quotientExponent = aExponent - bExponent + scale; | ||||
|      | ||||
|     // Align the significand of b as a Q31 fixed-point number in the range
 | ||||
|     // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax
 | ||||
|     // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2.  This
 | ||||
|     // is accurate to about 3.5 binary digits.
 | ||||
|     uint32_t q31b = bSignificand << 8; | ||||
|     uint32_t reciprocal = UINT32_C(0x7504f333) - q31b; | ||||
|      | ||||
|     // Now refine the reciprocal estimate using a Newton-Raphson iteration:
 | ||||
|     //
 | ||||
|     //     x1 = x0 * (2 - x0 * b)
 | ||||
|     //
 | ||||
|     // This doubles the number of correct binary digits in the approximation
 | ||||
|     // with each iteration, so after three iterations, we have about 28 binary
 | ||||
|     // digits of accuracy.
 | ||||
|     uint32_t correction; | ||||
|     correction = -((uint64_t)reciprocal * q31b >> 32); | ||||
|     reciprocal = (uint64_t)reciprocal * correction >> 31; | ||||
|     correction = -((uint64_t)reciprocal * q31b >> 32); | ||||
|     reciprocal = (uint64_t)reciprocal * correction >> 31; | ||||
|     correction = -((uint64_t)reciprocal * q31b >> 32); | ||||
|     reciprocal = (uint64_t)reciprocal * correction >> 31; | ||||
|      | ||||
|     // Exhaustive testing shows that the error in reciprocal after three steps
 | ||||
|     // is in the interval [-0x1.f58108p-31, 0x1.d0e48cp-29], in line with our
 | ||||
|     // expectations.  We bump the reciprocal by a tiny value to force the error
 | ||||
|     // to be strictly positive (in the range [0x1.4fdfp-37,0x1.287246p-29], to
 | ||||
|     // be specific).  This also causes 1/1 to give a sensible approximation
 | ||||
|     // instead of zero (due to overflow).
 | ||||
|     reciprocal -= 2; | ||||
|      | ||||
|     // The numerical reciprocal is accurate to within 2^-28, lies in the
 | ||||
|     // interval [0x1.000000eep-1, 0x1.fffffffcp-1], and is strictly smaller
 | ||||
|     // than the true reciprocal of b.  Multiplying a by this reciprocal thus
 | ||||
|     // gives a numerical q = a/b in Q24 with the following properties:
 | ||||
|     //
 | ||||
|     //    1. q < a/b
 | ||||
|     //    2. q is in the interval [0x1.000000eep-1, 0x1.fffffffcp0)
 | ||||
|     //    3. the error in q is at most 2^-24 + 2^-27 -- the 2^24 term comes
 | ||||
|     //       from the fact that we truncate the product, and the 2^27 term
 | ||||
|     //       is the error in the reciprocal of b scaled by the maximum
 | ||||
|     //       possible value of a.  As a consequence of this error bound,
 | ||||
|     //       either q or nextafter(q) is the correctly rounded 
 | ||||
|     rep_t quotient = (uint64_t)reciprocal*(aSignificand << 1) >> 32; | ||||
|      | ||||
|     // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0).
 | ||||
|     // In either case, we are going to compute a residual of the form
 | ||||
|     //
 | ||||
|     //     r = a - q*b
 | ||||
|     //
 | ||||
|     // We know from the construction of q that r satisfies:
 | ||||
|     //
 | ||||
|     //     0 <= r < ulp(q)*b
 | ||||
|     // 
 | ||||
|     // if r is greater than 1/2 ulp(q)*b, then q rounds up.  Otherwise, we
 | ||||
|     // already have the correct result.  The exact halfway case cannot occur.
 | ||||
|     // We also take this time to right shift quotient if it falls in the [1,2)
 | ||||
|     // range and adjust the exponent accordingly.
 | ||||
|     rep_t residual; | ||||
|     if (quotient < (implicitBit << 1)) { | ||||
|         residual = (aSignificand << 24) - quotient * bSignificand; | ||||
|         quotientExponent--; | ||||
|     } else { | ||||
|         quotient >>= 1; | ||||
|         residual = (aSignificand << 23) - quotient * bSignificand; | ||||
|     } | ||||
| 
 | ||||
|     const int writtenExponent = quotientExponent + exponentBias; | ||||
|      | ||||
|     if (writtenExponent >= maxExponent) { | ||||
|         // If we have overflowed the exponent, return infinity.
 | ||||
|         return fromRep(infRep | quotientSign); | ||||
|     } | ||||
|      | ||||
|     else if (writtenExponent < 1) { | ||||
|         // Flush denormals to zero.  In the future, it would be nice to add
 | ||||
|         // code to round them correctly.
 | ||||
|         return fromRep(quotientSign); | ||||
|     } | ||||
|      | ||||
|     else { | ||||
|         const bool round = (residual << 1) > bSignificand; | ||||
|         // Clear the implicit bit
 | ||||
|         rep_t absResult = quotient & significandMask; | ||||
|         // Insert the exponent
 | ||||
|         absResult |= (rep_t)writtenExponent << significandBits; | ||||
|         // Round
 | ||||
|         absResult += round; | ||||
|         // Insert the sign and return
 | ||||
|         return fromRep(absResult | quotientSign); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| #if defined(__ARM_EABI__) | ||||
| #if defined(COMPILER_RT_ARMHF_TARGET) | ||||
| AEABI_RTABI fp_t __aeabi_fdiv(fp_t a, fp_t b) { | ||||
|   return __divsf3(a, b); | ||||
| } | ||||
| #else | ||||
| AEABI_RTABI fp_t __aeabi_fdiv(fp_t a, fp_t b) COMPILER_RT_ALIAS(__divsf3); | ||||
| #endif | ||||
| #endif | ||||
							
								
								
									
										42
									
								
								third_party/compiler_rt/divsi3.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								third_party/compiler_rt/divsi3.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,42 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- divsi3.c - Implement __divsi3 -------------------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  * | ||||
|  * This file implements __divsi3 for the compiler_rt library. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #include "third_party/compiler_rt/int_lib.h" | ||||
| 
 | ||||
| /* Returns: a / b */ | ||||
| 
 | ||||
| COMPILER_RT_ABI si_int | ||||
| __divsi3(si_int a, si_int b) | ||||
| { | ||||
|     const int bits_in_word_m1 = (int)(sizeof(si_int) * CHAR_BIT) - 1; | ||||
|     si_int s_a = a >> bits_in_word_m1;           /* s_a = a < 0 ? -1 : 0 */ | ||||
|     si_int s_b = b >> bits_in_word_m1;           /* s_b = b < 0 ? -1 : 0 */ | ||||
|     a = (a ^ s_a) - s_a;                         /* negate if s_a == -1 */ | ||||
|     b = (b ^ s_b) - s_b;                         /* negate if s_b == -1 */ | ||||
|     s_a ^= s_b;                                  /* sign of quotient */ | ||||
|     /*
 | ||||
|      * On CPUs without unsigned hardware division support, | ||||
|      *  this calls __udivsi3 (notice the cast to su_int). | ||||
|      * On CPUs with unsigned hardware division support, | ||||
|      *  this uses the unsigned division instruction. | ||||
|      */ | ||||
|     return ((su_int)a/(su_int)b ^ s_a) - s_a;    /* negate if s_a == -1 */ | ||||
| } | ||||
| 
 | ||||
| #if defined(__ARM_EABI__) | ||||
| AEABI_RTABI si_int __aeabi_idiv(si_int a, si_int b) COMPILER_RT_ALIAS(__divsi3); | ||||
| #endif | ||||
							
								
								
									
										66
									
								
								third_party/compiler_rt/divtc3.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										66
									
								
								third_party/compiler_rt/divtc3.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,66 @@ | |||
| /* clang-format off */ | ||||
| /*===-- divtc3.c - Implement __divtc3 -------------------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  * | ||||
|  * This file implements __divtc3 for the compiler_rt library. | ||||
|  * | ||||
|  *===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #define QUAD_PRECISION | ||||
| #include "third_party/compiler_rt/fp_lib.inc" | ||||
| #include "third_party/compiler_rt/int_lib.h" | ||||
| #include "third_party/compiler_rt/int_math.h" | ||||
| 
 | ||||
| /* Returns: the quotient of (a + ib) / (c + id) */ | ||||
| 
 | ||||
| COMPILER_RT_ABI Lcomplex | ||||
| __divtc3(long double __a, long double __b, long double __c, long double __d) | ||||
| { | ||||
|     int __ilogbw = 0; | ||||
|     long double __logbw = | ||||
|         __compiler_rt_logbl(crt_fmaxl(crt_fabsl(__c), crt_fabsl(__d))); | ||||
|     if (crt_isfinite(__logbw)) | ||||
|     { | ||||
|         __ilogbw = (int)__logbw; | ||||
|         __c = crt_scalbnl(__c, -__ilogbw); | ||||
|         __d = crt_scalbnl(__d, -__ilogbw); | ||||
|     } | ||||
|     long double __denom = __c * __c + __d * __d; | ||||
|     Lcomplex z; | ||||
|     COMPLEX_REAL(z) = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw); | ||||
|     COMPLEX_IMAGINARY(z) = crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw); | ||||
|     if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) | ||||
|     { | ||||
|         if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b))) | ||||
|         { | ||||
|             COMPLEX_REAL(z) = crt_copysignl(CRT_INFINITY, __c) * __a; | ||||
|             COMPLEX_IMAGINARY(z) = crt_copysignl(CRT_INFINITY, __c) * __b; | ||||
|         } | ||||
|         else if ((crt_isinf(__a) || crt_isinf(__b)) && | ||||
|                  crt_isfinite(__c) && crt_isfinite(__d)) | ||||
|         { | ||||
|             __a = crt_copysignl(crt_isinf(__a) ? 1.0 : 0.0, __a); | ||||
|             __b = crt_copysignl(crt_isinf(__b) ? 1.0 : 0.0, __b); | ||||
|             COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d); | ||||
|             COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d); | ||||
|         } | ||||
|         else if (crt_isinf(__logbw) && __logbw > 0.0 && | ||||
|                  crt_isfinite(__a) && crt_isfinite(__b)) | ||||
|         { | ||||
|             __c = crt_copysignl(crt_isinf(__c) ? 1.0 : 0.0, __c); | ||||
|             __d = crt_copysignl(crt_isinf(__d) ? 1.0 : 0.0, __d); | ||||
|             COMPLEX_REAL(z) = 0.0 * (__a * __c + __b * __d); | ||||
|             COMPLEX_IMAGINARY(z) = 0.0 * (__b * __c - __a * __d); | ||||
|         } | ||||
|     } | ||||
|     return z; | ||||
| } | ||||
							
								
								
									
										207
									
								
								third_party/compiler_rt/divtf3.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										207
									
								
								third_party/compiler_rt/divtf3.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,207 @@ | |||
| /* clang-format off */ | ||||
| //===-- lib/divtf3.c - Quad-precision division --------------------*- C -*-===//
 | ||||
| //
 | ||||
| //                     The LLVM Compiler Infrastructure
 | ||||
| //
 | ||||
| // This file is dual licensed under the MIT and the University of Illinois Open
 | ||||
| // Source Licenses. See LICENSE.TXT for details.
 | ||||
| //
 | ||||
| //===----------------------------------------------------------------------===//
 | ||||
| //
 | ||||
| // This file implements quad-precision soft-float division
 | ||||
| // with the IEEE-754 default rounding (to nearest, ties to even).
 | ||||
| //
 | ||||
| // For simplicity, this implementation currently flushes denormals to zero.
 | ||||
| // It should be a fairly straightforward exercise to implement gradual
 | ||||
| // underflow with correct rounding.
 | ||||
| //
 | ||||
| //===----------------------------------------------------------------------===//
 | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #define QUAD_PRECISION | ||||
| #include "libc/literal.h" | ||||
| #include "third_party/compiler_rt/fp_lib.inc" | ||||
| 
 | ||||
#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
// Quad-precision soft-float division: returns a / b.
//
// Zeros, infinities and NaNs are handled up front; denormal inputs are
// renormalized.  The quotient is then formed by multiplying a's significand
// with a fixed-point reciprocal of b's significand (a 64-bit Newton-Raphson
// estimate followed by one full-width refinement), and rounded using a
// residual.  Results whose exponent would be below the normal range are
// flushed to signed zero.
//
// Only compiled when both CRT_HAS_128BIT and CRT_LDBL_128BIT are defined.
COMPILER_RT_ABI fp_t __divtf3(fp_t a, fp_t b) {

    const unsigned int aExponent = toRep(a) >> significandBits & maxExponent;
    const unsigned int bExponent = toRep(b) >> significandBits & maxExponent;
    const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit;

    rep_t aSignificand = toRep(a) & significandMask;
    rep_t bSignificand = toRep(b) & significandMask;
    int scale = 0;

    // Detect if a or b is zero, denormal, infinity, or NaN.  An exponent
    // field of 0 or maxExponent fails the unsigned range test below.
    if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) {

        const rep_t aAbs = toRep(a) & absMask;
        const rep_t bAbs = toRep(b) & absMask;

        // NaN / anything = qNaN
        if (aAbs > infRep) return fromRep(toRep(a) | quietBit);
        // anything / NaN = qNaN
        if (bAbs > infRep) return fromRep(toRep(b) | quietBit);

        if (aAbs == infRep) {
            // infinity / infinity = NaN
            if (bAbs == infRep) return fromRep(qnanRep);
            // infinity / anything else = +/- infinity
            else return fromRep(aAbs | quotientSign);
        }

        // anything else / infinity = +/- 0
        if (bAbs == infRep) return fromRep(quotientSign);

        if (!aAbs) {
            // zero / zero = NaN
            if (!bAbs) return fromRep(qnanRep);
            // zero / anything else = +/- zero
            else return fromRep(quotientSign);
        }
        // anything else / zero = +/- infinity
        if (!bAbs) return fromRep(infRep | quotientSign);

        // One or both of a and b is denormal; the other (if applicable) is a
        // normal number.  Renormalize one or both of a and b, and set scale
        // to include the necessary exponent adjustment.
        if (aAbs < implicitBit) scale += normalize(&aSignificand);
        if (bAbs < implicitBit) scale -= normalize(&bSignificand);
    }

    // Or in the implicit significand bit.  (If we fell through from the
    // denormal path it was already set by normalize( ), but setting it twice
    // won't hurt anything.)
    aSignificand |= implicitBit;
    bSignificand |= implicitBit;
    int quotientExponent = aExponent - bExponent + scale;

    // Align the significand of b as a Q63 fixed-point number in the range
    // [1, 2.0) and get a Q64 approximate reciprocal using a small minimax
    // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2.  This
    // is accurate to about 3.5 binary digits.
    const uint64_t q63b = bSignificand >> 49;
    uint64_t recip64 = UINT64_C(0x7504f333F9DE6484) - q63b;
    // 0x7504f333F9DE6484 / 2^64 + 1 = 3/4 + 1/sqrt(2)

    // Now refine the reciprocal estimate using a Newton-Raphson iteration:
    //
    //     x1 = x0 * (2 - x0 * b)
    //
    // This doubles the number of correct binary digits in the approximation
    // with each iteration.
    uint64_t correction64;
    correction64 = -((rep_t)recip64 * q63b >> 64);
    recip64 = (rep_t)recip64 * correction64 >> 63;
    correction64 = -((rep_t)recip64 * q63b >> 64);
    recip64 = (rep_t)recip64 * correction64 >> 63;
    correction64 = -((rep_t)recip64 * q63b >> 64);
    recip64 = (rep_t)recip64 * correction64 >> 63;
    correction64 = -((rep_t)recip64 * q63b >> 64);
    recip64 = (rep_t)recip64 * correction64 >> 63;
    correction64 = -((rep_t)recip64 * q63b >> 64);
    recip64 = (rep_t)recip64 * correction64 >> 63;

    // recip64 might have overflowed to exactly zero in the preceding
    // computation if the high word of b is exactly 1.0.  This would sabotage
    // the full-width final stage of the computation that follows, so we adjust
    // recip64 downward by one bit.
    recip64--;

    // We need to perform one more iteration to get us to 112 binary digits;
    // the last iteration needs to happen with extra precision.
    const uint64_t q127blo = bSignificand << 15;
    rep_t correction, reciprocal;

    // NOTE: This operation is equivalent to __multi3, which is not
    //       implemented on some architectures.  In each wideMultiply call
    //       below only the second output is used; the first goes to dummy.
    rep_t r64q63, r64q127, r64cH, r64cL, dummy;
    wideMultiply((rep_t)recip64, (rep_t)q63b, &dummy, &r64q63);
    wideMultiply((rep_t)recip64, (rep_t)q127blo, &dummy, &r64q127);

    correction = -(r64q63 + (r64q127 >> 64));

    uint64_t cHi = correction >> 64;
    uint64_t cLo = correction;

    wideMultiply((rep_t)recip64, (rep_t)cHi, &dummy, &r64cH);
    wideMultiply((rep_t)recip64, (rep_t)cLo, &dummy, &r64cL);

    reciprocal = r64cH + (r64cL >> 64);

    // We already adjusted the 64-bit estimate, now we need to adjust the final
    // 128-bit reciprocal estimate downward to ensure that it is strictly smaller
    // than the infinitely precise exact reciprocal.  Because the computation
    // of the Newton-Raphson step is truncating at every step, this adjustment
    // is small; most of the work is already done.
    reciprocal -= 2;

    // The numerical reciprocal is accurate to within 2^-112, lies in the
    // interval [0.5, 1.0), and is strictly smaller than the true reciprocal
    // of b.  Multiplying a by this reciprocal thus gives a numerical q = a/b
    // in Q127 with the following properties:
    //
    //    1. q < a/b
    //    2. q is in the interval [0.5, 2.0)
    //    3. the error in q is bounded away from 2^-113 (actually, we have a
    //       couple of bits to spare, but this is all we need).

    // We need a 128 x 128 multiply high to compute q, which isn't a basic
    // operation in C, so we need to be a little bit fussy.  wideMultiply
    // writes its results through the two pointer arguments; quotient takes
    // the first output and quotientLo the second.
    rep_t quotient, quotientLo;
    wideMultiply(aSignificand << 2, reciprocal, &quotient, &quotientLo);

    // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0).
    // In either case, we are going to compute a residual of the form
    //
    //     r = a - q*b
    //
    // We know from the construction of q that r satisfies:
    //
    //     0 <= r < ulp(q)*b
    //
    // If r is greater than 1/2 ulp(q)*b, then q rounds up.  Otherwise, we
    // already have the correct result.  The exact halfway case cannot occur.
    // We also take this time to right shift quotient if it falls in the [1,2)
    // range and adjust the exponent accordingly.
    rep_t residual;
    rep_t qb;

    if (quotient < (implicitBit << 1)) {
        wideMultiply(quotient, bSignificand, &dummy, &qb);
        residual = (aSignificand << 113) - qb;
        quotientExponent--;
    } else {
        quotient >>= 1;
        wideMultiply(quotient, bSignificand, &dummy, &qb);
        residual = (aSignificand << 112) - qb;
    }

    const int writtenExponent = quotientExponent + exponentBias;

    if (writtenExponent >= maxExponent) {
        // If we have overflowed the exponent, return infinity.
        return fromRep(infRep | quotientSign);
    }
    else if (writtenExponent < 1) {
        // Flush denormals to zero.  In the future, it would be nice to add
        // code to round them correctly.
        return fromRep(quotientSign);
    }
    else {
        const bool round = (residual << 1) >= bSignificand;
        // Clear the implicit bit
        rep_t absResult = quotient & significandMask;
        // Insert the exponent
        absResult |= (rep_t)writtenExponent << significandBits;
        // Round
        absResult += round;
        // Insert the sign and return
        const long double result = fromRep(absResult | quotientSign);
        return result;
    }
}

#endif
							
								
								
									
										36
									
								
								third_party/compiler_rt/divti3.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								third_party/compiler_rt/divti3.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,36 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- divti3.c - Implement __divti3 -------------------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  * | ||||
|  * This file implements __divti3 for the compiler_rt library. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #include "third_party/compiler_rt/int_lib.h" | ||||
| 
 | ||||
| #ifdef CRT_HAS_128BIT | ||||
| 
 | ||||
| /* Returns: a / b */ | ||||
| 
 | ||||
| COMPILER_RT_ABI ti_int | ||||
| __divti3(ti_int a, ti_int b) | ||||
| { | ||||
|     const int bits_in_tword_m1 = (int)(sizeof(ti_int) * CHAR_BIT) - 1; | ||||
|     ti_int s_a = a >> bits_in_tword_m1;           /* s_a = a < 0 ? -1 : 0 */ | ||||
|     ti_int s_b = b >> bits_in_tword_m1;           /* s_b = b < 0 ? -1 : 0 */ | ||||
|     a = (a ^ s_a) - s_a;                         /* negate if s_a == -1 */ | ||||
|     b = (b ^ s_b) - s_b;                         /* negate if s_b == -1 */ | ||||
|     s_a ^= s_b;                                  /* sign of quotient */ | ||||
|     return (__udivmodti4(a, b, (tu_int*)0) ^ s_a) - s_a;  /* negate if s_a == -1 */ | ||||
| } | ||||
| 
 | ||||
| #endif /* CRT_HAS_128BIT */ | ||||
							
								
								
									
										66
									
								
								third_party/compiler_rt/divxc3.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										66
									
								
								third_party/compiler_rt/divxc3.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,66 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- divxc3.c - Implement __divxc3 -------------------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  * | ||||
|  * This file implements __divxc3 for the compiler_rt library. | ||||
|  * | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #if !_ARCH_PPC | ||||
| 
 | ||||
| #include "third_party/compiler_rt/int_lib.h" | ||||
| #include "third_party/compiler_rt/int_math.h" | ||||
| 
 | ||||
| /* Returns: the quotient of (a + ib) / (c + id) */ | ||||
| 
 | ||||
| COMPILER_RT_ABI Lcomplex | ||||
| __divxc3(long double __a, long double __b, long double __c, long double __d) | ||||
| { | ||||
|     int __ilogbw = 0; | ||||
|     long double __logbw = crt_logbl(crt_fmaxl(crt_fabsl(__c), crt_fabsl(__d))); | ||||
|     if (crt_isfinite(__logbw)) | ||||
|     { | ||||
|         __ilogbw = (int)__logbw; | ||||
|         __c = crt_scalbnl(__c, -__ilogbw); | ||||
|         __d = crt_scalbnl(__d, -__ilogbw); | ||||
|     } | ||||
|     long double __denom = __c * __c + __d * __d; | ||||
|     Lcomplex z; | ||||
|     COMPLEX_REAL(z) = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw); | ||||
|     COMPLEX_IMAGINARY(z) = crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw); | ||||
|     if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) | ||||
|     { | ||||
|         if ((__denom == 0) && (!crt_isnan(__a) || !crt_isnan(__b))) | ||||
|         { | ||||
|             COMPLEX_REAL(z) = crt_copysignl(CRT_INFINITY, __c) * __a; | ||||
|             COMPLEX_IMAGINARY(z) = crt_copysignl(CRT_INFINITY, __c) * __b; | ||||
|         } | ||||
|         else if ((crt_isinf(__a) || crt_isinf(__b)) && | ||||
|                  crt_isfinite(__c) && crt_isfinite(__d)) | ||||
|         { | ||||
|             __a = crt_copysignl(crt_isinf(__a) ? 1 : 0, __a); | ||||
|             __b = crt_copysignl(crt_isinf(__b) ? 1 : 0, __b); | ||||
|             COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d); | ||||
|             COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d); | ||||
|         } | ||||
|         else if (crt_isinf(__logbw) && __logbw > 0 && | ||||
|                  crt_isfinite(__a) && crt_isfinite(__b)) | ||||
|         { | ||||
|             __c = crt_copysignl(crt_isinf(__c) ? 1 : 0, __c); | ||||
|             __d = crt_copysignl(crt_isinf(__d) ? 1 : 0, __d); | ||||
|             COMPLEX_REAL(z) = 0 * (__a * __c + __b * __d); | ||||
|             COMPLEX_IMAGINARY(z) = 0 * (__b * __c - __a * __d); | ||||
|         } | ||||
|     } | ||||
|     return z; | ||||
| } | ||||
| 
 | ||||
| #endif | ||||
							
								
								
									
										26
									
								
								third_party/compiler_rt/extenddftf2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								third_party/compiler_rt/extenddftf2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,26 @@ | |||
| /* clang-format off */ | ||||
| //===-- lib/extenddftf2.c - double -> quad conversion -------------*- C -*-===//
 | ||||
| //
 | ||||
| //                     The LLVM Compiler Infrastructure
 | ||||
| //
 | ||||
| // This file is dual licensed under the MIT and the University of Illinois Open
 | ||||
| // Source Licenses. See LICENSE.TXT for details.
 | ||||
| //
 | ||||
| //===----------------------------------------------------------------------===//
 | ||||
| //
 | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #define QUAD_PRECISION | ||||
| #include "third_party/compiler_rt/fp_lib.inc" | ||||
| 
 | ||||
| #if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) | ||||
| #define SRC_DOUBLE | ||||
| #define DST_QUAD | ||||
| #include "third_party/compiler_rt/fp_extend_impl.inc" | ||||
| 
 | ||||
| COMPILER_RT_ABI long double __extenddftf2(double a) { | ||||
|     return __extendXfYf2__(a); | ||||
| } | ||||
| 
 | ||||
| #endif | ||||
							
								
								
									
										36
									
								
								third_party/compiler_rt/extendhfsf2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								third_party/compiler_rt/extendhfsf2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,36 @@ | |||
| /* clang-format off */ | ||||
| //===-- lib/extendhfsf2.c - half -> single conversion -------------*- C -*-===//
 | ||||
| //
 | ||||
| //                     The LLVM Compiler Infrastructure
 | ||||
| //
 | ||||
| // This file is dual licensed under the MIT and the University of Illinois Open
 | ||||
| // Source Licenses. See LICENSE.TXT for details.
 | ||||
| //
 | ||||
| //===----------------------------------------------------------------------===//
 | ||||
| //
 | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #define SRC_HALF | ||||
| #define DST_SINGLE | ||||
| #include "third_party/compiler_rt/fp_extend_impl.inc" | ||||
| 
 | ||||
| // Use a forwarding definition and noinline to implement a poor man's alias,
 | ||||
| // as there isn't a good cross-platform way of defining one.
 | ||||
| COMPILER_RT_ABI __attribute__((__noinline__)) float __extendhfsf2(uint16_t a) { | ||||
|     return __extendXfYf2__(a); | ||||
| } | ||||
| 
 | ||||
| COMPILER_RT_ABI float __gnu_h2f_ieee(uint16_t a) { | ||||
|     return __extendhfsf2(a); | ||||
| } | ||||
| 
 | ||||
// ARM EABI name for the half -> single conversion (only built when
// __ARM_EABI__ is defined).  With COMPILER_RT_ARMHF_TARGET a real forwarding
// function is emitted; otherwise __aeabi_h2f is declared as an alias of
// __extendhfsf2.
// NOTE(review): presumably the forwarding function exists because the
// AEABI_RTABI calling convention differs on hard-float targets -- confirm
// against the definition of AEABI_RTABI.
#if defined(__ARM_EABI__) && defined(COMPILER_RT_ARMHF_TARGET)
AEABI_RTABI float __aeabi_h2f(uint16_t a) {
  const float widened = __extendhfsf2(a);
  return widened;
}
#elif defined(__ARM_EABI__)
AEABI_RTABI float __aeabi_h2f(uint16_t a) COMPILER_RT_ALIAS(__extendhfsf2);
#endif
							
								
								
									
										30
									
								
								third_party/compiler_rt/extendsfdf2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								third_party/compiler_rt/extendsfdf2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,30 @@ | |||
| /* clang-format off */ | ||||
| //===-- lib/extendsfdf2.c - single -> double conversion -----------*- C -*-===//
 | ||||
| //
 | ||||
| //                     The LLVM Compiler Infrastructure
 | ||||
| //
 | ||||
| // This file is dual licensed under the MIT and the University of Illinois Open
 | ||||
| // Source Licenses. See LICENSE.TXT for details.
 | ||||
| //
 | ||||
| //===----------------------------------------------------------------------===//
 | ||||
| //
 | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #define SRC_SINGLE | ||||
| #define DST_DOUBLE | ||||
| #include "third_party/compiler_rt/fp_extend_impl.inc" | ||||
| 
 | ||||
| COMPILER_RT_ABI double __extendsfdf2(float a) { | ||||
|     return __extendXfYf2__(a); | ||||
| } | ||||
| 
 | ||||
// ARM EABI name for the single -> double conversion (only built when
// __ARM_EABI__ is defined).  With COMPILER_RT_ARMHF_TARGET a real forwarding
// function is emitted; otherwise __aeabi_f2d is declared as an alias of
// __extendsfdf2.
// NOTE(review): presumably the forwarding function exists because the
// AEABI_RTABI calling convention differs on hard-float targets -- confirm
// against the definition of AEABI_RTABI.
#if defined(__ARM_EABI__) && defined(COMPILER_RT_ARMHF_TARGET)
AEABI_RTABI double __aeabi_f2d(float a) {
  const double widened = __extendsfdf2(a);
  return widened;
}
#elif defined(__ARM_EABI__)
AEABI_RTABI double __aeabi_f2d(float a) COMPILER_RT_ALIAS(__extendsfdf2);
#endif
							
								
								
									
										26
									
								
								third_party/compiler_rt/extendsftf2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								third_party/compiler_rt/extendsftf2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,26 @@ | |||
| /* clang-format off */ | ||||
| //===-- lib/extendsftf2.c - single -> quad conversion -------------*- C -*-===//
 | ||||
| //
 | ||||
| //                     The LLVM Compiler Infrastructure
 | ||||
| //
 | ||||
| // This file is dual licensed under the MIT and the University of Illinois Open
 | ||||
| // Source Licenses. See LICENSE.TXT for details.
 | ||||
| //
 | ||||
| //===----------------------------------------------------------------------===//
 | ||||
| //
 | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #define QUAD_PRECISION | ||||
| #include "third_party/compiler_rt/fp_lib.inc" | ||||
| 
 | ||||
| #if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) | ||||
| #define SRC_SINGLE | ||||
| #define DST_QUAD | ||||
| #include "third_party/compiler_rt/fp_extend_impl.inc" | ||||
| 
 | ||||
| COMPILER_RT_ABI long double __extendsftf2(float a) { | ||||
|     return __extendXfYf2__(a); | ||||
| } | ||||
| 
 | ||||
| #endif | ||||
							
								
								
									
										36
									
								
								third_party/compiler_rt/ffsdi2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								third_party/compiler_rt/ffsdi2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,36 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- ffsdi2.c - Implement __ffsdi2 -------------------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  * | ||||
|  * This file implements __ffsdi2 for the compiler_rt library. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #include "third_party/compiler_rt/int_lib.h" | ||||
| 
 | ||||
| /* Returns: the index of the least significant 1-bit in a, or
 | ||||
|  * the value zero if a is zero. The least significant bit is index one. | ||||
|  */ | ||||
| 
 | ||||
| COMPILER_RT_ABI si_int | ||||
| __ffsdi2(di_int a) | ||||
| { | ||||
|     dwords x; | ||||
|     x.all = a; | ||||
|     if (x.s.low == 0) | ||||
|     { | ||||
|         if (x.s.high == 0) | ||||
|             return 0; | ||||
|         return __builtin_ctz(x.s.high) + (1 + sizeof(si_int) * CHAR_BIT); | ||||
|     } | ||||
|     return __builtin_ctz(x.s.low) + 1; | ||||
| } | ||||
							
								
								
									
										32
									
								
								third_party/compiler_rt/ffssi2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										32
									
								
								third_party/compiler_rt/ffssi2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,32 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- ffssi2.c - Implement __ffssi2 -------------------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  * | ||||
|  * This file implements __ffssi2 for the compiler_rt library. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #include "third_party/compiler_rt/int_lib.h" | ||||
| 
 | ||||
| /* Returns: the index of the least significant 1-bit in a, or
 | ||||
|  * the value zero if a is zero. The least significant bit is index one. | ||||
|  */ | ||||
| 
 | ||||
| COMPILER_RT_ABI si_int | ||||
| __ffssi2(si_int a) | ||||
| { | ||||
|     if (a == 0) | ||||
|     { | ||||
|         return 0; | ||||
|     } | ||||
|     return __builtin_ctz(a) + 1; | ||||
| } | ||||
							
								
								
									
										40
									
								
								third_party/compiler_rt/ffsti2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										40
									
								
								third_party/compiler_rt/ffsti2.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,40 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- ffsti2.c - Implement __ffsti2 -------------------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  * | ||||
|  * This file implements __ffsti2 for the compiler_rt library. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #include "third_party/compiler_rt/int_lib.h" | ||||
| 
 | ||||
| #ifdef CRT_HAS_128BIT | ||||
| 
 | ||||
| /* Returns: the index of the least significant 1-bit in a, or
 | ||||
|  * the value zero if a is zero. The least significant bit is index one. | ||||
|  */ | ||||
| 
 | ||||
| COMPILER_RT_ABI si_int | ||||
| __ffsti2(ti_int a) | ||||
| { | ||||
|     twords x; | ||||
|     x.all = a; | ||||
|     if (x.s.low == 0) | ||||
|     { | ||||
|         if (x.s.high == 0) | ||||
|             return 0; | ||||
|         return __builtin_ctzll(x.s.high) + (1 + sizeof(di_int) * CHAR_BIT); | ||||
|     } | ||||
|     return __builtin_ctzll(x.s.low) + 1; | ||||
| } | ||||
| 
 | ||||
| #endif /* CRT_HAS_128BIT */ | ||||
							
								
								
									
										58
									
								
								third_party/compiler_rt/fixdfdi.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								third_party/compiler_rt/fixdfdi.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,58 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- fixdfdi.c - Implement __fixdfdi -----------------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #define DOUBLE_PRECISION | ||||
| #include "third_party/compiler_rt/fp_lib.inc" | ||||
| 
 | ||||
| #ifndef __SOFT_FP__ | ||||
| /* Support for systems that have hardware floating-point; can set the invalid
 | ||||
|  * flag as a side-effect of computation. | ||||
|  */ | ||||
| 
 | ||||
| COMPILER_RT_ABI du_int __fixunsdfdi(double a); | ||||
| 
 | ||||
| COMPILER_RT_ABI di_int | ||||
| __fixdfdi(double a) | ||||
| { | ||||
|     if (a < 0.0) { | ||||
|         return -__fixunsdfdi(-a); | ||||
|     } | ||||
|     return __fixunsdfdi(a); | ||||
| } | ||||
| 
 | ||||
| #else | ||||
| /* Support for systems that don't have hardware floating-point; there are no
 | ||||
|  * flags to set, and we don't want to code-gen to an unknown soft-float | ||||
|  * implementation. | ||||
|  */ | ||||
| 
 | ||||
| typedef di_int fixint_t; | ||||
| typedef du_int fixuint_t; | ||||
| #include "third_party/compiler_rt/fp_fixint_impl.inc" | ||||
| 
 | ||||
| COMPILER_RT_ABI di_int | ||||
| __fixdfdi(fp_t a) { | ||||
|     return __fixint(a); | ||||
| } | ||||
| 
 | ||||
| #endif | ||||
| 
 | ||||
/* ARM EABI name for __fixdfdi (only built when __ARM_EABI__ is defined).
 * With COMPILER_RT_ARMHF_TARGET a real forwarding function is emitted;
 * otherwise __aeabi_d2lz is declared as an alias of __fixdfdi.
 * NOTE(review): presumably the forwarding function exists because the
 * AEABI_RTABI calling convention differs on hard-float targets -- confirm
 * against the definition of AEABI_RTABI.
 */
#if defined(__ARM_EABI__) && defined(COMPILER_RT_ARMHF_TARGET)
AEABI_RTABI di_int __aeabi_d2lz(fp_t a) {
  const di_int converted = __fixdfdi(a);
  return converted;
}
#elif defined(__ARM_EABI__)
AEABI_RTABI di_int __aeabi_d2lz(fp_t a) COMPILER_RT_ALIAS(__fixdfdi);
#endif
							
								
								
									
										33
									
								
								third_party/compiler_rt/fixdfsi.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								third_party/compiler_rt/fixdfsi.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,33 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- fixdfsi.c - Implement __fixdfsi -----------------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #define DOUBLE_PRECISION | ||||
| #include "third_party/compiler_rt/fp_lib.inc" | ||||
| typedef si_int fixint_t; | ||||
| typedef su_int fixuint_t; | ||||
| #include "third_party/compiler_rt/fp_fixint_impl.inc" | ||||
| 
 | ||||
| COMPILER_RT_ABI si_int | ||||
| __fixdfsi(fp_t a) { | ||||
|     return __fixint(a); | ||||
| } | ||||
| 
 | ||||
/* ARM EABI name for __fixdfsi (only built when __ARM_EABI__ is defined).
 * With COMPILER_RT_ARMHF_TARGET a real forwarding function is emitted;
 * otherwise __aeabi_d2iz is declared as an alias of __fixdfsi.
 * NOTE(review): presumably the forwarding function exists because the
 * AEABI_RTABI calling convention differs on hard-float targets -- confirm
 * against the definition of AEABI_RTABI.
 */
#if defined(__ARM_EABI__) && defined(COMPILER_RT_ARMHF_TARGET)
AEABI_RTABI si_int __aeabi_d2iz(fp_t a) {
  const si_int converted = __fixdfsi(a);
  return converted;
}
#elif defined(__ARM_EABI__)
AEABI_RTABI si_int __aeabi_d2iz(fp_t a) COMPILER_RT_ALIAS(__fixdfsi);
#endif
							
								
								
									
										29
									
								
								third_party/compiler_rt/fixdfti.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										29
									
								
								third_party/compiler_rt/fixdfti.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,29 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- fixdfti.c - Implement __fixdfti -----------------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #include "third_party/compiler_rt/int_lib.h" | ||||
| 
 | ||||
| #ifdef CRT_HAS_128BIT | ||||
| #define DOUBLE_PRECISION | ||||
| #include "third_party/compiler_rt/fp_lib.inc" | ||||
| 
 | ||||
| typedef ti_int fixint_t; | ||||
| typedef tu_int fixuint_t; | ||||
| #include "third_party/compiler_rt/fp_fixint_impl.inc" | ||||
| 
 | ||||
| COMPILER_RT_ABI ti_int | ||||
| __fixdfti(fp_t a) { | ||||
|     return __fixint(a); | ||||
| } | ||||
| 
 | ||||
| #endif /* CRT_HAS_128BIT */ | ||||
							
								
								
									
										58
									
								
								third_party/compiler_rt/fixsfdi.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								third_party/compiler_rt/fixsfdi.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,58 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- fixsfdi.c - Implement __fixsfdi -----------------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #define SINGLE_PRECISION | ||||
| #include "third_party/compiler_rt/fp_lib.inc" | ||||
| 
 | ||||
| #ifndef __SOFT_FP__ | ||||
| /* Support for systems that have hardware floating-point; can set the invalid
 | ||||
|  * flag as a side-effect of computation. | ||||
|  */ | ||||
| 
 | ||||
| COMPILER_RT_ABI du_int __fixunssfdi(float a); | ||||
| 
 | ||||
| COMPILER_RT_ABI di_int | ||||
| __fixsfdi(float a) | ||||
| { | ||||
|     if (a < 0.0f) { | ||||
|         return -__fixunssfdi(-a); | ||||
|     } | ||||
|     return __fixunssfdi(a); | ||||
| } | ||||
| 
 | ||||
| #else | ||||
| /* Support for systems that don't have hardware floating-point; there are no
 | ||||
|  * flags to set, and we don't want to code-gen to an unknown soft-float | ||||
|  * implementation. | ||||
|  */ | ||||
| 
 | ||||
| typedef di_int fixint_t; | ||||
| typedef du_int fixuint_t; | ||||
| #include "third_party/compiler_rt/fp_fixint_impl.inc" | ||||
| 
 | ||||
| COMPILER_RT_ABI di_int | ||||
| __fixsfdi(fp_t a) { | ||||
|     return __fixint(a); | ||||
| } | ||||
| 
 | ||||
| #endif | ||||
| 
 | ||||
/* ARM EABI name for __fixsfdi (only built when __ARM_EABI__ is defined).
 * With COMPILER_RT_ARMHF_TARGET a real forwarding function is emitted;
 * otherwise __aeabi_f2lz is declared as an alias of __fixsfdi.
 * NOTE(review): presumably the forwarding function exists because the
 * AEABI_RTABI calling convention differs on hard-float targets -- confirm
 * against the definition of AEABI_RTABI.
 */
#if defined(__ARM_EABI__) && defined(COMPILER_RT_ARMHF_TARGET)
AEABI_RTABI di_int __aeabi_f2lz(fp_t a) {
  const di_int converted = __fixsfdi(a);
  return converted;
}
#elif defined(__ARM_EABI__)
AEABI_RTABI di_int __aeabi_f2lz(fp_t a) COMPILER_RT_ALIAS(__fixsfdi);
#endif
							
								
								
									
										33
									
								
								third_party/compiler_rt/fixsfsi.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								third_party/compiler_rt/fixsfsi.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,33 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- fixsfsi.c - Implement __fixsfsi -----------------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #define SINGLE_PRECISION | ||||
| #include "third_party/compiler_rt/fp_lib.inc" | ||||
| typedef si_int fixint_t; | ||||
| typedef su_int fixuint_t; | ||||
| #include "third_party/compiler_rt/fp_fixint_impl.inc" | ||||
| 
 | ||||
| COMPILER_RT_ABI si_int | ||||
| __fixsfsi(fp_t a) { | ||||
|     return __fixint(a); | ||||
| } | ||||
| 
 | ||||
/* ARM EABI name for __fixsfsi (only built when __ARM_EABI__ is defined).
 * With COMPILER_RT_ARMHF_TARGET a real forwarding function is emitted;
 * otherwise __aeabi_f2iz is declared as an alias of __fixsfsi.
 * NOTE(review): presumably the forwarding function exists because the
 * AEABI_RTABI calling convention differs on hard-float targets -- confirm
 * against the definition of AEABI_RTABI.
 */
#if defined(__ARM_EABI__) && defined(COMPILER_RT_ARMHF_TARGET)
AEABI_RTABI si_int __aeabi_f2iz(fp_t a) {
  const si_int converted = __fixsfsi(a);
  return converted;
}
#elif defined(__ARM_EABI__)
AEABI_RTABI si_int __aeabi_f2iz(fp_t a) COMPILER_RT_ALIAS(__fixsfsi);
#endif
							
								
								
									
										29
									
								
								third_party/compiler_rt/fixsfti.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										29
									
								
								third_party/compiler_rt/fixsfti.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,29 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- fixsfti.c - Implement __fixsfti -----------------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #include "third_party/compiler_rt/int_lib.h" | ||||
| 
 | ||||
| #ifdef CRT_HAS_128BIT | ||||
| #define SINGLE_PRECISION | ||||
| #include "third_party/compiler_rt/fp_lib.inc" | ||||
| 
 | ||||
| typedef ti_int fixint_t; | ||||
| typedef tu_int fixuint_t; | ||||
| #include "third_party/compiler_rt/fp_fixint_impl.inc" | ||||
| 
 | ||||
| COMPILER_RT_ABI ti_int | ||||
| __fixsfti(fp_t a) { | ||||
|     return __fixint(a); | ||||
| } | ||||
| 
 | ||||
| #endif /* CRT_HAS_128BIT */ | ||||
							
								
								
									
										26
									
								
								third_party/compiler_rt/fixtfdi.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								third_party/compiler_rt/fixtfdi.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,26 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- fixtfdi.c - Implement __fixtfdi -----------------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #define QUAD_PRECISION | ||||
| #include "third_party/compiler_rt/fp_lib.inc" | ||||
| 
 | ||||
| #if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) | ||||
| typedef di_int fixint_t; | ||||
| typedef du_int fixuint_t; | ||||
| #include "third_party/compiler_rt/fp_fixint_impl.inc" | ||||
| 
 | ||||
| COMPILER_RT_ABI di_int | ||||
| __fixtfdi(fp_t a) { | ||||
|     return __fixint(a); | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										26
									
								
								third_party/compiler_rt/fixtfsi.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								third_party/compiler_rt/fixtfsi.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,26 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- fixtfsi.c - Implement __fixtfsi -----------------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #define QUAD_PRECISION | ||||
| #include "third_party/compiler_rt/fp_lib.inc" | ||||
| 
 | ||||
| #if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) | ||||
| typedef si_int fixint_t; | ||||
| typedef su_int fixuint_t; | ||||
| #include "third_party/compiler_rt/fp_fixint_impl.inc" | ||||
| 
 | ||||
| /* Convert `a` (fp_t as configured by QUAD_PRECISION) to si_int. | ||||
|  * Delegates to __fixint(), presumably defined by fp_fixint_impl.inc for | ||||
|  * the fixint_t/fixuint_t typedefs declared just above (here si_int/su_int). | ||||
|  */ | ||||
| COMPILER_RT_ABI si_int | ||||
| __fixtfsi(fp_t a) { | ||||
|     return __fixint(a); | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										26
									
								
								third_party/compiler_rt/fixtfti.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								third_party/compiler_rt/fixtfti.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,26 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- fixtfti.c - Implement __fixtfti -----------------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #define QUAD_PRECISION | ||||
| #include "third_party/compiler_rt/fp_lib.inc" | ||||
| 
 | ||||
| #if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) | ||||
| typedef ti_int fixint_t; | ||||
| typedef tu_int fixuint_t; | ||||
| #include "third_party/compiler_rt/fp_fixint_impl.inc" | ||||
| 
 | ||||
| /* Convert `a` (fp_t as configured by QUAD_PRECISION) to ti_int. | ||||
|  * Delegates to __fixint(), presumably defined by fp_fixint_impl.inc for | ||||
|  * the fixint_t/fixuint_t typedefs declared just above (here ti_int/tu_int). | ||||
|  */ | ||||
| COMPILER_RT_ABI ti_int | ||||
| __fixtfti(fp_t a) { | ||||
|     return __fixint(a); | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										55
									
								
								third_party/compiler_rt/fixunsdfdi.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										55
									
								
								third_party/compiler_rt/fixunsdfdi.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,55 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- fixunsdfdi.c - Implement __fixunsdfdi -----------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #define DOUBLE_PRECISION | ||||
| #include "third_party/compiler_rt/fp_lib.inc" | ||||
| 
 | ||||
| #ifndef __SOFT_FP__ | ||||
| /* Support for systems that have hardware floating-point; can set the invalid
 | ||||
|  * flag as a side-effect of computation. | ||||
|  */ | ||||
| 
 | ||||
| /* Hardware-float conversion of a double to du_int. | ||||
|  * | ||||
|  * Inputs that compare <= 0.0 yield 0. Any other input is split around 2^32: | ||||
|  * the quotient becomes the upper word, the remainder the lower word, and | ||||
|  * the two are joined into the du_int result. | ||||
|  */ | ||||
| COMPILER_RT_ABI du_int | ||||
| __fixunsdfdi(double a) | ||||
| { | ||||
|     if (a <= 0.0) { | ||||
|         return 0; | ||||
|     } | ||||
|     su_int upper = a / 4294967296.f;                 /* a / 0x1p32f */ | ||||
|     su_int lower = a - (double)upper * 4294967296.f; /* upper * 0x1p32f */ | ||||
|     du_int joined = (du_int)upper << 32; | ||||
|     joined |= lower; | ||||
|     return joined; | ||||
| } | ||||
| 
 | ||||
| #else | ||||
| /* Support for systems that don't have hardware floating-point; there are no
 | ||||
|  * flags to set, and we don't want to code-gen to an unknown soft-float | ||||
|  * implementation. | ||||
|  */ | ||||
| 
 | ||||
| typedef du_int fixuint_t; | ||||
| #include "third_party/compiler_rt/fp_fixuint_impl.inc" | ||||
| 
 | ||||
| /* Soft-float variant (this is the #else arm of `#ifndef __SOFT_FP__`). | ||||
|  * Converts `a` to du_int by delegating to __fixuint(), presumably defined | ||||
|  * by fp_fixuint_impl.inc for the fixuint_t typedef just above (du_int). | ||||
|  */ | ||||
| COMPILER_RT_ABI du_int | ||||
| __fixunsdfdi(fp_t a) { | ||||
|     return __fixuint(a); | ||||
| } | ||||
| 
 | ||||
| #endif | ||||
| 
 | ||||
| /* ARM EABI name for the same conversion. | ||||
|  * With COMPILER_RT_ARMHF_TARGET defined, __aeabi_d2ulz is a real function | ||||
|  * that forwards to __fixunsdfdi(); otherwise it is only declared and tied | ||||
|  * to __fixunsdfdi through COMPILER_RT_ALIAS (presumably a symbol alias). | ||||
|  */ | ||||
| #if defined(__ARM_EABI__) | ||||
| #if defined(COMPILER_RT_ARMHF_TARGET) | ||||
| AEABI_RTABI du_int __aeabi_d2ulz(fp_t a) { | ||||
|   return __fixunsdfdi(a); | ||||
| } | ||||
| #else | ||||
| AEABI_RTABI du_int __aeabi_d2ulz(fp_t a) COMPILER_RT_ALIAS(__fixunsdfdi); | ||||
| #endif | ||||
| #endif | ||||
							
								
								
									
										32
									
								
								third_party/compiler_rt/fixunsdfsi.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										32
									
								
								third_party/compiler_rt/fixunsdfsi.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,32 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- fixunsdfsi.c - Implement __fixunsdfsi -----------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #define DOUBLE_PRECISION | ||||
| #include "third_party/compiler_rt/fp_lib.inc" | ||||
| typedef su_int fixuint_t; | ||||
| #include "third_party/compiler_rt/fp_fixuint_impl.inc" | ||||
| 
 | ||||
| /* Convert `a` (fp_t as configured by DOUBLE_PRECISION) to su_int. | ||||
|  * Delegates to __fixuint(), presumably defined by fp_fixuint_impl.inc for | ||||
|  * the fixuint_t typedef declared just above (here su_int). | ||||
|  */ | ||||
| COMPILER_RT_ABI su_int | ||||
| __fixunsdfsi(fp_t a) { | ||||
|     return __fixuint(a); | ||||
| } | ||||
| 
 | ||||
| /* ARM EABI name for the same conversion. | ||||
|  * With COMPILER_RT_ARMHF_TARGET defined, __aeabi_d2uiz is a real function | ||||
|  * that forwards to __fixunsdfsi(); otherwise it is only declared and tied | ||||
|  * to __fixunsdfsi through COMPILER_RT_ALIAS (presumably a symbol alias). | ||||
|  */ | ||||
| #if defined(__ARM_EABI__) | ||||
| #if defined(COMPILER_RT_ARMHF_TARGET) | ||||
| AEABI_RTABI su_int __aeabi_d2uiz(fp_t a) { | ||||
|   return __fixunsdfsi(a); | ||||
| } | ||||
| #else | ||||
| AEABI_RTABI su_int __aeabi_d2uiz(fp_t a) COMPILER_RT_ALIAS(__fixunsdfsi); | ||||
| #endif | ||||
| #endif | ||||
							
								
								
									
										26
									
								
								third_party/compiler_rt/fixunsdfti.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								third_party/compiler_rt/fixunsdfti.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,26 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- fixunsdfti.c - Implement __fixunsdfti -----------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #include "third_party/compiler_rt/int_lib.h" | ||||
| 
 | ||||
| #ifdef CRT_HAS_128BIT | ||||
| #define DOUBLE_PRECISION | ||||
| #include "third_party/compiler_rt/fp_lib.inc" | ||||
| typedef tu_int fixuint_t; | ||||
| #include "third_party/compiler_rt/fp_fixuint_impl.inc" | ||||
| 
 | ||||
| /* Convert `a` (fp_t as configured by DOUBLE_PRECISION) to tu_int. | ||||
|  * Delegates to __fixuint(), presumably defined by fp_fixuint_impl.inc for | ||||
|  * the fixuint_t typedef declared just above (here tu_int). | ||||
|  * Only compiled when CRT_HAS_128BIT is defined. | ||||
|  */ | ||||
| COMPILER_RT_ABI tu_int | ||||
| __fixunsdfti(fp_t a) { | ||||
|     return __fixuint(a); | ||||
| } | ||||
| #endif /* CRT_HAS_128BIT */ | ||||
							
								
								
									
										56
									
								
								third_party/compiler_rt/fixunssfdi.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										56
									
								
								third_party/compiler_rt/fixunssfdi.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,56 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- fixunssfdi.c - Implement __fixunssfdi -----------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #define SINGLE_PRECISION | ||||
| #include "third_party/compiler_rt/fp_lib.inc" | ||||
| 
 | ||||
| #ifndef __SOFT_FP__ | ||||
| /* Support for systems that have hardware floating-point; can set the invalid
 | ||||
|  * flag as a side-effect of computation. | ||||
|  */ | ||||
| 
 | ||||
| /* Hardware-float conversion of a float to du_int. | ||||
|  * | ||||
|  * Inputs that compare <= 0.0f yield 0. Any other input is widened to double | ||||
|  * and split around 2^32: the quotient becomes the upper word, the remainder | ||||
|  * the lower word, and the two are joined into the du_int result. | ||||
|  */ | ||||
| COMPILER_RT_ABI du_int | ||||
| __fixunssfdi(float a) | ||||
| { | ||||
|     if (a <= 0.0f) { | ||||
|         return 0; | ||||
|     } | ||||
|     double wide = a; | ||||
|     su_int upper = wide / 4294967296.f;                 /* wide / 0x1p32f */ | ||||
|     su_int lower = wide - (double)upper * 4294967296.f; /* upper * 0x1p32f */ | ||||
|     du_int joined = (du_int)upper << 32; | ||||
|     joined |= lower; | ||||
|     return joined; | ||||
| } | ||||
| 
 | ||||
| #else | ||||
| /* Support for systems that don't have hardware floating-point; there are no
 | ||||
|  * flags to set, and we don't want to code-gen to an unknown soft-float | ||||
|  * implementation. | ||||
|  */ | ||||
| 
 | ||||
| typedef du_int fixuint_t; | ||||
| #include "third_party/compiler_rt/fp_fixuint_impl.inc" | ||||
| 
 | ||||
| /* Soft-float variant (this is the #else arm of `#ifndef __SOFT_FP__`). | ||||
|  * Converts `a` to du_int by delegating to __fixuint(), presumably defined | ||||
|  * by fp_fixuint_impl.inc for the fixuint_t typedef just above (du_int). | ||||
|  */ | ||||
| COMPILER_RT_ABI du_int | ||||
| __fixunssfdi(fp_t a) { | ||||
|     return __fixuint(a); | ||||
| } | ||||
| 
 | ||||
| #endif | ||||
| 
 | ||||
| /* ARM EABI name for the same conversion. | ||||
|  * With COMPILER_RT_ARMHF_TARGET defined, __aeabi_f2ulz is a real function | ||||
|  * that forwards to __fixunssfdi(); otherwise it is only declared and tied | ||||
|  * to __fixunssfdi through COMPILER_RT_ALIAS (presumably a symbol alias). | ||||
|  */ | ||||
| #if defined(__ARM_EABI__) | ||||
| #if defined(COMPILER_RT_ARMHF_TARGET) | ||||
| AEABI_RTABI du_int __aeabi_f2ulz(fp_t a) { | ||||
|   return __fixunssfdi(a); | ||||
| } | ||||
| #else | ||||
| AEABI_RTABI du_int __aeabi_f2ulz(fp_t a) COMPILER_RT_ALIAS(__fixunssfdi); | ||||
| #endif | ||||
| #endif | ||||
							
								
								
									
										36
									
								
								third_party/compiler_rt/fixunssfsi.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								third_party/compiler_rt/fixunssfsi.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,36 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- fixunssfsi.c - Implement __fixunssfsi -----------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  * | ||||
|  * This file implements __fixunssfsi for the compiler_rt library. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #define SINGLE_PRECISION | ||||
| #include "third_party/compiler_rt/fp_lib.inc" | ||||
| typedef su_int fixuint_t; | ||||
| #include "third_party/compiler_rt/fp_fixuint_impl.inc" | ||||
| 
 | ||||
| /* Convert `a` (fp_t as configured by SINGLE_PRECISION) to su_int. | ||||
|  * Delegates to __fixuint(), presumably defined by fp_fixuint_impl.inc for | ||||
|  * the fixuint_t typedef declared just above (here su_int). | ||||
|  */ | ||||
| COMPILER_RT_ABI su_int | ||||
| __fixunssfsi(fp_t a) { | ||||
|     return __fixuint(a); | ||||
| } | ||||
| 
 | ||||
| /* ARM EABI name for the same conversion. | ||||
|  * With COMPILER_RT_ARMHF_TARGET defined, __aeabi_f2uiz is a real function | ||||
|  * that forwards to __fixunssfsi(); otherwise it is only declared and tied | ||||
|  * to __fixunssfsi through COMPILER_RT_ALIAS (presumably a symbol alias). | ||||
|  */ | ||||
| #if defined(__ARM_EABI__) | ||||
| #if defined(COMPILER_RT_ARMHF_TARGET) | ||||
| AEABI_RTABI su_int __aeabi_f2uiz(fp_t a) { | ||||
|   return __fixunssfsi(a); | ||||
| } | ||||
| #else | ||||
| AEABI_RTABI su_int __aeabi_f2uiz(fp_t a) COMPILER_RT_ALIAS(__fixunssfsi); | ||||
| #endif | ||||
| #endif | ||||
							
								
								
									
										29
									
								
								third_party/compiler_rt/fixunssfti.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										29
									
								
								third_party/compiler_rt/fixunssfti.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,29 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- fixunssfti.c - Implement __fixunssfti -----------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  * | ||||
|  * This file implements __fixunssfti for the compiler_rt library. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #define SINGLE_PRECISION | ||||
| #include "third_party/compiler_rt/fp_lib.inc" | ||||
| 
 | ||||
| #if defined(CRT_HAS_128BIT) | ||||
| typedef tu_int fixuint_t; | ||||
| #include "third_party/compiler_rt/fp_fixuint_impl.inc" | ||||
| 
 | ||||
| /* Convert `a` (fp_t as configured by SINGLE_PRECISION) to tu_int. | ||||
|  * Delegates to __fixuint(), presumably defined by fp_fixuint_impl.inc for | ||||
|  * the fixuint_t typedef declared just above (here tu_int). | ||||
|  * Only compiled when CRT_HAS_128BIT is defined. | ||||
|  */ | ||||
| COMPILER_RT_ABI tu_int | ||||
| __fixunssfti(fp_t a) { | ||||
|     return __fixuint(a); | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										25
									
								
								third_party/compiler_rt/fixunstfdi.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										25
									
								
								third_party/compiler_rt/fixunstfdi.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,25 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- fixunstfdi.c - Implement __fixunstfdi -----------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #define QUAD_PRECISION | ||||
| #include "third_party/compiler_rt/fp_lib.inc" | ||||
| 
 | ||||
| #if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) | ||||
| typedef du_int fixuint_t; | ||||
| #include "third_party/compiler_rt/fp_fixuint_impl.inc" | ||||
| 
 | ||||
| /* Convert `a` (fp_t as configured by QUAD_PRECISION) to du_int. | ||||
|  * Delegates to __fixuint(), presumably defined by fp_fixuint_impl.inc for | ||||
|  * the fixuint_t typedef declared just above (here du_int). | ||||
|  */ | ||||
| COMPILER_RT_ABI du_int | ||||
| __fixunstfdi(fp_t a) { | ||||
|     return __fixuint(a); | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										25
									
								
								third_party/compiler_rt/fixunstfsi.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										25
									
								
								third_party/compiler_rt/fixunstfsi.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,25 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- fixunstfsi.c - Implement __fixunstfsi -----------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #define QUAD_PRECISION | ||||
| #include "third_party/compiler_rt/fp_lib.inc" | ||||
| 
 | ||||
| #if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) | ||||
| typedef su_int fixuint_t; | ||||
| #include "third_party/compiler_rt/fp_fixuint_impl.inc" | ||||
| 
 | ||||
| /* Convert `a` (fp_t as configured by QUAD_PRECISION) to su_int. | ||||
|  * Delegates to __fixuint(), presumably defined by fp_fixuint_impl.inc for | ||||
|  * the fixuint_t typedef declared just above (here su_int). | ||||
|  */ | ||||
| COMPILER_RT_ABI su_int | ||||
| __fixunstfsi(fp_t a) { | ||||
|     return __fixuint(a); | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										25
									
								
								third_party/compiler_rt/fixunstfti.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										25
									
								
								third_party/compiler_rt/fixunstfti.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,25 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- fixunstfti.c - Implement __fixunstfti -----------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #define QUAD_PRECISION | ||||
| #include "third_party/compiler_rt/fp_lib.inc" | ||||
| 
 | ||||
| #if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) | ||||
| typedef tu_int fixuint_t; | ||||
| #include "third_party/compiler_rt/fp_fixuint_impl.inc" | ||||
| 
 | ||||
| /* Convert `a` (fp_t as configured by QUAD_PRECISION) to tu_int. | ||||
|  * Delegates to __fixuint(), presumably defined by fp_fixuint_impl.inc for | ||||
|  * the fixuint_t typedef declared just above (here tu_int). | ||||
|  */ | ||||
| COMPILER_RT_ABI tu_int | ||||
| __fixunstfti(fp_t a) { | ||||
|     return __fixuint(a); | ||||
| } | ||||
| #endif | ||||
							
								
								
									
										49
									
								
								third_party/compiler_rt/fixunsxfdi.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										49
									
								
								third_party/compiler_rt/fixunsxfdi.c
									
										
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1,49 @@ | |||
| /* clang-format off */ | ||||
| /* ===-- fixunsxfdi.c - Implement __fixunsxfdi -----------------------------===
 | ||||
|  * | ||||
|  *                     The LLVM Compiler Infrastructure | ||||
|  * | ||||
|  * This file is dual licensed under the MIT and the University of Illinois Open | ||||
|  * Source Licenses. See LICENSE.TXT for details. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  * | ||||
|  * This file implements __fixunsxfdi for the compiler_rt library. | ||||
|  * | ||||
|  * ===----------------------------------------------------------------------=== | ||||
|  */ | ||||
| 
 | ||||
| STATIC_YOINK("huge_compiler_rt_license"); | ||||
| 
 | ||||
| #if !_ARCH_PPC | ||||
| 
 | ||||
| #include "third_party/compiler_rt/int_lib.h" | ||||
| 
 | ||||
| /* Returns: convert a to an unsigned long long, rounding toward zero.
 | ||||
|  *          Negative values all become zero. | ||||
|  */ | ||||
| 
 | ||||
| /* Assumption: long double is an intel 80 bit floating point type padded with 6 bytes
 | ||||
|  *             du_int is a 64 bit integral type | ||||
|  *             value in long double is representable in du_int or is negative  | ||||
|  *                 (no range checking performed) | ||||
|  */ | ||||
| 
 | ||||
| /* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee |
 | ||||
|  * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | ||||
|  */ | ||||
| 
 | ||||
| /* Convert an 80-bit extended long double to du_int, truncating toward zero. | ||||
|  * | ||||
|  * a: value to convert. | ||||
|  * Returns 0 when the sign bit is set or the unbiased exponent is negative | ||||
|  * (magnitude below 1), all-ones when the exponent is too large for du_int, | ||||
|  * and otherwise the 64-bit significand shifted right so that only its | ||||
|  * integer part remains. | ||||
|  */ | ||||
| COMPILER_RT_ABI du_int | ||||
| __fixunsxfdi(long double a) | ||||
| { | ||||
|     long_double_bits fb; | ||||
|     fb.f = a; | ||||
|     /* Low 15 bits hold the biased exponent; 16383 is the bias. */ | ||||
|     int e = (fb.u.high.s.low & 0x00007FFF) - 16383; | ||||
|     /* Bit 15 is the sign: negatives and values below 1 become 0. */ | ||||
|     if (e < 0 || (fb.u.high.s.low & 0x00008000)) | ||||
|         return 0; | ||||
|     /* Saturate from e == 64 upward: the value no longer fits in du_int, and | ||||
|      * letting e == 64 through would shift by 63 - 64 = -1, which is undefined. | ||||
|      */ | ||||
|     if ((unsigned)e >= sizeof(du_int) * CHAR_BIT) | ||||
|         return ~(du_int)0; | ||||
|     return fb.u.low.all >> (63 - e); | ||||
| } | ||||
| 
 | ||||
| #endif | ||||
Some files were not shown because too many files have changed in this diff Show more
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue