mirror of
				https://github.com/jart/cosmopolitan.git
				synced 2025-10-24 18:20:59 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			474 lines
		
	
	
	
		
			15 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			474 lines
		
	
	
	
		
			15 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
| Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
 | |
| Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
 | |
| denoted as "the implementer".
 | |
| 
 | |
| For more information, feedback or questions, please refer to our websites:
 | |
| http://keccak.noekeon.org/
 | |
| http://keyak.noekeon.org/
 | |
| http://ketje.noekeon.org/
 | |
| 
 | |
| To the extent possible under law, the implementer has waived all copyright
 | |
| and related or neighboring rights to the source code in this file.
 | |
| http://creativecommons.org/publicdomain/zero/1.0/
 | |
| */
 | |
| 
 | |
| #include <string.h>
 | |
| #include <stdlib.h>
 | |
| /* #include "brg_endian.h" */
 | |
| #include "KeccakP-1600-opt64-config.h"
 | |
| 
 | |
| #if NOT_PYTHON
 | |
| typedef unsigned char UINT8;
 | |
| /* typedef unsigned long long int UINT64; */
 | |
| #endif
 | |
| 
 | |
| #if defined(KeccakP1600_useLaneComplementing)
 | |
| #define UseBebigokimisa
 | |
| #endif
 | |
| 
 | |
/*
 * ROL64(a, offset): rotate the 64-bit value `a` left by `offset` bits.
 *
 * Three variants are selected at compile time:
 *   - _MSC_VER:             forwards to _rotl64().
 *   - KeccakP1600_useSHLD:  inline "shld" assembly wrapped in a GNU statement
 *                           expression; the rotation count must be an
 *                           immediate ("i" constraint).
 *   - otherwise:            two shifts combined with XOR.
 *
 * In the portable variant `offset` must satisfy 0 < offset < 64, because
 * offset == 0 would shift right by 64, which is undefined in C.
 * Every macro argument is parenthesized so that expression arguments such as
 * `x | y` or `n + 1` expand with the intended precedence.
 */
#if defined(_MSC_VER)
#define ROL64(a, offset) _rotl64(a, offset)
#elif defined(KeccakP1600_useSHLD)
    #define ROL64(x,N) ({ \
    register UINT64 __out; \
    register UINT64 __in = (x); \
    __asm__ ("shld %2,%0,%0" : "=r"(__out) : "0"(__in), "i"(N)); \
    __out; \
    })
#else
#define ROL64(a, offset) ((((UINT64)(a)) << (offset)) ^ (((UINT64)(a)) >> (64-(offset))))
#endif
 | |
| 
 | |
| #include "KeccakP-1600-64.macros"
 | |
| #ifdef KeccakP1600_fullUnrolling
 | |
| #define FullUnrolling
 | |
| #else
 | |
| #define Unrolling KeccakP1600_unrolling
 | |
| #endif
 | |
| #include "KeccakP-1600-unrolling.macros"
 | |
| #include "SnP-Relaned.h"
 | |
| 
 | |
| static const UINT64 KeccakF1600RoundConstants[24] = {
 | |
|     0x0000000000000001ULL,
 | |
|     0x0000000000008082ULL,
 | |
|     0x800000000000808aULL,
 | |
|     0x8000000080008000ULL,
 | |
|     0x000000000000808bULL,
 | |
|     0x0000000080000001ULL,
 | |
|     0x8000000080008081ULL,
 | |
|     0x8000000000008009ULL,
 | |
|     0x000000000000008aULL,
 | |
|     0x0000000000000088ULL,
 | |
|     0x0000000080008009ULL,
 | |
|     0x000000008000000aULL,
 | |
|     0x000000008000808bULL,
 | |
|     0x800000000000008bULL,
 | |
|     0x8000000000008089ULL,
 | |
|     0x8000000000008003ULL,
 | |
|     0x8000000000008002ULL,
 | |
|     0x8000000000000080ULL,
 | |
|     0x000000000000800aULL,
 | |
|     0x800000008000000aULL,
 | |
|     0x8000000080008081ULL,
 | |
|     0x8000000000008080ULL,
 | |
|     0x0000000080000001ULL,
 | |
|     0x8000000080008008ULL };
 | |
| 
 | |
| /* ---------------------------------------------------------------- */
 | |
| 
 | |
/*
 * Resets the 200-byte state to its initial value.
 *
 * The state is cleared to zero. When KeccakP1600_useLaneComplementing is
 * defined, lanes 1, 2, 8, 12, 17 and 20 are stored inverted, so those lanes
 * are set to all-ones instead (the complemented form of zero).
 */
void KeccakP1600_Initialize(void *state)
{
    memset(state, 0, 200);
#ifdef KeccakP1600_useLaneComplementing
    {
        static const unsigned char complementedLanes[6] = { 1, 2, 8, 12, 17, 20 };
        UINT64 *lanes = (UINT64*)state;
        unsigned int k;

        for (k = 0; k < 6; k++)
            lanes[complementedLanes[k]] = ~(UINT64)0;
    }
#endif
}
 | |
| 
 | |
| /* ---------------------------------------------------------------- */
 | |
| 
 | |
| void KeccakP1600_AddBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length)
 | |
| {
 | |
| #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
 | |
|     UINT64 lane;
 | |
|     if (length == 0)
 | |
|         return;
 | |
|     if (length == 1)
 | |
|         lane = data[0];
 | |
|     else {
 | |
|         lane = 0;
 | |
|         memcpy(&lane, data, length);
 | |
|     }
 | |
|     lane <<= offset*8;
 | |
| #else
 | |
|     UINT64 lane = 0;
 | |
|     unsigned int i;
 | |
|     for(i=0; i<length; i++)
 | |
|         lane |= ((UINT64)data[i]) << ((i+offset)*8);
 | |
| #endif
 | |
|     ((UINT64*)state)[lanePosition] ^= lane;
 | |
| }
 | |
| 
 | |
| /* ---------------------------------------------------------------- */
 | |
| 
 | |
| void KeccakP1600_AddLanes(void *state, const unsigned char *data, unsigned int laneCount)
 | |
| {
 | |
| #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
 | |
|     unsigned int i = 0;
 | |
| #ifdef NO_MISALIGNED_ACCESSES
 | |
|     /* If either pointer is misaligned, fall back to byte-wise xor. */
 | |
| 
 | |
|     if (((((uintptr_t)state) & 7) != 0) || ((((uintptr_t)data) & 7) != 0)) {
 | |
|       for (i = 0; i < laneCount * 8; i++) {
 | |
|         ((unsigned char*)state)[i] ^= data[i];
 | |
|       }
 | |
|     }
 | |
|     else
 | |
| #endif
 | |
|     {
 | |
|       /* Otherwise... */
 | |
| 
 | |
|       for( ; (i+8)<=laneCount; i+=8) {
 | |
|           ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0];
 | |
|           ((UINT64*)state)[i+1] ^= ((UINT64*)data)[i+1];
 | |
|           ((UINT64*)state)[i+2] ^= ((UINT64*)data)[i+2];
 | |
|           ((UINT64*)state)[i+3] ^= ((UINT64*)data)[i+3];
 | |
|           ((UINT64*)state)[i+4] ^= ((UINT64*)data)[i+4];
 | |
|           ((UINT64*)state)[i+5] ^= ((UINT64*)data)[i+5];
 | |
|           ((UINT64*)state)[i+6] ^= ((UINT64*)data)[i+6];
 | |
|           ((UINT64*)state)[i+7] ^= ((UINT64*)data)[i+7];
 | |
|       }
 | |
|       for( ; (i+4)<=laneCount; i+=4) {
 | |
|           ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0];
 | |
|           ((UINT64*)state)[i+1] ^= ((UINT64*)data)[i+1];
 | |
|           ((UINT64*)state)[i+2] ^= ((UINT64*)data)[i+2];
 | |
|           ((UINT64*)state)[i+3] ^= ((UINT64*)data)[i+3];
 | |
|       }
 | |
|       for( ; (i+2)<=laneCount; i+=2) {
 | |
|           ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0];
 | |
|           ((UINT64*)state)[i+1] ^= ((UINT64*)data)[i+1];
 | |
|       }
 | |
|       if (i<laneCount) {
 | |
|           ((UINT64*)state)[i+0] ^= ((UINT64*)data)[i+0];
 | |
|       }
 | |
|     }
 | |
| #else
 | |
|     unsigned int i;
 | |
|     UINT8 *curData = data;
 | |
|     for(i=0; i<laneCount; i++, curData+=8) {
 | |
|         UINT64 lane = (UINT64)curData[0]
 | |
|             | ((UINT64)curData[1] << 8)
 | |
|             | ((UINT64)curData[2] << 16)
 | |
|             | ((UINT64)curData[3] << 24)
 | |
|             | ((UINT64)curData[4] <<32)
 | |
|             | ((UINT64)curData[5] << 40)
 | |
|             | ((UINT64)curData[6] << 48)
 | |
|             | ((UINT64)curData[7] << 56);
 | |
|         ((UINT64*)state)[i] ^= lane;
 | |
|     }
 | |
| #endif
 | |
| }
 | |
| 
 | |
| /* ---------------------------------------------------------------- */
 | |
| 
 | |
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
/*
 * XORs one byte into the state at byte position `offset`.
 * Only compiled for hosts that are not little-endian: the byte is shifted to
 * its position inside the containing 64-bit lane (byte 0 = least significant).
 */
void KeccakP1600_AddByte(void *state, unsigned char byte, unsigned int offset)
{
    const unsigned int laneIndex = offset/8;
    const unsigned int bitShift = (offset%8)*8;
    UINT64 lane = byte;

    lane <<= bitShift;
    ((UINT64*)state)[laneIndex] ^= lane;
}
#endif
 | |
| 
 | |
| /* ---------------------------------------------------------------- */
 | |
| 
 | |
| void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
 | |
| {
 | |
|     SnP_AddBytes(state, data, offset, length, KeccakP1600_AddLanes, KeccakP1600_AddBytesInLane, 8);
 | |
| }
 | |
| 
 | |
| /* ---------------------------------------------------------------- */
 | |
| 
 | |
/*
 * Overwrites `length` bytes inside one lane of the state with bytes from `data`.
 *
 * state        - state buffer
 * lanePosition - index of the lane to modify
 * data         - source bytes
 * offset       - byte position inside the lane of the first byte written
 * length       - number of bytes to write
 *
 * With KeccakP1600_useLaneComplementing, lanes 1, 2, 8, 12, 17 and 20 are
 * stored inverted, so the bytes written to them are inverted as well.
 * Only implemented for little-endian hosts.
 */
void KeccakP1600_OverwriteBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    unsigned char *dst = (unsigned char*)state + lanePosition*8 + offset;
#ifdef KeccakP1600_useLaneComplementing
    switch (lanePosition) {
    case 1: case 2: case 8: case 12: case 17: case 20: {
        unsigned int k;

        for (k = 0; k < length; k++)
            dst[k] = ~data[k];
        return;
    }
    default:
        break;
    }
#endif
    memcpy(dst, data, length);
#else
#error "Not yet implemented"
#endif
}
 | |
| 
 | |
| /* ---------------------------------------------------------------- */
 | |
| 
 | |
/*
 * Overwrites the first `laneCount` lanes of the state with laneCount*8 bytes
 * taken from `data`.
 *
 * With KeccakP1600_useLaneComplementing, lanes 1, 2, 8, 12, 17 and 20 are
 * stored inverted; every other lane is copied unchanged. Without it the
 * whole range is a plain memcpy. Only implemented for little-endian hosts.
 */
void KeccakP1600_OverwriteLanes(void *state, const unsigned char *data, unsigned int laneCount)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
#ifdef KeccakP1600_useLaneComplementing
    UINT64 *dst = (UINT64*)state;
    const UINT64 *src = (const UINT64*)data;
    unsigned int k;

    for (k = 0; k < laneCount; k++) {
        switch (k) {
        case 1: case 2: case 8: case 12: case 17: case 20:
            dst[k] = ~src[k];
            break;
        default:
            dst[k] = src[k];
            break;
        }
    }
#else
    memcpy(state, data, laneCount*8);
#endif
#else
#error "Not yet implemented"
#endif
}
 | |
| 
 | |
| /* ---------------------------------------------------------------- */
 | |
| 
 | |
| void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
 | |
| {
 | |
|     SnP_OverwriteBytes(state, data, offset, length, KeccakP1600_OverwriteLanes, KeccakP1600_OverwriteBytesInLane, 8);
 | |
| }
 | |
| 
 | |
| /* ---------------------------------------------------------------- */
 | |
| 
 | |
/*
 * Sets the first `byteCount` bytes of the state to logical zero.
 *
 * Without lane complementing this is a plain memset to 0. With
 * KeccakP1600_useLaneComplementing, lanes 1, 2, 8, 12, 17 and 20 are stored
 * inverted, so their bytes are filled with 0xFF instead. A trailing partial
 * lane is filled only up to `byteCount`.
 * Only implemented for little-endian hosts.
 */
void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
#ifdef KeccakP1600_useLaneComplementing
    unsigned char *bytes = (unsigned char*)state;
    const unsigned int fullLanes = byteCount/8;
    const unsigned int tailBytes = byteCount%8;
    unsigned int k;

    /* k == fullLanes covers the trailing partial lane, if there is one. */
    for (k = 0; k <= fullLanes; k++) {
        const unsigned int span = (k < fullLanes) ? 8 : tailBytes;
        int fill = 0;

        switch (k) {
        case 1: case 2: case 8: case 12: case 17: case 20:
            fill = 0xFF;
            break;
        default:
            break;
        }
        if (span != 0)
            memset(bytes + k*8, fill, span);
    }
#else
    memset(state, 0, byteCount);
#endif
#else
#error "Not yet implemented"
#endif
}
 | |
| 
 | |
| /* ---------------------------------------------------------------- */
 | |
| 
 | |
| void KeccakP1600_Permute_24rounds(void *state)
 | |
| {
 | |
|     declareABCDE
 | |
|     #ifndef KeccakP1600_fullUnrolling
 | |
|     unsigned int i;
 | |
|     #endif
 | |
|     UINT64 *stateAsLanes = (UINT64*)state;
 | |
| 
 | |
|     copyFromState(A, stateAsLanes)
 | |
|     rounds24
 | |
|     copyToState(stateAsLanes, A)
 | |
| }
 | |
| 
 | |
| /* ---------------------------------------------------------------- */
 | |
| 
 | |
| void KeccakP1600_Permute_12rounds(void *state)
 | |
| {
 | |
|     declareABCDE
 | |
|     #ifndef KeccakP1600_fullUnrolling
 | |
|     unsigned int i;
 | |
|     #endif
 | |
|     UINT64 *stateAsLanes = (UINT64*)state;
 | |
| 
 | |
|     copyFromState(A, stateAsLanes)
 | |
|     rounds12
 | |
|     copyToState(stateAsLanes, A)
 | |
| }
 | |
| 
 | |
| /* ---------------------------------------------------------------- */
 | |
| 
 | |
| void KeccakP1600_ExtractBytesInLane(const void *state, unsigned int lanePosition, unsigned char *data, unsigned int offset, unsigned int length)
 | |
| {
 | |
|     UINT64 lane = ((UINT64*)state)[lanePosition];
 | |
| #ifdef KeccakP1600_useLaneComplementing
 | |
|     if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20))
 | |
|         lane = ~lane;
 | |
| #endif
 | |
| #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
 | |
|     {
 | |
|         UINT64 lane1[1];
 | |
|         lane1[0] = lane;
 | |
|         memcpy(data, (UINT8*)lane1+offset, length);
 | |
|     }
 | |
| #else
 | |
|     unsigned int i;
 | |
|     lane >>= offset*8;
 | |
|     for(i=0; i<length; i++) {
 | |
|         data[i] = lane & 0xFF;
 | |
|         lane >>= 8;
 | |
|     }
 | |
| #endif
 | |
| }
 | |
| 
 | |
| /* ---------------------------------------------------------------- */
 | |
| 
 | |
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
/*
 * Stores the 64-bit `word` into bytes[0..7], least significant byte first.
 * Only compiled for hosts that are not little-endian.
 */
void fromWordToBytes(UINT8 *bytes, const UINT64 word)
{
    UINT64 remaining = word;
    unsigned int k;

    for (k = 0; k < (64/8); k++) {
        bytes[k] = remaining & 0xFF;
        remaining >>= 8;
    }
}
#endif
 | |
| 
 | |
/*
 * Copies the first `laneCount` lanes of the state into `data`
 * (laneCount*8 bytes).
 *
 * Little-endian hosts copy the memory as is; other hosts serialize each lane
 * with fromWordToBytes. With KeccakP1600_useLaneComplementing, lanes 1, 2, 8,
 * 12, 17 and 20 are stored inverted, so each of them that lies inside the
 * extracted range is inverted again in the output.
 */
void KeccakP1600_ExtractLanes(const void *state, unsigned char *data, unsigned int laneCount)
{
#ifdef KeccakP1600_useLaneComplementing
    /* Ascending order lets the fix-up loop stop at the first lane out of range. */
    static const unsigned char complementedLanes[6] = { 1, 2, 8, 12, 17, 20 };
    unsigned int c;
#endif
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    memcpy(data, state, laneCount*8);
#else
    unsigned int lane;

    for (lane = 0; lane < laneCount; lane++)
        fromWordToBytes(data+(lane*8), ((const UINT64*)state)[lane]);
#endif
#ifdef KeccakP1600_useLaneComplementing
    for (c = 0; c < 6 && complementedLanes[c] < laneCount; c++) {
        UINT64 *out = (UINT64*)data + complementedLanes[c];

        *out = ~*out;
    }
#endif
}
 | |
| 
 | |
| /* ---------------------------------------------------------------- */
 | |
| 
 | |
| void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length)
 | |
| {
 | |
|     SnP_ExtractBytes(state, data, offset, length, KeccakP1600_ExtractLanes, KeccakP1600_ExtractBytesInLane, 8);
 | |
| }
 | |
| 
 | |
| /* ---------------------------------------------------------------- */
 | |
| 
 | |
| void KeccakP1600_ExtractAndAddBytesInLane(const void *state, unsigned int lanePosition, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
 | |
| {
 | |
|     UINT64 lane = ((UINT64*)state)[lanePosition];
 | |
| #ifdef KeccakP1600_useLaneComplementing
 | |
|     if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20))
 | |
|         lane = ~lane;
 | |
| #endif
 | |
| #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
 | |
|     {
 | |
|         unsigned int i;
 | |
|         UINT64 lane1[1];
 | |
|         lane1[0] = lane;
 | |
|         for(i=0; i<length; i++)
 | |
|             output[i] = input[i] ^ ((UINT8*)lane1)[offset+i];
 | |
|     }
 | |
| #else
 | |
|     unsigned int i;
 | |
|     lane >>= offset*8;
 | |
|     for(i=0; i<length; i++) {
 | |
|         output[i] = input[i] ^ (lane & 0xFF);
 | |
|         lane >>= 8;
 | |
|     }
 | |
| #endif
 | |
| }
 | |
| 
 | |
| /* ---------------------------------------------------------------- */
 | |
| 
 | |
| void KeccakP1600_ExtractAndAddLanes(const void *state, const unsigned char *input, unsigned char *output, unsigned int laneCount)
 | |
| {
 | |
|     unsigned int i;
 | |
| #if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
 | |
|     unsigned char temp[8];
 | |
|     unsigned int j;
 | |
| #endif
 | |
| 
 | |
|     for(i=0; i<laneCount; i++) {
 | |
| #if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
 | |
|         ((UINT64*)output)[i] = ((UINT64*)input)[i] ^ ((const UINT64*)state)[i];
 | |
| #else
 | |
|         fromWordToBytes(temp, ((const UINT64*)state)[i]);
 | |
|         for(j=0; j<8; j++)
 | |
|             output[i*8+j] = input[i*8+j] ^ temp[j];
 | |
| #endif
 | |
|     }
 | |
| #ifdef KeccakP1600_useLaneComplementing
 | |
|     if (laneCount > 1) {
 | |
|         ((UINT64*)output)[ 1] = ~((UINT64*)output)[ 1];
 | |
|         if (laneCount > 2) {
 | |
|             ((UINT64*)output)[ 2] = ~((UINT64*)output)[ 2];
 | |
|             if (laneCount > 8) {
 | |
|                 ((UINT64*)output)[ 8] = ~((UINT64*)output)[ 8];
 | |
|                 if (laneCount > 12) {
 | |
|                     ((UINT64*)output)[12] = ~((UINT64*)output)[12];
 | |
|                     if (laneCount > 17) {
 | |
|                         ((UINT64*)output)[17] = ~((UINT64*)output)[17];
 | |
|                         if (laneCount > 20) {
 | |
|                             ((UINT64*)output)[20] = ~((UINT64*)output)[20];
 | |
|                         }
 | |
|                     }
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| #endif
 | |
| }
 | |
| 
 | |
| /* ---------------------------------------------------------------- */
 | |
| 
 | |
| void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
 | |
| {
 | |
|     SnP_ExtractAndAddBytes(state, input, output, offset, length, KeccakP1600_ExtractAndAddLanes, KeccakP1600_ExtractAndAddBytesInLane, 8);
 | |
| }
 | |
| 
 | |
| /* ---------------------------------------------------------------- */
 | |
| 
 | |
| size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen)
 | |
| {
 | |
|     size_t originalDataByteLen = dataByteLen;
 | |
|     declareABCDE
 | |
|     #ifndef KeccakP1600_fullUnrolling
 | |
|     unsigned int i;
 | |
|     #endif
 | |
|     UINT64 *stateAsLanes = (UINT64*)state;
 | |
|     UINT64 *inDataAsLanes = (UINT64*)data;
 | |
| 
 | |
|     copyFromState(A, stateAsLanes)
 | |
|     while(dataByteLen >= laneCount*8) {
 | |
|         addInput(A, inDataAsLanes, laneCount)
 | |
|         rounds24
 | |
|         inDataAsLanes += laneCount;
 | |
|         dataByteLen -= laneCount*8;
 | |
|     }
 | |
|     copyToState(stateAsLanes, A)
 | |
|     return originalDataByteLen - dataByteLen;
 | |
| }
 |