This is the hot function inside the project I am trying to optimize.
PACKAGE NAME: libwhirlpool
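It takes a 64-byte input buffer, packs it into eight 64-bit words, and then repeatedly combines those words with the running hash through byte-indexed table lookups, bit shifts and XORs to produce the updated hash.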
Notes:
The input bytes are assembled into 64-bit words in big-endian order (most significant byte first), so converting to a little-endian layout might change the performance characteristics.
****************** FUNCTION BELOW ******************
/*
 * Table-driven transform shared by the key schedule and the state update.
 *
 * Output word i is the XOR of eight table lookups: table Ck is indexed by
 * byte k (counted from the most significant byte) of input word (i - k) mod 8.
 * C0..C7 are lookup tables defined elsewhere in this file.
 *
 * The byte is masked BEFORE the cast to int, so the value converted is always
 * in 0..255 and no out-of-range u64 -> int conversion takes place.
 *
 * `in` and `out` must not overlap: every output word depends on all eight
 * input words.
 */
static void whirlpoolTransform(const u64 in[8], u64 out[8]) {
    out[0] =
        C0[(int)((in[0] >> 56) & 0xff)] ^
        C1[(int)((in[7] >> 48) & 0xff)] ^
        C2[(int)((in[6] >> 40) & 0xff)] ^
        C3[(int)((in[5] >> 32) & 0xff)] ^
        C4[(int)((in[4] >> 24) & 0xff)] ^
        C5[(int)((in[3] >> 16) & 0xff)] ^
        C6[(int)((in[2] >>  8) & 0xff)] ^
        C7[(int)((in[1]      ) & 0xff)];
    out[1] =
        C0[(int)((in[1] >> 56) & 0xff)] ^
        C1[(int)((in[0] >> 48) & 0xff)] ^
        C2[(int)((in[7] >> 40) & 0xff)] ^
        C3[(int)((in[6] >> 32) & 0xff)] ^
        C4[(int)((in[5] >> 24) & 0xff)] ^
        C5[(int)((in[4] >> 16) & 0xff)] ^
        C6[(int)((in[3] >>  8) & 0xff)] ^
        C7[(int)((in[2]      ) & 0xff)];
    out[2] =
        C0[(int)((in[2] >> 56) & 0xff)] ^
        C1[(int)((in[1] >> 48) & 0xff)] ^
        C2[(int)((in[0] >> 40) & 0xff)] ^
        C3[(int)((in[7] >> 32) & 0xff)] ^
        C4[(int)((in[6] >> 24) & 0xff)] ^
        C5[(int)((in[5] >> 16) & 0xff)] ^
        C6[(int)((in[4] >>  8) & 0xff)] ^
        C7[(int)((in[3]      ) & 0xff)];
    out[3] =
        C0[(int)((in[3] >> 56) & 0xff)] ^
        C1[(int)((in[2] >> 48) & 0xff)] ^
        C2[(int)((in[1] >> 40) & 0xff)] ^
        C3[(int)((in[0] >> 32) & 0xff)] ^
        C4[(int)((in[7] >> 24) & 0xff)] ^
        C5[(int)((in[6] >> 16) & 0xff)] ^
        C6[(int)((in[5] >>  8) & 0xff)] ^
        C7[(int)((in[4]      ) & 0xff)];
    out[4] =
        C0[(int)((in[4] >> 56) & 0xff)] ^
        C1[(int)((in[3] >> 48) & 0xff)] ^
        C2[(int)((in[2] >> 40) & 0xff)] ^
        C3[(int)((in[1] >> 32) & 0xff)] ^
        C4[(int)((in[0] >> 24) & 0xff)] ^
        C5[(int)((in[7] >> 16) & 0xff)] ^
        C6[(int)((in[6] >>  8) & 0xff)] ^
        C7[(int)((in[5]      ) & 0xff)];
    out[5] =
        C0[(int)((in[5] >> 56) & 0xff)] ^
        C1[(int)((in[4] >> 48) & 0xff)] ^
        C2[(int)((in[3] >> 40) & 0xff)] ^
        C3[(int)((in[2] >> 32) & 0xff)] ^
        C4[(int)((in[1] >> 24) & 0xff)] ^
        C5[(int)((in[0] >> 16) & 0xff)] ^
        C6[(int)((in[7] >>  8) & 0xff)] ^
        C7[(int)((in[6]      ) & 0xff)];
    out[6] =
        C0[(int)((in[6] >> 56) & 0xff)] ^
        C1[(int)((in[5] >> 48) & 0xff)] ^
        C2[(int)((in[4] >> 40) & 0xff)] ^
        C3[(int)((in[3] >> 32) & 0xff)] ^
        C4[(int)((in[2] >> 24) & 0xff)] ^
        C5[(int)((in[1] >> 16) & 0xff)] ^
        C6[(int)((in[0] >>  8) & 0xff)] ^
        C7[(int)((in[7]      ) & 0xff)];
    out[7] =
        C0[(int)((in[7] >> 56) & 0xff)] ^
        C1[(int)((in[6] >> 48) & 0xff)] ^
        C2[(int)((in[5] >> 40) & 0xff)] ^
        C3[(int)((in[4] >> 32) & 0xff)] ^
        C4[(int)((in[3] >> 24) & 0xff)] ^
        C5[(int)((in[2] >> 16) & 0xff)] ^
        C6[(int)((in[1] >>  8) & 0xff)] ^
        C7[(int)((in[0]      ) & 0xff)];
}

#ifdef TRACE_INTERMEDIATE_VALUES
/*
 * Debug helper: print the round key and the cipher state side by side,
 * one 64-bit word of each per output line, most significant byte first.
 */
static void traceKeyAndState(const u64 K[8], const u64 state[8]) {
    int i;

    for (i = 0; i < DIGESTBYTES/8; i++) {
        printf(
            " %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X\n",
            (u8)(K[i] >> 56),
            (u8)(K[i] >> 48),
            (u8)(K[i] >> 40),
            (u8)(K[i] >> 32),
            (u8)(K[i] >> 24),
            (u8)(K[i] >> 16),
            (u8)(K[i] >>  8),
            (u8)(K[i]      ),
            (u8)(state[i] >> 56),
            (u8)(state[i] >> 48),
            (u8)(state[i] >> 40),
            (u8)(state[i] >> 32),
            (u8)(state[i] >> 24),
            (u8)(state[i] >> 16),
            (u8)(state[i] >>  8),
            (u8)(state[i]      ));
    }
}
#endif /* ?TRACE_INTERMEDIATE_VALUES */

/*
 * Compress the 64 bytes at structpointer->buffer into structpointer->hash.
 *
 * Steps:
 *   1. Pack the buffer into eight 64-bit words (big-endian byte order).
 *   2. Take the current hash as round key K^0 and XOR it into the block to
 *      form the initial cipher state.
 *   3. For r = 1..R: derive K^r from K^{r-1} (XORing rc[r] into word 0),
 *      then transform the state and XOR in K^r.
 *   4. Fold the result back into the hash (Miyaguchi-Preneel):
 *      hash ^= state ^ block.
 *
 * Only structpointer->hash is modified; the buffer is read, not written.
 * When TRACE_INTERMEDIATE_VALUES is defined, intermediate values are printed
 * to stdout.
 */
static void processBuffer(struct NESSIEstruct * const structpointer) {
    int i, r;
    u64 K[8];        /* the round key */
    u64 block[8];    /* mu(buffer) */
    u64 state[8];    /* the cipher state */
    u64 L[8];        /* scratch output of whirlpoolTransform */
    const u8 *buffer = structpointer->buffer;

#ifdef TRACE_INTERMEDIATE_VALUES
    printf("The 8x8 matrix Z' derived from the data-string is as follows.\n");
    for (i = 0; i < WBLOCKBYTES/8; i++) {
        printf(" %02X %02X %02X %02X %02X %02X %02X %02X\n",
            buffer[0], buffer[1], buffer[2], buffer[3],
            buffer[4], buffer[5], buffer[6], buffer[7]);
        buffer += 8;
    }
    printf("\n");
    buffer = structpointer->buffer;
#endif /* ?TRACE_INTERMEDIATE_VALUES */

    /*
     * map the buffer to a block (each group of 8 bytes becomes one word,
     * first byte in the most significant position):
     */
    for (i = 0; i < 8; i++, buffer += 8) {
        block[i] =
            ((u64)buffer[0] << 56) ^
            ((u64)buffer[1] << 48) ^
            ((u64)buffer[2] << 40) ^
            ((u64)buffer[3] << 32) ^
            ((u64)buffer[4] << 24) ^
            ((u64)buffer[5] << 16) ^
            ((u64)buffer[6] <<  8) ^
            ((u64)buffer[7]      );
    }

    /*
     * compute and apply K^0 to the cipher state.
     * All eight words must be initialised: every round reads every word of
     * both K and state.
     */
    for (i = 0; i < 8; i++) {
        K[i] = structpointer->hash[i];
        state[i] = block[i] ^ K[i];
    }

#ifdef TRACE_INTERMEDIATE_VALUES
    printf("The K_0 matrix (from the initialization value IV) and X'' matrix are as follows.\n");
    traceKeyAndState(K, state);
    printf("\n");
    printf("The following are (hexadecimal representations of) the successive values of the variables K_i for i = 1 to 10 and W'.\n");
    printf("\n");
#endif /* ?TRACE_INTERMEDIATE_VALUES */

    /*
     * iterate over all rounds:
     */
    for (r = 1; r <= R; r++) {
        /*
         * compute K^r from K^{r-1}; rc[r] is mixed into word 0 only:
         */
        whirlpoolTransform(K, L);
        L[0] ^= rc[r];
        for (i = 0; i < 8; i++) {
            K[i] = L[i];
        }

        /*
         * apply the r-th round transformation and add the round key:
         */
        whirlpoolTransform(state, L);
        for (i = 0; i < 8; i++) {
            state[i] = L[i] ^ K[i];
        }

#ifdef TRACE_INTERMEDIATE_VALUES
        printf("i = %d:\n", r);
        traceKeyAndState(K, state);
        printf("\n");
#endif /* ?TRACE_INTERMEDIATE_VALUES */
    }

    /*
     * apply the Miyaguchi-Preneel compression function:
     */
    for (i = 0; i < 8; i++) {
        structpointer->hash[i] ^= state[i] ^ block[i];
    }

#ifdef TRACE_INTERMEDIATE_VALUES
    printf("The value of Y' output from the round-function is as follows.\n");
    for (i = 0; i < DIGESTBYTES/8; i++) {
        printf(" %02X %02X %02X %02X %02X %02X %02X %02X\n",
            (u8)(structpointer->hash[i] >> 56),
            (u8)(structpointer->hash[i] >> 48),
            (u8)(structpointer->hash[i] >> 40),
            (u8)(structpointer->hash[i] >> 32),
            (u8)(structpointer->hash[i] >> 24),
            (u8)(structpointer->hash[i] >> 16),
            (u8)(structpointer->hash[i] >>  8),
            (u8)(structpointer->hash[i]      ));
    }
    printf("\n");
#endif /* ?TRACE_INTERMEDIATE_VALUES */
}
Commentaires