top of page

STAGE 3: HOT FUNCTION TO OPTIMIZE (closer look)

Writer: jadach1

This is the hot function inside the project I am trying to optimize.


PACKAGE NAME: libwhirlpool


It takes the input buffer and several lookup arrays, then bit-shifts and XORs their values together to create a hash.


Notes:


The bit-shifting assembles the bytes in big-endian order, so converting to little-endian might give different results in terms of speed.


****************** FUNCTION BELOW ******************


/*
 * processBuffer - fold one buffered data block into the running hash.
 *
 * Reads 64 bytes from structpointer->buffer, packs them big-endian into
 * eight 64-bit words, runs R rounds of the table-driven transformation
 * (lookup tables C0..C7 and round constants rc[]), and finally XORs the
 * resulting cipher state and the input block into structpointer->hash[]
 * (Miyaguchi-Preneel compression).
 *
 * structpointer: hashing context; ->buffer is only read, ->hash[0..7] is
 *                read as the initial round key and updated in place.
 *
 * When TRACE_INTERMEDIATE_VALUES is defined, the intermediate matrices are
 * printed to stdout with printf.
 */
static void processBuffer(struct NESSIEstruct * const structpointer) {
    int i, r;
    u64 K[8];        /* the round key */
    u64 block[8];    /* mu(buffer) */
    u64 state[8];    /* the cipher state */
    u64 L[8];        /* scratch rows, so each round reads only the old K/state */
    u8 *buffer = structpointer->buffer;

#ifdef TRACE_INTERMEDIATE_VALUES
    printf("The 8x8 matrix Z' derived from the data-string is as follows.\n");
    for (i = 0; i < WBLOCKBYTES/8; i++) {
        printf(" %02X %02X %02X %02X %02X %02X %02X %02X\n",
            buffer[0], buffer[1], buffer[2], buffer[3],
            buffer[4], buffer[5], buffer[6], buffer[7]);
        buffer += 8;
    }
    printf("\n");
    /* rewind: the dump above advanced the cursor */
    buffer = structpointer->buffer;
#endif /* ?TRACE_INTERMEDIATE_VALUES */

    /*
     * map the buffer to a block:
     * each group of 8 bytes becomes one 64-bit word, first byte most
     * significant.
     */
    for (i = 0; i < 8; i++, buffer += 8) {
        block[i] =
            (((u64)buffer[0]        ) << 56) ^
            (((u64)buffer[1] & 0xffL) << 48) ^
            (((u64)buffer[2] & 0xffL) << 40) ^
            (((u64)buffer[3] & 0xffL) << 32) ^
            (((u64)buffer[4] & 0xffL) << 24) ^
            (((u64)buffer[5] & 0xffL) << 16) ^
            (((u64)buffer[6] & 0xffL) <<  8) ^
            (((u64)buffer[7] & 0xffL)      );
    }

    /*
     * compute and apply K^0 to the cipher state:
     * all eight rows must be seeded here, because every row of K and
     * state is read during the first round.
     */
    state[0] = block[0] ^ (K[0] = structpointer->hash[0]);
    state[1] = block[1] ^ (K[1] = structpointer->hash[1]);
    state[2] = block[2] ^ (K[2] = structpointer->hash[2]);
    state[3] = block[3] ^ (K[3] = structpointer->hash[3]);
    state[4] = block[4] ^ (K[4] = structpointer->hash[4]);
    state[5] = block[5] ^ (K[5] = structpointer->hash[5]);
    state[6] = block[6] ^ (K[6] = structpointer->hash[6]);
    state[7] = block[7] ^ (K[7] = structpointer->hash[7]);

#ifdef TRACE_INTERMEDIATE_VALUES
    printf("The K_0 matrix (from the initialization value IV) and X'' matrix are as follows.\n");
    for (i = 0; i < DIGESTBYTES/8; i++) {
        printf(
            " %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X\n",
            (u8)(K[i] >> 56),
            (u8)(K[i] >> 48),
            (u8)(K[i] >> 40),
            (u8)(K[i] >> 32),
            (u8)(K[i] >> 24),
            (u8)(K[i] >> 16),
            (u8)(K[i] >>  8),
            (u8)(K[i]      ),

            (u8)(state[i] >> 56),
            (u8)(state[i] >> 48),
            (u8)(state[i] >> 40),
            (u8)(state[i] >> 32),
            (u8)(state[i] >> 24),
            (u8)(state[i] >> 16),
            (u8)(state[i] >>  8),
            (u8)(state[i]      ));
    }
    printf("\n");
    printf("The following are (hexadecimal representations of) the successive values of the variables K_i for i = 1 to 10 and W'.\n");
    printf("\n");
#endif /* ?TRACE_INTERMEDIATE_VALUES */

    /*
     * iterate over all rounds:
     */
    for (r = 1; r <= R; r++) {
        /*
         * compute K^r from K^{r-1}:
         * row i takes byte j (counted from the most significant) of
         * K[(i - j) mod 8] through table Cj; the round constant rc[r]
         * is folded into row 0 only.
         */
        L[0] =
            C0[(int)(K[0] >> 56)       ] ^
            C1[(int)(K[7] >> 48) & 0xff] ^
            C2[(int)(K[6] >> 40) & 0xff] ^
            C3[(int)(K[5] >> 32) & 0xff] ^
            C4[(int)(K[4] >> 24) & 0xff] ^
            C5[(int)(K[3] >> 16) & 0xff] ^
            C6[(int)(K[2] >>  8) & 0xff] ^
            C7[(int)(K[1]      ) & 0xff] ^
            rc[r];
        L[1] =
            C0[(int)(K[1] >> 56)       ] ^
            C1[(int)(K[0] >> 48) & 0xff] ^
            C2[(int)(K[7] >> 40) & 0xff] ^
            C3[(int)(K[6] >> 32) & 0xff] ^
            C4[(int)(K[5] >> 24) & 0xff] ^
            C5[(int)(K[4] >> 16) & 0xff] ^
            C6[(int)(K[3] >>  8) & 0xff] ^
            C7[(int)(K[2]      ) & 0xff];
        L[2] =
            C0[(int)(K[2] >> 56)       ] ^
            C1[(int)(K[1] >> 48) & 0xff] ^
            C2[(int)(K[0] >> 40) & 0xff] ^
            C3[(int)(K[7] >> 32) & 0xff] ^
            C4[(int)(K[6] >> 24) & 0xff] ^
            C5[(int)(K[5] >> 16) & 0xff] ^
            C6[(int)(K[4] >>  8) & 0xff] ^
            C7[(int)(K[3]      ) & 0xff];
        L[3] =
            C0[(int)(K[3] >> 56)       ] ^
            C1[(int)(K[2] >> 48) & 0xff] ^
            C2[(int)(K[1] >> 40) & 0xff] ^
            C3[(int)(K[0] >> 32) & 0xff] ^
            C4[(int)(K[7] >> 24) & 0xff] ^
            C5[(int)(K[6] >> 16) & 0xff] ^
            C6[(int)(K[5] >>  8) & 0xff] ^
            C7[(int)(K[4]      ) & 0xff];
        L[4] =
            C0[(int)(K[4] >> 56)       ] ^
            C1[(int)(K[3] >> 48) & 0xff] ^
            C2[(int)(K[2] >> 40) & 0xff] ^
            C3[(int)(K[1] >> 32) & 0xff] ^
            C4[(int)(K[0] >> 24) & 0xff] ^
            C5[(int)(K[7] >> 16) & 0xff] ^
            C6[(int)(K[6] >>  8) & 0xff] ^
            C7[(int)(K[5]      ) & 0xff];
        L[5] =
            C0[(int)(K[5] >> 56)       ] ^
            C1[(int)(K[4] >> 48) & 0xff] ^
            C2[(int)(K[3] >> 40) & 0xff] ^
            C3[(int)(K[2] >> 32) & 0xff] ^
            C4[(int)(K[1] >> 24) & 0xff] ^
            C5[(int)(K[0] >> 16) & 0xff] ^
            C6[(int)(K[7] >>  8) & 0xff] ^
            C7[(int)(K[6]      ) & 0xff];
        L[6] =
            C0[(int)(K[6] >> 56)       ] ^
            C1[(int)(K[5] >> 48) & 0xff] ^
            C2[(int)(K[4] >> 40) & 0xff] ^
            C3[(int)(K[3] >> 32) & 0xff] ^
            C4[(int)(K[2] >> 24) & 0xff] ^
            C5[(int)(K[1] >> 16) & 0xff] ^
            C6[(int)(K[0] >>  8) & 0xff] ^
            C7[(int)(K[7]      ) & 0xff];
        L[7] =
            C0[(int)(K[7] >> 56)       ] ^
            C1[(int)(K[6] >> 48) & 0xff] ^
            C2[(int)(K[5] >> 40) & 0xff] ^
            C3[(int)(K[4] >> 32) & 0xff] ^
            C4[(int)(K[3] >> 24) & 0xff] ^
            C5[(int)(K[2] >> 16) & 0xff] ^
            C6[(int)(K[1] >>  8) & 0xff] ^
            C7[(int)(K[0]      ) & 0xff];
        K[0] = L[0];
        K[1] = L[1];
        K[2] = L[2];
        K[3] = L[3];
        K[4] = L[4];
        K[5] = L[5];
        K[6] = L[6];
        K[7] = L[7];

        /*
         * apply the r-th round transformation:
         * same table/rotation pattern as the key schedule, applied to
         * the state, with the freshly computed K[i] XORed into row i.
         */
        L[0] =
            C0[(int)(state[0] >> 56)       ] ^
            C1[(int)(state[7] >> 48) & 0xff] ^
            C2[(int)(state[6] >> 40) & 0xff] ^
            C3[(int)(state[5] >> 32) & 0xff] ^
            C4[(int)(state[4] >> 24) & 0xff] ^
            C5[(int)(state[3] >> 16) & 0xff] ^
            C6[(int)(state[2] >>  8) & 0xff] ^
            C7[(int)(state[1]      ) & 0xff] ^
            K[0];
        L[1] =
            C0[(int)(state[1] >> 56)       ] ^
            C1[(int)(state[0] >> 48) & 0xff] ^
            C2[(int)(state[7] >> 40) & 0xff] ^
            C3[(int)(state[6] >> 32) & 0xff] ^
            C4[(int)(state[5] >> 24) & 0xff] ^
            C5[(int)(state[4] >> 16) & 0xff] ^
            C6[(int)(state[3] >>  8) & 0xff] ^
            C7[(int)(state[2]      ) & 0xff] ^
            K[1];
        L[2] =
            C0[(int)(state[2] >> 56)       ] ^
            C1[(int)(state[1] >> 48) & 0xff] ^
            C2[(int)(state[0] >> 40) & 0xff] ^
            C3[(int)(state[7] >> 32) & 0xff] ^
            C4[(int)(state[6] >> 24) & 0xff] ^
            C5[(int)(state[5] >> 16) & 0xff] ^
            C6[(int)(state[4] >>  8) & 0xff] ^
            C7[(int)(state[3]      ) & 0xff] ^
            K[2];
        L[3] =
            C0[(int)(state[3] >> 56)       ] ^
            C1[(int)(state[2] >> 48) & 0xff] ^
            C2[(int)(state[1] >> 40) & 0xff] ^
            C3[(int)(state[0] >> 32) & 0xff] ^
            C4[(int)(state[7] >> 24) & 0xff] ^
            C5[(int)(state[6] >> 16) & 0xff] ^
            C6[(int)(state[5] >>  8) & 0xff] ^
            C7[(int)(state[4]      ) & 0xff] ^
            K[3];
        L[4] =
            C0[(int)(state[4] >> 56)       ] ^
            C1[(int)(state[3] >> 48) & 0xff] ^
            C2[(int)(state[2] >> 40) & 0xff] ^
            C3[(int)(state[1] >> 32) & 0xff] ^
            C4[(int)(state[0] >> 24) & 0xff] ^
            C5[(int)(state[7] >> 16) & 0xff] ^
            C6[(int)(state[6] >>  8) & 0xff] ^
            C7[(int)(state[5]      ) & 0xff] ^
            K[4];
        L[5] =
            C0[(int)(state[5] >> 56)       ] ^
            C1[(int)(state[4] >> 48) & 0xff] ^
            C2[(int)(state[3] >> 40) & 0xff] ^
            C3[(int)(state[2] >> 32) & 0xff] ^
            C4[(int)(state[1] >> 24) & 0xff] ^
            C5[(int)(state[0] >> 16) & 0xff] ^
            C6[(int)(state[7] >>  8) & 0xff] ^
            C7[(int)(state[6]      ) & 0xff] ^
            K[5];
        L[6] =
            C0[(int)(state[6] >> 56)       ] ^
            C1[(int)(state[5] >> 48) & 0xff] ^
            C2[(int)(state[4] >> 40) & 0xff] ^
            C3[(int)(state[3] >> 32) & 0xff] ^
            C4[(int)(state[2] >> 24) & 0xff] ^
            C5[(int)(state[1] >> 16) & 0xff] ^
            C6[(int)(state[0] >>  8) & 0xff] ^
            C7[(int)(state[7]      ) & 0xff] ^
            K[6];
        L[7] =
            C0[(int)(state[7] >> 56)       ] ^
            C1[(int)(state[6] >> 48) & 0xff] ^
            C2[(int)(state[5] >> 40) & 0xff] ^
            C3[(int)(state[4] >> 32) & 0xff] ^
            C4[(int)(state[3] >> 24) & 0xff] ^
            C5[(int)(state[2] >> 16) & 0xff] ^
            C6[(int)(state[1] >>  8) & 0xff] ^
            C7[(int)(state[0]      ) & 0xff] ^
            K[7];
        state[0] = L[0];
        state[1] = L[1];
        state[2] = L[2];
        state[3] = L[3];
        state[4] = L[4];
        state[5] = L[5];
        state[6] = L[6];
        state[7] = L[7];

#ifdef TRACE_INTERMEDIATE_VALUES
        printf("i = %d:\n", r);
        for (i = 0; i < DIGESTBYTES/8; i++) {
            printf(
                " %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X\n",
                (u8)(K[i] >> 56),
                (u8)(K[i] >> 48),
                (u8)(K[i] >> 40),
                (u8)(K[i] >> 32),
                (u8)(K[i] >> 24),
                (u8)(K[i] >> 16),
                (u8)(K[i] >>  8),
                (u8)(K[i]      ),

                (u8)(state[i] >> 56),
                (u8)(state[i] >> 48),
                (u8)(state[i] >> 40),
                (u8)(state[i] >> 32),
                (u8)(state[i] >> 24),
                (u8)(state[i] >> 16),
                (u8)(state[i] >>  8),
                (u8)(state[i]      ));
        }
        printf("\n");
#endif /* ?TRACE_INTERMEDIATE_VALUES */
    }

    /*
     * apply the Miyaguchi-Preneel compression function:
     * new hash = old hash ^ final state ^ input block.
     */
    structpointer->hash[0] ^= state[0] ^ block[0];
    structpointer->hash[1] ^= state[1] ^ block[1];
    structpointer->hash[2] ^= state[2] ^ block[2];
    structpointer->hash[3] ^= state[3] ^ block[3];
    structpointer->hash[4] ^= state[4] ^ block[4];
    structpointer->hash[5] ^= state[5] ^ block[5];
    structpointer->hash[6] ^= state[6] ^ block[6];
    structpointer->hash[7] ^= state[7] ^ block[7];

#ifdef TRACE_INTERMEDIATE_VALUES
    printf("The value of Y' output from the round-function is as follows.\n");
    for (i = 0; i < DIGESTBYTES/8; i++) {
        printf(" %02X %02X %02X %02X %02X %02X %02X %02X\n",
            (u8)(structpointer->hash[i] >> 56),
            (u8)(structpointer->hash[i] >> 48),
            (u8)(structpointer->hash[i] >> 40),
            (u8)(structpointer->hash[i] >> 32),
            (u8)(structpointer->hash[i] >> 24),
            (u8)(structpointer->hash[i] >> 16),
            (u8)(structpointer->hash[i] >>  8),
            (u8)(structpointer->hash[i]      ));
    }
    printf("\n");
#endif /* ?TRACE_INTERMEDIATE_VALUES */
}


 
 
 

Recent Posts

See All

Closing Thoughts

For my final blog post I would like to discuss what I have learned and plan to utilize in the future from this course. So although I was...

Commentaires


bottom of page