From 2f98c4258bada4b7de8dc4401dded43ade7f1c60 Mon Sep 17 00:00:00 2001 From: Manuel Novoa III Date: Fri, 29 Jun 2001 20:32:47 +0000 Subject: Fix a missing include in des.c and add configurability of loop unrolling to md5.c. Note: by default it is smallest/slowest. --- libcrypt/des.c | 1 + libcrypt/md5.c | 275 +++++++++++++++++++++++++++++++++++++++++++++------------ 2 files changed, 220 insertions(+), 56 deletions(-) (limited to 'libcrypt') diff --git a/libcrypt/des.c b/libcrypt/des.c index 93803d2e0..04b1e7f2f 100644 --- a/libcrypt/des.c +++ b/libcrypt/des.c @@ -43,6 +43,7 @@ * a no-password login. */ +#include #include extern char * md5_crypt_r( const char *pw, const char *salt, struct crypt_data * data); diff --git a/libcrypt/md5.c b/libcrypt/md5.c index 66ff0d119..c9d65d21a 100644 --- a/libcrypt/md5.c +++ b/libcrypt/md5.c @@ -46,11 +46,28 @@ * "Un-inlined" code using loops and static const tables in order to * reduce generated code size (on i386 from approx 4k to approx 2.5k). * - * WARNING!!! Changed PADDING array from a staticly allocated object to - * a dynamicly generated one. Although it was declared static - * and not static const, it doesn't appear that it ever changes. + * June 29, 2001 Manuel Novoa III + * + * Completely removed static PADDING array. + * + * Reintroduced the loop unrolling in MD5Transform and added the + * MD5_SIZE_OVER_SPEED option for configurability. Define below as: + * 0 fully unrolled loops + * 1 partially unrolled (4 ops per loop) + * 2 no unrolling -- introduces the need to swap 4 variables (slow) + * 3 no unrolling and all 4 loops merged into one with switch + * in each loop (glacial) + * On i386, sizes are roughly (-Os -fno-builtin): + * 0: 3k 1: 2.5k 2: 2.2k 3: 2k */ +/* + * Valid values are 0 (fastest/largest) to 3 (smallest/slowest). 
+ */ +#define MD5_SIZE_OVER_SPEED 3 + +/**********************************************************************/ + #include #include #include @@ -224,28 +241,14 @@ void MD5Update ( MD5_CTX *context, const unsigned char *input, unsigned int inpu * MD5 padding. Adds padding followed by original length. */ -#define STATIC_PADDING 0 -#if STATIC_PADDING -static unsigned char PADDING[64] = { - 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; -#endif - void MD5Pad ( MD5_CTX *context) { unsigned char bits[8]; unsigned int index, padLen; - -#if !STATIC_PADDING unsigned char PADDING[64]; - for (index = 0 ; index < sizeof(PADDING) ; index++) { - PADDING[index] = 0; - } + memset(PADDING, 0, sizeof(PADDING)); PADDING[0] = 0x80; -#endif /* Save number of bits */ Encode (bits, context->count, 8); @@ -283,76 +286,236 @@ MD5Transform (state, block) u_int32_t state[4]; const unsigned char block[64]; { - u_int32_t a, b, c, d, x[16], temp; + u_int32_t a, b, c, d, x[16]; + +#if MD5_SIZE_OVER_SPEED > 1 + u_int32_t temp; + const char *ps; + + static const char S[] = { + 7, 12, 17, 22, + 5, 9, 14, 20, + 4, 11, 16, 23, + 6, 10, 15, 21 + }; +#endif /* MD5_SIZE_OVER_SPEED > 1 */ + +#if MD5_SIZE_OVER_SPEED > 0 + const u_int32_t *pc; + const char *pp; int i; - static const int S1[] = { 7, 12, 17, 22 }; - static const u_int32_t C1[] = { + static const u_int32_t C[] = { + /* round 1 */ 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, - 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821 - }; - - static const int p2[] = { - 1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12 - }; - static const int S2[] = { 5, 9, 14, 20 }; - static const u_int32_t C2[] = { + 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, + /* round 2 */ 0xf61e2562, 0xc040b340, 0x265e5a51, 
0xe9b6c7aa, 0xd62f105d, 0x2441453, 0xd8a1e681, 0xe7d3fbc8, 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, - 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a - }; - - static const int p3[] = { - 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2 - }; - static const int S3[] = { 4, 11, 16, 23 }; - static const u_int32_t C3[] = { + 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a, + /* round 3 */ 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x4881d05, - 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665 - }; - - static const int p4[] = { - 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9 - }; - static const int S4[] = { 6, 10, 15, 21 }; - static const u_int32_t C4[] = { + 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, + /* round 4 */ 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1, 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391 }; + static const char P[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, /* 1 */ + 1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, /* 2 */ + 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, /* 3 */ + 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9 /* 4 */ + }; + +#endif /* MD5_SIZE_OVER_SPEED > 0 */ + Decode (x, block, 64); a = state[0]; b = state[1]; c = state[2]; d = state[3]; +#if MD5_SIZE_OVER_SPEED > 2 + pc = C; pp = P; ps = S - 4; + + for ( i = 0 ; i < 64 ; i++ ) { + if ((i&0x0f) == 0) ps += 4; + temp = a; + switch (i>>4) { + case 0: + temp += F(b,c,d); + break; + case 1: + temp += G(b,c,d); + break; + case 2: + temp += H(b,c,d); + break; + case 3: + temp += I(b,c,d); + break; + } + temp += x[(int)(*pp++)] + *pc++; + temp = ROTATE_LEFT(temp, ps[i&3]); + temp += b; + a = d; d = c; c = b; b = temp; + } +#elif MD5_SIZE_OVER_SPEED > 1 + pc = C; pp = P; ps = S; + /* Round 1 */ for ( i = 0 ; i < 16 ; i++ ) { - FF (a, b, c, d, x[i], 
S1[i&3], C1[i]); - temp = d; d= c; c = b; b = a; a = temp; + FF (a, b, c, d, x[(int)(*pp++)], ps[i&0x3], *pc++); + temp = d; d = c; c = b; b = a; a = temp; } /* Round 2 */ - for ( i = 0 ; i < 16 ; i++ ) { - GG (a, b, c, d, x[p2[i]], S2[i&3], C2[i]); - temp = d; d= c; c = b; b = a; a = temp; + ps += 4; + for ( ; i < 32 ; i++ ) { + GG (a, b, c, d, x[(int)(*pp++)], ps[i&0x3], *pc++); + temp = d; d = c; c = b; b = a; a = temp; } /* Round 3 */ - for ( i = 0 ; i < 16 ; i++ ) { - HH (a, b, c, d, x[p3[i]], S3[i&3], C3[i]); - temp = d; d= c; c = b; b = a; a = temp; + ps += 4; + for ( ; i < 48 ; i++ ) { + HH (a, b, c, d, x[(int)(*pp++)], ps[i&0x3], *pc++); + temp = d; d = c; c = b; b = a; a = temp; } /* Round 4 */ - for ( i = 0 ; i < 16 ; i++ ) { - II (a, b, c, d, x[p4[i]], S4[i&3], C4[i]); - temp = d; d= c; c = b; b = a; a = temp; + ps += 4; + for ( ; i < 64 ; i++ ) { + II (a, b, c, d, x[(int)(*pp++)], ps[i&0x3], *pc++); + temp = d; d = c; c = b; b = a; a = temp; } +#elif MD5_SIZE_OVER_SPEED > 0 + pc = C; pp = P; + + /* Round 1 */ + for ( i = 0 ; i < 4 ; i++ ) { + FF (a, b, c, d, x[(int)(*pp++)], 7, *pc++); + FF (d, a, b, c, x[(int)(*pp++)], 12, *pc++); + FF (c, d, a, b, x[(int)(*pp++)], 17, *pc++); + FF (b, c, d, a, x[(int)(*pp++)], 22, *pc++); + } + + /* Round 2 */ + for ( i = 0 ; i < 4 ; i++ ) { + GG (a, b, c, d, x[(int)(*pp++)], 5, *pc++); + GG (d, a, b, c, x[(int)(*pp++)], 9, *pc++); + GG (c, d, a, b, x[(int)(*pp++)], 14, *pc++); + GG (b, c, d, a, x[(int)(*pp++)], 20, *pc++); + } + /* Round 3 */ + for ( i = 0 ; i < 4 ; i++ ) { + HH (a, b, c, d, x[(int)(*pp++)], 4, *pc++); + HH (d, a, b, c, x[(int)(*pp++)], 11, *pc++); + HH (c, d, a, b, x[(int)(*pp++)], 16, *pc++); + HH (b, c, d, a, x[(int)(*pp++)], 23, *pc++); + } + + /* Round 4 */ + for ( i = 0 ; i < 4 ; i++ ) { + II (a, b, c, d, x[(int)(*pp++)], 6, *pc++); + II (d, a, b, c, x[(int)(*pp++)], 10, *pc++); + II (c, d, a, b, x[(int)(*pp++)], 15, *pc++); + II (b, c, d, a, x[(int)(*pp++)], 21, *pc++); + } +#else + /* Round 
1 */ +#define S11 7 +#define S12 12 +#define S13 17 +#define S14 22 + FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */ + FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */ + FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */ + FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */ + FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */ + FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */ + FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */ + FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */ + FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */ + FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */ + FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */ + FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */ + FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */ + FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */ + FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */ + FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */ + + /* Round 2 */ +#define S21 5 +#define S22 9 +#define S23 14 +#define S24 20 + GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */ + GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */ + GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */ + GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */ + GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */ + GG (d, a, b, c, x[10], S22, 0x2441453); /* 22 */ + GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */ + GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */ + GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */ + GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */ + GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */ + GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */ + GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */ + GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */ + GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */ + GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */ + + /* Round 3 */ +#define S31 4 +#define S32 11 +#define S33 16 +#define S34 23 + HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */ + HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */ + HH (c, d, 
a, b, x[11], S33, 0x6d9d6122); /* 35 */ + HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */ + HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */ + HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */ + HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */ + HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */ + HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */ + HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */ + HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */ + HH (b, c, d, a, x[ 6], S34, 0x4881d05); /* 44 */ + HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */ + HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */ + HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */ + HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */ + + /* Round 4 */ +#define S41 6 +#define S42 10 +#define S43 15 +#define S44 21 + II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */ + II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */ + II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */ + II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */ + II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */ + II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */ + II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */ + II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */ + II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */ + II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */ + II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */ + II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */ + II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */ + II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */ + II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */ + II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */ +#endif state[0] += a; state[1] += b; -- cgit v1.2.3