最近陆续造了一批哈希算法的轮子,包括MD家族(包括MD2/MD4/MD5), SHA1, SHA2家族(SHA256/SHA384/SHA512),SHA3家族以及国密SM3算法。
原来打算将每一个算法都详细分析并实现,现在看来,这个工作短时间可能无法完成,所以先将源码发上来。
这部分实现的源码完全参考官方文档的算法描述,连变量名也尽可能和官方文档中的变量保持一致,方便学习。
另外, 代码封装的MD5哈希调用接口参考了openssl官方的接口,完全兼容,无缝对接。会使用这里的接口,就会使用openssl的库函数接口,甚至连代码都不需要修改。
除了实现的源码外,还另外附带了一个测试例子,这个测试例子不仅仅是用于测试哈希算法的实现是否正确,还可以提供了"-f"/"-s"等选项用于对任意文件和字符串进行哈希,因此作为一个工具使用,类似系统内置的md5sum/sha1sum。
/*
* @ file: md5.h
* @ description: header file for md5.c
* @ author: Gu Yongqiang
* @ blog: https:///guyongqiangx
*/
#ifndef __ROCKY_MD5__H
#define __ROCKY_MD5__H
#define ERR_OK 0
#define ERR_ERR -1 /* generic error */
#define ERR_INV_PARAM -2 /* invalid parameter */
#define ERR_TOO_LONG -3 /* too long */
#define ERR_STATE_ERR -4 /* state error */
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
typedef unsigned long long uint64_t;
typedef struct md5_context {
/* message total length in bytes */
uint64_t total;
/* intermedia hash value for each block */
struct {
uint32_t a;
uint32_t b;
uint32_t c;
uint32_t d;
uint32_t e;
}hash;
/* last block */
struct {
uint32_t used; /* used bytes */
uint8_t buf[64]; /* block data buffer */
}last;
}MD5_CTX;
/* https://www.openssl.org/docs/man1.1.0/man3/MD5_Init.html */
int MD5_Init(MD5_CTX *c);
int MD5_Update(MD5_CTX *c, const void *data, unsigned long len);
int MD5_Final(unsigned char *md, MD5_CTX *c);
unsigned char *MD5(const unsigned char *d, unsigned long n, unsigned char *md);
#endif
/*
* @ file: md5.c
* @ description: implementation for the MD5 Message-Digest Algorithm
* @ author: Gu Yongqiang
* @ blog: https:///guyongqiangx
*/
#include <stdio.h>
#include <string.h>
#include "utils.h"
#include "md5.h"
//#define DEBUG
#ifdef DEBUG
#define DBG(...) printf(__VA_ARGS__)
#define DUMP_BLOCK_DATA 1
#define DUMP_BLOCK_HASH 1
#define DUMP_ROUND_DATA 1
#else
#define DBG(...)
#define DUMP_BLOCK_DATA 0
#define DUMP_BLOCK_HASH 0
#define DUMP_ROUND_DATA 0
#endif
#define MD5_BLOCK_SIZE 64 /* 512 bits = 64 bytes */
#define MD5_LEN_SIZE 8 /* 64 bits = 8 bytes */
#define MD5_LEN_OFFSET (MD5_BLOCK_SIZE - MD5_LEN_SIZE)
#define MD5_DIGEST_SIZE 16 /* 128 bits = 16 bytes */
#define MD5_PADDING_PATTERN 0x80
#define MD5_ROUND_NUM 64
#define HASH_BLOCK_SIZE MD5_BLOCK_SIZE
#define HASH_LEN_SIZE MD5_LEN_SIZE
#define HASH_LEN_OFFSET MD5_LEN_OFFSET
#define HASH_DIGEST_SIZE MD5_DIGEST_SIZE
#define HASH_PADDING_PATTERN MD5_PADDING_PATTERN
#define HASH_ROUND_NUM MD5_ROUND_NUM
typedef uint32_t (*md5_func)(uint32_t x, uint32_t y, uint32_t z);
/* MD5 Constants */
static uint32_t T[64] =
{
/* Round 1 */
0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
/* Round 2 */
0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
/* Round 3 */
0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
/* Round 4 */
0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391,
};
/* ROTate Left (circular left shift) */
static uint32_t ROTL(uint32_t x, uint8_t shift)
{
return (x << shift) | (x >> (32 - shift));
}
static uint32_t F(uint32_t x, uint32_t y, uint32_t z)
{
return (x & y) | ((~x) & z);
}
static uint32_t G(uint32_t x, uint32_t y, uint32_t z)
{
return (x & z) | (y & (~z));
}
static uint32_t H(uint32_t x, uint32_t y, uint32_t z)
{
return x ^ y ^ z;
}
static uint32_t I(uint32_t x, uint32_t y, uint32_t z)
{
return y ^ (x | (~z));;
}
/* MD5 Functions */
static md5_func g[4] =
{
F, G, H, I
};
int MD5_Init(MD5_CTX *c)
{
if (NULL == c)
{
return ERR_INV_PARAM;
}
memset(c, 0, sizeof(MD5_CTX));
c->hash.a = 0x67452301; /* little endian */
c->hash.b = 0xEFCDAB89;
c->hash.c = 0x98BADCFE;
c->hash.d = 0x10325476;
c->total = 0;
c->last.used = 0;
return ERR_OK;
}
static int MD5_PrepareScheduleWord(const uint32_t *block, uint32_t *W)
{
uint32_t i;
if ((NULL == block) || (NULL == W))
{
return ERR_INV_PARAM;
}
for (i=0; i<16; i++)
{
W[i] = le32toh(block[i]);
}
return ERR_OK;
}
#if (DUMP_ROUND_DATA == 1)
#define MD5_OP(a,b,c,d,k,s,i) \
a = b + ROTL(a + (g[(i-1)/16])(b, c, d) + X[k] + T[i-1], s); \
DBG(" %02d: a=0x%08x, b=0x%08x, c=0x%08x, d=0x%08x, X=0x%08x, T=0x%08x\n", i-1, a, b, c, d, X[k], T[i-1]);
#else
#define MD5_OP(a,b,c,d,k,s,i) \
a = b + ROTL(a + (g[(i-1)/16])(b, c, d) + X[k] + T[i-1], s);
#endif
static int MD5_ProcessBlock(MD5_CTX *ctx, const void *block)
{
uint32_t X[16];
uint32_t a, b, c, d;
if ((NULL == ctx) || (NULL == block))
{
return ERR_INV_PARAM;
}
#if (DUMP_BLOCK_DATA == 1)
DBG("---------------------------------------------------------\n");
DBG(" BLOCK: %llu\n", ctx->total/HASH_BLOCK_SIZE);
DBG(" DATA:\n");
print_buffer(block, HASH_BLOCK_SIZE, " ");
#endif
#if (DUMP_BLOCK_HASH == 1)
DBG(" (LE)IV: %08x %08x %08x %08x\n",
ctx->hash.a, ctx->hash.b, ctx->hash.c, ctx->hash.d);
#endif
/* prepare schedule word */
MD5_PrepareScheduleWord(block, X);
a = ctx->hash.a;
b = ctx->hash.b;
c = ctx->hash.c;
d = ctx->hash.d;
/* Round 1 */
MD5_OP(a, b, c, d, 0, 7, 1); MD5_OP(d, a, b, c, 1, 12, 2); MD5_OP(c, d, a, b, 2, 17, 3); MD5_OP(b, c, d, a, 3, 22, 4);
MD5_OP(a, b, c, d, 4, 7, 5); MD5_OP(d, a, b, c, 5, 12, 6); MD5_OP(c, d, a, b, 6, 17, 7); MD5_OP(b, c, d, a, 7, 22, 8);
MD5_OP(a, b, c, d, 8, 7, 9); MD5_OP(d, a, b, c, 9, 12, 10); MD5_OP(c, d, a, b, 10, 17, 11); MD5_OP(b, c, d, a, 11, 22, 12);
MD5_OP(a, b, c, d, 12, 7, 13); MD5_OP(d, a, b, c, 13, 12, 14); MD5_OP(c, d, a, b, 14, 17, 15); MD5_OP(b, c, d, a, 15, 22, 16);
/* Round 2 */
MD5_OP(a, b, c, d, 1, 5, 17); MD5_OP(d, a, b, c, 6, 9, 18); MD5_OP(c, d, a, b, 11, 14, 19); MD5_OP(b, c, d, a, 0, 20, 20);
MD5_OP(a, b, c, d, 5, 5, 21); MD5_OP(d, a, b, c, 10, 9, 22); MD5_OP(c, d, a, b, 15, 14, 23); MD5_OP(b, c, d, a, 4, 20, 24);
MD5_OP(a, b, c, d, 9, 5, 25); MD5_OP(d, a, b, c, 14, 9, 26); MD5_OP(c, d, a, b, 3, 14, 27); MD5_OP(b, c, d, a, 8, 20, 28);
MD5_OP(a, b, c, d, 13, 5, 29); MD5_OP(d, a, b, c, 2, 9, 30); MD5_OP(c, d, a, b, 7, 14, 31); MD5_OP(b, c, d, a, 12, 20, 32);
/* Round 3 */
MD5_OP(a, b, c, d, 5, 4, 33); MD5_OP(d, a, b, c, 8, 11, 34); MD5_OP(c, d, a, b, 11, 16, 35); MD5_OP(b, c, d, a, 14, 23, 36);
MD5_OP(a, b, c, d, 1, 4, 37); MD5_OP(d, a, b, c, 4, 11, 38); MD5_OP(c, d, a, b, 7, 16, 39); MD5_OP(b, c, d, a, 10, 23, 40);
MD5_OP(a, b, c, d, 13, 4, 41); MD5_OP(d, a, b, c, 0, 11, 42); MD5_OP(c, d, a, b, 3, 16, 43); MD5_OP(b, c, d, a, 6, 23, 44);
MD5_OP(a, b, c, d, 9, 4, 45); MD5_OP(d, a, b, c, 12, 11, 46); MD5_OP(c, d, a, b, 15, 16, 47); MD5_OP(b, c, d, a, 2, 23, 48);
/* Round 4 */
MD5_OP(a, b, c, d, 0, 6, 49); MD5_OP(d, a, b, c, 7, 10, 50); MD5_OP(c, d, a, b, 14, 15, 51); MD5_OP(b, c, d, a, 5, 21, 52);
MD5_OP(a, b, c, d, 12, 6, 53); MD5_OP(d, a, b, c, 3, 10, 54); MD5_OP(c, d, a, b, 10, 15, 55); MD5_OP(b, c, d, a, 1, 21, 56);
MD5_OP(a, b, c, d, 8, 6, 57); MD5_OP(d, a, b, c, 15, 10, 58); MD5_OP(c, d, a, b, 6, 15, 59); MD5_OP(b, c, d, a, 13, 21, 60);
MD5_OP(a, b, c, d, 4, 6, 61); MD5_OP(d, a, b, c, 11, 10, 62); MD5_OP(c, d, a, b, 2, 15, 63); MD5_OP(b, c, d, a, 9, 21, 64);
#if 0
for (t=0; t<HASH_ROUND_NUM; t++)
{
T= b + ((a + (g[t/16])(b, c, d) + X[k] + T[t])<<<s)
//T = ROTL(a, 5) + (F[t/20])(b, c, d) + e + K[t/20] + W[t];
d = c;
c = b;
b = T;
a = d;
#if (DUMP_ROUND_DATA == 1)
DBG(" %02d: T=0x%08x, W=0x%08x, a=0x%08x, b=0x%08x, c=0x%08x, d=0x%08x\n",
t, T, W[t], a, b, c, d);
#endif
}
#endif
ctx->hash.a += a;
ctx->hash.b += b;
ctx->hash.c += c;
ctx->hash.d += d;
#if (DUMP_BLOCK_HASH == 1)
DBG(" (LE)OUT: %08x %08x %08x %08x\n",
ctx->hash.a, ctx->hash.b, ctx->hash.c, ctx->hash.d);
#endif
return ERR_OK;
}
int MD5_Update(MD5_CTX *c, const void *data, unsigned long len)
{
uint32_t copy_len = 0;
if ((NULL == c) || (NULL == data))
{
return ERR_INV_PARAM;
}
/* has used data */
if (c->last.used != 0)
{
/* less than 1 block in total, combine data */
if (c->last.used + len < HASH_BLOCK_SIZE)
{
memcpy(&c->last.buf[c->last.used], data, len);
c->last.used += len;
return ERR_OK;
}
else /* more than 1 block */
{
/* process the block in context buffer */
copy_len = HASH_BLOCK_SIZE - c->last.used;
memcpy(&c->last.buf[c->last.used], data, copy_len);
MD5_ProcessBlock(c, &c->last.buf);
c->total += HASH_BLOCK_SIZE;
data = (uint8_t *)data + copy_len;
len -= copy_len;
/* reset context buffer */
memset(&c->last.buf[0], 0, HASH_BLOCK_SIZE);
c->last.used = 0;
}
}
/* less than 1 block, copy to context buffer */
if (len < HASH_BLOCK_SIZE)
{
memcpy(&c->last.buf[c->last.used], data, len);
c->last.used += len;
return ERR_OK;
}
else
{
/* process data blocks */
while (len >= HASH_BLOCK_SIZE)
{
MD5_ProcessBlock(c, data);
c->total += HASH_BLOCK_SIZE;
data = (uint8_t *)data + HASH_BLOCK_SIZE;
len -= HASH_BLOCK_SIZE;
}
/* copy rest data to context buffer */
memcpy(&c->last.buf[0], data, len);
c->last.used = len;
}
return ERR_OK;
}
int MD5_Final(unsigned char *md, MD5_CTX *c)
{
uint32_t *temp;
if ((NULL == c) || (NULL == md))
{
return ERR_INV_PARAM;
}
/* Last block should be less thant HASH_BLOCK_SIZE - HASH_LEN_SIZE */
if (c->last.used >= (HASH_BLOCK_SIZE - HASH_LEN_SIZE))
{
c->total += c->last.used;
/* one more block */
c->last.buf[c->last.used] = HASH_PADDING_PATTERN;
c->last.used++;
memset(&c->last.buf[c->last.used], 0, HASH_BLOCK_SIZE - c->last.used);
MD5_ProcessBlock(c, &c->last.buf);
memset(&c->last.buf[0], 0, HASH_BLOCK_SIZE - HASH_LEN_SIZE);
c->last.used = 0;
/* save length */
temp = (uint32_t *)&(c->last.buf[HASH_LEN_OFFSET]);
temp[0] = htole32((c->total << 3) & 0xFFFFFFFF);
temp[1] = htole32(((c->total << 3) >> 32) & 0xFFFFFFFF);
MD5_ProcessBlock(c, &c->last.buf);
}
else /* 0 <= last.used < HASH_BLOCK_SIZE - HASH_LEN_SIZE */
{
c->total += c->last.used;
/* one more block */
c->last.buf[c->last.used] = HASH_PADDING_PATTERN;
c->last.used++;
/* padding 0s */
memset(&c->last.buf[c->last.used], 0, HASH_BLOCK_SIZE - HASH_LEN_SIZE - c->last.used);
/* save length */
temp = (uint32_t *)&(c->last.buf[HASH_LEN_OFFSET]);
temp[0] = htole32((c->total << 3) & 0xFFFFFFFF);
temp[1] = htole32(((c->total << 3) >> 32) & 0xFFFFFFFF);
MD5_ProcessBlock(c, &c->last.buf);
}
/* LE format, different from SHA family(big endian) */
temp = (uint32_t *)md;
temp[0] = htole32(c->hash.a);
temp[1] = htole32(c->hash.b);
temp[2] = htole32(c->hash.c);
temp[3] = htole32(c->hash.d);
return ERR_OK;
}
unsigned char *MD5(const unsigned char *d, unsigned long n, unsigned char *md)
{
MD5_CTX c;
if ((NULL == d) || (NULL == md))
{
return NULL;
}
MD5_Init(&c);
MD5_Update(&c, d, n);
MD5_Final(md, &c);
return md;
}
我直接在Makefile中内置了一个test伪目标,编译时除了编译生成名为md5的哈希工具外,还会直接调用内置的哈希测试。
编译和运行如下:
$ make
gcc -Wall -g -O2 -c utils.c -o utils.o
gcc -Wall -g -O2 -c md5.c -o md5.o
gcc -Wall -g -O2 -c md5test.c -o md5test.o
gcc -Wall -g -O2 utils.o md5.o md5test.o -o md5
Run Test...
./md5 -x
Internal hash tests for ./md5:
./md5("")
Expect: d41d8cd98f00b204e9800998ecf8427e
Result: d41d8cd98f00b204e9800998ecf8427e
./md5("a")
Expect: 0cc175b9c0f1b6a831c399e269772661
Result: 0cc175b9c0f1b6a831c399e269772661
./md5("abc")
Expect: 900150983cd24fb0d6963f7d28e17f72
Result: 900150983cd24fb0d6963f7d28e17f72
./md5("message digest")
Expect: f96b697d7cb7938d525a2f31aaf161d0
Result: f96b697d7cb7938d525a2f31aaf161d0
./md5("abcdefghijklmnopqrstuvwxyz")
Expect: c3fcd3d76192e4007dfb496cca67e13b
Result: c3fcd3d76192e4007dfb496cca67e13b
./md5("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789")
Expect: d174ab98d277d9f5a5611c2c9f419d9f
Result: d174ab98d277d9f5a5611c2c9f419d9f
./md5("12345678901234567890123456789012345678901234567890123456789012345678901234567890")
Expect: 57edf4a22be3c955ac49da2e2107b67a
Result: 57edf4a22be3c955ac49da2e2107b67a
目前版本的openssl工具还支持md5哈希算法,因此可以将md5工具和openssl执行dgst计算的结果进行比较:
$ md5 -h
Usage:
Common options: [-x|-f file|-s string|-h]
Hash a string:
md5 -s string
Hash a file:
md5 -f file [-k key]
-x Internal string hash test
-h Display this message
# 使用"-f"和"-s"选项分别对文件和字符串计算md4哈希值
$ md5 -f md5.o
md5(md5.o) = 68d8ed44c9633e11323ec2411f2016c2
$ md5 -s "I Love China!"
md5("I Love China!") = 0acbefa21f347fc65f4cc869706441b0
# 使用开源的openssl工具计算相应的哈希进行对比
$ openssl dgst -md5 md5.o
MD5(md5.o)= 68d8ed44c9633e11323ec2411f2016c2
$ echo -n "I Love China!" | openssl dgst -md5
(stdin)= 0acbefa21f347fc65f4cc869706441b0
完整的代码文件列表如下:
$ ls -lh
total 336K
-rw-rw-r-- 1 rocky rocky 297K Jun 17 21:46 Fast_Collision_Attach_on_MD5-2013.pdf
-rwxr--r-- 1 rocky rocky 592 Jun 19 22:02 Makefile
-rwxrwxr-x 1 rocky rocky 11K Jun 19 22:25 md5.c
-rwxrwxr-x 1 rocky rocky 1.3K Jun 19 22:23 md5.h
-rwxr--r-- 1 rocky rocky 5.6K Jun 19 22:19 md5test.c
-rwxr--r-- 1 rocky rocky 578 Jun 17 21:46 utils.c
-rwxr--r-- 1 rocky rocky 1.7K Jun 17 21:46 utils.h
需要代码请访问:
洛奇工作中常常会遇到自己不熟悉的问题,这些问题可能并不难,但因为不了解,找不到人帮忙而瞎折腾,往往导致浪费几天甚至更久的时间。
所以我组建了几个微信讨论群(记得微信我说加哪个群,如何加微信见后面),欢迎一起讨论:
在工作之余,洛奇尽量写一些对大家有用的东西,如果洛奇的这篇文章让您有所收获,解决了您一直以来未能解决的问题,不妨赞赏一下洛奇,这也是对洛奇付出的最大鼓励。扫下面的二维码赞赏洛奇,金额随意:
洛奇自己维护了一个公众号“洛奇看世界”,一个很佛系的公众号,不定期瞎逼逼。公号也提供个人联系方式,一些资源,说不定会有意外的收获,详细内容见公号提示。扫下方二维码关注公众号: