您的当前位置:首页正文

MD5哈希算法原理及实现(附源码)

2024-11-12 来源:个人技术集锦

最近陆续造了一批哈希算法的轮子,包括MD家族(包括MD2/MD4/MD5), SHA1, SHA2家族(SHA256/SHA384/SHA512),SHA3家族以及国密SM3算法。
原来打算将每一个算法都详细分析并实现,现在看来,这个工作短时间可能无法完成,所以先将源码发上来。

这部分实现的源码完全参考官方文档的算法描述,连变量名也尽可能和官方文档中的变量保持一致,方便学习。

另外, 代码封装的MD5哈希调用接口参考了openssl官方的接口,完全兼容,无缝对接。会使用这里的接口,就会使用openssl的库函数接口,甚至连代码都不需要修改。

除了实现的源码外,还另外附带了一个测试例子,这个测试例子不仅仅是用于测试哈希算法的实现是否正确,还可以提供了"-f"/"-s"等选项用于对任意文件和字符串进行哈希,因此作为一个工具使用,类似系统内置的md5sum/sha1sum。

MD5实现源码

1. 头文件md5.c
/*
 * @        file: md5.h
 * @ description: header file for md5.c
 * @      author: Gu Yongqiang
 * @        blog: https:///guyongqiangx
 */
#ifndef __ROCKY_MD5__H
#define __ROCKY_MD5__H

#define ERR_OK           0
#define ERR_ERR         -1  /* generic error */
#define ERR_INV_PARAM   -2  /* invalid parameter */
#define ERR_TOO_LONG    -3  /* too long */
#define ERR_STATE_ERR   -4  /* state error */

typedef unsigned char      uint8_t;
typedef unsigned short     uint16_t;
typedef unsigned int       uint32_t;
typedef unsigned long long uint64_t;

typedef struct md5_context {
    /* message total length in bytes */
    uint64_t total;

    /* intermedia hash value for each block */
    struct {
        uint32_t a;
        uint32_t b;
        uint32_t c;
        uint32_t d;
        uint32_t e;
    }hash;

    /* last block */
    struct {
        uint32_t used;     /* used bytes */
        uint8_t  buf[64];  /* block data buffer */
    }last;
}MD5_CTX;

/* https://www.openssl.org/docs/man1.1.0/man3/MD5_Init.html */

int MD5_Init(MD5_CTX *c);
int MD5_Update(MD5_CTX *c, const void *data, unsigned long len);
int MD5_Final(unsigned char *md, MD5_CTX *c);
unsigned char *MD5(const unsigned char *d, unsigned long n, unsigned char *md);
#endif
2. 代码文件md5.c
/*
 * @        file: md5.c
 * @ description: implementation for the MD5 Message-Digest Algorithm
 * @      author: Gu Yongqiang
 * @        blog: https:///guyongqiangx
 */
#include <stdio.h>
#include <string.h>

#include "utils.h"
#include "md5.h"

//#define DEBUG

#ifdef DEBUG
#define DBG(...) printf(__VA_ARGS__)
#define DUMP_BLOCK_DATA 1
#define DUMP_BLOCK_HASH 1
#define DUMP_ROUND_DATA 1
#else
#define DBG(...)
#define DUMP_BLOCK_DATA 0
#define DUMP_BLOCK_HASH 0
#define DUMP_ROUND_DATA 0
#endif

#define MD5_BLOCK_SIZE          64  /* 512 bits = 64 bytes */
#define MD5_LEN_SIZE            8   /*  64 bits =  8 bytes */
#define MD5_LEN_OFFSET          (MD5_BLOCK_SIZE - MD5_LEN_SIZE)
#define MD5_DIGEST_SIZE         16  /* 128 bits = 16 bytes */

#define MD5_PADDING_PATTERN     0x80
#define MD5_ROUND_NUM           64

#define HASH_BLOCK_SIZE         MD5_BLOCK_SIZE
#define HASH_LEN_SIZE           MD5_LEN_SIZE
#define HASH_LEN_OFFSET         MD5_LEN_OFFSET
#define HASH_DIGEST_SIZE        MD5_DIGEST_SIZE

#define HASH_PADDING_PATTERN    MD5_PADDING_PATTERN
#define HASH_ROUND_NUM          MD5_ROUND_NUM

typedef uint32_t (*md5_func)(uint32_t x, uint32_t y, uint32_t z);

/* MD5 Constants */
static uint32_t T[64] = 
{
    /* Round 1 */
    0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, 
    0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, 

    /* Round 2 */
    0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8, 
    0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a, 

    /* Round 3 */
    0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, 
    0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, 

    /* Round 4 */
    0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1, 
    0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391, 
};

/* ROTate Left (circular left shift) */
static uint32_t ROTL(uint32_t x, uint8_t shift)
{
    return (x << shift) | (x >> (32 - shift));
}

static uint32_t F(uint32_t x, uint32_t y, uint32_t z)
{
    return (x & y) | ((~x) & z);
}

static uint32_t G(uint32_t x, uint32_t y, uint32_t z)
{
    return (x & z) | (y & (~z));
}

static uint32_t H(uint32_t x, uint32_t y, uint32_t z)
{
    return x ^ y ^ z;
}

static uint32_t I(uint32_t x, uint32_t y, uint32_t z)
{
    return y ^ (x | (~z));;
}

/* MD5 Functions */
static md5_func g[4] =
{
    F, G, H, I
};

int MD5_Init(MD5_CTX *c)
{
    if (NULL == c)
    {
        return ERR_INV_PARAM;
    }

    memset(c, 0, sizeof(MD5_CTX));

    c->hash.a = 0x67452301; /* little endian */
    c->hash.b = 0xEFCDAB89;
    c->hash.c = 0x98BADCFE;
    c->hash.d = 0x10325476;

    c->total = 0;
    c->last.used = 0;

    return ERR_OK;
}

static int MD5_PrepareScheduleWord(const uint32_t *block, uint32_t *W)
{
    uint32_t i;

    if ((NULL == block) || (NULL == W))
    {
        return ERR_INV_PARAM;
    }

    for (i=0; i<16; i++)
    {
        W[i] = le32toh(block[i]);
    }

    return ERR_OK;
}

#if (DUMP_ROUND_DATA == 1)
#define MD5_OP(a,b,c,d,k,s,i) \
    a = b + ROTL(a + (g[(i-1)/16])(b, c, d) + X[k] + T[i-1], s); \
    DBG("      %02d: a=0x%08x, b=0x%08x, c=0x%08x, d=0x%08x, X=0x%08x, T=0x%08x\n", i-1, a, b, c, d, X[k], T[i-1]);
#else
#define MD5_OP(a,b,c,d,k,s,i) \
    a = b + ROTL(a + (g[(i-1)/16])(b, c, d) + X[k] + T[i-1], s);
#endif

static int MD5_ProcessBlock(MD5_CTX *ctx, const void *block)
{
    uint32_t X[16];
    uint32_t a, b, c, d;

    if ((NULL == ctx) || (NULL == block))
    {
        return ERR_INV_PARAM;
    }

#if (DUMP_BLOCK_DATA == 1)
    DBG("---------------------------------------------------------\n");
    DBG("   BLOCK: %llu\n", ctx->total/HASH_BLOCK_SIZE);
    DBG("    DATA:\n");
    print_buffer(block, HASH_BLOCK_SIZE, "    ");
#endif

#if (DUMP_BLOCK_HASH == 1)
    DBG("  (LE)IV: %08x %08x %08x %08x\n",
        ctx->hash.a, ctx->hash.b, ctx->hash.c, ctx->hash.d);
#endif

    /* prepare schedule word */
    MD5_PrepareScheduleWord(block, X);

    a = ctx->hash.a;
    b = ctx->hash.b;
    c = ctx->hash.c;
    d = ctx->hash.d;

    /* Round 1 */
    MD5_OP(a, b, c, d,  0,  7,  1); MD5_OP(d, a, b, c,  1, 12,  2); MD5_OP(c, d, a, b,  2, 17,  3); MD5_OP(b, c, d, a,  3, 22,  4);
    MD5_OP(a, b, c, d,  4,  7,  5); MD5_OP(d, a, b, c,  5, 12,  6); MD5_OP(c, d, a, b,  6, 17,  7); MD5_OP(b, c, d, a,  7, 22,  8);
    MD5_OP(a, b, c, d,  8,  7,  9); MD5_OP(d, a, b, c,  9, 12, 10); MD5_OP(c, d, a, b, 10, 17, 11); MD5_OP(b, c, d, a, 11, 22, 12);
    MD5_OP(a, b, c, d, 12,  7, 13); MD5_OP(d, a, b, c, 13, 12, 14); MD5_OP(c, d, a, b, 14, 17, 15); MD5_OP(b, c, d, a, 15, 22, 16);

    /* Round 2 */
    MD5_OP(a, b, c, d,  1,  5, 17); MD5_OP(d, a, b, c,  6,  9, 18); MD5_OP(c, d, a, b, 11, 14, 19); MD5_OP(b, c, d, a,  0, 20, 20);
    MD5_OP(a, b, c, d,  5,  5, 21); MD5_OP(d, a, b, c, 10,  9, 22); MD5_OP(c, d, a, b, 15, 14, 23); MD5_OP(b, c, d, a,  4, 20, 24);
    MD5_OP(a, b, c, d,  9,  5, 25); MD5_OP(d, a, b, c, 14,  9, 26); MD5_OP(c, d, a, b,  3, 14, 27); MD5_OP(b, c, d, a,  8, 20, 28);
    MD5_OP(a, b, c, d, 13,  5, 29); MD5_OP(d, a, b, c,  2,  9, 30); MD5_OP(c, d, a, b,  7, 14, 31); MD5_OP(b, c, d, a, 12, 20, 32);

    /* Round 3 */
    MD5_OP(a, b, c, d,  5,  4, 33); MD5_OP(d, a, b, c,  8, 11, 34); MD5_OP(c, d, a, b, 11, 16, 35); MD5_OP(b, c, d, a, 14, 23, 36);
    MD5_OP(a, b, c, d,  1,  4, 37); MD5_OP(d, a, b, c,  4, 11, 38); MD5_OP(c, d, a, b,  7, 16, 39); MD5_OP(b, c, d, a, 10, 23, 40);
    MD5_OP(a, b, c, d, 13,  4, 41); MD5_OP(d, a, b, c,  0, 11, 42); MD5_OP(c, d, a, b,  3, 16, 43); MD5_OP(b, c, d, a,  6, 23, 44);
    MD5_OP(a, b, c, d,  9,  4, 45); MD5_OP(d, a, b, c, 12, 11, 46); MD5_OP(c, d, a, b, 15, 16, 47); MD5_OP(b, c, d, a,  2, 23, 48);

    /* Round 4 */
    MD5_OP(a, b, c, d,  0,  6, 49); MD5_OP(d, a, b, c,  7, 10, 50); MD5_OP(c, d, a, b, 14, 15, 51); MD5_OP(b, c, d, a,  5, 21, 52);
    MD5_OP(a, b, c, d, 12,  6, 53); MD5_OP(d, a, b, c,  3, 10, 54); MD5_OP(c, d, a, b, 10, 15, 55); MD5_OP(b, c, d, a,  1, 21, 56);
    MD5_OP(a, b, c, d,  8,  6, 57); MD5_OP(d, a, b, c, 15, 10, 58); MD5_OP(c, d, a, b,  6, 15, 59); MD5_OP(b, c, d, a, 13, 21, 60);
    MD5_OP(a, b, c, d,  4,  6, 61); MD5_OP(d, a, b, c, 11, 10, 62); MD5_OP(c, d, a, b,  2, 15, 63); MD5_OP(b, c, d, a,  9, 21, 64);

#if 0
    for (t=0; t<HASH_ROUND_NUM; t++)
    {
        T= b + ((a + (g[t/16])(b, c, d) + X[k] + T[t])<<<s)
        //T = ROTL(a, 5) + (F[t/20])(b, c, d) + e + K[t/20] + W[t];
        d = c;
        c = b;
        b = T;
        a = d;

#if (DUMP_ROUND_DATA == 1)
        DBG("      %02d: T=0x%08x, W=0x%08x, a=0x%08x, b=0x%08x, c=0x%08x, d=0x%08x\n",
                t, T, W[t], a, b, c, d);
#endif
    }
#endif

    ctx->hash.a += a;
    ctx->hash.b += b;
    ctx->hash.c += c;
    ctx->hash.d += d;
#if (DUMP_BLOCK_HASH == 1)
    DBG(" (LE)OUT: %08x %08x %08x %08x\n",
        ctx->hash.a, ctx->hash.b, ctx->hash.c, ctx->hash.d);
#endif

    return ERR_OK;
}

int MD5_Update(MD5_CTX *c, const void *data, unsigned long len)
{
    uint32_t copy_len = 0;

    if ((NULL == c) || (NULL == data))
    {
        return ERR_INV_PARAM;
    }

    /* has used data */
    if (c->last.used != 0)
    {
        /* less than 1 block in total, combine data */
        if (c->last.used + len < HASH_BLOCK_SIZE)
        {
            memcpy(&c->last.buf[c->last.used], data, len);
            c->last.used += len;

            return ERR_OK;
        }
        else /* more than 1 block */
        {
            /* process the block in context buffer */
            copy_len = HASH_BLOCK_SIZE - c->last.used;
            memcpy(&c->last.buf[c->last.used], data, copy_len);
            MD5_ProcessBlock(c, &c->last.buf);

            c->total += HASH_BLOCK_SIZE;

            data = (uint8_t *)data + copy_len;
            len -= copy_len;

            /* reset context buffer */
            memset(&c->last.buf[0], 0, HASH_BLOCK_SIZE);
            c->last.used = 0;
        }
    }

    /* less than 1 block, copy to context buffer */
    if (len < HASH_BLOCK_SIZE)
    {
        memcpy(&c->last.buf[c->last.used], data, len);
        c->last.used += len;

        return ERR_OK;
    }
    else
    {
        /* process data blocks */
        while (len >= HASH_BLOCK_SIZE)
        {
            MD5_ProcessBlock(c, data);
            c->total += HASH_BLOCK_SIZE;

            data = (uint8_t *)data + HASH_BLOCK_SIZE;
            len -= HASH_BLOCK_SIZE;
        }

        /* copy rest data to context buffer */
        memcpy(&c->last.buf[0], data, len);
        c->last.used = len;
    }

    return ERR_OK;
}

int MD5_Final(unsigned char *md, MD5_CTX *c)
{
    uint32_t *temp;

    if ((NULL == c) || (NULL == md))
    {
        return ERR_INV_PARAM;
    }

    /* Last block should be less thant HASH_BLOCK_SIZE - HASH_LEN_SIZE */
    if (c->last.used >= (HASH_BLOCK_SIZE - HASH_LEN_SIZE))
    {
        c->total += c->last.used;

        /* one more block */
        c->last.buf[c->last.used] = HASH_PADDING_PATTERN;
        c->last.used++;

        memset(&c->last.buf[c->last.used], 0, HASH_BLOCK_SIZE - c->last.used);
        MD5_ProcessBlock(c, &c->last.buf);

        memset(&c->last.buf[0], 0, HASH_BLOCK_SIZE - HASH_LEN_SIZE);
        c->last.used = 0;

        /* save length */
        temp = (uint32_t *)&(c->last.buf[HASH_LEN_OFFSET]);
        temp[0] = htole32((c->total << 3) & 0xFFFFFFFF);
        temp[1] = htole32(((c->total << 3) >> 32) & 0xFFFFFFFF);
        MD5_ProcessBlock(c, &c->last.buf);
    }
    else /* 0 <= last.used < HASH_BLOCK_SIZE - HASH_LEN_SIZE */
    {
        c->total += c->last.used;

        /* one more block */
        c->last.buf[c->last.used] = HASH_PADDING_PATTERN;
        c->last.used++;

        /* padding 0s */
        memset(&c->last.buf[c->last.used], 0, HASH_BLOCK_SIZE - HASH_LEN_SIZE - c->last.used);

        /* save length */
        temp = (uint32_t *)&(c->last.buf[HASH_LEN_OFFSET]);
        temp[0] = htole32((c->total << 3) & 0xFFFFFFFF);
        temp[1] = htole32(((c->total << 3) >> 32) & 0xFFFFFFFF);
        MD5_ProcessBlock(c, &c->last.buf);
    }

    /* LE format, different from SHA family(big endian) */
    temp = (uint32_t *)md;
    temp[0] = htole32(c->hash.a);
    temp[1] = htole32(c->hash.b);
    temp[2] = htole32(c->hash.c);
    temp[3] = htole32(c->hash.d);

    return ERR_OK;
}

unsigned char *MD5(const unsigned char *d, unsigned long n, unsigned char *md)
{
    MD5_CTX c;

    if ((NULL == d) || (NULL == md))
    {
        return NULL;
    }

    MD5_Init(&c);
    MD5_Update(&c, d, n);
    MD5_Final(md, &c);

    return md;
}

MD5源码的编译和测试

我直接在Makefile中内置了一个test伪目标,编译时除了编译生成名为md5的哈希工具外,还会直接调用内置的哈希测试。

编译和运行如下:

$ make
gcc -Wall -g -O2 -c utils.c -o utils.o
gcc -Wall -g -O2 -c md5.c -o md5.o
gcc -Wall -g -O2 -c md5test.c -o md5test.o
gcc -Wall -g -O2 utils.o md5.o md5test.o -o md5

Run Test...
./md5 -x
Internal hash tests for ./md5:
./md5("")
  Expect: d41d8cd98f00b204e9800998ecf8427e
  Result: d41d8cd98f00b204e9800998ecf8427e

./md5("a")
  Expect: 0cc175b9c0f1b6a831c399e269772661
  Result: 0cc175b9c0f1b6a831c399e269772661

./md5("abc")
  Expect: 900150983cd24fb0d6963f7d28e17f72
  Result: 900150983cd24fb0d6963f7d28e17f72

./md5("message digest")
  Expect: f96b697d7cb7938d525a2f31aaf161d0
  Result: f96b697d7cb7938d525a2f31aaf161d0

./md5("abcdefghijklmnopqrstuvwxyz")
  Expect: c3fcd3d76192e4007dfb496cca67e13b
  Result: c3fcd3d76192e4007dfb496cca67e13b

./md5("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789")
  Expect: d174ab98d277d9f5a5611c2c9f419d9f
  Result: d174ab98d277d9f5a5611c2c9f419d9f

./md5("12345678901234567890123456789012345678901234567890123456789012345678901234567890")
  Expect: 57edf4a22be3c955ac49da2e2107b67a
  Result: 57edf4a22be3c955ac49da2e2107b67a

目前版本的openssl工具还支持md5哈希算法,因此可以将md5工具和openssl执行dgst计算的结果进行比较:

$ md5 -h
Usage:
Common options: [-x|-f file|-s string|-h]
Hash a string:
        md5 -s string
Hash a file:
        md5 -f file [-k key]
-x      Internal string hash test
-h      Display this message

# 使用"-f"和"-s"选项分别对文件和字符串计算md4哈希值
$ md5 -f md5.o
md5(md5.o) = 68d8ed44c9633e11323ec2411f2016c2
$ md5 -s "I Love China!"
md5("I Love China!") = 0acbefa21f347fc65f4cc869706441b0

# 使用开源的openssl工具计算相应的哈希进行对比
$ openssl dgst -md5 md5.o
MD5(md5.o)= 68d8ed44c9633e11323ec2411f2016c2
$ echo -n "I Love China!" | openssl dgst -md5
(stdin)= 0acbefa21f347fc65f4cc869706441b0

完整代码

完整的代码文件列表如下:

$ ls -lh
total 336K
-rw-rw-r-- 1 rocky rocky 297K Jun 17 21:46 Fast_Collision_Attach_on_MD5-2013.pdf
-rwxr--r-- 1 rocky rocky  592 Jun 19 22:02 Makefile
-rwxrwxr-x 1 rocky rocky  11K Jun 19 22:25 md5.c
-rwxrwxr-x 1 rocky rocky 1.3K Jun 19 22:23 md5.h
-rwxr--r-- 1 rocky rocky 5.6K Jun 19 22:19 md5test.c
-rwxr--r-- 1 rocky rocky  578 Jun 17 21:46 utils.c
-rwxr--r-- 1 rocky rocky 1.7K Jun 17 21:46 utils.h

需要代码请访问:

  • https://github.com/guyongqiangx/cryptography/

其它

洛奇工作中常常会遇到自己不熟悉的问题,这些问题可能并不难,但因为不了解,找不到人帮忙而瞎折腾,往往导致浪费几天甚至更久的时间。

所以我组建了几个微信讨论群(记得微信我说加哪个群,如何加微信见后面),欢迎一起讨论:

  • 一个密码编码学讨论组,主要讨论各种加解密,签名校验等算法,请说明加密码学讨论群。
  • 一个Android OTA的讨论组,请说明加Android OTA群。
  • 一个git和repo的讨论组,请说明加git和repo群。

在工作之余,洛奇尽量写一些对大家有用的东西,如果洛奇的这篇文章让您有所收获,解决了您一直以来未能解决的问题,不妨赞赏一下洛奇,这也是对洛奇付出的最大鼓励。扫下面的二维码赞赏洛奇,金额随意:

洛奇自己维护了一个公众号“洛奇看世界”,一个很佛系的公众号,不定期瞎逼逼。公号也提供个人联系方式,一些资源,说不定会有意外的收获,详细内容见公号提示。扫下方二维码关注公众号:

显示全文