如何计算从最后一个字节开始的 CRC答案

【问题标题】：How to Calculate CRC Starting at Last Byte如何计算从最后一个字节开始的 CRC
【发布时间】：2020-11-18 01:01:58
【问题描述】：

我正在尝试用 VHDL 实现 CRC-CCITT 计算器。我最初能够做到这一点；但是，我最近发现数据是从最低有效字节开始传送的。在我的代码中，数据通过一个帧一次传输 7 个字节。假设我们有以下数据：ASCII 中的123456789 或hex 中的313233343536373839。数据将按原样传输（使用以下 CRC）：

-- First frame of data
RxFrame.Data <= (
    1 => x"39", -- LSB
    2 => x"38",
    3 => x"37",
    4 => x"36",
    5 => x"35",
    6 => x"34",
    7 => x"33"
);

-- Second/last frame of data
RxFrame.Data <= (
    1 => x"32",
    2 => x"31", -- MSB
    3 => xx,    -- "xx" means irrelevant data, not part of CRC calculation.
    4 => xx,    -- This occurs only in the last frame, when it specified in
    5 => xx,    -- byte 0 which bytes contain data
    6 => xx,
    7 => xx
);

-- Calculated CRC should be 0x31C3

另一个包含数据0x4376669A1CFC048321313233343536373839 及其正确CRC 的示例如下所示：

-- First incoming frame of data
RxFrame.Data <= (
    1 => x"39", -- LSB
    2 => x"38",
    3 => x"37",
    4 => x"36",
    5 => x"35",
    6 => x"34",
    7 => x"33"
);

-- Second incoming frame of data
RxFrame.Data <= (
    1 => x"32",
    2 => x"31",
    3 => x"21",
    4 => x"83",
    5 => x"04",
    6 => x"FC",
    7 => x"1C"
);

-- Third/last incoming frame of data
RxFrame.Data <= (
    1 => x"9A",
    2 => x"66",
    3 => x"76",
    4 => x"43", -- MSB
    5 => xx,    -- Irrelevant data, specified in byte 0
    6 => xx,
    7 => xx
);

-- Calculated CRC should be 0x2848

有没有我遗漏的概念？有没有办法用相反的顺序接收数据来计算 CRC？我正在为 CANopen SDO 块协议实现此功能。谢谢！

CRC calculation algorithm to verify SDO block transfer from CANopen standard

【问题讨论】：

请提供具有正确 CRC 的数据示例，或者最好提供几个示例。另外，xx 是什么意思？ CRC不是根据这些计算的吗？如果是这样，发送方和接收方怎么知道不计算这些？
@MarkAdler 我已经更新了帖子！本质上，xx 表示该字节没有相关数据，在字节 0 中表示。因此它不会用于 CRC 计算。它完全被忽略了。
CRC是在数据之前还是之后传输？
@rcgldr CRC 在数据之后传输。但是，在数据传输过程中我需要自己计算CRC，因为数据的长度是未知的。在收到所有数据并计算出最终的 CRC 后，我将发送给我的 CRC 与我计算的 CRC 进行比较。
所以这个“字节0”没有显示？而且它不是CRC计算的一部分？

标签： network-programming vhdl crc crc16

【解决方案1】：

使用函数对 CRC 多项式进行无进位乘法模运算，生成 CRC16 的示例代码，字节反向读取（最后一个字节在前）。下面是解释。

#include <stdio.h>

typedef unsigned char       uint8_t;
typedef unsigned short     uint16_t;

#define POLY (0x1021u)

/* carryless multiply modulo crc polynomial */
uint16_t MpyModPoly(uint16_t a, uint16_t b) /* (a*b)%poly */
{
uint16_t pd = 0;
uint16_t i;
    for(i = 0; i < 16; i++){
        /* assumes twos complement */
        pd = (pd<<1)^((0-(pd>>15))&POLY);
        pd ^= (0-(b>>15))&a;
        b <<= 1;
    }
    return pd;
}

/* generate crc in reverse byte order */
uint16_t Crc16R(uint8_t * b, size_t sz)
{
uint8_t *e = b + sz;                    /* end of bfr ptr */
uint16_t crc = 0u;                      /* crc */
uint16_t pdm = 0x100u;                  /* padding multiplier */
    while(e > b){                       /* generate crc */
        pdm  = MpyModPoly(0x100, pdm);
        crc ^= MpyModPoly( *--e, pdm);
    }
    return(crc);
}

/*      msg will be processed in reverse order */
static uint8_t msg[] = {0x43,0x76,0x66,0x9A,0x1C,0xFC,0x04,0x83,
                        0x21,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
                        0x38,0x39};

int main()
{
uint16_t crc;
    crc = Crc16R(msg, sizeof(msg));
    printf("%04x\n", crc);
    return 0;
}

使用 X86 xmm pclmulqdq 和 psrlq 模拟 16 位乘 16 位硬件 (VHDL) 无进位乘法的示例代码：

/*      __m128i is an intrinsic for X86 128 bit xmm register */
static __m128i poly =    {.m128i_u32[0] = 0x00011021u}; /* poly */
static __m128i invpoly = {.m128i_u32[0] = 0x00008898u}; /* 2^31 / poly */

/* carryless multiply modulo crc polynomial */
/* using xmm pclmulqdq and psrlq */
uint16_t MpyModPoly(uint16_t a, uint16_t b)
{
__m128i ma, mb, mp, mt;
    ma.m128i_u64[0] = a;
    mb.m128i_u64[0] = b;
    mp = _mm_clmulepi64_si128(ma, mb, 0x00);      /* mp = a*b */
    mt = _mm_srli_epi64(mp, 16);                  /* mt = mp>>16 */
    mt = _mm_clmulepi64_si128(mt, invpoly, 0x00); /* mt = mt*ipoly */
    mt = _mm_srli_epi64(mt, 15);                  /* mt = mt>>15 = (a*b)/poly */ 
    mt = _mm_clmulepi64_si128(mt, poly, 0x00);    /* mt = mt*poly */
    return mp.m128i_u16[0] ^ mt.m128i_u16[0];     /* ret  mp^mt */
}

/* external code to generate invpoly */
#define POLY (0x11021u)
static __m128i invpoly;                 /* 2^31 / poly */
void GenMPoly(void)                     /* generate __m12i8 invpoly */
{
uint32_t N = 0x10000u;                  /* numerator = x^16 */
uint32_t Q = 0;                         /* quotient = 0 */
    for(size_t i = 0; i <= 15; i++){    /* 31 - 16 = 15 */
        Q <<= 1;
        if(N&0x10000u){
            Q |= 1;
            N ^= POLY;
        }
        N <<= 1;
    }
    invpoly.m128i_u16[0] = Q;
}

解释：将数据视为长度不断增加的单独字符串，最后用零填充。对于示例的前几个字节，逻辑会计算

CRC  = CRC16({39})
CRC ^= CRC16({38 00})
CRC ^= CRC16({37 00 00})
CRC ^= CRC16({36 00 00 00})
...

为了加快计算速度，而不是实际填充 n 个零字节，您可以对 CRC 进行 2^{n·8} 模的无进位乘法POLY，其中 POLY 是用于 CRC16 的 17 位多项式：

CRC  =  CRC16({39})
CRC ^= (CRC16({38}) · (2^08 % POLY)) % POLY
CRC ^= (CRC16({37}) · (2^10 % POLY)) % POLY
CRC ^= (CRC16({36}) · (2^18 % POLY)) % POLY
...

无进位乘法模 POLY 等效于 CRC16 所做的，因此这将转换为伪代码（所有值均为十六进制，2^8 = 100）

CRC  =    0
PDM  =  100                  ;padding multiplier

PDM  = (100 · PDM) % POLY    ;main loop (2 lines per byte)
CRC ^= ( 39 · PDM) % POLY
PDM  = (100 · PDM) % POLY
CRC ^= ( 38 · PDM) % POLY
PDM  = (100 · PDM) % POLY
CRC ^= ( 37 · PDM) % POLY
PDM  = (100 · PDM) % POLY
CRC ^= ( 36 · PDM) % POLY
...

实现 (A · B) % POLY 是基于二进制数学：

(A · B) % POLY = (A · B) ^ (((A · B) / POLY) · POLY)

其中乘法是无进位的（异或而不是加法），除法是无借位的（异或而不是减法）。由于除法是无借的，并且POLY的最重要项是x^16，所以商

Q = (A · B) / POLY

只取决于(A·B)的高16位。除以 POLY 使用乘以 16 位常数 IPOLY = (2^31)/POLY，然后右移：

Q = (A · B) / POLY  = (((A · B) >> 16) · IPOLY) >> 15

该过程使用 16 位乘 16 位无进位乘法，产生 31 位乘积。

POLY  = 0x11021u                  ; CRC polynomial (17 bit)
IPOLY = 0x08898u                  ; 2^31 / POLY
                                  ;  generated by external software
MpyModPoly(A, B)
{
    MP = A · B                    ; MP = A · B
    MT = MP >> 16                 ; MT = MP >> 16
    MT = MT · IPOLY               ; MT = MT · IPOLY
    MT = MT >> 15                 ; MT = (A · B) / POLY
    MT = MT · POLY                ; MT = ((A · B) / POLY) * POLY
    return MP xor MT              ;      (A·B) ^ (((A · B) / POLY) · POLY)
}

基于硬件的无进位乘法看起来像这个 4 位 · 4 位示例。

p[] = [a3 a2 a1 a0] · [b3 b2 b1 b0]

p[] is a 7 bit product generated with 7 parallel circuits.
The time for multiply would be worst case propagation time for p3.

p6 = a3&b3
p5 = a3&b2 ^ a2&b3
p4 = a3&b1 ^ a2&b2 ^ a1&b3
p3 = a3&b0 ^ a2&b1 ^ a1&b2 ^ a0&b3 
p2 = a2&b0 ^ a1&b1 ^ a0&b2
p1 = a1&b0 ^ a0&b1
p0 = a0&b0

If the xor gates available only have 2 bit inputs, the logic can
be split up. For example:

p3 = (a3&b0 ^ a2&b1) ^ (a1&b2 ^ a0&b3)

我不知道您的 VHDL 工具集是否包含用于无进位乘法的库。对于 16 位乘 16 位乘法，得到 31 位乘积（p30 到 p00），p15 具有来自 16 个与的 16 个输出（并行），可以使用树状结构进行异或运算，8 个异或并行馈送并行输入 4 个 xor，将 2 个 xor 并行馈入单个 xor。所以传播时间将是 1 and 和 4 xor 传播时间。

【讨论】：

所以如果我理解正确的话，在伪代码的第三和第四块中，点符号表示无借位除法，插入符号表示无进位乘法？两者都可以通过异或实现？
@ShadiZ = 点符号表示无进位乘法。我对插入符号的使用令人困惑，所以我更新了我的答案，注意它在 (A·B)%POLY 的最后一步中使用了 xor。
我不确定！我需要试一试才能知道。
@ShadiZ - 我更新了我的答案，只需要 16 位乘 16 位无进位乘法。

【解决方案2】：

这是一个你可以适应的 C 语言示例。既然您提到了 VHDL，这是一个适用于转换为门和触发器的按位实现。但是，如果周期对您来说比内存和门更宝贵，那么还有一个按字节表驱动的版本，它将以 1/8 的周期数运行。

这与正常 CRC 计算中的操作相反。然后，它使用正常 CRC 应用相同大小的零输入，以获得该输入上的正常 CRC。运行零通过与逆 CRC 相同的周期数，即 O(n) 其中 n 是输入的大小。如果延迟太大，可以减少到 O(log n) 个周期，并在门上进行一些投资。

#include <stddef.h>

// Update crc with the CRC-16/XMODEM of n zero bytes. (This can be done in
// O(log n) time or cycles instead of O(n), with a little more effort.)
static unsigned crc16x_zeros_bit(unsigned crc, size_t n) {
    for (size_t i = 0; i < n; i++)
        for (int k = 0; k < 8; k++)
            crc = crc & 0x8000 ? (crc << 1) ^ 0x1021 : crc << 1;
    return crc & 0xffff;
}

// Update crc with the CRC-16/XMODEM of the len bytes at mem in reverse. If mem
// is NULL, then return the initial value for the CRC. When done,
// crc16x_zeros_bit() must be used to apply the total length of zero bytes, in
// order to get what the CRC would have been if it were calculated on the bytes
// fed in the opposite order.
static unsigned crc16x_inverse_bit(unsigned crc, void const *mem, size_t len) {
    unsigned char const *data = mem;
    if (data == NULL)
        return 0;
    crc &= 0xffff;
    for (size_t i = 0; i < len; i++) {
        for (int k = 0; k < 8; k++)
            crc = crc & 1 ? (crc >> 1) ^ 0x8810 : crc >> 1;
        crc ^= (unsigned)data[i] << 8;
    }
    return crc;
}

#include <stdio.h>

int main(void) {
    // Do framed example.
    unsigned crc = crc16x_inverse_bit(0, NULL, 0);
    crc = crc16x_inverse_bit(crc, (void const *)"9876543", 7);
    crc = crc16x_inverse_bit(crc, (void const *)"21", 2);
    crc = crc16x_zeros_bit(crc, 9);
    printf("%04x\n", crc);

    // Do another one.
    crc = crc16x_inverse_bit(0, NULL, 0);
    crc = crc16x_inverse_bit(crc, (void const *)"9876543", 7);
    crc = crc16x_inverse_bit(crc, (void const *)"21!\x83\x04\xfc\x1c", 7);
    crc = crc16x_inverse_bit(crc, (void const *)"\x9a" "fvC", 4);
    crc = crc16x_zeros_bit(crc, 18);
    printf("%04x\n", crc);
    return 0;
}

这是crc16x_zeros_bit()的O(log n)版本：

// Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC
// polynomial. For speed, a cannot be zero.
static inline unsigned multmodp(unsigned a, unsigned b) {
    unsigned p = 0;
    for (;;) {
        if (a & 1) {
            p ^= b;
            if (a == 1)
                break;
        }
        a >>= 1;
        b = b & 0x8000 ? (b << 1) ^ 0x1021 : b << 1;
    }
    return p & 0xffff;
}

// Return x^(8n) modulo p(x).
static unsigned x2nmodp(size_t n) {
    unsigned p = 1;                     // x^0 == 1
    unsigned q = 0x10;                  // x^2^2
    while (n) {
        q = multmodp(q, q);             // x^2^k mod p(x), k = 3,4,...
        if (n & 1)
            p = multmodp(q, p);
        n >>= 1;
    }
    return p;
}

// Update crc with the CRC-16/XMODEM of n zero bytes.
static unsigned crc16x_zeros_bit(unsigned crc, size_t n) {
    return multmodp(x2nmodp(n), crc);
}

【讨论】：