【问题标题】:C: formatting strings for hexdump (char* to another char*)C: hexdump 的格式化字符串(char* 到另一个 char*)
【发布时间】:2019-01-17 14:13:09
【问题描述】:

我想将一个 char* 指针的十六进制转储写入另一个 char*

为此,我使用了this code snippet

#include <stdio.h>

void DumpHex(const void* data, size_t size) {
    char ascii[17];
    size_t i, j;
    ascii[16] = '\0';
    for (i = 0; i < size; ++i) {
        printf("%02X ", ((unsigned char*)data)[i]);
        if (((unsigned char*)data)[i] >= ' ' && ((unsigned char*)data)[i] <= '~') {
            ascii[i % 16] = ((unsigned char*)data)[i];
        } else {
            ascii[i % 16] = '.';
        }
        if ((i+1) % 8 == 0 || i+1 == size) {
            printf(" ");
            if ((i+1) % 16 == 0) {
                printf("|  %s \n", ascii);
            } else if (i+1 == size) {
                ascii[(i+1) % 16] = '\0';
                if ((i+1) % 16 <= 8) {
                    printf(" ");
                }
                for (j = (i+1) % 16; j < 16; ++j) {
                    printf("   ");
                }
                printf("|  %s \n", ascii);
            }
        }
    }
}

并像这样修改它:

#include <stdio.h>

char* DumpHex2(const void* data, size_t size) {
    const int symbolSize = 100;
    char* buffer = calloc(10*size, sizeof(char));
    char* symbol = calloc(symbolSize, sizeof(char));

    char ascii[17];
    size_t i, j;
    ascii[16] = '\0';
    for (i = 0; i < size; ++i) {
        snprintf(symbol, symbolSize, "%02X ", ((unsigned char*)data)[i]);
        strcat(buffer, symbol);
        memset(symbol,0,strlen(symbol));
        if (((unsigned char*)data)[i] >= ' ' && ((unsigned char*)data)[i] <= '~') {
            ascii[i % 16] = ((unsigned char*)data)[i];
        } else {
            ascii[i % 16] = '.';
        }
        if ((i+1) % 8 == 0 || i+1 == size) {
            strcat(buffer, " ");
            if ((i+1) % 16 == 0) {
                snprintf(symbol, symbolSize, "|  %s \n", ascii);
                strcat(buffer, symbol);
                memset(symbol,0,strlen(symbol));
            } else if (i+1 == size) {
                ascii[(i+1) % 16] = '\0';
                if ((i+1) % 16 <= 8) {
                    strcat(buffer, " ");
                }
                for (j = (i+1) % 16; j < 16; ++j) {
                    strcat(buffer, "   ");
                }
                snprintf(symbol, symbolSize, "|  %s \n", ascii);
                strcat(buffer, symbol);
                memset(symbol,0,strlen(symbol));
            }
        }
    }

    free(symbol);

    return buffer;
}

它工作并返回相同的输出:

int main(int argc, char **argv) {
    char* text = "Hello World! é";

    DumpHex(text, strlen(text));

    char* dump = DumpHex2(text, strlen(text));
    printf("%s", dump);
    free(dump);

    return EXIT_SUCCESS;
}

输出:

48 65 6C 6C 6F 20 57 6F  72 6C 64 21 20 C3 A9     |  Hello World! .. 
48 65 6C 6C 6F 20 57 6F  72 6C 64 21 20 C3 A9     |  Hello World! .. 

但是我的修改,即:

snprintf(symbol, symbolSize, "|  %s \n", ascii);
strcat(buffer, symbol);
memset(symbol,0,strlen(symbol));

在我看来不好(我是 C 新手)。有没有办法更轻松地格式化和附加字符串?

【问题讨论】:

    标签: c hex hexdump


    【解决方案1】:

    您不能对未初始化的数据使用strlen()

    char* buffer = malloc(1000000);
    memset(buffer,0,strlen(buffer));
    

    strlen() 无法找出分配的内存大小,因为它依赖于终止的空字符(0'\0'),它可能位于内存中的某个位置,也可能不在指向的内存中通过缓冲区。要么指定memset()中分配的内存大小:

    memset(buffer, 0, 1000000);
    

    或使用calloc() 将分配的内存初始化为零:

    char buffer = calloc(1000000, sizeof(char));  // or calloc(1000000, 1) since sizeof(char) is 1 by definition.
    

    您的代码中可能还有其他问题。例如,您在 main() 中调用 DumpHex2() twice 但从不释放函数分配的内存。分配给symbol 的内存也泄露了。

    如果您将问题更新为包含您希望DumpHex2() 生成的文本的确切格式,则回答会更容易。

    您应该使用isprint() 来确定字符是否可打印。

    更短且恕我直言,更易于阅读和理解:

    #include <ctype.h>   // isprint()
    #include <stddef.h>  // size_t
    #include <stdlib.h>  // malloc(), free()
    #include <string.h>  // strcat()
    #include <stdio.h>   // sprintf()
    
    enum {
        DUMP_BYTES_PER_LINE = 16,
        DUMP_BYTES_GROUP = 8,
        DUMP_CHARS_PER_LINE = DUMP_BYTES_PER_LINE * 4 + DUMP_BYTES_PER_LINE / DUMP_BYTES_GROUP + 4
    };
    
    char* DumpHex(const void* data, size_t size)
    {
        size_t const num_lines = size / DUMP_BYTES_PER_LINE + ((size % DUMP_BYTES_PER_LINE) > 0);
        size_t const result_length = num_lines * DUMP_CHARS_PER_LINE;
    
        char *result = malloc((result_length + 1) * sizeof(*result));
        if (!result)
            return NULL;
    
        memset(result, ' ', result_length);
        result[result_length] = '\0';
    
        char *dump_pos = result;
        char *plain_pos = result + DUMP_BYTES_PER_LINE * 3 + DUMP_BYTES_PER_LINE / DUMP_BYTES_GROUP + 3;
        char unsigned const *src = data;
    
        for (size_t i = 0; i < size; ++i, dump_pos += 3, ++plain_pos) {
    
            sprintf(dump_pos, "%02x ", (int)src[i]);
            dump_pos[3] = ' ';
            *plain_pos = isprint(src[i]) ? src[i] : '.';
    
            if ((i + 1) % DUMP_BYTES_PER_LINE == 0 || i + 1 == size) {
                *++plain_pos = '\n';
    
                size_t const bytes_per_line_left = (i + 1) % DUMP_BYTES_PER_LINE;
                plain_pos[bytes_per_line_left ? -(long long)bytes_per_line_left - 3 : -DUMP_BYTES_PER_LINE - 3] = '|';
    
                dump_pos = plain_pos + 1 - 3;
                plain_pos = dump_pos + DUMP_BYTES_PER_LINE * 3 + DUMP_BYTES_PER_LINE / DUMP_BYTES_GROUP + 5;
            }
            else if ((i + 1) % DUMP_BYTES_GROUP == 0) {
                ++dump_pos;
            }
        }
    
        return result;
    }
    

    【讨论】:

    • 谢谢。修改了我的代码以使用 calloc() 和 free()。我没有调用 DumpHex2() 两次。首先我调用原始 DumpHex(打印到设备),然后调用我修改后的 DumpHex2。我想接收经典的 hexdump 输出:en.wikipedia.org/wiki/Hex_dump(最后一个示例)。
    • 是的,这更容易理解。谢谢!
    • 你为什么用enum {}而不是int const DUMP_BYTES_PER_LINE = 16;
    • @rfg 主要是一种习惯。
    猜你喜欢
    • 1970-01-01
    • 1970-01-01
    • 2013-10-30
    • 2023-04-06
    • 2013-06-27
    • 2012-02-20
    • 2011-01-21
    • 2021-05-23
    • 1970-01-01
    相关资源
    最近更新 更多