如何在不知道每行字符串长度的情况下使用 fscanf答案

【问题标题】：How to use fscanf without knowing the length of strings in every line如何在不知道每行字符串长度的情况下使用 fscanf
【发布时间】：2017-06-15 08:20:23
【问题描述】：

我想知道是否有办法解析如下格式的输入：

A million $ exit $$$ 16
The Cheit and its Punishment $$$ 8
War and Remembrance $$$ 12
Winds of War $$$ 12
How to Play Football $$$ 12
Ultrashort Pulses $$$ 8
Nonlinear Optics $$$ 8
等等..

其中 “$$$” 是分隔各数据字段的分隔符。
我想改进下面这条语句：

sscanf(line, " %200[^$][^$][^$]$$$%ld", name, &copies);

使它也能正确处理示例中第 1 行这样的输入。

编辑：

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/* Size in bytes of the title buffers (Book.name and the parsing buffer),
 * terminating NUL included. */
#define NAME_LENGTH 200
/* Generic failure code. NOTE(review): not referenced in the code shown here. */
#define ERROR -1
/* Minimal boolean type and its two values.
 * NOTE(review): not referenced in the code shown here. */
typedef int BOOL;
#define TRUE 1
#define FALSE 0

/* One node of a singly linked list of books. */
typedef struct book{
    /* Title, stored as a NUL-terminated string. */
    char name[NAME_LENGTH];
    /* Number of copies recorded for this title. */
    long copies;
    /* Following node; set to NULL by create_book(). */
    struct book *next;
} Book;

/*
 * Allocate a new list node holding a copy of `name` and `copies`.
 *
 * The title is copied with a length bound, so a `name` longer than the
 * node's buffer is truncated instead of overflowing it.
 *
 * Returns the new node with next == NULL, or NULL if allocation fails.
 * The caller owns the node and releases it with free().
 */
Book* create_book(char name[], long copies){
    Book *new_book = malloc(sizeof *new_book);
    if (new_book != NULL) {
        /* snprintf always NUL-terminates and never writes past the buffer. */
        snprintf(new_book->name, sizeof new_book->name, "%s", name);
        new_book->next = NULL;
        new_book->copies = copies;
    }
    return new_book;
}

/*
 * Prepend a new book to the list.
 *
 * Returns the new head of the list, or NULL when the node could not be
 * created (the existing list is not modified in that case).
 */
Book* add_first(Book *head, char name[], long copies){
    Book *node = create_book(name, copies);
    if (node != NULL)
        node->next = head;
    return node;
}

/*
 * Append a new book at the end of the list.
 *
 * Returns the head of the list (the new node when the list was empty),
 * or NULL when the node could not be created.
 */
Book* add_last(Book *head, char name[], long copies){
    Book **link = &head;
    Book *node = create_book(name, copies);
    if (node == NULL)
        return NULL;
    /* Walk to the NULL link that terminates the list; for an empty list
     * that link is `head` itself. */
    while (*link != NULL)
        link = &(*link)->next;
    *link = node;
    return head;
}

/*
 * Insert a new book so that the list stays ordered by title
 * (ascending strcmp() order).  A title equal to an existing one is placed
 * in front of it.
 *
 * Returns the head of the list, which is the new node when it sorts first
 * or when the list was empty; callers must store the returned pointer.
 * If the node cannot be allocated the list is returned unchanged.
 */
Book* add_sorted(Book *head, char name[], long copies){
    Book *iter = head, *prev = NULL;
    Book *new_book = create_book(name, copies);
    if (new_book == NULL)
        return head;
    /* Skip every node whose title sorts strictly before the new one.
     * The comparison must use the node being visited, not the head. */
    while (iter != NULL && strcmp(new_book->name, iter->name) > 0){
        prev = iter;
        iter = iter->next;
    }
    new_book->next = iter;
    if (prev == NULL)
        return new_book;   /* new node becomes the head */
    prev->next = new_book;
    return head;
}

/* Count the nodes in the list; an empty list (NULL) has length 0. */
int length(const Book *head){
    int count = 0;
    for (; head != NULL; head = head->next)
        count++;
    return count;
}

/* Release every node of the list.  Passing NULL is allowed and does nothing. */
void free_library(Book *head_book){
    while (head_book != NULL) {
        /* Read the link before the node is released. */
        Book *following = head_book->next;
        free(head_book);
        head_book = following;
    }
}

/*
 * Look up a book by exact title.
 *
 * Returns the first node whose name compares equal to `name`, or NULL
 * when no node matches (including when the list is empty).
 */
Book* find_book(Book *head, char name[]){
    for (; head != NULL; head = head->next){
        if (strcmp(head->name, name) == 0)
            return head;
    }
    return NULL;
}

/*
 * Remove and free the first book whose title equals `name`.
 *
 * Returns the head of the list after the removal; this differs from the
 * argument when the head itself was removed.  The list is returned
 * unchanged when no title matches or when it is empty.
 */
Book* delete_book(Book *head, char name[]){
    Book *iter = head, *prev = NULL;
    /* Advance to the matching node, remembering its predecessor. */
    while (iter != NULL && strcmp(iter->name, name) != 0){
        prev = iter;
        iter = iter->next;
    }
    if (iter == NULL)
        return head;            /* not found */
    if (prev == NULL)
        head = iter->next;      /* removing the head */
    else
        prev->next = iter->next;
    free(iter);
    return head;
}

/*
 * Build the book list from a text stream with one record per line:
 *
 *     <title> $$$ <copies>
 *
 * The title is everything before the first "$$$" with surrounding blanks
 * removed, so a title may itself contain single '$' characters.  Titles
 * longer than NAME_LENGTH - 1 characters are truncated.  When a title
 * occurs more than once its copies are added to the existing entry;
 * otherwise a new entry is inserted with add_sorted().  Each accepted
 * record is echoed to stdout; malformed lines are reported on stderr and
 * skipped.
 *
 * Returns the head of the list, or NULL if `input` is NULL or no record
 * could be stored.  The caller owns the list.
 */
Book* initBooksList(FILE *input){
    Book *head_book = NULL, *existing_book = NULL;
    long copies = 0;
    unsigned long line_no = 0;
    size_t len;
    char *start, *delim;
    char line[256] = {0}, name[NAME_LENGTH];
    if (input == NULL){
        printf("File did not open. Exit..\n");
        return NULL;
    }
    /* Loop on fgets() itself: testing feof() first would process the last
     * line a second time after the final, failing read. */
    while (fgets(line, sizeof line, input) != NULL){
        ++line_no;

        delim = strstr(line, "$$$");
        if (delim == NULL || sscanf(delim + 3, "%ld", &copies) != 1){
            fprintf(stderr, "Format error in line %lu\n", line_no);
            continue;
        }

        /* Title = text before the delimiter, without surrounding blanks.
         * `start` cannot pass `delim` because '$' is not a blank. */
        start = line;
        while (*start == ' ' || *start == '\t')
            ++start;
        len = (size_t)(delim - start);
        while (len > 0 && (start[len - 1] == ' ' || start[len - 1] == '\t'))
            --len;
        if (len == 0){
            fprintf(stderr, "Format error in line %lu\n", line_no);
            continue;
        }
        if (len >= sizeof name)
            len = sizeof name - 1;   /* keep room for the terminating NUL */
        memcpy(name, start, len);
        name[len] = '\0';

        existing_book = find_book(head_book, name);
        if (existing_book != NULL){
            existing_book->copies += copies;
            printf("%s\n%ld\n", name, existing_book->copies);
        }
        else{
            /* add_sorted() may return a new head; keep it. */
            head_book = add_sorted(head_book, name, copies);
            printf("%s\n%ld\n", name, copies);
        }
    }
    return head_book;
}

/* Placeholder: the body is empty, so calling this currently does nothing. */
void storeBooks(Book *head_book){

}

/* Placeholder: the body is empty, so calling this currently does nothing. */
void returnBook(Book *head_book){

}

/* Placeholder: the body is empty, so calling this currently does nothing. */
void borrowBook(Book *head_book){

}

int main(int argc, char *argv[]){
    int i = 0;
    FILE *ptr;
    printf("%d\n", argc);
    for(i = 0; i < argc; i++)
        printf("argv[%d] = %s\n", i, argv[i]);
    ptr = fopen(argv[1], "r");
    initBooksList(ptr);
    return 0;
}

【问题讨论】：

通常的做法是使用fgets将整行读入一个合适的大缓冲区，然后使用sscanf/strtok/whatever进行解析。
你试过了吗？你有代码要出示吗？
你有最大行长度吗？而且我不确定fscanf（及其同族函数）是否是这里的正确选择……相反，我可能会用fgets和strstr来处理。
@Gem 还没有，我只是在这里与更优秀的人合作:)
@Someprogrammerdude 承诺输入是非敌对的。此外，每行的第一段的最大长度为 200 个字符（书名或程序执行功能的命令，在命令后输入）。

标签： c fgets scanf strstr

【解决方案1】：

如果您知道最长的标题是 200 个字符，如 your comment 所示，您可以为此分配一个数组（包括空终止符的空间）。

您可以使用fscanf() 使用格式字符串" %200[^$]$$$%d" 解析文件的行。第一个空格告诉fscanf() 跳过前导空格，这些空格可能是以前的 I/O 操作留下的。下一个转换说明符是%200[^$]，它告诉fscanf() 将任何字符读入字符串，直到遇到$。 $ 留在输入流中。请注意，此处指定最大宽度为 200 以防止缓冲区溢出。格式字符串中接下来的三个字符 $$$ 必须出现在输入中，并且在到达最终转换说明符 %d 之前匹配。

#include <stdio.h>
#include <stdlib.h>

/* Room for a 200-character title plus the terminating NUL. */
#define MAX_TITLE  201

/*
 * Read "title $$$ number" records from data.txt directly with fscanf()
 * and print each one.  Reading stops at the first record that does not
 * match the format (or at end of file).
 */
int main(void)
{
    char title[MAX_TITLE];
    int price;
    FILE *fp;

    /* Open file, and check for success */
    fp = fopen("data.txt", "r");
    if (!fp) {
        perror("Unable to open file");
        exit(EXIT_FAILURE);
    }

    for (;;) {
        /* The leading blank in the format skips the newline left behind
         * by the previous record. */
        int fields = fscanf(fp, " %200[^$]$$$%d", title, &price);
        if (fields != 2) {
            break;
        }
        printf("Title: %s --- Price: $%d\n", title, price);
    }

    fclose(fp);

    return 0;
}

这是针对您的输入文件运行时的程序输出：

Title: The Cheit and its Punishment  --- Price: $8
Title: War and Remembrance  --- Price: $12
Title: Winds of War  --- Price: $12
Title: How to Play Football  --- Price: $12
Title: Ultrashort Pulses  --- Price: $8
Title: Nonlinear Optics  --- Price: $8

上述代码中对fscanf() 的调用会在输入流中每一行的最后一个数字后面留下空白字符；这就是为什么需要格式字符串中的前导空格的原因。更好的解决方案是使用fgets() 获取一行输入，并使用sscanf() 解析该行。应该分配一个缓冲区（buffer）来保存每次读取的一行内容；这里把缓冲区分配得大一些是有好处的，因为这样可以减少过长的输入行在输入流中留下残余字符的可能。如果输入行有可能比缓冲区更长，则应添加代码，在下次调用 fgets() 之前清除输入流中的残余字符。

这种方法的一个优点是，由于读取了包括\n 在内的整行，因此无需像以前那样跳过前导空白字符。另一个优点是，最终数字之后的多余字符可以被忽略，也可以由代码处理；由于该行已保存在缓冲区中，因此可以根据需要多次检查和扫描。而在第一个版本中，最后一个数字后面的多余字符会引发问题，因为该版本只会跳过前导空白字符。

#include <stdio.h>
#include <stdlib.h>

/* Size of the line buffer handed to fgets(). */
#define BUF_SZ     1000
/* Room for a 200-character title plus the terminating NUL. */
#define MAX_TITLE  201

/*
 * Read data.txt one line at a time with fgets() and parse each line with
 * sscanf().  Lines that do not match "title $$$ number" are reported on
 * stderr together with their line number.
 */
int main(void)
{
    char buffer[BUF_SZ];
    char title[MAX_TITLE];
    int price;
    size_t lnum;
    FILE *fp;

    /* Open file, and check for success */
    fp = fopen("data.txt", "r");
    if (!fp) {
        perror("Unable to open file");
        exit(EXIT_FAILURE);
    }

    /* lnum is the 1-based number of the line currently held in buffer. */
    for (lnum = 1; fgets(buffer, BUF_SZ, fp) != NULL; ++lnum) {
        if (sscanf(buffer, "%200[^$]$$$%d", title, &price) != 2) {
            fprintf(stderr, "Format error in line %zu\n", lnum);
            continue;
        }
        printf("Title: %s --- Price: $%d\n", title, price);
    }

    fclose(fp);

    return 0;
}

在此处使用fgets() 可以更灵活地检查输入。要处理 $ 是标题的一部分的情况，您可以使用 strstr() 首先找到分隔符 " $$$"，然后在循环中将直到分隔符的字符复制到 title[] 数组中。由于strstr() 返回一个指向找到的字符串的指针，这个指针可以交给sscanf() 来挑选出最终的数字。如果未找到字符串，strstr() 函数将返回一个空指针，这可用于识别存在格式问题的行。注意strstr()在string.h中：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Size of the line buffer handed to fgets(). */
#define BUF_SZ     1000
/* Room for a 200-character title plus the terminating NUL. */
#define MAX_TITLE  201

/*
 * Read data.txt line by line, locate the " $$$" delimiter with strstr(),
 * copy the text in front of it into title[] and parse the number that
 * follows it.  Because the delimiter is searched as a whole, a title may
 * contain single '$' characters.  Malformed lines are reported on stderr
 * with their line number.
 */
int main(void)
{
    /* Open file, and check for success */
    FILE *fp = fopen("data.txt", "r");
    if (fp == NULL) {
        perror("Unable to open file");
        exit(EXIT_FAILURE);
    }

    char buffer[BUF_SZ];
    char title[MAX_TITLE];
    int copies;
    size_t lnum = 0;

    while (fgets(buffer, BUF_SZ, fp) != NULL) {
        ++lnum;

        /* Find delimiter string in buffer */
        char *title_end = strstr(buffer, " $$$");
        if (title_end == NULL) {
            fprintf(stderr, "Format error in line %zu\n", lnum);
            continue;
        }

        /* Copy characters into title until the space before the delimiter.
         * Stop at MAX_TITLE - 1 so the terminating NUL written below always
         * stays inside the array; longer titles are truncated. */
        const char *curr = buffer;
        size_t i = 0;
        while (curr < title_end && i < MAX_TITLE - 1) {
            title[i] = *curr;
            ++curr;
            ++i;
        }
        title[i] = '\0';

        if (sscanf(title_end, " $$$%d", &copies) == 1) {
            printf("Title: %s --- Copies: %d\n", title, copies);
        } else {
            fprintf(stderr, "Format error in line %zu\n", lnum);
        }
    }

    fclose(fp);

    return 0;

}

这是一个修改后的输入文件：

The Cheit and its Punishment $$$ 8
War and Remembrance $$$ 12
Winds of War $$$ 12
A million $ exit $$$ 16
How to Play Football $$$ 12
Ultrashort Pulses $$$ 8
Nonlinear Optics $$$ 8

以及结果输出：

Title: The Cheit and its Punishment --- Copies: 8
Title: War and Remembrance --- Copies: 12
Title: Winds of War --- Copies: 12
Title: A million $ exit --- Copies: 16
Title: How to Play Football --- Copies: 12
Title: Ultrashort Pulses --- Copies: 8
Title: Nonlinear Optics --- Copies: 8

【讨论】：

我得到的最好和最全面的解释。到目前为止，一直在尝试使用 fgets 和 strtok，但没有成功。使用带有“%200[^$]$$$%d”输入的 sscanf 也可以吗？它应该是什么样子？
添加了一个新的“错误”，该短语应处理主要问题，有什么想法吗？
@OriAshkenazi--我误解了您在上次编辑中添加的问题...答案已更新。

【解决方案2】：

这应该让你知道你可以做什么：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Minimal demonstration: split one hard-coded record at its first '$'.
 * No error checking is done; the line is assumed to have the expected
 * "title $$$ number" layout.
 */
int main()
{
  char line[] = "The Cheit and its Punishment $$$ 8";

  /* Cut the string at the first '$' so that `line` holds only the title. */
  char *sep = strchr(line, '$');
  sep[0] = '\0';

  /* The number starts four characters further on, after "$$$ ". */
  const char *digits = &sep[4];
  int price = atoi(digits);

  printf("Title: %s\nPrice: %d\n", line, price);
}

免责声明：没有错误检查，假设该行具有所需的格式。

【讨论】：

我很困惑，忘记了“*ptr”调用是您获取指针“内容”的方式，这就是我删除最后一个问题的原因
再次感谢，这对我在我的代码中实现非常有帮助。
@OriAshkenazi 随时接受答案和/或投票。
您可能希望使用strstr 来查找完整的分隔符。