在不使用库函数的情况下，用 C 按空白字符拆分字符串并将各个单词存入数组 - 答案

【问题标题】：Split whitespaces in a string and store them in table in C without libraries（在不使用库函数的情况下，用 C 按空白字符拆分字符串并将各个单词存入数组）
【发布时间】：2018-08-08 06:45:23
【问题描述】：

上周我的课程布置了一个作业：我必须以空格、制表符和 \n 作为分隔符来拆分一个字符串，并将每个“单词”存储在一个数组中。我想我已经很接近了，但是我的输出很奇怪，所以如果有人能指出我遗漏了什么，那就太好了。唯一的限制是我只能使用 malloc。

/*
 * Split str into words separated by spaces, tabs and newlines, and return
 * them as a NULL-terminated array of newly allocated strings.
 *
 * Relies on two helpers that are not shown here: ft_nb_words() and
 * ft_len_word(). Per the author they return the number of words and the
 * length of a word; presumably ft_len_word(p) measures the word that
 * starts at p -- TODO confirm against their definitions.
 *
 * Returns NULL if allocating a word fails.
 *
 * NOTE(review): this is the code as posted in the question and it is known
 * to produce wrong output; see the notes inside the body.
 */
char    **ft_split_whitespaces(char *str)
{
    int     i;
    int     j;
    int     k;
    char    **tab;

    i = 0;
    j = 0;
    k = 0;
    /* One slot per word plus one for the terminating NULL entry. */
    /* NOTE(review): the result of this malloc is never checked. */
    tab = (char**)malloc(sizeof(*tab) * (ft_nb_words(str) + 1));
    while (str[i])
    {
        /* Skip the separators that precede the next word. */
        while (str[i] == ' ' || str[i] == '\t' || str[i] == '\n')
            i++;
        if (str[i])
        {
            /* NOTE(review): returning here leaks tab and the words already stored. */
            if ((tab[j] = (char*)malloc(sizeof(char) * (ft_len_word(str + i) + 1))) == NULL)
                return (NULL);
            /*
             * NOTE(review): the bound is re-evaluated on every iteration
             * while i advances, so (assuming ft_len_word measures the word
             * starting at its argument) it shrinks as characters are copied
             * and the copy stops before the end of the word. Computing the
             * length once, before the loop, avoids this.
             */
            while (k < ft_len_word(str + i))
                tab[j][k++] = str[i++];
            /* Terminate the copied word, then move on to the next slot. */
            tab[j++][k] = '\0';
            k = 0;
        }
    }
    /* Mark the end of the array. */
    tab[j] = NULL;
    return (tab);
}

返回单词长度和单词数的函数工作正常，所以我认为问题来自主函数。

【问题讨论】：

请注意，如果循环中的malloc() 失败，则会泄漏内存。但是，这不是您当前问题的一部分。
这个循环很可疑：while (k < ft_len_word(str + i)) tab[j][k++] = str[i++]; — 循环体在递增 i，而循环条件每次又用 str + i 重新计算单词长度，我认为这不是你想要的。最好在循环之前先把长度保存到一个变量中，这样应该就没问题了。但就目前的写法而言，我认为你的代码有问题。
好的，就是这样，非常感谢
我该如何解决内存泄漏问题？
在执行错误返回之前，您需要一个循环来释放已分配的字符串以及另一个调用来释放指针向量。严格来说，您也应该错误检查第一次分配。

标签： c string malloc

【解决方案1】：

如果你用一个指针记录上一个分隔符（' '、'\n'、'\t'）之后的位置，也就是当前单词的起始位置，这个问题就很容易处理。

   /* Return 1 when c is a word separator (space, tab or newline), else 0. */
   static int  ft_is_sep(char c)
   {
       return (c == ' ' || c == '\t' || c == '\n');
   }

   /* Free the first count words of tab and tab itself; always returns NULL. */
   static char **ft_free_tab(char **tab, int count)
   {
       while (count > 0)
           free(tab[--count]);
       free(tab);
       return (NULL);
   }

   /*
    * Split str into words separated by spaces, tabs and newlines.
    *
    * Returns a newly allocated, NULL-terminated array of newly allocated,
    * NUL-terminated copies of the words. Runs of consecutive separators and
    * leading/trailing separators produce no empty words. The caller owns the
    * result and must free every word and then the array.
    *
    * Returns NULL when str is NULL or when an allocation fails; in the
    * latter case everything allocated so far is released first.
    */
   char    **ft_split_whitespaces(char *str)
   {
       int     i;
       int     j;
       int     k;
       int     len;
       int     nb_words;
       char    **tab;
       char    *prevToken;

       if (str == NULL)
           return (NULL);
       /* First pass: a word starts at every non-separator that follows a
        * separator or the beginning of the string. */
       nb_words = 0;
       for (i = 0; str[i] != '\0'; i++)
           if (!ft_is_sep(str[i]) && (i == 0 || ft_is_sep(str[i - 1])))
               nb_words++;
       /* One slot per word plus one for the terminating NULL entry. */
       tab = malloc(sizeof(*tab) * (nb_words + 1));
       if (tab == NULL)
           return (NULL);
       i = 0;
       j = 0;
       while (str[i] != '\0')
       {
           if (ft_is_sep(str[i]))
           {
               i++;
               continue;
           }
           /* prevToken marks the start of the current word. */
           prevToken = str + i;
           /* Measure the word once, before copying it. */
           len = 0;
           while (prevToken[len] != '\0' && !ft_is_sep(prevToken[len]))
               len++;
           tab[j] = malloc(sizeof(**tab) * (len + 1));
           if (tab[j] == NULL)
               return (ft_free_tab(tab, j));
           for (k = 0; k < len; k++)
               tab[j][k] = prevToken[k];
           tab[j][len] = '\0';
           j++;
           i += len;
       }
       tab[j] = NULL;
       return (tab);
   }

【讨论】：

【解决方案2】：

以下代码包含一些有用的 C 函数的实现。

您搜索的函数是strtok()。在代码中还实现了函数strspn() 和strpbrk()，因为strtok() 使用了它们。

解决这类问题最好的办法就是研究C标准函数的实现。

代码存储最多 100 个令牌（提取的单词）的副本。

您必须记住，函数strtok() 会修改源字符串的内容，插入“\0”以终止找到的字符串。

这里实现的功能是：

mystrtok()
mystrspn()
mystrpbrk()

代码：

#include <stdio.h>
#include <string.h> /* for the use of strcpy fn */
#include <malloc.h>

/* strtok-style tokenizer: returns the next token of s (or of the string
 * from the previous call when s is NULL), or NULL when none is left.
 * Writes '\0' into the string and keeps its position in a static variable. */
char * mystrtok (char * s, char * delim);
/* Length of the initial part of s made up only of characters in accept. */
size_t mystrspn (const char *s, const char *accept);
/* Pointer to the first character of s that appears in accept, or NULL. */
char * mystrpbrk (const char *s, const char *accept);

/*
 * Locate the first character of s that also occurs in accept.
 *
 * Returns a pointer to that character inside s, or NULL when s contains
 * none of the characters of accept (including when either string is empty).
 */
char * mystrpbrk (const char *s, const char *accept)
{
    const char *cursor;
    const char *candidate;

    for (cursor = s; *cursor != '\0'; ++cursor)
    {
        /* Compare the current character against every accepted one. */
        for (candidate = accept; *candidate != '\0'; ++candidate)
        {
            if (*candidate == *cursor)
                return (char *) cursor;
        }
    }

    return NULL;
}

/*
 * Measure the leading run of s that consists only of characters found in
 * accept.
 *
 * Returns the number of characters in that run: 0 when the first character
 * of s is not in accept, and the full length of s when every character is.
 */
size_t mystrspn (const char *s, const char *accept)
{
    size_t span = 0;

    while (s[span] != '\0')
    {
        const char *match = accept;

        /* Walk accept until the current character is found or accept ends. */
        while (*match != '\0' && *match != s[span])
            ++match;
        if (*match == '\0')
            break;          /* s[span] is not accepted: the run ends here */
        ++span;
    }

    return span;
}

/*
 * strtok-style tokenizer.
 *
 * s:     string to tokenize on the first call; NULL on later calls to keep
 *        scanning the string of the previous call.
 * delim: set of delimiter characters; it may differ from call to call.
 *
 * Returns a pointer to the next token, or NULL when no token is left (or
 * when there is nothing to resume, or delim is NULL). The token is
 * terminated by overwriting the delimiter that follows it with '\0', so the
 * input must be writable. The scan position is kept in a static variable,
 * so only one string can be tokenized at a time.
 */
char * mystrtok (char *s, char *delim)
{
    char *token;
    static char *olds;

    if (s == NULL) {
        s = olds;
    }

    /* Nothing was saved by a previous call, or no delimiter set was given. */
    if (s == NULL || delim == NULL)
        return NULL;

    /* Scan leading delimiters.  */
    s += mystrspn (s, delim);
    if (*s == '\0')
    {
        olds = s;
        return NULL;
    }

    /* Find the end of the token.  */
    token = s;
    s = mystrpbrk (token, delim);
    if (s == NULL)
    {
        /* This token finishes the string: park OLDS on its terminator.
           Start from TOKEN, not from the previous OLDS, which is NULL on a
           first call and may belong to another string otherwise.  */
        olds = token;
        while (*olds != '\0')
            olds++;
    }
    else
    {
        /* Terminate the token and make OLDS point past it.  */
        *s = '\0';
        olds = s + 1;
    }
    return token;
}

/*
 * Demo: split a sample sentence on spaces, tabs and newlines with
 * mystrtok(), store a copy of every token, then print the stored copies.
 *
 * All copies live back to back in one buffer (store[0]); store[i] points at
 * the i-th copy inside that buffer, so only store[0] and store are freed.
 * Returns 0 on success and 1 when an allocation fails.
 */
int main(void)
{
    enum { MAX_TOKENS = 100 };   /* Stores a max of 100 strings */
    char str[] = "I have an orange\tYou have some bananas\nShe has three pineapples\n";
    /* One delimiter set for every call, so the first token is split the
       same way as the following ones. */
    char delims[] = " \t\n";
    char *x = NULL;
    char **store;
    int cnt = 0, i;

    /* One extra slot: store[cnt + 1] is written after each stored token. */
    store = malloc(sizeof(*store) * (MAX_TOKENS + 1));
    if (store == NULL)
        return 1;

    /* The total space for the tokens is
       max the entire string + '\0' */
    store[0] = malloc(strlen(str) + 1);
    if (store[0] == NULL) {
        free(store);
        return 1;
    }

    /* Extract the first token */
    x = mystrtok(str, delims);
    while (x && cnt < MAX_TOKENS) {
        printf("Storing %s\n", x);

        /* Store a copy of the token; the next copy starts right after
           this one's terminator. */
        strcpy(store[cnt], x);
        store[cnt + 1] = store[cnt] + strlen(x) + 1;
        cnt++;

        /* extract the next token */
        x = mystrtok(NULL, delims);
    }

    for (i = 0; i < cnt; i++)
        printf("Stored %s\n", store[i]);

    free(store[0]);
    free(store);

    return 0;
}

【讨论】：

【解决方案3】：

您的代码效率不高，因为您调用 ft_len_word 的次数太多了；但除了 malloc 失败时的未定义行为之外，它看起来并没有其他错误。

问题可能在于您的ft_len_word 或ft_nb_words 版本。您应该发布一个完整的程序来展示问题，以便进行适当的调查。

这是一个不使用这些功能的修改版本：

#include <stdlib.h>

/* Return 1 when c is a space, a tab or a newline, and 0 for any other character. */
int ft_is_space(char c) {
    switch (c) {
    case ' ':
    case '\t':
    case '\n':
        return 1;
    default:
        return 0;
    }
}

/*
 * Split str into words separated by characters for which ft_is_space()
 * returns non-zero.
 *
 * Returns a newly allocated, NULL-terminated array of newly allocated,
 * NUL-terminated copies of the words; the caller frees every word and then
 * the array. Returns NULL when an allocation fails, after releasing
 * whatever had been allocated so far.
 */
char **ft_split_whitespaces(const char *str) {
    int i, j, k, len, in_space, nb_words;
    char **tab;

    /* First pass: count the words. A word starts at each non-space
     * character that follows a space or the beginning of the string. */
    nb_words = 0;
    in_space = 1;
    for (i = 0; str[i]; i++) {
        if (ft_is_space(str[i])) {
            in_space = 1;
        } else {
            nb_words += in_space;
            in_space = 0;
        }
    }
    /* One slot per word plus one for the terminating NULL entry. */
    tab = malloc(sizeof(*tab) * (nb_words + 1));
    if (tab != NULL) {
        i = 0;
        j = 0;
        while (str[i]) {
            /* Skip the separators in front of the next word. */
            while (ft_is_space(str[i]))
                i++;
            if (str[i]) {
                /* Measure the word once; str[i] is already known to be part of it. */
                for (len = 1; str[i + len] && !ft_is_space(str[i + len]); len++)
                     continue;
                if ((tab[j] = malloc(sizeof(*tab[j]) * (len + 1))) == NULL) {
                     /* Undo every allocation made so far before failing. */
                     while (j > 0)
                         free(tab[--j]);
                     free(tab);
                     return NULL;
                }
                for (k = 0; k < len; k++)
                    tab[j][k] = str[i + k];
                tab[j++][len] = '\0';
                i += len;
            }
        }
        tab[j] = NULL;
    }
    return tab;
}

【讨论】：

【解决方案4】：

如果您不想使用库函数或需要与strtok() 提供的功能不同的功能，则需要实现您的strtok() 版本。

下面是一个简单的字符串标记器，与标准库的strtok() 不同，它在连续分隔符的情况下仍然返回一个值。我使用这个函数来解析 CSV 文件，这些文件有时包含空单元格，因此是连续的 , 字符。标准库的strtok() 对我不起作用，所以我必须实现自己的功能。

我还使用了其他一些辅助函数，它们现在都属于我在 GitHub 上维护的一个简单字符串库 zString。

下面是它的行为方式

Example Usage
      /* The buffer must be a writable array: the tokenizer writes '\0' into it. */
      char str[] = "A,B,,,C";
      printf("1 %s\n",zstring_strtok(str,","));
      printf("2 %s\n",zstring_strtok(NULL,","));
      printf("3 %s\n",zstring_strtok(NULL,","));
      printf("4 %s\n",zstring_strtok(NULL,","));
      printf("5 %s\n",zstring_strtok(NULL,","));
      /* The sixth call returns NULL; the "(null)" shown in the output below
         is what some C libraries print for a NULL %s argument. */
      printf("6 %s\n",zstring_strtok(NULL,","));

  Example Output
      1 A
      2 B
      3 ,
      4 ,
      5 C
      6 (null)

和代码

/*
 * Tokenizer that, unlike strtok(), still reports a value for consecutive
 * delimiters.
 *
 * str:   string to tokenize on the first call; 0 (NULL) on later calls to
 *        keep scanning the string of the previous call. It must be a
 *        writable array, because a '\0' is written over the delimiter that
 *        ends each token.
 * delim: delimiter; only its first character, delim[0], is compared.
 *
 * Returns:
 *   - 0 when delim is 0, or when str is 0 and nothing is left to scan;
 *   - delim itself when the current position starts with the delimiter
 *     (this is how an empty field between two delimiters is reported);
 *     the returned pointer must not be written through;
 *   - otherwise a pointer to the next token inside str.
 *
 * The scan position is kept in a static variable, so only one string can
 * be tokenized at a time.
 */
char *zstring_strtok(char *str, const char *delim) {
    static char *static_str = 0;    /* where the next call resumes */
    char *cursor;

    /* delimiter cannot be NULL
    * if no more char left, return NULL as well
    */
    if (delim == 0 || (str == 0 && static_str == 0))
        return 0;

    if (str == 0)
        str = static_str;

    /* single pass: stop at the first delimiter or at the end of the string */
    cursor = str;
    while (*cursor != '\0' && *cursor != delim[0])
        cursor++;

    /* if delim is not contained in str, return str; it is the last token */
    if (*cursor == '\0') {
        static_str = 0;
        return str;
    }

    /* check for consecutive delimiters
    * if first char is delim, return delim
    */
    if (cursor == str) {
        static_str = str + 1;
        return (char *)delim;
    }

    /* terminate the token; this write is why str has to be a char[] and
    * not a pointer to a string literal
    */
    *cursor = '\0';

    /* save the rest of the string; the address just past the delimiter is
    * never null, so no test is needed here
    */
    static_str = cursor + 1;

    return str;
}

【讨论】：