【问题标题】:Implementation of strtok() functionstrtok() 函数的实现
【发布时间】:2015-05-09 23:34:01
【问题描述】:

我需要编写我的函数 strtok。下面是我的代码。问题是 - 我无法显示结果字符串。在代码中我使用strcpy(),然后显示新数组。 是否可以仅使用指针 *str 显示字符串?

#include <stdio.h>
#include <string.h>   

char* my_strtok(char* s, char* delm){
    char W[100];
    int i = 0, k = 0, j = 0;
    char *ptr;
    static char *Iterator;
    ptr = s;

    if (s == NULL){
        s = Iterator;
    }
    while (s[i] != '\0'){
        j = 0;
        while (delm[j] != '\0'){
            if (s[i] != delm[j])
                W[k] = s[i];
            else goto It;
            j++;
        }
        ptr++;
        i++;
        k++;
    }
It:
    W[i] = 0;
    Iterator = ++ptr;
    return W;
}

int main(void){
    char s[100];
    char delm[] = " ";
    gets(s);
    char newstr[100];
    char *str = my_strtok(s, delm);
    strcpy(newstr, str);
    printf("%s", newstr);
    return 0;
}

【问题讨论】:

标签: c strtok


【解决方案1】:

这里已经讨论了 strtok 的内部实现:

How does strtok() split the string into tokens in C?

在您的实现类型中(称它为您的类型,因为它与实际的完全不同),您没有动态地为局部变量“W”分配任何内存。因此,当您返回它时,不能保证您将正确接收字符串,因为在调用函数中不再为它保留本地分配给“W”的内存。除此之外,代码中还使用了许多不必要的变量。而且,它需要适当的重组。为了您更好地理解,我修改了您的代码(尽可能保持您的风格)以完成所需的工作:

#include <stdio.h>
#include <string.h>  
#include <malloc.h>

char* my_strtok(char* s, char* delm)
{
    static int currIndex = 0;
    if(!s || !delm || s[currIndex] == '\0')
    return NULL;
    char *W = (char *)malloc(sizeof(char)*100);
    int i = currIndex, k = 0, j = 0;

    while (s[i] != '\0'){
        j = 0;
        while (delm[j] != '\0'){
            if (s[i] != delm[j])
                W[k] = s[i];
            else goto It;
            j++;
        }

        i++;
        k++;
    }
It:
    W[i] = 0;
    currIndex = i+1;
    //Iterator = ++ptr;
    return W;
}

int main(void)
{
    char s[100] = "my name is khan";
    char delm[] = " ";
    //char newstr[100];
    char *str = my_strtok(s, delm);
    while(str){
        printf("%s", str);
        free(str);
        str = my_strtok(s, delm);
    }

    return 0;
}

【讨论】:

  • 添加这 4 行(在 static int currIndex 之后)将使这段代码更具弹性,因此我们可以像 my_strtok(NULL, delm);静态字符 * 备份字符串; if (s != NULL) { backup_string = s; } else s = 备份字符串;
【解决方案2】:

赏金免责声明

赏金:“从可靠和/或官方来源寻找答案。”

我认为GNU GLibC 是一个可信的官方来源,对吧?您可以将GLibC 2.22 here 下载为.tar.gz (25mb)。


提取来源后:

string/strtok.c

#include <string.h>


static char *olds;

#undef strtok

#ifndef STRTOK
# define STRTOK strtok
#endif

/* Parse S into tokens separated by characters in DELIM.
   If S is NULL, the last string strtok() was called with is
   used.  For example:
    char s[] = "-abc-=-def";
    x = strtok(s, "-");     // x = "abc"
    x = strtok(NULL, "-=");     // x = "def"
    x = strtok(NULL, "=");      // x = NULL
        // s = "abc\0=-def\0"
*/
char *
STRTOK (char *s, const char *delim)
{
  char *token;

  if (s == NULL)
    s = olds;

  /* Scan leading delimiters.  */
  s += strspn (s, delim);
  if (*s == '\0')
    {
      olds = s;
      return NULL;
    }

  /* Find the end of the token.  */
  token = s;
  s = strpbrk (token, delim);
  if (s == NULL)
    /* This token finishes the string.  */
    olds = __rawmemchr (token, '\0');
  else
    {
      /* Terminate the token and make OLDS point past it.  */
      *s = '\0';
      olds = s + 1;
    }
  return token;
}


string/strspn.c
#include <string.h>

#undef strspn
#ifndef STRSPN
#define STRSPN strspn
#endif

/* Return the length of the maximum initial segment
   of S which contains only characters in ACCEPT.  */
size_t
STRSPN (const char *s, const char *accept)
{
  const char *p;
  const char *a;
  size_t count = 0;

  for (p = s; *p != '\0'; ++p)
    {
      for (a = accept; *a != '\0'; ++a)
    if (*p == *a)
      break;
      if (*a == '\0')
    return count;
      else
    ++count;
    }

  return count;
}
libc_hidden_builtin_def (strspn)


string/strpbrk.c
#include <string.h>

#undef strpbrk

#ifndef STRPBRK
#define STRPBRK strpbrk
#endif

/* Find the first occurrence in S of any character in ACCEPT.  */
char *
STRPBRK (const char *s, const char *accept)
{
  while (*s != '\0')
    {
      const char *a = accept;
      while (*a != '\0')
    if (*a++ == *s)
      return (char *) s;
      ++s;
    }

  return NULL;
}
libc_hidden_builtin_def (strpbrk)


sysdeps/x86_64/rawmemchr.S
#include <sysdep.h>

    .text
ENTRY (__rawmemchr)
    movd    %rsi, %xmm1
    mov %rdi, %rcx

    punpcklbw %xmm1, %xmm1
    punpcklbw %xmm1, %xmm1

    and $63, %rcx
    pshufd  $0, %xmm1, %xmm1

    cmp $48, %rcx
    ja  L(crosscache)

    movdqu  (%rdi), %xmm0
    pcmpeqb %xmm1, %xmm0
/* Check if there is a match.  */
    pmovmskb %xmm0, %eax
    test    %eax, %eax

    jnz L(matches)
    add $16, %rdi
    and $-16, %rdi
    jmp L(loop_prolog)

    .p2align 4
L(crosscache):
    and $15, %rcx
    and $-16, %rdi
    movdqa  (%rdi), %xmm0

    pcmpeqb %xmm1, %xmm0
/* Check if there is a match.  */
    pmovmskb %xmm0, %eax
/* Remove the leading bytes.  */
    sar %cl, %eax
    test    %eax, %eax
    je  L(unaligned_no_match)
/* Check which byte is a match.  */
    bsf %eax, %eax

    add %rdi, %rax
    add %rcx, %rax
    ret

    .p2align 4
L(unaligned_no_match):
    add $16, %rdi

    .p2align 4
L(loop_prolog):
    movdqa  (%rdi), %xmm0
    pcmpeqb %xmm1, %xmm0
    pmovmskb %xmm0, %eax
    test    %eax, %eax
    jnz L(matches)

    movdqa  16(%rdi), %xmm2
    pcmpeqb %xmm1, %xmm2
    pmovmskb %xmm2, %eax
    test    %eax, %eax
    jnz L(matches16)

    movdqa  32(%rdi), %xmm3
    pcmpeqb %xmm1, %xmm3
    pmovmskb %xmm3, %eax
    test    %eax, %eax
    jnz L(matches32)

    movdqa  48(%rdi), %xmm4
    pcmpeqb %xmm1, %xmm4
    add $64, %rdi
    pmovmskb %xmm4, %eax
    test    %eax, %eax
    jnz L(matches0)

    test    $0x3f, %rdi
    jz  L(align64_loop)

    movdqa  (%rdi), %xmm0
    pcmpeqb %xmm1, %xmm0
    pmovmskb %xmm0, %eax
    test    %eax, %eax
    jnz L(matches)

    movdqa  16(%rdi), %xmm2
    pcmpeqb %xmm1, %xmm2
    pmovmskb %xmm2, %eax
    test    %eax, %eax
    jnz L(matches16)

    movdqa  32(%rdi), %xmm3
    pcmpeqb %xmm1, %xmm3
    pmovmskb %xmm3, %eax
    test    %eax, %eax
    jnz L(matches32)

    movdqa  48(%rdi), %xmm3
    pcmpeqb %xmm1, %xmm3
    pmovmskb %xmm3, %eax

    add $64, %rdi
    test    %eax, %eax
    jnz L(matches0)

    and $-64, %rdi

    .p2align 4
L(align64_loop):
    movdqa  (%rdi), %xmm0
    movdqa  16(%rdi), %xmm2
    movdqa  32(%rdi), %xmm3
    movdqa  48(%rdi), %xmm4

    pcmpeqb %xmm1, %xmm0
    pcmpeqb %xmm1, %xmm2
    pcmpeqb %xmm1, %xmm3
    pcmpeqb %xmm1, %xmm4

    pmaxub  %xmm0, %xmm3
    pmaxub  %xmm2, %xmm4
    pmaxub  %xmm3, %xmm4
    pmovmskb %xmm4, %eax

    add $64, %rdi

    test    %eax, %eax
    jz  L(align64_loop)

    sub $64, %rdi

    pmovmskb %xmm0, %eax
    test    %eax, %eax
    jnz L(matches)

    pmovmskb %xmm2, %eax
    test    %eax, %eax
    jnz L(matches16)

    movdqa  32(%rdi), %xmm3
    pcmpeqb %xmm1, %xmm3

    pcmpeqb 48(%rdi), %xmm1
    pmovmskb %xmm3, %eax
    test    %eax, %eax
    jnz L(matches32)

    pmovmskb %xmm1, %eax
    bsf %eax, %eax
    lea 48(%rdi, %rax), %rax
    ret

    .p2align 4
L(matches0):
    bsf %eax, %eax
    lea -16(%rax, %rdi), %rax
    ret

    .p2align 4
L(matches):
    bsf %eax, %eax
    add %rdi, %rax
    ret

    .p2align 4
L(matches16):
    bsf %eax, %eax
    lea 16(%rax, %rdi), %rax
    ret

    .p2align 4
L(matches32):
    bsf %eax, %eax
    lea 32(%rax, %rdi), %rax
    ret

    .p2align 4
L(return_null):
    xor %rax, %rax
    ret

END (__rawmemchr)

weak_alias (__rawmemchr, rawmemchr)
libc_hidden_builtin_def (__rawmemchr)

【讨论】:

    【解决方案3】:

    实际上这是我对一般情况的解决方案...当 str 为 char* 时,您无法写入。

    这里是:

    And here is the link to github资源:

    #include<stdio.h>
    #include<stdlib.h>
    #include<string.h>
    
    #define DICT_LEN 256
    
    int *create_delim_dict(char *delim)
    {
        int *d = (int*)malloc(sizeof(int)*DICT_LEN);
        memset((void*)d, 0, sizeof(int)*DICT_LEN);
    
        int i;
        for(i=0; i< strlen(delim); i++) {
            d[delim[i]] = 1;
        }
        return d;
    }
    
    
    
    char *my_strtok(char *str, char *delim)
    {
    
        static char *last, *to_free;
        int *deli_dict = create_delim_dict(delim);
    
        if(!deli_dict) {
            return NULL;
        }
    
        if(str) {
            last = (char*)malloc(strlen(str)+1);
            if(!last) {
                free(deli_dict);
            }
            to_free = last;
            strcpy(last, str);
        }
    
        while(deli_dict[*last] && *last != '\0') {
            last++;
        }
        str = last;
        if(*last == '\0') {
            free(deli_dict);
            free(to_free);
            return NULL;
        }
        while (*last != '\0' && !deli_dict[*last]) {
            last++;
        }
    
        *last = '\0';
        last++;
    
        free(deli_dict);
        return str;
    }
    
    int main()
    {
        char * str = "- This, a sample string.";
        char *del = " ,.-";
        char *s = my_strtok(str, del);
        while(s) {
            printf("%s\n", s);
            s = my_strtok(NULL, del);
        }
        return 0;
    }
    

    【讨论】:

      【解决方案4】:
      char * strtok2(char *str, const char *delim)
      {
      static char *nxt; /* static variable used to advance the string to replace delimiters */
      static int size;  /* static variable used to count until the end of the string        */
      
      /* IMPORTANT: any advance to 'nxt' must be followed by a diminution of 'size', and vice verce */
      
      int i; /* counter of delimiter(s) in string */
      
      /* initialize the string when strtok2 is first calles and supplied with a valid string */
      if(str != NULL)
      {
          nxt = str;
          size = strlen(str);
      }
      
      /* if we havn't reached the end of the string, any other call to strtok2 with NULL will come here */
      else if(size > 0)
      {
          nxt++;      /* last run left nxt on a null terminator, so we have to advance it */
          size--;     /* any advancement must follow diminution of size                   */
          str = nxt;  /* string now points to the first char after the last delimiter     */ 
      }
      
      /* if we reached the end of string, return NULL pointer */
      else
      {
          str = NULL;
      }
      
      /* nxt is used to advance the string until a delimiter or a series of delimiters are encountered; 
       * it then stops on the last delimiter which has turned to NULL terminator
       */
      while(*nxt)
      {
          i = strspn(nxt, delim);
          while(i > 1)        /* turns delimiters to NULL terminator (except the last one) */
          {
              *nxt = '\0';
              nxt++;
              size--;
              i--;
          }                   /* in the last delimiter we have to do something a      */
          if(1 == i)          /* bit different we have to actually take nxt backwards */
          {                   /* one step, to break the while(*nxt) loop              */
              *nxt = '\0';
              if(size > 1)    /* if the delimiter is last char, don't do next lines   */
              {
                  nxt--;
                  size++;     /* nxt is diminished so size increases                    */
              }
          }
          nxt++;          /* if no delimiter is found, advance nxt                  */
          size--;         /* in case of the last delimiter in a series, we took nxt */
      }                   /* a step back, so now moving it a step forward means     */
                          /* it rests on a NULL terminator                          */
      return str;
      }
      

      【讨论】:

        【解决方案5】:

        这是一个实现,根据您的需要增加输入缓冲区,并增加对 strtok 的调用次数,其中 str1 为 NULL 以取回更多令牌

        #include<stdio.h>
        #include<string.h>
        #include<stdlib.h>
        char *ret_nondelim(char *str1,char *str2);
        char *my_strtok(char *str1,char *str2);
        char *ret_noofbytes(char *str1,char *str2);
        int main()
        {
            char str1[20];
            char str2[20];
            char *res;
            printf("enter string one\n");
            scanf("%s",str1);
            printf("enter string two\n");
            scanf("%s",str2);
            res=my_strtok(str1,str2);
            if(res)
                printf("%s\n",res);
            else
                printf("returned NULL\n");
            free(res);
            res=my_strtok(NULL,str2);
            if(res)
                printf("%s\n",res);
            else
                printf("returned NULL\n");
            free(res);
            res=my_strtok(NULL,str2);
            if(res)
                printf("%s\n",res);
            else
                printf("returned NULL\n");
            free(res);
        }
        char *my_strtok(char *str1,char *str2)
        {
            static char *str1_cpy;
            static int i;
            char *ptr;
            int flag=0;
            if(str1!=NULL)
            {
                str1_cpy=ret_nondelim(str1,str2);      //get location of non delimiting byte
                str1=ret_noofbytes(str1_cpy,str2);  //scan and get location of delimitng byte
                if((str1-str1_cpy))         //no of bytes = non delimiting location - delimting location
                {
                    ptr=malloc(str1-str1_cpy+1);        //malloc location and and store the string`enter code here`
                    int k;
                    for(k=0;k<(str1-str1_cpy);k++)
                        ptr[k]=str1_cpy[k];
                    ptr[k]='\0';
                    str1_cpy=str1;              //save pointer for next iteration
                    return ptr;             //return pointer
                }
                else
                    return NULL;
        
            }
            else
            {
                str1_cpy=ret_nondelim(str1_cpy,str2);   //same as above but pass saved pointer 
                str1=ret_noofbytes(str1_cpy,str2);
                if((str1-str1_cpy))
                {
                    ptr=malloc(str1-str1_cpy+1);
                    int k;
                    for(k=0;k<(str1-str1_cpy);k++)
                        ptr[k]=str1_cpy[k];
                    ptr[k]='\0';
                    str1_cpy=str1;          //save pointer for next iteration
                    return ptr;
                }
                else
                    return NULL;
            }
        }
        char *ret_nondelim(char *str1,char *str2)
        {
            int flag=0;
            for(;*(str1);)
            {
                for(int j=0;j<strlen(str2);j++)
                {
                    if(*(str1)==*(str2+j))          //check if slected byte is non delimiting byte
                    {
                        str1++;             //break if true go check next byte
                        break;
                    }
                    else
                        flag++;             //shows that selected byte is none of the delimitng byte
                }
                if(flag==strlen(str2))              //(above comment)-so non delimiting byte found return that pointer to caller
                    break;
                else
                    flag=0;
            }
            return str1;
        }
        
        char *ret_noofbytes(char *str1,char *str2)
        {
            int flag=0;
            for(;*(str1);)
            {
                for(int k=0;k<strlen(str2);k++)
                {
                    if(*(str2+k)==*(str1))          //check if selected bytes is delimiting byte
                    {
                        flag=1;
                        break;              //break twice if true ie .flag==1
                    }
                }
                if(flag==1)
                    break;
                else
                    str1++;                 //if not found go check next byte
            }
            return str1;                        //return the point where delimiting byte was found
        }
        

        【讨论】:

        • 你想说什么?请添加一些内容,以便人们了解您的建议。
        【解决方案6】:
        Here is the code which explains how strtok works in C.
        
        #include <stdio.h>
        #include <string.h>
        
        #define SIZE_OF_STRING 50 
        #define SIZE_OF_DELIMITER 5
        
        /* Make the temp_ptr as static, so it will hold the previous pointing address */
        static char *temp_ptr = NULL;
        
        char *string_token(char *str, char *delimiter)
        {
            char *final_ptr = NULL;  `enter code here`
            /* Flag has been defined as static to avoid the parent function loop
             * runs infinitely after reaching end of string.
             */ 
            static int flag = 0;
            int i, j;
        
            if (delimiter == NULL) {
                return NULL;
            }
        
            /* If flag is 1, it will denote the end of the string */
            if (flag == 1) {
                return NULL;
            }
        
            /* The below condition is to avoid temp_ptr is getting assigned 
             * with NULL string from the parent function main. Without
             * the below condition, NULL will be assigned to temp_ptr 
             * and final_ptr, so accessing these pointers will lead to
             * segmentation fault.
             */
            if (str != NULL) { 
                temp_ptr = str; 
            }
        
            /* Before function call ends, temp_ptr should move to the next char,
             * so we can't return the temp_ptr. Hence, we introduced a new pointer
             * final_ptr which points to temp_ptr.
             */
            final_ptr = temp_ptr;
        
            printf("%s %d str: %s delimiter: %s length: %ld temp_ptr: %s strlen: %ld"
                   " final_ptr: %s \n",__func__, __LINE__, str, delimiter, 
                   strlen(delimiter), temp_ptr, strlen(temp_ptr), final_ptr);
        
            for (i = 0; i <= strlen(temp_ptr); i++)
            {
                for (j = 0; j < strlen(delimiter); j++) {
        
                    if (temp_ptr[i] == '\0') {
                        /* If the flag is not set, both the temp_ptr and flag_ptr 
                         * will be holding string "Jasmine" which will make parent 
                         * to call this function string_token infinitely. 
                         */
                        flag = 1;
                        return final_ptr;
                    }
        
                    if ((temp_ptr[i] == delimiter[j])) {
                        /* NULL character is assigned, so that final_ptr will return 
                         * til NULL character. Here, final_ptr points to temp_ptr.
                         */
                        temp_ptr[i] = '\0';
                        temp_ptr += i+1;
                        return final_ptr;
                    }
                }
            }
            /* You will somehow end up here if for loop condition fails.
             * If this function doesn't return any char pointer, it will 
             * lead to segmentation fault in the parent function.
             */
            return NULL;
        }
        
        
        int main()
        {
            char str[SIZE_OF_STRING] = "shirley|Rose|Jasmine";
            char *token = NULL;
            char del[SIZE_OF_DELIMITER] = "|";
        
            token = string_token(str, del);
            while (token != NULL) {       
                printf("token %s\n", token);
                token = string_token(NULL, del);
            }
            return 0;
        }
        
        https://shirleyanengineer.blogspot.com/2019/11/write-your-own-strtok-in-c.html
        

        【讨论】:

          猜你喜欢
          • 1970-01-01
          • 1970-01-01
          • 1970-01-01
          • 1970-01-01
          • 1970-01-01
          • 2010-11-19
          • 1970-01-01
          • 2011-09-10
          • 2016-03-18
          相关资源
          最近更新 更多