从文本（字符数组）中提取单词并将它们放入二维数组中的函数答案

【问题标题】：Function that extracts words from text ( array of chars ) and put them in 2 dimensions array从文本（字符数组）中提取单词并将它们放入二维数组中的函数
【发布时间】：2016-09-30 12:03:17
【问题描述】：

我正在学习 C，遇到了一些困难。我必须编写一个程序，它接收一段文本（最多 80 个字符），把文本中的单词放入 char words[80][80] 中（每个单词在这个数组中只能出现一次！该数组被定义为全局变量），并把每个单词在文本中出现的次数存入 int count[]（下标必须与该单词在 words[][] 中的下标相同）。这个函数的原型是 int extract_and_count(char *source,int *count)。我写了一些代码，但不确定该如何实现这个功能。有人可以帮我吗？另外，我是 stackoverflow 的新手，如果有不妥之处，请多包涵。

这是一些代码，但还没有结束：

 // Asker's unfinished first attempt: splits `source` on spaces into a local
 // table and prints each token; the counting half below is incomplete.
 // NOTE(review): `count` is never written and the function always returns 1.
 int extract_and_count(char *source,int *count){
  char token[80][80];
  char *p;
  int i = 0;
  // strtok overwrites each delimiter with '\0', so `source` is modified in place.
  p = strtok(source, " ");
  while( p != NULL ){
    // NOTE(review): nothing here bounds i or the length of the copied token.
    strcpy(token[i],p);
    printf("%s\n",*(token+i));
    i++;
    p = strtok(NULL , " ");
  }
  // NOTE(review): `word` is a single char, yet it is assigned a char array
  // (token[i]) and then passed to strcmp, which expects a string pointer.
  char word;
  int value = 0, j;
  // NOTE(review): i is reset here, so the number of tokens actually stored is
  // lost and rows of `token` that were never filled are read as well.
  for(i = 0 ; i < 80 ; i++){
    word = token[i];
    for(j = 0 ; j < 80 ; j++){
      // NOTE(review): compares against token[i] rather than token[j].
      if(strcmp(word,token[i])==0){
    value++;
      }
    }

  }
  // NOTE(review): `value` is accumulated above but never stored anywhere.
  return 1;
}

【问题讨论】：

请在此处发布您的代码
我在问题中放了一些代码，但效果不好，根本不起作用。这只是一个开始版本。我创建了一个新数组来存储所有单词然后我想要计算 token[][] 中单词的值，并将它们存储在 words[][] 和 count[] 中。
我还是不明白你的目的。另外，我不明白extract_and_count的输入参数是什么意思，单词分隔符是什么，以及你想如何排列数组中的单词
extract_and_count 接收 char *source，它是一个包含文本的数组（最多 80 个字符）。我必须以空格 " " 为分隔符从文本中取出单词，把它们（每个单词只放一次）存入 words[80][80]，并在 int count[] 中存储每个单词在文本中出现的次数。示例：text : "c language is difficult.c is also fun"，则 words[][] = { "c","language","is","difficult","also","fun"}; count[] = {2,1,2,1,1,1}，即 words[0] = "c"; count[0] = 2
如果你每次传递一个 80 字符的输入，你必须保持对数组中当前位置的计数，并在调用之间改变它。我想你还没有完全理解这个任务。请仔细阅读，直到您清楚为止。

标签： c arrays

【解决方案1】：

您需要检查是否已经找到了一个单词。如果是这样，只需增加全局计数器。否则，将新单词复制到全局字符串数组中。

类似：

#include <stdio.h>
#include <string.h>

// Global result tables: word[k] is the k-th distinct word that was found and
// count[k] is the number of times that word occurred in the parsed text.
char word[80][81];
int  count[80] = { 0 };

/*
 * Split `source` on spaces, store every distinct word once in the global
 * `word` table and its number of occurrences in the global `count` table.
 *
 * source      - writable NUL-terminated text; strtok overwrites each
 *               delimiter with '\0', so the buffer is modified.
 * strings_cnt - receives the number of distinct words stored.
 *
 * Returns 1 when the whole text was processed, 0 when an argument is NULL
 * or when a token had to be dropped (more tokens than the local table can
 * hold, or a token too long for one row).
 */
int extract_and_count(char *source,int *strings_cnt){
  char token[80][81];
  const int max_tokens = (int)(sizeof token / sizeof token[0]);
  const size_t max_len = sizeof token[0] - 1;
  int complete = 1;
  int i = 0;
  char *p;

  if (strings_cnt == NULL)
    return 0;
  *strings_cnt = 0;
  if (source == NULL)
    return 0;

  // Find all words in the input string. Tokens that do not fit are skipped
  // instead of being copied past the end of the table.
  for (p = strtok(source, " "); p != NULL; p = strtok(NULL, " ")) {
    if (i == max_tokens || strlen(p) > max_len) {
      complete = 0;
      continue;
    }
    strcpy(token[i], p);
    i++;
  }

  // Find unique words and count how often each one is repeated.
  // At most max_tokens tokens were kept, so `word` and `count` (80 rows
  // each) cannot overflow.
  int j,k;
  for (j = 0; j < i; j++) {

    // Look the token up among the words recorded so far
    for (k = 0; k < *strings_cnt; k++) {
      if (strcmp(word[k], token[j]) == 0)
        break;
    }

    if (k < *strings_cnt) {
      // The word already exists - increment count
      count[k]++;
    } else {
      // New word - copy it and set count to 1
      strcpy(word[*strings_cnt], token[j]);
      count[*strings_cnt] = 1;
      (*strings_cnt)++;
    }
  }

  return complete;
}

// Demo driver: runs extract_and_count on a fixed sentence and prints every
// distinct word with the number of times it occurred.
int main(void)
{
  char text[] = "c language is difficult c is also fun";
  int distinct;

  // Print the sentence first: extract_and_count tokenises it in place.
  printf("Searching: %s\n", text);

  extract_and_count(text, &distinct);

  printf("Found %d different words\n", distinct);

  int idx = 0;
  while (idx < distinct)
  {
    printf("%d times: %s\n", count[idx], word[idx]);
    idx++;
  }
  return 0;
}

输出：

Searching: c language is difficult c is also fun
Found 6 different words
2 times: c
1 times: language
2 times: is
1 times: difficult
1 times: also
1 times: fun

以上代码我尽量沿用了您的代码风格，但我还想补充以下几点说明：

1) 你并不需要token 数组。可以更改第一个循环，以便直接更新最终结果。

2) 不要使用全局变量

3) 代码无法处理常见的标点分隔符，如 , . : 等等

4) 你应该把单词和计数放到一个结构中。

考虑到注释 1,2 和 4，代码可能是：

#include <stdio.h>
#include <string.h>

// One distinct word together with the number of times it was seen.
struct WordStat
{
  char word[81];
  int count;
};


/*
 * Split `source` on spaces and record each distinct word, with its number
 * of occurrences, in the caller-supplied table `ws`.
 *
 * source      - writable NUL-terminated text; strtok overwrites each
 *               delimiter with '\0', so the buffer is modified.
 * strings_cnt - receives the number of entries of `ws` that were filled.
 * ws          - table with room for `max` entries.
 * max         - capacity of `ws`; never more than `max` entries are written.
 *
 * Returns 1 when every word was recorded, 0 when an argument is NULL or a
 * word had to be dropped (table full, or word longer than ws->word can hold).
 * Words already present in the table are still counted after it is full.
 */
int extract_and_count(char *source,int *strings_cnt, struct WordStat* ws, int max){
  int complete = 1;
  char *p;

  if (strings_cnt == NULL)
    return 0;
  *strings_cnt = 0;
  if (source == NULL)
    return 0;
  if (ws == NULL || max < 0)
    max = 0;  // no usable table: every word will be reported as dropped

  // Walk over all words in the input string
  for (p = strtok(source, " "); p != NULL; p = strtok(NULL, " ")) {
    int k;

    // Check if the word is already recorded
    for (k = 0; k < *strings_cnt; k++) {
      if (strcmp(ws[k].word, p) == 0)
        break;
    }

    if (k < *strings_cnt) {
      // The word already exists - increment count
      ws[k].count++;
      continue;
    }

    // New word - only store it when both the table and the row have room
    if (*strings_cnt >= max || strlen(p) >= sizeof ws->word) {
      complete = 0;
      continue;
    }
    strcpy(ws[*strings_cnt].word, p);
    ws[*strings_cnt].count = 1;
    (*strings_cnt)++;
  }

  return complete;
}

// Capacity of the statistics table used by the demo below.
#define MAX_WORDS 80

// Demo driver: collects word statistics for a fixed sentence into a local
// table and prints one line per distinct word.
int main(void)
{
  char text[] = "c language is difficult c is also fun";
  struct WordStat stats[MAX_WORDS];
  int distinct;

  // Print the sentence first: extract_and_count tokenises it in place.
  printf("Searching: %s\n", text);

  extract_and_count(text, &distinct, stats, MAX_WORDS);

  printf("Found %d different words\n", distinct);

  int idx = 0;
  while (idx < distinct)
  {
    printf("%d times: %s\n", stats[idx].count, stats[idx].word);
    idx++;
  }
  return 0;
}

【讨论】：

谢谢！这正是我想要的。
我希望代码符合原型 int extract_and_count(char *source,int *count)，其中 source 是 char[]，而 count 是记录每个单词重复次数的 int 数组

【解决方案2】：

 while( p != NULL ){
    strcpy(token[i],p);
    printf("%s\n",*(token+i));
    i++;
    p = strtok(NULL , " ");   --> here you are just splitting the words
  }

现在 token 数组中保存的是拆分出来的所有单词（包括重复的），并不满足你“每个单词只出现一次”的要求。你可以把不重复的单词比较后复制到另一个数组中，并在同一个循环里对计数数组进行计数和更新。

注意：不应该用单个计数器变量来统计所有单词；应当使用计数数组，为每个单词分别计数。

【讨论】：

【解决方案3】：

#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>

// Number of rows in the words table (main sizes its count array the same way).
#define NUM_OF_WORDS_MAX 80
// Longest word that fits in one row; rows are MAX_WORD_LENGTH+1 bytes to leave room for the NUL.
#define MAX_WORD_LENGTH  79
// Two-step stringification: S(x) expands x first, then S_ turns the result into a string literal.
#define S_(x) #x
#define S(x) S_(x) // e.g. S(MAX_WORD_LENGTH) yields "79"

// Word table; a row whose first byte is '\0' is treated as an empty slot.
char words[NUM_OF_WORDS_MAX][MAX_WORD_LENGTH+1];
// Number of rows of words in use; extract_and_count increments it each time it stores a new word.
int Words_entry = 0;

/*
 * Map a word to the slot where probing of the words table starts, based on
 * its first letter, ignoring case: 'a' -> 0, 'b' -> 3, ... 'z' -> 75.
 * A first byte that is not a letter a-z maps to slot 0, so the result is
 * always a valid, non-negative table index.
 */
static inline int hash(const char *str){
    // <ctype.h> functions need a value representable as unsigned char (or EOF)
    int letter = tolower((unsigned char)*str) - 'a';
    if(letter < 0 || letter >= 26)
        return 0;
    return letter*3;//3:(NUM_OF_WORDS_MAX / 26), 26 : a-z
}

/*
 * Cut the next word (a maximal run of alphabetic characters) out of the
 * text at *sp.
 *
 * The character following the word is overwritten with '\0' and *sp is
 * advanced past it, so repeated calls walk through the text. When the word
 * ends at the end of the text, *sp is left pointing at the terminator.
 *
 * Returns a pointer to the NUL-terminated word inside the original buffer,
 * or NULL when no alphabetic character is left (or sp / *sp is NULL); in
 * that case *sp is not modified.
 */
char *extract(char **sp){//extract word
    if(sp == NULL || *sp == NULL)
        return NULL;

    char *p = *sp;
    // <ctype.h> functions need a value representable as unsigned char (or
    // EOF), so cast: a plain char may be negative for bytes >= 0x80.
    while(*p && !isalpha((unsigned char)*p))//skip not alpha
        ++p;
    if(!*p)
        return NULL;

    char *ret = p;//first letter of the word
    while(isalpha((unsigned char)*p))//skip alpha; stops at '\0' as well
        ++p;

    if(*p){
        *p = '\0';//terminate the word in place
        ++p;      //rest of the text starts after the terminator
    }
    *sp = p;

    return ret;
}

/*
 * Tally every word of `source` in the global `words` table and the parallel
 * `count` array.
 *
 * Each word obtained from extract() is looked up starting at slot
 * hash(word), moving to the next slot (wrapping around) while the slot
 * holds a different word. Comparison ignores case; a new word is stored
 * with the spelling of its first occurrence.
 *
 * source - writable text; extract() is called on it repeatedly.
 * count  - array indexed like `words`; count[i] belongs to words[i].
 *
 * Returns the number of words tallied by this call. Stops early, after
 * printing a message to stderr, when a new word finds no free slot. Words
 * already in the table are still counted when the table is full.
 */
int extract_and_count(char *source, int *count){
    if(source == NULL || count == NULL)
        return 0;

    char *sp = source;
    char *word;
    int word_count = 0;

    while((word = extract(&sp)) != NULL){
        // A word that does not fit in one row cannot be stored safely.
        if(strlen(word) >= sizeof words[0]){
            fprintf(stderr, "word is too long, skipped.\n");
            continue;
        }

        int index = hash(word);
        if(index < 0 || index >= NUM_OF_WORDS_MAX)
            index = 0;//never index outside the table

        // Visit each slot at most once so a full table cannot loop forever.
        int probes;
        for(probes = 0; probes < NUM_OF_WORDS_MAX; ++probes){
            if(!*words[index]){//empty slot: first occurrence of the word
                strcpy(words[index], word);
                count[index] = 1;
                ++Words_entry;
                break;
            }
            if(strcasecmp(words[index], word) == 0){//ignore case
                ++count[index];
                break;
            }
            if(++index == NUM_OF_WORDS_MAX){
                index = 0;
            }
        }
        if(probes == NUM_OF_WORDS_MAX){//no match and no free slot
            fprintf(stderr, "words table is full.\n");
            return word_count;
        }
        ++word_count;
    }
    return word_count;
}

// Reads text from standard input one line at a time, tallies its words and
// finally prints every occupied slot of the words table with its count.
int main(void){
    char line[MAX_WORD_LENGTH+1];
    int tally[NUM_OF_WORDS_MAX] = {0};

    // The scanset needs at least one non-newline character, so an empty
    // line (or end of input) makes scanf return something other than 1.
    for(;;){
        int fields = scanf("%" S(MAX_WORD_LENGTH) "[^\n]%*c", line);
        if(fields != 1)
            break;
        extract_and_count(line, tally);
    }

    //print result
    int slot = 0;
    while(slot < NUM_OF_WORDS_MAX){
        if(words[slot][0] != '\0'){
            printf("%s : %d\n", words[slot], tally[slot]);
        }
        ++slot;
    }
    return 0;
}

【讨论】：