【发布时间】:2015-11-25 17:34:11
【问题描述】:
我知道如何统计单词。我使用的是一个由 struct 组成的链表,每个节点保存一个单词及其出现次数。它对小文件有效,但需要我预先定义最大文本长度。而据我所知,文本文件的大小可能超过数 GB。如何修改代码,使其不再需要 #define MAX_TEXT_LENGTH?我是否应该使用 malloc()?如果是,应该对什么使用 malloc()?最终目标是把所有单词按字母顺序排序,并按出现频率打印,不过在读入单词并完成计数之后,这应该很容易。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define MAX_WORD 512
#define MAX_TEXT_LENGTH 10000
/* One node of the singly linked list of counted words. */
struct word
{
    char *str;          /* NUL-terminated text of the word (allocated by createWordCounter) */
    int freq;           /* How many times this word has been seen */
    struct word *pNext; /* Next node in the list, or NULL at the tail */
};
typedef struct word Word;
// ===========================================
// FUNCTION PROTOTYPES
//============================================
/* Reads the next alphabetic word from fp into buf (lower-cased, at most
 * bufsize - 1 characters kept); returns 1 if a word was read, 0 at end of input. */
int getNextWord(FILE *fp, char *buf, int bufsize);
/* Adds a word to the global list, or increments the count of an existing entry. */
void addWord(char *pWord);
/* Prints one word and its number of occurrences to stdout. */
void show(Word *pWordcounter);
/* Allocates a new list node holding a copy of word with a count of 1. */
Word* createWordCounter(char *word);
// ===========================================
// GLOBAL VARIABLES
//============================================
/* Head of the linked list of counted words; NULL while the list is empty.
 * addWord() appends to this list and main() walks and frees it. */
Word *pStart = NULL;
/* Total number of words read; main() increments it once per word returned by getNextWord(). */
int totalcount = 0;
/* Number of distinct words; main() computes it by counting the nodes of the list. */
int uniquecount = 0;
// ===========================================
// MAIN
//============================================
/*
 * Program entry point: counts the words of the text file named by argv[1].
 *
 * Reads the file word by word, records each word in the global list via
 * addWord(), then prints the total number of words, the number of unique
 * words, and every word with its frequency. The input is streamed one word
 * at a time, so no limit on the overall text length is needed; only a single
 * word is bounded (by MAX_WORD, longer words are truncated by getNextWord()).
 *
 * Returns 0 on success, EXIT_FAILURE when no file name was given or the file
 * could not be opened.
 */
int main (int argc, char *argv[]) {
    char buf[MAX_WORD];     /* Holds one word at a time */
    Word *pCounter = NULL;  /* Cursor used to walk the word list */
    FILE *fp = NULL;        /* Input text file */

    if (argc < 2) {
        fprintf(stderr, "usage: %s <textfile>\n", argc > 0 ? argv[0] : "wordcount");
        return EXIT_FAILURE;
    }

    fp = fopen(argv[1], "r");
    if (fp == NULL) {
        perror(argv[1]);
        return EXIT_FAILURE;
    }

    /* Read all words from the text file. The capacity handed to getNextWord()
     * must be the real size of buf, otherwise a long word overruns the stack. */
    while (getNextWord(fp, buf, (int)sizeof buf)) {
        addWord(buf);
        totalcount++;
    }

    /* Every node of the list is one unique word */
    for (pCounter = pStart; pCounter != NULL; pCounter = pCounter->pNext) {
        uniquecount++;
    }

    /* Print summary */
    printf("\nSUMMARY:\n\n");
    printf(" %d words\n", totalcount);
    printf(" %d unique words\n", uniquecount);

    /* List the words and their counts */
    for (pCounter = pStart; pCounter != NULL; pCounter = pCounter->pNext) {
        show(pCounter);
    }
    printf("\n");

    /* Free the list; remember the successor before releasing a node */
    pCounter = pStart;
    while (pCounter != NULL) {
        Word *pNextNode = pCounter->pNext;
        free(pCounter->str);
        free(pCounter);
        pCounter = pNextNode;
    }
    pStart = NULL; /* Do not leave the global head pointing at freed memory */

    fclose(fp);
    return 0;
}
// ===========================================
// FUNCTIONS
//============================================
/*
 * Prints a single list entry on its own line: the word left-aligned in a
 * 30-character column followed by its frequency right-aligned in 5 columns.
 * The newline is emitted before the entry, not after it.
 */
void show(Word *pWordcounter)
{
    const char *text = pWordcounter->str;
    int occurrences = pWordcounter->freq;

    printf("\n%-30s %5d", text, occurrences);
}
/*
 * Records one occurrence of word in the global list headed by pStart.
 *
 * If an entry with the same text already exists its frequency is incremented;
 * otherwise a new entry created by createWordCounter() is appended at the
 * tail of the list (or becomes the head when the list is empty).
 */
void addWord(char *word)
{
    /* ppLink always addresses the pointer that leads to the current node:
     * first pStart itself, then each node's pNext field. When the walk runs
     * off the end, it addresses exactly the slot a new node must be stored in. */
    Word **ppLink = &pStart;

    while (*ppLink != NULL)
    {
        Word *pNode = *ppLink;

        if (strcmp(word, pNode->str) == 0)
        {
            /* Known word: just bump its count */
            pNode->freq += 1;
            return;
        }
        ppLink = &pNode->pNext;
    }

    /* Word is not in the list yet: append it */
    *ppLink = createWordCounter(word);
}
/*
 * Allocates a new list node for word.
 *
 * The node owns a private heap copy of the text, starts with a frequency of 1
 * and has no successor. The caller takes ownership and must eventually free
 * both node->str and the node itself.
 *
 * Returns the new node, or NULL if word is NULL or memory could not be
 * allocated (nothing is leaked in that case).
 */
Word* createWordCounter(char *word)
{
    Word *pCounter = NULL;
    size_t len = 0;

    if (word == NULL)
        return NULL;

    pCounter = malloc(sizeof *pCounter);
    if (pCounter == NULL)
        return NULL;

    len = strlen(word);
    pCounter->str = malloc(len + 1);
    if (pCounter->str == NULL)
    {
        /* Release the half-built node so the failure does not leak it */
        free(pCounter);
        return NULL;
    }

    memcpy(pCounter->str, word, len + 1); /* Copies the terminating NUL too */
    pCounter->freq = 1;
    pCounter->pNext = NULL;
    return pCounter;
}
/*
 * Reads the next word from fp into buf.
 *
 * A word is a maximal run of alphabetic characters (as classified by
 * isalpha()); any other characters are treated as separators and skipped.
 * The word is stored lower-cased and NUL-terminated. At most bufsize - 1
 * characters are kept; the remainder of an over-long word is read and
 * discarded. The character that ends the word is consumed as well.
 *
 * fp      - stream to read from
 * buf     - destination buffer
 * bufsize - capacity of buf in bytes, including room for the terminator
 *
 * Returns 1 if a word was stored in buf, 0 if the end of input was reached
 * before any word started (or if the arguments are unusable).
 */
int getNextWord(FILE *fp, char *buf, int bufsize) {
    char *p = buf;
    /* fgetc() returns an int: every unsigned char value plus the distinct
     * value EOF. Keeping it in a char would confuse byte 0xFF with EOF (or
     * never see EOF at all) and hand negative values to isalpha(). */
    int c;

    /* Without room for at least the terminator nothing can be stored safely */
    if (fp == NULL || buf == NULL || bufsize < 1)
        return 0;

    /* Skip all non-word characters */
    do
    {
        c = fgetc(fp);
        if (c == EOF)
            return 0;
    } while (!isalpha(c));

    /* Read word characters, silently dropping those that do not fit */
    do
    {
        if (p - buf < bufsize - 1)
            *p++ = (char)tolower(c);
        c = fgetc(fp);
    } while (c != EOF && isalpha(c));

    /* Finalize word */
    *p = '\0';
    return 1;
}
【问题讨论】:
-
否决票是什么意思...我问了重复的问题还是什么?
标签: c memory text count linked-list