【发布时间】:2014-05-14 04:56:14
【问题描述】:
我的代码中有以下几行:
DocumentNode *docNode = NULL;
initDocNode(docNode, docID, wordFreq);
这是 initDocNode 函数
void initDocNode(DocumentNode *docNode, int docID, int wordFreq) {
docNode = calloc(1, sizeof(DocumentNode));
if (!docNode)
fprintf(stderr, ....);
docNode->docID = docID;
docNode->wordFrequency = wordFreq;
}
我有一个创建 docNodes 的 while 循环,但有时我得到一个带有以下回溯的段错误。看起来这是在calloc中发生的事情。任何帮助理解这一点将不胜感激。
_int_malloc (av=av@entry=0x396fbb8760 <main_arena>, bytes=bytes@entry=16)
at malloc.c:3718
3718 set_head(remainder, remainder_size | PREV_INUSE);
(gdb) backtrace
_int_malloc (av=av@entry=0x396fbb8760 <main_arena>, bytes=bytes@entry=16)
at malloc.c:3718
0x000000396f88098a in __libc_calloc (n=<optimized out>, elem_size=<optimized out>)
at malloc.c:3187
0x0000000000402270 in initDocNode (docnode=0x0, docID=680, wordFreq=1)
at ../util/indexerUtils.c:59
0x000000000040147b in ReconstructIndex (hashtable=0x7fffc8c561f8,
wordToAdd=0x1c3eab0 "mode", docID=680, wordFreq=1) at src/query_engine.c:337
0x0000000000401209 in ReadFile (file=0x7fffc8c5932a "cs_lvl3.dat")
at src/query_engine.c:267
0x0000000000400eff in main (argc=2, argv=0x7fffc8c58998) at src/query_engine.c:147
我有一组单词,对于每个单词,我为包含该单词的文档创建 docNodes。仅当我处理包含一长串匹配文档的单词时,才会出现此段错误。我可以调用多少有限制吗?
这是读取包含单词及其匹配文档的文件并为数据创建哈希表的函数。该文件的内容采用以下形式: word number_of_matching_docs doc wordFreq doc wordFreq ...
HashTable *ReadFile(char *file){
FILE *fp = fopen(file, "r");
if (!fp) {
fprintf(stderr, "Error: Couldn't open file %s for reloading.\n", file);
return NULL;
}
HashTable *hashtable = calloc(1, sizeof(HashTable));
if (!hashtable) {
fprintf(stderr, "Error: Couldn't allocate memory for hashtable.\n");
return NULL;
}
char c;
bool done = false;
while (1){
int wordLength = 0;
//keep reading until you find a space
//this is to determine the length of the word
while ((c = fgetc(fp)) != ' '){
if (c == EOF){
// but if c is EOF, then we're done reading the file
done = true;
// break out of the loop counting word lenght
break;
}
wordLength++;
}
if (done){
// break out of loop reading file
break;
}
// now allocate memory for the word and null character
char *currWord = calloc(wordLength + 1, sizeof(char));
if (!currWord){
fprintf(stderr, "Failed to allocate memory to store %s\n", currWord);
continue;
}
//now reverse the pointer to the beginning of the word
fseek(fp, -(wordLength + 1), SEEK_CUR);
int numFiles;
// now read the word. If it's unsuccessful, then there's no more lines. exit
fscanf(fp, "%s %d", currWord, &numFiles);
printf("Processing %s\n", currWord);
int i = 0;
while (numFiles--) {
int docID;
int wordCount;
if (!fscanf(fp, " %d %d", &docID, &wordCount)) {
fprintf(stderr, "Error: Couldn't process document for word, %s.\n", currWord);
free(currWord);
continue;
}
if (!ReconstructIndex(&hashtable, currWord, docID, wordCount)) {
fprintf(stderr, "Error: Couldn't reconstruct index for word, %s\n", currWord);
free(currWord);
continue;
}
printf("%s: just processed %d document\n", currWord, i);
i++;
}
}
fclose(fp);
return hashtable;
}
这里是 ReconstructIndex()
int ReconstructIndex(HashTable **hashtable, char* wordToAdd, int docID, int wordFreq) {
//get the hash index
int hashIndex = JenkinsHash(wordToAdd, MAX_HASH_SLOT);
// if hash index is not taken
if ((*hashtable)->table[hashIndex] == NULL) {
// make document node
DocumentNode *newDocNode = NULL;
// newDocNode = initDocNode(newDocNode, docID, wordFreq);
initDocNode(&newDocNode, docID, wordFreq);
if (!newDocNode) {
fprintf(stderr, "Failed to make DocumentNode for %s.\n", wordToAdd);
return 0;
}
// make word node with this document node
WordNode *newWordNode = NULL;
newWordNode = initWordNode(newWordNode, wordToAdd, newDocNode);
if (!newWordNode) {
fprintf(stderr, "Failed to make WordNode for %s.\n", wordToAdd);
free(newDocNode);
return 0;
}
// make hash table node with this word node
HashTableNode *newHTNode = NULL;
newHTNode = initHashTNode(newHTNode, (void*)newWordNode, NULL);
if (!newHTNode) {
fprintf(stderr, "Failed to make HashTableNode for %s.\n", wordToAdd);
free(newDocNode);
free(newWordNode->word);
free(newWordNode);
return 0;
}
// put hashtablenode into table at the hash index
(*hashtable)->table[hashIndex] = newHTNode;
return 1;
}
// if hash index is taken
else {
// find word
HashTableNode *currHTNode = (*hashtable)->table[hashIndex];
int inHashTable = 0;
while (currHTNode) {
WordNode * currWordNode = (WordNode *)(currHTNode->hashKey);
if (strcmp(wordToAdd, currWordNode->word) == 0){
inHashTable = 1;
break;
}
currHTNode = currHTNode->next;
}
// if word was found
if (inHashTable) {
WordNode *currWordNode = (WordNode *)(currHTNode->hashKey);
// add document to this word's listing in hash index
// make new document node
DocumentNode *newDocNode = NULL;
// newDocNode = initDocNode(newDocNode, docID, wordFreq);
initDocNode(&newDocNode, docID, wordFreq);
if (!newDocNode) {
fprintf(stderr, "Failed to make DocumentNode for %s.\n", wordToAdd);
return 0;
}
// append this new doc node to back of other document nodes
DocumentNode *lastDocNode = currWordNode->doc;
while (lastDocNode->next) {
lastDocNode = lastDocNode->next;
}
lastDocNode->next = newDocNode;
// free(wordToAdd); // causes seg fault
return 1;
}
// if word was not found
else {
// add word node to hashtable at this index
// make new document node
DocumentNode *newDocNode = NULL;
// newDocNode = initDocNode(newDocNode, docID, wordFreq);
initDocNode(&newDocNode, docID, wordFreq);
if (!newDocNode) {
fprintf(stderr, "Failed to make DocumentNode for %s.\n", wordToAdd);
return 0;
}
// make word node with this document node
WordNode *newWordNode = NULL;
newWordNode = initWordNode(newWordNode, wordToAdd, newDocNode);
if (!newWordNode) {
fprintf(stderr, "Failed to make WordNode for %s.\n", wordToAdd);
free(newDocNode);
return 0;
}
// make hash table node with this word node
HashTableNode *newHTNode = NULL;
newHTNode = initHashTNode(newHTNode, (void*)newWordNode, NULL);
if (!newHTNode) {
fprintf(stderr, "Failed to make HashTableNode for %s.\n", wordToAdd);
free(newDocNode);
free(newWordNode->word);
free(newWordNode);
return 0;
}
// append this new hashtable node to end of other hashtable nodes at hash index
HashTableNode *lastHTNode = (*hashtable)->table[hashIndex];
while (lastHTNode->next) {
lastHTNode = lastHTNode->next;
}
lastHTNode->next = newHTNode;
return 1;
}
}
}
【问题讨论】:
-
你能贴出while循环的代码吗?您可能会陷入无限循环并耗尽内存。
-
我将在几秒钟内添加 while 循环。不过,我认为我没有无限循环。我添加了一个打印语句来查看我的进度,我注意到在为特定单词的第 678 个文档调用 initDocNode 后我得到了段错误
-
附带说明,您的
initDocNode函数对调用者没有影响。指针是按值传递的,所以docNode =赋值只影响参数 - 它不会影响调用者中的变量。 -
@Andrew:故障信息是什么意思?是不是:0 _int_malloc (av=av@entry=0x396fbb8760
, bytes=bytes@entry=16) at malloc.c:3718 3718 set_head(remainder, remaining_size | PREV_INUSE); -
char c;应该是int c;。像您一样使用fseek可能不可靠,请尝试重做您的逻辑以避免fseek。尝试在valgrind下运行以查找溢出。
标签: c segmentation-fault malloc calloc