拼写检查程序，检查功能拼写器 cs50答案

【问题标题】：Spell checker program, check function speller cs50拼写检查程序，检查功能拼写器 cs50
【发布时间】：2020-04-17 07:54:01
【问题描述】：

这个程序是一个拼写检查器：它先读取字典文件并加载到哈希表中，然后读取另一个待检查的文本文件，逐个检查其中的单词是否在哈希表中；如果不在，则该单词被视为拼写错误。除了 check 函数之外，我的所有函数似乎都能正常工作：运行程序时，拼写错误的单词数总是与文本中的单词总数相同。之前程序是可以工作的，但我更换了哈希函数，因为据说这个哈希函数能更好地把值分散到不同的索引上；而在仅仅更换哈希函数之后，check 函数就不再起作用了。

// Implements a dictionary's functionality
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>

#include "dictionary.h"

// One entry in a bucket's singly linked chain
typedef struct node {
    char word[LENGTH + 1];  // the stored word: up to LENGTH characters plus '\0'
    struct node *next;      // following entry in the same chain, NULL at the end
} node;

// Number of buckets in hash table
// NOTE(review): in C a const-qualified object is not an integer constant
// expression, so using N as the size of the file-scope array below is not
// valid standard C (it is accepted in C++).
const unsigned int N = 200000;

// Hash table: table[i] is the head of the chain for bucket i
node *table[N];

// Returns true if word is in dictionary else false.
// Intended flow: build a lowercase copy of word, hash the copy to pick a
// bucket, then walk that bucket's chain comparing entries with strcmp.
bool check(const char *word)
{
    int len = strlen(word);
    // Variable-length buffer sized for the lowercase copy plus its '\0'
    char copy[len + 1];
    // change into lowercase the word
    // NOTE(review): the condition compares the index i (which starts at 0) with
    // '\0' (which is also 0), so the loop body never runs and copy is left
    // uninitialized.
    for (int i = 0; i != '\0'; i++)
    {
        copy[i] = tolower(word[i]);
    }
    // NOTE(review): copy is never given a terminating '\0' before it is passed
    // to hash() and strcmp(), both of which read it as a string.
    // get the index by using the hash function
    int index = hash(copy);
    // An empty bucket means the word cannot be present
    if (table[index] == NULL)
    {
        return false;
    }

    node *tmp = table[index];
    // check if the word is in the hash table
    while (tmp != NULL)
    {
        if (strcmp(tmp->word, copy) == 0)
        {
            return true;
        }

        tmp = tmp->next;
    }

    // Reached the end of the chain without a match
    return false;
}

// Maps word to a bucket index in the range [0, N).
// Every character is folded in by shifting the running value left two bits
// and XOR-ing the character into it.
unsigned int hash(const char *word)
{
    /* credits to...
     *https://www.reddit.com/r/cs50/comments/1x6vc8/pset6_trie_vs_hashtable/
     */
    int remaining = strlen(word);
    unsigned int acc = 0;
    const char *p = word;

    while (remaining > 0) {
        acc <<= 2;
        acc ^= *p;
        p++;
        remaining--;
    }

    // Reduce to a valid bucket number
    return acc % N;
}

// Loads dictionary into memory, returning true if successful else false.
// Reads the file line by line with fgets and pushes each line onto the front
// of the chain for the bucket chosen by hash().
bool load(const char *dictionary)
{
    // Line buffer for one dictionary entry
    char *words = malloc(sizeof(char) * (LENGTH + 1));
    if (words == NULL)
    {
        // NOTE(review): 1 converts to true in a bool function, so this
        // allocation failure is reported to the caller as success.
        return 1;
    }
    // initialize the hash table to NULL
    for (int i = 0; i < N; i++)
    {
        table[i] = NULL;
    }

    // open dictionary file
    // NOTE(review): the result of fopen is not checked before it is handed to fgets.
    FILE *indata = fopen(dictionary, "r");

    // 1 character for '\0' and another for '\n' because fgets takes a trailing new line
    // when it reads 'man' the value of words will be "man\n\0" so meaning 2 extra characters
    // NOTE(review): the size passed here (LENGTH + 2) is one byte larger than
    // the LENGTH + 1 bytes allocated for words above.
    while (fgets(words, LENGTH + 2, indata) != NULL)
    {
        // get the index by using the hash function
        // NOTE(review): at this point words still holds any newline fgets stored,
        // so the bucket is computed from a different string than the one saved
        // in the node below.
        int index = hash(words);
        // allocate memory for the newNode
        node *newNode = malloc(sizeof(node));
        if (newNode == NULL)
        {
            // NOTE(review): returns without freeing words or closing indata.
            return false;
        }

        // get rid of the trailing new line from fgets
        // NOTE(review): the last character is overwritten unconditionally, even
        // when it is not a newline.
        words[strlen(words) - 1] = '\0';
        strcpy(newNode->word, words);
        // make the newNode the head of the list
        newNode->next = table[index];
        table[index] = newNode;
    }

    // free memory and close the opened file
    free(words);
    fclose(indata);
    return true;
}

// Returns number of words in dictionary if loaded else 0 if not yet loaded.
// The count is obtained by walking every chain in the table.
unsigned int size(void)
{
    unsigned int total = 0;

    for (unsigned int bucket = 0; bucket < N; bucket++) {
        // Visit each entry chained off this bucket
        for (const node *entry = table[bucket]; entry != NULL; entry = entry->next) {
            total++;
        }
    }

    return total;
}

// Unloads dictionary from memory, returning true if successful else false
bool unload(void)
{
    // TODO
    // loop through the whole hash table
    for (int i = 0; i < N; i++)
    {
        while (table[i] != NULL)
        {
            node *tmp = table[i]->next;
            free(table[i]);
            table[i] = tmp;
        }
    }
    return true;
}

【问题讨论】：

建议您在 check() 中用 '\0' 终止 copy。
你的意思是我的for循环吗？ i != '\0' 我已经改变了它我使用了 word[i] != '\0' 甚至 i
您的 hash() 需要以 '\0' 结尾的字符串，而您似乎没有用 '\0' 终止 copy。我同意 i != '\0' 是一个错误……我之前漏看了那个。

标签： c hash hashtable cs50 hash-function

【解决方案1】：

您的代码中存在多个问题：

node *table[N]; 的定义在 C 中无效，因为 N 必须是编译时常量表达式。 const unsigned int N = 200000; 在 C++ 中符合此约束，但在 C 中不符合。N 必须是宏或 enum 定义。
在check()中，将字符串复制为小写的循环不正确：for (int i = 0; i != '\0'; i++)应该是for (int i = 0; word[i] != '\0'; i++)
在 check() 中，您没有给在 copy 中构建的字符串加上终止符。copy 是一个局部变长数组（char copy[len + 1]），其内容未初始化，因此必须显式写入空终止符 '\0'。
tolower(word[i]) 中的 char 参数必须转换为 unsigned char，即写成 tolower((unsigned char)word[i])；否则，如果 char 在您的平台上是有符号类型，遇到负值的 char 时会出现未定义行为。
在 load() 中，words 数组只分配了 LENGTH+1 字节，但您把 LENGTH+2 作为缓冲区大小传给了 fgets；如果字典中某一行恰好有 LENGTH 个字符，fgets 会写入 LENGTH+2 字节（单词、换行符和 '\0'），造成缓冲区溢出，属于未定义行为。
在 load() 中，hash(words) 在删除行尾换行符之前就被调用了。因此哈希值是连同换行符一起计算的，单词被存进了错误的桶，check() 也就无法在字典中找到该单词。

这是修改后的版本：

// Implements a dictionary's functionality
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>

#include "dictionary.h"

// Entry of a singly linked chain hanging off one hash-table bucket
struct node
{
    struct node *next;      // next entry in the same bucket, NULL at the tail
    char word[LENGTH + 1];  // the word itself: up to LENGTH characters plus '\0'
};
typedef struct node node;

// Number of buckets in hash table.
// Defined as a macro so that it is an integer constant expression, which the
// size of the file-scope array below must be.
#define HASH_SIZE  200000

// Hash table: table[i] is the head of the chain for bucket i (NULL when empty).
// The array has static storage duration, so every bucket starts out as NULL
// without explicit initialization.
node *table[HASH_SIZE];

// Maps a '\0'-terminated word to a bucket index in the range [0, HASH_SIZE).
// Each character is mixed in by shifting the running value left two bits and
// XOR-ing the character into it.
unsigned int hash(const char *word) {
    /* credits to...
     *https://www.reddit.com/r/cs50/comments/1x6vc8/pset6_trie_vs_hashtable/
     */
    unsigned int acc = 0;
    const char *p = word;

    while (*p != '\0') {
        acc = (acc << 2) ^ *p;
        p++;
    }

    // Reduce to a valid bucket number
    return acc % HASH_SIZE;
}

// Returns true if word is in dictionary else false.
// The lookup is case-insensitive on the input side: word is lowercased into a
// local buffer before it is hashed and compared against stored entries.
bool check(const char *word) {
    char lowered[LENGTH + 1];
    size_t n = 0;

    // Lowercase while copying; give up as soon as the word turns out to be
    // longer than LENGTH characters, because it would not fit in the buffer
    while (word[n] != '\0') {
        if (n == LENGTH) {
            return false;
        }
        // The unsigned char cast keeps tolower defined for negative char values
        lowered[n] = (char)tolower((unsigned char)word[n]);
        n++;
    }
    lowered[n] = '\0';

    // Walk the chain of the bucket this word hashes to
    const node *entry = table[hash(lowered)];
    while (entry != NULL) {
        if (strcmp(entry->word, lowered) == 0) {
            return true;
        }
        entry = entry->next;
    }
    return false;
}

// Loads dictionary into memory, returning true if successful else false.
//
// dictionary: path of a text file containing one word per line.
// Returns false if the file cannot be opened, if a node cannot be allocated,
// or if a read error occurred; nodes inserted before a failure are left in
// the table.
//
// Lines may end in "\n" or "\r\n". Blank lines are skipped. Entries longer
// than LENGTH characters are skipped as well: copying one into node.word
// would overflow that array, and check() never matches such words anyway.
bool load(const char *dictionary) {
    // Room for LENGTH characters, one line-ending byte and the '\0'
    char words[LENGTH + 2];

    // open dictionary file
    FILE *indata = fopen(dictionary, "r");
    if (indata == NULL) {
        return false;
    }

    while (fgets(words, sizeof words, indata) != NULL) {
        // Length of the word proper, i.e. everything before the line ending
        size_t len = strcspn(words, "\r\n");

        if (len > LENGTH) {
            // The buffer filled up before the end of the line was reached.
            // Discard the remainder so it is not loaded as separate words.
            int c;
            while ((c = fgetc(indata)) != EOF && c != '\n') {
                // just consuming input
            }
            continue;
        }
        if (len == 0) {
            // Blank line, or the "\n" left over from a "\r\n" pair that was
            // split across two reads
            continue;
        }
        // get rid of the line ending
        words[len] = '\0';

        // allocate memory for the newNode
        node *newNode = malloc(sizeof *newNode);
        if (newNode == NULL) {
            fclose(indata);
            return false;
        }
        // len <= LENGTH, so len + 1 bytes fit in word[LENGTH + 1]
        memcpy(newNode->word, words, len + 1);

        // hash the cleaned-up word so check() computes the same bucket
        unsigned int index = hash(newNode->word);
        // make the newNode the head of the list
        newNode->next = table[index];
        table[index] = newNode;
    }

    // fgets returns NULL on a read error as well as at end of file
    bool ok = (ferror(indata) == 0);

    // close the opened file
    fclose(indata);
    return ok;
}

// Returns number of words in dictionary if loaded else 0 if not yet loaded.
// Nothing is cached: every call walks all chains and counts their entries.
unsigned int size(void) {
    unsigned int total = 0;
    node **bucket = table;
    node **const end = table + HASH_SIZE;

    while (bucket != end) {
        // Count the entries chained off this bucket, then move to the next one
        const node *entry = *bucket++;
        while (entry != NULL) {
            total++;
            entry = entry->next;
        }
    }
    return total;
}

// Unloads dictionary from memory, returning true if successful else false
bool unload(void) {
    // loop through the whole hash table
    for (int i = 0; i < HASH_SIZE; i++) {
        while (table[i] != NULL) {
            node *next = table[i]->next;
            free(table[i]);
            table[i] = next;
        }
    }
    return true;
}

【讨论】：