C++ 动态散列-table

C++ dynamic hash-table

我现在正在学习 C++ 中的散列。 我找到了一个带有 class 的示例,并进行了测试。所以我希望所有测试都能通过。

应编写动态哈希表。哈希值应该存储在数组中,数组可以有目的地改变大小。当改变Array的大小时,Hash函数的改变方式要使Hash函数的目标区域与Array的大小一致。当数组的大小改变时,所有元素都需要再次散列。元素的key是String,需要计算Hash的值(字符串所有字符的ASCII值之和)mod(Array的大小)。如果发生碰撞,则应执行 Open Addressing:h(key)+j mod M。

对于删除元素我需要注意每个元素的理想位置i=h(k)和当前位置j我需要关心:T[i], T[i+1 ],...,T[j] 已被占用。

直到现在我还停留在测试 5。 但是每次我尝试调整它的大小时,我都会遇到未定义的行为或者崩溃。

这是hashtable.hclass:

#ifndef HASHTABLE_H
#define HASHTABLE_H  
#include <string>

using namespace std;

class hashtable
{
    public:
    hashtable();
    virtual ~hashtable();
    void insert(string);
    int find(string);
    void remove(string);
    void resize_array();
    int hashfunction(string str);
    int getSize();
    string* getArray();

private:
     int elemts_in_array;
     int table_size;
     string* T;
};

这是hashtable.cpp

#include "hashtable.h"
#include<iostream>
#include<cstring>
using namespace std;

hashtable::hashtable()
{
    table_size=10;
    T=new string[table_size];
// your code (start with a capacity of 10)
}

hashtable::~hashtable()
{
}

int hashtable::hashfunction(string str)
{
    int k;
    for(int i=0;i<table_size;i++)
    k+=(int)str[i];
    return k%table_size;// your code
} 

void hashtable::insert(string key)
{
    int k=hashfunction(key);
//    
//    float filled = (float)sizeof(T) / (float)table_size;
//   
//    cout<<filled<< "percent filled"<<endl;
//
//    if(filled >= 0.8)
//  {
//      cout << "Resizing Array.." << endl;
//        resize_array();
//  }

if(T[k]==""){
    T[k]=key;
    }
else {
    while(T[k]!="")
    if(T[k]==key){
     break;
    }else {
    T[k]=key;
    }
    k++;

}
// your code (keys already present in the hashtable should not be added a second time)
//           (use resize_array when the hashtable is filled by 80%)
//           (the testbench expects the resize taking place before and without considering the 
     insertion of the new element)
}

int hashtable::find(string key)
{
    for(int k=0;k<table_size;k++){
    if(T[k]==key)
    return k ;
}
return -1;
// your code (should return -1 if key was not found)
}

void hashtable::remove(string key)
{
    int k=hashfunction(key);
    T[k]="";
    for(int i=1;i<table_size;i++){
    string next=T[k+i];
    if(hashfunction(next)==k){
    T[k]=T[k+1];
    }
}
// your code (Invariant: ensure all keys are at their "optimal" position afterwards)
}

void hashtable::resize_array()
{
// remember the old table
int old_table_size = table_size;
std::string *old_array = T;

// build the new table and reset counter
table_size *= 2;
T = new std::string[table_size];
elemts_in_array = 0;

// now bring the elements over
for (int i=0; i<old_table_size; ++i)
{
    if (old_array[i].empty())
        insert(old_array[i]);
}

// finally, throw out old array
delete[] old_array;
}

//void hashtable::resize_array()
//{
//    size_t newsize=table_size*2;
//    string *newT=new string[newsize];
//    memcpy(newT,T,table_size*sizeof(string));
//
//    table_size=newsize;
//    delete[] T;
//    T=newT;
//  // your code (double the array size)
//}

int hashtable::getSize()
{
    return table_size;
    // your code
}

string* hashtable::getArray()
{
   return T;
} 

这是测试文件:

#include <iostream>
#include "hashtable.h"

bool compare_arrays(string* testarray, hashtable t, int len)
{
    string* array2 = t.getArray();

    if(t.getSize() != len)
{
    return 1;
}

for (int i=0; i < len ; i++)
{
    if(testarray[i] != array2[i])
    {
        return 1;
    }
}
return 0;
}


void print_array(hashtable t){
     string* array = t.getArray();
     for (int i=0; i< t.getSize();i++)
 {
     cout << i << " - " << array[i]<< endl;
 }
}

void test(hashtable* t)
{
    string test_vector1[10] = {"123","","","","","ABCDE","Info","","",""};
    string test_vector2[10] = {"","","","!","","","Test","","ABC",""};
    string test_vector3[10] = {"123", "T!est","Te!st","Tes!t","Test!","ABCDE","Info","","","!Test"};
    string test_vector4[20] = {"","","","","","","","","","T!est","123","Te!st","Tes!t","Test!","!Test","ABCDE","Info","Inof","",""};
    string test_vector5[20] = {"","","","", "", "", "", "", "", "T!est","123","Tes!t","Test!","!Test","Te!st","ABCDE","Info", "Inof", "", ""};

t->insert("Info");
t->insert("123");
t->insert("ABCDE");

if(compare_arrays(test_vector1, *t,10))
{
    cout << "Hashtable does not work correctly!" << endl;
    return;
}

t->insert("Info");
if(compare_arrays(test_vector1, *t,10))
{
    cout << "Inserting a element twice might not work correctly!" << endl;
    return;
}

int test_var = t->find("CBA");
if(test_var != -1)
{
    cout << "Find might not work correctly for unseen keys." << endl;
    return;

}
test_var = t->find("Info");
if(test_var != 6)
{
    cout << "Find might not work correctly!" << endl;
    return;
}


t->insert("!Test");
t->insert("T!est");
t->insert("Te!st");
t->insert("Tes!t");
t->insert("Test!");
t->insert("Test!");

if(compare_arrays(test_vector3, *t,10))
{
    cout << "Hashfunction / insert might not work correctly!" << endl;
    return;
}

t->insert("Inof");
if(compare_arrays(test_vector4, *t,20))
{
    cout << "Resize Array might not work correctly!" << endl;
    return;
}

t->remove("!");
t->remove("Te!st");
t-> insert("Te!st");
if(compare_arrays(test_vector5, *t,20))
t-> insert("Te!st");
if(compare_arrays(test_vector5, *t,20))
{
    cout << "Remove might not work correctly!" << endl;
    return;
}

cout << "All test passed!";

}

int main()
{
    hashtable t;
    test(&t);
}

两个明显的错误:

int hashtable::hashfunction(string str)
{
    int k;  // <-- Uninitialized
    for (int i = 0; i < table_size; i++)
        k += (int)str[i];  // <-- Potential out-of-bounds access
    return k % table_size;// your code
}

k 未初始化,因此您不知道 k 的最终值是什么。

第二个问题是,如果 str 的大小小于 table_size,则您正在越界访问 str。您的示例显示字符串 "Info" 被传递给 hash_function,但 table_size 是 10。


另一个基本错误是 hashtable 没有正确的复制语义,因此按值传递或返回 hashtable 将导致未定义的行为。这都是因为你使用

string *T

作为成员变量。

最简单的解决方法是不使用 string* T 作为成员,而是使用 std::vector<std::string> T;