无序关联容器

unordered_map

基本使用与map相同，迭代器无反向迭代器，无序map，其体现在遍历时无序

#include <unordered_map>

void testUMap()
{
	unordered_map<int, int> um;
	map<int, int> m;

	um.insert(make_pair(1, 1));
	um.insert(make_pair(10, 1));
	um.insert(make_pair(2, 1));
	um.insert(make_pair(15, 1));
	um.insert(make_pair(8, 1));

	um[100] = 100;//插入
	um[15] = 15;//修改

	um.at(2) = 2;//at无法插入，key不存在直接抛异常

	//遍历无序
	unordered_map<int, int>::iterator uit = um.begin();
	while (uit != um.end())
	{
		cout << uit->first << "-->" << uit->second << endl;
		uit++;
	}
  
  uit = um.find(100);//find找到返回指定位置迭代器，找不到则返回end迭代器
	cout << um.count(100) << endl;//count返回元素个数，1或0
	cout << uit->first << "-->" << uit->second << endl;
	uit = um.find(20);
	cout << um.count(20) << endl;
	cout << (uit == um.end()) << endl;
}

unordered_set

unordered_set<int> us;

us.insert(10);
us.insert(99);
us.insert(48);
us.insert(27);
us.insert(16);

//迭代器遍历无序
unordered_set<int>::iterator uit = us.begin();
while (uit != us.end())
{
	cout << *uit << " ";
	uit++;
}
cout << endl;

和有序容器的区别

map, set, multi_map, multi_set：迭代器遍历有序，中序遍历；底层实现：红黑树；操作时间复杂度O(log₂n)

unordered_map, unordered_set, unordered_multimap, unordered_multiset：

迭代器遍历无序，底层实现：哈希；操作时间复杂度O(1)

使用场景：

对遍历顺序有要求：非unordered系列容器
对性能要求更高：unordered系列容器

底层结构：哈希结构

通过构造一种存储结构，通过某种函数（hashFunc）使元素的存储位置和它的关键码之间能够建立一一映射的关系，那么在查找时就可以很快的找到该元素，一种以空间换时间的结构

映射关系：哈希函数

哈希：把元素/键值映射到空间的某一个位置

特点：

映射位置范围小于等于空间范围
映射位置尽量均匀
映射关系尽量简单

常用的哈希函数：

除留余数法（通用）：元素/键值 % 空间的大小
直接定址法（只适合范围紧凑的数据，如字符）：线性函数，A * x(元素/键值) + (B)偏置

哈希冲突

不同的数据映射到同一个位置即为哈希冲突

解决哈希冲突问题：

闭散列（开放定址法）

线性探测、二次探测
开散列（拉链法，哈希桶）

负载因子：实际存放的元素个数 / 空间大小（一般70%～80%）

线性探测：

插入：
1. 通过哈希函数计算哈希位置
2. 如果当前位置为空，则进行插入操作
3. 如果位置不为空，则从当前位置开始，找到第一个空的位置，再进行插入操作
查找：
1. 通过哈希函数计算哈希位置
2. 查看当前位置的数据是否和查找的数据相同，如果相同，则查找结束
3. 如果不相同，则从当前位置继续向后查找，直到找到了数据或者走到了空的位置，则查找结束
删除：假删除，通过设置状态数据，来标记数据是否可用
1. 进行查找操作
2. 如果找到需要删除的数据，则对待删除数据所在位置进行删除状态标记

二次探测：每次偏移的位置的长度为上一次的平方

开散列：

增容：
1. 遍历旧表中的每一个元素
2. 计算每一个元素在新表中的位置
3. 把元素重新挂载到新表中的对应位置
迭代器++：
1. _next非空：更新到_next
2. _next为空：

HashTable: K, V, KeyOfValue, HashFun

K：数据键值
V：键值对应的value， unordered_map—>pair、unordered_set—->K
KeyOfValue：获取value对应的键值
HashFun：把键值K转换成整形数据（非整形数类型的转换，非整数—>映射—>整数）
哈希表迭代器：前置声明，友元类声明
成员：节点，哈希表指针
++操作：
- next是否非空，不为空：更新到next节点；为空：
  - 首先通过kov获取对应键值
  - 通过hashFun计算键值对应的整数值
  - 通过整数值计算当前节点在哈希表中的位置
  - 从上一步计算位置的下一个位置开始，寻找第一个非空练表的头节点
  - 如果没有找到，更新为空指针
begin：第一个非空链表的头节点
end：空节点

哈希表（开散列）：可以存放任意类型的数据，如果键值类型为非数值类型，可以通过hashFun转换为整数值

插入、查找、删除：

首先进行的操作为：通过键值计算位置
然后进行类似单链表的操作

unordered_map与unordered_set的模拟实现

哈希表

unordered_map和unordered_set的底层均由同一个哈希表实现，本次实现使用开散列哈希桶的结构，其结构大致如下：

template <class V>
struct HashNode
{
	V _value;
	HashNode<V>* _next;

	HashNode(const V& val = V())
		: _value(val)
		, _next(nullptr)
	{ }
};
template <class K, class V, class KeyOfValue, class HF>
class HashTable
{
public:
	//迭代器声明为友元类，原因是在迭代器类中，需要使用到哈希表来进行迭代器的自加操作
	template <class K, class V, class KeyOfValue, class HF>
	friend struct HashIterator;

	typedef HashNode<V> Node;
	typedef HashIterator<K, V, KeyOfValue, HF> iterator;//迭代器

	iterator begin()；
	iterator end()；
	pair<iterator, bool> insert(const V& value)
    {
		CheckCapacity();

		HF hf;
		KeyOfValue kov;
		size_t idx = hf(kov(value)) % _table.size();//计算当前元素对应的位置

		Node* cur = _table[idx];
		while (cur)//寻找是否有重复元素，重复则插入失败
		{
			if (kov(cur->_value) == kov(value))
				return make_pair(iterator(cur, this), false);
			cur = cur->_next;
		}

		cur = new Node(value);//头插
		cur->_next = _table[idx];
		_table[idx] = cur;
		_size++;

		return make_pair(iterator(cur, this), true);
	}
	Node* find(const K& key);
	size_t count(const K& key);
	bool erase(const K& key);
	bool empty() const；
	size_t size() const；
private:
	size_t getNextSize(size_t n);
	void CheckCapacity()
    {
		if (_size == _table.size())
		{
			size_t newSize = getNextSize(_size);//获取下一个表的大小值
			vector<Node*> newTable;
			newTable.resize(newSize);

			//遍历旧表中的非空单链表
			KeyOfValue kov;
			HF hf;
			for (size_t i = 0; i < _table.size(); i++)
			{
				Node* cur = _table[i];
				while (cur)
				{
					Node* next = cur->_next;

					int idx = hf(kov(cur->_value)) % newTable.size();
					//头插
					cur->_next = newTable[idx];
					newTable[idx] = cur;

					cur = next;
				}
				_table[i] = nullptr;
			}
			_table.swap(newTable);//交换新旧表
		}
		return;
	}
	vector<Node*> _table;
	size_t _size = 0;
};

使用序列容器vector来存放单链表表头，当遇到哈希冲突时，直接将插入元素头插在对应位置的单链表；其中K、V两个泛型参数在unordered_map中分别为K，pair<K, V>，在unordered_set中为K、K；泛型参数KeyOfValue则为不同的获取Key值的方法，通过仿函数对象来实现具体的方法

在unordered_map中：

struct MapKeyOfValue
	{
		const K& operator()(const pair<K, V>& value)
		{
			return value.first;
		}
	};

在unordered_set中：

struct SetKeyOfValue
{
	const K& operator()(const K& value)
	{
		return value;
	}
};

而泛型参数HF则为哈希函数，一般具有默认类型参数，也可指定，如在本次实现中为：

template <class K>
struct HashFunc
{
	size_t operator()(const K& key)
	{
		return key;
	}
};
template<>
class HashFunc<string>
{
public:
	size_t operator()(const string& s)
	{
		const char* str = s.c_str();
		unsigned int seed = 131; 
		unsigned int hash = 0;
		while (*str)
		{
			hash = hash * seed + (*str++);
		}

		return hash;
	}
};

迭代器

哈希表的迭代器不能通过简单的指针来完成，必须对指针进行封装，封装成一个类，并开放begin，end，++等操作的接口，大致如下：

//前置声明
template <class K, class V, class KeyOfValue, class HF>
class HashTable;

template <class K, class V, class KeyOfValue, class HF>
struct HashIterator
{
	typedef HashNode<V> Node;
	typedef HashIterator<K, V, KeyOfValue, HF> Self;
	typedef HashTable<K, V, KeyOfValue, HF> HT;

	HashIterator(Node* node, HT* ht)
		: _node(node)
		, _ht(ht)
	{ }

	V& operator*(){return _node->_value;}

	V* operator->(){return &_node->_value;}

	bool operator!=(const Self& it){return _node != it._node;}

	bool operator==(const Self& it){return _node == it._node;}

	Self& operator++(){add(); return *this;}

	Self& operator++(int){Self tmp(*this); add(); return *tmp;}

private:
	void add()
	{
		if (_node->_next)
			_node = _node->_next;
		else
		{
			KeyOfValue kov;
			HF hf;

			size_t idx = hf(kov(_node->_value)) % _ht->_table.size();//计算当前传入值对应的位置
			idx++;
			if (idx == _ht->_table.size())//走到哈希表尾
			{
				_node = nullptr;
				return;
			}

			Node* cur = _ht->_table[idx];
			for (; idx < _ht->_table.size(); idx++)//找到下一个非空链表头
			{
				if (_ht->_table[idx])
				{
					_node = _ht->_table[idx];
					break;
				}
			}

			if (idx == _ht->_table.size())//检查是否已经遍历完成
				_node = nullptr;
			return;
		}
	}

	Node* _node;
	HT* _ht;
};

至此，unordered_map和unordered_set的实现已经完成了大半，接下来创建这两个类分别对哈希表类进行封装即可

unordered_set

template <class K, class HF = HashFunc<K>>
class Unordered_Set
{
	struct SetKeyOfValue
	{
		const K& operator()(const K& value)
		{
			return value;
		}
	};
public:
	typedef typename HashTable<K, K, SetKeyOfValue, HF>::iterator iterator;
	iterator begin(){return _ht.begin();}
    
	iterator end(){return _ht.end();}

	size_t size() const{return _ht.size();}

	bool empty() const{return _ht.empty();}

	pair<iterator, bool> insert(const K& value){return _ht.insert(value);}

	bool erase(const K& key){return _ht.erase(key);}

	HashNode<K>* find(const K& key){return _ht.find(key);}

	size_t count(const K& key){return _ht.count(key);}
private:
	HashTable<K, K, SetKeyOfValue, HF> _ht;
};

如上，通过调用哈希表的不同接口，来实现unordered_set的各种功能

unordered_map

template <class K, class V, class HF = HashFunc<K>>
class Unordered_Map
{
	struct MapKeyOfValue
	{
		const K& operator()(const pair<K, V>& value)
		{
			return value.first;
		}
	};

public:
	typedef typename HashTable<K, pair<K, V>, MapKeyOfValue, HF>::iterator iterator;

	iterator begin(){return _ht.begin();}
    
	iterator end(){return _ht.end();}

	size_t size() const{return _ht.size();}

	bool empty() const{return _ht.empty();}

	pair<iterator, bool> insert(const pair<K, V>& value){return _ht.insert(value);}

	V& operator[](const K& key){
        pair<iterator, bool> ret = _ht.insert(make_pair(key, V()));
		return ret.first->second;
	}

	bool erase(const K& key){return _ht.erase(key);}

	HashNode<V>* find(const K& key){return _ht.find(key);}

	size_t count(const K& key){return _ht.count(key);}

private:
	HashTable<K, pair<K, V>, MapKeyOfValue, HF> _ht;
};

具体代码实现

unordered_map与unordered_set的模拟实现 - GitHub)

小赵的学习笔记

从无序关联容器到哈希（及无序关联容器的模拟实现）

无序关联容器

unordered_map

unordered_set

和有序容器的区别

底层结构：哈希结构

哈希冲突

unordered_map与unordered_set的模拟实现

哈希表

迭代器

unordered_set

unordered_map

具体代码实现