算法作业中的一道题目~
描述要求
Design a data type that supports insert in logarithmic time, find-the-median in constant time.
设计一种数据结构,让动态求解中位数的插入时间复杂度为logN
,返回中位数为O(1)
。
概念与做法
中位数,是数学中的一个很简单的概念。在一个排序后的,有N个元素的数组中。它的中位数为:
若N为奇数,则选择第(N+1)/2个为中位数;
若n为偶数,则中位数是(N/2以及N/2+1)的平均数;
简单排序实现
求中位数,最简单的方法是,先对数组进行排序,然后根据数组的size进行判断,然后找出相应的中位数,这样做的时间复杂度为排序的时间复杂度,即O(NlogN)
。
如果现在情况改变,数组的数据可以动态添加,那么这种方法,需要在插入的时候使用插入排序的方式,于是它的时间复杂度为O(N)
,返回中位数的时间复杂度为O(1)
。但是这样不符合要求,所以要想别的方法。
比较容易想到的方法是,利用二分查找,在排序后的数组中以插入的方式进行插入,如果元素的组织形式为链表而非数组,那么插入是可以做到O(logN)
的。
最大最小堆实现
但是因为最近刚刚学了堆,所以估计作业是想让我们用堆来实现,想了很久想到一种实现方式。
要用堆来实现动态求中位数,需要维护两个堆,一个最大堆,一个最小堆。最大堆用来存储比中位数小的所有元素,最小堆用来存储比中位数大的所有元素。同时要维护两个int变量medianLow与medianLarge。元素数量为奇数时,中位数存储在medianLow中,元素数量为偶数时,中位数为medianLow和medianLarge的平均值。
插入的时候,如果插入之前有奇数个元素,那么分情况讨论:
- 如果插入的元素大于插入前的中位数,即meidanLow,就将元素插入至最小堆,然后在最小堆中取出堆顶元素,存入medianLarge。
- 如果插入的元素小于插入前的中位数,即meidanLow,就将元素插入至最大堆,然后在最大堆中取出堆顶元素,将medianLow存入medianLarge,然后将取出的堆顶元素存入medianLow。
而如果插入之前有偶数个元素,那么同样分情况讨论:
- 如果插入的元素大于medianLarge,就将元素插入至最小堆,然后把medianLow插入至最大堆,此时medianLarge就是插入后数组的中位数。
- 如果插入的元素小于medianLow,就将元素插入至最大堆,然后把medianLarge插入至最小堆,此时meidanLaw就是插入后数组的中位数。
- 如果插入的元素大小在medianLow与medianLarge之间,就把medianLow与medianLarge分别插入最大堆与最小堆,此时原本需插入的元素为中位数。
而且显而易见,插入的时间复杂度是O(logN)
的。
CPP实现
实现此想法,需要先实现最大堆与最小堆,然后利用两个堆对数据进行操作。
#include <iostream>
using namespace std;
/*Heap, max or min is optional by a string "max" or "min"*/
template <class T>
class Heap
{
public:
Heap(T* data, int size, int maxSize, string str);
~Heap();
void insert(T value);
T remove();
void printHeap();
int getSize();
/*test func*/
// void test();
private:
string attr;
T* data;
int size;
int maxSize;
/*assistant func*/
int parent(int i);
int leftChild(int i);
int rightChild(int i);
void swap(int i, int j);
/*main func*/
void heapKeep(int i);
void buildHeap();
};
/*median Heaps. it has two heaps,
*one is min heap which stores the node whose value is larger than median
*and other one is max heap which stores the node whose value is lower than median
*/
class MedianHeap
{
public:
MedianHeap(int size);
void insert(int number);
int returnMedian();
private:
Heap<int> maxHeap;
Heap<int> minHeap;
/*when size is odd, median stores in medianLow
*when size is even, median stores in medianLow(low number) and medianLarge(large number)
*/
int medianLow, medianLarge;
int size;
};
template<class T>
Heap<T>::Heap(T* data, int size, int maxSize, string str)
:data(data), size(size), maxSize(maxSize), attr(str)
{
buildHeap();
}
template<class T>
Heap<T>::~Heap()
{
delete[] data;
}
template<class T>
int Heap<T>::parent(int i)
{
return i / 2;
}
template<class T>
int Heap<T>::leftChild(int i)
{
if (i * 2 <= size)
return i * 2;
else
return NULL;
}
template<class T>
int Heap<T>::rightChild(int i)
{
if (i * 2 + 1 <= size)
return i * 2 + 1;
else
return NULL;
}
template<class T>
void Heap<T>::swap(int i, int j)
{
T buf = data[i];
data[i] = data[j];
data[j] = buf;
}
/*time complexity: log(n)*/
template<class T>
void Heap<T>::heapKeep(int i)
{
if (i >= size)
return;
int index = i;
T selected = data[i];
/*choose the min or max element in the item and its childs*/
if (leftChild(i) != NULL)
{
T lc = data[leftChild(i)];
/*max heap*/
if (attr == "max" && selected < lc)
{
index = leftChild(i);
selected = lc;
}
/*min heap*/
else if (attr == "min" && selected > lc)
{
index = leftChild(i);
selected = lc;
}
}
if (rightChild(i) != NULL)
{
T rc = data[rightChild(i)];
/*max heap*/
if (attr == "max" && selected < rc)
{
index = rightChild(i);
selected = rc;
}
/*min heap*/
else if (attr == "min" && selected > rc)
{
index = rightChild(i);
selected = rc;
}
}
/*swap and recursive*/
if (index != i)
{
swap(i, index);
heapKeep(index);
}
}
/*time complexity:n * log(n)*/
template<class T>
void Heap<T>::buildHeap()
{
/*keep the max when the node has a child or has two children*/
for (int i = (size / 2); i >= 1; i--)
{
heapKeep(i);
}
}
template<class T>
void Heap<T>::printHeap()
{
for (int i = 1; i <= size; i++)
cout << data[i] << " ";
cout << endl;
}
/*time complexity: log(n)*/
template<class T>
void Heap<T>::insert(T value)
{
/*whether the node can be added*/
if (size + 1 > maxSize)
{
cout << "Heap::increaseNode err, size fault\n";
}
size += 1;
int i = size;
data[i] = value;
while(i > 1)
{
if (attr == "max" && data[parent(i)] < data[i])
{
swap(parent(i), i);
i = parent(i);
}
else if (attr == "min" && data[parent(i)] > data[i])
{
swap(parent(i), i);
i = parent(i);
}
else
break;
// cout << "Debug info -> " << i << endl;
}
}
/*time complexity: log(n)*/
template<class T>
T Heap<T>::remove()
{
/*move the last leaf node to the root node, and keep it in maxHeap order*/
T result = data[1];
data[1] = data[size];
size -= 1;
heapKeep(1);
return result;
}
template<class T>
int Heap<T>::getSize()
{
return size;
}
// template<class T>
// void Heap<T>::test()
// {
// int size = 0;
// int maxSize = 0;
// cin >> size >> maxSize;
// cout << "Yeah~Choose maxHeap or minHeap~\n";
// int buf = 0;
// cout << "cin " << size << " numbers\n";
// int* data = new int[maxSize];
// for (int i = 1; i <= size; i++)
// {
// cin >> data[i];
// }
// cout << "cin end\n";
// Heap<int> heap(data, size, maxSize, "max");
// cout << "build end\n";
// heap.printHeap();
// heap.insert(10);
// cout << "insert end\n";
// heap.printHeap();
// }
MedianHeap::MedianHeap(int size)
:medianLow(0), medianLarge(0), size(0),
maxHeap(Heap<int>(new int[size], 0, size, "max")),
minHeap(Heap<int>(new int[size], 0, size, "min"))
{
// int* data1 = new int[size];
// int* data2 = new int[size];
// maxHeap = Heap<int>(data1, 0, size, "max");
// minHeap = Heap<int>(data2, 0, size, "min");
}
void MedianHeap::insert(int number)
{
if (size == 0)
{
medianLow = number;
size++;
return;
}
/*former median is one number*/
if (size % 2 == 1)
{
/*the number need to inserted is larger than the median now*/
if (number > returnMedian())
{
minHeap.insert(number);
/*get the second median number
*which is the lowest one in which is larger than the median now*/
medianLarge = minHeap.remove();
}
else if (number <= returnMedian())
{
maxHeap.insert(number);
medianLarge = medianLow;
medianLow = maxHeap.remove();
}
}
else if (size % 2 == 0)
{
if (number >= medianLarge)
{
minHeap.insert(number);
maxHeap.insert(medianLow);
medianLow = medianLarge;
}
else if (number <= medianLow)
{
maxHeap.insert(number);
minHeap.insert(medianLarge);
}
else if (number > medianLow && number < medianLarge)
{
maxHeap.insert(medianLow);
minHeap.insert(medianLarge);
medianLow = number;
}
}
// /*should insert into minheap*/
// if (number > median)
// {
// if (minHeap.getSize() + 1 - maxHeap.getSize() >= 2)
// {
// minHeap.insert(number);
// maxHeap.insert(median);
// median = minHeap.remove();
// }
// else
// {
// minHeap.insert(number);
// }
// }
// /*should insert into maxheap*/
// else
// {
// if (maxHeap.getSize() + 1 - minHeap.getSize() >= 2)
// {
// maxHeap.insert(number);
// minHeap.insert(median);
// median = maxHeap.remove();
// }
// else
// {
// maxHeap.insert(number);
// }
// }
size++;
}
int MedianHeap::returnMedian()
{
if (size % 2 == 1)
return medianLow;
else
return ( medianLarge + medianLow ) / 2;
}
int main()
{
int number = 0;
int size = 0;
cout << "input the size of heap\n";
cin >> size;
MedianHeap medianHeap(size);
while(cin >> number) {
medianHeap.insert(number);
cout << "The median is " << medianHeap.returnMedian() << endl;
}
}