return 0;
}
我使用的测试环境:编译器 VC6.0,CPU 赛扬 2.0 GHz。
下面的代码在很多情况下速度是上面版本的 20 倍,源代码如下(优化说明在代码之后):
(我以前提交的代码使用了 MFC 库;为了便于编译和理解,我对代码做了一些调整:去除了对 MFC 的依赖,并删除了一处复杂的循环展开,因此速度可能慢了约 10%。)
#pragma warning ( disable : 4786 )
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>
// NOTE(review): the original had six #include lines whose header names were
// lost. The four above are the ones the visible code needs (std::partial_sort,
// std::ostream, std::string, std::vector); restore the remaining two from the
// original source.
namespace {
class CMyAllot
{
enum { chunk_size=1024*256 };//块大小
char* _cur;
char* _end;
std:
:
vector _vector;
void* _new_else(unsigned int size);
public:
CMyAllot() :
_end(0),_cur(0) { }
virtual ~CMyAllot() { if (!
_vector.empty()) DelAll(); }
inline void* _fastcall New(unsigned int size)
{
size=((size+3)>>2<<2);//4字节边界对齐
if ((int)size<(_end-_cur))//够用
{
char* result=_cur;
_cur+=size;
return result;
}
else //不够用
return _new_else(size);
}
void DelAll()
{
for (int i=0;i<(int)_vector.size();++i)
delete [] (_vector[i]);
_vector.clear();
}
};
void* CMyAllot:
:
_new_else(unsigned int size)
{
if (size>(chunk_size>>2))//不够用,而且需要的空间较大
{
char* result=new char[size];
char* old_back=_vector.back();
_vector[_vector.size()-1]=result;
_vector.push_back(old_back);
return result;
}
else //不够用,开辟新的空间
{
char* result=new char[chunk_size];
_cur=result+size;
_end=result+chunk_size;
_vector.push_back(result);
return result;
}
}
struct TNode//hash表使用的节点类型(链表)
{
TNode* pNext;
unsigned int count;
char str[1]; //不一定只有一个字节,会根据字符串分配空间
struct TComp//返回时的排序准则
{
bool operator()(const TNode* l,const TNode* r)
{
if ((l->count)==(r->count))
{
return std:
:
string(&l->str[0])<(&r->str[0]);
}
else
return (l->count)>(r->count);
}
};
};
inline unsigned int _fastcall hash_value(char* begin,char* end)
{
unsigned int result=0;
do{
result=5*result+(*begin); //利用asm:
lea reg0,[reg1*4+reg1],并且5是质数
}while((++begin)!
=end);
return result;
}
inline unsigned int _fastcall hash_value(char* pstr)
{
unsigned int result=0;
do{ result=5*result+(*pstr); ; //利用asm:
lea reg0,[reg1*4+reg1],并且5是质数
}while((*(++pstr)));
return result;
}
//测试字符串是否相同, 如果需要不区分大小写,修改这个函数和hash函数就可以了
inline bool _fastcall test_str_EQ(char* begin,char* end,char* str)
{
//for (;begin!
=end;++begin,++str)
// if ( (*begin)!
=*(str) ) return false;
do{
if ( (*begin)!
=*(str) ) return false;
++begin;++str;
}while(begin!
=end);
return true;
}
}
class CHashSet
{
typedef std:
:
vector base_t;
inline unsigned int hash_index(char* begin,char* end) const
{ return hash_value(begin,end)&(_hash_mask); }
inline unsigned int hash_index(char* pstr) const
{ return hash_value(pstr)&(_hash_mask); }
void resize();
void _fastcall move_insert(base_t& v,TNode* pOldNode) const;
TNode* _fastcall NewNode(char* begin,char* end);
void Sort(base_t& v,unsigned int sortCount);
unsigned int _hash_power;
unsigned int _hash_mask;
unsigned int _node_count;
base_t _vbase;
CMyAllot _allot;
void _fastcall else_insert(TNode* pNode,char* begin,char* end);
public:
CHashSet();
virtual ~CHashSet();
unsigned int size() const { return _node_count; }
unsigned int sum();
void _fastcall insert(char* begin,char* end);
void GetStrList(std:
:
ostream& cout,unsigned int sortCount);
};
CHashSet:
:
CHashSet()
:
_hash_power
(2),_vbase((unsigned int)(_hash_power),(TNode*)0)//注意次序
{
_node_count=0;
_hash_mask=_hash_power-1;//_hash_power=1<}
CHashSet:
:
~CHashSet()
{
_allot.DelAll();
}
unsigned int CHashSet:
:
sum()
{
unsigned int sum=0;
if(_node_count>0)
{
base_t:
:
iterator end=_vbase.end();
for (base_t:
:
iterator i=_vbase.begin();i {
TNode* pNode=(*i);
while (pNode!
=0)
{
sum+=pNode->count;
pNode=pNode->pNext;
}
}
}
return sum;
}
void _fastcall CHashSet:
:
insert(char* begin,char* end)
{
unsigned int index=hash_index(begin,end);
TNode* pNode=_vbase[index];
if (!
pNode)//节点还没有使用
{
_vbase[index]=NewNode(begin,end);
++_node_count;
}
else
{
if (test_str_EQ(begin,end,pNode->str))//累加
++(pNode->count);
else
else_insert(pNode,begin,end);
}
}
void _fastcall CHashSet:
:
else_insert(TNode* pNode,char* begin,char* end)
{
while (true)
{
if(!
(pNode->pNext))
{
pNode->pNext=NewNode(begin,end);
++_node_count;
if(_node_count>=(_hash_power))
resize();
break;
}
else if (test_str_EQ(begin,end,pNode->pNext->str))
{
++(pNode->pNext->count);
break;
}
pNode=pNode->pNext;
};
}
void _fastcall CHashSet:
:
move_insert(base_t& v,TNode* pOldNode) const
{
TNode*& pNode = v[hash_index(pOldNode->str)];
pOldNode->pNext=0;
if (!
pNode)//节点还没有使用
{
pNode=pOldNode;
}
else
{
if (!
pNode->pNext)
{
pNode->pNext=pOldNode;
}
else
{
TNode* pListNode=pNode->pNext;
while (pListNode->pNext!
=0)
{ pListNode=pListNode->pNext; }
pListNode->pNext=pOldNode;
}
}
}
TNode* _fastcall CHashSet:
:
NewNode(char* begin,char* end)
{
TNode* pNode=(TNode*)(_allot.New(sizeof(TNode)+end-begin));
pNode->pNext=0;
pNode->count=1;
char* i=pNode->str;
//for (;begin!
=end;++i,++begin)
// (*i)=(*begin);
do{
(*i)=(*begin); ++i,++begin;
} while(begin!
=end);
(*i)=char(0);
return pNode;
}
void CHashSet:
:
resize()
{
if(_node_count>=(_hash_power))
{
base_t:
:
iterator end=_vbase.end();
_hash_power<<=2;
_hash_mask=(_hash_power)-1;
base_t new_vbase(_hash_power,(TNode*)0);
for (base_t:
:
iterator i=_vbase.begin();i!
=end;++i)
{
TNode* pNode=(*i);
while (pNode!
=0)
{
TNode* temp=pNode->pNext;
move_insert(new_vbase,pNode);
pNode=temp;
}
}
_vbase.swap(new_vbase);
}
}
////
void CHashSet:
:
Sort(base_t& v,unsigned int sortCount)
{
if (sortCount==1)
{
v.resize
(1);
base_t:
:
iterator end=_vbase.end();
TNode* maxNode=_vbase[0];
TNode:
:
TComp op;
for (base_t:
:
iterator i=_vbase.begin();i!
=end;++i)
{
TNode* pNode=(*i);
while (pNode!
=0)
{
if ( (maxNode==0)||(op(pNode,maxNode)) )
maxNode=pNode;
pNode=pNode->pNext;
}
}
v[0]=maxNode;
}
else
{
v.resize(_node_count);
int index=0;
if(_node_count>0)
{
TNode** end=&(_vbase[_hash_power]);
for (TNode** i=&(_vbase[0]);i!
=end;++i)
{
TNode* pNode=(*i);
while (pNode!
=0)
{
v[index]=pNode;
++index;
pNode=pNode->pNext;
}
}
}
std:
:
partial_sort(v.begin(),v.begin()+sortCount,v.end(),TNo