MezzanineEngine 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
trie.h
1 // © Copyright 2010 - 2014 BlackTopp Studios Inc.
2 /* This file is part of The Mezzanine Engine.
3 
4  The Mezzanine Engine is free software: you can redistribute it and/or modify
5  it under the terms of the GNU General Public License as published by
6  the Free Software Foundation, either version 3 of the License, or
7  (at your option) any later version.
8 
9  The Mezzanine Engine is distributed in the hope that it will be useful,
10  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  GNU General Public License for more details.
13 
14  You should have received a copy of the GNU General Public License
15  along with The Mezzanine Engine. If not, see <http://www.gnu.org/licenses/>.
16 */
17 /* The original authors have included a copy of the license specified above in the
18  'Docs' folder. See 'gpl.txt'
19 */
20 /* We welcome the use of the Mezzanine engine to anyone, including companies who wish to
21  Build professional software and charge for their product.
22 
23  However there are some practical restrictions, so if your project involves
24  any of the following you should contact us and we will try to work something
25  out:
26  - DRM or Copy Protection of any kind(except Copyrights)
27  - Software Patents You Do Not Wish to Freely License
28  - Any Kind of Linking to Non-GPL licensed Works
29  - Are Currently In Violation of Another Copyright Holder's GPL License
30  - If You want to change our code and not add a few hundred MB of stuff to
31  your distribution
32 
33  These and other limitations could cause serious legal problems if you ignore
34  them, so it is best to simply contact us or the Free Software Foundation, if
35  you have any questions.
36 
37  Joseph Toppi - toppij@gmail.com
38  John Blackwood - makoenergy02@gmail.com
39 */
40 /*
41  * Copyright (c) 2012, Ranjith TV
42  * All rights reserved.
43  *
44  * Licensed under the BSD 3-Clause ("BSD New" or "BSD Simplified") license.
45  * You may obtain a copy of the License at
46  *
47  * http://www.opensource.org/licenses/BSD-3-Clause
48  *
49  */
50 
51 #ifndef TRIE_H
52 #define TRIE_H
53 
54 #include "datatypes.h"
55 
56 namespace Mezzanine
57 {
58 
59 template < typename T,
60 typename V,
61 typename Cmp,
62 typename Items > class Node;
63 
64 template < typename T,
65 typename V,
66 typename Cmp,
67 typename Items > class NodeItem
68 {
69 private:
72 
73 public:
74  NodeItem(const T &endSymbol, T const &key)
75  : mEndSymbol(endSymbol),
76  mKey(key),
77  mChilds(0) {}
78 
79  virtual ~NodeItem() {
80  delete mChilds;
81  }
82 
83  bool operator<(NodeItemClass const &oth) const {
84  return Cmp()(this->mKey, oth.mKey);
85  }
86 
87  bool operator<(T const &key) const {
88  return Cmp()(this->mKey, key);
89  }
90 
91  bool operator==(T const &key) const {
92  return !Cmp()(this->mKey, key) && !Cmp()(key, this->mKey);
93  }
94 
95  bool operator==(NodeItemClass const &oth) const {
96  return !Cmp()(this->mKey, oth.mKey) && !Cmp()(oth.mKey, this->mKey);
97  }
98 
99  bool operator!=(T const &key) const {
100  return !((*this) == key);
101  }
102 
103  bool operator!=(NodeItemClass const &oth) const {
104  return !((*this) == oth);
105  }
106 
107  void set(T const &key) {
108  mKey = key;
109  }
110 
111  const T &get() const {
112  return mKey;
113  }
114 
115  const NodeClass *getChilds() const {
116  return mChilds;
117  }
118 
119  NodeClass *getChilds() {
120  return mChilds;
121  }
122 
123  NodeClass *getOrCreateChilds(NodeClass * parent) {
124  createChilds(parent);
125  return mChilds;
126  }
127 
128 private:
129  void createChilds(NodeClass * parent) {
130  if (!mChilds) {
131  mChilds = new NodeClass(mEndSymbol, parent);
132  }
133  }
134 
135  NodeItem(NodeItem const &);
136  NodeItem &operator=(NodeItem const &);
137 
138 private:
139  const T mEndSymbol;
140  T mKey;
141  NodeClass *mChilds;
142 };
143 
144 template < typename T,
145 typename V,
146 typename Cmp,
147 typename Items > class EndNodeItem: public NodeItem<T, V, Cmp, Items>
148 {
149 private:
151 
152 public:
153  EndNodeItem(const T &endSymbol, T const &key)
154  : ParentClass(endSymbol, key) {}
155 
156  EndNodeItem(const T &endSymbol, T const &key, V const &value)
157  : ParentClass(endSymbol, key),
158  mValue(value) {}
159 
160  void set(T const &key, V const &value) {
161  ParentClass::set(key);
162  mValue = value;
163  }
164 
165  const V &getValue() const {
166  return mValue;
167  }
168 
169  V &getValue() {
170  return mValue;
171  }
172 
173 private:
174  EndNodeItem(EndNodeItem const &);
175  EndNodeItem &operator=(EndNodeItem const &);
176 
177 private:
178  V mValue;
179 };
180 
181 template < typename T,
182 typename V,
183 typename Cmp,
184 typename Items > class NodeItemPtrCompare
185 {
186 private:
188 
189 public:
190  bool operator()(const NodeItemClass *v1, const NodeItemClass *v2) {
191  return *v1 < *v2;
192  }
193 };
194 
195 template < typename T,
196 typename V,
197 typename Cmp,
198 typename Items > class Node
199 {
200 public:
201  typedef NodeItem<T, V, Cmp, Items> NodeItemClass;
202  typedef EndNodeItem<T, V, Cmp, Items> EndNodeItemClass;
203  typedef Node<T, V, Cmp, Items> NodeClass;
204 private:
205  typedef typename Items::iterator ItemsContainerIter;
206  typedef typename Items::const_iterator ItemsContainerConstIter;
207 
208 public:
209 
211  {
212  protected:
213  typedef std::pair<const T *, const V *> KeyValuePair;
214  typedef typename NodeClass::ItemsContainerConstIter ItemsContainerConstIter;
215 
216  public:
217  const_iterator(const NodeClass *node, const NodeClass * root, const T * key = 0, bool mooveToEnd = false)
218  : mRootNode(root),
219  mCurrentNode(node),
220  mCheckKeyLeft(false),
221  mCheckKeyRight(true),
222  mEndReached(false) {
223  if (!root) {
224  mRootNode = node;
225  }
226  pushNode(node, key, mooveToEnd);
227  if (!mooveToEnd) {
228  next();
229  }
230  }
231 
232  const_iterator(const const_iterator & oth)
233  : mRootNode(oth.mRootNode),
234  mCurrentNode(oth.mCurrentNode),
235  mCurrentPos(oth.mCurrentPos),
236  mKeyStack(oth.mKeyStack),
237  mCheckKeyLeft(oth.mCheckKeyLeft),
238  mCheckKeyRight(oth.mCheckKeyRight),
239  mEndReached(oth.mEndReached) {
240  if (!mKeyStack.empty()) {
241  mKeyValuePair = std::make_pair(&mKeyStack[0], oth.mKeyValuePair.second);
242  }
243  }
244 
245  const_iterator & operator=(const const_iterator & oth) {
246  if (this != &oth) {
247  mRootNode = oth.mRootNode;
248  mCurrentNode = oth.mCurrentNode;
249  mCurrentPos = oth.mCurrentPos;
250  mKeyStack = oth.mKeyStack;
251  if (!mKeyStack.empty()) {
252  mKeyValuePair = std::make_pair(&mKeyStack[0], oth.mKeyValuePair.second);
253  }
254  mCheckKeyLeft = oth.mCheckKeyLeft;
255  mCheckKeyRight = oth.mCheckKeyRight;
256  mEndReached = oth.mEndReached;
257  }
258  return *this;
259  }
260 
261  const KeyValuePair &operator*() const {
262  return this->mKeyValuePair;
263  }
264 
265  const KeyValuePair *operator->() const {
266  return &this->mKeyValuePair;
267  }
268 
269  bool operator==(const_iterator const &oth) const {
270  return this->equals(oth);
271  }
272 
273  bool operator!=(const_iterator const &oth) const {
274  return !(*this == oth);
275  }
276 
277  const_iterator operator++(int) {
278  const_iterator iter = *this;
279  ++(*this);
280  return iter;
281  }
282 
283  const_iterator &operator++() {
284  this->next();
285  return *this;
286  }
287 
288  const_iterator operator--(int) {
289  const_iterator iter = *this;
290  --(*this);
291  return iter;
292  }
293 
294  const_iterator &operator--() {
295  this->previous();
296  return *this;
297  }
298 
299  protected:
300  friend class Node<T, V, Cmp, Items>;
301 
302  const NodeClass * mRootNode;
303  const NodeClass * mCurrentNode;
304  ItemsContainerConstIter mCurrentPos;
305  std::vector<T> mKeyStack;
306  KeyValuePair mKeyValuePair;
307  bool mCheckKeyLeft;
308  bool mCheckKeyRight;
309  bool mEndReached;
310 
311  protected:
312  void previous() {
313  if (!mCurrentNode->mItems.empty() && mCurrentPos == mCurrentNode->mItems.end()) {
314  --mCurrentPos;
315  }
316 
317  bool newNode = false;
318  bool oldNode = false;
319 
320  while (mEndReached || !isLeftEnd()) {
321  mEndReached = false;
322  ItemsContainerConstIter iterBegin = mCurrentNode->mItems.begin();
323  if (!mKeyStack.empty()) {
324  if (mKeyStack.back() == mCurrentNode->endSymbol()) {
325  mKeyStack.pop_back();
326  mCurrentPos = mCurrentNode->mItems.begin();
327  if (isLeftEnd()) {
328  break;
329  }
330  }
331  }
332 
333  if (!newNode && !mKeyStack.empty()) {
334  if (mCheckKeyLeft) {
335  mCurrentNode = mCurrentNode->parent();
336  mCurrentPos = mCurrentNode->mItems.find(mKeyStack.back());
337  iterBegin = mCurrentNode->mItems.begin();
338  mKeyStack.pop_back();
339  mCheckKeyLeft = false;
340  if (mCurrentPos != iterBegin) {
341  --mCurrentPos;
342  oldNode = false;
343  } else {
344  oldNode = true;
345  }
346  } else {
347  ItemsContainerConstIter iter = mCurrentNode->mItems.find(mCurrentNode->endSymbol());
348  if (iter != mCurrentNode->mItems.end()) {
349  mCurrentPos = iter;
350  const NodeItemClass & item = *(const NodeItemClass *) * mCurrentPos;
351  mKeyStack.push_back(item.get());
352  mKeyValuePair.first = &(mKeyStack[0]);
353  mKeyValuePair.second = &(((const EndNodeItemClass &)item).getValue());
354  mCheckKeyLeft = true;
355  return;
356  } else {
357  mCurrentNode = mCurrentNode->parent();
358  mCurrentPos = mCurrentNode->mItems.find(mKeyStack.back());
359  iterBegin = mCurrentNode->mItems.begin();
360  mKeyStack.pop_back();
361  mCheckKeyLeft = false;
362  if (mCurrentPos != iterBegin) {
363  --mCurrentPos;
364  oldNode = false;
365  } else {
366  oldNode = true;
367  }
368  }
369  }
370  }
371 
372  newNode = false;
373  for (; mCurrentPos != iterBegin; --mCurrentPos) {
374  if (*mCurrentPos) {
375  const NodeItemClass &item = *(const NodeItemClass *) * mCurrentPos;
376  if (item != mCurrentNode->endSymbol()) {
377  mKeyStack.push_back(item.get());
378  pushNode(item.getChilds(), 0, true);
379  --mCurrentPos;
380  newNode = true;
381  break;
382  }
383  }
384  oldNode = false;
385  }
386  if (!newNode && !oldNode) {
387  if (*mCurrentPos) {
388  const NodeItemClass &item = *(const NodeItemClass *) * mCurrentPos;
389  if (item != mCurrentNode->endSymbol()) {
390  mKeyStack.push_back(item.get());
391  pushNode(item.getChilds(), 0, true);
392  --mCurrentPos;
393  newNode = true;
394  }
395  }
396  }
397 
398  }
399  mCurrentPos = mCurrentNode->mItems.end();
400  mEndReached = true;
401  }
402 
403  void next() {
404  while (!isEnd()) {
405  ItemsContainerConstIter iterEnd = mCurrentNode->mItems.end();
406 
407  if (!mKeyStack.empty()) {
408  if (mKeyStack.back() == mCurrentNode->endSymbol()) {
409  mKeyStack.pop_back();
410  mCurrentPos = mCurrentNode->mItems.begin();
411  }
412  }
413 
414  if (mCurrentPos == iterEnd && !mKeyStack.empty()) {
415  mCurrentNode = mCurrentNode->parent();
416  mCurrentPos = mCurrentNode->mItems.find(mKeyStack.back());
417  ++mCurrentPos;
418  iterEnd = mCurrentNode->mItems.end();
419  mKeyStack.pop_back();
420  }
421 
422  for (; mCurrentPos != iterEnd; ++mCurrentPos) {
423  if (mCheckKeyRight) {
424  mCheckKeyRight = false;
425  ItemsContainerConstIter iter = mCurrentNode->mItems.find(mCurrentNode->endSymbol());
426  if (iter != iterEnd) {
427  mCurrentPos = iter;
428  const NodeItemClass & item = *(const NodeItemClass *) * mCurrentPos;
429  mKeyStack.push_back(item.get());
430  mKeyValuePair.first = &(mKeyStack[0]);
431  mKeyValuePair.second = &(((const EndNodeItemClass &)item).getValue());
432  mCheckKeyLeft = true;
433  return;
434  }
435  }
436 
437  if (*mCurrentPos) {
438  const NodeItemClass &item = *(const NodeItemClass *) * mCurrentPos;
439  if (item != mCurrentNode->endSymbol()) {
440  mKeyStack.push_back(item.get());
441  pushNode(item.getChilds());
442  break;
443  }
444  }
445  }
446  }
447  mEndReached = true;
448  }
449 
450  bool isEnd() const {
451  if (this->mRootNode == this->mCurrentNode &&
452  this->mCurrentPos == this->mCurrentNode->mItems.end()) {
453  return true;
454  }
455  return false;
456  }
457 
458  bool isLeftEnd() const {
459  if (this->mRootNode == this->mCurrentNode &&
460  this->mCurrentPos == this->mCurrentNode->mItems.begin()) {
461  return true;
462  }
463  return false;
464  }
465 
466  bool equals(const const_iterator & oth) const {
467  if (this->isEnd() && oth.isEnd()) {
468  return true;
469  }
470 
471  if (this->mCurrentNode == oth.mCurrentNode &&
472  this->mCurrentPos == oth.mCurrentPos) {
473  return true;
474  }
475 
476  return false;
477  }
478 
479  void pushNode(const NodeClass *node, const T * key = 0, bool mooveToEnd = false) {
480  mCurrentNode = node;
481  mCheckKeyLeft = false;
482  if (mooveToEnd) {
483  mCurrentPos = mCurrentNode->mItems.end();
484  mEndReached = true;
485  mCheckKeyRight = false;
486  } else {
487  if (key) {
488  for (int i = 0; key[i] != node->endSymbol(); ++i) {
489  mKeyStack.push_back(key[i]);
490  }
491  }
492  mCurrentPos = node->mItems.begin();
493  mCheckKeyRight = true;
494  }
495  }
496  };
497 
498  class iterator : public const_iterator
499  {
500  private:
501  typedef std::pair<const T *, V *> MutableKeyValuePair;
502  MutableKeyValuePair mMutableKeyValuePair;
503 
504  private:
505  MutableKeyValuePair & getPair() {
506  mMutableKeyValuePair.first = this->mKeyValuePair.first;
507  mMutableKeyValuePair.second = const_cast<V *>(this->mKeyValuePair.second);
508  return this->mMutableKeyValuePair;
509  }
510 
511  public:
512  iterator(NodeClass *node, NodeClass *root, const T * key = 0, bool mooveToEnd = false)
513  : const_iterator(node, root, key, mooveToEnd) {}
514 
515  MutableKeyValuePair &operator*() {
516  return getPair();
517  }
518 
519  MutableKeyValuePair *operator->() {
520  return &(getPair());
521  }
522 
523  iterator operator++(int) {
524  iterator iter = *this;
525  ++(*this);
526  return iter;
527  }
528 
529  iterator &operator++() {
530  this->next();
531  return *this;
532  }
533 
534  iterator operator--(int) {
535  iterator iter = *this;
536  --(*this);
537  return iter;
538  }
539 
540  iterator &operator--() {
541  this->previous();
542  return *this;
543  }
544  };
545 
546 private:
547  Node(Node const &);
548  Node &operator=(Node const &);
549 
550  static void deleteItem(NodeItemClass *item) {
551  delete item;
552  }
553 
554 
555  NodeClass * nodeWithKey(const T *key) {
556  return const_cast<NodeClass *>(const_cast<const NodeClass *>(this)->nodeWithKey(key));
557  }
558 
559  const NodeClass * nodeWithKey(const T *key) const {
560  const NodeClass * node = nodeWithPrefix(key);
561  if (node) {
562  if (node->mItems.getItem(mEndSymbol)) {
563  return node;
564  }
565  }
566  return 0;
567  }
568 
569  const NodeClass * nodeWithPrefix(const T *prefix) const {
570  int i=0;
571  const NodeClass * node = this;
572 
573  while (node) {
574  if (prefix[i] == mEndSymbol) {
575  return node;
576  }
577  const NodeItemClass *item = node->mItems.getItem(prefix[i]);
578  if (!item) {
579  break;
580  }
581 
582  node = item->getChilds();
583  ++i;
584  }
585  return 0;
586  }
587 
588  bool erase(NodeClass * node, const T * key) {
589  bool erased = false;
590  int keyIndex = 0;
591 
592  if (node && key) {
593  bool finished = false;
594  int count = 0;
595  erased = true;
596 
597  if (!keyIndex) {
598  while (key[keyIndex] != node->endSymbol()) {
599  ++ keyIndex;
600  }
601  }
602 
603  while (node && !finished && erased) {
604  ItemsContainerIter iterEnd = node->mItems.end();
605 
606  count = 0;
607  for (ItemsContainerIter iter = node->mItems.begin(); iter != iterEnd; ++iter) {
608  if (*iter != 0) {
609  ++count;
610  }
611  }
612 
613  if (count > 1) {
614  erased = node->mItems.eraseItem(key[keyIndex]);
615  finished = true;
616  } else if (count == 1) {
617  erased = node->mItems.eraseItem(key[keyIndex]);
618  }
619 
620  --keyIndex;
621  node = node->parent();
622  }
623 
624  }
625  if (erased) {
626  --mSize;
627  }
628  return erased;
629  }
630 
631 public:
632  Node(const T &eSymbol, NodeClass * parent = 0)
633  : mItems(eSymbol),
634  mEndSymbol(eSymbol),
635  mSize(0),
636  mParent(parent) {}
637 
638  ~Node() {
639  clear();
640  }
641 
642  T endSymbol() const {
643  return mEndSymbol;
644  }
645 
646  void clear() {
647  std::for_each(mItems.begin(), mItems.end(), deleteItem);
648  mItems.clear();
649  mSize = 0;
650  }
651 
652  bool empty() const {
653  return size() == 0;
654  }
655 
656  unsigned int size() const {
657  return mSize;
658  }
659 
660  std::pair<iterator, bool> insert(const T *key, V const &value) {
661  std::pair<iterator, bool> result(end(), false);
662  int i = 0;
663  NodeClass * node = this;
664 
665  while (true) {
666  std::pair<typename Items::Item *, bool> itemPair = node->mItems.insertItem(key[i]);
667  NodeItemClass *item = itemPair.first;
668  if (itemPair.second) {
669  result.first = iterator(node, this, key);
670  break;
671  }
672  if (!item) {
673  break;
674  } else if (key[i] == mEndSymbol) {
675  ((EndNodeItemClass *)item)->set(key[i], value);
676  result.first = iterator(node, this, key);
677  result.second = true;
678  ++mSize;
679  break;
680  } else {
681  node = item->getOrCreateChilds(node);
682  }
683  ++i;
684  }
685 
686  return result;
687  }
688 
689  bool erase(iterator pos) {
690  if (pos.mCurrentNode && pos.mCurrentPos != pos.mCurrentNode->mItems.end()) {
691  return erase(const_cast<NodeClass *>(pos.mCurrentNode), pos->first);
692  }
693  return false;
694  }
695 
696  bool erase(const T *key) {
697  NodeClass * node = nodeWithKey(key);
698  if (node) {
699  return erase(node, key);
700  }
701  return false;
702  }
703 
704  const V *get(const T *key) const {
705  return const_cast<NodeClass *>(this)->get(key);
706  }
707 
708  V *get(const T *key) {
709  NodeClass * node = nodeWithKey(key);
710  if (node) {
711  NodeItemClass *item = node->mItems.getItem(mEndSymbol);
712  return &(((EndNodeItemClass *)item)->getValue());
713  }
714  return 0;
715  }
716 
717 
718  bool hasKey(const T *key) const {
719  return get(key) != (V *)0;
720  }
721 
722  const NodeClass * parent() const {
723  return mParent;
724  }
725 
726  NodeClass * parent() {
727  return mParent;
728  }
729 
730  const_iterator begin() const {
731  return const_iterator(this, this);
732  }
733 
734  const_iterator end() const {
735  return const_iterator(this, this, 0, true);
736  }
737 
738  iterator begin() {
739  return iterator(this, this);
740  }
741 
742  iterator end() {
743  return iterator(this, this, 0, true);
744  }
745 
746  const_iterator find(const T *key) const {
747  NodeClass * node = const_cast<NodeClass *>(this)->nodeWithKey(key);
748  if (!node) {
749  return const_iterator(this, this, 0, true);
750  } else {
751  return const_iterator(node, this, key);
752  }
753  }
754 
755  iterator find(const T *key) {
756  NodeClass * node = this->nodeWithKey(key);
757  if (!node) {
758  return iterator(this, this, 0, true);
759  } else {
760  return iterator(node, this, key);
761  }
762  }
763 
764  iterator startsWith(const T *prefix) {
765  const NodeClass * node = const_cast<const NodeClass *>(this)->nodeWithPrefix(prefix);
766  if (!node) {
767  return iterator(this, this, 0, true);
768  } else {
769  return iterator(const_cast<NodeClass *>(node), const_cast<NodeClass *>(node), prefix);
770  }
771  }
772 
773  const_iterator startsWith(const T *prefix) const {
774  const NodeClass * node = nodeWithPrefix(prefix);
775  if (!node) {
776  return const_iterator(this, this, 0, true);
777  } else {
778  return const_iterator(node, node, prefix);
779  }
780  }
781 
782 private:
783  Items mItems;
784  const T mEndSymbol;
785  unsigned int mSize;
786  NodeClass * mParent;
787 };
788 
789 /*!
790  *
791  */
792 template <typename T> class SymbolToIndexMapper
793 {
794 public:
795  unsigned int operator()(const T & c) const {
796  return static_cast<unsigned int>(c);
797  }
798 };
799 
800 /*!
801  * @brief Container representing each node in the Trie.
802  *
803  *
804  * With this class the container used for storing node item is STL vector.
805  * Here each node will use a space propotional to Max.
806  * For searching only constant time taken at each node.
807  * @tparam T Type for each element in the key
808  * @tparam V Type of the value that the key will be representing
809  * @tparam Cmp Comparison functor
810  * @tparam Max Maximum element that a Trie node can have
811  */
812 template < typename T,
813 typename V,
814 typename Cmp,
815 int Max = 256,
817 {
818 public:
820  typedef std::vector<Item *> Items;
821  typedef typename Items::iterator iterator;
822  typedef typename Items::const_iterator const_iterator;
824  typedef typename NodeClass::NodeItemClass NodeItemClass;
826 
827 public:
828  VectorItems(T const &endSymbol)
829  : mEndSymbol(endSymbol),
830  mItems(Max, (Item *)0) {}
831 
832  const_iterator find(const T & k) const {
833  const Item * item = getItem(k);
834  if (item) {
835  return mItems.begin() + mSymolToIndex(k);
836  }
837  return mItems.end();
838  }
839 
840  iterator find(const T & k) {
841  const Item * item = getItem(k);
842  if (item) {
843  return mItems.begin() + mSymolToIndex(k);
844  }
845  return mItems.end();
846  }
847 
848  iterator begin() {
849  return mItems.begin();
850  }
851 
852  const_iterator begin() const {
853  return mItems.begin();
854  }
855 
856  iterator end() {
857  return mItems.end();
858  }
859 
860  const_iterator end() const {
861  return mItems.end();
862  }
863 
864  void clear() {
865  std::fill(mItems.begin(), mItems.end(), (Item *)0);
866  }
867 
868  bool empty() const {
869  return mItems.empty();
870  }
871 
872  std::pair<Item *, bool> insertItem(T const &k) {
873  std::pair<Item *, bool> ret((Item *)0, false);
874  if (!getItem(k)) {
875  assignItem(k, createNodeItem(k));
876  ret.first = getItem(k);
877  } else {
878  ret.first = getItem(k);
879  if (k == mEndSymbol) {
880  ret.second = true;
881  }
882  }
883  return ret;
884  }
885 
886  bool eraseItem(T const &k) {
887  Item * item = getItem(k);
888  if (item) {
889  delete item;
890  assignItem(k, (Item *)0);
891  return true;
892  } else {
893  return false;
894  }
895  }
896 
897  Item *getItem(T const &k) {
898  return mItems[mSymolToIndex(k)];
899  }
900 
901  const Item *getItem(T const &k) const {
902  return mItems[mSymolToIndex(k)];
903  }
904 
905  void assignItem(T k, Item *i) {
906  mItems[mSymolToIndex(k)] = i;
907  }
908 
909  NodeItemClass *createNodeItem(T const &k) {
910  if (k == mEndSymbol) {
911  return new EndNodeItemClass(mEndSymbol, k);
912  } else {
913  return new NodeItemClass(mEndSymbol, k);
914  }
915  }
916 
917 protected:
918  const T mEndSymbol;
919  Items mItems;
920  M mSymolToIndex;
921 };
922 
923 /*!
924  * @brief Container representing each node in the Trie.
925  *
926  *
927  * With this class the container used for storing node item is STL set.
928  * Here no extra space will used for storing node item.
929  * For searching in each node the time taken is propotional to number of item in the node.
930  * @tparam T Type for each element in the key
931  * @tparam V Type of the value that the key will be representing
932  * @tparam Cmp Comparison functor
933  */
934 template < typename T,
935 typename V,
936 typename Cmp > class SetItems
937 {
938 public:
940  typedef std::set<Item *, NodeItemPtrCompare<T, V, Cmp, SetItems<T, V, Cmp> > > Items;
941  typedef typename Items::iterator iterator;
942  typedef typename Items::const_iterator const_iterator;
944  typedef typename NodeClass::NodeItemClass NodeItemClass;
946 
947 public:
948  SetItems(T const &endSymbol)
949  : mEndSymbol(endSymbol) {}
950 
951  const_iterator find(const T & k) const {
952  return const_cast<SetItems<T, V, Cmp> *>(this)->find(k);
953  }
954 
955  iterator find(const T & k) {
956  Item tmp(mEndSymbol, k);
957  return mItems.find(&tmp);
958  }
959 
960  iterator begin() {
961  return mItems.begin();
962  }
963 
964  const_iterator begin() const {
965  return mItems.begin();
966  }
967 
968  iterator end() {
969  return mItems.end();
970  }
971 
972  const_iterator end() const {
973  return mItems.end();
974  }
975 
976  bool empty() const {
977  return mItems.empty();
978  }
979 
980  void clear() {
981  mItems.clear();
982  }
983 
984  std::pair<Item *, bool> insertItem(T const &k) {
985  std::pair<Item *, bool> ret((Item*)0, false);
986  Item tmp(mEndSymbol, k);
987  iterator iter = mItems.find(&tmp);
988  if (iter == mItems.end()) {
989  Item *v = createNodeItem(k);
990  mItems.insert(v);
991  ret.first = v;
992  } else {
993  ret.first = (Item *) * iter;
994  if (k == mEndSymbol) {
995  ret.second = true;
996  }
997  }
998  return ret;
999  }
1000 
1001  bool eraseItem(T const &k) {
1002  Item tmp(mEndSymbol, k);
1003  iterator iter = mItems.find(&tmp);
1004  if (iter != mItems.end()) {
1005  delete *iter;
1006  mItems.erase(iter);
1007  return true;
1008  } else {
1009  return false;
1010  }
1011  }
1012 
1013  const Item *getItem(T const &k) const {
1014  return const_cast<SetItems *>(this)->getItem(k);
1015  }
1016 
1017  Item *getItem(T const &k) {
1018  Item tmp(mEndSymbol, k);
1019 
1020  iterator iter = mItems.find(&tmp);
1021  if (iter == mItems.end()) {
1022  return 0;
1023  }
1024  return (Item *)(*iter);
1025  }
1026 
1027  NodeItemClass *createNodeItem(T const &k) {
1028  if (k == mEndSymbol) {
1029  return new EndNodeItemClass(mEndSymbol, k);
1030  } else {
1031  return new NodeItemClass(mEndSymbol, k);
1032  }
1033  }
1034 
1035 protected:
1036  const T mEndSymbol;
1037  Items mItems;
1038 };
1039 
1040 /*!
1041  * @mainpage Simple Trie
1042  * @section intro_sec Introduction
1043  * This is an implementation of prefix Trie data structure. This implementation is in C++ and using template.
1044  * @section features Features
1045  * Following are the main operations provided by the Trie.
1046  * <ul>
1047  * <li>Adding key, value pair
1048  * <li>Removing an element by key
1049  * <li>Checking the existence of a key
1050  * <li>Retrieving value by key
1051  * <li>Find elements with common prefix
1052  * <li>Iterator
1053  * </ul>
1054  */
1055 
1056 
1057 /*!
1058  * @brief Trie main class
1059  * @tparam T Type for each element in the key
1060  * @tparam V Type of the value that the key will be representing
1061  * @tparam Cmp Comparison functor
1062  * @tparam Items The data structure that represents each node in the Trie.
1063  * Items can be Mezzanine::SetItems<T, V, Cmp> or Mezzanine::VectorItems<T, V, Cmp, Max>,
1064  * Max is the integer representing number of elements in each Trie node.
1065  *
1066  * @section usage_sec Usage of the Trie
1067  * @subsection usage_declaration Declarating the Trie
1068  * A Trie with key as chars and value as std::string can be declared as given below
1069  * @code
1070  * #include <string>
1071  * #include <trie.h>
1072  *
1073  * int main(int argc, char ** argv) {
1074  *
1075  * Mezzanine::Trie<char, std::string> dictionary('$');
1076  *
1077  * return 0;
1078  * }
1079  * @endcode
1080  *
1081  * @subsection usage_population Populatiion and deletion from the Trie
1082  * Trie can be populated by using the Trie::insert method and element can be removed using Trie::erase.
1083  * @code
1084  * #include <trie.h>
1085  * #include <string>
1086  * #include <iostream>
1087  *
1088  * int main(int argc, char ** argv) {
1089  *
1090  * Mezzanine::Trie<char, std::string> dictionary('$');
1091  *
1092  * // adding key value pair to the Trie
1093  * if (dictionary.insert("karma$", "Destiny or fate, following as effect from cause").second) {
1094  * std::cout << "added karma" << std::endl;
1095  * }
1096  *
1097  * // removing key from the Trie
1098  * if (dictionary.erase("karma$")) {
1099  * std::cout << "removed karma" << std::endl;
1100  * }
1101  *
1102  * return 0;
1103  * }
1104  *
1105  * @endcode
1106  * @subsection usage_retrieval Retrieval of Value
1107  * Value for a key can be retrieved using Trie::get method and
1108  * the existence of the key can be tested using Trie::hasKey method.
1109  * @code
1110  * #include <trie.h>
1111  * #include <string>
1112  * #include <iostream>
1113  *
1114  * int main(int argc, char ** argv) {
1115  *
1116  * Mezzanine::Trie<char, std::string> dictionary('$');
1117  *
1118  * dictionary.insert("karma$", "Destiny or fate, following as effect from cause");
1119  *
1120  * if (dictionary.hasKey("karma$")) {
1121  * std::cout << "key karma found" << std::endl;
1122  * }
1123  * std::string * result = dictionary.get("karma$");
1124  * if (result) {
1125  * std::cout << result->c_str() << std::endl;
1126  * }
1127  *
1128  * return 0;
1129  * }
1130  * @endcode
1131  *
1132  * @subsection usage_searching Searching keys which have common prefix
1133  * Keys which begins with a specific charactars can be retrieved using Trie::startsWith method
1134  * @code
1135  * #include <trie.h>
1136  * #include <string>
1137  * #include <iostream>
1138  * #include <vector>
1139  *
1140  * int main(int argc, char ** argv) {
1141  *
1142  * Mezzanine::Trie<char, std::string> dictionary('\0');
1143  *
1144  * dictionary.insert("karma", "Destiny or fate, following as effect from cause");
1145  * Mezzanine::Trie<char, std::string>::iterator iter = dictionary.startsWith("kar");
1146  *
1147  * for (; iter != dictionary.end(); ++iter) {
1148  * std::cout << iter->first << " : " << iter->second->c_str() << std::endl;
1149  * }
1150  *
1151  * return 0;
1152  * }
1153  * @endcode
1154  *
1155  * @subsection usage_array_of_node Trie with each Node as an array
1156  * Here each node of the Trie is an array. The advantage is that the searching of a symbol in the array takes O(1) time (is constant time).
1157  * The disadvantage is that the array will have empty elements so the space used will more than actually required.
1158  *
1159  * @code
1160  *
1161  * #include <trie.h>
1162  * #include <string>
1163  * #include <iostream>
1164  * #include <vector>
1165  *
1166  * int main(int argc, char ** argv) {
1167  *
1168  * // Here 256 is the size of array in each node
1169  * Mezzanine::Trie<char, std::string, std::less<char>,
1170  * Mezzanine::VectorItems<char, std::string, std::less<char>, 256> > dictionary('$');
1171  *
1172  * return 0;
1173  * }
1174  * @endcode
1175  *
1176  * @subsection usage_vector_item Efficient use of Trie for alphabets
1177  * Below example shows how to use Trie to operate on alphabets efficiently.
1178  * Here VectorItems is used to store alphabets with size of 28 (26 alphabets + space + end symbol).
1179  *
1180  * @code
1181  * #include <trie.h>
1182  * #include <string>
1183  * #include <iostream>
1184  * #include <vector>
1185  * #include <cctype>
1186  *
1187  * class TrieCaseInsensitiveCompare
1188  * {
1189  * public:
1190  * bool operator()(char v1, char v2) {
1191  * int i1 = std::tolower(v1);
1192  * int i2 = std::tolower(v2);
1193  * return i1 < i2;
1194  * }
1195  * };
1196  *
1197  * // key to vector index converter
1198  * // case insensitive and includes alphabets, space and end symbol
1199  * class AlphaToIndex
1200  * {
1201  * public:
1202  * unsigned int operator()(const char & c) const {
1203  * unsigned int index = 0;
1204  * if (c == ' ') {
1205  * index = 27;
1206  * } else if (c >= 'A' && c <= 'Z') {
1207  * index = c - 'A' + 1;
1208  * } else if (c >= 'a' && c <= 'z') {
1209  * index = c - 'a' + 1;
1210  * }
1211  * return index;
1212  * }
1213  * };
1214  *
1215  * int main(int argc, char ** argv) {
1216  *
1217  * Mezzanine::Trie<char, std::string,
1218  * TrieCaseInsensitiveCompare,
1219  * Mezzanine::VectorItems<char, std::string, TrieCaseInsensitiveCompare, 28, AlphaToIndex>
1220  * > dictionary('\0');
1221  *
1222  * // adding key value pair to the Trie
1223  * if (dictionary.insert("karma", "Destiny or fate, following as effect from cause").second) {
1224  * std::cout << "added karma" << std::endl;
1225  * }
1226  *
1227  * // removing key from the Trie
1228  * if (dictionary.erase("karma")) {
1229  * std::cout << "removed karma" << std::endl;
1230  * }
1231  *
1232  * return 0;
1233  * }
1234  *
1235  * @endcode
1236  *
1237  * @subsection usage_set_of_node Trie with each Node as a set
1238  * Here each node will be an ordered set.
1239  * Here there will be no extra usage of space but search for a symbol in the node takes logarithmic time.
1240  * Trie with this feature can also be used for caseinsensitive symbol handling.
1241  * @code
1242  *
1243  * #include <trie.h>
1244  * #include <string>
1245  * #include <iostream>
1246  * #include <set>
1247  *
1248  * int main(int argc, char ** argv) {
1249  *
1250  * Mezzanine::Trie<char, std::string, std::less<char>,
1251  * Mezzanine::SetItems<char, std::string, std::less<char> > > dictionary('$');
1252  *
1253  * return 0;
1254  * }
1255  * @endcode
1256  * @subsection usage_iterator Using Trie::iterator
1257  * Trie iterator can be used the same way as STL iterator.
1258  * Key and value can be accessed from iterator using first and secod member.
1259  * first is vector of key characters and second is a pointer to the value.
1260  * @code
1261  * #include <trie.h>
1262  * #include <string>
1263  * #include <iostream>
1264  * #include <vector>
1265  *
1266  * int main(int argc, char ** argv) {
1267  *
1268  * Mezzanine::Trie<char, std::string> dictionary('\0');
1269  *
1270  * dictionary.insert("karma$", "Destiny or fate, following as effect from cause");
1271  *
1272  * Mezzanine::Trie<char, std::string>::iterator iter = dictionary.begin();
1273  * Mezzanine::Trie<char, std::string>::iterator iend = dictionary.end();
1274  *
1275  * for (; iter != iend; ++iter) {
1276  *
1277  * std::cout << iter->first << " " << iter->second->c_str() << std::endl;
1278  * }
1279  *
1280  * return 0;
1281  * }
1282  * @endcode
1283  */
1284 template < typename T,
1285 typename V,
1286 typename Cmp = std::less<T>,
1287 typename Items = SetItems<T, V, Cmp> > class Trie
1288 {
1289 public:
1290  typedef typename Node<T, V, Cmp, Items>::iterator iterator;
1292 
1293 public:
1294  /*!
1295  * @param endSymbol The symbol which marks the end of key input
1296  */
1297  Trie(const T &endSymbol)
1298  : mRoot(endSymbol) {}
1299 
1300  /*!
1301  * Add a key with value in to the Trie
1302  * @param key Key which should be inserted, should be terminated by 'end' symbol
1303  * @param value The value that is to be set with the key
1304  * @return An std::pair with pair::first set to the iterator points to the element and pair::second to true is key is newly inserted, false otherwise
1305  */
1306  std::pair<iterator, bool> insert(const T *key, V const &value) {
1307  return mRoot.insert(key, value);
1308  }
1309 
1310  /*!
1311  * Add a key with value in to the Trie
1312  * @param key Key which should be inserted, should be terminated by 'end' symbol
1313  * @param value The value that is to be set with the key
1314  * @return An std::pair with pair::first set to the iterator points to the element and pair::second to true is key is newly inserted, false otherwise
1315  */
1316  std::pair<iterator, bool> insert(const std::vector<T>& key, V const &value) {
1317  /// @todo Currently this uses a hack that isn't safe for all types and circumstances. This should be changed to use "data()" function member new to C++11 when we make that dive.
1318  return this->insert(&key[0], value);
1319  }
1320 
1321  /*!
1322  * Remove the entry with the given key from the Trie
1323  * @param key Key which should be erased, should be terminated by 'end' symbol
1324  * @return true if the given key is erased from the Trie, false otherwise
1325  */
1326  bool erase(const T *key) {
1327  return mRoot.erase(key);
1328  }
1329 
1330  /*!
1331  * Remove the entry with the given key from the Trie
1332  * @param key Key which should be erased, should be terminated by 'end' symbol
1333  * @return true if the given key is erased from the Trie, false otherwise
1334  */
1335  bool erase(const std::vector<T>& key) {
1336  /// @todo Currently this uses a hack that isn't safe for all types and circumstances. This should be changed to use "data()" function member new to C++11 when we make that dive.
1337  return this->erase(&key[0]);
1338  }
1339 
1340  /*!
1341  * Remove the entry with the given key from the Trie
1342  * @param pos iterator pointing to a single element to be removed from the Trie
1343  * @return true if the given key is erased form the Trie, false otherwise
1344  */
1345  bool erase(iterator pos) {
1346  return mRoot.erase(pos);
1347  }
1348 
1349  /*!
1350  * Retrieves the value for the given key
1351  * @param key Key to be searched for, should be terminated by 'end' symbol
1352  * @return Constant pointer to value for the given key, 0 on failure
1353  */
1354  const V *get(const T *key) const {
1355  return mRoot.get(key);
1356  }
1357 
1358  /*!
1359  * Retrieves the value for the given key
1360  * @param key Key to be searched for, should be terminated by 'end' symbol
1361  * @return Constant pointer to value for the given key, 0 on failure
1362  */
1363  const V *get(const std::vector<T>& key) const {
1364  /// @todo Currently this uses a hack that isn't safe for all types and circumstances. This should be changed to use "data()" function member new to C++11 when we make that dive.
1365  return this->get(&key[0]);
1366  }
1367 
1368  /*!
1369  * Retrieves the value for the given key
1370  * @param key Key to be searched for, should be terminated by 'end' symbol
1371  * @return Pointer to value for the given key, 0 on failure
1372  */
1373  V *get(const T *key) {
1374  return mRoot.get(key);
1375  }
1376 
1377  /*!
1378  * Retrieves the value for the given key
1379  * @param key Key to be searched for, should be terminated by 'end' symbol
1380  * @return Pointer to value for the given key, 0 on failure
1381  */
1382  V *get(const std::vector<T>& key) {
1383  /// @todo Currently this uses a hack that isn't safe for all types and circumstances. This should be changed to use "data()" function member new to C++11 when we make that dive.
1384  return this->get(&key[0]);
1385  }
1386 
1387  /*!
1388  * Retrieves the value for the given key,
1389  * If key does not match the key of any element in the Trie,
1390  * the function inserts a new element with that key and returns a reference to its mapped value
1391  * @param key Key to be searched for, should be terminated by 'end' symbol
1392  * @return Reference to value for the given key
1393  */
1394  V &operator[](const T *key) {
1395  return *(insert(key, V()).first->second);
1396  }
1397 
1398  /*!
1399  * Retrieves the value for the given key,
1400  * If key does not match the key of any element in the Trie,
1401  * the function inserts a new element with that key and returns a reference to its mapped value
1402  * @param key Key to be searched for, should be terminated by 'end' symbol
1403  * @return Reference to value for the given key
1404  */
1405  V &operator[](const std::vector<T>& key) {
1406  /// @todo Currently this uses a hack that isn't safe for all types and circumstances. This should be changed to use "data()" function member new to C++11 when we make that dive.
1407  return this->operator[](&key[0]);
1408  }
1409 
1410  /*!
1411  * Checks whether the given key is present in the Trie
1412  * @param key Key to be searched for, should be terminated by 'end' symol
1413  * @return true if the key is present
1414  */
1415  bool hasKey(const T *key) const {
1416  return mRoot.hasKey(key);
1417  }
1418 
1419  /*!
1420  * Checks whether the given key is present in the Trie
1421  * @param key Key to be searched for, should be terminated by 'end' symol
1422  * @return true if the key is present
1423  */
1424  bool hasKey(const std::vector<T>& key) const {
1425  /// @todo Currently this uses a hack that isn't safe for all types and circumstances. This should be changed to use "data()" function member new to C++11 when we make that dive.
1426  return this->hasKey(&key[0]);
1427  }
1428 
1429  /*!
1430  * Test whether Trie is empty
1431  * @return true if the Trie size is 0, false otherwise
1432  */
1433  bool empty() const {
1434  return mRoot.empty();
1435  }
1436 
1437  /*!
1438  * Returns the number of elements in the Trie
1439  * @return Number of key value pair in the Trie
1440  */
1441  unsigned int size() const {
1442  return mRoot.size();
1443  }
1444 
1445  /*!
1446  * All the elements in the Trie are dropped, leaving the Trie with a size of 0.
1447  */
1448  void clear() {
1449  mRoot.clear();
1450  }
1451 
1452  /*!
1453  * Retrieves iterator to the elements with common prefix
1454  * @param prefix Part of the key which should be searched, should be terminated by 'end' symbol
1455  * @return Iterator to the elements with prefix specified in 'prefix'
1456  */
1457  iterator startsWith(const T *prefix) {
1458  return mRoot.startsWith(prefix);
1459  }
1460 
1461  /*!
1462  * Retrieves iterator to the elements with common prefix
1463  * @param prefix Part of the key which should be searched, should be terminated by 'end' symbol
1464  * @return Iterator to the elements with prefix specified in 'prefix'
1465  */
1466  iterator startsWith(const std::vector<T>& prefix) {
1467  /// @todo Currently this uses a hack that isn't safe for all types and circumstances. This should be changed to use "data()" function member new to C++11 when we make that dive.
1468  return this->startsWith(&prefix[0]);
1469  }
1470 
1471  /*!
1472  * Retrieves const_iterator to the elements with common prefix
1473  * @param prefix Part of the key which should be searched, should be terminated by 'end' symbol
1474  * @return const_iterator to the elements with prefix specified in 'prefix'
1475  */
1476  const_iterator startsWith(const T *prefix) const {
1477  return mRoot.startsWith(prefix);
1478  }
1479 
1480  /*!
1481  * Retrieves const_iterator to the elements with common prefix
1482  * @param prefix Part of the key which should be searched, should be terminated by 'end' symbol
1483  * @return const_iterator to the elements with prefix specified in 'prefix'
1484  */
1485  const_iterator startsWith(const std::vector<T>& prefix) const {
1486  /// @todo Currently this uses a hack that isn't safe for all types and circumstances. This should be changed to use "data()" function member new to C++11 when we make that dive.
1487  return this->startsWith(&prefix[0]);
1488  }
1489 
1490  /*!
1491  * Retrieves the end symbol
1492  * @return end symbol
1493  */
1494  T endSymbol() const {
1495  return mRoot.endSymbol();
1496  }
1497 
1498  /*!
1499  * Returns an iterator referring to the first element in the Trie
1500  * @return An iterator to the first element in the Trie
1501  */
1502  iterator begin() {
1503  return mRoot.begin();
1504  }
1505 
1506  /*!
1507  * Returns an iterator referring to the past-the-end element in the Trie
1508  * @return An iterator to the element past the end of the Trie
1509  */
1510  iterator end() {
1511  return mRoot.end();
1512  }
1513 
1514  /*!
1515  * Searches the Trie for an element with 'key' as key
1516  * @param key Key to be searched for, should be terminated by 'end' symbol
1517  * @return Iterator to the element with key 'key' if found, otherwise an iterator to Trie::end
1518  */
1519  iterator find(const T *key) {
1520  return mRoot.find(key);
1521  }
1522 
1523  /*!
1524  * Searches the Trie for an element with 'key' as key
1525  * @param key Key to be searched for, should be terminated by 'end' symbol
1526  * @return Iterator to the element with key 'key' if found, otherwise an iterator to Trie::end
1527  */
1528  iterator find(const std::vector<T>& key) {
1529  /// @todo Currently this uses a hack that isn't safe for all types and circumstances. This should be changed to use "data()" function member new to C++11 when we make that dive.
1530  return this->find(&key[0]);
1531  }
1532 
1533  /*!
1534  * Searches the Trie for an element with 'key' as key
1535  * @param key Key to be searched for, should be terminated by 'end' symbol
1536  * @return const_iterator to the element with key 'key' if found, otherwise an const_iterator to Trie::end
1537  */
1538  const_iterator find(const T *key) const {
1539  return mRoot.find(key);
1540  }
1541 
1542  /*!
1543  * Searches the Trie for an element with 'key' as key
1544  * @param key Key to be searched for, should be terminated by 'end' symbol
1545  * @return const_iterator to the element with key 'key' if found, otherwise an const_iterator to Trie::end
1546  */
1547  const_iterator find(const std::vector<T>& key) const {
1548  return this->find(&key[0]);
1549  }
1550 
1551  /*!
1552  * Returns an constant iterator referring to the first element in the Trie
1553  * @return An constant iterator to the first element in the Trie
1554  */
1555  const_iterator begin() const {
1556  return mRoot.begin();
1557  }
1558 
1559  /*!
1560  * Returns an constant iterator referring to the past-the-end element in the Trie
1561  * @return An constant iterator to the element past the end of the Trie
1562  */
1563  const_iterator end() const {
1564  return mRoot.end();
1565  }
1566 
1567 private:
1568  Trie(Trie const &);
1569  Trie &operator=(Trie const &);
1570 
1571 private:
1572  Node<T, V, Cmp, Items> mRoot;
1573 };
1574 
1575 }
1576 
1577 #endif