MezzanineEngine 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
unicode.cpp
Go to the documentation of this file.
1 // The UTF8 Conversion Library is a small library aiding in the conversion from raw text to UTF8 text.
2 // © Copyright 2010 - 2013 BlackTopp Studios Inc.
3 /* This file is part of The UTF8 Conversion Library.
4 
5  The UTF8 Conversion Library is free software: you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation, either version 3 of the License, or
8  (at your option) any later version.
9 
10  The UTF8 Conversion Library is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with The UTF8 Conversion Library. If not, see <http://www.gnu.org/licenses/>.
17 */
18 /* The original authors have included a copy of the license specified above in the
19  'doc' folder. See 'gpl.txt'
20 */
21 /* We welcome the use of the UTF8 Conversion Library to anyone, including companies who wish to
22  Build professional software and charge for their product.
23 
24  However there are some practical restrictions, so if your project involves
25  any of the following you should contact us and we will try to work something
26  out:
27  - DRM or Copy Protection of any kind(except Copyrights)
28  - Software Patents You Do Not Wish to Freely License
29  - Any Kind of Linking to Non-GPL licensed Works
30  - Are Currently In Violation of Another Copyright Holder's GPL License
31  - If You want to change our code and not add a few hundred MB of stuff to
32  your distribution
33 
34  These and other limitations could cause serious legal problems if you ignore
35  them, so it is best to simply contact us or the Free Software Foundation, if
36  you have any questions.
37 
38  Joseph Toppi - toppij@gmail.com
39  John Blackwood - makoenergy02@gmail.com
40 */
41 
42 #ifndef _unicode_cpp
43 #define _unicode_cpp
44 
45 #include "unicode.h"
46 
47 using namespace std;
48 
49 /// @file
50 /// @brief This contains simple tools for indexing with UTF8 characters swiftly
51 
52 namespace Mezzanine
53 {
54  namespace Unicode
55  {
56  namespace
57  {
58  /// @internal
59  /// Checks following bytes for UTF8 validity.
60  bool CheckAsFollowingBytes(Int32 HowMany, const char* FirstChar)
61  {
62  for(Int32 Count = 1; HowMany>Count; Count++)
63  {
64  if ( (*(FirstChar+Count) & High2Bit) != High1Bit)
65  { return false; }
66  }
67  return true;
68  }
69  }
70 
71  String AsBitString(Int32 IntToPrint)
72  {
73  string Results;
74  for(int Counter=31; Counter>=0; Counter--)
75  {
76  if ( ((Counter+1)%8)==0 && Counter!=31)
77  { Results += " "; }
78  if ( (1<<Counter) & IntToPrint )
79  { Results += "1"; }
80  else
81  { Results += "0"; }
82  }
83  return Results;
84  }
85 
86  Int32 GetIntFromCharacter(Int32& BytesUsed, const char* CurrentCharacter)
87  {
88 
89  if( (*CurrentCharacter & High1Bit) == 0)
90  {
91  BytesUsed=1;
92  char Results = *CurrentCharacter;
93  return Results;
94  }
95 
96  for(Int32 Counter=2; Counter<7; Counter++)
97  {
98  if((*CurrentCharacter & IterableHighBits[Counter+1]) == IterableHighBits[Counter])
99  {
100  if( CheckAsFollowingBytes(Counter-1, CurrentCharacter) )
101  {
102  BytesUsed=Counter;
103  Int32 Results = (CurrentCharacter[0] & IterableLowBits[8-Counter]) << (6*(Counter-1));
104  for(Int32 Remains = 1; Remains<Counter; Remains++)
105  { Results |= (CurrentCharacter[Remains] & Low6Bit) << (6*(Counter-Remains-1)); }
106  return Results;
107  }
108  return -1;
109  }
110  }
111 
112  return -1;
113  }
114 
115  ///
116  Int32 GetCharacterFromInt(char* Destination, Int32 BytesUsable, Int32 ByteSequence)
117  {
118  ///
119  if(ByteSequence<UTF8ByteRange1Max)
120  {
121  //1 byte or invalid
122  if(0>ByteSequence)
123  { return -1; }
124  else
125  {
126  // 1 byte
127  if (1>BytesUsable)
128  { return -1; }
129  char Ascii = (char)ByteSequence;
130  Destination[0]=Ascii;
131  return 1;
132  }
133  }
134  else
135  {
136  //more than one
137  if(ByteSequence<UTF8ByteRange2Max)
138  {
139  // 2 bytes
140  if (2>BytesUsable)
141  { return -1; }
142  Int32 Results = UTF8Null2ByteBase;
143  Results |= (ByteSequence & Int32(Low6Bit));
144  Results |= (ByteSequence & (Int32(Low5Bit)<<6) ) << 2;
145  char* Bytes = (char*)&Results;
146  Destination[0]=Bytes[1];
147  Destination[1]=Bytes[0];
148  return 2;
149  }
150  else
151  {
152  if(ByteSequence<UTF8ByteRange3Max)
153  {
154  // 3 or more
155  if (3>BytesUsable)
156  { return -1; }
157  Int32 Results = UTF8Null3ByteBase;
158  Results |= (ByteSequence & Int32(Low6Bit));
159  Results |= (ByteSequence & (Int32(Low6Bit)<<6) ) << 2;
160  Results |= (ByteSequence & (Int32(Low4Bit)<<12) ) << 4;
161  char* Bytes = (char*)&Results;
162  Destination[0]=Bytes[2];
163  Destination[1]=Bytes[1];
164  Destination[2]=Bytes[0];
165  return 3;
166  }
167  else
168  {
169  if(ByteSequence<UTF8ByteRange4Max)
170  {
171  // 4 or more
172  if (4>BytesUsable)
173  { return -1; }
174  Int32 Results = UTF8Null4ByteBase;
175  Results |= (ByteSequence & Int32(Low6Bit));
176  Results |= (ByteSequence & (Int32(Low6Bit)<<6) ) << 2;
177  Results |= (ByteSequence & (Int32(Low6Bit)<<12) ) << 4;
178  Results |= (ByteSequence & (Int32(Low3Bit)<<18) ) << 6;
179  char* Bytes = (char*)&Results;
180  Destination[0]=Bytes[3];
181  Destination[1]=Bytes[2];
182  Destination[2]=Bytes[1];
183  Destination[3]=Bytes[0];
184  return 4;
185  }
186  else
187  { return -1; }
188 
189  }
190  }
191  }
192  return -1;
193  }
194  }//Unicode
195 }//Mezzanine
196 
197 #endif