![]() |
|
#ifndef TAGLIB_UNICODE_H
#define TAGLIB_UNICODE_H

/*******************************************************************************
 *                                                                             *
 * THIS FILE IS INCLUDED IN TAGLIB, BUT IS NOT COPYRIGHTED BY THE TAGLIB       *
 * AUTHORS, NOT PART OF THE TAGLIB API AND COULD GO AWAY AT ANY POINT IN TIME. *
 * AS SUCH IT SHOULD BE CONSIDERED FOR INTERNAL USE ONLY.                      *
 *                                                                             *
 *******************************************************************************/

#ifndef DO_NOT_DOCUMENT // tell Doxygen not to document this header

/*
 * Copyright 2001 Unicode, Inc.
 *
 * Disclaimer
 *
 * This source code is provided as is by Unicode, Inc. No claims are
 * made as to fitness for any particular purpose. No warranties of any
 * kind are expressed or implied. The recipient agrees to determine
 * applicability of information provided. If this file has been
 * purchased on magnetic or optical media from Unicode, Inc., the
 * sole remedy for any claim will be exchange of defective media
 * within 90 days of receipt.
 *
 * Limitations on Rights to Redistribute This Code
 *
 * Unicode, Inc. hereby grants the right to freely use the information
 * supplied in this file in the creation of products supporting the
 * Unicode Standard, and to make copies of this file in any form
 * for internal or external distribution as long as this notice
 * remains attached.
 */

/*
 * This file has been modified by Scott Wheeler <wheeler@kde.org> to remove
 * the UTF32 conversion functions and to place the appropriate functions
 * in their own C++ namespace.
 */

/* ---------------------------------------------------------------------

    Conversions between UTF32, UTF-16, and UTF-8. Header file.

    Several functions are included here, forming a complete set of
    conversions between the three formats. UTF-7 is not included
    here, but is handled in a separate source file.

    Each of these routines takes pointers to input buffers and output
    buffers. The input buffers are const.

    Each routine converts the text between *sourceStart and sourceEnd,
    putting the result into the buffer between *targetStart and
    targetEnd. Note: the end pointers are *after* the last item: e.g.
    *(sourceEnd - 1) is the last item.

    The return result indicates whether the conversion was successful,
    and if not, whether the problem was in the source or target buffers.
    (Only the first encountered problem is indicated.)

    After the conversion, *sourceStart and *targetStart are both
    updated to point to the end of last text successfully converted in
    the respective buffers.

    Input parameters:
        sourceStart - pointer to a pointer to the source buffer.
                The contents of this are modified on return so that
                it points at the next thing to be converted.
        targetStart - similarly, pointer to pointer to the target buffer.
        sourceEnd, targetEnd - respectively pointers to the ends of the
                two buffers, for overflow checking only.

    These conversion functions take a ConversionFlags argument. When this
    flag is set to strict, both irregular sequences and isolated surrogates
    will cause an error. When the flag is set to lenient, both irregular
    sequences and isolated surrogates are converted.

    Whether the flag is strict or lenient, all illegal sequences will cause
    an error return. This includes sequences such as: <F4 90 80 80>, <C0 80>,
    or <A0> in UTF-8, and values above 0x10FFFF in UTF-32. Conformant code
    must check for illegal sequences.

    When the flag is set to lenient, characters over 0x10FFFF are converted
    to the replacement character; otherwise (when the flag is set to strict)
    they constitute an error.

    Output parameters:
        The value "sourceIllegal" is returned from some routines if the input
        sequence is malformed. When "sourceIllegal" is returned, the source
        value will point to the illegal value that caused the problem. E.g.,
        in UTF-8 when a sequence is malformed, it points to the start of the
        malformed sequence.

    Author: Mark E. Davis, 1994.
    Rev History: Rick McGowan, fixes & updates May 2001.
                 Fixes & updates, Sept 2001.

------------------------------------------------------------------------ */

/* ---------------------------------------------------------------------
    The following 4 definitions are compiler-specific.
    The C standard does not guarantee that wchar_t has at least
    16 bits, so wchar_t is no less portable than unsigned short!
    All should be unsigned values to avoid sign extension during
    bit mask & shift operations.
------------------------------------------------------------------------ */

/*
 * Some fundamental constants.
 *
 * Macros ignore namespaces, so the cast is spelled with the fully qualified
 * type name: the constants then expand correctly wherever they are used, not
 * only inside namespace Unicode. Each replacement list is parenthesized so
 * that the expansion always behaves as a single operand (for example after
 * sizeof or before a postfix operator).
 */
#define UNI_REPLACEMENT_CHAR ((::Unicode::UTF32)0x0000FFFD)
#define UNI_MAX_BMP          ((::Unicode::UTF32)0x0000FFFF)
#define UNI_MAX_UTF16        ((::Unicode::UTF32)0x0010FFFF)
#define UNI_MAX_UTF32        ((::Unicode::UTF32)0x7FFFFFFF)

namespace Unicode {

  /* Code unit types. All are unsigned to avoid sign extension (see above). */
  typedef unsigned long  UTF32;   /* at least 32 bits */
  typedef unsigned short UTF16;   /* at least 16 bits */
  typedef unsigned char  UTF8;    /* typically 8 bits */
  typedef unsigned char  Boolean; /* 0 or 1 */

  /* Outcome of a conversion; only the first problem encountered is reported. */
  typedef enum {
    conversionOK    = 0, /* conversion successful */
    sourceExhausted = 1, /* partial character in source, but hit end */
    targetExhausted = 2, /* insuff. room in target for conversion */
    sourceIllegal   = 3  /* source sequence is illegal/malformed */
  } ConversionResult;

  /*
   * Selects how irregular sequences and isolated surrogates are treated:
   * strict reports them as errors, lenient converts them. Illegal sequences
   * cause an error return under either setting.
   */
  typedef enum {
    strictConversion = 0,
    lenientConversion
  } ConversionFlags;

  /*
   * Converts the UTF-8 text in [*sourceStart, sourceEnd) to UTF-16, writing
   * into [*targetStart, targetEnd). On return *sourceStart and *targetStart
   * point just past the last text successfully converted in their buffers.
   */
  ConversionResult ConvertUTF8toUTF16(
      const UTF8 **sourceStart, const UTF8 *sourceEnd,
      UTF16 **targetStart, UTF16 *targetEnd, ConversionFlags flags);

  /*
   * Converts the UTF-16 text in [*sourceStart, sourceEnd) to UTF-8, writing
   * into [*targetStart, targetEnd). On return *sourceStart and *targetStart
   * point just past the last text successfully converted in their buffers.
   */
  ConversionResult ConvertUTF16toUTF8(
      const UTF16 **sourceStart, const UTF16 *sourceEnd,
      UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags);

  /*
   * NOTE(review): presumably reports whether the bytes from source up to
   * sourceEnd form a legal UTF-8 sequence; the implementation is not visible
   * from this header -- confirm against the definition.
   */
  Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd);

} // namespace Unicode

/* --------------------------------------------------------------------- */

#endif // DO_NOT_DOCUMENT
#endif // TAGLIB_UNICODE_H