00001 /* vim: set tabstop=4 shiftwidth=4: */ 00002 00003 /* 00004 * Line breaking in a Unicode sequence. Designed to be used in a 00005 * generic text renderer. 00006 * 00007 * Copyright (C) 2008-2011 Wu Yongwei <wuyongwei at gmail dot com> 00008 * 00009 * This software is provided 'as-is', without any express or implied 00010 * warranty. In no event will the author be held liable for any damages 00011 * arising from the use of this software. 00012 * 00013 * Permission is granted to anyone to use this software for any purpose, 00014 * including commercial applications, and to alter it and redistribute 00015 * it freely, subject to the following restrictions: 00016 * 00017 * 1. The origin of this software must not be misrepresented; you must 00018 * not claim that you wrote the original software. If you use this 00019 * software in a product, an acknowledgement in the product 00020 * documentation would be appreciated but is not required. 00021 * 2. Altered source versions must be plainly marked as such, and must 00022 * not be misrepresented as being the original software. 00023 * 3. This notice may not be removed or altered from any source 00024 * distribution. 00025 * 00026 * The main reference is Unicode Standard Annex 14 (UAX #14): 00027 * <URL:http://www.unicode.org/reports/tr14/> 00028 * 00029 * When this library was designed, this annex was at Revision 19, for 00030 * Unicode 5.0.0: 00031 * <URL:http://www.unicode.org/reports/tr14/tr14-19.html> 00032 * 00033 * This library has been updated according to Revision 26, for 00034 * Unicode 6.0.0: 00035 * <URL:http://www.unicode.org/reports/tr14/tr14-26.html> 00036 * 00037 * The Unicode Terms of Use are available at 00038 * <URL:http://www.unicode.org/copyright.html> 00039 */ 00040 00055 #define EOS 0xFFFF 00056 00061 enum LineBreakClass 00062 { 00063 /* This is used to signal an error condition. */ 00064 LBP_Undefined, 00066 /* The following break classes are treated in the pair table. */ 00067 LBP_OP, 00068 LBP_CL, 00069 LBP_CP, 00070 LBP_QU, 00071 LBP_GL, 00072 LBP_NS, 00073 LBP_EX, 00074 LBP_SY, 00075 LBP_IS, 00076 LBP_PR, 00077 LBP_PO, 00078 LBP_NU, 00079 LBP_AL, 00080 LBP_ID, 00081 LBP_IN, 00082 LBP_HY, 00083 LBP_BA, 00084 LBP_BB, 00085 LBP_B2, 00086 LBP_ZW, 00087 LBP_CM, 00088 LBP_WJ, 00089 LBP_H2, 00090 LBP_H3, 00091 LBP_JL, 00092 LBP_JV, 00093 LBP_JT, 00095 /* The following break classes are not treated in the pair table */ 00096 LBP_AI, 00097 LBP_BK, 00098 LBP_CB, 00099 LBP_CR, 00100 LBP_LF, 00101 LBP_NL, 00102 LBP_SA, 00103 LBP_SG, 00104 LBP_SP, 00105 LBP_XX 00106 }; 00107 00112 struct LineBreakProperties 00113 { 00114 utf32_t start; 00115 utf32_t end; 00116 enum LineBreakClass prop; 00117 }; 00118 00123 struct LineBreakPropertiesLang 00124 { 00125 const char *lang; 00126 size_t namelen; 00127 struct LineBreakProperties *lbp; 00128 }; 00129 00134 typedef utf32_t (*get_next_char_t)(const void *, size_t, size_t *); 00135 00136 /* Declarations */ 00137 extern struct LineBreakProperties lb_prop_default[]; 00138 extern struct LineBreakPropertiesLang lb_prop_lang_map[]; 00139 00140 /* Function Prototype */ 00141 utf32_t lb_get_next_char_utf8(const utf8_t *s, size_t len, size_t *ip); 00142 utf32_t lb_get_next_char_utf16(const utf16_t *s, size_t len, size_t *ip); 00143 utf32_t lb_get_next_char_utf32(const utf32_t *s, size_t len, size_t *ip); 00144 void set_linebreaks( 00145 const void *s, 00146 size_t len, 00147 const char *lang, 00148 char *brks, 00149 get_next_char_t get_next_char);