Go to the source code of this file.
Classes | |
struct | LineBreakProperties |
struct | LineBreakPropertiesLang |
Typedefs | |
typedef utf32_t(* | get_next_char_t )(const void *, size_t, size_t *) |
Enumerations | |
enum | LineBreakClass { LBP_Undefined, LBP_OP, LBP_CL, LBP_CP, LBP_QU, LBP_GL, LBP_NS, LBP_EX, LBP_SY, LBP_IS, LBP_PR, LBP_PO, LBP_NU, LBP_AL, LBP_ID, LBP_IN, LBP_HY, LBP_BA, LBP_BB, LBP_B2, LBP_ZW, LBP_CM, LBP_WJ, LBP_H2, LBP_H3, LBP_JL, LBP_JV, LBP_JT, LBP_AI, LBP_BK, LBP_CB, LBP_CR, LBP_LF, LBP_NL, LBP_SA, LBP_SG, LBP_SP, LBP_XX } |
Functions | |
utf32_t | lb_get_next_char_utf8 (const utf8_t *s, size_t len, size_t *ip) |
utf32_t | lb_get_next_char_utf16 (const utf16_t *s, size_t len, size_t *ip) |
utf32_t | lb_get_next_char_utf32 (const utf32_t *s, size_t len, size_t *ip) |
void | set_linebreaks (const void *s, size_t len, const char *lang, char *brks, get_next_char_t get_next_char) |
Variables | |
struct LineBreakProperties | lb_prop_default [] |
struct LineBreakPropertiesLang | lb_prop_lang_map [] |
Definitions of internal data structures, declarations of global variables, and function prototypes for the line breaking algorithm.
typedef utf32_t(* get_next_char_t)(const void *, size_t, size_t *) |
Abstract function interface for lb_get_next_char_utf8, lb_get_next_char_utf16, and lb_get_next_char_utf32.
enum LineBreakClass |
Line break classes. This is a direct mapping of Table 1 of Unicode Standard Annex 14, Revision 26.
utf32_t lb_get_next_char_utf8 | ( | const utf8_t * | s, |
size_t | len, | ||
size_t * | ip | ||
) |
Gets the next Unicode character in a UTF-8 sequence. The index will be advanced to the next complete character, unless the end of string is reached in the middle of a UTF-8 sequence.
[in] | s | input UTF-8 string |
[in] | len | length of the string in bytes |
[in,out] | ip | pointer to the index |
utf32_t lb_get_next_char_utf16 | ( | const utf16_t * | s, |
size_t | len, | ||
size_t * | ip | ||
) |
Gets the next Unicode character in a UTF-16 sequence. The index will be advanced to the next complete character, unless the end of string is reached in the middle of a UTF-16 surrogate pair.
[in] | s | input UTF-16 string |
[in] | len | length of the string in words |
[in,out] | ip | pointer to the index |
utf32_t lb_get_next_char_utf32 | ( | const utf32_t * | s, |
size_t | len, | ||
size_t * | ip | ||
) |
Gets the next Unicode character in a UTF-32 sequence. The index will be advanced to the next character.
[in] | s | input UTF-32 string |
[in] | len | length of the string in dwords |
[in,out] | ip | pointer to the index |
void set_linebreaks | ( | const void * | s, |
size_t | len, | ||
const char * | lang, | ||
char * | brks, | ||
get_next_char_t | get_next_char | ||
) |
Sets the line breaking information for a generic input string.
[in] | s | input string |
[in] | len | length of the input |
[in] | lang | language of the input |
[out] | brks | pointer to the output breaking data, containing LINEBREAK_MUSTBREAK, LINEBREAK_ALLOWBREAK, LINEBREAK_NOBREAK, or LINEBREAK_INSIDEACHAR |
[in] | get_next_char | function to get the next UTF-32 character |
struct LineBreakProperties lb_prop_default[] |
Default line breaking properties, as obtained from the Unicode Web site.
struct LineBreakPropertiesLang lb_prop_lang_map[] |
Data associating language-specific line breaking properties with language names. This is the definition of the static data in this file. If you want more flexibility, or do not need the data defined here, you may want to redefine lb_prop_lang_map in your own C source file.