-----------------------------------------------------------------------
-- GtkAda - Ada95 binding for Gtk+/Gnome --
-- --
-- Copyright (C) 2003 ACT-Europe --
-- --
-- This library is free software; you can redistribute it and/or --
-- modify it under the terms of the GNU General Public --
-- License as published by the Free Software Foundation; either --
-- version 2 of the License, or (at your option) any later version. --
-- --
-- This library is distributed in the hope that it will be useful, --
-- but WITHOUT ANY WARRANTY; without even the implied warranty of --
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU --
-- General Public License for more details. --
-- --
-- You should have received a copy of the GNU General Public --
-- License along with this library; if not, write to the --
-- Free Software Foundation, Inc., 59 Temple Place - Suite 330, --
-- Boston, MA 02111-1307, USA. --
-- --
-- --
-- --
-- --
-- --
-- --
-- --
-----------------------------------------------------------------------
--
--
-- This package provides functions for handling Unicode characters and
-- UTF-8 strings. See also Glib.Convert.
--
--
-- 2.2.1
-- Glib, the general-purpose library
with Interfaces.C.Strings;
package Glib.Unicode is
   pragma Preelaborate;

   package ICS renames Interfaces.C.Strings;
   --  Short local name for the C string types used by the low-level
   --  subprograms below.

   procedure UTF8_Validate
     (Str         : UTF8_String;
      Valid       : out Boolean;
      Invalid_Pos : out Natural);
   --  Check whether Str is a valid UTF8 string.
   --  Valid is set to True if Str is valid. Invalid_Pos is set to the first
   --  invalid byte.

   -----------------------
   -- Character classes --
   -----------------------

   type G_Unicode_Type is
     (Unicode_Control,
      Unicode_Format,
      Unicode_Unassigned,
      Unicode_Private_Use,
      Unicode_Surrogate,
      Unicode_Lowercase_Letter,
      Unicode_Modifier_Letter,
      Unicode_Other_Letter,
      Unicode_Titlecase_Letter,
      Unicode_Uppercase_Letter,
      Unicode_Combining_Mark,
      Unicode_Enclosing_Mark,
      Unicode_Non_Spacing_Mark,
      Unicode_Decimal_Number,
      Unicode_Letter_Number,
      Unicode_Other_Number,
      Unicode_Connect_Punctuation,
      Unicode_Dash_Punctuation,
      Unicode_Close_Punctuation,
      Unicode_Final_Punctuation,
      Unicode_Initial_Punctuation,
      Unicode_Other_Punctuation,
      Unicode_Open_Punctuation,
      Unicode_Currency_Symbol,
      Unicode_Modifier_Symbol,
      Unicode_Math_Symbol,
      Unicode_Other_Symbol,
      Unicode_Line_Separator,
      Unicode_Paragraph_Separator,
      Unicode_Space_Separator);
   --  The classifications a unicode character can belong to.
   --  See http://www.unicode.org/Public/UNIDATA/UCD.html
   --  This type is given convention C in the private part and is the result
   --  type of the imported function Unichar_Type, so the literals presumably
   --  mirror the order of the matching C enumeration: do not reorder them.

   function Is_Space (Char : Gunichar) return Boolean;
   --  Whether Char is a space character

   function Is_Alnum (Char : Gunichar) return Boolean;
   --  Whether Char is an alphabetical or numerical character

   function Is_Alpha (Char : Gunichar) return Boolean;
   --  Whether Char is an alphabetical character

   function Is_Digit (Char : Gunichar) return Boolean;
   --  Whether Char is a digit

   function Is_Lower (Char : Gunichar) return Boolean;
   --  Whether Char is a lower-case character

   function Is_Upper (Char : Gunichar) return Boolean;
   --  Whether Char is an upper-case character

   function Is_Punct (Char : Gunichar) return Boolean;
   --  Whether Char is a punctuation character

   function Unichar_Type (Char : Gunichar) return G_Unicode_Type;
   --  Return the unicode classification of Char

   -------------------
   -- Case handling --
   -------------------

   function To_Lower (Char : Gunichar) return Gunichar;
   --  Return Char converted to lower case

   function To_Upper (Char : Gunichar) return Gunichar;
   --  Return Char converted to upper case

   function UTF8_Strdown
     (Str : ICS.chars_ptr;
      Len : Integer) return ICS.chars_ptr;
   pragma Import (C, UTF8_Strdown, "g_utf8_strdown");
   --  Convert all the characters of Str to lower case.
   --  The length of the result can differ from that of Str, and the result
   --  must be freed by the caller.
   --  NOTE(review): the meaning of Len is not described here; see the C
   --  documentation of g_utf8_strdown. Also confirm that Integer has the
   --  same size as the corresponding C parameter on every target.

   function UTF8_Strdown (Str : UTF8_String) return UTF8_String;
   --  Return Str converted to lower case

   function UTF8_Strup
     (Str : ICS.chars_ptr;
      Len : Integer) return ICS.chars_ptr;
   pragma Import (C, UTF8_Strup, "g_utf8_strup");
   --  Convert all the characters of Str to upper case.
   --  The result is a newly allocated string, whose length can differ from
   --  that of Str (the german ess-zet, for instance, becomes SS).
   --  It must be freed by the caller.
   --  NOTE(review): the meaning of Len is not described here; see the C
   --  documentation of g_utf8_strup. Also confirm that Integer has the
   --  same size as the corresponding C parameter on every target.

   function UTF8_Strup (Str : UTF8_String) return UTF8_String;
   --  Return Str converted to upper case

   --------------------------
   -- Manipulating strings --
   --------------------------

   function UTF8_Strlen
     (Str : ICS.chars_ptr;
      Max : Integer := -1) return Glong;
   pragma Import (C, UTF8_Strlen, "g_utf8_strlen");
   --  Return the length of the utf8-encoded string Str.
   --  At most Max bytes are examined. When Max is negative, Str is assumed
   --  to be nul-terminated.

   function UTF8_Strlen (Str : UTF8_String) return Glong;
   --  Return the number of characters in Str

   function UTF8_Find_Next_Char
     (Str     : ICS.chars_ptr;
      Str_End : ICS.chars_ptr := ICS.Null_Ptr) return ICS.chars_ptr;
   pragma Import (C, UTF8_Find_Next_Char, "g_utf8_find_next_char");
   --  Return the start of the next UTF8 character after Str.
   --  Str_End points to the end of the string. When it is Null_Ptr, the
   --  string must be nul-terminated.

   function UTF8_Find_Next_Char
     (Str   : UTF8_String;
      Index : Natural) return Natural;
   pragma Inline (UTF8_Find_Next_Char);
   --  Return the start of the next UTF8 character after the Index-th byte.
   --  Index is not required to be on the start of a character.
   --  The returned value is greater than Str'Last if there is no more
   --  character.

   function UTF8_Next_Char
     (Str   : UTF8_String;
      Index : Natural) return Natural;
   pragma Inline (UTF8_Next_Char);
   --  Return the start of the next UTF8 character after the Index-th byte.
   --  Unlike UTF8_Find_Next_Char, Index has to be on the start of a
   --  character.
   --  The returned value is greater than Str'Last if there is no more
   --  character.

   function UTF8_Find_Prev_Char
     (Str_Start : ICS.chars_ptr;
      Str       : ICS.chars_ptr) return ICS.chars_ptr;
   pragma Import (C, UTF8_Find_Prev_Char, "g_utf8_find_prev_char");
   --  Return the start of the previous UTF8 character before Str.
   --  Str_Start points to the beginning of the string.
   --  The result is Null_Ptr if there is no previous character.

   function UTF8_Find_Prev_Char
     (Str   : UTF8_String;
      Index : Natural) return Natural;
   --  Return the start of the previous UTF8 character before the Index-th
   --  byte.
   --  Index is not required to be on the start of a character.
   --  The returned value is smaller than Str'First if there is no previous
   --  character.

   -----------------
   -- Conversions --
   -----------------

   function Unichar_To_UTF8
     (C      : Gunichar;
      Buffer : ICS.chars_ptr := ICS.Null_Ptr) return Natural;
   pragma Import (C, Unichar_To_UTF8, "g_unichar_to_utf8");
   --  Encode C into Buffer, and return the number of bytes written.
   --  Buffer must have at least 6 bytes free.
   --  When Buffer is Null_Ptr, nothing is written: the only effect is to
   --  compute the number of bytes needed to encode C.

   procedure Unichar_To_UTF8
     (C      : Gunichar;
      Buffer : out UTF8_String;
      Last   : out Natural);
   --  Encode C into Buffer, which must have at least 6 bytes free.
   --  Last is set to the index of the last byte written in Buffer.

   function UTF8_Get_Char (Str : UTF8_String) return Gunichar;
   --  Convert a sequence of bytes encoded as UTF8 into a unicode character.
   --  The result is undefined if Str doesn't start with a valid UTF8 encoded
   --  character.

   function UTF8_Get_Char_Validated (Str : UTF8_String) return Gunichar;
   --  Same as UTF8_Get_Char, except for malformed input: -2 is returned if
   --  the sequence is an incomplete start of a possibly valid character, and
   --  -1 if the sequence is invalid.
   --  ??? Gunichar is unsigned, how can we test -2 or -1 ?
   --  (presumably they show up as the two largest values of Gunichar; this
   --  needs to be confirmed)

private

   pragma Convention (C, G_Unicode_Type);
   --  G_Unicode_Type is the result type of an imported C function

   --  These subprograms are direct imports of their C counterparts, so they
   --  need no Ada body.
   pragma Import (C, To_Lower, "g_unichar_tolower");
   pragma Import (C, To_Upper, "g_unichar_toupper");
   pragma Import (C, Unichar_Type, "g_unichar_type");

end Glib.Unicode;