------------------------------------------------------------------------------
--                     XML/Ada - An XML suite for Ada95                     --
--                                                                          --
--                     Copyright (C) 2001-2012, AdaCore                     --
--                                                                          --
-- This library is free software;  you can redistribute it and/or modify it --
-- under terms of the  GNU General Public License  as published by the Free --
-- Software  Foundation;  either version 3,  or (at your  option) any later --
-- version. This library is distributed in the hope that it will be useful, --
-- but WITHOUT ANY WARRANTY;  without even the implied warranty of MERCHAN- --
-- TABILITY or FITNESS FOR A PARTICULAR PURPOSE.                            --
--                                                                          --
--                                                                          --
--                                                                          --
--                                                                          --
--                                                                          --
-- You should have received a copy of the GNU General Public License and    --
-- a copy of the GCC Runtime Library Exception along with this program;     --
-- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
-- <http://www.gnu.org/licenses/>.                                          --
--                                                                          --
------------------------------------------------------------------------------

--  This package provides support for Utf32-encoding.
--
--  The main advantage of this encoding is that each character is coded on
--  the same number of bytes, i.e. 4 bytes. It is thus very easy and fast to
--  traverse a byte sequence and get each character.
--
--  On the other hand, this also means that strings are much bigger than what
--  they should be (when using standard ASCII characters, for instance, the
--  byte sequence is four times as big as it needs to be).
--  This encoding is also dependent on specific byte-ordering. You should thus
--  always convert your Utf32 strings to little-endian before usage (see
--  To_Unicode_LE below).

with Unchecked_Deallocation;
with Unicode.CCS;

package Unicode.CES.Utf32 is

   --  Declarations for the Utf32 character encoding scheme: string subtypes,
   --  per-character Encode/Read primitives in both byte orders, whole-string
   --  conversions, and the two encoding_scheme constants that bundle them.
   --  Names used unqualified below (unicode_char, byte_sequence, byte_order,
   --  encoding_scheme, ...) are not declared in this spec; they come from an
   --  ancestor package that is not shown here.

   -----------
   -- Types --
   -----------

   subtype utf32_string is String;
   type utf32_string_access is access utf32_string;
   --  A UTF32-encoded string. Byte-order is unspecified.

   subtype utf32_le_string is utf32_string;
   type utf32_le_string_access is access utf32_le_string;
   --  A Utf32-encoded, little-endian string.
   --  This subtype adds no constraint to utf32_string: the name documents the
   --  expected byte order, but the compiler cannot enforce it.

   subtype utf32_be_string is utf32_string;
   --  A Utf32-encoded, big-endian string.
   --  As for utf32_le_string, the byte order is a convention only.

   utf32_char_width : constant := 4;
   --  Number of bytes used to represent every character in Utf32

   -------------------------------------------
   -- Conversion to and from byte sequences --
   -------------------------------------------

   procedure Encode
     (Char   :        unicode_char;
      Output : in out byte_sequence;
      Index  : in out Natural);
   --  Store in Output the byte sequence representing Char in the Utf32
   --  character encoding form.
   --  The character is encoded in little-endian byte order.
   --  Output must have at least Utf32_Char_Width characters available.
   --  NOTE(review): Index is in out; presumably it designates where the bytes
   --  are written in Output and is updated by the call. The exact convention
   --  is defined by the package body; confirm there.

   procedure Read
     (Str   :        utf32_le_string;
      Index : in out Positive;
      Char  :    out unicode_char);
   --  Return in Char the character starting at location Index in Str.
   --  Invalid_Encoding is raised if no valid byte sequence starts at Index.
   --  Incomplete_Encoding is raised if there are not enough characters for
   --  a valid encoding.
   --  NOTE(review): Index is in out; presumably it is advanced past the
   --  character that was read. Confirm against the package body.

   procedure Encode_BE
     (Char   :        unicode_char;
      Output : in out byte_sequence;
      Index  : in out Natural);
   --  Store in Output the byte sequence representing Char in the Utf32
   --  character encoding form.
   --  The character is encoded in big-endian byte order.
   --  NOTE(review): presumably the same requirement on the space available in
   --  Output and the same Index convention as Encode apply; confirm against
   --  the package body.

   procedure Read_BE
     (Str   :        utf32_be_string;
      Index : in out Positive;
      Char  :    out unicode_char);
   --  Same as Read, but when Str is in big-endian order

   function Width (Char : unicode_char) return Natural;
   --  Return the number of bytes occupied by the Utf32 representation of Char.
   --  Inlining is requested for this function (see the private part).

   function Length (Str : utf32_string) return Natural;
   --  Return the number of characters (not bytes) in Str

   ---------------------------
   -- Byte order conversion --
   ---------------------------

   function To_Unicode_LE
     (Str   : utf32_string;
      Cs    : Unicode.CCS.character_set := Unicode.CCS.Unicode_Character_Set;
      Order : byte_order := Default_Byte_Order) return utf32_le_string;
   --  Convert a string from any byte-order, any character set (Cs) to
   --  Unicode little-endian byte sequence.
   --  Order is the order in which bytes are coded in Str. This is silently
   --  overridden in case Str has a BOM (byte-order-marker) at the beginning
   --  that specifies an explicit order.
   --  The BOM is removed from the resulting string.
   --  Invalid_Encoding is raised if there is a BOM that indicates an
   --  encoding other than Utf32.

   function To_CS
     (Str   : utf32_le_string;
      Cs    : Unicode.CCS.character_set := Unicode.CCS.Unicode_Character_Set;
      Order : byte_order := Default_Byte_Order) return utf32_string;
   --  Convert a Unicode, little-endian string to a string with any byte-order
   --  and a new character set.
   --  NOTE(review): presumably Cs and Order select the character set and the
   --  byte order of the result; confirm against the package body.

   ---------------------
   -- Encoding Scheme --
   ---------------------

   Utf32_LE_Encoding : constant encoding_scheme :=
     (BOM    => utf32_le,
      Read   => Read'access,
      Width  => Width'access,
      Encode => encode_function'(Encode'access),
      Length => Length'access);
   --  Encoding scheme for little-endian Utf32: the utf32_le BOM value together
   --  with the Read, Width, Encode and Length subprograms declared above.

   Utf32_BE_Encoding : constant encoding_scheme :=
     (BOM    => utf32_be,
      Read   => Read_BE'access,
      Width  => Width'access,
      Encode => encode_function'(Encode_BE'access),
      Length => Length'access);
   --  Encoding scheme for big-endian Utf32: same as Utf32_LE_Encoding, except
   --  for the utf32_be BOM value and the use of Read_BE and Encode_BE.

   ------------------
   -- Deallocation --
   ------------------

   procedure Free is new Unchecked_Deallocation
     (utf32_string,
      utf32_string_access);
   procedure Free is new Unchecked_Deallocation
     (utf32_le_string,
      utf32_le_string_access);
   --  Free the memory occupied by a utf32-encoded string.
   --  Free is overloaded, with one instance per access type declared above;
   --  the type of the actual parameter selects the instance.

private
   --  Request inlining of calls to Width
   pragma inline (Width);
end Unicode.CES.Utf32;