------------------------------------------------------------------------------
--                     XML/Ada - An XML suite for Ada95                     --
--                                                                          --
--                     Copyright (C) 2001-2012, AdaCore                     --
--                                                                          --
-- This library is free software;  you can redistribute it and/or modify it --
-- under terms of the GNU General Public License as published by the Free   --
-- Software Foundation; either version 3, or (at your option) any later     --
-- version. This library is distributed in the hope that it will be useful, --
-- but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHAN-  --
-- TABILITY or FITNESS FOR A PARTICULAR PURPOSE.                            --
--                                                                          --
--                                                                          --
--                                                                          --
--                                                                          --
--                                                                          --
-- You should have received a copy of the GNU General Public License and    --
-- a copy of the GCC Runtime Library Exception along with this program;     --
-- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see     --
-- <http://www.gnu.org/licenses/>.                                          --
--                                                                          --
------------------------------------------------------------------------------

--  This is the root of the hierarchy that provides the different encoding
--  schemes.
--  Each child package provides functions to convert to and from
--  Utf32-encoded strings, which thus act as the central encoding scheme.
--  In some cases, a string can be preceded by a BOM (byte-order mark) that
--  indicates both the encoding and the byte order used for that encoding.
--  This BOM is always optional, and can never be mixed up with other
--  characters.

with Unchecked_Deallocation;

package Unicode.CES is

   -------------------
   -- Byte sequence --
   -------------------

   subtype byte_sequence is String;
   type byte_sequence_access is access all byte_sequence;
   type cst_byte_sequence_access is access constant byte_sequence;
   --  A sequence of bytes. The encoding is unknown.
   --  byte_sequence is a plain subtype of String (no extra constraint), so
   --  any String can be used where a byte_sequence is expected.
   --  byte_sequence_access is a general access type giving read/write
   --  access to the designated sequence; cst_byte_sequence_access only
   --  gives a constant (read-only) view of it.

   procedure Free is new Unchecked_Deallocation
     (byte_sequence, byte_sequence_access);
   --  Deallocate the byte sequence designated by the access value, which is
   --  reset to null afterwards (standard Unchecked_Deallocation behaviour).
   --  NOTE(review): byte_sequence_access is declared "access all", so it
   --  can also designate aliased objects that were not created by an
   --  allocator; Free should only be called on allocator-created values.

   -------------------------
   -- Byte order handling --
   -------------------------

   type byte_order is (high_byte_first, low_byte_first);
   --  Order in which the bytes of a multi-byte word are laid out.
   --  Going by the literal names, high_byte_first presumably means the most
   --  significant byte comes first (big-endian) and low_byte_first the
   --  least significant byte first (little-endian) -- TODO confirm against
   --  the child packages that use this type.

   Default_Byte_Order : constant byte_order := low_byte_first;
   --  Byte order used by default. This is a fixed constant set to
   --  low_byte_first.
   --  NOTE(review): the value is hard-coded rather than derived from the
   --  target; confirm this is what is wanted on high-byte-first hosts.

   ------------------------------
   -- Byte-order mark handling --
   ------------------------------

   type bom_type is
     (utf8_all,   --  Utf8-encoding
      utf16_le,   --  Utf16 little-endian encoding
      utf16_be,   --  Utf16 big-endian encoding
      utf32_le,   --  Utf32 little-endian encoding
      utf32_be,   --  Utf32 big-endian encoding
      ucs4_be,    --  UCS-4, big endian machine (1234 order)
      ucs4_le,    --  UCS-4, little endian machine (4321 order)
      ucs4_2143,  --  UCS-4, unusual byte order (2143 order)
      ucs4_3412,  --  UCS-4, unusual byte order (3412 order)
      unknown);   --  Unknown, assumed to be ASCII compatible
   --  The encoding used for a string, as it can be deduced from the BOM.
   --  The relative order of the utf16_* and utf32_* literals matters: the
   --  two subtypes below are defined as ranges over this enumeration.

   subtype bom_type_utf16 is bom_type range utf16_le .. utf16_be;
   --  The two Utf16 variants only (utf16_le and utf16_be)

   subtype bom_type_utf32 is bom_type range utf32_le .. utf32_be;
   --  The two Utf32 variants only (utf32_le and utf32_be)

   procedure Read_Bom
     (Str         : String;
      Len         : out Natural;
      BOM         : out bom_type;
      XML_Support : Boolean := True);
   --  Read the optional Byte-Order-Mark at the beginning of the byte
   --  sequence Str, and report the detected encoding in BOM.
   --  Len will contain the number of characters that made up that BOM, and
   --  that should be ignored when reading Str.
   --  If XML_Support is True, then the first four bytes of Str are also
   --  checked to recognize "