--------------------------------------------- ----------------------------------- -- XML/Ada - An XML suite for Ada95 -- -- -- -- Copyright (C) 2001-2012, AdaCore -- -- -- -- This library is free software; you can redistribute it and/or modify it -- -- under terms of the GNU General Public License as published by the Free -- -- Software Foundation; either version 3, or (at your option) any later -- -- version. This library is distributed in the hope that it will be useful, -- -- but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHAN- -- -- TABILITY or FITNESS FOR A PARTICULAR PURPOSE. -- -- -- -- -- -- -- -- -- -- -- -- You should have received a copy of the GNU General Public License and -- -- a copy of the GCC Runtime Library Exception along with this program; -- -- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -- -- . -- -- -- ------------------------------------------------------------------------------ package body Unicode.CES is -------------- -- Read_Bom -- -------------- procedure Read_Bom (Str : String; Len : out Natural; BOM : out bom_type; XML_Support : Boolean := True) is begin if Str'length >= 2 and then Str (Str'first) = Character'val (16#FE#) and then Str (Str'first + 1) = Character'val (16#FF#) then Len := 2; BOM := utf16_be; elsif Str'length >= 2 and then Str (Str'first) = Character'val (16#FF#) and then Str (Str'first + 1) = Character'val (16#FE#) then Len := 2; BOM := utf16_le; elsif Str'length >= 4 and then Str (Str'first) = Character'val (16#00#) and then Str (Str'first + 1) = Character'val (16#00#) and then Str (Str'first + 2) = Character'val (16#FE#) and then Str (Str'first + 3) = Character'val (16#FF#) then Len := 4; BOM := utf32_be; elsif Str'length >= 4 and then Str (Str'first) = Character'val (16#FF#) and then Str (Str'first + 1) = Character'val (16#FE#) and then Str (Str'first + 2) = Character'val (16#00#) and then Str (Str'first + 3) = Character'val (16#00#) then Len := 4; BOM := utf32_le; elsif Str'length >= 3 and then Str (Str'first) = Character'val (16#EF#) and then Str (Str'first + 1) = Character'val (16#BB#) and then Str (Str'first + 2) = Character'val (16#BF#) then Len := 3; BOM := utf8_all; elsif XML_Support and then Str'length >= 4 and then Str (Str'first) = Character'val (16#00#) and then Str (Str'first + 1) = Character'val (16#00#) and then Str (Str'first + 2) = Character'val (16#00#) and then Str (Str'first + 3) = Character'val (16#3C#) then Len := 0; BOM := ucs4_be; elsif XML_Support and then Str'length >= 4 and then Str (Str'first) = Character'val (16#3C#) and then Str (Str'first + 1) = Character'val (16#00#) and then Str (Str'first + 2) = Character'val (16#00#) and then Str (Str'first + 3) = Character'val (16#00#) then Len := 0; BOM := ucs4_le; elsif XML_Support and then Str'length >= 4 and then Str (Str'first) = Character'val (16#00#) and then Str (Str'first + 1) = Character'val (16#00#) and then Str (Str'first + 2) = Character'val (16#3C#) and then Str (Str'first + 3) = Character'val (16#00#) then Len := 0; BOM := ucs4_2143; elsif XML_Support and then Str'length >= 4 and then Str (Str'first) = Character'val (16#00#) and then Str (Str'first + 1) = Character'val (16#3C#) and then Str (Str'first + 2) = Character'val (16#00#) and then Str (Str'first + 3) = Character'val (16#00#) then Len := 0; BOM := ucs4_3412; elsif XML_Support and then Str'length >= 4 and then Str (Str'first) = Character'val (16#00#) and then Str (Str'first + 1) = Character'val (16#3C#) and then Str (Str'first + 2) = Character'val (16#00#) and then Str (Str'first + 3) = Character'val (16#3F#) then Len := 0; BOM := utf16_be; elsif XML_Support and then Str'length >= 4 and then Str (Str'first) = Character'val (16#3C#) and then Str (Str'first + 1) = Character'val (16#00#) and then Str (Str'first + 2) = Character'val (16#3F#) and then Str (Str'first + 3) = Character'val (16#00#) then Len := 0; BOM := utf16_le; elsif XML_Support and then Str'length >= 4 and then Str (Str'first) = Character'val (16#3C#) and then Str (Str'first + 1) = Character'val (16#3F#) and then Str (Str'first + 2) = Character'val (16#78#) and then Str (Str'first + 3) = Character'val (16#6D#) then -- Utf8, ASCII, some part of ISO8859, Shift-JIS, EUC,... Len := 0; BOM := unknown; else Len := 0; BOM := unknown; end if; end Read_Bom; -------------- -- Write_Bom -- -------------- function Write_Bom (BOM : bom_type) return String is begin case BOM is when utf16_le => return Character'val (16#FF#) & Character'val (16#FE#); when utf16_be => return Character'val (16#FE#) & Character'val (16#FF#); when utf32_le => return Character'val (16#FF#) & Character'val (16#FE#) & Character'val (16#00#) & Character'val (16#00#); when utf32_be => return Character'val (16#00#) & Character'val (16#00#) & Character'val (16#FE#) & Character'val (16#FF#); when utf8_all => return Character'val (16#EF#) & Character'val (16#BB#) & Character'val (16#BF#); when ucs4_be => return Character'val (16#00#) & Character'val (16#00#) & Character'val (16#00#) & Character'val (16#3C#); when ucs4_le => return Character'val (16#3C#) & Character'val (16#00#) & Character'val (16#00#) & Character'val (16#00#); when ucs4_2143 => return Character'val (16#00#) & Character'val (16#00#) & Character'val (16#3C#) & Character'val (16#00#); when ucs4_3412 => return Character'val (16#00#) & Character'val (16#3C#) & Character'val (16#00#) & Character'val (16#00#); when unknown => return ""; end case; end Write_Bom; ----------------------- -- Index_From_Offset -- ----------------------- function Index_From_Offset (Str : byte_sequence; Offset : Natural; Encoding : encoding_scheme) return Integer is Pos : Natural := Str'first; Offs : Integer := Offset; C : unicode_char; begin while Pos <= Str'last loop if Offs <= 0 then return Pos; end if; Encoding.Read (Str, Pos, C); Offs := Offs - 1; end loop; return -1; end Index_From_Offset; end Unicode.CES;