------------------------------------------------------------------------------ -- XML/Ada - An XML suite for Ada95 -- -- -- -- Copyright (C) 2001-2012, AdaCore -- -- -- -- This library is free software; you can redistribute it and/or modify it -- -- under terms of the GNU General Public License as published by the Free -- -- Software Foundation; either version 3, or (at your option) any later -- -- version. This library is distributed in the hope that it will be useful, -- -- but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHAN- -- -- TABILITY or FITNESS FOR A PARTICULAR PURPOSE. -- -- -- -- -- -- -- -- -- -- -- -- You should have received a copy of the GNU General Public License and -- -- a copy of the GCC Runtime Library Exception along with this program; -- -- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -- -- . -- -- -- ------------------------------------------------------------------------------ with Unicode.CCS; use Unicode.CCS; package body Unicode.CES.Utf32 is ------------ -- Encode -- ------------ procedure Encode (Char : unicode_char; Output : in out byte_sequence; Index : in out Natural) is begin Output (Index + 1) := Character'val (Char and 16#000000FF#); Output (Index + 2) := Character'val ((Char and 16#0000FF00#) / (2**8)); Output (Index + 3) := Character'val ((Char and 16#00FF0000#) / (2**16)); Output (Index + 4) := Character'val ((Char and 16#FF000000#) / (2**24)); Index := Index + 4; end Encode; --------------- -- Encode_BE -- --------------- procedure Encode_BE (Char : unicode_char; Output : in out byte_sequence; Index : in out Natural) is begin Output (Index + 1) := Character'val ((Char and 16#FF000000#) / (2**24)); Output (Index + 2) := Character'val ((Char and 16#00FF0000#) / (2**16)); Output (Index + 3) := Character'val ((Char and 16#0000FF00#) / (2**8)); Output (Index + 4) := Character'val (Char and 16#000000FF#); Index := Index + 4; end Encode_BE; ---------- -- Read -- ---------- procedure Read (Str : utf32_le_string; Index : in out Positive; Char : out unicode_char) is begin if Index > Str'last - 3 then raise Incomplete_Encoding; else Char := Character'pos (Str (Index)) + Character'pos (Str (Index + 1)) * (2**8) + Character'pos (Str (Index + 2)) * (2**16) + Character'pos (Str (Index + 3)) * (2**24); Index := Index + 4; end if; end Read; ------------- -- Read_BE -- ------------- procedure Read_BE (Str : utf32_be_string; Index : in out Positive; Char : out unicode_char) is begin if Index > Str'last - 3 then raise Incomplete_Encoding; else Char := Character'pos (Str (Index + 3)) + Character'pos (Str (Index + 2)) * (2**8) + Character'pos (Str (Index + 1)) * (2**16) + Character'pos (Str (Index)) * (2**24); Index := Index + 4; end if; end Read_BE; ----------- -- Width -- ----------- function Width (Char : unicode_char) return Natural is pragma warnings (Off, Char); begin return 4; end Width; ------------ -- Length -- ------------ function Length (Str : utf32_string) return Natural is begin return Str'length / 4; end Length; ----------------- -- To_Utf32_LE -- ----------------- function To_Unicode_LE (Str : utf32_string; Cs : Unicode.CCS.character_set := Unicode.CCS.Unicode_Character_Set; Order : byte_order := Default_Byte_Order) return utf32_le_string is Offset : Natural; O : byte_order := Order; J : Natural := Str'first; J_Out : Natural; S : utf32_le_string (1 .. Str'length); C : unicode_char; BOM : bom_type; begin Read_Bom (Str, Offset, BOM); case BOM is when utf32_le => O := low_byte_first; when utf32_be => O := high_byte_first; when unknown => null; when others => raise Invalid_Encoding; end case; if O = low_byte_first then if Cs.To_Unicode = Identity'access then return Str (Str'first + Offset .. Str'last); else J := J + Offset; J_Out := S'first - 1; while J <= Str'last loop Read (Str, J, C); Encode (Cs.To_Unicode (C), S, J_Out); end loop; return S (S'first + Offset .. S'last); end if; else J := J + Offset; if Cs.To_Unicode = Identity'access then while J <= Str'last loop S (J + 3) := Str (J); S (J + 2) := Str (J + 1); S (J + 1) := Str (J + 2); S (J) := Str (J + 3); J := J + 4; end loop; else J_Out := S'first - 1; while J <= Str'last loop Read_BE (Str, J, C); Encode (Cs.To_Unicode (C), S, J_Out); end loop; end if; return S (S'first + Offset .. S'last); end if; end To_Unicode_LE; ----------- -- To_CS -- ----------- function To_CS (Str : utf32_le_string; Cs : Unicode.CCS.character_set := Unicode.CCS.Unicode_Character_Set; Order : byte_order := Default_Byte_Order) return utf32_string is J : Natural := Str'first; S : utf32_le_string (1 .. Str'length); C : unicode_char; begin if Order = low_byte_first then if Cs.To_CS = Identity'access then return Str; else J := J - 1; while J <= Str'last loop Read (Str, J, C); Encode (Cs.To_CS (C), S, J); end loop; return S; end if; else if Cs.To_CS = Identity'access then while J <= Str'last loop S (J + 3) := Str (J); S (J + 2) := Str (J + 1); S (J + 1) := Str (J + 2); S (J) := Str (J + 3); J := J + 4; end loop; else J := J - 1; while J <= Str'last loop Read_BE (Str, J, C); Encode (Cs.To_CS (C), S, J); end loop; end if; return S; end if; end To_CS; end Unicode.CES.Utf32;