------------------------------------------------------------------------------
--                     XML/Ada - An XML suite for Ada95                     --
--                                                                          --
--                     Copyright (C) 2001-2012, AdaCore                     --
--                                                                          --
-- This library is free software;  you can redistribute it and/or modify it --
-- under terms of the  GNU General Public License  as published by the Free --
-- Software  Foundation;  either version 3,  or (at your  option) any later --
-- version. This library is distributed in the hope that it will be useful, --
-- but WITHOUT ANY WARRANTY;  without even the implied warranty of MERCHAN- --
-- TABILITY or FITNESS FOR A PARTICULAR PURPOSE.                            --
--                                                                          --
--                                                                          --
--                                                                          --
--                                                                          --
--                                                                          --
-- You should have received a copy of the GNU General Public License and    --
-- a copy of the GCC Runtime Library Exception along with this program;     --
-- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
-- <http://www.gnu.org/licenses/>.                                          --
--                                                                          --
------------------------------------------------------------------------------

with Unicode.CES.Utf32; use Unicode.CES.Utf32;
with Unicode.CCS;       use Unicode.CCS;

package body Unicode.CES.Utf16 is

   ------------
   -- Encode --
   ------------

   procedure Encode
     (Char   :        unicode_char;
      Output : in out byte_sequence;
      Index  : in out Natural)
   is
      --  Store Char in Output as UTF-16, the low byte of each 16-bit unit
      --  coming first. Writing starts at Output (Index + 1) and Index is
      --  advanced by the number of bytes stored: 2 for values below
      --  16#10000#, 4 (a surrogate pair) otherwise.

      Supplementary_Base : constant unicode_char := 16#10000#;

      Rest : unicode_char;
      --  Char minus Supplementary_Base; only its low 20 bits are encoded

      procedure Put_Unit (Unit : unicode_char);
      --  Write the two low-order bytes of Unit just after Index, low byte
      --  first, and move Index past them.

      procedure Put_Unit (Unit : unicode_char) is
      begin
         Output (Index + 1) := Character'val (Unit and 16#00FF#);
         Output (Index + 2) := Character'val ((Unit and 16#FF00#) / 16#100#);
         Index              := Index + 2;
      end Put_Unit;

   begin
      if Char >= Supplementary_Base then
         Rest := Char - Supplementary_Base;

         --  High surrogate carries bits 10 .. 19, low surrogate bits 0 .. 9
         Put_Unit (16#D800# + (Rest and 16#FFC00#) / 16#400#);
         Put_Unit (16#DC00# + (Rest and 16#3FF#));
      else
         Put_Unit (Char);
      end if;
   end Encode;

   ---------------
   -- Encode_BE --
   ---------------

   procedure Encode_BE
     (Char   :        unicode_char;
      Output : in out byte_sequence;
      Index  : in out Natural)
   is
      --  Store Char in Output as UTF-16, the high byte of each 16-bit unit
      --  coming first. Writing starts at Output (Index + 1) and Index is
      --  advanced by the number of bytes stored: 2 for values below
      --  16#10000#, 4 (a surrogate pair) otherwise.

      Supplementary_Base : constant unicode_char := 16#10000#;

      Rest : unicode_char;
      --  Char minus Supplementary_Base; only its low 20 bits are encoded

      procedure Put_Unit (Unit : unicode_char);
      --  Write the two low-order bytes of Unit just after Index, high byte
      --  first, and move Index past them.

      procedure Put_Unit (Unit : unicode_char) is
      begin
         Output (Index + 1) := Character'val ((Unit and 16#FF00#) / 16#100#);
         Output (Index + 2) := Character'val (Unit and 16#00FF#);
         Index              := Index + 2;
      end Put_Unit;

   begin
      if Char >= Supplementary_Base then
         Rest := Char - Supplementary_Base;

         --  High surrogate carries bits 10 .. 19, low surrogate bits 0 .. 9
         Put_Unit (16#D800# + (Rest and 16#FFC00#) / 16#400#);
         Put_Unit (16#DC00# + (Rest and 16#3FF#));
      else
         Put_Unit (Char);
      end if;
   end Encode_BE;

   ----------
   -- Read --
   ----------

   procedure Read
     (Str   :        utf16_le_string;
      Index : in out Positive;
      Char  :    out unicode_char)
   is
      --  Decode the character that starts at Str (Index), where each 16-bit
      --  unit is stored low byte first, and advance Index past it (by 2, or
      --  by 4 for a surrogate pair).
      --  Raises Incomplete_Encoding when Str ends before the unit or pair is
      --  complete, and Invalid_Encoding when a high surrogate is not
      --  followed by a low surrogate.

      Payload_Mask : constant unicode_char := 16#3FF#;
      --  The ten data bits carried by each surrogate

      Lead, Trail : unicode_char;
   begin
      if Index + 1 > Str'last then
         raise Incomplete_Encoding;
      end if;

      Lead :=
        Character'pos (Str (Index + 1)) * 16#100# +
        Character'pos (Str (Index));

      --  Anything but a high surrogate stands for itself
      if Lead not in 16#D800# .. 16#DBFF# then
         Char  := Lead;
         Index := Index + 2;
         return;
      end if;

      if Index + 3 > Str'last then
         raise Incomplete_Encoding;
      end if;

      Trail :=
        Character'pos (Str (Index + 3)) * 16#100# +
        Character'pos (Str (Index + 2));

      if Trail not in 16#DC00# .. 16#DFFF# then
         raise Invalid_Encoding;
      end if;

      Char :=
        (Lead and Payload_Mask) * 16#400# +
        (Trail and Payload_Mask) +
        16#10000#;
      Index := Index + 4;
   end Read;

   -------------
   -- Read_BE --
   -------------

   procedure Read_BE
     (Str   :        utf16_be_string;
      Index : in out Positive;
      Char  :    out unicode_char)
   is
      --  Decode the character that starts at Str (Index), where each 16-bit
      --  unit is stored high byte first, and advance Index past it (by 2,
      --  or by 4 for a surrogate pair).
      --  Raises Incomplete_Encoding when Str ends before the unit or pair is
      --  complete, and Invalid_Encoding when a high surrogate is not
      --  followed by a low surrogate.

      Payload_Mask : constant unicode_char := 16#3FF#;
      --  The ten data bits carried by each surrogate

      Lead, Trail : unicode_char;
   begin
      if Index + 1 > Str'last then
         raise Incomplete_Encoding;
      end if;

      Lead :=
        Character'pos (Str (Index)) * 16#100# +
        Character'pos (Str (Index + 1));

      --  Anything but a high surrogate stands for itself
      if Lead not in 16#D800# .. 16#DBFF# then
         Char  := Lead;
         Index := Index + 2;
         return;
      end if;

      if Index + 3 > Str'last then
         raise Incomplete_Encoding;
      end if;

      Trail :=
        Character'pos (Str (Index + 2)) * 16#100# +
        Character'pos (Str (Index + 3));

      if Trail not in 16#DC00# .. 16#DFFF# then
         raise Invalid_Encoding;
      end if;

      Char :=
        (Lead and Payload_Mask) * 16#400# +
        (Trail and Payload_Mask) +
        16#10000#;
      Index := Index + 4;
   end Read_BE;

   -----------
   -- Width --
   -----------

   function Width (Char : unicode_char) return Natural is
      --  Number of bytes Encode stores for Char: 2 for values below
      --  16#10000#, 4 for everything else.
   begin
      if Char < 16#10000# then
         return 2;
      end if;

      return 4;
   end Width;

   ------------
   -- Length --
   ------------

   function Length (Str : utf16_string) return Natural is
      --  Number of characters (not bytes) in Str, obtained by decoding it
      --  from start to end with Read. Any exception raised by Read is
      --  propagated.

      Count   : Natural := 0;
      Cursor  : Natural := Str'first;
      Ignored : unicode_char;
      --  The decoded values are not needed, only how many there are
   begin
      loop
         exit when Cursor > Str'last;
         Read (Str, Cursor, Ignored);
         Count := Count + 1;
      end loop;

      return Count;
   end Length;

   ----------------
   -- From_Utf32 --
   ----------------

   function From_Utf32
     (Str : Unicode.CES.Utf32.utf32_le_string) return utf16_le_string
   is
      --  Re-encode every character of Str with Encode and return the bytes
      --  produced.

      Buffer : utf16_le_string (1 .. (Str'length / utf32_char_width) * 4);
      --  Sized for the worst case of 4 bytes per input character

      Written    : Natural  := Buffer'first - 1;
      Source_Pos : Positive := Str'first;
      Code_Point : unicode_char;
   begin
      loop
         exit when Source_Pos > Str'last;
         Unicode.CES.Utf32.Read (Str, Source_Pos, Code_Point);
         Encode (Code_Point, Buffer, Written);
      end loop;

      --  Only the part of the buffer that was actually filled is returned
      return Buffer (1 .. Written);
   end From_Utf32;

   --------------
   -- To_Utf32 --
   --------------

   function To_Utf32
     (Str : utf16_le_string) return Unicode.CES.Utf32.utf32_le_string
   is
      --  Decode every character of Str with Read, re-encode it with
      --  Unicode.CES.Utf32.Encode and return the bytes produced.

      Buffer : utf32_le_string (1 .. (Str'length / 2) * utf32_char_width);
      --  Sized for the worst case of one output character per 2 input bytes

      Written    : Natural := Buffer'first - 1;
      Source_Pos : Natural := Str'first;
      Code_Point : unicode_char;
   begin
      loop
         exit when Source_Pos > Str'last;
         Read (Str, Source_Pos, Code_Point);
         Unicode.CES.Utf32.Encode (Code_Point, Buffer, Written);
      end loop;

      --  Only the part of the buffer that was actually filled is returned
      return Buffer (1 .. Written);
   end To_Utf32;

   -------------------
   -- To_Unicode_LE --
   -------------------
   --  ??? Note: this assumes that the original character and its
   --  conversion are encoded on the same length, which is always
   --  right so far with Unicode.

   function To_Unicode_LE
     (Str   : utf16_string;
      Cs    : Unicode.CCS.character_set := Unicode.CCS.Unicode_Character_Set;
      Order : byte_order := Default_Byte_Order) return utf16_le_string
   is
      --  Convert Str, whose characters belong to the character set Cs, into
      --  a low-byte-first UTF-16 string of Unicode characters.
      --
      --  Read_Bom is called first. A utf16_le or utf16_be result overrides
      --  Order, unknown keeps Order, and any other result raises
      --  Invalid_Encoding. The first Offset bytes of Str (as reported by
      --  Read_Bom) are not part of the result.
      --
      --  Raises Incomplete_Encoding if Str ends in the middle of a 16-bit
      --  unit or of a surrogate pair, and Invalid_Encoding on a malformed
      --  surrogate pair.

      BOM    : bom_type;
      Offset : Natural    := 0;
      O      : byte_order := Order;

      J : Natural;
      --  Position in Str of the next byte to read

      S : utf16_le_string (1 .. Str'length);

      S_Index : Natural;
      --  Position in S of the last byte written. It is kept separate from
      --  J: Read and Encode each advance the index they are given, so
      --  sharing one variable would skip every other character.

      C : unicode_char;

   begin
      Read_Bom (Str, Offset, BOM);

      case BOM is
         when utf16_le =>
            O := low_byte_first;
         when utf16_be =>
            O := high_byte_first;
         when unknown =>
            null;
         when others =>
            raise Invalid_Encoding;
      end case;

      --  Already in the requested form: nothing to convert
      if O = low_byte_first and then Cs.To_Unicode = Identity'access then
         return Str (Str'first + Offset .. Str'last);
      end if;

      J := Str'first + Offset;

      --  Output starts at S'first + Offset, so that the bounds of the
      --  result are the same as those of the slice returned above when
      --  Str'first is 1.
      S_Index := S'first - 1 + Offset;

      if Cs.To_Unicode = Identity'access then

         --  High byte first and no character conversion needed: swapping
         --  the two bytes of every 16-bit unit is enough.

         while J <= Str'last loop
            if J = Str'last then
               raise Incomplete_Encoding;
            end if;

            S (S_Index + 1) := Str (J + 1);
            S (S_Index + 2) := Str (J);
            S_Index         := S_Index + 2;
            J               := J + 2;
         end loop;

      else
         while J <= Str'last loop
            if O = low_byte_first then
               Read (Str, J, C);
            else
               Read_BE (Str, J, C);
            end if;

            Encode (Cs.To_Unicode (C), S, S_Index);
         end loop;
      end if;

      return S (S'first + Offset .. S_Index);
   end To_Unicode_LE;

   -----------
   -- To_CS --
   -----------

   function To_CS
     (Str   : utf16_le_string;
      Cs    : Unicode.CCS.character_set := Unicode.CCS.Unicode_Character_Set;
      Order : byte_order := Default_Byte_Order) return utf16_string
   is
      --  Convert the low-byte-first UTF-16 string Str into a UTF-16 string
      --  with byte order Order, mapping every character through Cs.To_CS.
      --
      --  The result is assumed to need no more bytes than Str; this holds
      --  as long as a character and its conversion are encoded on the same
      --  length.
      --
      --  Raises Incomplete_Encoding if Str ends in the middle of a 16-bit
      --  unit or of a surrogate pair, and Invalid_Encoding on a malformed
      --  surrogate pair.

      J : Natural := Str'first;
      --  Position in Str of the next byte to read

      S : utf16_le_string (1 .. Str'length);

      S_Index : Natural := S'first - 1;
      --  Position in S of the last byte written. It is kept separate from
      --  J: Read and Encode each advance the index they are given, so
      --  sharing one variable would skip every other character.

      C : unicode_char;
   begin
      --  Already in the requested form: nothing to convert
      if Order = low_byte_first and then Cs.To_CS = Identity'access then
         return Str;
      end if;

      if Cs.To_CS = Identity'access then

         --  High byte first requested and no character conversion needed:
         --  swapping the two bytes of every 16-bit unit is enough.

         while J <= Str'last loop
            if J = Str'last then
               raise Incomplete_Encoding;
            end if;

            S (S_Index + 1) := Str (J + 1);
            S (S_Index + 2) := Str (J);
            S_Index         := S_Index + 2;
            J               := J + 2;
         end loop;

      else
         while J <= Str'last loop
            Read (Str, J, C);

            if Order = low_byte_first then
               Encode (Cs.To_CS (C), S, S_Index);
            else
               Encode_BE (Cs.To_CS (C), S, S_Index);
            end if;
         end loop;
      end if;

      return S (S'first .. S_Index);
   end To_CS;

end Unicode.CES.Utf16;