------------------------------------------------------------------------------
--                     XML/Ada - An XML suite for Ada95                     --
--                                                                          --
--                     Copyright (C) 2005-2012, AdaCore                     --
--                                                                          --
-- This library is free software;  you can redistribute it and/or modify it --
-- under terms of the  GNU General Public License  as published by the Free --
-- Software  Foundation;  either version 3,  or (at your  option) any later --
-- version. This library is distributed in the hope that it will be useful, --
-- but WITHOUT ANY WARRANTY;  without even the implied warranty of MERCHAN- --
-- TABILITY or FITNESS FOR A PARTICULAR PURPOSE.                            --
--                                                                          --
--                                                                          --
--                                                                          --
--                                                                          --
--                                                                          --
-- You should have received a copy of the GNU General Public License and    --
-- a copy of the GCC Runtime Library Exception along with this program;     --
-- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
-- <http://www.gnu.org/licenses/>.                                          --
--                                                                          --
------------------------------------------------------------------------------

with Ada.Unchecked_Deallocation;
with Interfaces; use Interfaces;
with Unicode.CES;
use Unicode, Unicode.CES;
with Sax.Encodings;             use Sax.Encodings;
with Sax.Symbols;               use Sax.Symbols;
with Unicode.Names.Basic_Latin; use Unicode.Names.Basic_Latin;

package body Sax.Utils is

   type unichar_boolean_array is array (unicode_char range <>) of Boolean;
   pragma pack (unichar_boolean_array);

   Valid_URI_Characters : constant unichar_boolean_array (16#00# .. 16#9F#) :=
     (Digit_Zero .. Digit_Nine                         => True,
      Latin_Capital_Letter_A .. Latin_Capital_Letter_Z => True,
      Latin_Small_Letter_A .. Latin_Small_Letter_Z     => True,
      Opening_Parenthesis | Closing_Parenthesis => True,
      Percent_Sign       => True,
      Plus_Sign          => True,
      Comma              => True,
      Hyphen_Minus       => True,
      Dot                => True,
      Colon              => True,
      Equals_Sign        => True,
      Commercial_At      => True,
      Semicolon          => True,
      Dollar_Sign        => True,
      Spacing_Underscore => True,
      Exclamation_Mark   => True,
      Star               => True,
      Apostrophe         => True,
      Question_Mark      => True,
      Slash              => True,
      Pound_Sign         => True,
      Tilde              => True,
--        16#A0#    .. 16#D7FF#  => True,  --  ucschars from RFC 3987
--        16#F900#  .. 16#FDCF#  => True,
--        16#FDF0#  .. 16#FFEF#  => True,
--        16#10000# .. 16#1FFFD# => True,
--        16#20000# .. 16#2FFFD# => True,
--        16#30000# .. 16#3FFFD# => True,
--        16#40000# .. 16#4FFFD# => True,
--        16#50000# .. 16#5FFFD# => True,
--        16#60000# .. 16#6FFFD# => True,
--        16#70000# .. 16#7FFFD# => True,
--        16#80000# .. 16#8FFFD# => True,
--        16#90000# .. 16#9FFFD# => True,
--        16#A0000# .. 16#AFFFD# => True,
--        16#B0000# .. 16#BFFFD# => True,
--        16#C0000# .. 16#CFFFD# => True,
--        16#D0000# .. 16#DFFFD# => True,
--        16#E0000# .. 16#EFFFD# => True,
      others => False);
   --  Rules based on RFC 2141, at http://rfc.net/rfc2141.html,
   --  completed with rules from Uniformed Resource Identifier at
   --  http://www.gbiv.com/protocols/uri/rfc/rfc3986.html

   B64 : constant array (unicode_char range 32 .. 128) of Boolean :=
     (Character'pos ('A') .. Character'pos ('Z') => True,
      Character'pos ('a') .. Character'pos ('z') => True,
      Character'pos ('0') .. Character'pos ('9') => True,
      Character'pos ('+')                        => True,
      Character'pos ('/')                        => True,
      others                                     => False);
   B04 : constant array (unicode_char range 32 .. 128) of Boolean :=
     (Character'pos ('A') => True,
      Character'pos ('Q') => True,
      Character'pos ('g') => True,
      Character'pos ('w') => True,
      others              => False);
   B16 : constant array (unicode_char range 32 .. 128) of Boolean :=
     (Character'pos ('A') => True,
      Character'pos ('E') => True,
      Character'pos ('I') => True,
      Character'pos ('M') => True,
      Character'pos ('Q') => True,
      Character'pos ('U') => True,
      Character'pos ('Y') => True,
      Character'pos ('c') => True,
      Character'pos ('g') => True,
      Character'pos ('k') => True,
      Character'pos ('o') => True,
      Character'pos ('s') => True,
      Character'pos ('w') => True,
      Character'pos ('0') => True,
      Character'pos ('4') => True,
      Character'pos ('8') => True,
      others              => False);
   --  Whether the character matches the Base64Binary definitions

   ----------------------------
   -- Is_Valid_Language_Name --
   ----------------------------

   function Is_Valid_Language_Name
     (Lang : Unicode.CES.byte_sequence) return Boolean
   is
      C           : unicode_char := Space;
      Index       : Natural      := Lang'first;
      Count       : Natural      := 0;
      Subtag      : Natural      := 1;
      Allow_Digit : Boolean      := False;
   begin
      --  See http://www.ietf.org/rfc/rfc3066.tx
      --    Language-Tag = Primary-subtag *( "-" Subtag )
      --    Primary-subtag = 1*8ALPHA
      --    Subtag = 1*8(ALPHA / DIGIT)
      --  In addition, it seems that the length of subtags is not necessarily
      --  limited to 8 characters, given the XML conformance testsuite test
      --  sun/valid/v-lang04.xml

      while Index <= Lang'last loop
         Encoding.Read (Lang, Index, C);

         if C = Hyphen_Minus then
            if Count = 0 or else (Subtag <= 2 and Count > 8) then
               --  Too many characters
               return False;

            else
               Allow_Digit := True;
               Count       := 0;
               Subtag      := Subtag + 1;
            end if;

         elsif C not in Latin_Small_Letter_A .. Latin_Small_Letter_Z
           and then C not in Latin_Capital_Letter_A .. Latin_Capital_Letter_Z
           and then (not Allow_Digit or else C not in Digit_Zero .. Digit_Nine)
         then
            return False;

         else
            Count := Count + 1;
         end if;
      end loop;

      if Count = 0 or else (Subtag <= 2 and Count > 8) then
         --  Too many characters
         return False;
      end if;

      return True;
   end Is_Valid_Language_Name;

   ------------------------
   -- Is_Valid_Name_Char --
   ------------------------

   function Is_Valid_Name_Char
     (Char    : Unicode.unicode_char;
      Version : xml_versions := xml_1_1) return Boolean
   is
   begin
      --  ??? Should we create a single lookup table for all of these, that
      --  would be more efficient
      case Version is
         when xml_1_0_third_edition | xml_1_0_fourth_edition =>
            return Char = Period
              or else Char = Hyphen_Minus
              or else Char = Spacing_Underscore
              or else Char = Colon
              or else Is_Digit (Char)
              or else Is_Letter (Char)
              or else Is_Combining_Char (Char)
              or else Is_Extender (Char);

         when xml_1_0_fifth_edition | xml_1_0 | xml_1_1 =>
            return Char = Hyphen_Minus
              or else Char = Period
              or else (Char in Digit_Zero .. Digit_Nine)
              or else Char = 16#B7#
              or else (Char in 16#0300# .. 16#036F#)
              or else (Char in 16#203F# .. 16#2040#)
              or else Is_Valid_Name_Startchar (Char, Version);
      end case;
   end Is_Valid_Name_Char;

   -----------------------------
   -- Is_Valid_Name_Startchar --
   -----------------------------

   function Is_Valid_Name_Startchar
     (Char    : Unicode.unicode_char;
      Version : xml_versions := xml_1_1) return Boolean
   is
   begin
      case Version is
         when xml_1_0_third_edition | xml_1_0_fourth_edition =>
            return Char = Spacing_Underscore or else Is_Letter (Char);

         when xml_1_0_fifth_edition | xml_1_0 | xml_1_1 =>
            return Char = Colon
              or else Char = Spacing_Underscore
              or else Char in Latin_Capital_Letter_A .. Latin_Capital_Letter_Z
              or else Char in Latin_Small_Letter_A .. Latin_Small_Letter_Z
              or else Char in 16#C0# .. 16#D6#
              or else Char in 16#D8# .. 16#F6#
              or else Char in 16#F8# .. 16#2FF#
              or else Char in 16#370# .. 16#37D#
              or else Char in 16#37F# .. 16#1FFF#
              or else Char in 16#200C# .. 16#200D#
              or else Char in 16#2070# .. 16#218F#
              or else Char in 16#2C00# .. 16#2FEF#
              or else Char in 16#3001# .. 16#D7FF#
              or else Char in 16#F900# .. 16#FDCF#
              or else Char in 16#FDF0# .. 16#FFFD#
              or else Char in 16#10000# .. 16#EFFFF#;
      end case;
   end Is_Valid_Name_Startchar;

   --------------------------
   -- Is_Valid_NCname_Char --
   --------------------------

   function Is_Valid_NCname_Char
     (Char    : Unicode.unicode_char;
      Version : xml_versions := xml_1_1) return Boolean
   is
   begin
      case Version is
         when xml_1_0_third_edition | xml_1_0_fourth_edition =>
            return Char = Period
              or else Char = Hyphen_Minus
              or else Char = Spacing_Underscore
              or else Is_Digit (Char)
              or else Is_Letter (Char)
              or else Is_Combining_Char (Char)
              or else Is_Extender (Char);

         when xml_1_0_fifth_edition | xml_1_0 | xml_1_1 =>
            return Is_Valid_Name_Char (Char, Version);
      end case;
   end Is_Valid_NCname_Char;

   ----------------------
   -- Is_Valid_Nmtoken --
   ----------------------

   function Is_Valid_Nmtoken
     (Nmtoken : Unicode.CES.byte_sequence;
      Version : xml_versions := xml_1_1) return Boolean
   is
      C     : unicode_char;
      Index : Natural := Nmtoken'first;
   begin
      while Index <= Nmtoken'last loop
         Encoding.Read (Nmtoken, Index, C);
         if not Is_Valid_Name_Char (C, Version) then
            return False;
         end if;
      end loop;

      return True;
   end Is_Valid_Nmtoken;

   -----------------------
   -- Is_Valid_Nmtokens --
   -----------------------

   function Is_Valid_Nmtokens
     (Nmtokens : Unicode.CES.byte_sequence;
      Version  : xml_versions := xml_1_1) return Boolean
   is
      C     : unicode_char;
      Index : Natural := Nmtokens'first;
   begin
      if Nmtokens'length = 0 then
         return False;
      end if;

      while Index <= Nmtokens'last loop
         Encoding.Read (Nmtokens, Index, C);
         if C /= Space and then not Is_Valid_Name_Char (C, Version) then
            return False;
         end if;
      end loop;

      return True;
   end Is_Valid_Nmtokens;

   -------------------
   -- Is_Valid_Name --
   -------------------

   function Is_Valid_Name
     (Name    : Unicode.CES.byte_sequence;
      Version : xml_versions := xml_1_1) return Boolean
   is
      C     : unicode_char;
      Index : Natural := Name'first;
   begin
      if Name = "" then
         return False;
      end if;

      Encoding.Read (Name, Index, C);

      if C /= Colon and then not Is_Valid_Name_Startchar (C, Version) then
         return False;
      end if;

      return Is_Valid_Nmtoken (Name (Index .. Name'last), Version);
   end Is_Valid_Name;

   --------------------
   -- Is_Valid_Names --
   --------------------

   function Is_Valid_Names
     (Name    : Unicode.CES.byte_sequence;
      Version : xml_versions := xml_1_1) return Boolean
   is
      C             : unicode_char;
      Index         : Natural := Name'first;
      First_In_Name : Boolean := True;
   begin
      if Name'length = 0 then
         return False;
      end if;

      while Index <= Name'last loop
         Encoding.Read (Name, Index, C);

         if C = Space then
            First_In_Name := True;

         elsif First_In_Name then
            if not Is_Valid_Name_Startchar (C, Version) then
               return False;
            end if;
            First_In_Name := False;

         elsif not Is_Valid_Name_Char (C, Version) then
            return False;
         end if;
      end loop;
      return True;
   end Is_Valid_Names;

   ----------------------
   -- Is_Valid_NCnames --
   ----------------------

   function Is_Valid_NCnames
     (Name    : Unicode.CES.byte_sequence;
      Version : xml_versions := xml_1_1) return Boolean
   is
      C             : unicode_char;
      Index         : Natural := Name'first;
      First_In_Name : Boolean := True;
   begin
      if Name'length = 0 then
         return False;
      end if;

      while Index <= Name'last loop
         Encoding.Read (Name, Index, C);

         if C = Space then
            First_In_Name := True;

         elsif First_In_Name then
            if C /= Spacing_Underscore and then not Is_Letter (C) then
               return False;
            end if;
            First_In_Name := False;

         elsif not Is_Valid_NCname_Char (C, Version) then
            return False;
         end if;
      end loop;
      return True;
   end Is_Valid_NCnames;

   ---------------------
   -- Is_Valid_NCname --
   ---------------------

   function Is_Valid_NCname
     (Name    : Unicode.CES.byte_sequence;
      Version : xml_versions := xml_1_1) return Boolean
   is
      C     : unicode_char;
      Index : Natural := Name'first;
   begin
      if Name'length = 0 then
         return False;
      end if;

      Encoding.Read (Name, Index, C);

      if not Is_Valid_Name_Startchar (C, Version) then
         return False;
      end if;

      while Index <= Name'last loop
         Encoding.Read (Name, Index, C);
         if not Is_Valid_NCname_Char (C, Version) then
            return False;
         end if;
      end loop;

      return True;
   end Is_Valid_NCname;

   --------------------
   -- Is_Valid_QName --
   --------------------

   function Is_Valid_QName
     (Name    : Unicode.CES.byte_sequence;
      Version : xml_versions := xml_1_1) return Boolean
   is
   begin
      for N in Name'range loop
         if Name (N) = ':' then
            return N /= Name'last
              and then Is_Valid_NCname (Name (Name'first .. N - 1), Version)
              and then Is_Valid_NCname (Name (N + 1 .. Name'last), Version);
         end if;
      end loop;
      return Is_Valid_NCname (Name, Version);
   end Is_Valid_QName;

   ----------
   -- Hash --
   ----------

   function Hash
     (Key : Unicode.CES.byte_sequence) return Interfaces.Unsigned_32
   is
      type uns is mod 2**32;
      function Rotate_Left (Value : uns; Amount : Natural) return uns;
      pragma import (Intrinsic, Rotate_Left);

      Tmp : uns := 0;
   begin
      for J in Key'range loop
         Tmp := Rotate_Left (Tmp, 1) + Character'pos (Key (J));
      end loop;

      return Interfaces.Unsigned_32 (Tmp);
   end Hash;

   function Hash
     (Key : Unicode.CES.byte_sequence_access) return Interfaces.Unsigned_32
   is
   begin
      return Hash (Key.all);
   end Hash;

   -----------
   -- Equal --
   -----------

   function Equal (S1, S2 : Unicode.CES.byte_sequence_access) return Boolean is
   begin
      return S1.all = S2.all;
   end Equal;

   -----------------
   -- Split_Qname --
   -----------------

   function Split_Qname (Qname : Unicode.CES.byte_sequence) return Integer is
   begin
      --  ??? This function assumes we are using UTF8 internally
      for Q in Qname'range loop
         if Qname (Q) = ':' then
            return Q;
         end if;
      end loop;
      return Qname'first - 1;
   end Split_Qname;

   ---------------
   -- Check_URI --
   ---------------

   function Check_URI
     (Name    : Unicode.CES.byte_sequence;
      Version : xml_versions := xml_1_1) return uri_type
   is
      Index      : Integer := Name'first;
      C          : unicode_char;
      Has_Scheme : Boolean := False;
      Has_Hash   : Boolean := False;
   begin
      --  This is RFC 3986 which obsoletes RFC 2396.
      --  URI           = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
      --  hier-part     = "//" authority path-abempty
      --                / path-absolute
      --                / path-rootless
      --                / path-empty
      --  URI-reference = URI / relative-ref
      --  absolute-URI  = scheme ":" hier-part [ "?" query ]
      --  relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
      --  relative-part = "//" authority path-abempty
      --                / path-absolute
      --                / path-noscheme
      --                / path-empty
      --  scheme        = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
      --  authority     = [ userinfo "@" ] host [ ":" port ]
      --  userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
      --  host          = IP-literal / IPv4address / reg-name
      --  port          = *DIGIT
      --  IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
      --  reg-name      = *( unreserved / pct-encoded / sub-delims )
      --  path          = path-abempty    ; begins with "/" or is empty
      --                / path-absolute   ; begins with "/" but not "//"
      --                / path-noscheme   ; begins with a non-colon segment
      --                / path-rootless   ; begins with a segment
      --                / path-empty      ; zero characters
      --  path-abempty  = *( "/" segment )
      --  path-absolute = "/" [ segment-nz *( "/" segment ) ]
      --  path-noscheme = segment-nz-nc *( "/" segment )
      --  path-rootless = segment-nz *( "/" segment )
      --  path-empty    = 0<pchar>
      --  segment       = *pchar
      --  segment-nz    = 1*pchar
      --  segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
      --                ; non-zero-length segment without any colon ":"
      --  pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
      --  query         = *( pchar / "/" / "?" )
      --  fragment      = *( pchar / "/" / "?" )
      --  pct-encoded   = "%" HEXDIG HEXDIG
      --  unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
      --  reserved      = gen-delims / sub-delims
      --  gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
      --  sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
      --                / "*" / "+" / "," / ";" / "="

      --  The relativeURI rule has been replaced with relative-ref.

      --  Find and test the scheme. If there is no scheme, we might have a
      --  relative URI

      while Index <= Name'last loop
         Encoding.Read (Name, Index, C);
         if C = Colon then
            Has_Scheme := True;
            exit;

         elsif C not in Character'pos ('a') .. Character'pos ('z')
           and then C not in Character'pos ('A') .. Character'pos ('Z')
           and then C not in Character'pos ('0') .. Character'pos ('9')
           and then C /= Character'pos ('+')
           and then C /= Character'pos ('-')
           and then C /= Character'pos ('.')
         then
            Has_Scheme := False;
            exit;
         end if;
      end loop;

      --  For a mailto:, nothing else to check
      if Has_Scheme
        and then Name (Name'first .. Index - 1) = Mailto_Sequence
      then
         return uri_absolute;
      end if;

      --  Check the rest of the URI. We currently go for a fast check, and do
      --  not check each of the components specifically.

      while Index <= Name'last loop
         Encoding.Read (Name, Index, C);
         if C = Unicode.Names.Basic_Latin.Hash then
            if Has_Hash then
               --  Two hashes => Invalid URI
               return uri_none;
            end if;
            Has_Hash := True;

         --  RFC3987 authorizes a big range of UCSchars, excluding only some of
         --  the characters. We'll just accept them here, no point in wasting
         --  time for a case that will never occur in practice

         elsif Version /= xml_1_1
           and then
           (C not in Valid_URI_Characters'range
            or else not Valid_URI_Characters (C))
         then
            return uri_none;

         elsif Version = xml_1_1
           and then
           ((C in Valid_URI_Characters'range
             and then not Valid_URI_Characters (C))
            or else (C >= 16#D800# and then C <= 16#FDEF#)
            or else (C >= 16#FFF0# and then C <= 16#FFFF#))
         then
            return uri_none;
         end if;
      end loop;

      if Has_Scheme then
         return uri_absolute;
      else
         return uri_relative_ref;
      end if;
   end Check_URI;

   ------------------
   -- Is_Valid_URI --
   ------------------

   function Is_Valid_URI (Name : Unicode.CES.byte_sequence) return Boolean is
   begin
      return Check_URI (Name) /= uri_none;
   end Is_Valid_URI;

   ------------------
   -- Is_Valid_URN --
   ------------------

   function Is_Valid_URN (Name : Unicode.CES.byte_sequence) return Boolean is
   begin
      --  format is  "urn:" <NID> ":" <NSS>
      --  NID: Namespace Identifier
      --  NSS: Namespace Specific String

      --  ??? Note that this implementation makes <NSS> optional as it is
      --  often the case in current usage.

      --  Leading sequence should be case insensitive
      if Name'length < URN_Sequence'length
        or else
          Name (Name'first .. Name'first + URN_Sequence'length - 1) /=
          URN_Sequence
      then
         return False;
      end if;

      return True;
   end Is_Valid_URN;

   ------------------
   -- Is_Valid_IRI --
   ------------------

   function Is_Valid_IRI
     (Name    : Unicode.CES.byte_sequence;
      Version : xml_versions := xml_1_1) return Boolean
   is
   begin
      return Check_URI (Name, Version) = uri_absolute
        or else Is_Valid_URN (Name);
   end Is_Valid_IRI;

   ---------------------------
   -- Contains_URI_Fragment --
   ---------------------------

   function Contains_URI_Fragment
     (Name : Unicode.CES.byte_sequence) return Boolean
   is
      Index : Integer := Name'first;
      C     : unicode_char;
   begin
      while Index <= Name'last loop
         Encoding.Read (Name, Index, C);
         if C = Pound_Sign then
            return True;
         end if;
      end loop;
      return False;
   end Contains_URI_Fragment;

   ------------------------
   -- Is_Valid_HexBinary --
   ------------------------

   function Is_Valid_HexBinary
     (Str : Unicode.CES.byte_sequence) return Boolean
   is
      Index : Integer := Str'first;
      C     : unicode_char;
   begin
      while Index <= Str'last loop
         Encoding.Read (Str, Index, C);
         if C not in Character'pos ('0') .. Character'pos ('9')
           and then C not in Character'pos ('a') .. Character'pos ('f')
           and then C not in Character'pos ('A') .. Character'pos ('F')
         then
            return False;
         end if;
      end loop;
      return True;
   end Is_Valid_HexBinary;

   ---------------------------
   -- Is_Valid_Base64Binary --
   ---------------------------

   function Is_Valid_Base64Binary
     (Value : Unicode.CES.byte_sequence) return Boolean
   is
      Index         : Integer := Value'first;
      C             : unicode_char;
      Prev_Is_Space : Boolean := False;

      Group : Natural := 1;
      --  Characters are always by groups of 4, this variable indicates the
      --  index of the current char in the group

      type char_categorie is (char_04, char_16, char_64, char_equal);
      Chars : array (1 .. 4) of char_categorie;
   --  The various categories that characters can belong two. In the Base64
   --  encoding, we always have groups of 4 characters.

   begin
      while Index <= Value'last loop
         Sax.Encodings.Encoding.Read (Value, Index, C);

         if C = 16#20# or C = 16#A# then
            if Prev_Is_Space then
               return False;  --  Can never have two spaces in a row
            end if;
            Prev_Is_Space := True;

         elsif C in B04'range and then B04 (C) then
            Prev_Is_Space := False;
            Chars (Group) := char_04;
            Group         := Group + 1;

         elsif C in B16'range and then B16 (C) then
            Prev_Is_Space := False;
            Chars (Group) := char_16;
            Group         := Group + 1;

         elsif C in B64'range and then B64 (C) then
            Prev_Is_Space := False;
            Chars (Group) := char_64;
            Group         := Group + 1;

         elsif C = Character'pos ('=') then
            Prev_Is_Space := False;
            if Group = 3
              and then Chars (1) <= char_64
              and then Chars (2) = char_04
            then
               Chars (Group) := char_equal;
               Group         := Group + 1;

            elsif Group = 4
              and then Chars (1) <= char_64
              and then Chars (2) <= char_64
              and then Chars (3) <= char_16
            then
               Group := 1;
               exit;  --  Must end now

            elsif Group = 4
              and then Chars (1) <= char_64
              and then Chars (2) <= char_04
              and then Chars (3) <= char_equal
            then
               Group := 1;
               exit;  --  Must end now

            else
               return False;
            end if;

         else
            return False;
         end if;

         if Group > 4 then
            Group := 1;
         end if;
      end loop;

      --  Cannot finish with a space
      if Prev_Is_Space or Group /= 1 or Index <= Value'last then
         return False;
      end if;

      return True;
   end Is_Valid_Base64Binary;

   --------------------------
   -- Collapse_Whitespaces --
   --------------------------

   function Collapse_Whitespaces (Str : String) return String is
      Start : Integer := Str'first;
   begin
      --  Find first non-whitespace character in Str

      while Start <= Str'last
        and then Is_White_Space (Character'pos (Str (Start)))
      loop
         Start := Start + 1;
      end loop;

      --  Then remove series of contiguous whitespaces

      declare
         Normalized : String (1 .. Str'last - Start + 1);
         Index      : Integer := Normalized'first;
      begin
         while Start <= Str'last loop
            if Is_White_Space (Character'pos (Str (Start))) then
               --  Remove series of contiguous whitespaces
               if Start = Str'first
                 or else not Is_White_Space (Character'pos (Str (Start - 1)))
               then
                  Normalized (Index) := ' ';
                  Index              := Index + 1;
               end if;
            else
               Normalized (Index) := Str (Start);
               Index              := Index + 1;
            end if;

            Start := Start + 1;
         end loop;

         if Index = Normalized'first then
            return "";
         elsif Normalized (Index - 1) = ' ' then
            return Normalized (Normalized'first .. Index - 2);
         else
            return Normalized (Normalized'first .. Index - 1);
         end if;
      end;
   end Collapse_Whitespaces;

   ----------
   -- Free --
   ----------

   procedure Free (NS : in out xml_ns) is
      procedure Free_NS is new Ada.Unchecked_Deallocation
        (xml_ns_record,
         xml_ns);
      Tmp : xml_ns;
   begin
      while NS /= null loop
         Tmp := NS.Next;
         Free_NS (NS);
         NS := Tmp;
      end loop;
   end Free;

   -------------
   -- Get_URI --
   -------------

   function Get_URI (NS : xml_ns) return symbol is
   begin
      if NS = null then
         return Empty_String;
      else
         return NS.URI;
      end if;
   end Get_URI;

   ----------------
   -- Get_Prefix --
   ----------------

   function Get_Prefix (NS : xml_ns) return symbol is
   begin
      if NS = null then
         return Empty_String;
      else
         return NS.Prefix;
      end if;
   end Get_Prefix;

   -------------------
   -- Element_Count --
   -------------------

   function Element_Count (NS : xml_ns) return Natural is
      Tmp : xml_ns := NS;
   begin
      if NS = null then
         return 0;
      else
         while Tmp.Same_As /= null loop
            Tmp := Tmp.Same_As;
         end loop;

         return Tmp.Use_Count;
      end if;
   end Element_Count;

   ---------------------
   -- Increment_Count --
   ---------------------

   procedure Increment_Count (NS : xml_ns) is
      Tmp : xml_ns := NS;
   begin
      while Tmp.Same_As /= null loop
         Tmp := Tmp.Same_As;
      end loop;

      Tmp.Use_Count := Tmp.Use_Count + 1;
   end Increment_Count;

   ------------------
   -- Next_In_List --
   ------------------

   function Next_In_List (NS : xml_ns) return xml_ns is
   begin
      return NS.Next;
   end Next_In_List;

   ---------------------
   -- Find_NS_In_List --
   ---------------------

   function Find_NS_In_List
     (List                 : xml_ns;
      Prefix               : Sax.Symbols.symbol;
      Include_Default_NS   : Boolean := True;
      List_Is_From_Element : Boolean) return xml_ns
   is
      NS : xml_ns := List;
   begin
      while NS /= No_XML_NS loop
         if
           (Include_Default_NS
            or else not List_Is_From_Element
            or else NS.Prefix /= Empty_String)
           and then NS.Prefix = Prefix
         then
            return NS;
         end if;
         NS := NS.Next;
      end loop;
      return null;
   end Find_NS_In_List;

   ------------------------------
   -- Find_NS_From_URI_In_List --
   ------------------------------

   function Find_NS_From_URI_In_List
     (List : xml_ns;
      URI  : Sax.Symbols.symbol) return xml_ns
   is
      NS : xml_ns := List;
   begin
      while NS /= No_XML_NS loop
         if NS.URI = URI then
            return NS;
         end if;
         NS := NS.Next;
      end loop;
      return null;
   end Find_NS_From_URI_In_List;

   --------------------
   -- Add_NS_To_List --
   --------------------

   procedure Add_NS_To_List
     (List        : in out xml_ns;
      Same_As     :        xml_ns := No_XML_NS;
      Prefix, URI :        symbol)
   is
   begin
      List :=
        new xml_ns_record'
          (Prefix    => Prefix,
           URI       => URI,
           System_Id => No_Symbol,
           Same_As   => Same_As,
           Use_Count => 0,
           Next      => List);
   end Add_NS_To_List;

   --------------
   -- Allocate --
   --------------

   function Allocate return symbol_table is
      S : symbol_table_access;
   begin
      S := new symbol_table_record;
      return Symbol_Table_Pointers.Allocate (S);
   end Allocate;

   ----------
   -- Find --
   ----------

   function Find
     (Table : symbol_table;
      Str   : Unicode.CES.byte_sequence) return Sax.Symbols.symbol
   is
   begin
      return Find (Symbol_Table_Pointers.Get (Table), Str);
   end Find;

   -------------
   -- Convert --
   -------------

   function Convert
     (Table : symbol_table;
      Sym   : Sax.Symbols.symbol) return Sax.Symbols.symbol
   is
   begin
      if Sym = No_Symbol then
         return No_Symbol;
      else
         return Find (Table, Get (Sym).all);
      end if;
   end Convert;

   -------------------
   -- Set_System_Id --
   -------------------

   procedure Set_System_Id (NS : xml_ns; System_Id : Sax.Symbols.symbol) is
   begin
      NS.System_Id := System_Id;
   end Set_System_Id;

   -------------------
   -- Get_System_Id --
   -------------------

   function Get_System_Id (NS : xml_ns) return Sax.Symbols.symbol is
   begin
      return NS.System_Id;
   end Get_System_Id;

   -------------------
   -- For_Each_Item --
   -------------------

   procedure For_Each_Item (Ch : Unicode.CES.byte_sequence) is
      Index       : Integer := Ch'first;
      Last, Start : Integer;
      C           : unicode_char;
   begin
      --  Ch might be from an attribute (in which case it might have been
      --  normalized first), or for the value of a mixed element, in which case
      --  no element has taken place. We therefore need to skip initial spaces

      while Index <= Ch'last loop
         --  Skip leading spaces
         while Index <= Ch'last loop
            Start := Index;
            Encoding.Read (Ch, Index, C);
            exit when not Is_White_Space (C);
            Start := Ch'last + 1;
         end loop;

         exit when Start > Ch'last;

         --  Move to first whitespace after word
         while Index <= Ch'last loop
            Last := Index;
            Encoding.Read (Ch, Index, C);
            exit when Index > Ch'last or else Is_White_Space (C);
         end loop;

         if Index > Ch'last and then not Is_White_Space (C) then
            Callback (Ch (Start .. Index - 1));
            exit;
         else
            Callback (Ch (Start .. Last - 1)); --  Last points to a whitespace
         end if;
      end loop;
   end For_Each_Item;

end Sax.Utils;