------------------------------------------------------------------------------
-- XML/Ada - An XML suite for Ada95 --
-- --
-- Copyright (C) 2005-2012, AdaCore --
-- --
-- This library is free software; you can redistribute it and/or modify it --
-- under terms of the GNU General Public License as published by the Free --
-- Software Foundation; either version 3, or (at your option) any later --
-- version. This library is distributed in the hope that it will be useful, --
-- but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHAN- --
-- TABILITY or FITNESS FOR A PARTICULAR PURPOSE. --
-- --
-- --
-- --
-- --
-- --
-- You should have received a copy of the GNU General Public License and --
-- a copy of the GCC Runtime Library Exception along with this program; --
-- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see --
-- . --
-- --
------------------------------------------------------------------------------
with Ada.Unchecked_Deallocation;
with Interfaces; use Interfaces;
with Unicode.CES; use Unicode, Unicode.CES;
with Sax.Encodings; use Sax.Encodings;
with Sax.Symbols; use Sax.Symbols;
with Unicode.Names.Basic_Latin; use Unicode.Names.Basic_Latin;
package body Sax.Utils is
type Unichar_Boolean_Array is array (Unicode_Char range <>) of Boolean;
pragma Pack (Unichar_Boolean_Array);
Valid_URI_Characters : constant Unichar_Boolean_Array
(16#00# .. 16#9F#) :=
(Digit_Zero .. Digit_Nine => True,
Latin_Capital_Letter_A .. Latin_Capital_Letter_Z => True,
Latin_Small_Letter_A .. Latin_Small_Letter_Z => True,
Opening_Parenthesis | Closing_Parenthesis => True,
Percent_Sign => True,
Plus_Sign => True,
Comma => True,
Hyphen_Minus => True,
Dot => True,
Colon => True,
Equals_Sign => True,
Commercial_At => True,
Semicolon => True,
Dollar_Sign => True,
Spacing_Underscore => True,
Exclamation_Mark => True,
Star => True,
Apostrophe => True,
Question_Mark => True,
Slash => True,
Pound_Sign => True,
Tilde => True,
-- 16#A0# .. 16#D7FF# => True, -- ucschars from RFC 3987
-- 16#F900# .. 16#FDCF# => True,
-- 16#FDF0# .. 16#FFEF# => True,
-- 16#10000# .. 16#1FFFD# => True,
-- 16#20000# .. 16#2FFFD# => True,
-- 16#30000# .. 16#3FFFD# => True,
-- 16#40000# .. 16#4FFFD# => True,
-- 16#50000# .. 16#5FFFD# => True,
-- 16#60000# .. 16#6FFFD# => True,
-- 16#70000# .. 16#7FFFD# => True,
-- 16#80000# .. 16#8FFFD# => True,
-- 16#90000# .. 16#9FFFD# => True,
-- 16#A0000# .. 16#AFFFD# => True,
-- 16#B0000# .. 16#BFFFD# => True,
-- 16#C0000# .. 16#CFFFD# => True,
-- 16#D0000# .. 16#DFFFD# => True,
-- 16#E0000# .. 16#EFFFD# => True,
others => False);
-- Rules based on RFC 2141, at http://rfc.net/rfc2141.html,
-- completed with rules from Uniformed Resource Identifier at
-- http://www.gbiv.com/protocols/uri/rfc/rfc3986.html
B64 : constant array (Unicode_Char range 32 .. 128) of Boolean :=
(Character'Pos ('A') .. Character'Pos ('Z') => True,
Character'Pos ('a') .. Character'Pos ('z') => True,
Character'Pos ('0') .. Character'Pos ('9') => True,
Character'Pos ('+') => True,
Character'Pos ('/') => True,
others => False);
B04 : constant array (Unicode_Char range 32 .. 128) of Boolean :=
(Character'Pos ('A') => True,
Character'Pos ('Q') => True,
Character'Pos ('g') => True,
Character'Pos ('w') => True,
others => False);
B16 : constant array (Unicode_Char range 32 .. 128) of Boolean :=
(Character'Pos ('A') => True,
Character'Pos ('E') => True,
Character'Pos ('I') => True,
Character'Pos ('M') => True,
Character'Pos ('Q') => True,
Character'Pos ('U') => True,
Character'Pos ('Y') => True,
Character'Pos ('c') => True,
Character'Pos ('g') => True,
Character'Pos ('k') => True,
Character'Pos ('o') => True,
Character'Pos ('s') => True,
Character'Pos ('w') => True,
Character'Pos ('0') => True,
Character'Pos ('4') => True,
Character'Pos ('8') => True,
others => False);
-- Whether the character matches the Base64Binary definitions
----------------------------
-- Is_Valid_Language_Name --
----------------------------
function Is_Valid_Language_Name
(Lang : Unicode.CES.Byte_Sequence) return Boolean
is
C : Unicode_Char := Space;
Index : Natural := Lang'First;
Count : Natural := 0;
Subtag : Natural := 1;
Allow_Digit : Boolean := False;
begin
-- See http://www.ietf.org/rfc/rfc3066.tx
-- Language-Tag = Primary-subtag *( "-" Subtag )
-- Primary-subtag = 1*8ALPHA
-- Subtag = 1*8(ALPHA / DIGIT)
-- In addition, it seems that the length of subtags is not necessarily
-- limited to 8 characters, given the XML conformance testsuite test
-- sun/valid/v-lang04.xml
while Index <= Lang'Last loop
Encoding.Read (Lang, Index, C);
if C = Hyphen_Minus then
if Count = 0 or else (Subtag <= 2 and Count > 8) then
-- Too many characters
return False;
else
Allow_Digit := True;
Count := 0;
Subtag := Subtag + 1;
end if;
elsif C not in Latin_Small_Letter_A .. Latin_Small_Letter_Z
and then C not in Latin_Capital_Letter_A .. Latin_Capital_Letter_Z
and then (not Allow_Digit
or else C not in Digit_Zero .. Digit_Nine)
then
return False;
else
Count := Count + 1;
end if;
end loop;
if Count = 0 or else (Subtag <= 2 and Count > 8) then
-- Too many characters
return False;
end if;
return True;
end Is_Valid_Language_Name;
------------------------
-- Is_Valid_Name_Char --
------------------------
function Is_Valid_Name_Char
(Char : Unicode.Unicode_Char;
Version : XML_Versions := XML_1_1) return Boolean
is
begin
-- ??? Should we create a single lookup table for all of these, that
-- would be more efficient
case Version is
when XML_1_0_Third_Edition
| XML_1_0_Fourth_Edition =>
return Char = Period
or else Char = Hyphen_Minus
or else Char = Spacing_Underscore
or else Char = Colon
or else Is_Digit (Char)
or else Is_Letter (Char)
or else Is_Combining_Char (Char)
or else Is_Extender (Char);
when XML_1_0_Fifth_Edition
| XML_1_0
| XML_1_1 =>
return Char = Hyphen_Minus
or else Char = Period
or else (Char in Digit_Zero .. Digit_Nine)
or else Char = 16#B7#
or else (Char in 16#0300# .. 16#036F#)
or else (Char in 16#203F# .. 16#2040#)
or else Is_Valid_Name_Startchar (Char, Version);
end case;
end Is_Valid_Name_Char;
-----------------------------
-- Is_Valid_Name_Startchar --
-----------------------------
function Is_Valid_Name_Startchar
(Char : Unicode.Unicode_Char;
Version : XML_Versions := XML_1_1) return Boolean
is
begin
case Version is
when XML_1_0_Third_Edition
| XML_1_0_Fourth_Edition
=>
return Char = Spacing_Underscore
or else Is_Letter (Char);
when XML_1_0_Fifth_Edition
| XML_1_0
| XML_1_1 =>
return Char = Colon
or else Char = Spacing_Underscore
or else Char in Latin_Capital_Letter_A .. Latin_Capital_Letter_Z
or else Char in Latin_Small_Letter_A .. Latin_Small_Letter_Z
or else Char in 16#C0# .. 16#D6#
or else Char in 16#D8# .. 16#F6#
or else Char in 16#F8# .. 16#2FF#
or else Char in 16#370# .. 16#37D#
or else Char in 16#37F# .. 16#1FFF#
or else Char in 16#200C# .. 16#200D#
or else Char in 16#2070# .. 16#218F#
or else Char in 16#2C00# .. 16#2FEF#
or else Char in 16#3001# .. 16#D7FF#
or else Char in 16#F900# .. 16#FDCF#
or else Char in 16#FDF0# .. 16#FFFD#
or else Char in 16#10000# .. 16#EFFFF#;
end case;
end Is_Valid_Name_Startchar;
--------------------------
-- Is_Valid_NCname_Char --
--------------------------
function Is_Valid_NCname_Char
(Char : Unicode.Unicode_Char;
Version : XML_Versions := XML_1_1) return Boolean is
begin
case Version is
when XML_1_0_Third_Edition
| XML_1_0_Fourth_Edition =>
return Char = Period
or else Char = Hyphen_Minus
or else Char = Spacing_Underscore
or else Is_Digit (Char)
or else Is_Letter (Char)
or else Is_Combining_Char (Char)
or else Is_Extender (Char);
when XML_1_0_Fifth_Edition
| XML_1_0
| XML_1_1 =>
return Is_Valid_Name_Char (Char, Version);
end case;
end Is_Valid_NCname_Char;
----------------------
-- Is_Valid_Nmtoken --
----------------------
function Is_Valid_Nmtoken
(Nmtoken : Unicode.CES.Byte_Sequence;
Version : XML_Versions := XML_1_1) return Boolean
is
C : Unicode_Char;
Index : Natural := Nmtoken'First;
begin
while Index <= Nmtoken'Last loop
Encoding.Read (Nmtoken, Index, C);
if not Is_Valid_Name_Char (C, Version) then
return False;
end if;
end loop;
return True;
end Is_Valid_Nmtoken;
-----------------------
-- Is_Valid_Nmtokens --
-----------------------
function Is_Valid_Nmtokens
(Nmtokens : Unicode.CES.Byte_Sequence;
Version : XML_Versions := XML_1_1) return Boolean
is
C : Unicode_Char;
Index : Natural := Nmtokens'First;
begin
if Nmtokens'Length = 0 then
return False;
end if;
while Index <= Nmtokens'Last loop
Encoding.Read (Nmtokens, Index, C);
if C /= Space and then not Is_Valid_Name_Char (C, Version) then
return False;
end if;
end loop;
return True;
end Is_Valid_Nmtokens;
-------------------
-- Is_Valid_Name --
-------------------
function Is_Valid_Name
(Name : Unicode.CES.Byte_Sequence;
Version : XML_Versions := XML_1_1) return Boolean
is
C : Unicode_Char;
Index : Natural := Name'First;
begin
if Name = "" then
return False;
end if;
Encoding.Read (Name, Index, C);
if C /= Colon
and then not Is_Valid_Name_Startchar (C, Version)
then
return False;
end if;
return Is_Valid_Nmtoken (Name (Index .. Name'Last), Version);
end Is_Valid_Name;
--------------------
-- Is_Valid_Names --
--------------------
function Is_Valid_Names
(Name : Unicode.CES.Byte_Sequence;
Version : XML_Versions := XML_1_1) return Boolean
is
C : Unicode_Char;
Index : Natural := Name'First;
First_In_Name : Boolean := True;
begin
if Name'Length = 0 then
return False;
end if;
while Index <= Name'Last loop
Encoding.Read (Name, Index, C);
if C = Space then
First_In_Name := True;
elsif First_In_Name then
if not Is_Valid_Name_Startchar (C, Version) then
return False;
end if;
First_In_Name := False;
elsif not Is_Valid_Name_Char (C, Version) then
return False;
end if;
end loop;
return True;
end Is_Valid_Names;
----------------------
-- Is_Valid_NCnames --
----------------------
function Is_Valid_NCnames
(Name : Unicode.CES.Byte_Sequence;
Version : XML_Versions := XML_1_1) return Boolean
is
C : Unicode_Char;
Index : Natural := Name'First;
First_In_Name : Boolean := True;
begin
if Name'Length = 0 then
return False;
end if;
while Index <= Name'Last loop
Encoding.Read (Name, Index, C);
if C = Space then
First_In_Name := True;
elsif First_In_Name then
if C /= Spacing_Underscore
and then not Is_Letter (C)
then
return False;
end if;
First_In_Name := False;
elsif not Is_Valid_NCname_Char (C, Version) then
return False;
end if;
end loop;
return True;
end Is_Valid_NCnames;
---------------------
-- Is_Valid_NCname --
---------------------
function Is_Valid_NCname
(Name : Unicode.CES.Byte_Sequence;
Version : XML_Versions := XML_1_1) return Boolean
is
C : Unicode_Char;
Index : Natural := Name'First;
begin
if Name'Length = 0 then
return False;
end if;
Encoding.Read (Name, Index, C);
if not Is_Valid_Name_Startchar (C, Version) then
return False;
end if;
while Index <= Name'Last loop
Encoding.Read (Name, Index, C);
if not Is_Valid_NCname_Char (C, Version) then
return False;
end if;
end loop;
return True;
end Is_Valid_NCname;
--------------------
-- Is_Valid_QName --
--------------------
function Is_Valid_QName
(Name : Unicode.CES.Byte_Sequence;
Version : XML_Versions := XML_1_1) return Boolean is
begin
for N in Name'Range loop
if Name (N) = ':' then
return N /= Name'Last
and then Is_Valid_NCname (Name (Name'First .. N - 1), Version)
and then Is_Valid_NCname (Name (N + 1 .. Name'Last), Version);
end if;
end loop;
return Is_Valid_NCname (Name, Version);
end Is_Valid_QName;
----------
-- Hash --
----------
function Hash
(Key : Unicode.CES.Byte_Sequence) return Interfaces.Unsigned_32
is
type Uns is mod 2 ** 32;
function Rotate_Left (Value : Uns; Amount : Natural) return Uns;
pragma Import (Intrinsic, Rotate_Left);
Tmp : Uns := 0;
begin
for J in Key'Range loop
Tmp := Rotate_Left (Tmp, 1) + Character'Pos (Key (J));
end loop;
return Interfaces.Unsigned_32 (Tmp);
end Hash;
function Hash
(Key : Unicode.CES.Byte_Sequence_Access) return Interfaces.Unsigned_32 is
begin
return Hash (Key.all);
end Hash;
-----------
-- Equal --
-----------
function Equal (S1, S2 : Unicode.CES.Byte_Sequence_Access) return Boolean is
begin
return S1.all = S2.all;
end Equal;
-----------------
-- Split_Qname --
-----------------
function Split_Qname (Qname : Unicode.CES.Byte_Sequence) return Integer is
begin
-- ??? This function assumes we are using UTF8 internally
for Q in Qname'Range loop
if Qname (Q) = ':' then
return Q;
end if;
end loop;
return Qname'First - 1;
end Split_Qname;
---------------
-- Check_URI --
---------------
function Check_URI
(Name : Unicode.CES.Byte_Sequence;
Version : XML_Versions := XML_1_1) return URI_Type
is
Index : Integer := Name'First;
C : Unicode_Char;
Has_Scheme : Boolean := False;
Has_Hash : Boolean := False;
begin
-- This is RFC 3986 which obsoletes RFC 2396.
-- URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
-- hier-part = "//" authority path-abempty
-- / path-absolute
-- / path-rootless
-- / path-empty
-- URI-reference = URI / relative-ref
-- absolute-URI = scheme ":" hier-part [ "?" query ]
-- relative-ref = relative-part [ "?" query ] [ "#" fragment ]
-- relative-part = "//" authority path-abempty
-- / path-absolute
-- / path-noscheme
-- / path-empty
-- scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
-- authority = [ userinfo "@" ] host [ ":" port ]
-- userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
-- host = IP-literal / IPv4address / reg-name
-- port = *DIGIT
-- IP-literal = "[" ( IPv6address / IPvFuture ) "]"
-- reg-name = *( unreserved / pct-encoded / sub-delims )
-- path = path-abempty ; begins with "/" or is empty
-- / path-absolute ; begins with "/" but not "//"
-- / path-noscheme ; begins with a non-colon segment
-- / path-rootless ; begins with a segment
-- / path-empty ; zero characters
-- path-abempty = *( "/" segment )
-- path-absolute = "/" [ segment-nz *( "/" segment ) ]
-- path-noscheme = segment-nz-nc *( "/" segment )
-- path-rootless = segment-nz *( "/" segment )
-- path-empty = 0
-- segment = *pchar
-- segment-nz = 1*pchar
-- segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
-- ; non-zero-length segment without any colon ":"
-- pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
-- query = *( pchar / "/" / "?" )
-- fragment = *( pchar / "/" / "?" )
-- pct-encoded = "%" HEXDIG HEXDIG
-- unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
-- reserved = gen-delims / sub-delims
-- gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
-- sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
-- / "*" / "+" / "," / ";" / "="
-- The relativeURI rule has been replaced with relative-ref.
-- Find and test the scheme. If there is no scheme, we might have a
-- relative URI
while Index <= Name'Last loop
Encoding.Read (Name, Index, C);
if C = Colon then
Has_Scheme := True;
exit;
elsif C not in Character'Pos ('a') .. Character'Pos ('z')
and then C not in Character'Pos ('A') .. Character'Pos ('Z')
and then C not in Character'Pos ('0') .. Character'Pos ('9')
and then C /= Character'Pos ('+')
and then C /= Character'Pos ('-')
and then C /= Character'Pos ('.')
then
Has_Scheme := False;
exit;
end if;
end loop;
-- For a mailto:, nothing else to check
if Has_Scheme
and then Name (Name'First .. Index - 1) = Mailto_Sequence
then
return URI_Absolute;
end if;
-- Check the rest of the URI. We currently go for a fast check, and do
-- not check each of the components specifically.
while Index <= Name'Last loop
Encoding.Read (Name, Index, C);
if C = Unicode.Names.Basic_Latin.Hash then
if Has_Hash then
-- Two hashes => Invalid URI
return URI_None;
end if;
Has_Hash := True;
-- RFC3987 authorizes a big range of UCSchars, excluding only some of
-- the characters. We'll just accept them here, no point in wasting
-- time for a case that will never occur in practice
elsif Version /= XML_1_1
and then (C not in Valid_URI_Characters'Range
or else not Valid_URI_Characters (C))
then
return URI_None;
elsif Version = XML_1_1
and then
((C in Valid_URI_Characters'Range
and then not Valid_URI_Characters (C))
or else (C >= 16#D800# and then C <= 16#FDEF#)
or else (C >= 16#FFF0# and then C <= 16#FFFF#))
then
return URI_None;
end if;
end loop;
if Has_Scheme then
return URI_Absolute;
else
return URI_Relative_Ref;
end if;
end Check_URI;
------------------
-- Is_Valid_URI --
------------------
function Is_Valid_URI
(Name : Unicode.CES.Byte_Sequence) return Boolean is
begin
return Check_URI (Name) /= URI_None;
end Is_Valid_URI;
------------------
-- Is_Valid_URN --
------------------
function Is_Valid_URN
(Name : Unicode.CES.Byte_Sequence) return Boolean is
begin
-- format is "urn:" ":"
-- NID: Namespace Identifier
-- NSS: Namespace Specific String
-- ??? Note that this implementation makes optional as it is
-- often the case in current usage.
-- Leading sequence should be case insensitive
if Name'Length < URN_Sequence'Length
or else Name (Name'First .. Name'First + URN_Sequence'Length - 1) /=
URN_Sequence
then
return False;
end if;
return True;
end Is_Valid_URN;
------------------
-- Is_Valid_IRI --
------------------
function Is_Valid_IRI
(Name : Unicode.CES.Byte_Sequence;
Version : XML_Versions := XML_1_1) return Boolean is
begin
return Check_URI (Name, Version) = URI_Absolute
or else Is_Valid_URN (Name);
end Is_Valid_IRI;
---------------------------
-- Contains_URI_Fragment --
---------------------------
function Contains_URI_Fragment
(Name : Unicode.CES.Byte_Sequence) return Boolean
is
Index : Integer := Name'First;
C : Unicode_Char;
begin
while Index <= Name'Last loop
Encoding.Read (Name, Index, C);
if C = Pound_Sign then
return True;
end if;
end loop;
return False;
end Contains_URI_Fragment;
------------------------
-- Is_Valid_HexBinary --
------------------------
function Is_Valid_HexBinary
(Str : Unicode.CES.Byte_Sequence) return Boolean
is
Index : Integer := Str'First;
C : Unicode_Char;
begin
while Index <= Str'Last loop
Encoding.Read (Str, Index, C);
if C not in Character'Pos ('0') .. Character'Pos ('9')
and then C not in Character'Pos ('a') .. Character'Pos ('f')
and then C not in Character'Pos ('A') .. Character'Pos ('F')
then
return False;
end if;
end loop;
return True;
end Is_Valid_HexBinary;
---------------------------
-- Is_Valid_Base64Binary --
---------------------------
function Is_Valid_Base64Binary
(Value : Unicode.CES.Byte_Sequence) return Boolean
is
Index : Integer := Value'First;
C : Unicode_Char;
Prev_Is_Space : Boolean := False;
Group : Natural := 1;
-- Characters are always by groups of 4, this variable indicates the
-- index of the current char in the group
type Char_Categorie is (Char_04, Char_16, Char_64, Char_Equal);
Chars : array (1 .. 4) of Char_Categorie;
-- The various categories that characters can belong two. In the Base64
-- encoding, we always have groups of 4 characters.
begin
while Index <= Value'Last loop
Sax.Encodings.Encoding.Read (Value, Index, C);
if C = 16#20# or C = 16#A# then
if Prev_Is_Space then
return False; -- Can never have two spaces in a row
end if;
Prev_Is_Space := True;
elsif C in B04'Range and then B04 (C) then
Prev_Is_Space := False;
Chars (Group) := Char_04;
Group := Group + 1;
elsif C in B16'Range and then B16 (C) then
Prev_Is_Space := False;
Chars (Group) := Char_16;
Group := Group + 1;
elsif C in B64'Range and then B64 (C) then
Prev_Is_Space := False;
Chars (Group) := Char_64;
Group := Group + 1;
elsif C = Character'Pos ('=') then
Prev_Is_Space := False;
if Group = 3
and then Chars (1) <= Char_64
and then Chars (2) = Char_04
then
Chars (Group) := Char_Equal;
Group := Group + 1;
elsif Group = 4
and then Chars (1) <= Char_64
and then Chars (2) <= Char_64
and then Chars (3) <= Char_16
then
Group := 1;
exit; -- Must end now
elsif Group = 4
and then Chars (1) <= Char_64
and then Chars (2) <= Char_04
and then Chars (3) <= Char_Equal
then
Group := 1;
exit; -- Must end now
else
return False;
end if;
else
return False;
end if;
if Group > 4 then
Group := 1;
end if;
end loop;
-- Cannot finish with a space
if Prev_Is_Space or Group /= 1 or Index <= Value'Last then
return False;
end if;
return True;
end Is_Valid_Base64Binary;
--------------------------
-- Collapse_Whitespaces --
--------------------------
function Collapse_Whitespaces (Str : String) return String is
Start : Integer := Str'First;
begin
-- Find first non-whitespace character in Str
while Start <= Str'Last
and then Is_White_Space (Character'Pos (Str (Start)))
loop
Start := Start + 1;
end loop;
-- Then remove series of contiguous whitespaces
declare
Normalized : String (1 .. Str'Last - Start + 1);
Index : Integer := Normalized'First;
begin
while Start <= Str'Last loop
if Is_White_Space (Character'Pos (Str (Start))) then
-- Remove series of contiguous whitespaces
if Start = Str'First
or else not Is_White_Space (Character'Pos (Str (Start - 1)))
then
Normalized (Index) := ' ';
Index := Index + 1;
end if;
else
Normalized (Index) := Str (Start);
Index := Index + 1;
end if;
Start := Start + 1;
end loop;
if Index = Normalized'First then
return "";
elsif Normalized (Index - 1) = ' ' then
return Normalized (Normalized'First .. Index - 2);
else
return Normalized (Normalized'First .. Index - 1);
end if;
end;
end Collapse_Whitespaces;
----------
-- Free --
----------
procedure Free (NS : in out XML_NS) is
procedure Free_NS is new Ada.Unchecked_Deallocation
(XML_NS_Record, XML_NS);
Tmp : XML_NS;
begin
while NS /= null loop
Tmp := NS.Next;
Free_NS (NS);
NS := Tmp;
end loop;
end Free;
-------------
-- Get_URI --
-------------
function Get_URI (NS : XML_NS) return Symbol is
begin
if NS = null then
return Empty_String;
else
return NS.URI;
end if;
end Get_URI;
----------------
-- Get_Prefix --
----------------
function Get_Prefix (NS : XML_NS) return Symbol is
begin
if NS = null then
return Empty_String;
else
return NS.Prefix;
end if;
end Get_Prefix;
-------------------
-- Element_Count --
-------------------
function Element_Count (NS : XML_NS) return Natural is
Tmp : XML_NS := NS;
begin
if NS = null then
return 0;
else
while Tmp.Same_As /= null loop
Tmp := Tmp.Same_As;
end loop;
return Tmp.Use_Count;
end if;
end Element_Count;
---------------------
-- Increment_Count --
---------------------
procedure Increment_Count (NS : XML_NS) is
Tmp : XML_NS := NS;
begin
while Tmp.Same_As /= null loop
Tmp := Tmp.Same_As;
end loop;
Tmp.Use_Count := Tmp.Use_Count + 1;
end Increment_Count;
------------------
-- Next_In_List --
------------------
function Next_In_List (NS : XML_NS) return XML_NS is
begin
return NS.Next;
end Next_In_List;
---------------------
-- Find_NS_In_List --
---------------------
function Find_NS_In_List
(List : XML_NS;
Prefix : Sax.Symbols.Symbol;
Include_Default_NS : Boolean := True;
List_Is_From_Element : Boolean) return XML_NS
is
NS : XML_NS := List;
begin
while NS /= No_XML_NS loop
if (Include_Default_NS
or else not List_Is_From_Element
or else NS.Prefix /= Empty_String)
and then NS.Prefix = Prefix
then
return NS;
end if;
NS := NS.Next;
end loop;
return null;
end Find_NS_In_List;
------------------------------
-- Find_NS_From_URI_In_List --
------------------------------
function Find_NS_From_URI_In_List
(List : XML_NS; URI : Sax.Symbols.Symbol) return XML_NS
is
NS : XML_NS := List;
begin
while NS /= No_XML_NS loop
if NS.URI = URI then
return NS;
end if;
NS := NS.Next;
end loop;
return null;
end Find_NS_From_URI_In_List;
--------------------
-- Add_NS_To_List --
--------------------
procedure Add_NS_To_List
(List : in out XML_NS;
Same_As : XML_NS := No_XML_NS;
Prefix, URI : Symbol)
is
begin
List := new XML_NS_Record'
(Prefix => Prefix,
URI => URI,
System_Id => No_Symbol,
Same_As => Same_As,
Use_Count => 0,
Next => List);
end Add_NS_To_List;
--------------
-- Allocate --
--------------
function Allocate return Symbol_Table is
S : Symbol_Table_Access;
begin
S := new Symbol_Table_Record;
return Symbol_Table_Pointers.Allocate (S);
end Allocate;
----------
-- Find --
----------
function Find
(Table : Symbol_Table; Str : Unicode.CES.Byte_Sequence)
return Sax.Symbols.Symbol is
begin
return Find (Symbol_Table_Pointers.Get (Table), Str);
end Find;
-------------
-- Convert --
-------------
function Convert
(Table : Symbol_Table; Sym : Sax.Symbols.Symbol)
return Sax.Symbols.Symbol is
begin
if Sym = No_Symbol then
return No_Symbol;
else
return Find (Table, Get (Sym).all);
end if;
end Convert;
-------------------
-- Set_System_Id --
-------------------
procedure Set_System_Id (NS : XML_NS; System_Id : Sax.Symbols.Symbol) is
begin
NS.System_Id := System_Id;
end Set_System_Id;
-------------------
-- Get_System_Id --
-------------------
function Get_System_Id (NS : XML_NS) return Sax.Symbols.Symbol is
begin
return NS.System_Id;
end Get_System_Id;
-------------------
-- For_Each_Item --
-------------------
procedure For_Each_Item (Ch : Unicode.CES.Byte_Sequence) is
Index : Integer := Ch'First;
Last, Start : Integer;
C : Unicode_Char;
begin
-- Ch might be from an attribute (in which case it might have been
-- normalized first), or for the value of a mixed element, in which case
-- no element has taken place. We therefore need to skip initial spaces
while Index <= Ch'Last loop
-- Skip leading spaces
while Index <= Ch'Last loop
Start := Index;
Encoding.Read (Ch, Index, C);
exit when not Is_White_Space (C);
Start := Ch'Last + 1;
end loop;
exit when Start > Ch'Last;
-- Move to first whitespace after word
while Index <= Ch'Last loop
Last := Index;
Encoding.Read (Ch, Index, C);
exit when Index > Ch'Last or else Is_White_Space (C);
end loop;
if Index > Ch'Last and then not Is_White_Space (C) then
Callback (Ch (Start .. Index - 1));
exit;
else
Callback (Ch (Start .. Last - 1)); -- Last points to a whitespace
end if;
end loop;
end For_Each_Item;
end Sax.Utils;