------------------------------------------------------------------------------
-- XML/Ada - An XML suite for Ada95 --
-- --
-- Copyright (C) 2005-2012, AdaCore --
-- --
-- This library is free software; you can redistribute it and/or modify it --
-- under terms of the GNU General Public License as published by the Free --
-- Software Foundation; either version 3, or (at your option) any later --
-- version. This library is distributed in the hope that it will be useful, --
-- but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHAN- --
-- TABILITY or FITNESS FOR A PARTICULAR PURPOSE. --
-- --
-- --
-- --
-- --
-- --
-- You should have received a copy of the GNU General Public License and --
-- a copy of the GCC Runtime Library Exception along with this program; --
-- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see --
-- . --
-- --
------------------------------------------------------------------------------
with Ada.Unchecked_Deallocation;
with Interfaces; use Interfaces;
with Unicode.CES;
use Unicode, Unicode.CES;
with Sax.Encodings; use Sax.Encodings;
with Sax.Symbols; use Sax.Symbols;
with Unicode.Names.Basic_Latin; use Unicode.Names.Basic_Latin;
package body Sax.Utils is
type unichar_boolean_array is array (unicode_char range <>) of Boolean;
pragma pack (unichar_boolean_array);
Valid_URI_Characters : constant unichar_boolean_array (16#00# .. 16#9F#) :=
(Digit_Zero .. Digit_Nine => True,
Latin_Capital_Letter_A .. Latin_Capital_Letter_Z => True,
Latin_Small_Letter_A .. Latin_Small_Letter_Z => True,
Opening_Parenthesis | Closing_Parenthesis => True,
Percent_Sign => True,
Plus_Sign => True,
Comma => True,
Hyphen_Minus => True,
Dot => True,
Colon => True,
Equals_Sign => True,
Commercial_At => True,
Semicolon => True,
Dollar_Sign => True,
Spacing_Underscore => True,
Exclamation_Mark => True,
Star => True,
Apostrophe => True,
Question_Mark => True,
Slash => True,
Pound_Sign => True,
Tilde => True,
-- 16#A0# .. 16#D7FF# => True, -- ucschars from RFC 3987
-- 16#F900# .. 16#FDCF# => True,
-- 16#FDF0# .. 16#FFEF# => True,
-- 16#10000# .. 16#1FFFD# => True,
-- 16#20000# .. 16#2FFFD# => True,
-- 16#30000# .. 16#3FFFD# => True,
-- 16#40000# .. 16#4FFFD# => True,
-- 16#50000# .. 16#5FFFD# => True,
-- 16#60000# .. 16#6FFFD# => True,
-- 16#70000# .. 16#7FFFD# => True,
-- 16#80000# .. 16#8FFFD# => True,
-- 16#90000# .. 16#9FFFD# => True,
-- 16#A0000# .. 16#AFFFD# => True,
-- 16#B0000# .. 16#BFFFD# => True,
-- 16#C0000# .. 16#CFFFD# => True,
-- 16#D0000# .. 16#DFFFD# => True,
-- 16#E0000# .. 16#EFFFD# => True,
others => False);
-- Rules based on RFC 2141, at http://rfc.net/rfc2141.html,
-- completed with rules from Uniformed Resource Identifier at
-- http://www.gbiv.com/protocols/uri/rfc/rfc3986.html
B64 : constant array (unicode_char range 32 .. 128) of Boolean :=
(Character'pos ('A') .. Character'pos ('Z') => True,
Character'pos ('a') .. Character'pos ('z') => True,
Character'pos ('0') .. Character'pos ('9') => True,
Character'pos ('+') => True,
Character'pos ('/') => True,
others => False);
B04 : constant array (unicode_char range 32 .. 128) of Boolean :=
(Character'pos ('A') => True,
Character'pos ('Q') => True,
Character'pos ('g') => True,
Character'pos ('w') => True,
others => False);
B16 : constant array (unicode_char range 32 .. 128) of Boolean :=
(Character'pos ('A') => True,
Character'pos ('E') => True,
Character'pos ('I') => True,
Character'pos ('M') => True,
Character'pos ('Q') => True,
Character'pos ('U') => True,
Character'pos ('Y') => True,
Character'pos ('c') => True,
Character'pos ('g') => True,
Character'pos ('k') => True,
Character'pos ('o') => True,
Character'pos ('s') => True,
Character'pos ('w') => True,
Character'pos ('0') => True,
Character'pos ('4') => True,
Character'pos ('8') => True,
others => False);
-- Whether the character matches the Base64Binary definitions
----------------------------
-- Is_Valid_Language_Name --
----------------------------
function Is_Valid_Language_Name
(Lang : Unicode.CES.byte_sequence) return Boolean
is
C : unicode_char := Space;
Index : Natural := Lang'first;
Count : Natural := 0;
Subtag : Natural := 1;
Allow_Digit : Boolean := False;
begin
-- See http://www.ietf.org/rfc/rfc3066.tx
-- Language-Tag = Primary-subtag *( "-" Subtag )
-- Primary-subtag = 1*8ALPHA
-- Subtag = 1*8(ALPHA / DIGIT)
-- In addition, it seems that the length of subtags is not necessarily
-- limited to 8 characters, given the XML conformance testsuite test
-- sun/valid/v-lang04.xml
while Index <= Lang'last loop
Encoding.Read (Lang, Index, C);
if C = Hyphen_Minus then
if Count = 0 or else (Subtag <= 2 and Count > 8) then
-- Too many characters
return False;
else
Allow_Digit := True;
Count := 0;
Subtag := Subtag + 1;
end if;
elsif C not in Latin_Small_Letter_A .. Latin_Small_Letter_Z
and then C not in Latin_Capital_Letter_A .. Latin_Capital_Letter_Z
and then (not Allow_Digit or else C not in Digit_Zero .. Digit_Nine)
then
return False;
else
Count := Count + 1;
end if;
end loop;
if Count = 0 or else (Subtag <= 2 and Count > 8) then
-- Too many characters
return False;
end if;
return True;
end Is_Valid_Language_Name;
------------------------
-- Is_Valid_Name_Char --
------------------------
function Is_Valid_Name_Char
(Char : Unicode.unicode_char;
Version : xml_versions := xml_1_1) return Boolean
is
begin
-- ??? Should we create a single lookup table for all of these, that
-- would be more efficient
case Version is
when xml_1_0_third_edition | xml_1_0_fourth_edition =>
return Char = Period
or else Char = Hyphen_Minus
or else Char = Spacing_Underscore
or else Char = Colon
or else Is_Digit (Char)
or else Is_Letter (Char)
or else Is_Combining_Char (Char)
or else Is_Extender (Char);
when xml_1_0_fifth_edition | xml_1_0 | xml_1_1 =>
return Char = Hyphen_Minus
or else Char = Period
or else (Char in Digit_Zero .. Digit_Nine)
or else Char = 16#B7#
or else (Char in 16#0300# .. 16#036F#)
or else (Char in 16#203F# .. 16#2040#)
or else Is_Valid_Name_Startchar (Char, Version);
end case;
end Is_Valid_Name_Char;
-----------------------------
-- Is_Valid_Name_Startchar --
-----------------------------
function Is_Valid_Name_Startchar
(Char : Unicode.unicode_char;
Version : xml_versions := xml_1_1) return Boolean
is
begin
case Version is
when xml_1_0_third_edition | xml_1_0_fourth_edition =>
return Char = Spacing_Underscore or else Is_Letter (Char);
when xml_1_0_fifth_edition | xml_1_0 | xml_1_1 =>
return Char = Colon
or else Char = Spacing_Underscore
or else Char in Latin_Capital_Letter_A .. Latin_Capital_Letter_Z
or else Char in Latin_Small_Letter_A .. Latin_Small_Letter_Z
or else Char in 16#C0# .. 16#D6#
or else Char in 16#D8# .. 16#F6#
or else Char in 16#F8# .. 16#2FF#
or else Char in 16#370# .. 16#37D#
or else Char in 16#37F# .. 16#1FFF#
or else Char in 16#200C# .. 16#200D#
or else Char in 16#2070# .. 16#218F#
or else Char in 16#2C00# .. 16#2FEF#
or else Char in 16#3001# .. 16#D7FF#
or else Char in 16#F900# .. 16#FDCF#
or else Char in 16#FDF0# .. 16#FFFD#
or else Char in 16#10000# .. 16#EFFFF#;
end case;
end Is_Valid_Name_Startchar;
--------------------------
-- Is_Valid_NCname_Char --
--------------------------
function Is_Valid_NCname_Char
(Char : Unicode.unicode_char;
Version : xml_versions := xml_1_1) return Boolean
is
begin
case Version is
when xml_1_0_third_edition | xml_1_0_fourth_edition =>
return Char = Period
or else Char = Hyphen_Minus
or else Char = Spacing_Underscore
or else Is_Digit (Char)
or else Is_Letter (Char)
or else Is_Combining_Char (Char)
or else Is_Extender (Char);
when xml_1_0_fifth_edition | xml_1_0 | xml_1_1 =>
return Is_Valid_Name_Char (Char, Version);
end case;
end Is_Valid_NCname_Char;
----------------------
-- Is_Valid_Nmtoken --
----------------------
function Is_Valid_Nmtoken
(Nmtoken : Unicode.CES.byte_sequence;
Version : xml_versions := xml_1_1) return Boolean
is
C : unicode_char;
Index : Natural := Nmtoken'first;
begin
while Index <= Nmtoken'last loop
Encoding.Read (Nmtoken, Index, C);
if not Is_Valid_Name_Char (C, Version) then
return False;
end if;
end loop;
return True;
end Is_Valid_Nmtoken;
-----------------------
-- Is_Valid_Nmtokens --
-----------------------
function Is_Valid_Nmtokens
(Nmtokens : Unicode.CES.byte_sequence;
Version : xml_versions := xml_1_1) return Boolean
is
C : unicode_char;
Index : Natural := Nmtokens'first;
begin
if Nmtokens'length = 0 then
return False;
end if;
while Index <= Nmtokens'last loop
Encoding.Read (Nmtokens, Index, C);
if C /= Space and then not Is_Valid_Name_Char (C, Version) then
return False;
end if;
end loop;
return True;
end Is_Valid_Nmtokens;
-------------------
-- Is_Valid_Name --
-------------------
function Is_Valid_Name
(Name : Unicode.CES.byte_sequence;
Version : xml_versions := xml_1_1) return Boolean
is
C : unicode_char;
Index : Natural := Name'first;
begin
if Name = "" then
return False;
end if;
Encoding.Read (Name, Index, C);
if C /= Colon and then not Is_Valid_Name_Startchar (C, Version) then
return False;
end if;
return Is_Valid_Nmtoken (Name (Index .. Name'last), Version);
end Is_Valid_Name;
--------------------
-- Is_Valid_Names --
--------------------
function Is_Valid_Names
(Name : Unicode.CES.byte_sequence;
Version : xml_versions := xml_1_1) return Boolean
is
C : unicode_char;
Index : Natural := Name'first;
First_In_Name : Boolean := True;
begin
if Name'length = 0 then
return False;
end if;
while Index <= Name'last loop
Encoding.Read (Name, Index, C);
if C = Space then
First_In_Name := True;
elsif First_In_Name then
if not Is_Valid_Name_Startchar (C, Version) then
return False;
end if;
First_In_Name := False;
elsif not Is_Valid_Name_Char (C, Version) then
return False;
end if;
end loop;
return True;
end Is_Valid_Names;
----------------------
-- Is_Valid_NCnames --
----------------------
function Is_Valid_NCnames
(Name : Unicode.CES.byte_sequence;
Version : xml_versions := xml_1_1) return Boolean
is
C : unicode_char;
Index : Natural := Name'first;
First_In_Name : Boolean := True;
begin
if Name'length = 0 then
return False;
end if;
while Index <= Name'last loop
Encoding.Read (Name, Index, C);
if C = Space then
First_In_Name := True;
elsif First_In_Name then
if C /= Spacing_Underscore and then not Is_Letter (C) then
return False;
end if;
First_In_Name := False;
elsif not Is_Valid_NCname_Char (C, Version) then
return False;
end if;
end loop;
return True;
end Is_Valid_NCnames;
---------------------
-- Is_Valid_NCname --
---------------------
function Is_Valid_NCname
(Name : Unicode.CES.byte_sequence;
Version : xml_versions := xml_1_1) return Boolean
is
C : unicode_char;
Index : Natural := Name'first;
begin
if Name'length = 0 then
return False;
end if;
Encoding.Read (Name, Index, C);
if not Is_Valid_Name_Startchar (C, Version) then
return False;
end if;
while Index <= Name'last loop
Encoding.Read (Name, Index, C);
if not Is_Valid_NCname_Char (C, Version) then
return False;
end if;
end loop;
return True;
end Is_Valid_NCname;
--------------------
-- Is_Valid_QName --
--------------------
function Is_Valid_QName
(Name : Unicode.CES.byte_sequence;
Version : xml_versions := xml_1_1) return Boolean
is
begin
for N in Name'range loop
if Name (N) = ':' then
return N /= Name'last
and then Is_Valid_NCname (Name (Name'first .. N - 1), Version)
and then Is_Valid_NCname (Name (N + 1 .. Name'last), Version);
end if;
end loop;
return Is_Valid_NCname (Name, Version);
end Is_Valid_QName;
----------
-- Hash --
----------
function Hash
(Key : Unicode.CES.byte_sequence) return Interfaces.Unsigned_32
is
type uns is mod 2**32;
function Rotate_Left (Value : uns; Amount : Natural) return uns;
pragma import (Intrinsic, Rotate_Left);
Tmp : uns := 0;
begin
for J in Key'range loop
Tmp := Rotate_Left (Tmp, 1) + Character'pos (Key (J));
end loop;
return Interfaces.Unsigned_32 (Tmp);
end Hash;
function Hash
(Key : Unicode.CES.byte_sequence_access) return Interfaces.Unsigned_32
is
begin
return Hash (Key.all);
end Hash;
-----------
-- Equal --
-----------
function Equal (S1, S2 : Unicode.CES.byte_sequence_access) return Boolean is
begin
return S1.all = S2.all;
end Equal;
-----------------
-- Split_Qname --
-----------------
function Split_Qname (Qname : Unicode.CES.byte_sequence) return Integer is
begin
-- ??? This function assumes we are using UTF8 internally
for Q in Qname'range loop
if Qname (Q) = ':' then
return Q;
end if;
end loop;
return Qname'first - 1;
end Split_Qname;
---------------
-- Check_URI --
---------------
function Check_URI
(Name : Unicode.CES.byte_sequence;
Version : xml_versions := xml_1_1) return uri_type
is
Index : Integer := Name'first;
C : unicode_char;
Has_Scheme : Boolean := False;
Has_Hash : Boolean := False;
begin
-- This is RFC 3986 which obsoletes RFC 2396.
-- URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
-- hier-part = "//" authority path-abempty
-- / path-absolute
-- / path-rootless
-- / path-empty
-- URI-reference = URI / relative-ref
-- absolute-URI = scheme ":" hier-part [ "?" query ]
-- relative-ref = relative-part [ "?" query ] [ "#" fragment ]
-- relative-part = "//" authority path-abempty
-- / path-absolute
-- / path-noscheme
-- / path-empty
-- scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
-- authority = [ userinfo "@" ] host [ ":" port ]
-- userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
-- host = IP-literal / IPv4address / reg-name
-- port = *DIGIT
-- IP-literal = "[" ( IPv6address / IPvFuture ) "]"
-- reg-name = *( unreserved / pct-encoded / sub-delims )
-- path = path-abempty ; begins with "/" or is empty
-- / path-absolute ; begins with "/" but not "//"
-- / path-noscheme ; begins with a non-colon segment
-- / path-rootless ; begins with a segment
-- / path-empty ; zero characters
-- path-abempty = *( "/" segment )
-- path-absolute = "/" [ segment-nz *( "/" segment ) ]
-- path-noscheme = segment-nz-nc *( "/" segment )
-- path-rootless = segment-nz *( "/" segment )
-- path-empty = 0
-- segment = *pchar
-- segment-nz = 1*pchar
-- segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
-- ; non-zero-length segment without any colon ":"
-- pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
-- query = *( pchar / "/" / "?" )
-- fragment = *( pchar / "/" / "?" )
-- pct-encoded = "%" HEXDIG HEXDIG
-- unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
-- reserved = gen-delims / sub-delims
-- gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
-- sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
-- / "*" / "+" / "," / ";" / "="
-- The relativeURI rule has been replaced with relative-ref.
-- Find and test the scheme. If there is no scheme, we might have a
-- relative URI
while Index <= Name'last loop
Encoding.Read (Name, Index, C);
if C = Colon then
Has_Scheme := True;
exit;
elsif C not in Character'pos ('a') .. Character'pos ('z')
and then C not in Character'pos ('A') .. Character'pos ('Z')
and then C not in Character'pos ('0') .. Character'pos ('9')
and then C /= Character'pos ('+')
and then C /= Character'pos ('-')
and then C /= Character'pos ('.')
then
Has_Scheme := False;
exit;
end if;
end loop;
-- For a mailto:, nothing else to check
if Has_Scheme
and then Name (Name'first .. Index - 1) = Mailto_Sequence
then
return uri_absolute;
end if;
-- Check the rest of the URI. We currently go for a fast check, and do
-- not check each of the components specifically.
while Index <= Name'last loop
Encoding.Read (Name, Index, C);
if C = Unicode.Names.Basic_Latin.Hash then
if Has_Hash then
-- Two hashes => Invalid URI
return uri_none;
end if;
Has_Hash := True;
-- RFC3987 authorizes a big range of UCSchars, excluding only some of
-- the characters. We'll just accept them here, no point in wasting
-- time for a case that will never occur in practice
elsif Version /= xml_1_1
and then
(C not in Valid_URI_Characters'range
or else not Valid_URI_Characters (C))
then
return uri_none;
elsif Version = xml_1_1
and then
((C in Valid_URI_Characters'range
and then not Valid_URI_Characters (C))
or else (C >= 16#D800# and then C <= 16#FDEF#)
or else (C >= 16#FFF0# and then C <= 16#FFFF#))
then
return uri_none;
end if;
end loop;
if Has_Scheme then
return uri_absolute;
else
return uri_relative_ref;
end if;
end Check_URI;
------------------
-- Is_Valid_URI --
------------------
function Is_Valid_URI (Name : Unicode.CES.byte_sequence) return Boolean is
begin
return Check_URI (Name) /= uri_none;
end Is_Valid_URI;
------------------
-- Is_Valid_URN --
------------------
function Is_Valid_URN (Name : Unicode.CES.byte_sequence) return Boolean is
begin
-- format is "urn:" ":"
-- NID: Namespace Identifier
-- NSS: Namespace Specific String
-- ??? Note that this implementation makes optional as it is
-- often the case in current usage.
-- Leading sequence should be case insensitive
if Name'length < URN_Sequence'length
or else
Name (Name'first .. Name'first + URN_Sequence'length - 1) /=
URN_Sequence
then
return False;
end if;
return True;
end Is_Valid_URN;
------------------
-- Is_Valid_IRI --
------------------
function Is_Valid_IRI
(Name : Unicode.CES.byte_sequence;
Version : xml_versions := xml_1_1) return Boolean
is
begin
return Check_URI (Name, Version) = uri_absolute
or else Is_Valid_URN (Name);
end Is_Valid_IRI;
---------------------------
-- Contains_URI_Fragment --
---------------------------
function Contains_URI_Fragment
(Name : Unicode.CES.byte_sequence) return Boolean
is
Index : Integer := Name'first;
C : unicode_char;
begin
while Index <= Name'last loop
Encoding.Read (Name, Index, C);
if C = Pound_Sign then
return True;
end if;
end loop;
return False;
end Contains_URI_Fragment;
------------------------
-- Is_Valid_HexBinary --
------------------------
function Is_Valid_HexBinary
(Str : Unicode.CES.byte_sequence) return Boolean
is
Index : Integer := Str'first;
C : unicode_char;
begin
while Index <= Str'last loop
Encoding.Read (Str, Index, C);
if C not in Character'pos ('0') .. Character'pos ('9')
and then C not in Character'pos ('a') .. Character'pos ('f')
and then C not in Character'pos ('A') .. Character'pos ('F')
then
return False;
end if;
end loop;
return True;
end Is_Valid_HexBinary;
---------------------------
-- Is_Valid_Base64Binary --
---------------------------
function Is_Valid_Base64Binary
(Value : Unicode.CES.byte_sequence) return Boolean
is
Index : Integer := Value'first;
C : unicode_char;
Prev_Is_Space : Boolean := False;
Group : Natural := 1;
-- Characters are always by groups of 4, this variable indicates the
-- index of the current char in the group
type char_categorie is (char_04, char_16, char_64, char_equal);
Chars : array (1 .. 4) of char_categorie;
-- The various categories that characters can belong two. In the Base64
-- encoding, we always have groups of 4 characters.
begin
while Index <= Value'last loop
Sax.Encodings.Encoding.Read (Value, Index, C);
if C = 16#20# or C = 16#A# then
if Prev_Is_Space then
return False; -- Can never have two spaces in a row
end if;
Prev_Is_Space := True;
elsif C in B04'range and then B04 (C) then
Prev_Is_Space := False;
Chars (Group) := char_04;
Group := Group + 1;
elsif C in B16'range and then B16 (C) then
Prev_Is_Space := False;
Chars (Group) := char_16;
Group := Group + 1;
elsif C in B64'range and then B64 (C) then
Prev_Is_Space := False;
Chars (Group) := char_64;
Group := Group + 1;
elsif C = Character'pos ('=') then
Prev_Is_Space := False;
if Group = 3
and then Chars (1) <= char_64
and then Chars (2) = char_04
then
Chars (Group) := char_equal;
Group := Group + 1;
elsif Group = 4
and then Chars (1) <= char_64
and then Chars (2) <= char_64
and then Chars (3) <= char_16
then
Group := 1;
exit; -- Must end now
elsif Group = 4
and then Chars (1) <= char_64
and then Chars (2) <= char_04
and then Chars (3) <= char_equal
then
Group := 1;
exit; -- Must end now
else
return False;
end if;
else
return False;
end if;
if Group > 4 then
Group := 1;
end if;
end loop;
-- Cannot finish with a space
if Prev_Is_Space or Group /= 1 or Index <= Value'last then
return False;
end if;
return True;
end Is_Valid_Base64Binary;
--------------------------
-- Collapse_Whitespaces --
--------------------------
function Collapse_Whitespaces (Str : String) return String is
Start : Integer := Str'first;
begin
-- Find first non-whitespace character in Str
while Start <= Str'last
and then Is_White_Space (Character'pos (Str (Start)))
loop
Start := Start + 1;
end loop;
-- Then remove series of contiguous whitespaces
declare
Normalized : String (1 .. Str'last - Start + 1);
Index : Integer := Normalized'first;
begin
while Start <= Str'last loop
if Is_White_Space (Character'pos (Str (Start))) then
-- Remove series of contiguous whitespaces
if Start = Str'first
or else not Is_White_Space (Character'pos (Str (Start - 1)))
then
Normalized (Index) := ' ';
Index := Index + 1;
end if;
else
Normalized (Index) := Str (Start);
Index := Index + 1;
end if;
Start := Start + 1;
end loop;
if Index = Normalized'first then
return "";
elsif Normalized (Index - 1) = ' ' then
return Normalized (Normalized'first .. Index - 2);
else
return Normalized (Normalized'first .. Index - 1);
end if;
end;
end Collapse_Whitespaces;
----------
-- Free --
----------
procedure Free (NS : in out xml_ns) is
procedure Free_NS is new Ada.Unchecked_Deallocation
(xml_ns_record,
xml_ns);
Tmp : xml_ns;
begin
while NS /= null loop
Tmp := NS.Next;
Free_NS (NS);
NS := Tmp;
end loop;
end Free;
-------------
-- Get_URI --
-------------
function Get_URI (NS : xml_ns) return symbol is
begin
if NS = null then
return Empty_String;
else
return NS.URI;
end if;
end Get_URI;
----------------
-- Get_Prefix --
----------------
function Get_Prefix (NS : xml_ns) return symbol is
begin
if NS = null then
return Empty_String;
else
return NS.Prefix;
end if;
end Get_Prefix;
-------------------
-- Element_Count --
-------------------
function Element_Count (NS : xml_ns) return Natural is
Tmp : xml_ns := NS;
begin
if NS = null then
return 0;
else
while Tmp.Same_As /= null loop
Tmp := Tmp.Same_As;
end loop;
return Tmp.Use_Count;
end if;
end Element_Count;
---------------------
-- Increment_Count --
---------------------
procedure Increment_Count (NS : xml_ns) is
Tmp : xml_ns := NS;
begin
while Tmp.Same_As /= null loop
Tmp := Tmp.Same_As;
end loop;
Tmp.Use_Count := Tmp.Use_Count + 1;
end Increment_Count;
------------------
-- Next_In_List --
------------------
function Next_In_List (NS : xml_ns) return xml_ns is
begin
return NS.Next;
end Next_In_List;
---------------------
-- Find_NS_In_List --
---------------------
function Find_NS_In_List
(List : xml_ns;
Prefix : Sax.Symbols.symbol;
Include_Default_NS : Boolean := True;
List_Is_From_Element : Boolean) return xml_ns
is
NS : xml_ns := List;
begin
while NS /= No_XML_NS loop
if
(Include_Default_NS
or else not List_Is_From_Element
or else NS.Prefix /= Empty_String)
and then NS.Prefix = Prefix
then
return NS;
end if;
NS := NS.Next;
end loop;
return null;
end Find_NS_In_List;
------------------------------
-- Find_NS_From_URI_In_List --
------------------------------
function Find_NS_From_URI_In_List
(List : xml_ns;
URI : Sax.Symbols.symbol) return xml_ns
is
NS : xml_ns := List;
begin
while NS /= No_XML_NS loop
if NS.URI = URI then
return NS;
end if;
NS := NS.Next;
end loop;
return null;
end Find_NS_From_URI_In_List;
--------------------
-- Add_NS_To_List --
--------------------
procedure Add_NS_To_List
(List : in out xml_ns;
Same_As : xml_ns := No_XML_NS;
Prefix, URI : symbol)
is
begin
List :=
new xml_ns_record'
(Prefix => Prefix,
URI => URI,
System_Id => No_Symbol,
Same_As => Same_As,
Use_Count => 0,
Next => List);
end Add_NS_To_List;
--------------
-- Allocate --
--------------
function Allocate return symbol_table is
S : symbol_table_access;
begin
S := new symbol_table_record;
return Symbol_Table_Pointers.Allocate (S);
end Allocate;
----------
-- Find --
----------
function Find
(Table : symbol_table;
Str : Unicode.CES.byte_sequence) return Sax.Symbols.symbol
is
begin
return Find (Symbol_Table_Pointers.Get (Table), Str);
end Find;
-------------
-- Convert --
-------------
function Convert
(Table : symbol_table;
Sym : Sax.Symbols.symbol) return Sax.Symbols.symbol
is
begin
if Sym = No_Symbol then
return No_Symbol;
else
return Find (Table, Get (Sym).all);
end if;
end Convert;
-------------------
-- Set_System_Id --
-------------------
procedure Set_System_Id (NS : xml_ns; System_Id : Sax.Symbols.symbol) is
begin
NS.System_Id := System_Id;
end Set_System_Id;
-------------------
-- Get_System_Id --
-------------------
function Get_System_Id (NS : xml_ns) return Sax.Symbols.symbol is
begin
return NS.System_Id;
end Get_System_Id;
-------------------
-- For_Each_Item --
-------------------
procedure For_Each_Item (Ch : Unicode.CES.byte_sequence) is
Index : Integer := Ch'first;
Last, Start : Integer;
C : unicode_char;
begin
-- Ch might be from an attribute (in which case it might have been
-- normalized first), or for the value of a mixed element, in which case
-- no element has taken place. We therefore need to skip initial spaces
while Index <= Ch'last loop
-- Skip leading spaces
while Index <= Ch'last loop
Start := Index;
Encoding.Read (Ch, Index, C);
exit when not Is_White_Space (C);
Start := Ch'last + 1;
end loop;
exit when Start > Ch'last;
-- Move to first whitespace after word
while Index <= Ch'last loop
Last := Index;
Encoding.Read (Ch, Index, C);
exit when Index > Ch'last or else Is_White_Space (C);
end loop;
if Index > Ch'last and then not Is_White_Space (C) then
Callback (Ch (Start .. Index - 1));
exit;
else
Callback (Ch (Start .. Last - 1)); -- Last points to a whitespace
end if;
end loop;
end For_Each_Item;
end Sax.Utils;