------------------------------------------------------------------------------
-- XML/Ada - An XML suite for Ada95 --
-- --
-- Copyright (C) 2001-2012, AdaCore --
-- --
-- This library is free software; you can redistribute it and/or modify it --
-- under terms of the GNU General Public License as published by the Free --
-- Software Foundation; either version 3, or (at your option) any later --
-- version. This library is distributed in the hope that it will be useful, --
-- but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHAN- --
-- TABILITY or FITNESS FOR A PARTICULAR PURPOSE. --
-- --
-- --
-- --
-- --
-- --
-- You should have received a copy of the GNU General Public License and --
-- a copy of the GCC Runtime Library Exception along with this program; --
-- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see --
-- <http://www.gnu.org/licenses/>. --
-- --
------------------------------------------------------------------------------
--
-- This package provides a hierarchy of objects that return characters
-- that can then be used for different tasks.
-- Reading is strictly forward: it is not possible to go backward or to
-- return to previously read characters. This interface is intentionally
-- kept minimal, so that it can easily be used with files, sockets, ...
--
-- Input sources should try to automatically detect the appropriate encoding
-- to use, for instance by using the byte order mark, if present, of the
-- unicode stream (16#FFFE# or 16#FEFF#).
--
with Unicode;
with Unicode.CES;
with Unicode.CES.Basic_8bit;
with Unicode.CCS;
package Input_Sources is

   type Input_Source is abstract tagged limited private;
   --  Root of the hierarchy of character readers: an input source hands
   --  out its characters one at a time.

   type Input_Source_Access is access all Input_Source'Class;

   procedure Next_Char
     (From : in out Input_Source;
      C    : out Unicode.Unicode_Char) is abstract;
   --  Read a single character from From into C.
   --  The internal index is advanced as well, so that the next call returns
   --  the following character of the stream.

   function Eof (From : Input_Source) return Boolean is abstract;
   --  True when no character is left to read on the stream.

   function Prolog_Size (From : Input_Source) return Natural;
   --  Number of characters that were skipped at the beginning of the stream
   --  (for instance because they only indicated the encoding used in the
   --  file).

   procedure Set_Encoding
     (Input : in out Input_Source;
      Es    : Unicode.CES.Encoding_Scheme);
   --  Associate the encoding scheme Es with the input stream.
   --  The encoding is what allows converting from any byte-sequence encoding
   --  (Utf8, Utf16, ..) and any character set (Latin-1, Unicode, ..) to
   --  unicode characters.
   --  Input sources are encouraged to guess the encoding whenever possible;
   --  this procedure lets you override that default at any time.

   function Get_Encoding
     (Input : Input_Source) return Unicode.CES.Encoding_Scheme;
   --  Encoding scheme currently associated with the input.

   procedure Set_Character_Set
     (Input : in out Input_Source;
      Cs    : Unicode.CCS.Character_Set);
   --  Associate the character set Cs with the stream.
   --  A character set cannot be detected automatically for a stream, so the
   --  default one is always considered to be Unicode.

   function Get_Character_Set
     (Input : Input_Source) return Unicode.CCS.Character_Set;
   --  Character set currently associated with the input.

   procedure Set_Stream_Encoding
     (Input    : in out Input_Sources.Input_Source'Class;
      Encoding : String);
   --  Set both the encoding and the character set for the stream associated
   --  with Input.
   --  Encoding should have the form given in the "encoding=" parameter of an
   --  XML file, for instance "UTF-8", "UTF-16", "ISO-8859-1", ...
   --  Invalid_Encoding is raised if Encoding is unknown.

   procedure Set_System_Id
     (Input : in out Input_Source;
      Id    : Unicode.CES.Byte_Sequence);
   --  Set the system ID associated with the input source.
   --  This is optional, but still useful since the system ID can be used to
   --  resolve relative URI's from documents. In most cases it is set
   --  automatically when you Open the input, and you can override it after
   --  the call to Open.

   function Get_System_Id
     (Input : Input_Source) return Unicode.CES.Byte_Sequence;
   --  System ID of the input source.

   procedure Set_Public_Id
     (Input : in out Input_Source;
      Id    : Unicode.CES.Byte_Sequence);
   --  Set the public ID associated with the input source.
   --  When it is given, it is provided as part of the location information.
   --  In most cases it is set automatically when you Open the input, and you
   --  can override it after the call to Open.

   function Get_Public_Id
     (Input : Input_Source) return Unicode.CES.Byte_Sequence;
   --  Public ID of the input source.

   procedure Close (Input : in out Input_Source);
   --  Free the memory allocated in the input.

private

   type Input_Source is abstract tagged limited record
      Prolog_Size : Natural := 0;
      --  No character has been skipped by default

      Es          : Unicode.CES.Encoding_Scheme :=
        Unicode.CES.Basic_8bit.Basic_8bit_Encoding;
      --  Defaults to the basic 8-bit encoding

      Cs          : Unicode.CCS.Character_Set :=
        Unicode.CCS.Unicode_Character_Set;
      --  Defaults to the Unicode character set

      Public_Id   : Unicode.CES.Byte_Sequence_Access;
      System_Id   : Unicode.CES.Byte_Sequence_Access;
   end record;

end Input_Sources;