------------------------------------------------------------------------------
--                     XML/Ada - An XML suite for Ada95                     --
--                                                                          --
--                     Copyright (C) 2001-2012, AdaCore                     --
--                                                                          --
-- This library is free software; you can redistribute it and/or modify it  --
-- under terms of the GNU General Public License as published by the Free   --
-- Software Foundation; either version 3, or (at your option) any later     --
-- version. This library is distributed in the hope that it will be useful, --
-- but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHAN-  --
-- TABILITY or FITNESS FOR A PARTICULAR PURPOSE.                            --
--                                                                          --
--                                                                          --
--                                                                          --
--                                                                          --
--                                                                          --
-- You should have received a copy of the GNU General Public License and    --
-- a copy of the GCC Runtime Library Exception along with this program;     --
-- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see     --
-- <http://www.gnu.org/licenses/>.                                          --
--                                                                          --
------------------------------------------------------------------------------

--  Root of a hierarchy of character sources.
--
--  An input source hands out characters one at a time so that they can be
--  consumed for various tasks. Reading is strictly forward: there is no way
--  to step back or to revisit a character that was already returned. The
--  interface is deliberately small so that it can be implemented on top of
--  files, sockets, ...
--
--  Implementations should try to detect the encoding on their own, for
--  instance from the byte order mark of the unicode stream when one is
--  present (16#FFFE# or 16#FEFF#).

with Unicode;
with Unicode.CES;
with Unicode.CES.Basic_8bit;
with Unicode.CCS;

package Input_Sources is

   type Input_Source is abstract tagged limited private;
   --  Common ancestor of all sources; characters are read from it one by
   --  one.

   type Input_Source_Access is access all Input_Source'Class;
   --  Class-wide access to any input source.

   procedure Next_Char
     (From : in out Input_Source;
      C    : out Unicode.Unicode_Char) is abstract;
   --  Store in C the next character available in From.
   --  The internal index is advanced as well, so the following call to
   --  this procedure yields the character that comes after C in the
   --  stream.

   function Eof (From : Input_Source) return Boolean is abstract;
   --  True once the stream has no character left to read.

   function Prolog_Size (From : Input_Source) return Natural;
   --  Number of characters skipped at the very start of the stream, for
   --  instance because they only served to announce the encoding used in
   --  the file.

   procedure Set_Encoding
     (Input : in out Input_Source;
      Es    : Unicode.CES.Encoding_Scheme);
   --  Attach the encoding scheme Es to the input stream.
   --  Together with the character set, this allows converting any byte
   --  sequence encoding (Utf8, Utf16, ..) and any character set (Latin-1,
   --  Unicode, ..) to unicode characters.
   --  Input sources are encouraged to guess the encoding whenever they
   --  can; the guessed value can be overridden at any time through this
   --  procedure.

   function Get_Encoding
     (Input : Input_Source) return Unicode.CES.Encoding_Scheme;
   --  Encoding scheme currently associated with Input.

   procedure Set_Character_Set
     (Input : in out Input_Source;
      Cs    : Unicode.CCS.Character_Set);
   --  Attach the character set Cs to the stream.
   --  A character set cannot be detected automatically from a stream, so
   --  unless this procedure is called the stream is considered to contain
   --  Unicode.

   function Get_Character_Set
     (Input : Input_Source) return Unicode.CCS.Character_Set;
   --  Character set currently associated with Input.

   procedure Set_Stream_Encoding
     (Input    : in out Input_Sources.Input_Source'Class;
      Encoding : String);
   --  Set both the encoding scheme and the character set of Input from a
   --  single name.
   --  Encoding uses the same spelling as the "encoding=" parameter of an
   --  XML file, for instance "UTF-8", "UTF-16", "ISO-8859-1",...
   --  Invalid_Encoding is raised when Encoding is not a known name.

   procedure Set_System_Id
     (Input : in out Input_Source;
      Id    : Unicode.CES.Byte_Sequence);
   --  Record Id as the system ID of the input source.
   --  Setting it is optional, but worthwhile: it can be used to resolve
   --  the relative URIs found in documents. In most cases it is set
   --  automatically when you Open the input, and it can be overridden
   --  after the call to Open.

   function Get_System_Id
     (Input : Input_Source) return Unicode.CES.Byte_Sequence;
   --  System ID of Input.

   procedure Set_Public_Id
     (Input : in out Input_Source;
      Id    : Unicode.CES.Byte_Sequence);
   --  Record Id as the public ID of the input source.
   --  When one is given, it is reported as part of the location
   --  information. In most cases it is set automatically when you Open the
   --  input, and it can be overridden after the call to Open.

   function Get_Public_Id
     (Input : Input_Source) return Unicode.CES.Byte_Sequence;
   --  Public ID of Input.

   procedure Close (Input : in out Input_Source);
   --  Release the memory allocated in Input.

private

   type Input_Source is abstract tagged limited record
      Prolog_Size : Natural := 0;
      --  Starts at zero; presumably updated by derived types when they
      --  skip leading characters such as a byte order mark.

      Es          : Unicode.CES.Encoding_Scheme :=
        Unicode.CES.Basic_8bit.Basic_8bit_Encoding;
      --  Encoding scheme, Basic_8bit until another one is set

      Cs          : Unicode.CCS.Character_Set :=
        Unicode.CCS.Unicode_Character_Set;
      --  Character set, Unicode until another one is set

      Public_Id   : Unicode.CES.Byte_Sequence_Access;
      System_Id   : Unicode.CES.Byte_Sequence_Access;
      --  No explicit default: both identifiers start out null
   end record;

end Input_Sources;