------------------------------------------------------------------------------ -- XML/Ada - An XML suite for Ada95 -- -- -- -- Copyright (C) 2010-2012, AdaCore -- -- -- -- This library is free software; you can redistribute it and/or modify it -- -- under terms of the GNU General Public License as published by the Free -- -- Software Foundation; either version 3, or (at your option) any later -- -- version. This library is distributed in the hope that it will be useful, -- -- but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHAN- -- -- TABILITY or FITNESS FOR A PARTICULAR PURPOSE. -- -- -- -- -- -- -- -- -- -- -- -- You should have received a copy of the GNU General Public License and -- -- a copy of the GCC Runtime Library Exception along with this program; -- -- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -- -- . -- -- -- ------------------------------------------------------------------------------ -- This package implements state machines (non-deterministic, aka NFA, and -- deterministic, aka DFA). pragma ada_05; with GNAT.Dynamic_Tables; generic type symbol is private; -- The symbols accepted by the state machine as input. type transition_symbol is private; -- One such symbol might be associated with each transition (although this -- is optional) to speed up the processing of the state machine with function Image (Sym : transition_symbol) return String; -- Display Sym. type state_user_data is private; Default_Data : state_user_data; -- User data associated with each state Default_State_Count : Positive := 100; Default_Transition_Count : Positive := 100; -- Default size of the state machine initially package Sax.State_Machines is type state is new Natural; -- range 0 .. 2 ** 16 - 1; -- A state of a state machine type state_data_access is access all state_user_data; ----------------------------------------------- -- Non-deterministic automatons construction -- ----------------------------------------------- type nfa is tagged private; type nfa_access is access all nfa'class; -- A non-deterministic automaton procedure Initialize (Self : in out nfa; States_Are_Statefull : Boolean := False); -- Initializes a new automaton -- If [States_Are_Statefull], the active states's user data will be used to -- perform various things. Otherwise, the exact list of states are -- irrelevant, and we are only interested in the transitions between them. -- This setting affects the way the machine is created in the call to -- [Repeat]. When the setting is True, more empty transitions will have to -- be created. For instance: -- if the transition "a" is to be repeated 0 or 1 time, the state -- machine will be: -- [1]---a---->[2] if not States_Are_Statefull -- \-------/ -- Or -- [1]---a---->[2]---->[3] if States_Are_Statefull -- \----------------/ -- -- So when processing the events, the state [2] might be active initially -- in one of the cases. procedure Free (Self : in out nfa); procedure Free (Automaton : in out nfa_access); -- Free the memory allocated to [Self] type state_callback is access procedure (Machine : access nfa'class; For_State : state); -- A callback when some event occurs on a state function Add_State (Self : access nfa; Data : state_user_data := Default_Data) return state; -- Add a new state into the table. No_State : constant state; Start_State : constant state; Final_State : constant state; -- The start and final states of an automation -- There is a single one of each per automaton, but you can of course -- connect them, through empty transitions, to any number of states within -- [Self], thus making them start states in effect. -- These two states always exist. procedure Set_Data (Self : access nfa; S : state; Data : state_user_data); function Get_Data (Self : access nfa; S : state) return state_data_access; -- Returns an access to the state's user data. This can be modified in -- place, but the access type should not be stored since it still belongs -- to the NFA. -- This API is slightly faster and more convenient than having a -- [Get_User_Data] and [Set_User_Data] set of subprograms. procedure Add_Transition (Self : access nfa; From : state; To : state; On_Symbol : transition_symbol); -- Add a new transition between the two states. -- If the symbol given as input to [Self] matches On_Symbol, a transition -- will occur from [From_State] to [To_State]. -- Both states might be equal. -- You cannot add transitions from the final state procedure Add_Empty_Transition (Self : access nfa; From : state; To : state); -- Indicates that any time [Self] is in [From_State], it should also be -- considered as in [To_State]. Both states are basically equivalent. -- You cannot add transitions from the final state function Repeat (Self : access nfa; From, To : state; Min_Occurs : Natural := 1; Max_Occurs : Natural := 1) return state; -- Modify the automaton to repat the subautomaton From_State .. To_State a -- specific number of times. -- Note that this requires expansion (for instance "e{3,4}" is expanded to -- "eeee?", so requires more states), so Max_Occurs should not be too big. -- -- Here is an example of use (equivalent to 'b{2,3}' in traditional regexp) -- A := N.Add_State; -- B := N.Add_State; -- N.Add_Transition (A, B, 'b'); -- C := N.Repeat (A, B, 2, 3); -- -- On exit, [From] and [To] are still the original sub-automaton. The -- returned value is the end state of the repeated automaton (ie it plays -- the same role as [To] in the original NFA. -- You would connect to the returned state if you have further states to -- add. You should not, however, directly connect from or to any state -- within [From]..[To] (since they might have been duplicated). -- -- No error is reported if Min_Occurs > Max_Occurs. But nothing is done -- either. --------------- -- Snapshots -- --------------- -- A snapshot saves the states and transitions of the machine, and can be -- used to reset the NFA so that all states and transitions added after -- that point are removed. type nfa_snapshot is private; No_NFA_Snapshot : constant nfa_snapshot; function Get_Snapshot (Self : access nfa) return nfa_snapshot; procedure Reset_To_Snapshot (Self : access nfa; Snapshot : nfa_snapshot); -- Saves the list of states and transitions, so that we can later delete -- all states and transitions added after that point. function Exists (Snapshot : nfa_snapshot; S : state) return Boolean; -- Whether [S] existed in [Snapshot] ---------------------------------------- -- Hierarchical finite state machines -- ---------------------------------------- -- It is possible to build hierarchical state machines: in such machines, -- some of the states will contain nested state machines. -- For instance: -- +----2-----+ -- | |---'b'--> 7 -- 1 ------->|-4->5-->6----------> 3 -- | | -- +----------+ -- -- In the case above, the machine could be in both state 2 (the superstate) -- and in state 5 (the inner state). -- When an input is processed, all active states (super and inner) will -- proceed the event. If 5 matches, we might go to 6. Next time, if 6 -- matches, we would exit 2 and go to 3. -- -- But when 2 and 5 are active, it is also possible that 2 itself matches, -- and then we go to 3 whatever inner state we were in at the same time. -- This is the usual behavior (as defined for instance in UML). -- -- The above would be created as follows. Note that this example also does -- not assume that the nested NFA has been created before we create the -- toplevel NFA. -- -- S1 := N.Add_State; S2 := N.Add_State; S3 := N.Add_State; -- N.Add_Transition (S1, S2, ...); -- will enter "2" and nested NFA -- -- so activate S4 -- N.Add_Transition (S2, S7, 'b'); -- will exit nested NFA whatever -- -- state we are in. -- -- Later on we create the nested automaton: -- S4 := N.Add_State; -- S5 := N.Add_State; N.Add_Transition (S4, S5, ...); -- S6 := N.Add_State; N.Add_Transition (S5, S6, ...); -- -- E := N.Create_Nested (S4); -- N.Set_Nested (E); -- Wraps E (we could have several states wrapping -- -- the same nested) -- N.On_Nested_Exit (S2, S3); -- on exit of nested NFA, moves to 3 -- -- It is possible to build state machines that cannot be executed later on: -- if the state machine within 2 is the same as the outer state machine -- (therefore we have a recursive state machine, somewhat), and we have an -- empty transition from 1 to 4, then the initial state would require -- infinite storage for the NFA_Matcher: we start in state 1, which through -- the empty transition is the same as state 4 (and therefore state 2 is -- also active). But in this recursive NFA, state 4 is another instance -- equivalent to state 1. In turn, we have another nested state 1, then -- another,... ad infinitum. So you should always have a non-empty -- transition into a nested state machine (in thhe schema above, transition -- from 1->4 should not be empty. -- -- It is invalid to add a transition from one of the nested states from one -- of the outer states. The nested automaton must be fully independent, -- since it might be reused in several places. type nested_nfa is private; No_Nested : constant nested_nfa; function Create_Nested (Self : access nfa'class; From : state) return nested_nfa; -- Marks the part of the machine starting at [From] and ending at [To] as -- a nested automaton. It is possible that some states have been created -- between the two that do not belong to the nested automaton, this isn't -- an issue. -- The state [From] is the default state for the nested automaton. For -- instance, a camera has two superstates: "on" and "off". The "on" state -- has a nested NFA for "record" and "playback" modes. By default, if you -- enter the "on" state, the "record" mode is also selected. However, using -- the appropriate camera button, it is possible to enter the "playback" -- button directly. -- -- Within the nested NFA, transitions to [Final_State] play a special role: -- upon reaching it, the nested automaton will be terminated, and control -- returned to the super state (that state is one of the states for which -- we have called Set_Nested). Any empty transition for the superstate will -- be navigated to find out the new list of active states. -- -- No further internal transition must be added to the nested automaton -- after this call, since its states have been marked specially. It is -- still valid to add transitions to the ouside. procedure Set_Nested (Self : access nfa; S : state; Nested : nested_nfa); function Get_Nested (Self : access nfa; S : state) return nested_nfa; -- Setup state [S] so that it includes a nested NFA defined by [Nested] function Get_Start_State (Self : nested_nfa) return state; -- Return the start state that was defined for the nested NFA procedure On_Nested_Exit (Self : access nfa; From : state; To : state; On_Symbol : transition_symbol); procedure On_Empty_Nested_Exit (Self : access nfa; From : state; To : state); -- When the nested NFA in [From] is terminated (because it has reached -- [Final_State] after processing [On_Symbol]), a transition from [From] to -- [To] is performed. [Set_Nested] must have been called for [From] first. ------------------------- -- Dumping information -- ------------------------- -- The following subprograms are used mostly for debugging, and can be used -- to visualize the contents of a state machine, either textually or -- graphically function Default_Image (Self : access nfa'class; S : state; Data : state_user_data) return String; -- The default display for states (only displays the state number) type dump_mode is (dump_multiline, dump_compact, dump_dot, dump_dot_compact); -- The type of dump we can do for the graph: -- [Dump_Multiline]: Each state is displayed on one line -- [Dump_Compact]: Output is on a single line -- [Dump_Dot): Output that can be cut-and-pasted to use by the -- graphviz suite to display a graphical representation generic with function State_Image (Self : access nfa'class; S : state; Data : state_user_data) return String is Default_Image; -- This function is never called for the final state, which has no -- user data associated with it. Nor it is called for the start state. package Pretty_Printers is -- This package provides various functions to view the current state of -- a NFA. It is generic so that most users who instantiate a State -- Machine do not have to provide a State_Image function. function Dump (Self : access nfa'class; Mode : dump_mode := dump_compact; Show_Details : Boolean := True; Show_Isolated_Nodes : Boolean := True; Since : nfa_snapshot := No_NFA_Snapshot) return String; -- Dump the NFA into a string. -- This is mostly for debug reasons, and the output might change from -- one version to the next. -- If [Compact] is True then the output does not include newlines. -- If [Show_Details] is False, then only the count of nodes will be -- displayed, not the actual list of nodes and transitions. -- If [Show_Isolated_Nodes] is false, then nodes that are not linked -- to any other and have no nested node will not be displayed. -- Only states greater than [Since] are displayed: for instance, -- if you already have a NFA to start with and you are adding to it, you -- can view just your addition using this parameter. function Dump (Self : access nfa'class; Nested : nested_nfa; Mode : dump_mode := dump_compact) return String; -- Dump the NFA into a string. function Node_Label (Self : access nfa'class; S : state) return String; -- Textual representation of a state, based on State_Image. end Pretty_Printers; ------------------------------------------- -- Non-deterministic automatons matching -- ------------------------------------------- type abstract_nfa_matcher is abstract tagged null record; generic type active_state_data is private; No_Active_Data : active_state_data; -- Extra data associated with each active state. This data will be -- copied from one iteration of the matcher to the next even if the -- state remains active, so you should use small data and not -- memory-allocated types. with function Match (Self : access abstract_nfa_matcher'class; From_State, To_State : state; Parent_State_Data : access active_state_data; Trans : transition_symbol; Input : symbol) return Boolean; -- Whether the two symbols match. In particular this means that the -- corresponding transition is valid. -- Using the "=" operator might be enough in a lot of cases, but will -- not handle the case where the transitions are more general (for -- instance, allowing a transition on integers where the symbol is -- between 1 and 10). -- The NFA associated with Self must not be modified by this function, -- since it might be shared among several matchers. Self, on the other -- hand, can be freely modified. -- This function can also be used to implement conditional transitions: -- if you store the condition as part of the Transition_Symbol, you can -- then evaluate it as part of this function. This function, however, -- is never called for empty transitions, so these cannot be made -- conditional. with function Expected (Self : abstract_nfa_matcher'class; From_State, To_State : state; Parent_State_Data : access active_state_data; Trans : transition_symbol) return String; -- This function should return the name to display in the result of -- Expected to show what transitions are expected. It is only called -- when Trans.Kind is Transition_On_Symbol. -- The default implementation should be something like: -- return Image (Trans); -- It should return the empty string if the transition is not valid -- (when you implemented conditional transitions). package Matchers is -- This package contains the actual processor to process a series of -- input events, and compute at each step which are the active states -- in the automaton. This package is generic so that for a given NFA -- there can be several different ways to process it and intrepret the -- events. type nfa_matcher is new abstract_nfa_matcher with private; -- When processing an input, the state machine is left untouched. -- Instead, the required information is stored in a separate object, -- so that multiple objects can test the same machine in parallel.. -- It is valid to modify the state machine during the lifetime of a -- matcher. However, this will only affect the matcher the next time -- [Process] is called (so for instance adding an empty transition will -- never impact existing matchers. procedure Free (Self : in out nfa_matcher); -- Free the memory allocated for [Self] function Is_Initialized (Self : nfa_matcher) return Boolean; -- Whether the NFA has been initialized through a call to Start_Match -- (and not yet been freed through a call to Free) procedure Start_Match (Self : in out nfa_matcher; On : access nfa'class; Start_At : state := Start_State); -- Return a matcher which is in [On]'s initial states. -- The matcher holds a reference to [On], so is only valid while [On] -- is in the scope. -- This function automatically frees Self, releasing any previously used -- memory. type active_state_iterator (<>) is private; No_Active_State_Iterator : constant active_state_iterator; -- Intended use is: -- declare -- Iter : Active_State_Iterator := For_Each_Active_State (Matcher); -- begin -- loop -- S := Current (Matcher, Iter); -- exit when S = No_State; -- ... -- Next (Matcher, Iter); -- end loop; -- end; function For_Each_Active_State (Self : nfa_matcher; Ignore_If_Nested : Boolean := False; Ignore_If_Default : Boolean := False) return active_state_iterator; procedure Next (Self : nfa_matcher; Iter : in out active_state_iterator); function Current (Self : nfa_matcher; Iter : active_state_iterator) return state; -- Iterates over all currently active states. -- If [Ignore_If_Nested] is true, the states with a nested NFA are not -- returned unless their nested NFA is in a final state (that's because -- we would be ignoring events on them otherwise). -- If [Ignore_If_Default] is true, the states for which no user data was -- set are never returned. -- [Current] returns [No_State] when there are no remaining active -- states. Note that a given state might have several corresponding -- active states because of nested NFA. function Has_Parent (Iter : active_state_iterator) return Boolean; function Parent (Iter : active_state_iterator) return active_state_iterator; -- Return the parent state of the current state. function Current_Data (Self : nfa_matcher; Iter : active_state_iterator) return state_user_data; -- Returns the user data either from the locally overridden data in the -- matcher, or from the NFA. See [Override_Data]. procedure Replace_State (Self : in out nfa_matcher; Iter : active_state_iterator; S : state); -- Replace the state pointed to by [Iter]. -- This is only rarely useful, but for instance is used when -- validating a XML schema to handle the xsi:type that can be used -- to override the current state. -- This also activates the state accessible from [S] through an empty -- transition. procedure Override_Data (Self : nfa_matcher; Iter : active_state_iterator; Data : state_user_data); -- Overridde the user data associated with the current state. This only -- impacts the matcher, so this data is lost as soon as the current -- state is no longer active. Same as [Replace_State], this is rarely -- useful. function In_Final (Self : nfa_matcher) return Boolean; -- Whether [Self] is in the final step: if True, it means that all input -- processed so far matches the state machine. It is possible to keep -- submitting input procedure Process (Self : in out nfa_matcher; Input : symbol; Success : out Boolean); -- Processes one input symbol, and compute the transitions. -- [Success] is set to False if the input was invalid, and no transition -- could be found for it. In such a case, [Self] is left unmodified. -- If [Success] is set to True, a new set of active states was computed, -- and at least one state is active. -- The transitions (and thus the calls to Match) are processed in the -- order they were created in the NFA. function Expected (Self : nfa_matcher) return String; -- Return a textual description of the valid input symbols from the -- current state. This should be used for error messages for instance. generic with function Node_Label (Self : access nfa'class; S : state) return String; -- Should come from an instantiation of Pretty_Printers procedure Debug_Print (Self : Matchers.nfa_matcher'class; Mode : dump_mode := dump_multiline; Prefix : String := ""); -- Print on stdout some debug information for [Self]. -- [Prefix] is printed at the beginning of the first line private type matcher_state_index is new Natural range 0 .. 2**16; No_Matcher_State : constant matcher_state_index := 0; type matcher_state is record S : state; Data_Is_Overridden : Boolean := False; Overridden_Data : state_user_data := Default_Data; Next : matcher_state_index; Nested : matcher_state_index; Active_Data : aliased active_state_data; end record; -- All currently active states in a NFA. -- For each state, we store a pointer to the next state at the same -- level of the hierarchy (and within the same parent). -- It also stores a pointer to the list of nested states, if there is a -- nested state machine. -- If the state machine is in the final state at any level, -- [Final_State] will be the first element of the corresponding list. package Matcher_State_Arrays is new GNAT.Dynamic_Tables (Table_Component_Type => matcher_state, Table_Index_Type => matcher_state_index, Table_Low_Bound => No_Matcher_State + 1, Table_Initial => 15, Table_Increment => 10); type matcher_state_array is array (matcher_state_index range <>) of matcher_state_index; -- Each element in the array is the currently active state at that -- level. so Arr(2) is nested in Arr(1),... type active_state_iterator (Max : matcher_state_index) is record Ignore_If_Nested : Boolean; Ignore_If_Default : Boolean; States : matcher_state_array (1 .. Max); Current_Level : matcher_state_index := No_Matcher_State; end record; No_Active_State_Iterator : constant active_state_iterator := (0, False, False, (1 .. 0 => No_Matcher_State), No_Matcher_State); type nfa_matcher is new abstract_nfa_matcher with record NFA : nfa_access; Active : Matcher_State_Arrays.instance; First_Active : matcher_state_index := No_Matcher_State; end record; -- [First_Active] is the first active state at the toplevel. end Matchers; private type transition_id is new state; type transition_kind is (transition_on_empty, transition_on_symbol, transition_on_exit_empty, transition_on_exit_symbol); type transition (Kind : transition_kind := transition_on_empty) is record To_State : state; -- State the transition is pointing to. Next_For_State : transition_id; -- Next transition from the same state. This implements a list of -- transitions. case Kind is when transition_on_empty | transition_on_exit_empty => null; when others => Sym : transition_symbol; end case; end record; No_Transition : constant transition_id := 0; Start_State : constant state := 1; -- Exists in NFA.States Final_State : constant state := state'last; -- Not shown in NFA.States No_State : constant state := 0; type state_data is record First_Transition : transition_id; -- The first element in the list of transitions from this state. Nested : state := No_State; -- If defined, indicates that this state contains a nested state -- machine, for which the initial state is Nested. Any transition -- to this state will also activate [Nested]. Data : aliased state_user_data; -- Custom data associated with each state. end record; package Transition_Tables is new GNAT.Dynamic_Tables (Table_Component_Type => transition, Table_Index_Type => transition_id, Table_Low_Bound => No_Transition + 1, Table_Initial => Default_Transition_Count, Table_Increment => 200); subtype transition_table is Transition_Tables.instance; package State_Tables is new GNAT.Dynamic_Tables (Table_Component_Type => state_data, Table_Index_Type => state, Table_Low_Bound => Start_State, Table_Initial => Default_State_Count, Table_Increment => 200); subtype state_table is State_Tables.instance; type nfa is tagged record States : state_table; Transitions : transition_table; States_Are_Statefull : Boolean := True; end record; type nested_nfa is record Default_Start : state; end record; No_Nested : constant nested_nfa := (Default_Start => No_State); type nfa_snapshot is record States : state; Transitions : transition_id; Start_State_Transition : transition_id; -- Specific to the schema reader (?): since parsing other grammars will -- modify the start state transitions (and only this one) to add valid -- toplevel elements, we need to reset the list of transitions for the -- start state. end record; No_NFA_Snapshot : constant nfa_snapshot := (No_State, No_Transition, No_Transition); end Sax.State_Machines;