diff options
Diffstat (limited to 'debian/pyrex/pyrex-0.9.9/Pyrex/Plex')
| -rwxr-xr-x | debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Actions.py | 109 | ||||
| -rwxr-xr-x | debian/pyrex/pyrex-0.9.9/Pyrex/Plex/DFA.py | 156 | ||||
| -rwxr-xr-x | debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Errors.py | 52 | ||||
| -rwxr-xr-x | debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Lexicons.py | 192 | ||||
| -rwxr-xr-x | debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Machines.py | 326 | ||||
| -rwxr-xr-x | debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Regexps.py | 557 | ||||
| -rwxr-xr-x | debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Scanners.py | 377 | ||||
| -rwxr-xr-x | debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Timing.py | 22 | ||||
| -rwxr-xr-x | debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Traditional.py | 154 | ||||
| -rwxr-xr-x | debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Transitions.py | 253 | ||||
| -rwxr-xr-x | debian/pyrex/pyrex-0.9.9/Pyrex/Plex/__init__.py | 40 | ||||
| -rwxr-xr-x | debian/pyrex/pyrex-0.9.9/Pyrex/Plex/test_tm.py | 24 |
12 files changed, 0 insertions, 2262 deletions
diff --git a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Actions.py b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Actions.py deleted file mode 100755 index 23253a90..00000000 --- a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Actions.py +++ /dev/null @@ -1,109 +0,0 @@ -#======================================================================= -# -# Python Lexical Analyser -# -# Actions for use in token specifications -# -#======================================================================= - -class Action: - - def same_as(self, other): - return self is other - - -class Return(Action): - """ - Internal Plex action which causes |value| to - be returned as the value of the associated token - """ - - value = None - - def __init__(self, value): - self.value = value - - def perform(self, token_stream, text): - return self.value - - def same_as(self, other): - return isinstance(other, Return) and self.value == other.value - - def __repr__(self): - return "Return(%s)" % repr(self.value) - - -class Call(Action): - """ - Internal Plex action which causes a function to be called. - """ - - function = None - - def __init__(self, function): - self.function = function - - def perform(self, token_stream, text): - return self.function(token_stream, text) - - def __repr__(self): - return "Call(%s)" % self.function.__name__ - - def same_as(self, other): - return isinstance(other, Call) and self.function is other.function - - -class Begin(Action): - """ - Begin(state_name) is a Plex action which causes the Scanner to - enter the state |state_name|. See the docstring of Plex.Lexicon - for more information. - """ - - state_name = None - - def __init__(self, state_name): - self.state_name = state_name - - def perform(self, token_stream, text): - token_stream.begin(self.state_name) - - def __repr__(self): - return "Begin(%s)" % self.state_name - - def same_as(self, other): - return isinstance(other, Begin) and self.state_name == other.state_name - - -class Ignore(Action): - """ - IGNORE is a Plex action which causes its associated token - to be ignored. See the docstring of Plex.Lexicon for more - information. - """ - def perform(self, token_stream, text): - return None - - def __repr__(self): - return "IGNORE" - -IGNORE = Ignore() -IGNORE.__doc__ = Ignore.__doc__ - -class Text(Action): - """ - TEXT is a Plex action which causes the text of a token to - be returned as the value of the token. See the docstring of - Plex.Lexicon for more information. - """ - - def perform(self, token_stream, text): - return text - - def __repr__(self): - return "TEXT" - -TEXT = Text() -TEXT.__doc__ = Text.__doc__ - - diff --git a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/DFA.py b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/DFA.py deleted file mode 100755 index 2c0004b0..00000000 --- a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/DFA.py +++ /dev/null @@ -1,156 +0,0 @@ -#======================================================================= -# -# Python Lexical Analyser -# -# Converting NFA to DFA -# -#======================================================================= - -import Machines -from Machines import LOWEST_PRIORITY -from Transitions import TransitionMap - -def nfa_to_dfa(old_machine, debug = None): - """ - Given a nondeterministic Machine, return a new equivalent - Machine which is deterministic. - """ - # We build a new machine whose states correspond to sets of states - # in the old machine. Initially we add a new state corresponding to - # the epsilon-closure of each initial old state. Then we give transitions - # to each new state which are the union of all transitions out of any - # of the corresponding old states. The new state reached on a given - # character is the one corresponding to the set of states reachable - # on that character from any of the old states. As new combinations of - # old states are created, new states are added as needed until closure - # is reached. - new_machine = Machines.FastMachine() - state_map = StateMap(new_machine) - # Seed the process using the initial states of the old machine. - # Make the corresponding new states into initial states of the new - # machine with the same names. - for (key, old_state) in old_machine.initial_states.items(): - new_state = state_map.old_to_new(epsilon_closure(old_state)) - new_machine.make_initial_state(key, new_state) - # Tricky bit here: we add things to the end of this list while we're - # iterating over it. The iteration stops when closure is achieved. - for new_state in new_machine.states: - transitions = TransitionMap() - for old_state in state_map.new_to_old(new_state).keys(): - for event, old_target_states in old_state.transitions.items(): - if event and old_target_states: - transitions.add_set(event, set_epsilon_closure(old_target_states)) - for event, old_states in transitions.items(): - new_machine.add_transitions(new_state, event, state_map.old_to_new(old_states)) - if debug: - debug.write("\n===== State Mapping =====\n") - state_map.dump(debug) - return new_machine - -def set_epsilon_closure(state_set): - """ - Given a set of states, return the union of the epsilon - closures of its member states. - """ - result = {} - for state1 in state_set.keys(): - for state2 in epsilon_closure(state1).keys(): - result[state2] = 1 - return result - -def epsilon_closure(state): - """ - Return the set of states reachable from the given state - by epsilon moves. - """ - # Cache the result - result = state.epsilon_closure - if result is None: - result = {} - state.epsilon_closure = result - add_to_epsilon_closure(result, state) - return result - -def add_to_epsilon_closure(state_set, state): - """ - Recursively add to |state_set| states reachable from the given state - by epsilon moves. - """ - if not state_set.get(state, 0): - state_set[state] = 1 - state_set_2 = state.transitions.get_epsilon() - if state_set_2: - for state2 in state_set_2.keys(): - add_to_epsilon_closure(state_set, state2) - -class StateMap: - """ - Helper class used by nfa_to_dfa() to map back and forth between - sets of states from the old machine and states of the new machine. - """ - new_machine = None # Machine - old_to_new_dict = None # {(old_state,...) : new_state} - new_to_old_dict = None # {id(new_state) : old_state_set} - - def __init__(self, new_machine): - self.new_machine = new_machine - self.old_to_new_dict = {} - self.new_to_old_dict= {} - - def old_to_new(self, old_state_set): - """ - Return the state of the new machine corresponding to the - set of old machine states represented by |state_set|. A new - state will be created if necessary. If any of the old states - are accepting states, the new state will be an accepting state - with the highest priority action from the old states. - """ - key = self.make_key(old_state_set) - new_state = self.old_to_new_dict.get(key, None) - if not new_state: - action = self.highest_priority_action(old_state_set) - new_state = self.new_machine.new_state(action) - self.old_to_new_dict[key] = new_state - self.new_to_old_dict[id(new_state)] = old_state_set - #for old_state in old_state_set.keys(): - #new_state.merge_actions(old_state) - return new_state - - def highest_priority_action(self, state_set): - best_action = None - best_priority = LOWEST_PRIORITY - for state in state_set.keys(): - priority = state.action_priority - if priority > best_priority: - best_action = state.action - best_priority = priority - return best_action - -# def old_to_new_set(self, old_state_set): -# """ -# Return the new state corresponding to a set of old states as -# a singleton set. -# """ -# return {self.old_to_new(old_state_set):1} - - def new_to_old(self, new_state): - """Given a new state, return a set of corresponding old states.""" - return self.new_to_old_dict[id(new_state)] - - def make_key(self, state_set): - """ - Convert a set of states into a uniquified - sorted tuple suitable for use as a dictionary key. - """ - lst = state_set.keys() - lst.sort() - return tuple(lst) - - def dump(self, file): - from Transitions import state_set_str - for new_state in self.new_machine.states: - old_state_set = self.new_to_old_dict[id(new_state)] - file.write(" State %s <-- %s\n" % ( - new_state['number'], state_set_str(old_state_set))) - - diff --git a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Errors.py b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Errors.py deleted file mode 100755 index ae033672..00000000 --- a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Errors.py +++ /dev/null @@ -1,52 +0,0 @@ -#======================================================================= -# -# Python Lexical Analyser -# -# Exception classes -# -#======================================================================= - -import exceptions - -class PlexError(exceptions.Exception): - message = "" - -class PlexTypeError(PlexError, TypeError): - pass - -class PlexValueError(PlexError, ValueError): - pass - -class InvalidRegex(PlexError): - pass - -class InvalidToken(PlexError): - - def __init__(self, token_number, message): - PlexError.__init__(self, "Token number %d: %s" % (token_number, message)) - -class InvalidScanner(PlexError): - pass - -class AmbiguousAction(PlexError): - message = "Two tokens with different actions can match the same string" - - def __init__(self): - pass - -class UnrecognizedInput(PlexError): - scanner = None - position = None - state_name = None - - def __init__(self, scanner, state_name): - self.scanner = scanner - self.position = scanner.position() - self.state_name = state_name - - def __str__(self): - return ("'%s', line %d, char %d: Token not recognised in state %s" - % (self.position + (repr(self.state_name),))) - - - diff --git a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Lexicons.py b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Lexicons.py deleted file mode 100755 index 32b12c43..00000000 --- a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Lexicons.py +++ /dev/null @@ -1,192 +0,0 @@ -#======================================================================= -# -# Python Lexical Analyser -# -# Lexical Analyser Specification -# -#======================================================================= - -import types - -import Actions -import DFA -import Errors -import Machines -import Regexps - -# debug_flags for Lexicon constructor -DUMP_NFA = 1 -DUMP_DFA = 2 - -class State: - """ - This class is used as part of a Plex.Lexicon specification to - introduce a user-defined state. - - Constructor: - - State(name, token_specifications) - """ - - name = None - tokens = None - - def __init__(self, name, tokens): - self.name = name - self.tokens = tokens - -class Lexicon: - """ - Lexicon(specification) builds a lexical analyser from the given - |specification|. The specification consists of a list of - specification items. Each specification item may be either: - - 1) A token definition, which is a tuple: - - (pattern, action) - - The |pattern| is a regular axpression built using the - constructors defined in the Plex module. - - The |action| is the action to be performed when this pattern - is recognised (see below). - - 2) A state definition: - - State(name, tokens) - - where |name| is a character string naming the state, - and |tokens| is a list of token definitions as - above. The meaning and usage of states is described - below. - - Actions - ------- - - The |action| in a token specication may be one of three things: - - 1) A function, which is called as follows: - - function(scanner, text) - - where |scanner| is the relevant Scanner instance, and |text| - is the matched text. If the function returns anything - other than None, that value is returned as the value of the - token. If it returns None, scanning continues as if the IGNORE - action were specified (see below). - - 2) One of the following special actions: - - IGNORE means that the recognised characters will be treated as - white space and ignored. Scanning will continue until - the next non-ignored token is recognised before returning. - - TEXT causes the scanned text itself to be returned as the - value of the token. - - 3) Any other value, which is returned as the value of the token. - - States - ------ - - At any given time, the scanner is in one of a number of states. - Associated with each state is a set of possible tokens. When scanning, - only tokens associated with the current state are recognised. - - There is a default state, whose name is the empty string. Token - definitions which are not inside any State definition belong to - the default state. - - The initial state of the scanner is the default state. The state can - be changed in one of two ways: - - 1) Using Begin(state_name) as the action of a token. - - 2) Calling the begin(state_name) method of the Scanner. - - To change back to the default state, use '' as the state name. - """ - - machine = None # Machine - tables = None # StateTableMachine - - def __init__(self, specifications, debug = None, debug_flags = 7, timings = None): - if type(specifications) <> types.ListType: - raise Errors.InvalidScanner("Scanner definition is not a list") - if timings: - from Timing import time - total_time = 0.0 - time1 = time() - nfa = Machines.Machine() - default_initial_state = nfa.new_initial_state('') - token_number = 1 - for spec in specifications: - if isinstance(spec, State): - user_initial_state = nfa.new_initial_state(spec.name) - for token in spec.tokens: - self.add_token_to_machine( - nfa, user_initial_state, token, token_number) - token_number = token_number + 1 - elif type(spec) == types.TupleType: - self.add_token_to_machine( - nfa, default_initial_state, spec, token_number) - token_number = token_number + 1 - else: - raise Errors.InvalidToken( - token_number, - "Expected a token definition (tuple) or State instance") - if timings: - time2 = time() - total_time = total_time + (time2 - time1) - time3 = time() - if debug and (debug_flags & 1): - debug.write("\n============= NFA ===========\n") - nfa.dump(debug) - dfa = DFA.nfa_to_dfa(nfa, debug = (debug_flags & 3) == 3 and debug) - if timings: - time4 = time() - total_time = total_time + (time4 - time3) - if debug and (debug_flags & 2): - debug.write("\n============= DFA ===========\n") - dfa.dump(debug) - if timings: - timings.write("Constructing NFA : %5.2f\n" % (time2 - time1)) - timings.write("Converting to DFA: %5.2f\n" % (time4 - time3)) - timings.write("TOTAL : %5.2f\n" % total_time) - self.machine = dfa - - def add_token_to_machine(self, machine, initial_state, token_spec, token_number): - try: - (re, action_spec) = self.parse_token_definition(token_spec) - # Disabled this -- matching empty strings can be useful - #if re.nullable: - # raise Errors.InvalidToken( - # token_number, "Pattern can match 0 input symbols") - if isinstance(action_spec, Actions.Action): - action = action_spec - elif callable(action_spec): - action = Actions.Call(action_spec) - else: - action = Actions.Return(action_spec) - final_state = machine.new_state() - re.build_machine(machine, initial_state, final_state, - match_bol = 1, nocase = 0) - final_state.set_action(action, priority = -token_number) - except Errors.PlexError, e: - raise e.__class__("Token number %d: %s" % (token_number, e)) - - def parse_token_definition(self, token_spec): - if type(token_spec) <> types.TupleType: - raise Errors.InvalidToken("Token definition is not a tuple") - if len(token_spec) <> 2: - raise Errors.InvalidToken("Wrong number of items in token definition") - pattern, action = token_spec - if not isinstance(pattern, Regexps.RE): - raise Errors.InvalidToken("Pattern is not an RE instance") - return (pattern, action) - - def get_initial_state(self, name): - return self.machine.get_initial_state(name) - - - diff --git a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Machines.py b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Machines.py deleted file mode 100755 index fb9ba717..00000000 --- a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Machines.py +++ /dev/null @@ -1,326 +0,0 @@ -#======================================================================= -# -# Python Lexical Analyser -# -# Classes for building NFAs and DFAs -# -#======================================================================= - -import string -import sys -from sys import maxint -from types import TupleType - -from Transitions import TransitionMap - -LOWEST_PRIORITY = -sys.maxint - -class Machine: - """A collection of Nodes representing an NFA or DFA.""" - states = None # [Node] - next_state_number = 1 - initial_states = None # {(name, bol): Node} - - def __init__(self): - self.states = [] - self.initial_states = {} - - def __del__(self): - #print "Destroying", self ### - for state in self.states: - state.destroy() - - def new_state(self): - """Add a new state to the machine and return it.""" - s = Node() - n = self.next_state_number - self.next_state_number = n + 1 - s.number = n - self.states.append(s) - return s - - def new_initial_state(self, name): - state = self.new_state() - self.make_initial_state(name, state) - return state - - def make_initial_state(self, name, state): - self.initial_states[name] = state - - def get_initial_state(self, name): - return self.initial_states[name] - - def dump(self, file): - file.write("Plex.Machine:\n") - if self.initial_states is not None: - file.write(" Initial states:\n") - for (name, state) in self.initial_states.items(): - file.write(" '%s': %d\n" % (name, state.number)) - for s in self.states: - s.dump(file) - -class Node: - """A state of an NFA or DFA.""" - transitions = None # TransitionMap - action = None # Action - action_priority = None # integer - number = 0 # for debug output - epsilon_closure = None # used by nfa_to_dfa() - - def __init__(self): - # Preinitialise the list of empty transitions, because - # the nfa-to-dfa algorithm needs it - #self.transitions = {'':[]} - self.transitions = TransitionMap() - self.action_priority = LOWEST_PRIORITY - - def destroy(self): - #print "Destroying", self ### - self.transitions = None - self.action = None - self.epsilon_closure = None - - def add_transition(self, event, new_state): - self.transitions.add(event, new_state) - - def link_to(self, state): - """Add an epsilon-move from this state to another state.""" - self.add_transition('', state) - - def set_action(self, action, priority): - """Make this an accepting state with the given action. If - there is already an action, choose the action with highest - priority.""" - if priority > self.action_priority: - self.action = action - self.action_priority = priority - - def get_action(self): - return self.action - - def get_action_priority(self): - return self.action_priority - -# def merge_actions(self, other_state): -# """Merge actions of other state into this state according -# to their priorities.""" -# action = other_state.get_action() -# priority = other_state.get_action_priority() -# self.set_action(action, priority) - - def is_accepting(self): - return self.action is not None - - def __str__(self): - return "State %d" % self.number - - def dump(self, file): - import string - # Header - file.write(" State %d:\n" % self.number) - # Transitions -# self.dump_transitions(file) - self.transitions.dump(file) - # Action - action = self.action - priority = self.action_priority - if action is not None: - file.write(" %s [priority %d]\n" % (action, priority)) - - -class FastMachine: - """ - FastMachine is a deterministic machine represented in a way that - allows fast scanning. - """ - initial_states = None # {state_name:state} - states = None # [state] - # where state = {event:state, 'else':state, 'action':Action} - next_number = 1 # for debugging - - new_state_template = { - '':None, 'bol':None, 'eol':None, 'eof':None, 'else':None - } - - def __init__(self, old_machine = None): - self.initial_states = initial_states = {} - self.states = [] - if old_machine: - self.old_to_new = old_to_new = {} - for old_state in old_machine.states: - new_state = self.new_state() - old_to_new[old_state] = new_state - for name, old_state in old_machine.initial_states.items(): - initial_states[name] = old_to_new[old_state] - for old_state in old_machine.states: - new_state = old_to_new[old_state] - for event, old_state_set in old_state.transitions.items(): - if old_state_set: - new_state[event] = old_to_new[old_state_set.keys()[0]] - else: - new_state[event] = None - new_state['action'] = old_state.action - - def __del__(self): - for state in self.states: - state.clear() - - def new_state(self, action = None): - number = self.next_number - self.next_number = number + 1 - result = self.new_state_template.copy() - result['number'] = number - result['action'] = action - self.states.append(result) - return result - - def make_initial_state(self, name, state): - self.initial_states[name] = state - - def add_transitions(self, state, event, new_state): - if type(event) == TupleType: - code0, code1 = event - if code0 == -maxint: - state['else'] = new_state - elif code1 <> maxint: - while code0 < code1: - state[chr(code0)] = new_state - code0 = code0 + 1 - else: - state[event] = new_state - - def get_initial_state(self, name): - return self.initial_states[name] - - def dump(self, file): - file.write("Plex.FastMachine:\n") - file.write(" Initial states:\n") - for name, state in self.initial_states.items(): - file.write(" %s: %s\n" % (repr(name), state['number'])) - for state in self.states: - self.dump_state(state, file) - - def dump_state(self, state, file): - import string - # Header - file.write(" State %d:\n" % state['number']) - # Transitions - self.dump_transitions(state, file) - # Action - action = state['action'] - if action is not None: - file.write(" %s\n" % action) - - def dump_transitions(self, state, file): - chars_leading_to_state = {} - special_to_state = {} - for (c, s) in state.items(): - if len(c) == 1: - chars = chars_leading_to_state.get(id(s), None) - if chars is None: - chars = [] - chars_leading_to_state[id(s)] = chars - chars.append(c) - elif len(c) <= 4: - special_to_state[c] = s - ranges_to_state = {} - for state in self.states: - char_list = chars_leading_to_state.get(id(state), None) - if char_list: - ranges = self.chars_to_ranges(char_list) - ranges_to_state[ranges] = state - ranges_list = ranges_to_state.keys() - ranges_list.sort() - for ranges in ranges_list: - key = self.ranges_to_string(ranges) - state = ranges_to_state[ranges] - file.write(" %s --> State %d\n" % (key, state['number'])) - for key in ('bol', 'eol', 'eof', 'else'): - state = special_to_state.get(key, None) - if state: - file.write(" %s --> State %d\n" % (key, state['number'])) - - def chars_to_ranges(self, char_list): - char_list.sort() - i = 0 - n = len(char_list) - result = [] - while i < n: - c1 = ord(char_list[i]) - c2 = c1 - i = i + 1 - while i < n and ord(char_list[i]) == c2 + 1: - i = i + 1 - c2 = c2 + 1 - result.append((chr(c1), chr(c2))) - return tuple(result) - - def ranges_to_string(self, range_list): - return string.join(map(self.range_to_string, range_list), ",") - - def range_to_string(self, (c1, c2)): - if c1 == c2: - return repr(c1) - else: - return "%s..%s" % (repr(c1), repr(c2)) -## -## (Superseded by Machines.FastMachine) -## -## class StateTableMachine: -## """ -## StateTableMachine is an alternative representation of a Machine -## that can be run more efficiently. -## """ -## initial_states = None # {state_name:state_index} -## states = None # [([state] indexed by char code, Action)] - -## special_map = {'bol':256, 'eol':257, 'eof':258} - -## def __init__(self, m): -## """ -## Initialise StateTableMachine from Machine |m|. -## """ -## initial_states = self.initial_states = {} -## states = self.states = [None] -## old_to_new = {} -## i = 1 -## for old_state in m.states: -## new_state = ([0] * 259, old_state.get_action()) -## states.append(new_state) -## old_to_new[old_state] = i # new_state -## i = i + 1 -## for name, old_state in m.initial_states.items(): -## initial_states[name] = old_to_new[old_state] -## for old_state in m.states: -## new_state_index = old_to_new[old_state] -## new_table = states[new_state_index][0] -## transitions = old_state.transitions -## for c, old_targets in transitions.items(): -## if old_targets: -## old_target = old_targets[0] -## new_target_index = old_to_new[old_target] -## if len(c) == 1: -## a = ord(c) -## else: -## a = self.special_map[c] -## new_table[a] = states[new_target_index] - -## def dump(self, f): -## f.write("Plex.StateTableMachine:\n") -## f.write(" Initial states:\n") -## for name, index in self.initial_states.items(): -## f.write(" %s: State %d\n" % ( -## repr(name), id(self.states[index]))) -## for i in xrange(1, len(self.states)): -## table, action = self.states[i] -## f.write(" State %d:" % i) -## if action: -## f.write("%s" % action) -## f.write("\n") -## f.write(" %s\n" % map(id,table)) - - - - - - diff --git a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Regexps.py b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Regexps.py deleted file mode 100755 index 6164d3bd..00000000 --- a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Regexps.py +++ /dev/null @@ -1,557 +0,0 @@ -#======================================================================= -# -# Python Lexical Analyser -# -# Regular Expressions -# -#======================================================================= - -import array -import string -import types -from sys import maxint - -import Errors - -# -# Constants -# - -BOL = 'bol' -EOL = 'eol' -EOF = 'eof' - -nl_code = ord('\n') - -# -# Helper functions -# - -def chars_to_ranges(s): - """ - Return a list of character codes consisting of pairs - [code1a, code1b, code2a, code2b,...] which cover all - the characters in |s|. - """ - char_list = list(s) - char_list.sort() - i = 0 - n = len(char_list) - result = [] - while i < n: - code1 = ord(char_list[i]) - code2 = code1 + 1 - i = i + 1 - while i < n and code2 >= ord(char_list[i]): - code2 = code2 + 1 - i = i + 1 - result.append(code1) - result.append(code2) - return result - -def uppercase_range(code1, code2): - """ - If the range of characters from code1 to code2-1 includes any - lower case letters, return the corresponding upper case range. - """ - code3 = max(code1, ord('a')) - code4 = min(code2, ord('z') + 1) - if code3 < code4: - d = ord('A') - ord('a') - return (code3 + d, code4 + d) - else: - return None - -def lowercase_range(code1, code2): - """ - If the range of characters from code1 to code2-1 includes any - upper case letters, return the corresponding lower case range. - """ - code3 = max(code1, ord('A')) - code4 = min(code2, ord('Z') + 1) - if code3 < code4: - d = ord('a') - ord('A') - return (code3 + d, code4 + d) - else: - return None - -def CodeRanges(code_list): - """ - Given a list of codes as returned by chars_to_ranges, return - an RE which will match a character in any of the ranges. - """ - re_list = [] - for i in xrange(0, len(code_list), 2): - re_list.append(CodeRange(code_list[i], code_list[i + 1])) - return apply(Alt, tuple(re_list)) - -def CodeRange(code1, code2): - """ - CodeRange(code1, code2) is an RE which matches any character - with a code |c| in the range |code1| <= |c| < |code2|. - """ - if code1 <= nl_code < code2: - return Alt(RawCodeRange(code1, nl_code), - RawNewline, - RawCodeRange(nl_code + 1, code2)) - else: - return RawCodeRange(code1, code2) - -# -# Abstract classes -# - -class RE: - """RE is the base class for regular expression constructors. - The following operators are defined on REs: - - re1 + re2 is an RE which matches |re1| followed by |re2| - re1 | re2 is an RE which matches either |re1| or |re2| - """ - - nullable = 1 # True if this RE can match 0 input symbols - match_nl = 1 # True if this RE can match a string ending with '\n' - str = None # Set to a string to override the class's __str__ result - - def build_machine(self, machine, initial_state, final_state, - match_bol, nocase): - """ - This method should add states to |machine| to implement this - RE, starting at |initial_state| and ending at |final_state|. - If |match_bol| is true, the RE must be able to match at the - beginning of a line. If nocase is true, upper and lower case - letters should be treated as equivalent. - """ - raise exceptions.UnimplementedMethod("%s.build_machine not implemented" % - self.__class__.__name__) - - def build_opt(self, m, initial_state, c): - """ - Given a state |s| of machine |m|, return a new state - reachable from |s| on character |c| or epsilon. - """ - s = m.new_state() - initial_state.link_to(s) - initial_state.add_transition(c, s) - return s - - def __add__(self, other): - return Seq(self, other) - - def __or__(self, other): - return Alt(self, other) - - def __str__(self): - if self.str: - return self.str - else: - return self.calc_str() - - def check_re(self, num, value): - if not isinstance(value, RE): - self.wrong_type(num, value, "Plex.RE instance") - - def check_string(self, num, value): - if type(value) <> type(''): - self.wrong_type(num, value, "string") - - def check_char(self, num, value): - self.check_string(num, value) - if len(value) <> 1: - raise Errors.PlexValueError("Invalid value for argument %d of Plex.%s." - "Expected a string of length 1, got: %s" % ( - num, self.__class__.__name__, repr(value))) - - def wrong_type(self, num, value, expected): - if type(value) == types.InstanceType: - got = "%s.%s instance" % ( - value.__class__.__module__, value.__class__.__name__) - else: - got = type(value).__name__ - raise Errors.PlexTypeError("Invalid type for argument %d of Plex.%s " - "(expected %s, got %s" % ( - num, self.__class__.__name__, expected, got)) - -# -# Primitive RE constructors -# ------------------------- -# -# These are the basic REs from which all others are built. -# - -## class Char(RE): -## """ -## Char(c) is an RE which matches the character |c|. -## """ - -## nullable = 0 - -## def __init__(self, char): -## self.char = char -## self.match_nl = char == '\n' - -## def build_machine(self, m, initial_state, final_state, match_bol, nocase): -## c = self.char -## if match_bol and c <> BOL: -## s1 = self.build_opt(m, initial_state, BOL) -## else: -## s1 = initial_state -## if c == '\n' or c == EOF: -## s1 = self.build_opt(m, s1, EOL) -## if len(c) == 1: -## code = ord(self.char) -## s1.add_transition((code, code+1), final_state) -## if nocase and is_letter_code(code): -## code2 = other_case_code(code) -## s1.add_transition((code2, code2+1), final_state) -## else: -## s1.add_transition(c, final_state) - -## def calc_str(self): -## return "Char(%s)" % repr(self.char) - -def Char(c): - """ - Char(c) is an RE which matches the character |c|. - """ - if len(c) == 1: - result = CodeRange(ord(c), ord(c) + 1) - else: - result = SpecialSymbol(c) - result.str = "Char(%s)" % repr(c) - return result - -class RawCodeRange(RE): - """ - RawCodeRange(code1, code2) is a low-level RE which matches any character - with a code |c| in the range |code1| <= |c| < |code2|, where the range - does not include newline. For internal use only. - """ - nullable = 0 - match_nl = 0 - range = None # (code, code) - uppercase_range = None # (code, code) or None - lowercase_range = None # (code, code) or None - - def __init__(self, code1, code2): - self.range = (code1, code2) - self.uppercase_range = uppercase_range(code1, code2) - self.lowercase_range = lowercase_range(code1, code2) - - def build_machine(self, m, initial_state, final_state, match_bol, nocase): - if match_bol: - initial_state = self.build_opt(m, initial_state, BOL) - initial_state.add_transition(self.range, final_state) - if nocase: - if self.uppercase_range: - initial_state.add_transition(self.uppercase_range, final_state) - if self.lowercase_range: - initial_state.add_transition(self.lowercase_range, final_state) - - def calc_str(self): - return "CodeRange(%d,%d)" % (self.code1, self.code2) - -class _RawNewline(RE): - """ - RawNewline is a low-level RE which matches a newline character. - For internal use only. - """ - nullable = 0 - match_nl = 1 - - def build_machine(self, m, initial_state, final_state, match_bol, nocase): - if match_bol: - initial_state = self.build_opt(m, initial_state, BOL) - s = self.build_opt(m, initial_state, EOL) - s.add_transition((nl_code, nl_code + 1), final_state) - -RawNewline = _RawNewline() - - -class SpecialSymbol(RE): - """ - SpecialSymbol(sym) is an RE which matches the special input - symbol |sym|, which is one of BOL, EOL or EOF. - """ - nullable = 0 - match_nl = 0 - sym = None - - def __init__(self, sym): - self.sym = sym - - def build_machine(self, m, initial_state, final_state, match_bol, nocase): - # Sequences 'bol bol' and 'bol eof' are impossible, so only need - # to allow for bol if sym is eol - if match_bol and self.sym == EOL: - initial_state = self.build_opt(m, initial_state, BOL) - initial_state.add_transition(self.sym, final_state) - - -class Seq(RE): - """Seq(re1, re2, re3...) is an RE which matches |re1| followed by - |re2| followed by |re3|...""" - - def __init__(self, *re_list): - nullable = 1 - for i in xrange(len(re_list)): - re = re_list[i] - self.check_re(i, re) - nullable = nullable and re.nullable - self.re_list = re_list - self.nullable = nullable - i = len(re_list) - match_nl = 0 - while i: - i = i - 1 - re = re_list[i] - if re.match_nl: - match_nl = 1 - break - if not re.nullable: - break - self.match_nl = match_nl - - def build_machine(self, m, initial_state, final_state, match_bol, nocase): - re_list = self.re_list - if len(re_list) == 0: - initial_state.link_to(final_state) - else: - s1 = initial_state - n = len(re_list) - for i in xrange(n): - if i < n - 1: - s2 = m.new_state() - else: - s2 = final_state - re = re_list[i] - re.build_machine(m, s1, s2, match_bol, nocase) - s1 = s2 - match_bol = re.match_nl or (match_bol and re.nullable) - - def calc_str(self): - return "Seq(%s)" % string.join(map(str, self.re_list), ",") - - -class Alt(RE): - """Alt(re1, re2, re3...) is an RE which matches either |re1| or - |re2| or |re3|...""" - - def __init__(self, *re_list): - self.re_list = re_list - nullable = 0 - match_nl = 0 - nullable_res = [] - non_nullable_res = [] - i = 1 - for re in re_list: - self.check_re(i, re) - if re.nullable: - nullable_res.append(re) - nullable = 1 - else: - non_nullable_res.append(re) - if re.match_nl: - match_nl = 1 - i = i + 1 - self.nullable_res = nullable_res - self.non_nullable_res = non_nullable_res - self.nullable = nullable - self.match_nl = match_nl - - def build_machine(self, m, initial_state, final_state, match_bol, nocase): - for re in self.nullable_res: - re.build_machine(m, initial_state, final_state, match_bol, nocase) - if self.non_nullable_res: - if match_bol: - initial_state = self.build_opt(m, initial_state, BOL) - for re in self.non_nullable_res: - re.build_machine(m, initial_state, final_state, 0, nocase) - - def calc_str(self): - return "Alt(%s)" % string.join(map(str, self.re_list), ",") - - -class Rep1(RE): - """Rep1(re) is an RE which matches one or more repetitions of |re|.""" - - def __init__(self, re): - self.check_re(1, re) - self.re = re - self.nullable = re.nullable - self.match_nl = re.match_nl - - def build_machine(self, m, initial_state, final_state, match_bol, nocase): - s1 = m.new_state() - s2 = m.new_state() - initial_state.link_to(s1) - self.re.build_machine(m, s1, s2, match_bol or self.re.match_nl, nocase) - s2.link_to(s1) - s2.link_to(final_state) - - def calc_str(self): - return "Rep1(%s)" % self.re - - -class SwitchCase(RE): - """ - SwitchCase(re, nocase) is an RE which matches the same strings as RE, - but treating upper and lower case letters according to |nocase|. If - |nocase| is true, case is ignored, otherwise it is not. - """ - re = None - nocase = None - - def __init__(self, re, nocase): - self.re = re - self.nocase = nocase - self.nullable = re.nullable - self.match_nl = re.match_nl - - def build_machine(self, m, initial_state, final_state, match_bol, nocase): - self.re.build_machine(m, initial_state, final_state, match_bol, - self.nocase) - - def calc_str(self): - if self.nocase: - name = "NoCase" - else: - name = "Case" - return "%s(%s)" % (name, self.re) - -# -# Composite RE constructors -# ------------------------- -# -# These REs are defined in terms of the primitive REs. -# - -Empty = Seq() -Empty.__doc__ = \ - """ - Empty is an RE which matches the empty string. - """ -Empty.str = "Empty" - -def Str1(s): - """ - Str1(s) is an RE which matches the literal string |s|. - """ - result = apply(Seq, tuple(map(Char, s))) - result.str = "Str(%s)" % repr(s) - return result - -def Str(*strs): - """ - Str(s) is an RE which matches the literal string |s|. - Str(s1, s2, s3, ...) is an RE which matches any of |s1| or |s2| or |s3|... - """ - if len(strs) == 1: - return Str1(strs[0]) - else: - result = apply(Alt, tuple(map(Str1, strs))) - result.str = "Str(%s)" % string.join(map(repr, strs), ",") - return result - -def Any(s): - """ - Any(s) is an RE which matches any character in the string |s|. - """ - #result = apply(Alt, tuple(map(Char, s))) - result = CodeRanges(chars_to_ranges(s)) - result.str = "Any(%s)" % repr(s) - return result - -def AnyBut(s): - """ - AnyBut(s) is an RE which matches any character (including - newline) which is not in the string |s|. - """ - ranges = chars_to_ranges(s) - ranges.insert(0, -maxint) - ranges.append(maxint) - result = CodeRanges(ranges) - result.str = "AnyBut(%s)" % repr(s) - return result - -AnyChar = AnyBut("") -AnyChar.__doc__ = \ - """ - AnyChar is an RE which matches any single character (including a newline). - """ -AnyChar.str = "AnyChar" - -def Range(s1, s2 = None): - """ - Range(c1, c2) is an RE which matches any single character in the range - |c1| to |c2| inclusive. - Range(s) where |s| is a string of even length is an RE which matches - any single character in the ranges |s[0]| to |s[1]|, |s[2]| to |s[3]|,... - """ - if s2: - result = CodeRange(ord(s1), ord(s2) + 1) - result.str = "Range(%s,%s)" % (s1, s2) - else: - ranges = [] - for i in range(0, len(s1), 2): - ranges.append(CodeRange(ord(s1[i]), ord(s1[i+1]) + 1)) - result = apply(Alt, tuple(ranges)) - result.str = "Range(%s)" % repr(s1) - return result - -def Opt(re): - """ - Opt(re) is an RE which matches either |re| or the empty string. - """ - result = Alt(re, Empty) - result.str = "Opt(%s)" % re - return result - -def Rep(re): - """ - Rep(re) is an RE which matches zero or more repetitions of |re|. - """ - result = Opt(Rep1(re)) - result.str = "Rep(%s)" % re - return result - -def NoCase(re): - """ - NoCase(re) is an RE which matches the same strings as RE, but treating - upper and lower case letters as equivalent. - """ - return SwitchCase(re, nocase = 1) - -def Case(re): - """ - Case(re) is an RE which matches the same strings as RE, but treating - upper and lower case letters as distinct, i.e. it cancels the effect - of any enclosing NoCase(). - """ - return SwitchCase(re, nocase = 0) - -# -# RE Constants -# - -Bol = Char(BOL) -Bol.__doc__ = \ - """ - Bol is an RE which matches the beginning of a line. - """ -Bol.str = "Bol" - -Eol = Char(EOL) -Eol.__doc__ = \ - """ - Eol is an RE which matches the end of a line. - """ -Eol.str = "Eol" - -Eof = Char(EOF) -Eof.__doc__ = \ - """ - Eof is an RE which matches the end of the file. - """ -Eof.str = "Eof" - diff --git a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Scanners.py b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Scanners.py deleted file mode 100755 index 6278d88b..00000000 --- a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Scanners.py +++ /dev/null @@ -1,377 +0,0 @@ -#======================================================================= -# -# Python Lexical Analyser -# -# -# Scanning an input stream -# -#======================================================================= - -import Errors -from Regexps import BOL, EOL, EOF - -class Scanner: - """ - A Scanner is used to read tokens from a stream of characters - using the token set specified by a Plex.Lexicon. - - Constructor: - - Scanner(lexicon, stream, name = '') - - See the docstring of the __init__ method for details. - - Methods: - - See the docstrings of the individual methods for more - information. - - read() --> (value, text) - Reads the next lexical token from the stream. - - position() --> (name, line, col) - Returns the position of the last token read using the - read() method. - - begin(state_name) - Causes scanner to change state. - - produce(value [, text]) - Causes return of a token value to the caller of the - Scanner. - - """ - - lexicon = None # Lexicon - stream = None # file-like object - name = '' - buffer = '' - buf_start_pos = 0 # position in input of start of buffer - next_pos = 0 # position in input of next char to read - cur_pos = 0 # position in input of current char - cur_line = 1 # line number of current char - cur_line_start = 0 # position in input of start of current line - start_pos = 0 # position in input of start of token - start_line = 0 # line number of start of token - start_col = 0 # position in line of start of token - text = None # text of last token read - initial_state = None # Node - state_name = '' # Name of initial state - queue = None # list of tokens to be returned - trace = 0 - - def __init__(self, lexicon, stream, name = ''): - """ - Scanner(lexicon, stream, name = '') - - |lexicon| is a Plex.Lexicon instance specifying the lexical tokens - to be recognised. - - |stream| can be a file object or anything which implements a - compatible read() method. - - |name| is optional, and may be the name of the file being - scanned or any other identifying string. - """ - self.lexicon = lexicon - self.stream = stream - self.name = name - self.queue = [] - self.initial_state = None - self.begin('') - self.next_pos = 0 - self.cur_pos = 0 - self.cur_line_start = 0 - self.cur_char = BOL - self.input_state = 1 - - def read(self): - """ - Read the next lexical token from the stream and return a - tuple (value, text), where |value| is the value associated with - the token as specified by the Lexicon, and |text| is the actual - string read from the stream. Returns (None, '') on end of file. - """ - queue = self.queue - while not queue: - self.text, action = self.scan_a_token() - if action is None: - self.produce(None) - self.eof() - else: - value = action.perform(self, self.text) - if value is not None: - self.produce(value) - result = queue[0] - del queue[0] - return result - - def scan_a_token(self): - """ - Read the next input sequence recognised by the machine - and return (text, action). Returns ('', None) on end of - file. - """ - self.start_pos = self.cur_pos - self.start_line = self.cur_line - self.start_col = self.cur_pos - self.cur_line_start -# if self.trace: -# action = self.run_machine() -# else: -# action = self.run_machine_inlined() - action = self.run_machine_inlined() - if action: - if self.trace: - print "Scanner: read: Performing", action, "%d:%d" % ( - self.start_pos, self.cur_pos) - base = self.buf_start_pos - text = self.buffer[self.start_pos - base : self.cur_pos - base] - return (text, action) - else: - if self.cur_pos == self.start_pos: - if self.cur_char == EOL: - self.next_char() - if not self.cur_char or self.cur_char == EOF: - return ('', None) - raise Errors.UnrecognizedInput(self, self.state_name) - - def run_machine(self): - """ - Run the machine until no more transitions are possible. - """ - self.state = self.initial_state - self.backup_state = None - while self.transition(): - pass - return self.back_up() - - def run_machine_inlined(self): - """ - Inlined version of run_machine for speed. - """ - state = self.initial_state - cur_pos = self.cur_pos - cur_line = self.cur_line - cur_line_start = self.cur_line_start - cur_char = self.cur_char - input_state = self.input_state - next_pos = self.next_pos - buffer = self.buffer - buf_start_pos = self.buf_start_pos - buf_len = len(buffer) - backup_state = None - trace = self.trace - while 1: - if trace: #TRACE# - print "State %d, %d/%d:%s -->" % ( #TRACE# - state['number'], input_state, cur_pos, repr(cur_char)), #TRACE# - # Begin inlined self.save_for_backup() - #action = state.action #@slow - action = state['action'] #@fast - if action: - backup_state = ( - action, cur_pos, cur_line, cur_line_start, cur_char, input_state, next_pos) - # End inlined self.save_for_backup() - c = cur_char - #new_state = state.new_state(c) #@slow - new_state = state.get(c, -1) #@fast - if new_state == -1: #@fast - new_state = c and state.get('else') #@fast - if new_state: - if trace: #TRACE# - print "State %d" % new_state['number'] #TRACE# - state = new_state - # Begin inlined: self.next_char() - if input_state == 1: - cur_pos = next_pos - # Begin inlined: c = self.read_char() - buf_index = next_pos - buf_start_pos - if buf_index < buf_len: - c = buffer[buf_index] - next_pos = next_pos + 1 - else: - discard = self.start_pos - buf_start_pos - data = self.stream.read(0x1000) - buffer = self.buffer[discard:] + data - self.buffer = buffer - buf_start_pos = buf_start_pos + discard - self.buf_start_pos = buf_start_pos - buf_len = len(buffer) - buf_index = buf_index - discard - if data: - c = buffer[buf_index] - next_pos = next_pos + 1 - else: - c = '' - # End inlined: c = self.read_char() - if c == '\n': - cur_char = EOL - input_state = 2 - elif not c: - cur_char = EOL - input_state = 4 - else: - cur_char = c - elif input_state == 2: - cur_char = '\n' - input_state = 3 - elif input_state == 3: - cur_line = cur_line + 1 - cur_line_start = cur_pos = next_pos - cur_char = BOL - input_state = 1 - elif input_state == 4: - cur_char = EOF - input_state = 5 - else: # input_state = 5 - cur_char = '' - # End inlined self.next_char() - else: # not new_state - if trace: #TRACE# - print "blocked" #TRACE# - # Begin inlined: action = self.back_up() - if backup_state: - (action, cur_pos, cur_line, cur_line_start, - cur_char, input_state, next_pos) = backup_state - else: - action = None - break # while 1 - # End inlined: action = self.back_up() - self.cur_pos = cur_pos - self.cur_line = cur_line - self.cur_line_start = cur_line_start - self.cur_char = cur_char - self.input_state = input_state - self.next_pos = next_pos - if trace: #TRACE# - if action: #TRACE# - print "Doing", action #TRACE# - return action - -# def transition(self): -# self.save_for_backup() -# c = self.cur_char -# new_state = self.state.new_state(c) -# if new_state: -# if self.trace: -# print "Scanner: read: State %d: %s --> State %d" % ( -# self.state.number, repr(c), new_state.number) -# self.state = new_state -# self.next_char() -# return 1 -# else: -# if self.trace: -# print "Scanner: read: State %d: %s --> blocked" % ( -# self.state.number, repr(c)) -# return 0 - -# def save_for_backup(self): -# action = self.state.get_action() -# if action: -# if self.trace: -# print "Scanner: read: Saving backup point at", self.cur_pos -# self.backup_state = ( -# action, self.cur_pos, self.cur_line, self.cur_line_start, -# self.cur_char, self.input_state, self.next_pos) - -# def back_up(self): -# backup_state = self.backup_state -# if backup_state: -# (action, self.cur_pos, self.cur_line, self.cur_line_start, -# self.cur_char, self.input_state, self.next_pos) = backup_state -# if self.trace: -# print "Scanner: read: Backing up to", self.cur_pos -# return action -# else: -# return None - - def next_char(self): - input_state = self.input_state - if self.trace: - print "Scanner: next:", " "*20, "[%d] %d" % (input_state, self.cur_pos), - if input_state == 1: - self.cur_pos = self.next_pos - c = self.read_char() - if c == '\n': - self.cur_char = EOL - self.input_state = 2 - elif not c: - self.cur_char = EOL - self.input_state = 4 - else: - self.cur_char = c - elif input_state == 2: - self.cur_char = '\n' - self.input_state = 3 - elif input_state == 3: - self.cur_line = self.cur_line + 1 - self.cur_line_start = self.cur_pos = self.next_pos - self.cur_char = BOL - self.input_state = 1 - elif input_state == 4: - self.cur_char = EOF - self.input_state = 5 - else: # input_state = 5 - self.cur_char = '' - if self.trace: - print "--> [%d] %d %s" % (input_state, self.cur_pos, repr(self.cur_char)) - -# def read_char(self): -# """ -# Get the next input character, filling the buffer if necessary. -# Returns '' at end of file. -# """ -# next_pos = self.next_pos -# buf_index = next_pos - self.buf_start_pos -# if buf_index == len(self.buffer): -# discard = self.start_pos - self.buf_start_pos -# data = self.stream.read(0x1000) -# self.buffer = self.buffer[discard:] + data -# self.buf_start_pos = self.buf_start_pos + discard -# buf_index = buf_index - discard -# if not data: -# return '' -# c = self.buffer[buf_index] -# self.next_pos = next_pos + 1 -# return c - - def position(self): - """ - Return a tuple (name, line, col) representing the location of - the last token read using the read() method. |name| is the - name that was provided to the Scanner constructor; |line| - is the line number in the stream (1-based); |col| is the - position within the line of the first character of the token - (0-based). - """ - return (self.name, self.start_line, self.start_col) - - def begin(self, state_name): - """Set the current state of the scanner to the named state.""" - self.initial_state = ( - self.lexicon.get_initial_state(state_name)) - self.state_name = state_name - - def produce(self, value, text = None): - """ - Called from an action procedure, causes |value| to be returned - as the token value from read(). If |text| is supplied, it is - returned in place of the scanned text. - - produce() can be called more than once during a single call to an action - procedure, in which case the tokens are queued up and returned one - at a time by subsequent calls to read(), until the queue is empty, - whereupon scanning resumes. - """ - if text is None: - text = self.text - self.queue.append((value, text)) - - def eof(self): - """ - Override this method if you want something to be done at - end of file. - """ - -# For backward compatibility: -setattr(Scanner, "yield", Scanner.produce) diff --git a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Timing.py b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Timing.py deleted file mode 100755 index f47c5c89..00000000 --- a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Timing.py +++ /dev/null @@ -1,22 +0,0 @@ -# -# Get time in platform-dependent way -# - -import os -from sys import platform, exit, stderr - -if platform == 'mac': - import MacOS - def time(): - return MacOS.GetTicks() / 60.0 - timekind = "real" -elif hasattr(os, 'times'): - def time(): - t = os.times() - return t[0] + t[1] - timekind = "cpu" -else: - stderr.write( - "Don't know how to get time on platform %s\n" % repr(platform)) - exit(1) - diff --git a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Traditional.py b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Traditional.py deleted file mode 100755 index b3148c1e..00000000 --- a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Traditional.py +++ /dev/null @@ -1,154 +0,0 @@ -#======================================================================= -# -# Python Lexical Analyser -# -# Traditional Regular Expression Syntax -# -#======================================================================= - -from Regexps import * -from Errors import PlexError - -class RegexpSyntaxError(PlexError): - pass - -def re(s): - """ - Convert traditional string representation of regular expression |s| - into Plex representation. - """ - return REParser(s).parse_re() - -class REParser: - - def __init__(self, s): - self.s = s - self.i = -1 - self.end = 0 - self.next() - - def parse_re(self): - re = self.parse_alt() - if not self.end: - self.error("Unexpected %s" % repr(self.c)) - return re - - def parse_alt(self): - """Parse a set of alternative regexps.""" - re = self.parse_seq() - if self.c == '|': - re_list = [re] - while self.c == '|': - self.next() - re_list.append(self.parse_seq()) - re = apply(Alt, tuple(re_list)) - return re - - def parse_seq(self): - """Parse a sequence of regexps.""" - re_list = [] - while not self.end and not self.c in "|)": - re_list.append(self.parse_mod()) - return apply(Seq, tuple(re_list)) - - def parse_mod(self): - """Parse a primitive regexp followed by *, +, ? modifiers.""" - re = self.parse_prim() - while not self.end and self.c in "*+?": - if self.c == '*': - re = Rep(re) - elif self.c == '+': - re = Rep1(re) - else: # self.c == '?' - re = Opt(re) - self.next() - return re - - def parse_prim(self): - """Parse a primitive regexp.""" - c = self.get() - if c == '.': - re = AnyBut("\n") - elif c == '^': - re = Bol - elif c == '$': - re = Eol - elif c == '(': - re = self.parse_alt() - self.expect(')') - elif c == '[': - re = self.parse_charset() - self.expect(']') - else: - if c == '\\': - c = self.get() - re = Char(c) - return re - - def parse_charset(self): - """Parse a charset. Does not include the surrounding [].""" - char_list = [] - invert = 0 - if self.c == '^': - invert = 1 - self.next() - if self.c == ']': - char_list.append(']') - self.next() - while not self.end and self.c <> ']': - c1 = self.get() - if self.c == '-' and self.lookahead(1) <> ']': - self.next() - c2 = self.get() - for a in xrange(ord(c1), ord(c2) + 1): - char_list.append(chr(a)) - else: - char_list.append(c1) - chars = string.join(char_list, "") - if invert: - return AnyBut(chars) - else: - return Any(chars) - - def next(self): - """Advance to the next char.""" - s = self.s - i = self.i = self.i + 1 - if i < len(s): - self.c = s[i] - else: - self.c = '' - self.end = 1 - - def get(self): - if self.end: - self.error("Premature end of string") - c = self.c - self.next() - return c - - def lookahead(self, n): - """Look ahead n chars.""" - j = self.i + n - if j < len(self.s): - return self.s[j] - else: - return '' - - def expect(self, c): - """ - Expect to find character |c| at current position. - Raises an exception otherwise. - """ - if self.c == c: - self.next() - else: - self.error("Missing %s" % repr(c)) - - def error(self, mess): - """Raise exception to signal syntax error in regexp.""" - raise RegexpSyntaxError("Syntax error in regexp %s at position %d: %s" % ( - repr(self.s), self.i, mess)) - - - diff --git a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Transitions.py b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Transitions.py deleted file mode 100755 index c1edd5ef..00000000 --- a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/Transitions.py +++ /dev/null @@ -1,253 +0,0 @@ -# -# Plex - Transition Maps -# -# This version represents state sets direcly as dicts -# for speed. -# - -from copy import copy -import string -from sys import maxint -from types import TupleType - -class TransitionMap: - """ - A TransitionMap maps an input event to a set of states. - An input event is one of: a range of character codes, - the empty string (representing an epsilon move), or one - of the special symbols BOL, EOL, EOF. - - For characters, this implementation compactly represents - the map by means of a list: - - [code_0, states_0, code_1, states_1, code_2, states_2, - ..., code_n-1, states_n-1, code_n] - - where |code_i| is a character code, and |states_i| is a - set of states corresponding to characters with codes |c| - in the range |code_i| <= |c| <= |code_i+1|. - - The following invariants hold: - n >= 1 - code_0 == -maxint - code_n == maxint - code_i < code_i+1 for i in 0..n-1 - states_0 == states_n-1 - - Mappings for the special events '', BOL, EOL, EOF are - kept separately in a dictionary. - """ - - map = None # The list of codes and states - special = None # Mapping for special events - - def __init__(self, map = None, special = None): - if not map: - map = [-maxint, {}, maxint] - if not special: - special = {} - self.map = map - self.special = special - #self.check() ### - - def add(self, event, new_state, - TupleType = TupleType): - """ - Add transition to |new_state| on |event|. - """ - if type(event) == TupleType: - code0, code1 = event - i = self.split(code0) - j = self.split(code1) - map = self.map - while i < j: - map[i + 1][new_state] = 1 - i = i + 2 - else: - self.get_special(event)[new_state] = 1 - - def add_set(self, event, new_set, - TupleType = TupleType): - """ - Add transitions to the states in |new_set| on |event|. - """ - if type(event) == TupleType: - code0, code1 = event - i = self.split(code0) - j = self.split(code1) - map = self.map - while i < j: - map[i + 1].update(new_set) - i = i + 2 - else: - self.get_special(event).update(new_set) - - def get_epsilon(self, - none = None): - """ - Return the mapping for epsilon, or None. - """ - return self.special.get('', none) - - def items(self, - len = len): - """ - Return the mapping as a list of ((code1, code2), state_set) and - (special_event, state_set) pairs. - """ - result = [] - map = self.map - else_set = map[1] - i = 0 - n = len(map) - 1 - code0 = map[0] - while i < n: - set = map[i + 1] - code1 = map[i + 2] - if set or else_set: - result.append(((code0, code1), set)) - code0 = code1 - i = i + 2 - for event, set in self.special.items(): - if set: - result.append((event, set)) - return result - - # ------------------- Private methods -------------------- - - def split(self, code, - len = len, maxint = maxint): - """ - Search the list for the position of the split point for |code|, - inserting a new split point if necessary. Returns index |i| such - that |code| == |map[i]|. - """ - # We use a funky variation on binary search. - map = self.map - hi = len(map) - 1 - # Special case: code == map[-1] - if code == maxint: - return hi - # General case - lo = 0 - # loop invariant: map[lo] <= code < map[hi] and hi - lo >= 2 - while hi - lo >= 4: - # Find midpoint truncated to even index - mid = ((lo + hi) / 2) & ~1 - if code < map[mid]: - hi = mid - else: - lo = mid - # map[lo] <= code < map[hi] and hi - lo == 2 - if map[lo] == code: - return lo - else: - map[hi:hi] = [code, map[hi - 1].copy()] - #self.check() ### - return hi - - def get_special(self, event): - """ - Get state set for special event, adding a new entry if necessary. - """ - special = self.special - set = special.get(event, None) - if not set: - set = {} - special[event] = set - return set - - # --------------------- Conversion methods ----------------------- - - def __str__(self): - map_strs = [] - map = self.map - n = len(map) - i = 0 - while i < n: - code = map[i] - if code == -maxint: - code_str = "-inf" - elif code == maxint: - code_str = "inf" - else: - code_str = str(code) - map_strs.append(code_str) - i = i + 1 - if i < n: - map_strs.append(state_set_str(map[i])) - i = i + 1 - special_strs = {} - for event, set in self.special.items(): - special_strs[event] = state_set_str(set) - return "[%s]+%s" % ( - string.join(map_strs, ","), - special_strs - ) - - # --------------------- Debugging methods ----------------------- - - def check(self): - """Check data structure integrity.""" - if not self.map[-3] < self.map[-1]: - print self - assert 0 - - def dump(self, file): - map = self.map - i = 0 - n = len(map) - 1 - while i < n: - self.dump_range(map[i], map[i + 2], map[i + 1], file) - i = i + 2 - for event, set in self.special.items(): - if set: - if not event: - event = 'empty' - self.dump_trans(event, set, file) - - def dump_range(self, code0, code1, set, file): - if set: - if code0 == -maxint: - if code1 == maxint: - k = "any" - else: - k = "< %s" % self.dump_char(code1) - elif code1 == maxint: - k = "> %s" % self.dump_char(code0 - 1) - elif code0 == code1 - 1: - k = self.dump_char(code0) - else: - k = "%s..%s" % (self.dump_char(code0), - self.dump_char(code1 - 1)) - self.dump_trans(k, set, file) - - def dump_char(self, code): - if 0 <= code <= 255: - return repr(chr(code)) - else: - return "chr(%d)" % code - - def dump_trans(self, key, set, file): - file.write(" %s --> %s\n" % (key, self.dump_set(set))) - - def dump_set(self, set): - return state_set_str(set) - -# -# State set manipulation functions -# - -#def merge_state_sets(set1, set2): -# for state in set2.keys(): -# set1[state] = 1 - -def state_set_str(set): - state_list = set.keys() - str_list = [] - for state in state_list: - str_list.append("S%d" % state.number) - return "[%s]" % string.join(str_list, ",") - - - diff --git a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/__init__.py b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/__init__.py deleted file mode 100755 index 22b9bba3..00000000 --- a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/__init__.py +++ /dev/null @@ -1,40 +0,0 @@ -#======================================================================= -# -# Python Lexical Analyser -# -#======================================================================= - -""" -The Plex module provides lexical analysers with similar capabilities -to GNU Flex. The following classes and functions are exported; -see the attached docstrings for more information. - - Scanner For scanning a character stream under the - direction of a Lexicon. - - Lexicon For constructing a lexical definition - to be used by a Scanner. - - Str, Any, AnyBut, AnyChar, Seq, Alt, Opt, Rep, Rep1, - Bol, Eol, Eof, Empty - - Regular expression constructors, for building pattern - definitions for a Lexicon. - - State For defining scanner states when creating a - Lexicon. - - TEXT, IGNORE, Begin - - Actions for associating with patterns when - creating a Lexicon. -""" - -from Actions import TEXT, IGNORE, Begin -from Lexicons import Lexicon, State -from Regexps import RE, Seq, Alt, Rep1, Empty, Str, Any, AnyBut, AnyChar, Range -from Regexps import Opt, Rep, Bol, Eol, Eof, Case, NoCase -from Scanners import Scanner - - - diff --git a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/test_tm.py b/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/test_tm.py deleted file mode 100755 index e08c9e68..00000000 --- a/debian/pyrex/pyrex-0.9.9/Pyrex/Plex/test_tm.py +++ /dev/null @@ -1,24 +0,0 @@ -import sys -sys.stderr = sys.stdout - -from TransitionMaps import TransitionMap - -m = TransitionMap() -print m - -def add(c, s): - print - print "adding", repr(c), "-->", repr(s) - m.add_transition(c, s) - print m - print "keys:", m.keys() - -add('a','alpha') -add('e', 'eta') -add('f', 'foo') -add('i', 'iota') -add('i', 'imp') -add('eol', 'elephant') - - - |
