"""Convert an NFA into a regular expression by state elimination.

An NFA is described by a start state, a collection of accept states and a
transition table of the form ``{from_state: {to_state: condition}}``.  The
conditions are regex objects built with ``lit``, ``concat``, ``bar`` and
``star`` from the project's ``regex`` module.
"""

import enum
from collections import namedtuple

from regex import lit, concat, bar, star

# start:       the start state
# accept:      container of accept states (only membership tests and
#              iteration are performed on it)
# transitions: dict mapping from_state -> {to_state: condition}
NFA = namedtuple('NFA', ['start', 'accept', 'transitions'])


def copy_nfa(nfa):
    """Return a copy of *nfa* whose transition table can be edited freely.

    The outer table and every per-state row are new dicts, so adding or
    removing transitions on the copy leaves *nfa* untouched.  The condition
    objects themselves and the ``accept`` container are shared, not copied.
    """
    transitions_copy = {
        from_state: row.copy() for from_state, row in nfa.transitions.items()
    }
    return NFA(nfa.start, nfa.accept, transitions_copy)


def remove_states(nfa):
    """Eliminate every state that is neither the start nor an accept state.

    Only states that have a row in the transition table are considered.
    Each removed state is bypassed: a path A -> removed -> B becomes a direct
    A -> B transition, and a self-loop on the removed state is folded into
    its outgoing transitions as a starred prefix.

    NOTE: ``nfa.transitions`` is modified in place; the returned NFA shares
    that same (now reduced) table.  Pass a copy (see ``copy_nfa``) if the
    original must survive.

    Debug traces of every step are printed to stdout.
    """
    start, accept, transitions = nfa

    states_to_remove = [
        state for state in transitions
        if state != start and state not in accept
    ]

    while states_to_remove:
        # Select a state to remove this round
        removed_state = states_to_remove.pop()
        print('\nRemoving state:', removed_state)  # debug

        outgoing = transitions[removed_state]

        # Remove a loop from this state back into itself by prepending
        # (loop condition)* to all transitions leading out of this state
        if removed_state in outgoing:
            loop_condition = outgoing.pop(removed_state)
            # Only values are reassigned here, so iterating the dict while
            # updating it is safe
            for to_state in outgoing:
                outgoing[to_state] = concat(star(loop_condition),
                                            outgoing[to_state])

            # debug
            print()
            prettyprint(nfa)

        # Rewrite all transitions A -> this -> B as A -> B transitions.
        #
        # If the condition A -> this is foo and this -> B is bar, the
        # condition for A -> B becomes simply foobar.
        #
        # Since the self-loop is already gone, the removed state's own row
        # never matches below, and afterwards no transition leads into it.
        for from_state in transitions:
            row = transitions[from_state]
            if removed_state not in row:
                continue

            # Drop the transition into the state being deleted, keeping its
            # condition to build the replacement transitions
            condition_to_here = row.pop(removed_state)
            new_transitions = {
                to_state: concat(condition_to_here, condition_from_here)
                for to_state, condition_from_here in outgoing.items()
            }

            # The new transitions may lead to the same place as already
            # existing ones.  If the existing condition is foo and ours is
            # bar, the combined condition is foo|bar.
            for to_state, our_condition in new_transitions.items():
                if to_state in row:
                    row[to_state] = bar(row[to_state], our_condition)
                else:
                    row[to_state] = our_condition

        # Finally, remove the state we no longer need
        del transitions[removed_state]

        # debug
        print()
        prettyprint(nfa)

    return NFA(start, accept, transitions)


def to_regex(nfa):
    """Return the regex condition equivalent to *nfa*.

    The NFA is first rewritten so that no transition leads into the start
    state and none leads out of an accept state: a fresh start state points
    at the old one with an empty condition (``lit('')``, i.e. it consumes no
    input), and a fresh accept state is reachable from every old accept
    state with the same empty condition.  Since this is an NFA and not a
    DFA, that rewrite is safe.

    After the rewrite there is exactly one start state and one accept state,
    so once ``remove_states()`` has run the only transition left is the one
    from start to accept; its condition is the result.

    *nfa* itself is not modified; the work happens on a copy.  Accept states
    that have no row in the transition table are accepted as well.

    Raises:
        KeyError: if no accept state is reachable from the start state, so
            no start -> accept transition exists after elimination.
    """
    # Fresh enum members cannot compare equal to any caller-supplied state,
    # so the two new states never collide with existing ones.  The class
    # name is kept as-is because it shows up in the debug output.
    class _(enum.Enum):
        start, end = range(2)

    start, accept, transitions = copy_nfa(nfa)

    # Add new start state
    transitions[_.start] = {start: lit('')}

    # Add new accept state and transitions to it.  An accept state without
    # outgoing transitions may have no row yet, so create one on demand
    # instead of failing with a KeyError.
    transitions[_.end] = {}
    for state in accept:
        transitions.setdefault(state, {})[_.end] = lit('')

    # Package everything into a new NFA
    nfa = NFA(_.start, [_.end], transitions)

    # debug
    print()
    prettyprint(nfa)

    processed = remove_states(nfa)
    return processed.transitions[_.start][_.end]


def prettyprint(nfa):
    """Print the transition table of *nfa* as a tab-separated matrix.

    Rows are source states and columns are target states, both limited to
    the states that have a row in the table.  The start state is rendered
    bold and accept states green using ANSI escape codes; missing
    transitions are shown as a grey ``-``.
    """
    start, accept, transitions = nfa

    def process_state(state):
        """Return the label for *state*, highlighted if start/accept."""
        style = ''
        if state == start:
            # Bold
            style += '\x1b[1m'
        if state in accept:
            # Green
            style += '\x1b[32m'

        if style:
            return style + str(state) + '\x1b[0m'
        return str(state)

    states = list(transitions)

    print('\t' + '\t'.join(map(process_state, states)))
    for from_state in states:
        row = transitions[from_state]
        cells = [
            str(row[to_state]) if to_state in row else '\x1b[90m-\x1b[0m'
            for to_state in states
        ]
        print(process_state(from_state) + '\t' + '\t'.join(cells))


def main():
    """Build a small example NFA, print it and print its regex."""
    nfa = NFA('start', ['end'], {
        'start': {'0': lit('s')},
        '0': {'0': lit('0'), '1': lit('1'), 'end': lit('e'),
              'start': lit('r')},
        '1': {'0': lit('1'), '1': lit('0'), 'start': lit('r')},
        'end': {'end': lit('e'), 'start': lit('n')},
    })
    prettyprint(nfa)
    print(to_regex(nfa))


if __name__ == '__main__':
    main()