// myhill.cpp  minimize a Deterministic Finite Automata
//
// Machine definition of a DFA  M = (Q, Sigma, delta, q0, F)
//    where Q is set of states
//          Sigma is input tape alphabet
//          delta is transition table
//          q0 is the starting state
//          F is the set of final states
//
// Input Conventions:
//
// the file name extension is typically  .dfa
// free form plain text input file
// typical execution is   myhill output.file < your_input.dfa > info_output.out
//
// A comment line has "// " as the first three characters
// A line may end with a comment using whitespace "//"
// Anything after whitespace after last expected input is a comment
//
// There are only a few keywords:
//                 start, final, halt, trace, limit, enddef and tape
// any line not starting with these keywords is a transition
// 'enddef' should be used
//
// there is no such thing as a continuation line
//
// A transition is a three tuple:
// from-state  input-symbol  to-state
// 
// special input-symbol #b for the blank character,
//
// at least one transition is required,
// the typical dummy transition is  phi #b phi
//
// Anything after the required field(s) is a comment
//
// Input tape data after enddef is passed on to the output file
//
// Method: Textbook Introduction to Automata Theory, Languages,
//         and Computation by Hopcroft and Ullman, Section 3.4
//
// This is one of a series that also includes automata simulators
// dfa  deterministic finite automata (with epsilon transitions and halt)
// nfa  nondeterministic finite automata
// tm   Turing machine, deterministic (recognizer and function computation)
// ntm  nondeterministic Turing machine (language recognizer only)
//
// compile with  GNU g++  2.8.1 or later    or   Microsoft 6.0 or later
//               g++ -o myhill myhill.cpp   or   cl /GX /ML myhill.cpp
// ANSI/ISO C++
// release JSS 3/12/00 v1.0

#include <iostream>
#include <fstream>
#include <string>
#include <algorithm>
#include <vector>
#include <set>
#include <map>
#include <deque>
using namespace std;

  class transition // an entry in 'delta' transition_table 
  {
    public:
      transition(string from_state,
                 char   input_symbol,
                 string to_state)
                 {from=from_state;
                 input=input_symbol;
                 to=to_state;
                 color=0;}
      static bool smaller(transition a, transition b)
                     { if(a.from<b.from) return true;
                       else if(a.from>b.from) return false;
                       else return a.input<b.input; }
      friend ostream &operator<<(ostream &stream, transition trans);
      string from;
      char   input;
      string to;
      int    color;
  };
    
  class tl // matrix table lists
  {
    public:
      tl(){s='-'; p=0; q=0; next=0;}
      tl(char stat, int from, int to, tl * link)
         {s=stat; p=from; q=to; next=link;}         
      char s;
      int p;
      int q;
      tl *next;
  };
  typedef tl  tlm[100][100]; // limit of 100 states until this made dynamic
  typedef int ndl[100][100];
  
  // functions dealing with marking distinguishable states
  static void chkmark(tlm &M, ndl ndelta, int n_sigma, int p, int q, char x);
  static bool marked(tlm M, int p, int q, char x);
  static void printm(tlm M, set<string> labels);
  static void mark(tlm &M, int p, int q, char x);
  static void marklist(tlm &M, int p, int q, char x);
  static void addlist(tlm &M, int r, int s, int p, int q);
  
int main(int argc, char *argv[]) //  myhill output-file < input-file > info-output
{
  // primary objects
  set<string>  states;         // Q =all named states
  set<string>  from_states;    // source states for checking
  set<string>  to_states;      // target states for checking
  string       start = "";     // q0 = starting state
  set<string>  final;          // F = set of final states
  set<char>    sigma;          // Sigma = set of tape symbols
  set<string>  trace;          // states to trace
  vector<char> tape;           // input tape
  vector<transition> t_table;  // original delta transition table
  vector<transition> delta;    // delta after remove unreachable
  typedef pair<string, int> t_pair; // to make t_index 
  typedef multimap<string, int, less<string> > t_map;
  t_map t_index;               // t_table index
  pair<t_map::iterator, t_map::iterator> p_pair; // for t_index
  deque<string> dfs_que;       // states to search
  set<string> Q;               // Q set of states after remove unreachable
  set<string> Q_F;             // Q-F set of states after remove unreached
  set<string> F;               // F set of final states after remove unreached
  map<string, int, less<string> > s_index; // state to matrix
  tlm M;                       // matrix for marking distinct states
  typedef pair<char, int> sc_pair; // to make sig_index
  typedef map<char, int> s_map;// sigma to integer map type for ndelta
  s_map sig_index;             // sigma to integer map for ndelta
  ndl ndelta;                  // numeric delta for fast marking
  typedef pair<string,string> st_pair; // string mapping pair
  typedef vector<st_pair> st_vect;     // string mapping vector
  ofstream file_out;
  char line[256];              // for copying input to output
  
  // primary iterators  
  vector<transition>::iterator       x;      // for t_table
  int                                tx;     // for t_table
  t_map::iterator                    pt;     // for t_index
  set<string>::iterator              iter;   // for sets
  set<string>::const_iterator        iter1;  // for sets
  set<string>::const_iterator        iter2;  // for sets
  set<char>::const_iterator          iterc;  // for sigma
  
  // control variables
  bool no_minimize = false; // set true by various errors
  string state;             // present state
  int i;                    // loop index
  int p;                    // matrix M or ndelta subscript
  int q;                    // matrix M subscript
  string prev_from = " ";   // for detecting nondeterministic
  char prev_input = ' ';    // for detecting nondeterministic
  bool nondeterministic = false; // use nfa simulator, not this dfa
  int color;                // for DFS
  string next_state;        // for DFS
  
  // input variables
  string keyword;          // for inputting candidate keyword
  string from_state;       // for inputting one from_state
  string input_symbol;     // character extracted later
  string to_state;         // for inputting one to-state
  string final_state;      // for inputting one final or halt state
  string trace_state;      // for inputting one trace_state
  string initial_tape =""; // check for #b (may read more than 1)
  string check;            // check for // that ends data on a line
  string input_str;        // temp string for #]
  bool halt_mode = false;   // flag to pass along
  int limit = 10000;        // limit to pass along
  
  // keywords and special symbols
  string key_start = "start";  // followed by a state
  string key_tape  = "tape";   // followed by a string, may have #b
  string key_final = "final";  // followed by a final state name
  string key_halt  = "halt";   // followed by a final state name, halt mode
  string key_trace = "trace";  // followed by "all" or a state
  string key_limit = "limit";  // followed by an integer
  string key_enddef= "enddef"; // only a "tape" may follow this
  string comm      = "//";     // start comment
  string empty     = "";       // empty string
  char epsilon   = '\0';       // unprintable, #e epsilon
  
  cout << "myhill v1.0 starting" << endl;

  if(argc<2)
  {
    cout << "no output file given, using myhill_temp.dfa" << endl;
    file_out.open("myhill_temp.dfa");
  }
  else
  {
    file_out.open(argv[1]);
    if(!file_out)
    {
      cout << "can not open output file, using myhill_temp.dfa" << endl;
      file_out.open("myhill_temp.dfa");
    }
  }

  while(!cin.eof()) // main input loop
  {
    cin >> keyword;
    if(cin.eof()) break;
    if(keyword==key_start)
    {
      if(start!=empty) cout << "Over writing a previous start state."
                         << endl;
      cin >> start;
      states.insert(start);
      if(start=="//") cout << "start looks like a comment?" << endl;
      cin.ignore(255,'\n');
      continue;
    }
    else if(keyword==key_tape)
    {
      cin >> initial_tape;
      cin.ignore(255,'\n');
      continue;
    }
    else if(keyword==key_final)
    {
      cin >> final_state;
      final.insert(final_state);
      if(final_state=="//") cout << "final looks like a comment?" << endl;
      cin.ignore(255,'\n');
      continue;
    }
    else if(keyword==key_halt)
    {
      halt_mode = true;
      cin >> final_state;
      if(final_state!=comm && final_state!=empty)
      {  // allow stand alone "halt" just to set flag
        final.insert(final_state);
      }
      cin.ignore(255,'\n');
      continue;
    }
    else if(keyword==key_trace)
    {
      cin >> trace_state;
      trace.insert(trace_state);
      if(trace_state=="//") cout << "trace state looks like a comment?" << endl;
      cin.ignore(255,'\n');
      continue;
    }
    else if(keyword==key_limit)
    {
      cin >> limit;
      cin.ignore(255,'\n');
      continue;
    }
    else if(keyword==key_enddef) // stop inputting
    {                            // now may only read "tape"
      cin.ignore(255,'\n');
      break;
    }
    else if(keyword=="//") // skip comment "// "
    {
      cin.ignore(255,'\n');
      continue;
    }
    else
    {
      from_state = keyword;
      states.insert(from_state);
      from_states.insert(from_state);
    }
    cin >> input_symbol; if(cin.eof()) break;
    if(input_symbol=="#b") input_symbol[0]=' ';        // a space
    if(input_symbol=="#e") input_symbol[0]=epsilon;    // epsilon
    
    cin >> to_state;     if(cin.eof()) break;
    states.insert(to_state);
    to_states.insert(to_state);
    sigma.insert(input_symbol[0]);
    cin.ignore(255,'\n'); // delete trailing comments
    t_table.push_back(transition(from_state, input_symbol[0], to_state));
    
  } // end main input loop

  cout << "reading input finished." << endl;

  // print primary data objects, check for errors and warnings
  if(states.size()==0)
  {
    cout << "No states input prevent minimization. Stopping." << endl;
    return 1;
  }

  cout << "start state " << start << endl;

  cout << "states:";
  for(iter=states.begin(); iter!=states.end(); iter++) cout << *iter << " ";
  cout << endl;

  iter = from_states.find(start); // check that start is a legal state
  if(iter==states.end())
  {
    cout << start << " is not a legal starting state." << endl;
    no_minimize=true;
  }
  cout << endl;
  
  cout << "final states:";
  if(final.size()==0)
  {
    cout << "   No final states" << endl;
    no_minimize=true;
  }
  else if(to_states.size()!=0)
  {
    for(iter=final.begin(); iter!=final.end(); iter++) cout << *iter << " ";
    cout << endl;

    for(iter=final.begin(); iter!=final.end(); iter++) // check final states
    {
      iter2 = to_states.find(*iter);
      if(iter2==to_states.end() && (*iter!=start))
      {
        cout << *iter << " is not a valid final state." << endl;
        no_minimize=true;
      }
    }
    cout << endl;
  }

  if(t_table.size()!=0)
  {
    sort(t_table.begin(), t_table.end(), transition::smaller );
    cout << "Sorted original transition table:" << endl;
    for(x=t_table.begin(); x!=t_table.end(); x++)
    {
      cout << *x << endl;
      if(x->from==prev_from && (x->input==prev_input ||
         prev_input==epsilon))
      {
        nondeterministic = true;
        cout << "                            duplicate transition" << endl;
      }
      prev_from=x->from;
      prev_input=x->input;
    }
  }
  else
  { 
    cout << "No transitions (three tuples)" << endl;
    no_minimize=true;
  }

  if(nondeterministic)
  {
    cout << "nondeterministic 'delta' transition table" << endl;
    cout << "either eliminate duplicate transitions or use nfa to dfa"
         << endl;
    return 1;
  }
  if(t_table.size() != (from_states.size()*sigma.size()))
  {
    cout << "not a complete DFA, |sigma|=" << sigma.size()
         << "  |states|=" << from_states.size() << "  but only "
         << t_table.size() << " unique transitions." << endl;
    no_minimize=true;
         
  }
  if(no_minimize)
  {
    cout << "Errors in input prevent minimization. Stopping." << endl;
    return 1;
  }

  // build map for state to transition table index
  for(tx=0; tx<t_table.size(); tx++)
  {
    t_index.insert(t_pair(t_table[tx].from, tx));
  }
  
  state=start;          // state set to front of queue in loop
  dfs_que.push_back(state);
  Q.clear();

  // main minimization steps
  
  // Depth First Search to find reachable states
  while(!dfs_que.empty())
  {
     // pull up next transition

      state = dfs_que.front();
      Q.insert(state);   
      dfs_que.pop_front();
      p_pair = t_index.equal_range(state);
      if(p_pair.first==t_index.end()) continue; // no more transitions on path
      
      for(pt=p_pair.first; pt!=p_pair.second && pt!=t_index.end(); pt++)
      {
        tx = pt->second; // process this group
        next_state = t_table[tx].to;
        color = t_table[tx].color;
        if(color==0 && Q.find(next_state)==Q.end())
        {
          dfs_que.push_back(next_state);
        }
        t_table[tx].color = 1;
     }
  } 

  cout << endl << "reachable states: ";
  for(iter=Q.begin(); iter!=Q.end(); iter++) cout << *iter << " ";
  cout << endl;

  // eliminate unusable transitions
  cout << "remaining transitions:" << endl;
  for(x=t_table.begin(); x!=t_table.end(); x++)
  {
    if(Q.find(x->to)!=Q.end() && Q.find(x->from)!=Q.end())
    {
      cout << *x << endl;
      delta.push_back(*x);
    }
  }
  sort(delta.begin(), delta.end(), transition::smaller );

  // eliminate non reachable final states
  F.clear();
  cout << "remaining final states: ";
  for(iter=final.begin(); iter!=final.end(); iter++)
  {
    if(Q.find(*iter)!=Q.end())
    {
      cout << *iter << " ";
      F.insert(*iter);
    }
  }
  cout << endl;
  if(F.size()<=0)
  {
    cout << "no final states remain, language is phi." << endl;
    return 1;
  }
  
  // compute Q - F
  Q_F.clear();
  set_difference(Q.begin(), Q.end(),
                 F.begin(), F.end(),
                 inserter(Q_F,Q_F.begin()));
  cout << "Q - F : ";
  for(iter=Q_F.begin(); iter!=Q_F.end(); iter++)
  {
    cout << *iter << "  ";
  }
  cout << endl;

  // build map for state to delta index
  t_index.clear();
  for(tx=0; tx<delta.size(); tx++)
  {
    t_index.insert(t_pair(delta[tx].from, tx));
  }

  // build map for state to matrix index and state to ndelta
  iter = Q.begin();
  for(tx=0; tx<Q.size(); tx++)
  {
    s_index.insert(t_pair(*iter, tx));
    iter++;
  }

  // build map for sigma to ndelta second index
  sig_index.clear();
  iterc = sigma.begin();
  for(tx=0; tx<sigma.size(); tx++)
  {
    sig_index.insert(sc_pair(*iterc, tx));
    iterc++;
  }

  // build  ndelta  for fast indexing while marking matrix M
  for(tx=0; tx<delta.size(); tx++)
  {
    p=s_index.find(delta[tx].from)->second;
    q=s_index.find(delta[tx].to)->second;
    i=sig_index.find(delta[tx].input)->second;
    ndelta[p][i]=q;
  }
  
  // initialize matrix M
  for(p=0; p<Q.size(); p++)
    for(q=p+1; q<Q.size(); q++)
      M[p][q]=tl(' ',0,0,0);

  // book P70  step 1)
  for(iter=F.begin(); iter!=F.end(); iter++)
  {
    p=s_index.find(*iter)->second;
    for(iter2=Q_F.begin(); iter2!=Q_F.end(); iter2++)
    {
      q=s_index.find(*iter2)->second;
      mark(M, p, q, 'x');
    }
  }
  cout << "matrix M after step 1)" << endl;
  printm(M, Q);
  
  // check for marking F x F state pairs distinguishable
  for(iter=F.begin(); iter!=F.end(); iter++)
  {
    p=s_index.find(*iter)->second;
    for(iter2=F.begin(); iter2!=F.end(); iter2++)
    {
      q=s_index.find(*iter2)->second;
      if(p>=q) continue; // not p<q
      chkmark(M, ndelta, sigma.size(), p, q, 'x');
    }
  }
  cout << "matrix M after F X F)" << endl;
  printm(M, Q);

  
  // check for marking (Q-F) x (Q-F) state pairs distinguishable
  for(iter=Q_F.begin(); iter!=Q_F.end(); iter++)
  {
    p=s_index.find(*iter)->second;
    for(iter2=Q_F.begin(); iter2!=Q_F.end(); iter2++)
    {
      q=s_index.find(*iter2)->second;
      if(p>=q) continue; // not p<q
      chkmark(M, ndelta, sigma.size(), p, q, 'x');
    }
  }
  cout << "matrix M after Q-F x Q-F)" << endl;
  printm(M, Q);
  
  // For each row in matrix M, a blank mark, converted to 0, 
  // indicates the state for that row is indistinguishable from
  // the column state.
  
  st_vect V(Q.size()); // first is old Q, second is new Q
  i=0;
  for(iter=Q.begin(); iter!=Q.end(); iter++) V[i++]=st_pair(*iter,"");
  p=0;
  for(iter1=Q.begin(); iter1!=Q.end(); iter1++)
  {
    if(V[p].second=="")
    {
      V[p].second=*iter1;
      q=p;
      for(iter2=iter1; iter2!=Q.end(); iter2++)
      {
        if(M[p][q].s==' ') V[p].second=V[p].second+V[q].first;
        q++;
      }
      q=p;
      for(iter2=iter1; iter2!=Q.end(); iter2++)
      {
        if(M[p][q].s==' ') V[q].second=V[p].second;
        q++;
      }
    }
    p++;
  }
  cout << endl;
  cout << "state minimization mapping:" << endl;
  for(i=0; i<Q.size(); i++) cout << V[i].first << "  " << V[i].second << endl;
     
  // build the minimized machine and output
  file_out << "// minimized DFA" << endl;
  if(limit!=10000) file_out << "limit " << limit << endl;
  if(halt_mode) file_out << "halt" << endl;
  for(iter1=trace.begin(); iter1!=trace.end(); iter1++)
      file_out << "trace " << *iter1 << endl;

  file_out << endl;      
  for(p=0; p<Q.size(); p++)
    if(start==V[p].first) start=V[p].second;
  file_out << "start " << start << endl;
  
  final.clear();
  for(iter=F.begin(); iter!=F.end(); iter++)
  {
    for(p=0; p<Q.size(); p++)
      if(*iter==V[p].first) final.insert(V[p].second);
  }
  for(iter=final.begin(); iter!=final.end(); iter++)
    file_out << "final " << *iter << endl;
  
  for(i=0; i<delta.size(); i++)
  {
    for(p=0; p<Q.size(); p++)
    {
      if(delta[i].from==V[p].first) delta[i].from=V[p].second;
      if(delta[i].to==V[p].first) delta[i].to=V[p].second;
    }
  }
  sort(delta.begin(), delta.end(), transition::smaller );

  prev_from="";
  prev_input=' ';
  for(i=0; i<delta.size(); i++)
  {
    if(prev_from==delta[i].from && prev_input==delta[i].input) continue;
    file_out << delta[i] << endl;  
    prev_from=delta[i].from;
    prev_input=delta[i].input;
  }
  file_out << "enddef" << endl;

  while(!cin.eof()) // copy rest of input file after 'enddef'
  {
    cin.getline(line, 255, '\n');
    file_out << line << endl; 
  }
  file_out.close();
  
  cout << endl << "Myhill-Nerode minimization finished" << endl;
  
  return 0;
} // end of main


// functions dealing with marking distinguishable states

static void chkmark(tlm &M, ndl ndelta, int n_sigma, int p, int q, char x)
{
  int r;                    // matrix M subscript (pair 2)
  int s;                    // matrix M subscript (pair 2)
  int i;                    // loop index
  bool found=false;         // found pair
  
  // now find marked(r,s) such that for r=delta(p,a), s=delta(q,a) for some a
  for(i=0; i<n_sigma; i++)
  {
    r=ndelta[p][i];
    s=ndelta[q][i];
    if(r==s) continue;
    found=marked(M, r, s, x);
    if(found) break;
  }
  
  if(found)
  {
    mark(M, p, q, x);
    marklist(M, p, q, x); // recursively and delete
  }
  else // no pair (r,s) is makred
  {
    for(i=0; i<n_sigma; i++)
    {
      r=ndelta[p][i];
      s=ndelta[q][i];
      if(r!=s) addlist(M, r, s, p, q);
    }
  }
}

static void mark(tlm &M, int p, int q, char x)
{
  int pp=p;
  int qq=q;
  
  if(p>q) {qq=p; pp=q;}
  M[pp][qq].s=x;
}

static void marklist(tlm &M, int p, int q, char x)
{ // recursively mark list at (p,q) and delete marked entries
  int pp=p;
  int qq=q;
  int r;
  int s;
  tl *link; // link to more (p,q) pairs from  addlist
  
  if(p>q) {qq=p; pp=q;}
  r=M[pp][qq].p;
  s=M[pp][qq].q;
  if(r==s)return; // no list
  M[pp][qq].p=0; // must really walk list
  M[pp][qq].q=0;
  if(!marked(M, r, s, x))
  {
    mark(M, r, s, x);
    marklist(M, r, s, x);
  }
  link=M[pp][qq].next;
  while(link!=0)
  {
    r=link->p;
    s=link->q;
    if(r==s)return;
    link->p=0;
    link->q=0;
    if(!marked(M, r, s, x))
    {
      mark(M, r, s, x);
      marklist(M, r, s, x);
    }
    link=link->next;
  }
}

static void addlist(tlm &M, int r, int s, int p, int q)
{
  int pp=r;
  int qq=s;
  tl  *link;
  
  if(r>s) {qq=r; pp=s;}
  if(M[pp][qq].p==0 && M[pp][qq].q==0)
  {
    M[pp][qq].p=p;
    M[pp][qq].q=q;
    return;
  }
  else
  {
    if(M[pp][qq].p==p && M[pp][qq].q==q) return;
    if(M[pp][qq].next==0)
    {
      M[pp][qq].next=new tl(' ', p, q, 0);
      return;
    }
    link=M[pp][qq].next;
    while(link!=0)
    {
      if(link->p==0 && link->q==0)
      {
        link->p=p;
        link->q=q;
        return;
      }
      if(link->p==p && link->q==q) return;
      if(link->next==0)
      {
        link->next=new tl(' ', p, q, 0);
        return;
      }
      link=link->next;
    }
  }
}

static bool marked(tlm M, int p, int q, char x)
{
  int pp=p;
  int qq=q;
  
  if(p>q) {qq=p; pp=q;}
  return M[pp][qq].s==x;
}

 
// for outputting objects of class  transition
ostream &operator<<(ostream &stream, transition trans)
{
  string input_token=string(1,trans.input);
  if(trans.input=='\0') input_token=string("#e");
  if(trans.input=='\1') input_token=string("#]");
  if(trans.input=='\2') input_token=string("#[");
  
  stream << trans.from  << '\t' 
         << input_token << '\t'
         << trans.to    << '\t' ;   
  return stream;
}

  
static void printm(tlm M, set<string> labels)
{
  int size=labels.size();
  set<string>::const_iterator i;
  set<string>::const_iterator j;
  int p;
  int q;
  cout << '\t';
  for(j=labels.begin(); j!=labels.end(); j++) cout << *j << '\t';
  cout << endl;
  p=0;
  for(i=labels.begin(); i!=labels.end(); i++)
  {
    cout << *i << '\t';
    q=0;
    for(j=labels.begin(); j!=labels.end(); j++)
    {
      cout << M[p][q].s << '\t';
      q++;
    }
    cout << endl;
    p++;
  }
  cout << endl;
}   // end print matrix

// end myhill.cpp

