// cyk_eliminate.cpp  Eliminate useless variables, epsilon productions
//                    and unit productions
//                    (1a) variables that can never be terminals
//                    (1b) variables that can not be reached from S
//                    (2)  eliminate epsilon productions
//                    (3)  eliminate unit productions, A -> B

#include "cykp.h"

void cyk_eliminate(var_type &V, ter_type &T, prod_type &P, variable &S)
{
  // (1a) p89, lemma 4.1, Fig 4.7
  
  var_type                 newv;   // initially phi
  var_type                 temp;   // just for verbose printout
  var_type                 NV;     // nullable variables
  int                      nvar;   // number nullable in production
  int                      ivar;   // combinations of nullable
  int                      kvar;   // index of nullable
  prod_type::iterator      p_iter; // new values will be returned
  prod_type::iterator      p2iter; // loop in loop
  ter_type::const_iterator t_iter; // not going to change
  alpha::iterator          a_iter; // right side of production
  int                      n_newv; // number of items in newv, test oldv=newv
  int                      n_newt; // number of items in newt
  var_type::const_iterator v_iter; // for verbose printout
  prod_type                P2;     // temporary holder of productions
  bool                   nullable; // flag in loop
  production               tp(""); // a temporary production
  string epsilon = "#e";           // represents epsilon in a rule

  if(verbose>3) cout << "cyk_eliminate 1a)" << endl;
  for(p_iter=P.begin(); p_iter!=P.end(); p_iter++)  // 2)
  {
    p_iter->color = 0; // flag to delete production not set to 2 by end
    for(a_iter=p_iter->w.begin(); a_iter!=p_iter->w.end(); /* a_iter++ */ )
    {
      if(T.find(*a_iter)==T.end() && *a_iter!=epsilon) break;
      if(++a_iter==p_iter->w.end()) newv.insert(p_iter->V);
    }
  }   // end 2)
  if(verbose>3)
  {
    cout << "newv= ";
    for(v_iter=newv.begin(); v_iter!=newv.end(); v_iter++)
        cout << *v_iter << "  ";
    cout << endl;
  } 

  if(verbose>3) cout << "cyk_eliminate 1a) n" << endl;
  n_newv = 0;
  while(n_newv != newv.size())  // 3)
  {
    n_newv = newv.size();       // 4)
    for(p_iter=P.begin(); p_iter!=P.end(); p_iter++)
    {                                                        // 5)
      for(a_iter=p_iter->w.begin(); a_iter!=p_iter->w.end(); /* a_iter++ */ )
      {
        if(T.find(*a_iter)==T.end() && newv.find(*a_iter)==newv.end()) break;
        if(++a_iter==p_iter->w.end()) newv.insert(p_iter->V);
      }
    }
  }
  if(verbose>3)
  {
    cout << "newv= ";
    for(v_iter=newv.begin(); v_iter!=newv.end(); v_iter++)
        cout << *v_iter << "  ";
    cout << endl;
  } 
  
  set_difference(V.begin(), V.end(), newv.begin(), newv.end(),
                 inserter(temp, temp.begin()));
  if(verbose>3)
  {
    cout << "eliminate temp= ";
    for(v_iter=temp.begin(); v_iter!=temp.end(); v_iter++)
        cout << *v_iter << "  ";
    cout << endl;
  } 


  if(temp.size()>0)
  {
    if(verbose>=2) cout << "lemma 4.1 variables removed: ";
    for(v_iter=temp.begin(); v_iter!=temp.end(); v_iter++)
    {
      if(verbose>=2) cout << *v_iter << " ";
      for(p_iter=P.begin(); p_iter!=P.end(); p_iter++)
      {
        if(*v_iter == p_iter->V)
        {
          p_iter->color = 1; // mark for delete V -> for V in temp
          continue;
        }
        for(a_iter=p_iter->w.begin(); a_iter!=p_iter->w.end(); a_iter++)
        {
          if(*v_iter == *a_iter)
          {
            p_iter->color = 1; // mark for delete V -> ... A ...; A in temp
            break;
          }
        }
      }
    }
    if(verbose>=2) cout << endl;
  }

  
  V = newv; // 6)
  if(verbose>3) cout << "cyk_eliminate 1b)" << endl;  
  // (1b) page 89, lemma 4.2 eliminate productions, terminals and variables
  //               that can not be sentential forms (reached from S)

  newv.clear();
  newv.insert(S); // new variables
  n_newv = 0;
  n_newt = 0;
  ter_type newt;  // new terminals

  while(n_newv != newv.size() || n_newt != newt.size())
  {
    n_newv = newv.size();
    n_newt = newt.size();
    for(p_iter=P.begin(); p_iter!=P.end(); p_iter++) // loop through productions
    {
      if(p_iter->color > 0) continue; // already processed or deleted
      if(newv.find(p_iter->V)!=newv.end())
      {
        p_iter->color = 2;
        for(a_iter=p_iter->w.begin(); a_iter!=p_iter->w.end(); a_iter++)
        {
          if(T.find(*a_iter)!=T.end()) newt.insert(*a_iter);
          if(V.find(*a_iter)!=V.end()) newv.insert(*a_iter);
        }
      }
    }  // end loop through productions
  }  // end while loop
  if(verbose>3)
  {
    cout << "newv= ";
    for(v_iter=newv.begin(); v_iter!=newv.end(); v_iter++)
        cout << *v_iter << "  ";
    cout << endl;
    cout << "newt= ";
    for(v_iter=newt.begin(); v_iter!=newt.end(); v_iter++)
        cout << *v_iter << "  ";
    cout << endl;
  } 

  if(verbose>=2)
  {
    temp.clear();
    set_difference(V.begin(), V.end(), newv.begin(), newv.end(),
                   inserter(temp, temp.begin()));
    if(temp.size()>0)
    {
      cout << "lemma 4.2 variables removed: ";
      for(v_iter=temp.begin(); v_iter!=temp.end(); v_iter++)
      {
        cout << *v_iter << " ";
      }
      cout << endl;
    }
    temp.clear();
    set_difference(T.begin(), T.end(), newt.begin(), newt.end(),
                   inserter(temp, temp.begin()));
    if(temp.size()>0)
    {
      cout << "lemma 4.2 terminals removed: ";
      for(v_iter=temp.begin(); v_iter!=temp.end(); v_iter++)
      {
        cout << *v_iter << " ";
      }
      cout << endl;
    }
  }  // end if(verbose>=2)
  
  V = newv;
  T = newt;
  for(p_iter=P.begin(); p_iter!=P.end(); p_iter++)  // 2)
  {
    if(p_iter->color==2)
    {
      P2.push_back(*p_iter); // productions to keep
    }
    else
    { 
      if(verbose>=2)
      {
        cout << "deleting useless production:" << endl;
        cout << p_iter->V << " -> " ;
        for(a_iter=p_iter->w.begin(); a_iter!=p_iter->w.end(); a_iter++)
        {
          cout << *a_iter << " " ;
        }
        cout << endl;
      }
    }
  } // end lemma 4.2
  P = P2; // update P from P2
  
  if(verbose>3) cout << "cyk_eliminate 2)" << endl;
  // (2) p 90, Theorem 4.3  eliminate epsilon productions

  if(verbose>=3) cout << "Eliminate epslion Productions" << endl;
  
  // remove S -> #e and flag accept_null for parser
  // eliminate #e from other productions, expand combinations

  NV.clear();
  n_newv = -1;
  while(NV.size() != n_newv)
  {
    n_newv = NV.size();
    for(p_iter=P.begin(); p_iter!=P.end(); p_iter++)  // 2)
    {
      p_iter->color = 0; // flag to delete production not zero by end
      if(p_iter->w.size() == 1 && p_iter->w[0] == "#e")
      {
        p_iter->color = 1;
        NV.insert(p_iter->V);
        if(verbose>=3) cout << "nullable variable: " << p_iter->V << endl;
        continue;
      }
      nullable = true;
      for(a_iter=p_iter->w.begin(); a_iter!=p_iter->w.end(); a_iter++)
      {
        if(*a_iter=="#e")
        {
          p_iter->color = 1; // bad production
          cout << "deleting bad epsilon production:" << endl;
          cout << p_iter->V << " -> " ;
          for(a_iter=p_iter->w.begin(); a_iter!=p_iter->w.end(); a_iter++)
          {
            cout << *a_iter << " " ;
          }
          cout << endl;
          break;
        }
        if(NV.find(*a_iter) == NV.end()) // not nullable
        {
          nullable = false;
          break;
        }
      }    // end a_iter loop through right hand side
      if(nullable)
      {
        p_iter->color = 1;
        NV.insert(p_iter->V);
        if(verbose>=3) cout << "nullable variable: " << p_iter->V << endl;
      }
    }   // end of loop through productions
  }   // end of while loop finding nullable variables
  
  if(NV.find(S)!=NV.end()) accept_null = true; // flag set for parser

  P2.clear();
  for(p_iter=P.begin(); p_iter!=P.end(); p_iter++)
  {
    if(p_iter->color==0)
    {
      P2.push_back(*p_iter); // productions to keep
    }
    else
    { 
      if(verbose>=2)
      {
        cout << "deleting epsilon production:" << endl;
        cout << p_iter->V << " -> " ;
        for(a_iter=p_iter->w.begin(); a_iter!=p_iter->w.end(); a_iter++)
        {
          cout << *a_iter << " " ;
        }
        cout << endl;
      }
    }
  } // end lemma 4.3
  P = P2; // update P from P2
  
  // BUT, still have to generate ALL combinations of rules with
  //      nullable variables !!!
  for(p_iter=P.begin(); p_iter!=P.end(); p_iter++)
  {
    // count nullable variables (actually 2^nullable)
    nvar = 1;
    for(a_iter=p_iter->w.begin(); a_iter!=p_iter->w.end(); a_iter++)
    {
      if(NV.find(*a_iter)!=NV.end()) nvar=nvar+nvar; // double it.
    }
    if(nvar==1) continue;
    nvar--; // not keep all
    for(ivar=0; ivar<nvar; ivar++) // all combinations of nullable
    {
      tp=production(p_iter->V);
      kvar=0;
      for(a_iter=p_iter->w.begin(); a_iter!=p_iter->w.end(); a_iter++)
      {
        if(NV.find(*a_iter)!=NV.end())
        {
          if(((ivar>>kvar)%2)==1) tp.w.push_back(*a_iter);
          kvar++;        
        }
        else
        {
          tp.w.push_back(*a_iter);
        }
      }

      if(verbose>=2)
      {
        cout << "adding with no #e:" << ivar << endl;
        cout << tp.V << " -> " ;
        for(a_iter=tp.w.begin(); a_iter!=tp.w.end(); a_iter++)
        {
          cout << *a_iter << " " ;
        }
        cout << endl;
      }
      P2.push_back(tp);
    }
  } // end eliminate epsilon productions
  P = P2; // update P from P2
  
  if(verbose>3) cout << "cyk_eliminate 3)" << endl;
  // (3) p 91, Theorem 4.4  eliminate unit productions

  if(verbose>=3) cout << "Eliminate Unit Productions" << endl; 
  variable v_keep;
  variable v_repl;
  bool found = true ;
  int n_unit = 1;
  int i_unit = 0;

  for(p_iter=P.begin(); p_iter!=P.end(); p_iter++) p_iter->color = 0;
  while(found && i_unit<n_unit)
  {
    found = false;
    i_unit++;
    n_unit=0;
    P2.clear();
    for(p_iter=P.begin(); p_iter!=P.end(); p_iter++) // loop through production
    {
      if(p_iter->w.size()==1 && T.find(p_iter->w[0])==T.end() &&
         p_iter->w[0]!=epsilon) // "#e"
      { 
        // have unit production
        v_keep = p_iter->V;
        v_repl = p_iter->w[0];
        p_iter->color = 1; // mark erase unit production
        n_unit++;
        if(v_keep==v_repl) continue; // A -> A  just delete
        for(p2iter=P.begin(); p2iter!=P.end(); p2iter++) // copy/substitute
        {
          if(p2iter->V == v_repl && 
               (p2iter->w.size()!=1 || V.find(p2iter->w[0])==V.end() ))
	  {
            tp = *p2iter;
            tp.V = v_keep;
            P2.push_back(tp);
            found = true;
          }
        }
      }
    }
    if(found)
    {
      for(p_iter=P.begin(); p_iter!=P.end(); p_iter++) P2.push_back(*p_iter);
      P = P2;
    }
  } // end eliminate unit productions

  // sort, remove duplicate rules and deleted rules

  P2.clear();
  production p1("");
  sort(P.begin(), P.end(), production::smaller );
  for(p_iter=P.begin(); p_iter!=P.end(); p_iter++)
  {
    // test for duplicate and skip
    if((p1 == *p_iter) || (p_iter->color==1)) continue;
    p1 = *p_iter;
    P2.push_back(p1); // keep production
  }
  P = P2;
  sort(P.begin(), P.end(), production::smaller );
  
  if(verbose>=2)
  {
    cout << "after eliminate, Variables:" << endl;
    for(v_iter=V.begin(); v_iter!=V.end(); v_iter++) cout << *v_iter << " ";
    cout << endl;

    cout << "after eliminate, Terminals:" << endl;
    for(t_iter=T.begin(); t_iter!=T.end(); t_iter++) cout << *t_iter << " ";
    cout << endl;

    cout << "after eliminate, sorted productions:" << endl;
    for(p_iter=P.begin(); p_iter!=P.end(); p_iter++)
    {
      cout << p_iter->V << " -> " ;
      for(a_iter=p_iter->w.begin(); a_iter!=p_iter->w.end(); a_iter++)
      {
        cout << *a_iter << " " ;
      }
      cout << endl;
    }
  } // end verbose>=2

  return;
} // end cyk_eliminate

