// cyk_chomsky.cpp  CYK reduce grammar to Chomsky Normal Form
//                  reduce all productions (rules) to on of two forms:
//                  variable -> terminal
//                  variable -> variable variable  (exactly two variables)
//                  book Section 4.5 p 92 

#include "cykp.h"

// verbose is global amount to print

void printp(production p)
{
  alpha::const_iterator a_iter;
  cout << p.V << " ->";
  for(a_iter=p.w.begin(); a_iter!=p.w.end(); a_iter++)
  {
    cout << "  " << *a_iter;
  }
  cout << endl;
}

void cyk_chomsky(var_type &V, ter_type &T, prod_type &P, variable &S)
{
  prod_type::iterator      p_iter; // new values will be returned
  ter_type::const_iterator t_iter; // not going to change
  alpha::iterator          a_iter; // right side of production
  var_type::const_iterator v_iter; // for verbose printout
  variable                 v_temp; // constructed variable
  production               p1(""); // for building a production
  prod_type                PT;     // temporary productions
  prod_type                P2;     // temporary productions
  int                      nvar;   // number of variables in production
  int                      i;      // loop variable
  
  // for any rule with |w| >= 2, with terminals 
  // create variable  T_<terminal>, replace the terminal with this variable
  // create a new rule T_<terminal> -> terminal

  if(verbose>=3) cout << "Chomsky 1, replace terminal with variable" << endl;

  for(p_iter=P.begin(); p_iter!=P.end(); p_iter++) // loop through productions
  {
    if(p_iter->w.size() >= 2)
    {
      for(a_iter=p_iter->w.begin(); a_iter!=p_iter->w.end(); a_iter++)
      {
        if(T.find(*a_iter)!=T.end())
        {
          v_temp = "T_" + *a_iter;
          p1 = production(v_temp);
          p1.w.push_back(*a_iter);
          P2.push_back(p1);
          *a_iter = v_temp;
        }
      }
    }
  }
  P.insert(P.end(), P2.begin(), P2.end()); // expanded list back into P
  
  p1 = production("");
  PT.clear();
  sort(P.begin(), P.end(), production::smaller );
  if(verbose>=2) cout << "Chomsky part 1, sorted productions:" << endl;
  for(p_iter=P.begin(); p_iter!=P.end(); p_iter++)
  {
    // test for duplicate and save non duplicates
    if(!(p1 == *p_iter))
    {
      p1 = *p_iter;
      PT.push_back(p1);
      if(verbose>=2) printp(p1);
    }
  }

  // for any rule with |w| >= 3
  // the rule of form  V -> V1 V2 V3 .. Vn for n>=3
  // create new variable and rule C_<Vn-1 Vn> -> Vn-1 Vn  repeat as needed

  if(verbose>=3) cout << "Chomsky Part 2 generated productions" << endl;
  P2.clear();
  for(p_iter=PT.begin(); p_iter!=PT.end(); p_iter++) { // loop through prods
    nvar = p_iter->w.size();
    if(nvar >= 3) {
      a_iter = p_iter->w.end();
      for(i=2; i<nvar; i++) {
        a_iter--;
        v_temp = "C_"+*(a_iter-1)+*a_iter;
        p1 = production(v_temp);
        p1.w.push_back(*(a_iter-1));
        p1.w.push_back(*a_iter);
        if(verbose>=3) printp(p1);
        P2.push_back(p1);
        *(a_iter-1) = v_temp;         
      }
      p1 = production(p_iter->V);
      p1.w.push_back(*(p_iter->w.begin()));
      p1.w.push_back(v_temp);
      P2.push_back(p1);
    }
    else 
      P2.push_back(*p_iter);
  }
  
  // sort, remove duplicate rules.

  P.clear();
  p1 = production("");
  sort(P2.begin(), P2.end(), production::smaller );
  if(verbose>=2) cout << "after Chomsky, sorted productions:" << endl;
  for(p_iter=P2.begin(); p_iter!=P2.end(); p_iter++)
  {
    // test for duplicate and save unique
    if(!(p1 == *p_iter))
    {
      p1 = *p_iter;
      P.push_back(p1);
      if(verbose>=2) printp(p1);
      for(a_iter=p1.w.begin(); a_iter!=p1.w.end(); a_iter++)
      {             // put new variables in V that are not Terminal
        if(T.find(*a_iter)==T.end()) V.insert(*a_iter);
      }
    }
  }

  if(verbose>=2)
  {
    cout << "after Chomsky, Variables:" << endl;
    for(v_iter=V.begin(); v_iter!=V.end(); v_iter++) cout << *v_iter << "  ";
    cout << endl;
  }

  // more minimization possible, not usually worth while
  
  return;
}  // end cyk_chomsky
  
