/********************************************************************************************************
 * QRNA - Comparative analysis of biological sequences 
 *         with pair hidden Markov models, pair stochastic context-free
 *        grammars, and probabilistic evolutionary  models.
 *       
 * Version 2.0.0 (JUN 2003)
 *
 * Copyright (C) 2000-2003 Howard Hughes Medical Institute/Washington University School of Medicine
 * All Rights Reserved
 * 
 *     This source code is distributed under the terms of the
 *     GNU General Public License. See the files COPYING and LICENSE
 *     for details.
 ***********************************************************************************************************/

/* cfgmodel.c
 *
 * Allocation, free'ing of the SCFG. 
 *
 * ER, Fri Jun 18 15:45:19 CDT 1999 [STL]
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include "funcs.h"
#include "globals.h"
#include "squid.h"
#include "structs.h"
#include "version.h"

#ifdef MEMDEBUG
#include "dbmalloc.h"
#endif

/* Private helpers, defined at the end of this file. Each returns a
 * cumulative size over the first ndp dp's (transitions, index blocks and
 * node cells respectively); the allocators use them to size their buffers.
 */
static int NCFG(int ndp);
static int NIdx(int ndp);
static int NNodes(int ndp);

/* Function: AllocSCFG()
 *
 * ER, Tue Jun 22 13:36:25 CDT 1999 [STL]
 *
 * Purpose:  Allocate a zero-filled SCFG table of doubles with NDPS rows,
 *           row i holding Ntrans[i] entries.
 *
 * Args:     (none)
 *
 * Return:   the table. It consists of one array of row pointers plus one
 *           data buffer shared by all rows; FreeSCFG() releases both.
 */
double **
AllocSCFG(void)
{
  double **table;
  double  *row;
  int      dp, tr;

  /* A single data buffer for every row keeps the grammar contiguous
   * in memory, which might keep us in cache.
   */
  table    = (double **) MallocOrDie(sizeof(double *) * NDPS);
  table[0] = (double *)  MallocOrDie(sizeof(double)   * NCFG(NDPS));

  /* Each row starts right where the previous one ends. */
  for (dp = 1; dp < NDPS; dp++)
    table[dp] = table[dp-1] + Ntrans[dp-1];

  /* Start with every transition at zero. */
  for (dp = 0; dp < NDPS; dp++) {
    row = table[dp];
    for (tr = 0; tr < Ntrans[dp]; tr++)
      row[tr] = 0.;
  }

  return table;
}

/* Function: AllocSCFGNode()
 *
 * ER, Tue Jun 22 13:36:25 CDT 1999 [STL]
 * 
 * Purpose:  allocates SCFG_nodes[NDPS][NNODS]
 *
 * Args:      
 *
 * Return:   cfg_nodes                
 */
double ***
AllocSCFGNode(void)
{
  double ***cfg_node;
  int       i, idx, j;

  /* This way of alloc'ing a 2D array keeps the CFG all in one
   * contiguous chunk of RAM and might keep us in cache.
   */
  cfg_node       = (double ***) MallocOrDie(sizeof(double **) * NDPS);
  cfg_node[0]    = (double **)  MallocOrDie(sizeof(double *)  * NIdx(NDPS));
  cfg_node[0][0] = (double *)   MallocOrDie(sizeof(double)    * NNodes(NDPS));

  for (i = 0; i < NDPS; i++) {
    cfg_node[i] = cfg_node[0] + NIdx(i);

    for (idx = 0; idx < Idx[i]; idx++)
      cfg_node[i][idx] = cfg_node[0][0] + NNodes(i) + idx*NodesPerDp[i];
  }
  
  /* Initialize all transitions to zero
   */
  for (i = 0; i < NDPS; i++)
    for (idx = 0; idx < Idx[i]; idx++)
      for (j = 0; j < NodesPerDp[i]; j++) 
	cfg_node[i][idx][j] = 0.;

  return cfg_node;
}

/* Function: AllocIntSCFG()
 *
 * Purpose:  Integer counterpart of AllocSCFG(): allocate a zero-filled
 *           table of ints with NDPS rows, row i holding Ntrans[i] entries.
 *
 * Args:     (none)
 *
 * Return:   the table: one array of row pointers plus one data buffer
 *           shared by all rows.
 */
int **
AllocIntSCFG(void)
{
  int **table;
  int  *row;
  int   dp, tr;

  /* A single data buffer for every row keeps the grammar contiguous
   * in memory, which might keep us in cache.
   */
  table    = (int **) MallocOrDie(sizeof(int *) * NDPS);
  table[0] = (int *)  MallocOrDie(sizeof(int)   * NCFG(NDPS));

  /* Each row starts right where the previous one ends. */
  for (dp = 1; dp < NDPS; dp++)
    table[dp] = table[dp-1] + Ntrans[dp-1];

  /* Start with every transition at zero. */
  for (dp = 0; dp < NDPS; dp++) {
    row = table[dp];
    for (tr = 0; tr < Ntrans[dp]; tr++)
      row[tr] = 0;
  }

  return table;
}

/* Function: CheckSCFG()
 *
 * Date:     ER, Thu Jun 24 13:01:24 CDT 1999 [St. Louis]
 *
 * Purpose:  Verify that transition and emission prob's of a SCFG add up to one
 *
 * Args:     cfg - the SCFG matrix
 *
 * Returns:  void. 
 */
void
CheckSCFG(double **cfg)
{
  int    i, idx, tr;
  double sumcheck;

  /* do sumcheck
   */
  for (i = 0; i < NDPS; i++)
    for (idx = 0; idx < Idx[i]; idx++) 
    {
      sumcheck = 0.;
      for (tr = idx*TransPerDp[i]; tr < (idx+1)*TransPerDp[i]; tr++) 
	if (Connects(i,Ntype(i,tr))) 
	  sumcheck += EXP2(cfg[i][tr]);
	
	if (sumcheck > 1.02 || sumcheck < 0.98) 
	  Die("wrong reassembling of SCFG probabilities for dp %s(%d) (sumcheck = %f).",
	      stNAME[i], idx, sumcheck);
    }
}

/* Function: CheckSCFGEmit()
 *
 * Date:     ER, Thu Jun 24 13:08:46 CDT 1999 [St. Louis]
 *
 * Purpose:  Verify that emission prob's of a SCFG add up to one
 *
 * Args:     cfg_emit - 
 *
 * Returns:  void. 
 */
void
CheckSCFGEmit(double **cfg_emit)
{
  int    i, idx, j, node, tr;
  double sumcheck;

  /* do sumcheck
   */
  for (i = 0; i < NDPS; i++)
    for (idx = 0; idx < Idx[i]; idx++) 
      for (j = 0; j < NodesPerDp[i]; j++) {
	node = j + DpNodeidx[i];
	if (Connects(i,node) && i != IS1 && i != IS2B && i != IS2I) {
	  sumcheck = 0.;
	  
	  for (tr = idx*TransPerDp[i]; tr < (idx+1)*TransPerDp[i]; tr++)
	    if (Ntype(i,tr) == node) 
	      sumcheck += EXP2(cfg_emit[i][tr]);
	  
	  if (sumcheck > 1.01 || sumcheck < 0.99) 
	    Die("SCFG emission probs for dp %s (%d), node %s  (sumcheck = %f).",
		stNAME[i], idx, dpcNAME(node), sumcheck);
	}
      }
}

/* Function: CheckSCFGNode()
 *
 * Date:     ER, Thu Jun 24 13:01:24 CDT 1999 [St. Louis]
 *
 * Purpose:  Verify that node prob's of a SCFG add up to one
 *
 * Args:     cfg_node 
 *
 * Returns:  void. 
 */
void
CheckSCFGNode(double ***cfg_node)
{
  int    i, idx, j, node;
  double sumcheck;

  /* do sumcheck
   */
  for (i = 0; i < NDPS; i++) 
    for (idx = 0; idx < Idx[i]; idx++) {
      sumcheck = 0.;
      for (j = 0; j < NodesPerDp[i]; j++) {
	node = j + DpNodeidx[i];
	if (Connects(i,node)) 
	  sumcheck += EXP2(cfg_node[i][idx][j]);
      }
      if (sumcheck > 1.01 || sumcheck < 0.99) 
	Die("SCFGNode probs for dp %s(%d) do not add up to one (sumcheck = %f).",
	    stNAME[i], idx, sumcheck);
    }
}

/* Function: Connects()
 *
 * Date:     ER, Mon Jun 21 11:35:49 CDT 1999
 *
 * Purpose:  Tell whether a node type can be reached from a dp.
 *           The tests rely on the numeric order of the dpc* node
 *           constants:
 *             V             -- nodes below dpcEV
 *             W, WB         -- nodes strictly between dpcEV and dpcEW
 *             IS1,IS2B,IS2I -- nodes equal to dpcLEN() of their offset
 *                              from DpNodeidx[IS1]
 *
 * Args:     i     - dp (V, W, ...)
 *           node  - node type (dpcL, ...)
 *
 * Return:   1 if the dp is connected to the node
 *           0 otherwise.
 *           Any other dp is reported through Die().
 */
int
Connects(int i, int node)
{
  int linked = 0;    /* 1 if dp is connected to node, 0 otherwise */

  if (i == V) {
    linked = (node < dpcEV);
  }
  else if (i == W || i == WB) {
    linked = (node > dpcEV && node < dpcEW);
  }
  else if (i == IS1 || i == IS2B || i == IS2I) {
    linked = (node == dpcLEN(node-DpNodeidx[IS1]));
  }
  else {
    Die ("%s == state from outerspace", stNAME[i]);
  }

  return linked;
}

/* Function: dpcNAME()
 *
 * Date:     ER, Wed Jul  7 11:49:49 CDT 1999 [STL]
 *
 * Purpose:  Printable name of a node type.
 *
 * Args:     node   - dpcL, ...
 *
 * Return:   pointer to a constant string; it must not be modified or freed.
 *           A node that is neither a named node nor a loop-length node
 *           is reported through Die().
 */
char *
dpcNAME(int node)
{
  switch (node) {
    /* V nodes */
  case dpcS1:  return "IS1";
  case dpcS2S: return "IS2S";
  case dpcS2B: return "IS2B";
  case dpcS2I: return "IS2I";
  case dpcMV:  return "multiloop";
  case dpcEV:  return "empty V";

    /* W, WB nodes */
  case dpcL:   return "L";
  case dpcR:   return "R";
  case dpcP:   return "Pair";
  case dpcBW:  return "Bifurcation";
  case dpcEW:  return "empty W";

    /* IS1, IS2 nodes */
  case dpcEIS: return "empty IS";

  default:
    break;
  }

  /* Everything else must be one of the loop-length nodes. */
  if (node == dpcLEN(node-DpNodeidx[IS1]))
    return "LEN";

  Die("dpcName()");
  return NULL;
}


/* Function: DupSCFG()
 *
 * Purpose:  Make a copy of a SCFG table.
 *
 * Args:     cfg - table to copy; Ntrans[i] entries are read from each
 *                 of its NDPS rows
 *
 * Return:   a new table obtained from AllocSCFG() holding the same values.
 */
double **
DupSCFG(double **cfg)
{
  double **copy;
  int      dp;

  copy = AllocSCFG();

  /* Copy row by row; only the row pointers of the source are relied on. */
  for (dp = 0; dp < NDPS; dp++) {
    if (Ntrans[dp] > 0)
      memcpy(copy[dp], cfg[dp], sizeof(double) * Ntrans[dp]);
  }

  return copy;
}


/* Function: FreeSCFG()
 *
 * Purpose:  Release a table laid out as one array of row pointers whose
 *           first entry points at the single data buffer (the layout
 *           AllocSCFG() builds).
 *
 * Args:     cfg - table to free; NULL is accepted and ignored, like free().
 *
 * Return:   (void)
 */
void
FreeSCFG(double **cfg)
{
  if (cfg == NULL)
    return;

  free(cfg[0]);   /* data buffer shared by all rows */
  free(cfg);      /* row pointers                   */
}

/* Function: FreeSCFGNode()
 *
 * Purpose:  Release a node table made of three allocations: the dp
 *           pointers, the block pointers (reached through cfg_node[0])
 *           and the cells (reached through cfg_node[0][0]). This is the
 *           layout AllocSCFGNode() builds.
 *
 * Args:     cfg_node - table to free; NULL is accepted and ignored, and a
 *                      NULL cfg_node[0] is tolerated.
 *
 * Return:   (void)
 */
void
FreeSCFGNode(double ***cfg_node)
{
  if (cfg_node == NULL)
    return;

  if (cfg_node[0] != NULL)
    free(cfg_node[0][0]);   /* cells          */
  free(cfg_node[0]);        /* block pointers */
  free(cfg_node);           /* dp pointers    */
}


/* Function: Log2ProbsSCFG()
 * 
 * Purpose:  Take a SCFG in counts form, convert to frequencies in log2 form
 *           
 * Args:     cfg  - the grammar, floating point 
 *                  
 * Return:   integer log form of the grammar
 *           Alloc'ed here; caller must free.                 
 */
void
Log2ProbSCFG(double **cfg)
{
  int    i, idx, j;
  double norm;

  for (i = 0; i < NDPS; i++)
    for (idx = 0; idx < Idx[i]; idx++) 
    {
      norm = 0.;
      for (j = idx*TransPerDp[i]; j < (idx+1)*TransPerDp[i]; j++) 
	if (Connects(i,Ntype(i,j))) 
	  norm += cfg[i][j] + 1.;
      
      for (j = idx*TransPerDp[i]; j < (idx+1)*TransPerDp[i]; j++) 
	if (Connects(i,Ntype(i,j))) 
	  cfg[i][j] = LOG2((cfg[i][j] + 1.) / norm);
    }
}

/* Function: ModSCFGEmit()
 * 
 * Date:     ER, Thu Jun 24 12:49:08 CDT 1999 [STL]
 * 
 * Purpose:  modifies the emission probs of a given SCFG. (Node probs are untouched)
 *           
 * Args:     cfg_emit  -- SCFG emission probs in log2 form.
 *           pmodel    -- the new probs used to calculate the SCFG emission probs
 *           
 * Return:   (void)
 *           cfg_emit are re-calculated.
 */          
void
MdfySCFGEmit(double ***ret_cfg_emit, double *pmodel)
{
  int      i, idx, tr;
  double **cfg_emit;             /* holds transitions */
  int      node;
  int      nemit;

  cfg_emit = AllocSCFG();

  for (i = 0; i < NDPS; i++)
    for (idx = 0; idx < Idx[i]; idx++) 
      for (tr = idx*TransPerDp[i]; tr < (idx+1)*TransPerDp[i]; tr++) {
	node = Ntype(i,tr);

	if (Connects(i,node)) { 
	  nemit = (Nemit(node) < 0)? 0: (Nemit(node) > 4 )? 0: Nemit(node);
	  
	  switch (nemit) {
	  case 0: 
	    cfg_emit[i][tr] = 0.;
	    break;
	  case 1: 
	    cfg_emit[i][tr] = pmodel[Symi(i,tr)]; 
	    break;
	  case 2: 
	    cfg_emit[i][tr] = pmodel[Symi(i,tr)] + pmodel[Symj(i,tr)]; 
	    break;
	  case 3: 
	    cfg_emit[i][tr] = pmodel[Symi(i,tr)] + pmodel[Symj(i,tr)] + 
	                       pmodel[Symk(i,tr)]; 
	    break;
	  case 4: 
	    cfg_emit[i][tr] = pmodel[Symi(i,tr)] + pmodel[Symj(i,tr)] + 
	                       pmodel[Symk(i,tr)] + pmodel[Syml(i,tr)]; 
	    break;
	  default: Die("wrong number of emissions(%d) for %s-->%s(tr=%d)", 
		       nemit, stNAME[i], dpcNAME(node), tr);
	  }
	}
      }
  *ret_cfg_emit = cfg_emit;
}

/* Function: ModSCFGNode()
 * 
 * Date:     ER, Thu Jun 24 12:49:08 CDT 1999 [STL]
 * 
 * Purpose:  modifies the node probs of a given SCFG. (Emission probs are untouched)
 *           
 * Args:     cfg_node  -- SCFG node probs in log2 form.
 *           
 * Return:   (void)
 *           cfg_node are re-calculated using a equally likely model.
 */          
void
MdfySCFGNode(double ****ret_cfg_node)
{
  int       i, idx, j, node;
  double ***cfg_node;              /* holds transitions */

  cfg_node = AllocSCFGNode();

  for (i = 0; i < NDPS; i++)
    for (idx = 0; idx < Idx[i]; idx++)
      for (j = 0; j < NodesPerDp[i]; j++) { 
	node = j + DpNodeidx[i];
	if (Connects(i,node))
	cfg_node[i][idx][j] = -LOG2(NodesPerDp[i]);
      }

  /* sum check
   */
  CheckSCFGNode(cfg_node);

  *ret_cfg_node = cfg_node;
}

/* Function: Nemit()
 *
 * Date:     ER, Mon Jun 21 11:37:28 CDT 1999
 *
 * Purpose:  Number of symbols emitted by a node type.
 *
 * Args:     node -- node type (dpcS1, dpcL, ...)
 *
 * Return:   2   for dpcS1, dpcS2S, dpcS2B, dpcS2I and dpcP
 *           1   for dpcL and dpcR
 *           -10 for the empty nodes dpcEV, dpcEW and dpcEIS
 *           For any other node that equals dpcLEN() of its offset from
 *           DpNodeidx[IS1], that offset.
 *           Anything else is reported through Die().
 */
int
Nemit(int node)
{
  switch (node) {
    /* V nodes, and the W/WB pair node */
  case dpcS1:
  case dpcS2S:
  case dpcS2B:
  case dpcS2I:
  case dpcP:
    return 2;

    /* W, WB single-symbol nodes */
  case dpcL:
  case dpcR:
    return 1;

    /* empty nodes */
  case dpcEV:
  case dpcEW:
  case dpcEIS:
    return -10;

  default:
    break;
  }

  /* remaining nodes: the value is the offset from the first IS1 node */
  if (node == dpcLEN(node-DpNodeidx[IS1]))
    return node - DpNodeidx[IS1];

  Die("Nemit()");
  return 0;   /* nothing emits */
}

/* Function: Ntype()
 *
 * Purpose:  Maps (dp, transition) to node type.
 *
 * Args:     i     -- dp we are at (V, W, WB, IS1, IS2B, IS2I or STEM)
 *           tr    -- transition index within that dp
 *
 * Return:   node type (dpcS1, dpcL, dpcLEN(tr), ...).
 *           A transition or dp that cannot be mapped is reported through
 *           Die(); -1 is returned should Die() come back.
 */
int
Ntype(int i, int tr)
{
  int block;   /* tr / NTRANSVst */
  int pos;     /* tr % NTRANSVst */

  if (i == V) {
    block = tr / NTRANSVst;
    pos   = tr % NTRANSVst;

    if (block == 16)            return dpcEV;
    if (block < 16 && pos < 16) return dpcS1;
    if (pos < 32)               return dpcS2S;
    if (pos < 48)               return dpcS2B;
    if (pos < 64)               return dpcS2I;
    if (pos < 65)               return dpcMV;

    Die("can't find node type for dp = %s, transition = %d\n", stNAME[i], tr);
    return -1;
  }

  if (i == W || i == WB) {
    if (tr < 4)  return dpcL;
    if (tr < 8)  return dpcR;
    if (tr < 24) return dpcP;
    if (tr < 25) return dpcBW;
    if (tr < 26) return dpcEW;

    Die("can't find node type for state = %s, transition = %d\n", stNAME[i], tr);
    return -1;
  }

  if (i == IS1 || i == IS2B || i == IS2I) {
    if (tr < MAXRNALOOP)   return dpcLEN(tr);
    if (tr < MAXRNALOOP+1) return dpcEIS;

    Die("can't find node type for state = %s, transition = %d\n", stNAME[i], tr);
    return -1;
  }

  if (i == STEM) {
    if (tr < MAXRNASTEM)   return dpcSTEMLEN(tr);
    if (tr < MAXRNASTEM+1) return dpcESTEM;

    Die("can't find node type for state = %s, transition = %d\n", stNAME[i], tr);
    return -1;
  }

  Die("can't find node type for state = %s, transition = %d\n", stNAME[i], tr);
  return -1;
}

/* Function: PrintPerDp()
 * Date:     ER, Thu Jun 24 17:25:09 CDT 1999 [St. Louis]
 *
 * Purpose:  Print to stdout, for one index block of one dp, every
 *           connected node with its node value, followed by the
 *           transitions mapped to that node (transition index,
 *           emission value, full SCFG value).
 *
 * Args:     i        - dp to print
 *           idx      - index block within the dp
 *           cfg      - SCFG table, indexed [dp][transition]
 *           cfg_emit - emission table, indexed [dp][transition]
 *           cfg_node - node table, indexed [dp][idx][node within dp]
 *
 * Returns:  void, prints stuff.
 */
void
PrintPerDp(int i, int idx, double **cfg, double **cfg_emit, double ***cfg_node)
{
  int k, t;
  int node;
  int first, last;      /* transition range [first,last) of the block */

  printf("** probabilities for dp %s (%d)\n", stNAME[i], idx);

  first = idx * TransPerDp[i];
  last  = first + TransPerDp[i];

  for (k = 0; k < NodesPerDp[i]; k++) {
    node = DpNodeidx[i] + k;
    if (!Connects(i,node))
      continue;

    printf("node %s\t %f\n", dpcNAME(node), cfg_node[i][idx][k]);

    for (t = first; t < last; t++) {
      if (Ntype(i,t) == node)
        printf("\t%d\t %f\t %f\n", t, cfg_emit[i][t], cfg[i][t]);
    }
    printf("\n");
  }
}

/* Function: ReassembleSCFG()
 * 
 * Date:     ER, Thu Jun 24 12:25:38 CDT 1999 [STL]
 * 
 * Purpose:  Reassembles a SCFG grammar from transition and emission probabilities.  
 * 
 * Args:   cfg_node - SCFG node transitions in log2 form.
 *         cfg_emit - SCFG emissions in log2 form.
 *         ret_cfg  - SCFG in log2 form.
 *                 
 * Return:   (void) cfg_emit is allocated and filled.
 */
void
ReassembleSCFG(double ***cfg_node, double **cfg_emit, double ***ret_cfg)
{
  int      i, idx, tr;
  double **cfg;
 
  cfg = AllocSCFG();

  for (i = 0; i < NDPS; i++)
    for (idx = 0; idx < Idx[i]; idx++)
      for (tr = idx*TransPerDp[i]; tr < (idx+1)*TransPerDp[i]; tr++) 
	if (Connects(i,Ntype(i,tr))) 
	  cfg[i][tr] = cfg_node[i][idx][Ntype(i,tr)-DpNodeidx[i]] + cfg_emit[i][tr];
  
  CheckSCFG(cfg);

  *ret_cfg = cfg;
}

/* Function: Size()
 *
 * Date:     ER, Tue Jun 22 10:02:10 CDT 1999
 *
 * Purpose:  Size associated with a transition: the transition index
 *           itself when the transition maps to the dpcLEN() node of
 *           that index.
 *
 * Args:     i     -- dp we are at (V, IS1, ...)
 *           tr    -- transition index within that dp
 *
 * Return:   (double)tr if Ntype(i,tr) equals dpcLEN(tr)
 *           -1 otherwise.
 *           A transition whose node is not connected to the dp is
 *           reported through Die().
 */
double
Size(int i, int tr)
{
  int    node;   /* node we are at */
  double size;   /* value we are calculating; double, to match the return type */

  node = Ntype(i,tr);

  if (!Connects(i,node))
    Die (" %s ---> %s transition is iffy. check node assingment", stNAME[i], dpcNAME(node));

  if (node == dpcLEN(tr)) size = (double)tr;
  else                    size = -1.;

  return size;
}

/* Function: Symi()
 *
 * Date:     ER, Tue Jun 22 10:02:10 CDT 1999
 *
 * Purpose:  Recover the first symbol encoded in a transition index.
 *
 * Args:     i  -- dp we are at (V, W, ...)
 *           tr -- transition index within that dp
 *
 * Return:   the symbol for dpcS1, dpcS2S, dpcS2B, dpcS2I, dpcL, dpcR
 *           and dpcP transitions
 *           -1 for every other node type.
 *           An unconnected node, or a result above 3, is reported
 *           through Die().
 */
int
Symi(int i, int tr)
{
  int node;          /* node we are at */
  int first = -1;    /* symbol we are calculating */

  node = Ntype(i,tr);

  if (!Connects(i,node))
    Die (" %s ---> %s transition is iffy. check node assignment", stNAME[i], dpcNAME(node));

  /* V nodes: position inside the NTRANSVst-wide block, 16 slots per node */
  if      (node == dpcS1)  first = (tr%NTRANSVst)/4;
  else if (node == dpcS2S) first = ((tr%NTRANSVst) - 16)/4;
  else if (node == dpcS2B) first = ((tr%NTRANSVst) - 32)/4;
  else if (node == dpcS2I) first = ((tr%NTRANSVst) - 48)/4;

  /* W, WB nodes */
  else if (node == dpcL)   first = tr;
  else if (node == dpcR)   first = tr - 4;
  else if (node == dpcP)   first = (tr - 8)/4;

  if (first > 3)
    Die("Problems to calculate symi in transition [%s ---> %s] \n", stNAME[i], dpcNAME(node));

  return first;
}

/* Function: Symj()
 *
 * Date:     ER, Tue Jun 22 10:06:30 CDT 1999
 *
 * Purpose:  Recover the second symbol encoded in a transition index.
 *
 * Args:     i  -- dp we are at (V, W, ...)
 *           tr -- transition index within that dp
 *
 * Return:   the symbol for dpcS1, dpcS2S, dpcS2B, dpcS2I and dpcP
 *           transitions
 *           -1 for every other node type.
 *           An unconnected node, or a result above 3, is reported
 *           through Die().
 */
int
Symj(int i, int tr)
{
  int node;           /* node we are at */
  int second = -1;    /* symbol we are calculating */

  node = Ntype(i,tr);

  if (!Connects(i,node))
    Die (" %s ---> %s transition is iffy. check node assingment", stNAME[i], dpcNAME(node));

  /* V nodes: position inside the NTRANSVst-wide block, 16 slots per node */
  if      (node == dpcS1)  second = (tr%NTRANSVst)%4;
  else if (node == dpcS2S) second = ((tr%NTRANSVst) - 16)%4;
  else if (node == dpcS2B) second = ((tr%NTRANSVst) - 32)%4;
  else if (node == dpcS2I) second = ((tr%NTRANSVst) - 48)%4;

  /* W, WB nodes */
  else if (node == dpcP)   second = (tr - 8)%4;

  if (second > 3)
    Die("Problems to calculate symj in transition [%s ---> %s] ", stNAME[i], dpcNAME(node));

  return second;
}

/* Function: Symk()
 *
 * Date:     ER, Tue Jun 22 10:17:04 CDT 1999
 *
 * Purpose:  Recover the third symbol encoded in a transition index.
 *           It is taken from the number of the NTRANSVst-wide block the
 *           transition falls in.
 *
 * Args:     i  -- dp we are at (V, W, ...)
 *           tr -- transition index within that dp
 *
 * Return:   the symbol for dpcS1, dpcS2S, dpcS2B, dpcS2I and dpcMV
 *           transitions
 *           -1 for every other node type.
 *           An unconnected node, or a result above 3, is reported
 *           through Die().
 */
int
Symk(int i, int tr)
{
  int node;          /* node we are at */
  int third = -1;    /* symbol we are calculating */

  node = Ntype(i,tr);

  if (!Connects(i,node))
    Die (" %s ---> %s transition is iffy. check node assingment", stNAME[i], dpcNAME(node));

  switch (node) {
    /* all V nodes that carry it decode it the same way */
  case dpcS1:
  case dpcS2S:
  case dpcS2B:
  case dpcS2I:
  case dpcMV:
    third = (tr/NTRANSVst)/4;
    break;
  }

  if (third > 3)
    Die("Problems to calculate symk in transition [%s ---> %s] ", stNAME[i], dpcNAME(node));

  return third;
}

/* Function: Syml()
 *
 * Date:     ER, Tue Jun 22 10:17:39 CDT 1999
 *
 * Purpose:  Recover the fourth symbol encoded in a transition index.
 *           It is taken from the number of the NTRANSVst-wide block the
 *           transition falls in.
 *
 * Args:     i  -- dp we are at (V, W, ...)
 *           tr -- transition index within that dp
 *
 * Return:   the symbol for dpcS1, dpcS2S, dpcS2B, dpcS2I and dpcMV
 *           transitions
 *           -1 for every other node type.
 *           An unconnected node, or a result above 3, is reported
 *           through Die().
 */
int
Syml(int i, int tr)
{
  int node;           /* node we are at */
  int fourth = -1;    /* symbol we are calculating */

  node = Ntype(i,tr);

  if (!Connects(i,node))
    Die (" %s ---> %s transition is iffy. check node assingment", stNAME[i], dpcNAME(node));

  switch (node) {
    /* all V nodes that carry it decode it the same way */
  case dpcS1:
  case dpcS2S:
  case dpcS2B:
  case dpcS2I:
  case dpcMV:
    fourth = (tr/NTRANSVst)%4;
    break;
  }

  if (fourth > 3)
    Die("Problems to calculate syml in transition [%s ---> %s] ", stNAME[i], dpcNAME(node));

  return fourth;
}

/* Function: TieProbs()
 *
 * Date:     ER, Thu Jun 24 11:56:09 CDT 1999 [STL]
 *
 * Purpose:  Provide for tying probabilities together.
 *           Given a SCFG, separates node (transition) probabilities from
 *           emission probabilities:
 *             node value     = log2 of the summed probabilities of the
 *                              connected transitions mapped to the node
 *             emission value = transition value minus its node value
 *           Both tables are sum-checked, and as a paranoid test the
 *           grammar is reassembled from them (and checked) before the
 *           reassembled copy is discarded.
 *
 * Args:     cfg          -- SCFG in log2 form (read only).
 *           ret_cfg_node -- RETURN: node values in log2 form,
 *                           from AllocSCFGNode().
 *           ret_cfg_emit -- RETURN: emission values in log2 form,
 *                           from AllocSCFG().
 *
 * Return:   (void)
 *           *ret_cfg_node and *ret_cfg_emit are allocated and filled.
 */
void
TieProbs(double **cfg, double ****ret_cfg_node, double ***ret_cfg_emit)
{
  int       i,j;
  int       idx,node;
  double  **cfg_new = NULL;      /* holds reassembled cfg (for debugging purposes)        */
  double ***cfg_node;            /* holds transitions                                     */
  double  **cfg_emit;            /* holds emissions                                       */

  cfg_node = AllocSCFGNode();
  cfg_emit = AllocSCFG();
  /* cfg_new is NOT allocated here: ReassembleSCFG() allocates the table
   * it returns, so a table allocated up front would be leaked.
   */

  /* accumulate, per node, the probability of its transitions
   * (cfg_node starts out zeroed)
   */
  for (i = 0; i < NDPS; i++)
    for (idx = 0; idx < Idx[i]; idx++)
      for (j = idx*TransPerDp[i]; j < (idx+1)*TransPerDp[i]; j++)
        if (Connects(i,Ntype(i,j)))
          cfg_node[i][idx][Ntype(i,j)-DpNodeidx[i]] += EXP2(cfg[i][j]);

  for (i = 0; i < NDPS; i++)
    for (idx = 0; idx < Idx[i]; idx++)
      for (j = 0; j < NodesPerDp[i]; j++) {
        node = j + DpNodeidx[i];
        if (Connects(i,node))
          cfg_node[i][idx][j] = LOG2(cfg_node[i][idx][j]);  /* convert to log2 form  */
      }
  CheckSCFGNode(cfg_node);

  /* what is left of a transition once its node is factored out */
  for (i = 0; i < NDPS; i++)
    for (idx = 0; idx < Idx[i]; idx++)
      for (j = idx*TransPerDp[i]; j < (idx+1)*TransPerDp[i]; j++)
        if (Connects(i,Ntype(i,j)))
          cfg_emit[i][j] = cfg[i][j] - cfg_node[i][idx][Ntype(i,j)-DpNodeidx[i]];
  CheckSCFGEmit(cfg_emit);

  /* paranoid check. Do emissions and transitions reassemble?
   */
  ReassembleSCFG(cfg_node, cfg_emit, &cfg_new);
  FreeSCFG(cfg_new);

  if (0) {                       /* debugging output, disabled */
    PrintPerDp(W, 0, cfg, cfg_emit, cfg_node);
    PrintPerDp(WB, 0, cfg, cfg_emit, cfg_node);
  }

  *ret_cfg_node = cfg_node;
  *ret_cfg_emit = cfg_emit;
}

/* Function: Ttype()
 *
 * Date:     ER, Wed Jun 23 13:29:05 CDT 1999 [STL]
 *
 * Purpose:  Maps node/symbols/size to transition type. Like the Ntype
 *           macro, but too complicated to be a macro.
 *
 * Args:     node -- e.g. dpcP, etc. Index of a node type.
 *           symi -- 0..3 integer for ACGU.
 *           symj -- 0..3 integer for ACGU.
 *           symk -- 0..3 integer for ACGU.
 *           syml -- 0..3 integer for ACGU.
 *           size -- loop length; only used when node is a dpcLEN() node
 *
 *           Only the arguments a node actually uses are looked at:
 *           all four symbols for dpcS1/S2S/S2B/S2I, symi and symj for
 *           dpcMV and dpcP, symi for dpcL, symj for dpcR.
 *
 * Return:   transition index for the node.
 *           -1 if a symbol that is used is 4 or more, or if size is
 *           MAXRNALOOP or more.
 *           A node that is neither a named node nor dpcLEN(size) is
 *           reported through Die().
 */
int
Ttype(int node, int symi, int symj, int symk, int syml, int size)
{
  int tr = -1;   /* stays -1 unless a transition is found */

  switch (node) {
    /* V nodes */
  case dpcS1:  tr = (symi<4 && symj<4 && symk<4 && syml<4) ?
                 idxS1(symk,syml,symi,symj)                    : -1; break;
  case dpcS2S: tr = (symi<4 && symj<4 && symk<4 && syml<4) ?
                 idxS2S(symk,syml,symi,symj)                   : -1; break;
  case dpcS2B: tr = (symi<4 && symj<4 && symk<4 && syml<4) ?
                 idxS2B(symk,syml,symi,symj)                   : -1; break;
  case dpcS2I: tr = (symi<4 && symj<4 && symk<4 && syml<4) ?
                 idxS2I(symk,syml,symi,symj)                   : -1; break;
  case dpcMV:  tr = (symi<4 && symj<4) ? idxMV(symi,symj)      : -1; break;
  case dpcEV:  tr = idxEV;                                           break;

    /* W/WB nodes */
  case dpcL:   tr = (symi<4) ? idxL(symi)                      : -1; break;
  case dpcR:   tr = (symj<4) ? idxR(symj)                      : -1; break;
  case dpcP:   tr = (symi<4 && symj<4) ? idxP(symi,symj)       : -1; break;
  case dpcBW:  tr = idxBW;                                           break;
  case dpcEW:  tr = idxEW;                                           break;

    /* IS's  nodes */
  case dpcEIS: tr = idxEIS;                                          break;

  default:
    if (node == dpcLEN(size)) tr = (size < MAXRNALOOP) ? idxLEN(size) : -1;
    /* one conversion per argument: the four symbols, then size */
    else Die("can't find state type for node = %s, (%d,%d:%d,%d) %d \n",
             dpcNAME(node), symi, symj, symk, syml, size);
  }

  return tr;
}

/* NCFG()
 *
 * Total number of transitions of the first ndp dp's, i.e.
 * Ntrans[0] + ... + Ntrans[ndp-1]; 0 when ndp is not positive.
 */
static int
NCFG(int ndp)
{
  int total = 0;

  while (ndp-- > 0)
    total += Ntrans[ndp];

  return total;
}

/* NIdx()
 *
 * Total number of index blocks of the first ndp dp's, i.e.
 * Idx[0] + ... + Idx[ndp-1]; 0 when ndp is not positive.
 */
static int
NIdx(int ndp)
{
  int total = 0;

  while (ndp-- > 0)
    total += Idx[ndp];

  return total;
}

static int
NNodes(int ndp)
{
  int nnode = 0;
  int i, idx;
  
   for (i = 0; i < ndp; i++)
     for (idx = 0; idx < Idx[i]; idx++)
       nnode += NodesPerDp[i];
   
   return nnode;
}

