/* Code written by Lorenzo Torresani and Shoumen Saha */

#include <stdio.h>
#include <math.h>
#include <string.h>
#include "mat.h"
#include "mex.h"


#define IN 
#define OUT
#define MYDEBUG 0


static mxArray * McomputeH(int nargout_,
                           const mxArray * Uc,
                           const mxArray * Vc,
                           const mxArray * E_z,
                           const mxArray * E_zz,
                           const mxArray * RR);


void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
    if (nlhs > 1) {
        mexErrMsgTxt(
            "Run-time Error: File: computeh Line: 1 Column:"
            " 1 The function \"computeh\" was called with"
            " more than the declared number of outputs (1).");
    }
    if (nrhs > 5) {
        mexErrMsgTxt(
            "Run-time Error: File: computeh Line: 1 Column:"
            " 1 The function \"computeh\" was called with"
            " more than the declared number of inputs (5).");
    }
    plhs[0] = McomputeH(nlhs, prhs[0], prhs[1], prhs[2], prhs[3], prhs[4]);
}



double ** getMxArray(IN const mxArray *mxArr, 
                     OUT int *rows, 
                     OUT int *cols) 
{
  int i, j, numDim;
  double *arrDataPt, **matrix;

  if((numDim = mxGetNumberOfDimensions(mxArr)) != 2)
  {
    printf("Input matrix has %d dimensions, not 2!\n", numDim);
    return NULL;
  }

  *rows = mxGetM(mxArr);
  *cols = mxGetN(mxArr);

  if(((matrix = (double **) malloc(sizeof(double *)*(*rows))) == NULL) ||
     ((matrix[0] = (double *) malloc(sizeof(double)*(*rows)*(*cols))) == NULL))
    return NULL;
  
  for(i=1; i<(*rows); i++)
    matrix[i] = matrix[0] + i*(*cols);

  arrDataPt = mxGetPr(mxArr);
  for(j=0; j<(*cols); j++)
    for(i=0; i<(*rows); i++)
      matrix[i][j] = *(arrDataPt ++);

  return matrix;
}


mxArray * putMxArray(double *img, int rows, int cols/*, char *nameImg*/)
{
	int i, j;
	double *ptr;
	mxArray *mxArr;

	mxArr  = mxCreateDoubleMatrix(rows, cols, mxREAL);

  ptr = mxGetPr(mxArr);

	for(j=0; j<cols; j++)
		for(i=0; i<rows; i++)
			*(ptr ++) = img[i*cols + j];
	
  /*mxSetPr(mxArr, data);*/
	/*mxSetName(mxArr, nameImg);*/

	return mxArr;
}



double ** createMatrix(IN int rows, 
                       IN int cols) 
{
  int i;
  double **matrix;

  if(((matrix = (double **) malloc(sizeof(double *)*rows)) == NULL) ||
     ((matrix[0] = (double *) malloc(sizeof(double)*rows*cols)) == NULL))
    return NULL;
  
  for(i=1; i<rows; i++)
    matrix[i] = matrix[0] + i*cols;


  memset(matrix[0], 0, sizeof(double)*rows*cols);

  return matrix;
}


int ** createIntMatrix(IN int rows, 
                       IN int cols) 
{
  int i;
  int **matrix;

  if(((matrix = (int **) malloc(sizeof(int *)*rows)) == NULL) ||
     ((matrix[0] = (int *) malloc(sizeof(int)*rows*cols)) == NULL))
    return NULL;
  
  for(i=1; i<rows; i++)
    matrix[i] = matrix[0] + i*cols;


  memset(matrix[0], 0, sizeof(int)*rows*cols);

  return matrix;
}


void freeMatrix(IN  double **matrix)
{
  free(*matrix);
  free(matrix);
}

void freeIntMatrix(IN  int **matrix)
{
  free(*matrix);
  free(matrix);
}


void IO_MLabCreateFile(char *fileName)
{
	MATFile *fp;

	fp = matOpen(fileName, "w");

	matClose(fp);
}


void IO_MLabWriteDoubleImg(char *fileName, char *nameImg, double *img, int rows, int cols)
{
	MATFile *fp;
	int i, j, dims[2];
	double *ptr;
	mxArray *mxArr;

  dims[0] = rows;
  dims[1] = cols;
	fp = matOpen(fileName, "u");

	mxArr  = mxCreateNumericArray(2, dims, mxDOUBLE_CLASS, mxREAL);

	ptr = mxGetPr(mxArr);
	for(j=0; j<cols; j++)
		for(i=0; i<rows; i++)
			*(ptr ++) = img[i*cols + j];
	
	/*mxSetName(mxArr, nameImg);
	matPutArray(fp, mxArr);*/
  matPutVariable(fp, nameImg, mxArr);

	matClose(fp);

	mxDestroyArray(mxArr);
}


void kron(IN int rowsA,
          IN int colsA,
          IN double **A,
          IN int rowsB,
          IN int colsB,
          IN double **B,
          IN int rowsC,
          IN int colsC,
          OUT double **C)
{
  int i, j, k, l;

  if ((rowsC != (rowsA*rowsB)) || (colsC != (colsA*colsB)))
  {
    printf("kron: incompatible matrix size\n");
    return;
  }

  for(i=0; i<rowsA; i++)  
    for(k=0; k<rowsB; k++)
      for(j=0; j<colsA; j++)
        for(l=0; l<colsB; l++)
          C[rowsB*i+k][colsB*j+l] = A[i][j]*B[k][l];

}



double conjgradient(IN  int n,
                    IN  double **A, 
                    IN  double **b, 
                    IN  int i_max,      /* max # iterations */
                    IN  double epsilon, /* error tolerance */
                    IN OUT double **x)
{
  //my conjugate gradient solver for .5*x'*A*x -b'*x, based on the
  // tutorial by Jonathan Shewchuk 
  //FILE * fd;
  int i=0, numrecompute, row, col;
  double Ax, delta_old, delta_new, delta_0, **r, **d, **q, alpha, alpha_denom, beta;

  r = createMatrix(1, n);
  d = createMatrix(1, n);
  q = createMatrix(1, n);

  /* r = b - A*x */
  /* delta_new = r'*r */
  delta_new = 0;
  for (row=0; row<n; row++)
  {
    Ax = 0;
    for (col=0; col<n; col++)
      Ax += A[row][col]*x[0][col];

    d[0][row] = r[0][row] = b[0][row] - Ax;

    delta_new += r[0][row]*r[0][row];
  }

  delta_0 = delta_new;

  numrecompute = (int) sqrt((double) n);
  //  printf("numrecompute = %d\n",numrecompute);


  //fd = fopen("err.txt", "w");
  while ((i < i_max) && (delta_new > epsilon*epsilon*delta_0))
  {
    //printf("Step %d, delta_new = %f      \r",i,delta_new);
    //fprintf(fd, "%d %f\n", i, (float)delta_new);
  
    /* q = A*d */
    alpha_denom = 0;
    for (row=0; row<n; row++)
    {
      q[0][row] = 0;
      for (col=0; col<n; col++)
        q[0][row] += A[row][col]*d[0][col];

      alpha_denom += d[0][row]*q[0][row];
    }

    alpha = delta_new / alpha_denom;

    /* x += d*alpha */
    for (row=0; row<n; row++)
      x[0][row] += alpha*d[0][row];

    if (i % numrecompute == 0)
	  {
      /* r = b - A*x */
      for (row=0; row<n; row++)
      {
        Ax = 0;
        for (col=0; col<n; col++)
          Ax += A[row][col]*x[0][col];

        r[0][row] = b[0][row] - Ax;
      }	  
	  }
    else
    {
      /* r = r-q*alpha; */
      for (row=0; row<n; row++)
        r[0][row] -= alpha*q[0][row];
    }

    delta_old = delta_new;

    /* delta_new = r'*r */
    delta_new = 0;
    for (row=0; row<n; row++)
      delta_new += r[0][row]*r[0][row];

    beta = delta_new/delta_old;

    /* d = r + beta*d */
    for (row=0; row<n; row++)
      d[0][row] = r[0][row] + beta*d[0][row];

    i++;
  }
  //fclose(fd);

  freeMatrix(r);
  freeMatrix(d);
  freeMatrix(q);

  return delta_new;
}



double conjgradient2(IN  int n,
                     IN  double **A, 
                     IN  double **b, 
                     IN  int i_max,      /* max # iterations */
                     IN  double epsilon, /* error tolerance */
                     IN  int k,
                     IN OUT double **x)
{
  //my conjugate gradient solver for .5*x'*A*x -b'*x, based on the
  // tutorial by Jonathan Shewchuk  (or is it +b'*x?)
  //FILE * fd;
  int i=0, numrecompute, row, sparsity_offset, j, ind, col_offset;
  double Ax, delta_old, delta_new, delta_0, **r, **d, **q, alpha, alpha_denom, beta;

  r = createMatrix(1, n);
  d = createMatrix(1, n);
  q = createMatrix(1, n);

  sparsity_offset = n / (k+1);

  /* r = b - A*x */
  /* delta_new = r'*r */
  delta_new = 0;
  for (row=0; row<n; row++)
  {
    Ax = 0;
    
    col_offset = ((row%sparsity_offset)/3) * 3;
    for (j=0; j<(k+1); j++)
    {
      ind = col_offset + j*sparsity_offset;
      Ax += A[row][ind]*x[0][ind] + A[row][ind+1]*x[0][ind+1] + A[row][ind+2]*x[0][ind+2];
    }

    d[0][row] = r[0][row] = b[0][row] - Ax;

    delta_new += r[0][row]*r[0][row];
  }

  delta_0 = delta_new;

  numrecompute = (int) sqrt((double) n);
  //  printf("numrecompute = %d\n",numrecompute);


  //fd = fopen("err.txt", "w");
  while ((i < i_max) && (delta_new > epsilon*epsilon*delta_0))
  {
    //printf("Step %d, delta_new = %f      \r",i,delta_new);
    //fprintf(fd, "%d %f\n", i, (float)delta_new);
  
    /* q = A*d */
    alpha_denom = 0;
    for (row=0; row<n; row++)
    {
      q[0][row] = 0;

      col_offset = ((row%sparsity_offset)/3) * 3;
      for (j=0; j<(k+1); j++)
      {
        ind = col_offset + j*sparsity_offset;
        q[0][row] += A[row][ind]*d[0][ind] + A[row][ind+1]*d[0][ind+1] + A[row][ind+2]*d[0][ind+2];
      }

      alpha_denom += d[0][row]*q[0][row];
    }

    alpha = delta_new / alpha_denom;

    /* x += d*alpha */
    for (row=0; row<n; row++)
      x[0][row] += alpha*d[0][row];

    if (i % numrecompute == 0)
	  {
      /* r = b - A*x */
      for (row=0; row<n; row++)
      {
        Ax = 0;

        col_offset = ((row%sparsity_offset)/3) * 3;
        for (j=0; j<(k+1); j++)
        {
          ind = col_offset + j*sparsity_offset;
          Ax += A[row][ind]*x[0][ind] + A[row][ind+1]*x[0][ind+1] + A[row][ind+2]*x[0][ind+2];
        }

        r[0][row] = b[0][row] - Ax;
      }	  
	  }
    else
    {
      /* r = r-q*alpha; */
      for (row=0; row<n; row++)
        r[0][row] -= alpha*q[0][row];
    }

    delta_old = delta_new;

    /* delta_new = r'*r */
    delta_new = 0;
    for (row=0; row<n; row++)
      delta_new += r[0][row]*r[0][row];

    beta = delta_new/delta_old;

    /* d = r + beta*d */
    for (row=0; row<n; row++)
      d[0][row] = r[0][row] + beta*d[0][row];

    i++;
  }
  //fclose(fd);

  freeMatrix(r);
  freeMatrix(d);
  freeMatrix(q);

  return delta_new;
}



static mxArray * McomputeH(int nargout_,
                           const mxArray * Uc,
                           const mxArray * Vc,
                           const mxArray * E_z,
                           const mxArray * E_zz,
                           const mxArray * RR) {
	double **Ucc, **Vcc, **E_zc, **E_zzc, **RRc, **KK1c, **KK2c, **kk3c, **P_tc, **R_tc, **zz_hat_tc, **Tmp1c, **Tmp2c, **vecH_hatc=NULL,
         delta_err, epsilon;
	int tc, Fc, Nc, junk1, junk2, kc, ic, jc, ii, jj, kk, ll, i_max, **sparsityMap;

  mxArray * vecH_hat = NULL;

  mxArray * KK1 = NULL;
  mxArray * R_t = NULL;
  mxArray * P_t = NULL;
  mxArray * t = NULL;
  mxArray * KK2 = NULL;
  mxArray * F = NULL;
  mxArray * N = NULL;
  mxArray * k = NULL;


#if MYDEBUG
#define DEBUGFILENAME ".\\debug.mat"
  IO_MLabCreateFile(DEBUGFILENAME);
#endif

	Ucc = getMxArray(Uc, &Fc, &Nc);
#if MYDEBUG
    IO_MLabWriteDoubleImg(DEBUGFILENAME, "Ucc", Ucc[0], Fc, Nc);
#endif

	Vcc = getMxArray(Vc, &junk1, &junk2);
#if MYDEBUG
    IO_MLabWriteDoubleImg(DEBUGFILENAME, "Vcc", Vcc[0], Fc, Nc);
#endif

  E_zc = getMxArray(E_z, &kc, &junk1);
#if MYDEBUG
    IO_MLabWriteDoubleImg(DEBUGFILENAME, "E_zc", E_zc[0], kc, Fc);
#endif

  E_zzc = getMxArray(E_zz, &junk1, &junk2);
#if MYDEBUG
    IO_MLabWriteDoubleImg(DEBUGFILENAME, "E_zzc", E_zzc[0], kc*Fc, kc);
#endif
    
    
  RRc = getMxArray(RR, &junk1, &junk2);
#if MYDEBUG
    IO_MLabWriteDoubleImg(DEBUGFILENAME, "RRc", RRc[0], 2*Fc, 3);
#endif

	/*
	 * KK2 = zeros(3*N*(k+1), 3*N*(k+1)); 
	 */
	KK2c = createMatrix(3*Nc*(kc+1), 3*Nc*(kc+1));
    
  /*
   * KK3 = zeros(3*N, k+1);
   */
	kk3c = createMatrix(1,3*Nc*(kc+1));

	P_tc = createMatrix(1, 2*Nc);
  zz_hat_tc = createMatrix(kc+1, kc+1);
  R_tc = createMatrix(2, 3);
  KK1c = createMatrix(2*Nc, 3*Nc);
  Tmp1c = createMatrix(3*Nc, 3*Nc);
  Tmp2c = createMatrix(1, 3*Nc);
	for(tc=0; tc<Fc; tc++) 
  {    

    /*
     * P_t = [Uc(t,:); Vc(t,:)];
     */
    memset(P_tc[0], 0, sizeof(double)*2*Nc);     
    for(jc=0; jc<Nc; jc++)
	  {
      P_tc[0][2*jc] = Ucc[tc][jc];
	    P_tc[0][2*jc+1] = Vcc[tc][jc];
	  }
#if MYDEBUG
    IO_MLabWriteDoubleImg(DEBUGFILENAME, "P_tc", P_tc[0], 1, 2*Nc);
#endif

    /*
     * zz_hat_t = [1 E_z(:,t)'; E_z(:,t) E_zz((t-1)*k+1:t*k,:)];
     */
    memset(zz_hat_tc[0], 0, sizeof(double)*(kc+1)*(kc+1));     
    zz_hat_tc[0][0] = 1;
    for(jc=1; jc<kc+1; jc++)
    {
      zz_hat_tc[jc][0] = E_zc[jc-1][tc];
      zz_hat_tc[0][jc] = E_zc[jc-1][tc];
    }
    for(ic=1; ic<kc+1; ic++)
    {
      for(jc=1; jc<kc+1; jc++)
	    {
        zz_hat_tc[ic][jc] = E_zzc[tc*kc + (ic-1)][jc-1];
      }
	  }
#if MYDEBUG
    IO_MLabWriteDoubleImg(DEBUGFILENAME, "zz_hat_tc", zz_hat_tc[0], kc+1, kc+1);
#endif

    /*
     * R_t = [RR(t, :); RR(t+F, :)];
     */
    for(jc=0; jc<3; jc++)
	  {
      R_tc[0][jc] = RRc[tc][jc];
      R_tc[1][jc] = RRc[tc+Fc][jc];
    }

    /*
     * KK1 = kron(speye(N), R_t);
     */
    memset(KK1c[0], 0, sizeof(double)*2*Nc*3*Nc);     
    for(ic=0; ic<Nc; ic++)
	  {
      KK1c[ic*2][ic*3] = R_tc[0][0];
      KK1c[ic*2][ic*3+1] = R_tc[0][1];
      KK1c[ic*2][ic*3+2] = R_tc[0][2];
      KK1c[ic*2+1][ic*3] = R_tc[1][0];
      KK1c[ic*2+1][ic*3+1] = R_tc[1][1];
      KK1c[ic*2+1][ic*3+2] = R_tc[1][2];
    }
#if MYDEBUG
    IO_MLabWriteDoubleImg(DEBUGFILENAME, "KK1c", KK1c[0], 2*Nc, 3*Nc);
#endif

    /* computes KK1'*KK1 */
    memset(Tmp1c[0], 0, sizeof(double)*3*Nc*3*Nc);
    Tmp1c[0][0] = KK1c[0][0]*KK1c[0][0] + KK1c[1][0]*KK1c[1][0];
    Tmp1c[0][1] = KK1c[0][0]*KK1c[0][1] + KK1c[1][0]*KK1c[1][1];
    Tmp1c[0][2] = KK1c[0][0]*KK1c[0][2] + KK1c[1][0]*KK1c[1][2];


    Tmp1c[1][0] = KK1c[0][1]*KK1c[0][0] + KK1c[1][1]*KK1c[1][0];
    Tmp1c[1][1] = KK1c[0][1]*KK1c[0][1] + KK1c[1][1]*KK1c[1][1];
    Tmp1c[1][2] = KK1c[0][1]*KK1c[0][2] + KK1c[1][1]*KK1c[1][2];

    Tmp1c[2][0] = KK1c[0][2]*KK1c[0][0] + KK1c[1][2]*KK1c[1][0];
    Tmp1c[2][1] = KK1c[0][2]*KK1c[0][1] + KK1c[1][2]*KK1c[1][1];
    Tmp1c[2][2] = KK1c[0][2]*KK1c[0][2] + KK1c[1][2]*KK1c[1][2];
    for(ic=1; ic<Nc; ic++)
    {
      Tmp1c[ic*3][ic*3] =   Tmp1c[0][0];
      Tmp1c[ic*3][ic*3+1] = Tmp1c[0][1];
      Tmp1c[ic*3][ic*3+2] = Tmp1c[0][2];


      Tmp1c[ic*3+1][ic*3] =   Tmp1c[1][0];
      Tmp1c[ic*3+1][ic*3+1] = Tmp1c[1][1];
      Tmp1c[ic*3+1][ic*3+2] = Tmp1c[1][2];

      Tmp1c[ic*3+2][ic*3] =   Tmp1c[2][0];
      Tmp1c[ic*3+2][ic*3+1] = Tmp1c[2][1];
      Tmp1c[ic*3+2][ic*3+2] = Tmp1c[2][2];
    }
#if MYDEBUG
    IO_MLabWriteDoubleImg(DEBUGFILENAME, "Tmp1c", Tmp1c[0], 3*Nc, 3*Nc);
#endif

    /*
     * KK2 = KK2 + kron(zz_hat_t', KK1'*KK1);
     */ 

    /*for(ii=0; ii<(kc+1); ii++)  
     for(kk=0; kk<(3*Nc); kk++)
       for(jj=0; jj<(kc+1); jj++)
         for(ll=0; ll<(3*Nc); ll++)
           KK2c[(3*Nc)*ii+kk][(3*Nc)*jj+ll] += zz_hat_tc[ii][jj]*Tmp1c[kk][ll];*/
    for(ii=0; ii<(kc+1); ii++)  
     for(kk=0; kk<Nc; kk++)
       for(jj=0; jj<(kc+1); jj++)
       {
         KK2c[(3*Nc)*ii+kk*3][(3*Nc)*jj+kk*3] += zz_hat_tc[ii][jj]*Tmp1c[kk*3][kk*3];
         KK2c[(3*Nc)*ii+kk*3][(3*Nc)*jj+kk*3+1] += zz_hat_tc[ii][jj]*Tmp1c[kk*3][kk*3+1];
         KK2c[(3*Nc)*ii+kk*3][(3*Nc)*jj+kk*3+2] += zz_hat_tc[ii][jj]*Tmp1c[kk*3][kk*3+2];

         KK2c[(3*Nc)*ii+kk*3+1][(3*Nc)*jj+kk*3] += zz_hat_tc[ii][jj]*Tmp1c[kk*3+1][kk*3];
         KK2c[(3*Nc)*ii+kk*3+1][(3*Nc)*jj+kk*3+1] += zz_hat_tc[ii][jj]*Tmp1c[kk*3+1][kk*3+1];
         KK2c[(3*Nc)*ii+kk*3+1][(3*Nc)*jj+kk*3+2] += zz_hat_tc[ii][jj]*Tmp1c[kk*3+1][kk*3+2];

         KK2c[(3*Nc)*ii+kk*3+2][(3*Nc)*jj+kk*3] += zz_hat_tc[ii][jj]*Tmp1c[kk*3+2][kk*3];
         KK2c[(3*Nc)*ii+kk*3+2][(3*Nc)*jj+kk*3+1] += zz_hat_tc[ii][jj]*Tmp1c[kk*3+2][kk*3+1];
         KK2c[(3*Nc)*ii+kk*3+2][(3*Nc)*jj+kk*3+2] += zz_hat_tc[ii][jj]*Tmp1c[kk*3+2][kk*3+2];
       }
#if MYDEBUG
    IO_MLabWriteDoubleImg(DEBUGFILENAME, "KK2c", KK2c[0], 3*Nc*(kc+1), 3*Nc*(kc+1));
#endif
    

    /*
     * KK3 = KK3 + KK1'* P_t(:) * [1 E_z(:,t)'];
     */

    /* computes KK1'*P_t(:) */
    memset(Tmp2c[0], 0, sizeof(double)*3*Nc);
    for(ic=0; ic<Nc; ic++)
    {
      Tmp2c[0][3*ic] =   KK1c[2*ic][3*ic]*P_tc[0][2*ic] + KK1c[2*ic+1][3*ic]*P_tc[0][2*ic+1];
      Tmp2c[0][3*ic+1] = KK1c[2*ic][3*ic+1]*P_tc[0][2*ic] + KK1c[2*ic+1][3*ic+1]*P_tc[0][2*ic+1];
      Tmp2c[0][3*ic+2] = KK1c[2*ic][3*ic+2]*P_tc[0][2*ic] + KK1c[2*ic+1][3*ic+2]*P_tc[0][2*ic+1];
    }
#if MYDEBUG
    IO_MLabWriteDoubleImg(DEBUGFILENAME, "Tmp2c", Tmp2c[0], 1, 3*Nc);
#endif

    for(ic=0; ic<3*Nc; ic++)
    {
      /*KK3c[ic][0] = KK3c[ic][0] + Tmp2c[0][ic];*/
      kk3c[0][ic] = kk3c[0][ic] + Tmp2c[0][ic];

      for(jc=1; jc<(kc+1); jc++)
        /*KK3c[ic][jc] = KK3c[ic][jc] + Tmp2c[0][ic]*E_zc[jc-1][tc];*/
        kk3c[0][ic + jc*(3*Nc)] = kk3c[0][ic + jc*(3*Nc)] + Tmp2c[0][ic]*E_zc[jc-1][tc];
    }
#if MYDEBUG
    IO_MLabWriteDoubleImg(DEBUGFILENAME, "kk3c", kk3c[0], 1, 3*Nc*(kc+1));
#endif


    //printf("debug\n");

  }


  freeMatrix(Ucc);
  freeMatrix(Vcc);
  freeMatrix(E_zc);
  freeMatrix(E_zzc);
  freeMatrix(RRc);
  freeMatrix(KK1c);
  freeMatrix(P_tc);
  freeMatrix(R_tc);
  freeMatrix(zz_hat_tc);
  freeMatrix(Tmp1c);
  freeMatrix(Tmp2c);


  vecH_hatc = createMatrix(1, 3*Nc*(kc+1));
  for(ic=0; ic<3*Nc; ic++)
    vecH_hatc[0][ic] = (double) rand() / (double) RAND_MAX;

  epsilon = 0.000000001;
  i_max = 1000;
  
  //sparsityMap = createIntMatrix(3*Nc*(kc+1), 3*Nc*(kc+1)+1);
  //getSparsityMap(3*Nc*(kc+1), 3*Nc*(kc+1), KK2c, sparsityMap);

  //delta_err = conjgradient(3*Nc*(kc+1), KK2c, kk3c, i_max, epsilon, vecH_hatc);
  delta_err = conjgradient2(3*Nc*(kc+1), KK2c, kk3c, i_max, epsilon, kc, vecH_hatc);

  if (delta_err>0.01)
    printf("Large CG error\n");

  vecH_hat = mxCreateDoubleMatrix(3*Nc*(kc+1), 1, mxREAL);
  memcpy(mxGetPr(vecH_hat), vecH_hatc[0], sizeof(double)*3*Nc*(kc+1));

  freeMatrix(KK2c);
  freeMatrix(kk3c);
  freeMatrix(vecH_hatc);

  return vecH_hat;
}