#include <stdlib.h>
#include <stdio.h>
#include "util.h"
#include <mpi.h>
#include <math.h>
#ifdef OMP
#include <omp.h>
#endif
#define __MSGSIZ_MAX 100000

/* instrIndexIdToColIndexGlobal converts, for each of the totrows rows, the
   instrument indexes stored in instrIndexPointer into the column indexes of the
   instrument parameters solved for that row, and stores them in instrCols.

   Inputs:
   a) instrIndexPointer holds DEFAULT_NINSTRINDEXES+1 ints per row:
         [0]=FoV [1]=CCD [2]=PixelColumn [3]=TimeInterval [4]=ACALFlag
      A row whose first entry is -1 is a constraint row: its nInstrPSolved output
      columns are all set to 0 and nothing else is computed for it.
      FoV is used as read (FoV*nCCDs), while CCD, PixelColumn and TimeInterval
      are shifted by -1 before being used.
   b) instrConst describes the instrument; the entries read here are
         instrConst[1]=nCCDs instrConst[2]=nPixelColumns instrConst[3]=nTimeIntervals
   c) comlsqr supplies the flags selecting which parameter families are solved
      (maInstrFlag, nuInstrFlag, ssInstrFlag, lsInstrFlag), the number of columns
      expected per row (nInstrPSolved), the upper bound used to validate every
      column (nInstrParam) and the pre-computed offsets of each family.

   For performance reasons the offsets are not recomputed here. The original
   notes give them as follows (they are computed elsewhere and are not checked
   by this function):
   1) offsetCMag         = nCCDs
   2) offsetCnu          = nCCDs*(1+nFoVs)
   3) offsetCdelta_eta   = nCCDs*(1+nFoVs+nPixelColumns)
   4) offsetCDelta_eta_1 = nCCDs*(1+nFoVs*(1+nTimeIntervals)+nPixelColumns)
   5) offsetCDelta_eta_2 = nCCDs*(1+nFoVs*(1+2*nTimeIntervals)+nPixelColumns)

   Output: instrCols holds nInstrPSolved ints per row, written in this order:
   CMag (if maInstrFlag), Cnu (if nuInstrFlag), one small-scale column
   (if ssInstrFlag), three large-scale columns (if lsInstrFlag). When ACALFlag
   is non-zero the *_zeta / offsetCDelta_eta_3 offsets are used instead of the
   *_eta / offsetCnu ones.

   Errors: if the number of columns produced differs from nInstrPSolved, or a
   column falls outside [0, nInstrParam), a message is printed and the MPI job
   is aborted.
*/

void instrIndexIdToColIndexGlobal(int* instrIndexPointer, int* instrConst,int totrows ,struct comData comlsqr, int* instrCols)
{
	const int nInstrParam = comlsqr.nInstrParam;
	const short nInstrPSolved = comlsqr.nInstrPSolved;
	const int maInstrFlag = comlsqr.maInstrFlag;
	const int nuInstrFlag = comlsqr.nuInstrFlag;
	const int ssInstrFlag = comlsqr.ssInstrFlag;
	const int lsInstrFlag = comlsqr.lsInstrFlag;
	const int nCCDs = instrConst[1];
	const int nPixCols = instrConst[2];
	const int nTInts = instrConst[3];

	for (int jj = 0; jj < totrows; jj++) {
		/* Row bases are computed in long: jj*stride in int arithmetic can
		   overflow when the local number of rows is very large. */
		const long idxBase = (long) jj * (DEFAULT_NINSTRINDEXES + 1);
		const long colBase = (long) jj * nInstrPSolved;
		const int *rowIdx = &instrIndexPointer[idxBase];
		int *rowCols = &instrCols[colBase];

		if (rowIdx[0] == -1) { /* constraint row: no instrument columns */
			for (int kk = 0; kk < nInstrPSolved; kk++)
				rowCols[kk] = 0;
			continue;
		}

		const int FoV = rowIdx[0];
		const int CCD = rowIdx[1];
		const int PixCol = rowIdx[2];
		const int TInt = rowIdx[3];
		const int ACALFlag = rowIdx[4];

		int counter = 0;

		if (maInstrFlag) {
			/* Index_CMag = CCD-1 */
			rowCols[counter++] = CCD - 1;
		}
		if (nuInstrFlag) {
			/* Index_Cnu = offsetCMag + FoV*nCCDs + (CCD-1) */
			rowCols[counter++] = comlsqr.offsetCMag + FoV*nCCDs + (CCD-1);
		}
		if (ssInstrFlag) {
			if (ACALFlag) {
				/* Index_Cdelta_zeta = offsetCDelta_eta_3 + (CCD-1)*nPixelColumns + (PixelColumn-1) */
				rowCols[counter++] = comlsqr.offsetCDelta_eta_3 + (CCD-1)*nPixCols + (PixCol-1);
			} else {
				/* Index_Cdelta_eta = offsetCnu + (CCD-1)*nPixelColumns + (PixelColumn-1) */
				rowCols[counter++] = comlsqr.offsetCnu + (CCD-1)*nPixCols + (PixCol-1);
			}
		}
		if (lsInstrFlag) {
			/* The relative large-scale position is shared by the three columns:
			   relPos = FoV*nCCDs*nTimeIntervals + (CCD-1)*nTimeIntervals + (TimeInterval-1) */
			const long relPos_ls = (long) FoV*nCCDs*nTInts + (long) (CCD-1)*nTInts + (TInt-1);
			if (ACALFlag) {
				rowCols[counter++] = comlsqr.offsetCdelta_zeta + relPos_ls;
				rowCols[counter++] = comlsqr.offsetCDelta_zeta_1 + relPos_ls;
				rowCols[counter++] = comlsqr.offsetCDelta_zeta_2 + relPos_ls;
			} else {
				rowCols[counter++] = comlsqr.offsetCdelta_eta + relPos_ls;
				rowCols[counter++] = comlsqr.offsetCDelta_eta_1 + relPos_ls;
				rowCols[counter++] = comlsqr.offsetCDelta_eta_2 + relPos_ls;
			}
		}

		/* The flags must produce exactly nInstrPSolved columns per row. */
		if (counter != nInstrPSolved) {
			printf("SEVERE ERROR PE=%d counter=%d != nInstrPSolved=%d on row #%d\n",comlsqr.myid, counter, nInstrPSolved, jj);
			MPI_Abort(MPI_COMM_WORLD, 1);
			exit(EXIT_FAILURE);
		}
		/* Every column must lie inside [0, nInstrParam). */
		for (int k = 0; k < nInstrPSolved; k++) {
			if (rowCols[k] >= nInstrParam || rowCols[k] < 0) {
				printf("SEVERE ERROR on instrCols[%ld]=%d > nInstrparam=%d\n", colBase + k, rowCols[k], nInstrParam);
				MPI_Abort(MPI_COMM_WORLD, 1);
				exit(EXIT_FAILURE);
			}
		}
	}
	return;
}

/* Rebuilds, row by row, the instrument indexes (FoV, CCD, PixelColumn,
   TimeInterval) from the instrument columns stored in instrCols.
   - instrCols is read with a stride of DEFAULT_NINSTRVALUES ints per row;
     the first six entries are summed to detect constraint rows and entries
     [0]..[3] are used for the reconstruction.
   - instrIndexPointer is written with a stride of DEFAULT_NINSTRINDEXES ints
     per row: [0]=FoV [1]=CCD [2]=PixelColumn [3]=TimeInterval.
   - instrConst[1], instrConst[2] and instrConst[3] are used as nCCDs,
     nPixelColumns and nTimeIntervals.
   A row whose first six columns sum to zero is treated as a constraint and
   gets all four indexes set to 0. */
void ColIndexToinstrIndexIdGlobal(int* instrIndexPointer, int* instrConst,int totrows ,struct comData comlsqr, int* instrCols)
{
    for (int row = 0; row < totrows; row++) {
        int *idx = &instrIndexPointer[row*DEFAULT_NINSTRINDEXES];
        const int *cols = &instrCols[row*DEFAULT_NINSTRVALUES];

        int colSum = 0;
        for (int k = 0; k < 6; k++)
            colSum += cols[k];

        if (colSum == 0) { /* constraint row */
            idx[0] = 0;
            idx[1] = 0;
            idx[2] = 0;
            idx[3] = 0;
            continue;
        }

        /* CCD, from Index_CMag = CCD-1 */
        idx[1] = cols[0]+1;
        /* FoV, from Index_Cnu = offsetCMag + (FoV-1)*nCCDs + (CCD-1) */
        idx[0] = (cols[1]-idx[1]+1-comlsqr.offsetCMag)/instrConst[1] + 1;
        /* PixelColumn, from Index_Cdelta_eta = offsetCnu + (CCD-1)*nPixelColumns + (PixelColumn-1) */
        idx[2] = cols[2]-comlsqr.offsetCnu-(idx[1]-1)*instrConst[2]+1;

        /* TimeInterval, from the position relative to offsetCdelta_eta */
        long relPos = cols[3]-comlsqr.offsetCdelta_eta;
        idx[3] = relPos -(idx[0]-1)*instrConst[1]*instrConst[3]-(idx[1]-1)*instrConst[3]+1;
    }
    return;
}

/*--------------------------------------------------------------------------*/
void printerror(int status) {
	/*
	 * Terminates the whole MPI job when status is non-zero; returns
	 * immediately when status is zero. No message is printed here (the
	 * cfitsio error report call is disabled).
	 */
	if (status == 0) {
		return;
	}

	MPI_Abort(MPI_COMM_WORLD, status);
	exit(status); /* only reached if MPI_Abort returns */
}

/*--------------------------------------------------------------------------*/
void printerrorsingle(int status) {
	/*
	 * Single-process variant of printerror: exits the calling process with
	 * status as exit code when status is non-zero, and does nothing when it
	 * is zero. No message is printed here (the cfitsio error report call is
	 * disabled).
	 */
	if (status == 0) {
		return;
	}

	exit(status);
}

int err_malloc(const char *s,int id) {
	/* Reports on stdout that allocating the object named s failed on
	   process id, then aborts the MPI job. Returns 1 if MPI_Abort returns. */
	const int failure = 1;

	fprintf(stdout, "out of memory while allocating %s on PE=%d.\n", s, id);
	MPI_Abort(MPI_COMM_WORLD, failure);
	return failure;
}

/* Directory-entry filter: returns 1 when the entry name starts with
   "dpccu3dpctavugsrgsrsystemrow", 0 otherwise. */
int sel(const struct dirent *a) {
	static const char prefix[] = "dpccu3dpctavugsrgsrsystemrow";

	return strncmp(a->d_name, prefix, sizeof prefix - 1) == 0;
}
/* Directory-entry filter: returns 1 when the entry name contains
   "nullspaceastrofit", 0 otherwise. */
int selextConstrStar(const struct dirent *a) {
    const char *hit = strstr(a->d_name, "nullspaceastrofit");

    return hit != NULL;
}
/* Directory-entry filter: returns 1 when the entry name contains
   "nullspaceattitudefit", 0 otherwise. */
int selextConstrAtt(const struct dirent *a) {
    const char *hit = strstr(a->d_name, "nullspaceattitudefit");

    return hit != NULL;
}
/* Directory-entry filter: returns 1 when the entry name contains
   "barconstrastrofit", 0 otherwise. */
int selbarConstrStar(const struct dirent *a) {
    const char *hit = strstr(a->d_name, "barconstrastrofit");

    return hit != NULL;
}
/* Directory-entry filter: returns 1 when the entry name contains "_SM.bin",
   0 otherwise. */
int selSM(const struct dirent *a) {
	const char *hit = strstr(a->d_name, "_SM.bin");

	return hit != NULL;
}
/* Directory-entry filter: returns 1 when the entry name contains "_KT.bin",
   0 otherwise. */
int selKT(const struct dirent *a) {
	const char *hit = strstr(a->d_name, "_KT.bin");

	return hit != NULL;
}
/* Directory-entry filter: returns 1 when the entry name contains "_II.bin",
   0 otherwise. */
int selII(const struct dirent *a) {
	const char *hit = strstr(a->d_name, "_II.bin");

	return hit != NULL;
}
/* Directory-entry filter: returns 1 when the entry name contains "_MI.bin",
   0 otherwise. */
int selMI(const struct dirent *a) {
	const char *hit = strstr(a->d_name, "_MI.bin");

	return hit != NULL;
}
/* Directory-entry filter: returns 1 when the entry name contains ".bin",
   0 otherwise. */
int selAll(const struct dirent *a) {
    const char *hit = strstr(a->d_name, ".bin");

    return hit != NULL;
}
/* Directory-entry filter: returns 1 when the entry name contains ".gsb",
   0 otherwise. */
int selGSB(const struct dirent *a) {
    const char *hit = strstr(a->d_name, ".gsb");

    return hit != NULL;
}
/* Directory-entry filter: returns 1 when the entry name contains "CPRLast",
   0 otherwise. */
int selLastGSB(const struct dirent *a) {
    const char *hit = strstr(a->d_name, "CPRLast");

    return hit != NULL;
}

/* Unpacks the instrument indexes packed in the bit mask instrInput and stores
   them in instrOutput:
      instrOutput[0] = FoV          (bit 0)
      instrOutput[1] = CCD          (8 bits starting at bit 1)
      instrOutput[2] = PixelColumn  (11 bits starting at bit 9)
      instrOutput[3] = TimeInterval (11 bits starting at bit 20)
      instrOutput[4] = acSc, copied unchanged
   NB: the FoV is coded as 0 or 1 so that it fits in a single bit of the mask.
   This function stores that raw 0/1 value in instrOutput[0]; it does NOT add 1,
   so a caller that needs the FoV as 1 or 2 has to add 1 itself.
   instrOutput must have room for at least 5 ints.
 */
void instrDeMask(long instrInput, int acSc ,int* instrOutput)
{
	enum { CCD_SHIFT = 1, PIXEL_SHIFT = 9, TIME_SHIFT = 20 };
	enum { FOV_BITS = 0x01, CCD_BITS = 0xFF, PIXEL_BITS = 0x7FF, TIME_BITS = 0x7FF };

	const long fovField = instrInput & FOV_BITS;
	const long ccdField = (instrInput >> CCD_SHIFT) & CCD_BITS;
	const long pixelField = (instrInput >> PIXEL_SHIFT) & PIXEL_BITS;
	const long timeField = (instrInput >> TIME_SHIFT) & TIME_BITS;

	instrOutput[0] = (int) fovField;
	instrOutput[1] = (int) ccdField;
	instrOutput[2] = (int) pixelField;
	instrOutput[3] = (int) timeField;
	instrOutput[4] = acSc;
}


/* Reads `count` items of `itemSize` bytes from the checkpoint stream fp into
   dst. On a short read it reports which field (`what`) could not be read and
   aborts the MPI job, so that a truncated checkpoint is never used. */
static void cprReadOrAbort(void *dst, size_t itemSize, size_t count, FILE *fp, int rank, const char *what)
{
	if (fread(dst, itemSize, count, fp) != count) {
		printf("PE=%d CPR Severe error: short read of %s from checkpoint file. MPI Abort\n", rank, what);
		MPI_Abort(MPI_COMM_WORLD, 1);
	}
}

/* Restores the solver state from the last checkpoint file of this process
   (GaiaGsrCPRLast_<rank>.gsb, rank printed with %06d), if one exists.

   Behaviour:
   - If no process has a last-checkpoint file, the function returns without
     touching any output argument.
   - The job is aborted if only some of the processes have the file, if the
     number of processes stored in the file differs from the current one, if
     the number of files matched by selLastGSB in the current directory differs
     from the number of processes, if any of the stored sizes (mapNoss[rank],
     replicaKT, localVect, replicaVect) differs from the value computed from
     comlsqr, or if the file is shorter than expected.
   - Otherwise the stored values are read, in file order, into *itn,
     knownTerms, *beta, *alpha, vVect, *anorm, *rhobar, *phibar, wVect,
     xSolution, standardError, *dnorm, *sn2, *cs2, *z, *xnorm1, *res2, *nstop.

   knownTerms must have room for mapNoss[rank] + nEqExtConstr + nEqBarConstr +
   nOfInstrConstr doubles; vVect, wVect, xSolution and standardError for
   comlsqr.nunkSplit doubles each. */
void restartSetup(int *itn,
				double *knownTerms, 
				double *beta,
				double *alpha,
				double *vVect, 
				double *anorm,
				double *rhobar,
				double *phibar,
				double *wVect,
				double *xSolution,
				double *standardError,
				double *dnorm,
				double *sn2,
				double *cs2,
				double *z,
				double *xnorm1,
				double *res2,
				int *nstop,
				struct comData comlsqr)
{
	FILE *fChekPointPtr = NULL;
	int rank, size, cksize;
	int existCPR = 0;
	int globalCPR = 0;
	char lastBinName[80];
	long dummyLong = 0;
	int dummyInt = 0;
	long int *mapNoss = comlsqr.mapNoss;
	const long nunkSplit = comlsqr.nunkSplit;
	const long replicaKT = comlsqr.nEqExtConstr+comlsqr.nEqBarConstr+comlsqr.nOfInstrConstr;
	const long localVect = comlsqr.VrIdAstroPDimMax*comlsqr.nAstroPSolved;
	const long replicaVect = comlsqr.nAttParam+comlsqr.nInstrParam+comlsqr.nGlobalParam;

	MPI_Comm_size(MPI_COMM_WORLD, &size);
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	cksize = size;
	snprintf(lastBinName, sizeof lastBinName, "GaiaGsrCPRLast_%06d.gsb", rank);

	/* If this process has a last-checkpoint file, read the number of
	   processes that wrote it. */
	fChekPointPtr = fopen(lastBinName, "rb");
	if (fChekPointPtr != NULL) {
		existCPR = 1;
		cprReadOrAbort(&cksize, sizeof(int), 1, fChekPointPtr, rank, "number of processes");
	}

	/* Either every process has a checkpoint or none has, and the checkpoint
	   must have been written by the same number of processes. */
	MPI_Allreduce(&existCPR, &globalCPR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
	if ((globalCPR != 0 && globalCPR != size) || size != cksize) {
		if (rank == 0) printf("PE=%d  SEVERE ERROR on CPR files. MPI Abort\n",rank);
		MPI_Abort(MPI_COMM_WORLD, 1);
	}

	if (!existCPR)
		return;

	/* The number of last-checkpoint files on disk must match the number of
	   processes. The list returned by scandir is only counted, then freed. */
	{
		struct dirent **namelistGSB = NULL;
		int nFilesLastGsb = scandir(".", &namelistGSB, selLastGSB, alphasort);
		if (nFilesLastGsb != size) {
			if (rank == 0) printf("PE=%d  SEVERE ERROR on CPR files, gsb files =%d not equal to size=%d. MPI Abort\n",rank,nFilesLastGsb,size);
			MPI_Abort(MPI_COMM_WORLD, 1);
		}
		if (nFilesLastGsb >= 0) {
			for (int i = 0; i < nFilesLastGsb; i++)
				free(namelistGSB[i]);
			free(namelistGSB);
		}
	}

	/* Header: the sizes stored in the file must match the current run. */
	cprReadOrAbort(&dummyLong, sizeof(long), 1, fChekPointPtr, rank, "mapNoss");
	if (dummyLong != mapNoss[rank]) {
		printf("PE=%d CPR Severe error: Invalid read mapNoss=%ld but must to be=%ld\n",rank,dummyLong,mapNoss[rank]);
		MPI_Abort(MPI_COMM_WORLD, 1);
	}
	cprReadOrAbort(&dummyLong, sizeof(long), 1, fChekPointPtr, rank, "replicaKT");
	if (dummyLong != replicaKT) {
		printf("PE=%d CPR Severe Error. Read value replicaKT=%ld is different from computed replicaKt=%ld\n",rank,dummyLong,replicaKT);
		MPI_Abort(MPI_COMM_WORLD, 1);
	}
	cprReadOrAbort(&dummyLong, sizeof(long), 1, fChekPointPtr, rank, "localVect");
	if (dummyLong != localVect) {
		printf("PE=%d CPR Severe Error. Read value localVect=%ld is different from computed localVect=%ld\n",rank,dummyLong,localVect);
		MPI_Abort(MPI_COMM_WORLD, 1);
	}
	/* replicaVect is stored as an int: report the int that was actually read. */
	cprReadOrAbort(&dummyInt, sizeof(int), 1, fChekPointPtr, rank, "replicaVect");
	if (dummyInt != replicaVect) {
		printf("PE=%d CPR Severe Error. Read value replicaVect=%d is different from computed replicaVect=%ld\n",rank,dummyInt,replicaVect);
		MPI_Abort(MPI_COMM_WORLD, 1);
	}

	/* Solver state, in the order in which it is stored in the file. */
	cprReadOrAbort(itn, sizeof(int), 1, fChekPointPtr, rank, "itn");
	cprReadOrAbort(knownTerms, sizeof(double),
	               (size_t) (mapNoss[rank]+comlsqr.nEqExtConstr+comlsqr.nEqBarConstr+comlsqr.nOfInstrConstr),
	               fChekPointPtr, rank, "knownTerms");
	cprReadOrAbort(beta, sizeof(double), 1, fChekPointPtr, rank, "beta");
	cprReadOrAbort(alpha, sizeof(double), 1, fChekPointPtr, rank, "alpha");
	cprReadOrAbort(vVect, sizeof(double), (size_t) nunkSplit, fChekPointPtr, rank, "vVect");
	cprReadOrAbort(anorm, sizeof(double), 1, fChekPointPtr, rank, "anorm");
	cprReadOrAbort(rhobar, sizeof(double), 1, fChekPointPtr, rank, "rhobar");
	cprReadOrAbort(phibar, sizeof(double), 1, fChekPointPtr, rank, "phibar");
	cprReadOrAbort(wVect, sizeof(double), (size_t) nunkSplit, fChekPointPtr, rank, "wVect");
	cprReadOrAbort(xSolution, sizeof(double), (size_t) nunkSplit, fChekPointPtr, rank, "xSolution");
	cprReadOrAbort(standardError, sizeof(double), (size_t) nunkSplit, fChekPointPtr, rank, "standardError");
	cprReadOrAbort(dnorm, sizeof(double), 1, fChekPointPtr, rank, "dnorm");
	cprReadOrAbort(sn2, sizeof(double), 1, fChekPointPtr, rank, "sn2");
	cprReadOrAbort(cs2, sizeof(double), 1, fChekPointPtr, rank, "cs2");
	cprReadOrAbort(z, sizeof(double), 1, fChekPointPtr, rank, "z");
	cprReadOrAbort(xnorm1, sizeof(double), 1, fChekPointPtr, rank, "xnorm1");
	cprReadOrAbort(res2, sizeof(double), 1, fChekPointPtr, rank, "res2");
	cprReadOrAbort(nstop, sizeof(int), 1, fChekPointPtr, rank, "nstop");

	fclose(fChekPointPtr);
}

/* Writes `count` items of `itemSize` bytes from src to the checkpoint stream.
   Returns 1 when all the items were written, 0 otherwise. */
static int cprWriteItems(const void *src, size_t itemSize, size_t count, FILE *fp)
{
	return fwrite(src, itemSize, count, fp) == count;
}

/* Writes the solver state of this process to its last-checkpoint file
   (GaiaGsrCPRLast_<rank>.gsb, rank printed with %06d).

   - If every process already has a last-checkpoint file, that file is first
     renamed to GaiaGsrCPRPrev_<rank>.gsb, so that the previous checkpoint is
     kept while the new one is being written.
   - If at least one process has none, no rotation is done and the file is
     simply (re)created.
   The file contains, in order: number of processes, mapNoss[rank], replicaKT,
   localVect, replicaVect, itn, knownTerms, beta, alpha, vVect, anorm, rhobar,
   phibar, wVect, xSolution, standardError, dnorm, sn2, cs2, z, xnorm1, res2,
   nstop. knownTerms is written with mapNoss[rank] + nEqExtConstr +
   nEqBarConstr + nOfInstrConstr doubles; vVect, wVect, xSolution and
   standardError with comlsqr.nunkSplit doubles each.

   The MPI job is aborted if the file cannot be renamed, opened, completely
   written or closed. */
void writeCheckPoint(int itn, 
				double *knownTerms, 
				double beta,
				double alpha,
				double *vVect, 
				double anorm,
				double rhobar,
				double phibar,
				double *wVect,
				double *xSolution,
				double *standardError,
				double dnorm,
				double sn2,
				double cs2,
				double z,
				double xnorm1,
				double res2,
				int nstop,
				struct comData comlsqr)
{
	FILE *fChekPointPtr = NULL;
	int rank, size;
	int noCPR = 0;
	int globalCPR = 0;
	char prevBinName[80];
	char lastBinName[80];
	long int *mapNoss = comlsqr.mapNoss;
	const long nunkSplit = comlsqr.nunkSplit;
	long replicaKT = comlsqr.nEqExtConstr+comlsqr.nEqBarConstr+comlsqr.nOfInstrConstr;
	long VrIdAstroPDimMax = comlsqr.VrIdAstroPDimMax;
	long localVect = VrIdAstroPDimMax*comlsqr.nAstroPSolved;
	int replicaVect = comlsqr.nAttParam+comlsqr.nInstrParam+comlsqr.nGlobalParam;

	MPI_Comm_size(MPI_COMM_WORLD, &size);
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	snprintf(prevBinName, sizeof prevBinName, "GaiaGsrCPRPrev_%06d.gsb", rank);
	snprintf(lastBinName, sizeof lastBinName, "GaiaGsrCPRLast_%06d.gsb", rank);

	/* Does this process already have a last checkpoint? */
	fChekPointPtr = fopen(lastBinName, "rb");
	if (fChekPointPtr == NULL) {
		if (rank == 0) printf("PE=%d  No checkpoint yet. Writing a new one:\n",rank);
		noCPR = 1;
	} else {
		fclose(fChekPointPtr);
	}

	MPI_Allreduce(&noCPR, &globalCPR, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
	if (globalCPR != 0) {
		/* At least one process has no last checkpoint: there is no complete
		   set to rotate. Only check that the file can be created, and close
		   the probe stream right away (it used to be leaked). */
		fChekPointPtr = fopen(lastBinName, "wb");
		if (fChekPointPtr == NULL) {
			if (rank == 0) printf("PE=%d  SEVERE ERROR No CPR file. MPI Abort\n",rank);
			MPI_Abort(MPI_COMM_WORLD, 1);
		} else {
			fclose(fChekPointPtr);
		}
	} else {
		/* Move the last checkpoint to the "previous" slot. */
		if (rename(lastBinName, prevBinName) == -1) {
			printf("PE=%d SEVERE ERROR Cannot rename previous CPR file %s. MPI Abort.\n",rank, lastBinName);
			MPI_Abort(MPI_COMM_WORLD, 1);
		}
	}

	/* The new last checkpoint can now be written. */
	fChekPointPtr = fopen(lastBinName, "wb");
	if (fChekPointPtr == NULL) {
		printf("PE=%d Severe Warning no Checkpoint file will be produced:\n",rank);
		MPI_Abort(MPI_COMM_WORLD, 1);
		return;
	}

	int ok = 1;
	ok &= cprWriteItems(&size, sizeof(int), 1, fChekPointPtr);
	ok &= cprWriteItems(&mapNoss[rank], sizeof(long), 1, fChekPointPtr);
	ok &= cprWriteItems(&replicaKT, sizeof(long), 1, fChekPointPtr);
	ok &= cprWriteItems(&localVect, sizeof(long), 1, fChekPointPtr);
	ok &= cprWriteItems(&replicaVect, sizeof(int), 1, fChekPointPtr);
	ok &= cprWriteItems(&itn, sizeof(int), 1, fChekPointPtr);
	ok &= cprWriteItems(knownTerms, sizeof(double),
	                    (size_t) (mapNoss[rank]+comlsqr.nEqExtConstr+comlsqr.nEqBarConstr+comlsqr.nOfInstrConstr),
	                    fChekPointPtr);
	ok &= cprWriteItems(&beta, sizeof(double), 1, fChekPointPtr);
	ok &= cprWriteItems(&alpha, sizeof(double), 1, fChekPointPtr);
	ok &= cprWriteItems(vVect, sizeof(double), (size_t) nunkSplit, fChekPointPtr);
	ok &= cprWriteItems(&anorm, sizeof(double), 1, fChekPointPtr);
	ok &= cprWriteItems(&rhobar, sizeof(double), 1, fChekPointPtr);
	ok &= cprWriteItems(&phibar, sizeof(double), 1, fChekPointPtr);
	ok &= cprWriteItems(wVect, sizeof(double), (size_t) nunkSplit, fChekPointPtr);
	ok &= cprWriteItems(xSolution, sizeof(double), (size_t) nunkSplit, fChekPointPtr);
	ok &= cprWriteItems(standardError, sizeof(double), (size_t) nunkSplit, fChekPointPtr);
	ok &= cprWriteItems(&dnorm, sizeof(double), 1, fChekPointPtr);
	ok &= cprWriteItems(&sn2, sizeof(double), 1, fChekPointPtr);
	ok &= cprWriteItems(&cs2, sizeof(double), 1, fChekPointPtr);
	ok &= cprWriteItems(&z, sizeof(double), 1, fChekPointPtr);
	ok &= cprWriteItems(&xnorm1, sizeof(double), 1, fChekPointPtr);
	ok &= cprWriteItems(&res2, sizeof(double), 1, fChekPointPtr);
	ok &= cprWriteItems(&nstop, sizeof(int), 1, fChekPointPtr);

	/* fclose flushes the stream: a failure here also means data were lost. */
	if (fclose(fChekPointPtr) != 0)
		ok = 0;

	if (!ok) {
		printf("PE=%d SEVERE ERROR while writing CPR file %s. MPI Abort.\n",rank, lastBinName);
		MPI_Abort(MPI_COMM_WORLD, 1);
		return;
	}
	if (rank == 0) printf("Checkpoint writing ended successfully.\n");
}



/* Adds to vectToSum the contributions that the neighbouring processes hold
   for the stars at the edges of the local star range.

   Each process packs the first nAstroPSolved values of vectToSum (first local
   star) and the nAstroPSolved values of its last local star (block
   VrIdAstroPDim-1), together with the ids of those two stars
   (comlsqr.mapStar[rank][0] and [1]). The package is sent to the next process
   of the ring and, when there are more than two processes, also to the
   previous one. A received block is added to the local first/last star when
   the received star id is equal to the local one.

   With a single process the function returns immediately. Every process of
   MPI_COMM_WORLD must call it (it contains an MPI_Barrier).

   NOTE(review): the id buffers hold 2 ints and the value buffers are filled
   with 2*nAstroPSolved doubles, while the message sizes are expressed through
   comlsqr.multMI; the two agree only if multMI is 2 - confirm with the caller. */
void SumCirc(double *vectToSum,struct comData comlsqr)
{
	int rank, size;
	MPI_Status status;
	MPI_Request req2, req3;

	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	MPI_Comm_size(MPI_COMM_WORLD, &size);

	if (size == 1)
		return;
	/* With two processes the forward and backward neighbour coincide. */
	const int nMov = (size == 2) ? 1 : 2;

	const int nAstro = comlsqr.nAstroPSolved;
	const int nBuf = comlsqr.multMI*comlsqr.nAstroPSolved;
	/* Offset, inside vectToSum, of the last local star. */
	const long lastStarOff = (comlsqr.VrIdAstroPDim-1)*comlsqr.nAstroPSolved;

	/* Ring neighbours. */
	const int nextPe = (rank+1 == size) ? 0 : rank+1;
	const int prevPe = (rank-1 < 0) ? size-1 : rank-1;

	int tempSendIdBuf[2], tempRecvIdBuf[2];
	double *tempSendBuf = (double *) calloc(nBuf, sizeof(double));
	double *tempRecvBuf = (double *) calloc(nBuf, sizeof(double));
	/* calloc may legitimately return NULL for a zero-sized request. */
	if (nBuf > 0 && (tempSendBuf == NULL || tempRecvBuf == NULL)) {
		err_malloc("SumCirc exchange buffers", rank);
		free(tempSendBuf);
		free(tempRecvBuf);
		return;
	}

	tempSendIdBuf[0] = comlsqr.mapStar[rank][0]; /* starting star */
	tempSendIdBuf[1] = comlsqr.mapStar[rank][1]; /* ending star */

	for (int i = 0; i < nAstro; i++)
		tempSendBuf[i] = vectToSum[i];
	for (int i = 0; i < nAstro; i++)
		tempSendBuf[i+nAstro] = vectToSum[lastStarOff+i];

	for (int i = 0; i < nMov; i++)
	{
		/* i==0: forward propagation, i==1: backward propagation */
		const int npeSend = (i == 0) ? nextPe : prevPe;
		const int npeRecv = (i == 0) ? prevPe : nextPe;

		MPI_Isend(tempSendIdBuf, comlsqr.multMI, MPI_INT, npeSend, 1, MPI_COMM_WORLD, &req2);
		MPI_Isend(tempSendBuf, nBuf, MPI_DOUBLE, npeSend, 2, MPI_COMM_WORLD, &req3);

		MPI_Recv(tempRecvIdBuf, comlsqr.multMI, MPI_INT, npeRecv, 1, MPI_COMM_WORLD, &status);
		MPI_Recv(tempRecvBuf, nBuf, MPI_DOUBLE, npeRecv, 2, MPI_COMM_WORLD, &status);

		MPI_Wait(&req2, &status);
		MPI_Wait(&req3, &status);

		MPI_Barrier(MPI_COMM_WORLD);

		/* Last star of the neighbour: add it to the local first star or,
		   failing that, to the local last star. */
		int okupd = 0;
		if (tempRecvIdBuf[1] == comlsqr.mapStar[rank][0])
		{
			for (int ns = 0; ns < nAstro; ns++)
				vectToSum[ns] += tempRecvBuf[nAstro+ns];
			okupd = 1;
		}
		if (tempRecvIdBuf[1] == comlsqr.mapStar[rank][1] && okupd == 0)
		{
			for (int ns = 0; ns < nAstro; ns++)
				vectToSum[lastStarOff+ns] += tempRecvBuf[nAstro+ns];
		}

		/* First star of the neighbour, only when it is a different star from
		   its last one (otherwise it has just been handled above). */
		if (tempRecvIdBuf[0] != tempRecvIdBuf[1])
		{
			if (tempRecvIdBuf[0] == comlsqr.mapStar[rank][1])
			{
				for (int ns = 0; ns < nAstro; ns++)
					vectToSum[lastStarOff+ns] += tempRecvBuf[ns];
			}
			if (tempRecvIdBuf[0] == comlsqr.mapStar[rank][0])
			{
				for (int ns = 0; ns < nAstro; ns++)
					vectToSum[ns] += tempRecvBuf[ns];
			}
		}
	}

	free(tempSendBuf);
	free(tempRecvBuf);
}

/* Builds comlsqr->mapForThread, the table that splits the mapNoss[myid] local
   observations among comlsqr->ntasks tasks.

   For task n, mapForThread[n] holds three row indexes:
      [0] first row of the task, [1] first row + (rows per task)/2,
      [2] one past the last row of the task.
   Rows are split evenly; the first (mapNoss[myid] % ntasks) tasks get one
   extra row and the last task always ends at mapNoss[myid].

   When nAstroPSolved > 0 the boundaries are then moved forward while the last
   row of a task and the first row of the next one have the same value in
   matrixIndex[2*row] (the same star, presumably), so that those rows are not
   split between two tasks. If a task becomes empty while doing so, the split
   is abandoned: task 0 gets all the rows and the other tasks get none.

   matrixIndex is read with a stride of 2 entries per row.
   NOTE(review): mapForThread[n][1] is not updated when [n][0] is moved.
   The table is allocated here; on allocation failure err_malloc is called
   (which aborts the MPI job). */
void initThread(long int  *matrixIndex,struct comData *comlsqr)
{
	const int myid = comlsqr->myid;
	const int ntasks = comlsqr->ntasks;
	const long nObs = comlsqr->mapNoss[myid];

	/* Allocate the table used to divide the for cycle in aprod mode=2. */
	comlsqr->mapForThread = (long **) calloc(ntasks, sizeof(long *));
	if (comlsqr->mapForThread == NULL) {
		err_malloc("mapForThread", myid);
		return;
	}
	for (int n = 0; n < ntasks; n++) {
		comlsqr->mapForThread[n] = (long *) calloc(3, sizeof(long));
		if (comlsqr->mapForThread[n] == NULL) {
			err_malloc("mapForThread[n]", myid);
			return;
		}
	}
	long **map = comlsqr->mapForThread;

	/* Kept in long: the number of local observations may not fit in an int. */
	const long nElements = nObs/ntasks;
	const long nExtra = nObs%ntasks;

	map[0][0] = 0;
	map[0][1] = nElements/2;
	map[0][2] = nElements;
	if (nExtra > 0) map[0][2]++;

	for (int n = 1; n < ntasks; n++)
	{
		map[n][0] = map[n-1][2];
		map[n][1] = map[n][0]+nElements/2;
		map[n][2] = map[n][0]+nElements;
		if (nExtra > n) map[n][2]++;
	}
	map[ntasks-1][2] = nObs;

	/* Make sure that rows with the same matrixIndex[2*row] value are not
	   split between two consecutive tasks. */
	if (comlsqr->nAstroPSolved > 0) {
		int smpFailed = 0;
		for (int n = 1; n < ntasks; n++)
		{
			while (matrixIndex[2*(map[n-1][2]-1)] == matrixIndex[2*map[n][0]])
			{
				if (map[n][0] == map[n][2])
				{
					/* Task n has no rows left: the split is not possible. */
					smpFailed = 1;
					printf("PE=%d. SEVERE WARNING. Smp not applicable. mapForThread[%d][0] =%ld and mapForThread[%d][2]=%ld\n",myid, n,map[n][0],n,map[n][2]);
					break;
				}
				map[n][0]++;
				map[n-1][2]++;
				if (smpFailed) break;
			}
		}

		if (smpFailed)
		{
			/* Fall back to a single task owning every row. */
			printf("UTIL: SEVERE WARNING PE=%d smpFailed\n",myid);
			map[0][0] = 0;
			map[0][1] = nObs;
			map[0][2] = map[0][1];
			for (int n = 1; n < ntasks; n++)
			{
				map[n][0] = nObs;
				map[n][1] = nObs;
				map[n][2] = nObs;
			}
		}
	}

	if (comlsqr->myid == 0) printf("\n\nRunning with OmpSs: ntasks=%d\n\n",ntasks); 

}

// Not used.
// Searches for a number of sub-intervals (comlsqr->nSubsetAtt, doubled at each
// attempt, up to 256) for which no attitude-index dependency is found among
// the blocks assigned to the different tasks. The outcome is stored in
// comlsqr->nSubsetAtt and comlsqr->NOnSubsetAtt (1 = independence NOT found,
// 0 = independence found). With ntasks==1 it returns at once with both set to 1.
// matrixIndex is read at odd positions (matrixIndex[2*row+1]).
void blocksAttIndep(long int  *matrixIndex,struct comData *comlsqr)
{  // WARNING: THIS FUNCTION IS NOT CORRECT AND MUST NEVER BE USED IF nAstroPSolved=0
int myid=comlsqr->myid;
/*
comlsqr->nthreads=1; 

#ifdef OMP
        comlsqr->nthreads = omp_get_num_threads();
#endif
*/

int nthreads=comlsqr->nthreads;
int ntasks=comlsqr->ntasks;

comlsqr->nSubsetAtt=1;
comlsqr->NOnSubsetAtt=1;
if(ntasks==1) 
	return;
int dependancyFound;

// Each sub-interval is divided among nthreads. Every time an index dependency is found within a single sub-interval (one is enough) the number of sub-intervals is doubled, in order to reduce the probability of a dependency. Index independence has to be sought within the single sub-interval.

while(1){  // at each pass of this while the number of sub-intervals is doubled

dependancyFound=0;
for(int i=0;i<comlsqr->nSubsetAtt;i++)  // loop over all the sub-intervals
{
	long totalEleInBlock=comlsqr->mapNoss[myid]/comlsqr->nSubsetAtt; // total number of elements in the block, over all the threads (TBV)
	long  totalEleInBlockThread= (comlsqr->mapNoss[myid]/comlsqr->nSubsetAtt)/ntasks; // elements of a single thread (TBV)
    for(int nsubBlock=0;nsubBlock<ntasks;nsubBlock++) // within the sub-interval, look for index independence among the threads: the block of the first thread is compared with the following ones, then the block of the second thread with the third and following ones, and so on
	{
	// NOTE(review): the bounds of j depend on i and not on nsubBlock, which does not match the intent described above - see the warning at the top of the function
	for(long  j=totalEleInBlockThread*i;j<totalEleInBlockThread*(i+1)+1;j++) // j sweeps all the elements in the block of thread "nsubBlock", to compare each of them with all the elements of the following threads
	{
		int indexFound=matrixIndex[j*2+1];
		for(long k=totalEleInBlockThread*(nsubBlock+1)+1;k<totalEleInBlock;k++) // k sweeps from the first element of the following thread (nsubBlock) up to the last element of the sub-interval
		{
			int kindexFound=matrixIndex[k*2+1];
			// dependency: the two indexes are closer than nAttParAxis
			if(!(indexFound<=kindexFound-comlsqr->nAttParAxis || indexFound>kindexFound+comlsqr->nAttParAxis))
			{
				dependancyFound=1;
				break;
			}
			if(dependancyFound) break;
		} //for k
		if(dependancyFound) break;
	}// for j
	if(dependancyFound) break;
	}// for nsubBlock	
    if(dependancyFound) break;
}// for i
    if(dependancyFound)
    {
    	comlsqr->nSubsetAtt=comlsqr->nSubsetAtt*2;
    	if(comlsqr->nSubsetAtt>256)
    	break;
    } 
    else
     break; 

}// while
if(dependancyFound)
  {
	printf("PE=%d WARNING impossible to find on 256 subSet index independancy for Attitude Parameters\n",myid);
	 comlsqr->NOnSubsetAtt=1; // flag: index independence was NEVER obtained with up to 256 sub-intervals
  } else {
	printf("PE=%d Attitude index independancy with %d nSubsetAtt\n",myid,comlsqr->nSubsetAtt);
	 comlsqr->NOnSubsetAtt=0; // flag: index independence WAS obtained (within 256 sub-intervals) using comlsqr->nSubsetAtt sub-intervals
  }
}


// Multiplies, in place, every stored coefficient of the system matrix by the
// element of preCondVect that belongs to its column. Doing it once here avoids
// computing the product in aprod at each iteration.
//
// systemMatrix is visited sequentially:
//  1) the mapNoss[myid] observation rows, comlsqr.parOss coefficients each.
//     comlsqr.setBound[] splits the coefficients of a row into four groups,
//     each with its own rule for the preCondVect index:
//       [setBound[0],setBound[1])  starts at the star given by
//                                  matrixIndex[i*multMI] (relative to the first
//                                  local star), then consecutive;
//       [setBound[1],setBound[2])  attitude: a new start every nAttParAxis
//                                  coefficients, taken from the last entry of
//                                  the row in matrixIndex plus one
//                                  nDegFreedomAtt per axis, then consecutive;
//       [setBound[2],setBound[3])  instrument: one column per coefficient,
//                                  taken from instrCol;
//       [setBound[3], parOss)      global: consecutive from offsetGlobParam.
//  2) the external-constraint rows (if extConstraint),
//  3) the barycentric-constraint rows (if barConstraint),
//  4) the nElemIC instrument-constraint coefficients, whose columns follow the
//     observation rows in instrCol.
void precondSystemMatrix(double *systemMatrix, double *preCondVect, long int  *matrixIndex,int *instrCol,struct comData comlsqr)
{
    const int myid = comlsqr.myid;
    long int *mapNoss = comlsqr.mapNoss;

    const short nAstroPSolved = comlsqr.nAstroPSolved;
    const short nInstrPSolved = comlsqr.nInstrPSolved;
    const long nparam = comlsqr.parOss;
    const int multMI = comlsqr.multMI;
    const short nAttParAxis = comlsqr.nAttParAxis;
    const long nDegFredoomAtt = comlsqr.nDegFreedomAtt;
    const long VrIdAstroPDimMax = comlsqr.VrIdAstroPDimMax;
    const long offsetAttParam = comlsqr.offsetAttParam;
    const long offsetInstrParam = comlsqr.offsetInstrParam;
    const long offsetGlobParam = comlsqr.offsetGlobParam;
    const int extConstraint = comlsqr.extConstraint;
    const int nEqExtConstr = comlsqr.nEqExtConstr;
    const int numOfExtStar = comlsqr.numOfExtStar;
    const int barConstraint = comlsqr.barConstraint;
    const int nEqBarConstr = comlsqr.nEqBarConstr;
    const int numOfBarStar = comlsqr.numOfBarStar;
    const int numOfExtAttCol = comlsqr.numOfExtAttCol;
    const int startingAttColExtConstr = comlsqr.startingAttColExtConstr;
    const short nAttAxes = comlsqr.nAttAxes;
    const int nElemIC = comlsqr.nElemIC;
    const long VroffsetAttParam = comlsqr.VroffsetAttParam;
    const int setBound[4] = { comlsqr.setBound[0], comlsqr.setBound[1],
                              comlsqr.setBound[2], comlsqr.setBound[3] };

    /* Shift applied to the attitude/instrument/global columns; it does not
       change inside the loops, so it is computed once. */
    const long localShift = VrIdAstroPDimMax*nAstroPSolved - offsetAttParam;

    long j = 0;  /* index in preCondVect of the current coefficient */
    long l = 0;  /* running position in systemMatrix */

    /* 1) observation rows */
    for (long i = 0; i < mapNoss[myid]; i++) {
        long counterAxis = 0;
        long counterInstr = 0;

        for (int ii = 0; ii < nparam; ii++) {
            if (ii >= setBound[0] && ii < setBound[1]) {
                if (ii == setBound[0]) {
                    long numOfStarPos = matrixIndex[i*multMI]/nAstroPSolved;
                    j = (numOfStarPos - comlsqr.mapStar[myid][0])*nAstroPSolved;
                } else {
                    j++;
                }
            }

            if (ii >= setBound[1] && ii < setBound[2]) {
                if (((ii - setBound[1]) % nAttParAxis) == 0) {
                    j = matrixIndex[multMI*i + multMI - 1] + counterAxis*nDegFredoomAtt + localShift;
                    counterAxis++;
                } else {
                    j++;
                }
            }

            if (ii >= setBound[2] && ii < setBound[3]) {
                j = offsetInstrParam + instrCol[i*nInstrPSolved + counterInstr] + localShift;
                counterInstr++;
            }

            if (ii >= setBound[3]) {
                if (ii == setBound[3])
                    j = offsetGlobParam + localShift;
                else
                    j++;
            }

            systemMatrix[l] = systemMatrix[l]*preCondVect[j];
            l++;
        }
    }

    /* 2) external constraints: the columns of the first numOfExtStar stars,
          then numOfExtAttCol attitude columns for each axis */
    if (extConstraint) {
        for (int i = 0; i < nEqExtConstr; i++) {
            for (int ns = 0; ns < nAstroPSolved*numOfExtStar; ns++) {
                systemMatrix[l] = systemMatrix[l]*preCondVect[ns];
                l++;
            }
            for (int naxis = 0; naxis < nAttAxes; naxis++) {
                for (int nc = 0; nc < numOfExtAttCol; nc++) {
                    /* long: the column may not fit in an int */
                    long ncolumn = VrIdAstroPDimMax*nAstroPSolved + startingAttColExtConstr + nc + naxis*nDegFredoomAtt;
                    systemMatrix[l] = systemMatrix[l]*preCondVect[ncolumn];
                    l++;
                }
            }
        }
    }

    /* 3) barycentric constraints: the columns of the first numOfBarStar stars */
    if (barConstraint) {
        for (int i = 0; i < nEqBarConstr; i++) {
            for (int ns = 0; ns < nAstroPSolved*numOfBarStar; ns++) {
                systemMatrix[l] = systemMatrix[l]*preCondVect[ns];
                l++;
            }
        }
    }

    /* 4) instrument constraints */
    if (nElemIC > 0) {
        for (int i = 0; i < nElemIC; i++) {
            /* long: the column may not fit in an int */
            long ncolumn = offsetInstrParam + (VroffsetAttParam - offsetAttParam) + instrCol[mapNoss[myid]*nInstrPSolved + i];
            systemMatrix[l] = systemMatrix[l]*preCondVect[ncolumn];
            l++;
        }
    }

}    
/*
 * Chunked wrapper around MPI_Allreduce.
 *
 * Reduces lcount elements from source into dest by issuing one
 * MPI_Allreduce call per chunk of at most __MSGSIZ_MAX elements, so that
 * the (int) count argument of MPI_Allreduce is never exceeded even though
 * lcount is a long.
 *
 *   source, dest : buffers of at least lcount doubles (the chunk offsets
 *                  are computed in units of double, whatever datatype is)
 *   lcount       : total number of elements; nothing is done if <= 0
 *   datatype, op, comm : forwarded unchanged to MPI_Allreduce
 *
 * The element offset is kept in a long: the former int product
 * (chunk index * chunk size) overflowed for buffers larger than INT_MAX
 * elements.
 * The return code of MPI_Allreduce is not inspected.
 */
void mpi_allreduce(double *source, double *dest,  long int lcount,
                   MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
{
	long int offset=0;   /* index of the first element of the current chunk */

	while (lcount>0)
	{
		/* the last chunk may be shorter than __MSGSIZ_MAX */
		int count=(lcount<=__MSGSIZ_MAX) ? (int) lcount : __MSGSIZ_MAX;

		MPI_Allreduce(&source[offset], &dest[offset], count, datatype, op, comm);

		offset+=count;
		lcount-=count;
	}
}


/*
 * Chunked wrapper around MPI_Recv, counterpart of mpi_send().
 *
 * Receives lcount elements into source (which, despite its name, is the
 * destination buffer) as a sequence of messages of at most __MSGSIZ_MAX
 * elements each. The first chunk is received with tag `tag`, and the tag is
 * increased by 100 for every following chunk, exactly as mpi_send() does on
 * the sending side.
 *
 *   source   : buffer of at least lcount doubles (offsets are in doubles)
 *   lcount   : total number of elements; nothing is done if <= 0
 *   peSource : rank of the sending process
 *   status   : overwritten by every chunk; on return it describes the last one
 *
 * The element offset is kept in a long: the former int product
 * (chunk index * chunk size) overflowed for buffers larger than INT_MAX
 * elements.
 * NOTE(review): the tag grows by 100 per chunk, so very long transfers may
 * exceed the MPI_TAG_UB of the MPI implementation - confirm the limits.
 */
void mpi_recv(double *source, long int lcount, MPI_Datatype datatype, int peSource, int tag, MPI_Comm comm,MPI_Status *status)
{
	long int offset=0;   /* index of the first element of the current chunk */
	int localTag=tag;

	while (lcount>0)
	{
		/* the last chunk may be shorter than __MSGSIZ_MAX */
		int count=(lcount<=__MSGSIZ_MAX) ? (int) lcount : __MSGSIZ_MAX;

		MPI_Recv(&source[offset], count, datatype, peSource, localTag,comm,status);

		offset+=count;
		lcount-=count;
		localTag+=100;   /* must stay in step with mpi_send() */
	}
}


/*
 * Chunked wrapper around MPI_Send, counterpart of mpi_recv().
 *
 * Sends lcount elements of source as a sequence of messages of at most
 * __MSGSIZ_MAX elements each. The first chunk is sent with tag `tag`, and
 * the tag is increased by 100 for every following chunk, exactly as
 * mpi_recv() expects on the receiving side.
 *
 *   source : buffer of at least lcount doubles (offsets are in doubles)
 *   lcount : total number of elements; nothing is done if <= 0
 *   peDest : rank of the receiving process
 *
 * The element offset is kept in a long: the former int product
 * (chunk index * chunk size) overflowed for buffers larger than INT_MAX
 * elements.
 * NOTE(review): the tag grows by 100 per chunk, so very long transfers may
 * exceed the MPI_TAG_UB of the MPI implementation - confirm the limits.
 */
void mpi_send(double *source,   long int lcount, MPI_Datatype datatype, int peDest, int tag, MPI_Comm comm)
{
	long int offset=0;   /* index of the first element of the current chunk */
	int localTag=tag;

	while (lcount>0)
	{
		/* the last chunk may be shorter than __MSGSIZ_MAX */
		int count=(lcount<=__MSGSIZ_MAX) ? (int) lcount : __MSGSIZ_MAX;

		MPI_Send(&source[offset], count, datatype, peDest, localTag,comm);

		offset+=count;
		lcount-=count;
		localTag+=100;   /* must stay in step with mpi_recv() */
	}
}


/* Returns a pseudo-random number with an (approximately) gaussian
 * distribution of mean ave and rms sigma.
 * The value is built by summing 12 uniform deviates obtained from ran2()
 * and subtracting 6, then scaling by sigma and shifting by ave.
 * init2 is the seed handed to ran2(); it is received by value, so the
 * updates ran2() makes to it are only visible within this call.
 * It would be unused if ran2() were replaced by the standard rand()
 * system function (see the commented line in the loop).
 */
double gauss(double ave, double sigma, long init2)
{
    double total = 0.0;
    int remaining = 12;

    while (remaining-- > 0) {
        // comment the following line and uncomment the next one
        // to use the system routine for random numbers
        total += ran2(&init2);
        // total += ((double) rand()/RAND_MAX);
    }

    total -= 6.0;
    return ave+sigma*total;
}

/* From "Numerical Recipes in C". Generates random numbers.
 * Requires a pointer to long as seed and gives a double as the result.
 * The generator keeps its state in static variables (idum2, iy, iv), so
 * successive calls continue the same sequence.
 * NTAB, NDIV, IA1/IA2, IQ1/IQ2, IR1/IR2, IM1/IM2, IMM1, AM and RNMX are
 * not defined in this part of the file (presumably macros from util.h).
 */
double ran2(long *idum)
/* Long period (> 2 x 10^18) random number generator of L'Ecuyer with
   Bays-Durham shuffle and added safeguards. Returns a uniform random deviate
   between 0.0 and 1.0 (exclusive of the endpoint values). Call with idum a
   negative integer to initialize; thereafter, do not alter idum between
   successive deviates in a sequence. RNMX should approximate the largest
   floating value that is less than 1.
   */
{
   int j;
   long k;
   static long idum2=123456789;
   static long iy=0;
   static long iv[NTAB];
   double temp;
   
   if (*idum <= 0) {                 // Initialize.
       if (-(*idum) < 1) *idum=1;     // Be sure to prevent idum = 0.
       else *idum = -(*idum);
       idum2=(*idum);
       for (j=NTAB+7;j>=0;j--) {      // Load the shuffle table (after 8 warm-ups).
           k=(*idum)/IQ1;
           *idum=IA1*(*idum-k*IQ1)-k*IR1;
           if (*idum < 0) *idum += IM1;
           if (j < NTAB) iv[j] = *idum;
       }
       iy=iv[0];
   }
   k=(*idum)/IQ1;                    // Start here when not initializing.
   *idum=IA1*(*idum-k*IQ1)-k*IR1;    // Compute idum=(IA1*idum) % IM1 without
                                     // overflows by Schrage's method.
   if (*idum < 0) *idum += IM1;
   k=idum2/IQ2;
   idum2=IA2*(idum2-k*IQ2)-k*IR2;    // Compute idum2=(IA2*idum) % IM2 likewise.
   if (idum2 < 0) idum2 += IM2;
   j=iy/NDIV;                        // Will be in the range 0..NTAB-1.
   iy=iv[j]-idum2;                   // Here idum is shuffled, idum and idum2 are
                                     // combined to generate output.
   iv[j] = *idum;
   if (iy < 1) iy += IMM1;
   if ((temp=AM*iy) > RNMX) return RNMX;  // Because users don't expect endpoint values.
   else return temp;
}


/*
 * Reverses in place the order of the n bytes starting at b
 * (e.g. to convert a value between little- and big-endian layout).
 *
 *   b : first byte of the region; a null pointer is ignored
 *   n : number of bytes; nothing is done when n <= 1
 *
 * The temporary has the same type as the data (unsigned char), so that
 * bytes above 127 do not go through a conversion to plain char.
 */
void ByteSwap(unsigned char * b, int n)
{
   if (!b)
      return;

   for (int lo = 0, hi = n-1; lo < hi; lo++, hi--)
   {
      unsigned char held = b[lo];
      b[lo] = b[hi];
      b[hi] = held;
   }
}

/*
 * Fills the 4-element array inv according to nAstroPSolved.
 * Per the original notes the slots mean:
 *   inv[0] Row of Long, inv[1] Row of Lat,
 *   inv[2] Row of MuLong, inv[3] Row of MuLat
 * A slot left at -1 means "no row assigned".
 *
 *   nAstroPSolved = 1 : every slot stays -1
 *   nAstroPSolved = 2 : inv = { 0, 1, -1, -1 }
 *   nAstroPSolved = 3 : inv = { 1, 2, -1, -1 }
 *   nAstroPSolved = 4 : inv = { 0, 1,  2,  3 }
 *   nAstroPSolved = 5 : inv = { 1, 2,  3,  4 }
 *
 * Returns 0 on success, 1 when nAstroPSolved is outside 1..5 (inv is then
 * left filled with -1).
 */
int invMap(int nAstroPSolved,int *inv){
    int firstRow;   // row stored in inv[0]; the following slots count up from it
    int slot=0;

    while(slot<4)
        inv[slot++]=-1;

    switch(nAstroPSolved){
        case 1:
            return 0;
        case 2:
        case 4:
            firstRow=0;
            break;
        case 3:
        case 5:
            firstRow=1;
            break;
        default:
            return 1;
    }

    // only the first two slots are set for 2 or 3 solved parameters
    int nSlots=(nAstroPSolved>=4) ? 4 : 2;
    for(slot=0;slot<nSlots;slot++)
        inv[slot]=firstRow+slot;

    return 0;
}

/*
 * writeBinFiles: dumps the local part of the system to binary files.
 *
 * Every MPI process appends its mapNoss[myid] rows of matrixIndex,
 * systemMatrix, instrCol and knownTerms to the working files MI.bin, SM.bin,
 * II.bin and KT.bin inside directory <wpath>/<wrfileDir> (wrfileDir is
 * resolved after a chdir(wpath)). A working file is renamed to
 *   Gsr_dpccu3dpctavugsrgsrsystemrow_0000_<firstStar>_<lastStar>_000000_nrows-<rows>_{MI,SM,II,KT}.bin
 * when the block of 1000 stars it belongs to is complete, or when the last
 * process has written its last row.
 *
 * The processes work one after the other: rank r>0 first waits for three
 * messages from rank r-1 (last star id, rows already in the open file,
 * current file number) and rank r<nproc-1 sends the same three values to
 * rank r+1 when it has finished.
 *
 * Rank 0 additionally writes files filled with random values for the
 * external constraints (if comlsqr.extConstraint) and for the barycentric
 * constraints (if comlsqr.barConstraint).
 *
 * Parameters:
 *   systemMatrix : coefficients, nparam values per row
 *   matrixIndex  : comlsqr.multMI index values per row; the first one divided
 *                  by nAstroPSolved is used as the star id of the row
 *   instrCol     : comlsqr.nInstrPSolved values per row
 *   knownTerms   : one value per row
 *   wrfileDir    : output directory; the run is aborted if it cannot be entered
 *   wpath        : directory entered before wrfileDir
 *   comlsqr      : run configuration
 *   debugMode    : not used in this function
 *
 * NOTE(review): the return values of getcwd, chdir(wpath), fopen, fwrite,
 * rename and calloc are not checked. The local variable stillOpen is
 * assigned but never read.
 */
void writeBinFiles(double* systemMatrix,long* matrixIndex,int* instrCol,double* knownTerms,char* wrfileDir,char* wpath, struct comData comlsqr, int debugMode){
    
    int nproc,myid;
    FILE *fpSM,*fpMI,*fpII,*fpKT,*fpBar;
    int nparam;
    
    MPI_Status status;
    char actpath[1024];
    size_t sizePath=1020;
    int extConstraint;
    int barConstraint;

    
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    
    // number of coefficients stored for each row of systemMatrix
    nparam=comlsqr.nAstroPSolved+comlsqr.nAttP+comlsqr.nInstrPSolved+comlsqr.nGlobP;
    // remember the current directory (restored at the end) and move to the output one
    getcwd(actpath,sizePath);
    chdir(wpath);
    if(!(chdir(wrfileDir)==0))  {
        printf("wrfile directory does not exist. Aborting\n");
        MPI_Abort(MPI_COMM_WORLD,1);
        exit(EXIT_FAILURE);
    }

    
    long int rowCounter=1;
    long int rowWritten=0;
    long int stillOpen=-1;
    int nOfFile=1;  // files are numbered from 1 onwards; nOfFile is the file currently being written
    int lastStargsrId=-1;
    rowWritten=0;   // local rows already flushed to disk by this process
	rowCounter=0;   // local rows accumulated since the last flush
	long rowCounterPrev=0;   // rows put in the open file by the previous processes
    extConstraint=comlsqr.extConstraint;
    barConstraint=comlsqr.barConstraint;
	
    // wait for the previous process to finish and to hand over its state
    if(myid>0){
        MPI_Recv(&lastStargsrId, 1, MPI_INT, myid-1, 0, MPI_COMM_WORLD, &status);
        MPI_Recv(&rowCounterPrev, 1, MPI_LONG, myid-1, 1, MPI_COMM_WORLD, &status); // rows written in the last file
        MPI_Recv(&nOfFile, 1, MPI_INT, myid-1, 2, MPI_COMM_WORLD, &status);
//        printf("TP0 PE=%d ricevo da %d  lastStargsrId=%d rowCounterPrev=%ld nOfFile=%ld\n",myid,myid-1,lastStargsrId,rowCounterPrev,nOfFile);
    }
    if(lastStargsrId==1000*nOfFile-1)  // the previous process stopped on the last star of the current file: the file may have to be closed
    {
        if(lastStargsrId!=matrixIndex[0]/comlsqr.nAstroPSolved ){ // rename the previous file: it was complete, this process starts on the next star
            // the four names only differ in the final _MI/_SM/_II/_KT suffix
            char filenameCoeffBinMatrixIndex[512];
            strcpy(filenameCoeffBinMatrixIndex,"Gsr_dpccu3dpctavugsrgsrsystemrow_0000_");
            char tmp1[20],tmp2[20],tmp3[20];
            sprintf(tmp1,"%09d",1000*(nOfFile-1));   // first star of the file
            sprintf(tmp2,"%09d",lastStargsrId);      // last star of the file
            sprintf(tmp3,"%07ld",rowCounterPrev);    // rows contained in the file
            rowCounterPrev=0;
            
            strcat(filenameCoeffBinMatrixIndex, tmp1);
            strcat(filenameCoeffBinMatrixIndex, "_");
            strcat(filenameCoeffBinMatrixIndex, tmp2);
            strcat(filenameCoeffBinMatrixIndex, "_000000_nrows-");
            strcat(filenameCoeffBinMatrixIndex, tmp3);
            strcat(filenameCoeffBinMatrixIndex, "_MI.bin");

            char filenameCoeffBinSystemMatrix[512];
            strcpy(filenameCoeffBinSystemMatrix,"Gsr_dpccu3dpctavugsrgsrsystemrow_0000_");
            strcat(filenameCoeffBinSystemMatrix, tmp1);
            strcat(filenameCoeffBinSystemMatrix, "_");
            strcat(filenameCoeffBinSystemMatrix, tmp2);
            strcat(filenameCoeffBinSystemMatrix, "_000000_nrows-");
            strcat(filenameCoeffBinSystemMatrix, tmp3);
            strcat(filenameCoeffBinSystemMatrix, "_SM.bin");
            
            char filenameCoeffBinIstrIndex[512];
            strcpy(filenameCoeffBinIstrIndex,"Gsr_dpccu3dpctavugsrgsrsystemrow_0000_");
            strcat(filenameCoeffBinIstrIndex, tmp1);
            strcat(filenameCoeffBinIstrIndex, "_");
            strcat(filenameCoeffBinIstrIndex, tmp2);
            strcat(filenameCoeffBinIstrIndex, "_000000_nrows-");
            strcat(filenameCoeffBinIstrIndex, tmp3);
            strcat(filenameCoeffBinIstrIndex, "_II.bin");
            
            char filenameCoeffBinKnownTerms[512];
            strcpy(filenameCoeffBinKnownTerms,"Gsr_dpccu3dpctavugsrgsrsystemrow_0000_");
            strcat(filenameCoeffBinKnownTerms, tmp1);
            strcat(filenameCoeffBinKnownTerms, "_");
            strcat(filenameCoeffBinKnownTerms, tmp2);
            strcat(filenameCoeffBinKnownTerms, "_000000_nrows-");
            strcat(filenameCoeffBinKnownTerms, tmp3);
            strcat(filenameCoeffBinKnownTerms, "_KT.bin");
            stillOpen=0;
            rename("MI.bin", filenameCoeffBinMatrixIndex);
            rename("SM.bin", filenameCoeffBinSystemMatrix);
            rename("II.bin", filenameCoeffBinIstrIndex);
            rename("KT.bin", filenameCoeffBinKnownTerms);
            nOfFile++;
//            printf("TP3 PE=%d rinominati file %s %s %s %s\n",myid,filenameCoeffBinMatrixIndex, filenameCoeffBinSystemMatrix,filenameCoeffBinIstrIndex,filenameCoeffBinKnownTerms);

        }  // if(lastStargsrId!=matrixIndex[0]/comlsqr.nAstroPSolved
    } // if(lastStargsrId==1000*nOfFile-1
 
       
    // append mode: the working files may already hold rows of the previous processes
    fpMI=fopen("MI.bin","ab");
    fpSM=fopen("SM.bin","ab");
    fpII=fopen("II.bin","ab");
    fpKT=fopen("KT.bin","ab");
//    printf("TP4 PE=%d aperti in append  files MI.bin ecc\n",myid);
    for(long int i=0;i<comlsqr.mapNoss[myid];i++){
       if(i==comlsqr.mapNoss[myid]-1 ){ // this is the last observation of the current PE
//           printf("TP5 PE=%d  i=%ld\n",myid,i);
            // NOTE(review): the last row is flushed to the current file without
            // testing whether its star already belongs to the next file - confirm.
			rowCounter++;
			fwrite(&matrixIndex[rowWritten*comlsqr.multMI],sizeof(long int),rowCounter*comlsqr.multMI,fpMI);
                     
            // nparam writes of rowCounter doubles each, one directly after the other in memory
			for(int q=0;q<nparam;q++) {
					fwrite(&systemMatrix[rowWritten*nparam+rowCounter*q],sizeof(double),rowCounter,fpSM);
			}
           
			if(comlsqr.nInstrPSolved>0) fwrite(&instrCol[rowWritten*comlsqr.nInstrPSolved],sizeof(int),rowCounter*comlsqr.nInstrPSolved,fpII);
					
			fwrite(&knownTerms[rowWritten],sizeof(double),rowCounter,fpKT);
			fclose(fpMI);
			fclose(fpSM);
			fclose(fpII);
			fclose(fpKT);
//            printf("TP6 PE=%d  i=%ld, rowCounter=%ld rowWritten=%ld rowCounterPrev=%ld\n",myid,i,rowCounter,rowWritten,rowCounterPrev);

            // the last process also gives the working files their final names
            if(myid==nproc-1){
                 // NOTE(review): the hard-coded 2 presumably assumes multMI==2 - confirm
                 lastStargsrId=matrixIndex[comlsqr.mapNoss[myid]*2-comlsqr.multMI]/comlsqr.nAstroPSolved;
//                printf("TP7 PE=%d  i=%ld lastStargsrId=%d\n",myid,i,lastStargsrId);
               char filenameCoeffBinMatrixIndex[512];
               strcpy(filenameCoeffBinMatrixIndex,"Gsr_dpccu3dpctavugsrgsrsystemrow_0000_");
               char tmp1[20],tmp2[20],tmp3[20];
               sprintf(tmp1,"%09d",1000*(nOfFile-1));
               sprintf(tmp2,"%09d",lastStargsrId);
               sprintf(tmp3,"%07ld",rowCounter+rowCounterPrev);
               strcat(filenameCoeffBinMatrixIndex, tmp1);
               strcat(filenameCoeffBinMatrixIndex, "_");
               strcat(filenameCoeffBinMatrixIndex, tmp2);
               strcat(filenameCoeffBinMatrixIndex, "_000000_nrows-");
               strcat(filenameCoeffBinMatrixIndex, tmp3);
               strcat(filenameCoeffBinMatrixIndex, "_MI.bin");

               char filenameCoeffBinSystemMatrix[512];
               strcpy(filenameCoeffBinSystemMatrix,"Gsr_dpccu3dpctavugsrgsrsystemrow_0000_");
               strcat(filenameCoeffBinSystemMatrix, tmp1);
               strcat(filenameCoeffBinSystemMatrix, "_");
               strcat(filenameCoeffBinSystemMatrix, tmp2);
               strcat(filenameCoeffBinSystemMatrix, "_000000_nrows-");
               strcat(filenameCoeffBinSystemMatrix, tmp3);
               strcat(filenameCoeffBinSystemMatrix, "_SM.bin");
               char filenameCoeffBinIstrIndex[512];
               strcpy(filenameCoeffBinIstrIndex,"Gsr_dpccu3dpctavugsrgsrsystemrow_0000_");
               strcat(filenameCoeffBinIstrIndex, tmp1);
               strcat(filenameCoeffBinIstrIndex, "_");
               strcat(filenameCoeffBinIstrIndex, tmp2);
               strcat(filenameCoeffBinIstrIndex, "_000000_nrows-");
               strcat(filenameCoeffBinIstrIndex, tmp3);
               strcat(filenameCoeffBinIstrIndex, "_II.bin");
               char filenameCoeffBinKnownTerms[512];
               strcpy(filenameCoeffBinKnownTerms,"Gsr_dpccu3dpctavugsrgsrsystemrow_0000_");
               strcat(filenameCoeffBinKnownTerms, tmp1);
               strcat(filenameCoeffBinKnownTerms, "_");
               strcat(filenameCoeffBinKnownTerms, tmp2);
               strcat(filenameCoeffBinKnownTerms, "_000000_nrows-");
               strcat(filenameCoeffBinKnownTerms, tmp3);
               strcat(filenameCoeffBinKnownTerms, "_KT.bin");
               rename("MI.bin", filenameCoeffBinMatrixIndex);
               rename("SM.bin", filenameCoeffBinSystemMatrix);
               rename("II.bin", filenameCoeffBinIstrIndex);
               rename("KT.bin", filenameCoeffBinKnownTerms);
                rowCounterPrev=rowCounter;  // useless
//               printf("TP7.1 PE=%d  i=%ld, rowCounter=%ld rowWritten=%ld  rinominati i files MI.bin ecc FINITO!!\n",myid,i,rowCounter,rowWritten);
                break; 
           } //if(myid==nproc-1)
           // not the last process: the file stays open for the next one,
           // which is told how many rows it already contains
           rowCounterPrev+=rowCounter;
//           printf("TP7.2 PE=%d  i=%ld, rowCounter=%ld rowWritten=%ld rowCounterPrev=%ld PROX PE!!\n",myid,i,rowCounter,rowWritten,rowCounterPrev);
           break;  // leaves the for loop
			
        }//if(i==comlsqr.mapNoss[myid]-1 )
			
       // the star of row i still belongs to the current file: just count the row
       if ( (matrixIndex[i*comlsqr.multMI]/comlsqr.nAstroPSolved) < 1000*nOfFile ) {
					rowCounter++;
					continue;
		} else
        {
            // row i belongs to the next file: flush the rowCounter rows
            // accumulated so far (row i excluded) and close the current file
            fwrite(&matrixIndex[rowWritten*comlsqr.multMI],sizeof(long int),rowCounter*comlsqr.multMI,fpMI);
            fclose(fpMI);
            
            for(int q=0;q<nparam;q++){
                fwrite(&systemMatrix[rowWritten*nparam+rowCounter*q],sizeof(double),rowCounter,fpSM);
            }

            fclose(fpSM);
            
            if(comlsqr.nInstrPSolved>0) fwrite(&instrCol[rowWritten*comlsqr.nInstrPSolved],sizeof(int),rowCounter*comlsqr.nInstrPSolved,fpII);
            fclose(fpII);
            
            fwrite(&knownTerms[rowWritten],sizeof(double),rowCounter,fpKT);
            fclose(fpKT);
            char filenameCoeffBinMatrixIndex[512];
			char filenameCoeffBinSystemMatrix[512];
			char filenameCoeffBinInstrIndex[512];
			char filenameCoeffBinKnownTerms[512];
			strcpy(filenameCoeffBinMatrixIndex,"Gsr_dpccu3dpctavugsrgsrsystemrow_0000_");
			strcpy(filenameCoeffBinSystemMatrix,"Gsr_dpccu3dpctavugsrgsrsystemrow_0000_");
			strcpy(filenameCoeffBinInstrIndex,"Gsr_dpccu3dpctavugsrgsrsystemrow_0000_");
			strcpy(filenameCoeffBinKnownTerms,"Gsr_dpccu3dpctavugsrgsrsystemrow_0000_");
			
            char tmp1[20],tmp2[20],tmp3[20];
			sprintf(tmp1,"%09d",1000*(nOfFile-1));   // first star of the completed file
			
            sprintf(tmp2,"%09d",1000*nOfFile-1);     // last star of the completed file
			sprintf(tmp3,"%07ld",rowCounter+rowCounterPrev);   // rows of this and of the previous processes
			strcat(filenameCoeffBinMatrixIndex, tmp1);
			strcat(filenameCoeffBinMatrixIndex, "_");
			strcat(filenameCoeffBinMatrixIndex, tmp2);
			strcat(filenameCoeffBinMatrixIndex, "_000000_nrows-");
			strcat(filenameCoeffBinMatrixIndex, tmp3);
			strcat(filenameCoeffBinMatrixIndex, "_MI.bin");

			strcat(filenameCoeffBinSystemMatrix, tmp1);
			strcat(filenameCoeffBinSystemMatrix, "_");
			strcat(filenameCoeffBinSystemMatrix, tmp2);
			strcat(filenameCoeffBinSystemMatrix, "_000000_nrows-");
			strcat(filenameCoeffBinSystemMatrix, tmp3);
			strcat(filenameCoeffBinSystemMatrix, "_SM.bin");
			strcat(filenameCoeffBinInstrIndex, tmp1);
			strcat(filenameCoeffBinInstrIndex, "_");
			strcat(filenameCoeffBinInstrIndex, tmp2);
			strcat(filenameCoeffBinInstrIndex, "_000000_nrows-");
			strcat(filenameCoeffBinInstrIndex, tmp3);
			strcat(filenameCoeffBinInstrIndex, "_II.bin");
			strcat(filenameCoeffBinKnownTerms, tmp1);
            strcat(filenameCoeffBinKnownTerms, "_");
            strcat(filenameCoeffBinKnownTerms, tmp2);
            strcat(filenameCoeffBinKnownTerms, "_000000_nrows-");
            strcat(filenameCoeffBinKnownTerms, tmp3);
            strcat(filenameCoeffBinKnownTerms, "_KT.bin");
            rename("MI.bin", filenameCoeffBinMatrixIndex);
            rename("SM.bin", filenameCoeffBinSystemMatrix);
            rename("II.bin", filenameCoeffBinInstrIndex);
            rename("KT.bin", filenameCoeffBinKnownTerms);
//            printf("TP9 PE=%d  i=%ld Rinominati sui file GSR_... MI.bin ecc %s \n",myid,i,filenameCoeffBinMatrixIndex );
            // start new, empty working files for the next block of stars
            fpMI=fopen("MI.bin","wb");
            fpSM=fopen("SM.bin","wb");
            fpII=fopen("II.bin","wb");
            fpKT=fopen("KT.bin","wb");
            rowWritten+=rowCounter;
			rowCounter=1;  // row i is already on the next star
//            printf("TP10 PE=%d aperti in wb  files MI.bin ecc\n",myid);
            rowCounterPrev=0;
			
            nOfFile++;
        }//else // write to file
    }
    // hand the state over to the next process, which is waiting in MPI_Recv
    if(myid<nproc-1){
        // NOTE(review): the hard-coded 2 presumably assumes multMI==2 - confirm
        lastStargsrId=matrixIndex[comlsqr.mapNoss[myid]*2-comlsqr.multMI]/comlsqr.nAstroPSolved;
        
        MPI_Send(&lastStargsrId, 1, MPI_INT, myid+1, 0, MPI_COMM_WORLD);
        MPI_Send(&rowCounterPrev, 1, MPI_LONG, myid+1, 1, MPI_COMM_WORLD);
        MPI_Send(&nOfFile, 1, MPI_INT, myid+1, 2, MPI_COMM_WORLD);


    }

    if(extConstraint && myid==0){
        /// write extConstr for stars
        // one file for every group of at most numOfExtStarinFile stars, holding
        // nEqExtConstr*stars*nAstroPSolved random values in [-1,1]
        long nStar=comlsqr.nStar;
        int nEqExtConstr=comlsqr.nEqExtConstr;
        long nDegFreedomAtt=comlsqr.nDegFreedomAtt;
        int startStar=0;
        int endStar=0;
        int numOfExtStarinFile=1000000;
        int numOfExtStartoWrite;
        double randVal;
        short nAstroPSolved=comlsqr.nAstroPSolved;
        for(int nwr=0;nwr<nStar;nwr+=numOfExtStarinFile){   
           int localCounter=0;
           // the last group may contain fewer stars
           if(nStar>numOfExtStarinFile+nwr)numOfExtStartoWrite=numOfExtStarinFile;
            else numOfExtStartoWrite=nStar-nwr;
            startStar=nwr;
            endStar=startStar+numOfExtStartoWrite-1;
            double *buffArray;
            buffArray=(double *)calloc(numOfExtStartoWrite*nAstroPSolved*nEqExtConstr,sizeof(double));
 
            for(int j=0;j<nEqExtConstr;j++)
                for(int i=0;i<numOfExtStartoWrite*nAstroPSolved;i++){
                    randVal=(((double)rand())/RAND_MAX)*2 - 1.0;
                    // some parameters of each star are forced to zero,
                    // depending on the number of solved astrometric parameters
                    if(nAstroPSolved==3 && i%nAstroPSolved==0) randVal=0.;
                    if(nAstroPSolved==4 && i%nAstroPSolved>=2) randVal=0.;
                    if(nAstroPSolved==5 && i%nAstroPSolved==0) randVal=0.;
                    if(nAstroPSolved==5 && i%nAstroPSolved>2 && j<3) randVal=0.;
                    buffArray[localCounter]=randVal;
                    localCounter++;
                }
            
            
            // Gsr_nullspaceastrofit_0000_<startStar>_<endStar>_000000_nrows-<nEqExtConstr>.bin
            char filenameExtConstrAstro[512];
            char tmp1[20],tmp2[20],tmp3[20];
            sprintf(tmp1,"%09d",startStar);
            sprintf(tmp2,"%09d",endStar);
            sprintf(tmp3,"%09d",nEqExtConstr);
            strcpy(filenameExtConstrAstro,"Gsr_nullspaceastrofit_0000_");
            strcat(filenameExtConstrAstro, tmp1);
            strcat(filenameExtConstrAstro, "_");
            strcat(filenameExtConstrAstro, tmp2);
            strcat(filenameExtConstrAstro, "_000000_nrows-");
            strcat(filenameExtConstrAstro, tmp3);
            strcat(filenameExtConstrAstro,".bin");
            fpSM=fopen(filenameExtConstrAstro,"wb");
            fwrite(buffArray,sizeof(double),nEqExtConstr*numOfExtStartoWrite*nAstroPSolved,fpSM);
            fclose(fpSM);
            free(buffArray);
        } // for(in nwr=..
        /// write extConstr for Att
        // nDegFreedomAtt random values in [-1,1]
        double *buffArray;
        buffArray=(double *)calloc(nDegFreedomAtt,sizeof(double));
        for(int i=0;i<nDegFreedomAtt;i++){
            randVal=(((double)rand())/RAND_MAX)*2 - 1.0;
            buffArray[i]=randVal;
        }
        // the file name, including its "nrows-0660601" part, is fixed and
        // does not depend on nDegFreedomAtt
        char filenameExtConstrAtt[512];
        strcpy(filenameExtConstrAtt,"Gsr_nullspaceattitudefit_0000_000000_nrows-0660601_EC.bin");
        fpSM=fopen(filenameExtConstrAtt,"wb");
        fwrite(buffArray,sizeof(double),nDegFreedomAtt,fpSM);
        fclose(fpSM);
        free(buffArray);

    }//if (extConstraint

    if(barConstraint && myid==0){
        /// write barConstr for stars
        // same scheme as the external constraints above, with nEqBarConstr equations
        long nStar=comlsqr.nStar;
        int nEqBarConstr=comlsqr.nEqBarConstr;
        int startStar=0;
        int endStar=0;
        int numOfBarStarinFile=1000000;
        int numOfBarStartoWrite;
        double randVal;
        short nAstroPSolved=comlsqr.nAstroPSolved;
        for(int nwr=0;nwr<nStar;nwr+=numOfBarStarinFile){
            int localCounter=0;
            // the last group may contain fewer stars
            if(nStar>numOfBarStarinFile+nwr)numOfBarStartoWrite=numOfBarStarinFile;
            else numOfBarStartoWrite=nStar-nwr;
            startStar=nwr;
            endStar=startStar+numOfBarStartoWrite-1;
            double *buffArray;
            buffArray=(double *)calloc(numOfBarStartoWrite*nAstroPSolved*nEqBarConstr,sizeof(double));
            
            for(int j=0;j<nEqBarConstr;j++)
            for(int i=0;i<numOfBarStartoWrite*nAstroPSolved;i++){
                randVal=(((double)rand())/RAND_MAX)*2 - 1.0;
                if(nAstroPSolved==3 && i%nAstroPSolved==0) randVal=0.;
                if(nAstroPSolved==4 && i%nAstroPSolved>=2) randVal=0.;
                if(nAstroPSolved==5 && i%nAstroPSolved==0) randVal=0.;
                if(nAstroPSolved==5 && i%nAstroPSolved>2 && j<3) randVal=0.;
                buffArray[localCounter]=randVal;
                localCounter++;
            }
            
            
            // Gsr_barconstrastrofit_0000_<startStar>_<endStar>_000000_nrows-<nEqBarConstr>.bin
            char filenameBarConstrAstro[512];
            char tmp1[20],tmp2[20],tmp3[20];
            sprintf(tmp1,"%09d",startStar);
            sprintf(tmp2,"%09d",endStar);
            sprintf(tmp3,"%09d",nEqBarConstr);
            strcpy(filenameBarConstrAstro,"Gsr_barconstrastrofit_0000_");
            strcat(filenameBarConstrAstro, tmp1);
            strcat(filenameBarConstrAstro, "_");
            strcat(filenameBarConstrAstro, tmp2);
            strcat(filenameBarConstrAstro, "_000000_nrows-");
            strcat(filenameBarConstrAstro, tmp3);
            strcat(filenameBarConstrAstro,".bin");
            fpBar=fopen(filenameBarConstrAstro,"wb");
            fwrite(buffArray,sizeof(double),nEqBarConstr*numOfBarStartoWrite*nAstroPSolved,fpBar);
            fclose(fpBar);
            free(buffArray);
        } // for(in nwr=..
        
    }//if (barConstraint

    // wait for all the processes, then go back to the directory saved at the beginning
    MPI_Barrier(MPI_COMM_WORLD);
    chdir(wpath);
    chdir(actpath);
}

/*
 * Comparison callback for arrays of int (qsort/bsearch style).
 * a and b point to the two int values to compare.
 * Returns a negative value, zero or a positive value when *a is
 * respectively less than, equal to or greater than *b.
 *
 * The result is computed with comparisons rather than with the difference
 * *a - *b, which overflows (undefined behaviour, possibly giving the wrong
 * sign) when the operands have opposite signs and large magnitude.
 */
int cmpfunc (const void * a, const void * b)
{
    const int lhs = *(const int *) a;
    const int rhs = *(const int *) b;

    return (lhs > rhs) - (lhs < rhs);
}

// This function generates a pseudo-random integer n, where 0 <= n < max
// (for a positive max). It draws one value from rand().
int randint(int max) {
    const double unit = rand()/(RAND_MAX+1.0);   // uniform in [0,1)
    const double scaled = max*unit;

    return (int) scaled;   // truncation towards zero
}

// This function generates a pseudo-random integer n, where min <= n <= max.
// If the bounds are given in reverse order (min > max) they are exchanged.
// It draws one value from rand().
int randint1(int min, int max) {
    int lowest = min;
    int highest = max;

    if (lowest>highest) {
        lowest = max;
        highest = min;
    }

    const int span = highest-lowest+1;   // number of admissible values
    return lowest+(int)(span*(rand()/(RAND_MAX+1.0)));
}

// This function generates a pseudo-random long integer n, where 0 <= n < max
// (for a positive max). It draws one value from rand().
long randlong(long max) {
    const double unit = rand()/(RAND_MAX+1.0);   // uniform in [0,1)
    const double scaled = max*unit;

    return (long) scaled;   // truncation towards zero
}

// This function generates a pseudo-random long integer n, where min <= n <= max.
// If the bounds are given in reverse order (min > max) they are exchanged.
// It draws one value from rand().
long randlong1(long min, long max) {
    long lowest = min;
    long highest = max;

    if (lowest>highest) {
        lowest = max;
        highest = min;
    }

    const long span = highest-lowest+1;   // number of admissible values
    return lowest+(long)(span*(rand()/(RAND_MAX+1.0)));
}

// This function computes the instrument hash given the number of FoV, CCD, PixelColumn and TimeInterval.
// Since these numbers must fill in 1, 8, 11, and 11 bits respectively, they are checked against appropriate
// maximum values, and if their values are greater, the function returns -1.
// If the function is called with FoV, CCD, PixelColumn and TimeInterval it returns the instrId hash,
// if it is called with nFoVs, nCCDs, nPixelColumns and nTimeIntervals it returns the instrSetUp hash.
// NB: the FoV is treated differently since it can be 1 or 2, so what is actually hashed is FoV-1 to keep it into 1 bit.
//
// Bit layout of the result: bit 0 = FoV-1, bits 1..8 = CCD, bits 9..19 = PixelColumn, bits 20.. = TimeInterval.
// The function also returns -1 when FoV is smaller than 1 or when any other argument is negative:
// such values cannot be packed, and shifting a negative value to the left is undefined behaviour.
// The field limits are computed with integer shifts instead of pow(), so they are exact.
// NOTE(review): a value exactly equal to its limit (CCD=256, PixelColumn=2048, TimeInterval=2048) is still
// accepted, as before, although it needs one bit more than its field and therefore spills into the next one;
// it has been left unchanged because callers may pass counts equal to the limit - confirm.
long instr_hash(int FoV, int CCD, int PixelColumn, int TimeInterval) {
    enum { CCDOffset = 1, PixelOffset = 9, TimeOffset = 20 };

    const long FoVMaxValue = 1L << CCDOffset;                    // 2
    const long CCDMaxValue = 1L << (PixelOffset-CCDOffset);      // 256
    const long PixelMaxValue = 1L << (TimeOffset-PixelOffset);   // 2048
    const long TimeMaxValue = 1L << (TimeOffset-PixelOffset);    // 2048

    // values that cannot be represented at all
    if(FoV<1 || CCD<0 || PixelColumn<0 || TimeInterval<0)
        return -1;

    if(FoV>FoVMaxValue || CCD>CCDMaxValue || PixelColumn>PixelMaxValue || TimeInterval>TimeMaxValue)
        return -1; // Some parameter is greater than its maximum possible value. The hash cannot be calculated.

    return (((long) FoV-1)
            | (((long) CCD) << (CCDOffset))
            | (((long) PixelColumn) << (PixelOffset))
            | (((long) TimeInterval) << (TimeOffset)));
}


// (Description of fill_extract(), which is defined after swap():)
// This function extracts an integer in the range pos_min..pos_max which points to a position of the array <values>.
// This implies that values is an array with pos_max+1 positions.

// Exchanges the two long values pointed to by i and j.
void swap(long *i, long *j) {
    const long first = *i;
    const long second = *j;

    *i = second;
    *j = first;
}

// Extracts one element from the part of <values> that has not been used yet.
// A position is chosen in the range *pos_min..pos_max-1 (with randlong1(), or
// directly when only one position is left), the value found there is stored
// in *number, then it is exchanged with the element at *pos_min and *pos_min
// is advanced by one, so that the extracted value is not selected again.
// Returns 1 when, after the extraction, *pos_min has reached pos_max
// (no positions left), 0 otherwise.
int fill_extract(long *values, long *pos_min, long pos_max, long *number) {
    const long last = pos_max-1;   // last selectable position
    const long position = (*pos_min==last) ? *pos_min : randlong1(*pos_min, last);

    *number=values[position];
    swap(&values[*pos_min], &values[position]);
    ++(*pos_min);

    return (*pos_min>=pos_max) ? 1 : 0;
}

struct nullSpace cknullSpace(double * systemMatrix,long * matrixIndex,double *attNS,struct comData  comlsqr){
    struct nullSpace results;
    int nproc, myid;
    long nunkSplitNS;
    double * nullSpaceVect;
    double * prodNS;
    long nElements, nStar;
    int nEqExtConstr;
    int nparam;
    int nLocalStar;
    int firstStarConstr,lastStarConstr;
    int nOfElextObs,numOfExtStar,numOfExtAttCol,startingAttColExtConstr;
    short nAstroPSolved,nAttParAxis,nAttAxes;
    double *nullSpaceFPN;
    double *productNS;
    int npeSend,npeRecv;
    double sum,extConstrW;
    long sumVer;
    int setBound[4];
    long int l1,k,l,j;
    long int nDegFreedomAtt,localAstroMax,offsetAttParam;
    long int *mapNoss, *mapNcoeff;
    time_t seconds[2], tot_sec;
    int ntasks;
    int nthreads;

    int **mapStar;
    
    MPI_Status status;
    MPI_Request req1;

    MPI_Comm_size(MPI_COMM_WORLD, &nproc);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    
    
    nEqExtConstr=comlsqr.nEqExtConstr;
    firstStarConstr=comlsqr.firstStarConstr;
    lastStarConstr=comlsqr.lastStarConstr;
    nAstroPSolved=comlsqr.nAstroPSolved;
    nOfElextObs=comlsqr.nOfElextObs;
    nDegFreedomAtt=comlsqr.nDegFreedomAtt;
    nAttParAxis=comlsqr.nAttParAxis;
    localAstroMax=comlsqr.VrIdAstroPDimMax*nAstroPSolved;
    offsetAttParam=comlsqr.offsetAttParam;
    numOfExtStar=comlsqr.numOfExtStar;
    nAttAxes=comlsqr.nAttAxes;
    mapNcoeff=comlsqr.mapNcoeff;
    mapNoss=comlsqr.mapNoss;
    numOfExtAttCol=comlsqr.numOfExtAttCol;
    startingAttColExtConstr=comlsqr.startingAttColExtConstr;
    setBound[0]=comlsqr.setBound[0];
    setBound[1]=comlsqr.setBound[1];
    setBound[2]=comlsqr.setBound[2];
    setBound[3]=comlsqr.setBound[3];
    extConstrW=comlsqr.extConstrW;
    int multMI=comlsqr.multMI;
    mapStar=comlsqr.mapStar;
    nStar=comlsqr.nStar;
    int nAttParam=comlsqr.nAttParam;
    int nInstrParam=comlsqr.nInstrParam;
    int nGlobalParam=comlsqr.nGlobalParam;
    
    ntasks=comlsqr.ntasks;
    nthreads=comlsqr.nthreads;
    
    
// errore ==>    nparam=nAstroPSolved+comlsqr.nAttP+comlsqr.nInstrP+comlsqr.nGlobP;
    nparam=nAstroPSolved+comlsqr.nAttP+comlsqr.nInstrPSolved+comlsqr.nGlobP;
    nLocalStar=comlsqr.mapStar[myid][1]-comlsqr.mapStar[myid][0]+1;
    
    nElements = mapNoss[myid]+nEqExtConstr;
    
    nullSpaceFPN = (double *) calloc(nAstroPSolved*nEqExtConstr, sizeof(double));
    if (!nullSpaceFPN)
        exit(err_malloc("nullSpaceFPN",myid));
     
    productNS = (double *) calloc(nElements, sizeof(double));
    if (!productNS)
        exit(err_malloc("productNS",myid));

     
       
    npeSend=myid-1;
    npeRecv=myid+1;
        
    for(int i=0; i<nEqExtConstr;i++){
        if(myid>0){
        MPI_Isend(&systemMatrix[mapNoss[myid]*nparam+i*nOfElextObs],nAstroPSolved, MPI_DOUBLE, npeSend, 1,MPI_COMM_WORLD, &req1);
        }
        if(myid<nproc-1){
            MPI_Recv(&nullSpaceFPN[nAstroPSolved*i], nAstroPSolved, MPI_DOUBLE, npeRecv, 1,MPI_COMM_WORLD, &status);
        }
         if(myid>0) MPI_Wait(&req1,&status);
        
        MPI_Barrier(MPI_COMM_WORLD);
    }
        
        
        
    prodNS = (double *) calloc(nElements, sizeof(double));
    if (!prodNS)
            exit(err_malloc("prodNS",myid));

    
    
    nunkSplitNS=localAstroMax + nAttParam+nInstrParam+nGlobalParam;
    
   
    nullSpaceVect = (double *) calloc(nunkSplitNS, sizeof(double));
    if (!nullSpaceVect) exit(err_malloc("nullSpaceVect",myid));
    
    for(int ic=0; ic<nEqExtConstr;ic++){
        seconds[0]=time(NULL);
        for(int j1=localAstroMax;j1<nunkSplitNS;j1++)
            nullSpaceVect[j1]=0.0;
            for(int j1=0;j1<nElements;j1++)
                productNS[j1]=0.0;
        for(int j1=0;j1<(lastStarConstr-firstStarConstr+1)*nAstroPSolved;j1++){
            nullSpaceVect[j1]= systemMatrix[mapNoss[myid]*nparam+j1+ic*comlsqr.nOfElextObs]/extConstrW;
        }
        if(comlsqr.mapStar[myid][1]>lastStarConstr){
            for(int j=0;j<nAstroPSolved;j++)
                nullSpaceVect[(lastStarConstr-firstStarConstr+1)*nAstroPSolved+j]=nullSpaceFPN[ic*nAstroPSolved+j]/extConstrW;
        }

        if(ic<3)
        {
            for (int m=0;m<nDegFreedomAtt;m++)
                    nullSpaceVect[localAstroMax+ic*nDegFreedomAtt+m]=comlsqr.nullSpaceAttfact/extConstrW;
        } else{
            for (int m=0;m<nDegFreedomAtt;m++)
                nullSpaceVect[localAstroMax+(ic-3)*nDegFreedomAtt+m]=attNS[m]/extConstrW;
            
        }
////#pragma omp parallel private(myid, sum, sumVer  k, l1, l2, l, j,tid,nthreads) shared(mapNoss,comlsqr,nullSpaceVect,systemMatrix,matrixIndex,productNS)
{
////#ifdef OMP
///            tid = omp_get_thread_num();
///            nthreads = omp_get_num_threads();
//            if(myid==39) printf("TP00.1 PE=%d tid=%d nthreads=%d mapNoss[myid]=%ld, nElements=%ld, multMI=%d\n",myid,tid,nthreads,mapNoss[myid],nElements,multMI);
//            if(myid==39) printf("TP00.1 PE=%d  mapNoss[myid]=%ld\n",myid,mapNoss[myid]);
////#endif
///#pragma omp for
        for(int i=0;i<mapNoss[myid];i++){
                sum=0.0;
                sumVer=0;
                k=0;
                l1=nparam*i;
                int lset=0;

            // sumAstro=(nAstroPSolved*nAstroPSolved*((j-1)*nStar+(multMI*i)/nAstroPSolved)+nAstroPSolved*(nAstroPSolved+1)/2)
            // sumAtt  =4*((j-1)*nDegFreedomAtt+(multMI*i+1))+6
            // sumVer  = sumAstro + sumAtt

            long chkSumVer=(nAstroPSolved*nAstroPSolved*(ic*nStar+matrixIndex[multMI*i]/nAstroPSolved)+(nAstroPSolved*(nAstroPSolved-1))/2)+4*(ic*nDegFreedomAtt+matrixIndex[multMI*i+1]-nStar*nAstroPSolved)+6;
            
            
                for(l=l1;l<l1+setBound[2];l++){
                        if(lset<setBound[1])
                        {
                            if(lset==0)
                            {
/*                                 if(multMI*i>=mapNoss[myid]*multMI){
                                    printf("ERROR: PE=%d i=%d lset=%d multMI*i=%d greather than mapNoss[myid]*multMI=%ld\n",myid,i,lset,multMI*i,mapNoss[myid]*multMI);
                                    continue;
			        }*/
                               long numOfStarPos=matrixIndex[multMI*i]/nAstroPSolved;
                                j=(numOfStarPos-comlsqr.mapStar[myid][0])*nAstroPSolved;
                            }
                            else
                                j++;
                        }
                        if(lset>=setBound[1] && lset<setBound[2])
                        {
                            if(((lset-setBound[1]) % nAttParAxis)==0) {
/*                                if(multMI*i>=mapNoss[myid]*multMI){
                                    printf("ERROR: PE=%d i=%d lset=%d multMI*i+(multMI-1)=%d greather than mapNoss[myid]*multMI=%ld\n",myid,i,lset,multMI*i+(multMI-1),mapNoss[myid]*multMI);
                                    continue;
                                }*/
                                j=matrixIndex[multMI*i+(multMI-1)]+((lset-setBound[1])/nAttParAxis)*nDegFreedomAtt+(localAstroMax-offsetAttParam);
                            }
                            else
                                j++;
                        }
                    sum=sum+systemMatrix[l]*nullSpaceVect[j];
                    
                    double NSVal;
                    if(lset<setBound[1]){
                            NSVal=(matrixIndex[multMI*i]+lset)+ic*nStar*nAstroPSolved;
                            sumVer=sumVer+1.0*NSVal;
                    }
                    else{
                                if(ic==0 || ic==3){
                                
                                if(lset<setBound[1]+4)
                                    NSVal=(matrixIndex[multMI*i+1]-nStar*nAstroPSolved)+lset-setBound[1]+ic*nDegFreedomAtt;
                                else
                                    NSVal=0;
                                }
                               
                            if(ic==1 || ic==4){
                                if(lset>=setBound[1]+4 && lset<setBound[1]+8)
                                    NSVal=(matrixIndex[multMI*i+1]-nStar*nAstroPSolved)+ic*nDegFreedomAtt+lset-(setBound[1]+4);
                                else
                                    NSVal=0;
                            }
                            if(ic==2 || ic==5){
                                if(lset>=setBound[1]+8)
                                    NSVal=(matrixIndex[multMI*i+1]-nStar*nAstroPSolved)+ic*nDegFreedomAtt+lset-(setBound[1]+8);
                                else
                                    NSVal=0;
                            }
                            sumVer=sumVer+1.0*NSVal;
                    }
                     lset++;
                }//for(l
/*                    if(i>=nElements){
                            printf("ERROR: PE=%d i=%d   greather than nElements=%ld\n",myid,i ,nElements);
                            continue;
                    }*/
               if(sumVer != chkSumVer){
                   printf("ERROR: PE=%d NullSapce Equation ic=%d, sumVer[%d]=%d and chkSumVer=%ld are not equal\n",myid,ic,i,sumVer,    chkSumVer);
                   MPI_Abort(MPI_COMM_WORLD, 0);
                   exit(1);
               }
                productNS[i]=sum;
        }//for i
        ////////////   in case of ExtConstr
/*  TO BE DECIDED orthogonalty of nullspace base vectors
         for(i2=0;i2<nEqExtConstr;i2++ ){
         sum=0.0;
         for(j2=0;j2<numOfExtStar*nAstroPSolved;j2++){
         sum+=(systemMatrix[mapNcoeff[myid]+j2+i2*nOfElextObs]/extConstrW)*nullSpaceVect[j2];
         }
         for(na=0;na<nAttAxes;na++)
         for(j2=0;j2<numOfExtAttCol;j2++){
         sum+=(systemMatrix[mapNcoeff[myid]+numOfExtStar*nAstroPSolved+j2+i2*nOfElextObs+na*numOfExtAttCol]/extConstrW)*nullSpaceVect[localAstroMax+startingAttColExtConstr+na*nDegFreedomAtt+j2];
         }
         productNS[mapNoss[myid]+i2]+=sum;
         }//for i2
         
         double *kcopy;
         kcopy=(double *) calloc(nEqExtConstr, sizeof(double));
         MPI_Allreduce(&productNS[mapNoss[myid]],kcopy,nEqExtConstr,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
         for(int i3=0;i3<nEqExtConstr;i3++)
         {
         productNS[mapNoss[myid]+i3]=kcopy[i3];
         }
         free(kcopy);
         ////////////   in case of ExtConstr
         */
}//pragma
        double normLoc;
        //FV_ aggiungere ntasks a cblas_dnrm2
        normLoc=cblas_dnrm2(mapNoss[myid],productNS,1);
        double normLoc2=normLoc*normLoc;
        double nrmGlob;
        MPI_Allreduce(&normLoc2, &nrmGlob,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
        results.vectNorm[ic]=sqrt(nrmGlob);
        double localMin=productNS[0];
        double localMax=productNS[0];
        double globalSum=0.0;
        double localSum=0.0;
        for(int j=0;j<comlsqr.mapNoss[myid];j++){
//            if(myid==39 && j==1456748) printf("TP0.1 PE=%d   localSum=%lf localMin=%lf localMax=%lf productNS[1456748]=%lf\n",myid,localSum,localMin,localMax,productNS[1456748]);
           if(localMin>productNS[j]) localMin=productNS[j];
            if(localMax<productNS[j]) localMax=productNS[j];
            localSum+=productNS[j];
//                if(myid==39 && j==1456748) printf("TP0.2 PE=%d   localSum=%lf localMin=%lf localMax=%lf productNS[1456748]=%lf\n",myid,localSum,localMin,localMax,productNS[1456748]);
        }
//       if(myid==39) printf("TP1 PE=%d   localSum=%lf localMin=%lf localMax=%lf productNS[1456748]=%lf\n",myid,localSum,localMin,localMax,productNS[1456748]);
       MPI_Allreduce(&localMin,&results.compMin[ic], 1, MPI_DOUBLE, MPI_MIN,MPI_COMM_WORLD );
       MPI_Allreduce(&localMax,&results.compMax[ic], 1, MPI_DOUBLE, MPI_MAX,MPI_COMM_WORLD );
       MPI_Allreduce(&localSum,&globalSum, 1, MPI_DOUBLE, MPI_SUM,MPI_COMM_WORLD );
       double avg=globalSum/comlsqr.nobs;
       results.compAvg[ic]=avg;

       double localsqrsum=0;
       double globalsqrsum=0;
       for(int j=0;j<comlsqr.mapNoss[myid];j++)
               localsqrsum+=productNS[j]*productNS[j];
               
        MPI_Allreduce(&localsqrsum,&globalsqrsum, 1, MPI_DOUBLE, MPI_SUM,MPI_COMM_WORLD );
        results.compVar[ic]=sqrt((globalsqrsum/comlsqr.nobs)-avg*avg);
 
        if(myid==0){
               FILE *fpNS;
               char icstr[10];
               char *fileName;
               fileName=(char *) malloc(512);
               sprintf(icstr,"%1d",ic);
               strcpy(fileName, "Gsr_NullSpaceProduct_");
               strcat(fileName, icstr);
               strcat(fileName, ".dat");
               fpNS=fopen(fileName,"w");
               if (!fpNS)
               {
                   printf("PE=%d Error while open %s\n",myid,fileName);
                   MPI_Abort(MPI_COMM_WORLD,1);
                   exit(EXIT_FAILURE);
               }
               long t1=comlsqr.nobs/100;
               int bins=MIN(1000,t1);
               int *isto;
               double binWidth=(results.compMax[ic]-results.compMin[ic])/bins;
                 int ix;
                isto=(int *) calloc(bins, sizeof(int));
                 for( int j=0;j<mapNoss[myid];j++){
                     if(productNS[j]==results.compMax[ic]){
                         ix=bins-1;
                         isto[ix]++;
                         continue;
                     }
                     ix=(productNS[j]-results.compMin[ic])/binWidth;
                     if(ix >=bins){
                         printf("Error while computing local myid=0 ix=%d on cknullSpace at element %d on equation %d. prodNS=%lf greater than max=%lf\n",ix,j,ic,productNS[j],results.compMax[ic]);
                         MPI_Abort(MPI_COMM_WORLD,1);
                         exit(EXIT_FAILURE);
                        
                     }
                     isto[ix]++;
                 }
              for(int k=1;k<nproc;k++){
                   MPI_Recv(productNS, mapNoss[k], MPI_DOUBLE, k, 0, MPI_COMM_WORLD,&status);
//                  if(k==39) printf("TP2 from PE=%k  productNS[1456748]=%lf\n",k,productNS[1456748]);
                  for( int j=0;j<mapNoss[k];j++){
                      if(productNS[j]==results.compMax[ic]){
                          ix=bins-1;
                          isto[ix]++;
                          continue;
                      }
                      ix=(productNS[j]-results.compMin[ic])/binWidth;
                      if(ix >=bins){
                          printf("Error while computing remote peid=%d ix=%d on cknullSpace at element %d on equation %d. prodNS=%lf greater than max=%lf\n",k,ix,j,ic,productNS[j],results.compMax[ic]);
                          MPI_Abort(MPI_COMM_WORLD,1);
                          exit(EXIT_FAILURE);
                          
                      }
                      isto[ix]++;
                  }

               }
              fprintf(fpNS, "%le\n%le\n%le\n%le\n%le\n%d\n%le\n",results.compMin[ic],results.compMax[ic],results.compAvg[ic],results.compVar[ic],
                      results.vectNorm[ic],bins,binWidth);
            for(int j=0;j<bins;j++)
                fprintf(fpNS, "%d\n",isto[j]);
               fclose(fpNS);
               free(fileName);
                free(isto);
                seconds[1] = time(NULL);
                tot_sec = seconds[1] - seconds[0];
                 printf("Istogram nullSpace file for Nullspace row %d, in %ld sec\n",ic,tot_sec);

             } else{
//                 if(myid==39) printf("TP3 PE=%d  productNS[1456748]=%lf\n",myid,productNS[1456748]);

               MPI_Send(productNS, mapNoss[myid], MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
        }//if myid==0
        
        } //for ic
    free(nullSpaceFPN);
    free(productNS);
    free(prodNS);
    free(nullSpaceVect);
    return results;
}
/*
 * Copy the `len` characters of `input` that start at index `offset` into
 * `dest` and NUL-terminate the result.
 *
 * input  : NUL-terminated source string.
 * offset : index of the first character to copy (must be >= 0).
 * len    : number of characters to copy (must be >= 0).
 * dest   : destination buffer; it must have room for len + 1 bytes because a
 *          terminating '\0' is always written after the copied characters.
 *
 * Returns `dest` on success. Returns NULL, leaving `dest` untouched, when a
 * pointer is NULL, when `offset` or `len` is negative, or when the requested
 * range does not lie entirely inside `input`.
 */
char* subString (const char* input, int offset, int len, char* dest)
{
    if (input == NULL || dest == NULL || offset < 0 || len < 0)
    {
        return NULL;
    }

    const size_t input_len = strlen (input);
    const size_t start = (size_t) offset;
    const size_t count = (size_t) len;

    /* Expressed as a subtraction so that offset + len can never overflow. */
    if (start > input_len || count > input_len - start)
    {
        return NULL;
    }

    /* The range was validated above, so a plain bounded copy is enough;
       strncpy would not append the terminator in this situation. */
    memcpy (dest, input + start, count);
    dest[count] = '\0';
    return dest;
}

/*--------------------------------------------------------------------------*/
// Evaluate the shifted Legendre polynomial of degree `deg` at `x`.
// Only the degrees 0, 1 and 2 are implemented, written in terms of the
// centred abscissa (x - 0.5):
//   deg 0 -> 1
//   deg 1 -> 2*(x - 0.5)
//   deg 2 -> 6*(x - 0.5)^2 - 0.5
// Any other degree returns -1.0, which callers use as an error marker.
// Note that degree 1 evaluated at x == 0 also yields exactly -1.0.
double legendre(int deg, double x) {
	const double centred = x - 0.5;

	if (deg == 0)
		return 1.0;
	if (deg == 1)
		return 2.0*centred;
	if (deg == 2)
		return 6.0*centred*centred - 0.5;

	// Unsupported degree.
	return -1.0;
}

/*
 * Fill the three caller-allocated arrays that describe the instrument
 * constraint equations in sparse form:
 *
 *   instrCoeffConstr : non-zero coefficients of all equations, back to back
 *                      (comlsqr.nElemIC entries expected);
 *   instrColsConstr  : column index of each coefficient (same length);
 *   instrConstrIlung : number of coefficients of each equation
 *                      (comlsqr.nOfInstrConstr entries expected).
 *
 * Equations are emitted in this order, and only when comlsqr.lsInstrFlag is set:
 *   1. large scale AL : one equation per time interval, coefficient 1.0 for
 *                       every (FoV, CCD) pair, columns from offsetCdelta_eta;
 *   2. large scale AC : one equation per (time interval, FoV) pair,
 *                       coefficient 1.0 for every CCD, columns from
 *                       offsetCdelta_zeta;
 *   3. small scale AL : only if comlsqr.ssInstrFlag is also set; for each CCD
 *                       one equation per Legendre degree 0..2, coefficients
 *                       legendre(degree, (pixel+0.5)/nPixelColumns), columns
 *                       from offsetCnu;
 *   4. small scale AC : same as 3. with columns from offsetCDelta_eta_3.
 *
 * Returns 1 on success. Returns 0, after printing a message, when legendre()
 * reports an error (arrays are then left partially filled) or when the number
 * of equations/elements produced does not match the totals stored in comlsqr.
 */
int computeInstrConstr (struct comData comlsqr,double * instrCoeffConstr,int * instrColsConstr,int * instrConstrIlung)
{
    const int useLargeScale = comlsqr.lsInstrFlag;
    const int useSmallScale = comlsqr.ssInstrFlag;

    /* Instrument dimensions. */
    const long nFoVs = 1+comlsqr.instrConst[0];
    const long nCCDs = comlsqr.instrConst[1];
    const long nPixelColumns = comlsqr.instrConst[2];
    const long nTimeIntervals = comlsqr.instrConst[3];

    /* Declared length of each kind of equation and expected totals. */
    const int lenLargeAL = comlsqr.nElemICLSAL;
    const int lenLargeAC = comlsqr.nElemICLSAC;
    const int lenSmall = comlsqr.nElemICSS;
    const int expectedEqs = comlsqr.nOfInstrConstr;
    const int expectedElems = comlsqr.nElemIC;

    int elemsWritten = 0;   /* coefficients actually stored            */
    int eqsWritten = 0;     /* equations actually emitted              */
    int elemsDeclared = 0;  /* sum of the lengths stored per equation  */

    if(useLargeScale){
        /* Large scale, along scan: one equation per time interval. */
        for(int tInt=0; tInt<nTimeIntervals; tInt++) {
            instrConstrIlung[eqsWritten++] = lenLargeAL;
            elemsDeclared += lenLargeAL;
            for(int fov=0; fov<nFoVs; fov++) {
                for(int ccd=0; ccd<nCCDs; ccd++) {
                    instrColsConstr[elemsWritten] = comlsqr.offsetCdelta_eta + fov*nCCDs*nTimeIntervals+ccd*nTimeIntervals+tInt;
                    instrCoeffConstr[elemsWritten] = 1.0;
                    elemsWritten++;
                }
            }
        }

        /* Large scale, across scan: one equation per (time interval, FoV). */
        for(int tInt=0; tInt<nTimeIntervals; tInt++) {
            for(int fov=0; fov<nFoVs; fov++) {
                instrConstrIlung[eqsWritten++] = lenLargeAC;
                elemsDeclared += lenLargeAC;
                for(int ccd=0; ccd<nCCDs; ccd++) {
                    instrColsConstr[elemsWritten] = comlsqr.offsetCdelta_zeta + fov*nCCDs*nTimeIntervals+ccd*nTimeIntervals+tInt;
                    instrCoeffConstr[elemsWritten] = 1.0;
                    elemsWritten++;
                }
            }
        }

        if(useSmallScale){
            /* Small scale, along scan: per CCD, one equation for each
               Legendre degree 0..2. */
            for(int ccd=0; ccd<nCCDs; ccd++) {
                for(int deg=0; deg<3; deg++) {
                    instrConstrIlung[eqsWritten++] = lenSmall;
                    elemsDeclared += lenSmall;
                    for(int pix=0; pix<nPixelColumns; pix++) {
                        const double x = (pix+0.5)/nPixelColumns;
                        const double coeff = legendre(deg,x);
                        instrCoeffConstr[elemsWritten] = coeff;
                        if(coeff==-1.0) {
                            printf("Error from legendre function when i=%d, j=%d, k=%d\n", ccd, deg, pix);
                            return 0;
                        }
                        instrColsConstr[elemsWritten] = comlsqr.offsetCnu + ccd*nPixelColumns + pix;
                        elemsWritten++;
                    }
                }
            }

            /* Small scale, across scan: same layout, different column base. */
            for(int ccd=0; ccd<nCCDs; ccd++) {
                for(int deg=0; deg<3; deg++) {
                    instrConstrIlung[eqsWritten++] = lenSmall;
                    elemsDeclared += lenSmall;
                    for(int pix=0; pix<nPixelColumns; pix++) {
                        const double x = (pix+0.5)/nPixelColumns;
                        const double coeff = legendre(deg,x);
                        instrCoeffConstr[elemsWritten] = coeff;
                        if(coeff==-1.0) {
                            printf("Error from legendre function when i=%d, j=%d, k=%d\n", ccd, deg, pix);
                            return 0;
                        }
                        instrColsConstr[elemsWritten] = comlsqr.offsetCDelta_eta_3 + ccd*nPixelColumns + pix;
                        elemsWritten++;
                    }
                }
            }
        }
    }

    /* Cross-check what was produced against the totals stored in comlsqr. */
    if(eqsWritten!=expectedEqs) {
        printf("SEVERE ERROR  counterEqs =%d does not coincide with nOfInstrConstr=%d\n", eqsWritten, expectedEqs);
        return 0;
    }
    if(elemsWritten!=expectedElems) {
        printf("SEVERE ERROR  counterElem =%d does not coincide with nElemIC=%d\n", elemsWritten, expectedElems);
        return 0;
    }
    if(elemsDeclared!=expectedElems) {
        printf("SEVERE ERROR   elemAcc =%d does not coincide with nElemIC=%d\n", elemsDeclared, expectedElems);
        return 0;
    }
    return 1;
}
/*
 * Rotate, in place, the first six coefficients of every row of `instrCoeff`
 * so that the old entries 4 and 5 come first:
 *
 *     new[0..5] = old[4], old[5], old[0], old[1], old[2], old[3]
 *
 * instrCoeff : row-major array of nrows rows, each `repeat` entries long.
 * repeat     : row stride; must be at least 6.
 * nrows      : number of rows to process.
 *
 * The rotation is done row by row through two temporaries, so no scratch
 * buffer is allocated (the array can be very large and an allocation failure
 * previously led to a NULL dereference). Row indices are `long`, matching the
 * type of the bounds. Entries beyond the sixth in a row are left untouched.
 * Nothing is done when `instrCoeff` is NULL or `repeat` is smaller than 6,
 * because the six-element rotation would step outside the row.
 */
void swapInstrCoeff(double * instrCoeff, long repeat, long nrows){
    enum { N_ROTATED = 6 };

    if (instrCoeff == NULL || repeat < N_ROTATED)
        return;

    for (long r = 0; r < nrows; r++){
        double *row = instrCoeff + r*repeat;
        const double old4 = row[4];
        const double old5 = row[5];

        /* Shift entries 0..3 up by two, highest index first. */
        row[5] = row[3];
        row[4] = row[2];
        row[3] = row[1];
        row[2] = row[0];
        row[1] = old5;
        row[0] = old4;
    }
    return;
}
/*
 * Memory estimate, in GiB, for a problem with nStar stars and nobs
 * observations. The terms are the ones used by simfullram:
 *   systemMatrix : nparam*nobs entries of 8 bytes
 *   knownTerms   : nobs entries of 8 bytes
 *   matrixIndex  : 2*nobs entries of 8 bytes
 *   instrIndex   : 6*nobs entries of 4 bytes
 *   five auxiliary vectors (precondVect, vVect, wVect, xSolution,
 *   standardError) of nStar*5+nAttParam+nInstrParam entries of 8 bytes each.
 * The arithmetic is kept in single precision and in the original order.
 */
static float simfullramEstimateGB(long nStar, long nobs, int nparam, int nAttParam, int nInstrParam){
    const long gigaByte=1024*1024*1024;
    const long ncoeff = nparam * nobs; // total number of non-zero coefficients of the system

    const float smGB=(float)(ncoeff)*8/(gigaByte);  //systemMatrix
    const float ktGB=(float)(nobs)*8/(gigaByte);     //knownTerms
    const float miGB=(float)(nobs*2)*8/(gigaByte);   //matrixIndex
    const float iiGB=(float)(nobs*6)*4/(gigaByte);   //InstrIndex
    const float auxGB=(float)(nStar*5+nAttParam+nInstrParam+0)*8/(gigaByte); //one auxiliary vector

    return smGB+miGB+ktGB+iiGB+5*auxGB;
}

/*
 * Grow nStar and nobs until the estimated memory footprint is as close as
 * possible to, but still below, memGlobal (GiB).
 *
 * nStar, nobs : in/out. On return they hold the largest sizes found whose
 *               estimate is below memGlobal. They are left unchanged when the
 *               initial estimate already exceeds or equals memGlobal.
 * memGlobal   : memory budget in GiB.
 * nparam, nAttParam, nInstrParam : sizes entering the estimate.
 *
 * Search strategy: first a geometric phase (nStar doubled, nobs tripled at
 * every step) until the budget is reached, then, restarting from the last
 * configuration that fitted, a linear phase adding 10000 observations per step.
 *
 * Returns memGlobal when the initial configuration does not fit, otherwise
 * the estimate (GiB) of the configuration stored in nStar/nobs.
 *
 * Inputs are expected to be non-negative.
 */
float simfullram(long &nStar, long &nobs, float memGlobal, int nparam, int nAttParam, int nInstrParam){
    float memGB=simfullramEstimateGB(nStar,nobs,nparam,nAttParam,nInstrParam);
    if(memGlobal < memGB){
        return memGlobal;
    }

    // Start the "last fitting configuration" from the input values, so that
    // it is well defined even when neither loop below executes (estimate
    // exactly equal to the budget).
    long prevnStar=nStar;
    long prevnobs=nobs;
    float prevmemGB=memGB;

    // Geometric phase. Skipped when both sizes are zero: doubling/tripling
    // zero never changes the estimate and the loop would not terminate.
    if(nStar>0 || nobs>0){
        while(memGB < memGlobal){
            prevnStar=nStar;
            prevnobs=nobs;
            prevmemGB=memGB;
            nStar*=2;
            nobs*=3;
            memGB=simfullramEstimateGB(nStar,nobs,nparam,nAttParam,nInstrParam);
        }
        // Step back to the last configuration that fitted.
        nStar=prevnStar;
        nobs=prevnobs;
        memGB=prevmemGB;
    }

    // Linear phase: refine the number of observations only.
    while(memGB < memGlobal){
        prevnStar=nStar;
        prevnobs=nobs;
        prevmemGB=memGB;
        nobs+=10000;
        memGB=simfullramEstimateGB(nStar,nobs,nparam,nAttParam,nInstrParam);
    }
    nStar=prevnStar;
    nobs=prevnobs;

    return prevmemGB;
}

